## R Programming Codes submitted to Peer J Computer Science Journal ## For MSCI ### Weekly Data w.data.msci.all <- read.table("FilePath\Weekly_data_MSCI_with_MSCI_Covid.txt", header = TRUE) str(w.data.msci.all) head(w.data.msci.all) tail(w.data.msci.all) w.data.msci <- w.data.msci.all[1: 48, ] str(w.data.msci) head(w.data.msci) tail(w.data.msci) plot(w.data.msci$Cases_MSCI_Weekly_Ceilling, w.data.msci$Close_Price_Weekly) cor.test(w.data.msci$Cases_MSCI_Weekly_Ceilling, w.data.msci$Close_Price_Weekly, method = "pearson") # w.data.msci.redu <- w.data.msci[-(1:6), ] # plot(w.data.msci.redu$Cases_MSCI_Weekly_Ceilling, w.data.msci.redu$Close_Price_Weekly) # cor.test(w.data.msci.redu$Cases_MSCI_Weekly_Ceilling, w.data.msci.redu$Close_Price_Weekly, method = "pearson") price.msci <- ts(w.data.msci$Close_Price_Weekly) plot.ts(price.msci) # cases.usa <- ts(w.data.usa.redu$Cases_US_Weekly_Ceilling) # Diagnosing the ACF and PACF Plots of our Time-Series Object par(mfrow=c(2,3)) plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE) plot.ts(price.msci, col = "ForestGreen", lwd = 2, main = "Weekly MSCI Index", ylab = "Index") plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE) acf(price.msci, col = "Maroon", lwd = 2, main = "Weekly MSCI Index") plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE) pacf(price.msci, main = "Weekly MSCI Index", col = "orange", lwd = 2) # Differencing our objects to adjust for non-stationary price.msci.dif <- diff(price.msci) plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE) plot.ts(price.msci.dif, col = "ForestGreen", lwd = 2, main = "First Differenced Weekly MSCI Index ", ylab = "First Difference Index") plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE) acf(price.msci.dif, col = "Maroon", lwd = 2, main = "First Differenced Weekly MSCI Index") plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE) pacf(price.msci.dif, col = "orange", lwd = 2, main = "First Differenced Weekly MSCI Index") # Ljung-Box test lag.length = 10 Box.test(price.msci, lag=lag.length, type="Ljung-Box") # test non-stationary signal Box.test(price.msci.dif, lag=lag.length, type="Ljung-Box") # test stationary signal # Augmented Dickey-Fuller (ADF) test for unit root adf.test(price.msci) adf.test(price.msci.dif) # Kwiatkowski-Phillips-Schmidt-Shin (KPSS) for level or trend stationarity kpss.test(price.msci, null="Trend") kpss.test(price.msci.dif, null="Trend") ############################################## # Test for random walk runs.test(price.msci) # Less powerful bartels.test(price.msci) # More powerful runs.test(price.msci.dif) # Less powerful bartels.test(price.msci.dif) # More powerful ############################################## Index_usa <- 1:length(w.data.usa$Cases_US_Weekly_Ceilling) Index_msci <- 1:length(w.data.msci$Cases_MSCI_Weekly_Ceilling) #win.graph() par(mfrow=c(1,2)) plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE) barplot(Cases_US_Weekly_Ceilling ~ Index_usa, data = w.data.usa, col = "gold", xlab = "Week", main = "(a)", ylab = "Cases") plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE) barplot(Cases_MSCI_Weekly_Ceilling ~ Index_msci, data = w.data.msci, col = "lightskyblue", xlab = "Week", main = "(b)", ylab = "Cases") par(mfrow=c(1,1)) ######################################## par(mfrow=c(1,3)) plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE) barplot(Close_Price_Weekly ~ Index_usa, data = w.data.usa, col = "gold", xlab = "Week", main = "(a)", ylab = "S & P 500") plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE) barplot(Close_Price_Weekly ~ Index_msci, data = w.data.msci, col = "lightskyblue", xlab = "Week", main = "(b)", ylab = "MSCI") plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE) barplot(Close_Price_Weekly ~ Index_usa, data = w.data.vix, col = "springgreen3", xlab = "Week", main = "(c)", ylab = "VIX") par(mfrow=c(1,1)) summary(w.data.usa$Close_Price_Weekly) summary(w.data.msci$Close_Price_Weekly) summary(w.data.vix$Close_Price_Weekly) #### Forecasting # Average Method w.data.msci.test <- w.data.msci.all[49:nrow(w.data.msci.all), ] horizon3 <- nrow(w.data.msci.test) # forcast horizon mean.forecast.msci <- meanf(price.msci, horizon3, level = c(80, 95, 99)) round(as.data.frame(mean.forecast.msci), 3) names(mean.forecast.msci) max(abs((price.msci - mean.forecast.msci$fitted) - mean.forecast.msci$residuals)) # 0 # Naove method naive.forecast.msci <- naive(price.msci, horizon3, level = c(80, 95, 99)) round(as.data.frame(naive.forecast.msci), 3) # Drift method drift.forecast.msci <- rwf(price.msci, horizon3, drift=TRUE, level = c(80, 95, 99)) round(as.data.frame(drift.forecast.msci), 3) test.data.msci <- w.data.msci.test$Close_Price_Weekly round(accuracy(mean.forecast.msci, test.data.msci), 3) round(accuracy(naive.forecast.msci, test.data.msci), 3) round(accuracy(drift.forecast.msci, test.data.msci), 3) par(mfrow=c(1,3)) plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE) plot(mean.forecast.msci, col = "ForestGreen", lwd = 2, ylim = c(1500, 3500), xlab = "Time (Week)", ylab = "MSCI Index", main = " (a) ") lines(49:57, w.data.msci.test$Close_Price_Weekly, col = "ForestGreen", lwd = 2, lty = 3) legend("topleft", legend = c("Train", "Test", "Forecast"), lty = c(1, 3, 1), lwd = c(2, 2, 2), col = c("ForestGreen", "ForestGreen", "deepskyblue3"), text.col = c("ForestGreen", "ForestGreen", "deepskyblue3"), bty = "n") plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE) plot(naive.forecast.msci, col = "ForestGreen", lwd = 2, ylim = c(1500, 3500), xlab = "Time (Week)", ylab = "MSCI Index", main = " (b) ") lines(49:57, w.data.msci.test$Close_Price_Weekly, col = "ForestGreen", lwd = 2, lty = 3) legend("topleft", legend = c("Train", "Test", "Forecast"), lty = c(1, 3, 1), lwd = c(2, 2, 2), col = c("ForestGreen", "ForestGreen", "deepskyblue3"), text.col = c("ForestGreen", "ForestGreen", "deepskyblue3"), bty = "n") plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE) plot(drift.forecast.msci, col = "ForestGreen", lwd = 2, ylim = c(1500, 3500), xlab = "Time (Week)", ylab = "MSCI Index", main = " (c) ") lines(49:57, w.data.msci.test$Close_Price_Weekly, col = "ForestGreen", lwd = 2, lty = 3) legend("topleft", legend = c("Train", "Test", "Forecast"), lty = c(1, 3, 1), lwd = c(2, 2, 2), col = c("ForestGreen", "ForestGreen", "deepskyblue3"), text.col = c("ForestGreen", "ForestGreen", "deepskyblue3"), bty = "n") par(mfrow = c(1,1)) ############################################# ## For S & P ### Weekly Data w.data.usa.all <- read.table("FilePath\\Weekly_data_S&P_with_US_Covid.txt", header = TRUE) str(w.data.usa.all) head(w.data.usa.all) tail(w.data.usa.all) w.data.usa <- w.data.usa.all[1:47, ] # Upto December 11, 2020 str(w.data.usa) head(w.data.usa) tail(w.data.usa) # horizon <- ceiling(dim(w.data.usa)[1]/4) # forcast horizon # w.data.usa <- w.data.usa[-(1:horizon), ] # dim(w.data.usa) plot(w.data.usa$Cases_US_Weekly_Ceilling, w.data.usa$Close_Price_Weekly) cor.test(w.data.usa$Cases_US_Weekly_Ceilling, w.data.usa$Close_Price_Weekly, method = "pearson") # w.data.usa.redu <- w.data.usa[-(1:6), ] # plot(w.data.usa.redu$Cases_US_Weekly_Ceilling, w.data.usa.redu$Close_Price_Weekly) # cor.test(w.data.usa.redu$Cases_US_Weekly_Ceilling, w.data.usa.redu$Close_Price_Weekly, method = "pearson") price.usa <- ts(w.data.usa$Close_Price_Weekly) plot.ts(price.usa) # cases.usa <- ts(w.data.usa.redu$Cases_US_Weekly_Ceilling) # Diagnosing the ACF and PACF Plots of our Time-Series Object par(mfrow=c(2,3)) plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE) plot.ts(price.usa, col = "ForestGreen", lwd = 2, main = "Weekly S & P 500 Stock Index", ylab = "Index") plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE) acf(price.usa, col = "Maroon", lwd = 2, main = "Weekly S & P 500 Stock Index") plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE) pacf(price.usa, main = "Weekly S & P 500 Stock Index", col = "orange", lwd = 2) # Differencing our objects to adjust for non-stationary price.usa.dif <- diff(price.usa) plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE) plot.ts(price.usa.dif, col = "ForestGreen", lwd = 2, main = "First Differenced Weekly S & P 500 Stock Index ", ylab = "First Difference Index") plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE) acf(price.usa.dif, col = "Maroon", lwd = 2, main = "First Differenced Weekly S & P 500 Stock Index") plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE) pacf(price.usa.dif, col = "orange", lwd = 2, main = "First Differenced Weekly S & P 500 Stock Index") # Ljung-Box test lag.length = 10 Box.test(price.usa, lag=lag.length, type="Ljung-Box") # test non-stationary signal Box.test(price.usa.dif, lag=lag.length, type="Ljung-Box") # test stationary signal # Augmented Dickey-Fuller (ADF) test for unit root adf.test(price.usa) adf.test(price.usa.dif) # Kwiatkowski-Phillips-Schmidt-Shin (KPSS) for level or trend stationarity kpss.test(price.usa, null="Trend") kpss.test(price.usa.dif, null="Trend") ############################################## # Test for random walk runs.test(price.usa) # Less powerful bartels.test(price.usa) # More powerful BartelsRankTest(price.usa, alternative="trend", method="beta") runs.test(price.usa.dif) # Less powerful bartels.test(price.usa.dif) # More powerful BartelsRankTest(price.usa.dif, alternative="trend", method="beta") ############################################## ######################################################## ## Order selection by Cross-Validation ## auto.arima does cross validation by default arima.usa <- auto.arima(price.usa, xreg = w.data.usa$Cases_US_Weekly_Ceilling) (lowest.aic.usa <- AIC(arima.usa)) (order.usa <- arimaorder(arima.usa)) ######################################################################### #### Forecasting w.data.usa.test <- w.data.usa.all[48:nrow(w.data.usa.all), ] horizon <- nrow(w.data.usa.test) # forecast horizon forecast.usa <- forecast(arima.usa, xreg = w.data.usa.test$Cases_US_Weekly_Ceilling, horizon, level = c(80, 95, 99)) forecast.usa library(lmtest) coeftest(arima.usa) snp.mod <- arima.usa (1-pnorm(abs(snp.mod$coef)/sqrt(diag(snp.mod$var.coef))))*2 # For X regressor 2*(1-pnorm(0.125/.068)) test.data.snp <- w.data.usa.test$Close_Price_Weekly forecast.snp.val <- as.numeric(forecast.usa$mean) round(accuracy(forecast.usa, test.data.snp),3) # MAE.SNP <- mean(abs(test.data.snp - forecast.snp.val)) # RMSE.SNP <- sqrt(mean((test.data.snp - forecast.snp.val)^2)) # MPE.SNP <- mean(((test.data.snp - forecast.usa.val)/test.data.snp)*100) # MAPE.SNP <- mean((abs(test.data.snp - forecast.usa.val)/abs(test.data.snp))*100) # c(MAE.SNP, RMSE.SNP, MPE.SNP, MAPE.SNP) plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE) plot(forecast.usa, col = "ForestGreen", lwd = 2, ylim = c(2200, 5000), main = " ", xlab = "Time (Week)", ylab = "S & P 500 Index") lines(48:56, w.data.usa.test$Close_Price_Weekly, col = "ForestGreen", lwd = 2, lty = 3) legend("topright", legend = c("Train", "Test", "Forecast"), lty = c(1, 3, 1), lwd = c(2, 2, 2), col = c("ForestGreen", "ForestGreen", "deepskyblue3"), text.col = c("ForestGreen", "ForestGreen", "deepskyblue3"), bty = "n") ################################################### ## For VIX ### Weekly Data w.data.vix.all <- read.table("FilePath\\Weekly_data_VIX_with_US_Covid.txt", header = TRUE) str(w.data.vix.all) head(w.data.vix.all) tail(w.data.vix.all) w.data.vix <- w.data.vix.all[1:47, ] # Upto December 11, 2020 str(w.data.vix) head(w.data.vix) tail(w.data.vix) plot(w.data.vix$Cases_US_Weekly_Ceilling, w.data.vix$Close_Price_Weekly) cor.test(w.data.vix$Cases_US_Weekly_Ceilling, w.data.vix$Close_Price_Weekly, method = "pearson") # w.data.vix.redu <- w.data.vix[-(1:6), ] # plot(w.data.vix.redu$Cases_US_Weekly_Ceilling, w.data.vix.redu$Close_Price_Weekly) # cor.test(w.data.vix.redu$Cases_US_Weekly_Ceilling, w.data.vix.redu$Close_Price_Weekly, method = "pearson") price.vix <- ts(w.data.vix$Close_Price_Weekly) plot.ts(price.vix) # cases.usa <- ts(w.data.usa.redu$Cases_US_Weekly_Ceilling) # Diagnosing the ACF and PACF Plots of our Time-Series Object par(mfrow=c(2,3)) plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE) plot.ts(price.vix, col = "ForestGreen", lwd = 2, main = "Weekly VIX", ylab = "Index") plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE) acf(price.vix, col = "Maroon", lwd = 2, main = "Weekly VIX") plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE) pacf(price.vix, main = "Weekly VIX", col = "orange", lwd = 2) # Differencing our objects to adjust for non-stationary price.vix.dif <- diff(price.vix) plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE) plot.ts(price.vix.dif, col = "ForestGreen", lwd = 2, main = "First Differenced Weekly VIX", ylab = "First Difference Index") plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE) acf(price.vix.dif, col = "Maroon", lwd = 2, main = "First Differenced Weekly VIX") plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE) pacf(price.vix.dif, col = "orange", lwd = 2, main = "First Differenced Weekly VIX") # Ljung-Box test lag.length = 10 Box.test(price.vix, lag=lag.length, type="Ljung-Box") # test non-stationary signal Box.test(price.vix.dif, lag=lag.length, type="Ljung-Box") # test stationary signal # Augmented Dickey-Fuller (ADF) test for unit root adf.test(price.vix) adf.test(price.vix.dif) # Kwiatkowski-Phillips-Schmidt-Shin (KPSS) for level or trend stationarity kpss.test(price.vix, null="Trend") kpss.test(price.vix.dif, null="Trend") ############################################## # Test for random walk runs.test(price.vix) # Less powerful bartels.test(price.vix) # More powerful runs.test(price.vix.dif) # Less powerful bartels.test(price.vix.dif) # More powerful ############################################## win.graph() par(mfrow=c(1,2)) barplot(Cases_USA ~ , data = covid.usa, col = "gold", xlab = "Week", main = "Confirmed Cases in USA", ylab = "Cases") barplot(Cases_MSCI ~ , data = covid.msci, col = "Maroon", xlab = "Week", main = "Confirmed Cases in 23 MSCI Countries", ylab = "Cases") par(mfrow=c(1,1)) ######################################################## ## Order selection by Cross-Validation ## auto.arima does cross validation by default arima.vix <- auto.arima(price.vix, xreg = w.data.vix$Cases_US_Weekly_Ceilling) (lowest.aic.vix <- AIC(arima.vix)) (order.vix<- arimaorder(arima.vix)) par(mfrow=c(1,2)) plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE) acf(resid(arima.usa), col = "Maroon", lwd = 2, main = "(a)") plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE) acf(resid(arima.vix), col = "Maroon", lwd = 2, main = "(b)") par(mfrow=c(1,1)) ######################################################################### #### Forecasting w.data.vix.test <- w.data.vix.all[48:nrow(w.data.vix.all), ] horizon2 <- nrow(w.data.vix.test) # forecast horizon forecast.vix <- forecast(arima.vix, xreg = w.data.vix.test$Cases_US_Weekly_Ceilling, h = horizon2, level = c(80, 95, 99)) round(as.data.frame(forecast.vix), 3) library(lmtest) coeftest(arima.vix) vix.mod <- arima.vix (1-pnorm(abs(vix.mod$coef)/sqrt(diag(vix.mod$var.coef))))*2 # For X regressor 2*(1-pnorm(0.103/.061)) test.data.vix <- w.data.vix.test$Close_Price_Weekly round(accuracy(forecast.vix, test.data.vix), 3) plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE) plot(forecast.vix, col = "ForestGreen", lwd = 2, ylim = c(-7, 80), main = " ", xlab = "Time (Week)", ylab = "VIX") lines(48:56, w.data.vix.test$Close_Price_Weekly, col = "ForestGreen", lwd = 2, lty = 3) legend("topright", legend = c("Train", "Test", "Forecast"), lty = c(1, 3, 1), lwd = c(2, 2, 2), col = c("ForestGreen", "ForestGreen", "deepskyblue3"), text.col = c("ForestGreen", "ForestGreen", "deepskyblue3"), bty = "n") #### End of Codes #######################