## R Programming Codes submitted to Peer J Computer Science Journal

## For MSCI 

### Weekly Data
w.data.msci.all <- read.table("FilePath\Weekly_data_MSCI_with_MSCI_Covid.txt", header = TRUE)
str(w.data.msci.all)
head(w.data.msci.all)
tail(w.data.msci.all)

w.data.msci <- w.data.msci.all[1: 48, ]
str(w.data.msci)
head(w.data.msci)
tail(w.data.msci)

plot(w.data.msci$Cases_MSCI_Weekly_Ceilling, w.data.msci$Close_Price_Weekly)
cor.test(w.data.msci$Cases_MSCI_Weekly_Ceilling, w.data.msci$Close_Price_Weekly, method = "pearson")


# w.data.msci.redu <- w.data.msci[-(1:6), ]
# plot(w.data.msci.redu$Cases_MSCI_Weekly_Ceilling, w.data.msci.redu$Close_Price_Weekly)
# cor.test(w.data.msci.redu$Cases_MSCI_Weekly_Ceilling, w.data.msci.redu$Close_Price_Weekly, method = "pearson")

price.msci <- ts(w.data.msci$Close_Price_Weekly) 
plot.ts(price.msci)
# cases.usa <- ts(w.data.usa.redu$Cases_US_Weekly_Ceilling) 


# Diagnosing the ACF and PACF Plots of our Time-Series Object
par(mfrow=c(2,3))

plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE)
plot.ts(price.msci, col = "ForestGreen", lwd = 2, main = "Weekly MSCI Index", ylab = "Index")

plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE)
acf(price.msci,  col = "Maroon", lwd = 2, main = "Weekly MSCI Index")

plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE)
pacf(price.msci, main = "Weekly MSCI Index", col = "orange", lwd = 2)

# Differencing our objects to adjust for non-stationary
price.msci.dif <- diff(price.msci)

plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE)
plot.ts(price.msci.dif,  col = "ForestGreen", lwd = 2, main = "First Differenced Weekly MSCI Index ", ylab = "First Difference Index")


plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE)
acf(price.msci.dif, col = "Maroon", lwd = 2, main = "First Differenced Weekly MSCI Index")

plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE)
pacf(price.msci.dif, col = "orange", lwd = 2, main = "First Differenced Weekly MSCI Index")

# Ljung-Box test
lag.length = 10
Box.test(price.msci, lag=lag.length, type="Ljung-Box") # test non-stationary signal
Box.test(price.msci.dif, lag=lag.length, type="Ljung-Box") # test stationary signal

# Augmented Dickey-Fuller (ADF) test for unit root
adf.test(price.msci)
adf.test(price.msci.dif)

# Kwiatkowski-Phillips-Schmidt-Shin (KPSS) for level or trend stationarity
kpss.test(price.msci, null="Trend")
kpss.test(price.msci.dif, null="Trend")
##############################################

# Test for random walk
runs.test(price.msci) # Less powerful
bartels.test(price.msci) # More powerful

runs.test(price.msci.dif) # Less powerful
bartels.test(price.msci.dif) # More powerful
##############################################

Index_usa <- 1:length(w.data.usa$Cases_US_Weekly_Ceilling)
Index_msci <- 1:length(w.data.msci$Cases_MSCI_Weekly_Ceilling)
#win.graph()
par(mfrow=c(1,2))
plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE)
barplot(Cases_US_Weekly_Ceilling ~ Index_usa, data = w.data.usa, col = "gold",  xlab = "Week", main = "(a)", ylab = "Cases")

plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE)
barplot(Cases_MSCI_Weekly_Ceilling ~ Index_msci, data = w.data.msci, col = "lightskyblue", xlab = "Week", main = "(b)", ylab = "Cases")

par(mfrow=c(1,1))
########################################
par(mfrow=c(1,3))
plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE)
barplot(Close_Price_Weekly ~ Index_usa, data = w.data.usa, col = "gold",  xlab = "Week", main = "(a)", ylab = "S & P 500")

plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE)
barplot(Close_Price_Weekly ~ Index_msci, data = w.data.msci, col = "lightskyblue", xlab = "Week", main = "(b)", ylab = "MSCI")

plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE)
barplot(Close_Price_Weekly ~ Index_usa, data = w.data.vix, col = "springgreen3", xlab = "Week", main = "(c)", ylab = "VIX")

par(mfrow=c(1,1))

summary(w.data.usa$Close_Price_Weekly)
summary(w.data.msci$Close_Price_Weekly)
summary(w.data.vix$Close_Price_Weekly)

#### Forecasting

# Average Method
w.data.msci.test <- w.data.msci.all[49:nrow(w.data.msci.all), ]
horizon3 <- nrow(w.data.msci.test) # forcast horizon

mean.forecast.msci <- meanf(price.msci, horizon3, level = c(80, 95, 99))
round(as.data.frame(mean.forecast.msci), 3)

names(mean.forecast.msci)
max(abs((price.msci - mean.forecast.msci$fitted) - mean.forecast.msci$residuals))
# 0

# Naove method
naive.forecast.msci <- naive(price.msci, horizon3, level = c(80, 95, 99))
round(as.data.frame(naive.forecast.msci), 3)

# Drift method
drift.forecast.msci <- rwf(price.msci, horizon3, drift=TRUE, level = c(80, 95, 99))
round(as.data.frame(drift.forecast.msci), 3)

test.data.msci <- w.data.msci.test$Close_Price_Weekly
round(accuracy(mean.forecast.msci, test.data.msci), 3)
round(accuracy(naive.forecast.msci, test.data.msci), 3)
round(accuracy(drift.forecast.msci, test.data.msci), 3)

par(mfrow=c(1,3))

plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE)
plot(mean.forecast.msci, col = "ForestGreen", lwd = 2, ylim = c(1500, 3500), xlab = "Time (Week)", ylab = "MSCI Index", main = " (a) ")
lines(49:57, w.data.msci.test$Close_Price_Weekly, col = "ForestGreen", lwd = 2, lty = 3)
legend("topleft", legend = c("Train", "Test", "Forecast"), lty = c(1, 3, 1), lwd = c(2, 2, 2), 
       col = c("ForestGreen", "ForestGreen", "deepskyblue3"), 
       text.col = c("ForestGreen", "ForestGreen", "deepskyblue3"), bty = "n")

plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE)
plot(naive.forecast.msci, col = "ForestGreen", lwd = 2, ylim = c(1500, 3500), xlab = "Time (Week)", ylab = "MSCI Index",  main = " (b) ")
lines(49:57, w.data.msci.test$Close_Price_Weekly, col = "ForestGreen", lwd = 2, lty = 3)
legend("topleft", legend = c("Train", "Test", "Forecast"), lty = c(1, 3, 1), lwd = c(2, 2, 2), 
       col = c("ForestGreen", "ForestGreen", "deepskyblue3"), 
       text.col = c("ForestGreen", "ForestGreen", "deepskyblue3"), bty = "n")

plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE)
plot(drift.forecast.msci,  col = "ForestGreen", lwd = 2, ylim = c(1500, 3500), xlab = "Time (Week)", ylab = "MSCI Index", main = " (c) ")
lines(49:57, w.data.msci.test$Close_Price_Weekly, col = "ForestGreen", lwd = 2, lty = 3)
legend("topleft", legend = c("Train", "Test", "Forecast"), lty = c(1, 3, 1), lwd = c(2, 2, 2), 
       col = c("ForestGreen", "ForestGreen", "deepskyblue3"), 
       text.col = c("ForestGreen", "ForestGreen", "deepskyblue3"), bty = "n")

par(mfrow = c(1,1))
#############################################

## For S & P
### Weekly Data
w.data.usa.all <- read.table("FilePath\\Weekly_data_S&P_with_US_Covid.txt", header = TRUE)
str(w.data.usa.all)
head(w.data.usa.all)
tail(w.data.usa.all)

w.data.usa <- w.data.usa.all[1:47, ] # Upto December 11, 2020
str(w.data.usa)
head(w.data.usa)
tail(w.data.usa)

# horizon <- ceiling(dim(w.data.usa)[1]/4) # forcast horizon
# w.data.usa <- w.data.usa[-(1:horizon), ]
# dim(w.data.usa)

plot(w.data.usa$Cases_US_Weekly_Ceilling, w.data.usa$Close_Price_Weekly)
cor.test(w.data.usa$Cases_US_Weekly_Ceilling, w.data.usa$Close_Price_Weekly, method = "pearson")


# w.data.usa.redu <- w.data.usa[-(1:6), ]
# plot(w.data.usa.redu$Cases_US_Weekly_Ceilling, w.data.usa.redu$Close_Price_Weekly)
# cor.test(w.data.usa.redu$Cases_US_Weekly_Ceilling, w.data.usa.redu$Close_Price_Weekly, method = "pearson")

price.usa <- ts(w.data.usa$Close_Price_Weekly) 
plot.ts(price.usa)
# cases.usa <- ts(w.data.usa.redu$Cases_US_Weekly_Ceilling) 


# Diagnosing the ACF and PACF Plots of our Time-Series Object
par(mfrow=c(2,3))
plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE)
plot.ts(price.usa, col = "ForestGreen", lwd = 2, main = "Weekly S & P 500 Stock Index", ylab = "Index")

plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE)
acf(price.usa,  col = "Maroon", lwd = 2, main = "Weekly S & P 500 Stock Index")

plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE)
pacf(price.usa, main = "Weekly S & P 500 Stock Index", col = "orange", lwd = 2)

# Differencing our objects to adjust for non-stationary
price.usa.dif <- diff(price.usa)

plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE)
plot.ts(price.usa.dif,  col = "ForestGreen", lwd = 2, main = "First Differenced Weekly S & P 500 Stock Index ", ylab = "First Difference Index")

plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE)
acf(price.usa.dif, col = "Maroon", lwd = 2, main = "First Differenced Weekly S & P 500 Stock Index")


plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE)
pacf(price.usa.dif, col = "orange", lwd = 2, main = "First Differenced Weekly S & P 500 Stock Index")

# Ljung-Box test
lag.length = 10
Box.test(price.usa, lag=lag.length, type="Ljung-Box") # test non-stationary signal
Box.test(price.usa.dif, lag=lag.length, type="Ljung-Box") # test stationary signal

# Augmented Dickey-Fuller (ADF) test for unit root
adf.test(price.usa)
adf.test(price.usa.dif)

# Kwiatkowski-Phillips-Schmidt-Shin (KPSS) for level or trend stationarity
kpss.test(price.usa, null="Trend")
kpss.test(price.usa.dif, null="Trend")
##############################################

# Test for random walk
runs.test(price.usa) # Less powerful
bartels.test(price.usa) # More powerful
BartelsRankTest(price.usa, alternative="trend", method="beta")

runs.test(price.usa.dif) # Less powerful
bartels.test(price.usa.dif) # More powerful
BartelsRankTest(price.usa.dif, alternative="trend", method="beta")
##############################################

########################################################
## Order selection by Cross-Validation
## auto.arima does cross validation by default
arima.usa <- auto.arima(price.usa, xreg = w.data.usa$Cases_US_Weekly_Ceilling)
(lowest.aic.usa <- AIC(arima.usa))
(order.usa <- arimaorder(arima.usa))
#########################################################################

#### Forecasting 
w.data.usa.test <- w.data.usa.all[48:nrow(w.data.usa.all), ]
horizon <- nrow(w.data.usa.test) # forecast horizon
forecast.usa <- forecast(arima.usa, xreg = w.data.usa.test$Cases_US_Weekly_Ceilling, horizon,
                         level = c(80, 95, 99))
forecast.usa 

library(lmtest)

coeftest(arima.usa)
snp.mod <- arima.usa
(1-pnorm(abs(snp.mod$coef)/sqrt(diag(snp.mod$var.coef))))*2
# For X regressor
2*(1-pnorm(0.125/.068))


test.data.snp <- w.data.usa.test$Close_Price_Weekly
forecast.snp.val <- as.numeric(forecast.usa$mean)
round(accuracy(forecast.usa, test.data.snp),3)

# MAE.SNP <- mean(abs(test.data.snp - forecast.snp.val))
# RMSE.SNP <- sqrt(mean((test.data.snp - forecast.snp.val)^2))
# MPE.SNP <- mean(((test.data.snp - forecast.usa.val)/test.data.snp)*100)
# MAPE.SNP <- mean((abs(test.data.snp - forecast.usa.val)/abs(test.data.snp))*100)
# c(MAE.SNP, RMSE.SNP, MPE.SNP, MAPE.SNP)

plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE)
plot(forecast.usa, col = "ForestGreen", lwd = 2, ylim = c(2200, 5000), 
     main = " ", xlab = "Time (Week)", ylab = "S & P 500 Index")
lines(48:56, w.data.usa.test$Close_Price_Weekly, col = "ForestGreen", lwd = 2, lty = 3)
legend("topright", legend = c("Train", "Test", "Forecast"), lty = c(1, 3, 1), lwd = c(2, 2, 2), 
       col = c("ForestGreen", "ForestGreen", "deepskyblue3"), 
       text.col = c("ForestGreen", "ForestGreen", "deepskyblue3"), bty = "n")

###################################################

## For VIX
### Weekly Data
w.data.vix.all <- read.table("FilePath\\Weekly_data_VIX_with_US_Covid.txt", header = TRUE)
str(w.data.vix.all)
head(w.data.vix.all)
tail(w.data.vix.all)

w.data.vix <- w.data.vix.all[1:47, ] # Upto December 11, 2020
str(w.data.vix)
head(w.data.vix)
tail(w.data.vix)

plot(w.data.vix$Cases_US_Weekly_Ceilling, w.data.vix$Close_Price_Weekly)
cor.test(w.data.vix$Cases_US_Weekly_Ceilling, w.data.vix$Close_Price_Weekly, method = "pearson")


# w.data.vix.redu <- w.data.vix[-(1:6), ]
# plot(w.data.vix.redu$Cases_US_Weekly_Ceilling, w.data.vix.redu$Close_Price_Weekly)
# cor.test(w.data.vix.redu$Cases_US_Weekly_Ceilling, w.data.vix.redu$Close_Price_Weekly, method = "pearson")

price.vix <- ts(w.data.vix$Close_Price_Weekly) 
plot.ts(price.vix)
# cases.usa <- ts(w.data.usa.redu$Cases_US_Weekly_Ceilling) 


# Diagnosing the ACF and PACF Plots of our Time-Series Object
par(mfrow=c(2,3))

plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE)
plot.ts(price.vix, col = "ForestGreen", lwd = 2, main = "Weekly VIX", ylab = "Index")

plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE)
acf(price.vix,  col = "Maroon", lwd = 2, main = "Weekly VIX")

plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE)
pacf(price.vix, main = "Weekly VIX", col = "orange", lwd = 2)

# Differencing our objects to adjust for non-stationary
price.vix.dif <- diff(price.vix)

plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE)
plot.ts(price.vix.dif,  col = "ForestGreen", lwd = 2, main = "First Differenced Weekly VIX", ylab = "First Difference Index")

plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE)
acf(price.vix.dif, col = "Maroon", lwd = 2, main = "First Differenced Weekly VIX")

plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE)
pacf(price.vix.dif, col = "orange", lwd = 2, main = "First Differenced Weekly VIX")

# Ljung-Box test
lag.length = 10
Box.test(price.vix, lag=lag.length, type="Ljung-Box") # test non-stationary signal
Box.test(price.vix.dif, lag=lag.length, type="Ljung-Box") # test stationary signal

# Augmented Dickey-Fuller (ADF) test for unit root
adf.test(price.vix)
adf.test(price.vix.dif)

# Kwiatkowski-Phillips-Schmidt-Shin (KPSS) for level or trend stationarity
kpss.test(price.vix, null="Trend")
kpss.test(price.vix.dif, null="Trend")
##############################################

# Test for random walk
runs.test(price.vix) # Less powerful
bartels.test(price.vix) # More powerful

runs.test(price.vix.dif) # Less powerful
bartels.test(price.vix.dif) # More powerful
##############################################

win.graph()
par(mfrow=c(1,2))
barplot(Cases_USA ~ , data = covid.usa, col = "gold",  xlab = "Week", main = "Confirmed Cases in USA", ylab = "Cases")
barplot(Cases_MSCI ~ , data = covid.msci, col = "Maroon", xlab = "Week", main = "Confirmed Cases in 23 MSCI Countries", ylab = "Cases")
par(mfrow=c(1,1))

########################################################
## Order selection by Cross-Validation
## auto.arima does cross validation by default
arima.vix <- auto.arima(price.vix, xreg = w.data.vix$Cases_US_Weekly_Ceilling)
(lowest.aic.vix <- AIC(arima.vix))
(order.vix<- arimaorder(arima.vix))

par(mfrow=c(1,2))
plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE)
acf(resid(arima.usa), col = "Maroon", lwd = 2, main = "(a)")

plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE)
acf(resid(arima.vix), col = "Maroon", lwd = 2, main = "(b)")
par(mfrow=c(1,1))

#########################################################################

#### Forecasting 
w.data.vix.test <- w.data.vix.all[48:nrow(w.data.vix.all), ]
horizon2 <- nrow(w.data.vix.test) # forecast horizon
forecast.vix <- forecast(arima.vix, xreg = w.data.vix.test$Cases_US_Weekly_Ceilling,
                         h = horizon2, level = c(80, 95, 99))
round(as.data.frame(forecast.vix), 3)

library(lmtest)

coeftest(arima.vix)
vix.mod <- arima.vix
(1-pnorm(abs(vix.mod$coef)/sqrt(diag(vix.mod$var.coef))))*2

# For X regressor
2*(1-pnorm(0.103/.061))

test.data.vix <- w.data.vix.test$Close_Price_Weekly
round(accuracy(forecast.vix, test.data.vix), 3)

plot.new(); grid(nx = NULL, ny = NULL,lty = 2, col = "gray", lwd = 1); par(new = TRUE)
plot(forecast.vix, col = "ForestGreen", lwd = 2, ylim = c(-7, 80), 
     main = "  ", xlab = "Time (Week)", ylab = "VIX")
lines(48:56, w.data.vix.test$Close_Price_Weekly, col = "ForestGreen", lwd = 2, lty = 3)
legend("topright", legend = c("Train", "Test", "Forecast"), lty = c(1, 3, 1), lwd = c(2, 2, 2), 
       col = c("ForestGreen", "ForestGreen", "deepskyblue3"), 
       text.col = c("ForestGreen", "ForestGreen", "deepskyblue3"), bty = "n")

#### End of Codes #######################