###### R CODE ########## ###Installing Packages### install.packages("rio") install.packages("forecast") install.packages("tseries") install.packages("tidyverse") install.packages("GGally") library('rio') library('forecast') library('tseries') library('tidyverse') library(GGally) ###Reading Dataset#### getwd() setwd("C:/Users/akini/OneDrive/Desktop") death1<-read.csv("Deaths1.csv",header = TRUE) attach(death1) ####Naming variables from Dataset#### Fx<-Confirmed Gx<-Recovered Hx<-Deaths ####New cases/Velocity/1st derivative### t<-323 Fxp<-vector() Fxp[1]<-0 Hxp<-vector() Hxp[1]<-0 for (i in 1:t){ Fxp[i+1]<-Fx[i+1] - Fx[i] Hxp[i+1]<-Hx[i+1] - Hx[i] } ####Acceleration/2nd Derivative### Fxp2<-vector() Fxp2[1]<-0 Hxp2<-vector() Hxp2[1]<-0 for (i in 1:t){ Fxp2[i+1]<-Fxp[i+1] - Fxp[i] Hxp2[i+1]<-Hxp[i+1] - Hxp[i] } ###Plot of Cumulative cases### ggplot()+geom_line(death1, mapping=aes(Time_Day,Fx, colour = "Cumulative Confirmed Infection")) + geom_line(death1, mapping=aes(Time_Day,Hx, colour = "Cumulative Infection")) +ylab('Cumulative Infection')+ xlab("Days from 1st Fatality") ###Plot of Velocity and Acceleration functions(Figure 1)### ggplot()+ geom_line(death1, mapping=aes(Time_Day,Fxp2, colour = "Acceleration Confirmed Infection")) +ylab('Confirmed Infection')+ xlab("Days from 1st Fatality") ggplot()+ geom_line(death1, mapping=aes(Time_Day,Hxp2, colour = "Acceleration of Fatality")) +ylab('Fatality')+ xlab("Days from 1st Fatality") ###Correlation of Raw Data and Log Transformed Data### datadeath<-data.frame(Fxp, Hxp,Fxp2, Hxp2) ggpairs(datadeath) data.VA_transf<-data.frame(log1p(Fxp), log1p(Hxp),log1p(Fxp2+51285.001),log1p(Hxp2+974.001)) ggpairs(data.VA_transf) ###Fitting a Linear Model for acceleration### fit3<-lm(log1p(Hxp2+974.001)~log1p(Fxp2+51285.001) +0,datadeath) summary(fit3) par(mfrow=c(2,2)) plot(fit3) ### Linear model Functions for Acceleration### death_acc <- function(Fxp2) 0.630929*log1p(Fxp2+51285.001) ###Adding acceleration multivariate fn to dataset### datadeath$death_acc<-death_acc(Fxp2) ###Acceleration of Fatality Composite Function### HF1<-vector() for (i in 1:t+1){ if (Fxp2[i]==0){ HF1[i]<-0 }else { HF1[i]<-Hxp2[i]/Fxp2[i] } HF1[1]<-0 } ggplot()+geom_line(death1, mapping=aes(Time_Day,HF1, colour = "Acceleration of Deaths as Confimred Case Accelerate")) ###Adding acceleration multivariate fns to dataset### datadeath$HF1<-HF1 ###Adding other variables to dataset### datadeath$Date<- death1$Date datadeath$Death<- death1$Death datadeath$Month<- death1$Month datadeath$Week<- death1$Week ####################################################################################################### ####################################################################################################### ####################################################################################################### #########################################ACCELERATION FUNCTION ################################## ###EXPLORATORY DATA ANALYSIS (EDA)### ###Breakout of Data over week and month### datadeath$DATES=as.Date(death1$Date) ggplot(datadeath,aes(DATES,death_acc))+geom_line() + scale_x_date('Week')+ylab("death_acc")+ xlab("") ggplot(datadeath,aes(DATES,death_acc))+geom_point(color="navyblue") + facet_wrap(~Week)+scale_x_date('week') ggplot(datadeath,aes(DATES,death_acc))+geom_point(color="navyblue") + facet_wrap(~Month)+scale_x_date('month') ###Cleaned Functions and graphing of cleaned fn### TS_death_acc=ts(datadeath[,c('death_acc')]) datadeath$clean.TS_death_acc=tsclean(TS_death_acc) ggplot() + geom_line(data=datadeath, aes(x=DATES,y=clean.TS_death_acc))+ ylab('Clean death_acc')#Graph cleaned data-ignore type ts scale warning ###Get monthly and weekly moving average (MA) and compare to cleaned daily data which still### datadeath$deathacc_ma = ma(datadeath$clean.TS_death_acc, order = 7) datadeath$deathacc_ma30 = ma(datadeath$clean.TS_death_acc, order = 30) ggplot() + geom_line(data = datadeath, aes(x=DATES, y=clean.TS_death_acc, colour = "deathacc"))+ geom_line(data = datadeath, aes(x=DATES, y=deathacc_ma, colour = "Weekly Moving Average"))+geom_line(data = datadeath, aes(x=DATES, y=deathacc_ma30, colour = "Monthly Moving Average"))+ylab('Acceleration of Deaths') ###DECOMPOSITON OF THE DATA#### #calculate seasonal component with stl() clean.TS_death_acc=ts(na.omit(datadeath$clean.TS_death_acc),frequency=15) decomp1=stl(clean.TS_death_acc,s.window="periodic") deseasonal_deathacc<- seasadj(decomp1) #used in ARIMA model later plot(decomp1) ###STATIONARITY### adf.test(clean.TS_death_acc,alternative="stationary")#first visual check for stationary variance ###AUTOCORRELATION AND CHOOSING MODEL ORDER### ###ACF and PACF plots### acf(clean.TS_death_acc,main='') pacf(clean.TS_death_acc,main='') ###Taking difference of 1 and adf test### deathacc_d1=diff(deseasonal_deathacc, differences=1) plot(deathacc_d1) adf.test(deathacc_d1, alternative="stationary") ###ACF and PACF plots### acf(deathacc_d1,main='ACF for Differenced Series') #Look for spikes at specific lag points of the differenced series pacf(deathacc_d1,main='ACF for Differenced Series') ###FITTING AN ARIMA MODEL### #Get auto fit p,d,q values auto.arima(deseasonal_deathacc, seasonal = FALSE) #EVALUATE AND ITERATE###-does the model make sense? fit1.deathacc<- auto.arima(deseasonal_deathacc, seasonal = FALSE) tsdisplay(residuals(fit1.deathacc), lag.max=45, main='(4,1,5) Model Residuals') #graph shows serious lags, so modify for p or q fit2.deathacc=arima(deseasonal_deathacc, order=c(7,1,5)) tsdisplay(residuals(fit2.deathacc), lag.max=40, main='Seasonal Model Residuals') fit2.deathacc ###TEST MODEL PERFORMANCE WITH A HOLDOUT SET### hold<-window(ts(deseasonal_deathacc), start=260) fit2_no_holdout= arima(ts(deseasonal_deathacc[-c(260:324)]),order=c(7,1,5)) fcast2_no_holdout<-forecast(fit2_no_holdout,h=64) plot(fcast2_no_holdout,main=" ") lines(ts(deseasonal_deathacc)) #Seasonality added back.However, model does not seem as though it need seasnality to be added back to it. fit1_w_seasonality=fit2.deathacc seas_fcast1<- forecast(fit1_w_seasonality, h=90) plot(seas_fcast1) lines(ts(clean.TS_death_acc)) lines(ts(deseasonal_deathacc)) fit1_w_seasonality ###FURTHER TESTING AND ANALYSISOF OUR MODEL### #deseasonal fit1=auto.arima(deseasonal_deathacc, seasonal=FALSE) tsdisplay(residuals(fit1), lag.max=15, main='Seasonal Model Residuals') #Adjusted lag model fit2=arima(deseasonal_deathacc, order=c(7,1,5)) tsdisplay(residuals(fit2), lag.max=15, main='Seasonal Model Residuals') #seasonality fit1_w_seasonality=auto.arima(deseasonal_deathacc, seasonal=TRUE) tsdisplay(residuals(fit1_w_seasonality),lag.max=20, main='Seasonal Model Residuals') #####Final Fit and Tested ARIMA forecast par(mfrow=c(2,2)) #auto arima without seasonality fcast1_1<-forecast(fit1, h=30) plot(fcast1_1) #Adjusted lag model fcast1_2<-forecast(fit2, h=30) plot(fcast1_2) #auto arima with seasonality fcast1_3<-forecast(fit1_w_seasonality, h=30) plot(fcast1_3) ###Selected Model### par(mfrow=c(1,1)) fcast1_2<-forecast(fit2, h=180) plot(fcast1_2) lines(ts(clean.TS_death_acc)) lines(ts(deseasonal_deathacc)) ####################################################################################################### ####################################################################################################### #########################################ACCELERATION COMPOSITE FUNCTION ################################## ###EXPLORATORY DATA ANALYSIS (EDA)### ###Breakout of Data over week and month### datadeath$DATES=as.Date(death1$Date) ggplot(datadeath,aes(DATES,HF1))+geom_line() + scale_x_date('Week')+ylab("HF1")+ xlab("") ggplot(datadeath,aes(DATES,HF1))+geom_point(color="navyblue") + facet_wrap(~Week)+scale_x_date('week') ggplot(datadeath,aes(DATES,HF1))+geom_point(color="navyblue") + facet_wrap(~Month)+scale_x_date('month') ###Cleaned Functions and graphing of cleaned fn### TS_HF1=ts(datadeath[,c('HF1')]) datadeath$clean.TS_HF1=tsclean(TS_HF1) ggplot() + geom_line(data=datadeath, aes(x=DATES,y=clean.TS_HF1))+ ylab('Clean HF1')#Graph cleaned data-ignore type ts scale warning ###Get monthly and weekly moving average (MA) and compare to cleaned daily data which still### datadeath$HF1_ma = ma(datadeath$clean.TS_HF1, order = 7) datadeath$HF1_ma30 = ma(datadeath$clean.TS_HF1, order = 30) ggplot() + geom_line(data = datadeath, aes(x=DATES, y=clean.TS_HF1, colour = "HF1"))+ geom_line(data = datadeath, aes(x=DATES, y=HF1_ma, colour = "Weekly Moving Average"))+geom_line(data = datadeath, aes(x=DATES, y=HF1_ma30, colour = "Monthly Moving Average"))+ylab('Acceleration of Deaths as cases change') ###DECOMPOSITON OF THE DATA#### #calculate seasonal component with stl() clean.TS_HF1=ts(na.omit(datadeath$clean.TS_HF1),frequency=15) decomp1=stl(clean.TS_HF1,s.window="periodic") deseasonal_HF1<- seasadj(decomp1) #used in ARIMA model later plot(decomp1) ###STATIONARITY### adf.test(clean.TS_HF1,alternative="stationary")#first visual check for stationary variance ###AUTOCORRELATION AND CHOOSING MODEL ORDER### ###ACF and PACF plots### acf(clean.TS_HF1,main='') pacf(clean.TS_HF1,main='') ###Taking difference of 1 and adf test### HF1_d1=diff(deseasonal_HF1, differences=1) plot(HF1_d1) adf.test(HF1_d1, alternative="stationary") ###ACF and PACF plots### acf(HF1_d1,main='ACF for Differenced Series') #Look for spikes at specific lag points of the differenced series pacf(HF1_d1,main='ACF for Differenced Series') ###FITTING AN ARIMA MODEL### #Get auto fit p,d,q values auto.arima(deseasonal_HF1, seasonal = FALSE) #EVALUATE AND ITERATE###-does the model make sense? fit1.HF1<- auto.arima(deseasonal_HF1, seasonal = FALSE) tsdisplay(residuals(fit1.HF1), lag.max=45, main='(1,1,4) Model Residuals') #graph shows serious lags, so modify for p or q fit2.HF1=arima(deseasonal_HF1, order=c(7,1,4)) tsdisplay(residuals(fit2.deathacc), lag.max=40, main='Seasonal Model Residuals') fit2.HF1 ###TEST MODEL PERFORMANCE WITH A HOLDOUT SET### hold<-window(ts(deseasonal_HF1), start=260) fit2_no_holdout= arima(ts(deseasonal_HF1[-c(260:324)]),order=c(7,1,4)) fcast2_no_holdout<-forecast(fit2_no_holdout,h=64) plot(fcast2_no_holdout,main=" ") lines(ts(deseasonal_HF1)) #Seasonality added back.However, model does not seem as though it need seasnality to be added back to it. fit1_w_seasonality=auto.arima(deseasonal_HF1, seasonal=TRUE) seas_fcast1<- forecast(fit1_w_seasonality, h=90) plot(seas_fcast1) lines(ts(clean.TS_HF1)) lines(ts(deseasonal_HF1)) fit1_w_seasonality ###FURTHER TESTING AND ANALYSISOF OUR MODEL### #deseasonal fit1=auto.arima(deseasonal_HF1, seasonal=FALSE) tsdisplay(residuals(fit1), lag.max=15, main='Seasonal Model Residuals') #Adjusted lag model fit2=arima(deseasonal_deathacc, order=c(7,1,4)) tsdisplay(residuals(fit2), lag.max=15, main='Seasonal Model Residuals') #seasonality fit1_w_seasonality=auto.arima(deseasonal_HF1, seasonal=TRUE) tsdisplay(residuals(fit1_w_seasonality),lag.max=20, main='Seasonal Model Residuals') #####Final Fit and Tested ARIMA forecast par(mfrow=c(2,2)) #auto arima without seasonality fcast1_1<-forecast(fit1, h=30) plot(fcast1_1) #Adjusted lag model fcast1_2<-forecast(fit2, h=30) plot(fcast1_2) #auto arima with seasonality fcast1_3<-forecast(fit1_w_seasonality, h=30) plot(fcast1_3) ###Selected Model### par(mfrow=c(1,1)) fcast1_1<-forecast(fit1_w_seasonality, h=180) plot(fcast1_1) lines(ts(clean.TS_HF1)) lines(ts(deseasonal_HF1))