#(Install packages and) Load libraries library(gtrendsR) library(reshape2) library(forecast) library(caret) library(feasts) library(ggforce) library(ggrepel) library(dplyr) library(TSA) library(nonlinearTseries) # Source GT relative search interest for "Keyword" (specifiy relevant category, 45 for Health) trends <- gtrends(keyword = c( "colonoscopy"),time = "all", category=45) #extract the "interest_over_time" dataframe and then the relative search interest index; next, define it as time series for modeling and forecasting RSI <- trends$interest_over_time x<- ts(RSI$hits, start=2004, frequency=12) #tests for nonlinearity Keenan.test(x) Tsay.test(x) thresholdTest(x) #Seasonality plots ggseasonplot(x, year.labels=T, year.labels.left=TRUE) + ylab("RSV") + ggtitle("Seasonal plot for the RSV index") ggseasonplot(x, polar=TRUE) + ylab("") + ggtitle("Polar seasonal plot") # Split data into train and test windows #test period starts in aprilie 2019 and spans 45 months ending in December 2022 test_x <- window(x, start=c(185, 1), end = c(229,1)) x <- window(x, end=c(184, 1)) #Train models models <- list( mod_arima = auto.arima(x, ic='aicc', stepwise=T), mod_exp = ets(x, ic='aicc', model = "ZZZ",restrict=T, allow.multiplicative.trend=T), mod_neural = nnetar(x), mod_tbats = tbats(x, ic='aicc', seasonal.periods=1), mod_bats = bats(x, ic='aicc', seasonal.periods=1), mod_sts = StructTS(x), mod_hw=HoltWinters(x,gamma=FALSE) ) #Test models out-of-sample and assess accuracy forecasts <- lapply(models, forecast::forecast, 45) forecasts$naive <- naive(x, 45) par(mfrow=c(1, 1)) for(f in forecasts){ plot(f) lines(test_x, col='red') } acc <- lapply(forecasts, function(f){ forecast::accuracy(f, test_x)[2,,drop=FALSE] }) acc <- Reduce(rbind, acc) row.names(acc) <- names(forecasts) acc <- acc[order(acc[,'RMSE']),] round(acc, 2)# NNAR is best on the test window (lowest RMSE) #Thus, retrain NNAR over the entire series #redefine x and fit model x<- ts(RSI$hits) nnarfit <- nnetar(x) #Forecast RSI for the next 24 months, then plot fcast <- forecast::forecast(nnarfit,24) plot(fcast) plot(fcast, PI=FALSE, xaxt='n', srt=1,main="Prediction for global CRC search interest") axis(side = 1, # Draw x-axis c(0, 229, 255), labels=c("Jan 2004", "Jan 2023", "Jan 2025")) fcast