# Random forest (rf) analysis
# Section 1: variable importance, one model per cross-validation fold
library(plyr)
library(randomForest)
library (ROCR)
# Fix the RNG state so the fold assignment below is reproducible.
set.seed(100)

# Load the modelling data set from the working directory.
data <- read.csv("allVariableRFdata.csv")

# Number of cross-validation folds.
k <- 10

# Give every row a fold label drawn from 1..k with replacement; fold sizes
# are therefore random and not guaranteed to be equal.
data$id <- sample(seq_len(k), size = nrow(data), replace = TRUE)

# Vector of the fold labels 1..k.
list <- seq_len(k)

# Text progress bar (plyr), initialised with one step per fold.
progress.bar <- create_progress_bar("text")
progress.bar$init(k)


# Fit one random forest per cross-validation fold and record, for each fold,
# the printed model summary (mymodel.txt) and the variable-importance table
# (vi.txt). Both files are written in append mode, so output accumulates
# across folds and across repeated runs of the script.
for (i in seq_len(k)) {
  # Rows whose fold label differs from i form the training set; rows labelled
  # i are the held-out fold. The held-out rows are not used inside this loop
  # but are still assigned, as in the original script.
  trainingset <- data[data$id != i, , drop = FALSE]
  testset <- data[data$id == i, , drop = FALSE]

  # Drop the fold label by name (not by column position) so it can never be
  # used as a predictor, whatever the number of columns in the CSV.
  train_frame <- trainingset[, names(trainingset) != "id", drop = FALSE]

  # AL is the response; every other remaining column is a predictor.
  mymodel <- randomForest(AL ~ ., data = train_frame,
                          importance = TRUE, ntree = 2000)
  capture.output(mymodel, file = "mymodel.txt", append = TRUE)

  # Plot the variable importance for this fold and append the table to vi.txt,
  # followed by a blank line separating folds.
  varImpPlot(mymodel, main = "Variable Importance for Adherence level")
  capture.output(importance(mymodel), file = "vi.txt", append = TRUE)
  cat("\n", file = "vi.txt", append = TRUE)

  progress.bar$step()
}

# Close the progress bar so later console output starts on a fresh line.
progress.bar$term()


# Section 2: model development - 10-fold cross-validated test error (MSE)

library(plyr)
library(randomForest)
library (ROCR)
# Re-seed the RNG so the fold labels drawn below are reproducible.
set.seed(100)

# Reload the data set; this discards any columns added to `data` earlier.
data <- read.csv("allVariableRFdata.csv")

# Number of cross-validation folds.
k <- 10

# Give every row a fold label drawn from 1..k with replacement; fold sizes
# are therefore random and not guaranteed to be equal.
data$id <- sample(seq_len(k), size = nrow(data), replace = TRUE)

# Vector of the fold labels 1..k.
list <- seq_len(k)

# Text progress bar (plyr), initialised with one step per fold.
progress.bar <- create_progress_bar("text")
progress.bar$init(k)

# Placeholder for training error; nothing in the visible script fills it.
msetraining <- NULL

# Per-fold test MSE, preallocated to k slots instead of being grown inside
# the loop. NA marks folds that have not been evaluated yet.
msetest <- rep(NA_real_, k)

# Cross-validation loop: for each fold, fit a random forest on the remaining
# folds, predict the held-out fold, store its test MSE in msetest[i], and
# append the model summary and the predictions to mymodel.txt and
# prediction.txt (append mode, so output accumulates across folds and runs).
#
# NOTE(review): the original selected columns by position (dropping 19 for
# training, 18:19 for prediction, and reading column 18 as "Actual"). This
# version selects `id` and `AL` by name, which assumes position 18 was AL and
# position 19 was the fold label `id` - confirm against the CSV layout.
for (i in seq_len(k)) {
  # Split rows into training (all other folds) and test (fold i).
  trainingset <- data[data$id != i, , drop = FALSE]
  testset <- data[data$id == i, , drop = FALSE]

  # Train the model: AL is the response, every column except the fold label
  # is a predictor.
  train_frame <- trainingset[, names(trainingset) != "id", drop = FALSE]
  mymodel <- randomForest(AL ~ ., data = train_frame, importance = TRUE)
  print(mymodel)
  capture.output(mymodel, file = "mymodel.txt", append = TRUE)

  # Predict the held-out fold from its predictor columns only.
  test_x <- testset[, !(names(testset) %in% c("AL", "id")), drop = FALSE]
  predict.test <- predict(mymodel, newdata = test_x)

  # Mean squared error of the predictions on the held-out fold.
  msetest[i] <- mean((testset$AL - predict.test)^2)

  # Log predicted vs. actual values; row names come from the names of the
  # prediction vector.
  pred <- data.frame(Predicted = predict.test, Actual = testset$AL)
  capture.output(pred, file = "prediction.txt", append = TRUE)

  progress.bar$step()
}

# Close the progress bar so later console output starts on a fresh line.
progress.bar$term()

# Show the per-fold test MSE values on the console.
msetest

# Everything below is appended to this report file.
result_file <- "result.txt"
mean_mse <- mean(msetest)

# Report header, then the per-fold test MSE values.
cat("Result\n", "\nTesting Error\n", file = result_file, sep = "",
    append = TRUE)
capture.output(msetest, file = result_file, append = TRUE)

# Mean of the per-fold MSE values.
cat("Average MSE\n", file = result_file, append = TRUE)
capture.output(mean_mse, file = result_file, append = TRUE)

# Square root of the mean per-fold MSE.
cat("Average RMSE\n", file = result_file, append = TRUE)
capture.output(sqrt(mean_mse), file = result_file, append = TRUE)

# Horizontal boxplot of the per-fold MSE values.
boxplot(
  msetest,
  xlab = "MSE CV",
  col = "cyan",
  border = "blue",
  names = "CV error (MSE)",
  main = "CV error (MSE) for Random Forest",
  horizontal = TRUE
)