# Random forest analysis of the `AL` column of allVariableRFdata.csv
# (plotted below as "Adherence level").
#
# Section 1 (variable importance): for each of k folds, fit a 2000-tree
#   forest on the other k - 1 folds and append the model summary to
#   mymodel.txt and the importance table to vi.txt.
# Section 2 (model development): repeat the k-fold split with the default
#   number of trees, predict the held-out fold, and append predictions to
#   prediction.txt and the per-fold MSE, average MSE and average RMSE to
#   result.txt.
#
# All output files are opened in append mode, so re-running the script adds
# to whatever they already contain.

library(plyr)
library(randomForest)
library(ROCR)

# Read the data set and attach a random fold label in column `id`.
#
# The seed is set before reading so that both sections obtain the same fold
# assignment and start their model fits from the same RNG state.
#
# path: CSV file to read.
# k:    number of folds; labels are drawn from 1..k with replacement.
# seed: RNG seed.
# Returns the data frame with the extra `id` column.
load_folded_data <- function(path, k, seed = 100) {
  set.seed(seed)
  folded <- read.csv(path)
  folded$id <- sample(1:k, nrow(folded), replace = TRUE)
  folded
}

k <- 10
folds <- seq_len(k)

# ---- Section 1: variable importance ----------------------------------------

data <- load_folded_data("allVariableRFdata.csv", k)

# Columns handed to randomForest(): everything except the fold label.
# Selecting by name replaces the positional `[, -19]`, which only works when
# `id` happens to be the 19th column.
model_cols <- setdiff(names(data), "id")

progress.bar <- create_progress_bar("text")
progress.bar$init(k)

for (i in folds) {
  trainingset <- data[data$id != i, , drop = FALSE]

  # The response is written as the bare column name. With
  # `trainingset$AL ~ .` the `.` is expanded against `data`, where `AL` is not
  # recognised as the response and therefore stays among the predictors.
  mymodel <- randomForest(
    AL ~ .,
    data = trainingset[, model_cols, drop = FALSE],
    importance = TRUE,
    ntree = 2000
  )
  capture.output(mymodel, file = "mymodel.txt", append = TRUE)

  # Plot and record the variable importance of this fold's model.
  varImpPlot(mymodel, main = "Variable Importance for Adherence level")
  capture.output(importance(mymodel), file = "vi.txt", append = TRUE)
  cat("\n", file = "vi.txt", append = TRUE)

  progress.bar$step()
}
progress.bar$term()

# ---- Section 2: model development (10-fold cross-validation) ---------------

# Re-seed and re-read so this section can also be run on its own.
data <- load_folded_data("allVariableRFdata.csv", k)
model_cols <- setdiff(names(data), "id")
predictor_cols <- setdiff(model_cols, "AL")

progress.bar <- create_progress_bar("text")
progress.bar$init(k)

# One test MSE per fold; NA marks a fold that received no rows.
msetest <- rep(NA_real_, k)

for (i in folds) {
  # Split into training and held-out rows.
  trainingset <- data[data$id != i, , drop = FALSE]
  testset <- data[data$id == i, , drop = FALSE]

  # Train the model (default number of trees).
  mymodel <- randomForest(
    AL ~ .,
    data = trainingset[, model_cols, drop = FALSE],
    importance = TRUE
  )
  print(mymodel)
  capture.output(mymodel, file = "mymodel.txt", append = TRUE)

  # Fold labels are sampled with replacement, so a fold can be empty on a
  # small data set; there is nothing to predict or score in that case.
  if (nrow(testset) == 0) {
    warning("Fold ", i, " has no test rows; its MSE is recorded as NA.",
            call. = FALSE)
    progress.bar$step()
    next
  }

  # Test the model on the held-out fold.
  predict.test <- predict(mymodel, testset[, predictor_cols, drop = FALSE])

  # Mean squared error on the held-out fold.
  msetest[i] <- mean((testset$AL - predict.test)^2)

  # Record predicted vs. actual values.
  # NOTE(review): the original took "Actual" from column 18 by position; this
  # assumes that column is `AL` -- confirm against the CSV layout.
  pred <- data.frame(Predicted = predict.test, Actual = testset$AL)
  capture.output(pred, file = "prediction.txt", append = TRUE)

  progress.bar$step()
}
progress.bar$term()

msetest

cat("Result\n", file = "result.txt", append = TRUE)

# Per-fold MSE. The heading text, including the line break after "Testing ",
# is reproduced exactly as it appears in the original string literal.
cat("\nTesting \nError\n", file = "result.txt", append = TRUE)
capture.output(msetest, file = "result.txt", append = TRUE)

# Average MSE over the folds that could be scored.
cat("Average MSE\n", file = "result.txt", append = TRUE)
capture.output(mean(msetest, na.rm = TRUE), file = "result.txt", append = TRUE)

# Average RMSE, i.e. the square root of the average MSE.
cat("Average RMSE\n", file = "result.txt", append = TRUE)
capture.output(sqrt(mean(msetest, na.rm = TRUE)), file = "result.txt",
               append = TRUE)

# Plot the per-fold MSE as a boxplot.
boxplot(msetest, xlab = 'MSE CV', col = 'cyan', border = 'blue',
        names = 'CV error (MSE)', main = 'CV error (MSE) for Random Forest',
        horizontal = TRUE)