# # Library install # library(pdp) # Develop the final model with all dataset set.seed(1) model <- randomForest(diarrhea~age+sex+rankin_scale+tube_feeding+abx, mtry=1, # Use determined hyperparameter data=all_data) # # Variable importance (Figure 2) # variable_importance <- importance(model) sort(variable_importance[,"MeanDecreaseGini"]) # Bar plotting barplot(sort(variable_importance[,'MeanDecreaseGini'], decreasing=T), names.arg=c('Tube feeding','mRS score','Antibiotic','Age', 'Sex'), ylab='Mean decrease of Gini coefficient', cex.lab=1.2, cex.axis=1.2, font.lab=2, font.axis=2, space=rep(0.3, 5)) # # Partial dependence plots (Figure 3) # plot(partial(model, pred.var='tube_feeding', type='classification', prob=TRUE, which.class='Yes'), xlab='Tube feeding use (days)', ylab='Probability', ylim=c(0, 1), type='l', font.lab=2, font.axis=2, lwd=4, cex.lab=1.2, cex.axis=1.2) plot(partial(model, pred.var='rankin_scale', type='classification', prob=TRUE, which.class='Yes'), xlab='mRS score', ylab='Probability', ylim=c(0, 1), type='l', font.lab=2, font.axis=2, lwd=4, cex.lab=1.2, cex.axis=1.2) plot(partial(model, pred.var='abx', type='classification', prob=TRUE, which.class='Yes'), xlab='Antibiotic use (days)', ylab='Probability', ylim=c(0, 1), type='l', font.lab=2, font.axis=2, lwd=4, cex.lab=1.2, cex.axis=1.2) plot(partial(model, pred.var='age', type='classification', prob=TRUE, which.class='Yes'), xlab='Age (years)', ylab='Probability', ylim=c(0, 1), type='l', font.lab=2, font.axis=2, lwd=4, cex.lab=1.2, cex.axis=1.2) barplot(partial(model, pred.var='sex', type='classification', prob=TRUE, which.class='Yes')[,'yhat'], names.arg=c('Female','Male'), ylab='Probability', ylim=c(0,1), cex.lab=1.2, cex.axis=1.2, font.lab=2, font.axis=2, space=rep(0.2, 2))