library(pROC) install.packages("pROC") install.packages("caret") install.packages("CBCgrps") install.packages("tidyverse") install.packages("car") install.packages("rms") install.packages("mice") install.packages("VIM") install.packages("rmda") install.packages("ResourceSelection") install.packages("foreign") install.packages("forcats") install.packages("glmnet") install.packages("survival") install.packages("dplyr") install.packages("ggplot2") library(pROC) library(pROC) library(caret) library(CBCgrps) library(tidyverse) library(car) library(rms) library(mice) library(VIM) library(rmda) library(ResourceSelection) library(foreign) library(forcats) library(glmnet) library(survival) library(dplyr) library(DataExplorer) library(visreg) library(psych) library(boot) library(Hmisc) library(boot) library(gbm) library(caretEnsemble) library(PerformanceAnalytics) library(party) library(zoo) library(mvtnorm) library(modeltools) library(stats19) library(strucchange) library(C50) library(xgboost) library(mlbench) library(kernlab) library(randomForest) library(mlr) library(reportROC) library(dplyr) setwd("D:/123") data<-read.csv("data.csv",header=TRUE,sep=",",na="") data$map<-data$DBP+(data$SBP-data$DBP)/3 data$fib<-cut(data$fib,breaks = c(0,2,4,10000)) data$fib<-factor(data$fib,labels = c("<2","2-4",">4")) data$fib<-fct_relevel(data$fib,"2-4") data$wbc<-cut(data$wbc,breaks = c(0,4,10,10000)) data$wbc<-factor(data$wbc,labels = c("<4","4-10",">10")) data$wbc<-fct_relevel(data$wbc,"4-10") data$hbg<-cut(data$hbg,breaks = c(0,110,160,10000)) data$hbg<-factor(data$hbg,labels = c("<110","110-160",">160")) data$hbg<-fct_relevel(data$hbg,"110-160") data$plt<-cut(data$plt,breaks = c(0,100,300,10000)) data$plt<-factor(data$plt,labels = c("<100","100-300",">300")) data$plt<-fct_relevel(data$plt,"100-300") data$map<-cut(data$map,breaks = c(0,70,105,10000)) data$map<-factor(data$map,labels = c("<70","70-105",">105")) data$map<-fct_relevel(data$map,"70-105") data$k<-cut(data$k,breaks = c(0,3.5,5.5,10000)) data$k<-factor(data$k,labels = c("<3.5","3.5-5.5",">5.5")) data$k<-fct_relevel(data$k,"3.5-5.5") data$na<-cut(data$na,breaks = c(0,135,145,10000)) data$na<-factor(data$na,labels = c("<135","135-145",">145")) data$na<-fct_relevel(data$na,"135-145") data$mg<-cut(data$mg,breaks = c(0,0.75,1.25,10000)) data$mg<-factor(data$mg,labels = c("<0.75","0.75-1.25",">1.25")) data$mg<-fct_relevel(data$mg,"0.75-1.25") data$ca<-cut(data$ca,breaks = c(0,2.25,2.75,10000)) data$ca<-factor(data$ca,labels = c("<2.25","2.25-2.75",">2.25")) data$ca<-fct_relevel(data$ca,"2.25-2.75") data$T<-cut(data$T,breaks = c(0,36,37.5,10000)) data$T<-factor(data$T,labels = c("<36","36-37.5",">37.5")) data$T<-fct_relevel(data$T,"36-37.5") data$clugd<-factor(data$clugd,labels = c("no clugd","clugd")) data$diabetes<-factor(data$diabetes,labels = c("no diabetes","diabetes")) data$hypertension<-factor(data$hypertension,labels = c("no hypertension","hypertension")) data$cerebral.infarction<-factor(data$cerebral.infarction,labels = c("no cerebral.infarction","cerebral.infarction")) data$chd<-factor(data$chd,labels = c("no chd","chd")) data$cld<-factor(data$cld,labels = c("no cld","cld")) data$ckd<-factor(data$ckd,labels = c("no ckd","ckd")) data$cancer<-factor(data$cancer,labels = c("no cancer","cancer")) data$leukemia<-factor(data$leukemia,labels = c("no leukemia","leukemia")) data$head<-factor(data$head,labels = c("no","yes")) data$lung<-factor(data$lung,labels = c("no","yes")) data$bilinay<-factor(data$bilinay,labels = c("no","yes")) data$urinary<-factor(data$urinary,labels = c("no","yes")) data$gastro<-factor(data$urinary,labels = c("no","yes")) set.seed(123) trianandvad<- createDataPartition(y=data$ventilator,p=0.70,list=FALSE) dev<- data[trianandvad,] vad<-data[-trianandvad,] dev$group<-1 vad$group<-2 df<-rbind(dev,vad) table1<-twogrps(df,gvar = "group",c("gender","age","crp","alt","tg","tbi","cr", "lac","bnp","che","pt","d.d","k","na","mg","ca", "wbc","hbg","plt","albumin","glob","spo2","T","map", "HP","R","GCS","diabetes","hypertension","cerebral.infarction","cancer", "clugd","chd","cld","ckd","leukemia","head","lung","bilinay", "urinary","gastro","sofa","news"),minfactorlevels=2) print(table1,quote = T) modfull<-glm(ventilator~tg+cr+lac+bnp+che+pt+d.d+albumin+R+plt+lung,data =dev,family = "binomial") summary(modfull) cbind(coef= coef(modfull),confint(modfull)) exp(cbind(OR= coef(modfull),confint(modfull))) # Stepwise regression analysis(both) modelend <- step(object=modfull,direction="both") summary(modelend) cbind(coef= coef(modelend),confint(modelend)) exp(cbind(OR= coef(modelend),confint(modelend))) modend<-glm(ventilator~lac+bnp+d.d+albumin+lung+pt,data = dev,family = "binomial") dev$pred<-predict(modend,dev,"response") ddist <- datadist(dev) options(datadist='ddist') gmodel1 <- roc(ventilator ~pred, data = dev) plot(gmodel1, print.auc=TRUE, print.thres=TRUE, main = "ROC CURVE", col= "blue",print.thres.col="blue",identity.col="blue", identity.lty=1,identity.lwd=1) cc1 <- reportROC(gold=dev$ventilator, predictor=dev$pred, important="se", plot=TRUE) summary(cc1) write.csv(cc1, "cc1.csv") val.prob(dev$pred,dev$ventilator) ci.auc(gmodel1) ci.thresholds(gmodel1) dcamodelB<- decision_curve(ventilator~lac+bnp+d.d+albumin+lung+pt, data=dev, family = binomial(link ='logit'), thresholds= seq(0,1, by = 0.01),confidence.intervals = 0.95, study.design = 'case-control', population.prevalence = 0.3) List<- list(dcamodelB) plot_decision_curve ( List,curve.names=c('Training'),cost.benefit.axis =FALSE, col= c('red'), confidence.intervals = FALSE,standardize = FALSE) f<-lrm(ventilator~lac+bnp+d.d+albumin+lung+pt,data=dev) nom <- nomogram(f, fun=function(x)1/(1+exp(-x)), fun.at=c(.001,.01,seq(.1,.9,by=.30),.90,.999), funlabel="Risk of failure") plot(nom, xfrac=.45) data2<-data %>% select(gender,age,crp,alt,tg,tbi,cr, lac,bnp,che,pt,d.d,k,na,mg,ca, wbc,hbg,plt,albumin,glob,spo2,T,map, HP,R,GCS,diabetes,hypertension,cerebral.infarction,cancer, clugd,chd,cld,ckd,leukemia,head,lung,bilinay, urinary,ventilator,gastro) data2<-as.data.frame(data2) data2$ventilator<-as.factor(data2$ventilator) classif.task = makeClassifTask(data = data2, target = "ventilator", positive = 1) train.set = seq(1,nrow(dev),1) test.set = seq(nrow(dev)+1,nrow(data2),1) data3<-data2 levels(data3$ventilator)<-c("No","Yes") training.data<-data3[train.set,] testing.data<-data3[test.set,] ctrl=trainControl(method = "repeatedcv", number = 3, repeats=3, savePredictions = TRUE,classProbs = TRUE) methods.list = c('rpart', 'rf','glm','xgbTree',"svmRadial") models <- caretList(ventilator~.,data=training.data, trControl=ctrl, methodList=methods.list) results <- resamples(models) summary(results) datTrain<-dev %>% select(gender,age,crp,alt,tg,tbi,cr, lac,bnp,che,pt,d.d,k,na,mg,ca, wbc,hbg,plt,albumin,glob,spo2,T,map, HP,R,GCS,diabetes,hypertension,cerebral.infarction,cancer, clugd,chd,cld,ckd,leukemia,head,lung,bilinay, urinary,ventilator,gastro) datTest<-vad%>% select(gender,age,crp,alt,tg,tbi,cr, lac,bnp,che,pt,d.d,k,na,mg,ca, wbc,hbg,plt,albumin,glob,spo2,T,map, HP,R,GCS,diabetes,hypertension,cerebral.infarction,cancer, clugd,chd,cld,ckd,leukemia,head,lung,bilinay, urinary,ventilator,gastro) stacked.Ensemble.glm = caretStack(models, method="glm", trControl=ctrl) stacked.Ensemble.glm pred.stacked.Ensemble.glm=predict(stacked.Ensemble.glm,newdata = testing.data,type="prob") pred.stacked.Ensemble.glm roc_data <- roc(testing.data$ventilator, pred.stacked.Ensemble.glm) roc.test(roc1,roc_data,method = 'delong') datTrain<-dev %>% select(gender,age,crp,alt,tg,tbi,cr, lac,bnp,che,pt,d.d,k,na,mg,ca, wbc,hbg,plt,albumin,glob,spo2,T,map, HP,R,GCS,diabetes,hypertension,cerebral.infarction,cancer, clugd,chd,cld,ckd,leukemia,head,lung,bilinay, urinary,ventilator,gastro) datTest<-vad%>% select(gender,age,crp,alt,tg,tbi,cr, lac,bnp,che,pt,d.d,k,na,mg,ca, wbc,hbg,plt,albumin,glob,spo2,T,map, HP,R,GCS,diabetes,hypertension,cerebral.infarction,cancer, clugd,chd,cld,ckd,leukemia,head,lung,bilinay, urinary,ventilator,gastro) set.seed(123) datTest$ventilator<-factor(datTest$ventilator,labels=c('no','yes')) datTrain$ventilator<-factor(datTrain$ventilator,labels=c('no','yes')) set.seed(123) bagControl = bagControl(fit = ctreeBag$fit, predict = ctreeBag$pred, aggregate = ctreeBag$aggregate) #baggedCT <- bag( x = datTrain[, -PostComplication], # y = datTrain$PostComplication, # B = 50, bagControl = bagControl ) baggedCT <- bag( x = select(datTrain,-c("ventilator")), y = datTrain$ventilator, B = 50, bagControl = bagControl ) summary(baggedCT) fitControl <- trainControl( method = "repeatedcv", number = 10, repeats = 10) gbmGrid <- expand.grid( interaction.depth = c(1, 5, 9), n.trees = (1:30)*50, shrinkage = 0.1, n.minobsinnode = 20) gbmFit <- caret::train( ventilator ~ ., data = datTrain, method = "gbm", trControl = fitControl, tuneGrid = gbmGrid, verbose = FALSE) summary(gbmFit) fitControl <- trainControl( method = "repeatedcv", number = 10, repeats = 10) gbmGrid <- expand.grid( interaction.depth = c(1, 5, 9), n.trees = (1:30)*50, shrinkage = 0.1, n.minobsinnode = 20) gbmFit <- caret::train( ventilator ~ ., data = datTrain, method = "gbm", trControl = fitControl, tuneGrid = gbmGrid, verbose = FALSE) summary(gbmFit) #### train_control <- trainControl( method="boot", number=25, savePredictions="final", classProbs=TRUE, index=createResample(datTrain$ventilator, 25), summaryFunction=twoClassSummary) model_list <- caretList( ventilator~., data=datTrain, trControl=train_control, metric="ROC", tuneList=list( SVM=caretModelSpec( method="svmLinearWeights", tuneGrid=expand.grid( cost=seq(0.1,1,0.2), weight=c(0.5,0.8,1))), C5.0=caretModelSpec( method="C5.0", tuneGrid=expand.grid( trials=(1:5)*10, model=c("tree", "rules"), winnow=c(TRUE, FALSE))), XGboost=caretModelSpec( method="xgbTree", tuneGrid=expand.grid( nrounds=(1:5)*10, max_depth= 6, eta=c(0.1),gamma= c(0.1), colsample_bytree=1, min_child_weight=c(0.5,0.8,1), subsample=c(0.3,0.5,0.8))) )) gbm_ensemble <- caretStack( model_list, method="gbm", verbose=FALSE, tuneLength=10, metric="ROC", trControl=trainControl( method="boot", number=10, savePredictions="final", classProbs=TRUE, summaryFunction=twoClassSummary )) summary(gbm_ensemble) plot(model_list$C5.0) library("PerformanceAnalytics") dtResample <- resamples(model_list)$values %>% dplyr::select(ends_with("~ROC")) %>% rename_with(~str_replace(.x,"~ROC","")) %>% chart.Correlation() library(cowplot) model_preds <- lapply( model_list, predict, newdata=datTrain, type="prob") model_preds <- lapply( model_preds, function(x) x[,"yes"]) model_preds <- data.frame(model_preds) model_preds$Ensemble <- 1-predict( gbm_ensemble, newdata=datTrain, type="prob") model_roc <- lapply( model_preds, function(xx){ roc(response=datTrain$ventilator, direction = "<",predictor = xx) }) #Add model performance of the ensemble model model_roc$Ensemble <- roc( response=datTrain$ventilator, direction = "<", predictor = model_preds$Ensemble) model_TextAUC <- lapply(model_roc, function(xx){ paste("AUC: ",round(pROC::auc(xx),3), "[",round(pROC::ci.auc(xx)[1],3),",", round(pROC::ci.auc(xx)[3],3),"]",sep = "") }) names(model_roc) <- paste(names(model_TextAUC),unlist(model_TextAUC)) plotROC <- ggroc(model_roc)+ theme(legend.position=c(0.6,0.2))+ guides(color=guide_legend(title="Models and AUCs")) datCalib <- cbind(model_preds,testSetY=datTrain$ventilator) cal_obj <- calibration(relevel(testSetY,ref = "yes") ~ SVM +C5.0+XGboost+Ensemble, data = datCalib, cuts = 6) calplot <- plot(cal_obj, type = "b", auto.key = list(columns = 3, lines = TRUE, points = T),xlab="Predicted Event Percentage") ggdraw() + draw_plot(calplot, 0,0.5, 1, 0.5) + draw_plot(plotROC, 0, 0, 1, 0.5) + draw_plot_label(c("A", "B"), c(0, 0), c(1, 0.5), size = 15) = 15) library(gbm) ggplot(caret::varImp(gbmFit)) plot(model_roc$XGboost, print.auc=TRUE, print.thres=TRUE, main = "ROC CURVE", col= "blue",print.thres.col="blue",identity.col="blue", identity.lty=1,identity.lwd=1) modend<-glm(ventilator~lac+bnp+d.d+albumin+lung+pt,data = dev,family = "binomial") dev$pred<-predict(modend,dev,"response") ddist <- datadist(dev) options(datadist='ddist') roc1<-roc(ventilator ~pred, data = dev) roc2<-roc(ventilator ~sofa, data = dev) roc3<-roc(ventilator ~news, data = dev) plot(roc1,col="red",main = "ROC Comparison in model population",legacy.axes=T) plot(roc2,add=TRUE,col="blue") plot(roc3,add=TRUE,col="green") plot(model_roc$Ensemble,add=TRUE,col="black") plot(model_roc$C5.0,add=TRUE,col="yellow") plot(model_roc$SVM,add=TRUE,col="purple") plot(model_roc$XGboost,add=TRUE,col="orange") modend<-glm(ventilator~lac+bnp+d.d+albumin+lung+pt,data = vad,family = "binomial") vad$pred<-predict(modend,vad,"response") ddist <- datadist(vad) options(datadist='ddist') gmodel<- roc(ventilator ~pred, data = vad) plot(gmodel, print.auc=TRUE, print.thres=TRUE, main = "ROC CURVE", col= "blue",print.thres.col="blue",identity.col="blue", identity.lty=1,identity.lwd=1) modend<-glm(ventilator~lac+bnp+d.d+albumin+lung+pt,data = dev,family = "binomial") dev$pred<-predict(modend,dev,"response") ddist <- datadist(dev) options(datadist='ddist') gmodel1 <- roc(ventilator ~pred, data = dev) plot(gmodel1, print.auc=TRUE, print.thres=TRUE, main = "ROC CURVE", col= "blue",print.thres.col="blue",identity.col="blue", identity.lty=1,identity.lwd=1) legend("bottomleft",legend=c("Logistic","Sofa","News","Ensemble","C5.0","SVM","XGBoost"), col = c("red","blue","green","black","yellow","purple","orange"),lty=1) plot(roc1,col="red",main = "ROC Comparison in model population",legacy.axes=T) plot(roc2,add=TRUE,col="blue") plot(roc3,add=TRUE,col="green") plot(model_roc$Ensemble,add=TRUE,col="black") plot(model_roc$C5.0,add=TRUE,col="yellow") plot(model_roc$SVM,add=TRUE,col="purple") plot(model_roc$XGboost,add=TRUE,col="orange") legend("bottomleft",legend=c("Logistic","Sofa","News","Ensemble","C5.0","SVM","XGBoost"), col = c("red","blue","green","black","yellow","purple","orange"),lty=1) plot(roc1,col="red",main = "ROC Comparison in model population",legacy.axes=T) plot(roc2,add=TRUE,col="blue") plot(roc3,add=TRUE,col="green") plot(model_roc$Ensemble,add=TRUE,col="black") plot(model_roc$C5.0,add=TRUE,col="yellow") plot(model_roc$SVM,add=TRUE,col="purple") plot(model_roc$XGboost,add=TRUE,col="orange") #SVM:0.820(0.764-0.876) #C5.0:0.999(0.998-1) #CGboost:0.994(0.987-1) #Ensemble:0.996(0.994-0.999) #Logistic:0.820(0.769-0.870) #Sofa:0.696(0.631-0.761) #News:0.626(0.560-0.692) legend("bottom",legend=c("Logistic","Sofa","News","Ensemble","C5.0","SVM","XGBoost"), col = c("red","blue","green","black","yellow","purple","orange"),lty=1) legend("bottom",legend = c("Logistic-AUC:0.820(0.769-0.870)","Sofa-AUC:0.696(0.631-0.761)", "News-AUC:0.626(0.560-0.692)","Ensemble-AUC:0.996(0.994-0.999)","C5.0-AUC:0.999(0.998-1)","SVM-AUC:0.820(0.764-0.876)","XGBoost-AUC:0.994(0.987-1)"), col = c("red","blue","green","black","yellow","purple","orange"),lty=1) roc.test(roc1,model_roc$Ensemble,method='delong') roc.test(roc1,model_roc$SVM,method='delong') roc.test(roc1,model_roc$C5.0,method='delong') roc.test(roc1,model_roc$XGboost,method='delong') roc.test(roc1,roc2,method='delong') roc.test(roc1,roc3,method='delong')