#Data input setwd("D:/SCI/11.11") library(foreign) library(rms) bmt<-read.spss("Raw_data.sav",to.data.frame=TRUE) str(bmt) bmt$Types_of_operation<-as.factor(bmt$Types_of_operation) #Set value label bmt$Types_of_operation<-factor(bmt$Types_of_operation, levels = c(0,1,2,3,4,5,6,7,8), labels = c("Trans-thoracic vagotomy", "Wege resection", "Segmental resection", "Lobectomy", "Total pneumonectomy", "Partial bilateral pneumonectomy", "Mediastinal surgery", "Esophageal surgery", "Others")) bmt$Age<-factor(bmt$Age, levels=c(1,2,3,4,5), labels=c("≥45","46-55","56-65","66-75",">75")) bmt$Sex<-factor(bmt$Sex, levels=c(0,1), labels=c("Female","Male")) bmt$ASA<-factor(bmt$ASA, levels = c(2,3), labels=c("Ⅰ/Ⅱ","Ⅲ/Ⅳ")) bmt$Duration_of_operation<-factor(bmt$Duration_of_operation, levels=c(1,2,3), labels = c("<180","180-300",">300")) bmt$Airway_management<-factor(bmt$Airway_management, levels=c(1,2,3,4), labels=c("single-lumen", "double-lumen", "bronchial occluder", "LMA")) bmt$Duration_of_one_lung_ventilation<-factor(bmt$Duration_of_one_lung_ventilation, levels = c(1,2), labels = c("≤60",">60")) bmt$BMI<-factor(bmt$BMI, levels = c(0,1,2,3), labels = c("≤18.5","18.5≤BMI<24","24≤BMI<28","≥28")) bmt$History_of_hypertension<-factor(bmt$History_of_hypertension, levels=c(0,1), labels=c("No","Yes")) bmt$History_of_diabetes_mellitus<-factor(bmt$History_of_diabetes_mellitus, levels=c(0,1), labels=c("No","Yes")) bmt$History_of_stroke<-factor(bmt$History_of_stroke, levels=c(0,1), labels=c("No","Yes")) bmt$History_of_heart_disease<-factor(bmt$History_of_heart_disease, levels=c(0,1), labels=c("No","Yes")) bmt$History_of_COPD<-factor(bmt$History_of_COPD, levels=c(0,1), labels=c("No","Yes")) bmt$History_of_smoking<-factor(bmt$History_of_smoking, levels=c(0,1), labels=c("No","Yes")) bmt$History_of_alcohol_use<-factor(bmt$History_of_alcohol_use, levels=c(0,1), labels=c("No","Yes")) bmt$Leukocyte_counts<-factor(bmt$Leukocyte_counts, levels=c(0,1,2), labels=c("4-10","<4",">10")) bmt$Red_blood_cell_counts<-factor(bmt$Red_blood_cell_counts, levels=c(0,1,2), labels=c("Normal","Low","Hight")) bmt$Platelet_counts<-factor(bmt$Platelet_counts, levels=c(0,1,2), labels=c("100-300","<100",">300")) bmt$Aspartate_aminotransferase<-factor(bmt$Aspartate_aminotransferase, levels=c(0,1), labels=c("<40","≥40")) bmt$Alanine_aminotransferase<-factor(bmt$Alanine_aminotransferase, levels=c(0,1), labels=c("<50","≥50")) bmt$Blood_glucose<-factor(bmt$Blood_glucose, levels=c(0,1), labels=c("Normal","Abnormal")) bmt$Blood_creatinine<-factor(bmt$Blood_creatinine, levels=c(0,1), labels=c("<111","≥111")) bmt$Blood_urea_nitrogen<-factor(bmt$Blood_urea_nitrogen, levels=c(0,1), labels=c("<9.5","≥9.5")) bmt$Serum_sodium<-factor(bmt$Serum_sodium, levels=c(0,1,2), labels=c("137-147","<137",">147")) bmt$Serum_potassium<-factor(bmt$Serum_potassium, levels=c(0,1), labels=c("Normal","Abnormal")) bmt$Serum_albumin<-factor(bmt$Serum_albumin, levels=c(0,1), labels=c("Normal","Low")) bmt$PPCs<-factor(bmt$PPCs, levels=c(0,1), labels=c("No","Yes")) bmt$MAP<-factor(bmt$MAP, levels=c(0,1,2), labels=c("70-105","<70",">105")) #Randomly divide training set and test set library(caTools) set.seed(1234567) split=sample.split(bmt$PPCs,SplitRatio = 0.8) training_set=subset(bmt,split==TRUE) test_set=subset(bmt,split==FALSE) head(training_set) test_set$group<-2 training_set$group<-1 data_all<-rbind(training_set,test_set) str(data_all) data_all$group<-as.factor(data_all$group) #Compare the differences between the training set and the test set library(tableone) vars <- c("Types_of_operation","Age","Sex","ASA","MAP","Duration_of_operation","Airway_management","Duration_of_one_lung_ventilation","BMI","History_of_hypertension","History_of_diabetes_mellitus","History_of_stroke","History_of_heart_disease","History_of_COPD","History_of_smoking","History_of_alcohol_use","Leukocyte_counts","Red_blood_cell_counts","Platelet_counts","Aspartate_aminotransferase","Alanine_aminotransferase","Blood_glucose","Blood_creatinine","Blood_urea_nitrogen","Serum_sodium","Serum_potassium","Serum_albumin","PPCs") tabUmatched<-CreateTableOne(vars=vars,strata="group",data=data_all) print(tabUmatched,smd=TRUE) write.csv(data_all,"classed.csv") write.csv(training_set,"training_data.csv") save(data_all,file="bmt.Rdata") load('bmt.Rdata') head(data,5) #Univariate logistic regression pre1<-glm(formula = PPCs~Age,family = binomial(link = "logit"), data = training_set) summary(pre1) confint(pre1) pre2<-glm(formula = PPCs~Sex,family = binomial(link = "logit"), data = training_set) summary(pre2) confint(pre2) pre3<-glm(formula = PPCs~ASA,family = binomial(link = "logit"), data = training_set) summary(pre3) confint(pre3) pre4<-glm(formula = PPCs~MAP,family = binomial(link = "logit"), data = training_set) summary(pre4) confint(pre4) pre5<-glm(formula = PPCs~Duration_of_operation,family = binomial(link = "logit"), data = training_set) summary(pre5) confint(pre5) pre6<-glm(formula = PPCs~Airway_management,family = binomial(link = "logit"), data = training_set) summary(pre6) confint(pre6) pre7<-glm(formula = PPCs~Duration_of_one_lung_ventilation,family = binomial(link = "logit"), data = training_set) summary(pre7) confint(pre7) pre9<-glm(formula = PPCs~BMI,family = binomial(link = "logit"), data = training_set) summary(pre9) confint(pre9) pre10<-glm(formula = PPCs~History_of_hypertension,family = binomial(link = "logit"), data = training_set) summary(pre10) confint(pre10) pre11<-glm(formula = PPCs~History_of_diabetes_mellitus,family = binomial(link = "logit"), data = training_set) summary(pre11) confint(pre11) pre12<-glm(formula = PPCs~History_of_stroke,family = binomial(link = "logit"), data = training_set) summary(pre12) confint(pre12) pre13<-glm(formula = PPCs~History_of_heart_disease,family = binomial(link = "logit"), data = training_set) summary(pre13) confint(pre13) pre14<-glm(formula = PPCs~History_of_COPD,family = binomial(link = "logit"), data = training_set) summary(pre14) confint(pre14) pre15<-glm(formula = PPCs~History_of_smoking,family = binomial(link = "logit"), data = training_set) summary(pre15) confint(pre15) pre16<-glm(formula = PPCs~History_of_alcohol_use,family = binomial(link = "logit"), data = training_set) summary(pre16) confint(pre16) pre17<-glm(formula = PPCs~Leukocyte_counts,family = binomial(link = "logit"), data = training_set) summary(pre17) confint(pre17) pre20<-glm(formula = PPCs~Red_blood_cell_counts,family = binomial(link = "logit"), data = training_set) summary(pre20) confint(pre20) pre21<-glm(formula = PPCs~Platelet_counts,family = binomial(link = "logit"), data = training_set) summary(pre21) confint(pre21) pre22<-glm(formula = PPCs~Aspartate_aminotransferase,family = binomial(link = "logit"), data = training_set) summary(pre22) confint(pre22) pre23<-glm(formula = PPCs~Alanine_aminotransferase,family = binomial(link = "logit"), data = training_set) summary(pre23) confint(pre23) pre24<-glm(formula = PPCs~Blood_glucose,family = binomial(link = "logit"), data = training_set) summary(pre24) confint(pre24) pre25<-glm(formula = PPCs~Blood_creatinine,family = binomial(link = "logit"), data = training_set) summary(pre25) confint(pre25) pre26<-glm(formula = PPCs~Blood_urea_nitrogen,family = binomial(link = "logit"), data = training_set) summary(pre26) confint(pre26) pre27<-glm(formula = PPCs~Serum_sodium,family = binomial(link = "logit"), data = training_set) summary(pre27) confint(pre27) pre28<-glm(formula = PPCs~Serum_potassium,family = binomial(link = "logit"), data = training_set) summary(pre28) confint(pre28) pre29<-glm(formula = PPCs~Serum_albumin,family = binomial(link = "logit"), data = training_set) summary(pre29) confint(pre29) #Multivariate logistic regression pre<-glm(formula = PPCs~Age+Sex+ASA+Duration_of_operation+Duration_of_one_lung_ventilation+ History_of_hypertension+History_of_diabetes_mellitus+History_of_stroke+History_of_heart_disease+History_of_COPD+History_of_smoking+Leukocyte_counts+ Red_blood_cell_counts+Serum_albumin,family = binomial(link = "logit"), data = training_set) summary(pre) confint(pre) #AIC stepwise regression step(pre,direction = "both") steppre<-glm(formula = PPCs~ASA+Duration_of_operation+Duration_of_one_lung_ventilation+ History_of_stroke+History_of_heart_disease+History_of_COPD+History_of_smoking ,family = binomial(link = "logit"), data = training_set) summary(steppre) confint(steppre) #Build nomogram diagram dd<-datadist(training_set) options(datadist="dd") modelf<-lrm(PPCs~ASA+Duration_of_operation+Duration_of_one_lung_ventilation+ History_of_stroke+History_of_heart_disease+History_of_COPD+History_of_smoking,data=training_set) nom <- nomogram(modelf, fun=plogis, fun.at=c(.001, .01, .05, seq(.1,.9, by=.1), .95, .99, .999), lp=F, funlabel="Risk of PPCs") plot(nom,col.conf=c(1, 0.3), conf.space=c(.08,.2), label.every=1, force.label = FALSE, xfrac=.35, cex.axis=1.1, cex.var=1.3,varname.label=FALSE, varname.label.sep="=", ia.space=.7, tck=NA, tcl=-0.3, lmgp=.4, total.sep.page=FALSE, cap.labels=FALSE) nom <- nomogram(modelf, fun=plogis, fun.at=c(.001, .01, .05, seq(.1,.9, by=.1), .95, .99, .999), lp=T, funlabel="Risk") #Show each specific score nom #Manual calculation of linear predictive value of output training set and test set write.csv(modelf$linear.predictors,"Linear_predictive_training.csv") testpre<-predict(modelf,type='lp',test_set) write.csv(testpre,"Linear_predictive_test.csv") training_set #Read in the manually calculated score trainP<-read.csv("training_point.csv") testP<-read.csv("testpoint.csv") mean(trainP$point) sd(trainP$point) min(trainP$point) max(trainP$point) describe(test_set$PPCs) mean(testP$point) sd(testP$point) min(testP$point) max(testP$point) #Draw ROC curve library(pROC) logROC1<-roc(training_set$PPCs,trainP$point) ci(logROC1) plot(logROC1, print.auc=TRUE, auc.polygon=TRUE, grid=c(0.1, 0.2),grid.col=c("green", "red"), max.auc.polygon=TRUE,auc.polygon.col="skyblue", print.thres=TRUE,main=' ') logROC2<-roc(test_set$PPCs,testP$point) ci(logROC2) testP$pre<-ifelse(testP$point>=134,"Yes","No") table(test_set$PPCs,testP$pre) plot(logROC2, print.auc=TRUE, auc.polygon=TRUE, grid=c(0.1, 0.2),grid.col=c("green", "red"), max.auc.polygon=T,auc.polygon.col="skyblue", print.thres=TRUE,main=' ') g1 <- ggroc(list(logROC1,logROC2),aes=c("linetype"), size = 1.4) g1+xlab('Specificity')+ylab('Sensitivity')+ geom_segment(aes(x = 1, xend = 0, y = 0, yend = 1))+ scale_linetype_discrete("",breaks=c("1","2"), labels=c("Nomogram for Training cohort(area=0.894[0.866-0.922])", "Nomogram for Validation cohort(area=0.867[0.810-0.925])" ))+ theme(plot.margin=unit(c(0.5, 1, 2, 0.5), "lines"),legend.position=c(0.72,0.11),axis.text=element_text(size=12,face = "bold"), axis.title.x=element_text(size=14),axis.title.y=element_text(size=14), legend.text=element_text(size=13), panel.background=element_rect(fill="white",color="grey50"), panel.grid=element_line(color="grey50",size=2), panel.grid.major=element_line(size=1,linetype =3,color="NA"), panel.grid.minor=element_line(size=1,linetype =3,color="NA")) g2 <- ggroc(list(logROC1,logROC2), size = 1.4) g2+xlab('Specificity')+ylab('Sensitivity')+ geom_segment(aes(x =1, xend = 0, y = 0, yend = 1))+ scale_color_hue("",breaks=c("1","2"), labels=c("Nomogram for Training cohort(area=0.894[0.866-0.922])", "Nomogram for Validation cohort(area=0.867[0.810-0.925])" ))+ theme(plot.margin=unit(c(0.5, 1, 2, 0.5), "lines"),legend.position=c(0.72,0.11),axis.text=element_text(size=12,face = "bold"), axis.title.x=element_text(size=14),axis.title.y=element_text(size=14), legend.text=element_text(size=13), panel.background=element_rect(fill="white",color="grey50"), panel.grid=element_line(color="grey50",size=2), panel.grid.major=element_line(size=1,linetype =3,color="NA"), panel.grid.minor=element_line(size=1,linetype =3,color="NA")) ## C-index C <- rcorrcens(training_set$PPCs~ predict(modelf,newdata = training_set),data = training_set) (C_index <- round(C[1,1],3)) (C_index_Low <- round(c(C[1,1] - 1.96*C[1,4]/2),3)) (C_index_High <- round(c(C[1,1] + 1.96*C[1,4]/2),3)) C <- rcorrcens(test_set$PPCs~ predict(modelf,newdata = test_set),data = test_set) (C_index <- round(C[1,1],3)) (C_index_Low <- round(c(C[1,1] - 1.96*C[1,4]/2),3)) (C_index_High <- round(c(C[1,1] + 1.96*C[1,4]/2),3)) #Calibration line model_1<-lrm(PPCs~ASA+Duration_of_operation+Duration_of_one_lung_ventilation+ History_of_stroke+History_of_heart_disease+History_of_COPD+History_of_smoking,x=T,y=T,data=training_set) cal1_1<-calibrate(model_1,method="boot",B=100) plot(cal1_1,lwd=3,lty=3,cex=3,xlim=c(0,1),ylim=c(0,1), xlab="Nomogram Predicted Probability",legend="bottomright", ylab="Actual Probability",col="black",cex.lab=1.2, subtitles = FALSE) model_2<-lrm(PPCs~ASA+Duration_of_operation+Duration_of_one_lung_ventilation+ History_of_stroke+History_of_heart_disease+History_of_COPD+History_of_smoking,x=T,y=T,data=test_set) cal1_2<-calibrate(model_2,method="boot",B=100) plot(cal1_2,lwd=3,lty=3,cex=3,xlim=c(0,1),ylim=c(0,1), xlab="Nomogram Predicted Probability",legend="bottomright", ylab="Actual Probability",col="black",cex.lab=1.2, subtitles = FALSE)