###Cox regression analy library(ggplot2) library(lattice) library(caret) library(survival) library(rms) library(foreign) dev<-read.csv("data.csv") head(dev) str(dev) set.seed(131) trianandvad<- createDataPartition(y=dev$id,p=0.70,list=FALSE) train <- dev[trianandvad, ] vad<-dev[-trianandvad,] write.csv(train, "train.csv") write.csv(vad, "vad.csv") ##training set for OS dev<-read.csv("train.csv") head(dev) str(dev) dev$Gender<-factor(dev$Gender,labels=c('Male','Female')) dev$Grade<-factor(dev$Grade,labels=c('high differentiated','low differentiated','unknown')) dev$Pathological.types<-factor(dev$Pathological.types,labels=c('ductal adenocarcinoma','non-ductal adenocarcinoma')) dev$AJCC.stage<-factor(dev$AJCC.stage,labels=c('I','II','III','IV')) dev$Surgery<-factor(dev$Surgery,labels=c('No','Yes')) dev$Race<-factor(dev$Race,labels = c('white','black','others')) str(dev) #nomogram for OS in training set ddist<-datadist(dev) options(datadist='ddist') units(dev$Time)<-"Month" fcox<-cph(Surv(Time,Status)~Gender+Grade+Race+Pathological.types+Surgery+AJCC.stage,surv= T,x=T,y=T,data = dev) surv <- Survival(fcox) nom <- nomogram(fcox, fun=list(function(x) surv(12, x), function(x) surv(36, x),function(x) surv(60, x)), funlabel=c("1-year OS","3-years OS","5-years OS"),lp=F) plot(nom) library(nomogramEx) nomogramEx(nomo=nom,np=3,digit=9) #C-index in training set set.seed(7) require(caret) attach(dev) folds<-createFolds(Surv(Time,Status),k = 10) for(i in 1:10){ fold_test<-dev[folds[[i]],] fold_train<-dev[-folds[[i]],] fcox <- cph(Surv(Time,Status) ~ Gender+Grade+Primary.site+Pathological.types+Surgery+AJCC.stage,surv= T,x=T,y=T,data = fold_train) f2 <- cph(Surv(Time,Status) ~ predict(fcox, newdata=fold_test), x=T, y=T, surv=T, data=fold_test) validate(f2, method="boot", B=1000, dxy=T) print(rcorrcens(Surv(Time,Status) ~ predict(fcox, newdata=fold_test), data = fold_test)) } f2 <- cph(Surv(Time,Status) ~ predict(fcox, newdata=dev), x=T, y=T, surv=T, data=dev) validate(f2, method="boot", B=1000, dxy=T) rcorrcens(Surv(Time,Status) ~ predict(fcox, newdata=dev), data = dev) fcoxajcc<-cph(Surv(Time,Status)~AJCC.stage,surv= T,x=T,y=T,data = dev) surv <- Survival(fcoxajcc) nomajcc <- nomogram(fcoxajcc, fun=list(function(x) surv(12, x), function(x) surv(36, x),function(x) surv(60, x)), funlabel=c("1-year OS","3-years OS","5-years OS"),lp=F) fajcc <- cph(Surv(Time,Status) ~ predict(fcoxajcc, newdata=dev), x=T, y=T, surv=T, data=dev) validate(fcoxajcc, method="boot", B=1000, dxy=T) rcorrcens(Surv(Time,Status) ~ predict(fcoxajcc, newdata=dev), data = dev) #calibration curve for training set fcox12<- cph(Surv(Time,Status)~Gender+Grade+Pathological.types+Surgery+Race+AJCC.stage,surv=T,x=T, y=T,time.inc = 12,data=dev) cal12<- calibrate(fcox12, cmethod="KM", method="boot", u=12, m=900, B=1000) plot(cal12,cex.axis=2,cex.lab=1.5) fcox36 <- cph(Surv(Time,Status)~Gender+Grade+Pathological.types+Surgery+Race+AJCC.stage,surv=T,x=T, y=T,time.inc = 36,data=dev) cal36<- calibrate(fcox36, cmethod="KM", method="boot", u=36, m=900, B=1000) plot(cal36,cex.axis=2,cex.lab=1.5) fcox60 <- cph(Surv(Time,Status)~Gender+Grade+Pathological.types+Surgery+Race+AJCC.stage,surv=T,x=T, y=T,time.inc = 60,data=dev) cal60<- calibrate(fcox60, cmethod="KM", method="boot", u=60, m=900, B=1000) plot(cal60,cex.axis=2,cex.lab=1.5) ##validation for OS vad<-read.csv("vad.csv") vad$Gender<-factor(vad$Gender,labels=c('Male','Female')) vad$Grade<-factor(vad$Grade,labels=c('high differentiated','low differentiated','unknown')) vad$Pathological.types<-factor(vad$Pathological.types,labels=c('ductal adenocarcinoma','non-ductal adenocarcinoma')) vad$AJCC.stage<-factor(vad$AJCC.stage,labels=c('I','II','III','IV')) vad$Surgery<-factor(vad$Surgery,labels=c('No','Yes')) vad$Race<-factor(vad$Race,labels = c('white','black','others')) str(vad) ddist <- datadist(vad) options(datadist='ddist') units(vad$Time) <- "Month" #C-index in validation set fvad<-cph(Surv(Time, Status) ~ predict(fcox,newdata=vad),x=T, y=T,surv=T, data=vad) validate(fvad, method="boot", B=1000, dxy=T) rcorrcens(Surv(Time, Status) ~ predict(fcox, newdata=vad), data = vad) fvadajcc <-cph(Surv(Time,Status) ~ predict(fcoxajcc,newdata=vad),x=T, y=T,surv=T, data=vad) validate(fvadajcc, method="boot", B=1000, dxy=T) rcorrcens(Surv(Time,Status) ~ predict(fcoxajcc, newdata=vad), data = vad) #calibration curve in validation set fvad12 <- cph(Surv(Time,Status) ~predict(fcox, newdata=vad), x=T, y=T, surv=T, data=vad, time.inc=12) cfvad12 <- calibrate(fvad12, cmethod="KM", method="boot", u=12, m=400, B=1000) plot(cfvad12,cex.axis=2,cex.lab=1.5) fvad36 <- cph(Surv(Time,Status) ~predict(fcox, newdata=vad), x=T, y=T, surv=T, data=vad, time.inc=36) cfvad36 <- calibrate(fvad36, cmethod="KM", method="boot", u=36, m=400, B=1000) plot(cfvad36,cex.axis=2,cex.lab=1.5) fvad60 <- cph(Surv(Time,Status) ~predict(fcox, newdata=vad), x=T, y=T, surv=T, data=vad, time.inc=60) cfvad60 <-calibrate(fvad60, cmethod="KM", method="boot", u=60, m=400, B=1000) plot(cfvad60,cex.axis=2,cex.lab=1.5) ###Competing risk analysis library(survival) library(cmprsk) library(riskRegression) library(prodlim) library(mstate) library(rms) library(pec) library(ggplot2) ##training set for CSS train<-read.csv("train.csv") units(train$Time) <- "Month" #Fine and Gray analysis ajcc.stage2=(train$AJCC.stage==2)+0 ajcc.stage3=(train$AJCC.stage==3)+0 ajcc.stage4=(train$AJCC.stage==4)+0 ajcc.stage=cbind(ajcc.stage2,ajcc.stage3,ajcc.stage4) fitajcc.stage=crr(train$Time,train$cause,ajcc.stage) summary(fitajcc.stage) grade2=(train$Grade==2)+0 grade3=(train$Grade==3)+0 grade=cbind(grade2,grade3) fitgrade=crr(train$Time,train$cause,grade) summary(fitgrade) race2=(train$Race==2)+0 race3=(train$Race==3)+0 race=cbind(race2,race3) fitrace=crr(train$Time,train$cause,race) summary(fitrace) gender2=(train$Gender==2)+0 gender=cbind(gender2) fitgender=crr(train$Time,train$cause,gender) summary(fitgender) surgery2=(train$Surgery==2)+0 surgery=cbind(surgery2) fitsurgery=crr(train$Time,train$cause,surgery) summary(fitsurgery) pathological.types2=(train$Pathological.types==2)+0 pathological.types=cbind(pathological.types2) fitpathological.types=crr(train$Time,train$cause,pathological.types) summary(fitpathological.types) parimary.site2=(train$Primary.site==2)+0 parimary.site3=(train$Primary.site==3)+0 parimary.site4=(train$Primary.site==4)+0 parimary.site=cbind(parimary.site2,parimary.site3,parimary.site4) fitparimary.site=crr(train$Time,train$cause,parimary.site) summary(fitparimary.site) #multivariate competing risk analysis factors=cbind(pathological.types,ajcc.stage,surgery,parimary.site,grade) fit2factors=crr(train$Time,train$cause,factors) summary(fit2factors) #nomogram for CSS train$Pathological.types<-factor(train$Pathological.types,labels=c('ductal adenocarcinoma','non-ductal adenocarcinoma')) train$AJCC.stage<-factor(train$AJCC.stage,labels=c('I','II','III','IV')) train$Surgery<-factor(train$Surgery,labels=c('No','Yes')) train$Primary.site<-factor(train$Primary.site,labels = c('head','body','tail','other')) str(train) train$cause<-ifelse(is.na(train$cause),0,train$cause) train.w<-crprep("Time","cause",data=train,trans = c(1,2),cens = 0,id="ID",keep=c("Primary.site","Pathological.types","AJCC.stage","Surgery")) with(train.w,table(failcode,status)) ddist<-datadist(train.w) options(datadist='ddist') mod<-cph(Surv(Tstart,Tstop,status==1)~Primary.site+Pathological.types+AJCC.stage+Surgery,data = train.w,weight=weight.cens,subset = failcode==1,surv = T) surv<-Survival(mod) nom.sur<-nomogram(mod,fun=list(function(x) surv(12, x), function(x) surv(36, x),function(x) surv(60, x)), funlabel=c("1-year CSS","3-years CSS","5-years CSS"),lp=F) plot(nom.sur) #Score for variables in nomogram for CSS library(nomogramEx) nomogramEx(nomo=nom.sur,np=3,digit=9) #C-index in training set set.seed(7) require(caret) attach(train.w) folds<-createFolds(Surv(Time,cause),k = 10) for(i in 1:10){ fold_test<-train.w[folds[[i]],] fold_train<-train.w[-folds[[i]],] fitCSS<-cph(Surv(Tstart,Tstop,status==1)~Primary.site+Pathological.types+AJCC.stage+Surgery,data = fold_test,weight=weight.cens,subset = failcode==1,surv = T) survConcordance(Surv(fold_test$Tstart,fold_test$Tstop,fold_test$status==1)~predict(fitCSS,fold_test)) print(survConcordance(Surv(fold_test$Tstart,fold_test$Tstop,fold_test$status==1)~predict(fitCSS,fold_test))) } fitCSS<-cph(Surv(Tstart,Tstop,status==1)~Primary.site+Pathological.types+AJCC.stage+Surgery,data = train.w,weight=weight.cens,subset = failcode==1,surv = T) survConcordance(Surv(train.w$Tstart,train.w$Tstop,train.w$status==1)~predict(fitCSS,train.w)) fitCSSAJCC<-cph(Surv(Tstart,Tstop,status==1)~AJCC.stage,data = train.w,weight=weight.cens,subset = failcode==1,surv = T) survConcordance(Surv(train.w$Tstart,train.w$Tstop,train.w$status==1)~predict(fitCSSAJCC,train.w)) #calibration curve in training set fcsst1<- FGR(Hist(Time,cause,cens.code = "0")~ Primary.site+Pathological.types+AJCC.stage+Surgery,data =train,cause=1) predict.crr <- cmprsk:::predict.crr valcsst1=calPlot(fcsst1,time=12,data =train, cause = "1",splitMethod = "BootCv",B = 1000, M = 290, pseudo = TRUE,plot = FALSE,method = "quantile",legend = F,lwd = 3,ylab = "Observed events probability") plot(valcsst1) valcsst3=calPlot(fcsst1,time=36,data =train, cause = "1",splitMethod = "BootCv",B = 1000, M = 290, pseudo = TRUE,plot = FALSE,method = "quantile",legend = F,lwd = 3,ylab = "Observed events probability") plot(valcsst3) valcsst5=calPlot(fcsst1,time=60,data =train, cause = "1",splitMethod = "BootCv",B = 1000, M = 290, pseudo = TRUE,plot = FALSE,method = "quantile",legend = F,lwd = 3,ylab = "Observed events probability") plot(valcsst5) ##validation for CSS vad<-read.csv("vad.csv") str(vad) vad$Pathological.types<-factor(vad$Pathological.types,labels=c('ductal adenocarcinoma','non-ductal adenocarcinoma')) vad$AJCC.stage<-factor(vad$AJCC.stage,labels=c('I','II','III','IV')) vad$Surgery<-factor(vad$Surgery,labels=c('No','Yes')) vad$Primary.site<-factor(vad$Primary.site,labels = c('head','body','tail','other')) units(vad$Time) <- "Month" vad$cause<-ifelse(is.na(vad$cause),0,vad$cause) vad.w<-crprep("Time","cause",data=vad,trans = c(1,2),cens = 0,id="ID",keep=c("Primary.site","Pathological.types","AJCC.stage","Surgery")) with(vad.w,table(failcode,status)) ddist<-datadist(vad.w) options(datadist='ddist') #C-index in vaalidation set survConcordance(Surv(vad.w$Tstart,vad.w$Tstop,vad.w$status==1)~predict(fitCSS,newdata=vad.w),data = vad.w) survConcordance(Surv(vad.w$Tstart,vad.w$Tstop,vad.w$status==1)~predict(fitCSSAJCC,newdata=vad.w),data = vad.w) #calibration curve in validation set predict.crr <- cmprsk:::predict.crr fcssv1 <- FGR(Hist(Time,cause,cens.code = "0") ~Primary.site+Pathological.types+AJCC.stage+Surgery, data=vad,cause="1") valcsst1=calPlot(fcssv1,time=12,data =vad, cause = "1",splitMethod = "BootCv",B = 1000, M =120 , pseudo = TRUE,plot = FALSE,method = "quantile",legend = F,lwd = 3,ylab = "Observed events probability") plot(valcsst1) valcsst3=calPlot(fcssv1,time=36,data =vad, cause = "1",splitMethod = "BootCv",B = 1000, M =120 , pseudo = TRUE,plot = FALSE,method = "quantile",legend = F,lwd = 3,ylab = "Observed events probability") plot(valcsst3) valcsst5=calPlot(fcssv1,time=60,data =vad, cause = "1",splitMethod = "BootCv",B = 1000, M =120 , pseudo = TRUE,plot = FALSE,method = "quantile",legend = F,lwd = 3,ylab = "Observed events probability") plot(valcsst5)