##------------------------------------------------------------ ## compare RF-SRC to Cox regression ## Illustrates C-index and Brier score measures of performance ## assumes "pec" and "survival" libraries are loaded ##------------------------------------------------------------ if (library("survival", logical.return = TRUE) & library("pec", logical.return = TRUE) & library("party", logical.return = TRUE) & library("randomForestSRC", logical.return = TRUE) & library("prodlim", logical.return = TRUE)) { ##prediction function required for pec predictSurvProb.rfsrc <- function(object, newdata, times, ...){ ptemp <- predict(object,newdata=newdata,...)$survival pos <- sindex(jump.times = object$time.interest, eval.times = times) p <- cbind(1,ptemp)[, pos + 1] if (NROW(p) != NROW(newdata) || NCOL(p) != length(times)) stop("Prediction failed") p } ## data, formula specifications data(GBSG2) trainrows <- sample(1:nrow(GBSG2), .7*nrow(GBSG2)) # GBSG2 <- GBSG2 %>% mutate(event = factor(cens, # labels = c("censored", "failed"))) #table(GBSG2$event) #GBSG2 <- GBSG2[,-10] surv.f <- as.formula(Surv(time, cens) ~.) pec.f <- as.formula(Hist(time, cens) ~ 1) ## run cox/rfsrc models ## for illustration we use a small number of trees cox.obj <- coxph(surv.f, data = GBSG2, x = TRUE) rfsrc.obj <- rfsrc(surv.f, GBSG2, ntree = 1000) fitcforest <- pecCforest(surv.f, data = GBSG2, controls = cforest_classical(ntree = 1000)) ### estimate conditional Kaplan-Meier curves bst <- cforest(Surv(time, cens) ~ ., data = GBSG2, control = cforest_unbiased(ntree = 50)) treeresponse(bst, newdata = GBSG2[1:2,], OOB = TRUE) ### if you can't resist to look at individual trees ... party:::prettytree(bst@ensemble[[1]], names(bst@data@get("input"))) vi <- varimp(bst) ### compare variable importances and absolute z-statistics layout(matrix(1:2)) barplot(vi) barplot(abs(summary(coxph(Surv(time, cens) ~ ., data = GBSG2))$coeff[,"z"])) ### looks more or less the same plot.variable(rfsrc.obj, surv.type = "rel.freq", partial = TRUE) vip(bst)#Variable importance for survival forests (cforest) rsf.cindex <- get.cindex(time = GBSG2$time, censoring = GBSG2$cens, predicted = rfsrc.obj$predicted.oob) calPlot(rfsrc.obj, data = GBSG2) newData <- data.frame(horTh=factor("no",levels=c("no", "yes")), age=as.integer(c(24,53,73)), menostat=factor(cbind("Pre", "Post", "Post"),levels=c("Post","Pre")), tsize=as.integer(rep(25, 3)), tgrade=factor("II",levels=c("I","II", "III")), pnodes=as.integer(rep(3, 3)), progrec=as.integer(rep(32, 3)), estrec=as.integer(rep(36, 3))) print(newData) cox.obj2 <- coxph(surv.f, data = GBSG2[trainrows,], x = TRUE) rfsrc.obj2 <- rfsrc(surv.f, GBSG2[trainrows,], ntree = 1000) fitcforest2 <- pecCforest(surv.f, data = GBSG2[trainrows,], controls = cforest_classical(ntree = 1000)) cforestImpPlot <- function(x) { cforest_importance <<- v <- varimp(x) dotchart(v[order(v)]) } cforestImpPlot(bst) ######################################################################################## #################predicted survival curves for newdata############################# ######################################################################################## pcox <- predictSurvProb(cox.obj, newdata = newData, times = quantile(GBSG2$time)) prsf <- predictSurvProb(rfsrc.obj, newdata = newData, times = quantile(GBSG2$time)) pcf <- predictSurvProb(fitcforest, newdata = newData, times = quantile(GBSG2$time)) psp <- round(cbind(pcox,prsf, pcf),2) newNames <- paste("newData", 1:3) mat <- cbind( newData$age, psp) colnames(mat) <- c( "Age", rep("Coxph", 5), rep("rsf",5), rep("cforest",5)) mat1 <- data.frame(newNames, mat) print(mat1) par(mfrow = c(1, 3)) lapply(1:3, function(x) { plotPredictSurvProb(cox.obj, newdata = newData[x, ], col = 1) plotPredictSurvProb(rfsrc.obj, newdata = newData[x, ], add = TRUE, col = 2) plotPredictSurvProb(fitcforest, newdata = newData[x, ], add = TRUE,col = 3) }) legend(x = "bottom",inset = 0, legend = c("Cox PH", "randomSurvivalForest", "cforest"), lwd = 3, col=1:3,bty="n",cex=1.1) ######################################################################################## ######################################################################################### ######################################################################################## ######################################################################################## ##################pec curves for bootstrap and bootstrap .632+ estimates############# ######################################################################################## ## compute bootstrap cross-validation estimate of expected Brier score ## see Mogensen, Ishwaran and Gerds (2012) Journal of Statistical Software set.seed(17743) ptm <- proc.time() prederror.GBSG2 <- pec(list(cox.obj,rfsrc.obj, fitcforest), data = GBSG2[-trainrows,], formula = pec.f, splitMethod = "bootcv", B = 500, M=200) print(prederror.GBSG2, times = seq(8, 2659,50)) plot(prederror.GBSG2) stoptime<- proc.time() - ptm # Stop the clock print(stoptime[3]/60) set.seed(17743) ptm <- proc.time() prederror.GBSG2.632 <- pec(list(cox.obj,rfsrc.obj, fitcforest), data = GBSG2[-trainrows,], formula = pec.f, splitMethod = "Boot632", B = 500, M=200) print(prederror.GBSG2.632, times = seq(8, 2659,50)) plot(prederror.GBSG2.632) stoptime<- proc.time() - ptm # Stop the clock print(stoptime[3]/60) par(mfrow=c(2,2)) plot(prederror.GBSG2, legend=FALSE) legend("bottomright", legend=c("Reference", "Cox PH","RSF", "Cforest"), col=c("black", "red","green", "blue"), lty=1) title(main = "GBSG2 bootstrap estimates") plot(prederror.GBSG2.632, legend=FALSE) legend("bottomright", legend=c("Reference", "Cox PH","RSF", "Cforest"), col=c("black", "red","green", "blue"), lty=1) title(main = "GBSG2 bootstrap.632 estimates") ######################################################################################## ######################################################################################### ######################################################################################## ######################################################################################### ###########cindex############################################################################## ######################################################################################## # compute the bootstrap-crossvalidation estimate of # the C-index at different time points # ptm <- proc.time() set.seed(142) bcvCindex <- pec::cindex(list("Cox"=cox.obj, "RSF"=rfsrc.obj, "Cforest"= fitcforest), formula=surv.f, data=GBSG2, splitMethod="Bootcv", B = 500, M=200, eval.times=c(50,quantile(GBSG2$time)[-1])) bcvCindex.632 <- pec::cindex(list("Cox"=cox.obj, "RSF"=rfsrc.obj, "Cforest"= fitcforest), formula=surv.f, data=GBSG2, splitMethod="Boot632", B = 500, M=200, eval.times=c(50,quantile(GBSG2$time)[-1])) print(bcvCindex) plot(bcvCindex, legend=FALSE) legend("topleft", legend=c("Cox PH","RSF", "Cforest"), col=c("black", "red","green"), lty=1) title(main = "GBSG2 bootstrap estimates") print(bcvCindex.632) plot(bcvCindex.632, legend=FALSE) legend("topleft", legend=c("Cox PH","RSF", "Cforest"), col=c("black", "red","green"), lty=1) title(main = "GBSG2 bootstrap .632 estimates") stoptime<- proc.time() - ptm # Stop the clock print(stoptime[3]/60) ######################################################################################## ######################################################################################### ######################################################################################## #Rkare <- R2(prederror.GBSG2,times=seq(8, 2659,50)) ######calibration plot cizdirmeye calistim ama olmadi.... #veriyi train diye bolup oyle model kurdum hata bulmaya calisiyorum set.seed(17743) prederror2 <- pec(list(cox.obj2,rfsrc.obj2, fitcforest2), data = GBSG2[trainrows,], formula = pec.f, splitMethod = "bootcv", B = 50) print(prederror2) plot(prederror2) cf2=calPlot(list("Cox regression"=cox.obj2,"RSF"=rfsrc.obj2), time=3, type="survival", data=GBSG2[-trainrows,]) print(cf2) plot(cf2) calPlot(cox.obj2,time=3,data=GBSG2[-trainrows],type="survival") # oo <- subsample(rfsrc.obj) # # take a delete-d-jackknife procedure for example # vimpCI <- extract.subsample(oo)$var.jk.sel.Z # # Confidence Intervals for VIMP # plot.subsample(oo) # # take the variable "Month" for example for partial plot # plot.variable(rfsrc.obj, xvar.names = "days", partial = TRUE) ## compute out-of-bag C-index for cox regression and compare to rfsrc rfsrc.obj <- rfsrc(surv.f, GBSG2) cat("out-of-bag Cox Analysis ...", "\n") cox.err <- sapply(1:100, function(b) { if (b%%10 == 0) cat("cox bootstrap:", b, "\n") train <- sample(1:nrow(GBSG2), nrow(GBSG2), replace = TRUE) cox.obj <- tryCatch({coxph(surv.f, GBSG2[train, ])}, error=function(ex){NULL}) if (!is.null(cox.obj)) { get.cindex(GBSG2$time[-train], GBSG2$cens[-train], predict(cox.obj, GBSG2[-train, ])) } else NA }) cat("\n\tOOB error rates\n\n") cat("\tRSF : ", rfsrc.obj$err.rate[rfsrc.obj$ntree], "\n") cat("\tCox regression : ", mean(cox.err, na.rm = TRUE), "\n") } ## illustrates the various splitting rules ## illustrates event specific and non-event specific variable selection ## events are transplant (1) and death (2) GBSG2$id <- NULL ## modified Gray's weighted log-rank splitting ## (equivalent to cause=c(1,1) and splitrule="logrankCR") GBSG2.cr <- rfsrc(Surv(time, cens) ~ ., GBSG2) ## log-rank cause-1 specific splitting and targeted VIMP for cause 1 GBSG2.log1 <- rfsrc(Surv(time, cens) ~ ., GBSG2, splitrule = "logrank",importance="permute") GBSG2.log <- rfsrc(Surv(time, cens) ~ ., GBSG2, importance="permute") ## log-rank cause-2 specific splitting and targeted VIMP for cause 2 #GBSG2.log2 <- rfsrc(Surv(time, cens) ~ ., GBSG2, # splitrule = "logrank", cause = c(0,1), importance = TRUE) ## extract VIMP from the log-rank forests: event-specific ## extract minimal depth from the Gray log-rank forest: non-event specific var.perf <- data.frame(md = max.subtree(GBSG2.cr)$order[, 1], vimp = 100 * GBSG2.log1$importance) lap <- print(var.perf[order(var.perf$md), ]) stargazer(cox.obj,summary=TRUE, type = "text", title = "var per", out="cox.obj.txt") plot(GBSG2.log1) plot(GBSG2.log)# bunu ekledim sonuclara ############print results A2 <- summary(cox.obj2) write.table(round(A2$coefficients,4), file = "cox.obj2.txt", sep = ",", quote = FALSE)