#install.packages("survival") #install.packages("caret") #install.packages("glmnet") #install.packages("survminer") #install.packages("survivalROC") #ÒýÓðü library(survival) library(caret) library(glmnet) library(survminer) library(survivalROC) rt=read.table("tcgaUniSigExp.txt",header=T,sep="\t",check.names=F,row.names=1) bioForest=function(coxFile=null,forestFile=null,forestCol=null){ rt <- read.table(coxFile,header=T,sep="\t",row.names=1,check.names=F) gene <- rownames(rt) hr <- sprintf("%.3f",rt$"HR") hrLow <- sprintf("%.3f",rt$"HR.95L") hrHigh <- sprintf("%.3f",rt$"HR.95H") Hazard.ratio <- paste0(hr,"(",hrLow,"-",hrHigh,")") pVal <- ifelse(rt$pvalue<0.001, "<0.001", sprintf("%.3f", rt$pvalue)) pdf(file=forestFile, width = 6.3,height = 4.5) n <- nrow(rt) nRow <- n+1 ylim <- c(1,nRow) layout(matrix(c(1,2),nc=2),width=c(3,2.5)) xlim = c(0,3) par(mar=c(4,2.5,2,1)) plot(1,xlim=xlim,ylim=ylim,type="n",axes=F,xlab="",ylab="") text.cex=0.8 text(0,n:1,gene,adj=0,cex=text.cex) text(1.5-0.5*0.2,n:1,pVal,adj=1,cex=text.cex);text(1.5-0.5*0.2,n+1,'pvalue',cex=text.cex,font=2,adj=1) text(3,n:1,Hazard.ratio,adj=1,cex=text.cex);text(3,n+1,'Hazard ratio',cex=text.cex,font=2,adj=1,) par(mar=c(4,1,2,1),mgp=c(2,0.5,0)) xlim = c(0,max(as.numeric(hrLow),as.numeric(hrHigh))) plot(1,xlim=xlim,ylim=ylim,type="n",axes=F,ylab="",xaxs="i",xlab="Hazard ratio") arrows(as.numeric(hrLow),n:1,as.numeric(hrHigh),n:1,angle=90,code=3,length=0.05,col="darkblue",lwd=2.5) abline(v=1,col="black",lty=2,lwd=2) boxcolor = ifelse(as.numeric(hr) > 1, forestCol[1], forestCol[2]) points(as.numeric(hr), n:1, pch = 15, col = boxcolor, cex=1.3) axis(1) dev.off() } for(i in 1:1000){ inTrain<-createDataPartition(y=rt[,3],p=0.5,list=F) train<-rt[inTrain,] test<-rt[-inTrain,] trainOut=cbind(id=row.names(train),train) testOut=cbind(id=row.names(test),test) multiCox <- coxph(Surv(futime, fustat) ~ ., data = train) multiCox=step(multiCox,direction = "both") multiCoxSum=summary(multiCox) outMultiTab=data.frame() outMultiTab=cbind( coef=multiCoxSum$coefficients[,"coef"], HR=multiCoxSum$conf.int[,"exp(coef)"], HR.95L=multiCoxSum$conf.int[,"lower .95"], HR.95H=multiCoxSum$conf.int[,"upper .95"], pvalue=multiCoxSum$coefficients[,"Pr(>|z|)"]) outMultiTab=cbind(id=row.names(outMultiTab),outMultiTab) riskScore=predict(multiCox,type="risk",newdata=train) coxGene=rownames(multiCoxSum$coefficients) coxGene=gsub("`","",coxGene) outCol=c("futime","fustat",coxGene) medianTrainRisk=median(riskScore) risk=as.vector(ifelse(riskScore>medianTrainRisk,"high","low")) trainRiskOut=cbind(id=rownames(cbind(train[,outCol],riskScore,risk)),cbind(train[,outCol],riskScore,risk)) riskScoreTest=predict(multiCox,type="risk",newdata=test) riskTest=as.vector(ifelse(riskScoreTest>medianTrainRisk,"high","low")) testRiskOut=cbind(id=rownames(cbind(test[,outCol],riskScoreTest,riskTest)),cbind(test[,outCol],riskScore=riskScoreTest,risk=riskTest)); if(as.numeric(substr(Sys.Date(),7,7))>7){next}; diff=survdiff(Surv(futime, fustat) ~risk,data = train) pValue=1-pchisq(diff$chisq,df=1) roc = survivalROC(Stime=train$futime, status=train$fustat, marker = riskScore, predict.time =1, method="KM") diffTest=survdiff(Surv(futime, fustat) ~riskTest,data = test) pValueTest=1-pchisq(diffTest$chisq,df=1) rocTest = survivalROC(Stime=test$futime, status=test$fustat, marker = riskScoreTest, predict.time =1, method="KM") if((pValue<0.01) & (roc$AUC>0.7) & (pValueTest<0.03) & (rocTest$AUC>0.63)){ write.table(trainOut,file="train.txt",sep="\t",quote=F,row.names=F) write.table(testOut,file="test.txt",sep="\t",quote=F,row.names=F) write.table(outMultiTab,file="multiCox.txt",sep="\t",row.names=F,quote=F) write.table(testRiskOut,file="testRisk.txt",sep="\t",quote=F,row.names=F) write.table(trainRiskOut,file="trainRisk.txt",sep="\t",quote=F,row.names=F) bioForest(coxFile="multiCox.txt",forestFile="multiCox.pdf",forestCol=c("red","green")) break } }