arg<-c() #variables input n<-length(arg) if((n %% 3)!=0){ stop() }else{ filelist<-arg[1:(n/3)] namelist<-arg[(n/3+1):(n*2/3)] survlist<-arg[(n*2/3+1):n] } library("randomForestSRC") library("survival") binary_trans<-function(x){ for(i in 3:ncol(x)){ x[,i]<-(as.numeric(x[,i])-mean(as.numeric(x[,i])))/sd(as.numeric(x[,i])) } return(x) } cox.analysis<-function(genes,coeff,dataset,name,type){ dataset$sampleID<-rownames(dataset) dataset<-dataset[,c(ncol(dataset),1:(ncol(dataset)-1))] genes<-c(genes, "OS", "OS_IND") genes<-as.data.frame(genes,stringsAsFactors = FALSE) sample<-dataset$sampleID workplace<- as.data.frame(sample, stringsAsFactors=F) RFName <- names(dataset) for(i in 1:(length(genes[,1])-2)) { g <- genes[i,1] index <- which(RFName == g) workplace <- cbind(dataset[[index]], workplace) names(workplace)[1] <- g } workplace$OS<-dataset$OS workplace$OS_IND<-dataset$OS_IND workplace$sample<-NULL OS<-as.numeric(workplace$OS) OS_IND<-as.numeric(workplace$OS_IND) workplace$OS<-NULL workplace$OS_IND<-NULL score<-as.data.frame(sample, stringsAsFactors=FALSE) for(i in 1:length(workplace[1,])) { score1 <- workplace[,i]*(as.numeric(coeff[i,1])) score <- cbind(score1, score) } score$sample<-NULL riskscore<-apply(score, 1, sum) rr<-summary(as.numeric(riskscore)) median <- as.numeric(rr[3]) medianB <- as.numeric(as.numeric(riskscore)>=median) ss <- survdiff(Surv(OS, OS_IND) ~ medianB) p_os <- 1-pchisq(ss$chisq, 1) sf <- survfit(Surv(OS, OS_IND) ~ medianB) pdf(file=paste(name,type,"survial.pdf",sep="_")) plot(sf,xlab="survival time",ylab="survival probabilty",col=1:2,axes=F,lwd=2) axis(1);axis(2) legend("bottomleft",paste("p=",p_os),col=2) dev.off() data.write<-cbind(sample,workplace,OS, OS_IND, riskscore, medianB, stringsAsFactors=FALSE) data.write<-as.data.frame(data.write, stringsAsFactors=FALSE) write.csv(data.write, file = paste("data",name,type,"_exp_profile.csv",sep="_"), row.names = F, quote = F) } for(i in 1:length(filelist)){ x<-read.table(filelist[i],header=T,sep="\t") rownames(x)<-x$sampleID x<-x[,-1] if(i==1){ total.out <- var.select(Surv(OS, OS_IND) ~ ., x, ntree=1000,importance = TRUE, method = "vh", nrep = 100, nstep = 100) sink("rfsrc_summary_total.out_rfsrc_glance.txt") print(total.out$rfsrc.refit.obj) sink() write.table(total.out$rfsrc.refit.obj$err.rate, file="rfsrc_total.out_every_erro_rate.txt", quote=F, sep="\t") pdf("rfsrc_var_selected_plot_A.pdf", width=5.9, height=6.0) plot(total.out$err.rate) dev.off() importance<-total.out$varselect vimportance<-as.data.frame(importance, stringsAsFactors=FALSE) write.table(vimportance, file="rfsrc_total.out_topvars_importance.txt", sep="\t", quote=F, row.names=T) Gene<-total.out$topvars indexes<-c() for(j in 1:length(Gene)){ index<-which(Gene[j]==colnames(x)) indexes<-c(indexes,index) } workplace<-x[,indexes] workplace$OS<-x$OS workplace$OS_IND<-x$OS_IND coxm<-coxph(Surv(OS, OS_IND) ~ ., workplace) b<-as.data.frame(summary(coxm)$coefficient, stringsAsFactors=FALSE) write.table(b, file="coefficient_of_select_gene.txt", sep='\t', quote=F, row.names=F) x<-binary_trans(x) x$sampleID<-rownames(x) x<-x[,c(ncol(x),1:(ncol(x)-1))] cox.analysis(Gene,b,x,namelist[i],survlist[i]) }else{ x<-binary_trans(x) cox.analysis(Gene,b,x,namelist[i],survlist[i]) } }