# The resistance-related DEGs has been downloaded from https://www.ncbi.nlm.nih.gov/geo/, file name is " geneMatrix.txt" library(limma) library(miscTools) #the number of MCF7 parental cell lines conNum<- # the number of MCF7 resistant cell lines treatNum<- #the significance threshold of |log2foldchange| and p value logFoldChange<-1.5 adjustP<-0.05 data<-read.table("geneMatrix.txt",sep=",",header = T,row.names=1,check.names=F) data<-normalizeBetweenArrays(as.matrix(data)) modType<-c(rep("con",conNum),rep("treat",treatNum)) design <- model.matrix(~0+factor(modType),ordered = F) colnames(design) <- c("con","treat") cont.matrix<-makeContrasts(treat-con,levels=design) fit<-lmFit(data,design) fit2<-contrasts.fit(fit,cont.matrix) fit2<-eBayes(fit2) all<-topTable(fit2,adjust='fdr',coef=1,number=Inf) write.table(all,file = "limma.csv",sep=",",quote = F) #get the DEGs allDiff<-all[with(all,(abs(logFC)>logFoldChange & adj.P.Val < adjustP )),] write.table(allDiff,file="all.diff.csv",sep=",",quote = F) updiff<-all[with(all,(logFC>logFoldChange & adj.P.Val < adjustP )),] write.table(updiff,file="up.diff.csv",sep=",",quote = F) downdiff<-all[with(all,(logFC<(-logFoldChange) & adj.P.Val < adjustP )),] write.table(downdiff,file="down.diff.csv",sep=",",quote = F) # the volcano plots of DEGs, the data were calculated by ˇ°limmaˇ± package, file name is " limma.csv" library(ggpubr) library(ggthemes) #the significance threshold of |log2foldchange| and p value adj.p<-0.05 logFC<-1.5 data<-read.csv(file = "limma.csv", header = T, sep = ",") data$logP <- -log10(data$adj.P.Val) # defined the DEGs data$Group<-"Group" data$Group[which( (data$adj.P.Val < adj.p) & (data$logFC > logFC) )] = "up-regulated" data$Group[which( (data$adj.P.Val < adj.p) & (data$logFC < -logFC) )] = "down-regulated" data$Group[which( (data$adj.P.Val> adj.p)|(abs(data$logFC) <= logFC) )] = "not-significant" p<-ggscatter(data, x = "logFC", y = "logP", color = "Group", palette = c("#2f5688", "#BBBBBB", "#CC0000"), size = 1, repel = T, fontsize = 2, xlim=c(-5,5), ylim=c(0,(max(data$logP)+0.2)), xlab = "log2FoldChange", ylab = "-log10(Adjust P-value)") + theme_bw()+ geom_hline(yintercept = -log10(adj.p), linetype="dashed") + geom_vline(xintercept = c(-logFC,logFC), linetype="dashed") q<-p + theme(panel.grid = element_blank(), axis.line = element_line(colour = "black")) print(q) ggsave('volcano.pdf', q,width = 10, height = 8) #the enrichment analyses of the 37 DEGs, file name is "37DEGs.txt" library(clusterProfiler) library(topGO) library(Rgraphviz) library(pathview) library(org.Hs.eg.db) gene<-read.table("37DEGs.txt",sep = '\t',header = F) gene_symbol<-as.vector(gene[,1]) entrez_id<-gene[,2] entrez_id<-mget(gene_symbol, org.Hs.egSYMBOL2EG, ifnotfound=NA) entrez_id<-as.character(entrez_id) symbole_id<-cbind(gene,entrez_id=entrez_id) #GO enrichment analyses enrich.go<-enrichGO(gene=gene_symbol, OrgDb = org.Hs.eg.db, keyType = 'SYMBOL', ont ='ALL', pvalueCutoff = 0.05, qvalueCutoff = 0.05) write.csv(enrich.go,"GO.csv",quote=F,row.names = F) #KEGG enrichment analysis enrich.go.KEGG<-enrichKEGG(gene = entrez_id, organism = 'hsa', pvalueCutoff = 0.05, qvalueCutoff =0.1) write.table(enrich.go.KEGG,file="KEGGid.csv",sep=",",quote=F,row.names = F) # visualization of GO and KEGG enrichment, the data were calculated by R software, file names are ˇ°GO.csvˇ± and ˇ°KEGGid.csvˇ± library(ggplot2) library(data.table) library(dplyr) library(Hmisc) library(stringr) #the result of GO analyses goinput <- fread("GO.csv",data.table = F) # filtration of the p value goinput <- goinput[goinput$pvalue<0.05,] goinput$ONTOLOGY[goinput$ONTOLOGY=="BP"] <- "Biological Process" goinput$ONTOLOGY[goinput$ONTOLOGY=="CC"] <- "Cellular Component" goinput$ONTOLOGY[goinput$ONTOLOGY=="MF"] <- "Molecular Function" goinput <- arrange(goinput,goinput$ONTOLOGY,goinput$Count) goinput$Description <- capitalize(goinput$Description) library(DOSE) goinput$GeneRatio<-parse_ratio(goinput$GeneRatio) x<-goinput$GeneRatio y<-factor(goinput$Description,levels = goinput$Description) p <- ggplot(goinput,aes(x,y)) p1 <- p + geom_point(aes(size=Count,color=-1*log(pvalue),shape=ONTOLOGY))+ scale_color_gradient(low = "#ff8b14", high = "#fe0b00") p1 p2 <- p1 + labs(color=expression(-log[10](pvalue)), size="Gene count", x="GeneRatio", y="Go Term", title="Go enrichment") p2 p3 <- p2 +theme_bw()+ theme(plot.margin=unit(c(1,2,1,1),'lines')) p3 ggsave("GO.pdf", plot = last_plot(), width = 12,height = 12) #the result of KEGG analysis kegginput <- fread("KEGGid.csv",data.table = F) kegginput <- kegginput[kegginput$pvalue<0.05,] kegginput <- arrange(kegginput,kegginput$ONTOLOGY,kegginput$Count) kegginput$Description <- capitalize(kegginput$Description) library(DOSE) kegginput$GeneRatio<-parse_ratio(kegginput$GeneRatio) x<-kegginput$GeneRatio y<-factor(kegginput$Description,levels = kegginput$Description) p <- ggplot(kegginput,aes(x,y)) p1 <- p + geom_point(aes(size=Count,color=-1*log(pvalue)))+ scale_color_gradient(low = "#ff8b14", high = "#fe0b00") p1 p2 <- p1 + labs(color=expression(-log[10](pvalue)), size="Gene count", x="GeneRatio", y="KEGG Term", title="KEGG enrichment") p2 p3 <- p2 +theme_bw()+ theme(plot.margin=unit(c(1,2,1,1),'lines')) p3 ggsave("KEGG.pdf", plot = last_plot(), width = 8,height = 10) #Cox regression analysis of the 37 DEGs, these data were download from TCGA database (https://portal.gdc.cancer.gov/) and cBioportal database (http://www.cbioportal.org), filenames is "mRNAclinialexp.csv". #univariate Cox regression analysis library(survival) pFilter<-0.05 data<-read.csv("mRNAclinialexp.csv",header=T,check.names=F,row.names = 1) outTab<-data.frame() sigGenes<-c("futime","fustat") for(i in colnames(data[,3:ncol(data)])){ cox <- coxph(Surv(futime, fustat) ~ data[,i], data = data) coxSummary <- summary(cox) coxP<-coxSummary$coefficients[,"Pr(>|z|)"] if(coxP|z|)"]) ) } } write.table(outTab,file="uniCox.csv",sep=",",row.names=F,quote=F) uniSigExp<-data[,sigGenes] uniSigExp<-cbind(id=row.names(uniSigExp),uniSigExp) write.table(uniSigExp,file="uniSigExp.csv",sep=",",row.names=F,quote=F) # map the forest data <- read.table("uniCox.csv", sep=",",header=T, row.names=1,check.names=F) gene <- rownames(data) hr <- sprintf("%.3f",data$"HR") hrLow <- sprintf("%.3f",data$"HR.95L") hrHigh <- sprintf("%.3f",data$"HR.95H") Hazard.ratio <- paste0(hr,"(",hrLow,"-",hrHigh,")") pVal <- ifelse(data$pvalue<0.001, "<0.001", sprintf("%.3f", data$pvalue)) pdf(file="uni.forest.pdf", width = 6,height = 4.5) n <- nrow(data) nRow <- n+1 ylim <- c(1,nRow) layout(matrix(c(1,2),nc=2),width=c(3,2)) xlim = c(0,3) par(mar=c(4,2.5,2,1)) plot(1,xlim=xlim,ylim=ylim,type="n",axes=F,xlab="",ylab="") text.cex=0.8 text(0,n:1,gene,adj=0,cex=text.cex) text(1.5-0.5*0.2,n:1,pVal,adj=1,cex=text.cex);text(1.5-0.5*0.2,n+1,'pvalue',cex=text.cex,font=2,adj=1) text(3,n:1,Hazard.ratio,adj=1,cex=text.cex);text(3,n+1,'Hazard ratio',cex=text.cex,font=2,adj=1,) par(mar=c(4,1,2,1),mgp=c(2,0.5,0)) xlim <- c(0,max(as.numeric(hrLow),as.numeric(hrHigh))) plot(1,xlim=xlim,ylim=ylim,type="n",axes=F,ylab="",xaxs="i",xlab="Hazard ratio") arrows(as.numeric(hrLow),n:1,as.numeric(hrHigh),n:1,angle=90,code=3,length=0.05,col="darkblue",lwd=2.5) abline(v=1,col="black",lty=2,lwd=2) boxcolor <- ifelse(as.numeric(hr) > 1, 'red', 'blue') points(as.numeric(hr), n:1, pch = 15, col = boxcolor, cex=1.3) axis(1) dev.off() #multivariate Cox regression analysis data<-read.table("uniSigExp.csv",header=T,sep=",",check.names=F,row.names=1) #stepwise multivariate Cox regression analysis multiCox<-coxph(Surv(futime, fustat) ~ ., data = data) multiCox<-step(multiCox,direction = "both") multiCoxSum<-summary(multiCox) outTab<-data.frame() outTab<-cbind( coef=multiCoxSum$coefficients[,"coef"], HR=multiCoxSum$conf.int[,"exp(coef)"], HR.95L=multiCoxSum$conf.int[,"lower .95"], HR.95H=multiCoxSum$conf.int[,"upper .95"], pvalue=multiCoxSum$coefficients[,"Pr(>|z|)"]) outTab<-cbind(id=row.names(outTab),outTab) outTab<-gsub("`","",outTab) write.table(outTab,file="multiCox.csv",sep=",",row.names=F,quote=F) # map the forest data<-read.csv("multiCox.csv",row.names = 1,header = T) data<-data[,-1] gene <- rownames(data) hr <- sprintf("%.3f",data$"HR") hrLow <- sprintf("%.3f",data$"HR.95L") hrHigh <- sprintf("%.3f",data$"HR.95H") Hazard.ratio <- paste0(hr,"(",hrLow,"-",hrHigh,")") pVal <- ifelse(data$pvalue<0.001, "<0.001", sprintf("%.3f", data$pvalue)) pdf(file="multi.forest.pdf", width = 6,height = 4.5) n <- nrow(data) nRow <- n+1 ylim <- c(1,nRow) layout(matrix(c(1,2),nc=2),width=c(3,2)) xlim <- c(0,3) par(mar=c(4,2.5,2,1)) plot(1,xlim=xlim,ylim=ylim,type="n",axes=F,xlab="",ylab="") text.cex<-0.8 text(0,n:1,gene,adj=0,cex=text.cex) text(1.5-0.5*0.2,n:1,pVal,adj=1,cex=text.cex);text(1.5-0.5*0.2,n+1,'pvalue',cex=text.cex,font=2,adj=1) text(3,n:1,Hazard.ratio,adj=1,cex=text.cex);text(3,n+1,'Hazard ratio',cex=text.cex,font=2,adj=1,) par(mar=c(4,1,2,1),mgp=c(2,0.5,0)) xlim <- c(0,max(as.numeric(hrLow),as.numeric(hrHigh))) plot(1,xlim=xlim,ylim=ylim,type="n",axes=F,ylab="",xaxs="i",xlab="Hazard ratio") arrows(as.numeric(hrLow),n:1,as.numeric(hrHigh),n:1,angle=90,code=3,length=0.05,col="darkblue",lwd=2.5) abline(v=1,col="black",lty=2,lwd=2) boxcolor <- ifelse(as.numeric(hr) > 1, 'red', 'green') points(as.numeric(hr), n:1, pch = 15, col = boxcolor, cex=1.3) axis(1) dev.off() #prognostic nomogram, this file is calculated by cox regression analysis and named "Input.csv". library(rms) library(foreign) library(survival) library(survivalROC) rt<-read.table("Input.csv",sep=",",header=T,row.names = 1) rt$futime<-rt$futime/12 rt$age<-factor(rt$age,labels = c(">=40","<40")) rt$T<-factor(rt$T,labels=c("T1","T2","T3-4")) rt$N<-factor(rt$N,labels=c("N0","N1","N2","N3")) rt$riskScore<-factor(rt$riskScore,labels = c("low","high")) ddist <- datadist(rt) options(datadist='ddist') cox <- cph(Surv(futime,fustat==1) ~ T+N+riskScore, surv=T,x=T, y=T, dist='lognormal', data=rt) surv <- Survival(cox) sur_3_year<-function(x)surv(1*3,lp=x) sur_5_year<-function(x)surv(1*5,lp=x) nom2<-nomogram(cox, fun=list(sur_3_year,sur_5_year), lp= F, funlabel=c('3-Year Survival','5-Year survival'), maxscale=100, fun.at = c (0.05, seq (0,1, by=0.1),0.95)) pdf("nomogram.pdf",width = 8,height = 5.5) plot(nom2,xfrac=0.25) dev.off() #Calculate the C-index of the nomogram library(survcomp) fit<-coxph(Surv(futime,fustat==1) ~ T+N+riskScore, data=rt) c_index<-concordance.index(predict(fit),surv.time = rt$futime,surv.event = rt$fustat,method = "noether") c_index$c.index;c_index$lower;c_index$upper #the calibration plots of the nomogram #3-year cox_3<- cph(Surv(futime,fustat==1) ~T+N+riskScore, surv=T,x=T, y=T, time.inc = 3, data=rt) cal_3<- calibrate(cox_3, cmethod="KM", method="boot", u=3, m=120, B=468) pdf(file = "calibrations.pdf",width = 6,height = 6) par(mar=c(6,5,1,1)) plot(cal_3,lwd=3,lty=2,errbar.col="black", xlim = c(0.6,1),ylim = c(0.5,1), col="blue", xlab ="Nomogram-Predicted DFS", ylab="Observed DFS") lines(cal_3[,c('mean.predicted','KM')], type = 'b',lwd = 3, col ="blue",pch = 16) box(lwd = 1) abline(0,1,lty = 2,lwd = 2,col = "black") #5-year cox_5 <- cph(Surv(futime,fustat==1) ~T+N+riskScore, surv=T,x=T, y=T, time.inc = 5, data=rt) cal_5 <- calibrate(cox_5, cmethod="KM", method="boot", u=5, m= 120, B=468) plot(cal_5,lwd=3,lty=2,errbar.col="black", xlim = c(0.6,1),ylim = c(0.5,1), col="red", xlab ="Nomogram-Predicted DFS", ylab="Observed DFS", add = T) lines(cal_5[,c('mean.predicted','KM')], type = 'b',lwd = 3, col ="red",pch = 16) box(lwd = 1) legend("bottomright", c("3 year","5year"), lwd=2,bty="n",col=c("blue","red")) dev.off() #Time-dependent ROC analyses of the nomogram rm(list = ls()) rt<-read.csv("Input.csv") #3 year ROC aucText<-c() pdf(file = "roc3.pdf",width = 6,height = 6) predict_time<-3 myroc<-survivalROC(Stime=rt$futime, status=rt$fustat, marker=rt$riskScore, predict.time=predict_time, method="KM") plot(myroc$FP,myroc$TP,type="l", xlim=c(0,1),ylim=c(0,1), col="blue", lwd=1.6, xlab="1-Specificity", ylab="Sensitivity", main="3 year ROC" ) aucText<-c(aucText,paste0("riskScore"," (AUC=",sprintf("%.3f",myroc$AUC),")")) myroc1<-survivalROC(Stime=rt$futime, status=rt$fustat, marker=rt$N, predict.time=predict_time, method="KM") lines(myroc1$FP,myroc1$TP,type="l", xlim=c(0,1),ylim=c(0,1), col="green", lwd=1.6, xlab="1-Specificity", ylab="Sensitivity", main="3 year ROC" ) aucText<-c(aucText,paste0("N"," (AUC=",sprintf("%.3f",myroc1$AUC),")")) myroc3<-survivalROC(Stime=rt$futime, status=rt$fustat, marker=rt$T, predict.time=predict_time, method="KM") lines(myroc3$FP,myroc3$TP,type="l", xlim=c(0,1),ylim=c(0,1), col="#FF00CC", lwd=1.6, xlab="1-Specificity", ylab="Sensitivity", main="3 year ROC" ) aucText<-c(aucText,paste0("T"," (AUC=",sprintf("%.3f",myroc3$AUC),")")) myroc2<-survivalROC(Stime=rt$futime, status=rt$fustat, marker=rt$risk_Score, predict.time=predict_time, method="KM") lines(myroc2$FP,myroc2$TP,type="l", xlim=c(0,1),ylim=c(0,1), col="red", lwd=1.6, xlab="1-Specificity", ylab="Sensitivity", main="3 year ROC") aucText<-c(aucText,paste0("Combine"," (AUC=",sprintf("%.3f",myroc2$AUC),")")) legend("bottomright", aucText,lwd=2,bty="n",col=c("blue","green","#FF00CC","red")) abline(0,1) dev.off() #5_year ROC aucText<-c() pdf(file = "roc5.pdf",width = 6,height = 6) predict_time<-5 myroc<-survivalROC(Stime=rt$futime, status=rt$fustat, marker=rt$riskScore, predict.time=predict_time, method="KM") plot(myroc$FP,myroc$TP,type="l", xlim=c(0,1),ylim=c(0,1), col="blue", lwd=1.6, xlab="1-Specificity", ylab="Sensitivity", main="3 year ROC" ) aucText<-c(aucText,paste0("riskScore"," (AUC=",sprintf("%.3f",myroc$AUC),")")) myroc1<-survivalROC(Stime=rt$futime, status=rt$fustat, marker=rt$N, predict.time=predict_time, method="KM") lines(myroc1$FP,myroc1$TP,type="l", xlim=c(0,1),ylim=c(0,1), col="green", lwd=1.6, xlab="1-Specificity", ylab="Sensitivity", main="3 year ROC" ) aucText<-c(aucText,paste0("N"," (AUC=",sprintf("%.3f",myroc1$AUC),")")) myroc3<-survivalROC(Stime=rt$futime, status=rt$fustat, marker=rt$T, predict.time=predict_time, method="KM") lines(myroc3$FP,myroc3$TP,type="l", xlim=c(0,1),ylim=c(0,1), col="#FF00CC", lwd=1.6, xlab="1-Specificity", ylab="Sensitivity", main="3 year ROC" ) aucText<-c(aucText,paste0("T"," (AUC=",sprintf("%.3f",myroc3$AUC),")")) myroc2<-survivalROC(Stime=rt$futime, status=rt$fustat, marker=rt$risk_Score, predict.time=predict_time, method="KM") lines(myroc2$FP,myroc2$TP,type="l", xlim=c(0,1),ylim=c(0,1), col="red", lwd=1.6, xlab="1-Specificity", ylab="Sensitivity", main="3 year ROC") aucText<-c(aucText,paste0("Combine"," (AUC=",sprintf("%.3f",myroc2$AUC),")")) legend("bottomright", aucText,lwd=2,bty="n",col=c("blue","green","#FF00CC","red")) abline(0,1) dev.off()