# The resistance-related DEGs has been downloaded from https://www.ncbi.nlm.nih.gov/geo/, file name is " geneMatrix.txt"

library(limma)
library(miscTools)

#the number of MCF7 parental cell lines
conNum<-
# the number of MCF7 resistant cell lines
treatNum<-
#the significance threshold of |log2foldchange| and p value
logFoldChange<-1.5
adjustP<-0.05
data<-read.table("geneMatrix.txt",sep=",",header = T,row.names=1,check.names=F)
data<-normalizeBetweenArrays(as.matrix(data))
modType<-c(rep("con",conNum),rep("treat",treatNum))
design <- model.matrix(~0+factor(modType),ordered = F) 
colnames(design) <- c("con","treat")
cont.matrix<-makeContrasts(treat-con,levels=design)
fit<-lmFit(data,design)
fit2<-contrasts.fit(fit,cont.matrix)
fit2<-eBayes(fit2)
all<-topTable(fit2,adjust='fdr',coef=1,number=Inf)
write.table(all,file = "limma.csv",sep=",",quote = F)
#get the DEGs
allDiff<-all[with(all,(abs(logFC)>logFoldChange & adj.P.Val < adjustP )),]
write.table(allDiff,file="all.diff.csv",sep=",",quote = F)
updiff<-all[with(all,(logFC>logFoldChange & adj.P.Val < adjustP )),]
write.table(updiff,file="up.diff.csv",sep=",",quote = F)
downdiff<-all[with(all,(logFC<(-logFoldChange) & adj.P.Val < adjustP )),]
write.table(downdiff,file="down.diff.csv",sep=",",quote = F)


# the volcano plots of DEGs, the data were calculated by “limma” package, file name is " limma.csv"
library(ggpubr)
library(ggthemes)
#the significance threshold of |log2foldchange| and p value
adj.p<-0.05 
logFC<-1.5 
data<-read.csv(file = "limma.csv", header = T, sep = ",")
data$logP <- -log10(data$adj.P.Val)
# defined the DEGs
data$Group<-"Group"
data$Group[which( (data$adj.P.Val < adj.p) & (data$logFC > logFC) )] = "up-regulated"
data$Group[which( (data$adj.P.Val < adj.p) & (data$logFC < -logFC) )] = "down-regulated"
data$Group[which( (data$adj.P.Val> adj.p)|(abs(data$logFC) <= logFC) )] = "not-significant"
p<-ggscatter(data, 
             x = "logFC", y = "logP",
             color = "Group", 
             palette = c("#2f5688", "#BBBBBB", "#CC0000"),
             size = 1,
             repel = T,
             fontsize = 2,
             xlim=c(-5,5),
             ylim=c(0,(max(data$logP)+0.2)),
             xlab = "log2FoldChange", 
             ylab = "-log10(Adjust P-value)") + 
  theme_bw()+
  geom_hline(yintercept = -log10(adj.p), linetype="dashed") +
  geom_vline(xintercept = c(-logFC,logFC), linetype="dashed")
q<-p + theme(panel.grid = element_blank(),
             axis.line = element_line(colour = "black"))
print(q)
ggsave('volcano.pdf', q,width = 10, height = 8)

#the enrichment analyses of the 37 DEGs, file name is "37DEGs.txt"
library(clusterProfiler)
library(topGO)
library(Rgraphviz)
library(pathview)
library(org.Hs.eg.db)
gene<-read.table("37DEGs.txt",sep = '\t',header = F)
gene_symbol<-as.vector(gene[,1])
entrez_id<-gene[,2]
entrez_id<-mget(gene_symbol, org.Hs.egSYMBOL2EG, ifnotfound=NA) 
entrez_id<-as.character(entrez_id)
symbole_id<-cbind(gene,entrez_id=entrez_id)
#GO enrichment analyses
enrich.go<-enrichGO(gene=gene_symbol,
                    OrgDb = org.Hs.eg.db,
                    keyType = 'SYMBOL',
                    ont ='ALL',
                    pvalueCutoff = 0.05,
                    qvalueCutoff = 0.05)
write.csv(enrich.go,"GO.csv",quote=F,row.names = F)
#KEGG enrichment analysis
enrich.go.KEGG<-enrichKEGG(gene = entrez_id,
                           organism = 'hsa',
                           pvalueCutoff = 0.05,
                           qvalueCutoff =0.1)
write.table(enrich.go.KEGG,file="KEGGid.csv",sep=",",quote=F,row.names = F)

# visualization of GO and KEGG enrichment, the data were calculated by R software, file names are “GO.csv” and “KEGGid.csv”
library(ggplot2)
library(data.table)
library(dplyr)
library(Hmisc)
library(stringr)
#the result of GO analyses
goinput <- fread("GO.csv",data.table = F)
# filtration of the p value
goinput <- goinput[goinput$pvalue<0.05,] 
goinput$ONTOLOGY[goinput$ONTOLOGY=="BP"] <- "Biological Process"
goinput$ONTOLOGY[goinput$ONTOLOGY=="CC"] <- "Cellular Component"
goinput$ONTOLOGY[goinput$ONTOLOGY=="MF"] <- "Molecular Function"
goinput <- arrange(goinput,goinput$ONTOLOGY,goinput$Count)
goinput$Description <- capitalize(goinput$Description)
library(DOSE)
goinput$GeneRatio<-parse_ratio(goinput$GeneRatio)
x<-goinput$GeneRatio
y<-factor(goinput$Description,levels = goinput$Description)
p <- ggplot(goinput,aes(x,y))
p1 <- p + geom_point(aes(size=Count,color=-1*log(pvalue),shape=ONTOLOGY))+
  scale_color_gradient(low = "#ff8b14", high = "#fe0b00")
p1
p2 <- p1 + labs(color=expression(-log[10](pvalue)),
                size="Gene count",
                x="GeneRatio",
                y="Go Term",
                title="Go enrichment")
p2
p3 <- p2 +theme_bw()+
  theme(plot.margin=unit(c(1,2,1,1),'lines')) 
p3
ggsave("GO.pdf",
       plot = last_plot(),
       width = 12,height = 12)
#the result of KEGG analysis
kegginput <- fread("KEGGid.csv",data.table = F)
kegginput <- kegginput[kegginput$pvalue<0.05,] 
kegginput <- arrange(kegginput,kegginput$ONTOLOGY,kegginput$Count)
kegginput$Description <- capitalize(kegginput$Description)
library(DOSE)
kegginput$GeneRatio<-parse_ratio(kegginput$GeneRatio)
x<-kegginput$GeneRatio
y<-factor(kegginput$Description,levels = kegginput$Description)
p <- ggplot(kegginput,aes(x,y))
p1 <- p + geom_point(aes(size=Count,color=-1*log(pvalue)))+
  scale_color_gradient(low = "#ff8b14", high = "#fe0b00")
p1
p2 <- p1 + labs(color=expression(-log[10](pvalue)),
                size="Gene count",
                x="GeneRatio",
                y="KEGG Term",
                title="KEGG enrichment")
p2
p3 <- p2 +theme_bw()+
  theme(plot.margin=unit(c(1,2,1,1),'lines'))
p3
ggsave("KEGG.pdf",
       plot = last_plot(),
       width = 8,height = 10)

#Cox regression analysis of the 37 DEGs, these data were download from TCGA database (https://portal.gdc.cancer.gov/) and cBioportal database (http://www.cbioportal.org), filenames is "mRNAclinialexp.csv".
#univariate Cox regression analysis
library(survival)
pFilter<-0.05
data<-read.csv("mRNAclinialexp.csv",header=T,check.names=F,row.names = 1)
outTab<-data.frame()
sigGenes<-c("futime","fustat")
for(i in colnames(data[,3:ncol(data)])){
 cox <- coxph(Surv(futime, fustat) ~ data[,i], data = data)
 coxSummary <- summary(cox)
 coxP<-coxSummary$coefficients[,"Pr(>|z|)"]
 if(coxP<pFilter){
     sigGenes<-c(sigGenes,i)
		 outTab<-rbind(outTab,
		              cbind(id=i,
		              HR=coxSummary$conf.int[,"exp(coef)"],
		              HR.95L=coxSummary$conf.int[,"lower .95"],
		              HR.95H=coxSummary$conf.int[,"upper .95"],
		              pvalue=coxSummary$coefficients[,"Pr(>|z|)"])
		              )
  }
}
write.table(outTab,file="uniCox.csv",sep=",",row.names=F,quote=F)
uniSigExp<-data[,sigGenes]
uniSigExp<-cbind(id=row.names(uniSigExp),uniSigExp)
write.table(uniSigExp,file="uniSigExp.csv",sep=",",row.names=F,quote=F)
# map the forest
data <- read.table("uniCox.csv", sep=",",header=T, row.names=1,check.names=F)
gene <- rownames(data)
hr <- sprintf("%.3f",data$"HR")
hrLow  <- sprintf("%.3f",data$"HR.95L")
hrHigh <- sprintf("%.3f",data$"HR.95H")
Hazard.ratio <- paste0(hr,"(",hrLow,"-",hrHigh,")")
pVal <- ifelse(data$pvalue<0.001, "<0.001", sprintf("%.3f", data$pvalue))
pdf(file="uni.forest.pdf", width = 6,height = 4.5)
n <- nrow(data)
nRow <- n+1
ylim <- c(1,nRow)
layout(matrix(c(1,2),nc=2),width=c(3,2))
xlim = c(0,3)
par(mar=c(4,2.5,2,1))
plot(1,xlim=xlim,ylim=ylim,type="n",axes=F,xlab="",ylab="")
text.cex=0.8
text(0,n:1,gene,adj=0,cex=text.cex)
text(1.5-0.5*0.2,n:1,pVal,adj=1,cex=text.cex);text(1.5-0.5*0.2,n+1,'pvalue',cex=text.cex,font=2,adj=1)
text(3,n:1,Hazard.ratio,adj=1,cex=text.cex);text(3,n+1,'Hazard ratio',cex=text.cex,font=2,adj=1,)
par(mar=c(4,1,2,1),mgp=c(2,0.5,0))
xlim <- c(0,max(as.numeric(hrLow),as.numeric(hrHigh)))
plot(1,xlim=xlim,ylim=ylim,type="n",axes=F,ylab="",xaxs="i",xlab="Hazard ratio")
arrows(as.numeric(hrLow),n:1,as.numeric(hrHigh),n:1,angle=90,code=3,length=0.05,col="darkblue",lwd=2.5)
abline(v=1,col="black",lty=2,lwd=2)
boxcolor <- ifelse(as.numeric(hr) > 1, 'red', 'blue')
points(as.numeric(hr), n:1, pch = 15, col = boxcolor, cex=1.3)
axis(1)
dev.off()
#multivariate Cox regression analysis
data<-read.table("uniSigExp.csv",header=T,sep=",",check.names=F,row.names=1) 
#stepwise multivariate Cox regression analysis
multiCox<-coxph(Surv(futime, fustat) ~ ., data = data)
multiCox<-step(multiCox,direction = "both")
multiCoxSum<-summary(multiCox)
outTab<-data.frame()
outTab<-cbind(
             coef=multiCoxSum$coefficients[,"coef"],
             HR=multiCoxSum$conf.int[,"exp(coef)"],
             HR.95L=multiCoxSum$conf.int[,"lower .95"],
             HR.95H=multiCoxSum$conf.int[,"upper .95"],
             pvalue=multiCoxSum$coefficients[,"Pr(>|z|)"])
outTab<-cbind(id=row.names(outTab),outTab)
outTab<-gsub("`","",outTab)
write.table(outTab,file="multiCox.csv",sep=",",row.names=F,quote=F)
# map the forest
data<-read.csv("multiCox.csv",row.names = 1,header = T)
data<-data[,-1]
gene <- rownames(data)
hr <- sprintf("%.3f",data$"HR")
hrLow  <- sprintf("%.3f",data$"HR.95L")
hrHigh <- sprintf("%.3f",data$"HR.95H")
Hazard.ratio <- paste0(hr,"(",hrLow,"-",hrHigh,")")
pVal <- ifelse(data$pvalue<0.001, "<0.001", sprintf("%.3f", data$pvalue))
pdf(file="multi.forest.pdf", width = 6,height = 4.5)
n <- nrow(data)
nRow <- n+1
ylim <- c(1,nRow)
layout(matrix(c(1,2),nc=2),width=c(3,2))
xlim <- c(0,3)
par(mar=c(4,2.5,2,1))
plot(1,xlim=xlim,ylim=ylim,type="n",axes=F,xlab="",ylab="")
text.cex<-0.8
text(0,n:1,gene,adj=0,cex=text.cex)
text(1.5-0.5*0.2,n:1,pVal,adj=1,cex=text.cex);text(1.5-0.5*0.2,n+1,'pvalue',cex=text.cex,font=2,adj=1)
text(3,n:1,Hazard.ratio,adj=1,cex=text.cex);text(3,n+1,'Hazard ratio',cex=text.cex,font=2,adj=1,)
par(mar=c(4,1,2,1),mgp=c(2,0.5,0))
xlim <- c(0,max(as.numeric(hrLow),as.numeric(hrHigh)))
plot(1,xlim=xlim,ylim=ylim,type="n",axes=F,ylab="",xaxs="i",xlab="Hazard ratio")
arrows(as.numeric(hrLow),n:1,as.numeric(hrHigh),n:1,angle=90,code=3,length=0.05,col="darkblue",lwd=2.5)
abline(v=1,col="black",lty=2,lwd=2)
boxcolor <- ifelse(as.numeric(hr) > 1, 'red', 'green')
points(as.numeric(hr), n:1, pch = 15, col = boxcolor, cex=1.3)
axis(1)
dev.off()

#prognostic nomogram, this file is calculated by cox regression analysis and named "Input.csv".
library(rms)
library(foreign)
library(survival)
library(survivalROC)

rt<-read.table("Input.csv",sep=",",header=T,row.names = 1)
rt$futime<-rt$futime/12
 
rt$age<-factor(rt$age,labels = c(">=40","<40"))
rt$T<-factor(rt$T,labels=c("T1","T2","T3-4"))
rt$N<-factor(rt$N,labels=c("N0","N1","N2","N3"))
rt$riskScore<-factor(rt$riskScore,labels = c("low","high"))

ddist <- datadist(rt)
options(datadist='ddist')

cox <- cph(Surv(futime,fustat==1) ~ T+N+riskScore,
           surv=T,x=T, y=T,
           dist='lognormal',
           data=rt) 

surv <- Survival(cox)

sur_3_year<-function(x)surv(1*3,lp=x)
sur_5_year<-function(x)surv(1*5,lp=x)

nom2<-nomogram(cox,
               fun=list(sur_3_year,sur_5_year),
               lp= F,
               funlabel=c('3-Year Survival','5-Year survival'),
               maxscale=100,
               fun.at = c (0.05, seq (0,1, by=0.1),0.95))
pdf("nomogram.pdf",width = 8,height = 5.5)
plot(nom2,xfrac=0.25)
dev.off()

#Calculate the C-index of the nomogram
library(survcomp)
fit<-coxph(Surv(futime,fustat==1) ~ T+N+riskScore,
           data=rt)
c_index<-concordance.index(predict(fit),surv.time = rt$futime,surv.event = rt$fustat,method = "noether")
c_index$c.index;c_index$lower;c_index$upper

#the calibration plots of the nomogram
#3-year 
cox_3<- cph(Surv(futime,fustat==1) ~T+N+riskScore,
            surv=T,x=T, y=T,
            time.inc = 3,
            data=rt) 

cal_3<- calibrate(cox_3, cmethod="KM", method="boot", 
                  u=3,  
                  m=120, 
                  B=468) 

pdf(file = "calibrations.pdf",width = 6,height = 6)
par(mar=c(6,5,1,1))
plot(cal_3,lwd=3,lty=2,errbar.col="black",
     xlim = c(0.6,1),ylim = c(0.5,1),
     col="blue",
     xlab ="Nomogram-Predicted DFS",
     ylab="Observed DFS")
lines(cal_3[,c('mean.predicted','KM')],
      type = 'b',lwd = 3,
      col ="blue",pch = 16)
box(lwd = 1)
abline(0,1,lty = 2,lwd = 2,col = "black")

#5-year
cox_5 <- cph(Surv(futime,fustat==1) ~T+N+riskScore,
             surv=T,x=T, y=T,
             time.inc = 5,
             data=rt) 

cal_5 <- calibrate(cox_5, cmethod="KM", method="boot", 
                   u=5,  
                   m= 120, 
                   B=468)   

plot(cal_5,lwd=3,lty=2,errbar.col="black",
     xlim = c(0.6,1),ylim = c(0.5,1),
     col="red",
     xlab ="Nomogram-Predicted DFS",
     ylab="Observed DFS",
     add = T)
lines(cal_5[,c('mean.predicted','KM')],
      type = 'b',lwd = 3,
      col ="red",pch = 16)
box(lwd = 1)
legend("bottomright",
       c("3 year","5year"),
       lwd=2,bty="n",col=c("blue","red"))
dev.off()    

#Time-dependent ROC analyses of the nomogram 
rm(list = ls())
rt<-read.csv("Input.csv")

#3 year ROC
aucText<-c()
pdf(file = "roc3.pdf",width = 6,height = 6)

predict_time<-3 
myroc<-survivalROC(Stime=rt$futime, 
                   status=rt$fustat, 
                   marker=rt$riskScore, 
                   predict.time=predict_time,
                   method="KM")
plot(myroc$FP,myroc$TP,type="l",
     xlim=c(0,1),ylim=c(0,1),
     col="blue",
     lwd=1.6,
     xlab="1-Specificity", 
     ylab="Sensitivity",
     main="3 year ROC"
)
aucText<-c(aucText,paste0("riskScore"," (AUC=",sprintf("%.3f",myroc$AUC),")"))

myroc1<-survivalROC(Stime=rt$futime, 
                    status=rt$fustat, 
                    marker=rt$N, 
                    predict.time=predict_time,
                    method="KM")
lines(myroc1$FP,myroc1$TP,type="l",
      xlim=c(0,1),ylim=c(0,1),
      col="green",
      lwd=1.6,
      xlab="1-Specificity", 
      ylab="Sensitivity",
      main="3 year ROC"
)
aucText<-c(aucText,paste0("N"," (AUC=",sprintf("%.3f",myroc1$AUC),")"))

myroc3<-survivalROC(Stime=rt$futime, 
                    status=rt$fustat, 
                    marker=rt$T, 
                    predict.time=predict_time,
                    method="KM")
lines(myroc3$FP,myroc3$TP,type="l",
      xlim=c(0,1),ylim=c(0,1),
      col="#FF00CC",
      lwd=1.6,
      xlab="1-Specificity", 
      ylab="Sensitivity",
      main="3 year ROC"
)
aucText<-c(aucText,paste0("T"," (AUC=",sprintf("%.3f",myroc3$AUC),")"))

myroc2<-survivalROC(Stime=rt$futime, 
                    status=rt$fustat, 
                    marker=rt$risk_Score, 
                    predict.time=predict_time,
                    method="KM")

lines(myroc2$FP,myroc2$TP,type="l",
      xlim=c(0,1),ylim=c(0,1),
      col="red",
      lwd=1.6,
      xlab="1-Specificity", 
      ylab="Sensitivity",
      main="3 year ROC")
aucText<-c(aucText,paste0("Combine"," (AUC=",sprintf("%.3f",myroc2$AUC),")"))

legend("bottomright", aucText,lwd=2,bty="n",col=c("blue","green","#FF00CC","red"))

abline(0,1)
dev.off()

#5_year ROC
aucText<-c()
pdf(file = "roc5.pdf",width = 6,height = 6)

predict_time<-5
myroc<-survivalROC(Stime=rt$futime, 
                   status=rt$fustat, 
                   marker=rt$riskScore, 
                   predict.time=predict_time,
                   method="KM")
plot(myroc$FP,myroc$TP,type="l",
     xlim=c(0,1),ylim=c(0,1),
     col="blue",
     lwd=1.6,
     xlab="1-Specificity", 
     ylab="Sensitivity",
     main="3 year ROC"
)
aucText<-c(aucText,paste0("riskScore"," (AUC=",sprintf("%.3f",myroc$AUC),")"))

myroc1<-survivalROC(Stime=rt$futime, 
                    status=rt$fustat, 
                    marker=rt$N, 
                    predict.time=predict_time,
                    method="KM")
lines(myroc1$FP,myroc1$TP,type="l",
      xlim=c(0,1),ylim=c(0,1),
      col="green",
      lwd=1.6,
      xlab="1-Specificity", 
      ylab="Sensitivity",
      main="3 year ROC"
)
aucText<-c(aucText,paste0("N"," (AUC=",sprintf("%.3f",myroc1$AUC),")"))

myroc3<-survivalROC(Stime=rt$futime, 
                    status=rt$fustat, 
                    marker=rt$T, 
                    predict.time=predict_time,
                    method="KM")
lines(myroc3$FP,myroc3$TP,type="l",
      xlim=c(0,1),ylim=c(0,1),
      col="#FF00CC",
      lwd=1.6,
      xlab="1-Specificity", 
      ylab="Sensitivity",
      main="3 year ROC"
)
aucText<-c(aucText,paste0("T"," (AUC=",sprintf("%.3f",myroc3$AUC),")"))
myroc2<-survivalROC(Stime=rt$futime, 
                    status=rt$fustat, 
                    marker=rt$risk_Score, 
                    predict.time=predict_time,
                    method="KM")

lines(myroc2$FP,myroc2$TP,type="l",
      xlim=c(0,1),ylim=c(0,1),
      col="red",
      lwd=1.6,
      xlab="1-Specificity", 
      ylab="Sensitivity",
      main="3 year ROC")
aucText<-c(aucText,paste0("Combine"," (AUC=",sprintf("%.3f",myroc2$AUC),")"))
legend("bottomright", aucText,lwd=2,bty="n",col=c("blue","green","#FF00CC","red"))

abline(0,1)
dev.off()