##############################Before methylation data screening####################################

rm(list = ls())
options(stringsAsFactors = F)

require(GEOquery)
require(Biobase)
library("impute")

info=read.table("Effective sample end of cancer.txt",sep="\t",header=T)
library(data.table)
b=info
rownames(b)=b[,1]
a=fread("datExpr0 end.txt",data.table = F )
a[1:4,1:4]
rownames(a)=a[,1]
a=a[,-1]
beta=as.matrix(a)
beta=impute.knn(beta)
betaData=beta$data
betaData=betaData+0.00001
a=betaData
a[1:4,1:4]
identical(colnames(a),rownames(b))
boxplot(a,las=2)
# Make sure that the methylation signal value matrix is one-to-one corresponding to the phenotypic information
library(ChAMP)
# The beta signal value matrix cannot have NA value
myLoad=champ.filter(beta = a,pd = b)
myLoad
save(myLoad,file = 'step1-output.Rdata')


rm(list = ls())   
options(stringsAsFactors = F)
library("ChAMP")
library("minfi")
require(GEOquery)
require(Biobase)
load(file = 'step1-output.Rdata')

myLoad  
# The time-consuming steps are commented out after running once
if(F){
  myNorm <- champ.norm(beta=myLoad$beta,arraytype="450K",cores=5)
  dim(myNorm) 
  pD=myLoad$pd
  save(myNorm,pD,file = 'step2-champ_myNorm.Rdata')
}
load(file = 'step2-champ_myNorm.Rdata')
# The original 450K is 400K after quality control filtration
beta.m=myNorm
group_list=myLoad$pd$group
dim(beta.m) 
# The following are the quality control measures of the three standard pictures of the expression matrix
if(T){
  
  
  dat=t(beta.m)
  dat[1:4,1:4] 
  library("FactoMineR")#These two packages need to be loaded to draw the principal component analysis diagram
  library("factoextra")  
  # Because the methylation chip is 450K or 850K, hundreds of thousands of lines of methylation sites, PCA will not be too fast
  dat.pca <- PCA(dat , graph = FALSE) 
  fviz_pca_ind(dat.pca,
               geom.ind = "point", # show points only (nbut not "text")
               col.ind = group_list, # color by groups
               # palette = c("#00AFBB", "#E7B800"),
               addEllipses = TRUE, # Concentration ellipses
               legend.title = "Groups"
  )
  ggsave('all_samples_PCA.png')
  
  dat=beta.m
  dat[1:4,1:4] 
  cg=names(tail(sort(apply(dat,1,sd)),1000))#apply by line（'1'is retrieved by row，'2'is taken by column）take the variance of each row, from the smallest to the largest, and take the largest 1000
  library(pheatmap)
  pheatmap(dat[cg,],show_colnames =F,show_rownames = F) #Take out each row of the 1000 genes extracted and combine them into a new expression matrix
  n=t(scale(t(dat[cg,]))) # 'scale'can normalize the log-ratio value
  n[n>2]=2 
  n[n< -2]= -2
  n[1:4,1:4]
  pheatmap(n,show_colnames =F,show_rownames = F)
  ac=data.frame(group=group_list)
  rownames(ac)=colnames(n)  
  pheatmap(n,show_colnames =F,show_rownames = F,
           annotation_col=ac,filename = 'heatmap_top1000_sd.png')
  dev.off()
  
  exprSet=beta.m
  pheatmap::pheatmap(cor(exprSet)) 
  # The similarity of samples within the group should be higher than that between groups!
  colD=data.frame(group_list=group_list)
  rownames(colD)=colnames(exprSet)
  pheatmap::pheatmap(cor(exprSet),
                     annotation_col = colD,
                     show_rownames = F,
                     filename = 'cor_all.png')
  dev.off() 
  exprSet=exprSet[names(sort(apply(exprSet, 1,mad),decreasing = T)[1:500]),]
  dim(exprSet)
  # M=cor(log2(exprSet+1)) 
  M=cor(exprSet)
  pheatmap::pheatmap(M,annotation_col = colD)
  pheatmap::pheatmap(M,
                     show_rownames = F,
                     annotation_col = colD,
                     filename = 'cor_top500.png')
  dev.off() 
  
}


###################################After methylation data screening####################################

rm(list = ls())
options(stringsAsFactors = F)

require(GEOquery)
require(Biobase)
library("impute")

info=read.table("Effective sample end of cancer.txt",sep="\t",header=T)
library(data.table)
b=info
rownames(b)=b[,1]
a=fread("datExpr0 end.txt",data.table = F )

a[1:4,1:4]
rownames(a)=a[,1]
a=a[,-1]
beta=as.matrix(a)
beta=impute.knn(beta)
betaData=beta$data
betaData=betaData+0.00001
a=betaData
a[1:4,1:4]
identical(colnames(a),rownames(b))
boxplot(a,las=2)
# Make sure that the methylation signal value matrix is one-to-one corresponding to the phenotypic information
library(ChAMP)
# The beta signal value matrix cannot have NA value
myLoad=champ.filter(beta = a,pd = b)
myLoad
save(myLoad,file = 'step1-output.Rdata')


rm(list = ls())   
options(stringsAsFactors = F)
library("ChAMP")
library("minfi")
require(GEOquery)
require(Biobase)
load(file = 'step1-output.Rdata')

myLoad  
# The time-consuming steps are commented out after running once
if(F){
  myNorm <- champ.norm(beta=myLoad$beta,arraytype="450K",cores=5)
  dim(myNorm) 
  pD=myLoad$pd
  save(myNorm,pD,file = 'step2-champ_myNorm.Rdata')
}
load(file = 'step2-champ_myNorm.Rdata')
# The original 450K is 400K after quality control filtration
beta.m=myNorm
group_list=myLoad$pd$group
dim(beta.m) 
# The following are the quality control measures of the three standard pictures of the expression matrix
if(T){
  
  
  dat=t(beta.m)
  dat[1:4,1:4] 
  library("FactoMineR")#These two packages need to be loaded to draw the principal component analysis diagram
  library("factoextra")  
  # Because the methylation chip is 450K or 850K, hundreds of thousands of lines of methylation sites, PCA will not be too fast
  dat.pca <- PCA(dat , graph = FALSE) 
  fviz_pca_ind(dat.pca,
               geom.ind = "point", # show points only (nbut not "text")
               col.ind = group_list, # color by groups
               # palette = c("#00AFBB", "#E7B800"),
               addEllipses = TRUE, # Concentration ellipses
               legend.title = "Groups"
  )
  ggsave('all_samples_PCA.png')
  
  dat=beta.m
  dat[1:4,1:4] 
  cg=names(tail(sort(apply(dat,1,sd)),1000))#apply by line（'1'is retrieved by row，'2'is taken by column）take the variance of each row, from the smallest to the largest, and take the largest 1000
  library(pheatmap)
  pheatmap(dat[cg,],show_colnames =F,show_rownames = F) #Take out each row of the 1000 genes extracted and combine them into a new expression matrix
  n=t(scale(t(dat[cg,]))) # 'scale'can normalize the log-ratio value
  n[n>2]=2 
  n[n< -2]= -2
  n[1:4,1:4]
  pheatmap(n,show_colnames =F,show_rownames = F)
  ac=data.frame(group=group_list)
  rownames(ac)=colnames(n)  
  pheatmap(n,show_colnames =F,show_rownames = F,
           annotation_col=ac,filename = 'heatmap_top1000_sd.png')
  dev.off()
  
  exprSet=beta.m
  pheatmap::pheatmap(cor(exprSet)) 
  # The similarity of samples within the group should be higher than that between groups!
  colD=data.frame(group_list=group_list)
  rownames(colD)=colnames(exprSet)
  pheatmap::pheatmap(cor(exprSet),
                     annotation_col = colD,
                     show_rownames = F,
                     filename = 'cor_all.png')
  dev.off() 
  exprSet=exprSet[names(sort(apply(exprSet, 1,mad),decreasing = T)[1:500]),]
  dim(exprSet)
  # M=cor(log2(exprSet+1)) 
  M=cor(exprSet)
  pheatmap::pheatmap(M,annotation_col = colD)
  pheatmap::pheatmap(M,
                     show_rownames = F,
                     annotation_col = colD,
                     filename = 'cor_top500.png')
  dev.off() 
  
}

#save(dat.pca,pD,file = 'dat.pca.Rdata')


###################################Methylation difference analysis#####################
rm(list = ls())   
options(stringsAsFactors = F)
library("ChAMP")
library("minfi")
require(GEOquery)
require(Biobase)
load(file = 'step1-output.Rdata')
myLoad    # The methylation signal matrix and phenotype information are stored.
load(file = 'step2-champ_myNorm.Rdata')
group_list=myLoad$pd$group
table(group_list)
myDMP <- champ.DMP(beta = myNorm,pheno=group_list,adjPVal = 0.05,#threshold
                   adjust.method = "BH",##P-value adjustment method for limma analysis (default=BH (Benjamin Hochberg)）
                   arraytype="EPIC")
head(myDMP[[1]])
save(myDMP,file = 'step3-output-myDMP.Rdata')
write.table(myDMP,file="ALLDMP_analysis_result.txt",quote=T,row.names = T)
# You can also use the interactive interface to modify the threshold and adjust the differential probe
# DMP.GUI(DMP=myDMP[[1]],beta=myNorm,group_list)

rm(list = ls())   
options(stringsAsFactors = F)
library("ChAMP")
library("minfi")
require(GEOquery)
require(Biobase)

load(file = 'step3-output-myDMP.Rdata')
deg=myDMP[[1]]
head(deg)
length(unique(deg$gene)) 
deg$g=ifelse(abs(deg$logFC) < 0,'stable',
             ifelse(deg$logFC > 0,'UP','DOWN'))
table(deg$g)
head(deg)
deg$symbol=deg$gene
write.table(deg,file="champDiff.xls",sep="\t",quote=F)



########################Methylated differential volcanic map######################
library(ggplot2)
gene<-read.delim("volcanic map.txt",sep = "\t",stringsAsFactors = FALSE)

#Here we customize the difference type according to | log2FC |>=2 and adj.P.Val<0.01
gene[which(gene$adj.P.Val < 0.05 & gene$logFC <= -0),'sig'] <- 'Down'
gene[which(gene$adj.P.Val < 0.05 & gene$logFC >= 0),'sig'] <- 'Up'
gene[which(gene$adj.P.Val >= 0.05 | abs(gene$logFC) < 0),'sig'] <- 'None'

#Horizontal axis log2FC, vertical axis - log10 (adj. P.Val), color indicates difference
p <- ggplot(gene, aes(x = logFC, y = -log10(adj.P.Val), color = sig)) +
  geom_point(alpha = 0.6, size = 1) + xlim(-2, 2) + ylim(0, 10) +
  labs(x = '\nLog2 Fold Change', y = '-log10 FDR\n', color = '') +
  scale_colour_manual(values  = c('red2', '#2F5688', 'gray'), limits = c('Up', 'Down', 'None')) +
  theme(panel.grid = element_blank(), panel.background = element_rect(color = 'black',
                                                                      fill = 'transparent'), 
        plot.title = element_text(hjust = 0.5)) +
  theme(legend.key = element_rect(fill = 'transparent'), legend.background = element_rect(fill = 'transparent')) +
  geom_vline(xintercept = c(-0, 0), color = 'black', size = 0.6,lty=2) +
  geom_hline(yintercept = -log(0.05, 10), color = 'black', size = 0.6,lty=2) 
p

################mRNA differential expression#####################
library(limma)
#setwd("C:\\Users\\18768\\Desktop\\Hypoxia and immunity of cervical cancer\\7. High and low grouping\\4. Differential expression")
rt<-read.table("Bladder urothelial carcinoma 363 236 for N0 compare to 127 for N1-N3.txt",header = T,row.names = 1,sep = "\t")
group <- c(rep("N0",236),rep("N",127)) 
group <- factor(group,levels = c("N0","N"),ordered = F)
design<- model.matrix(~group)
colnames(design) <- levels(group)
design
fit <- lmFit(rt, design)
fit2 <- eBayes(fit)
allDiff<-topTable(fit2,coef=2,n=nrow(fit2))
head(allDiff)
write.table(allDiff,"Differential expression mRNA.csv",sep = ",")

logFoldChange=0
adjustP=0.05

diffSig <- allDiff[with(allDiff, (abs(logFC)>=logFoldChange & adj.P.Val < adjustP )), ]
write.table(diffSig,file="diff.xls",sep="\t",quote=F)
diffUp <- allDiff[with(allDiff, (logFC>=logFoldChange & adj.P.Val < adjustP )), ]
write.table(diffUp,file="up.xls",sep="\t",quote=F)
diffDown <- allDiff[with(allDiff, (logFC<=(-logFoldChange) & adj.P.Val < adjustP )), ]
write.table(diffDown,file="down.xls",sep="\t",quote=F)

#############Volcanic map##########
library(ggplot2)
gene<-read.delim("Volcanic map.txt",sep = "\t",stringsAsFactors = FALSE)

#Here we customize the difference type according to | log2FC |>=2 and adj.P.Val<0.01
gene[which(gene$adj.P.Val < 0.05 & gene$logFC <= -0),'sig'] <- 'Down'
gene[which(gene$adj.P.Val < 0.05 & gene$logFC >= 0),'sig'] <- 'Up'
gene[which(gene$adj.P.Val >= 0.05 | abs(gene$logFC) < 0),'sig'] <- 'None'

#Horizontal axis log2FC, vertical axis - log10 (adj. P.Val), color indicates difference
p <- ggplot(gene, aes(x = logFC, y = -log10(adj.P.Val), color = sig)) +
  geom_point(alpha = 0.6, size = 1) + xlim(-2, 2) + ylim(0, 10) +
  labs(x = '\nLog2 Fold Change', y = '-log10 FDR\n', color = '') +
  scale_colour_manual(values  = c('red2', '#2F5688', 'gray'), limits = c('Up', 'Down', 'None')) +
  theme(panel.grid = element_blank(), panel.background = element_rect(color = 'black',
                                                                      fill = 'transparent'), 
        plot.title = element_text(hjust = 0.5)) +
  theme(legend.key = element_rect(fill = 'transparent'), legend.background = element_rect(fill = 'transparent')) +
  geom_vline(xintercept = c(-0, 0), color = 'black', size = 0.6,lty=2) +
  geom_hline(yintercept = -log(0.05, 10), color = 'black', size = 0.6,lty=2) 
p


############################## GO enrichment #################
library(ggplot2)
pathway<-read.table("GO enrichment.txt",header=T,sep="\t",quote="")
p = ggplot(pathway,aes(GeneRatio,Term))
p=p + geom_point(aes(size=Count))
pbubble = p+ geom_point(aes(size=Count,color=-1*log10(PValue)))
pr = pbubble+scale_color_gradient(low="blue",high = "red")
pr = pr+labs(color=expression(-log[10](PValue)),size="Count",
             x="GeneRatio",y="GO Terms")
pr=pr+ facet_grid(Category ~ .,scale="free")
p=pr+ theme_bw()
p+theme(axis.text.x   = element_text(color = "black", size = 12, angle = 0),#X-axis dimension size
        axis.text.y   = element_text(color = "black", size = 10.5, angle = 0),#y-axis dimension size
        axis.title.y  = element_text(color = "black", size = 11, angle = 90))

########################### Survival analysis of regional lymph node metastasis grouping ##################
data<-read.table("LASSO Cox result.csv",header = T,row.names = 1,sep = ",")
library(survival)
library(survminer)
library(ggplot2)
fit<-survfit(Surv(time,status) ~ group,data = data)
p3<-ggsurvplot(fit,data = data,surv.median.line = "hv", #add median survival curve
               palette = c("#00468B","#ED0000"), #change the color of the line
               legend.labs=c("N0","N1-3"), #label
               legend.title="riskScore",
               #title="Overall suvival", #title
               xlab="Time(Years)",ylab="Cumulative Survival(percentage)",
               break.x.by=2, #abscissa interval
               censor.shape=124,censor.size=2, #Shape and size of deleted points
               risk.table = T,tables.height = 0.3,
               tables.theme = theme_cleantable(),
               ggtheme = theme_bw(),xlim = c(0, 14),
               conf.int = T,#add confidence interval
               pval = T,pval.coord=c(0,0.20)# add P value and adjust P value position
)
p3

######################### Univariate Cox analysis results showed univariate Cox and forest maps of related genes#########################
library("survival")
library("survminer")
library(readr)
library(forestplot)
data<-read.table("LASSO.txt",header = T,sep = "\t",row.names = 1)
uniTab=data.frame()
for(i in colnames(data[,3:ncol(data)])){
  cox <- coxph(Surv(time, status) ~ data[[i]], data = data)
  coxSummary = summary(cox)
  uniTab=rbind(uniTab,
               cbind(id=i,
                     HR=coxSummary$conf.int[,"exp(coef)"],
                     HR.95L=coxSummary$conf.int[,"lower .95"],
                     HR.95H=coxSummary$conf.int[,"upper .95"],
                     #logtest.pvalue=coxSummary$logtest["pvalue"],
                     #wald.pvalue=coxSummary$waldtest["pvalue"],
                     pvalue=coxSummary$coefficients[,"Pr(>|z|)"])
  )
}
write.csv(uniTab,"Univariate Cox analysis results.csv")

data<-read_tsv("forest map.txt")
forest_table <- cbind(c("Symbol", data$Symbol),
                      c("Hazard Ratio (95% CI)", data$HR),
                      c("Pvalue", data$pvalue))
csize <- data.frame(mean=c(NA, as.numeric(data$HR2)),
                    lower=c(NA, as.numeric(data$`HR.95L`)),
                    upper=c(NA, as.numeric(data$`HR.95H`)))
forestplot(labeltext = forest_table, 
           csize,
           graph.pos = 3, #forest map position
           graphwidth = unit(10, "cm"), #width of the forest diagram in the icon
           zero = 1, #the position of the datum line in the forest map
           cex = 0.9,
           lineheight = unit(1.2,"cm"), #row height
           boxsize = 0.2, #square size
           lty.ci=2, #the line type of the confidence interval
           fn.ci_norm = fpDrawNormalCI, #Point shape，fpDrawNormalCIf square，fpDrawDiamondCI rhombus
           lwd.ci = 1, #width of confidence interval line
           ci.vertices = TRUE,#add vertical lines at both ends of the confidence interval
           lwd.xaxis = 1,  #X axis width
           colgap = unit(0.7,'cm'), #column spacing
           xlab = "Hazard_Ratio", #abscissa label
           #xticks = c(0.5,1,1.5), # X-axis scale spacing
           hrzl_lines = list("2" = gpar(lwd=1, col = "#8B008B"),
                             "14" = gpar(lwd=1, col = "#8B008B")),#Set the type and influence range of lines in the table
           ci.vertices.height = 0.1, #Height of vertical lines at both ends of the confidence interval
           col = fpColors(box = "#458B00", 
                          line = "black",
                          zero = "#7AC5CD")) #Color of squares and lines

############################ LASSO Cox #######################################
library("glmnet") 
library("survival")
### Read data
data<-read.table("LASSO.txt",header = T,row.names = 1,sep = "\t")
### LASSO Cox regression model was constructed
#set.seed(6531)  
x=as.matrix(data[,c(3:ncol(data))]) 
y=data.matrix(Surv(data$time,data$status)) 
fit=glmnet(x, y, family = "cox", maxit = 10000) 
plot(fit, xvar = "lambda", label = F)
cvfit = cv.glmnet(x, y, family="cox", maxit = 10000) 
plot(cvfit) 
abline(v = log(c(cvfit$lambda.min,cvfit$lambda.1se)),lty="dashed") 
### Then, output the correlation coefficient and riskScore of prediction model
### correlation coefficient
coef = coef(fit, s = cvfit$lambda.min) 
index = which(coef != 0) 
actCoef = coef[index] 
lassoGene = row.names(coef)[index] 
geneCoef = cbind(Gene=lassoGene,Coef=actCoef) 
geneCoef  # View the correlation coefficient of the model
### Calculate riskScore
FinalGeneExp = data[,lassoGene] 
myFun = function(x){crossprod(as.numeric(x),actCoef)} 
riskScore = apply(FinalGeneExp,1,myFun) 
outCol = c("time", "status", lassoGene) 
risk = as.vector(ifelse(riskScore > median(riskScore), "high", "low")) 
dat = cbind(data[,outCol], riskScore=as.vector(riskScore), risk)
write.csv(dat,"LASSO Cox result.csv")

########################### Survival of patients in high and low risk score of training group##################
data<-read.table("LASSO Cox result.csv",header = T,row.names = 1,sep = ",")
library(survival)
library(survminer)
library(ggplot2)
fit<-survfit(Surv(time,status) ~ risk,data = data)
p3<-ggsurvplot(fit,data = data,surv.median.line = "hv", #add median survival curve
               palette = c("#ED0000","#00468B"), #change the color of the line
               legend.labs=c("High Score","Low Score"), #label
               legend.title="riskScore",
               #title="Overall suvival", #title
               xlab="Time(Years)",ylab="Cumulative Survival(percentage)",
               break.x.by=2, #abscissa interval
               censor.shape=124,censor.size=2, #shape and size of deleted points
               risk.table = T,tables.height = 0.3,
               tables.theme = theme_cleantable(),
               ggtheme = theme_bw(),xlim = c(0, 16),
               conf.int = T,#add confidence interval
               pval = T,pval.coord=c(0,0.20)# add P value and adjust P value position
)
p3

######################### Risk factors of training group ########################
library(ggplot2)
library(pheatmap)
library(gridExtra)
library(ggplotify)
#install.packages("ggplotify")
data<-read.csv("risk factors.txt",header = T,sep = "\t",row.names = 1)
#### Figure 1
p1=ggplot(data = data,aes(x=ID,y=riskScore))+geom_point(aes(color=risk))+
  scale_colour_manual(values = c("#E20B0B","#0B45A5"))+
  theme_bw()+labs(x="",y="Risk score")+
  geom_hline(yintercept=median(data$riskScore),colour="black", linetype="dotted",size=0.8)+
  geom_vline(xintercept=sum(data$risk=="low"),colour="black", linetype="dotted",size=0.8)
p1
#### Figure 2
p2=ggplot(data,aes(x=ID,y=time))+geom_point(aes(col=status))+theme_bw()+
  scale_colour_manual(values = c("#0B45A5","#E20B0B"))+
  labs(x="",y="Survival time(year)")+
  geom_vline(xintercept=sum(data$risk=="low"),colour="black", linetype="dotted",size=0.8)
p2
##### Figure 3
exp_dat<-data.frame(AKAP7=data$AKAP7,ARHGAP29=data$ARHGAP29,EFEMP1=data$EFEMP1,EPN2=data$EPN2,
                    LAMA2=data$LAMA2,RPS6KA1=data$RPS6KA1,SLC1A6=data$SLC1A6,STIM2=data$STIM2,
                    SULT1C2=data$SULT1C2,TMEM109=data$TMEM109,TRABD=data$TRABD,ZNRD1=data$ZNRD1)


mycolors <- colorRampPalette(c("#08147B", "white", "#CA2F33"), bias = 1.2)(100)
tmp=data.frame((scale(exp_dat)))
rownames(tmp)<-rownames(data)
tmp[tmp > 2] = 2
tmp[tmp < -2] = -2
tmp2<-t(tmp)
annotation_col = data.frame(rsikScore = factor(c(rep("Low",182),rep("High",181))))
rownames(annotation_col)=colnames(tmp2)
ann_colors=list(rsikScore=c(Low="#0B45A5",High="#E20B0B"))
p3=pheatmap(tmp2,col= mycolors,show_colnames = F,cluster_cols = F,annotation_col = annotation_col,annotation_colors = ann_colors)
#### Jigsaw to realize linkage of three pictures
plots = list(p1,p2,as.ggplot(as.grob(p3)))
lay1 = rbind(c(rep(1,7)),c(rep(2,7)),c(rep(3,7))) #Layout matrix
grid.arrange(grobs = plots, layout_matrix = lay1, heigths = c(2,2,2),weights=c(10,10,10))

############################### Training group time dependent ROC #############################
rt<-read.csv("time dependent ROC.txt",header = T,row.names = 1,sep = "\t")
library(timeROC)
library(survival)
#install.packages("timeROC")
ROC_rt<-timeROC(T=rt$time,delta=rt$status,
                marker=rt$riskScore,cause=1,
                weighting="marginal",
                times=c(1,3,5),ROC=TRUE)
ROC_rt
plot(ROC_rt,time=1,title=FALSE,lwd=2,col = "#DAAC36")
plot(ROC_rt,time=3,col="#3397CE",add=TRUE,title=FALSE,lwd=2)
plot(ROC_rt,time=5,col="#EE2A29",add=TRUE,title=FALSE,lwd=2)
legend("bottomright",
       c(paste0("AUC at 1 years: ",round(ROC_rt$AUC[1],3)),
         paste0("AUC at 3 years: ",round(ROC_rt$AUC[2],3)),
         paste0("AUC at 5 years: ",round(ROC_rt$AUC[3],3))),
       col=c("#DAAC36","#3397CE","#EE2A29"),lwd=2,bty = "n")

########################### Survival of patients in high and low risk score of validation group 1 ##################
data<-read.table("LASSO Cox result.csv",header = T,row.names = 1,sep = ",")
library(survival)
library(survminer)
library(ggplot2)
fit<-survfit(Surv(time,status) ~ risk,data = data)
p3<-ggsurvplot(fit,data = data,surv.median.line = "hv", #add median survival curve
               palette = c("#ED0000","#00468B"), #change the color of the line
               legend.labs=c("High Score","Low Score"), #label
               legend.title="riskScore",
               #title="Overall suvival", #title
               xlab="Time(Years)",ylab="Cumulative Survival(percentage)",
               break.x.by=1, #Abscissa interval
               censor.shape=124,censor.size=2, #Shape and size of deleted points
               risk.table = T,tables.height = 0.3,
               tables.theme = theme_cleantable(),
               ggtheme = theme_bw(),xlim = c(0, 8),
               conf.int = T,#Add confidence interval
               pval = T,pval.coord=c(0,0.20)# Add P value and adjust P value position
)
p3
############################### Validation group 1 time dependent ROC #############################
rt<-read.csv("time dependent ROC.txt",header = T,row.names = 1,sep = "\t")
library(timeROC)
library(survival)
#install.packages("timeROC")
ROC_rt<-timeROC(T=rt$time,delta=rt$status,
                marker=rt$riskScore,cause=1,
                weighting="marginal",
                times=c(1,3,5),ROC=TRUE)
ROC_rt
plot(ROC_rt,time=1,title=FALSE,lwd=2,col = "#DAAC36")
plot(ROC_rt,time=3,col="#3397CE",add=TRUE,title=FALSE,lwd=2)
plot(ROC_rt,time=5,col="#EE2A29",add=TRUE,title=FALSE,lwd=2)
legend("bottomright",
       c(paste0("AUC at 1 years: ",round(ROC_rt$AUC[1],3)),
         paste0("AUC at 3 years: ",round(ROC_rt$AUC[2],3)),
         paste0("AUC at 5 years: ",round(ROC_rt$AUC[3],3))),
       col=c("#DAAC36","#3397CE","#EE2A29"),lwd=2,bty = "n")
######################### Risk factors of validation group 1 ########################
library(ggplot2)
library(pheatmap)
library(gridExtra)
library(ggplotify)
#install.packages("ggplotify")
data<-read.csv("risk factors.txt",header = T,sep = "\t",row.names = 1)
#### Figure 1
p1=ggplot(data = data,aes(x=ID,y=riskScore))+geom_point(aes(color=risk))+
  scale_colour_manual(values = c("#E20B0B","#0B45A5"))+
  theme_bw()+labs(x="",y="Risk score")+
  geom_hline(yintercept=median(data$riskScore),colour="black", linetype="dotted",size=0.8)+
  geom_vline(xintercept=sum(data$risk=="low"),colour="black", linetype="dotted",size=0.8)
p1
#### Figure 2
p2=ggplot(data,aes(x=ID,y=time))+geom_point(aes(col=status))+theme_bw()+
  scale_colour_manual(values = c("#0B45A5","#E20B0B"))+
  labs(x="",y="Survival time(year)")+
  geom_vline(xintercept=sum(data$risk=="low"),colour="black", linetype="dotted",size=0.8)
p2
##### Figure 3
exp_dat<-data.frame(AKAP7=data$AKAP7,ARHGAP29=data$ARHGAP29,EFEMP1=data$EFEMP1,EPN2=data$EPN2,
                    LAMA2=data$LAMA2,RPS6KA1=data$RPS6KA1,SLC1A6=data$SLC1A6,STIM2=data$STIM2,
                    SULT1C2=data$SULT1C2,TMEM109=data$TMEM109,TRABD=data$TRABD,ZNRD1=data$ZNRD1)


mycolors <- colorRampPalette(c("#08147B", "white", "#CA2F33"), bias = 1.2)(100)
tmp=data.frame((scale(exp_dat)))
rownames(tmp)<-rownames(data)
tmp[tmp > 2] = 2
tmp[tmp < -2] = -2
tmp2<-t(tmp)
annotation_col = data.frame(rsikScore = factor(c(rep("Low",19),rep("High",19))))
rownames(annotation_col)=colnames(tmp2)
ann_colors=list(rsikScore=c(Low="#0B45A5",High="#E20B0B"))
p3=pheatmap(tmp2,col= mycolors,show_colnames = F,cluster_cols = F,annotation_col = annotation_col,annotation_colors = ann_colors)
#### Jigsaw to realize linkage of three pictures
plots = list(p1,p2,as.ggplot(as.grob(p3)))
lay1 = rbind(c(rep(1,7)),c(rep(2,7)),c(rep(3,7))) #Layout matrix
grid.arrange(grobs = plots, layout_matrix = lay1, heigths = c(2,2,2),weights=c(10,10,10))

########################### Survival of patients in high and low risk score of validation group 2 ##################
data<-read.table("LASSO Cox result.csv",header = T,row.names = 1,sep = ",")
library(survival)
library(survminer)
library(ggplot2)
fit<-survfit(Surv(time,status) ~ risk,data = data)
p3<-ggsurvplot(fit,data = data,surv.median.line = "hv", #Add median survival curve
               palette = c("#ED0000","#00468B"), #Change the color of the line
               legend.labs=c("High Score","Low Score"), #label
               legend.title="riskScore",
               #title="Overall suvival", #title
               xlab="Time(Years)",ylab="Cumulative Survival(percentage)",
               break.x.by=1, #Abscissa interval
               censor.shape=124,censor.size=2, #Shape and size of deleted points
               risk.table = T,tables.height = 0.3,
               tables.theme = theme_cleantable(),
               ggtheme = theme_bw(),xlim = c(0, 12),
               conf.int = T,#Add confidence interval
               pval = T,pval.coord=c(0,0.20)# Add P value and adjust P value position
)
p3

############################### Validation group 2 time dependent ROC #############################
rt<-read.csv("时间依赖ROC.txt",header = T,row.names = 1,sep = "\t")
library(timeROC)
library(survival)
#install.packages("timeROC")
ROC_rt<-timeROC(T=rt$time,delta=rt$status,
                marker=rt$riskScore,cause=1,
                weighting="marginal",
                times=c(1,3,5),ROC=TRUE)
ROC_rt
plot(ROC_rt,time=1,title=FALSE,lwd=2,col = "#DAAC36")
plot(ROC_rt,time=3,col="#3397CE",add=TRUE,title=FALSE,lwd=2)
plot(ROC_rt,time=5,col="#EE2A29",add=TRUE,title=FALSE,lwd=2)
legend("bottomright",
       c(paste0("AUC at 1 years: ",round(ROC_rt$AUC[1],3)),
         paste0("AUC at 3 years: ",round(ROC_rt$AUC[2],3)),
         paste0("AUC at 5 years: ",round(ROC_rt$AUC[3],3))),
       col=c("#DAAC36","#3397CE","#EE2A29"),lwd=2,bty = "n")

######################### Risk factors of validation group 2 ########################
library(ggplot2)
library(pheatmap)
library(gridExtra)
library(ggplotify)
#install.packages("ggplotify")
data<-read.csv("Risk factors.txt",header = T,sep = "\t",row.names = 1)
#### Figure 1
p1=ggplot(data = data,aes(x=ID,y=riskScore))+geom_point(aes(color=risk))+
  scale_colour_manual(values = c("#E20B0B","#0B45A5"))+
  theme_bw()+labs(x="",y="Risk score")+
  geom_hline(yintercept=median(data$riskScore),colour="black", linetype="dotted",size=0.8)+
  geom_vline(xintercept=sum(data$risk=="low"),colour="black", linetype="dotted",size=0.8)
p1
#### Figure 2
p2=ggplot(data,aes(x=ID,y=time))+geom_point(aes(col=status))+theme_bw()+
  scale_colour_manual(values = c("#0B45A5","#E20B0B"))+
  labs(x="",y="Survival time(year)")+
  geom_vline(xintercept=sum(data$risk=="low"),colour="black", linetype="dotted",size=0.8)
p2
##### Figure 3
exp_dat<-data.frame(AKAP7=data$AKAP7,ARHGAP29=data$ARHGAP29,EFEMP1=data$EFEMP1,EPN2=data$EPN2,
                    LAMA2=data$LAMA2,RPS6KA1=data$RPS6KA1,SLC1A6=data$SLC1A6,STIM2=data$STIM2,
                    SULT1C2=data$SULT1C2,TMEM109=data$TMEM109,TRABD=data$TRABD,ZNRD1=data$ZNRD1)


mycolors <- colorRampPalette(c("#08147B", "white", "#CA2F33"), bias = 1.2)(100)
tmp=data.frame((scale(exp_dat)))
rownames(tmp)<-rownames(data)
tmp[tmp > 2] = 2
tmp[tmp < -2] = -2
tmp2<-t(tmp)
annotation_col = data.frame(rsikScore = factor(c(rep("Low",82),rep("High",82))))
rownames(annotation_col)=colnames(tmp2)
ann_colors=list(rsikScore=c(Low="#0B45A5",High="#E20B0B"))
p3=pheatmap(tmp2,col= mycolors,show_colnames = F,cluster_cols = F,annotation_col = annotation_col,annotation_colors = ann_colors)
#### Jigsaw to realize linkage of three pictures
plots = list(p1,p2,as.ggplot(as.grob(p3)))
lay1 = rbind(c(rep(1,7)),c(rep(2,7)),c(rep(3,7))) #Layout matrix
grid.arrange(grobs = plots, layout_matrix = lay1, heigths = c(2,2,2),weights=c(10,10,10))

########################## Survival curve of signature genes ####################
library(survival)
library(survminer)
data<-read.table("Signature gene survival analysis.txt",header = T,sep = "\t",row.names = 1)
for (gene in colnames(data)[3:ncol(data)]) {
  group<-ifelse(data[[gene]]>median(data[[gene]]),
                "high","low")
  diff=survdiff(Surv(time,status) ~ group , data = data)
  pValue=1-pchisq(diff$chisq,df=1)
  if(pValue<0.001){
    pValue="p<0.001"
  }else{
    pValue=paste0("p=",sprintf("%.03f",pValue))
  }
  fit <- survfit(Surv(time, status) ~ group, data = data)
  surPlot<-ggsurvplot(fit,
                      #surv.median.line = "hv", #Add median survival curve
                      data=data,
                      conf.int=TRUE,
                      pval=pValue,
                      pval.size=5,
                      legend.labs=c("High","Low"),
                      legend.title=gene,
                      xlab="Time (years)",
                      ylab="overall survival",
                      break.time.by = 2,
                      #tables.theme = theme_cleantable(),
                      #ggtheme = theme_bw(),
                      risk.table.title="",
                      palette = c("#d7191c","#2b83ba"),
                      risk.table = F,risk.table.height=.35)
  pdf(file = paste0( gene,".pdf"),onefile = F,width = 6,height = 5.5)
  print(surPlot)
  dev.off()
}

############################## Boxplots of signature genes in cancer and adjacent cancer in training group #################
library(ggpubr)
library(magrittr)
library(ggsignif)
library(tidyverse)
data<-read.table("Boxplots.csv",header=T,sep=",",row.names=1,check.names = F)
data2 <- data %>%
  #add_column(group = rep(c("Control","BPD"), c(14,17)), .before = 1) %>%
  gather("gene","expression", -group) %>%
  mutate(gene = factor(gene, levels = colnames(data))) %>%
  mutate(group = factor(group, levels = c("Control","BLCA")))
ggboxplot(data2, "gene", "expression", fill = "group", 
          xlab="",ylab="Expression",
          palette = c('#2F5688', 'red2')) +
  stat_compare_means(aes(group=group),label = "p.signif",method = "wilcox.test")+
  theme(
    plot.title    = element_text(color = "black", size   = 10, hjust = 0.5),
    plot.subtitle = element_text(color = "black", size   = 10,hjust = 0.5),
    plot.caption  = element_text(color = "black", size   = 10,face = "italic", hjust = 1),
    axis.text.x   = element_text(color = "black", size =10, angle = 45,vjust = 0.6),
    axis.text.y   = element_text(color = "black", size = 10, angle = 0),
    legend.title  = element_text(color = "black", size  = 11),
    legend.text   = element_text(color = "black", size   = 11),
    axis.line.y = element_line(color = "black", linetype = "solid"),
    axis.line.x = element_line (color = "black",linetype = "solid"), 
    panel.border = element_rect(linetype = "solid", size = 1,fill = NA) 
  )

############################## Boxplots of differential expression of signature genes in N0 and N1-N3 groups in training group #################
library(ggpubr)
library(magrittr)
library(ggsignif)
library(tidyverse)
data<-read.table("Boxplots.csv",header=T,sep=",",row.names=1,check.names = F)
data2 <- data %>%
  #add_column(group = rep(c("Control","BPD"), c(14,17)), .before = 1) %>%
  gather("gene","expression", -group) %>%
  mutate(gene = factor(gene, levels = colnames(data))) %>%
  mutate(group = factor(group, levels = c("N0","N1-N3")))
ggboxplot(data2, "gene", "expression", fill = "group", 
          xlab="",ylab="Expression",
          palette = c('#2F5688', 'red2')) +
  stat_compare_means(aes(group=group),label = "p.signif",method = "wilcox.test")+
  theme(
    plot.title    = element_text(color = "black", size   = 10, hjust = 0.5),
    plot.subtitle = element_text(color = "black", size   = 10,hjust = 0.5),
    plot.caption  = element_text(color = "black", size   = 10,face = "italic", hjust = 1),
    axis.text.x   = element_text(color = "black", size =10, angle = 45,vjust = 0.6),
    axis.text.y   = element_text(color = "black", size = 10, angle = 0),
    legend.title  = element_text(color = "black", size  = 11),
    legend.text   = element_text(color = "black", size   = 11),
    axis.line.y = element_line(color = "black", linetype = "solid"),
    axis.line.x = element_line (color = "black",linetype = "solid"), 
    panel.border = element_rect(linetype = "solid", size = 1,fill = NA) 
  )

############################## Boxplots of signature genes in the validation group  #################
library(ggpubr)
library(magrittr)
library(ggsignif)
library(tidyverse)
data<-read.table("Boxplots1.csv",header=T,sep=",",row.names=1,check.names = F)
data2 <- data %>%
  #add_column(group = rep(c("Control","BPD"), c(14,17)), .before = 1) %>%
  gather("gene","expression", -group) %>%
  mutate(gene = factor(gene, levels = colnames(data))) %>%
  mutate(group = factor(group, levels = c("Control","BLCA")))
ggboxplot(data2, "gene", "expression", fill = "group", 
          xlab="",ylab="Expression",
          palette = c('#2F5688', 'red2')) +
  stat_compare_means(aes(group=group),label = "p.signif",method = "wilcox.test")+
  theme(
    plot.title    = element_text(color = "black", size   = 10, hjust = 0.5),
    plot.subtitle = element_text(color = "black", size   = 10,hjust = 0.5),
    plot.caption  = element_text(color = "black", size   = 10,face = "italic", hjust = 1),
    axis.text.x   = element_text(color = "black", size =10, angle = 45,vjust = 0.6),
    axis.text.y   = element_text(color = "black", size = 10, angle = 0),
    legend.title  = element_text(color = "black", size  = 11),
    legend.text   = element_text(color = "black", size   = 11),
    axis.line.y = element_line(color = "black", linetype = "solid"),
    axis.line.x = element_line (color = "black",linetype = "solid"), 
    panel.border = element_rect(linetype = "solid", size = 1,fill = NA) 
  )


############################## Boxplots of differential expression of signature genes in N0 and N1-N3 groups in validation group #################
library(ggpubr)
library(magrittr)
library(ggsignif)
library(tidyverse)
data<-read.table("Boxplots 2.csv",header=T,sep=",",row.names=1,check.names = F)
data2 <- data %>%
  #add_column(group = rep(c("Control","BPD"), c(14,17)), .before = 1) %>%
  gather("gene","expression", -group) %>%
  mutate(gene = factor(gene, levels = colnames(data))) %>%
  mutate(group = factor(group, levels = c("N0","N1-N3")))
ggboxplot(data2, "gene", "expression", fill = "group", 
          xlab="",ylab="Expression",
          palette = c('#2F5688', 'red2')) +
  stat_compare_means(aes(group=group),label = "p.signif",method = "t.test")+
  theme(
    plot.title    = element_text(color = "black", size   = 10, hjust = 0.5),
    plot.subtitle = element_text(color = "black", size   = 10,hjust = 0.5),
    plot.caption  = element_text(color = "black", size   = 10,face = "italic", hjust = 1),
    axis.text.x   = element_text(color = "black", size =10, angle = 45,vjust = 0.6),
    axis.text.y   = element_text(color = "black", size = 10, angle = 0),
    legend.title  = element_text(color = "black", size  = 11),
    legend.text   = element_text(color = "black", size   = 11),
    axis.line.y = element_line(color = "black", linetype = "solid"),
    axis.line.x = element_line (color = "black",linetype = "solid"), 
    panel.border = element_rect(linetype = "solid", size = 1,fill = NA) 
  )

####################### Waterfall diagram of signature gene mutation in training group ######################################
library(maftools)
#clinicaldata <- read.table("TCGA_LUAD_Clinical.csv",sep = ",",header = T)
maf<-read.maf(maf="TCGA.BLCA.maf")
vc_cols = RColorBrewer::brewer.pal(n = 8, name = 'Paired')
names(vc_cols) = c(
  'Frame_Shift_Del',
  'Missense_Mutation',
  'Nonsense_Mutation',
  'Multi_Hit',
  'Frame_Shift_Ins',
  'In_Frame_Ins',
  'Splice_Site',
  'In_Frame_Del'
)
genes = c(
  "AKAP7",	"ARHGAP29",	"EFEMP1",	"EPN2",	"LAMA2",	"RPS6KA1",
  "SLC1A6",	"STIM2",	"SULT1C2",	"TMEM109",	"TRABD",	"ZNRD1"
  
)
oncoplot(maf = maf, fontSize =0.62,draw_titv = T,legendFontSize=1.2,
         annotationFontSize=1.2,#clinicalFeatures = c('Sex',"Stage","Survival"),
         colors = vc_cols,genes = genes,drawColBar=T,logColBar=T) 


####################### Correlation analysis of transcription factors and signature genes in training group ######################
mRNA<-read.table("transcription factors.txt",sep="\t",header=T,row.names=1)
lncRNA<-read.table("signature genes.txt",sep="\t",header=T,row.names=1)

pcor_lnc<-matrix(data = NA, nrow = nrow(mRNA), ncol = nrow(lncRNA))
rownames(pcor_lnc)<-rownames(mRNA)
colnames(pcor_lnc)<-rownames(lncRNA)
pval_lnc<-matrix(data = NA, nrow = nrow(mRNA), ncol = nrow(lncRNA))
rownames(pval_lnc)<-rownames(mRNA)
colnames(pval_lnc)<-rownames(lncRNA)
for (i in 1:nrow(mRNA))
{
  x<-t(mRNA[i,])
  x[which(!is.finite(x))]<-NA
  for (j in 1:nrow(lncRNA))
  {
    y<-t(lncRNA[j,])
    y[which(!is.finite(y))]<-NA
    if(length(which(!is.na(x+y)))>5 )
    {
      corres<-cor.test(x,y)
      pcor_lnc[i,j]<-corres$estimate
      pval_lnc[i,j]<-corres$p.value
    }
  }
}
ind<-which( abs(pcor_lnc) > 0.3 & pval_lnc < 0.05, arr.ind = TRUE)
length(ind)
mRNAsig<-rownames(mRNA)[ind[,1]]
lncRNAsig<-rownames(lncRNA)[ind[,2]]
corsig<-pcor_lnc[ind]
pvalsig<-pval_lnc[ind]
lncp<-cbind(mRNAsig,lncRNAsig,corsig,pvalsig)
write.table(lncp,file="correlation-0.3.txt",sep="\t",row.names=FALSE)

######################## Mulberry diagram #####################
library(ggplot2)
library(ggalluvial)
library(RColorBrewer)

miRNA_mRNA<-read.table("relationship.txt",sep = "\t",header = T)
miRNA_mRNA$Freq=1#Define the ordinate, which is generally 1 by default
miRNA_mRNA_long<- to_lodes_form(miRNA_mRNA,
                                axes = 1:2,#Numbering miRNA and mRNA respectively
                                id = "Cohort") #Change to long data for drawing

ggplot(miRNA_mRNA_long,
       aes(x =factor(x,level = c("Drug","Gene")),y=Freq,stratum = stratum, alluvium = Cohort,fill = stratum, label =stratum)) +
  geom_flow( width = 1/3)+#Draw flow chart
  geom_stratum( width = 1/3,linetype=1,size=0.5,alpha =0.9,color = "black") +#Draw impact diagram
  geom_text(stat ="stratum" , size =3) +#Add First Name
  scale_x_discrete(limits = c() )+#Remove abscissa
  theme_bw()+#Define theme
  theme(legend.position = "none",
        axis.title = element_blank(),
        axis.text.y= element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor  = element_blank(),
        panel.border = element_blank())+#Remove boundary line
  scale_fill_manual(values = colorRampPalette(brewer.pal(8, "Accent"))(10))#define color

######################### In validation group 1, riskscore was judged as an independent prognostic factor #########################
library("survival")
library("survminer")

options(digits=2)

data<-read.table("Independent factor proof.txt",header = T,row.names = 1,sep = "\t")

### Univariate
model <- coxph(Surv(time, status) ~., data = data )
ggforest(model,main="hazard ratio",cpositions=c(0.02,0.1,0.35),
         fontsize=0.8,refLabel="reference",noDigits=2,data = data)

### multivariate 
res.cox <- coxph(Surv(time, status) ~ risk+Age, data =  data)
summary(res.cox)

ggforest(res.cox,main="hazard ratio",cpositions=c(0.02,0.22,0.4),
         fontsize=0.8,refLabel="reference",noDigits=2,data = data)

#########################In validation group 2, riskscore was judged as an independent prognostic factor #########################
library("survival")
library("survminer")

options(digits=2)

data<-read.table("Independent factor proof.txt",header = T,row.names = 1,sep = "\t")

### Univariate
model <- coxph(Surv(time, status) ~., data = data )
ggforest(model,main="hazard ratio",cpositions=c(0.02,0.1,0.35),
         fontsize=0.8,refLabel="reference",noDigits=2,data = data)

### multivariate 
res.cox <- coxph(Surv(time, status) ~ risk+age+N, data =  data)
summary(res.cox)

ggforest(res.cox,main="hazard ratio",cpositions=c(0.02,0.22,0.4),
         fontsize=0.8,refLabel="reference",noDigits=2,data = data)


######################### In training group 2, riskscore was judged as an independent prognostic factor #########################
library("survival")
library("survminer")

options(digits=2)

data<-read.table("Independent factor proof.txt",header = T,row.names = 1,sep = "\t")

### Univariate 

for(i in colnames(data[,3:ncol(data)])){
  cox <- coxph(Surv(time, status) ~ data[,i], data = data)
  summary(cox)
  
}

ggforest(cox,main="hazard ratio",cpositions=c(0.02,0.1,0.35),
         fontsize=0.8,refLabel="reference",noDigits=2,data = data)

### multivariate 
res.cox <- coxph(Surv(time, status) ~ risk+age+stage+gender+pN+pT , data =  data)
summary(res.cox)

ggforest(res.cox,main="hazard ratio",cpositions=c(0.02,0.22,0.4),
         fontsize=0.8,refLabel="reference",noDigits=2,data = data)


######################## Nomograph ############################
#install.packages("regplot")
library(regplot)
library(rms)
library(survival)
data <- read.table("Independent factor proof.txt",header=TRUE,sep = "\t")
dd=datadist(data)
options(datadist="dd")
mod <- cph(formula =  as.formula(paste("Surv(time, status) ~ ",paste(colnames(data)[4:6],collapse = "+"))),
           data=data,x=T,y=T,surv = T)
regplot(mod,points = T,failtime = c(365,1095,1825))


######################## Calibration curve ############################
library(regplot)
library(rms)
library(survival)
data <- read.table("Independent factor proof.txt",header=TRUE,sep = "\t")
f1 <- cph(formula =  as.formula(paste("Surv(time, status) ~ ",paste(colnames(data)[4:6],collapse = "+"))),
          data=data,x=T,y=T,surv = T, time.inc=365)
cal1 <- calibrate(f1, cmethod="KM", method="boot", u=365, m=50, B=1000)
f3 <- cph(formula =  as.formula(paste("Surv(time, status) ~ ",paste(colnames(data)[4:6],collapse = "+"))),
          data=data,x=T,y=T,surv = T, time.inc=1095)
cal3 <- calibrate(f3, cmethod="KM", method="boot", u=1095, m=50, B=1000)
f5 <- cph(formula =  as.formula(paste("Surv(time, status) ~ ",paste(colnames(data)[4:6],collapse = "+"))),
          data=data,x=T,y=T,surv = T,  time.inc=1825)
cal5 <- calibrate(f5, cmethod="KM", method="boot", u=1825, m=50, B=1000)
plot(cal1,lwd = 2,lty = 0,errbar.col = c("#5399C8"),
     xlim = c(0,1),ylim= c(0,1),
     #bty = "l", #Draw only the left and bottom borders
     xlab = "Nomogram-prediced OS (%)",ylab = "Observed OS (%)",
     col = c("#5399C8"),
     cex.lab=1.2,cex.axis=1, cex.main=1.2, cex.sub=0.6)
lines(cal1[,c('mean.predicted',"KM")],
      type = 'b', lwd = 1, col = c("#5399C8"), pch = 16)
mtext("")
plot(cal3,lwd = 2,lty = 0,errbar.col = c("#E43022"),
     xlim = c(0,1),ylim= c(0,1),
     xlab = "Nomogram-prediced OS (%)",ylab = "Observed OS (%)",
     col = c("#E43022"),add = T)
lines(cal3[,c('mean.predicted',"KM")],
      type = 'b', lwd = 1, col = c("#E43022"), pch = 16)
mtext("")
plot(cal5,lwd = 2,lty = 0,errbar.col = c("#E0AD16"),
     xlim = c(0,1),ylim= c(0,1),
     xlab = "Nomogram-prediced OS (%)",ylab = "Observed OS (%)",
     col = c("#E0AD16"),add = T)
lines(cal5[,c('mean.predicted',"KM")],
      type = 'b', lwd = 1, col = c("#E0AD16"), pch = 16)
abline(0,1, lwd = 2, lty = 3, col = c("#224444"))
legend("topleft", #Location of legend
       legend = c("1-year","3-year","5-year"), #Legend text
       col =c("#5399C8","#E43022","#E0AD16"), #Legend line color, corresponding to text
       lwd = 2,#Thickness of legend centerline
       cex = 1.2,#Legend font size
       bty = "n")#Do not display legend border
######################## Difference of risk score in different clinical subgroups ############################
library(ggpubr)
library(magrittr)
library(ggsignif)
library(export)
data<-read.table("Comparison of different clinical subgroups.txt",sep = "\t",header = T)
data$group<-as.factor(data$group)
ggboxplot(data, x = "group", y ="riskScore",fill  = "group",add="jitter",
          add.params = list(color="group",alpha=0.1),
          xlab="",ylab="Risk Score",
          order=c(">=65","<65",  
                  "Alive","Dead",
                  "stage i/ii","stage iii/iv",
                  "N0","N1-N3",
                  "T1-T2","T3-T4",
                  "male","female"),
          palette=c("#BFBCD1","#BFBCD1",
                    "#FECDE7","#FECDE7",
                    "#009ACD","#009ACD",
                    "#CD5555","#CD5555",
                    "#CCEAC4","#CCEAC4",
                    "#D1C45B","#D1C45B"))+theme(legend.position ="none")+
  theme(axis.text.x   = element_text(color = "black", size = 10, angle = 0),#X-axis label size
        axis.text.y   = element_text(color = "black", size = 13, angle = 0),#Number size of y-axis
        axis.title.y  = element_text(color = "black", size = 11, angle = 90))#y-axis label size
graph2ppt()


################################# SsGSEA immune infiltration###########################

library(GSVA)
immunecell<-read.csv("immune cell.txt", stringsAsFactors = FALSE, check.names = FALSE,sep = "\t")
#Select two lines of specific genes and corresponding immune cells
mark<-immunecell[,1:2]
list<- split(as.matrix(mark)[,1], mark[,2])
data<-read.table("363-BLCA  182-low 181-high.txt", stringsAsFactors = FALSE, header = TRUE, row.names = 1, sep = "\t")
data <- as.matrix(data)
ssGSEA_Score = as.data.frame(gsva(data, list, method = "ssgsea"))
write.csv(ssGSEA_Score,file="ssGSEA result.csv")

############################## Boxplots of immune cell infiltration in high and low risk score groups #################
library(ggpubr)
library(magrittr)
library(ggsignif)
library(tidyverse)
data<-read.table("ssGSEA result.csv",header=T,sep=",",row.names=1,check.names = F)
data2 <- data %>%
  add_column(group = rep(c("Low","High"), c(182,181)), .before = 1) %>%
  gather("gene","expression", -group) %>%
  mutate(gene = factor(gene, levels = colnames(data))) %>%
  mutate(group = factor(group, levels = c("Low","High")))
ggboxplot(data2, "gene", "expression", fill = "group", 
          xlab="",ylab="Immune Infiltration",
          palette = c("#5399C8", "#E43022")) +
  stat_compare_means(aes(group=group),label = "p.signif",method = "wilcox.test")+
  theme(
    plot.title    = element_text(color = "black", size   = 10, hjust = 0.5),
    plot.subtitle = element_text(color = "black", size   = 10,hjust = 0.5),
    plot.caption  = element_text(color = "black", size   = 10,face = "italic", hjust = 1),
    axis.text.x   = element_text(color = "black", size =7, angle = 45,vjust = 0.6),
    axis.text.y   = element_text(color = "black", size = 10, angle = 0),
    legend.title  = element_text(color = "black", size  = 11),
    legend.text   = element_text(color = "black", size   = 11),
    axis.line.y = element_line(color = "black", linetype = "solid"),
    axis.line.x = element_line (color = "black",linetype = "solid"), 
    panel.border = element_rect(linetype = "solid", size = 0.8,fill = NA) 
  )

############################## Comparison of immune cell infiltration in N0 and N1-N3 groups #################
library(ggpubr)
library(magrittr)
library(ggsignif)
library(tidyverse)
data<-read.table("ssGSEA结果.csv",header=T,sep=",",row.names=1,check.names = F)
data2 <- data %>%
  add_column(group = rep(c("N0","N1-3"), c(236,127)), .before = 1) %>%
  gather("gene","expression", -group) %>%
  mutate(gene = factor(gene, levels = colnames(data))) %>%
  mutate(group = factor(group, levels = c("N0","N1-3")))
ggboxplot(data2, "gene", "expression", fill = "group", 
          xlab="",ylab="Immune Infiltration",
          palette = c("#5399C8", "#E43022")) +
  stat_compare_means(aes(group=group),label = "p.signif",method = "wilcox.test")+
  theme(
    plot.title    = element_text(color = "black", size   = 10, hjust = 0.5),
    plot.subtitle = element_text(color = "black", size   = 10,hjust = 0.5),
    plot.caption  = element_text(color = "black", size   = 10,face = "italic", hjust = 1),
    axis.text.x   = element_text(color = "black", size =7, angle = 45,vjust = 0.6),
    axis.text.y   = element_text(color = "black", size = 10, angle = 0),
    legend.title  = element_text(color = "black", size  = 11),
    legend.text   = element_text(color = "black", size   = 11),
    axis.line.y = element_line(color = "black", linetype = "solid"),
    axis.line.x = element_line (color = "black",linetype = "solid"), 
    panel.border = element_rect(linetype = "solid", size = 0.8,fill = NA) 
  )

################################# ssGSEA EMT result #################################

library(GSVA)
immunecell<-read.csv("EMT.txt", stringsAsFactors = FALSE, check.names = FALSE,sep = "\t")
#Select two lines of specific genes and corresponding immune cells
mark<-immunecell[,1:2]
list<- split(as.matrix(mark)[,1], mark[,2])
data<-read.table("363-BLCA  182-low 181-high.txt", stringsAsFactors = FALSE, header = TRUE, row.names = 1, sep = "\t")
data <- as.matrix(data)
ssGSEA_Score = as.data.frame(gsva(data, list, method = "ssgsea"))
write.csv(ssGSEA_Score,file="EMT result.csv")

############################## EMT difference boxplots in high and low risk score group #################
library(ggpubr)
library(magrittr)
library(ggsignif)
library(tidyverse)
data<-read.table("EMT result transpose.csv",header=T,sep=",",row.names=1,check.names = F)
data2 <- data %>%
  add_column(group = rep(c("Low","High"), c(182,181)), .before = 1) %>%
  gather("gene","expression", -group) %>%
  mutate(gene = factor(gene, levels = colnames(data))) %>%
  mutate(group = factor(group, levels = c("Low","High")))
ggboxplot(data2, "gene", "expression", fill = "group", 
          xlab="",ylab="Enrichment Score",
          #palette = c("#5399C8", "#E43022")
) +
  stat_compare_means(aes(group=group),label = "p.signif")+
  theme(
    plot.title    = element_text(color = "black", size   = 10, hjust = 0.5),
    plot.subtitle = element_text(color = "black", size   = 10,hjust = 0.5),
    plot.caption  = element_text(color = "black", size   = 10,face = "italic", hjust = 1),
    axis.text.x   = element_text(color = "black", size =10, angle = 45,vjust = 0.6),
    axis.text.y   = element_text(color = "black", size = 10, angle = 0),
    legend.title  = element_text(color = "black", size  = 11),
    legend.text   = element_text(color = "black", size   = 11),
    axis.line.y = element_line(color = "black", linetype = "solid"),
    axis.line.x = element_line (color = "black",linetype = "solid"), 
    panel.border = element_rect(linetype = "solid", size = 0.5,fill = NA) 
  )

######################## ESTIMATE algorithm evaluation stroma/immune score #########################
library(estimate)

file_dir="363-BLCA  182-low 181-high.txt"
filterCommonGenes(input.f = file_dir,output.f = "gene.gct",id="GeneSymbol")
estimateScore("gene.gct","estimat_score.gct",platform = "affymetrix")
score_table=read.table("estimat_score.gct",skip = 2,header = 1)
rownames(score_table)=score_table[,1]
score=t(score_table)
colnames(score)=score[1,]
score_matrix=score[-1,]
write.csv(score_matrix,"ESTIMATE result.csv")


####################### Boxplots of immune/stroma score and tumor purity ###############
library(ggpubr)
data<-read.csv("ESTIMATE结果.csv",header = T,row.names = 1,sep = ",")
p <-ggboxplot(data, x = "group", y = "ImmuneScore",fill="group",
              order = c("Low","High"),xlab = "",
              palette =c("#7AA6DC", "#CD534C"),notch = F)+
  stat_compare_means(aes(group=group))
p+theme(plot.title = element_text(hjust = 0.5), 
        plot.subtitle = element_text(hjust = 0.5))

p <-ggboxplot(data, x = "group", y = "ESTIMATEScore",fill="group",
              order = c("Low","High"),xlab = "",
              palette =c("#E69F00", "#56B4E9"),notch = F)+
  stat_compare_means(aes(group=group))
p+theme(plot.title = element_text(hjust = 0.5), 
        plot.subtitle = element_text(hjust = 0.5))

p <-ggboxplot(data, x = "group", y = "TumorPurity",fill="group",
              order = c("Low","High"),xlab = "",
              palette =c("#288080", "#C03463"),notch = F)+
  stat_compare_means(aes(group=group))
p+theme(plot.title = element_text(hjust = 0.5), 
        plot.subtitle = element_text(hjust = 0.5))

p <-ggboxplot(data, x = "group", y = "StromalScore",fill="group",
              order = c("Low","High"),xlab = "",
              palette =c("#8D0E39", "#679D39"),notch = F)+
  stat_compare_means(aes(group=group))
p+theme(plot.title = element_text(hjust = 0.5), 
        plot.subtitle = element_text(hjust = 0.5))

############################Heat map of correlation between immune cells and signature genes and risk score###############

feature_1<-read.table("signature genes.txt",sep="\t",header=T,row.names=1)

feature_2<-read.table("immune cells.txt",sep="\t",header=T,row.names=1)

dir.create("Result")  # Create the result directory


pcor_lnc<-matrix(data = NA, nrow = nrow(feature_1), ncol = nrow(feature_2))
rownames(pcor_lnc)<-rownames(feature_1)
colnames(pcor_lnc)<-rownames(feature_2)
pval_lnc<-matrix(data = NA, nrow = nrow(feature_1), ncol = nrow(feature_2))
rownames(pval_lnc)<-rownames(feature_1)
colnames(pval_lnc)<-rownames(feature_2)
for (i in 1:nrow(feature_1))
{
  x<-t(feature_1[i,])
  x[which(!is.finite(x))]<-NA
  for (j in 1:nrow(feature_2))
  {
    y<-t(feature_2[j,])
    y[which(!is.finite(y))]<-NA
    if(length(which(!is.na(x+y)))>5 )
    {
      corres<-cor.test(x,y)
      pcor_lnc[i,j]<-corres$estimate
      pval_lnc[i,j]<-corres$p.value
    }
  }
}
ind<-which( abs(pcor_lnc) > 0 & pval_lnc < 1, arr.ind = TRUE)
length(ind)
feature_1<-rownames(feature_1)[ind[,1]]
feature_2<-rownames(feature_2)[ind[,2]]
corsig<-pcor_lnc[ind]
pvalsig<-pval_lnc[ind]
lncp<-cbind(feature_1,feature_2,corsig,pvalsig)
write.table(lncp,file="Correlation_result.txt",sep="\t",row.names=FALSE)

library(reshape2)
library(pheatmap)

correlate_pheatmap = function(infile, route)
{
  data=read.table(paste(route, infile, sep="/"), sep="\t", header=T)
  
  data_r=dcast(data, feature_1 ~ feature_2, value.var="corsig")
  data_p=dcast(data, feature_1 ~ feature_2, value.var="pvalsig")
  rownames(data_r)=data_r[,1]
  data_r=data_r[,-1]
  rownames(data_p)=data_p[,1]
  data_p=data_p[,-1]
  
  # Remove inconspicuous rows
  #  del_row = c()
  #  for(i in 1:length(data_p[, 1]))
  #  {
  #    if(all(data_p[i, ] > 0.05))
  #    {
  #      del_row = c(del_row, i)
  #    }
  #  }
  #  
  # Exclude non-significant columns
  #  del_col = c()
  #  for(j in 1:length(data_p[1, ]))
  #  {
  #    if(all(data_p[, j] > 0.05))
  #    {
  #      del_col = c(del_col, j)
  #   }
  #  }
  
  # Null value processing
  #  if(is.null(del_row) && !(is.null(del_col)))
  #  {
  #    data_p = data_p[, -del_col]
  #    data_r = data_r[, -del_col]
  #  }else if(is.null(del_col) && !(is.null(del_row)))
  # {
  #    data_p = data_p[-del_row,]
  #    data_r = data_r[-del_row,]
  #  }else if(is.null(del_row) && is.null(del_col))
  #  {
  #    print("delete none")
  #  }else if(!(is.null(del_row)) && !(is.null(del_col)))
  #  {
  #    data_p = data_p[-del_row, -del_col]
  #    data_r = data_r[-del_row, -del_col]
  #  }
  
  
  write.csv(data_p, file=paste(route, "data_p.csv", sep="/"))
  write.csv(data_r, file=paste(route, "data_r.csv", sep="/"))
  
  # Use "*" to replace the p value of<=0.05, and "" to replace the relative abundance of>0.05
  data_mark=data_p
  for(i in 1:length(data_p[,1])){
    for(j in 1:length(data_p[1,])){
      #data_mark[i,j]=ifelse(data_p[i,j] <= 0.05, "*", "")
      if(data_p[i,j] <= 0.001)
      {
        data_mark[i,j]="***"
      }
      else if(data_p[i,j] <= 0.01 && data_p[i,j] > 0.001)
      {
        data_mark[i,j]="**"
      }
      else if(data_p[i,j] <= 0.05 && data_p[i,j] > 0.01)
      {
        data_mark[i,j]="*"
      }
      else
      {
        data_mark[i,j]=""
      }
    }
  }
  write.csv(data_mark, file=paste(route, "data_mark.csv", sep="/"))
  
  pheatmap(data_r, display_numbers=data_mark, cellwidth=20, cellheight=20, fontsize_number=18, filename=paste(route, "Correlation_result.pdf", sep="/"))
  pheatmap(data_r, display_numbers=data_mark, cellwidth=20, cellheight=20, fontsize_number=18, filename=paste(route, "Correlation_result.png", sep="/"))
}


correlate_pheatmap("Correlation_result.txt","Result")


#################### GSVA analysis of differential expression pathways between high and low risk score groups ###################
library(GSVA)
library(pheatmap)
library(GSEABase)
exp <- read.table("363-BLCA  182-low 181-high.txt",header=T,row.names = 1,sep = "\t") 
mydata= as.matrix(exp) 
geneSets <- getGmt("c2.cp.kegg.v7.2.symbols.gmt") 
res_es <- gsva(mydata, geneSets, mx.diff=TRUE,min.sz=10, max.sz=500, 
               verbose=FALSE, parallel.sz=1)
write.csv(res_es,"GSVA result.csv")
#differential pathway
library(limma)
eset<-read.csv("GSVA result.csv",header = T,row.names = 1)
group <- c(rep("low",182),rep("high",181)) 
group <- factor(group,levels = c("low","high"),ordered = F)
design<- model.matrix(~group)
colnames(design) <- levels(group)
design
fit <- lmFit(eset, design)
fit2 <- eBayes(fit)
dif<-topTable(fit2,coef=2,n=nrow(fit2))
head(dif)
write.table(dif,"differential expression pathway.csv",sep = ",")

###################### Result heat map of GSVA #######################
library(pheatmap)
data<-read.table("differential pathway.txt",header = T,row.names = 1,sep = "\t")
data[!is.finite(data.matrix(data))]<-0
data<-t(scale(t(data)))
data<-data.matrix(data)
data[!is.finite(data.matrix(data))]<-0
data[data>1.2]<- 1.2
data[data< -1.2]<- -1.2
annotation<-read.table("Column comments.txt",header = T,row.names = 1,sep = "\t")
annotation_col=data.frame(risk=annotation$risk,
                          pN=annotation$pN,
                          age=annotation$age,
                          status=annotation$status,
                          stage=annotation$stage)

rownames(annotation_col) <- colnames(data)

pheatmap(data, annotation_col = annotation_col,
         annotation_names_row = F, annotation_legend = T,
         #annotation_colors = ann_colors,
         annotation_names_col = T,
         show_colnames = F,show_rownames = TRUE,
         cluster_rows = T,cluster_cols = F,fontsize =5,
         cellwidth = 0.8,cellheight =20,border_color = "gray40",
         #colors <- colorRampPalette(c("#6C809B", "white", "#A83C5C"))(100)
)


###################### Differential expression analysis of high and low risk score groups #######################
library(limma)
rt<-read.table("363-BLCA  182-low 181-high.txt",header = T,row.names = 1,sep = "\t")
group <- c(rep("Low",182),rep("High",181)) 
group <- factor(group,levels = c("Low","High"),ordered = F)
design<- model.matrix(~group)
colnames(design) <- levels(group)
design
fit <- lmFit(rt, design)
fit2 <- eBayes(fit)
allDiff<-topTable(fit2,coef=2,n=nrow(fit2))
head(allDiff)
write.table(allDiff,"Differential expressionmRNA.csv",sep = ",")

logFoldChange=1
adjustP=0.05

diffSig <- allDiff[with(allDiff, (abs(logFC)>=logFoldChange & adj.P.Val < adjustP )), ]
write.table(diffSig,file="diff.xls",sep="\t",quote=F)
diffUp <- allDiff[with(allDiff, (logFC>=logFoldChange & adj.P.Val < adjustP )), ]
write.table(diffUp,file="up.xls",sep="\t",quote=F)
diffDown <- allDiff[with(allDiff, (logFC<=(-logFoldChange) & adj.P.Val < adjustP )), ]
write.table(diffDown,file="down.xls",sep="\t",quote=F)


hmExp=rt[rownames(diffSig),]
diffExp=rbind(id=colnames(hmExp),hmExp)
write.table(diffExp,file="diffExp.txt",sep="\t",quote=F,col.names=F)


##Volcanic map
library(ggplot2)
gene<-read.delim("Volcanic map.txt",sep = "\t",stringsAsFactors = FALSE)

#Here we customize the difference type according to | log2FC |>=2 and adj.P.Val<0.01
gene[which(gene$adj.P.Val < 0.05 & gene$logFC <= -1),'sig'] <- 'Down'
gene[which(gene$adj.P.Val < 0.05 & gene$logFC >= 1),'sig'] <- 'Up'
gene[which(gene$adj.P.Val >= 0.05 | abs(gene$logFC) < 1),'sig'] <- 'None'

#Horizontal axis log2FC, vertical axis - log10 (adj. P.Val), color indicates difference
p <- ggplot(gene, aes(x = logFC, y = -log10(adj.P.Val), color = sig)) +
  geom_point(alpha = 0.6, size = 1) + xlim(-4, 4) + ylim(0, 25) +
  labs(x = '\nLog2 Fold Change', y = '-log10 FDR\n', color = '') +
  scale_colour_manual(values  = c('red2', '#2F5688', 'gray'), limits = c('Up', 'Down', 'None')) +
  theme(panel.grid = element_blank(), panel.background = element_rect(color = 'black',
                                                                      fill = 'transparent'), 
        plot.title = element_text(hjust = 0.5)) +
  theme(legend.key = element_rect(fill = 'transparent'), legend.background = element_rect(fill = 'transparent')) +
  geom_vline(xintercept = c(-1, 1), color = 'black', size = 0.6,lty=2) +
  geom_hline(yintercept = -log(0.05, 10), color = 'black', size = 0.6,lty=2) 
p

####################### Display of GO enrichment results of different genes in high and low risk score groups #################
library(ggplot2)
pathway<-read.table("GO enrichment.txt",header=T,sep="\t",quote="")
p = ggplot(pathway,aes(GeneRatio,Term))
p=p + geom_point(aes(size=Count))
pbubble = p+ geom_point(aes(size=Count,color=-1*log10(FDR)))
pr = pbubble+scale_color_gradient(low="blue",high = "red")
pr = pr+labs(color=expression(-log[10](FDR)),size="Count",
             x="GeneRatio",y="GO Terms")
pr=pr+ facet_grid(Category ~ .,scale="free")
p=pr+ theme_bw()
p+theme(axis.text.x   = element_text(color = "black", size = 12, angle = 0),#X-axis label size
        axis.text.y   = element_text(color = "black", size = 10.5, angle = 0),#Number size of y-axis
        axis.title.y  = element_text(color = "black", size = 11, angle = 90))

########################### Calculate tumor mutation burden ##################
#if (!require("BiocManager"))
#install.packages("BiocManager")
#BiocManager::install("maftools")

library(maftools)

laml <- read.maf(maf = "TCGA.BLCA.maf")
x = tmb(maf = laml)
data<-as.data.frame(x)
write.csv(data,"BLCA-TMB.csv")


########################## Survival of patients in different groups ##################
data<-read.table("Survival difference between high and low TMB.csv",header = T,row.names = 1,sep = ",")
library(survival)
library(survminer)
library(ggplot2)
fit<-survfit(Surv(time,status) ~ type,data = data)
p3<-ggsurvplot(fit,data = data,surv.median.line = "hv", #Add median survival curve
               palette = c("#d7191c", "#2b83ba","#E0AD16","gray40"), #Change the color of the line
               legend.labs=c("Risk-High-TMB-High","Risk-High-TMB-Low","Risk-Low-TMB-High","Risk-Low-TMB-Low"), #label
               legend.title="group",
               #title="Overall suvival", #title
               xlab="Time(Years)",ylab="Cumulative Survival(percentage)",
               break.x.by=2, #Abscissa interval
               censor.shape=124,censor.size=2, #Shape and size of deleted points
               risk.table = T,tables.height = 0.3,
               tables.theme = theme_cleantable(),
               ggtheme = theme_bw(),xlim = c(0, 17),
               conf.int = T,#Add confidence interval
               pval = T,pval.coord=c(0,0.20)# Add P value and adjust P value position
)
p3


########################### Survival of patients in different groups ##################
data<-read.table("Survival difference between high and low TMB.csv",header = T,row.names = 1,sep = ",")
library(survival)
library(survminer)
library(ggplot2)
fit<-survfit(Surv(time,status) ~ group,data = data)
p3<-ggsurvplot(fit,data = data,surv.median.line = "hv", #Add median survival curve
               palette = c("#d7191c", "#2b83ba"), #Change the color of the line
               legend.labs=c("High","Low"), #label
               legend.title="TMB",
               #title="Overall suvival", #title
               xlab="Time(Years)",ylab="Cumulative Survival(percentage)",
               break.x.by=2, #Abscissa interval
               censor.shape=124,censor.size=2, #Shape and size of deleted points
               risk.table = T,tables.height = 0.3,
               tables.theme = theme_cleantable(),
               ggtheme = theme_bw(),xlim = c(0, 17),
               conf.int = T,#Add confidence interval
               pval = T,pval.coord=c(0,0.20)# Add P value and adjust P value position
)
p3


#################Compare TMB between high and low risk score groups ###################
library(ggpubr)
library(magrittr)
library(ggsignif)
data<-read.table("input file.txt",sep = "\t",header = T,row.names = 1)
data$group<-as.factor(data$risk)
my_comparisons = list(c("low", "high"))
p<-ggviolin(data, x = "risk", y = "TMB" ,
            xlab="",ylab="Tumor Burden Mutation",add.params = list(fill = "white"),
            fill  = "risk",order=c("low","high"),add="boxplot",
            bxp.errorbar = T,bxp.errorbar.width = 0.2,palette = c("#FF8C00", "#00468B"))+
  stat_compare_means(comparisons = my_comparisons,label = "p.signif",method = "wilcox.test")+
  theme(
    plot.title    = element_text(color = "black", size   = 12, hjust = 0.5),
    plot.subtitle = element_text(color = "black", size   = 12,hjust = 0.5),
    plot.caption  = element_text(color = "black", size   = 12,face = "italic", hjust = 1),
    axis.text.x   = element_text(color = "black", size = 12, angle = 0),
    axis.text.y   = element_text(color = "black", size = 12, angle = 0),
    axis.title.x  = element_text(color = "black", size = 12.5, angle = 0),
    axis.title.y  = element_text(color = "black", size = 12.5, angle = 90),
    legend.title  = element_text(color = "black", size  = 12),
    legend.text   = element_text(color = "black", size   = 12),
    axis.line.y = element_line(color = "black", linetype = "solid"), # y-axis feature
    axis.line.x = element_line (color = "black",linetype = "solid") # x-axis feature
  )
p


############### Calculate the correlation between TMB and risk score ############
library(ggplot2)
library(ggpubr)
data <- read.table("input file.txt",header=TRUE,sep = "\t")
gg <- ggplot(data, aes(x=riskScore, y=TMB)) + 
  geom_point( size=2) +
  geom_smooth(method="lm", col="#2b83ba", size=2) +
  labs(x="riskScore",y="TMB") +
  stat_cor(data=data, method = "pearson")
plot(gg) + theme_bw()+theme(legend.position ="top")


############################## Analysis of the difference and correlation of immune checkpoints in high and low risk scoring groups#################
library(ggpubr)
library(magrittr)
library(ggsignif)
library(tidyverse)
data<-read.table("Immune checkpoint.csv",header=T,sep=",",row.names=1,check.names = F)
data2 <- data %>%
  add_column(group = rep(c("Low","High"), c(182,181)), .before = 1) %>%
  gather("gene","expression", -group) %>%
  mutate(gene = factor(gene, levels = colnames(data))) %>%
  mutate(group = factor(group, levels = c("Low","High")))
ggboxplot(data2, "gene", "expression", fill = "group", 
          xlab="",ylab="Expression",
          palette = c("#5399C8", "#E43022")) +
  stat_compare_means(aes(group=group),label = "p.signif")+
  theme(
    plot.title    = element_text(color = "black", size   = 10, hjust = 0.5),
    plot.subtitle = element_text(color = "black", size   = 10,hjust = 0.5),
    plot.caption  = element_text(color = "black", size   = 10,face = "italic", hjust = 1),
    axis.text.x   = element_text(color = "black", size =7, angle = 45,vjust = 0.6),
    axis.text.y   = element_text(color = "black", size = 10, angle = 0),
    legend.title  = element_text(color = "black", size  = 11),
    legend.text   = element_text(color = "black", size   = 11),
    axis.line.y = element_line(color = "black", linetype = "solid"),
    axis.line.x = element_line (color = "black",linetype = "solid"), 
    panel.border = element_rect(linetype = "solid", size = 0.8,fill = NA) 
  )

############################################Data read in####################################

feature_1<-read.table("Immune checkpoint.txt",sep="\t",header=T,row.names=1)

feature_2<-read.table("signature gene.txt",sep="\t",header=T,row.names=1)

dir.create("Result")  # Create result directory


pcor_lnc<-matrix(data = NA, nrow = nrow(feature_1), ncol = nrow(feature_2))
rownames(pcor_lnc)<-rownames(feature_1)
colnames(pcor_lnc)<-rownames(feature_2)
pval_lnc<-matrix(data = NA, nrow = nrow(feature_1), ncol = nrow(feature_2))
rownames(pval_lnc)<-rownames(feature_1)
colnames(pval_lnc)<-rownames(feature_2)
for (i in 1:nrow(feature_1))
{
  x<-t(feature_1[i,])
  x[which(!is.finite(x))]<-NA
  for (j in 1:nrow(feature_2))
  {
    y<-t(feature_2[j,])
    y[which(!is.finite(y))]<-NA
    if(length(which(!is.na(x+y)))>5 )
    {
      corres<-cor.test(x,y)
      pcor_lnc[i,j]<-corres$estimate
      pval_lnc[i,j]<-corres$p.value
    }
  }
}
ind<-which( abs(pcor_lnc) > 0 & pval_lnc < 1, arr.ind = TRUE)
length(ind)
feature_1<-rownames(feature_1)[ind[,1]]
feature_2<-rownames(feature_2)[ind[,2]]
corsig<-pcor_lnc[ind]
pvalsig<-pval_lnc[ind]
lncp<-cbind(feature_1,feature_2,corsig,pvalsig)
write.table(lncp,file="Correlation_result.txt",sep="\t",row.names=FALSE)

########################################Convert the result into a graph function###############################################

library(reshape2)
library(pheatmap)

correlate_pheatmap = function(infile, route)
{
  data=read.table(paste(route, infile, sep="/"), sep="\t", header=T)
  
  data_r=dcast(data, feature_1 ~ feature_2, value.var="corsig")
  data_p=dcast(data, feature_1 ~ feature_2, value.var="pvalsig")
  rownames(data_r)=data_r[,1]
  data_r=data_r[,-1]
  rownames(data_p)=data_p[,1]
  data_p=data_p[,-1]
  
  # Remove inconspicuous rows
  #  del_row = c()
  #  for(i in 1:length(data_p[, 1]))
  #  {
  #    if(all(data_p[i, ] > 0.05))
  #    {
  #      del_row = c(del_row, i)
  #    }
  #  }
  #  
  # Exclude non-significant columns
  #  del_col = c()
  #  for(j in 1:length(data_p[1, ]))
  #  {
  #    if(all(data_p[, j] > 0.05))
  #    {
  #      del_col = c(del_col, j)
  #   }
  #  }
  
  # null value processing
  #  if(is.null(del_row) && !(is.null(del_col)))
  #  {
  #    data_p = data_p[, -del_col]
  #    data_r = data_r[, -del_col]
  #  }else if(is.null(del_col) && !(is.null(del_row)))
  # {
  #    data_p = data_p[-del_row,]
  #    data_r = data_r[-del_row,]
  #  }else if(is.null(del_row) && is.null(del_col))
  #  {
  #    print("delete none")
  #  }else if(!(is.null(del_row)) && !(is.null(del_col)))
  #  {
  #    data_p = data_p[-del_row, -del_col]
  #    data_r = data_r[-del_row, -del_col]
  #  }
  
  
  write.csv(data_p, file=paste(route, "data_p.csv", sep="/"))
  write.csv(data_r, file=paste(route, "data_r.csv", sep="/"))
  
  # Use "*" to replace the p value of<=0.05, and "" to replace the relative abundance of>0.05
  data_mark=data_p
  for(i in 1:length(data_p[,1])){
    for(j in 1:length(data_p[1,])){
      #data_mark[i,j]=ifelse(data_p[i,j] <= 0.05, "*", "")
      if(data_p[i,j] <= 0.001)
      {
        data_mark[i,j]="***"
      }
      else if(data_p[i,j] <= 0.01 && data_p[i,j] > 0.001)
      {
        data_mark[i,j]="**"
      }
      else if(data_p[i,j] <= 0.05 && data_p[i,j] > 0.01)
      {
        data_mark[i,j]="*"
      }
      else
      {
        data_mark[i,j]=""
      }
    }
  }
  write.csv(data_mark, file=paste(route, "data_mark.csv", sep="/"))
  
  pheatmap(data_r, display_numbers=data_mark, cellwidth=20, cellheight=20, fontsize_number=18, filename=paste(route, "Correlation_result.pdf", sep="/"))
  pheatmap(data_r, display_numbers=data_mark, cellwidth=20, cellheight=20, fontsize_number=18, filename=paste(route, "Correlation_result.png", sep="/"))
}

#####################################################################

correlate_pheatmap("Correlation_result.txt","Result")