##############################Before methylation data screening#################################### rm(list = ls()) options(stringsAsFactors = F) require(GEOquery) require(Biobase) library("impute") info=read.table("Effective sample end of cancer.txt",sep="\t",header=T) library(data.table) b=info rownames(b)=b[,1] a=fread("datExpr0 end.txt",data.table = F ) a[1:4,1:4] rownames(a)=a[,1] a=a[,-1] beta=as.matrix(a) beta=impute.knn(beta) betaData=beta$data betaData=betaData+0.00001 a=betaData a[1:4,1:4] identical(colnames(a),rownames(b)) boxplot(a,las=2) # Make sure that the methylation signal value matrix is one-to-one corresponding to the phenotypic information library(ChAMP) # The beta signal value matrix cannot have NA value myLoad=champ.filter(beta = a,pd = b) myLoad save(myLoad,file = 'step1-output.Rdata') rm(list = ls()) options(stringsAsFactors = F) library("ChAMP") library("minfi") require(GEOquery) require(Biobase) load(file = 'step1-output.Rdata') myLoad # The time-consuming steps are commented out after running once if(F){ myNorm <- champ.norm(beta=myLoad$beta,arraytype="450K",cores=5) dim(myNorm) pD=myLoad$pd save(myNorm,pD,file = 'step2-champ_myNorm.Rdata') } load(file = 'step2-champ_myNorm.Rdata') # The original 450K is 400K after quality control filtration beta.m=myNorm group_list=myLoad$pd$group dim(beta.m) # The following are the quality control measures of the three standard pictures of the expression matrix if(T){ dat=t(beta.m) dat[1:4,1:4] library("FactoMineR")#These two packages need to be loaded to draw the principal component analysis diagram library("factoextra") # Because the methylation chip is 450K or 850K, hundreds of thousands of lines of methylation sites, PCA will not be too fast dat.pca <- PCA(dat , graph = FALSE) fviz_pca_ind(dat.pca, geom.ind = "point", # show points only (nbut not "text") col.ind = group_list, # color by groups # palette = c("#00AFBB", "#E7B800"), addEllipses = TRUE, # Concentration ellipses legend.title = "Groups" ) ggsave('all_samples_PCA.png') dat=beta.m dat[1:4,1:4] cg=names(tail(sort(apply(dat,1,sd)),1000))#apply by line('1'is retrieved by row,'2'is taken by column)take the variance of each row, from the smallest to the largest, and take the largest 1000 library(pheatmap) pheatmap(dat[cg,],show_colnames =F,show_rownames = F) #Take out each row of the 1000 genes extracted and combine them into a new expression matrix n=t(scale(t(dat[cg,]))) # 'scale'can normalize the log-ratio value n[n>2]=2 n[n< -2]= -2 n[1:4,1:4] pheatmap(n,show_colnames =F,show_rownames = F) ac=data.frame(group=group_list) rownames(ac)=colnames(n) pheatmap(n,show_colnames =F,show_rownames = F, annotation_col=ac,filename = 'heatmap_top1000_sd.png') dev.off() exprSet=beta.m pheatmap::pheatmap(cor(exprSet)) # The similarity of samples within the group should be higher than that between groups! colD=data.frame(group_list=group_list) rownames(colD)=colnames(exprSet) pheatmap::pheatmap(cor(exprSet), annotation_col = colD, show_rownames = F, filename = 'cor_all.png') dev.off() exprSet=exprSet[names(sort(apply(exprSet, 1,mad),decreasing = T)[1:500]),] dim(exprSet) # M=cor(log2(exprSet+1)) M=cor(exprSet) pheatmap::pheatmap(M,annotation_col = colD) pheatmap::pheatmap(M, show_rownames = F, annotation_col = colD, filename = 'cor_top500.png') dev.off() } ###################################After methylation data screening#################################### rm(list = ls()) options(stringsAsFactors = F) require(GEOquery) require(Biobase) library("impute") info=read.table("Effective sample end of cancer.txt",sep="\t",header=T) library(data.table) b=info rownames(b)=b[,1] a=fread("datExpr0 end.txt",data.table = F ) a[1:4,1:4] rownames(a)=a[,1] a=a[,-1] beta=as.matrix(a) beta=impute.knn(beta) betaData=beta$data betaData=betaData+0.00001 a=betaData a[1:4,1:4] identical(colnames(a),rownames(b)) boxplot(a,las=2) # Make sure that the methylation signal value matrix is one-to-one corresponding to the phenotypic information library(ChAMP) # The beta signal value matrix cannot have NA value myLoad=champ.filter(beta = a,pd = b) myLoad save(myLoad,file = 'step1-output.Rdata') rm(list = ls()) options(stringsAsFactors = F) library("ChAMP") library("minfi") require(GEOquery) require(Biobase) load(file = 'step1-output.Rdata') myLoad # The time-consuming steps are commented out after running once if(F){ myNorm <- champ.norm(beta=myLoad$beta,arraytype="450K",cores=5) dim(myNorm) pD=myLoad$pd save(myNorm,pD,file = 'step2-champ_myNorm.Rdata') } load(file = 'step2-champ_myNorm.Rdata') # The original 450K is 400K after quality control filtration beta.m=myNorm group_list=myLoad$pd$group dim(beta.m) # The following are the quality control measures of the three standard pictures of the expression matrix if(T){ dat=t(beta.m) dat[1:4,1:4] library("FactoMineR")#These two packages need to be loaded to draw the principal component analysis diagram library("factoextra") # Because the methylation chip is 450K or 850K, hundreds of thousands of lines of methylation sites, PCA will not be too fast dat.pca <- PCA(dat , graph = FALSE) fviz_pca_ind(dat.pca, geom.ind = "point", # show points only (nbut not "text") col.ind = group_list, # color by groups # palette = c("#00AFBB", "#E7B800"), addEllipses = TRUE, # Concentration ellipses legend.title = "Groups" ) ggsave('all_samples_PCA.png') dat=beta.m dat[1:4,1:4] cg=names(tail(sort(apply(dat,1,sd)),1000))#apply by line('1'is retrieved by row,'2'is taken by column)take the variance of each row, from the smallest to the largest, and take the largest 1000 library(pheatmap) pheatmap(dat[cg,],show_colnames =F,show_rownames = F) #Take out each row of the 1000 genes extracted and combine them into a new expression matrix n=t(scale(t(dat[cg,]))) # 'scale'can normalize the log-ratio value n[n>2]=2 n[n< -2]= -2 n[1:4,1:4] pheatmap(n,show_colnames =F,show_rownames = F) ac=data.frame(group=group_list) rownames(ac)=colnames(n) pheatmap(n,show_colnames =F,show_rownames = F, annotation_col=ac,filename = 'heatmap_top1000_sd.png') dev.off() exprSet=beta.m pheatmap::pheatmap(cor(exprSet)) # The similarity of samples within the group should be higher than that between groups! colD=data.frame(group_list=group_list) rownames(colD)=colnames(exprSet) pheatmap::pheatmap(cor(exprSet), annotation_col = colD, show_rownames = F, filename = 'cor_all.png') dev.off() exprSet=exprSet[names(sort(apply(exprSet, 1,mad),decreasing = T)[1:500]),] dim(exprSet) # M=cor(log2(exprSet+1)) M=cor(exprSet) pheatmap::pheatmap(M,annotation_col = colD) pheatmap::pheatmap(M, show_rownames = F, annotation_col = colD, filename = 'cor_top500.png') dev.off() } #save(dat.pca,pD,file = 'dat.pca.Rdata') ###################################Methylation difference analysis##################### rm(list = ls()) options(stringsAsFactors = F) library("ChAMP") library("minfi") require(GEOquery) require(Biobase) load(file = 'step1-output.Rdata') myLoad # The methylation signal matrix and phenotype information are stored. load(file = 'step2-champ_myNorm.Rdata') group_list=myLoad$pd$group table(group_list) myDMP <- champ.DMP(beta = myNorm,pheno=group_list,adjPVal = 0.05,#threshold adjust.method = "BH",##P-value adjustment method for limma analysis (default=BH (Benjamin Hochberg)) arraytype="EPIC") head(myDMP[[1]]) save(myDMP,file = 'step3-output-myDMP.Rdata') write.table(myDMP,file="ALLDMP_analysis_result.txt",quote=T,row.names = T) # You can also use the interactive interface to modify the threshold and adjust the differential probe # DMP.GUI(DMP=myDMP[[1]],beta=myNorm,group_list) rm(list = ls()) options(stringsAsFactors = F) library("ChAMP") library("minfi") require(GEOquery) require(Biobase) load(file = 'step3-output-myDMP.Rdata') deg=myDMP[[1]] head(deg) length(unique(deg$gene)) deg$g=ifelse(abs(deg$logFC) < 0,'stable', ifelse(deg$logFC > 0,'UP','DOWN')) table(deg$g) head(deg) deg$symbol=deg$gene write.table(deg,file="champDiff.xls",sep="\t",quote=F) ########################Methylated differential volcanic map###################### library(ggplot2) gene<-read.delim("volcanic map.txt",sep = "\t",stringsAsFactors = FALSE) #Here we customize the difference type according to | log2FC |>=2 and adj.P.Val<0.01 gene[which(gene$adj.P.Val < 0.05 & gene$logFC <= -0),'sig'] <- 'Down' gene[which(gene$adj.P.Val < 0.05 & gene$logFC >= 0),'sig'] <- 'Up' gene[which(gene$adj.P.Val >= 0.05 | abs(gene$logFC) < 0),'sig'] <- 'None' #Horizontal axis log2FC, vertical axis - log10 (adj. P.Val), color indicates difference p <- ggplot(gene, aes(x = logFC, y = -log10(adj.P.Val), color = sig)) + geom_point(alpha = 0.6, size = 1) + xlim(-2, 2) + ylim(0, 10) + labs(x = '\nLog2 Fold Change', y = '-log10 FDR\n', color = '') + scale_colour_manual(values = c('red2', '#2F5688', 'gray'), limits = c('Up', 'Down', 'None')) + theme(panel.grid = element_blank(), panel.background = element_rect(color = 'black', fill = 'transparent'), plot.title = element_text(hjust = 0.5)) + theme(legend.key = element_rect(fill = 'transparent'), legend.background = element_rect(fill = 'transparent')) + geom_vline(xintercept = c(-0, 0), color = 'black', size = 0.6,lty=2) + geom_hline(yintercept = -log(0.05, 10), color = 'black', size = 0.6,lty=2) p ################mRNA differential expression##################### library(limma) #setwd("C:\\Users\\18768\\Desktop\\Hypoxia and immunity of cervical cancer\\7. High and low grouping\\4. Differential expression") rt<-read.table("Bladder urothelial carcinoma 363 236 for N0 compare to 127 for N1-N3.txt",header = T,row.names = 1,sep = "\t") group <- c(rep("N0",236),rep("N",127)) group <- factor(group,levels = c("N0","N"),ordered = F) design<- model.matrix(~group) colnames(design) <- levels(group) design fit <- lmFit(rt, design) fit2 <- eBayes(fit) allDiff<-topTable(fit2,coef=2,n=nrow(fit2)) head(allDiff) write.table(allDiff,"Differential expression mRNA.csv",sep = ",") logFoldChange=0 adjustP=0.05 diffSig <- allDiff[with(allDiff, (abs(logFC)>=logFoldChange & adj.P.Val < adjustP )), ] write.table(diffSig,file="diff.xls",sep="\t",quote=F) diffUp <- allDiff[with(allDiff, (logFC>=logFoldChange & adj.P.Val < adjustP )), ] write.table(diffUp,file="up.xls",sep="\t",quote=F) diffDown <- allDiff[with(allDiff, (logFC<=(-logFoldChange) & adj.P.Val < adjustP )), ] write.table(diffDown,file="down.xls",sep="\t",quote=F) #############Volcanic map########## library(ggplot2) gene<-read.delim("Volcanic map.txt",sep = "\t",stringsAsFactors = FALSE) #Here we customize the difference type according to | log2FC |>=2 and adj.P.Val<0.01 gene[which(gene$adj.P.Val < 0.05 & gene$logFC <= -0),'sig'] <- 'Down' gene[which(gene$adj.P.Val < 0.05 & gene$logFC >= 0),'sig'] <- 'Up' gene[which(gene$adj.P.Val >= 0.05 | abs(gene$logFC) < 0),'sig'] <- 'None' #Horizontal axis log2FC, vertical axis - log10 (adj. P.Val), color indicates difference p <- ggplot(gene, aes(x = logFC, y = -log10(adj.P.Val), color = sig)) + geom_point(alpha = 0.6, size = 1) + xlim(-2, 2) + ylim(0, 10) + labs(x = '\nLog2 Fold Change', y = '-log10 FDR\n', color = '') + scale_colour_manual(values = c('red2', '#2F5688', 'gray'), limits = c('Up', 'Down', 'None')) + theme(panel.grid = element_blank(), panel.background = element_rect(color = 'black', fill = 'transparent'), plot.title = element_text(hjust = 0.5)) + theme(legend.key = element_rect(fill = 'transparent'), legend.background = element_rect(fill = 'transparent')) + geom_vline(xintercept = c(-0, 0), color = 'black', size = 0.6,lty=2) + geom_hline(yintercept = -log(0.05, 10), color = 'black', size = 0.6,lty=2) p ############################## GO enrichment ################# library(ggplot2) pathway<-read.table("GO enrichment.txt",header=T,sep="\t",quote="") p = ggplot(pathway,aes(GeneRatio,Term)) p=p + geom_point(aes(size=Count)) pbubble = p+ geom_point(aes(size=Count,color=-1*log10(PValue))) pr = pbubble+scale_color_gradient(low="blue",high = "red") pr = pr+labs(color=expression(-log[10](PValue)),size="Count", x="GeneRatio",y="GO Terms") pr=pr+ facet_grid(Category ~ .,scale="free") p=pr+ theme_bw() p+theme(axis.text.x = element_text(color = "black", size = 12, angle = 0),#X-axis dimension size axis.text.y = element_text(color = "black", size = 10.5, angle = 0),#y-axis dimension size axis.title.y = element_text(color = "black", size = 11, angle = 90)) ########################### Survival analysis of regional lymph node metastasis grouping ################## data<-read.table("LASSO Cox result.csv",header = T,row.names = 1,sep = ",") library(survival) library(survminer) library(ggplot2) fit<-survfit(Surv(time,status) ~ group,data = data) p3<-ggsurvplot(fit,data = data,surv.median.line = "hv", #add median survival curve palette = c("#00468B","#ED0000"), #change the color of the line legend.labs=c("N0","N1-3"), #label legend.title="riskScore", #title="Overall suvival", #title xlab="Time(Years)",ylab="Cumulative Survival(percentage)", break.x.by=2, #abscissa interval censor.shape=124,censor.size=2, #Shape and size of deleted points risk.table = T,tables.height = 0.3, tables.theme = theme_cleantable(), ggtheme = theme_bw(),xlim = c(0, 14), conf.int = T,#add confidence interval pval = T,pval.coord=c(0,0.20)# add P value and adjust P value position ) p3 ######################### Univariate Cox analysis results showed univariate Cox and forest maps of related genes######################### library("survival") library("survminer") library(readr) library(forestplot) data<-read.table("LASSO.txt",header = T,sep = "\t",row.names = 1) uniTab=data.frame() for(i in colnames(data[,3:ncol(data)])){ cox <- coxph(Surv(time, status) ~ data[[i]], data = data) coxSummary = summary(cox) uniTab=rbind(uniTab, cbind(id=i, HR=coxSummary$conf.int[,"exp(coef)"], HR.95L=coxSummary$conf.int[,"lower .95"], HR.95H=coxSummary$conf.int[,"upper .95"], #logtest.pvalue=coxSummary$logtest["pvalue"], #wald.pvalue=coxSummary$waldtest["pvalue"], pvalue=coxSummary$coefficients[,"Pr(>|z|)"]) ) } write.csv(uniTab,"Univariate Cox analysis results.csv") data<-read_tsv("forest map.txt") forest_table <- cbind(c("Symbol", data$Symbol), c("Hazard Ratio (95% CI)", data$HR), c("Pvalue", data$pvalue)) csize <- data.frame(mean=c(NA, as.numeric(data$HR2)), lower=c(NA, as.numeric(data$`HR.95L`)), upper=c(NA, as.numeric(data$`HR.95H`))) forestplot(labeltext = forest_table, csize, graph.pos = 3, #forest map position graphwidth = unit(10, "cm"), #width of the forest diagram in the icon zero = 1, #the position of the datum line in the forest map cex = 0.9, lineheight = unit(1.2,"cm"), #row height boxsize = 0.2, #square size lty.ci=2, #the line type of the confidence interval fn.ci_norm = fpDrawNormalCI, #Point shape,fpDrawNormalCIf square,fpDrawDiamondCI rhombus lwd.ci = 1, #width of confidence interval line ci.vertices = TRUE,#add vertical lines at both ends of the confidence interval lwd.xaxis = 1, #X axis width colgap = unit(0.7,'cm'), #column spacing xlab = "Hazard_Ratio", #abscissa label #xticks = c(0.5,1,1.5), # X-axis scale spacing hrzl_lines = list("2" = gpar(lwd=1, col = "#8B008B"), "14" = gpar(lwd=1, col = "#8B008B")),#Set the type and influence range of lines in the table ci.vertices.height = 0.1, #Height of vertical lines at both ends of the confidence interval col = fpColors(box = "#458B00", line = "black", zero = "#7AC5CD")) #Color of squares and lines ############################ LASSO Cox ####################################### library("glmnet") library("survival") ### Read data data<-read.table("LASSO.txt",header = T,row.names = 1,sep = "\t") ### LASSO Cox regression model was constructed #set.seed(6531) x=as.matrix(data[,c(3:ncol(data))]) y=data.matrix(Surv(data$time,data$status)) fit=glmnet(x, y, family = "cox", maxit = 10000) plot(fit, xvar = "lambda", label = F) cvfit = cv.glmnet(x, y, family="cox", maxit = 10000) plot(cvfit) abline(v = log(c(cvfit$lambda.min,cvfit$lambda.1se)),lty="dashed") ### Then, output the correlation coefficient and riskScore of prediction model ### correlation coefficient coef = coef(fit, s = cvfit$lambda.min) index = which(coef != 0) actCoef = coef[index] lassoGene = row.names(coef)[index] geneCoef = cbind(Gene=lassoGene,Coef=actCoef) geneCoef # View the correlation coefficient of the model ### Calculate riskScore FinalGeneExp = data[,lassoGene] myFun = function(x){crossprod(as.numeric(x),actCoef)} riskScore = apply(FinalGeneExp,1,myFun) outCol = c("time", "status", lassoGene) risk = as.vector(ifelse(riskScore > median(riskScore), "high", "low")) dat = cbind(data[,outCol], riskScore=as.vector(riskScore), risk) write.csv(dat,"LASSO Cox result.csv") ########################### Survival of patients in high and low risk score of training group################## data<-read.table("LASSO Cox result.csv",header = T,row.names = 1,sep = ",") library(survival) library(survminer) library(ggplot2) fit<-survfit(Surv(time,status) ~ risk,data = data) p3<-ggsurvplot(fit,data = data,surv.median.line = "hv", #add median survival curve palette = c("#ED0000","#00468B"), #change the color of the line legend.labs=c("High Score","Low Score"), #label legend.title="riskScore", #title="Overall suvival", #title xlab="Time(Years)",ylab="Cumulative Survival(percentage)", break.x.by=2, #abscissa interval censor.shape=124,censor.size=2, #shape and size of deleted points risk.table = T,tables.height = 0.3, tables.theme = theme_cleantable(), ggtheme = theme_bw(),xlim = c(0, 16), conf.int = T,#add confidence interval pval = T,pval.coord=c(0,0.20)# add P value and adjust P value position ) p3 ######################### Risk factors of training group ######################## library(ggplot2) library(pheatmap) library(gridExtra) library(ggplotify) #install.packages("ggplotify") data<-read.csv("risk factors.txt",header = T,sep = "\t",row.names = 1) #### Figure 1 p1=ggplot(data = data,aes(x=ID,y=riskScore))+geom_point(aes(color=risk))+ scale_colour_manual(values = c("#E20B0B","#0B45A5"))+ theme_bw()+labs(x="",y="Risk score")+ geom_hline(yintercept=median(data$riskScore),colour="black", linetype="dotted",size=0.8)+ geom_vline(xintercept=sum(data$risk=="low"),colour="black", linetype="dotted",size=0.8) p1 #### Figure 2 p2=ggplot(data,aes(x=ID,y=time))+geom_point(aes(col=status))+theme_bw()+ scale_colour_manual(values = c("#0B45A5","#E20B0B"))+ labs(x="",y="Survival time(year)")+ geom_vline(xintercept=sum(data$risk=="low"),colour="black", linetype="dotted",size=0.8) p2 ##### Figure 3 exp_dat<-data.frame(AKAP7=data$AKAP7,ARHGAP29=data$ARHGAP29,EFEMP1=data$EFEMP1,EPN2=data$EPN2, LAMA2=data$LAMA2,RPS6KA1=data$RPS6KA1,SLC1A6=data$SLC1A6,STIM2=data$STIM2, SULT1C2=data$SULT1C2,TMEM109=data$TMEM109,TRABD=data$TRABD,ZNRD1=data$ZNRD1) mycolors <- colorRampPalette(c("#08147B", "white", "#CA2F33"), bias = 1.2)(100) tmp=data.frame((scale(exp_dat))) rownames(tmp)<-rownames(data) tmp[tmp > 2] = 2 tmp[tmp < -2] = -2 tmp2<-t(tmp) annotation_col = data.frame(rsikScore = factor(c(rep("Low",182),rep("High",181)))) rownames(annotation_col)=colnames(tmp2) ann_colors=list(rsikScore=c(Low="#0B45A5",High="#E20B0B")) p3=pheatmap(tmp2,col= mycolors,show_colnames = F,cluster_cols = F,annotation_col = annotation_col,annotation_colors = ann_colors) #### Jigsaw to realize linkage of three pictures plots = list(p1,p2,as.ggplot(as.grob(p3))) lay1 = rbind(c(rep(1,7)),c(rep(2,7)),c(rep(3,7))) #Layout matrix grid.arrange(grobs = plots, layout_matrix = lay1, heigths = c(2,2,2),weights=c(10,10,10)) ############################### Training group time dependent ROC ############################# rt<-read.csv("time dependent ROC.txt",header = T,row.names = 1,sep = "\t") library(timeROC) library(survival) #install.packages("timeROC") ROC_rt<-timeROC(T=rt$time,delta=rt$status, marker=rt$riskScore,cause=1, weighting="marginal", times=c(1,3,5),ROC=TRUE) ROC_rt plot(ROC_rt,time=1,title=FALSE,lwd=2,col = "#DAAC36") plot(ROC_rt,time=3,col="#3397CE",add=TRUE,title=FALSE,lwd=2) plot(ROC_rt,time=5,col="#EE2A29",add=TRUE,title=FALSE,lwd=2) legend("bottomright", c(paste0("AUC at 1 years: ",round(ROC_rt$AUC[1],3)), paste0("AUC at 3 years: ",round(ROC_rt$AUC[2],3)), paste0("AUC at 5 years: ",round(ROC_rt$AUC[3],3))), col=c("#DAAC36","#3397CE","#EE2A29"),lwd=2,bty = "n") ########################### Survival of patients in high and low risk score of validation group 1 ################## data<-read.table("LASSO Cox result.csv",header = T,row.names = 1,sep = ",") library(survival) library(survminer) library(ggplot2) fit<-survfit(Surv(time,status) ~ risk,data = data) p3<-ggsurvplot(fit,data = data,surv.median.line = "hv", #add median survival curve palette = c("#ED0000","#00468B"), #change the color of the line legend.labs=c("High Score","Low Score"), #label legend.title="riskScore", #title="Overall suvival", #title xlab="Time(Years)",ylab="Cumulative Survival(percentage)", break.x.by=1, #Abscissa interval censor.shape=124,censor.size=2, #Shape and size of deleted points risk.table = T,tables.height = 0.3, tables.theme = theme_cleantable(), ggtheme = theme_bw(),xlim = c(0, 8), conf.int = T,#Add confidence interval pval = T,pval.coord=c(0,0.20)# Add P value and adjust P value position ) p3 ############################### Validation group 1 time dependent ROC ############################# rt<-read.csv("time dependent ROC.txt",header = T,row.names = 1,sep = "\t") library(timeROC) library(survival) #install.packages("timeROC") ROC_rt<-timeROC(T=rt$time,delta=rt$status, marker=rt$riskScore,cause=1, weighting="marginal", times=c(1,3,5),ROC=TRUE) ROC_rt plot(ROC_rt,time=1,title=FALSE,lwd=2,col = "#DAAC36") plot(ROC_rt,time=3,col="#3397CE",add=TRUE,title=FALSE,lwd=2) plot(ROC_rt,time=5,col="#EE2A29",add=TRUE,title=FALSE,lwd=2) legend("bottomright", c(paste0("AUC at 1 years: ",round(ROC_rt$AUC[1],3)), paste0("AUC at 3 years: ",round(ROC_rt$AUC[2],3)), paste0("AUC at 5 years: ",round(ROC_rt$AUC[3],3))), col=c("#DAAC36","#3397CE","#EE2A29"),lwd=2,bty = "n") ######################### Risk factors of validation group 1 ######################## library(ggplot2) library(pheatmap) library(gridExtra) library(ggplotify) #install.packages("ggplotify") data<-read.csv("risk factors.txt",header = T,sep = "\t",row.names = 1) #### Figure 1 p1=ggplot(data = data,aes(x=ID,y=riskScore))+geom_point(aes(color=risk))+ scale_colour_manual(values = c("#E20B0B","#0B45A5"))+ theme_bw()+labs(x="",y="Risk score")+ geom_hline(yintercept=median(data$riskScore),colour="black", linetype="dotted",size=0.8)+ geom_vline(xintercept=sum(data$risk=="low"),colour="black", linetype="dotted",size=0.8) p1 #### Figure 2 p2=ggplot(data,aes(x=ID,y=time))+geom_point(aes(col=status))+theme_bw()+ scale_colour_manual(values = c("#0B45A5","#E20B0B"))+ labs(x="",y="Survival time(year)")+ geom_vline(xintercept=sum(data$risk=="low"),colour="black", linetype="dotted",size=0.8) p2 ##### Figure 3 exp_dat<-data.frame(AKAP7=data$AKAP7,ARHGAP29=data$ARHGAP29,EFEMP1=data$EFEMP1,EPN2=data$EPN2, LAMA2=data$LAMA2,RPS6KA1=data$RPS6KA1,SLC1A6=data$SLC1A6,STIM2=data$STIM2, SULT1C2=data$SULT1C2,TMEM109=data$TMEM109,TRABD=data$TRABD,ZNRD1=data$ZNRD1) mycolors <- colorRampPalette(c("#08147B", "white", "#CA2F33"), bias = 1.2)(100) tmp=data.frame((scale(exp_dat))) rownames(tmp)<-rownames(data) tmp[tmp > 2] = 2 tmp[tmp < -2] = -2 tmp2<-t(tmp) annotation_col = data.frame(rsikScore = factor(c(rep("Low",19),rep("High",19)))) rownames(annotation_col)=colnames(tmp2) ann_colors=list(rsikScore=c(Low="#0B45A5",High="#E20B0B")) p3=pheatmap(tmp2,col= mycolors,show_colnames = F,cluster_cols = F,annotation_col = annotation_col,annotation_colors = ann_colors) #### Jigsaw to realize linkage of three pictures plots = list(p1,p2,as.ggplot(as.grob(p3))) lay1 = rbind(c(rep(1,7)),c(rep(2,7)),c(rep(3,7))) #Layout matrix grid.arrange(grobs = plots, layout_matrix = lay1, heigths = c(2,2,2),weights=c(10,10,10)) ########################### Survival of patients in high and low risk score of validation group 2 ################## data<-read.table("LASSO Cox result.csv",header = T,row.names = 1,sep = ",") library(survival) library(survminer) library(ggplot2) fit<-survfit(Surv(time,status) ~ risk,data = data) p3<-ggsurvplot(fit,data = data,surv.median.line = "hv", #Add median survival curve palette = c("#ED0000","#00468B"), #Change the color of the line legend.labs=c("High Score","Low Score"), #label legend.title="riskScore", #title="Overall suvival", #title xlab="Time(Years)",ylab="Cumulative Survival(percentage)", break.x.by=1, #Abscissa interval censor.shape=124,censor.size=2, #Shape and size of deleted points risk.table = T,tables.height = 0.3, tables.theme = theme_cleantable(), ggtheme = theme_bw(),xlim = c(0, 12), conf.int = T,#Add confidence interval pval = T,pval.coord=c(0,0.20)# Add P value and adjust P value position ) p3 ############################### Validation group 2 time dependent ROC ############################# rt<-read.csv("时间依赖ROC.txt",header = T,row.names = 1,sep = "\t") library(timeROC) library(survival) #install.packages("timeROC") ROC_rt<-timeROC(T=rt$time,delta=rt$status, marker=rt$riskScore,cause=1, weighting="marginal", times=c(1,3,5),ROC=TRUE) ROC_rt plot(ROC_rt,time=1,title=FALSE,lwd=2,col = "#DAAC36") plot(ROC_rt,time=3,col="#3397CE",add=TRUE,title=FALSE,lwd=2) plot(ROC_rt,time=5,col="#EE2A29",add=TRUE,title=FALSE,lwd=2) legend("bottomright", c(paste0("AUC at 1 years: ",round(ROC_rt$AUC[1],3)), paste0("AUC at 3 years: ",round(ROC_rt$AUC[2],3)), paste0("AUC at 5 years: ",round(ROC_rt$AUC[3],3))), col=c("#DAAC36","#3397CE","#EE2A29"),lwd=2,bty = "n") ######################### Risk factors of validation group 2 ######################## library(ggplot2) library(pheatmap) library(gridExtra) library(ggplotify) #install.packages("ggplotify") data<-read.csv("Risk factors.txt",header = T,sep = "\t",row.names = 1) #### Figure 1 p1=ggplot(data = data,aes(x=ID,y=riskScore))+geom_point(aes(color=risk))+ scale_colour_manual(values = c("#E20B0B","#0B45A5"))+ theme_bw()+labs(x="",y="Risk score")+ geom_hline(yintercept=median(data$riskScore),colour="black", linetype="dotted",size=0.8)+ geom_vline(xintercept=sum(data$risk=="low"),colour="black", linetype="dotted",size=0.8) p1 #### Figure 2 p2=ggplot(data,aes(x=ID,y=time))+geom_point(aes(col=status))+theme_bw()+ scale_colour_manual(values = c("#0B45A5","#E20B0B"))+ labs(x="",y="Survival time(year)")+ geom_vline(xintercept=sum(data$risk=="low"),colour="black", linetype="dotted",size=0.8) p2 ##### Figure 3 exp_dat<-data.frame(AKAP7=data$AKAP7,ARHGAP29=data$ARHGAP29,EFEMP1=data$EFEMP1,EPN2=data$EPN2, LAMA2=data$LAMA2,RPS6KA1=data$RPS6KA1,SLC1A6=data$SLC1A6,STIM2=data$STIM2, SULT1C2=data$SULT1C2,TMEM109=data$TMEM109,TRABD=data$TRABD,ZNRD1=data$ZNRD1) mycolors <- colorRampPalette(c("#08147B", "white", "#CA2F33"), bias = 1.2)(100) tmp=data.frame((scale(exp_dat))) rownames(tmp)<-rownames(data) tmp[tmp > 2] = 2 tmp[tmp < -2] = -2 tmp2<-t(tmp) annotation_col = data.frame(rsikScore = factor(c(rep("Low",82),rep("High",82)))) rownames(annotation_col)=colnames(tmp2) ann_colors=list(rsikScore=c(Low="#0B45A5",High="#E20B0B")) p3=pheatmap(tmp2,col= mycolors,show_colnames = F,cluster_cols = F,annotation_col = annotation_col,annotation_colors = ann_colors) #### Jigsaw to realize linkage of three pictures plots = list(p1,p2,as.ggplot(as.grob(p3))) lay1 = rbind(c(rep(1,7)),c(rep(2,7)),c(rep(3,7))) #Layout matrix grid.arrange(grobs = plots, layout_matrix = lay1, heigths = c(2,2,2),weights=c(10,10,10)) ########################## Survival curve of signature genes #################### library(survival) library(survminer) data<-read.table("Signature gene survival analysis.txt",header = T,sep = "\t",row.names = 1) for (gene in colnames(data)[3:ncol(data)]) { group<-ifelse(data[[gene]]>median(data[[gene]]), "high","low") diff=survdiff(Surv(time,status) ~ group , data = data) pValue=1-pchisq(diff$chisq,df=1) if(pValue<0.001){ pValue="p<0.001" }else{ pValue=paste0("p=",sprintf("%.03f",pValue)) } fit <- survfit(Surv(time, status) ~ group, data = data) surPlot<-ggsurvplot(fit, #surv.median.line = "hv", #Add median survival curve data=data, conf.int=TRUE, pval=pValue, pval.size=5, legend.labs=c("High","Low"), legend.title=gene, xlab="Time (years)", ylab="overall survival", break.time.by = 2, #tables.theme = theme_cleantable(), #ggtheme = theme_bw(), risk.table.title="", palette = c("#d7191c","#2b83ba"), risk.table = F,risk.table.height=.35) pdf(file = paste0( gene,".pdf"),onefile = F,width = 6,height = 5.5) print(surPlot) dev.off() } ############################## Boxplots of signature genes in cancer and adjacent cancer in training group ################# library(ggpubr) library(magrittr) library(ggsignif) library(tidyverse) data<-read.table("Boxplots.csv",header=T,sep=",",row.names=1,check.names = F) data2 <- data %>% #add_column(group = rep(c("Control","BPD"), c(14,17)), .before = 1) %>% gather("gene","expression", -group) %>% mutate(gene = factor(gene, levels = colnames(data))) %>% mutate(group = factor(group, levels = c("Control","BLCA"))) ggboxplot(data2, "gene", "expression", fill = "group", xlab="",ylab="Expression", palette = c('#2F5688', 'red2')) + stat_compare_means(aes(group=group),label = "p.signif",method = "wilcox.test")+ theme( plot.title = element_text(color = "black", size = 10, hjust = 0.5), plot.subtitle = element_text(color = "black", size = 10,hjust = 0.5), plot.caption = element_text(color = "black", size = 10,face = "italic", hjust = 1), axis.text.x = element_text(color = "black", size =10, angle = 45,vjust = 0.6), axis.text.y = element_text(color = "black", size = 10, angle = 0), legend.title = element_text(color = "black", size = 11), legend.text = element_text(color = "black", size = 11), axis.line.y = element_line(color = "black", linetype = "solid"), axis.line.x = element_line (color = "black",linetype = "solid"), panel.border = element_rect(linetype = "solid", size = 1,fill = NA) ) ############################## Boxplots of differential expression of signature genes in N0 and N1-N3 groups in training group ################# library(ggpubr) library(magrittr) library(ggsignif) library(tidyverse) data<-read.table("Boxplots.csv",header=T,sep=",",row.names=1,check.names = F) data2 <- data %>% #add_column(group = rep(c("Control","BPD"), c(14,17)), .before = 1) %>% gather("gene","expression", -group) %>% mutate(gene = factor(gene, levels = colnames(data))) %>% mutate(group = factor(group, levels = c("N0","N1-N3"))) ggboxplot(data2, "gene", "expression", fill = "group", xlab="",ylab="Expression", palette = c('#2F5688', 'red2')) + stat_compare_means(aes(group=group),label = "p.signif",method = "wilcox.test")+ theme( plot.title = element_text(color = "black", size = 10, hjust = 0.5), plot.subtitle = element_text(color = "black", size = 10,hjust = 0.5), plot.caption = element_text(color = "black", size = 10,face = "italic", hjust = 1), axis.text.x = element_text(color = "black", size =10, angle = 45,vjust = 0.6), axis.text.y = element_text(color = "black", size = 10, angle = 0), legend.title = element_text(color = "black", size = 11), legend.text = element_text(color = "black", size = 11), axis.line.y = element_line(color = "black", linetype = "solid"), axis.line.x = element_line (color = "black",linetype = "solid"), panel.border = element_rect(linetype = "solid", size = 1,fill = NA) ) ############################## Boxplots of signature genes in the validation group ################# library(ggpubr) library(magrittr) library(ggsignif) library(tidyverse) data<-read.table("Boxplots1.csv",header=T,sep=",",row.names=1,check.names = F) data2 <- data %>% #add_column(group = rep(c("Control","BPD"), c(14,17)), .before = 1) %>% gather("gene","expression", -group) %>% mutate(gene = factor(gene, levels = colnames(data))) %>% mutate(group = factor(group, levels = c("Control","BLCA"))) ggboxplot(data2, "gene", "expression", fill = "group", xlab="",ylab="Expression", palette = c('#2F5688', 'red2')) + stat_compare_means(aes(group=group),label = "p.signif",method = "wilcox.test")+ theme( plot.title = element_text(color = "black", size = 10, hjust = 0.5), plot.subtitle = element_text(color = "black", size = 10,hjust = 0.5), plot.caption = element_text(color = "black", size = 10,face = "italic", hjust = 1), axis.text.x = element_text(color = "black", size =10, angle = 45,vjust = 0.6), axis.text.y = element_text(color = "black", size = 10, angle = 0), legend.title = element_text(color = "black", size = 11), legend.text = element_text(color = "black", size = 11), axis.line.y = element_line(color = "black", linetype = "solid"), axis.line.x = element_line (color = "black",linetype = "solid"), panel.border = element_rect(linetype = "solid", size = 1,fill = NA) ) ############################## Boxplots of differential expression of signature genes in N0 and N1-N3 groups in validation group ################# library(ggpubr) library(magrittr) library(ggsignif) library(tidyverse) data<-read.table("Boxplots 2.csv",header=T,sep=",",row.names=1,check.names = F) data2 <- data %>% #add_column(group = rep(c("Control","BPD"), c(14,17)), .before = 1) %>% gather("gene","expression", -group) %>% mutate(gene = factor(gene, levels = colnames(data))) %>% mutate(group = factor(group, levels = c("N0","N1-N3"))) ggboxplot(data2, "gene", "expression", fill = "group", xlab="",ylab="Expression", palette = c('#2F5688', 'red2')) + stat_compare_means(aes(group=group),label = "p.signif",method = "t.test")+ theme( plot.title = element_text(color = "black", size = 10, hjust = 0.5), plot.subtitle = element_text(color = "black", size = 10,hjust = 0.5), plot.caption = element_text(color = "black", size = 10,face = "italic", hjust = 1), axis.text.x = element_text(color = "black", size =10, angle = 45,vjust = 0.6), axis.text.y = element_text(color = "black", size = 10, angle = 0), legend.title = element_text(color = "black", size = 11), legend.text = element_text(color = "black", size = 11), axis.line.y = element_line(color = "black", linetype = "solid"), axis.line.x = element_line (color = "black",linetype = "solid"), panel.border = element_rect(linetype = "solid", size = 1,fill = NA) ) ####################### Waterfall diagram of signature gene mutation in training group ###################################### library(maftools) #clinicaldata <- read.table("TCGA_LUAD_Clinical.csv",sep = ",",header = T) maf<-read.maf(maf="TCGA.BLCA.maf") vc_cols = RColorBrewer::brewer.pal(n = 8, name = 'Paired') names(vc_cols) = c( 'Frame_Shift_Del', 'Missense_Mutation', 'Nonsense_Mutation', 'Multi_Hit', 'Frame_Shift_Ins', 'In_Frame_Ins', 'Splice_Site', 'In_Frame_Del' ) genes = c( "AKAP7", "ARHGAP29", "EFEMP1", "EPN2", "LAMA2", "RPS6KA1", "SLC1A6", "STIM2", "SULT1C2", "TMEM109", "TRABD", "ZNRD1" ) oncoplot(maf = maf, fontSize =0.62,draw_titv = T,legendFontSize=1.2, annotationFontSize=1.2,#clinicalFeatures = c('Sex',"Stage","Survival"), colors = vc_cols,genes = genes,drawColBar=T,logColBar=T) ####################### Correlation analysis of transcription factors and signature genes in training group ###################### mRNA<-read.table("transcription factors.txt",sep="\t",header=T,row.names=1) lncRNA<-read.table("signature genes.txt",sep="\t",header=T,row.names=1) pcor_lnc<-matrix(data = NA, nrow = nrow(mRNA), ncol = nrow(lncRNA)) rownames(pcor_lnc)<-rownames(mRNA) colnames(pcor_lnc)<-rownames(lncRNA) pval_lnc<-matrix(data = NA, nrow = nrow(mRNA), ncol = nrow(lncRNA)) rownames(pval_lnc)<-rownames(mRNA) colnames(pval_lnc)<-rownames(lncRNA) for (i in 1:nrow(mRNA)) { x<-t(mRNA[i,]) x[which(!is.finite(x))]<-NA for (j in 1:nrow(lncRNA)) { y<-t(lncRNA[j,]) y[which(!is.finite(y))]<-NA if(length(which(!is.na(x+y)))>5 ) { corres<-cor.test(x,y) pcor_lnc[i,j]<-corres$estimate pval_lnc[i,j]<-corres$p.value } } } ind<-which( abs(pcor_lnc) > 0.3 & pval_lnc < 0.05, arr.ind = TRUE) length(ind) mRNAsig<-rownames(mRNA)[ind[,1]] lncRNAsig<-rownames(lncRNA)[ind[,2]] corsig<-pcor_lnc[ind] pvalsig<-pval_lnc[ind] lncp<-cbind(mRNAsig,lncRNAsig,corsig,pvalsig) write.table(lncp,file="correlation-0.3.txt",sep="\t",row.names=FALSE) ######################## Mulberry diagram ##################### library(ggplot2) library(ggalluvial) library(RColorBrewer) miRNA_mRNA<-read.table("relationship.txt",sep = "\t",header = T) miRNA_mRNA$Freq=1#Define the ordinate, which is generally 1 by default miRNA_mRNA_long<- to_lodes_form(miRNA_mRNA, axes = 1:2,#Numbering miRNA and mRNA respectively id = "Cohort") #Change to long data for drawing ggplot(miRNA_mRNA_long, aes(x =factor(x,level = c("Drug","Gene")),y=Freq,stratum = stratum, alluvium = Cohort,fill = stratum, label =stratum)) + geom_flow( width = 1/3)+#Draw flow chart geom_stratum( width = 1/3,linetype=1,size=0.5,alpha =0.9,color = "black") +#Draw impact diagram geom_text(stat ="stratum" , size =3) +#Add First Name scale_x_discrete(limits = c() )+#Remove abscissa theme_bw()+#Define theme theme(legend.position = "none", axis.title = element_blank(), axis.text.y= element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.border = element_blank())+#Remove boundary line scale_fill_manual(values = colorRampPalette(brewer.pal(8, "Accent"))(10))#define color ######################### In validation group 1, riskscore was judged as an independent prognostic factor ######################### library("survival") library("survminer") options(digits=2) data<-read.table("Independent factor proof.txt",header = T,row.names = 1,sep = "\t") ### Univariate model <- coxph(Surv(time, status) ~., data = data ) ggforest(model,main="hazard ratio",cpositions=c(0.02,0.1,0.35), fontsize=0.8,refLabel="reference",noDigits=2,data = data) ### multivariate res.cox <- coxph(Surv(time, status) ~ risk+Age, data = data) summary(res.cox) ggforest(res.cox,main="hazard ratio",cpositions=c(0.02,0.22,0.4), fontsize=0.8,refLabel="reference",noDigits=2,data = data) #########################In validation group 2, riskscore was judged as an independent prognostic factor ######################### library("survival") library("survminer") options(digits=2) data<-read.table("Independent factor proof.txt",header = T,row.names = 1,sep = "\t") ### Univariate model <- coxph(Surv(time, status) ~., data = data ) ggforest(model,main="hazard ratio",cpositions=c(0.02,0.1,0.35), fontsize=0.8,refLabel="reference",noDigits=2,data = data) ### multivariate res.cox <- coxph(Surv(time, status) ~ risk+age+N, data = data) summary(res.cox) ggforest(res.cox,main="hazard ratio",cpositions=c(0.02,0.22,0.4), fontsize=0.8,refLabel="reference",noDigits=2,data = data) ######################### In training group 2, riskscore was judged as an independent prognostic factor ######################### library("survival") library("survminer") options(digits=2) data<-read.table("Independent factor proof.txt",header = T,row.names = 1,sep = "\t") ### Univariate for(i in colnames(data[,3:ncol(data)])){ cox <- coxph(Surv(time, status) ~ data[,i], data = data) summary(cox) } ggforest(cox,main="hazard ratio",cpositions=c(0.02,0.1,0.35), fontsize=0.8,refLabel="reference",noDigits=2,data = data) ### multivariate res.cox <- coxph(Surv(time, status) ~ risk+age+stage+gender+pN+pT , data = data) summary(res.cox) ggforest(res.cox,main="hazard ratio",cpositions=c(0.02,0.22,0.4), fontsize=0.8,refLabel="reference",noDigits=2,data = data) ######################## Nomograph ############################ #install.packages("regplot") library(regplot) library(rms) library(survival) data <- read.table("Independent factor proof.txt",header=TRUE,sep = "\t") dd=datadist(data) options(datadist="dd") mod <- cph(formula = as.formula(paste("Surv(time, status) ~ ",paste(colnames(data)[4:6],collapse = "+"))), data=data,x=T,y=T,surv = T) regplot(mod,points = T,failtime = c(365,1095,1825)) ######################## Calibration curve ############################ library(regplot) library(rms) library(survival) data <- read.table("Independent factor proof.txt",header=TRUE,sep = "\t") f1 <- cph(formula = as.formula(paste("Surv(time, status) ~ ",paste(colnames(data)[4:6],collapse = "+"))), data=data,x=T,y=T,surv = T, time.inc=365) cal1 <- calibrate(f1, cmethod="KM", method="boot", u=365, m=50, B=1000) f3 <- cph(formula = as.formula(paste("Surv(time, status) ~ ",paste(colnames(data)[4:6],collapse = "+"))), data=data,x=T,y=T,surv = T, time.inc=1095) cal3 <- calibrate(f3, cmethod="KM", method="boot", u=1095, m=50, B=1000) f5 <- cph(formula = as.formula(paste("Surv(time, status) ~ ",paste(colnames(data)[4:6],collapse = "+"))), data=data,x=T,y=T,surv = T, time.inc=1825) cal5 <- calibrate(f5, cmethod="KM", method="boot", u=1825, m=50, B=1000) plot(cal1,lwd = 2,lty = 0,errbar.col = c("#5399C8"), xlim = c(0,1),ylim= c(0,1), #bty = "l", #Draw only the left and bottom borders xlab = "Nomogram-prediced OS (%)",ylab = "Observed OS (%)", col = c("#5399C8"), cex.lab=1.2,cex.axis=1, cex.main=1.2, cex.sub=0.6) lines(cal1[,c('mean.predicted',"KM")], type = 'b', lwd = 1, col = c("#5399C8"), pch = 16) mtext("") plot(cal3,lwd = 2,lty = 0,errbar.col = c("#E43022"), xlim = c(0,1),ylim= c(0,1), xlab = "Nomogram-prediced OS (%)",ylab = "Observed OS (%)", col = c("#E43022"),add = T) lines(cal3[,c('mean.predicted',"KM")], type = 'b', lwd = 1, col = c("#E43022"), pch = 16) mtext("") plot(cal5,lwd = 2,lty = 0,errbar.col = c("#E0AD16"), xlim = c(0,1),ylim= c(0,1), xlab = "Nomogram-prediced OS (%)",ylab = "Observed OS (%)", col = c("#E0AD16"),add = T) lines(cal5[,c('mean.predicted',"KM")], type = 'b', lwd = 1, col = c("#E0AD16"), pch = 16) abline(0,1, lwd = 2, lty = 3, col = c("#224444")) legend("topleft", #Location of legend legend = c("1-year","3-year","5-year"), #Legend text col =c("#5399C8","#E43022","#E0AD16"), #Legend line color, corresponding to text lwd = 2,#Thickness of legend centerline cex = 1.2,#Legend font size bty = "n")#Do not display legend border ######################## Difference of risk score in different clinical subgroups ############################ library(ggpubr) library(magrittr) library(ggsignif) library(export) data<-read.table("Comparison of different clinical subgroups.txt",sep = "\t",header = T) data$group<-as.factor(data$group) ggboxplot(data, x = "group", y ="riskScore",fill = "group",add="jitter", add.params = list(color="group",alpha=0.1), xlab="",ylab="Risk Score", order=c(">=65","<65", "Alive","Dead", "stage i/ii","stage iii/iv", "N0","N1-N3", "T1-T2","T3-T4", "male","female"), palette=c("#BFBCD1","#BFBCD1", "#FECDE7","#FECDE7", "#009ACD","#009ACD", "#CD5555","#CD5555", "#CCEAC4","#CCEAC4", "#D1C45B","#D1C45B"))+theme(legend.position ="none")+ theme(axis.text.x = element_text(color = "black", size = 10, angle = 0),#X-axis label size axis.text.y = element_text(color = "black", size = 13, angle = 0),#Number size of y-axis axis.title.y = element_text(color = "black", size = 11, angle = 90))#y-axis label size graph2ppt() ################################# SsGSEA immune infiltration########################### library(GSVA) immunecell<-read.csv("immune cell.txt", stringsAsFactors = FALSE, check.names = FALSE,sep = "\t") #Select two lines of specific genes and corresponding immune cells mark<-immunecell[,1:2] list<- split(as.matrix(mark)[,1], mark[,2]) data<-read.table("363-BLCA 182-low 181-high.txt", stringsAsFactors = FALSE, header = TRUE, row.names = 1, sep = "\t") data <- as.matrix(data) ssGSEA_Score = as.data.frame(gsva(data, list, method = "ssgsea")) write.csv(ssGSEA_Score,file="ssGSEA result.csv") ############################## Boxplots of immune cell infiltration in high and low risk score groups ################# library(ggpubr) library(magrittr) library(ggsignif) library(tidyverse) data<-read.table("ssGSEA result.csv",header=T,sep=",",row.names=1,check.names = F) data2 <- data %>% add_column(group = rep(c("Low","High"), c(182,181)), .before = 1) %>% gather("gene","expression", -group) %>% mutate(gene = factor(gene, levels = colnames(data))) %>% mutate(group = factor(group, levels = c("Low","High"))) ggboxplot(data2, "gene", "expression", fill = "group", xlab="",ylab="Immune Infiltration", palette = c("#5399C8", "#E43022")) + stat_compare_means(aes(group=group),label = "p.signif",method = "wilcox.test")+ theme( plot.title = element_text(color = "black", size = 10, hjust = 0.5), plot.subtitle = element_text(color = "black", size = 10,hjust = 0.5), plot.caption = element_text(color = "black", size = 10,face = "italic", hjust = 1), axis.text.x = element_text(color = "black", size =7, angle = 45,vjust = 0.6), axis.text.y = element_text(color = "black", size = 10, angle = 0), legend.title = element_text(color = "black", size = 11), legend.text = element_text(color = "black", size = 11), axis.line.y = element_line(color = "black", linetype = "solid"), axis.line.x = element_line (color = "black",linetype = "solid"), panel.border = element_rect(linetype = "solid", size = 0.8,fill = NA) ) ############################## Comparison of immune cell infiltration in N0 and N1-N3 groups ################# library(ggpubr) library(magrittr) library(ggsignif) library(tidyverse) data<-read.table("ssGSEA结果.csv",header=T,sep=",",row.names=1,check.names = F) data2 <- data %>% add_column(group = rep(c("N0","N1-3"), c(236,127)), .before = 1) %>% gather("gene","expression", -group) %>% mutate(gene = factor(gene, levels = colnames(data))) %>% mutate(group = factor(group, levels = c("N0","N1-3"))) ggboxplot(data2, "gene", "expression", fill = "group", xlab="",ylab="Immune Infiltration", palette = c("#5399C8", "#E43022")) + stat_compare_means(aes(group=group),label = "p.signif",method = "wilcox.test")+ theme( plot.title = element_text(color = "black", size = 10, hjust = 0.5), plot.subtitle = element_text(color = "black", size = 10,hjust = 0.5), plot.caption = element_text(color = "black", size = 10,face = "italic", hjust = 1), axis.text.x = element_text(color = "black", size =7, angle = 45,vjust = 0.6), axis.text.y = element_text(color = "black", size = 10, angle = 0), legend.title = element_text(color = "black", size = 11), legend.text = element_text(color = "black", size = 11), axis.line.y = element_line(color = "black", linetype = "solid"), axis.line.x = element_line (color = "black",linetype = "solid"), panel.border = element_rect(linetype = "solid", size = 0.8,fill = NA) ) ################################# ssGSEA EMT result ################################# library(GSVA) immunecell<-read.csv("EMT.txt", stringsAsFactors = FALSE, check.names = FALSE,sep = "\t") #Select two lines of specific genes and corresponding immune cells mark<-immunecell[,1:2] list<- split(as.matrix(mark)[,1], mark[,2]) data<-read.table("363-BLCA 182-low 181-high.txt", stringsAsFactors = FALSE, header = TRUE, row.names = 1, sep = "\t") data <- as.matrix(data) ssGSEA_Score = as.data.frame(gsva(data, list, method = "ssgsea")) write.csv(ssGSEA_Score,file="EMT result.csv") ############################## EMT difference boxplots in high and low risk score group ################# library(ggpubr) library(magrittr) library(ggsignif) library(tidyverse) data<-read.table("EMT result transpose.csv",header=T,sep=",",row.names=1,check.names = F) data2 <- data %>% add_column(group = rep(c("Low","High"), c(182,181)), .before = 1) %>% gather("gene","expression", -group) %>% mutate(gene = factor(gene, levels = colnames(data))) %>% mutate(group = factor(group, levels = c("Low","High"))) ggboxplot(data2, "gene", "expression", fill = "group", xlab="",ylab="Enrichment Score", #palette = c("#5399C8", "#E43022") ) + stat_compare_means(aes(group=group),label = "p.signif")+ theme( plot.title = element_text(color = "black", size = 10, hjust = 0.5), plot.subtitle = element_text(color = "black", size = 10,hjust = 0.5), plot.caption = element_text(color = "black", size = 10,face = "italic", hjust = 1), axis.text.x = element_text(color = "black", size =10, angle = 45,vjust = 0.6), axis.text.y = element_text(color = "black", size = 10, angle = 0), legend.title = element_text(color = "black", size = 11), legend.text = element_text(color = "black", size = 11), axis.line.y = element_line(color = "black", linetype = "solid"), axis.line.x = element_line (color = "black",linetype = "solid"), panel.border = element_rect(linetype = "solid", size = 0.5,fill = NA) ) ######################## ESTIMATE algorithm evaluation stroma/immune score ######################### library(estimate) file_dir="363-BLCA 182-low 181-high.txt" filterCommonGenes(input.f = file_dir,output.f = "gene.gct",id="GeneSymbol") estimateScore("gene.gct","estimat_score.gct",platform = "affymetrix") score_table=read.table("estimat_score.gct",skip = 2,header = 1) rownames(score_table)=score_table[,1] score=t(score_table) colnames(score)=score[1,] score_matrix=score[-1,] write.csv(score_matrix,"ESTIMATE result.csv") ####################### Boxplots of immune/stroma score and tumor purity ############### library(ggpubr) data<-read.csv("ESTIMATE结果.csv",header = T,row.names = 1,sep = ",") p <-ggboxplot(data, x = "group", y = "ImmuneScore",fill="group", order = c("Low","High"),xlab = "", palette =c("#7AA6DC", "#CD534C"),notch = F)+ stat_compare_means(aes(group=group)) p+theme(plot.title = element_text(hjust = 0.5), plot.subtitle = element_text(hjust = 0.5)) p <-ggboxplot(data, x = "group", y = "ESTIMATEScore",fill="group", order = c("Low","High"),xlab = "", palette =c("#E69F00", "#56B4E9"),notch = F)+ stat_compare_means(aes(group=group)) p+theme(plot.title = element_text(hjust = 0.5), plot.subtitle = element_text(hjust = 0.5)) p <-ggboxplot(data, x = "group", y = "TumorPurity",fill="group", order = c("Low","High"),xlab = "", palette =c("#288080", "#C03463"),notch = F)+ stat_compare_means(aes(group=group)) p+theme(plot.title = element_text(hjust = 0.5), plot.subtitle = element_text(hjust = 0.5)) p <-ggboxplot(data, x = "group", y = "StromalScore",fill="group", order = c("Low","High"),xlab = "", palette =c("#8D0E39", "#679D39"),notch = F)+ stat_compare_means(aes(group=group)) p+theme(plot.title = element_text(hjust = 0.5), plot.subtitle = element_text(hjust = 0.5)) ############################Heat map of correlation between immune cells and signature genes and risk score############### feature_1<-read.table("signature genes.txt",sep="\t",header=T,row.names=1) feature_2<-read.table("immune cells.txt",sep="\t",header=T,row.names=1) dir.create("Result") # Create the result directory pcor_lnc<-matrix(data = NA, nrow = nrow(feature_1), ncol = nrow(feature_2)) rownames(pcor_lnc)<-rownames(feature_1) colnames(pcor_lnc)<-rownames(feature_2) pval_lnc<-matrix(data = NA, nrow = nrow(feature_1), ncol = nrow(feature_2)) rownames(pval_lnc)<-rownames(feature_1) colnames(pval_lnc)<-rownames(feature_2) for (i in 1:nrow(feature_1)) { x<-t(feature_1[i,]) x[which(!is.finite(x))]<-NA for (j in 1:nrow(feature_2)) { y<-t(feature_2[j,]) y[which(!is.finite(y))]<-NA if(length(which(!is.na(x+y)))>5 ) { corres<-cor.test(x,y) pcor_lnc[i,j]<-corres$estimate pval_lnc[i,j]<-corres$p.value } } } ind<-which( abs(pcor_lnc) > 0 & pval_lnc < 1, arr.ind = TRUE) length(ind) feature_1<-rownames(feature_1)[ind[,1]] feature_2<-rownames(feature_2)[ind[,2]] corsig<-pcor_lnc[ind] pvalsig<-pval_lnc[ind] lncp<-cbind(feature_1,feature_2,corsig,pvalsig) write.table(lncp,file="Correlation_result.txt",sep="\t",row.names=FALSE) library(reshape2) library(pheatmap) correlate_pheatmap = function(infile, route) { data=read.table(paste(route, infile, sep="/"), sep="\t", header=T) data_r=dcast(data, feature_1 ~ feature_2, value.var="corsig") data_p=dcast(data, feature_1 ~ feature_2, value.var="pvalsig") rownames(data_r)=data_r[,1] data_r=data_r[,-1] rownames(data_p)=data_p[,1] data_p=data_p[,-1] # Remove inconspicuous rows # del_row = c() # for(i in 1:length(data_p[, 1])) # { # if(all(data_p[i, ] > 0.05)) # { # del_row = c(del_row, i) # } # } # # Exclude non-significant columns # del_col = c() # for(j in 1:length(data_p[1, ])) # { # if(all(data_p[, j] > 0.05)) # { # del_col = c(del_col, j) # } # } # Null value processing # if(is.null(del_row) && !(is.null(del_col))) # { # data_p = data_p[, -del_col] # data_r = data_r[, -del_col] # }else if(is.null(del_col) && !(is.null(del_row))) # { # data_p = data_p[-del_row,] # data_r = data_r[-del_row,] # }else if(is.null(del_row) && is.null(del_col)) # { # print("delete none") # }else if(!(is.null(del_row)) && !(is.null(del_col))) # { # data_p = data_p[-del_row, -del_col] # data_r = data_r[-del_row, -del_col] # } write.csv(data_p, file=paste(route, "data_p.csv", sep="/")) write.csv(data_r, file=paste(route, "data_r.csv", sep="/")) # Use "*" to replace the p value of<=0.05, and "" to replace the relative abundance of>0.05 data_mark=data_p for(i in 1:length(data_p[,1])){ for(j in 1:length(data_p[1,])){ #data_mark[i,j]=ifelse(data_p[i,j] <= 0.05, "*", "") if(data_p[i,j] <= 0.001) { data_mark[i,j]="***" } else if(data_p[i,j] <= 0.01 && data_p[i,j] > 0.001) { data_mark[i,j]="**" } else if(data_p[i,j] <= 0.05 && data_p[i,j] > 0.01) { data_mark[i,j]="*" } else { data_mark[i,j]="" } } } write.csv(data_mark, file=paste(route, "data_mark.csv", sep="/")) pheatmap(data_r, display_numbers=data_mark, cellwidth=20, cellheight=20, fontsize_number=18, filename=paste(route, "Correlation_result.pdf", sep="/")) pheatmap(data_r, display_numbers=data_mark, cellwidth=20, cellheight=20, fontsize_number=18, filename=paste(route, "Correlation_result.png", sep="/")) } correlate_pheatmap("Correlation_result.txt","Result") #################### GSVA analysis of differential expression pathways between high and low risk score groups ################### library(GSVA) library(pheatmap) library(GSEABase) exp <- read.table("363-BLCA 182-low 181-high.txt",header=T,row.names = 1,sep = "\t") mydata= as.matrix(exp) geneSets <- getGmt("c2.cp.kegg.v7.2.symbols.gmt") res_es <- gsva(mydata, geneSets, mx.diff=TRUE,min.sz=10, max.sz=500, verbose=FALSE, parallel.sz=1) write.csv(res_es,"GSVA result.csv") #differential pathway library(limma) eset<-read.csv("GSVA result.csv",header = T,row.names = 1) group <- c(rep("low",182),rep("high",181)) group <- factor(group,levels = c("low","high"),ordered = F) design<- model.matrix(~group) colnames(design) <- levels(group) design fit <- lmFit(eset, design) fit2 <- eBayes(fit) dif<-topTable(fit2,coef=2,n=nrow(fit2)) head(dif) write.table(dif,"differential expression pathway.csv",sep = ",") ###################### Result heat map of GSVA ####################### library(pheatmap) data<-read.table("differential pathway.txt",header = T,row.names = 1,sep = "\t") data[!is.finite(data.matrix(data))]<-0 data<-t(scale(t(data))) data<-data.matrix(data) data[!is.finite(data.matrix(data))]<-0 data[data>1.2]<- 1.2 data[data< -1.2]<- -1.2 annotation<-read.table("Column comments.txt",header = T,row.names = 1,sep = "\t") annotation_col=data.frame(risk=annotation$risk, pN=annotation$pN, age=annotation$age, status=annotation$status, stage=annotation$stage) rownames(annotation_col) <- colnames(data) pheatmap(data, annotation_col = annotation_col, annotation_names_row = F, annotation_legend = T, #annotation_colors = ann_colors, annotation_names_col = T, show_colnames = F,show_rownames = TRUE, cluster_rows = T,cluster_cols = F,fontsize =5, cellwidth = 0.8,cellheight =20,border_color = "gray40", #colors <- colorRampPalette(c("#6C809B", "white", "#A83C5C"))(100) ) ###################### Differential expression analysis of high and low risk score groups ####################### library(limma) rt<-read.table("363-BLCA 182-low 181-high.txt",header = T,row.names = 1,sep = "\t") group <- c(rep("Low",182),rep("High",181)) group <- factor(group,levels = c("Low","High"),ordered = F) design<- model.matrix(~group) colnames(design) <- levels(group) design fit <- lmFit(rt, design) fit2 <- eBayes(fit) allDiff<-topTable(fit2,coef=2,n=nrow(fit2)) head(allDiff) write.table(allDiff,"Differential expressionmRNA.csv",sep = ",") logFoldChange=1 adjustP=0.05 diffSig <- allDiff[with(allDiff, (abs(logFC)>=logFoldChange & adj.P.Val < adjustP )), ] write.table(diffSig,file="diff.xls",sep="\t",quote=F) diffUp <- allDiff[with(allDiff, (logFC>=logFoldChange & adj.P.Val < adjustP )), ] write.table(diffUp,file="up.xls",sep="\t",quote=F) diffDown <- allDiff[with(allDiff, (logFC<=(-logFoldChange) & adj.P.Val < adjustP )), ] write.table(diffDown,file="down.xls",sep="\t",quote=F) hmExp=rt[rownames(diffSig),] diffExp=rbind(id=colnames(hmExp),hmExp) write.table(diffExp,file="diffExp.txt",sep="\t",quote=F,col.names=F) ##Volcanic map library(ggplot2) gene<-read.delim("Volcanic map.txt",sep = "\t",stringsAsFactors = FALSE) #Here we customize the difference type according to | log2FC |>=2 and adj.P.Val<0.01 gene[which(gene$adj.P.Val < 0.05 & gene$logFC <= -1),'sig'] <- 'Down' gene[which(gene$adj.P.Val < 0.05 & gene$logFC >= 1),'sig'] <- 'Up' gene[which(gene$adj.P.Val >= 0.05 | abs(gene$logFC) < 1),'sig'] <- 'None' #Horizontal axis log2FC, vertical axis - log10 (adj. P.Val), color indicates difference p <- ggplot(gene, aes(x = logFC, y = -log10(adj.P.Val), color = sig)) + geom_point(alpha = 0.6, size = 1) + xlim(-4, 4) + ylim(0, 25) + labs(x = '\nLog2 Fold Change', y = '-log10 FDR\n', color = '') + scale_colour_manual(values = c('red2', '#2F5688', 'gray'), limits = c('Up', 'Down', 'None')) + theme(panel.grid = element_blank(), panel.background = element_rect(color = 'black', fill = 'transparent'), plot.title = element_text(hjust = 0.5)) + theme(legend.key = element_rect(fill = 'transparent'), legend.background = element_rect(fill = 'transparent')) + geom_vline(xintercept = c(-1, 1), color = 'black', size = 0.6,lty=2) + geom_hline(yintercept = -log(0.05, 10), color = 'black', size = 0.6,lty=2) p ####################### Display of GO enrichment results of different genes in high and low risk score groups ################# library(ggplot2) pathway<-read.table("GO enrichment.txt",header=T,sep="\t",quote="") p = ggplot(pathway,aes(GeneRatio,Term)) p=p + geom_point(aes(size=Count)) pbubble = p+ geom_point(aes(size=Count,color=-1*log10(FDR))) pr = pbubble+scale_color_gradient(low="blue",high = "red") pr = pr+labs(color=expression(-log[10](FDR)),size="Count", x="GeneRatio",y="GO Terms") pr=pr+ facet_grid(Category ~ .,scale="free") p=pr+ theme_bw() p+theme(axis.text.x = element_text(color = "black", size = 12, angle = 0),#X-axis label size axis.text.y = element_text(color = "black", size = 10.5, angle = 0),#Number size of y-axis axis.title.y = element_text(color = "black", size = 11, angle = 90)) ########################### Calculate tumor mutation burden ################## #if (!require("BiocManager")) #install.packages("BiocManager") #BiocManager::install("maftools") library(maftools) laml <- read.maf(maf = "TCGA.BLCA.maf") x = tmb(maf = laml) data<-as.data.frame(x) write.csv(data,"BLCA-TMB.csv") ########################## Survival of patients in different groups ################## data<-read.table("Survival difference between high and low TMB.csv",header = T,row.names = 1,sep = ",") library(survival) library(survminer) library(ggplot2) fit<-survfit(Surv(time,status) ~ type,data = data) p3<-ggsurvplot(fit,data = data,surv.median.line = "hv", #Add median survival curve palette = c("#d7191c", "#2b83ba","#E0AD16","gray40"), #Change the color of the line legend.labs=c("Risk-High-TMB-High","Risk-High-TMB-Low","Risk-Low-TMB-High","Risk-Low-TMB-Low"), #label legend.title="group", #title="Overall suvival", #title xlab="Time(Years)",ylab="Cumulative Survival(percentage)", break.x.by=2, #Abscissa interval censor.shape=124,censor.size=2, #Shape and size of deleted points risk.table = T,tables.height = 0.3, tables.theme = theme_cleantable(), ggtheme = theme_bw(),xlim = c(0, 17), conf.int = T,#Add confidence interval pval = T,pval.coord=c(0,0.20)# Add P value and adjust P value position ) p3 ########################### Survival of patients in different groups ################## data<-read.table("Survival difference between high and low TMB.csv",header = T,row.names = 1,sep = ",") library(survival) library(survminer) library(ggplot2) fit<-survfit(Surv(time,status) ~ group,data = data) p3<-ggsurvplot(fit,data = data,surv.median.line = "hv", #Add median survival curve palette = c("#d7191c", "#2b83ba"), #Change the color of the line legend.labs=c("High","Low"), #label legend.title="TMB", #title="Overall suvival", #title xlab="Time(Years)",ylab="Cumulative Survival(percentage)", break.x.by=2, #Abscissa interval censor.shape=124,censor.size=2, #Shape and size of deleted points risk.table = T,tables.height = 0.3, tables.theme = theme_cleantable(), ggtheme = theme_bw(),xlim = c(0, 17), conf.int = T,#Add confidence interval pval = T,pval.coord=c(0,0.20)# Add P value and adjust P value position ) p3 #################Compare TMB between high and low risk score groups ################### library(ggpubr) library(magrittr) library(ggsignif) data<-read.table("input file.txt",sep = "\t",header = T,row.names = 1) data$group<-as.factor(data$risk) my_comparisons = list(c("low", "high")) p<-ggviolin(data, x = "risk", y = "TMB" , xlab="",ylab="Tumor Burden Mutation",add.params = list(fill = "white"), fill = "risk",order=c("low","high"),add="boxplot", bxp.errorbar = T,bxp.errorbar.width = 0.2,palette = c("#FF8C00", "#00468B"))+ stat_compare_means(comparisons = my_comparisons,label = "p.signif",method = "wilcox.test")+ theme( plot.title = element_text(color = "black", size = 12, hjust = 0.5), plot.subtitle = element_text(color = "black", size = 12,hjust = 0.5), plot.caption = element_text(color = "black", size = 12,face = "italic", hjust = 1), axis.text.x = element_text(color = "black", size = 12, angle = 0), axis.text.y = element_text(color = "black", size = 12, angle = 0), axis.title.x = element_text(color = "black", size = 12.5, angle = 0), axis.title.y = element_text(color = "black", size = 12.5, angle = 90), legend.title = element_text(color = "black", size = 12), legend.text = element_text(color = "black", size = 12), axis.line.y = element_line(color = "black", linetype = "solid"), # y-axis feature axis.line.x = element_line (color = "black",linetype = "solid") # x-axis feature ) p ############### Calculate the correlation between TMB and risk score ############ library(ggplot2) library(ggpubr) data <- read.table("input file.txt",header=TRUE,sep = "\t") gg <- ggplot(data, aes(x=riskScore, y=TMB)) + geom_point( size=2) + geom_smooth(method="lm", col="#2b83ba", size=2) + labs(x="riskScore",y="TMB") + stat_cor(data=data, method = "pearson") plot(gg) + theme_bw()+theme(legend.position ="top") ############################## Analysis of the difference and correlation of immune checkpoints in high and low risk scoring groups################# library(ggpubr) library(magrittr) library(ggsignif) library(tidyverse) data<-read.table("Immune checkpoint.csv",header=T,sep=",",row.names=1,check.names = F) data2 <- data %>% add_column(group = rep(c("Low","High"), c(182,181)), .before = 1) %>% gather("gene","expression", -group) %>% mutate(gene = factor(gene, levels = colnames(data))) %>% mutate(group = factor(group, levels = c("Low","High"))) ggboxplot(data2, "gene", "expression", fill = "group", xlab="",ylab="Expression", palette = c("#5399C8", "#E43022")) + stat_compare_means(aes(group=group),label = "p.signif")+ theme( plot.title = element_text(color = "black", size = 10, hjust = 0.5), plot.subtitle = element_text(color = "black", size = 10,hjust = 0.5), plot.caption = element_text(color = "black", size = 10,face = "italic", hjust = 1), axis.text.x = element_text(color = "black", size =7, angle = 45,vjust = 0.6), axis.text.y = element_text(color = "black", size = 10, angle = 0), legend.title = element_text(color = "black", size = 11), legend.text = element_text(color = "black", size = 11), axis.line.y = element_line(color = "black", linetype = "solid"), axis.line.x = element_line (color = "black",linetype = "solid"), panel.border = element_rect(linetype = "solid", size = 0.8,fill = NA) ) ############################################Data read in#################################### feature_1<-read.table("Immune checkpoint.txt",sep="\t",header=T,row.names=1) feature_2<-read.table("signature gene.txt",sep="\t",header=T,row.names=1) dir.create("Result") # Create result directory pcor_lnc<-matrix(data = NA, nrow = nrow(feature_1), ncol = nrow(feature_2)) rownames(pcor_lnc)<-rownames(feature_1) colnames(pcor_lnc)<-rownames(feature_2) pval_lnc<-matrix(data = NA, nrow = nrow(feature_1), ncol = nrow(feature_2)) rownames(pval_lnc)<-rownames(feature_1) colnames(pval_lnc)<-rownames(feature_2) for (i in 1:nrow(feature_1)) { x<-t(feature_1[i,]) x[which(!is.finite(x))]<-NA for (j in 1:nrow(feature_2)) { y<-t(feature_2[j,]) y[which(!is.finite(y))]<-NA if(length(which(!is.na(x+y)))>5 ) { corres<-cor.test(x,y) pcor_lnc[i,j]<-corres$estimate pval_lnc[i,j]<-corres$p.value } } } ind<-which( abs(pcor_lnc) > 0 & pval_lnc < 1, arr.ind = TRUE) length(ind) feature_1<-rownames(feature_1)[ind[,1]] feature_2<-rownames(feature_2)[ind[,2]] corsig<-pcor_lnc[ind] pvalsig<-pval_lnc[ind] lncp<-cbind(feature_1,feature_2,corsig,pvalsig) write.table(lncp,file="Correlation_result.txt",sep="\t",row.names=FALSE) ########################################Convert the result into a graph function############################################### library(reshape2) library(pheatmap) correlate_pheatmap = function(infile, route) { data=read.table(paste(route, infile, sep="/"), sep="\t", header=T) data_r=dcast(data, feature_1 ~ feature_2, value.var="corsig") data_p=dcast(data, feature_1 ~ feature_2, value.var="pvalsig") rownames(data_r)=data_r[,1] data_r=data_r[,-1] rownames(data_p)=data_p[,1] data_p=data_p[,-1] # Remove inconspicuous rows # del_row = c() # for(i in 1:length(data_p[, 1])) # { # if(all(data_p[i, ] > 0.05)) # { # del_row = c(del_row, i) # } # } # # Exclude non-significant columns # del_col = c() # for(j in 1:length(data_p[1, ])) # { # if(all(data_p[, j] > 0.05)) # { # del_col = c(del_col, j) # } # } # null value processing # if(is.null(del_row) && !(is.null(del_col))) # { # data_p = data_p[, -del_col] # data_r = data_r[, -del_col] # }else if(is.null(del_col) && !(is.null(del_row))) # { # data_p = data_p[-del_row,] # data_r = data_r[-del_row,] # }else if(is.null(del_row) && is.null(del_col)) # { # print("delete none") # }else if(!(is.null(del_row)) && !(is.null(del_col))) # { # data_p = data_p[-del_row, -del_col] # data_r = data_r[-del_row, -del_col] # } write.csv(data_p, file=paste(route, "data_p.csv", sep="/")) write.csv(data_r, file=paste(route, "data_r.csv", sep="/")) # Use "*" to replace the p value of<=0.05, and "" to replace the relative abundance of>0.05 data_mark=data_p for(i in 1:length(data_p[,1])){ for(j in 1:length(data_p[1,])){ #data_mark[i,j]=ifelse(data_p[i,j] <= 0.05, "*", "") if(data_p[i,j] <= 0.001) { data_mark[i,j]="***" } else if(data_p[i,j] <= 0.01 && data_p[i,j] > 0.001) { data_mark[i,j]="**" } else if(data_p[i,j] <= 0.05 && data_p[i,j] > 0.01) { data_mark[i,j]="*" } else { data_mark[i,j]="" } } } write.csv(data_mark, file=paste(route, "data_mark.csv", sep="/")) pheatmap(data_r, display_numbers=data_mark, cellwidth=20, cellheight=20, fontsize_number=18, filename=paste(route, "Correlation_result.pdf", sep="/")) pheatmap(data_r, display_numbers=data_mark, cellwidth=20, cellheight=20, fontsize_number=18, filename=paste(route, "Correlation_result.png", sep="/")) } ##################################################################### correlate_pheatmap("Correlation_result.txt","Result")