library(tidyr) library(dplyr) library(stringi) library(stringr) library(DESeq2) library(tinyarray) library("tidyverse") library(limma) Work_dir <- "./" setwd(Work_dir) ##################################################差异分析甲基化基因筛选分析################################################################# dir.create(paste0(Work_dir,"/差异分析甲基化基因筛选分析")) tcga.cli <- read.table('./TCGA_clinical.txt',header = T,sep = '\t',check.names = F) colnames(tcga.cli) <- c("Patient","DFStatus","DFStime") exprSet <- read.table('TCGA-STAD.htseq_fpkm (3).tsv.gz', sep = '\t',check.names = F,header = T,row.names = 1) k = apply(exprSet,1, function(x){sum(x==0)<=0.25*ncol(exprSet)});table(k) exprSet <- exprSet[k,] expr = trans_exp(exprSet,mrna_only = T) group_list1 <- ifelse(as.numeric(str_sub(colnames(expr),14,15)) < 10,'tumor','normal');table(group_list1) group_list1 <- factor(group_list1,levels = c("tumor","normal")) design <- model.matrix(~0+group_list1) colnames(design)= levels(group_list1) rownames(design)= colnames(expr) table(group_list1) fit <- lmFit(expr, design) max(expr) constrasts = paste((levels(group_list1)),collapse = "-");constrasts cont.matrix <- makeContrasts(contrasts=constrasts,levels = design) fit2=contrasts.fit(fit,cont.matrix) fit2=eBayes(fit2) DEG_TCGA = topTable(fit2, coef=constrasts, n=Inf) DEG_TCGA = na.omit(DEG_TCGA) logFC_cutoff <- 0.5 k1 = (DEG_TCGA$P.Value < 0.05)&(DEG_TCGA$logFC < -logFC_cutoff) k2 = (DEG_TCGA$P.Value < 0.05)&(DEG_TCGA$logFC > logFC_cutoff) DEG_TCGA$change = ifelse(k1,"DOWN",ifelse(k2,"UP","NOT")) table(DEG_TCGA$change) head(DEG_TCGA) write.table(DEG_TCGA,"./差异分析甲基化基因筛选分析/TCGA_mRNA_DEG.txt") DEG_TCGA <- read.table("TCGA_DEG.txt",sep = "\ ",header = T) library(ggplot2) library(tinyarray) dat <- expr cg3_tcga = rownames(DEG_TCGA)[DEG_TCGA$change !="NOT"] v3 = draw_volcano(DEG_TCGA,pkg = 3,logFC_cutoff = 0.5) v3 ggsave("./差异分析甲基化基因筛选分析/fig1A.TCGA肿瘤与正常差异基因火山图.pdf", v3, width = 10, height = 6) ##################################################筛选差异甲基化差异基因(VENN) M1A <- c("TRMT10C", "TRMT61B", "TRMT6", "TRMT61A","ALKBH3", "ALKBH1", "NSUN1","NOP2","NSUN2", "NSUN3","NSUN4", "NSUN5", "NSUN6", "NSUN7", "DNMT1", "DNMT2","TRDMT1","DNMT3A","DNMT3B","ALYREF","TET2") M6A <- c("METTL3", "METTL14", "RBM15", "WTAP", "ZC3H13", "KIAA1429","HNRNPC", "YTHDC1", "YTHDC2", "YTHDF1", "YTHDF2","ALKBH5", "FTO", "METTL1","WDR4") METTL <- c("TRMT10C", "TRMT61B", "TRMT6", "TRMT61A","ALKBH3", "ALKBH1", "YTHDC1", "YTHDF1", "YTHDF2","YTHDF3", "DNMT1", "DNMT3A", "DNMT3B", "MBD1","MBD2", "MBD3", "MBD4", "MECP2", "NEIL1", "NTHL1", "SMUG1", "TDG", "UHRF1","UHRF2", "UNG", "ZBTB33","ZBTB38", "ZBTB4", "TET1", "TET2", "TET3","METTL3","METTL14", "RBM15", "RBM15B", "WTAP", "KIAA1429","CBLL1", "ZC3H13", "ALKBH5", "FTO", "YTHDC1", "YTHDC2", "YTHDF1", "YTHDF2", "YTHDF3", "IGF2BP1", "HNRNPA2B1", "HNRNPC", "FMR1", "LRPPRC","ELAVL","METTL1", "WDR4", "WBSCR22", "TRMT112", "CBP80", "CBP20") genes <- list(TCGA=cg3_tcga, METTL_gene=METTL) plot =draw_venn(genes,"genes") plot ggsave("./差异分析甲基化基因筛选分析/fig.1B TGCA_DEG__METTL.plot.pdf", plot, width = 10, height = 6) gen_con <- intersect(cg3_tcga,union(M1A,M6A));gen_con write.csv(file = "./差异分析甲基化基因筛选分析/差异甲基化调控基因.csv",gen_con) gen_con <- read.csv("./差异分析甲基化基因筛选分析/差异甲基化调控基因.csv") %>% .$x exprSet_con = expr[gen_con,] group1 <- data.frame(id=colnames(exprSet_con),group=group_list1) group1 <- group1[order(group1$group),] exprSet_con = exprSet_con[,match(group1$id,colnames(exprSet_con))] h3 = draw_heatmap(exprSet_con,group1$group,n_cutoff = 2,legend = T,annotation_legend = T,cluster_cols = F,show_rownames = T,scale = T,color_an = c( "#66C2A5", "#FC8D62")) h3 ggsave("./差异分析甲基化基因筛选分析/fig.1C 差异甲基化调控基因表达热图.pdf",h3, width = 10, height = 6) exp_con = as.data.frame(t(exprSet_con)) exp_con$class = ifelse(as.numeric(str_sub(rownames(exp_con),14,15)) < 10,'tumor','normal') exp_con <- exp_con[,c(ncol(exp_con),1:ncol(exp_con)-1)] #甲基化修饰调控表达的关键模块DEGs表达模式分析 library("tidyverse") data2 <- exp_con %>% gather("cell_types","Gene_expression", -class) %>% mutate(cell_types = factor(cell_types, levels = colnames(exp_con))) %>% mutate(class = factor(class, levels = c('tumor','normal'))) data3 <- data2 %>% group_by(class,cell_types) %>% summarise("Mx" =max(Gene_expression)) %>% group_by(cell_types) %>% summarise(lable = max(Mx)) x = c() xend = c() for(i in 1:length(unique(data2$cell_types))){ x[i] = i - 0.2 xend[i] = i + 0.2} y = data3$lable +0.01 yend = data3$lable +0.01 line <- data.frame(x, xend, y, yend) P <- ggplot()+ geom_violin(data = data2, aes(x = cell_types, y = Gene_expression,fill = class), position = position_dodge(1),scale = "width")+ geom_boxplot(data = data2, aes(cell_types, Gene_expression,fill = class), position = position_dodge(1),width=.4, outlier.shape = NA)+ stat_summary(data = data2, aes(cell_types, Gene_expression,fill = class), fun.y = "median",geom = "point",shape = 16, size = 2, color = "white",position = position_dodge(1))+ ggsci::scale_fill_aaas()+ ggpubr::stat_compare_means(data = data2,aes(cell_types, Gene_expression,group = class), label = "p.signif",method = "wilcox.test")+ #geom_segment(data = line, aes(x=x, y=y, xend= xend, yend=yend))+ labs(x = "", fill = "")+ theme_few(base_size = 14)+ theme(axis.text.x = element_text(angle = 45, hjust = 1,vjust = 1, colour = "black"), legend.position = "top") P ggsave("./差异分析甲基化基因筛选分析/Fig1D.差异甲基化调控基因表达小提琴图.pdf",P,width = 10,height = 7) ##################################################一致性聚类################################################################# dir.create(paste0(Work_dir,"/2一致性聚类")) #BiocManager::install("ConsensusClusterPlus") library(ConsensusClusterPlus) tcga.cli <- read.table('./TCGA_clinical.txt',header = T,sep = '\t',check.names = F) colnames(tcga.cli) <- c("Patient","DFStatus","DFStime") tumor <- colnames(expr)[which(substr(colnames(expr),14,16)=='01A' | substr(colnames(expr),14,16)=='02A')] expr <- expr[,tumor] data_nmf <- expr[gen_con,] colnames(data_nmf) <- substr(colnames(data_nmf),1,12) a <- as.data.frame(t(data_nmf)) a$Patient <- rownames(a) indep_rt <- merge(tcga.cli,a,by = 'Patient') %>% na.omit(.) outTab2 = data.frame() for(i in colnames(indep_rt[,4:ncol(indep_rt)])){ cox <- coxph(Surv(DFStime, DFStatus) ~ indep_rt[,i], data = indep_rt) coxSummary = summary(cox) coxP=coxSummary$coefficients[,"Pr(>|z|)"] outTab2=rbind(outTab2, cbind(id=i, HR=coxSummary$conf.int[,"exp(coef)"], HR.95L=coxSummary$conf.int[,"lower .95"], HR.95H=coxSummary$conf.int[,"upper .95"], pvalue=coxSummary$coefficients[,"Pr(>|z|)"]) ) } outTab2[,c(2:5)] <- apply(outTab2[,c(2:5)],2,function(x){as.numeric(x)}) outTab2 <- outTab2[order(outTab2$pvalue),] write.table(outTab2,'./一致性聚类/indep.Cox.txt',row.names = F,col.names = T,sep = '\t',quote = F) bioForest(coxFile="./一致性聚类/indep.Cox.txt", forestFile="./2一致性聚类/Fig.mulCox_indep_forest.pdf", forestCol=c("red","green")) ###一致性聚类 set.seed(1000) title=tempdir() results = ConsensusClusterPlus(as.matrix(data_nmf[outTab2$id[outTab2$pvalue < 0.05],]),maxK=6,reps=50,pItem=0.8,pFeature=1, title=title,clusterAlg="km",distance="euclidean",innerLinkage = "complete") cluster <- results[[2]][["consensusClass"]] table(cluster) cluster_d <- data.frame(Patient = colnames(data_nmf),cluster=as.factor(cluster)) write.table(cluster_d,'./一致性聚类/cluster_result.txt',row.names = F,col.names = T,quote=F,sep = '\t') data <- data.frame(t(data_nmf)) colnames(data) <- gsub("\\.","-",colnames(data)) expr_clu <- cbind(cluster_d,data) expr_meta <- merge(tcga.cli,expr_clu,by = 'Patient') %>% na.omit(.) expr_meta <- expr_meta[order(expr_meta$cluster),] diff <- survdiff(Surv(DFStime,DFStatus) ~ cluster, data = expr_meta) pValue1=format((1-pchisq(diff$chisq,df=1)),digits=15) pValue1=as.numeric(pValue1) fit <- survfit(Surv(expr_meta$DFStime,expr_meta$DFStatus)~ cluster,data = expr_meta) p1 <- ggsurvplot(fit, data = expr_meta, ggtheme = theme_bw(), #想要网格就运行这 conf.int = F, #不画置信区间,想画置信区间就把F改成T conf.int.style = "ribbon",#置信区间的类型,还可改为ribbon risk.table = T, # 绘制累计风险曲线 #tables.theme = theme_void(), tables.height = 0.25, censor = T, #不显示观察值所在的位置 palette = c("black","Red","green"), #线的颜色对应高、低(自定义调色板) xlab = "Time (month)", legend.title = "",#基因名写在图例题目的位置 font.legend = 9, pval = T ) p1 write.csv(file = "tsneinfo3.csv",tsneinfo3) save(expr_meta,file = "./一致性聚类/expr_meta_cluster.data") #tSNE算法降维 library(Rtsne) library(Rcpp) main_theme = theme(panel.background=element_blank(), panel.grid=element_blank(), axis.line.x=element_line(size=.5, colour="black"), axis.line.y=element_line(size=.5, colour="black"), axis.ticks=element_line(color="black"), axis.text=element_text(color="black", size=), legend.position="right", legend.background=element_blank(), legend.key=element_blank(), legend.text= element_text(size=), text=element_text(family="sans", size=)) tsne_out <- Rtsne(as.matrix(t(data_nmf[outTab2$id,])),pca=FALSE, perplexity=50,verbose=TRUE, max_iter = 500) tsnes=tsne_out$Y colnames(tsnes) <- c("tSNE1", "tSNE2") tsnes=as.data.frame(tsnes) group=cluster tsnes$group= as.factor(cluster) p <- ggplot(tsnes, aes(x = tSNE1, y = tSNE2))+ geom_point(aes(col=group))+ main_theme p ggsave(file = "./一致性聚类/Fig1C.tSNE聚类.pdf",p,width = 10,height = 7) #不同亚型聚类热图不同亚型 library(Heatmap) library(circlize) library(ComplexHeatmap) library(ConsensusClusterPlus) geng <- outTab2$id[outTab2$pvalue < 0.1] geng <- c("RBM15","DNMT1","WDR4","ALYREF") exprset_con_gen <- expr_meta[,c(geng,"Patient","cluster")] PD <- read.table("临床数据处理(tnm).txt",sep = "\t",header = T)#临床分期信息 pd <- PD[PD$Patient %in% exprset_con_gen$Patient,] data <- merge(pd,exprset_con_gen,by = "Patient") %>% na.omit(.) data$cluster <- ifelse(data$cluster == 1,"cluster1","cluster2") data <- data[order(data$cluster),] con_gen <- as.data.frame(data[,geng]) gene <- as.data.frame(t(apply(con_gen, 2, scale))) colnames(gene) <- data$Patient col_fun = circlize::colorRamp2(c(-2, 0, 2), c("#377EB8", "white", "#E41A1C")) ha = HeatmapAnnotation( T=data$ajcc_pathologic_t, N=data$ajcc_pathologic_n, M=data$ajcc_pathologic_m, age_group = ifelse(data$Age>60,">60","<=60"), gender =data$gender, stage= data$ajcc_pathologic_stage,cluster = data$cluster) #set.seed(14) Heatmap(as.matrix(gene), col = col_fun, cluster_rows = T, cluster_columns = F, show_column_names = F, show_row_names = TRUE, top_annotation = ha) max(gene) #卡方检验临床因子再不同组之间的差异 data$age_group <- ifelse(data$Age>60,">60","<=60") ceshi <- data[,c(1,5:9,14,15)] for (i in (2:ncol(ceshi))){ newdat<-ceshi[,i] ceshi.data <- data.frame(newdat, ceshi$cluster) kuangzi=table(ceshi.data) mydata1<-matrix(data=NA,ncol=ncol(kuangzi),nrow=nrow(kuangzi)) for(j in 1:length(kuangzi)/nrow(kuangzi) ){ mydata1[,j]<-as.numeric(kuangzi[,j]) } mydata1<-as.data.frame(mydata1) colnames(mydata1)<-colnames(kuangzi) rownames(mydata1)<-rownames(kuangzi) colSums(mydata1) per = t(t(mydata1)/colSums(mydata1,na.rm = TRUE)) * 100 colSums(per) per char2<-matrix(data=NA,ncol=3,nrow=nrow(kuangzi)) rownames(char2)<-rownames(kuangzi) char2 for (k in 1:nrow(kuangzi)) { qq<-chisq.test(kuangzi[k,]) char2[k,1]<-round(qq$statistic,5) char2[k,2]<-round(qq$p.value,5) char2[k,3] <-round(p.adjust(qq$p.value, method="fdr",n=2),5) char2<-data.frame(char2) colnames(char2) <- c("Stat","Pvalue","adjPvalue") } char2 <- cbind(per,char2) laoli<-rbind(laoli,colnames(ceshi[i]),char2) } laoli<-cbind(row.names(laoli),laoli) write.csv(laoli, "./一致性聚类/chisq_验临床因子12.csv") #柱状堆叠图 library(ggplot2) data <- read.csv(file = "不同亚型中乳腺癌临床特征堆叠.csv",header = T) data <- read.csv(file = "asd.csv",header = T) data_all <- as.data.frame(reshape2::melt(data[11:14,],id.var ="Clin")) mycolor <- c("#FF0000","#99CC33","#66CCCC","#66FFCC","#FF0033","#FF6600","#666696","#009966","#FF0066","#663333","#CC6666","#FF6666", "#FFCC66" , "#FF9900" , "#FF9966","#666699", "#660066", "#333366", "#0066CC", "#99CCFF", "#9933FF", "#330099") main_theme = theme(panel.background=element_blank(), panel.grid=element_blank(), panel.grid.major=element_blank(), panel.grid.minor=element_blank(), axis.line.x=element_line(size=.5, colour="black"), axis.line.y=element_line(size=.5, colour="black"), plot.title = element_text(vjust = -8.5,hjust = 0.1), axis.title.y =element_text(size = 20,colour = "black"), axis.title.x =element_text(size = 20,colour = "black"), axis.text = element_text(size = 20), axis.text.x = element_text(colour = "black",size = 12), axis.text.y = element_text(colour = "black",size = 12), legend.text = element_text(size = 15) #legend.position = "none"#是否删除图例 ) main_theme = theme(panel.background=element_blank(), panel.grid=element_blank(), axis.line.x=element_line(size=.5, colour="black"), axis.line.y=element_line(size=.5, colour="black"), axis.ticks=element_line(color="black"), axis.text.x =element_text(color="black",angle=, size=15), axis.text.y =element_text(color="black",angle=, size=15), axis.title.y =element_text(size = 15,colour = "black"), axis.title.x =element_text(size = 15,colour = "black"), legend.position="right", legend.background=element_blank(), legend.key=element_blank(), legend.text= element_text(size=15), text=element_text(family="sans", size=)) p = ggplot(data_all, aes(x=variable, y = value, fill = Clin,order=FALSE)) + geom_bar(stat = "identity",position="fill", width=0.7)+ scale_fill_manual(values=c("#0066CC", "#99CCFF", "#9933FF", "#330099"))+#"" #scale_y_continuous(labels = scales::percent) + ylab("Relative abundance(%)")+main_theme p ggsave("men.pdf",p,width = 5.5,height = 6 ) ##################################################cluster 1与cluster2 差异分析################################################################# dir.create(paste0(Work_dir,"/不同亚型差异分析")) clust_meta <- expr_meta #cluster 1与cluster2 差异分析 meta <- expr_meta %>% select(Patient,everything()) expr_data <- expr[,substr(colnames(LUAD_expr),1,12) %in% meta$Patient] expr_data <- expr_data[,match(meta$Patient,substr(colnames(expr_data),1,12))] group_list=ifelse(meta$cluster == "1","Cluster1","Cluster2") %>% factor(.,levels = c("Cluster1","Cluster2"),ordered = F) table(group_list) design <- model.matrix(~0+group_list) colnames(design)= levels(group_list) rownames(design)= colnames(expr_data) fit <- lmFit(expr_data, design) constrasts = paste((levels(group_list)),collapse = "-");constrasts cont.matrix <- makeContrasts(contrasts=constrasts,levels = design) fit2=contrasts.fit(fit,cont.matrix) fit2=eBayes(fit2) DEG_cluster = topTable(fit2, coef=constrasts, n=Inf) DEG_cluster = na.omit(DEG_cluster) logFC_cutoff <- 1#可适当调整为0.05 k1 = (DEG_cluster$P.Value < 0.05)&(DEG_cluster$logFC < -logFC_cutoff) k2 = (DEG_cluster$P.Value < 0.05)&(DEG_cluster$logFC > logFC_cutoff) DEG_cluster$change = ifelse(k1,"DOWN",ifelse(k2,"UP","NOT")) table(DEG_cluster$change) head(DEG_cluster) write.table(DEG_cluster,"./不同亚型差异分析/cluster_mRNA_DEG.txt") DEG_cluster <- read.table("./不同亚型差异分析/cluster_mRNA_DEG.txt",sep = "\ ",header = T) dat <- expr_data cg3_cluster = rownames(DEG_cluster)[DEG_cluster$change !="NOT"] h3 = draw_heatmap(dat[cg3_cluster,],group_list,n_cutoff = 2,legend = T,annotation_legend = T,cluster_cols = F,scale = T,color_an = c( "#66C2A5", "#FC8D62")) h3 ggsave("./不同亚型差异分析/fig3B.cluster差异基因热图.pdf", h3, width = 10, height = 6) v3 = draw_volcano(DEG_cluster,pkg = 3,logFC_cutoff = 1) v3 ggsave("./不同亚型差异分析/fig3A.cluster差异基因火山图.pdf", v3, width = 10, height = 6) ##富集分析 library(clusterProfiler) library(ggplot2) library(enrichplot) gene_con <- cg3_cluster gene<-bitr(gene_con,fromType = 'SYMBOL',toType = 'ENTREZID',OrgDb = 'org.Hs.eg.db') #基因名ID转换,把基因名转换成ENTREZID GO<-enrichGO( gene$ENTREZID, OrgDb = 'org.Hs.eg.db', keyType = "ENTREZID", ont = "ALL", pvalueCutoff = 0.5, pAdjustMethod = "BH", qvalueCutoff = 0.05, readable = TRUE ) #GO富集 KEGG<-enrichKEGG( gene$ENTREZID, organism = "hsa", keyType = "kegg", pvalueCutoff = 0.05, pAdjustMethod = "BH", qvalueCutoff = 0.5, ) #KEGG富集 barplot(GO, split="ONTOLOGY")+facet_grid(ONTOLOGY~., scale="free") #GO聚类条形图 go <- as.data.frame(GO) write.table(file = "./不同亚型差异分析/go_mRNA.txt",go) ggsave("./不同亚型差异分析/图4A.GO富集条形图.pdf", width = 10, height = 6) barplot(KEGG,title = 'KEGG Pathway') #KEGG聚类条形图 kegg <- as.data.frame(KEGG) write.table(file = "./4不同亚型差异分析/kegg_mRNA.txt",kegg) ggsave("./不同亚型差异分析/图4A.KEGG富集条形图.pdf", width = 10, height = 6) ##################################################风险模型的构建################################################################# dir.create(paste0(Work_dir,"/风险模型的构建")) normalized <- expr tar <- cg3_cluster tcgaReplicateFilter = function(tsb, analyte_target=c("DNA","RNA"), decreasing=TRUE, analyte_position=20, plate=c(22,25), portion=c(18,19), filter_FFPE=FALSE, full_barcode=FALSE){ # basically, user provide tsb and analyte_target is fine. If you # want to filter FFPE samples, please set filter_FFPE and full_barcode # all to TRUE, and tsb must have nchar of 28 analyte_target = match.arg(analyte_target) # Strings in R are largely lexicographic # see ??base::Comparison # filter FFPE samples # provide by if(full_barcode & filter_FFPE){ ffpe = c("TCGA-44-2656-01B-06D-A271-08", "TCGA-44-2656-01B-06D-A273-01", "TCGA-44-2656-01B-06D-A276-05", "TCGA-44-2656-01B-06D-A27C-26", "TCGA-44-2656-01B-06R-A277-07", "TCGA-44-2662-01B-02D-A271-08", "TCGA-44-2662-01B-02D-A273-01", "TCGA-44-2662-01B-02R-A277-07", "TCGA-44-2665-01B-06D-A271-08", "TCGA-44-2665-01B-06D-A273-01", "TCGA-44-2665-01B-06D-A276-05", "TCGA-44-2665-01B-06R-A277-07", "TCGA-44-2666-01B-02D-A271-08", "TCGA-44-2666-01B-02D-A273-01", "TCGA-44-2666-01B-02D-A276-05", "TCGA-44-2666-01B-02D-A27C-26", "TCGA-44-2666-01B-02R-A277-07", "TCGA-44-2668-01B-02D-A271-08", "TCGA-44-2668-01B-02D-A273-01", "TCGA-44-2668-01B-02D-A276-05", "TCGA-44-2668-01B-02D-A27C-26", "TCGA-44-2668-01B-02R-A277-07", "TCGA-44-3917-01B-02D-A271-08", "TCGA-44-3917-01B-02D-A273-01", "TCGA-44-3917-01B-02D-A276-05", "TCGA-44-3917-01B-02D-A27C-26", "TCGA-44-3917-01B-02R-A277-07", "TCGA-44-3918-01B-02D-A271-08", "TCGA-44-3918-01B-02D-A273-01", "TCGA-44-3918-01B-02D-A276-05", "TCGA-44-3918-01B-02D-A27C-26", "TCGA-44-3918-01B-02R-A277-07", "TCGA-44-4112-01B-06D-A271-08", "TCGA-44-4112-01B-06D-A273-01", "TCGA-44-4112-01B-06D-A276-05", "TCGA-44-4112-01B-06D-A27C-26", "TCGA-44-4112-01B-06R-A277-07", "TCGA-44-5645-01B-04D-A271-08", "TCGA-44-5645-01B-04D-A273-01", "TCGA-44-5645-01B-04D-A276-05", "TCGA-44-5645-01B-04D-A27C-26", "TCGA-44-5645-01B-04R-A277-07", "TCGA-44-6146-01B-04D-A271-08", "TCGA-44-6146-01B-04D-A273-01", "TCGA-44-6146-01B-04D-A276-05", "TCGA-44-6146-01B-04D-A27C-26", "TCGA-44-6146-01B-04R-A277-07", "TCGA-44-6146-01B-04R-A27D-13", "TCGA-44-6147-01B-06D-A271-08", "TCGA-44-6147-01B-06D-A273-01", "TCGA-44-6147-01B-06D-A276-05", "TCGA-44-6147-01B-06D-A27C-26", "TCGA-44-6147-01B-06R-A277-07", "TCGA-44-6147-01B-06R-A27D-13", "TCGA-44-6775-01C-02D-A271-08", "TCGA-44-6775-01C-02D-A273-01", "TCGA-44-6775-01C-02D-A276-05", "TCGA-44-6775-01C-02D-A27C-26", "TCGA-44-6775-01C-02R-A277-07", "TCGA-44-6775-01C-02R-A27D-13", "TCGA-A6-2674-01B-04D-A270-10", "TCGA-A6-2674-01B-04R-A277-07", "TCGA-A6-2677-01B-02D-A270-10", "TCGA-A6-2677-01B-02D-A274-01", "TCGA-A6-2677-01B-02D-A27A-05", "TCGA-A6-2677-01B-02D-A27E-26", "TCGA-A6-2677-01B-02R-A277-07", "TCGA-A6-2684-01C-08D-A270-10", "TCGA-A6-2684-01C-08D-A274-01", "TCGA-A6-2684-01C-08D-A27A-05", "TCGA-A6-2684-01C-08D-A27E-26", "TCGA-A6-2684-01C-08R-A277-07", "TCGA-A6-3809-01B-04D-A270-10", "TCGA-A6-3809-01B-04D-A274-01", "TCGA-A6-3809-01B-04D-A27A-05", "TCGA-A6-3809-01B-04D-A27E-26", "TCGA-A6-3809-01B-04R-A277-07", "TCGA-A6-3810-01B-04D-A270-10", "TCGA-A6-3810-01B-04D-A274-01", "TCGA-A6-3810-01B-04D-A27A-05", "TCGA-A6-3810-01B-04D-A27E-26", "TCGA-A6-3810-01B-04R-A277-07", "TCGA-A6-5656-01B-02D-A270-10", "TCGA-A6-5656-01B-02D-A274-01", "TCGA-A6-5656-01B-02D-A27A-05", "TCGA-A6-5656-01B-02D-A27E-26", "TCGA-A6-5656-01B-02R-A277-07", "TCGA-A6-5656-01B-02R-A27D-13", "TCGA-A6-5659-01B-04D-A270-10", "TCGA-A6-5659-01B-04D-A274-01", "TCGA-A6-5659-01B-04D-A27A-05", "TCGA-A6-5659-01B-04D-A27E-26", "TCGA-A6-5659-01B-04R-A277-07", "TCGA-A6-6650-01B-02D-A270-10", "TCGA-A6-6650-01B-02D-A274-01", "TCGA-A6-6650-01B-02D-A27A-05", "TCGA-A6-6650-01B-02D-A27E-26", "TCGA-A6-6650-01B-02R-A277-07", "TCGA-A6-6650-01B-02R-A27D-13", "TCGA-A6-6780-01B-04D-A270-10", "TCGA-A6-6780-01B-04D-A274-01", "TCGA-A6-6780-01B-04D-A27A-05", "TCGA-A6-6780-01B-04D-A27E-26", "TCGA-A6-6780-01B-04R-A277-07", "TCGA-A6-6780-01B-04R-A27D-13", "TCGA-A6-6781-01B-06D-A270-10", "TCGA-A6-6781-01B-06D-A274-01", "TCGA-A6-6781-01B-06D-A27A-05", "TCGA-A6-6781-01B-06R-A277-07", "TCGA-A6-6781-01B-06R-A27D-13", "TCGA-A7-A0DB-01C-02D-A272-09", "TCGA-A7-A0DB-01C-02R-A277-07", "TCGA-A7-A0DB-01C-02R-A27D-13", "TCGA-A7-A13D-01B-04D-A272-09", "TCGA-A7-A13D-01B-04R-A277-07", "TCGA-A7-A13D-01B-04R-A27D-13", "TCGA-A7-A13E-01B-06D-A272-09", "TCGA-A7-A13E-01B-06R-A277-07", "TCGA-A7-A13E-01B-06R-A27D-13", "TCGA-A7-A26E-01B-06D-A272-09", "TCGA-A7-A26E-01B-06D-A275-01", "TCGA-A7-A26E-01B-06D-A27B-05", "TCGA-A7-A26E-01B-06R-A277-07", "TCGA-A7-A26E-01B-06R-A27D-13", "TCGA-A7-A26J-01B-02D-A272-09", "TCGA-A7-A26J-01B-02D-A275-01", "TCGA-A7-A26J-01B-02D-A27B-05", "TCGA-A7-A26J-01B-02D-A27F-26", "TCGA-A7-A26J-01B-02R-A277-07", "TCGA-A7-A26J-01B-02R-A27D-13", "TCGA-B2-3923-01B-10D-A270-10", "TCGA-B2-3923-01B-10R-A277-07", "TCGA-B2-3923-01B-10R-A27D-13", "TCGA-B2-3924-01B-03D-A270-10", "TCGA-B2-3924-01B-03D-A274-01", "TCGA-B2-3924-01B-03D-A27A-05", "TCGA-B2-3924-01B-03D-A27E-26", "TCGA-B2-3924-01B-03R-A277-07", "TCGA-B2-3924-01B-03R-A27D-13", "TCGA-B2-5633-01B-04D-A270-10", "TCGA-B2-5633-01B-04D-A274-01", "TCGA-B2-5633-01B-04D-A27A-05", "TCGA-B2-5633-01B-04D-A27E-26", "TCGA-B2-5633-01B-04R-A277-07", "TCGA-B2-5633-01B-04R-A27D-13", "TCGA-B2-5635-01B-04D-A270-10", "TCGA-B2-5635-01B-04D-A274-01", "TCGA-B2-5635-01B-04D-A27A-05", "TCGA-B2-5635-01B-04D-A27E-26", "TCGA-B2-5635-01B-04R-A277-07", "TCGA-B2-5635-01B-04R-A27D-13", "TCGA-BK-A0CA-01B-02D-A272-09", "TCGA-BK-A0CA-01B-02D-A275-01", "TCGA-BK-A0CA-01B-02D-A27B-05", "TCGA-BK-A0CA-01B-02D-A27F-26", "TCGA-BK-A0CA-01B-02R-A277-07", "TCGA-BK-A0CA-01B-02R-A27D-13", "TCGA-BK-A0CC-01B-04D-A272-09", "TCGA-BK-A0CC-01B-04D-A275-01", "TCGA-BK-A0CC-01B-04D-A27B-05", "TCGA-BK-A0CC-01B-04R-A277-07", "TCGA-BK-A0CC-01B-04R-A27D-13", "TCGA-BK-A139-01C-08D-A272-09", "TCGA-BK-A139-01C-08D-A275-01", "TCGA-BK-A139-01C-08D-A27B-05", "TCGA-BK-A139-01C-08D-A27F-26", "TCGA-BK-A139-01C-08R-A277-07", "TCGA-BK-A139-01C-08R-A27D-13", "TCGA-BK-A26L-01C-04D-A272-09", "TCGA-BK-A26L-01C-04D-A275-01", "TCGA-BK-A26L-01C-04D-A27B-05", "TCGA-BK-A26L-01C-04D-A27F-26", "TCGA-BK-A26L-01C-04R-A277-07", "TCGA-BK-A26L-01C-04R-A27D-13", "TCGA-BL-A0C8-01B-04D-A271-08", "TCGA-BL-A0C8-01B-04D-A273-01", "TCGA-BL-A0C8-01B-04D-A276-05", "TCGA-BL-A0C8-01B-04D-A27C-26", "TCGA-BL-A0C8-01B-04R-A277-07", "TCGA-BL-A0C8-01B-04R-A27D-13", "TCGA-BL-A13I-01B-04D-A271-08", "TCGA-BL-A13I-01B-04D-A276-05", "TCGA-BL-A13I-01B-04R-A277-07", "TCGA-BL-A13I-01B-04R-A27D-13", "TCGA-BL-A13J-01B-04D-A271-08", "TCGA-BL-A13J-01B-04D-A273-01", "TCGA-BL-A13J-01B-04D-A276-05", "TCGA-BL-A13J-01B-04D-A27C-26", "TCGA-BL-A13J-01B-04R-A277-07", "TCGA-BL-A13J-01B-04R-A27D-13") tsb = setdiff(tsb, tsb[which(tsb %in% ffpe)]) } # find repeated samples sampleID = substr(tsb, start = 1, stop = 15) dp_samples = unique(sampleID[duplicated(sampleID)]) if(length(dp_samples)==0){ message("ooo Not find any duplicated barcodes, return original input..") tsb }else{ uniq_tsb = tsb[! sampleID %in% dp_samples] dp_tsb = setdiff(tsb, uniq_tsb) add_tsb = c() # analyte = substr(dp_tsb, start = analyte_position, stop = analyte_position) # if analyte_target = "DNA" # analyte: D > G,W,X if(analyte_target == "DNA"){ for(x in dp_samples){ mulaliquots = dp_tsb[substr(dp_tsb,1,15) == x] analytes = substr(mulaliquots, start = analyte_position, stop = analyte_position) if(any(analytes == "D") & !(all(analytes == "D"))){ aliquot = mulaliquots[which(analytes == "D")] if (length(aliquot) != 1) { # Still have repeats # Remove the samples and add repeated id back to list dp_tsb = c(setdiff(dp_tsb, mulaliquots), aliquot) } else { # If have no repeats # Just remove samples from list and # add unique id to result list add_tsb = c(add_tsb, aliquot) dp_tsb = setdiff(dp_tsb, mulaliquots) } } } }else{ # if analyte_target = "RNA" # analyte: H > R > T for(x in dp_samples){ mulaliquots = dp_tsb[substr(dp_tsb,1,15) == x] analytes = substr(mulaliquots, start = analyte_position, stop = analyte_position) if(any(analytes == "H") & !(all(analytes == "H"))){ aliquot = mulaliquots[which(analytes == "H")] if (length(aliquot) != 1) { # Still have repeats # Remove the samples and add repeated id back to list dp_tsb = c(setdiff(dp_tsb, mulaliquots), aliquot) } else { # If have no repeats # Just remove samples from list and # add unique id to result list add_tsb = c(add_tsb, aliquot) dp_tsb = setdiff(dp_tsb, mulaliquots) } }else if(any(analytes == "R") & !(all(analytes == "R"))){ aliquot = mulaliquots[which(analytes == "R")] if (length(aliquot) != 1) { # Still have repeats # Remove the samples and add repeated id back to list dp_tsb = c(setdiff(dp_tsb, mulaliquots), aliquot) } else { # If have no repeats # Just remove samples from list and # add unique id to result list add_tsb = c(add_tsb, aliquot) dp_tsb = setdiff(dp_tsb, mulaliquots) } }else if(any(analytes == "T") & !(all(analytes == "T"))){ aliquot = mulaliquots[which(analytes == "T")] if (length(aliquot) != 1) { # Still have repeats # Remove the samples and add repeated id back to list dp_tsb = c(setdiff(dp_tsb, mulaliquots), aliquot) } else { # If have no repeats # Just remove samples from list and # add unique id to result list add_tsb = c(add_tsb, aliquot) dp_tsb = setdiff(dp_tsb, mulaliquots) } } } } if(length(dp_tsb) == 0){ message("ooo Filter barcodes successfully!") c(uniq_tsb, add_tsb) }else{ # filter according to portion number sampleID_res = substr(dp_tsb, start=1, stop=15) dp_samples_res = unique(sampleID_res[duplicated(sampleID_res)]) for(x in dp_samples_res){ mulaliquots = dp_tsb[substr(dp_tsb,1,15) == x] portion_codes = substr(mulaliquots, start = portion[1], stop = portion[2]) portion_keep = sort(portion_codes, decreasing = decreasing)[1] if(!all(portion_codes == portion_keep)){ if(length(which(portion_codes == portion_keep)) == 1){ add_tsb = c(add_tsb, mulaliquots[which(portion_codes == portion_keep)]) dp_tsb = setdiff(dp_tsb, mulaliquots) }else{ dp_tsb = setdiff(dp_tsb, mulaliquots[which(portion_codes != portion_keep)]) } } } if(length(dp_tsb)==0){ message("ooo Filter barcodes successfully!") c(uniq_tsb, add_tsb) }else{ # filter according to plate number sampleID_res = substr(dp_tsb, start=1, stop=15) dp_samples_res = unique(sampleID_res[duplicated(sampleID_res)]) for(x in dp_samples_res){ mulaliquots = dp_tsb[substr(dp_tsb,1,15) == x] plate_codes = substr(mulaliquots, start = plate[1], stop = plate[2]) plate_keep = sort(plate_codes, decreasing = decreasing)[1] add_tsb = c(add_tsb, mulaliquots[which(plate_codes == plate_keep)]) dp_tsb = setdiff(dp_tsb, mulaliquots) } if(length(dp_tsb)==0){ message("ooo Filter barcodes successfully!") c(uniq_tsb, add_tsb) }else{ message("ooo barcodes ", dp_tsb, " failed in filter process, other barcodes will be returned.") c(uniq_tsb, add_tsb) } } } } } dup.filt <- tcgaReplicateFilter(colnames(normalized),'RNA') normalized1 <- data.frame(normalized,check.names = F)[,dup.filt] rownames(normalized1) <- rownames(normalized) tumor <- colnames(normalized1)[which(substr(colnames(normalized1),14,16)=='01A' | substr(colnames(normalized1),14,16)=='02A')] tarexp <- normalized1[tar,tumor] %>% apply(.,2,function(x){log2(x+1)}) %>% t(.) %>% data.frame(.,check.names = F) tarexp$Patient <- substr(rownames(tarexp),1,12) tarexp <- tarexp %>% dplyr::select(Patient,everything()) tarexp$fullid <- rownames(tarexp) tarexp <- tarexp %>% dplyr::select(fullid,everything()) clinical <- read.table('./TCGA_clinical.txt',header = T,sep = '\t',check.names = F) colnames(clinical)[1] <- 'Patient' OSinput <- merge(clinical,tarexp,by = 'Patient') %>% na.omit(.) OSinput <- OSinput[order(OSinput$fullid),] rownames(OSinput) <- OSinput$fullid tmp <- OSinput[,c('Patient','fullid')] tmp1 <- aggregate(.~Patient,tmp,min) OSinput <- OSinput[tmp1$fullid,] colnames(OSinput)[2] <- "DFStatus" colnames(OSinput)[3] <- "DFStime" write.table(OSinput,'./风险模型的构建/OSinput.txt',sep = '\t',quote = F,row.names = F) load("GSE84426.rdata") GSE41116.cli <- data_frame(Acc = pdata$geo_accession,DFStime = as.numeric(pdata$`duration overall survival:ch1`), DFStatus= pdata$`death:ch1`) GSE41116.cli$DFStime <- GSE41116.cli$DFStime*30 GSE41116.final <- log2(rnaCounts) max(GSE41116.final) table(rownames(rnaCounts)%in% tar) save(LUAD_expr,tar, OSinput ,file = "TCGA_OSinput.RDATA") load("TCGA_OSinput.RDATA") rtt <- OSinput rtt <- rtt %>% dplyr::select(fullid,everything())#排序 rtt$Type <-'tumor' library(tidyr) library(dplyr) library(stringi) library(stringr) library(survival) library(survminer) library(glmnet) library(survivalROC) library(rms) set.seed(1351) index <- caret::createDataPartition(rtt[,'Type'], p =0.7) train <- rtt[index$Resample1,] test<- rtt[-index$Resample1,] write.table(train,'train.coxinput.txt',row.names = F,col.names = T,quote=F,sep = '\t') write.table(test,'test.coxinput.txt',row.names = F,col.names = T,quote=F,sep = '\t') coxinput <- train coxinput <- coxinput[,-ncol(coxinput)] #2.单因素COX分析#### outTab=data.frame() #从第一个基因开始循环,示例数据第一个基因位于第7列 for(i in colnames(coxinput[,7:ncol(coxinput)])){ cox <- coxph(Surv(DFStime, DFStatus) ~ coxinput[,i], data = coxinput) coxSummary = summary(cox) coxP=coxSummary$coefficients[,"Pr(>|z|)"] outTab=rbind(outTab, cbind(id=i, HR=coxSummary$conf.int[,"exp(coef)"], HR.95L=coxSummary$conf.int[,"lower .95"], HR.95H=coxSummary$conf.int[,"upper .95"], pvalue=coxSummary$coefficients[,"Pr(>|z|)"]) ) } write.table(outTab[outTab$pvalue <= 0.05,],file="./4风险模型的构建/uniCox.txt",sep="\t",row.names=F,quote=F) bioForest(coxFile="./4风险模型的构建/uniCox.txt", forestFile="./4风险模型的构建/Fig4A.uniCox_forest.pdf", forestCol=c("red","green")) outTab[,c(2:ncol(outTab))] <- apply(outTab[,c(2:ncol(outTab))],2,function(x){as.numeric(x)}) outTab <- outTab[order(outTab$pvalue),] #3.LASSO#### #outTab$pvalue <- sapply(outTab$pvalue,function(x){as.numeric(x)}) unicox <- outTab[outTab$pvalue <= 0.05,]$id if (length(unicox)<=1){ next } lassoinput <- coxinput[,c('fullid','DFStime','DFStatus',unicox)] #去掉生存时间为0的数据 #lassoinput <- lassoinput[!lassoinput$DFStime==0,] x=as.matrix(lassoinput[,c(4:ncol(lassoinput))]) y=data.matrix(Surv(lassoinput$DFStime,lassoinput$DFStatus)) set.seed(2333)#有时间的小伙伴可以在这里再进行一个小循环 fit <- glmnet(x, y, family = "cox", maxit = 5000) cvfit <- cv.glmnet(x, y, family="cox", maxit = 5000) pdf(file = "./风险模型的构建/Fig4B.lambda.pdf") plot(fit, xvar = "lambda", label = TRUE) dev.off() pdf(file = "./风险模型的构建/Fig4C.cvfit.pdf") plot(cvfit) abline(v=log(c(cvfit$lambda.min)),lty="dashed")#8.8是文字在横轴的位置,可以AI调整 text(log(cvfit$lambda.min),8.8,cex=1.5, labels = paste0('lambda.min = \n',round(cvfit$lambda.min,2)))#8.8是文字在横轴的位置,可以AI调整 text(log(cvfit$lambda.1se),8.8,cex=1.5, labels = paste0('lambda.lse = \n',round(cvfit$lambda.1se,2))) dev.off() #coef是系数 coef <- coef(fit, s = cvfit$lambda.min) index <- which(as.matrix(coef)!= 0) actCoef <- coef[index] lassoGene=row.names(coef)[index] if (length(lassoGene)<=1){ next } myCoefs <- coef(cvfit, s="lambda.min") write.csv(file = "./风险模型的构建/LASSO_ceo.csv",as.matrix(myCoefs)) #4.预测训练集、测试集、验证集的风险评分#### #训练集风险评分 rs.tcga <- apply(lassoinput[,lassoGene], 1, function(x) {x %*% myCoefs@x}) lassoOut <- lassoinput[,c('fullid','DFStatus','DFStime',lassoGene)] lassoOut$riskScore <- rs.tcga rownames(lassoOut) <- coxinput$Sample lassoOut$risk <- ifelse(lassoOut$riskScore>=median(lassoOut$riskScore),'High_risk','Low_risk') lassoOut <- lassoOut[order(lassoOut$risk,decreasing = T),] lassoOut <- lassoOut[,c('fullid','DFStime','DFStatus',lassoGene,'riskScore','risk')] write.table(lassoOut, file="risk.txt",sep="\t",quote=F,row.names=T,col.names=T) #测试集风险评分 test.exp <- read.table('test.coxinput.txt',header = T,sep = '\t',check.names = F) rs.tcga.test <- apply(test.exp[,lassoGene], 1, function(x) {x %*% myCoefs@x}) test.exp$riskScore <- rs.tcga.test test.exp$risk <- ifelse(test.exp$riskScore>=median(test.exp$riskScore),'High_risk','Low_risk') test.exp <- test.exp[order(test.exp$risk,decreasing = T),] test.exp <- test.exp[,c('fullid','DFStime','DFStatus',lassoGene,'riskScore','risk')] write.table(test.exp, file="test.risk.txt",sep="\t",quote=F,row.names=F) #验证集风险评分 #当外部验证中不包含模型基因时,进入下一循环 if (!length(lassoGene[lassoGene %in% rownames(GSE41116.final)]) == length(lassoGene)){ next } GSE41116.exp.tar <- GSE41116.final[lassoGene,] %>% t(.) %>% data.frame(.,check.names = F) GSE41116.exp.tar$Acc <- rownames(GSE41116.exp.tar) GSE41116.surv <- GSE41116.cli[,c('Acc','DFStime','DFStatus')] %>% na.omit(.) GSE41116.data <- merge(GSE41116.surv,GSE41116.exp.tar,by='Acc') rownames(GSE41116.data) <- GSE41116.data$Acc write.table(GSE41116.data,'GSE41116.coxinput.txt',sep = '\t',quote = F,row.names = F) #验证集风险评分 vali.score <- apply(GSE41116.data[,lassoGene], 1, function(x) {x %*% myCoefs@x}) GSE41116.data$riskScore <- vali.score GSE41116.data$risk <- ifelse(GSE41116.data$riskScore>=median(GSE41116.data$riskScore),'High_risk','Low_risk') GSE41116.data <- GSE41116.data[order(GSE41116.data$risk,decreasing = T),] GSE41116.data <- GSE41116.data[,c('Acc','DFStime','DFStatus',lassoGene,'riskScore','risk')] write.table(GSE41116.data, file="GSE41116.risk.txt",sep="\t",quote=F,row.names=F) #需要看一下验证集的最大风险评分是否极端 GSE41116.max <- max(GSE41116.data$riskScore) #5.高低风险组的预后#### #训练集 survival <- read.table('risk.txt',sep = '\t',check.names = F,header = T) diff <- survdiff(Surv(DFStime,DFStatus) ~ risk, data = survival) pValue1=format((1-pchisq(diff$chisq,df=1)),digits=15) pValue1=as.numeric(pValue1) fit <- survfit(Surv(DFStime,DFStatus) ~ risk,data = survival) p1 <- ggsurvplot(fit, data = survival, #ggtheme = theme_bw(), #想要网格就运行这 conf.int = F, #不画置信区间,想画置信区间就把F改成T conf.int.style = "ribbon",#置信区间的类型,还可改为ribbon risk.table = T, # 绘制累计风险曲线 #tables.theme = theme_void(), tables.height = 0.25, censor = T, #不显示观察值所在的位置 palette = c("#D95F02","#1B9E77"), #线的颜色对应高、低(自定义调色板) xlab = "Time (day)", ylab = "Survival probability", legend.title = "TCGA Train", font.legend = 9, pval = T ) pdf('./风险模型的构建/fig4D训练集生存分析.pdf',width=7,height = 5) print(p1,newpage = FALSE) dev.off() #画三段风险热图 #画顶部散点图 data <- survival rs <- data$riskScore names(rs) <- row.names(data) rs_data <- data.frame(x=1:length(rs),rs=as.numeric(sort(rs))) # 用中值分组 rs_data$Risk <- ifelse(rs_data$rs>=median(rs_data$rs), "High-risk", "Low-risk") head(rs_data) plot.A <- ggplot(rs_data, aes(x=x,y=rs))+ geom_point(aes(col=Risk),size=0.5)+ scale_color_manual(labels=c("High-risk","Low-risk"), #guide_legend(guide = NULL), #如果不想画图例就删掉# name="Risk score", values =c("#DC0000FF", "#00A087FF")) + # 画竖向虚线 geom_segment(aes(x = sum(rs_data$Risk=="Low-risk"), y = 0, xend = sum(rs_data$Risk=="Low-risk"), yend = max(rs_data$rs)), linetype="dashed", size = 0.6)+ # 画横线 geom_segment(aes(x=0,y=median(rs_data$rs), xend=nrow(rs_data), yend=median(rs_data$rs)),linetype="dashed", size = 0.3)+ # 写文字Cutoff: geom_text(aes(x=sum(rs_data$Risk=="Low-risk")/2, y=median(rs_data$rs)+1, label=paste0("Cutoff: ",round(median(rs_data$rs),3))), col ="black",size = 4,alpha=0.8)+ theme(axis.title.x=element_blank()) + labs(y="Risk score",x="",fill="Risk") + #scale_colour_discrete(name="Risk scores") + theme_classic() + theme(axis.ticks.x=element_blank(), axis.line = element_blank(), #如果想像example2那样画坐标轴,就删掉这行 axis.text.x=element_blank()) plot.A # follow-up,用于画中间B图 surv_data <- data.frame(x=1:length(rs), t=data[names(sort(rs)),'DFStime'], s=data[names(sort(rs)),'DFStatus']) surv_data$Status <- as.factor(ifelse(surv_data$s==0,'alive','death')) table(surv_data$Status) head(surv_data) plot.B <- ggplot(surv_data,aes(x=x,y=t))+ geom_point(aes(col=Status),size=0.5)+ geom_vline(aes(xintercept=sum(rs_data$Risk=="Low-risk")),size=0.6,linetype="dashed")+ scale_x_continuous(limits = c(0,NA),expand = c(0,0))+ scale_color_manual(labels=c('alive','death'), values =c("#00A087FF","#DC0000FF"))+ labs(y="OS(days)",x="")+ theme_classic()+ theme(axis.ticks.x=element_blank(), axis.line = element_blank(), #如果想像example2那样不画坐标轴,就删掉前面的# axis.text.x=element_blank()) plot.B # 提取signature对应的data,并按risk score排序,用于画底部热图 exp_data <- data[names(sort(rs)),which(colnames(data) %in% tar)] tmp <- t(scale(exp_data)) tmp[tmp > 1] = 1 tmp[tmp < -1] = -1 reorder_cormat <- function(cormat){ dd <- dist(cormat) hc <- hclust(dd,method = "average") cormat <-cormat[hc$order,] } tmp1 <- reorder_cormat(tmp) tmp1 <- rbind(tmp1,ifelse(rs_data$Risk=="Low-risk",-1.5,1.5)) tmp.m <- melt(tmp1) tmp.m$Var2 <- factor(tmp.m$Var2,levels = colnames(tmp1)) p2 <-ggplot(tmp.m, aes(Var2, Var1),size=0.5) + geom_tile(aes(fill = value)) plot.C <- p2 + scale_fill_gradient2(name="Genes\nexpression", low="#00A087FF", high="#DC0000FF", mid="white") + labs(x = "", y = "")+ theme_classic()+ theme(legend.title = element_text(size = 12), legend.position = "right", axis.line = element_blank(), axis.ticks=element_blank(), axis.text.x=element_blank()) plot.C p <- cowplot::plot_grid(plot.A, plot.B, plot.C, #labels = c("B", "",""), # 或者按顺序标注ABC rel_heights = c(1,1,1), # 3个图的比例 #label_x=0, #label_y=1, align = 'v',ncol = 1, axis="lr", scale = c(1,1,1), greedy = F) p ggsave("./风险模型的构建/fig4E训练集三段风险热图.pdf",p,width = 6,height = 7) #测试集 survival <- read.table('test.risk.txt',sep = '\t',check.names = F,header = T) diff <- survdiff(Surv(DFStime,DFStatus) ~ risk, data = survival) pValue2=format(1-pchisq(diff$chisq,df=1),digits=5) pValue2=as.numeric(pValue2) fit <- survfit(Surv(DFStime,DFStatus) ~ risk,data = survival) p1 <- ggsurvplot(fit, data = survival, #ggtheme = theme_bw(), #想要网格就运行这 conf.int = F, #不画置信区间,想画置信区间就把F改成T conf.int.style = "ribbon",#置信区间的类型,还可改为ribbon risk.table = T, # 绘制累计风险曲线 #tables.theme = theme_void(), tables.height = 0.25, censor = T, #不显示观察值所在的位置 palette = c("#D95F02","#1B9E77"), #线的颜色对应高、低(自定义调色板) xlab = "Time (day)", ylab = "Survival probability", legend.title = "TCGA Test", font.legend = 9, pval = T ) pdf('./风险模型的构建/fig4F内部测试集生存分析.pdf',width=7,height = 5) print(p1,newpage = FALSE) dev.off() #画三段风险热图 #画顶部散点图 data <- survival rs <- data$riskScore names(rs) <- row.names(data) rs_data <- data.frame(x=1:length(rs),rs=as.numeric(sort(rs))) # 用中值分组 rs_data$Risk <- ifelse(rs_data$rs>=median(rs_data$rs), "High-risk", "Low-risk") head(rs_data) plot.A <- ggplot(rs_data, aes(x=x,y=rs))+ geom_point(aes(col=Risk),size=0.5)+ scale_color_manual(labels=c("High-risk","Low-risk"), #guide_legend(guide = NULL), #如果不想画图例就删掉# name="Risk score", values =c("#DC0000FF", "#00A087FF")) + # 画竖向虚线 geom_segment(aes(x = sum(rs_data$Risk=="Low-risk"), y = 0, xend = sum(rs_data$Risk=="Low-risk"), yend = max(rs_data$rs)), linetype="dashed", size = 0.6)+ # 画横线 geom_segment(aes(x=0,y=median(rs_data$rs), xend=nrow(rs_data), yend=median(rs_data$rs)),linetype="dashed", size = 0.3)+ # 写文字Cutoff: geom_text(aes(x=sum(rs_data$Risk=="Low-risk")/2, y=median(rs_data$rs)+1, label=paste0("Cutoff: ",round(median(rs_data$rs),3))), col ="black",size = 4,alpha=0.8)+ theme(axis.title.x=element_blank()) + labs(y="Risk score",x="",fill="Risk") + #scale_colour_discrete(name="Risk scores") + theme_classic() + theme(axis.ticks.x=element_blank(), axis.line = element_blank(), #如果想像example2那样画坐标轴,就删掉这行 axis.text.x=element_blank()) plot.A # follow-up,用于画中间B图 surv_data <- data.frame(x=1:length(rs), t=data[names(sort(rs)),'DFStime'], s=data[names(sort(rs)),'DFStatus']) surv_data$Status <- as.factor(ifelse(surv_data$s==0,'alive','death')) table(surv_data$Status) head(surv_data) plot.B <- ggplot(surv_data,aes(x=x,y=t))+ geom_point(aes(col=Status),size=0.5)+ geom_vline(aes(xintercept=sum(rs_data$Risk=="Low-risk")),size=0.6,linetype="dashed")+ scale_x_continuous(limits = c(0,NA),expand = c(0,0))+ scale_color_manual(labels=c('alive','death'), values =c("#00A087FF","#DC0000FF"))+ labs(y="OS(days)",x="")+ theme_classic()+ theme(axis.ticks.x=element_blank(), axis.line = element_blank(), #如果想像example2那样不画坐标轴,就删掉前面的# axis.text.x=element_blank()) plot.B # 提取signature对应的data,并按risk score排序,用于画底部热图 exp_data <- data[names(sort(rs)),which(colnames(data) %in% tar)] tmp <- t(scale(exp_data)) tmp[tmp > 1] = 1 tmp[tmp < -1] = -1 reorder_cormat <- function(cormat){ dd <- dist(cormat) hc <- hclust(dd,method = "average") cormat <-cormat[hc$order,] } tmp1 <- reorder_cormat(tmp) tmp1 <- rbind(tmp1,ifelse(rs_data$Risk=="Low-risk",-1.5,1.5)) tmp.m <- melt(tmp1) tmp.m$Var2 <- factor(tmp.m$Var2,levels = colnames(tmp1)) p2 <-ggplot(tmp.m, aes(Var2, Var1),size=0.5) + geom_tile(aes(fill = value)) plot.C <- p2 + scale_fill_gradient2(name="Genes\nexpression", low="#00A087FF", high="#DC0000FF", mid="white") + labs(x = "", y = "")+ theme_classic()+ theme(legend.title = element_text(size = 12), legend.position = "right", axis.line = element_blank(), axis.ticks=element_blank(), axis.text.x=element_blank()) plot.C p <- cowplot::plot_grid(plot.A, plot.B, plot.C, #labels = c("B", "",""), # 或者按顺序标注ABC rel_heights = c(1,1,1), # 3个图的比例 #label_x=0, #label_y=1, align = 'v',ncol = 1, axis="lr", scale = c(1,1,1), greedy = F) p ggsave("./风险模型的构建/fig4G内部验证集三段风险热图.pdf",p,width = 6,height = 7) #验证集read.table('GSE41116.risk.txt',sep = '\t',check.names = F,header = T) survival <- GSE41116.data diff <- survdiff(Surv(DFStime,DFStatus) ~ risk, data = survival) pValue3=format((1-pchisq(diff$chisq,df=1)),digits=10) pValue3 <- as.numeric(pValue3) fit <- survfit(Surv(DFStime,DFStatus) ~ risk,data = survival) p1 <- ggsurvplot(fit, data = survival, #ggtheme = theme_bw(), #想要网格就运行这 conf.int = F, #不画置信区间,想画置信区间就把F改成T conf.int.style = "ribbon",#置信区间的类型,还可改为ribbon risk.table = T, # 绘制累计风险曲线 #tables.theme = theme_void(), tables.height = 0.25, censor = T, #不显示观察值所在的位置 palette = c("#D95F02","#1B9E77"), #线的颜色对应高、低(自定义调色板) xlab = "Time (day)", ylab = "Survival probability", legend.title = "GEO Test", font.legend = 9, pval = T ) pdf('./风险模型的构建/fig4H外部验证集生存分析.pdf',width=7,height = 5) print(p1,newpage = FALSE) dev.off() #画三段风险热图 #画顶部散点图 data <- survival rs <- data$riskScore names(rs) <- row.names(data) rs_data <- data.frame(x=1:length(rs),rs=as.numeric(sort(rs))) # 用中值分组 rs_data$Risk <- ifelse(rs_data$rs>=median(rs_data$rs), "High-risk", "Low-risk") head(rs_data) plot.A <- ggplot(rs_data, aes(x=x,y=rs))+ geom_point(aes(col=Risk),size=0.5)+ scale_color_manual(labels=c("High-risk","Low-risk"), #guide_legend(guide = NULL), #如果不想画图例就删掉# name="Risk score", values =c("#DC0000FF", "#00A087FF")) + # 画竖向虚线 geom_segment(aes(x = sum(rs_data$Risk=="Low-risk"), #y = 0, xend = sum(rs_data$Risk=="Low-risk"), yend = max(rs_data$rs)), linetype="dashed", size = 0.6)+ # 画横线 geom_segment(aes(x=0,y=median(rs_data$rs), xend=nrow(rs_data), yend=median(rs_data$rs)),linetype="dashed", size = 0.3)+ # 写文字Cutoff: geom_text(aes(x=sum(rs_data$Risk=="Low-risk")/2, y=median(rs_data$rs)+1, label=paste0("Cutoff: ",round(median(rs_data$rs),3))), col ="black",size = 4,alpha=0.8)+ theme(axis.title.x=element_blank()) + labs(y="Risk score",x="",fill="Risk") + #scale_colour_discrete(name="Risk scores") + theme_classic() + theme(axis.ticks.x=element_blank(), axis.line = element_blank(), #如果想像example2那样画坐标轴,就删掉这行 axis.text.x=element_blank()) plot.A # follow-up,用于画中间B图 surv_data <- data.frame(x=1:length(rs), t=data[names(sort(rs)),'DFStime'], s=data[names(sort(rs)),'DFStatus']) surv_data$Status <- as.factor(ifelse(surv_data$s==0,'alive','death')) table(surv_data$Status) head(surv_data) plot.B <- ggplot(surv_data,aes(x=x,y=t))+ geom_point(aes(col=Status),size=0.5)+ geom_vline(aes(xintercept=sum(rs_data$Risk=="Low-risk")),size=0.6,linetype="dashed")+ scale_x_continuous(limits = c(0,NA),expand = c(0,0))+ scale_color_manual(labels=c('alive','death'), values =c("#00A087FF","#DC0000FF"))+ labs(y="OS(days)",x="")+ theme_classic()+ theme(axis.ticks.x=element_blank(), axis.line = element_blank(), #如果想像example2那样不画坐标轴,就删掉前面的# axis.text.x=element_blank()) plot.B # 提取signature对应的data,并按risk score排序,用于画底部热图 exp_data <- data[names(sort(rs)),which(colnames(data) %in% tar)] tmp <- t(scale(exp_data)) tmp[tmp > 1] = 1 tmp[tmp < -1] = -1 reorder_cormat <- function(cormat){ dd <- dist(cormat) hc <- hclust(dd,method = "average") cormat <-cormat[hc$order,] } tmp1 <- reorder_cormat(tmp) tmp1 <- rbind(tmp1,ifelse(rs_data$Risk=="Low-risk",-1.5,1.5)) tmp.m <- melt(tmp1) tmp.m$Var2 <- factor(tmp.m$Var2,levels = colnames(tmp1)) p2 <-ggplot(tmp.m, aes(Var2, Var1),size=0.5) + geom_tile(aes(fill = value)) plot.C <- p2 + scale_fill_gradient2(name="Genes\nexpression", low="#00A087FF", high="#DC0000FF", mid="white") + labs(x = "", y = "")+ theme_classic()+ theme(legend.title = element_text(size = 12), legend.position = "right", axis.line = element_blank(), axis.ticks=element_blank(), axis.text.x=element_blank()) plot.C p <- cowplot::plot_grid(plot.A, plot.B, plot.C, #labels = c("B", "",""), # 或者按顺序标注ABC rel_heights = c(1,1,1), # 3个图的比例 #label_x=0, #label_y=1, align = 'v',ncol = 1, axis="lr", scale = c(1,1,1), greedy = F) p ggsave("./风险模型的构建/fig4I外部验证集三段风险热图.pdf",p,width = 6,height = 7) #6.高低风险组的ROC#### #训练集 riskdata <- read.table('risk.txt',header = T,sep = '\t',check.names = F) mycol <- c('OrangeRed','Gold','GreenYellow','LightSlateBlue','DarkTurquoise') roc <- survivalROC(Stime = riskdata$DFStime, status = riskdata$DFStatus, marker = riskdata$riskScore, predict.time = 365,method='KM') aucText=paste0("AUC for 1-Year Survival: ",sprintf("%.3f",roc$AUC)) auc <- data.frame(AUC1=as.numeric(roc$AUC)) pdf('./风险模型的构建/fig4J.train.ROC.pdf',width = 6.5,height = 7) plot(roc$FP,roc$TP, type="l",lwd = 3,xlim=c(0,1), ylim=c(0,1.1), xlab = "False Positive rate", ylab = "True Positive rate", col="OrangeRed", main = 'SurvivalROC for Training Set') abline(0,1) for (i in c(2:5)){ roc1 <- survivalROC(Stime = riskdata$DFStime, status = riskdata$DFStatus, marker = riskdata$riskScore, predict.time = 365*i,method='KM') aucText=c(aucText,paste0("AUC for ",i,"-Year Survival:",sprintf("%.3f",roc1$AUC))) auc <- cbind(auc,AUC1=as.numeric(roc1$AUC)) lines(roc1$FP, roc1$TP, type="l", xlim=c(0,1), ylim=c(0,1),col=mycol[i],lwd = 3) } legend("bottomright", aucText,lwd=2,bty="n",col=mycol[1:5]) dev.off() #测试集 riskdata <- read.table('test.risk.txt',header = T,sep = '\t',check.names = F) mycol <- c('OrangeRed','Gold','GreenYellow','LightSlateBlue','DarkTurquoise') roc <- survivalROC(Stime = riskdata$DFStime, status = riskdata$DFStatus, marker = riskdata$riskScore, predict.time = 365,method='KM') aucText=paste0("AUC for 1-Year Survival: ",sprintf("%.3f",roc$AUC)) auc <- cbind(auc,AUC2=as.numeric(roc$AUC)) pdf('./风险模型的构建/fig4k.Test.ROC.pdf',width = 6.5,height = 7) plot(roc$FP,roc$TP, type="l",lwd = 3,xlim=c(0,1), ylim=c(0,1.1), xlab = "False Positive rate", ylab = "True Positive rate", col="OrangeRed", main = 'SurvivalROC for Training Set') abline(0,1) for (i in c(2:5)){ roc1 <- survivalROC(Stime = riskdata$DFStime, status = riskdata$DFStatus, marker = riskdata$riskScore, predict.time = 365*i,method='KM') aucText=c(aucText,paste0("AUC for ",i,"-Year Survival:",sprintf("%.3f",roc1$AUC))) auc <- cbind(auc,AUC2=as.numeric(roc1$AUC)) lines(roc1$FP, roc1$TP, type="l", xlim=c(0,1), ylim=c(0,1),col=mycol[i],lwd = 3) } legend("bottomright", aucText,lwd=2,bty="n",col=mycol[1:5]) dev.off() #验证集 riskdata <- read.table('GSE41116.risk.txt',header = T,sep = '\t',check.names = F) mycol <- c('OrangeRed','Gold','GreenYellow','LightSlateBlue','DarkTurquoise') roc <- survivalROC(Stime = riskdata$DFStime, status = riskdata$DFStatus, marker = riskdata$riskScore, predict.time = 365,method='KM') aucText=paste0("AUC for 1-Year Survival: ",sprintf("%.3f",roc$AUC)) auc <- cbind(auc,data.frame(AUC3=roc$AUC)) pdf('./风险模型的构建/Fig2l.GEO.ROC.pdf',width = 6.5,height = 7) plot(roc$FP,roc$TP, type="l",lwd = 3,xlim=c(0,1), ylim=c(0,1.1), xlab = "False Positive rate", ylab = "True Positive rate", col="OrangeRed", main = 'SurvivalROC for GSE12945 cohort') abline(0,1) for (i in c(2:5)){ roc1 <- survivalROC(Stime = riskdata$DFStime, status = riskdata$DFStatus, marker = riskdata$riskScore, predict.time = 365*i,method='KM') lines(roc1$FP, roc1$TP, type="l", xlim=c(0,1), ylim=c(0,1),col=mycol[i],lwd = 3) aucText=c(aucText,paste0("AUC for ",i,"-Year Survival:",sprintf("%.3f",roc1$AUC))) auc <- cbind(auc,data.frame(AUC3=roc1$AUC)) } legend("bottomright", aucText,lwd=2,bty="n",col=c('#FA8072',mycol[1:5])) dev.off() ##################################################生存风险评分模型与临床相关性################################################################# dir.create(paste0(Work_dir,"/生存风险评分模型与临床相关性")) TNM <- read.table("临床数据处理(tnm).txt",header = T,sep = '\t',check.names = F,fill = NA) train <- read.table('risk.txt',sep = '\t',header = T,check.names = F) vali <- read.table('test.risk.txt',sep = '\t',header = T,check.names = F) merge1 <- rbind(train,vali) merge1$Patient <-substr(merge1$fullid,1,12) riskTime <- merge(merge1,TNM,by='Patient') %>% na.omit(.) riskTime$Age <- ifelse(riskTime$Age >60 ,">60","<=60") meta1 <- riskTime[,c(13:18,9)] #meta1$riskScore[meta1$riskScore >10] <- 10 pl <- list() for (i in colnames(meta1)[-ncol(meta1)]) { data <- meta1 %>% dplyr::select(c("riskScore",i)) %>% drop_na() data[,i] = as.factor(data[,i]) data <- data[str_detect(data[,i],"X$",negate = TRUE),] pl[[i]] <- ggviolin(data,x=i,y="riskScore",fill = i,palette = "jco", add = "boxplot",add.params = list(fill="white"),ylab = "riskScore")+stat_compare_means() } length(pl) p = ggarrange(pl[[1]],pl[[2]],pl[[3]],pl[[4]],pl[[5]],pl[[6]], ncol = 3, nrow = 2) p ggsave("./生存风险评分模型与临床相关性/Fig7A.高低风险组不同临床特征中的表达.pdf",p ,width = 15,height = 10) #分层生存分析#### clinical <- riskTime[,c(1:4,13:18,10)] clinical <- clinical[clinical$ajcc_pathologic_m != "MX",] clinical <- clinical[clinical$ajcc_pathologic_n != "NX",] table(clinical$ajcc_pathologic_m) clinical$StageN <- ifelse(clinical$ajcc_pathologic_n=='N0','N0','N1-N3') clinical$StageM <- clinical$ajcc_pathologic_m clinical$StageT <- ifelse(clinical$ajcc_pathologic_t == 'T1' | clinical$ajcc_pathologic_t == 'T2','T1-T2','T3-T4') clinical$stage <- ifelse(clinical$ajcc_pathologic_stage == 'Stage I' | clinical$ajcc_pathologic_stage == 'Stage II','Stage I-II','Stage III-IV') clinical$Gender <- clinical$gender clinical$Age_group <- clinical$Age d <- clinical[,c(1:4,11:ncol(clinical))] list <- list() colnames(clinical) for (i in colnames(clinical)[12:ncol(clinical)]) { for (j in unique(clinical[,i])[order(unique(clinical[,i]),decreasing = F)]) { d <- clinical[,c("DFStime","DFStatus","risk",i)] diff <- survdiff(Surv(DFStime,DFStatus) ~risk , data = d[d[,i]== j,] ) pValue=format((1-pchisq(diff$chisq,df=1)),digits=5) pValue=as.numeric(pValue) fit <- survfit(Surv(DFStime,DFStatus) ~ risk, data = d[d[,i]== j,]) list[[j]] <- ggsurvplot(fit, data = d[d[,i]== j,], ggtheme = theme_bw(), #想要网格就运行这行 conf.int = F, #不画置信区间,想画置信区间就把F改成T #conf.int.style = "step",#置信区间的类型,还可改为ribbon risk.table = F, # 绘制累计风险曲线 #tables.theme = theme_void(), tables.height = 0.3, censor = T, #不显示观察值所在的位置 palette = c("Red3","DodgerBlue4"), #线的颜色对应高、低(自定义调色板) xlab = "Time (Days)", legend.title = j,#基因名写在图例题目的位置 font.legend = 10,#图例的字体大小 font.title = 12,font.x = 12,font.y = 12,#设置其他字体大小 legend=c(0.8,0.9), pval = TRUE, legend.labs = c('High_risk','Low_risk')) } } length(list) pdf('./生存风险评分模型与临床相关性/Fig7B.风险模型与临床特征的分层分析.pdf',width=15,height = 10) arrange_ggsurvplots(list,print = TRUE, ncol = 3, nrow = 4) dev.off() ##################################################独立预后################################################################# dir.create(paste0(Work_dir,"/独立预后")) #独立预后分析(只在训练集中进行) TNM <- read.table('tnm(1或0).txt',header = T,sep = '\t',check.names = F,fill = NA) train <- read.table('risk.txt',sep = '\t',header = T,check.names = F) train$Patient <-substr(train$fullid,1,12) riskTime <- merge(train,TNM,by='Patient') riskTime$Age <- ifelse(riskTime$Age < 60 ,"<60",">=60") indep_rt <- riskTime[,c(1:4,9,11:16)] outTab2 = data.frame() for(i in colnames(indep_rt[,5:ncol(indep_rt)])){ cox <- coxph(Surv(DFStime, DFStatus) ~ indep_rt[,i], data = indep_rt) coxSummary = summary(cox) coxP=coxSummary$coefficients[,"Pr(>|z|)"] outTab2=rbind(outTab2, cbind(id=i, HR=coxSummary$conf.int[,"exp(coef)"], HR.95L=coxSummary$conf.int[,"lower .95"], HR.95H=coxSummary$conf.int[,"upper .95"], pvalue=coxSummary$coefficients[,"Pr(>|z|)"]) ) } outTab2[,c(2:5)] <- apply(outTab2[,c(2:5)],2,function(x){as.numeric(x)}) outTab2 <- outTab2[order(outTab2$pvalue),] outTab2 <- outTab2[outTab2$pvalue<=0.05,] write.table(outTab2,file="./独立预后/indep.uniCox.txt",sep="\t",row.names=F,quote=F) bioForest(coxFile="./独立预后/indep.uniCox.txt", forestFile="./8独立预后/Fig8A.uniCox_indep_forest.pdf", forestCol=c("red","green")) #独立预后-多因素分析 indep.uni <- outTab2[outTab2$pvalue<=0.05,]$id indep_rt1 <- indep_rt[,c('DFStime','DFStatus',unique(indep.uni))] multiCox1=coxph(Surv(DFStime, DFStatus) ~ ., data = indep_rt1) multiCox1=step(multiCox1,direction = "both") multiCoxSum1=summary(multiCox1) outTab3=data.frame() outTab3=cbind( HR=multiCoxSum1$conf.int[,"exp(coef)"], HR.95L=multiCoxSum1$conf.int[,"lower .95"], HR.95H=multiCoxSum1$conf.int[,"upper .95"], pvalue=multiCoxSum1$coefficients[,"Pr(>|z|)"]) outTab3=cbind(id=row.names(outTab3),outTab3) outTab3=data.frame(outTab3) if (length(row.names(outTab3)) <=1){ next } outTab3[,c(2:5)] <- apply(outTab3[,c(2:5)],2,function(x){as.numeric(x)}) outTab3 <- outTab3[order(outTab3$pvalue,decreasing = F),] indep.final <- outTab3[outTab3$pvalue<0.05,]$id write.table(outTab3,'./独立预后/indep.mulCox.txt',row.names = F,col.names = T,sep = '\t',quote = F) bioForest(coxFile="./独立预后/indep.mulCox.txt", forestFile="./8独立预后/Fig8B.mulCox_indep_forest.pdf", forestCol=c("red","green")) #列线图#### nomod <- indep_rt #nomod$riskScore[nomod$riskScore >13] <- 13 dd<-datadist(nomod) options(datadist="dd") options(na.action="na.delete") summary(nomod$DFStime) coxpbc<-cph(formula = Surv(DFStime,DFStatus) ~riskScore+stage , data = nomod,x=T,y=T,surv = T) #,time.inc =2920 print(coxpbc) surv<-Survival(coxpbc) surv1 <- function(x) surv(365,x) # surv2 <- function(x) surv(365*2,x) surv3<-function(x) surv(1095,x) # surv4<-function(x) surv(365*4,x) surv5<-function(x) surv(1825,x) #surv10<-function(x) surv(3650,x) x<-nomogram(coxpbc,fun = list(surv1,surv3,surv5),lp=F, funlabel = c('1-year survival Probability', '3-year survival Probability', '5-year survival Probability'), maxscale = 100,fun.at = c(0.95,0.9,0.8,0.7,0.6,0.5,0.4,0.3,0.2,0.1)) pdf("./独立预后/Fig8C.indep_nomogram_classical.pdf",width = 15,height =8) # png("train.indep_nomogram_classical.png",width = 1200,height = 600) plot(x, lplabel="Linear Predictor", xfrac=.35,varname.label=TRUE, varname.label.sep="=", ia.space=.1, tck=NA, tcl=-0.20, lmgp=0.3, points.label='Points', total.points.label='Total Points', total.sep.page=FALSE, cap.labels=FALSE,cex.var = 1.6,cex.axis = 1.2, lwd=5,label.every = 1,col.grid = gray(c(0.8, 0.95))) dev.off() #列线图配套的校正曲线 f1 <- cph(formula = Surv(DFStime,DFStatus) ~ riskScore+stage, data=nomod,x=T,y=T,surv = T,na.action=na.delete,time.inc = 365) cal1 <- calibrate(f1, cmethod="KM", method="boot",u=365,m=60,B=1000) #3年数据 f3 <- cph(formula = Surv(DFStime,DFStatus) ~ riskScore+stage, data=nomod,x=T,y=T,surv = T,na.action=na.delete,time.inc = 1095) cal3 <- calibrate(f3, cmethod="KM", method="boot",u=1095,m=60,B=1000) #5年数据 f5 <- cph(formula = Surv(DFStime,DFStatus) ~ riskScore+stage, data=nomod,x=T,y=T,surv = T,na.action=na.delete,time.inc = 1825) cal5 <- calibrate(f5, cmethod="KM", method="boot",u=1825,m=60,B=1000) #作图 pdf("./独立预后/Fig8D.indep_calibration_compare.pdf",width = 7,height = 7) plot(cal1,lwd = 2,lty = 1,errbar.col = c("#FB8072"),#lty=1画上bar bty = "l", #只画左边和下边框 xlim = c(0.1,1),ylim= c(0.1,1),#xlim可以调整,怎么好看怎么调 xlab = "Nomogram-prediced OS (%)",ylab = "Observed OS (%)", col = c("#FB8072"), cex.lab=1.2,cex.axis=1, cex.main=1.2, cex.sub=0.6) lines(cal1[,c('mean.predicted',"KM")], type = 'b', lwd = 2, col = c("#FB8072"), pch =16) mtext("") plot(cal3,lwd = 2,lty = 1,errbar.col = c("RoyalBlue1"), xlim = c(0.1,1),ylim= c(0.1,1),col = c("RoyalBlue1"),add = T) lines(cal3[,c('mean.predicted',"KM")], type = 'b', lwd = 2, col = c("RoyalBlue1"), pch = 16) plot(cal5,lwd = 2,lty = 1,errbar.col = c("#B3DE69"), xlim = c(0.1,1),ylim= c(0.1,1),col = c("#B3DE69"),add = T) lines(cal5[,c('mean.predicted',"KM")], type = 'b', lwd = 2, col = c("#B3DE69"), pch = 16) abline(0,1, lwd = 3, lty = 3, col = c("#224444")) legend("topleft", #图例的位置 legend = c('1-year','3-year',"5-year"), #图例文字 col =c("#FB8072","RoyalBlue1",'#B3DE69'), #图例线的颜色,与文字对应 lwd = 2,#图例中线的粗细 cex = 1.2,#图例字体大小 bty = "n")#不显示图例边框 dev.off() #计算校准曲线斜率 library(stringr) caldat <- data.frame(summary(cal1)) cal1rate <- lm(str_sub(caldat[caldat$Var2 == " KM.corrected","Freq"], -8, -1) ~ str_sub(caldat[caldat$Var2 == "mean.predicted","Freq"], -8, -1))[["coefficients"]][["(Intercept)"]] # summary(lm(str_sub(caldat[caldat$Var2 == " KM.corrected","Freq"], -8, -1) ~ str_sub(caldat[caldat$Var2 == "mean.predicted","Freq"], -8, -1))) caldat <- data.frame(summary(cal3)) cal3rate <- lm(str_sub(caldat[caldat$Var2 == " KM.corrected","Freq"], -8, -3)[1:6] ~ str_sub(caldat[caldat$Var2 == "mean.predicted","Freq"], -8, -3)[1:6])[["coefficients"]][["(Intercept)"]] caldat <- data.frame(summary(cal5)) cal5rate <- lm( str_sub(caldat[caldat$Var2 == " KM.corrected","Freq"], -8, -3)[1:6] ~ str_sub(caldat[caldat$Var2 == "mean.predicted","Freq"], -10, -3)[1:6])[["coefficients"]][["(Intercept)"]] #计算C指数 set.seed(1) # the method of validate is using random fit.cph <- cph(Surv(DFStime,DFStatus)~riskScore+Stage, data = nomod, x = TRUE, y = TRUE, surv = TRUE) # Get the Dxy v <- validate(fit.cph, dxy=TRUE, B=1000) Dxy = v[rownames(v)=="Dxy", colnames(v)=="index.corrected"] orig_Dxy = v[rownames(v)=="Dxy", colnames(v)=="index.orig"] # The c-statistic according to Dxy=2(c-0.5) bias_corrected_c_index <- abs(Dxy)/2+0.5 orig_c_index <- abs(orig_Dxy)/2+0.5 #最终使用校正后的C指数 bias_corrected_c_index ##################################################富集分析################################################################# dir.create(paste0(Work_dir,"/富集分析")) train <- read.table('risk.txt',sep = '\t',header = T,check.names = F) risk_expr <- LUAD_expr[,train$fullid] group_list = factor(train$risk, levels = c("High_risk","Low_risk")) design <- model.matrix(~0+group_list) colnames(design)= levels(group_list) rownames(design)= colnames(risk_expr) exprSet <- log(risk_expr+0.0001,2) write.table(file = "risk_expr.txt",exprSet,sep = "\t") library("limma") fit <- lmFit(risk_expr, design) constrasts = paste((levels(group_list)),collapse = "-");constrasts cont.matrix <- makeContrasts(contrasts=constrasts,levels = design) fit2=contrasts.fit(fit,cont.matrix) fit2=eBayes(fit2) library("msigdbr") library(tibble) DEG_GEO = topTable(fit2, coef=constrasts, n=Inf) DEG_GEO = na.omit(DEG_GEO) DEG_GEO$genes <- rownames(DEG_GEO) cluster.genes<- DEG_GEO %>% arrange(desc(logFC)) %>% dplyr::select(genes,logFC) #基因按logFC排序 ranks<- deframe(cluster.genes) library(openxlsx) library(clusterProfiler) library(ggplot2) library(enrichplot) library(GOplot) library(DOSE) library(stringr) library(org.Hs.eg.db) gse.GO <- gseGO(geneList = ranks, ont = "ALL", OrgDb = org.Hs.eg.db, keyType = "SYMBOL", pAdjustMethod = "BH", pvalueCutoff = 1) ridgeplot(gse.GO, 5, pvalue_table = TRUE) gseaplot2(gse.GO,1:10,) gse.go <- as.data.frame(gse.GO) write.table(file = "./富集分析/gse.go.txt",gse.go,sep = "\t") gene.id <- AnnotationDbi::select(org.Hs.eg.db, keytype = "SYMBOL", keys = names(ranks), columns = c("ENTREZID", "ENSEMBL")) %>%na.omit(.) gene.id <- gene.id[!duplicated(gene.id$SYMBOL),] ranks <- ranks[intersect(names(ranks),gene.id$SYMBOL)] names(ranks) <- gene.id$ENTREZID save(ranks,file = "ranks.rdata") load("ranks.rdata") KEGG_gseresult <- gseKEGG(ranks, nPerm = 1000, minGSSize = 10, maxGSSize = 1000, pvalueCutoff=1) gseaplot2(KEGG_gseresult,1:10,) kegg_gseresult <- as.data.frame(KEGG_gseresult) write.table(file = "./富集分析/gse.kegg.txt",kegg_gseresult,sep = "\t") ##################################################突变分析################################################################# dir.create(paste0(Work_dir,"/突变分析")) library(TCGAbiolinks) tcga_maf <- GDCquery(project = "TCGA-STAD", data.category = "Simple Nucleotide Variation", # Simple nucleotide variation if legacy data.type = "Masked Somatic Mutation", access = "open", legacy = F, sample.type = "Primary Tumor") GDCdownload(tcga_maf) tcga_maf <- GDCprepare(tcga_maf) library(maftools) require(data.table) tcga_maf$Tumor_Sample_Barcode <- substr(tcga_maf$Tumor_Sample_Barcode,1,16) save(tcga_maf,file = "tcga_STADmaf.Rdata") laml = read.maf(tcga_maf) pdf('Fig9B.突变景观整体情况.pdf',width = 12,height = 7) plotmafSummary(maf = laml, rmOutlier = TRUE, addStat = 'median', dashboard = TRUE, titvRaw = FALSE) dev.off() train <- read.table('risk.txt',sep = '\t',header = T,check.names = F) vali <- read.table('test.risk.txt',sep = '\t',header = T,check.names = F) merge1 <- train merge1$Tumor_Sample_Barcode <- merge1$fullid clin <- merge1[merge1$risk == "High_risk",] maf1 <- tcga_maf[tcga_maf$Tumor_Sample_Barcode %in% clin$fullid,] laml1 = read.maf(maf1) pdf('Fig.高风险组突变景观瀑布图.pdf',width = 12,height = 7) oncoplot(maf = laml1,showTumorSampleBarcodes=F) dev.off() clin2 <- merge1[merge1$risk == "Low_risk",] maf2 <- tcga_maf[tcga_maf$Tumor_Sample_Barcode %in% clin2$fullid,] laml2 = read.maf(maf2) pdf('Fig.低风险组突变景观瀑布图.pdf',width = 12,height = 7) oncoplot(maf = laml2,showTumorSampleBarcodes=F) dev.off() ##计算TMB指标 x1 = tmb(maf = laml1) x2 = tmb(maf = laml2) TMB <- rbind(x1,x2) colnames(TMB)[1] <- "fullid" TMB_riskscore <- merge(merge1[,c(1:3,8:9)],TMB[,c(1,4)],by="fullid") colnames(TMB_riskscore)[6] <- "TMB" #风险评分与TMB相关性分析 r <- cor(TMB_riskscore$TMB,TMB_riskscore$riskScore) p <- cor.test(TMB_riskscore$TMB,TMB_riskscore$riskScore)$p.value rvalue <- as.character(as.expression(substitute(~~italic(r)~"="~R, list(R = format(round(unique(r),2), nsmall= 2))))) pvalue <- as.character(as.expression(substitute(~~italic(P)~"="~p, list(p = format(sprintf("%1.1e", unique(p)), nsmall= 2))))) p <- ggplot(TMB_riskscore) + geom_point(aes(x=riskScore, y=TMB,colour = risk)) + geom_smooth(aes(x=riskScore, y=TMB),method = "lm", se = F, colour = "#206BB5")+ #画在图中,需要根据自己的数据调整位置 annotate("text", x = 1.2, y = max(TMB_riskscore$TMB), label = rvalue, parse = TRUE) + annotate("text", x = 1.2, y = max(TMB_riskscore$TMB)-0.5, label = pvalue, parse = TRUE) p ggsave("fig.riskScore与TMB相关性分析.pdf",p,width= 8,height = 6) p1 <- ggviolin(TMB_riskscore,x="risk",y="TMB",fill ="risk", add = c("boxplot"),add.params = list(fill="white"),ylab ="log(TMB)")+stat_compare_means() ggsave("fig.TMB在高低风险组差异分析.pdf",p1,width= 8,height = 6) TMB_riskscore$TMB_group <- ifelse(TMB_riskscore$TMB > mean(TMB_riskscore$TMB),"TMB_hight","TMB_low") fit <- survfit(Surv(DFStime,DFStatus) ~ TMB_group,data = TMB_riskscore) p1 <- ggsurvplot(fit, data = TMB_riskscore, #ggtheme = theme_bw(), #想要网格就运行这 conf.int = F, #不画置信区间,想画置信区间就把F改成T conf.int.style = "ribbon",#置信区间的类型,还可改为ribbon risk.table = T, # 绘制累计风险曲线 #tables.theme = theme_void(), tables.height = 0.25, censor = T, #不显示观察值所在的位置 palette = c("#D95F02","#1B9E77"), #线的颜色对应高、低(自定义调色板) xlab = "Time (day)", ylab = "Survival probability", legend.title = "TMB_group", font.legend = 9, pval = T) pdf('fig9G.高低TMB分组生存分析.pdf',width=7,height = 5) print(p1,newpage = FALSE) dev.off() TMB_riskscore$TMB_riskscore_group <- ifelse(TMB_riskscore$risk == "High_risk" & TMB_riskscore$TMB_group == "TMB_hight" ,"Risk_h+TMB_h", (ifelse(TMB_riskscore$risk == "High_risk" & TMB_riskscore$TMB_group == "TMB_low","Risk_h+TMB_l", (ifelse(TMB_riskscore$risk == "Low_risk" & TMB_riskscore$TMB_group == "TMB_hight","Risk_l+TMB_h", "Risk_l+TMB_l"))))) table(TMB_riskscore$TMB_riskscore_group) fit <- survfit(Surv(DFStime,DFStatus) ~ TMB_riskscore_group,data = TMB_riskscore) p1 <- ggsurvplot(fit, data = TMB_riskscore, #ggtheme = theme_bw(), #想要网格就运行这 conf.int = F, #不画置信区间,想画置信区间就把F改成T conf.int.style = "ribbon",#置信区间的类型,还可改为ribbon risk.table = T, # 绘制累计风险曲线 #tables.theme = theme_void(), tables.height = 0.25, censor = T, #不显示观察值所在的位置 #palette = c("#D95F02","#1B9E77"), #线的颜色对应高、低(自定义调色板) xlab = "Time (day)", ylab = "Survival probability", legend.title = "TMB_risk", font.legend = 9, pval = T) pdf('fig.TMB_riskscore_group分组生存分析.pdf',width=10,height = 6) print(p1,newpage = FALSE) dev.off() #############################高低风险组药物敏感性分析###################################################### dir.create(paste0(Work_dir,"/高低风险组药物敏感性分析")) risk_expr <- expr[,train$fullid] library(pRRophetic) library(ggpubr) table(duplicated(rownames(risk_expr))) durg <- c("A.443654", "A.770041", "ABT.263", "ABT.888","AG.014699","AICAR","AKT.inhibitor.VIII", "AMG.706","ATRA", "AUY922", "Axitinib", "Bexarotene", "BIBW2992", "Bicalutamide", "BI.D1870", "BIRB.0796", "Bleomycin", "BMS.509744", "BMS.536924", "BMS.708163", "BMS.754807", "Bortezomib", "Bosutinib", "Bryostatin.1", "BX.795", "Camptothecin","Cisplatin", "CMK", "Cyclopamine", "Cytarabine", "Dasatinib", "DMOG", "Docetaxel", "Doxorubicin", "EHT.1864", "Elesclomol", "Embelin", "Epothilone.B", "Erlotinib", "Etoposide", "FH535", "FTI.277","Gefitinib", "Gemcitabine", "GNF.2", "GW843682X", "Imatinib", "IPA.3", "JNJ.26854165", "JNK.9L", "JNK.Inhibitor.VIII", "Lapatinib", "Lenalidomide", "LFM.A13", "Metformin", "Methotrexate") #durg <- c("5-fluorouracil", "cisplatin", "oxaliplatin", "capecitabine", "paclitaxel", "docetaxel", "irinotecan") pl <- list() mydata1<-matrix(data=NA,ncol=57,nrow=ncol(risk_expr)) predict <-as.data.frame(mydata1) colnames(predict)<- durg#重新给赋值 rownames(predict)<-colnames(risk_expr) predict$risk <- merge1$risk for (i in durg){ predict[,i] <- pRRopheticPredict(as.matrix(risk_expr),drug = i,selection = 1,) } predict$risk <- as.factor(predict$risk) write.table(predict,file = "药物预测.txt") dat <- data.frame(id=colnames(predict)) for (i in 1:(ncol(predict)-2)) { dat[i,2] <- wilcox.test(predict[,i] ~ predict$risk)$p.value } dat <- dat[order(dat$V2,decreasing = F),] dat0.05 <- dat[dat$V2 < 0.05,] %>% drop_na() write.table(dat0.05,file = "药物预测差异统计.txt") wta <- predict[,c("risk",dat0.05$id)] for (i in dat0.05$id) { pl[[i]] <- ggviolin(wta,x="risk",y=i,fill ="risk",palette = "jco",add = "boxplot",add.params = list(fill="white"),ylab = paste0(i," IC50"))+stat_compare_means() } str(data) length(pl) p = ggarrange(pl[[1]],pl[[2]],pl[[3]],pl[[4]],pl[[5]],pl[[6]],pl[[7]],pl[[8]],pl[[9]],pl[[10]],pl[[11]],pl[[12]], ncol = 3, nrow = 4) p = ggarrange(pl[[13]],pl[[14]],pl[[15]],pl[[16]],pl[[17]],pl[[18]],pl[[19]],pl[[20]],pl[[21]],pl[[22]],pl[[23]],ncol = 3, nrow = 4) ggsave(paste0("figB.药物敏感性分析.pdf"),p,width = 10,height = 10) #############################高低风险组免疫治疗响应分析###################################################### dir.create(paste0(Work_dir,"/高低风险组免疫治疗响应分析")) as <- sweep(risk_expr,2,apply(risk_expr,2,median)) as <- sweep(risk_expr,1,apply(risk_expr,1,median)) write.table(file = "risk_expr.txt",as,sep = "\t")#上传TIDE数据进行预测 TIDE <- read.csv("tide (1).csv",header = T) colnames(TIDE)[1] <- "fullid" data <- merge(merge1[,c(1,8,9)],TIDE[,1:4],by = "fullid") p <- ggviolin(data,x="risk",y="TIDE",fill ="risk",add = c("boxplot","jitter"),add.params = list(fill="white"),ylab = "TIDE prediction")+stat_compare_means() p ggsave("fig.11A 高低风险组免疫响应(TIDE)差异分析.pdf", p, width = 8, height = 6) checkpiont <- c( "CD274","CD276","CTLA4","HHLA2" , "ICOS","ICOSLG","PDCD1","PDCD1LG2","TMIGD2"," VTCN1", "BTlA","CD27","CD40","CD40lG","CD70","TNFRSF18","TNFRSF14", "TNFRSF4","TNFRSF9","TNFSF4","TNFSF9","ENTPD1","FGL1","HAVCR2","IDO1","LAG","NCR3","NT5E","SIGLEC15") train <- read.table('risk.txt',sep = '\t',header = T,check.names = F) vali <- read.table('test.risk.txt',sep = '\t',header = T,check.names = F) risk_expr <- expr[,vali$fullid] exp_con = risk_expr[checkpiont,] %>% na.omit(.) %>% t()%>% as.data.frame() exp_con$class = merge1$risk table(exp_con$class) exp_con <- exp_con[,c(ncol(exp_con),1:ncol(exp_con)-1)] str(exp_con) data2 <- exp_con %>% gather("cell_types","Gene_expression", -class) %>% mutate(cell_types = factor(cell_types, levels = colnames(exp_con))) %>% mutate(class = factor(class, levels = c("High_risk","Low_risk"))) data_label <- data2 %>% group_by(class, cell_types) %>% summarise(Mean = mean(Gene_expression)) %>% group_by(cell_types) %>% summarise(lable = mean(Mean)) p <- ggboxplot(data2, x = "cell_types", y = "Gene_expression", fill = "class", size = 0.15) + labs(x = "", fill = "")+ rotate_x_text(angle =45)+ stat_compare_means(aes(group = class),label = "p.signif",label.y = data_label$lable + 5.02) p ggsave("fig11B.免疫检查位点基因表达模式.pdf", p, width = 10, height = 6) write.csv(exp_con,file="免疫检查位点_expr.csv",sep="\t",quote=FALSE,row.names=FALSE) write.table(file = "expr_test.txt",risk_expr,sep = "\t") source("CIBERSORT.R") # CIBERSORT ciber.res <- CIBERSORT(sig_matrix = "LM22.txt", mixture_file = "expr_test.txt", perm = 100, QN = TRUE) write.table(ciber.res,"CIBERSORT_result(test).txt",sep = "\t",row.names = T,col.names = NA,quote = F) ciber.res <- read.table("CIBERSORT_result(test).txt",sep = "\t",row.names = 1,check.names = F,stringsAsFactors = F,header = T) ciber.res <- ciber.res[,-c(23,24,25)] ciber.res <- ciber.res[,colSums(ciber.res) > 0] #柱状堆叠图1 identical(rownames(ciber.res),vali$fullid) ciber.res <- ciber.res[match(merge1$fullid,rownames(ciber.res)),] ciber.res$cluster <- vali$risk data=ciber.res[order(ciber.res$cluster),] gaps=c(1, as.vector(cumsum(table(data$cluster)))) xlabels=levels(factor(data$cluster)) bioCol=c("#0066FF","#FF9900","#FF0000","#6E568C","#7CC767","#223D6C","#D20A13","#FFD121","#088247","#11AA4D") data1=t(as.matrix(data[,-ncol(data)])) pdf("fig.barplot(test).pdf",height=10,width=18) col=rainbow(nrow(data1),s=0.7,v=0.7) par(las=1,mar=c(8,5,4,16),mgp=c(3,0.1,0),cex.axis=1.5) a1=barplot(data1,col=col,yaxt="n",ylab="Relative Percent",xaxt="n",cex.lab=1.8) a2=axis(2,tick=F,labels=F) axis(2,a2,paste0(a2*100,"%")) par(srt=0,xpd=T) for(i in 1:length(gaps)){ j=i+1 rect(xleft=a1[gaps[i]], ybottom = -0.01, xright = a1[gaps[j]], ytop= -0.06, col=bioCol[i]) text((a1[gaps[i]]+a1[gaps[j]])/2,-0.035,xlabels[i],cex=2) } ytick2 = cumsum(data1[,ncol(data1)]) ytick1 = c(0,ytick2[-length(ytick2)]) legend(par('usr')[2]*0.98,par('usr')[4],legend=rownames(data1),col=col,pch=15,bty="n",cex=1.3) dev.off() table(ciber.res$cluster) library("tidyverse") data2 <- ciber.res %>% gather("cell_types","percentages", -cluster) %>% mutate(cell_types = factor(cell_types, levels = colnames(ciber.res))) %>% mutate(cluster = factor(cluster, levels = c("High_risk","Low_risk"))) data3 <- data2 %>% group_by(cluster,cell_types) %>% summarise("Mx" =max(percentages)) %>% group_by(cell_types) %>% summarise(lable = max(Mx)) x = c() xend = c() for(i in 1:length(unique(data2$cell_types))){ x[i] = i - 0.2 xend[i] = i + 0.2} y = data3$lable +0.01 yend = data3$lable +0.01 line <- data.frame(x, xend, y, yend) P <- ggplot()+ geom_violin(data = data2, aes(x = cell_types, y = percentages,fill = cluster), position = position_dodge(1),scale = "width")+ geom_boxplot(data = data2, aes(cell_types, percentages,fill = cluster), position = position_dodge(1),width=.4, outlier.shape = NA)+ stat_summary(data = data2, aes(cell_types, percentages,fill = cluster), fun.y = "median",geom = "point",shape = 16, size = 2, color = "white",position = position_dodge(1))+ ggsci::scale_fill_aaas()+ ggpubr::stat_compare_means(data = data2,aes(cell_types, percentages,group = cluster), label = "p.signif")+ #加小短线 #geom_segment(data = line, aes(x=x, y=y, xend= xend, yend=yend))+ #改坐标轴标题 labs(x = "", fill = "")+ #主题 theme_few(base_size = 14)+ #改坐标轴刻度,可算完了…… theme(axis.text.x = element_text(angle = 45, hjust = 1,vjust = 1, colour = "black"), legend.position = "top") P ggsave(paste0("Fig.高低风险组22种免疫细胞的免疫评分.pdf"),width = 10,height = 7)