library(survival) library(FanCodeV1) library(dplyr) library(purrr) library(readxl) library(ggplot2) library(pheatmap) source("D:\\ferroptosis\\code\\common_code.R") setwd("F:\\pyroptosis\\TCGA_COADREAD") work_dir <- "F:\\pyroptosis\\TCGA_COADREAD" GTEx_exp<- my_load_tibble("F:\\TCGA_COADREAD_lnc\\ferroptosis_lnc\\GTEx_exp.csv") GTEx_PRG<-GTEx_exp %>% filter(sample %in% PRG)#308 cases load("F:\\TCGA_COADREAD_lnc\\ferroptosis_lnc\\TCGA_mRNA_cancer1.RData") TCGA_mRNA_cancer2<-TCGA_mRNA_cancer1%>% filter(gene %in% PRG) save(TCGA_mRNA_cancer2,file="data_temp\\TCGA_mRNA_cancer2.RData")#469 load("data_temp\\TCGA_mRNA_cancer2.RData") load("F:\\TCGA_COADREAD_lnc\\ferroptosis_lnc\\TCGA_mRNA_norm.RData") #TCGA_mRNA_norm<-TCGA_mRNA_norm %>% mutate(ID=substring(ID,1,12)) TCGA_mRNA_norm1<-my_t_tibble(TCGA_mRNA_norm,new_name_col_1 = "gene") #41 TCGA_mRNA_norm2<-TCGA_mRNA_norm1 %>% filter(gene %in% PRG) PRG_data1<-inner_join(TCGA_mRNA_cancer2,TCGA_mRNA_norm2,by="gene") PRG_data<-inner_join(PRG_data1,GTEx_PRG,by=c("gene"="sample")) colnames(PRG_data)<-substring(colnames(PRG_data),1,12) my_save_tibble(PRG_data,file="data_temp\\PRG_data.csv") PRG_data<-my_load_tibble("data_temp\\PRG_data.csv") PRG_data1<-my_t_tibble(PRG_data,new_name_col_1 = "ID") group_GTEx<-tibble(ID=PRG_data1$ID,group=c(rep("cancer",469),rep("normal",349))) PRG_wil_data<-inner_join(group_GTEx,PRG_data1,by="ID") PRG_wil_res<-tibble() for (i in 3:34){ PRG_wil<-wilcox.test(data=PRG_wil_data,PRG_wil_data[[i]]~group) PRG_wil_res[i-2,"gene"] <-colnames(PRG_wil_data)[i] PRG_wil_res[i-2,"p_value"]<-PRG_wil$p.value } save(PRG_wil_res,file="data_temp\\PRG_wil_res.RData") load("data_temp\\steps_score_wil.RData") library(reshape2) library(ggpubr) PRG_wil_data1<-melt(PRG_wil_data) PRG_wil_data1$group<-factor(PRG_wil_data1$group,levels = c("normal","cancer")) PRG_GTEx_plot<-ggplot(PRG_wil_data1,aes(x = variable, y = value,fill=group))+ geom_boxplot(outlier.size = 0.3)+theme_classic()+ stat_compare_means( aes(label = ..p.signif..),method = "wilcox.test", label.y = 16,size=3)+ xlab("Relative expression")+ylab("Pyroptosis relative genes")+ theme(axis.text.x = element_text(angle=90,size=10))+ scale_fill_manual(values=c("lightseagreen", "lightcoral")) ggsave("plot\\PRG_GTEx_plot.pdf",PRG_GTEx_plot,width = 24,height = 12,units = "cm") ####------------TCGA primary data--------------------- TCGA_exp_count<-my_load_tibble("F:\\TCGA_COADREAD_lnc\\ferroptosis_lnc\\TCGA_COADREAD\\data_temp\\TCGA_exp_count.csv") TCGA_exp_fpkm<-my_load_tibble("F:\\TCGA_COADREAD_lnc\\ferroptosis_lnc\\TCGA_COADREAD\\data_temp\\TCGA_exp_fpkm.csv") TCGA_cli_info<-my_load_tibble("data_temp\\clinical_info.xlsx") TCGA_cli_info1<-TCGA_cli_info %>% mutate(ID=TCGA_cli_info$case_submitter_id, age=TCGA_cli_info$age_at_index, gender=TCGA_cli_info$gender, tumor_T=TCGA_cli_info$ajcc_pathologic_t, tumor_N=TCGA_cli_info$ajcc_pathologic_n, tumor_M=TCGA_cli_info$ajcc_pathologic_m, stage=TCGA_cli_info$ajcc_pathologic_stage) TCGA_cli_info2<-TCGA_cli_info1[,c(155:160,12)] %>% mutate(ID=stringr::str_replace_all(ID,"[-]", ".")) TCGA_cli_info<-TCGA_cli_info2[-which(duplicated(TCGA_cli_info2$ID)),] my_save_tibble(TCGA_cli_info,file="data_temp\\TCGA_cli_info.csv") TCGA_cli_info<-my_load_tibble("data_temp\\TCGA_cli_info.csv") #---fpkm to tpm------------ FPKM2TPM <- function(fpkm){ exp(log(fpkm) - log(sum(fpkm)) + log(1e6)) } TCGA_fpkm_tpm1<-TCGA_exp_fpkm %>% data.matrix() %>% as.data.frame() TCGA_fpkm_tpm2<-apply(TCGA_fpkm_tpm1,2,FPKM2TPM) TCGA_fpkm_tpm<-as_tibble(TCGA_fpkm_tpm2)%>% mutate(ID=TCGA_exp_fpkm$ID) save(TCGA_fpkm_tpm,file="data_temp\\TCGA_fpkm_tpm.RData") load("data_temp\\TCGA_fpkm_tpm.RData") my_save_tibble(TCGA_fpkm_tpm,file="data_temp\\TCGA_fpkm_tpm.csv") TCGA_fpkm_tpm<-my_load_tibble("data_temp\\TCGA_fpkm_tpm.csv") PRG_sheet<-my_load_tibble("PRGs.xlsx") PRG<-PRG_sheet$Genes #33 genes ###------------SNP------------------------ library(maftools) TCGA_SNP_cli<-my_load_tibble("SNP\\clinical_SNP\\clinical_SNP1.xlsx") cli_1<-tibble(caseID=TCGA_SNP_cli$case_id, ID=TCGA_SNP_cli$case_submitter_id, status=TCGA_SNP_cli$vital_status, sur_time=TCGA_SNP_cli$days_to_last_follow_up,ajcc_m=TCGA_SNP_cli$ajcc_pathologic_m, ajcc_n=TCGA_SNP_cli$ajcc_pathologic_n,ajcc_t=TCGA_SNP_cli$ajcc_pathologic_t, grade=TCGA_SNP_cli$ajcc_pathologic_stage) colnames(cli_1) = c("case_id","Tumor_Sample_Barcode","vital_status","days_to_last_follow_up", "ajcc_pathologic_m","ajcc_pathologic_n", "ajcc_pathologic_stage","ajcc_pathologic_t") clin1 <- cli_1[cli_1$days_to_last_follow_up!= "--",] clin1$days_to_last_follow_up <- as.numeric(clin1$days_to_last_follow_up) clin1 <- clin1[clin1$vital_status!= "--",] clin1$vital_status <- ifelse(clin1$vital_status== "Alive",0,1) SNP_cli<-unique(clin1) my_save_tibble(SNP_cli,file="data_temp\\SNP_cli.csv") SNP_cli<-my_load_tibble("data_temp\\SNP_cli.csv") laml = read.maf(maf = "SNP\\SNP_maf\\SNP.maf", clinicalData = SNP_cli, isTCGA = TRUE) save(laml,file="data_temp\\laml.RData") load("data_temp\\laml.RData") clindata = laml@clinical.data plotmafSummary(laml, rmOutlier = TRUE, dashboard = TRUE, titvRaw = TRUE, addStat = NULL, showBarcodes = FALSE, fs = 0.8, textSize = 0.8, color = NULL, titleSize = c(0.8, 0.6), titvColor = NULL, top = 10) # 提取肿瘤样本中特定基因的SNP统计数据 tp53 = genesToBarcodes(laml, genes = "TP53")[[1]] #--------------PRG mutation--------------------- oncoplot(maf=laml,top=10,fontSize = 1,draw_titv = T) #clinicalFeatures = c("vital_status","days_to_last_follow_up")) oncoplot(maf=laml,genes = PRG,fontSize = 1,keepGeneOrder = F, draw_titv = T, SampleNamefontSize = 0.5, showTitle = TRUE, titleFontSize = 1.5, legendFontSize = 1.2, annotationFontSize =1) PRG_maf<-subsetMaf(maf = laml, genes = PRG) laml_titv<-titv(maf=PRG_maf,plot = F,useSyn = T) plotTiTv(laml_titv) #---clinical info of SNP Clin = getClinicalData(laml) mafCompare(laml,laml) #---- mafs <- mafSummary(laml) #----the total of subsets SNP by gene----- lamaGene = getGeneSummary(laml) #-------------the total of subsets SNP by case----- lamaSample = getSampleSummary(laml) # snpdata <- laml@data tidydata <- snpdata[,c("Hugo_Symbol","Variant_Type", "Variant_Classification","Tumor_Sample_Barcode")] #idydata0<- tidydata[which(tidydata$Variant_Type != "Nonsense_Mutation"), #c("Hugo_Symbol","Variant_Type","Tumor_Sample_Barcode")]#删除无义突变 tidydata1<- tidydata %>% filter(Variant_Type !="Nonsense_Mutation") library(reshape2) rsdata = dcast(tidydata, Hugo_Symbol~Tumor_Sample_Barcode) rownames(rsdata) <- rsdata$Hugo_Symbol rsdata <- rsdata[,-1] ####------------------CNV-------------- rm(list = ls()) options(stringsAsFactors = F) options(scipen = 200) library(TCGAbiolinks) query <- GDCquery(project = "TCGA-COADREAD", data.category = "Copy Number Variation", data.type = "Masked Copy Number Segment") GDCdownload(query, method = "api")#, files.per.chunk = 100) segment_dat1 <- GDCprepare(query = query) segment_dat2<-segment_dat1 %>% filter(substr(Sample,14,15)=="01") %>% mutate(Sample=stringr::str_sub(Sample,1,16)) #mutate(Sample=stringr::str_replace_all(Sample,"[-]", ".")) %>% a<- which((duplicated(segment_dat2$Start))&(duplicated(segment_dat2$Sample))&(duplicated(segment_dat2$Chromosome))) segment_dat3 <- segment_dat %>% mutate(v12=paste(Sample,Chromosome), v13=paste(Sample,Start), v14=paste(Sample,End)) %>% mutate(Chromosome1=stringr::str_replace_all(Chromosome,"X","30")) %>% mutate(Chromosome1=as.numeric(Chromosome1)) %>% distinct(v13,.keep_all = T) %>%distinct(v14,.keep_all = T) %>% arrange(Sample,Chromosome1) %>% dplyr::select(-c("v13","v14","Chromosome1")) overlap_fun <- function(i, j) { return ( (segment_dat3$Start[i]<=segment_dat3$Start[j] && segment_dat3$End[i]>=segment_dat3$End[j])|| (segment_dat3$Start[i]<=segment_dat3$Start[j]&& segment_dat3$End[i]>=segment_dat3$Start[j]) ) } segment_dat3[["delete"]]<-rep(0,dim(segment_dat3)[1]) left = 0 right = 0 while(right% filter(delete!=1) %>% dplyr::select(c(1:6)) segment_data9<-na.omit(segment_data) my_save_tibble(segment_data, file="F:\\pyroptosis\\TCGA_COADREAD\\data_temp\\segment_data.tsv") segment_data<-my_load_tibble(file="F:\\pyroptosis\\TCGA_COAD\\data_temp\\segment_data.tsv") segment_data_01A<-segment_data %>% filter(substr(Sample,14,16)=="01A") segment_data_N1<-segment_data[c(1:1000),] my_save_tibble(segment_data_01A, file="F:\\pyroptosis\\TCGA_COADREAD\\data_temp\\segment_data_01A.csv") segment_dat<-select(segment_dat2,7,2:6) my_save_tibble(segment_dat,file="F:\\pyroptosis\\TCGA_COADREAD\\data_temp\\segment_dat.csv") segment_dat<-my_load_tibble(segment_dat,file="F:\\pyroptosis\\TCGA_COADREAD\\data_temp\\segment_dat.csv") segment_dat_test<-segment_dat[c(1:7000),] my_save_tibble(segment_dat_test,file="F:\\pyroptosis\\TCGA_COADREAD\\data_temp\\segment_dat_test.csv") save(segment_dat,file="data_temp\\segment_dat.RData") load("F:\\pyroptosis\\TCGA_COADREAD\\data_temp\\segment_dat.RData") hg38_marker_file<-my_load_tibble("CNV\\snp6.na35.remap.hg38.subset.txt") hg_marker_file1 <- hg38_marker_file[hg38_marker_file$freqcnv=="FALSE",] hg_marker_file2 <- hg_marker_file1 [,c(1,2,3)] hg_marker_file <-hg_marker_file2 %>% rename(Marker_Name =probeid)%>% rename(Chrosome =chr)%>% rename(Marker_Position=pos) hg_marker_file<-my_save_tibble(hg_marker_file,file="CNV\\hg_marker_file.txt") #####----------Consensus Cluster------------------ TCGA_fpkm_tpm3<-TCGA_fpkm_tpm %>% filter(ID %in% PRG) TCGA_fpkm_tpm4<-as.matrix(TCGA_fpkm_tpm3[,-1]) row.names(TCGA_fpkm_tpm4)<-TCGA_fpkm_tpm3$ID save(TCGA_fpkm_tpm4,file="data_temp\\TCGA_fpkm_tpm4.RData") load("data_temp\\TCGA_fpkm_tpm4.RData") #----437 cases TCGA_fpkm_tpm7<-my_t_tibble(TCGA_fpkm_tpm3,new_name_col_1 = "ID") TCGA_fpkm_tpm8<-TCGA_fpkm_tpm7 %>% filter(ID %in% TCGA_fpkm_tpm6$sample)#424 TCGA_fpkm_tpm9<-as.matrix(TCGA_fpkm_tpm8[,-1]) row.names(TCGA_fpkm_tpm9)<-TCGA_fpkm_tpm8$ID library(ConsensusClusterPlus) data_matr = sweep(TCGA_fpkm_tpm9,1, apply(TCGA_fpkm_tpm9,1,median,na.rm=T)) title<- "F:\\pyroptosis\\TCGA_COADREAD\\consensus cluster" results_424 = ConsensusClusterPlus(data_matr,maxK=10,reps=1000,pItem=0.8,pFeature=1, title=title,distance="pearson", clusterAlg="km") save(results_424,file="consensus cluster\\consensus cluster.RData") cluster2<-results[[2]] classID_1<-cluster2$consensusClass classID<-tibble(ID=colnames(data_matr),group=classID_1) save(classID,file="consensus cluster\\classID.Rdata") my_save_tibble(classID,file="consensus cluster\\classID.csv") load("consensus cluster\\classID.Rdata") #-------------- icl = calcICL(results,title=title,plot="pdf") icl[["clusterConsensus"]] ###--------differential analysis---- library(DESeq2) groups_1<-classID %>% mutate(case_id=ID, group=as.factor(group)) groups_2<- groups_1[,-1] %>% mutate(case_id=stringr::str_replace_all(case_id,"[-]", ".")) groups<-groups_2[,c(2,1)] save(groups,file="data_temp\\groups.RData") load("data_temp\\groups.RData") TCGA_exp_count1<-TCGA_exp_count %>% dplyr::select(ID,groups$case_id) TCGA_deseq<-as.matrix(TCGA_exp_count1[,-1]) row.names(TCGA_deseq)<-TCGA_exp_count$ID #ID is gene a<-groups$group dds <- DESeqDataSetFromMatrix(TCGA_deseq,DataFrame(a), ~a) DESeq_res <- DESeq(dds) res <- results(DESeq_res) DESeq_result<-res@listData logFC <- res$log2FoldChange names(logFC) <- row.names(res) #gene names geneList <- sort(logFC, decreasing = T) geneList <- -rev(geneList) save(geneList,file="data_temp\\geneList.RData") all_gene<-as_tibble(res@rownames) DES_data<-as_tibble(DESeq_result) DESeq_result1<-cbind(all_gene,DES_data) cludif_gene2<-DESeq_result1 %>% filter(padj<0.05) %>% filter(abs(log2FoldChange)>1) #489 cludif_gene2<-my_save_tibble(cludif_gene2,file="data_temp\\cludif_gene2.csv") DEG<-my_load_tibble("data_temp\\cludif_gene2.csv") ####-----uni cox------------------------------ pvalue<-vector() HR<-vector() for (i in 4:491){ uni_data <- tibble(time=as.numeric(TCGA_can_uni$OS.time), status=as.numeric(TCGA_can_uni$OS), x=TCGA_can_uni[[i]]) library(survival) fit<-coxph(Surv(time, status) ~ x, data=uni_data) summary_temp1<-summary(fit) pvalue[i]<-summary_temp1$coefficients[5] HR[i]<-summary_temp1$coefficients[2] } #-------- results_uni<-tibble(gene=colnames(TCGA_can_uni)[4:491], HR=HR[4:491], pvalue=pvalue[4:491]) result_uni <- results_uni %>% filter(pvalue<0.05) # 66 genes k=6--12 save(result_uni,file="data_temp\\results_uni.RData") load("data_temp\\results_uni.RData") ####-----------------lasso-------- library(glmnet) multi_gene_sheet<-my_load_tibble("data_temp\\multi_gene.xlsx") multi_gene<-multi_gene_sheet$gene data_lasso1<-TCGA_can_uni %>% mutate(OS.time=as.numeric(OS.time)) %>% mutate(OS=as.integer(OS)) %>% mutate(OS.time=as.integer(OS.time)) %>% as_tibble() %>% filter(is.na(OS.time)==F) data_lasso<-data_lasso1[,c(1:3,which(colnames(data_lasso1) %in% multi_gene))] save(data_lasso,file="data_temp\\data_lasso.RData") load("data_temp\\data_lasso.RData") x<-as.matrix(dplyr::select(data_lasso,4:37)) y<-cbind(time=data_lasso$OS.time,status=data_lasso$OS) cv.fit<-cv.glmnet(x, y, family="cox",nfolds =5) plot(cv.fit) cv.fit$lambda.min cv.fit$lambda.1se fit<-glmnet(x, y, family="cox") plot(fit) coefficients<-coef(fit,s=cv.fit$lambda.min) # or: cv.fit$lambda.1se Active.Index<-which(coefficients[,1]!=0) Active.coefficients<-coefficients[Active.Index] x<-x[,Active.Index] genes_1<-colnames(x) toString(genes_1)%>% stringr::str_replace_all(", ", "+") fit<-coxph(formula = Surv(time=OS.time, event=OS) ~FCRL1+WNT16+ GRIK2+ZMAT1+ZG16+DRD4+MAPK12+OR51B5+PRSS21+MAGEA3, data_lasso) # k=2-uni-multi-lasso--0.704 summary(fit) #10--0.698 fit3<-coxph(formula = Surv(time=OS.time, event=OS) ~FCRL1+ WNT16+ZMAT1+DRD4+MAPK12+OR51B5+PRSS21+MAGEA3, data_lasso) #k=6--uni--lasso--0.63 fit_step<-step(fit3)#, direction="forward") ###---------------forest plot----- forest_data1<-inner_join(data_score[,c(1:3,38)],TCGA_cli_info,by=c("sample"="ID")) forest_data<-forest_data1 %>%mutate(OS.time=as.numeric(OS.time)) %>% mutate(OS=as.numeric(OS)) forest_data[which(forest_data$tumor_T%in% c("Tis","t1")),6]<-"T1" forest_data[which(forest_data$tumor_T %in% c("T4a","T4b")),6]<-"T4" forest_data[which(forest_data$tumor_N %in% c("N1a","N1b","N1c")),7]<-"N1" forest_data[which(forest_data$tumor_N %in% c("N2a","N2b")),7]<-"N2" forest_data[which(forest_data$tumor_M %in% c("M1a","M1b")),8]<-"M1" #forest_data[which(forest_data$tumor_M =="'--"),8]<-"MX" forest_data[which(forest_data$stage=="Stage IA"),9]<-"Stage I" forest_data[which(forest_data$stage %in% c("Stage IIA","Stage IIB","Stage IIC")),9]<-"Stage II" forest_data[which(forest_data$stage %in% c("Stage IIIA","Stage IIIB","Stage IIIC")),9]<-"Stage III" forest_data[which(forest_data$stage %in% c("Stage IVA","Stage IVB")),9]<-"Stage IV" forest_data$tumor_M<-factor(forest_data$tumor_M,levels = c("M0","M1","MX","'--")) forest_data$stage <-factor(forest_data$stage,levels = c("Stage I","Stage II","Stage III","Stage IV","'--")) save(forest_data,file="data_temp\\forest_data.RData") load("data_temp\\forest_data.RData") forest_data1<-forest_data%>% distinct(sample,.keep_all=T) model <- coxph(Surv(time=OS.time, event=OS) ~ age+gender+tumor_T+tumor_N +tumor_M+stage+score, data=forest_data) library(survminer) forest_plot<-ggforest(model, data = forest_data1, main = "Hazard ratio", fontsize = 0.5, noDigits = 2) ggsave(file="plot\\forest_plot.pdf",forest_plot,height = 12,width=15,units = "cm") library(survminer) data_lasso1<-data_lasso %>% distinct(sample,.keep_all=T) mogel_forest<-ggforest(fit, data = data_lasso1, main = "Hazard ratio", fontsize = 0.6, noDigits = 3) ggsave(file="plot\\mogel_forest.pdf",mogel_forest,height = 10,width=15,units = "cm") #-------------uni for model genes mode_sheet<-my_load_tibble("data_temp\\mode_gene_10.xlsx") mode_gene_exp<-data_lasso1 %>% dplyr::select(1:3,mode_sheet$gene) save(mode_gene_exp,file="data_temp\\mode_gene_exp.RData") load("data_temp\\mode_gene_exp.RData") pvalue<-vector() HR<-vector() for (i in 4:13){ uni_data <- tibble(time=as.numeric(mode_gene_exp$OS.time), status=as.numeric(mode_gene_exp$OS), x=mode_gene_exp[[i]]) library(survival) fit<-coxph(Surv(time, status) ~ x, data=uni_data) summary_temp1<-summary(fit) pvalue[i]<-summary_temp1$coefficients[5] HR[i]<-summary_temp1$coefficients[2] } mode_gene_uni<-tibble(gene=colnames(mode_gene_exp)[4:13], HR=HR[4:13], pvalue=pvalue[4:13]) my_save_tibble(mode_gene_uni,file="data_temp\\mode_gene_uni.csv") mode_uni_gene1<-my_load_tibble("data_temp\\mode_gene_uni.csv") mode_uni_gene2<-mode_uni_gene1 %>% filter(pvalue<0.05) mode_uni_gene<- mode_uni_gene2$gene col_index<-which(colnames(mode_gene_exp) %in% mode_uni_gene) mode_data_multi<- mode_gene_exp %>% dplyr::select(2:3,col_index) fit<-coxph(formula = Surv(time=OS.time, event=OS) ~ ., mode_data_multi) summary(fit) #PP <- matrix(1,10,10) modegene_RR <- matrix(0,10,10) row.names(modegene_RR)<-colnames(mode_gene_exp[,-c(1:3)]) colnames(modegene_RR)<-colnames(mode_gene_exp[,-c(1:3)]) for (i in 4:13) { for (j in 4:13) { cor_res<-cor.test(mode_gene_exp[[i]],mode_gene_exp[[j]]) # PP[i-3,j-3]<-cor_res$p.value modegene_RR[i-3,j-3]<-cor_res$estimate } } save(modegene_RR,file="data_temp\\modegene_RR.RData") load("data_temp\\modegene_RR.RData") library(tidyr) modegene_RR1<-as_tibble(modegene_RR, rownames = "gene1")%>% gather(gene2, value, -gene1) my_save_tibble(modegene_RR1,file="data_temp\\modegene_RR1.csv") modegene_RR1<-my_load_tibble("data_temp\\modegene_RR.csv") modegene_RR2<-modegene_RR1%>% distinct(value,.keep_all = TRUE) %>% mutate(linetype=ifelse(value>0,1,2)) my_save_tibble(modegene_RR2,file="data_temp\\modegene_RR2.csv") #-------- results_mode<-tibble(gene=colnames(mode_gene_exp)[4:13], HR=HR[4:13], pvalue=pvalue[4:13]) ####----------------grouping by score----------------- risk_score<-predict(fit) data_score<-data_lasso data_score<-data_score %>% mutate(score=risk_score) median_score<-median(risk_score) data_score <- data_score %>% mutate(group=ifelse(score>median_score,"high","low")) my_save_tibble(data_score,file="data_temp\\data_score.csv") data_score<-my_load_tibble("data_temp\\data_score.csv") ####---expression of PRG in risk group------- PRG_score_data<-TCGA_tpm_all[,c(1:3,which(colnames(TCGA_tpm_all) %in% PRG))] save(PRG_score_data,file="data_temp\\PRG_score_data.RData") PRG_score_wil<-tibble() for (i in 4:35){ PRG_wil<-wilcox.test(data=PRG_score_data,PRG_score_data[[i]]~group) PRG_score_wil[i-3,"gene"] <-colnames(PRG_score_data)[i] PRG_score_wil[i-3,"p_value"]<-PRG_wil$p.value } save(PRG_score_wil,file="data_temp\\PRG_score_wil.RData") load("data_temp\\PRG_score_wil.RData") PRG_score_res<-PRG_score_wil %>% filter(p_value<0.05) library(reshape2) library(ggpubr) PRG_score_log<-PRG_score_data %>% modify_at(c(colnames(PRG_score_data[,c(-1:-3)])),function(x) log(x+0.01)) PRG_score1<-melt(PRG_score_log[,-2]) PRG_score_boxplot<-ggplot(PRG_score1,aes(x = variable, y = value, fill = group))+ geom_boxplot()+theme_classic()+ stat_compare_means( aes(label = ..p.signif..),method = "wilcox.test", label.y = 8,size=3)+ xlab("Pyroptosis related genes")+ylab("Relative expression")+ theme(axis.text.x = element_text(angle=90,size=10)) ggsave("plot\\PRG_score_boxplot.pdf",PRG_score_boxplot,width=20,height=12,units="cm") #---------distribution of score------------------ sort_score <- data_score %>% arrange(score) %>% mutate(time=OS.time/365) %>% mutate(status=ifelse(OS==0,"Alive","Dead")) sort_score1<-sort_score %>% distinct(sample,.keep_all=T) sort_score1$index<-c(1:dim(sort_score1)[1]) Plot_A<-ggplot(sort_score1,aes(x=index,y=score))+ geom_point()+theme_classic()+ theme(axis.title.x = element_blank())+ theme(axis.text.x = element_blank(),axis.ticks.x = element_blank()) Plot_B<-ggplot(sort_score1,aes(x=index,y=time))+ geom_point(aes(color=status))+theme_classic()+ theme(axis.title.x = element_blank())+ theme(axis.text.x = element_blank(),axis.ticks.x = element_blank()) library(patchwork) Riskcore_AB<-Plot_A/Plot_B ggsave("plot\\Riskcore_AB.pdf",Riskcore_AB,width=12, height=6,units="cm") library(tidyr) mat_d<-as.matrix(sort_score1[,4:37]) my_scale<-function(x){ return((x-mean(x))/sd(x))} mat_d_1<-apply(mat_d, 2, as.numeric) scale_data<-apply(mat_d_1, 2, my_scale) scale_tibble<-as_tibble(scale_data) scale_tibble$index<-c(1:dim(sort_score1)[1]) scale_tibble1 <- scale_tibble %>% gather("FCRL1", "WNT16","GRIK2","ZMAT1","ZG16","DRD4","MAPK12","OR51B5", "PRSS21","MAGEA3", key="gene",value="value") %>% dplyr::select("index","gene","value") scale_tibble2 <-scale_tibble1 %>% mutate(value=ifelse(value > 2,2,value)) %>% mutate(value=ifelse(value < -2,-2,value)) save(scale_tibble2,file="data_temp\\scale_tibble1.RData") Plot_C<-ggplot(scale_tibble2,aes(x=index,y=gene,fill=value))+ scale_fill_gradient2(low = "#20B2AA", high = "#FF3030",mid = "white")+ geom_tile()+ theme_classic()+ theme(axis.title.y = element_blank()) Plot_riskscore<-Plot_A/Plot_B/Plot_C ggsave("plot\\Riskcore_plot1.pdf",Plot_riskscore,width=12,height=16,units="cm") #------volcanoPlot x<-DESeq_result2$pvalue DESeq_result2<-na.omit(DESeq_result1) %>% mutate(log=(-log10(pvalue))) DESeq_result2<-DESeq_result2[rev(order(DESeq_result2$log)),] DESeq_result3<-DESeq_result2[-1,] DEG_plot<-my_volcanoPlot( logFC=DESeq_result3$log2FoldChange, pvalue=DESeq_result3$padj, logFC.cutoff = 1, pvalue.cutoff = 0.05, xlab = "Log2(Fold change)", ylab = "-Log10(Pvalue)", main = "", xlim = NULL, ylim = NULL, linetype = "dashed", genes = NULL, highlight_genes = NULL ) ggsave("plot\\DEG_volplot.pdf", DEG_plot,width=10,height=12,units="cm") ####-------GO------- EC_genelist <- DEG %>% dplyr::select(c(1,3)) EC_genes <- DEG %>% dplyr::select(value,log2FoldChange) %>% as.data.frame() TCGA_ana<-TCGA_can_uni require(clusterProfiler) require(org.Hs.eg.db) org.db<-"org.Hs.eg.db" enrichment_results<-list() enrichment_results<-my_enrichment_DEG_2(DEG$value,"All","All") enrichment_results[["BP"]]<- enrichGO(gene = DEG$value, OrgDb = org.db, keyType = "SYMBOL", ont = "BP", pvalueCutoff=0.05, qvalueCutoff = 0.05, readable=F) enrichment_results[["MF"]]<- enrichGO(gene = DEG$value, OrgDb = org.db, keyType = "SYMBOL", ont = "MF", pvalueCutoff=0.05, qvalueCutoff = 0.05, readable=F) enrichment_results[["CC"]]<- enrichGO(gene = DEG$value, OrgDb = org.db, keyType = "SYMBOL", ont = "CC", pvalueCutoff=0.05, qvalueCutoff = 0.05, readable=F) save(enrichment_results,file="data_temp\\enrichment_results.RData") load("data_temp\\enrichment_results.RData") #####------------------ dotplot for GO/KEGG------------- library(enrichplot) dotplot(enrichment_results[["BP"]],color = "p.adjust", showCategory = 10, title="Top10 GO-BP terms") GO_all<- enrichGO(gene = DEG$value, OrgDb = org.db, keyType = "SYMBOL", ont = "ALL", pvalueCutoff=0.2, qvalueCutoff = 0.2, readable=F) GO_dotplot<-dotplot(GO_all,color = "p.adjust", showCategory = 10, title="Top10 GO terms of each sub-class",split="ONTOLOGY")+ facet_grid(ONTOLOGY~.,scale="free") KEGG_gene<-clusterProfiler::bitr(DEG$value,fromType = "SYMBOL",toType = "ENTREZID", OrgDb="org.Hs.eg.db") KEGG_res<-enrichKEGG(gene = KEGG_gene$ENTREZID,organism = "hsa",keyType = "kegg", pvalueCutoff = 0.2)#,pAdjustMethod = "BH") KEGG_dotplot<-dotplot(KEGG_res,color = "p.adjust", showCategory = 10, title="Top10 KEGG pathways") ggsave("plot\\GO_dotplot.pdf", GO_dotplot,width=18,height=18,units="cm") ggsave("plot\\KEGG_dotplot.pdf", KEGG_dotplot,width=18,height=18,units="cm") library(GOplot) top_index<-c(1:9) EC_process<-list() EC_david<-list() lists<-prepare_gene_mapping() # for (ontol in c("BP","MF","CC","KEGG")){ enrichment_result<-enrichment_results[[ontol]] #if (ontol=="KEGG") top_index=c(1:2) else top_index=c(1:5) EC_process<-enrichment_result$Description[top_index] enrichment_result_geneID<-enrichment_result$geneID if (ontol=="KEGG") enrichment_result_geneID<-enrichment_result_geneID %>% map(~(my_map_gene(.x))) EC_david<-data.frame(Category=rep(ontol,length(top_index)), ID=enrichment_result$ID[top_index], Term=enrichment_result$Description[top_index], Genes=enrichment_result_geneID[top_index] %>% stringr::str_replace_all("/",","), adj_pval=enrichment_result$p.adjust[top_index]) circ <- circle_dat(EC_david, EC_genelist) chord <- chord_dat(data = circ, genes = EC_genes, process = EC_process) chord_plot<-GOChord(chord, space = 0.02, gene.order = 'logFC', gene.space = 0.25, gene.size = 5) ggsave(paste("F:\\pyroptosis\\TCGA_COADREAD\\plot\\chord_plot_",ontol,".pdf",sep=""), chord_plot,width=40,height=40,units="cm") } View(EC_david[["BP"]]) ####--------------------GSVA----------- library(GSVA) library(limma) library(pheatmap) library(purrr) original_gmt_GSVA<-read_lines("D:\\GEO\\geneset\\c2.cp.kegg.v7.1.symbols.gmt") original_gmt_GSVA<-read_lines("D:\\GEO\\geneset\\c7.all.v7.1.symbols.gmt") strsplit_no_name<-function(gmt.list_layer){ as.character(unlist(strsplit(gmt.list_layer,split="\t",fixed=T)))[-2] } database_list_GSVA<-lapply(original_gmt_GSVA,strsplit_no_name) #database_list_GSVA1<-list() for (layers in 1:length(database_list_GSVA)){ names(database_list_GSVA)[layers]<-database_list_GSVA[layers][[1]][1] database_list_GSVA[layers][[1]]<-database_list_GSVA[layers][[1]][-1] } GSVA_exp_matrix<-as.matrix(TCGA_fpkm_tpm[,-1]) rownames(GSVA_exp_matrix)<-TCGA_fpkm_tpm$ID GSVA_score<-gsva(GSVA_exp_matrix, database_list_GSVA, method="gsva", kcdf="Gaussian", abs.ranking=FALSE, mx.diff=T,verbose=FALSE, parallel.sz=1 ) save(GSVA_score,file="data_temp\\GSVA_score_c7.RData") load("data_temp\\GSVA_score_c7.RData") design <- model.matrix(~ factor(groups$group)) colnames(design) <- c("group1", "group1vs2") fit<-lmFit(GSVA_score,design) fit <- eBayes(fit) allGeneSets <- topTable(fit, coef="group1vs2", number=Inf) DEgeneSets_c7 <- topTable(fit, coef="group1vs2", number=Inf, p.value=0.05, adjust="BH") #p.value namely adj.p save(DEgeneSets_c7,file="data_temp\\DEgeneSets_c7.RData") load("data_temp\\DEgeneSets_c7.RData") #res <- decideTests(fit, p.value=0.05) #summary(res) ###----------------pheatmap DE_c2<-row.names(DEgeneSets_c2) group_h<-classID[order(classID$group),] GSVA_score1<-as_tibble(GSVA_score) row_name<-tibble(pathway=row.names(GSVA_score)) GSVA_score2<-cbind(row_name,GSVA_score1) GSVA_score_c2<-GSVA_score2 %>% filter(pathway %in% DE_c2) GSVA_score3<-GSVA_score_c2%>% dplyr::select(pathway,group_h$ID) GSVA_score4<-as.matrix(GSVA_score3[,-1]) row.names(GSVA_score4)<-GSVA_score3$pathway annotation_col = data.frame(group_h =as.factor(group_h$group)) GSVA_c2_pheatmap<-pheatmap(GSVA_score4, annotation_col = annotation_col, cluster_row = FALSE, cluster_col = FALSE, show_colnames = F,fontsize = 5)#,show_rownames = F) ggsave("plot\\GSVA_c2_pheatmap.pdf",GSVA_c2_pheatmap,width=20,height=20,units="cm") ####-------------------GSEA-------------- library(ontologyIndex) obo<-get_OBO("GO\\go_basic .obo", propagate_relationships = c("is_a","regulates"), extract_tags = "minimal") immune <- get_descendants(ontology=obo, roots="GO:0002376") immune_anno_tibble <- my_get_anno_tibble(immune) hallmark <- read.gmt("D:\\GEO\\geneset\\hallmarker.gmt") #c6 <- read.gmt("geneset\\c6.all.v7.2.symbols.gmt") kegg <- read.gmt("D:\\GEO\\geneset\\c2.cp.kegg.v7.1.symbols.gmt") all_set <- c(hallmark,c6,kegg,c5bp) #----hallmmarker---- GSEA_hallmark <- GSEA(geneList, TERM2GENE=hallmark, pvalueCutoff=1, verbose=F) GSEA_hallmark_result<-GSEA_hallmark@result[,-c(1:4)] %>% as_tibble(rownames="ont") GSEA_hallmark_select<-GSEA_hallmark_result[c(1:3),] selected_ont_hallmark <-c("HALLMARK_HEDGEHOG_SIGNALING", "HALLMARK_ALLOGRAFT_REJECTION") my_GSEA_output(GSEA_hallmark, selected_ont_hallmark) GSEA_kegg <- GSEA(geneList, TERM2GENE=kegg, pvalueCutoff=0.3, verbose=F) GSEA_kegg_result<-GSEA_kegg@result[,-c(1:4)] %>% as_tibble(rownames="ont") save(GSEA_kegg_result,file="data_temp\\GSEA_kegg_result.RData") selected_ont_kegg <-c("KEGG_ANTIGEN_PROCESSING_AND_PRESENTATION", "KEGG_NATURAL_KILLER_CELL_MEDIATED_CYTOTOXICITY", "KEGG_O_GLYCAN_BIOSYNTHESIS", "KEGG_PROTEIN_EXPORT", "KEGG_PROTEASOME", "KEGG_STARCH_AND_SUCROSE_METABOLISM", "KEGG_DRUG_METABOLISM_OTHER_ENZYMES" , "KEGG_STEROID_HORMONE_BIOSYNTHESIS" , "KEGG_OXIDATIVE_PHOSPHORYLATION") my_GSEA_output(GSEA_kegg, selected_ont_kegg) #-----immune from Geneontology------------- gse_go<-gseGO(geneList=geneList,ont="BP",OrgDb=org.Hs.eg.db, keyType = "SYMBOL", pvalueCutoff=1) gse_go<-gseGO(geneList=geneList,ont="ALL",OrgDb=org.Hs.eg.db, keyType = "SYMBOL", pvalueCutoff=1) BP_set <- gse_go@geneSets ALL_set <- gse_go@geneSets immune_set_ontname <- my_get_set_ontname(BP_set,immune_anno_tibble,msigDB=T) immune_set_ontname <- my_get_set_ontname(ALL_set,immune_anno_tibble,msigDB=T) GSEA_immune1 <- GSEA(geneList, TERM2GENE=immune_set_ontname, pvalueCutoff=1,verbose=F) GSEA_immune_result<-GSEA_immune1@result[,-c(1:4)] %>% as_tibble(rownames="ont") save(GSEA_immune_result,file="data_temp\\GSEA_immune_result.RData") GSEA_result_select<-GSEA_immune_result[c(1:6),] selected_ont <-c("GO_NATURAL_KILLER_CELL_ACTIVATION_INVOLVED_IN_IMMUNE_RESPONSE", "GO_NEGATIVE_REGULATION_OF_LEUKOCYTE_MIGRATION", "GO_LEUKOCYTE_MEDIATED_CYTOTOXICITY", "GO_POSITIVE_REGULATION_OF_T_CELL_MEDIATED_CYTOTOXICITY", "GO_T_CELL_MEDIATED_CYTOTOXICITY", "GO_T_CELL_MEDIATED_IMMUNITY") #BP-6,ALL-8 my_GSEA_output(GSEA_immune1, selected_ont) ####--------------MSI---------------- MSIs_score<-my_load_tibble("MSIsensor_Score.txt") MSIs_score1<-MSIs_score[,c(2,4)] %>% mutate(Patient.ID=stringr::str_replace_all(Patient.ID,"[-]", ".")) MSIs_data<-inner_join(MSIs_score1,data_score_1,by=c("Patient.ID"="sample")) save(MSIs_data,file="data_temp\\MSIs_data.RData") load("data_temp\\MSIs_data.RData") MSIs_data_log<-MSIs_data %>% mutate(log= log(MSIsensor.Score+0.001)) MSIs_wil_1<-wilcox.test(data=MSIs_data,MSIsensor.Score~group) ggplot(MSIs_data_log,aes(group,log))+ geom_boxplot()+geom_jitter() library(ggbeeswarm) library(ggpubr) ggplot(MSIs_data_log,aes(x=group,y=log))+ geom_beeswarm(aes(color=factor(group)),cex=2, priority = "density")+ theme_bw()+ theme( panel.grid = element_blank() ) ggplot(MSIs_data_log,aes(x=group,y=log,color=group))+ geom_boxplot()+ theme_classic()+ stat_compare_means(label = "p.signif", label.x = 1.5, label.y = 4.5)+#aes(label = ..p.format..) or aes(label = paste0(??p =??, ..p.format..)) ylab("MSIsensor log(score)")#+ theme(axis.title.x =element_text(color="black")) ####-------------TIP------------------ steps_score1<-my_load_tibble("TIP\\ssGSEA.normalized.steps_score.txt") steps_score2<-my_t_tibble(steps_score1,new_name_col_1 = "ID") %>% filter(substr(ID,14,15)=="01") steps_score3<-steps_score2 %>% mutate(ID=substr(ID,1,12)) steps_score4<-inner_join(data_score_1,steps_score3,by=c("sample"="ID")) index<-duplicated(steps_score4$sample) steps_score<-steps_score4[!index,] save(steps_score,file="data_temp\\steps_score.RData") load("data_temp\\steps_score.RData") library(reshape2) library(ggpubr) steps_score1<-melt(steps_score[,-2]) TIPcor_boxplot<-ggplot(steps_score1,aes(x = variable, y = value, fill = group))+ geom_boxplot(outlier.size = 0.4)+theme_classic()+ stat_compare_means( aes(label = ..p.signif..),method = "wilcox.test", label.y = 6)+ xlab("Cancer Immunity Cycle")+ylab("Anti-cancer Immunity")+ theme(axis.text.x = element_text(angle=90,size=10)) #,aes(color=variable))) ggsave(TIPcor_boxplot,file="plot\\TIPcor_boxplot.pdf",width = 12,height = 12,units = "in") library(ggpubr) library(ggthemes) TIPcor_result1<-TIPcor_result %>% arrange(-cor) TIPcor_result2<-as.data.frame(TIPcor_result1) TIPcor_lollipop<-ggdotchart(TIPcor_result2, x = "gene", y = "cor", color = "p.value", # ????cyl??????ɫ sorting = "ascending", rotate = TRUE, # ????תΪ??ֱ dot.size = "cor", add = "segments", # ???Ӱ??? add.params = list(color = "#000000", size = 0.8), ggtheme = theme_pander() , # ?ı????? ylab="correlation", xlab="steps of immune cycle" )+scale_color_gradient2(low = "red", mid = "#FF8888", high = "#66CDAA") ggsave(TIPcor_lollipop,file="plot\\TIPcor_lollipop.pdf",width = 6,height = 6,units = "in") steps_score_wil<-tibble() for (i in 4:26){ steps_wil<-wilcox.test(data=steps_score,steps_score[[i]]~group) steps_score_wil[i-3,"steps"] <-colnames(steps_score)[i] steps_score_wil[i-3,"p_value"]<-steps_wil$p.value } save(steps_score_wil,file="data_temp\\steps_score_wil.RData") load("data_temp\\steps_score_wil.RData") steps_score_res<-steps_score_wil %>% filter(p_value<0.05) TIP_cor<-tibble() for (i in 4:length(steps_score)){ TIPcor_res<-cor.test(steps_score$score,steps_score[[i]]) TIP_cor[i-3,"gene"]<-colnames(steps_score[i]) TIP_cor[i-3,"p.value"]<-TIPcor_res$p.value TIP_cor[i-3,"cor"]<-TIPcor_res$estimate } save(TIP_cor,file="data_temp\\TIP_cor.RData") load("data_temp\\TIP_cor.RData") TIPcor_result<-TIP_cor %>% filter(p.value<0.05) steps_score1<-steps_score[,c(2,which((colnames(steps_score) %in% TIPcor_result$gene)))] for (i in 2:11) { pvalue<-paste("p=",signif(TIPcor_result$p.value[[i-1]],3) ,sep="") R<-paste("R=",signif(TIPcor_result$cor[[i-1]],3),sep="") p<-ggplot(steps_score1,aes(x=score,y=steps_score1[[i]]))+geom_point(color="#A52A2A",size=1.3)+theme_classic()+ geom_smooth(method="lm",color="#FF6347",)+ annotate("text", x = -5, y = max(steps_score1[i]), label = paste(pvalue,",",R,sep = ""), fontface = "italic", size =3.5)+ ylab(names(steps_score1[i]))+ geom_rug(color="#A52A2A") ggsave(p,file=paste("plot\\",names(steps_score1[i]),"_cor.pdf",sep=""),width=10,height=10,units="cm") } #------------------- sGenes_Exp1<-my_load_tibble("TIP\\SignatureGenes.Expression.txt") sGenes_Exp2<-my_t_tibble(sGenes_Exp1[,-2],new_name_col_1 = "ID") %>% filter(substr(ID,14,15)=="01")%>% mutate(ID=substr(ID,1,12)) sGenes_steps<-sGenes_Exp1[,c(1,2)] risk_group<-data_score2[,c(1,38,39)] %>% mutate(group=ifelse(group=="low",1,2)) risk_group_order<-risk_group[order(risk_group$group),] save(risk_group_order,file="risk_group_order.RData") load("risk_group_order.RData") sGenes_Exp3<-inner_join(risk_group_order,sGenes_Exp2,by=c("sample"="ID")) index<-duplicated(sGenes_Exp3$sample) sGenes_Exp<-sGenes_Exp3[!index,] save(sGenes_Exp,file="data_temp\\sGenes_Exp.RData") #sGenes_Exp<- my_tibble_scale2(sGenes_Exp_data, modify=2) sGenes_Exp_log<-sGenes_Exp%>% modify_at(c(colnames(sGenes_Exp[,c(-1:-3)])),function(x) log(x+0.0001)) sGenes_Exp4<-my_t_tibble(sGenes_Exp_log[,c(-2,-3)],new_name_col_1 = "gene") #sGenes_Exp5<-sGenes_Exp4 %>% dplyr::select(gene,group_h$ID) sGenes_Exp_matrix1<-as.matrix(sGenes_Exp4[,-1]) row.names(sGenes_Exp_matrix1)<-sGenes_Exp4$gene annotation_col = data.frame(group=as.factor(sGenes_Exp$group)) annotation_row = data.frame(step=as.factor(sGenes_steps$Steps)) row.names(annotation_col)<-sGenes_Exp$sample row.names(annotation_row)<-sGenes_Exp4$gene bk = unique(c(seq(-4,4, length=100))) pheatmap(sGenes_Exp_matrix1, annotation_col=annotation_col, annotation_row=annotation_row, cluster_row = FALSE, cluster_col = FALSE, show_colnames = F,fontsize = 5,scale = "row", breaks = bk, color = colorRampPalette(c("yellow","white", "blue"))(100)) #color = colorRampPalette(c("navy", "white", "firebrick3"))(100)) ###-------------survival plot------------- library(survminer) #data_score_<-data_score%>%mutate(OS=ifelse(OS.time>60*30,0,OS))%>% #mutate(OS.time=ifelse(OS.time>60*30,60*30,OS.time)) fit_2 <- survfit(Surv(OS.time/30, OS) ~ group, data = data_score2) survplot<-ggsurvplot(fit_2,conf.int=T,pval = T,risk.table = T,xlab="Time(month)") ggsave("plot\\survplot.pdf", survplot,width=8.5,height=8.5,units="cm") #for CC surCC_data1<-inner_join(classID,TCGA_can_uni,by=c("ID"="sample")) surCC_data<-surCC_data1%>% distinct(ID,.keep_all=T) fit_CC <- survfit(Surv(OS.time/30, OS) ~ group, data =surCC_data) survplot<-ggsurvplot(fit_CC,conf.int=T,pval = T,risk.table = T,xlab="Time(month)")#p=0.93 ####------------------immune related-------------------- ###-------------------------ICB--------------------- TCGA_tpm_all_1<- my_t_tibble(TCGA_fpkm_tpm,new_name_col_1 = "ID") data_score_1<-data_score[,c(1,38,39)] save(data_score_1,file="data_temp\\data_score_1.RData") load("data_temp\\data_score_1.RData") TCGA_tpm_all<-inner_join(data_score_1,TCGA_tpm_all_1,by=c("sample"="ID")) save(TCGA_tpm_all,file="data_temp\\TCGA_tpm_all.RData") load("data_temp\\TCGA_tpm_all.RData") ICB_sheet<-my_load_tibble("ICB_genes.xlsx") ICB_gene<-ICB_sheet$gene TCGA_tpm_ICB<-TCGA_tpm_all %>% dplyr::select(sample,group,score,ICB_gene) save(TCGA_tpm_ICB,file="data_temp\\TCGA_tpm_ICB.RData") load("data_temp\\TCGA_tpm_ICB.RData") ICB_cor<-tibble() for (i in 4:length(TCGA_tpm_ICB)){ cor_res<-cor.test(TCGA_tpm_ICB$score,TCGA_tpm_ICB[[i]]) ICB_cor[i-3,"gene"]<-colnames(TCGA_tpm_ICB[i]) ICB_cor[i-3,"p.value"]<-cor_res$p.value ICB_cor[i-3,"cor"]<-cor_res$estimate } save(ICB_cor,file="data_temp\\ICB_cor.RData") load("data_temp\\ICB_cor.RData") ICB_cor1<-ICB_cor %>% filter(p.value<0.05) load("data_temp\\TCGA_tpm_ICB.RData") ICB_cor_data<-cor(TCGA_tpm_ICB[3:23]) library(corrplot) corrplot(corr = ICB_cor_data,order="original",type="upper",tl.pos="full", tl.cex=0.5,tl.col="black",method="pie" ) corrplot(corr = ICB_cor_data,add=TRUE, type="lower", order="original",diag=FALSE,tl.pos="n", cl.pos="n", ) #corrplot(corr = ICB_cor_data,add=TRUE, type="lower", method="number", # order="original",diag=FALSE,tl.pos="n", cl.pos="n", number.cex =0.5) ###-----------------TCIA----------------- immune_proportion<-my_load_tibble("immune_proportion_TCIA.tsv") immune_prop<-unique(immune_proportion[,c(-2,-5)]) colnames(immune_prop)<-c("ID","cell","value") library(reshape2) immune_TCIA<-dcast(immune_prop,ID~cell) %>% mutate(ID=stringr::str_replace_all(ID,"[-]", ".")) immune_TCIA_score<-inner_join(data_score_1,immune_TCIA,by=c("sample"="ID")) save(immune_TCIA_score,file="data_temp\\immune_TCIA_score.RData") load("data_temp\\immune_TCIA_score.RData") TCIA_cor<-tibble() for (i in c(4:14)){ cor_res<-cor.test(immune_TCIA_score$score,immune_TCIA_score[[i]]) TCIA_cor[i-3,1] <-colnames(immune_TCIA_score)[i] TCIA_cor[i-3,2]<-cor_res$p.value TCIA_cor[i-3,3]<-cor_res$estimate } colnames(TCIA_cor)<-c("cell","p_value","cor") save(TCIA_cor,file="data_temp\\TCIA_cor.RData") load("data_temp\\TCIA_cor.RData") TCIA_cor_res<-TCIA_cor %>% filter(p_value<0.05) TCIA_wil<-tibble() for (i in c(4:14)){ wil_res<-wilcox.test(data=immune_TCIA_score,immune_TCIA_score[[i]]~group) TCIA_wil[i-3,1] <-colnames(immune_TCIA_score)[i] TCIA_wil[i-3,2]<-wil_res$p.value } colnames(TCIA_wil)<-c("cell","p_value") save(TCIA_wil,file="data_temp\\TCIA_wil.RData") load("data_temp\\TCIA_wil.RData") TCIA_wil_res<-TCIA_wil %>% filter(p_value<0.05) immune_TCIA_score1<-immune_TCIA_score[,c(2,which(colnames(immune_TCIA_score) %in% TCIA_cor_res$cell))] for (i in 2:6) { pvalue<-paste("p=",signif(TCIA_cor_res$p_value[[i-1]],3) ,sep="") R<-paste("R=",signif(TIPcor_result$cor[[i-1]],3),sep="") p<-ggplot(immune_TCIA_score1,aes(x=score,y=immune_TCIA_score1[[i]]))+geom_point(color="#A52A2A",size=1.3)+theme_classic()+ geom_smooth(method="lm",color="#FF6347",)+ annotate("text", x = -5, y = max(immune_TCIA_score1[i]), label = paste(pvalue,",",R,sep = ""), fontface = "italic", size =3.5)+ ylab(names(immune_TCIA_score1[i]))+ geom_rug(color="#A52A2A") ggsave(p,file=paste("plot\\TCIA\\",names(immune_TCIA_score1[i]),"_cor.pdf",sep=""),width=10,height=10,units="cm") } immune_TCIA_group1<-immune_TCIA_score[,c(1,3,which(colnames(immune_TCIA_score) %in% TCIA_wil_res$cell))] immune_TCIA_group2<-melt(immune_TCIA_group1) #immune_TCIA_group2$group<- factor(immune_TCIA_group2$group,levels=c("low,high")) TCIA_boxplot<-ggplot(immune_TCIA_group2,aes(x = variable, y = value, fill = group))+ geom_boxplot()+theme_classic()+ stat_compare_means( aes(label = ..p.signif..),method = "wilcox.test")+ xlab("immune cells")+ylab("TCIA score")+ theme(axis.text.x = element_text(angle=90,size=10)) # ylim(-0.05,0.17) ggsave(TCIA_boxplot,file="plot\\TCIA\\TCIA_boxplot.pdf",width=10,height=10,units="cm") ###---------------------------cibersort------------------------------------ source("D:\\GEO\\geneset\\CIBERSORT.R") LM22 <-my_load_tibble("D:\\GEO\\geneset\\LM22.txt") #TCGA_exp_cancer<-my_t_tibble(TCGA_mRNA_cancer,new_name_col_1 = "gene") #my_save_tibble(TCGA_exp_cancer,file="data_temp\\TCGA_exp_cancer.csv") write.table(TCGA_fpkm_tpm,file = "data_temp\\sig_data.txt",sep="\t",row.names = F) sig_data<-my_load_tibble("data_temp\\sig_data.txt") ciber_results <- CIBERSORT("D:\\GEO\\geneset\\LM22.txt","data_temp\\sig_data.txt", perm = 1000, QN = F) ciber_res<-as_tibble(ciber_results,rownames="ID") my_save_tibble(ciber_res,file="data_temp\\ciber_res.csv") ciber_res<-my_load_tibble("data_temp\\ciber_res.csv") ciber_res1<-ciber_res[,c(-24:-26)] cell_sig<-inner_join(data_score_1,ciber_res,by=c("sample"="ID")) cor.test(cell_sig$score,cell_sig$B.cells.naive) cor.test(cell_sig$score,cell_sig$Plasma.cells) cor.test(cell_sig$score,cell_sig$T.cells.CD8) cor.test(cell_sig$score,cell_sig$T.cells.follicular.helper) wilcox.test(data=cell_sig,`T cells CD4 memory activated`~group)#0.0515 wilcox.test(data=cell_sig,NK.cells.resting~group) wilcox.test(data=cell_sig,Dendritic.cells.activated~group) ###-----------------xCell----------------- xCell_TCGA1<-my_load_tibble("xCell\\xCell_tpm.txt") xCell_TCGA2<-my_t_tibble(xCell_TCGA1,new_name_col_1 = "ID") xCell_TCGA3<-inner_join(data_score2[,c(1,38,39)],xCell_TCGA2,by=c("sample"="ID")) xCell_cor<-tibble() for (i in c(4:70)){ cor_res<-cor.test(xCell_TCGA3$score,xCell_TCGA3[[i]]) xCell_cor[i-3,1] <-colnames(xCell_TCGA3)[i] xCell_cor[i-3,2]<-cor_res$p.value xCell_cor[i-3,3]<-cor_res$estimate } colnames(xCell_cor)<-c("cell","p_value","cor") save(xCell_cor,file="data_temp\\xCell_cor.RData") load("data_temp\\xCell_cor.RData") xCell_cor_res<-xCell_cor %>% filter(p_value<0.05) xCell_wil<-tibble() for (i in 4:70){ xCell_wil_res<-wilcox.test(data=xCell_TCGA3,xCell_TCGA3[[i]]~group) xCell_wil[i-3,"cell"] <-colnames(xCell_TCGA3)[i] xCell_wil[i-3,"p_value"]<-xCell_wil_res$p.value } save(xCell_wil,file="data_temp\\xCell_wil.RData") load("data_temp\\xCell_wil.RData") xCell_wil_1<-xCell_wil %>% filter(p_value<0.05) #ggplot(xCell_TCGA3,aes(group,`CD4+ Tcm`))+geom_violin() xCell_pheatmap1<-inner_join(risk_group_order,xCell_TCGA3,by="sample") xCell_pheatmap_log<-xCell_pheatmap1[,c(-3:-5)]%>% modify_at(c(colnames(xCell_pheatmap1[,c(-1:-3)])),function(x) log(x+0.0001)) xCell_pheatmap2<-xCell_pheatmap_log[,c(1,which(colnames(xCell_pheatmap_log) %in%xCell_wil_1$cell))] xCell_pheatmap3<-xCell_pheatmap2[-(which(duplicated(xCell_pheatmap2$sample))),] xCell_pheatmap_matrix<-my_t_tibble(xCell_pheatmap3,new_name_col_1 = "cell")[,-1] %>% as.matrix() save(xCell_pheatmap_matrix,file="data_temp\\xCell_pheatmap_matrix.RData") load("data_temp\\xCell_pheatmap_matrix.RData") mat<-xCell_pheatmap_matrix[c(1,5,7,9,11,14,28,20:22,12,3,2,6,8,10,17,18, 23,13,4,12,15,16,23:27,29:32),] row.names(xCell_pheatmap_matrix)<-xCell_wil_1$cell risk_group_uniuqe1<-xCell_pheatmap1[,c(1:3)] risk_group_uniuqe<-risk_group_uniuqe1[-(which(duplicated(risk_group_uniuqe1$sample))),] save(risk_group_uniuqe,file="data_temp\\risk_group_uniuqe.RData") annotation_col=data.frame(group=as.factor(risk_group_uniuqe1$group.x)) row.names(annotation_col)=risk_group_uniuqe1$sample bk = unique(c(seq(-2,2, length=100))) pheatmap(mat, cluster_rows = F,cluster_cols = F, show_colnames = F,scale = "row", breaks = bk, annotation_col =annotation_col, gaps_col =213, #color = colorRampPalette(c("#FF6666", "white", "#33CCCC"))(100), color = colorRampPalette(c("blue", "white", "yellow"))(100), filename = "plot\\xCell_pheatmap.png",width = 8,height=8,unit="cm") ggsave(xCell_heatmap,filename = "plot\\xCell_pheatmap1.pdf",width=15,height=15,units="cm") xCell_sankey_data<-xCell_TCGA3[,c(2:3,68:70)] %>% mutate(ImmuneScore=ifelse(ImmuneScore>median(ImmuneScore),"high","low")) %>% mutate(StromaScore=ifelse(StromaScore>median(StromaScore),"high","low")) %>% mutate(MicroenvironmentScore=ifelse(MicroenvironmentScore>median(MicroenvironmentScore),"high","low")) save(xCell_sankey_data,file="data_temp\\xCell_sankey_data.RData") load("data_temp\\xCell_sankey_data.RData") library(ggalluvial) xCell_sankey_data1<-xCell_sankey_data %>% mutate(ImmuneScore=as.factor(ImmuneScore)) %>% mutate(StromaScore=as.factor(StromaScore)) %>% mutate(MicroenvironmentScore=as.factor(MicroenvironmentScore)) xCell_sankey_plot<- ggplot(data=as.data.frame(xCell_sankey_data1), aes(axis1 =group,axis2 =ImmuneScore, axis3 =StromaScore,axis4 = MicroenvironmentScore)) + scale_x_discrete(limits = c("Group","Immune","Stroma"," Microenvironment"), expand = c(.1, .05)) + geom_alluvium(aes(fill = group)) + geom_stratum() + geom_text(stat = "stratum", infer.label = TRUE) + #geom_stratum() + geom_text(stat = "stratum", label.strata = TRUE) + theme_minimal() ggsave("plot\\xCell_sankey_plot.pdf",xCell_sankey_plot,width=16,height=8,units="cm") xCell_TCGA4<-xCell_TCGA3[,c(1:2,67:69)] # "ImmuneScore"(-0.01,0.42) "StromaScore"(-0.01,0.12) "MicroenvironmentScore"(-0.01,0.3) xCell_TCGA5<-melt(xCell_TCGA4[c(1,2,4)]) xCell_i<-ggplot(xCell_TCGA5,aes(x =group, y =value,fill=group))+ geom_boxplot(notch = T, outlier.shape = NA)+ ylab("StromaScore Score")+ylim(-0.01,0.12)+ stat_compare_means( aes(label = ..p.signif..),method = "wilcox.test", label.y = 0.11,label.x = 1.5,size=5)+ theme_classic()+ theme(legend.position='none')+ scale_fill_manual(values=c("lightcoral","lightseagreen")) ggsave(xCell_i,file="plot\\xCell_s.png",width=8,heigh=12,units="cm") ggsave(xCell_i,file="plot\\xCell_s.pdf",width=8,heigh=12,units="cm") ####-----------------drugbank--------------------- drug_target<-my_load_tibble("drugbank\\drug_target.xlsx") drug_target_gene<-unique(drug_target$gene) drug_target_tpm<-TCGA_fpkm_tpm %>% filter(ID %in% drug_target_gene) drug_target_tpm_t<-my_t_tibble(drug_target_tpm,new_name_col_1 = "ID") drug_target_data1<-inner_join(risk_group_order,drug_target_tpm_t,by=c("sample"="ID")) drug_target_data2<-drug_target_data1[order(drug_target_data1$sample),] ab<-which(duplicated(drug_target_data2$sample)) drug_target_data3<-drug_target_data2[-ab,] drug_target_data<-drug_target_data3[order(drug_target_data3$group),] save(drug_target_data,file="data_temp\\drug_target_data.RData") load("data_temp\\drug_target_data.RData") drug_score_wil<-tibble() for (i in 4:74){ drug_wil<-wilcox.test(data=drug_target_data,drug_target_data[[i]]~group) drug_score_wil[i-3,"gene"] <-colnames(drug_target_data)[i] drug_score_wil[i-3,"p_value"]<-drug_wil$p.value } save(drug_score_wil,file="data_temp\\drug_score_wil.RData") load("data_temp\\drug_score_wil.RData") drug_score_res<-drug_score_wil %>% filter(p_value<0.01) drug_target1<-drug_target %>% filter(gene %in% drug_score_res$gene) drug_target2<-drug_target1[-which(duplicated(drug_target1$gene)),] drug_target3<-drug_target2[,2] drug_pheatmap_data1<-drug_target_tpm %>% filter(ID %in% drug_score_res$gene) drug_pheatmap_data2<-drug_pheatmap_data1 %>% select(ID,drug_target_data$sample) drug_pheatmap_data<-inner_join(drug_target3,drug_pheatmap_data2,by=c("gene"="ID")) #drug_pheatmap_data<- my_tibble_scale2(drug_pheatmap_data2, modify=2) drug_pheatmap_log<-drug_pheatmap_data%>% modify_at(c(colnames(drug_pheatmap_data[,-1])),function(x) log(x+0.0001)) drug_pheatmap_matrix<-as.matrix(drug_pheatmap_data[,-1]) drug_target4<-drug_target2%>% mutate(name=paste(drug,gene,sep="$")) rownames(drug_pheatmap_matrix)<-drug_target4$name #annotation_col<-data.frame(data_score_1$group) annotation_col = data.frame(group=as.factor(drug_target_data$group)) row.names(annotation_col)=drug_target_data$sample annotation_row=data.frame(Therapy=as.factor(drug_target4$class)) row.names(annotation_row)=drug_target4$name library(pheatmap)S bk = unique(c(seq(-5,5, length=100))) pheatmap(drug_pheatmap_matrix, annotation_col =annotation_col,annotation_row=annotation_row, scale = "row", breaks=bk, #color = colorRampPalette(c("yellow", "white", "blue"))(100), color = colorRampPalette(c("navy", "white", "firebrick3"))(100), cluster_rows = F,cluster_cols = F, gaps_col =213,gaps_row=c(9,16,26), show_colnames = F, filename = "plot\\drugbank_heatmap.pdf") ####---------------timeROC-------------- library(survivalROC) library(survival) library(ggplot2) library(plotROC) data_score2<-data_score %>% distinct(sample,.keep_all=T) sroc <- lapply(c(12,24,36), function(t){ stroc <- survivalROC(Stime = data_score2$OS.time/30, status =data_score2$OS, marker =data_score2$score, predict.time = t, method = "KM" ## KM???? # method = "NNE", span = .25 * 350^(-.2) ## NE???? ) data.frame(TPF = stroc[["TP"]], FPF = stroc[["FP"]], c = stroc[["cut.values"]], time = rep(stroc[["predict.time"]], length(stroc[["FP"]]))) }) ## combine data sroclong <- do.call(rbind, sroc) sroclong$time<-factor(sroclong$time) ## plot ROC pROC<-ggplot(sroclong, aes(x = FPF, y = TPF, label = c, color = time)) + geom_roc(labels = FALSE, stat = "identity",n.cuts = 20) + style_roc()+ #theme(legend.position = "none")+ geom_segment(aes(x = 0, xend = 01, y = 0, yend = 1), color="grey", linetype="dashed") timeROC_plot<-pROC+annotate("text",x = .7, y = .25, size=3.5, ## position of text label = paste("AUC of 1 year =", -round(calc_auc(pROC)$AUC[1], 2))) + annotate("text",x = .7, y = .15,size=3.5, ## position of text label=paste("AUC of 2 years =", -round(calc_auc(pROC)$AUC[2], 2)))+ annotate("text",x = .7, y = .05,size=3.5, ## position of text label=paste("AUC of 3 years =", -round(calc_auc(pROC)$AUC[3], 2)))+ theme(legend.position = c(0.28,0.22))+ ggsci::scale_color_jco(name="",labels=c("","","")) ggsave("plot\\timeROC_plot5.pdf", timeROC_plot,width=8.5,height=8.5,units="cm") ####-----survival info----------- load("F:\\TCGA_COADREAD_lnc\\ferroptosis_lnc\\sur_cancer.RData") survial_cancer<-sur_cancer[,-3] %>% mutate(sample=substring(sample,1,12)) %>% mutate(sample=stringr::str_replace_all(sample,"[-]", ".")) DEG<-cludif_gene2$value TCGA_fpkm_tpm4<-TCGA_fpkm_tpm %>% filter(ID %in% DEG) TCGA_fpkm_tpm5<-my_t_tibble(TCGA_fpkm_tpm4,new_name_col_1 = "ID") TCGA_can_uni<-inner_join(survial_cancer,TCGA_fpkm_tpm5,by=c("sample"="ID")) save(TCGA_can_uni,file="data_temp\\TCGA_can_uni.RData") load("data_temp\\TCGA_can_uni.RData") TCGA_can_uni1<-TCGA_can_uni %>% distinct(sample,.keep_all=T) TCGA_fpkm_tpm6<-TCGA_can_uni[,c(-2,-3)] ####----------Nomogram------------------------- library(rms) fit_2<-cph(Surv(OS.time, OS) ~ FCRL1+WNT16+ GRIK2+ZMAT1+ZG16+DRD4+MAPK12+OR51B5+PRSS21+MAGEA3,data_score, x=T,y=T, surv=T) surv <- Survival(fit_2) dd<-datadist(data_score) options(datadist='dd') surv1_DFS <- function(x)surv(1*365,lp=x) surv2_DFS <- function(x)surv(2*365,lp=x) surv3_DFS <- function(x)surv(3*365,lp=x) nomogram<-plot(nomogram(fit_2,fun=list(surv1_DFS,surv2_DFS,surv3_DFS),lp= T, funlabel=c('1-year Survival','2-year Survival','3-year Survival'), maxscale=100, fun.at=c('0.99','0.9','0.5','0.01')), xfrac=.45) ggsave(nomogram,file="plot\\Nomogram.pdf",width=9,height = 12,units = "cm") cal1 <- calibrate(fit_2, cmethod='KM', method="boot", u=365, m=109, B=1000) par(mar=c(5,4,4,2),cex = 1.0) plot(cal1,lwd=2,lty=1, errbar.col=c(rgb(0,118,192,maxColorValue=255)), xlim=c(0.7,1),ylim=c(0.7,1), xlab="Nomogram-Predicted Probability of 1-Year DFS", ylab="Actual 1-Year DFS (proportion)", col=c(rgb(192,98,83,maxColorValue=255))) cal1 <- calibrate(fit_2, cmethod='KM', method="boot", u=1095, m=109, B=1000) #par(mar=c(5,4,4,2),cex = 1.0) plot(cal1,lwd=2,lty=1, errbar.col=c(rgb(0,118,192,maxColorValue=255)), xlim=c(0.7,1),ylim=c(0.5,1), xlab="Nomogram-Predicted Probability of 3-Year DFS", ylab="Actual 3-Year DFS (proportion)", col=c(rgb(192,98,83,maxColorValue=255))) ####-------------HPA ccc<-c("FCRL1", "GRIK2", "MAPK12","OR51B5") TCGA_ccc<-TCGA_mRNA_cancer1 %>% filter(gene %in% ccc) GTEx_ccc<-GTEx_exp %>% filter(sample %in% ccc) GTEx_TCGA_ccc<-inner_join(GTEx_ccc,TCGA_ccc,by=c("sample"="gene")) GTEx_TCGA_cc<-my_t_tibble(GTEx_TCGA_ccc,new_name_col_1 = "ID") cc_group<-tibble(ID=GTEx_TCGA_cc$ID,group=c(rep("normal",308),rep("cancer",469))) GTEx_ccc<-TCGA_mRNA_norm1 %>% filter(gene %in% ccc) GTEx_TCGA_ccc<-inner_join(GTEx_ccc,TCGA_ccc,by="gene") GTEx_TCGA_cc<-my_t_tibble(GTEx_TCGA_ccc,new_name_col_1 = "ID") cc_group<-tibble(ID=GTEx_TCGA_cc$ID,group=c(rep("normal",41),rep("cancer",469))) GTEx_TCGA_c<-inner_join(cc_group,GTEx_TCGA_cc,by="ID") GTEx_TCGA_c1<-melt(GTEx_TCGA_c) GTEx_TCGA_c1$group<-factor(GTEx_TCGA_c1$group,levels = c("normal","cancer")) GTEx_TCGAc_plot<-ggplot(GTEx_TCGA_c1,aes(x = variable, y = value,fill=group))+ geom_boxplot(outlier.size = 0.3)+theme_classic()+ stat_compare_means( aes(label = ..p.signif..),method = "wilcox.test", label.y = 16,size=3)+ xlab("Relative expression")+ylab("Pyroptosis relative genes")+ theme(axis.text.x = element_text(angle=90,size=10))+ scale_fill_manual(values=c("lightseagreen", "lightcoral"))