####TCGA表达谱数据下载#### library(SummarizedExperiment) library(TCGAbiolinks) query <- GDCquery(project = "TCGA-KIRC", data.category = "Transcriptome Profiling", data.type = "Gene Expression Quantification", workflow.type = "STAR - Counts", legacy = FALSE) #扩内存 memory.limit(1000000) GDCdownload(query = query) mydata=GDCprepare(query = query) mydata2=as.data.frame(rowRanges(mydata)) geneexp <- assay(mydata,i = "tpm_unstrand")#tpm_unstrand fpkm_unstrand #unstranded geneexp=as.data.frame(geneexp) geneexp1=cbind(gene_type=mydata2$gene_type,gene_name=mydata2$gene_name,geneexp) mRNA=geneexp1[geneexp1$gene_type=="protein_coding",] mRNA=mRNA[,-1] TPM <- mRNA[!duplicated(mRNA$gene_name),] rownames(TPM) <- TPM$gene_name TPM <- TPM[,-1] TPM <- TPM[,substr(colnames(TPM),14,16)%in% c("01A","11A")] ###判断是否log2了 # 自动log(x)化 ex <- TPM qx <- as.numeric(quantile(ex, c(0., 0.25, 0.5, 0.75, 0.99, 1.0), na.rm=T)) LogC <- (qx[5] > 100) || (qx[6]-qx[1] > 50 && qx[2] > 0) || (qx[2] > 0 && qx[2] < 1 && qx[4] > 1 && qx[4] < 2) if (LogC) { ex[which(ex < 0,arr.ind=T)] <- NaN #将负数定义为NaN exprSet <- log2(ex+1) #log2(x+1) print("log2 transform finished") }else{ print("log2 transform not needed") } # 去除NaN exprSet[which(exprSet == "NaN",arr.ind=T)] expr_TCGA_TPM <- as.data.frame(exprSet) tumor <- colnames(expr_TCGA_TPM)[substr(colnames(expr_TCGA_TPM),14,16) == "01A"] #533 normal <- colnames(expr_TCGA_TPM)[substr(colnames(expr_TCGA_TPM),14,16) == "11A"] #72 expr_TCGA_TPM <- expr_TCGA_TPM[,c(normal,tumor)] expr_TCGA_TPM_01A <- expr_TCGA_TPM[,tumor] expr_TCGA_TPM_11A <- expr_TCGA_TPM[,normal] save(expr_TCGA_TPM,expr_TCGA_TPM_01A,expr_TCGA_TPM_11A,file="data/TCGA_KIRC_mRNA.Rdata") #完结# ####TCGA临床数据下载#### rm(list = ls()) library(dplyr) library(tibble) library(tidyr) library(TCGAbiolinks) options(stringsAsFactors = F) ## 1.临床信息下载 clinical <- GDCquery_clinic(project = "TCGA-KIRC", type = "clinical") dput(colnames(clinical)) #神奇的操作,直接生成向量 clin_data <- clinical %>% dplyr::select(patient_ID = submitter_id, vital_status, gender, age = age_at_diagnosis, Stage = ajcc_pathologic_stage, T_Stage = ajcc_pathologic_t , N_Stage = ajcc_pathologic_n, M_Stage = ajcc_pathologic_m, days_to_death, days_to_last_follow_up) clin_data$age <- (as.numeric(clin_data$age))/365 clin_data$days_to_death <- as.numeric(clin_data$days_to_death) clin_data$days_to_last_follow_up <- as.numeric(clin_data$days_to_last_follow_up) #添加时间和状态,删除这两列为空的值 rt <- clin_data %>% mutate(time = coalesce(days_to_death, days_to_last_follow_up)/365, status = ifelse(vital_status == 'Alive', 0, 1)) %>% dplyr::filter(time > 0) %>% dplyr::filter(age > 0) %>% distinct() %>% dplyr::select(- c("days_to_death", "days_to_last_follow_up")) rt <- rt[-which(rt$N_Stage == "NX"),] rt <- rt[-which(rt$M_Stage == "MX"),] rt$gender <- factor(rt$gender,labels=c("female", "male")) rt$Stage <- if_else(rt$Stage %in% c("Stage IV"),"Stage IV", if_else(rt$Stage %in% c("Stage III"),"Stage III", if_else(rt$Stage %in% c("Stage II"),"Stage II", "Stage I"))) rt$Stage <- factor(rt$Stage,labels=c("Stage I", "Stage II", "Stage III", "Stage IV")) rt$T_Stage <- if_else(rt$T_Stage %in% c("T1","T1a","T1b"),"T1", if_else(rt$T_Stage %in% c("T2","T2a","T2b"),"T2", if_else(rt$T_Stage%in% c("T3","T3a","T3b","T3c"),"T3","T4"))) rt$T_Stage <- factor(rt$T_Stage,labels=c("T1", "T2", "T3", "T4")) rt$N_Stage <- factor(rt$N_Stage,labels=c("N0", "N1")) rt$M_Stage <- factor(rt$M_Stage,labels=c("M0", "M1")) TCGA_clinical <- rt save(TCGA_clinical,file = "data/TCGA_KIRC_clinic.Rdata") #完结# ####TTC13非配对样本表达量#### load(file = "data/TCGA_KIRC_mRNA.Rdata") exprSet <- expr_TCGA_TPM["TTC13",] TCGA_id <- colnames(exprSet) sample <- ifelse(substring(TCGA_id,14,15)=="01","Tumor","Normal") sample <- factor(sample,levels = c("Normal","Tumor")) #factor的目的是把normal排在前面 metadata <- data.frame(TCGA_id,sample) exprSet <-t(exprSet) exprSet <-as.data.frame(exprSet) exprSet <- cbind(TCGA_id=rownames(exprSet),exprSet) #行名不能直接变为第一列,必须cbind exprSet <- merge(metadata,exprSet,by="TCGA_id") #画图 library(ggplot2) library(ggpubr) #设置p的比较组 compare_means(TTC13 ~ sample, data = exprSet) #基因,样本,数据 p <- ggboxplot(exprSet, x = "sample", y = "TTC13", color = "sample", palette = "jco", add = "jitter")+ylab("The expression of TTC13") p + stat_compare_means( aes(label = ..p.signif..), label.x = 1.5, label.y = 6) ####TTC13配对样本表达量#### library(limma) load(file = "data/TCGA_KIRC_mRNA.Rdata") exprSet <- expr_TCGA_TPM["TTC13",] TCGA_id <- colnames(exprSet) sample <- ifelse(substring(TCGA_id,14,15)=="01","Tumor","Normal") sample <- factor(sample,levels = c("Normal","Tumor")) #factor的目的是把normal排在前面 metadata <- data.frame(TCGA_id,sample) exprSet <-t(exprSet) exprSet <-as.data.frame(exprSet) exprSet <- cbind(TCGA_id=rownames(exprSet),exprSet) #行名不能直接变为第一列,必须cbind exprSet <- merge(metadata,exprSet,by="TCGA_id") geneName="TTC13" #提取配对样品的数据 rownames(exprSet) <- exprSet$TCGA_id normalData=exprSet[exprSet$sample=="Normal",3,drop=F] normalData=as.matrix(normalData) rownames(normalData)=gsub("(.*?)\\-(.*?)\\-(.*?)\\-(.*?)\\-.*", "\\1\\-\\2\\-\\3", rownames(normalData)) normalData=avereps(normalData) tumorData=exprSet[exprSet$sample=="Tumor",3,drop=F] tumorData=as.matrix(tumorData) rownames(tumorData)=gsub("(.*?)\\-(.*?)\\-(.*?)\\-(.*?)\\-.*", "\\1\\-\\2\\-\\3", rownames(tumorData)) tumorData=avereps(tumorData) sameSample=intersect(row.names(normalData), row.names(tumorData)) data=cbind(normalData[sameSample,,drop=F], tumorData[sameSample,,drop=F]) colnames(data)=c("Normal", "Tumor") data=as.data.frame(data) library(ggplot2) library(ggpubr) #绘制配对差异分析的图形 ggpaired(data, cond1="Normal", cond2="Tumor", fill="condition", xlab="", ylab=paste0(geneName, " expression"), legend.title="Type", color = "condition", line.color = "gray", line.size = 0.4, palette = "jco")+ stat_compare_means(paired = TRUE, symnum.args=list(cutpoints = c(0, 0.001, 0.01, 0.05, 1), symbols = c("***", "**", "*", "ns")),label = "p.signif",label.x = 1.35) dev.off() ####预后KM#### load(file = "data/TCGA_KIRC_mRNA.Rdata") load(file = "data/TCGA_KIRC_clinic.Rdata") colnames(expr_TCGA_TPM_01A) <- substr(colnames(expr_TCGA_TPM_01A),1,12) expr_TCGA_TPM_01A <- expr_TCGA_TPM_01A["TTC13",] rownames(TCGA_clinical) <- TCGA_clinical$patient_ID comgene <- intersect(rownames(TCGA_clinical),colnames(expr_TCGA_TPM_01A)) TCGA_clinical <- TCGA_clinical[comgene,] expr_TCGA_TPM_01A <- expr_TCGA_TPM_01A[,comgene] expr_TCGA_TPM_01A <- as.data.frame(expr_TCGA_TPM_01A) expr_TCGA_TPM_01A <- t(expr_TCGA_TPM_01A) rt <- cbind(expr_TCGA_TPM_01A,TCGA_clinical) save(rt,file = "data/TTC13_clinic.Rdata") library(survival) library(survminer) res.cut <- surv_cutpoint(rt, time ="time", event = "status", variables = "TTC13", minprop = 0.3) res.cut cutoff=res.cut$cutpoint$cutpoint rt$TTC13 <- ifelse(rt$TTC13 > cutoff, "TTC13_high","TTC13_low") rt <- as.data.frame(rt) #分组 fit <- survfit(Surv(time, status) ~ TTC13, data = rt) p1 <- ggsurvplot(fit, legend.title = "",#定义图例的名称 legend.labs = c("TTC13_high","TTC13_low"),#一定要和table的顺序一致 #legend = "top",#图例位置 pval = T, pval.method = TRUE,#添加p值的检验方法 #conf.int = TRUE,#添加置信区间 risk.table = TRUE, #在图下方添加风险表 risk.table.col = "strata", #根据数据分组为风险表添加颜色 risk.table.y.text = F,#风险表Y轴是否显示分组的名称,F为以线条展示分组 #linetype = "strata", #改变不同组别的生存曲线的线型 #surv.median.line = "hv", #标注出中位生存时间 xlab = "Time in years", #x轴标题 xlim = c(0,max(rt$time)), #展示x轴的范围 ylab = "Overall survival rate", break.time.by = 1, #x轴间隔 size = 1, #线条大小 #ggtheme = theme_bw(), #为图形添加网格 palette = "jco"#图形颜色风格 ) #看一下图 p1 ####ROC曲线#### library(ROCR) library(glmnet) library(caret) library(timeROC) library(survival) load(file = "data/TTC13_clinic.Rdata") time_roc_res <- timeROC( T=rt$time, #结局时间 delta=rt$status, #结局指标 marker=rt$TTC13, cause = 1, weighting="marginal", times = c(1, 3, 5), #改时间年限 ROC = TRUE, iid = TRUE ) time_roc_res$AUC confint(time_roc_res, level = 0.95)$CI_AUC time_ROC_df <- data.frame( TP_1year = time_roc_res$TP[, 1], FP_1year = time_roc_res$FP[, 1], TP_2year = time_roc_res$TP[, 2], FP_2year = time_roc_res$FP[, 2], TP_3year = time_roc_res$TP[, 3], FP_3year = time_roc_res$FP[, 3] ) ggplot(data = time_ROC_df) + geom_line(aes(x = FP_1year, y = TP_1year), size = 1, color = "#BC3C29FF") + geom_line(aes(x = FP_2year, y = TP_2year), size = 1, color = "#0072B5FF") + geom_line(aes(x = FP_3year, y = TP_3year), size = 1, color = "#E18727FF") + geom_abline(slope = 1, intercept = 0, color = "grey", size = 1, linetype = 2) + theme_bw() + annotate("text", x = 0.75, y = 0.25, size = 4.5, label = paste0("AUC at 1 years = ", sprintf("%.3f", time_roc_res$AUC[[1]])), color = "#BC3C29FF" #改名字 ) + annotate("text", x = 0.75, y = 0.15, size = 4.5, label = paste0("AUC at 3 years = ", sprintf("%.3f", time_roc_res$AUC[[2]])), color = "#0072B5FF" #改名字 ) + annotate("text", x = 0.75, y = 0.05, size = 4.5, label = paste0("AUC at 5 years = ", sprintf("%.3f", time_roc_res$AUC[[3]])), color = "#E18727FF" #改名字 ) + labs(x = "False positive rate", y = "True positive rate") + theme( axis.text = element_text(face = "bold", size = 11, color = "black"), axis.title.x = element_text(face = "bold", size = 14, color = "black", margin = margin(c(15, 0, 0, 0))), axis.title.y = element_text(face = "bold", size = 14, color = "black", margin = margin(c(0, 15, 0, 0))) ) #完毕# ####TTC13与临床特征相关性#### load(file = "data/TTC13_clinic.Rdata") #stage rt$Stage <- if_else(rt$Stage %in% c("Stage I","Stage II"),"Stage I-II", "Stage III-IV") rt$Stage <- factor(rt$Stage,labels=c("Stage I-II", "Stage III-IV")) compare_means(TTC13 ~ Stage, data = rt, method = "anova") p <- ggboxplot(rt, x = "Stage", y = "TTC13", color = "Stage", palette = "jco", add = "jitter")+ylab("The expression of TTC13") p + stat_compare_means(label.x = 1.5, label.y = 6) #T_Stage rt$T_Stage <- if_else(rt$T_Stage %in% c("T1","T2"),"T1-2", "T3-4") rt$T_Stage <- factor(rt$T_Stage,labels=c("T1-2", "T3-4")) compare_means(TTC13 ~ T_Stage, data = rt, method = "anova") p <- ggboxplot(rt, x = "T_Stage", y = "TTC13", color = "T_Stage", palette = "jco", add = "jitter")+ylab("The expression of TTC13") p + stat_compare_means(label.x = 1.5, label.y = 6) #N_Stage compare_means(TTC13 ~ N_Stage, data = rt, method = "anova") p <- ggboxplot(rt, x = "N_Stage", y = "TTC13", color = "N_Stage", palette = "jco", add = "jitter")+ylab("The expression of TTC13") p + stat_compare_means(label.x = 1.5, label.y = 6) #M_Stage compare_means(TTC13 ~ M_Stage, data = rt, method = "anova") p <- ggboxplot(rt, x = "M_Stage", y = "TTC13", color = "M_Stage", palette = "jco", add = "jitter")+ylab("The expression of TTC13") p + stat_compare_means(label.x = 1.5, label.y = 6) #Age rt$age <- ifelse(rt$age >=65, ">=65","<65") rt$age<-factor(rt$age,levels=c("<65",">=65")) compare_means(TTC13 ~ age, data = rt, method = "anova") p <- ggboxplot(rt, x = "age", y = "TTC13", color = "age", palette = "jco", add = "jitter")+ylab("The expression of TTC13") p + stat_compare_means(label.x = 1.5, label.y = 6) #gender compare_means(TTC13 ~ gender, data = rt, method = "anova") p <- ggboxplot(rt, x = "gender", y = "TTC13", color = "gender", palette = "jco", add = "jitter")+ylab("The expression of TTC13") p + stat_compare_means(label.x = 1.5, label.y = 6) ####GSEA软件版数据准备#### load(file = "data/TCGA_KIRC_mRNA.Rdata") gene <- "TTC13" med=median(as.numeric(expr_TCGA_TPM_01A[gene,])) #取ITGA3表达的中位数 conditions=data.frame(sample=colnames(expr_TCGA_TPM_01A), group=factor(ifelse(expr_TCGA_TPM_01A[gene,]>med,"high","low"),levels = c("low","high"))) %>% column_to_rownames("sample") high <- rownames(conditions)[conditions$group == "high"] #266 low <- rownames(conditions)[conditions$group == "low"] #267 expr_TCGA_TPM_01A <- expr_TCGA_TPM_01A[,c(high,low)] ### board 桌面版本 ### 从R语言对接桌面版本 ### 需要三个文件 ### 表达量gct,表型cls,基因集gmt exprSet = expr_TCGA_TPM_01A exprSet <- cbind(gene_id = rownames(exprSet), description = "na", exprSet) ## 创建gct文件 gct_file = paste0("my.gct") sink(gct_file) cat("#1.2\n") cat(paste0(nrow(exprSet), "\t", (length(exprSet) - 2), "\n")) sink() write.table(exprSet, gct_file, append = T, quote = F, row.names = F, col.names = T, sep = "\t") # 创建cls文件 group_list <- c(rep("high",266),rep("low",267)) cls_file = paste0("my.cls") sink(cls_file) cat(paste0(length(group_list), "\t", length(unique(group_list)), "\t1\n")) cat(paste0("#", paste(unique(group_list), collapse = "\t"), "\n")) cat(paste(group_list, collapse = "\t"), "\n") sink() ####单/多因素分析#### load(file = "data/TTC13_clinic.Rdata") rt <- rt %>% select(c("time","status", "age","gender","Stage","T_Stage","N_Stage","M_Stage","TTC13")) #单因素Cox回归 #把因素变为数字 rt$T_Stage <- if_else(rt$T_Stage =="T4",4, if_else(rt$T_Stage =="T3",3, if_else(rt$T_Stage =="T2",2,1))) rt$gender <- if_else(rt$gender == "female",0,1) rt$Stage <- if_else(rt$Stage == "Stage IV",4, if_else(rt$Stage == "Stage III",3, if_else(rt$Stage == "Stage II",2,1))) rt$N_Stage <- if_else(rt$N_Stage == "N1",2,1) rt$M_Stage <- if_else(rt$M_Stage == "M0",0,1) #假设我们要对如下5个特征做单因素cox回归分析 covariates <- c("gender","age","Stage","T_Stage","N_Stage","M_Stage","TTC13") #分别对每一个变量,构建生存分析的公式 univ_formulas <- sapply(covariates, function(x) as.formula(paste('Surv(time, status)~', x))) #注意更改time和OS的名字 #循环对每一个特征做cox回归分析 univ_models <- lapply( univ_formulas, function(x){coxph(x, data = rt)}) #注意更改数据 univ_results <- lapply(univ_models, function(x){ x <- summary(x) #获取p值 p.value<-signif(x$wald["pvalue"], digits=4) #获取HR HR <-signif(x$coef[2], digits=4); #获取95%置信区间 HR.confint.lower <- signif(x$conf.int[,"lower .95"], 4) HR.confint.upper <- signif(x$conf.int[,"upper .95"],4) lower <- HR.confint.lower upper <- HR.confint.upper HR <- HR res<-c(p.value,HR,lower,upper) names(res)<-c("pvalue","HR","lower","upper") return(res) }) #转换成数据框,并转置 res <- t(as.data.frame(univ_results, check.names = FALSE)) as.data.frame(res) write.table(file="output/TCC13_univariate_cox_result.txt",as.data.frame(res),quote=F,sep="\t") library(forestploter) #读取单因素结果 dt <- data.table::fread("output/TCC13_univariate_cox_result.txt",data.table = F) dt$` ` <- paste(rep(" ", 20), collapse = " ") dt$HR = round(dt$HR,4) dt$pvalue = round(dt$pvalue,4) dt$lower = round(dt$lower,4) dt$upper = round(dt$upper,4) dt$`HR (95% CI)` <- ifelse(is.na(dt$HR), "", sprintf("%.3f (%.3f to %.3f)", dt$HR, dt$lower, dt$upper)) tm <- forest_theme(base_size = 10, # 置信区间点形状,线类型/颜色/宽度 ci_pch = 16, # 点形状 ci_col = "red", ci_lty = 1, # 线条类型 ci_lwd = 1.5, # 线条宽度 ci_Theight = 0.4, # 在CI的末尾设置T形尾部 # 参考线宽/类型/颜色 refline_lwd = 1, refline_lty = "dashed", refline_col = "grey20", # 垂直的线宽/类型/颜色 vertline_lwd = 1, vertline_lty = "dashed", vertline_col = "grey20", # 更改填充和边框的摘要颜色 summary_fill = "red", summary_col = "red", # 脚注字体大小/字形/颜色 footnote_cex = 0.6, footnote_fontface = "italic", footnote_col = "red") plot <- forest(dt[, c(1,2,6,7)], est = dt$HR, lower = dt$lower, upper = dt$upper, ci_column = 3,theme = tm,ticks_at = c(0, 1, 5, 10,15,20,25,30), ref_line = 1)#默认是1 plot #TCGA多因素cox回归分析# res.cox <- coxph(Surv(time, status) ~ age + Stage + T_Stage + N_Stage + M_Stage + TTC13, data = rt) x <- summary(res.cox) pvalue=signif(as.matrix(x$coefficients)[,5],4) HR=signif(as.matrix(x$coefficients)[,2],4) low=signif(x$conf.int[,3],4) high=signif(x$conf.int[,4],4) multi_res=data.frame(pvalue=pvalue, HR=HR, lower=low, upper=high, stringsAsFactors = F ) multi_res write.table(file="output/TTC13multivariate_cox_result.txt",multi_res,quote=F,sep="\t") ##画图 library(forestploter) #读取多因素结果 dt <- data.table::fread("output/TTC13multivariate_cox_result.txt",data.table = F) dt$` ` <- paste(rep(" ", 20), collapse = " ") dt$HR = round(dt$HR,4) dt$pvalue = round(dt$pvalue,4) dt$lower = round(dt$lower,4) dt$upper = round(dt$upper,4) dt$`HR (95% CI)` <- ifelse(is.na(dt$HR), "", sprintf("%.3f (%.3f to %.3f)", dt$HR, dt$lower, dt$upper)) tm <- forest_theme(base_size = 10, # 置信区间点形状,线类型/颜色/宽度 ci_pch = 16, # 点形状 ci_col = "red", ci_lty = 1, # 线条类型 ci_lwd = 1.5, # 线条宽度 ci_Theight = 0.4, # 在CI的末尾设置T形尾部 # 参考线宽/类型/颜色 refline_lwd = 1, refline_lty = "dashed", refline_col = "grey20", # 垂直的线宽/类型/颜色 vertline_lwd = 1, vertline_lty = "dashed", vertline_col = "grey20", # 更改填充和边框的摘要颜色 summary_fill = "red", summary_col = "red", # 脚注字体大小/字形/颜色 footnote_cex = 0.6, footnote_fontface = "italic", footnote_col = "red") plot <- forest(dt[, c(1,2,6,7)], est = dt$HR, lower = dt$lower, upper = dt$upper, ci_column = 3,theme = tm,ticks_at = c(0, 1, 5, 10,15,20,25,30), ref_line = 1)#默认是1 plot ####nomcox#### library(survival) library(regplot) library(rms) load(file = "data/TTC13_clinic.Rdata") rt <- rt %>% select(c("time","status", "age","gender","Stage","T_Stage","N_Stage","M_Stage","TTC13")) nomcox=coxph(Surv(time, status) ~ . , data = rt) regplot(nomcox, plots = c("bars", "boxes"), clickable=F, title=NULL, points=T, droplines=T, observation=rt[20,], rank="sd", failtime = c(1,3,5), prfail = F) #校准曲线 pdf("calibration.pdf",10,8) #1 year mx1 <- cph(Surv(time, status) ~TTC13, x=T, y=T, surv=T, data=rt, time.inc=1) cal1 <- calibrate(mx1, cmethod="KM", method="boot", u=1, m=(nrow(rt)/3), B=1000) plot(cal1, xlim=c(0,1), ylim=c(0,1), xlab="Nomogram-predicted Overall survival (%)", ylab="Observed Overall survival (%)", lwd=1.5, col="green", sub=F) #3 year mx2 <- cph(Surv(time, status) ~ TTC13, x=T, y=T, surv=T, data=rt, time.inc=3) cal2 <- calibrate(mx2, cmethod="KM", method="boot", u=2, m=(nrow(rt)/3), B=1000) plot(cal2, xlim=c(0,1), ylim=c(0,1), xlab="", ylab="", lwd=1.5, col="blue", sub=F, add=T) #5 year mx3 <- cph(Surv(time, status) ~TTC13, x=T, y=T, surv=T, data=rt, time.inc=5) cal3 <- calibrate(mx3, cmethod="KM", method="boot", u=3, m=(nrow(rt)/3), B=1000) plot(cal3, xlim=c(0,1), ylim=c(0,1), xlab="", ylab="", lwd=1.5, col="red", sub=F, add=T) legend('bottomright', c('1-year', '3-year', '5-year'), col=c("green","blue","red"), lwd=1.5, bty = 'n') dev.off() ####免疫浸润#### library(e1071) library(parallel) library(preprocessCore) source("CIBERSORT.R") #这边需要读取现成的R代码,在文件夹里 load(file = "data/TCGA_KIRC_mRNA.Rdata") write.table(expr_TCGA_TPM_01A, file = "output/KIRC_mRNA_01A.txt",sep = "\t",row.names = T,col.names = NA,quote = F) sig_matrix <- "LM22.txt" #免疫细胞的注释文件 mixture_file = 'output/KIRC_mRNA_01A.txt' #肿瘤患者表达谱 res_cibersort <- CIBERSORT(sig_matrix, mixture_file, perm=100, QN=TRUE) #此处运行时间较久,100是算的次数,越大,数据越准确 save(res_cibersort,file = "output/res_cibersort.Rdata") #保存中间文件 library(limma) library(reshape2) library(ggpubr) #加载cibersort数据 load("output/res_cibersort.Rdata") res_cibersort <- res_cibersort[,1:22] data <- res_cibersort[,colSums(res_cibersort) > 0] data <- as.data.frame(data) risk <- t(expr_TCGA_TPM_01A) sameSample=intersect(row.names(data), row.names(risk)) data=cbind(risk[sameSample,"TTC13",drop=F],data[sameSample,,drop=F]) corResult=data.frame() for(i in colnames(data)){ tdc <- cor.test(data[,"TTC13"], data[,i], method = "pearson") pvalue=tdc$p.value corResult=rbind(corResult, cbind(gene=i, cor= tdc$estimate, Low95CI=tdc$conf.int[1], High95CI=tdc$conf.int[2], pvalue=tdc$p.value) ) } outTab=corResult[as.numeric(corResult$pvalue)<0.05,] #Plasma cells# library(ggpubr) ggscatter(data, x = "TTC13", y = "Plasma cells", color = "black", size =2, # Points color and size add = "reg.line", # Add regression line add.params = list(color = "blue", fill = "gray"), # Customize regression line conf.int = TRUE, # Add confidence interval cor.coef = TRUE, # Add correlation coefficient. see ?stat_cor cor.coeff.args = list(method = "pearson")) #T cells CD4 naive# library(ggpubr) ggscatter(data, x = "TTC13", y = "T cells CD4 naive", color = "black", size =2, # Points color and size add = "reg.line", # Add regression line add.params = list(color = "blue", fill = "gray"), # Customize regression line conf.int = TRUE, # Add confidence interval cor.coef = TRUE, # Add correlation coefficient. see ?stat_cor cor.coeff.args = list(method = "pearson")) #T cells CD4 memory resting# library(ggpubr) ggscatter(data, x = "TTC13", y = "T cells CD4 memory resting", color = "black", size =2, # Points color and size add = "reg.line", # Add regression line add.params = list(color = "blue", fill = "gray"), # Customize regression line conf.int = TRUE, # Add confidence interval cor.coef = TRUE, # Add correlation coefficient. see ?stat_cor cor.coeff.args = list(method = "pearson")) #T cells follicular helper# library(ggpubr) ggscatter(data, x = "TTC13", y = "T cells follicular helper", color = "black", size =2, # Points color and size add = "reg.line", # Add regression line add.params = list(color = "blue", fill = "gray"), # Customize regression line conf.int = TRUE, # Add confidence interval cor.coef = TRUE, # Add correlation coefficient. see ?stat_cor cor.coeff.args = list(method = "pearson")) #NK cells activated# library(ggpubr) ggscatter(data, x = "TTC13", y = "NK cells activated", color = "black", size =2, # Points color and size add = "reg.line", # Add regression line add.params = list(color = "blue", fill = "gray"), # Customize regression line conf.int = TRUE, # Add confidence interval cor.coef = TRUE, # Add correlation coefficient. see ?stat_cor cor.coeff.args = list(method = "pearson")) #Macrophages M0# library(ggpubr) ggscatter(data, x = "TTC13", y = "Macrophages M0", color = "black", size =2, # Points color and size add = "reg.line", # Add regression line add.params = list(color = "blue", fill = "gray"), # Customize regression line conf.int = TRUE, # Add confidence interval cor.coef = TRUE, # Add correlation coefficient. see ?stat_cor cor.coeff.args = list(method = "pearson")) #Macrophages M1# library(ggpubr) ggscatter(data, x = "TTC13", y = "Macrophages M1", color = "black", size =2, # Points color and size add = "reg.line", # Add regression line add.params = list(color = "blue", fill = "gray"), # Customize regression line conf.int = TRUE, # Add confidence interval cor.coef = TRUE, # Add correlation coefficient. see ?stat_cor cor.coeff.args = list(method = "pearson")) #Dendritic cells activated# library(ggpubr) ggscatter(data, x = "TTC13", y = "Dendritic cells activated", color = "black", size =2, # Points color and size add = "reg.line", # Add regression line add.params = list(color = "blue", fill = "gray"), # Customize regression line conf.int = TRUE, # Add confidence interval cor.coef = TRUE, # Add correlation coefficient. see ?stat_cor cor.coeff.args = list(method = "pearson")) #Mast cells resting# library(ggpubr) ggscatter(data, x = "TTC13", y = "Mast cells resting", color = "black", size =2, # Points color and size add = "reg.line", # Add regression line add.params = list(color = "blue", fill = "gray"), # Customize regression line conf.int = TRUE, # Add confidence interval cor.coef = TRUE, # Add correlation coefficient. see ?stat_cor cor.coeff.args = list(method = "pearson")) #Eosinophils# library(ggpubr) ggscatter(data, x = "TTC13", y = "Eosinophils", color = "black", size =2, # Points color and size add = "reg.line", # Add regression line add.params = list(color = "blue", fill = "gray"), # Customize regression line conf.int = TRUE, # Add confidence interval cor.coef = TRUE, # Add correlation coefficient. see ?stat_cor cor.coeff.args = list(method = "pearson")) ####TIDE#### options(stringsAsFactors = FALSE) #禁止chr转成factor #表达矩阵 load(file = "data/TCGA_KIRC_mRNA.Rdata") dat <- expr_TCGA_TPM_01A TCGA_id <- colnames(expr_TCGA_TPM_01A) med=median(as.numeric(expr_TCGA_TPM_01A["TTC13",])) expr_TCGA_TPM_01A <- t(expr_TCGA_TPM_01A) expr_TCGA_TPM_01A <- as.data.frame(expr_TCGA_TPM_01A) sample <- ifelse(expr_TCGA_TPM_01A$TTC13 > med, "TTC13_high","TTC13_low") sample <- factor(sample,levels = c("TTC13_high","TTC13_low")) #factor的目的是把normal排在前面 metadata <- data.frame(TCGA_id,sample) rownames(metadata) <- metadata$TCGA_id #临床数据 sameSample=intersect(colnames(dat),rownames(metadata)) dat=dat[,sameSample] metadata=metadata[sameSample,] ann <- metadata$sample ann <- as.data.frame(ann) rownames(ann) <- rownames(metadata) colnames(ann)[1] <- "ImmClust" TIDE <- dat # 这里为了得到比较好的结果,采用two direction median centered TIDE <- sweep(TIDE,2, apply(TIDE,2,median,na.rm=T)) TIDE <- sweep(TIDE,1, apply(TIDE,1,median,na.rm=T)) write.table(TIDE,"data/TIDE_input.self_subtract",sep = "\t",row.names = T,col.names = NA,quote = F) #------------------------------------# # 请参照文件夹中的TIDE使用教程.docx完成该部分# TIDE.res <- read.csv("TIDE_output.csv",header = T,row.names = 1,check.names = F,stringsAsFactors = F) TIDE.res <- TIDE.res[rownames(metadata),] data=cbind(TIDE.res, metadata) #TIDE# library("RColorBrewer") compare_means(TIDE ~ sample, data = data) #基因,样本,数据 p <- ggplot(data, aes(x = sample, y = TIDE, fill = sample)) + geom_violin(trim = F) + scale_fill_brewer(palette = "Dark2") + theme_classic() + geom_boxplot(width = 0.1, fill = "white") + theme(text = element_text(size = 18), axis.text.x = element_text(size = 18), plot.title = element_text(hjust = 0.5), legend.position = c(0.15,0.9)) + xlab("sample") + ylab("TIDE") p + stat_compare_means(label.x = 1.5, label.y = 6) #TIDE# library("RColorBrewer") compare_means(TIDE ~ sample, data = data) #基因,样本,数据 p <- ggplot(data, aes(x = sample, y = TIDE, fill = sample)) + geom_violin(trim = F) + scale_fill_brewer(palette = "Dark2") + theme_classic() + geom_boxplot(width = 0.1, fill = "white") + theme(text = element_text(size = 18), axis.text.x = element_text(size = 18), plot.title = element_text(hjust = 0.5), legend.position = c(0.15,0.9)) + xlab("sample") + ylab("TIDE") p + stat_compare_means(label.x = 1.5, label.y = 6) #MDSC# library("RColorBrewer") compare_means(MDSC ~ sample, data = data) #基因,样本,数据 p <- ggplot(data, aes(x = sample, y = MDSC, fill = sample)) + geom_violin(trim = F) + scale_fill_brewer(palette = "Dark2") + theme_classic() + geom_boxplot(width = 0.1, fill = "white") + theme(text = element_text(size = 18), axis.text.x = element_text(size = 18), plot.title = element_text(hjust = 0.5), legend.position = c(0.15,0.9)) + xlab("sample") + ylab("MDSC") p + stat_compare_means(label.x = 1.5, label.y = 0.8) #Exclusion# library("RColorBrewer") compare_means(Exclusion ~ sample, data = data) #基因,样本,数据 p <- ggplot(data, aes(x = sample, y = Exclusion, fill = sample)) + geom_violin(trim = F) + scale_fill_brewer(palette = "Dark2") + theme_classic() + geom_boxplot(width = 0.1, fill = "white") + theme(text = element_text(size = 18), axis.text.x = element_text(size = 18), plot.title = element_text(hjust = 0.5), legend.position = c(0.15,0.9)) + xlab("sample") + ylab("Exclusion") p + stat_compare_means(label.x = 1.5, label.y = 0.8) #CD8# library("RColorBrewer") compare_means(CD8 ~ sample, data = data) #基因,样本,数据 p <- ggplot(data, aes(x = sample, y = CD8, fill = sample)) + geom_violin(trim = F) + scale_fill_brewer(palette = "Dark2") + theme_classic() + geom_boxplot(width = 0.1, fill = "white") + theme(text = element_text(size = 18), axis.text.x = element_text(size = 18), plot.title = element_text(hjust = 0.5), legend.position = c(0.15,0.9)) + xlab("sample") + ylab("CD8") p + stat_compare_means(label.x = 1.5, label.y = 0.8) #Merck18# library("RColorBrewer") compare_means(Merck18 ~ sample, data = data) #基因,样本,数据 p <- ggplot(data, aes(x = sample, y = Merck18, fill = sample)) + geom_violin(trim = F) + scale_fill_brewer(palette = "Dark2") + theme_classic() + geom_boxplot(width = 0.1, fill = "white") + theme(text = element_text(size = 18), axis.text.x = element_text(size = 18), plot.title = element_text(hjust = 0.5), legend.position = c(0.15,0.9)) + xlab("sample") + ylab("Merck18") p + stat_compare_means(label.x = 1.5, label.y = 0.25) #TAM M2# library("RColorBrewer") colnames(data)[14] <- "TAMM2" compare_means(TAMM2 ~ sample, data = data) #基因,样本,数据 p <- ggplot(data, aes(x = sample, y = TAMM2, fill = sample)) + geom_violin(trim = F) + scale_fill_brewer(palette = "Dark2") + theme_classic() + geom_boxplot(width = 0.1, fill = "white") + theme(text = element_text(size = 18), axis.text.x = element_text(size = 18), plot.title = element_text(hjust = 0.5), legend.position = c(0.15,0.9)) + xlab("sample") + ylab("TAM M2") p + stat_compare_means(label.x = 1.5, label.y = 0.25) #CAF# library("RColorBrewer") compare_means(CAF ~ sample, data = data) #基因,样本,数据 p <- ggplot(data, aes(x = sample, y = CAF, fill = sample)) + geom_violin(trim = F) + scale_fill_brewer(palette = "Dark2") + theme_classic() + geom_boxplot(width = 0.1, fill = "white") + theme(text = element_text(size = 18), axis.text.x = element_text(size = 18), plot.title = element_text(hjust = 0.5), legend.position = c(0.15,0.9)) + xlab("sample") + ylab("CAF") p + stat_compare_means(label.x = 1.5, label.y = 0.8) #Dysfunction# library("RColorBrewer") compare_means(Dysfunction ~ sample, data = data) #基因,样本,数据 p <- ggplot(data, aes(x = sample, y = Dysfunction, fill = sample)) + geom_violin(trim = F) + scale_fill_brewer(palette = "Dark2") + theme_classic() + geom_boxplot(width = 0.1, fill = "white") + theme(text = element_text(size = 18), axis.text.x = element_text(size = 18), plot.title = element_text(hjust = 0.5), legend.position = c(0.15,0.9)) + xlab("sample") + ylab("Dysfunction") p + stat_compare_means(label.x = 1.5, label.y = 4) #CD274# library("RColorBrewer") compare_means(CD274 ~ sample, data = data) #基因,样本,数据 p <- ggplot(data, aes(x = sample, y = CD274, fill = sample)) + geom_violin(trim = F) + scale_fill_brewer(palette = "Dark2") + theme_classic() + geom_boxplot(width = 0.1, fill = "white") + theme(text = element_text(size = 18), axis.text.x = element_text(size = 18), plot.title = element_text(hjust = 0.5), legend.position = c(0.15,0.9)) + xlab("sample") + ylab("CD274") p + stat_compare_means(label.x = 1.5, label.y = 4) #MSI# library("RColorBrewer") colnames(data)[5] <- "MSI" compare_means(MSI ~ sample, data = data) #基因,样本,数据 p <- ggplot(data, aes(x = sample, y = MSI, fill = sample)) + geom_violin(trim = F) + scale_fill_brewer(palette = "Dark2") + theme_classic() + geom_boxplot(width = 0.1, fill = "white") + theme(text = element_text(size = 18), axis.text.x = element_text(size = 18), plot.title = element_text(hjust = 0.5), legend.position = c(0.15,0.9)) + xlab("sample") + ylab("MSI") p + stat_compare_means(label.x = 1.5, label.y = 1.5) #IFNG# library("RColorBrewer") compare_means(IFNG ~ sample, data = data) #基因,样本,数据 p <- ggplot(data, aes(x = sample, y = IFNG, fill = sample)) + geom_violin(trim = F) + scale_fill_brewer(palette = "Dark2") + theme_classic() + geom_boxplot(width = 0.1, fill = "white") + theme(text = element_text(size = 18), axis.text.x = element_text(size = 18), plot.title = element_text(hjust = 0.5), legend.position = c(0.15,0.9)) + xlab("sample") + ylab("IFNG") p + stat_compare_means(label.x = 1.5, label.y = 4) ####药敏oncoppredict预测#### #TCGA# source("CALCPHENOTYPE.R") library(sva) library(preprocessCore) library(ridge) library(glmnet) library(car) library(tidyverse) library(impute) library(ggplot2) library(cowplot) Sys.setenv(LANGUAGE = "en") #显示英文报错信息 options(stringsAsFactors = FALSE) #禁止chr转成factor #表达矩阵 load(file = "data/TCGA_KIRC_mRNA.Rdata") dat <- expr_TCGA_TPM_01A #临床数据 TCGA_id <- colnames(expr_TCGA_TPM_01A) med=median(as.numeric(expr_TCGA_TPM_01A["TTC13",])) expr_TCGA_TPM_01A <- t(expr_TCGA_TPM_01A) expr_TCGA_TPM_01A <- as.data.frame(expr_TCGA_TPM_01A) sample <- ifelse(expr_TCGA_TPM_01A$TTC13 > med, "TTC13_high","TTC13_low") sample <- factor(sample,levels = c("TTC13_high","TTC13_low")) #factor的目的是把normal排在前面 metadata <- data.frame(TCGA_id,sample) rownames(metadata) <- metadata$TCGA_id sameSample=intersect(colnames(dat),rownames(metadata)) dat=dat[,sameSample] rt=metadata[sameSample,] ann <- rt$sample ann <- as.data.frame(ann) rownames(ann) <- rownames(rt) colnames(ann)[1] <- "ImmClust" table(ann$ImmClust) # 构建测试集数据 testingExprData <- as.matrix(dat[,rownames(ann)]) # 准备数据1:细胞系表达 trainingExprData <- readRDS(file='GDSC2_Expr_short.rds') # 数据类型为矩阵,细胞系表达是行为基因,列为细胞系,入值为表达谱的矩阵,表达谱需做log2标准化 # 准备数据2:细胞系对应药敏 trainingPtype <- readRDS(file = "GDSC2_Res.rds") # 数据类型为矩阵,细胞系药敏是行为细胞系,列为药物,入值为药物敏感性的矩阵,数据一般存在NA,需要过滤或进行填补 trainingPtype <- trainingPtype[,apply(trainingPtype, 2, function(x) {sum(is.na(x)) < 0.2 * nrow(trainingPtype)})] # 去除在超过20%样本中都缺失的药物 trainingPtype <- trainingPtype[apply(trainingPtype, 1, function(x) {sum(is.na(x)) < 0.2 * ncol(trainingPtype)}),] # 去除在超过20%药物中都缺失的样本 trainingPtype <- t(impute.knn(t(trainingPtype))$data) # KNN填补缺失值 trainingPtype <- exp(trainingPtype) # 这里根据数据特点做指数化,因为原始数据是经过取对数的,但CALCPHENOTYPE.R会做一步power transformation,因此避免重复,这里还原数据规模 batchCorrect <- "eb" # 但我的测试集是标准化的RNA-seq,和芯片信号强度类似,所以依然选择eb # 是否进行power transform选项:默认进行 powerTransformPhenotype <- TRUE # 低变异基因过滤阈值选项:默认为0.2 removeLowVaryingGenes <- 0.2 # 在过滤基因时候采用的数据类型选项:'homogenizeData' (批次效应消除后的数据)或 'rawData'(原始数据) removeLowVaringGenesFrom <- "homogenizeData" # 确定最小训练样本数目(一般不存在这个问题因为GDSC,CTRP,PRISM都有很多细胞系) minNumSamples <- 10 # 确定如何处理重复基因选项: -1是询问用户, 1 取均值, 以及 2 移除重复 selection <- 1 # 确定是否打印结果选项:默认进行 printOutput <- FALSE # 这里我不进行,免得最终markdown文件很长,自己运行的时候可以改为TRUE看到进度 # 确定是否采用PCA对基因表达进行降维选项:默认不进行 # 注意,如果选项report_pca = TRUE则上述选项必须为TRUE pcr <- FALSE report_pc <- FALSE # 确定是否需要列出相关性结果来确定潜在的药物biomarker(个人觉得没有必要,药物靶标的搜寻可以参考FigureYa212drugTarget,这里的做法过于“简单粗暴”) cc <- FALSE # 确定是否要输入模型的拟合优度值R^2(细胞系训练集的模型回代到训练集的预测结果之间的吻合程度) rsq <- FALSE # 想虐一下自己看看模型准确度的可以改为TRUE # 确定在pcr=TRUE时,主成份分析需要达到的解释度,默认80,即所得到的主成份的解释度相加要大于80% percent <- 80 # 构建模型并预测,该函数会在当前目录下生成calcPhenotype_Output,并且将药敏预测结果输入该目录,名为:DrugPredictions.csv calcPhenotype(trainingExprData = as.matrix(trainingExprData), # 函数会自己匹配细胞系表达和药敏的共有样本 trainingPtype = trainingPtype, # 但如果要选用特殊的细胞系类型,比如“生殖系统”、“消化系统”等,需要提前对细胞系做预筛选,函数不提供该操作 testExprData = as.matrix(testingExprData), batchCorrect = batchCorrect, powerTransformPhenotype = powerTransformPhenotype, removeLowVaryingGenes = removeLowVaryingGenes, minNumSamples = minNumSamples, selection = selection, printOutput = printOutput, pcr = pcr, removeLowVaringGenesFrom = removeLowVaringGenesFrom, report_pc = report_pc, cc = cc, percent = percent, rsq = rsq) drugsen <- read.csv("./calcPhenotype_Output/DrugPredictions.csv",row.names = 1,check.names = F,stringsAsFactors = F,header = T) outTab <- NULL for (drug in colnames(drugsen)) { tmp <- data.frame(sen = drugsen[,drug], group = ann[rownames(drugsen),"ImmClust"], row.names = rownames(drugsen), stringsAsFactors = F) avg <- as.numeric(by(tmp$sen, tmp$group, mean)) wt <- wilcox.test(sen~group,data = tmp) outTab <- rbind.data.frame(outTab, data.frame(drug = drug, avg1 = avg[1], avg2 = avg[2], p = wt$p.value, stringsAsFactors = F), stringsAsFactors = F) } outTab$fc <- outTab$avg2/outTab$avg1 outTab$log2fc <- log2(outTab$fc) # 把统计结果输出到文件 write.table(outTab,"output_wilcox test for potential drugs.txt",sep = "\t",row.names = F,col.names = T,quote = F) # 设置颜色 jco <- c("#EABF00", "#2874C5") plotp <- list() GCP.drug <- outTab[which(outTab$p < 0.05 & outTab$fc > 0),"drug"] # 这是我随便选取的阈值,控制药物数目 for (drug in GCP.drug) { predictedBoxdat <- data.frame("response" = drugsen[rownames(ann),drug], "Group" = ann$ImmClust, row.names = rownames(ann)) # 绘图 my_comparisons <- list(c("TTC13_high", "TTC13_low")) p <- ggplot(data = predictedBoxdat, aes(x=Group, y=response)) p <- p + geom_boxplot(aes(fill = Group)) + scale_fill_manual(values = jco[1:length(unique(predictedBoxdat$Group))]) + #自定义box的配色 theme_classic() + theme(legend.position="none") + # 倾斜字体 theme(axis.text.x = element_text(angle = 45, hjust = 1,size = 12), plot.title = element_text(size = 12, hjust = 0.5)) + xlab("") + ylab("Drug sensitivity") + # 这里可能是AUC,可能是IC50,根据具体情况调整 ggtitle(drug)+stat_compare_means(comparisons = my_comparisons, method = "t.test") # 补上title # 每种药物的结果画到一个单独的文件里 ggsave(paste("boxplot of", drug, "sensitivity.pdf"), width = 4, height = 5) plotp[[drug]] <- p # 保存在列表里供合并图片用 } # 合并图片 p <- plot_grid(plotlist = plotp,nrow = 2) # title可以AI下拉到合适位置,就如例文所示 p ggsave("boxplot of predicted drug sensitivity.pdf", width = 6, height = 5) ####泛癌分析#### #单基因泛癌表达矩阵# library(limma) gene="TTC13" files=dir() files=grep("^symbol.", files, value=T) geneList=list() for(i in files){ CancerType=gsub("symbol\\.|\\.txt", "", i) rt=read.table(i, header=T, sep="\t", check.names=F) geneList[[CancerType]]=as.vector(rt[,1]) } interGenes=Reduce(intersect, geneList) outTab=data.frame() allTab=data.frame() j=0 for(i in files){ CancerType=gsub("symbol\\.|\\.txt", "", i) rt=read.table(i, header=T, sep="\t", check.names=F) rt=as.matrix(rt) rownames(rt)=rt[,1] exp=rt[,2:ncol(rt)] dimnames=list(rownames(exp), colnames(exp)) data=matrix(as.numeric(as.matrix(exp)), nrow=nrow(exp), dimnames=dimnames) data=t(avereps(data)) row.names(data)=gsub(".$", "", row.names(data)) data=t(avereps(data)) group=sapply(strsplit(colnames(data),"\\-"), "[", 4) group=sapply(strsplit(group,""), "[", 1) Type=ifelse(group==0, "Tumor", "Normal") geneExp=t(data[gene,,drop=F]) outTab=rbind(outTab, cbind(geneExp,Type,CancerType)) j=j+1 if(j==1){ allTab=data[interGenes,group==0] }else{ allTab=cbind(allTab, data[interGenes,group==0]) } } #输出单基因的表达文件 out=cbind(ID=row.names(outTab), outTab) write.table(out, file="geneExp.txt", sep="\t", quote=F, row.names=F) #输出所有肿瘤的表达文件 allTab=allTab[rowMeans(allTab)>0.1,] allTab=cbind(ID=row.names(allTab), allTab) write.table(allTab, file="merge.txt", sep="\t", quote=F, row.names=F) ####TMB雷达图#### library(fmsb) expFile="geneExp.txt" tmbFile="TMB.txt" col="red" exp=read.table(expFile, header=T, sep="\t", check.names=F, row.names=1) exp=exp[(exp[,"Type"]=="Tumor"),] TMB=read.table(tmbFile, header=T, sep="\t", check.names=F, row.names=1) TMB=as.matrix(TMB) row.names(TMB)=gsub(".$", "", row.names(TMB)) sameSample=intersect(row.names(TMB), row.names(exp)) TMB=TMB[sameSample,] exp=exp[sameSample,] outTab=data.frame() fmsbTab=data.frame() for(i in levels(factor(exp[,"CancerType"]))){ exp1=exp[(exp[,"CancerType"]==i),] TMB1=TMB[(TMB[,"CancerType"]==i),] x=as.numeric(TMB1[,1]) y=as.numeric(exp1[,1]) corT=cor.test(x,y,method="spearman") cor=corT$estimate pValue=corT$p.value sig=ifelse(pValue<0.001,"***",ifelse(pValue<0.01,"**",ifelse(pValue<0.05,"*"," "))) outTab=rbind(outTab, cbind(CancerType=i,cor=cor,pValue=pValue)) fmsbTab=rbind(fmsbTab, cbind(CancerType=paste0(i,sig),cor=cor)) } write.table(outTab,file="cor.result.txt",sep="\t",row.names=F,quote=F) write.table(t(fmsbTab),file="fmsbInput.txt",sep="\t",col.names=F,quote=F) data=read.table("fmsbInput.txt", header=T, sep="\t", check.names=F, row.names=1) maxValue=ceiling(max(abs(data))*10)/10 data=rbind(rep(maxValue,ncol(data)),rep(-maxValue,ncol(data)),data) pdf(file="radar.pdf", width=7, height=7) radarchart(data, axistype=1, title="Tumor mutation burden", pcol=col, plwd=2 , plty=1, cglcol="grey", cglty=1, caxislabels=seq(-maxValue,maxValue,maxValue/2), cglwd=1.2, axislabcol="blue", vlcex=0.8 ) dev.off() ####MSI雷达图#### library(fmsb) expFile="geneExp.txt" msiFile="MSI.txt" col="blue" exp=read.table(expFile, header=T, sep="\t", check.names=F, row.names=1) exp=exp[(exp[,"Type"]=="Tumor"),] MSI=read.table(msiFile, header=T, sep="\t", check.names=F, row.names=1) MSI=as.matrix(MSI) row.names(MSI)=gsub(".$", "", row.names(MSI)) sameSample=intersect(row.names(MSI), row.names(exp)) MSI=MSI[sameSample,] exp=exp[sameSample,] outTab=data.frame() fmsbTab=data.frame() for(i in levels(factor(exp[,"CancerType"]))){ exp1=exp[(exp[,"CancerType"]==i),] MSI1=MSI[(MSI[,"CancerType"]==i),] x=as.numeric(MSI1[,1]) y=as.numeric(exp1[,1]) corT=cor.test(x,y,method="spearman") cor=corT$estimate pValue=corT$p.value sig=ifelse(pValue<0.001,"***",ifelse(pValue<0.01,"**",ifelse(pValue<0.05,"*"," "))) outTab=rbind(outTab, cbind(CancerType=i,cor=cor,pValue=pValue)) fmsbTab=rbind(fmsbTab, cbind(CancerType=paste0(i,sig),cor=cor)) } write.table(outTab,file="cor.result.txt",sep="\t",row.names=F,quote=F) write.table(t(fmsbTab),file="fmsbInput.txt",sep="\t",col.names=F,quote=F) data=read.table("fmsbInput.txt", header=T, sep="\t", check.names=F, row.names=1) maxValue=ceiling(max(abs(data))*10)/10 data=rbind(rep(maxValue,ncol(data)),rep(-maxValue,ncol(data)),data) pdf(file="radar.pdf", width=7, height=7) radarchart(data, axistype=1, title="Microsatellite instability", pcol=col, plwd=2 , plty=1, cglcol="grey", cglty=1, caxislabels=seq(-maxValue,maxValue,maxValue/2), cglwd=1.2, axislabcol="blue", vlcex=0.8 ) dev.off() ####TNB雷达图#### library(dplyr) library(fmsb) expFile="geneExp.txt" msiFile="TNB.txt" col="green" exp=read.table(expFile, header=T, sep="\t", check.names=F, row.names=1) exp=exp[(exp[,"Type"]=="Tumor"),] TNB=read.table(msiFile, header=T, sep="\t", check.names=F, row.names=NULL) TNB <- TNB %>% distinct(id,.keep_all = T) rownames(TNB) <- TNB$id sameSample=intersect(row.names(TNB), row.names(exp)) TNB=TNB[sameSample,] exp=exp[sameSample,] TNB <- TNB[,-3] TNB$CancerType <- exp$CancerType TNB <- TNB[,-1] outTab=data.frame() fmsbTab=data.frame() for(i in levels(factor(exp[,"CancerType"]))){ exp1=exp[(exp[,"CancerType"]==i),] TNB1=TNB[(TNB[,"CancerType"]==i),] x=as.numeric(TNB1[,1]) y=as.numeric(exp1[,1]) corT=cor.test(x,y,method="spearman") cor=corT$estimate pValue=corT$p.value sig=ifelse(pValue<0.001,"***",ifelse(pValue<0.01,"**",ifelse(pValue<0.05,"*"," "))) outTab=rbind(outTab, cbind(CancerType=i,cor=cor,pValue=pValue)) fmsbTab=rbind(fmsbTab, cbind(CancerType=paste0(i,sig),cor=cor)) } write.table(outTab,file="cor.result.txt",sep="\t",row.names=F,quote=F) write.table(t(fmsbTab),file="fmsbInput.txt",sep="\t",col.names=F,quote=F) data=read.table("fmsbInput.txt", header=T, sep="\t", check.names=F, row.names=1) maxValue=ceiling(max(abs(data))*10)/10 data=rbind(rep(maxValue,ncol(data)),rep(-maxValue,ncol(data)),data) pdf(file="radar.pdf", width=7, height=7) radarchart(data, axistype=1, title="Microsatellite instability", pcol=col, plwd=2 , plty=1, cglcol="grey", cglty=1, caxislabels=seq(-maxValue,maxValue,maxValue/2), cglwd=1.2, axislabcol="blue", vlcex=0.8 ) dev.off()