####TCGA表达谱数据下载####
library(SummarizedExperiment)
library(TCGAbiolinks)
query <- GDCquery(project = "TCGA-KIRC",
                  data.category = "Transcriptome Profiling",
                  data.type = "Gene Expression Quantification",   
                  workflow.type = "STAR - Counts", 
                  legacy = FALSE)

#扩内存
memory.limit(1000000)
GDCdownload(query = query)
mydata=GDCprepare(query = query)
mydata2=as.data.frame(rowRanges(mydata))
geneexp <- assay(mydata,i = "tpm_unstrand")#tpm_unstrand fpkm_unstrand #unstranded
geneexp=as.data.frame(geneexp)
geneexp1=cbind(gene_type=mydata2$gene_type,gene_name=mydata2$gene_name,geneexp)
mRNA=geneexp1[geneexp1$gene_type=="protein_coding",]
mRNA=mRNA[,-1]
TPM <- mRNA[!duplicated(mRNA$gene_name),] 
rownames(TPM) <- TPM$gene_name
TPM <- TPM[,-1]
TPM <- TPM[,substr(colnames(TPM),14,16)%in% c("01A","11A")]

###判断是否log2了
# 自动log(x)化
ex <- TPM
qx <- as.numeric(quantile(ex, c(0., 0.25, 0.5, 0.75, 0.99, 1.0), na.rm=T))
LogC <- (qx[5] > 100) ||
  (qx[6]-qx[1] > 50 && qx[2] > 0) ||
  (qx[2] > 0 && qx[2] < 1 && qx[4] > 1 && qx[4] < 2)

if (LogC) { 
  ex[which(ex < 0,arr.ind=T)] <- NaN #将负数定义为NaN
  exprSet <- log2(ex+1)  #log2(x+1)
  print("log2 transform finished")
}else{
  print("log2 transform not needed")
}
# 去除NaN
exprSet[which(exprSet == "NaN",arr.ind=T)]
expr_TCGA_TPM <- as.data.frame(exprSet)

tumor <- colnames(expr_TCGA_TPM)[substr(colnames(expr_TCGA_TPM),14,16) == "01A"] #533
normal <- colnames(expr_TCGA_TPM)[substr(colnames(expr_TCGA_TPM),14,16) == "11A"] #72
expr_TCGA_TPM <- expr_TCGA_TPM[,c(normal,tumor)]
expr_TCGA_TPM_01A <- expr_TCGA_TPM[,tumor]
expr_TCGA_TPM_11A <- expr_TCGA_TPM[,normal]
save(expr_TCGA_TPM,expr_TCGA_TPM_01A,expr_TCGA_TPM_11A,file="data/TCGA_KIRC_mRNA.Rdata")

#完结#




####TCGA临床数据下载####
rm(list = ls())
library(dplyr)
library(tibble)
library(tidyr)
library(TCGAbiolinks)
options(stringsAsFactors = F)
## 1.临床信息下载
clinical <- GDCquery_clinic(project = "TCGA-KIRC", 
                            type = "clinical")

dput(colnames(clinical)) #神奇的操作，直接生成向量

clin_data <- clinical %>% 
  dplyr::select(patient_ID = submitter_id,
                vital_status,
                gender,
                age = age_at_diagnosis,
                Stage = ajcc_pathologic_stage,
                T_Stage = ajcc_pathologic_t ,
                N_Stage = ajcc_pathologic_n,
                M_Stage = ajcc_pathologic_m,
                days_to_death,
                days_to_last_follow_up) 

clin_data$age <- (as.numeric(clin_data$age))/365
clin_data$days_to_death <- as.numeric(clin_data$days_to_death)
clin_data$days_to_last_follow_up <- as.numeric(clin_data$days_to_last_follow_up)

#添加时间和状态，删除这两列为空的值
rt <- clin_data %>%
  mutate(time = coalesce(days_to_death, days_to_last_follow_up)/365,
         status = ifelse(vital_status == 'Alive', 0, 1)) %>%
  dplyr::filter(time > 0) %>%
  dplyr::filter(age > 0) %>%
  distinct() %>%
  dplyr::select(- c("days_to_death", "days_to_last_follow_up")) 

rt <- rt[-which(rt$N_Stage == "NX"),] 
rt <- rt[-which(rt$M_Stage == "MX"),] 

rt$gender <- factor(rt$gender,labels=c("female", "male"))

rt$Stage <- if_else(rt$Stage %in% c("Stage IV"),"Stage IV",
                    if_else(rt$Stage %in% c("Stage III"),"Stage III",
                            if_else(rt$Stage %in% c("Stage II"),"Stage II",
                                    "Stage I")))
rt$Stage <- factor(rt$Stage,labels=c("Stage I", "Stage II", "Stage III", "Stage IV"))

rt$T_Stage <- if_else(rt$T_Stage %in% c("T1","T1a","T1b"),"T1",
                      if_else(rt$T_Stage %in% c("T2","T2a","T2b"),"T2",
                              if_else(rt$T_Stage%in% c("T3","T3a","T3b","T3c"),"T3","T4")))
rt$T_Stage <- factor(rt$T_Stage,labels=c("T1", "T2", "T3", "T4"))

rt$N_Stage <- factor(rt$N_Stage,labels=c("N0", "N1"))

rt$M_Stage <- factor(rt$M_Stage,labels=c("M0", "M1"))

TCGA_clinical <- rt
save(TCGA_clinical,file = "data/TCGA_KIRC_clinic.Rdata")

#完结#





####TTC13非配对样本表达量####
load(file = "data/TCGA_KIRC_mRNA.Rdata")
exprSet <- expr_TCGA_TPM["TTC13",]

TCGA_id <- colnames(exprSet)   
sample <- ifelse(substring(TCGA_id,14,15)=="01","Tumor","Normal")
sample <- factor(sample,levels = c("Normal","Tumor"))    #factor的目的是把normal排在前面
metadata <- data.frame(TCGA_id,sample) 

exprSet <-t(exprSet)    
exprSet <-as.data.frame(exprSet)

exprSet <- cbind(TCGA_id=rownames(exprSet),exprSet)    #行名不能直接变为第一列，必须cbind
exprSet <- merge(metadata,exprSet,by="TCGA_id")

#画图
library(ggplot2) 
library(ggpubr)
#设置p的比较组
compare_means(TTC13 ~ sample, data = exprSet)   #基因，样本，数据

p <- ggboxplot(exprSet, x = "sample", y = "TTC13",
               color = "sample", palette = "jco",
               add = "jitter")+ylab("The expression of TTC13")

p + stat_compare_means( aes(label = ..p.signif..), 
                        label.x = 1.5, label.y = 6)




####TTC13配对样本表达量####
library(limma)
load(file = "data/TCGA_KIRC_mRNA.Rdata")
exprSet <- expr_TCGA_TPM["TTC13",]

TCGA_id <- colnames(exprSet)   
sample <- ifelse(substring(TCGA_id,14,15)=="01","Tumor","Normal")
sample <- factor(sample,levels = c("Normal","Tumor"))    #factor的目的是把normal排在前面
metadata <- data.frame(TCGA_id,sample) 

exprSet <-t(exprSet)    
exprSet <-as.data.frame(exprSet)

exprSet <- cbind(TCGA_id=rownames(exprSet),exprSet)    #行名不能直接变为第一列，必须cbind
exprSet <- merge(metadata,exprSet,by="TCGA_id")
geneName="TTC13"

#提取配对样品的数据
rownames(exprSet) <- exprSet$TCGA_id
normalData=exprSet[exprSet$sample=="Normal",3,drop=F]
normalData=as.matrix(normalData)
rownames(normalData)=gsub("(.*?)\\-(.*?)\\-(.*?)\\-(.*?)\\-.*", "\\1\\-\\2\\-\\3", rownames(normalData))
normalData=avereps(normalData)
tumorData=exprSet[exprSet$sample=="Tumor",3,drop=F]
tumorData=as.matrix(tumorData)
rownames(tumorData)=gsub("(.*?)\\-(.*?)\\-(.*?)\\-(.*?)\\-.*", "\\1\\-\\2\\-\\3", rownames(tumorData))
tumorData=avereps(tumorData)
sameSample=intersect(row.names(normalData), row.names(tumorData))
data=cbind(normalData[sameSample,,drop=F], tumorData[sameSample,,drop=F])
colnames(data)=c("Normal", "Tumor")
data=as.data.frame(data)

library(ggplot2) 
library(ggpubr)
#绘制配对差异分析的图形
ggpaired(data, cond1="Normal", cond2="Tumor", fill="condition",
         xlab="", ylab=paste0(geneName, " expression"),
         legend.title="Type",
         color = "condition", line.color = "gray", line.size = 0.4,
         palette = "jco")+
  stat_compare_means(paired = TRUE, symnum.args=list(cutpoints = c(0, 0.001, 0.01, 0.05, 1), symbols = c("***", "**", "*", "ns")),label = "p.signif",label.x = 1.35)
dev.off()







####预后KM####
load(file = "data/TCGA_KIRC_mRNA.Rdata")
load(file = "data/TCGA_KIRC_clinic.Rdata")

colnames(expr_TCGA_TPM_01A) <- substr(colnames(expr_TCGA_TPM_01A),1,12)
expr_TCGA_TPM_01A <- expr_TCGA_TPM_01A["TTC13",]
rownames(TCGA_clinical) <- TCGA_clinical$patient_ID

comgene <- intersect(rownames(TCGA_clinical),colnames(expr_TCGA_TPM_01A))   
TCGA_clinical <- TCGA_clinical[comgene,]  
expr_TCGA_TPM_01A <- expr_TCGA_TPM_01A[,comgene]

expr_TCGA_TPM_01A <- as.data.frame(expr_TCGA_TPM_01A)
expr_TCGA_TPM_01A <- t(expr_TCGA_TPM_01A)

rt <- cbind(expr_TCGA_TPM_01A,TCGA_clinical)
save(rt,file = "data/TTC13_clinic.Rdata")

library(survival)
library(survminer)
res.cut <- surv_cutpoint(rt,
                         time ="time",
                         event = "status",
                         variables = "TTC13",
                         minprop = 0.3)
res.cut
cutoff=res.cut$cutpoint$cutpoint
rt$TTC13 <- ifelse(rt$TTC13 > cutoff, "TTC13_high","TTC13_low")
rt <- as.data.frame(rt)

#分组
fit <- survfit(Surv(time, status) ~ TTC13, data = rt)
p1  <-  ggsurvplot(fit,
                   legend.title = "",#定义图例的名称
                   legend.labs = c("TTC13_high","TTC13_low"),#一定要和table的顺序一致
                   #legend = "top",#图例位置
                   pval = T,
                   pval.method = TRUE,#添加p值的检验方法
                   #conf.int = TRUE,#添加置信区间
                   risk.table = TRUE, #在图下方添加风险表
                   risk.table.col = "strata", #根据数据分组为风险表添加颜色
                   risk.table.y.text = F,#风险表Y轴是否显示分组的名称,F为以线条展示分组
                   #linetype = "strata", #改变不同组别的生存曲线的线型
                   #surv.median.line = "hv", #标注出中位生存时间
                   xlab = "Time in years", #x轴标题
                   xlim = c(0,max(rt$time)), #展示x轴的范围
                   ylab = "Overall survival rate",
                   break.time.by = 1, #x轴间隔
                   size = 1, #线条大小
                   #ggtheme = theme_bw(), #为图形添加网格
                   palette = "jco"#图形颜色风格
) 

#看一下图
p1  






####ROC曲线####

library(ROCR)  
library(glmnet) 
library(caret)
library(timeROC)
library(survival)

load(file = "data/TTC13_clinic.Rdata")
time_roc_res <- timeROC(
  T=rt$time,   #结局时间
  delta=rt$status,   #结局指标
  marker=rt$TTC13, 
  cause = 1,
  weighting="marginal",
  times = c(1, 3, 5),      #改时间年限
  ROC = TRUE,
  iid = TRUE
)

time_roc_res$AUC
confint(time_roc_res, level = 0.95)$CI_AUC
time_ROC_df <- data.frame(
  TP_1year = time_roc_res$TP[, 1],
  FP_1year = time_roc_res$FP[, 1],
  TP_2year = time_roc_res$TP[, 2],
  FP_2year = time_roc_res$FP[, 2],
  TP_3year = time_roc_res$TP[, 3],
  FP_3year = time_roc_res$FP[, 3]
)
ggplot(data = time_ROC_df) +
  geom_line(aes(x = FP_1year, y = TP_1year), size = 1, color = "#BC3C29FF") +
  geom_line(aes(x = FP_2year, y = TP_2year), size = 1, color = "#0072B5FF") +
  geom_line(aes(x = FP_3year, y = TP_3year), size = 1, color = "#E18727FF") +
  geom_abline(slope = 1, intercept = 0, color = "grey", size = 1, linetype = 2) +
  theme_bw() +
  annotate("text",
           x = 0.75, y = 0.25, size = 4.5,
           label = paste0("AUC at 1 years = ", sprintf("%.3f", time_roc_res$AUC[[1]])), color = "#BC3C29FF"  #改名字
  ) +
  annotate("text",
           x = 0.75, y = 0.15, size = 4.5,
           label = paste0("AUC at 3 years = ", sprintf("%.3f", time_roc_res$AUC[[2]])), color = "#0072B5FF"  #改名字
  ) +
  annotate("text",
           x = 0.75, y = 0.05, size = 4.5,
           label = paste0("AUC at 5 years = ", sprintf("%.3f", time_roc_res$AUC[[3]])), color = "#E18727FF"  #改名字
  ) +
  labs(x = "False positive rate", y = "True positive rate") +
  theme(
    axis.text = element_text(face = "bold", size = 11, color = "black"),
    axis.title.x = element_text(face = "bold", size = 14, color = "black", margin = margin(c(15, 0, 0, 0))),
    axis.title.y = element_text(face = "bold", size = 14, color = "black", margin = margin(c(0, 15, 0, 0)))
  )

#完毕#








####TTC13与临床特征相关性####

load(file = "data/TTC13_clinic.Rdata")
#stage
rt$Stage <- if_else(rt$Stage %in% c("Stage I","Stage II"),"Stage I-II", "Stage III-IV")
rt$Stage <- factor(rt$Stage,labels=c("Stage I-II", "Stage III-IV"))

compare_means(TTC13 ~ Stage,  data = rt, method = "anova")

p <- ggboxplot(rt, x = "Stage", y = "TTC13",
               color = "Stage", palette = "jco",
               add = "jitter")+ylab("The expression of TTC13")

p + stat_compare_means(label.x = 1.5, label.y = 6)



#T_Stage
rt$T_Stage <- if_else(rt$T_Stage %in% c("T1","T2"),"T1-2", "T3-4")
rt$T_Stage <- factor(rt$T_Stage,labels=c("T1-2", "T3-4"))

compare_means(TTC13 ~ T_Stage,  data = rt, method = "anova")

p <- ggboxplot(rt, x = "T_Stage", y = "TTC13",
               color = "T_Stage", palette = "jco",
               add = "jitter")+ylab("The expression of TTC13")

p + stat_compare_means(label.x = 1.5, label.y = 6)



#N_Stage
compare_means(TTC13 ~ N_Stage,  data = rt, method = "anova")

p <- ggboxplot(rt, x = "N_Stage", y = "TTC13",
               color = "N_Stage", palette = "jco",
               add = "jitter")+ylab("The expression of TTC13")

p + stat_compare_means(label.x = 1.5, label.y = 6)



#M_Stage
compare_means(TTC13 ~ M_Stage,  data = rt, method = "anova")

p <- ggboxplot(rt, x = "M_Stage", y = "TTC13",
               color = "M_Stage", palette = "jco",
               add = "jitter")+ylab("The expression of TTC13")

p + stat_compare_means(label.x = 1.5, label.y = 6)



#Age
rt$age <- ifelse(rt$age >=65, ">=65","<65")
rt$age<-factor(rt$age,levels=c("<65",">=65")) 

compare_means(TTC13 ~ age,  data = rt, method = "anova")

p <- ggboxplot(rt, x = "age", y = "TTC13",
               color = "age", palette = "jco",
               add = "jitter")+ylab("The expression of TTC13")

p + stat_compare_means(label.x = 1.5, label.y = 6)



#gender
compare_means(TTC13 ~ gender,  data = rt, method = "anova")

p <- ggboxplot(rt, x = "gender", y = "TTC13",
               color = "gender", palette = "jco",
               add = "jitter")+ylab("The expression of TTC13")

p + stat_compare_means(label.x = 1.5, label.y = 6)








####GSEA软件版数据准备####

load(file = "data/TCGA_KIRC_mRNA.Rdata")
gene <- "TTC13"       
med=median(as.numeric(expr_TCGA_TPM_01A[gene,]))    #取ITGA3表达的中位数
conditions=data.frame(sample=colnames(expr_TCGA_TPM_01A),
                      group=factor(ifelse(expr_TCGA_TPM_01A[gene,]>med,"high","low"),levels = c("low","high"))) %>% 
  column_to_rownames("sample")

high <- rownames(conditions)[conditions$group == "high"] #266
low <- rownames(conditions)[conditions$group == "low"] #267
expr_TCGA_TPM_01A <- expr_TCGA_TPM_01A[,c(high,low)]

### board 桌面版本
### 从R语言对接桌面版本
### 需要三个文件
### 表达量gct，表型cls，基因集gmt
exprSet = expr_TCGA_TPM_01A
exprSet <- cbind(gene_id = rownames(exprSet), description = "na", exprSet)

## 创建gct文件
gct_file = paste0("my.gct")
sink(gct_file)
cat("#1.2\n")
cat(paste0(nrow(exprSet), "\t", (length(exprSet) - 2), "\n"))
sink()
write.table(exprSet, gct_file, append = T, quote = F, row.names = F, col.names = T,
            sep = "\t")

# 创建cls文件
group_list <- c(rep("high",266),rep("low",267)) 
cls_file = paste0("my.cls")
sink(cls_file)
cat(paste0(length(group_list), "\t", length(unique(group_list)), "\t1\n"))
cat(paste0("#", paste(unique(group_list), collapse = "\t"), "\n"))
cat(paste(group_list, collapse = "\t"), "\n")
sink()







####单/多因素分析####
load(file = "data/TTC13_clinic.Rdata")
rt <- rt %>% 
  select(c("time","status",
           "age","gender","Stage","T_Stage","N_Stage","M_Stage","TTC13"))

#单因素Cox回归
#把因素变为数字
rt$T_Stage <- if_else(rt$T_Stage =="T4",4,
                      if_else(rt$T_Stage =="T3",3,
                              if_else(rt$T_Stage =="T2",2,1)))
rt$gender <- if_else(rt$gender == "female",0,1)
rt$Stage <- if_else(rt$Stage == "Stage IV",4,
                    if_else(rt$Stage == "Stage III",3,
                            if_else(rt$Stage == "Stage II",2,1)))
rt$N_Stage <- if_else(rt$N_Stage == "N1",2,1)
rt$M_Stage <- if_else(rt$M_Stage == "M0",0,1)

#假设我们要对如下5个特征做单因素cox回归分析
covariates <- c("gender","age","Stage","T_Stage","N_Stage","M_Stage","TTC13")
#分别对每一个变量，构建生存分析的公式
univ_formulas <- 
  sapply(covariates,
         function(x) 
           as.formula(paste('Surv(time, status)~', x)))   #注意更改time和OS的名字

#循环对每一个特征做cox回归分析
univ_models <- lapply( univ_formulas, function(x){coxph(x, data = rt)})   #注意更改数据

univ_results <- lapply(univ_models,
                       function(x){ 
                         x <- summary(x)
                         #获取p值
                         p.value<-signif(x$wald["pvalue"], digits=4)
                         #获取HR
                         HR <-signif(x$coef[2], digits=4);
                         #获取95%置信区间
                         HR.confint.lower <- signif(x$conf.int[,"lower .95"], 4)
                         HR.confint.upper <- signif(x$conf.int[,"upper .95"],4)
                         lower <- HR.confint.lower
                         upper <- HR.confint.upper
                         HR <- HR
                         res<-c(p.value,HR,lower,upper)
                         names(res)<-c("pvalue","HR","lower","upper")
                         return(res)
                       })
#转换成数据框，并转置
res <- t(as.data.frame(univ_results, check.names = FALSE))
as.data.frame(res)
write.table(file="output/TCC13_univariate_cox_result.txt",as.data.frame(res),quote=F,sep="\t")

library(forestploter)
#读取单因素结果
dt <- data.table::fread("output/TCC13_univariate_cox_result.txt",data.table = F)   
dt$` ` <- paste(rep(" ", 20), collapse = " ")
dt$HR = round(dt$HR,4)
dt$pvalue = round(dt$pvalue,4)
dt$lower = round(dt$lower,4)
dt$upper = round(dt$upper,4)

dt$`HR (95% CI)` <- ifelse(is.na(dt$HR), "",
                           sprintf("%.3f (%.3f to %.3f)",
                                   dt$HR, dt$lower, dt$upper))
tm <- forest_theme(base_size = 10,
                   # 置信区间点形状，线类型/颜色/宽度
                   ci_pch = 16,  # 点形状
                   ci_col = "red",
                   ci_lty = 1, # 线条类型
                   ci_lwd = 1.5, # 线条宽度
                   ci_Theight = 0.4, # 在CI的末尾设置T形尾部
                   # 参考线宽/类型/颜色
                   refline_lwd = 1,
                   refline_lty = "dashed",
                   refline_col = "grey20",
                   # 垂直的线宽/类型/颜色
                   vertline_lwd = 1,
                   vertline_lty = "dashed",
                   vertline_col = "grey20",
                   # 更改填充和边框的摘要颜色
                   summary_fill = "red",
                   summary_col = "red",
                   # 脚注字体大小/字形/颜色
                   footnote_cex = 0.6,
                   footnote_fontface = "italic",
                   footnote_col = "red")

plot <- forest(dt[, c(1,2,6,7)],
               est = dt$HR,
               lower = dt$lower,
               upper = dt$upper,
               ci_column = 3,theme = tm,ticks_at = c(0, 1, 5, 10,15,20,25,30),
               ref_line = 1)#默认是1
plot




#TCGA多因素cox回归分析#
res.cox <- coxph(Surv(time, status) ~  age + Stage + T_Stage + 
                   N_Stage + M_Stage + TTC13, data =  rt)
x <- summary(res.cox)
pvalue=signif(as.matrix(x$coefficients)[,5],4)
HR=signif(as.matrix(x$coefficients)[,2],4)
low=signif(x$conf.int[,3],4)
high=signif(x$conf.int[,4],4)
multi_res=data.frame(pvalue=pvalue,
                     HR=HR,
                     lower=low,
                     upper=high,
                     stringsAsFactors = F
)
multi_res
write.table(file="output/TTC13multivariate_cox_result.txt",multi_res,quote=F,sep="\t")

##画图
library(forestploter)
#读取多因素结果
dt <- data.table::fread("output/TTC13multivariate_cox_result.txt",data.table = F) 

dt$` ` <- paste(rep(" ", 20), collapse = " ")

dt$HR = round(dt$HR,4)
dt$pvalue = round(dt$pvalue,4)
dt$lower = round(dt$lower,4)
dt$upper = round(dt$upper,4)

dt$`HR (95% CI)` <- ifelse(is.na(dt$HR), "",
                           sprintf("%.3f (%.3f to %.3f)",
                                   dt$HR, dt$lower, dt$upper))

tm <- forest_theme(base_size = 10,
                   # 置信区间点形状，线类型/颜色/宽度
                   ci_pch = 16,  # 点形状
                   ci_col = "red",
                   ci_lty = 1, # 线条类型
                   ci_lwd = 1.5, # 线条宽度
                   ci_Theight = 0.4, # 在CI的末尾设置T形尾部
                   # 参考线宽/类型/颜色
                   refline_lwd = 1,
                   refline_lty = "dashed",
                   refline_col = "grey20",
                   # 垂直的线宽/类型/颜色
                   vertline_lwd = 1,
                   vertline_lty = "dashed",
                   vertline_col = "grey20",
                   # 更改填充和边框的摘要颜色
                   summary_fill = "red",
                   summary_col = "red",
                   # 脚注字体大小/字形/颜色
                   footnote_cex = 0.6,
                   footnote_fontface = "italic",
                   footnote_col = "red")

plot <- forest(dt[, c(1,2,6,7)],
               est = dt$HR,
               lower = dt$lower,
               upper = dt$upper,
               ci_column = 3,theme = tm,ticks_at = c(0, 1, 5, 10,15,20,25,30),
               ref_line = 1)#默认是1
plot




####nomcox####

library(survival)
library(regplot)
library(rms)

load(file = "data/TTC13_clinic.Rdata")
rt <- rt %>% 
  select(c("time","status",
           "age","gender","Stage","T_Stage","N_Stage","M_Stage","TTC13"))

nomcox=coxph(Surv(time, status) ~ . , data = rt)
regplot(nomcox,
        plots = c("bars", "boxes"),
        clickable=F,
        title=NULL,
        points=T,
        droplines=T,
        observation=rt[20,],
        rank="sd",
        failtime = c(1,3,5),
        prfail = F)


#校准曲线
pdf("calibration.pdf",10,8)
#1 year
mx1 <- cph(Surv(time, status) ~TTC13, x=T, y=T, surv=T, data=rt, time.inc=1)
cal1 <- calibrate(mx1, cmethod="KM", method="boot", u=1, m=(nrow(rt)/3), B=1000)
plot(cal1, xlim=c(0,1), ylim=c(0,1),
     xlab="Nomogram-predicted Overall survival (%)", ylab="Observed Overall survival (%)", lwd=1.5, col="green", sub=F)
#3 year
mx2 <- cph(Surv(time, status) ~ TTC13, x=T, y=T, surv=T, data=rt, time.inc=3)
cal2 <- calibrate(mx2, cmethod="KM", method="boot", u=2, m=(nrow(rt)/3), B=1000)
plot(cal2, xlim=c(0,1), ylim=c(0,1), xlab="", ylab="", lwd=1.5, col="blue", sub=F, add=T)

#5 year
mx3 <- cph(Surv(time, status) ~TTC13, x=T, y=T, surv=T, data=rt, time.inc=5)
cal3 <- calibrate(mx3, cmethod="KM", method="boot", u=3, m=(nrow(rt)/3), B=1000)
plot(cal3, xlim=c(0,1), ylim=c(0,1), xlab="", ylab="",  lwd=1.5, col="red", sub=F, add=T)
legend('bottomright', c('1-year', '3-year', '5-year'),
       col=c("green","blue","red"), lwd=1.5, bty = 'n')
dev.off()



####免疫浸润####

library(e1071)
library(parallel)
library(preprocessCore) 
source("CIBERSORT.R")   #这边需要读取现成的R代码，在文件夹里

load(file = "data/TCGA_KIRC_mRNA.Rdata")
write.table(expr_TCGA_TPM_01A, file = "output/KIRC_mRNA_01A.txt",sep = "\t",row.names = T,col.names = NA,quote = F) 


sig_matrix <- "LM22.txt"   #免疫细胞的注释文件
mixture_file = 'output/KIRC_mRNA_01A.txt'   #肿瘤患者表达谱


res_cibersort <- CIBERSORT(sig_matrix, mixture_file, perm=100, QN=TRUE)  #此处运行时间较久，100是算的次数，越大，数据越准确
save(res_cibersort,file = "output/res_cibersort.Rdata")   #保存中间文件


library(limma)
library(reshape2)
library(ggpubr)
#加载cibersort数据
load("output/res_cibersort.Rdata")  
res_cibersort <- res_cibersort[,1:22]   
data <- res_cibersort[,colSums(res_cibersort) > 0]   
data <- as.data.frame(data)
risk <- t(expr_TCGA_TPM_01A)
sameSample=intersect(row.names(data), row.names(risk))
data=cbind(risk[sameSample,"TTC13",drop=F],data[sameSample,,drop=F])

corResult=data.frame()
for(i in colnames(data)){
  tdc <- cor.test(data[,"TTC13"], data[,i], method = "pearson")
  pvalue=tdc$p.value
  corResult=rbind(corResult,
                  cbind(gene=i,
                        cor= tdc$estimate,
                        Low95CI=tdc$conf.int[1],
                        High95CI=tdc$conf.int[2],
                        pvalue=tdc$p.value)
  )
}

outTab=corResult[as.numeric(corResult$pvalue)<0.05,]


#Plasma cells#
library(ggpubr)
ggscatter(data, x = "TTC13", y = "Plasma cells",
          color = "black", size =2, # Points color and size
          add = "reg.line",  # Add regression line
          add.params = list(color = "blue", fill = "gray"), # Customize regression line
          conf.int = TRUE, # Add confidence interval
          cor.coef = TRUE, # Add correlation coefficient. see ?stat_cor
          cor.coeff.args = list(method = "pearson"))



#T cells CD4 naive#
library(ggpubr)
ggscatter(data, x = "TTC13", y = "T cells CD4 naive",
          color = "black", size =2, # Points color and size
          add = "reg.line",  # Add regression line
          add.params = list(color = "blue", fill = "gray"), # Customize regression line
          conf.int = TRUE, # Add confidence interval
          cor.coef = TRUE, # Add correlation coefficient. see ?stat_cor
          cor.coeff.args = list(method = "pearson"))


#T cells CD4 memory resting#
library(ggpubr)
ggscatter(data, x = "TTC13", y = "T cells CD4 memory resting",
          color = "black", size =2, # Points color and size
          add = "reg.line",  # Add regression line
          add.params = list(color = "blue", fill = "gray"), # Customize regression line
          conf.int = TRUE, # Add confidence interval
          cor.coef = TRUE, # Add correlation coefficient. see ?stat_cor
          cor.coeff.args = list(method = "pearson"))


#T cells follicular helper#
library(ggpubr)
ggscatter(data, x = "TTC13", y = "T cells follicular helper",
          color = "black", size =2, # Points color and size
          add = "reg.line",  # Add regression line
          add.params = list(color = "blue", fill = "gray"), # Customize regression line
          conf.int = TRUE, # Add confidence interval
          cor.coef = TRUE, # Add correlation coefficient. see ?stat_cor
          cor.coeff.args = list(method = "pearson"))


#NK cells activated#
library(ggpubr)
ggscatter(data, x = "TTC13", y = "NK cells activated",
          color = "black", size =2, # Points color and size
          add = "reg.line",  # Add regression line
          add.params = list(color = "blue", fill = "gray"), # Customize regression line
          conf.int = TRUE, # Add confidence interval
          cor.coef = TRUE, # Add correlation coefficient. see ?stat_cor
          cor.coeff.args = list(method = "pearson"))


#Macrophages M0#
library(ggpubr)
ggscatter(data, x = "TTC13", y = "Macrophages M0",
          color = "black", size =2, # Points color and size
          add = "reg.line",  # Add regression line
          add.params = list(color = "blue", fill = "gray"), # Customize regression line
          conf.int = TRUE, # Add confidence interval
          cor.coef = TRUE, # Add correlation coefficient. see ?stat_cor
          cor.coeff.args = list(method = "pearson"))


#Macrophages M1#
library(ggpubr)
ggscatter(data, x = "TTC13", y = "Macrophages M1",
          color = "black", size =2, # Points color and size
          add = "reg.line",  # Add regression line
          add.params = list(color = "blue", fill = "gray"), # Customize regression line
          conf.int = TRUE, # Add confidence interval
          cor.coef = TRUE, # Add correlation coefficient. see ?stat_cor
          cor.coeff.args = list(method = "pearson"))


#Dendritic cells activated#
library(ggpubr)
ggscatter(data, x = "TTC13", y = "Dendritic cells activated",
          color = "black", size =2, # Points color and size
          add = "reg.line",  # Add regression line
          add.params = list(color = "blue", fill = "gray"), # Customize regression line
          conf.int = TRUE, # Add confidence interval
          cor.coef = TRUE, # Add correlation coefficient. see ?stat_cor
          cor.coeff.args = list(method = "pearson"))


#Mast cells resting#
library(ggpubr)
ggscatter(data, x = "TTC13", y = "Mast cells resting",
          color = "black", size =2, # Points color and size
          add = "reg.line",  # Add regression line
          add.params = list(color = "blue", fill = "gray"), # Customize regression line
          conf.int = TRUE, # Add confidence interval
          cor.coef = TRUE, # Add correlation coefficient. see ?stat_cor
          cor.coeff.args = list(method = "pearson"))


#Eosinophils#
library(ggpubr)
ggscatter(data, x = "TTC13", y = "Eosinophils",
          color = "black", size =2, # Points color and size
          add = "reg.line",  # Add regression line
          add.params = list(color = "blue", fill = "gray"), # Customize regression line
          conf.int = TRUE, # Add confidence interval
          cor.coef = TRUE, # Add correlation coefficient. see ?stat_cor
          cor.coeff.args = list(method = "pearson"))






####TIDE####
options(stringsAsFactors = FALSE) #禁止chr转成factor

#表达矩阵
load(file = "data/TCGA_KIRC_mRNA.Rdata")
dat <- expr_TCGA_TPM_01A

TCGA_id <- colnames(expr_TCGA_TPM_01A)   
med=median(as.numeric(expr_TCGA_TPM_01A["TTC13",])) 

expr_TCGA_TPM_01A <- t(expr_TCGA_TPM_01A)
expr_TCGA_TPM_01A <- as.data.frame(expr_TCGA_TPM_01A)

sample <- ifelse(expr_TCGA_TPM_01A$TTC13 > med, "TTC13_high","TTC13_low")
sample <- factor(sample,levels = c("TTC13_high","TTC13_low"))    #factor的目的是把normal排在前面
metadata <- data.frame(TCGA_id,sample) 
rownames(metadata) <- metadata$TCGA_id


#临床数据
sameSample=intersect(colnames(dat),rownames(metadata))
dat=dat[,sameSample]
metadata=metadata[sameSample,]

ann <- metadata$sample
ann <- as.data.frame(ann)
rownames(ann) <- rownames(metadata)
colnames(ann)[1] <- "ImmClust"

TIDE <- dat
# 这里为了得到比较好的结果，采用two direction median centered
TIDE <- sweep(TIDE,2, apply(TIDE,2,median,na.rm=T))
TIDE <- sweep(TIDE,1, apply(TIDE,1,median,na.rm=T))
write.table(TIDE,"data/TIDE_input.self_subtract",sep = "\t",row.names = T,col.names = NA,quote = F)

#------------------------------------#
# 请参照文件夹中的TIDE使用教程.docx完成该部分#


TIDE.res <- read.csv("TIDE_output.csv",header = T,row.names = 1,check.names = F,stringsAsFactors = F)
TIDE.res <- TIDE.res[rownames(metadata),]
data=cbind(TIDE.res, metadata)
#TIDE#
library("RColorBrewer")
compare_means(TIDE ~ sample, data = data)   #基因，样本，数据

p <- ggplot(data, aes(x = sample, y = TIDE, fill = sample)) +
  geom_violin(trim = F) + scale_fill_brewer(palette = "Dark2") + 
  theme_classic() + geom_boxplot(width = 0.1, fill = "white") + 
  theme(text = element_text(size = 18), 
        axis.text.x = element_text(size = 18), 
        plot.title = element_text(hjust = 0.5), 
        legend.position = c(0.15,0.9)) + 
  xlab("sample") +  ylab("TIDE")

p + stat_compare_means(label.x = 1.5, label.y = 6)




#TIDE#
library("RColorBrewer")
compare_means(TIDE ~ sample, data = data)   #基因，样本，数据

p <- ggplot(data, aes(x = sample, y = TIDE, fill = sample)) +
  geom_violin(trim = F) + scale_fill_brewer(palette = "Dark2") + 
  theme_classic() + geom_boxplot(width = 0.1, fill = "white") + 
  theme(text = element_text(size = 18), 
        axis.text.x = element_text(size = 18), 
        plot.title = element_text(hjust = 0.5), 
        legend.position = c(0.15,0.9)) + 
  xlab("sample") +  ylab("TIDE")

p + stat_compare_means(label.x = 1.5, label.y = 6)


#MDSC#
library("RColorBrewer")
compare_means(MDSC ~ sample, data = data)   #基因，样本，数据

p <- ggplot(data, aes(x = sample, y = MDSC, fill = sample)) +
  geom_violin(trim = F) + scale_fill_brewer(palette = "Dark2") + 
  theme_classic() + geom_boxplot(width = 0.1, fill = "white") + 
  theme(text = element_text(size = 18), 
        axis.text.x = element_text(size = 18), 
        plot.title = element_text(hjust = 0.5), 
        legend.position = c(0.15,0.9)) + 
  xlab("sample") +  ylab("MDSC")

p + stat_compare_means(label.x = 1.5, label.y = 0.8)


#Exclusion#
library("RColorBrewer")
compare_means(Exclusion ~ sample, data = data)   #基因，样本，数据

p <- ggplot(data, aes(x = sample, y = Exclusion, fill = sample)) +
  geom_violin(trim = F) + scale_fill_brewer(palette = "Dark2") + 
  theme_classic() + geom_boxplot(width = 0.1, fill = "white") + 
  theme(text = element_text(size = 18), 
        axis.text.x = element_text(size = 18), 
        plot.title = element_text(hjust = 0.5), 
        legend.position = c(0.15,0.9)) + 
  xlab("sample") +  ylab("Exclusion")

p + stat_compare_means(label.x = 1.5, label.y = 0.8)


#CD8#
library("RColorBrewer")
compare_means(CD8 ~ sample, data = data)   #基因，样本，数据

p <- ggplot(data, aes(x = sample, y = CD8, fill = sample)) +
  geom_violin(trim = F) + scale_fill_brewer(palette = "Dark2") + 
  theme_classic() + geom_boxplot(width = 0.1, fill = "white") + 
  theme(text = element_text(size = 18), 
        axis.text.x = element_text(size = 18), 
        plot.title = element_text(hjust = 0.5), 
        legend.position = c(0.15,0.9)) + 
  xlab("sample") +  ylab("CD8")

p + stat_compare_means(label.x = 1.5, label.y = 0.8)



#Merck18#
library("RColorBrewer")
compare_means(Merck18 ~ sample, data = data)   #基因，样本，数据

p <- ggplot(data, aes(x = sample, y = Merck18, fill = sample)) +
  geom_violin(trim = F) + scale_fill_brewer(palette = "Dark2") + 
  theme_classic() + geom_boxplot(width = 0.1, fill = "white") + 
  theme(text = element_text(size = 18), 
        axis.text.x = element_text(size = 18), 
        plot.title = element_text(hjust = 0.5), 
        legend.position = c(0.15,0.9)) + 
  xlab("sample") +  ylab("Merck18")

p + stat_compare_means(label.x = 1.5, label.y = 0.25)



#TAM M2#
library("RColorBrewer")
colnames(data)[14] <- "TAMM2"
compare_means(TAMM2 ~ sample, data = data)   #基因，样本，数据

p <- ggplot(data, aes(x = sample, y = TAMM2, fill = sample)) +
  geom_violin(trim = F) + scale_fill_brewer(palette = "Dark2") + 
  theme_classic() + geom_boxplot(width = 0.1, fill = "white") + 
  theme(text = element_text(size = 18), 
        axis.text.x = element_text(size = 18), 
        plot.title = element_text(hjust = 0.5), 
        legend.position = c(0.15,0.9)) + 
  xlab("sample") +  ylab("TAM M2")

p + stat_compare_means(label.x = 1.5, label.y = 0.25)



#CAF#
library("RColorBrewer")
compare_means(CAF ~ sample, data = data)   #基因，样本，数据

p <- ggplot(data, aes(x = sample, y = CAF, fill = sample)) +
  geom_violin(trim = F) + scale_fill_brewer(palette = "Dark2") + 
  theme_classic() + geom_boxplot(width = 0.1, fill = "white") + 
  theme(text = element_text(size = 18), 
        axis.text.x = element_text(size = 18), 
        plot.title = element_text(hjust = 0.5), 
        legend.position = c(0.15,0.9)) + 
  xlab("sample") +  ylab("CAF")

p + stat_compare_means(label.x = 1.5, label.y = 0.8)


#Dysfunction#
library("RColorBrewer")
compare_means(Dysfunction ~ sample, data = data)   #基因，样本，数据

p <- ggplot(data, aes(x = sample, y = Dysfunction, fill = sample)) +
  geom_violin(trim = F) + scale_fill_brewer(palette = "Dark2") + 
  theme_classic() + geom_boxplot(width = 0.1, fill = "white") + 
  theme(text = element_text(size = 18), 
        axis.text.x = element_text(size = 18), 
        plot.title = element_text(hjust = 0.5), 
        legend.position = c(0.15,0.9)) + 
  xlab("sample") +  ylab("Dysfunction")

p + stat_compare_means(label.x = 1.5, label.y = 4)



#CD274#
library("RColorBrewer")
compare_means(CD274 ~ sample, data = data)   #基因，样本，数据

p <- ggplot(data, aes(x = sample, y = CD274, fill = sample)) +
  geom_violin(trim = F) + scale_fill_brewer(palette = "Dark2") + 
  theme_classic() + geom_boxplot(width = 0.1, fill = "white") + 
  theme(text = element_text(size = 18), 
        axis.text.x = element_text(size = 18), 
        plot.title = element_text(hjust = 0.5), 
        legend.position = c(0.15,0.9)) + 
  xlab("sample") +  ylab("CD274")

p + stat_compare_means(label.x = 1.5, label.y = 4)



#MSI#
library("RColorBrewer")
colnames(data)[5] <- "MSI"
compare_means(MSI ~ sample, data = data)   #基因，样本，数据

p <- ggplot(data, aes(x = sample, y = MSI, fill = sample)) +
  geom_violin(trim = F) + scale_fill_brewer(palette = "Dark2") + 
  theme_classic() + geom_boxplot(width = 0.1, fill = "white") + 
  theme(text = element_text(size = 18), 
        axis.text.x = element_text(size = 18), 
        plot.title = element_text(hjust = 0.5), 
        legend.position = c(0.15,0.9)) + 
  xlab("sample") +  ylab("MSI")

p + stat_compare_means(label.x = 1.5, label.y = 1.5)



#IFNG#
library("RColorBrewer")
compare_means(IFNG ~ sample, data = data)   #基因，样本，数据

p <- ggplot(data, aes(x = sample, y = IFNG, fill = sample)) +
  geom_violin(trim = F) + scale_fill_brewer(palette = "Dark2") + 
  theme_classic() + geom_boxplot(width = 0.1, fill = "white") + 
  theme(text = element_text(size = 18), 
        axis.text.x = element_text(size = 18), 
        plot.title = element_text(hjust = 0.5), 
        legend.position = c(0.15,0.9)) + 
  xlab("sample") +  ylab("IFNG")

p + stat_compare_means(label.x = 1.5, label.y = 4)






####药敏oncoppredict预测####
#TCGA#
source("CALCPHENOTYPE.R") 
library(sva)
library(preprocessCore)
library(ridge)
library(glmnet)
library(car)
library(tidyverse)
library(impute)
library(ggplot2)
library(cowplot)
Sys.setenv(LANGUAGE = "en") #显示英文报错信息
options(stringsAsFactors = FALSE) #禁止chr转成factor
#表达矩阵
load(file = "data/TCGA_KIRC_mRNA.Rdata")
dat <- expr_TCGA_TPM_01A

#临床数据
TCGA_id <- colnames(expr_TCGA_TPM_01A)   
med=median(as.numeric(expr_TCGA_TPM_01A["TTC13",])) 
expr_TCGA_TPM_01A <- t(expr_TCGA_TPM_01A)
expr_TCGA_TPM_01A <- as.data.frame(expr_TCGA_TPM_01A)
sample <- ifelse(expr_TCGA_TPM_01A$TTC13 > med, "TTC13_high","TTC13_low")
sample <- factor(sample,levels = c("TTC13_high","TTC13_low"))    #factor的目的是把normal排在前面
metadata <- data.frame(TCGA_id,sample) 
rownames(metadata) <- metadata$TCGA_id

sameSample=intersect(colnames(dat),rownames(metadata))
dat=dat[,sameSample]
rt=metadata[sameSample,]

ann <- rt$sample
ann <- as.data.frame(ann)
rownames(ann) <- rownames(rt)
colnames(ann)[1] <- "ImmClust"
table(ann$ImmClust)

# 构建测试集数据
testingExprData <- as.matrix(dat[,rownames(ann)])

# 准备数据1：细胞系表达
trainingExprData <- readRDS(file='GDSC2_Expr_short.rds') # 数据类型为矩阵，细胞系表达是行为基因，列为细胞系，入值为表达谱的矩阵，表达谱需做log2标准化

# 准备数据2：细胞系对应药敏
trainingPtype <- readRDS(file = "GDSC2_Res.rds") # 数据类型为矩阵，细胞系药敏是行为细胞系，列为药物，入值为药物敏感性的矩阵，数据一般存在NA，需要过滤或进行填补
trainingPtype <- trainingPtype[,apply(trainingPtype, 2, function(x) {sum(is.na(x)) < 0.2 * nrow(trainingPtype)})] # 去除在超过20%样本中都缺失的药物
trainingPtype <- trainingPtype[apply(trainingPtype, 1, function(x) {sum(is.na(x)) < 0.2 * ncol(trainingPtype)}),] # 去除在超过20%药物中都缺失的样本
trainingPtype <- t(impute.knn(t(trainingPtype))$data) # KNN填补缺失值
trainingPtype <- exp(trainingPtype) # 这里根据数据特点做指数化，因为原始数据是经过取对数的，但CALCPHENOTYPE.R会做一步power transformation，因此避免重复，这里还原数据规模

batchCorrect <- "eb" # 但我的测试集是标准化的RNA-seq，和芯片信号强度类似，所以依然选择eb

# 是否进行power transform选项：默认进行
powerTransformPhenotype <- TRUE

# 低变异基因过滤阈值选项：默认为0.2
removeLowVaryingGenes <- 0.2

# 在过滤基因时候采用的数据类型选项：'homogenizeData' （批次效应消除后的数据）或 'rawData'（原始数据）
removeLowVaringGenesFrom <- "homogenizeData"

# 确定最小训练样本数目（一般不存在这个问题因为GDSC，CTRP，PRISM都有很多细胞系）
minNumSamples <- 10

# 确定如何处理重复基因选项： -1是询问用户, 1 取均值, 以及 2 移除重复
selection <- 1

# 确定是否打印结果选项：默认进行
printOutput <- FALSE # 这里我不进行，免得最终markdown文件很长，自己运行的时候可以改为TRUE看到进度

# 确定是否采用PCA对基因表达进行降维选项：默认不进行
# 注意，如果选项report_pca = TRUE则上述选项必须为TRUE
pcr <- FALSE
report_pc <- FALSE

# 确定是否需要列出相关性结果来确定潜在的药物biomarker（个人觉得没有必要，药物靶标的搜寻可以参考FigureYa212drugTarget，这里的做法过于“简单粗暴”）
cc <- FALSE

# 确定是否要输入模型的拟合优度值R^2（细胞系训练集的模型回代到训练集的预测结果之间的吻合程度）
rsq <- FALSE # 想虐一下自己看看模型准确度的可以改为TRUE

# 确定在pcr=TRUE时，主成份分析需要达到的解释度，默认80，即所得到的主成份的解释度相加要大于80%
percent <- 80

# 构建模型并预测，该函数会在当前目录下生成calcPhenotype_Output，并且将药敏预测结果输入该目录，名为：DrugPredictions.csv
calcPhenotype(trainingExprData = as.matrix(trainingExprData), # 函数会自己匹配细胞系表达和药敏的共有样本
              trainingPtype = trainingPtype, # 但如果要选用特殊的细胞系类型，比如“生殖系统”、“消化系统”等，需要提前对细胞系做预筛选，函数不提供该操作
              testExprData = as.matrix(testingExprData),
              batchCorrect = batchCorrect,
              powerTransformPhenotype = powerTransformPhenotype,
              removeLowVaryingGenes = removeLowVaryingGenes,
              minNumSamples = minNumSamples,
              selection = selection,
              printOutput = printOutput,
              pcr = pcr,
              removeLowVaringGenesFrom = removeLowVaringGenesFrom,
              report_pc = report_pc,
              cc = cc,
              percent = percent,
              rsq = rsq)

drugsen <- read.csv("./calcPhenotype_Output/DrugPredictions.csv",row.names = 1,check.names = F,stringsAsFactors = F,header = T)
outTab <- NULL
for (drug in colnames(drugsen)) {
  tmp <- data.frame(sen = drugsen[,drug],
                    group = ann[rownames(drugsen),"ImmClust"],
                    row.names = rownames(drugsen),
                    stringsAsFactors = F)
  
  avg <- as.numeric(by(tmp$sen, tmp$group, mean))
  wt <- wilcox.test(sen~group,data = tmp)
  
  outTab <- rbind.data.frame(outTab,
                             data.frame(drug = drug,
                                        avg1 = avg[1],
                                        avg2 = avg[2],
                                        p = wt$p.value,
                                        stringsAsFactors = F),
                             stringsAsFactors = F)
}
outTab$fc <- outTab$avg2/outTab$avg1
outTab$log2fc <- log2(outTab$fc)

# 把统计结果输出到文件
write.table(outTab,"output_wilcox test for potential drugs.txt",sep = "\t",row.names = F,col.names = T,quote = F)

# 设置颜色
jco <- c("#EABF00", "#2874C5")

plotp <- list()
GCP.drug <- outTab[which(outTab$p < 0.05 & outTab$fc > 0),"drug"] # 这是我随便选取的阈值，控制药物数目

for (drug in GCP.drug) {
  
  predictedBoxdat <- data.frame("response" = drugsen[rownames(ann),drug],
                                "Group" = ann$ImmClust, 
                                row.names = rownames(ann)) 
  # 绘图
  my_comparisons <- list(c("TTC13_high", "TTC13_low"))
  p <- ggplot(data = predictedBoxdat, aes(x=Group, y=response))
  p <- p + geom_boxplot(aes(fill = Group)) + 
    scale_fill_manual(values = jco[1:length(unique(predictedBoxdat$Group))]) + #自定义box的配色
    theme_classic() +
    theme(legend.position="none") + # 倾斜字体
    theme(axis.text.x = element_text(angle = 45, hjust = 1,size = 12), plot.title = element_text(size = 12, hjust = 0.5)) + 
    xlab("") + ylab("Drug sensitivity") + # 这里可能是AUC，可能是IC50，根据具体情况调整
    ggtitle(drug)+stat_compare_means(comparisons = my_comparisons, method = "t.test") # 补上title
  
  # 每种药物的结果画到一个单独的文件里
  ggsave(paste("boxplot of", drug, "sensitivity.pdf"), width = 4, height = 5)
  plotp[[drug]] <- p # 保存在列表里供合并图片用
}

# 合并图片
p <- plot_grid(plotlist = plotp,nrow = 2) # title可以AI下拉到合适位置，就如例文所示
p
ggsave("boxplot of predicted drug sensitivity.pdf", width = 6, height = 5)





####泛癌分析####
#单基因泛癌表达矩阵#

library(limma)      
gene="TTC13"      

files=dir()
files=grep("^symbol.", files, value=T)

geneList=list()
for(i in files){
  CancerType=gsub("symbol\\.|\\.txt", "", i)
  rt=read.table(i, header=T, sep="\t", check.names=F)
  geneList[[CancerType]]=as.vector(rt[,1])
}
interGenes=Reduce(intersect, geneList)

outTab=data.frame()
allTab=data.frame()
j=0
for(i in files){
  CancerType=gsub("symbol\\.|\\.txt", "", i)
  rt=read.table(i, header=T, sep="\t", check.names=F)
  
  rt=as.matrix(rt)
  rownames(rt)=rt[,1]
  exp=rt[,2:ncol(rt)]
  dimnames=list(rownames(exp), colnames(exp))
  data=matrix(as.numeric(as.matrix(exp)), nrow=nrow(exp), dimnames=dimnames)
  data=t(avereps(data))
  row.names(data)=gsub(".$", "", row.names(data))
  data=t(avereps(data))
  
  group=sapply(strsplit(colnames(data),"\\-"), "[", 4)
  group=sapply(strsplit(group,""), "[", 1)
  Type=ifelse(group==0, "Tumor", "Normal")
  
  geneExp=t(data[gene,,drop=F])
  outTab=rbind(outTab, cbind(geneExp,Type,CancerType))
  
  j=j+1
  if(j==1){
    allTab=data[interGenes,group==0]
  }else{
    allTab=cbind(allTab, data[interGenes,group==0])
  }
}
#输出单基因的表达文件
out=cbind(ID=row.names(outTab), outTab)
write.table(out, file="geneExp.txt", sep="\t", quote=F, row.names=F)
#输出所有肿瘤的表达文件
allTab=allTab[rowMeans(allTab)>0.1,]
allTab=cbind(ID=row.names(allTab), allTab)
write.table(allTab, file="merge.txt", sep="\t", quote=F, row.names=F)



####TMB雷达图####

library(fmsb) 
expFile="geneExp.txt"     
tmbFile="TMB.txt"          
col="red"                

exp=read.table(expFile, header=T, sep="\t", check.names=F, row.names=1)
exp=exp[(exp[,"Type"]=="Tumor"),]

TMB=read.table(tmbFile, header=T, sep="\t", check.names=F, row.names=1)
TMB=as.matrix(TMB)
row.names(TMB)=gsub(".$", "", row.names(TMB))

sameSample=intersect(row.names(TMB), row.names(exp))
TMB=TMB[sameSample,]
exp=exp[sameSample,]

outTab=data.frame()
fmsbTab=data.frame()
for(i in levels(factor(exp[,"CancerType"]))){
  exp1=exp[(exp[,"CancerType"]==i),]
  TMB1=TMB[(TMB[,"CancerType"]==i),]
  x=as.numeric(TMB1[,1])
  y=as.numeric(exp1[,1])
  corT=cor.test(x,y,method="spearman")
  cor=corT$estimate
  pValue=corT$p.value
  sig=ifelse(pValue<0.001,"***",ifelse(pValue<0.01,"**",ifelse(pValue<0.05,"*"," ")))
  outTab=rbind(outTab, cbind(CancerType=i,cor=cor,pValue=pValue))
  fmsbTab=rbind(fmsbTab, cbind(CancerType=paste0(i,sig),cor=cor))
}
write.table(outTab,file="cor.result.txt",sep="\t",row.names=F,quote=F)
write.table(t(fmsbTab),file="fmsbInput.txt",sep="\t",col.names=F,quote=F)

data=read.table("fmsbInput.txt", header=T, sep="\t", check.names=F, row.names=1)
maxValue=ceiling(max(abs(data))*10)/10
data=rbind(rep(maxValue,ncol(data)),rep(-maxValue,ncol(data)),data)

pdf(file="radar.pdf", width=7, height=7)
radarchart(data, axistype=1,
           title="Tumor mutation burden",    
           pcol=col,                 
           plwd=2 ,                   
           plty=1,                  
           cglcol="grey",             
           cglty=1,                  
           caxislabels=seq(-maxValue,maxValue,maxValue/2),   
           cglwd=1.2,                  
           axislabcol="blue",         
           vlcex=0.8                  
)
dev.off()




####MSI雷达图####

library(fmsb) 
expFile="geneExp.txt"     
msiFile="MSI.txt"          
col="blue"             

exp=read.table(expFile, header=T, sep="\t", check.names=F, row.names=1)
exp=exp[(exp[,"Type"]=="Tumor"),]

MSI=read.table(msiFile, header=T, sep="\t", check.names=F, row.names=1)
MSI=as.matrix(MSI)
row.names(MSI)=gsub(".$", "", row.names(MSI))

sameSample=intersect(row.names(MSI), row.names(exp))
MSI=MSI[sameSample,]
exp=exp[sameSample,]

outTab=data.frame()
fmsbTab=data.frame()
for(i in levels(factor(exp[,"CancerType"]))){
  exp1=exp[(exp[,"CancerType"]==i),]
  MSI1=MSI[(MSI[,"CancerType"]==i),]
  x=as.numeric(MSI1[,1])
  y=as.numeric(exp1[,1])
  corT=cor.test(x,y,method="spearman")
  cor=corT$estimate
  pValue=corT$p.value
  sig=ifelse(pValue<0.001,"***",ifelse(pValue<0.01,"**",ifelse(pValue<0.05,"*"," ")))
  outTab=rbind(outTab, cbind(CancerType=i,cor=cor,pValue=pValue))
  fmsbTab=rbind(fmsbTab, cbind(CancerType=paste0(i,sig),cor=cor))
}
write.table(outTab,file="cor.result.txt",sep="\t",row.names=F,quote=F)
write.table(t(fmsbTab),file="fmsbInput.txt",sep="\t",col.names=F,quote=F)

data=read.table("fmsbInput.txt", header=T, sep="\t", check.names=F, row.names=1)
maxValue=ceiling(max(abs(data))*10)/10
data=rbind(rep(maxValue,ncol(data)),rep(-maxValue,ncol(data)),data)

pdf(file="radar.pdf", width=7, height=7)
radarchart(data, axistype=1,
           title="Microsatellite instability",   
           pcol=col,                  
           plwd=2 ,                    
           plty=1,                      
           cglcol="grey",             
           cglty=1,                   
           caxislabels=seq(-maxValue,maxValue,maxValue/2),    
           cglwd=1.2,               
           axislabcol="blue",          
           vlcex=0.8                 
)
dev.off()







####TNB雷达图####
library(dplyr)
library(fmsb) 
expFile="geneExp.txt"     
msiFile="TNB.txt"          
col="green"             

exp=read.table(expFile, header=T, sep="\t", check.names=F, row.names=1)
exp=exp[(exp[,"Type"]=="Tumor"),]

TNB=read.table(msiFile, header=T, sep="\t", check.names=F, row.names=NULL)
TNB <- TNB %>% 
  distinct(id,.keep_all = T)
rownames(TNB) <- TNB$id

sameSample=intersect(row.names(TNB), row.names(exp))
TNB=TNB[sameSample,]
exp=exp[sameSample,]
TNB <- TNB[,-3]
TNB$CancerType <- exp$CancerType
TNB <- TNB[,-1]

outTab=data.frame()
fmsbTab=data.frame()
for(i in levels(factor(exp[,"CancerType"]))){
  exp1=exp[(exp[,"CancerType"]==i),]
  TNB1=TNB[(TNB[,"CancerType"]==i),]
  x=as.numeric(TNB1[,1])
  y=as.numeric(exp1[,1])
  corT=cor.test(x,y,method="spearman")
  cor=corT$estimate
  pValue=corT$p.value
  sig=ifelse(pValue<0.001,"***",ifelse(pValue<0.01,"**",ifelse(pValue<0.05,"*"," ")))
  outTab=rbind(outTab, cbind(CancerType=i,cor=cor,pValue=pValue))
  fmsbTab=rbind(fmsbTab, cbind(CancerType=paste0(i,sig),cor=cor))
}
write.table(outTab,file="cor.result.txt",sep="\t",row.names=F,quote=F)
write.table(t(fmsbTab),file="fmsbInput.txt",sep="\t",col.names=F,quote=F)

data=read.table("fmsbInput.txt", header=T, sep="\t", check.names=F, row.names=1)
maxValue=ceiling(max(abs(data))*10)/10
data=rbind(rep(maxValue,ncol(data)),rep(-maxValue,ncol(data)),data)

pdf(file="radar.pdf", width=7, height=7)
radarchart(data, axistype=1,
           title="Microsatellite instability",   
           pcol=col,                  
           plwd=2 ,                    
           plty=1,                      
           cglcol="grey",             
           cglty=1,                   
           caxislabels=seq(-maxValue,maxValue,maxValue/2),    
           cglwd=1.2,               
           axislabcol="blue",          
           vlcex=0.8                 
)
dev.off()