### 1 set pwd and subroutines options(warn = -1) library(ggplot2) library(survival) library(survminer) library(Rmisc) library(reshape2) #subroutines file2frame <- function(file, header = TRUE, sep = "\t",stringsAsFactors = F,row.names = NULL){ out = read.delim(file, header = header, sep = sep,stringsAsFactors = stringsAsFactors,row.names = row.names) return(out) } as.numeric.matrix <- function(s){ v = apply(s,2,as.numeric) rownames(v) = rownames(s) colnames(v) = colnames(s) return(v) } list <- structure(NA,class="result") "[<-.result" <- function(x,...,value) { args <- as.list(match.call()) args <- args[-c(1:2,length(args))] length(value) <- length(args) for(i in seq(along=args)) { a <- args[[i]] if(!missing(a)) eval.parent(substitute(a <- v,list(a=a,v=value[[i]]))) } x } ismember <- function(A,B){ IA = is.element(A,B) IB = matrix(0,length(A),1) for (i in 1:length(A)) { if (IA[i]){ c= which(B==A[i]); IB[i] = c[1] } } IB = as.vector(IB) return(list(IA,IB)) } #plot lghplot.boxplot <- function(p,width = 0.3){ p = p +geom_violin(alpha = 0.5,outlier.size = 0) + geom_boxplot(width = width,color = 'black',fill = 'white',outlier.size = 0,alpha = 1) } lghplot.addtheme <- function (size = 18,hjust = FALSE,legend.position = "none"){ p = theme(axis.text.x = element_text(size = size,face = 'bold'))+ theme(axis.text.y = element_text(size = size,face = 'bold'))+ theme(axis.title=element_text(size=size,face = 'bold')) + theme(axis.title=element_text(size=size,face = 'bold')) + theme(title = element_text(size= size,face = 'bold'))+ theme(#panel.grid.major =element_blank(), strip.text = element_text(size=size,face = 'bold'), legend.position = legend.position, #legend.position = "none", panel.grid.minor = element_blank(), panel.background = element_blank(), axis.line = element_line(colour = "black") ) if (hjust){ p = p +theme(axis.text.x = element_text(angle = 45, hjust = 1)) } return(p) } log2fc <- function(e,c){ return(mean(e[c])-mean(e[!c])) } pval <- function(e,c){ xx = summary(aov(e~c)) return(xx[[1]]$`Pr(>F)`[1]) } ### 2 read data #readnorm expr.norm = file2frame('./data/expr_norm_tpm.txt') expr.norm = expr.norm[!duplicated(expr.norm$gene),] rownames(expr.norm) = expr.norm$gene expr.norm = as.numeric.matrix(expr.norm[,-1]) #read case expr.case = file2frame('./data/expr_case_tpm.txt') expr.case = expr.case[!duplicated(expr.case$gene),] rownames(expr.case) = expr.case$gene expr.case = as.numeric.matrix(expr.case[,-1]) #clin clin = file2frame('./data/coad_clin_data.csv',row.names =1) clin = clin[clin$HISTOLOGICAL_DIAGNOSIS != '',] list[IA,IB] = ismember(rownames(clin),colnames(expr.case)) clin = clin[IA,] expr.case = expr.case[,IB] expr.all = cbind(expr.norm,expr.case) ### 3 Sample QC sampleclass = c(rep('green',ncol(expr.norm)),rep('pink',ncol(expr.case))) pdf('./samples_boxplot1.pdf') boxplot(expr.all,outline = F,outwex = 0,xlab = 'Sample ID',ylab = 'log2 TPM',names = 1:ncol(expr.all),col = sampleclass, pars = list(boxwex = 1, staplewex =1, outwex =0.5), horizontal = FALSE, add = FALSE, at = NULL) dev.off() ### 4 Figures ### 4.1 Figures 4A sum(tclass_simple == 'Normal') sum(tclass_simple == 'N-COAD') sum(tclass_simple == 'M-COAD') tclass = c(rep('Control',ncol(expr.norm)),clin$HISTOLOGICAL_DIAGNOSIS) tclass_simple = tclass tclass_simple[tclass_simple == 'Colon Adenocarcinoma'] = 'N-COAD' tclass_simple[tclass_simple == 'Colon Mucinous Adenocarcinoma'] = 'M-COAD' tclass_simple[tclass_simple == 'Control'] = 'Normal' tclass_simple = factor(tclass_simple,levels = c('N-COAD','M-COAD','Normal')) idaa = 'GDE1' #pdf('./figures/Figure4A_GDE1_boxplot.pdf') p = ggplot(,aes(x = tclass_simple,y = expr.all[idaa,],color = tclass_simple))+ ylab('log2(TPM)')+ geom_text(x=3, y=30, label="Scatter plot") xlab('')+theme_bw()+lghplot.addtheme(size = 22) print(lghplot.boxplot(p)) #dev.off() ###4.2 Figure 4B #COAD vs COMAD tgene = 'GDE1' ida = clin$HISTOLOGICAL_DIAGNOSIS != '' cutoff = median(expr.case[tgene,ida]) survival_time = clin$OS_MONTHS survival_status = clin$OS_STATUS == 'DECEASED' st0 = clin$HISTOLOGICAL_DIAGNOSIS == 'Colon Adenocarcinoma' st = rep('M-COAD',length(st0)) st[st0] = 'N-COAD' tdata = data.frame(cancer_type = st[ida], survival_time = survival_time[ida], survival_status = survival_status[ida]) survdiff(Surv(survival_time[ida],survival_status[ida])~st[ida]) tfit = survfit(Surv(survival_time,survival_status)~cancer_type,data = tdata) p = ggsurvplot(tfit,legend.labs = c("M-COAD", "N-COAD"), risk.table.col = "strata", ggtheme = theme_bw(base_size = 22), palette = c("#CC0000", "#4C9900"), xlim = c(0, 120)) #pdf('./figures/Figure4B_GDE1_N-COADvsM-COAD_survival.pdf') print(p) #dev.off() ###4.3 Figure 4C # N-COAD tgene = 'GDE1' ida = clin$HISTOLOGICAL_DIAGNOSIS == 'Colon Adenocarcinoma' cutoff = median(expr.case[tgene,ida]) survival_time = clin$OS_MONTHS survival_status = clin$OS_STATUS == 'DECEASED' st0 = expr.case[tgene,] > cutoff st = rep('low expression',length(st0)) st[st0] = 'high expression' tdata = data.frame(cancer_type = st[ida], survival_time = survival_time[ida], survival_status = survival_status[ida]) survdiff(Surv(survival_time[ida],survival_status[ida])~st[ida]) tfit = survfit(Surv(survival_time,survival_status)~cancer_type,data = tdata) p = ggsurvplot(tfit,legend.labs = c("high expression", "low expression"), risk.table.col = "strata", ggtheme = theme_bw(base_size = 22), palette = c("#CC0000", "#4C9900"), xlim = c(0, 120)) #pdf('./figures/Figure4B_GDE1_N-COAD_survival.pdf') print(p) #dev.off() ###4.4 Figure 4D #COMAD tgene = 'GDE1' ida = clin$HISTOLOGICAL_DIAGNOSIS != 'Colon Adenocarcinoma' cutoff = median(expr.case[tgene,ida]) survival_time = clin$OS_MONTHS survival_status = clin$OS_STATUS == 'DECEASED' st0 = expr.case[tgene,] > cutoff st = rep('low expression',length(st0)) st[st0] = 'high expression' tdata = data.frame(cancer_type = st[ida], survival_time = survival_time[ida], survival_status = survival_status[ida]) survdiff(Surv(survival_time[ida],survival_status[ida])~st[ida]) tfit = survfit(Surv(survival_time,survival_status)~cancer_type,data = tdata) p = ggsurvplot(tfit,legend.labs = c("high expression", "low expression"), risk.table.col = "strata", ggtheme = theme_bw(base_size = 22), palette = c("#CC0000", "#4C9900"), xlim = c(0, 120)) #pdf('./figures/Figure4B_GDE1_M-COMD_survival.pdf') print(p) #dev.off() ###5 Tables des = file2frame('./data/des.all_genes.txt',row.names = 1) # supplementary table 1 ogenes = file2frame('./data/WGCNAmodule_revise_red.txt',row.names = NULL) rownames(ogenes) = ogenes$gene_name ogenes$log2fc = rep(1,nrow(ogenes)) ogenes$pvalue = rep(1,nrow(ogenes)) ogenes$p.adjust = rep(1,nrow(ogenes)) ogenes$HR_All_COAD =rep(1,nrow(ogenes)) ogenes$survival.pAll_COAD = rep(1,nrow(ogenes)) ogenes$out_All_COAD = rep('x',nrow(ogenes)) ogenes$HR_N_COAD =rep(1,nrow(ogenes)) ogenes$survival.pN_COAD = rep(1,nrow(ogenes)) ogenes$out_N_COAD = rep('x',nrow(ogenes)) ogenes$HR_M_COAD =rep(1,nrow(ogenes)) ogenes$survival.pM_COAD = rep(1,nrow(ogenes)) ogenes$out_M_COAD = rep('x',nrow(ogenes)) for (i in 1:nrow(ogenes)){ # log2fc tgene = ogenes$gene_name[i] # all ida = clin$HISTOLOGICAL_DIAGNOSIS != ' ' cutoff = median(expr.case[tgene,ida]) survival_time = clin$OS_MONTHS survival_status = clin$OS_STATUS == 'DECEASED' st = expr.case[tgene,] > cutoff xx = survdiff(Surv(survival_time[ida],survival_status[ida])~st[ida]) ogenes$survival.pAll_COAD[i] = pchisq(xx$chisq, length(xx$n)-1, lower.tail = FALSE) ogenes$HR_All_COAD[i] = (xx$obs[2]/xx$exp[2])/(xx$obs[1]/xx$exp[1]) ogenes$out_All_COAD[i] = paste(as.character(signif(ogenes$HR_All_COAD[i],2)),'(', as.character(signif(ogenes$survival.pAll_COAD[i],2)),')',sep='') #N-COAD ida = clin$HISTOLOGICAL_DIAGNOSIS == 'Colon Adenocarcinoma' cutoff = median(expr.case[tgene,ida]) survival_time = clin$OS_MONTHS survival_status = clin$OS_STATUS == 'DECEASED' st = expr.case[tgene,] > cutoff xx = survdiff(Surv(survival_time[ida],survival_status[ida])~st[ida]) ogenes$survival.pN_COAD[i] = pchisq(xx$chisq, length(xx$n)-1, lower.tail = FALSE) ogenes$HR_N_COAD[i] = (xx$obs[2]/xx$exp[2])/(xx$obs[1]/xx$exp[1]) ogenes$out_N_COAD[i] = paste(as.character(signif(ogenes$HR_N_COAD[i],2)),'(', as.character(signif(ogenes$survival.pN_COAD[i],2)),')',sep='') #M-COAD ida = clin$HISTOLOGICAL_DIAGNOSIS != 'Colon Adenocarcinoma' cutoff = median(expr.case[tgene,ida]) survival_time = clin$OS_MONTHS survival_status = clin$OS_STATUS == 'DECEASED' st = expr.case[tgene,] > cutoff xx = survdiff(Surv(survival_time[ida],survival_status[ida])~st[ida]) ogenes$survival.pM_COAD[i] = pchisq(xx$chisq, length(xx$n)-1, lower.tail = FALSE) ogenes$HR_M_COAD[i] = (xx$obs[2]/xx$exp[2])/(xx$obs[1]/xx$exp[1]) ogenes$out_M_COAD[i] = paste(as.character(signif(ogenes$HR_M_COAD[i],2)),'(', as.character(signif(ogenes$survival.pM_COAD[i],2)),')',sep='') } ix = sort.int(ogenes$survival.pAll_COAD,decreasing = F,index.return = T)$ix ogenes = ogenes[ix,] ogenes$log2fc = des[rownames(ogenes),]$log2FC ogenes$pvalue = des[rownames(ogenes),]$pvalue ogenes$p.adjust = des[rownames(ogenes),]$p.adjust write.table(ogenes,'Supplementary Table2.txt',sep = '\t',row.names = F,col.names = T,quote = F)### supplementary table 2 head(ogenes)