rm(list=ls()) options(stringsAsFactors = F) library("dplyr") getwd() Rdata_dir='./Rdata' Figure_dir='./Figure' ####Reading expression information miRNA_GA=read.table("HiSeqV2.gz",header = T,sep = '\t',check.names = F) dim(miRNA_GA) miRNA_GA[1:4,1:4] na.omit(miRNA_GA)[1:4,1:4] dim(na.omit(miRNA_GA)) #####remove duplicate values of expression matrix expr=t(miRNA_GA) write.table(expr,"expr.txt",quote = F,sep = "\t",col.names = F,row.names = T) z=read.table(file = "expr.txt",sep = "\t",header = T,check.names = F) expr=z[,-1] sample=substr(z$sample,1,12) ####Intercept the first 12 characters of ID expr=cbind(sample,expr) dim(expr) d=expr[!duplicated(expr$sample),] dim(d) expr_matirx=d ####Reading clinical information miRNA_HiSeq=read.table("LGG_clinicalMatrix.gz" ,header = T,sep = '\t') dim(miRNA_HiSeq) miRNA_HiSeq[1:4,1:4] na.omit(miRNA_HiSeq)[1:4,1:4] dim(na.omit(miRNA_HiSeq)) ####Extract the column of clinical information of interest and remove the duplicate barcode colnames(miRNA_HiSeq) phe=miRNA_HiSeq[,-1] sample=substr(miRNA_HiSeq$sampleID,1,12) ####Intercept the first 12 characters of ID d=cbind(sample,phe) colnames(d) meta=as.data.frame(d[c("sample","X_EVENT","OS.time","age_at_initial_pathologic_diagnosis", "gender","histological_type","neoplasm_histologic_grade","radiation_therapy", "targeted_molecular_therapy","vital_status")]) dim(meta) meta=(meta[complete.cases(meta[,2:10]),]) dim(meta) table(meta$X_EVENT) meta=na.omit(meta) dim(meta) table(duplicated(meta$sample)) unique_sample=unique(meta$sample) unique_sample_matrix=as.data.frame(unique_sample) #####create a matrix and assign the column name colnames(unique_sample_matrix)="sample" meta=meta[!duplicated(meta$sample),] ###########Remove the duplicates in the sample column of meta matrix dim(meta) table(meta[,1:ncol(meta)]) meta=meta[grep(pattern="G2|G3",meta[,7]),] ####Extract the seventh column of the meta containing "G2 or G3" characters dim(meta) meta=meta[grep(pattern="YES|NO",meta[,8]),] ####Extract the eighth column of the meta containing "YES or NO" characters meta=meta[grep(pattern="YES|NO",meta[,9]),] ####Extract the ninth column of the meta containing "YES or NO" characters dim(meta) meta_expr=merge(meta,expr_matirx,by="sample") ###########Integrate expression matrix and clinical information through the same column named "sample" save(expr_matirx,meta,meta_expr, file = file.path(Rdata_dir,'TCGA-LGG-RNAseq-example.Rdata') ) ###################Store three files named 'TCGA-LGG-RNAseq-example.Rdata' ############### rm(list=ls()) options(stringsAsFactors = F) Rdata_dir='./Rdata' Figure_dir='./Figure' load( file = file.path(Rdata_dir,'TCGA-LGG-RNAseq-example.Rdata')) ###Load stored files named 'TCGA-LGG-RNAseq-example.Rdata' write.table(meta,"meta.txt",quote = F,sep = "\t",row.names = F) #######Name the column of the clinical data and sorting the data meta=meta[,-10] ########## Delete the 10th column of meta colnames(meta)=c("sample","vital_status","OS.time","age","gender","type", "grade","radition","molecular_therapy") #####Name the columns one after the other meta$age_group=ifelse(meta$age>median(meta$age),'older','younger') ####Group ages based on median and rebuild a column named age_group meta$grade=gsub("G2","II",meta$grade) #####Replace G2 with II in the "grade" column of meta meta$grade=gsub("G3","III",meta$grade) #####Replace G3 with III in the "grade" column of meta meta_expr=merge(meta,expr_matirx,by="sample") ###########Integrate expression matrix and clinical information through the same column named "sample" save(meta,meta_expr,expr_matirx, file = file.path(Rdata_dir,'TCGA-LGG-RNAseq-example.Rdata') ####Store three files named 'TCGA-LGG-RNAseq-example.Rdata' ) ####### rm(list=ls()) options(stringsAsFactors = F) Rdata_dir='./Rdata' Figure_dir='./Figure' load( file = file.path(Rdata_dir,'TCGA-LGG-RNAseq-example.Rdata')) Ued_exprclinical=merge(meta,expr,by="sample") z=Ued_exprclinical[,-(2:10)] ####Delete the 2th to 10th column of meta z=t(z) write.table(z,"z.txt",quote = F,sep = "\t",col.names = F,row.names = T) z=read.table(file = "z.txt",sep = "\t",header = T,check.names = F) rm(list=ls()) options(stringsAsFactors = F) Rdata_dir='./Rdata' Figure_dir='./Figure' load( file = file.path(Rdata_dir,'TCGA-LGG-RNAseq-example.Rdata') ) Ued_exprclinical=meta_expr z=Ued_exprclinical[,-(2:10)] ####Delete the 2th to 10th column of meta z=t(z) write.table(z,"z.txt",quote = F,sep = "\t",col.names = F,row.names = T) z=read.table(file = "z.txt",sep = "\t",header = T,check.names = F) library('rbsurv') ###Robust test x=as.matrix(z[,2:ncol(z)]) time <- as.numeric(Ued_exprclinical$OS.time) View(time) status <- as.numeric(Ued_exprclinical$vital_status) ####riskfactor=as.matrix(Ued_exprclinical[,2:8]) c=z[,1] fit <-rbsurv(time=time, status=status, x=x,method="efron", max.n.genes=30, n.iter = 10,n.fold = 6,n.seq = 3,seed = 100,gene.ID =c) fit$model dim(fit$model) write.table(fit$model,"rbsurv.txt",quote = F,sep = "\t",row.names = F) ######Extract the tagged gene name from file "rbsurv.txt",and name the column name as sample,store the file and name it "TCGA-LGG-rbsurv_result.txt" rbsurv_result=read.table(file = "TCGA-LGG-rbsurv_result.txt",sep = "\t",header = T,check.names = F) Rdata_dir='./Rdata/' Figure_dir='./figures/' save(rbsurv_result,z, file = file.path(Rdata_dir,'TCGA-LGG-RNAseq-511example.Rdata') ) ####lasso regression Rdata_dir='./Rdata/' Figure_dir='./figures/' load( file = file.path(Rdata_dir,'TCGA-LGG-RNAseq-511example.Rdata') ) load( file = file.path(Rdata_dir,'TCGA-LGG-RNAseq-example.Rdata') ) d=merge(rbsurv_result,z,by="sample") d1=t(d) write.table(d1,"d1.txt",quote = F,sep = "\t",col.names = F,row.names = T) d2=read.table(file = "d1.txt",sep = "\t",header = T,check.names = F) lasso_input=merge(meta,d2,by="sample") library(glmnet) library(survival) miRNA=lasso_input ###miRNAEXP=log2(miRNA[,3:ncol(miRNA)]+1) ###miRNA=cbind(miRNA[,1:2],miRNAEXP) miRNA[,"OS.time"]=miRNA[,"OS.time"]/365 v1<-as.matrix(miRNA[,c(11:ncol(miRNA))]) v2 <- as.matrix(Surv(miRNA$OS.time,miRNA$vital_status)) myfit <- glmnet(v1, v2, family = "cox") pdf("lambda.pdf") plot(myfit, xvar = "lambda", label = TRUE) dev.off() dev.new() myfit2 <- cv.glmnet(v1, v2, family="cox") pdf("min.pdf") plot(myfit2) abline(v=log(c(myfit2$lambda.min,myfit2$lambda.1se)),lty="dashed") dev.off() myfit2$lambda.min coe <- coef(myfit, s = myfit2$lambda.min) act_index <- which(coe != 0) act_coe <- coe[act_index] row.names(coe)[act_index] lasso_output=as.matrix(row.names(coe)[act_index]) colnames(lasso_output)="sample" multcox_preinput=merge(d,lasso_output,by="sample") multcox_preinput=t(multcox_preinput) write.table(multcox_preinput,"multcox.txt",quote = F,sep = "\t",col.names = F,row.names = T) multicox=read.table(file = "multcox.txt",sep = "\t",header = T,check.names = F) save(multicox,meta, file = file.path(Rdata_dir,'TCGA-LGG-RNAseq-511example.Rdata') ) #####multivariable analysis Rdata_dir='./Rdata/' Figure_dir='./figures/' load( file = file.path(Rdata_dir,'TCGA-LGG-RNAseq-511example.Rdata') ) multicox_input=merge(meta,multicox,by="sample") mRNA=multicox_input write.table(mRNA,file = "mRNA.txt",quote = F,sep = "\t",row.names = F) miRNA<-read.table(file="mRNA.txt",header=T,sep="\t",row.names = 1,check.names = F,stringsAsFactors = F) colnames(miRNA) fmla1 <- as.formula(Surv(OS.time,vital_status)~BMP2+CCDC46+CMYA5+CRTAC1+EMP3+FBXO39+IGF2BP3 +PTGFRN+RANBP17+SEMA4G+SHOX2+SMC4+WEE1) mycox <- coxph(fmla1,data=miRNA) summary(mycox) ####Select the selected genes for the rise score fmla1 <- as.formula(Surv(OS.time,vital_status)~SEMA4G+CRTACm1 +WEE1) mycox <- coxph(fmla1,data=miRNA) risk_score<-predict(mycox,type="risk",newdata=miRNA) risk_level<-as.factor(ifelse(risk_score>median(risk_score),"High","Low")) write.table(cbind(id=rownames(cbind(miRNA[,1:9],risk_score,risk_level)),cbind(miRNA[,1:9],risk_score,risk_level)),"risk_score.txt",sep="\t",quote=F,row.names=F) ####Positioning gene locations and survival analysis rm(list=ls()) options(stringsAsFactors = F) Rdata_dir='./Rdata/' Figure_dir='./figures/' load( file = file.path(Rdata_dir,'TCGA-LGG-RNAseq-511example.Rdata') ) load( file = file.path(Rdata_dir,'TCGA-LGG-RNAseq-example.Rdata') ) library(survival) library(survminer) BiocManager::install("dplyr") p=c(colnames(multicox)) grep("WEE1",p) grep("CRTAC1",p) grep("SEMA4G",p) multicox2=multicox[,c(1,14,5,11)] inputmiRNA=merge(meta[,c(1,2,3)],multicox2,by="sample") risk_score=read.table(file = "risk_score.txt",sep = "\t",header = T,check.names = F) inputmiRNA$OS.time=inputmiRNA$OS.time/365 risk_score$OS.time=risk_score$OS.time/365 group1<- ifelse(inputmiRNA$WEE1>median(inputmiRNA$WEE1),'high','low') sfit1<- survfit(Surv(OS.time,vital_status) ~ group1, data = inputmiRNA) group2<- ifelse(inputmiRNA$CRTAC1>median(inputmiRNA$CRTAC1),'high','low') sfit2=survfit(Surv(OS.time,vital_status) ~ group2, data = inputmiRNA) group3<- ifelse(inputmiRNA$SEMA4G>median(inputmiRNA$SEMA4G),'high','low') sfit3=survfit(Surv(OS.time,vital_status) ~ group3, data = inputmiRNA) sfit4=survfit(Surv(OS.time,vital_status) ~ risk_level, data = risk_score) ########################################################## stratified analysis strata=list() younger=risk_score[grep(pattern="younger",risk_score[,10]),] sfit5=survfit(Surv(OS.time,vital_status) ~ risk_level, data = younger) ggsurvplot(sfit5,data = younger, surv.median.line = "hv", legend.title="Three-genes Signature",xlab ="Time(years)", conf.int = TRUE, pval = TRUE,palette = c("orchid2","dodgerblue2"), ggtheme = theme_bw()+theme(plot.title = element_text(hjust = 0.5)) ,title = "TCGA LGGs-Younger",legend.labs=c("High risk","Low risk")) old=risk_score[grep(pattern="old",risk_score[,10]),] sfit6=survfit(Surv(OS.time,vital_status) ~ risk_level, data = old) ggsurvplot(sfit6,data = old, surv.median.line = "hv", legend.title="Three-genes Signature",xlab ="Time(years)", conf.int = TRUE, pval = TRUE,palette = c("orchid2","dodgerblue2"), ggtheme = theme_bw()+theme(plot.title = element_text(hjust = 0.5)) ,title = "TCGA LGGs-Old",legend.labs=c("High risk","Low risk")) male=risk_score[grep(pattern="MALE",risk_score[,5]),] sfit7=survfit(Surv(OS.time,vital_status) ~ risk_level, data = male) ggsurvplot(sfit7,data = male, surv.median.line = "hv", legend.title="Three-genes Signature",xlab ="Time(years)", conf.int = TRUE, pval = TRUE,palette = c("orchid2","dodgerblue2"), ggtheme = theme_bw()+theme(plot.title = element_text(hjust = 0.5)) ,title = "TCGA LGGs-Male",legend.labs=c("High risk","Low risk")) female=risk_score[grep(pattern="FEMALE",risk_score[,5]),] sfit8=survfit(Surv(OS.time,vital_status) ~ risk_level, data = female) ggsurvplot(sfit8,data = female, surv.median.line = "hv", legend.title="Three-genes Signature",xlab ="Time(years)", conf.int = TRUE, pval = TRUE,palette = c("orchid2","dodgerblue2"), ggtheme = theme_bw()+theme(plot.title = element_text(hjust = 0.5)) ,title = "TCGA LGGs-Female",legend.labs=c("High risk","Low risk")) G3=risk_score[grep(pattern="III",risk_score[,7]),] sfit9=survfit(Surv(OS.time,vital_status) ~ risk_level, data = G3) ggsurvplot(sfit9,data = G3, surv.median.line = "hv", legend.title="Three-genes Signature",xlab ="Time(years)", conf.int = TRUE, pval = TRUE,palette = c("orchid2","dodgerblue2"), ggtheme = theme_bw()+theme(plot.title = element_text(hjust = 0.5)) ,title = "TCGA LGGs-Grade III",legend.labs=c("High risk","Low risk")) risk_score$grade=gsub("III","3",risk_score$grade) G2=risk_score[grep(pattern="II",risk_score[,7]),] sfit10=survfit(Surv(OS.time,vital_status) ~ risk_level, data = G2) ggsurvplot(sfit10,data = G2, surv.median.line = "hv", legend.title="Three-genes Signature",xlab ="Time(years)", conf.int = TRUE, pval = TRUE,palette = c("orchid2","dodgerblue2"), ggtheme = theme_bw()+theme(plot.title = element_text(hjust = 0.5)) ,title = "TCGA LGGs-Grade II",legend.labs=c("High risk","Low risk")) multicox_meta=merge(meta,multicox,by="sample") ##################Draw ROC curve of our study library(lars) library(glmnet) b=multicox_meta[,c("sample","WEE1","SEMA4G","CRTAC1")] rownames(b)=b[,1] x=b[,2:4] x=as.matrix(x) y=multicox_meta$vital_status cv_fit <- cv.glmnet(x=x, y=y, alpha = 1, nlambda = 1000) lasso.prob <- predict(cv_fit, newx=x , s=c(cv_fit$lambda.min,cv_fit$lambda.1se) ) new_dat=multicox_meta library(timeROC) new_dat$fp=as.numeric(lasso.prob[,1]) new_dat$OS.time=new_dat$OS.time/30 with(new_dat, ROC <<- timeROC(T=OS.time,#End time delta=vital_status,#Survival outcome marker=fp,#Predictor cause=1,#Positive outcome assignment, such as death or not weighting="marginal",#Weight calculation method, marginal is the default value, using km to calculate the censored distribution times=c(24,60),# Time point, choose 2 years and 8 years survival rate ROC = T, iid = TRUE) ) plot(ROC,time=24,col = "blue",add =FALSE,title=F) #Add means whether to add in the previous picture plot(ROC,time=60,col = "red",add = T) legend("right",c("2 years AUC=0.910","5 years AUC=0.828"),col=c("blue","red"),lwd=2) title(main = list("TCGA-LGGs ROC for the three genes", cex = 1.3, col = "black", font = 3)) ROC$AUC confint(ROC) splots <- list() splots[[1]] ggsurvplot(sfit1,data = inputmiRNA, surv.median.line = "hv", legend.title="WEE1",xlab ="Time(years)", conf.int = TRUE, pval = TRUE,palette = c("orchid2","dodgerblue2"), ggtheme = theme_bw()+theme(plot.title = element_text(hjust = 0.5)) ,title = "TCGA-LGGs",legend.labs=c("High","Low")) splots[[2]] <- ggsurvplot(sfit2,data = inputmiRNA, surv.median.line = "hv", legend.title="CRTAC1",xlab ="Time(years)", conf.int = TRUE, pval = TRUE,palette = c("orchid2","dodgerblue2"), ggtheme = theme_bw()+theme(plot.title = element_text(hjust = 0.5)) ,title = "TCGA-LGGs",legend.labs=c("High","Low")) splots[[3]] <- ggsurvplot(sfit3,data = inputmiRNA, surv.median.line = "hv", legend.title="SEMA4G",xlab ="Time(years)", conf.int = TRUE, pval = TRUE,palette = c("orchid2","dodgerblue2"), ggtheme = theme_bw()+theme(plot.title = element_text(hjust = 0.5)) ,title = "TCGA-LGGs",legend.labs=c("High","Low")) ggsurvplot(sfit4,data = risk_score, surv.median.line = "hv", legend.title="Three-genes Signature",xlab ="Time(years)", conf.int = TRUE, pval = TRUE,palette = c("orchid2","dodgerblue2"), ggtheme = theme_bw()+theme(plot.title = element_text(hjust = 0.5)) ,title = "TCGA LGGs",legend.labs=c("High risk","Low risk")) # Arrange multiple ggsurvplots and print the output arrange_ggsurvplots(splots, print = TRUE, ncol = 1, nrow = 3) dev.off() ######################################## #######risk_distribution rm(list=ls()) options(stringsAsFactors = F) Rdata_dir='./Rdata/' Figure_dir='./figures/' load( file = file.path(Rdata_dir,'TCGA-LGG-RNAseq-511example.Rdata') ) load( file = file.path(Rdata_dir,'TCGA-LGG-RNAseq-example.Rdata') ) new_dat=merge(meta,multicox,by="sample") colnames(new_dat) s=Surv(OS.time,vital_status) ~ WEE1+CRTAC1+SEMA4G model <- coxph(s, data = new_dat ) summary(model,data=new_dat) options(scipen=1) ggforest(model, data =new_dat, main = "Hazard ratio", cpositions = c(0.10, 0.22, 0.4), fontsize = 1.0, refLabel = "1", noDigits = 4) fp <- predict(model,new_dat,type="risk");boxplot(fp) fp <- predict(model,new_dat,type="expected") ;boxplot(fp) plot(fp,phe$days) fp <- predict(model,new_dat) ;boxplot(fp) basehaz(model) library(Hmisc) options(scipen=200) with(new_dat,rcorr.cens(fp,Surv(OS.time,vital_status) )) # http://dni-institute.in/blogs/cox-regression-interpret-result-and-predict/ ###############################risk_distribution!!!!! library(cowplot) library(pheatmap) # https://cran.r-project.org/web/packages/cowplot/vignettes/introduction.html fp_dat=data.frame(s=1:length(fp),v=as.numeric(sort(fp ))) d=rbind(data.frame(fp_dat[1:228,],Type=as.factor("Low risk")),data.frame(fp_dat[229:456,],Type=as.factor("High risk"))) sur_dat=data.frame(s=1:length(fp), t=meta[names(sort(fp )),'OS.time'] , e=meta[names(sort(fp )),'vital_status'] ) sur_dat=na.omit(sur_dat) sur_dat$e=ifelse(sur_dat$e==0,'alive','death') exp_dat=new_dat[names(sort(fp )),c(14,23,20)] ####Column number of the gene of interest plot.point=ggplot(d,aes(x=s,y=v,shape=Type,color=Type))+labs(title =" ")+ theme(plot.title=element_text(hjust=0.5))+scale_shape_manual(values=c(7,1))+geom_point()+theme_bw()+xlab("Number of Patients")+ylab("Risk score");print(plot.point) plot.sur=ggplot(sur_dat,aes(x=s,y=t))+theme(legend.title=element_blank())+theme_bw()+geom_point(aes(col=e))+xlab("Number of Patients")+ylab("Survival time(Day)");print(plot.sur) mycolors <- colorRampPalette(c("navy", "white", "firebrick3"))(256) tmp=t(scale(exp_dat)) range(tmp) tmp[tmp > 1] = 1 tmp[tmp < -1] = -1 annotation_col=data.frame(risk_score$risk_level) names(annotation_col)="Group" plot.h=pheatmap(tmp,col= mycolors,show_colnames = F,cluster_cols = F,cluster_rows = F,clustering_distance_rows = "correlation") plot.h=pheatmap(tmp,col= mycolors,legend=T,show_colnames = F,annotation_names_col=F,annotation_legend=T,annotation_col=annotation_col,cluster_cols = F,cluster_rows = F) plot.h$gtable plot_grid(plot.point, plot.sur, plot.h$gtable, align = 'v',ncol = 1) ggarrange(plot.point, plot.sur, plot.h$gtable, labels = c("A", "B","C"), align = 'v',ncol = 1) ##################### ########boxplot rm(list=ls()) options(stringsAsFactors = F) BiocManager::install("tidyverse") BiocManager::install("dplyr") library("tidyverse") library("tidyr") library("dbplyr") Rdata_dir='./Rdata/' Figure_dir='./figures/' load( file = file.path(Rdata_dir,'TCGA-LGG-RNAseq-511example.Rdata') ) load( file = file.path(Rdata_dir,'TCGA-LGG-RNAseq-example.Rdata') ) multicox_meta=merge(meta,multicox,by="sample") dat=data.frame(WEE1=multicox_meta$WEE1,CRTAC1=multicox_meta$CRTAC1,SEMA4G=multicox_meta$SEMA4G, Grade=multicox_meta$grade) save(dat,file = 'data_for_boxplot.Rdata') head(dat) drop_outliers=function(df_name,dep_col){ a=df_name[,dep_col] iqr=IQR(a) q1=as.numeric(quantile(a,0.25)) q3=as.numeric(quantile(a,0.75)) bottem=q1-1.5*iqr top=q3+1.5*iqr df_return=df_name %>% filter(df_name[,dep_col]>=bottem & df_name[,dep_col]<=top) return(df_return) } mydat=drop_outliers(dat,"WEE1") %>% drop_outliers("WEE1")%>% drop_outliers("WEE1")%>% drop_outliers("WEE1")%>% drop_outliers("WEE1")%>% drop_outliers("WEE1")%>% drop_outliers("WEE1")%>% drop_outliers("WEE1")%>% drop_outliers("WEE1")%>% drop_outliers("WEE1")%>% drop_outliers("WEE1")%>% drop_outliers("WEE1")%>% drop_outliers("WEE1") mydat=drop_outliers(mydat,"CRTAC1") %>% drop_outliers("CRTAC1")%>% drop_outliers("CRTAC1")%>% drop_outliers("CRTAC1")%>% drop_outliers("CRTAC1")%>% drop_outliers("CRTAC1")%>% drop_outliers("CRTAC1")%>% drop_outliers("CRTAC1")%>% drop_outliers("CRTAC1")%>% drop_outliers("CRTAC1")%>% drop_outliers("CRTAC1")%>% drop_outliers("CRTAC1") mydat=drop_outliers(mydat,"SEMA4G") %>% drop_outliers("SEMA4G")%>% drop_outliers("SEMA4G")%>% drop_outliers("SEMA4G")%>% drop_outliers("SEMA4G")%>% drop_outliers("SEMA4G")%>% drop_outliers("SEMA4G")%>% drop_outliers("SEMA4G")%>% drop_outliers("SEMA4G")%>% drop_outliers("SEMA4G") mydat=gather(dat,key="Gene",value="Expression",WEE1:SEMA4G) mydat=mydat[order(mydat[,1]),] compare_means( Expression ~ Grade, data = mydat, group.by = "Gene") # Box plot facetted by "gene" p=ggboxplot(mydat, x = "Grade", y = "Expression", color = "Grade", palette = "jco", add = "jitter",title = "TCGA LGGs", facet.by = "Gene", short.panel.labs = FALSE)+theme(plot.title=element_text(hjust=0.5)) # Use only p.format as label. Remove method name. p + stat_compare_means() ggboxplot(mydat, x = "gene", y = "Expression", color = "Grade", palette = "nejm", add = "jitter") p + stat_compare_means(aes(Grade = Grade)) if(require('ggpubr')){ library(ggpubr) # google search : ggpubr boxplot add p-value # http://www.sthda.com/english/articles/24-ggpubr-publication-ready-plots/76-add-p-values-and-significance-levels-to-ggplots/ p1 <- ggboxplot(dat, x = "Grade", y = "WEE1", color = "#00AFBB", palette = "jco", add = "jitter") # Add p-value p1 + stat_compare_means() } if(require('ggpubr')){ library(ggpubr) # google search : ggpubr boxplot add p-value # http://www.sthda.com/english/articles/24-ggpubr-publication-ready-plots/76-add-p-values-and-significance-levels-to-ggplots/ p2 <- ggboxplot(dat2, x = "Grade", y = "CRTAC1", color = "#E7B800", palette = "jco", add = "jitter") # Add p-value p + stat_compare_means() } ########################## #########ROC rm(list=ls()) options(stringsAsFactors = F) Rdata_dir='./Rdata/' Figure_dir='./figures/' load( file = file.path(Rdata_dir,'TCGA-LGG-RNAseq-511example.Rdata') ) load( file = file.path(Rdata_dir,'TCGA-LGG-RNAseq-example.Rdata') ) multicox_meta=merge(meta,multicox,by="sample") library(lars) library(glmnet) library(survival) library(survminer) library(timeROC) multicox_input=merge(meta,multicox,by="sample") mRNA=multicox_input write.table(mRNA,file = "mRNA.txt",quote = F,sep = "\t",row.names = F) miRNA<-read.table(file="mRNA.txt",header=T,sep="\t",row.names = 1,check.names = F,stringsAsFactors = F) colnames(miRNA) ####Select the selected genes for the rise score fmla1 <- as.formula(Surv(OS.time,vital_status)~SEMA4G+CRTAC1 +WEE1) mycox <- coxph(fmla1,data=miRNA) meta$risk_score<-predict(mycox,type="risk",newdata=miRNA) library("survivalROC") cutoff <- 365 ROC1= survivalROC(Stime=meta$OS.time, status=meta$vital_status, marker = meta$risk_score, predict.time = cutoff, method="KM") cutoff <- 1095 ROC3= survivalROC(Stime=meta$OS.time, status=meta$vital_status, marker = meta$risk_score, predict.time = cutoff, method="KM") cutoff <- 1825 ROC5= survivalROC(Stime=meta$OS.time, status=meta$vital_status, marker = meta$risk_score, predict.time = cutoff, method="KM") plot(ROC1$FP, ROC1$TP, type="l",col="red", xlim=c(0,1), ylim=c(0,1), xlab=paste( "FP", "\n", "AUC = ",round(ROC1$AUC,3)), ylab="TP",main="Time dependent ROC") abline(0,1) lines(ROC3$FP, ROC3$TP, type="l",col="blue",xlim=c(0,1), ylim=c(0,1)) lines(ROC5$FP, ROC5$TP, type="l",col="yellow",xlim=c(0,1), ylim=c(0,1)) legend(0.5,0.4,c(paste("AUC at 1 year =",round(ROC1$AUC,3)),paste("AUC at 3 years =",round(ROC3$AUC,3)),paste("AUC at 5 years =",round(ROC5$AUC,3))),x.intersp=1, y.intersp=0.8, lty= 1, lwd= 2, col=c("red","blue","yellow"), bty = "n", seg.len=1, cex=0.8) # #####################################################################Draw the ROC curve of the comparative literature ##Integrative Analysis of DNA Methylation and Gene Expression Identify a ThreeGene Signature for Predicting Prognosis in Lower-Grade Gliomas meta$risk_score2=0.597*meta_expr$EMP3+0.825*meta_expr$GSX2+0.942*meta_expr$EMILIN3 cutoff <- 365 ROC1b= survivalROC(Stime=meta$OS.time, status=meta$vital_status, marker = meta$risk_score2, predict.time = cutoff, method="KM") cutoff <- 1095 ROC3b= survivalROC(Stime=meta$OS.time, status=meta$vital_status, marker = meta$risk_score2, predict.time = cutoff, method="KM") cutoff <- 1825 ROC5b= survivalROC(Stime=meta$OS.time, status=meta$vital_status, marker = meta$risk_score2, predict.time = cutoff, method="KM") plot(ROC1b$FP, ROC1b$TP, type="l",col="red", xlim=c(0,1), ylim=c(0,1), xlab=paste( "FP", "\n", "AUC = ",round(ROC1$AUC,3)), ylab="TP",main="Time dependent ROC") abline(0,1) lines(ROC3b$FP, ROC3b$TP, type="l",col="blue",xlim=c(0,1), ylim=c(0,1)) lines(ROC5b$FP, ROC5b$TP, type="l",col="yellow",xlim=c(0,1), ylim=c(0,1)) legend(0.6,0.4,c(paste("AUC at 1 year =",round(ROC1b$AUC,3)),paste("AUC at 3 years =",round(ROC3b$AUC,3)),paste("AUC at 5 years =",round(ROC5b$AUC,3))),x.intersp=1, y.intersp=0.8, lty= 1, lwd= 2, col=c("red","blue","yellow"), bty = "n", seg.len=1, cex=0.8) # ##############################################################Draw the ROC curve of the comparative literature ###Comprehensive analysis of genes based on chr1p/ 19q co-deletion reveals a robust 4-gene prognostic signature for lower grade glioma meta$risk_score3=0.012*meta_expr$IFI44+0.068*meta_expr$KIF2C+0.035*meta_expr$GNG12+0.02*meta_expr$EMP3 cutoff <- 365 ROC1c= survivalROC(Stime=meta$OS.time, status=meta$vital_status, marker = meta$risk_score3, predict.time = cutoff, method="KM") cutoff <- 1095 ROC3c= survivalROC(Stime=meta$OS.time, status=meta$vital_status, marker = meta$risk_score3, predict.time = cutoff, method="KM") cutoff <- 1825 ROC5c= survivalROC(Stime=meta$OS.time, status=meta$vital_status, marker = meta$risk_score3, predict.time = cutoff, method="KM") plot(ROC1c$FP, ROC1c$TP, type="l",col="red", xlim=c(0,1), ylim=c(0,1), xlab=paste( "FP", "\n", "AUC = ",round(ROC1$AUC,3)), ylab="TP",main="Time dependent ROC") abline(0,1) lines(ROC3c$FP, ROC3c$TP, type="l",col="blue",xlim=c(0,1), ylim=c(0,1)) lines(ROC5c$FP, ROC5c$TP, type="l",col="yellow",xlim=c(0,1), ylim=c(0,1)) legend(0.6,0.4,c(paste("AUC at 1 year =",round(ROC1c$AUC,3)),paste("AUC at 3 years =",round(ROC3c$AUC,3)),paste("AUC at 5 years =",round(ROC5c$AUC,3))),x.intersp=1, y.intersp=0.8, lty= 1, lwd= 2, col=c("red","blue","yellow"), bty = "n", seg.len=1, cex=0.8) # ############################################################################## ###############Draw a comparison ROC rm(list=ls()) options(stringsAsFactors = F) Rdata_dir='./Rdata/' Figure_dir='./figures/' load( file = file.path(Rdata_dir,'TCGA-LGG-RNAseq-511example.Rdata') ) load( file = file.path(Rdata_dir,'TCGA-LGG-RNAseq-example.Rdata') ) multicox_meta=merge(meta,multicox,by="sample") library(lars) library(glmnet) library(survival) library(survminer) library(timeROC) multicox_input=merge(meta,multicox,by="sample") mRNA=multicox_input write.table(mRNA,file = "mRNA.txt",quote = F,sep = "\t",row.names = F) miRNA<-read.table(file="mRNA.txt",header=T,sep="\t",row.names = 1,check.names = F,stringsAsFactors = F) colnames(miRNA) fmla1 <- as.formula(Surv(OS.time,vital_status)~SEMA4G+CRTAC1 +WEE1) mycox <- coxph(fmla1,data=miRNA) meta$risk_score<-predict(mycox,type="risk",newdata=miRNA) ########################### 1 year ROC comparison library("survivalROC") cutoff <- 365 ROC1= survivalROC(Stime=meta$OS.time, status=meta$vital_status, marker = meta$risk_score, predict.time = cutoff, method="KM") meta$risk_score2=0.597*meta_expr$EMP3+0.825*meta_expr$GSX2+0.942*meta_expr$EMILIN3 cutoff <- 365 ROC1b= survivalROC(Stime=meta$OS.time, status=meta$vital_status, marker = meta$risk_score2, predict.time = cutoff, method="KM") meta$risk_score3=0.012*meta_expr$IFI44+0.068*meta_expr$KIF2C+0.035*meta_expr$GNG12+0.02*meta_expr$EMP3 cutoff <- 365 ROC1c= survivalROC(Stime=meta$OS.time, status=meta$vital_status, marker = meta$risk_score3, predict.time = cutoff, method="KM") plot(ROC1$FP, ROC1$TP, type="l",col="red", xlim=c(0,1), ylim=c(0,1), xlab=paste( "FP", "\n", "AUC = ",round(ROC1$AUC,3)), ylab="TP",main="Time dependent ROC 1 year") abline(0,1) lines(ROC1b$FP, ROC1b$TP, type="l",col="blue",xlim=c(0,1), ylim=c(0,1)) lines(ROC1c$FP, ROC1c$TP, type="l",col="yellow",xlim=c(0,1), ylim=c(0,1)) legend(0.5,0.4,c(paste("AUC at our study =",round(ROC1$AUC,3)),paste("AUC at Chen X.P et al =",round(ROC1b$AUC,3)),paste("AUC at Chuang Zhang et al =",round(ROC1c$AUC,3))),x.intersp=1, y.intersp=0.8, lty= 1, lwd= 2, col=c("red","blue","yellow"), bty = "n", seg.len=1, cex=0.8) ####################5 years ROC comparison cutoff <- 1825 ROC5= survivalROC(Stime=meta$OS.time, status=meta$vital_status, marker = meta$risk_score, predict.time = cutoff, method="KM") meta$risk_score2=0.597*meta_expr$EMP3+0.825*meta_expr$GSX2+0.942*meta_expr$EMILIN3 cutoff <- 1825 ROC5b= survivalROC(Stime=meta$OS.time, status=meta$vital_status, marker = meta$risk_score2, predict.time = cutoff, method="KM") meta$risk_score3=0.012*meta_expr$IFI44+0.068*meta_expr$KIF2C+0.035*meta_expr$GNG12+0.02*meta_expr$EMP3 cutoff <- 1825 ROC5c= survivalROC(Stime=meta$OS.time, status=meta$vital_status, marker = meta$risk_score3, predict.time = cutoff, method="KM") plot(ROC5$FP, ROC5$TP, type="l",col="red", xlim=c(0,1), ylim=c(0,1), xlab=paste( "FP", "\n", "AUC = ",round(ROC1$AUC,3)), ylab="TP",main="Time dependent ROC 5 years") abline(0,1) lines(ROC5b$FP, ROC5b$TP, type="l",col="blue",xlim=c(0,1), ylim=c(0,1)) lines(ROC5c$FP, ROC5c$TP, type="l",col="yellow",xlim=c(0,1), ylim=c(0,1)) legend(0.5,0.4,c(paste("AUC at our study =",round(ROC5$AUC,3)),paste("AUC at Chen X.P et al =",round(ROC5b$AUC,3)),paste("AUC at Chuang Zhang et al =",round(ROC5c$AUC,3))),x.intersp=1, y.intersp=0.8, lty= 1, lwd= 2, col=c("red","blue","yellow"), bty = "n", seg.len=1, cex=0.8) #########################################################3 years ROC comparison cutoff <- 1095 ROC3= survivalROC(Stime=meta$OS.time, status=meta$vital_status, marker = meta$risk_score, predict.time = cutoff, method="KM") meta$risk_score2=0.597*meta_expr$EMP3+0.825*meta_expr$GSX2+0.942*meta_expr$EMILIN3 cutoff <- 1095 ROC3b= survivalROC(Stime=meta$OS.time, status=meta$vital_status, marker = meta$risk_score2, predict.time = cutoff, method="KM") meta$risk_score3=0.012*meta_expr$IFI44+0.068*meta_expr$KIF2C+0.035*meta_expr$GNG12+0.02*meta_expr$EMP3 cutoff <- 1095 ROC3c= survivalROC(Stime=meta$OS.time, status=meta$vital_status, marker = meta$risk_score3, predict.time = cutoff, method="KM") plot(ROC3$FP, ROC3$TP, type="l",col="red", xlim=c(0,1), ylim=c(0,1), xlab=paste( "FP", "\n", "AUC = ",round(ROC1$AUC,3)), ylab="TP",main="Time dependent ROC 3 years") abline(0,1) lines(ROC3b$FP, ROC3b$TP, type="l",col="blue",xlim=c(0,1), ylim=c(0,1)) lines(ROC3c$FP, ROC3c$TP, type="l",col="yellow",xlim=c(0,1), ylim=c(0,1)) legend(0.5,0.4,c(paste("AUC at our study =",round(ROC3$AUC,3)),paste("AUC at Chen X.P et al =",round(ROC3b$AUC,3)),paste("AUC at Chuang Zhang et al =",round(ROC3c$AUC,3))),x.intersp=1, y.intersp=0.8, lty= 1, lwd= 2, col=c("red","blue","yellow"), bty = "n", seg.len=1, cex=0.8) #############################################################WGCNA rm(list=ls()) options(stringsAsFactors = F) library(WGCNA) options(stringsAsFactors = FALSE) enableWGCNAThreads() enableWGCNAThreads() Rdata_dir='./Rdata/' Figure_dir='./figures/' load( file = file.path(Rdata_dir,'TCGA-LGG-RNAseq-511example.Rdata') ) load( file = file.path(Rdata_dir,'TCGA-LGG-RNAseq-example.Rdata') ) expr_matirx=merge(meta,expr_matirx,by="sample")[,-(2:10)] rownames(expr_matirx)=expr_matirx[,1] expro=t(expr_matirx[,-1]) datTraits=data.frame(meta$vital_status,meta$gender,meta$grade,meta$type,meta$age_group) rownames(datTraits)=meta[,1] head(datTraits) datTraits$gsm=meta[,1] #######################################Screening for the top 25% of the variance gene m.vars=apply(expro, 1,var) expro.upper=expro[which(m.vars>quantile(m.vars, probs = seq( 0, 1, 0.5))[ 2]),] dim(expro.upper) datExpr= as.data.frame(t(expro.upper)); nGenes = ncol(datExpr) nSamples = nrow(datExpr) gsg = goodSamplesGenes(datExpr, verbose = 3); gsg$allOK sampleNames = rownames(datExpr); traitRows = match(sampleNames, datTraits$gsm) rownames(datTraits) = datTraits[traitRows, 6] powers = c(c(1:10), seq(from = 12, to=20, by=2)) # Call the network topology analysis function sft = pickSoftThreshold(datExpr, powerVector = powers, verbose = 5) # Plot the results: par(mfrow = c(1,2)); cex1 = 0.9; # Scale-free topology fit index as a function of the soft-thresholding power plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2], xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n", main = paste("Scale independence")); text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2], labels=powers,cex=cex1,col="red"); # this line corresponds to using an R^2 cut-off of h abline(h=0.90,col="blue") # Mean connectivity as a function of the soft-thresholding power plot(sft$fitIndices[,1], sft$fitIndices[,5], xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n", main = paste("Mean connectivity")) text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red") #Select beta best_beta=sft$powerEstimate best_beta [1] 10 net = blockwiseModules(datExpr, power = sft$powerEstimate, maxBlockSize = 6000,TOMType = "unsigned", minModuleSize = 30,reassignThreshold = 0, mergeCutHeight = 0.25, numericLabels = TRUE, pamRespectsDendro = FALSE, saveTOMs = TRUE, saveTOMFileBase = "AS-green-FPKM-TOM", verbose = 3) table(net$colors) # Convert labels to colors for plotting mergedColors = labels2colors(net$colors) table(mergedColors) # Plot the dendrogram and the module colors underneath plotDendroAndColors(net$dendrograms[[1]], mergedColors[net$blockGenes[[1]]], "Module colors", dendroLabels = FALSE, hang = 0.03, addGuide = TRUE, guideHang = 0.05) ## assign all of the gene to their corresponding module ## hclust for the genes. #Understand the number of samples and the number of genes nGenes = ncol(datExpr) nSamples = nrow(datExpr) #First make a systematic clustering tree for the sample datExpr_tree<-hclust(dist(datExpr), method = "average") par(mar = c(0,5,2,0)) plot(datExpr_tree, main = "Sample clustering", sub="", xlab="", cex.lab = 2, cex.axis = 1, cex.main = 1,cex.lab=1) #Add the corresponding color to the sample matrix of the previous construction sample_colors <- numbers2colors(as.numeric(factor(datTraits$meta.vital_status)), colors = c("green","blue"),signed = FALSE) sample_colors <- numbers2colors(,signed = FALSE) sample_colors <- numbers2colors(as.numeric(factor(datTraits$meta.grade)), colors = c("blue","red"),signed = FALSE) sample_colors <- numbers2colors(as.numeric(factor(datTraits$meta.type)), colors = c("grey","blue","red"),signed = FALSE) sample_colors <- numbers2colors(as.numeric(factor(datTraits$meta.age_group)), colors = c("red","green"),signed = FALSE) #Constructing a system clustering tree and trait heat map of 10 samples par(mar = c(1,4,3,1),cex=0.8) plotDendroAndColors(datExpr_tree, sample_colors, groupLabels = colnames(sample), cex.dendroLabels = 0.8, marAll = c(1, 4, 3, 1), cex.rowText = 0.01, main = "Sample dendrogram and trait heatmap") moduleColors <- labels2colors(net$colors) datTraits=data.frame(meta$vital_status,meta$gender,meta$grade,meta$type,meta$age_group) datTraits=data.frame(meta$vital_status) datTraits$meta.gender=gsub("FEMALE","0",datTraits$meta.gender) datTraits$meta.gender=gsub("MALE","1",datTraits$meta.gender) datTraits$meta.grade=gsub("III","3",datTraits$meta.grade) datTraits$meta.grade=gsub("II","2",datTraits$meta.grade) datTraits$meta.type=gsub("Astrocytoma","1",datTraits$meta.type) datTraits$meta.type=gsub("Oligoastrocytoma","2",datTraits$meta.type) datTraits$meta.type=gsub("Oligodendroglioma","3",datTraits$meta.type) datTraits$meta.age_group=gsub("older","2",datTraits$meta.age_group) datTraits$meta.age_group=gsub("younger","1",datTraits$meta.age_group) colnames(datTraits)=c("vital_status","gender","grade","histologic type","age") # Recalculate MEs with color labels MEs0 = moduleEigengenes(datExpr, moduleColors)$eigengenes MEs = orderMEs(MEs0); ##ME value matrix for modules of different colors (sample vs module) moduleTraitCor = cor(MEs, datTraits , use = "p"); moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples) sizeGrWindow(10,6) # Will display correlations and their p-values textMatrix = paste(signif(moduleTraitCor, 2), "\n(", signif(moduleTraitPvalue, 1), ")", sep = ""); dim(textMatrix) = dim(moduleTraitCor) par(mar = c(1,8.5, 3, 3)); # Display the correlation values within a heatmap plot View(textMatrix) moduleTraitCor=moduleTraitCor[,] labeledHeatmap(Matrix = moduleTraitCor,cex.lab.x = 0.8, xLabels = names(datTraits), yLabels = names(MEs), ySymbols = names(MEs), colorLabels = FALSE, colors = blueWhiteRed(50), textMatrix = textMatrix, setStdMargins = FALSE, cex.text = 0.45,font.lab.x = 0.5,verticalSeparator.col = 1,verticalSeparator.lty = 0.3, zlim = c(-1,1),verticalSeparator.lwd = 0.3, main = paste("Module-trait relationships")) ?labeledHeatmap modNames = substring(names(MEs), 3) geneModuleMembership = as.data.frame(cor(datExpr, MEs, use = "p")); ## Calculate the Pearson correlation coefficient matrix for each module and gene ## MEs is the value of each module in each sample ## datExpr is the amount of expression of each gene in each sample. MMPvalue = as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership), nSamples)); names(geneModuleMembership) = paste("MM", modNames, sep=""); names(MMPvalue) = paste("p.MM", modNames, sep=""); head(datTraits) ## Only continuous traits can only be calculated ## Here, the variable belonging to the vital_status phenotype is digitized with 0, 1. status = as.data.frame(datTraits[,1]); names(status) = "status" geneTraitSignificance = as.data.frame(cor(datExpr, status, use = "p")); GSPvalue = as.data.frame(corPvalueStudent(as.matrix(geneTraitSignificance), nSamples)); names(geneTraitSignificance) = paste("GS.", names(status), sep=""); names(GSPvalue) = paste("p.GS.", names(status), sep="") module = "yellow" column = match(module, modNames); moduleGenes = moduleColors==module; sizeGrWindow(7, 7); par(mfrow = c(1,1)); verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]), abs(geneTraitSignificance[moduleGenes, 1]), xlab = paste("Module Membership in", module, "module"), ylab = "Gene significance for status", main = paste("Module membership vs. gene significance\n"), cex.main = 1.2, cex.lab = 1.2, cex.axis = 1.2, col = module) #First draw a heat map for all genes nGenes = ncol(datExpr) nSamples = nrow(datExpr) geneTree = net$dendrograms[[1]]; dissTOM = 1-TOMsimilarityFromExpr(datExpr, power = sft$powerEstimate); # 设置power=sft$powerEstimate,最佳beta值,此处是3 plotTOM = dissTOM^7; diag(plotTOM) = NA; ##TOMplot(plotTOM, geneTree, moduleColors, main = "Network heatmap plot, all genes") #Then randomly select some genes for mapping nSelect = 400 # For reproducibility, we set the random seed set.seed(10); select = sample(nGenes, size = nSelect); selectTOM = dissTOM[select, select]; # There’s no simple way of restricting a clustering tree to a subset of genes, so we must re-cluster. selectTree = hclust(as.dist(selectTOM), method = "average") selectColors = moduleColors[select]; # Open a graphical window sizeGrWindow(9,9) # Taking the dissimilarity to a power, say 10, makes the plot more informative by effectively changing # the color palette; setting the diagonal to NA also improves the clarity of the plot plotDiss = selectTOM^7; diag(plotDiss) = NA; TOMplot(plotDiss, selectTree, selectColors, main = "Network heatmap plot, selected genes") #Finally draw the relationship between modules and traits # Recalculate module eigengenes MEs = moduleEigengenes(datExpr, moduleColors)$eigengenes ## Only continuous traits can only be calculated ## Here, the variable belonging to the vital_status phenotype is digitized with 0, 1. head(datTraits) status = as.data.frame(datTraits[,1]); names(status) = "status" # Add the weight to existing module eigengenes MET = orderMEs(cbind(MEs, status)) # Plot the relationships among the eigengenes and the trait sizeGrWindow(5,7.5); par(cex = 0.9) plotEigengeneNetworks(MET, "", marDendro = c(0,4,1,2), marHeatmap = c(3,4,1,2), cex.lab = 0.8, xLabelsAngle = 90) # Plot the dendrogram sizeGrWindow(6,6); par(cex = 1.0) ## Module clustering diagram plotEigengeneNetworks(MET, "Eigengene dendrogram", marDendro = c(0,4,2,0), plotHeatmaps = FALSE) # Plot the heatmap matrix (note: this plot will overwrite the dendrogram plot) par(cex = 1.0) ## Traits and module heat map plotEigengeneNetworks(MET, "Eigengene adjacency heatmap", marHeatmap = c(3,4,2,2), plotDendrograms = FALSE, xLabelsAngle = 90) ########################################################## Select module module = "yellow"; # Select module probes probes = colnames(datExpr) ## The probe in our example is the gene name. CRTAC1 inModule = (moduleColors==module); modProbes = probes[inModule]; View(modProbes) module = "green"; # Select module probes probes = colnames(datExpr) ## WEE1 inModule = (moduleColors==module); modProbes = probes[inModule]; View(modProbes) module = "black"; # Select module probes probes = colnames(datExpr) ## SEMA4G inModule = (moduleColors==module); modProbes = probes[inModule]; View(modProbes) # Recalculate topological overlap TOM = TOMsimilarityFromExpr(datExpr, power = sft$powerEstimate); # Select module module = "black"; # Select module probes probes = colnames(datExpr) ## View(probes) inModule = (moduleColors==module); modProbes = probes[inModule]; View(modProbes) ## Extract the gene name of the specified module # Select the corresponding Topological Overlap modTOM = TOM[inModule, inModule]; dimnames(modTOM) = list(modProbes, modProbes) vis = exportNetworkToVisANT(modTOM, file = paste("VisANTInput-", module, ".txt", sep=""), weighted = TRUE, threshold = 0) cyt = exportNetworkToCytoscape( modTOM, edgeFile = paste("CytoscapeInputSEMA4G-edges-", paste(module, collapse="-"), ".txt", sep=""), nodeFile = paste("CytoscapeInputSEA4G-nodes-", paste(module, collapse="-"), ".txt", sep=""), weighted = TRUE, threshold = 0.02, nodeNames = modProbes, nodeAttr = moduleColors[inModule] ); ####### # (1) Intramodular connectivity # moduleColors <- labels2colors(net$colors) connet=abs(cor(datExpr,use="p"))^6 Alldegrees1=intramodularConnectivity(connet, moduleColors) head(Alldegrees1) #####Relationship between connectivity within modules and GENE saliency # (2) Relationship between gene significance and intramodular connectivity which.module="green" status= as.data.frame(datTraits[,1]); # change specific names(status) = "status" GS1 = as.numeric(cor(status,datExpr, use="p")) GeneSignificance=abs(GS1) colorlevels=unique(moduleColors) sizeGrWindow(9,6) par(mfrow=c(2,as.integer(0.5+length(colorlevels)/2))) par(mar = c(4,5,3,1)) for (i in c(1:length(colorlevels))) { whichmodule=colorlevels[[i]]; restrict1 = (moduleColors==whichmodule); verboseScatterplot(Alldegrees1$kWithin[restrict1], GeneSignificance[restrict1], col=moduleColors[restrict1], main=whichmodule, xlab = "Connectivity", ylab = "Gene Significance", abline = TRUE) } ######Calculate the connectivity of all genes in the module, and screen hub genes ###(3) Generalizing intramodular connectivity for all genes on the array datKME=signedKME(datExpr, MEs, outputColumnName="MM.") # Display the first few rows of the data frame head(datKME) ##Finding genes with high gene significance and high intramodular connectivity in # interesting modules # abs(GS1)> .9 You can adjust the parameters according to the actual situation. # abs(datKME$MM.black)>.8 At least greater than >0.8 FilterGenes= abs(GS1)> .9 & abs(datKME$MM.black)>.8 table(FilterGenes) ############################################################# plotMEpairs(MEs,y=datTraits$cellType) #par(mfrow=c(3,1), mar=c(1, 2, 4, 1)) # for black module which.module="green"; plotMat(t(scale(datExpr[,moduleColors==which.module ]) ),nrgcols=30,rlabels=T, clabels=T,rcols=which.module, title=which.module ) sizeGrWindow(8,7); which.module="green" ME=MEs[, paste("ME",which.module, sep="")] par(mfrow=c(2,1), mar=c(0.3, 5.5, 3, 2)) plotMat(t(scale(datExpr[,moduleColors==which.module ]) ), nrgcols=30,rlabels=F,rcols=which.module, main=which.module, cex.main=2) par(mar=c(5, 4.2, 0, 0.7)) barplot(ME, col=which.module, main="", cex.main=2, ylab="eigengene expression",xlab="MPP") CRTAC1=read.table(file="CytoscapeInputCRTAC1-edges-yellow.txt",sep="\t",header = T,check.names = F) a=CRTAC1[grep(pattern="CRTAC1",CRTAC1[,1]),] b=CRTAC1[grep(pattern="CRTAC1",CRTAC1[,2]),] c=CRTAC1[grep(pattern="WEE1",CRTAC1[,1]),] d=CRTAC1[grep(pattern="WEE1",CRTAC1[,2]),] e=CRTAC1[grep(pattern="SEMA4G",CRTAC1[,1]),] f=CRTAC1[grep(pattern="SEMA4G",CRTAC1[,2]),] CRTAC1_relate=rbind(a,b,c,d,e,f)[,c(1,2)] CRTAC1_relate=c(CRTAC1_relate$fromNode,CRTAC1_relate$toNode) CRTAC1_relate=as.data.frame(unique(CRTAC1_relate)) names(CRTAC1_relate)=c("symble") write.table(CRTAC1_relate,"CRTAC1_relate.txt",quote = F,sep = "\t",col.names = T,row.names = F) WEE1=read.table(file="CytoscapeInputWEE1-edges-green.txt",sep="\t",header = T,check.names = F) a2=WEE1[grep(pattern="CRTAC1",WEE1[,1]),] b2=WEE1[grep(pattern="CRTAC1",WEE1[,2]),] c2=WEE1[grep(pattern="WEE1",WEE1[,1]),] d2=WEE1[grep(pattern="WEE1",WEE1[,2]),] e2=WEE1[grep(pattern="SEMA4G",WEE1[,1]),] f2=WEE1[grep(pattern="SEMA4G",WEE1[,2]),] WEE1_relate=rbind(a2,b2,c2,d2,e2,f2)[,c(1,2)] WEE1_relate=c(WEE1_relate$fromNode,WEE1_relate$toNode) WEE1_relate=as.data.frame(unique(WEE1_relate)) names(WEE1_relate)=c("symble") write.table(WEE1_relate,"WEE1_relate.txt",quote = F,sep = "\t",col.names = T,row.names = F) SEMA4G=read.table(file="CytoscapeInputSEMA4G-edges-black.txt",sep="\t",header = T,check.names = F) a3=SEMA4G[grep(pattern="CRTAC1",SEMA4G[,1]),] b3=SEMA4G[grep(pattern="CRTAC1",SEMA4G[,2]),] c3=SEMA4G[grep(pattern="WEE1",SEMA4G[,1]),] d3=SEMA4G[grep(pattern="WEE1",SEMA4G[,2]),] e3=SEMA4G[grep(pattern="SEMA4G",SEMA4G[,1]),] f3=SEMA4G[grep(pattern="SEMA4G",SEMA4G[,2]),] SEMA4G_relate=rbind(a3,b3,c3,d3,e3,f3)[,c(1,2)] SEMA4G_relate=c(SEMA4G_relate$fromNode,SEMA4G_relate$toNode) SEMA4G_relate=as.data.frame(unique(SEMA4G_relate)) names(SEMA4G_relate)=c("symble") write.table(SEMA4G_relate,"SEMA4G_relate.txt",quote = F,sep = "\t",col.names = T,row.names = F) merge=rbind(a,b,c,d,e,f,a2,b2,c2,d2,e2,f2,a3,b3,c3,d3,e3,f3) merge2=rbind(SEMA4G,WEE1,CRTAC1) a4=merge2[grep(pattern="CRTAC1",merge2[,1]),] b4=merge2[grep(pattern="CRTAC1",merge2[,2]),] c4=merge2[grep(pattern="WEE1",merge2[,1]),] d4=merge2[grep(pattern="WEE1",merge2[,2]),] e4=merge2[grep(pattern="SEMA4G",merge2[,1]),] f4=merge2[grep(pattern="SEMA4G",merge2[,2]),] merge3=rbind(a4,b4,c4,d4,e4,f4) write.table(merge,"merge.txt",quote = F,sep = "\t",col.names = T,row.names = F) write.table(merge2,"merge2.txt",quote = F,sep = "\t",col.names = T,row.names = F) ##table 1 P tableAge=matrix(c(232,86,224,73),nrow = 2,ncol = 2) chisq.test(tableAge) tableGender=matrix(c(210,63,246,96),nrow = 2,ncol = 2) chisq.test(tableGender) tableGrade=matrix(c(221,90,235,69),nrow = 2,ncol = 2) chisq.test(tableGrade) tableRisk=matrix(c(228,79,228,80),nrow = 2,ncol = 2) chisq.test(tableRisk) tableRT=matrix(c(280,141,176,18),nrow = 2,ncol = 2) chisq.test(tableRT) tableVS=matrix(c(341,77,115,82),nrow = 2,ncol = 2) chisq.test(tableVS) ########### t_value <- (43.4 - 40.7) / sqrt((((456-1)*(13.3^2) + (159-1)*(10.9^2)) / (456+159-2))*(1/456 + 1/159)) t_value ## 456+159-2 p <- pt(t_value, df = 613) p t_value <- (998 - 2024.7) / sqrt((((456-1)*(953.8^2) + (159-1)*(1334.3^2)) / (456+159-2))*(1/456 + 1/159)) t_value ## 456+159-2 p <- pt(t_value, df = 613) p #######################################GO/KEGG rm(list = ls()) library(ggplot2) BiocManager::install("clusterProfiler") BiocManager::install("ggraph") library(clusterProfiler) library(org.Hs.eg.db) BiocManager::install("cowplot") BiocManager::install("GOplot") library("customLayout") library("GOplot") a=read.table(file = "SEMA4G.txt",header = T,sep = "\t",check.names = F) df <- bitr(unique(a$symbol), fromType = "SYMBOL", toType = c( "ENTREZID"), OrgDb = org.Hs.eg.db) head(df) save(df,file = 'anno_DEG.Rdata') gene_up= df[,'ENTREZID'] grobs=list() go1<- enrichGO(gene_up, OrgDb = org.Hs.eg.db, ont='CC',pAdjustMethod = 'BH',pvalueCutoff = 0.05, qvalueCutoff = 0.05,keyType = 'ENTREZID') head(go1) barplot(go1,showCategory=5,title ="CC") dotplot() go2<- enrichGO(gene_up, OrgDb = org.Hs.eg.db, ont='BP',pAdjustMethod = 'BH',pvalueCutoff = 0.05, qvalueCutoff = 0.05,keyType = 'ENTREZID') barplot(go2,showCategory=5,title="BP") grob=list() go3<- enrichGO(gene_up, OrgDb = org.Hs.eg.db, ont='MF',pAdjustMethod = 'BH',pvalueCutoff = 0.05, qvalueCutoff = 0.05,keyType = 'ENTREZID') barplot(go3,showCategory=5,title = "MF") dim(go) dim(go[go$ONTOLOGY=='BP',]) dim(go[go$ONTOLOGY=='CC',]) dim(go[go$ONTOLOGY=='MF',]) ######################################################################### rm(list = ls()) library(ggplot2) BiocManager::install("clusterProfiler") BiocManager::install("ggraph") library(clusterProfiler) library(org.Hs.eg.db) BiocManager::install("cowplot") BiocManager::install("GOplot") library("customLayout") library("GOplot") ######### file "go-kegg.txt" is a collection of "SEMA4G_relate.txt"、“WEE1_relate.txt”、"CRTAC1_relate.txt" a=read.table(file = "go-kegg.txt",header = T,sep = "\t",check.names = F) names(a)=c("symbol") df <- bitr(unique(a$symbol), fromType = "SYMBOL", toType = c( "ENTREZID"), OrgDb = org.Hs.eg.db) head(df) save(df,file = 'anno_DEG-CRTAC1.Rdata') gene_up= df[,'ENTREZID'] grobs=list() go1<- enrichGO(gene_up, OrgDb = org.Hs.eg.db, ont='CC',pAdjustMethod = 'BH',pvalueCutoff = 0.05, qvalueCutoff = 0.05,keyType = 'ENTREZID') head(go) barplot(go1,showCategory=5,title ="CC") dotplot() go2<- enrichGO(gene_up, OrgDb = org.Hs.eg.db, ont='BP',pAdjustMethod = 'BH',pvalueCutoff = 0.05, qvalueCutoff = 0.05,keyType = 'ENTREZID') barplot(go2,showCategory=5,title="BP") grob=list() go3<- enrichGO(gene_up, OrgDb = org.Hs.eg.db, ont='MF',pAdjustMethod = 'BH',pvalueCutoff = 0.05, qvalueCutoff = 0.05,keyType = 'ENTREZID') barplot(go3,showCategory=5,title = "MF") dim(go) dim(go[go$ONTOLOGY=='BP',]) dim(go[go$ONTOLOGY=='CC',]) dim(go[go$ONTOLOGY=='MF',]) kegg<- enrichKEGG(gene_up, organism = 'hsa', keyType = 'kegg', pvalueCutoff = 0.05,pAdjustMethod = 'BH', qvalueCutoff = 0.05,use_internal_data = FALSE) head(kegg) dotplot(kegg, showCategory=5,title = "KEGG")