rm(list=ls())
options(stringsAsFactors = F)
library("dplyr")
getwd()
Rdata_dir='./Rdata'
Figure_dir='./Figure'
####Reading expression information
miRNA_GA=read.table("HiSeqV2.gz",header = T,sep = '\t',check.names = F)
dim(miRNA_GA)
miRNA_GA[1:4,1:4]
na.omit(miRNA_GA)[1:4,1:4]
dim(na.omit(miRNA_GA))

#####remove duplicate values of expression matrix
expr=t(miRNA_GA)
write.table(expr,"expr.txt",quote = F,sep = "\t",col.names = F,row.names = T)
z=read.table(file = "expr.txt",sep = "\t",header = T,check.names = F)
expr=z[,-1]
sample=substr(z$sample,1,12)    ####Intercept the first 12 characters of ID
expr=cbind(sample,expr)
dim(expr)
d=expr[!duplicated(expr$sample),]
dim(d)
expr_matirx=d



####Reading clinical information


miRNA_HiSeq=read.table("LGG_clinicalMatrix.gz" ,header = T,sep = '\t')
dim(miRNA_HiSeq)
miRNA_HiSeq[1:4,1:4]
na.omit(miRNA_HiSeq)[1:4,1:4]
dim(na.omit(miRNA_HiSeq))
####Extract the column of clinical information of interest and remove the duplicate barcode
colnames(miRNA_HiSeq)

phe=miRNA_HiSeq[,-1]
sample=substr(miRNA_HiSeq$sampleID,1,12)    ####Intercept the first 12 characters of ID
d=cbind(sample,phe)
colnames(d)
meta=as.data.frame(d[c("sample","X_EVENT","OS.time","age_at_initial_pathologic_diagnosis",
                       "gender","histological_type","neoplasm_histologic_grade","radiation_therapy",
                       "targeted_molecular_therapy","vital_status")])
dim(meta)
meta=(meta[complete.cases(meta[,2:10]),])
dim(meta)
table(meta$X_EVENT)
meta=na.omit(meta)
dim(meta)
table(duplicated(meta$sample))
unique_sample=unique(meta$sample)           




unique_sample_matrix=as.data.frame(unique_sample)      #####create a matrix and assign the column name
colnames(unique_sample_matrix)="sample"

meta=meta[!duplicated(meta$sample),]       ###########Remove the duplicates in the sample column of meta matrix 
dim(meta)                                                

table(meta[,1:ncol(meta)])
meta=meta[grep(pattern="G2|G3",meta[,7]),]    ####Extract the seventh column of the meta containing "G2 or G3" characters
dim(meta)
meta=meta[grep(pattern="YES|NO",meta[,8]),]    ####Extract the eighth column of the meta containing "YES or NO" characters
meta=meta[grep(pattern="YES|NO",meta[,9]),] ####Extract the ninth column of the meta containing "YES or NO" characters
dim(meta)

meta_expr=merge(meta,expr_matirx,by="sample")  ###########Integrate expression matrix and clinical information through the same column named "sample"

save(expr_matirx,meta,meta_expr,
     file = file.path(Rdata_dir,'TCGA-LGG-RNAseq-example.Rdata')
) ###################Store three files named 'TCGA-LGG-RNAseq-example.Rdata'

###############
rm(list=ls())
options(stringsAsFactors = F)  


Rdata_dir='./Rdata'
Figure_dir='./Figure'       
load( file = 
        file.path(Rdata_dir,'TCGA-LGG-RNAseq-example.Rdata'))  ###Load stored files named 'TCGA-LGG-RNAseq-example.Rdata'

write.table(meta,"meta.txt",quote = F,sep = "\t",row.names = F)
#######Name the column of the clinical data and sorting the data
meta=meta[,-10] ########## Delete the 10th column of meta
colnames(meta)=c("sample","vital_status","OS.time","age","gender","type",
                 "grade","radition","molecular_therapy")   #####Name the columns one after the other

meta$age_group=ifelse(meta$age>median(meta$age),'older','younger') ####Group ages based on median and rebuild a column named age_group

meta$grade=gsub("G2","II",meta$grade)   #####Replace G2 with II in the "grade" column of meta
meta$grade=gsub("G3","III",meta$grade)   #####Replace G3 with III in the "grade" column of meta
meta_expr=merge(meta,expr_matirx,by="sample")   ###########Integrate expression matrix and clinical information through the same column named "sample"
save(meta,meta_expr,expr_matirx,
     file = 
       file.path(Rdata_dir,'TCGA-LGG-RNAseq-example.Rdata')   ####Store three files named 'TCGA-LGG-RNAseq-example.Rdata'
)


#######
rm(list=ls())
options(stringsAsFactors = F)

Rdata_dir='./Rdata'
Figure_dir='./Figure'


load( file = 
        file.path(Rdata_dir,'TCGA-LGG-RNAseq-example.Rdata'))

Ued_exprclinical=merge(meta,expr,by="sample")
z=Ued_exprclinical[,-(2:10)]     ####Delete the 2th to 10th column of meta

z=t(z)
write.table(z,"z.txt",quote = F,sep = "\t",col.names = F,row.names = T)
z=read.table(file = "z.txt",sep = "\t",header = T,check.names = F)







rm(list=ls())
options(stringsAsFactors = F)



Rdata_dir='./Rdata'
Figure_dir='./Figure'
load( file = 
        file.path(Rdata_dir,'TCGA-LGG-RNAseq-example.Rdata')
)


Ued_exprclinical=meta_expr
z=Ued_exprclinical[,-(2:10)]     ####Delete the 2th to 10th column of meta

z=t(z)
write.table(z,"z.txt",quote = F,sep = "\t",col.names = F,row.names = T)
z=read.table(file = "z.txt",sep = "\t",header = T,check.names = F)

library('rbsurv')
###Robust test
x=as.matrix(z[,2:ncol(z)])
time <- as.numeric(Ued_exprclinical$OS.time)
View(time)
status <- as.numeric(Ued_exprclinical$vital_status)
####riskfactor=as.matrix(Ued_exprclinical[,2:8])         
c=z[,1]
fit <-rbsurv(time=time, status=status, x=x,method="efron", max.n.genes=30, n.iter = 10,n.fold = 6,n.seq = 3,seed = 100,gene.ID =c)   



fit$model
dim(fit$model)
write.table(fit$model,"rbsurv.txt",quote = F,sep = "\t",row.names = F)
######Extract the tagged gene name from file "rbsurv.txt",and name the column name as sample,store the file and name it "TCGA-LGG-rbsurv_result.txt"
rbsurv_result=read.table(file = "TCGA-LGG-rbsurv_result.txt",sep = "\t",header = T,check.names = F)

Rdata_dir='./Rdata/'
Figure_dir='./figures/'
save(rbsurv_result,z,
     file = 
       file.path(Rdata_dir,'TCGA-LGG-RNAseq-511example.Rdata')
)

####lasso regression
Rdata_dir='./Rdata/'
Figure_dir='./figures/'

load( file = 
        file.path(Rdata_dir,'TCGA-LGG-RNAseq-511example.Rdata')
)
load( file = 
        file.path(Rdata_dir,'TCGA-LGG-RNAseq-example.Rdata')
)


d=merge(rbsurv_result,z,by="sample")
d1=t(d)
write.table(d1,"d1.txt",quote = F,sep = "\t",col.names = F,row.names = T)
d2=read.table(file = "d1.txt",sep = "\t",header = T,check.names = F)

lasso_input=merge(meta,d2,by="sample")


library(glmnet)
library(survival)
miRNA=lasso_input
###miRNAEXP=log2(miRNA[,3:ncol(miRNA)]+1) 
###miRNA=cbind(miRNA[,1:2],miRNAEXP)

miRNA[,"OS.time"]=miRNA[,"OS.time"]/365
v1<-as.matrix(miRNA[,c(11:ncol(miRNA))])
v2 <- as.matrix(Surv(miRNA$OS.time,miRNA$vital_status))

myfit <- glmnet(v1, v2, family = "cox")
pdf("lambda.pdf")
plot(myfit, xvar = "lambda", label = TRUE)
dev.off()
dev.new()


myfit2 <- cv.glmnet(v1, v2, family="cox")
pdf("min.pdf")
plot(myfit2)
abline(v=log(c(myfit2$lambda.min,myfit2$lambda.1se)),lty="dashed")
dev.off()

myfit2$lambda.min
coe <- coef(myfit, s = myfit2$lambda.min)
act_index <- which(coe != 0)
act_coe <- coe[act_index]
row.names(coe)[act_index]
lasso_output=as.matrix(row.names(coe)[act_index])
colnames(lasso_output)="sample"
multcox_preinput=merge(d,lasso_output,by="sample")

multcox_preinput=t(multcox_preinput)
write.table(multcox_preinput,"multcox.txt",quote = F,sep = "\t",col.names = F,row.names = T)
multicox=read.table(file = "multcox.txt",sep = "\t",header = T,check.names = F)


save(multicox,meta,
     file = 
       file.path(Rdata_dir,'TCGA-LGG-RNAseq-511example.Rdata')
)



#####multivariable analysis
Rdata_dir='./Rdata/'
Figure_dir='./figures/'

load( file = 
        file.path(Rdata_dir,'TCGA-LGG-RNAseq-511example.Rdata')
)

multicox_input=merge(meta,multicox,by="sample")
mRNA=multicox_input
write.table(mRNA,file = "mRNA.txt",quote = F,sep = "\t",row.names = F)
miRNA<-read.table(file="mRNA.txt",header=T,sep="\t",row.names = 1,check.names = F,stringsAsFactors = F) 

colnames(miRNA)
fmla1 <- as.formula(Surv(OS.time,vital_status)~BMP2+CCDC46+CMYA5+CRTAC1+EMP3+FBXO39+IGF2BP3
                    +PTGFRN+RANBP17+SEMA4G+SHOX2+SMC4+WEE1)
mycox <- coxph(fmla1,data=miRNA)


summary(mycox)

####Select the selected genes for the rise score
fmla1 <- as.formula(Surv(OS.time,vital_status)~SEMA4G+CRTACm1
                    +WEE1)

mycox <- coxph(fmla1,data=miRNA)
risk_score<-predict(mycox,type="risk",newdata=miRNA)
risk_level<-as.factor(ifelse(risk_score>median(risk_score),"High","Low"))
write.table(cbind(id=rownames(cbind(miRNA[,1:9],risk_score,risk_level)),cbind(miRNA[,1:9],risk_score,risk_level)),"risk_score.txt",sep="\t",quote=F,row.names=F) 



####Positioning gene locations and  survival analysis
rm(list=ls())
options(stringsAsFactors = F)
Rdata_dir='./Rdata/'
Figure_dir='./figures/'

load( file = 
        file.path(Rdata_dir,'TCGA-LGG-RNAseq-511example.Rdata')
)
load( file = 
        file.path(Rdata_dir,'TCGA-LGG-RNAseq-example.Rdata')
)
library(survival)
library(survminer)
BiocManager::install("dplyr")
p=c(colnames(multicox))
grep("WEE1",p)
grep("CRTAC1",p)
grep("SEMA4G",p)

multicox2=multicox[,c(1,14,5,11)]

inputmiRNA=merge(meta[,c(1,2,3)],multicox2,by="sample")

risk_score=read.table(file = "risk_score.txt",sep = "\t",header = T,check.names = F)
inputmiRNA$OS.time=inputmiRNA$OS.time/365
risk_score$OS.time=risk_score$OS.time/365

group1<- ifelse(inputmiRNA$WEE1>median(inputmiRNA$WEE1),'high','low')
sfit1<- survfit(Surv(OS.time,vital_status) ~ group1, data = inputmiRNA)

group2<- ifelse(inputmiRNA$CRTAC1>median(inputmiRNA$CRTAC1),'high','low')
sfit2=survfit(Surv(OS.time,vital_status) ~ group2, data = inputmiRNA)

group3<- ifelse(inputmiRNA$SEMA4G>median(inputmiRNA$SEMA4G),'high','low')
sfit3=survfit(Surv(OS.time,vital_status) ~ group3, data = inputmiRNA)




sfit4=survfit(Surv(OS.time,vital_status) ~ risk_level, data = risk_score)
########################################################## stratified analysis
strata=list()
younger=risk_score[grep(pattern="younger",risk_score[,10]),] 

sfit5=survfit(Surv(OS.time,vital_status) ~ risk_level, data = younger)

ggsurvplot(sfit5,data = younger,
           surv.median.line = "hv",
           legend.title="Three-genes Signature",xlab ="Time(years)",
           conf.int = TRUE,
           pval = TRUE,palette = c("orchid2","dodgerblue2"),
           ggtheme = theme_bw()+theme(plot.title = element_text(hjust = 0.5)) ,title = "TCGA LGGs-Younger",legend.labs=c("High risk","Low risk"))

old=risk_score[grep(pattern="old",risk_score[,10]),] 

sfit6=survfit(Surv(OS.time,vital_status) ~ risk_level, data = old)

ggsurvplot(sfit6,data = old,
           surv.median.line = "hv",
           legend.title="Three-genes Signature",xlab ="Time(years)",
           conf.int = TRUE,
           pval = TRUE,palette = c("orchid2","dodgerblue2"),
           ggtheme = theme_bw()+theme(plot.title = element_text(hjust = 0.5)) ,title = "TCGA LGGs-Old",legend.labs=c("High risk","Low risk"))


male=risk_score[grep(pattern="MALE",risk_score[,5]),] 

sfit7=survfit(Surv(OS.time,vital_status) ~ risk_level, data = male)

ggsurvplot(sfit7,data = male,
           surv.median.line = "hv",
           legend.title="Three-genes Signature",xlab ="Time(years)",
           conf.int = TRUE,
           pval = TRUE,palette = c("orchid2","dodgerblue2"),
           ggtheme = theme_bw()+theme(plot.title = element_text(hjust = 0.5)) ,title = "TCGA LGGs-Male",legend.labs=c("High risk","Low risk"))


female=risk_score[grep(pattern="FEMALE",risk_score[,5]),] 

sfit8=survfit(Surv(OS.time,vital_status) ~ risk_level, data = female)

ggsurvplot(sfit8,data = female,
           surv.median.line = "hv",
           legend.title="Three-genes Signature",xlab ="Time(years)",
           conf.int = TRUE,
           pval = TRUE,palette = c("orchid2","dodgerblue2"),
           ggtheme = theme_bw()+theme(plot.title = element_text(hjust = 0.5)) ,title = "TCGA LGGs-Female",legend.labs=c("High risk","Low risk"))


G3=risk_score[grep(pattern="III",risk_score[,7]),] 

sfit9=survfit(Surv(OS.time,vital_status) ~ risk_level, data = G3)

ggsurvplot(sfit9,data = G3,
           surv.median.line = "hv",
           legend.title="Three-genes Signature",xlab ="Time(years)",
           conf.int = TRUE,
           pval = TRUE,palette = c("orchid2","dodgerblue2"),
           ggtheme = theme_bw()+theme(plot.title = element_text(hjust = 0.5)) ,title = "TCGA LGGs-Grade III",legend.labs=c("High risk","Low risk"))

risk_score$grade=gsub("III","3",risk_score$grade)
G2=risk_score[grep(pattern="II",risk_score[,7]),]

sfit10=survfit(Surv(OS.time,vital_status) ~ risk_level, data = G2)

ggsurvplot(sfit10,data = G2,
           surv.median.line = "hv",
           legend.title="Three-genes Signature",xlab ="Time(years)",
           conf.int = TRUE,
           pval = TRUE,palette = c("orchid2","dodgerblue2"),
           ggtheme = theme_bw()+theme(plot.title = element_text(hjust = 0.5)) ,title = "TCGA LGGs-Grade II",legend.labs=c("High risk","Low risk"))


multicox_meta=merge(meta,multicox,by="sample")
##################Draw ROC curve of our study
library(lars) 
library(glmnet) 
b=multicox_meta[,c("sample","WEE1","SEMA4G","CRTAC1")]
rownames(b)=b[,1]
x=b[,2:4]
x=as.matrix(x)
y=multicox_meta$vital_status 
cv_fit <- cv.glmnet(x=x, y=y, alpha = 1, nlambda = 1000) 
lasso.prob <- predict(cv_fit, newx=x , s=c(cv_fit$lambda.min,cv_fit$lambda.1se) )

new_dat=multicox_meta
library(timeROC)
new_dat$fp=as.numeric(lasso.prob[,1])
new_dat$OS.time=new_dat$OS.time/30
with(new_dat,
     ROC <<- timeROC(T=OS.time,#End time 
                     delta=vital_status,#Survival outcome 
                     marker=fp,#Predictor
                     cause=1,#Positive outcome assignment, such as death or not
                     weighting="marginal",#Weight calculation method, marginal is the default value, using km to calculate the censored distribution
                     times=c(24,60),# Time point, choose 2 years and 8 years survival rate
                     ROC = T,
                     iid = TRUE)
)

plot(ROC,time=24,col = "blue",add =FALSE,title=F)
#Add means whether to add in the previous picture
plot(ROC,time=60,col = "red",add = T)
legend("right",c("2 years AUC=0.910","5 years AUC=0.828"),col=c("blue","red"),lwd=2)
title(main = list("TCGA-LGGs ROC for the three genes", cex = 1.3,
                  col = "black", font = 3))

ROC$AUC
confint(ROC)


splots <- list()
splots[[1]] ggsurvplot(sfit1,data = inputmiRNA,
                       surv.median.line = "hv",
                       legend.title="WEE1",xlab ="Time(years)",
                       conf.int = TRUE,
                       pval = TRUE,palette = c("orchid2","dodgerblue2"),
                       ggtheme = theme_bw()+theme(plot.title = element_text(hjust = 0.5)) ,title = "TCGA-LGGs",legend.labs=c("High","Low"))
splots[[2]] <- ggsurvplot(sfit2,data = inputmiRNA,
                          surv.median.line = "hv",
                          legend.title="CRTAC1",xlab ="Time(years)",
                          conf.int = TRUE,
                          pval = TRUE,palette = c("orchid2","dodgerblue2"),
                          ggtheme = theme_bw()+theme(plot.title = element_text(hjust = 0.5)) ,title = "TCGA-LGGs",legend.labs=c("High","Low"))
splots[[3]] <- ggsurvplot(sfit3,data = inputmiRNA,
                          surv.median.line = "hv",
                          legend.title="SEMA4G",xlab ="Time(years)",
                          conf.int = TRUE,
                          pval = TRUE,palette = c("orchid2","dodgerblue2"),
                          ggtheme = theme_bw()+theme(plot.title = element_text(hjust = 0.5)) ,title = "TCGA-LGGs",legend.labs=c("High","Low"))


ggsurvplot(sfit4,data = risk_score,
           surv.median.line = "hv",
           legend.title="Three-genes Signature",xlab ="Time(years)",
           conf.int = TRUE,
           pval = TRUE,palette = c("orchid2","dodgerblue2"),
           ggtheme = theme_bw()+theme(plot.title = element_text(hjust = 0.5)) ,title = "TCGA LGGs",legend.labs=c("High risk","Low risk"))

# Arrange multiple ggsurvplots and print the output
arrange_ggsurvplots(splots, print = TRUE,  ncol = 1, nrow = 3)
dev.off()



########################################
#######risk_distribution
rm(list=ls())
options(stringsAsFactors = F)
Rdata_dir='./Rdata/'
Figure_dir='./figures/'

load( file = 
        file.path(Rdata_dir,'TCGA-LGG-RNAseq-511example.Rdata')
)
load( file = 
        file.path(Rdata_dir,'TCGA-LGG-RNAseq-example.Rdata')
)

new_dat=merge(meta,multicox,by="sample")

colnames(new_dat) 
s=Surv(OS.time,vital_status) ~ WEE1+CRTAC1+SEMA4G
model <- coxph(s, data = new_dat )
summary(model,data=new_dat)
options(scipen=1)
ggforest(model, data =new_dat, 
         main = "Hazard ratio", 
         cpositions = c(0.10, 0.22, 0.4), 
         fontsize = 1.0, 
         refLabel = "1", noDigits = 4)




fp <- predict(model,new_dat,type="risk");boxplot(fp)
fp <- predict(model,new_dat,type="expected") ;boxplot(fp)
plot(fp,phe$days)
fp <- predict(model,new_dat) ;boxplot(fp)
basehaz(model) 
library(Hmisc)
options(scipen=200)
with(new_dat,rcorr.cens(fp,Surv(OS.time,vital_status)  ))
# http://dni-institute.in/blogs/cox-regression-interpret-result-and-predict/
###############################risk_distribution!!!!!
library(cowplot)
library(pheatmap)
# https://cran.r-project.org/web/packages/cowplot/vignettes/introduction.html
fp_dat=data.frame(s=1:length(fp),v=as.numeric(sort(fp )))

d=rbind(data.frame(fp_dat[1:228,],Type=as.factor("Low risk")),data.frame(fp_dat[229:456,],Type=as.factor("High risk")))

sur_dat=data.frame(s=1:length(fp),
                   t=meta[names(sort(fp )),'OS.time'] ,
                   e=meta[names(sort(fp )),'vital_status']  ) 

sur_dat=na.omit(sur_dat)
sur_dat$e=ifelse(sur_dat$e==0,'alive','death')
exp_dat=new_dat[names(sort(fp )),c(14,23,20)]             ####Column number of the gene of interest


plot.point=ggplot(d,aes(x=s,y=v,shape=Type,color=Type))+labs(title =" ")+
  theme(plot.title=element_text(hjust=0.5))+scale_shape_manual(values=c(7,1))+geom_point()+theme_bw()+xlab("Number of Patients")+ylab("Risk score");print(plot.point)






plot.sur=ggplot(sur_dat,aes(x=s,y=t))+theme(legend.title=element_blank())+theme_bw()+geom_point(aes(col=e))+xlab("Number of Patients")+ylab("Survival time(Day)");print(plot.sur)

mycolors <- colorRampPalette(c("navy", "white", "firebrick3"))(256)
tmp=t(scale(exp_dat))
range(tmp)
tmp[tmp > 1] = 1
tmp[tmp < -1] = -1

annotation_col=data.frame(risk_score$risk_level)
names(annotation_col)="Group"

plot.h=pheatmap(tmp,col= mycolors,show_colnames = F,cluster_cols = F,cluster_rows = F,clustering_distance_rows = "correlation")
plot.h=pheatmap(tmp,col= mycolors,legend=T,show_colnames = F,annotation_names_col=F,annotation_legend=T,annotation_col=annotation_col,cluster_cols = F,cluster_rows = F)
plot.h$gtable


plot_grid(plot.point, plot.sur, plot.h$gtable,
          
          align = 'v',ncol = 1)  
ggarrange(plot.point, plot.sur, plot.h$gtable,
          labels = c("A", "B","C"),
          align = 'v',ncol = 1)



#####################
########boxplot
rm(list=ls())
options(stringsAsFactors = F)
BiocManager::install("tidyverse")
BiocManager::install("dplyr")
library("tidyverse")
library("tidyr")
library("dbplyr")

Rdata_dir='./Rdata/'
Figure_dir='./figures/'

load( file = 
        file.path(Rdata_dir,'TCGA-LGG-RNAseq-511example.Rdata')
)
load( file = 
        file.path(Rdata_dir,'TCGA-LGG-RNAseq-example.Rdata')
)

multicox_meta=merge(meta,multicox,by="sample")

dat=data.frame(WEE1=multicox_meta$WEE1,CRTAC1=multicox_meta$CRTAC1,SEMA4G=multicox_meta$SEMA4G,
               Grade=multicox_meta$grade)
save(dat,file = 'data_for_boxplot.Rdata')
head(dat)

drop_outliers=function(df_name,dep_col){
  a=df_name[,dep_col]
  iqr=IQR(a)
  q1=as.numeric(quantile(a,0.25))
  q3=as.numeric(quantile(a,0.75))
  bottem=q1-1.5*iqr
  top=q3+1.5*iqr
  df_return=df_name %>%
    filter(df_name[,dep_col]>=bottem & df_name[,dep_col]<=top)
  return(df_return)
}
mydat=drop_outliers(dat,"WEE1") %>% drop_outliers("WEE1")%>% drop_outliers("WEE1")%>% drop_outliers("WEE1")%>% drop_outliers("WEE1")%>% drop_outliers("WEE1")%>% drop_outliers("WEE1")%>% drop_outliers("WEE1")%>% drop_outliers("WEE1")%>% drop_outliers("WEE1")%>% drop_outliers("WEE1")%>% drop_outliers("WEE1")%>% drop_outliers("WEE1")

mydat=drop_outliers(mydat,"CRTAC1") %>% drop_outliers("CRTAC1")%>% drop_outliers("CRTAC1")%>% drop_outliers("CRTAC1")%>% drop_outliers("CRTAC1")%>% drop_outliers("CRTAC1")%>% drop_outliers("CRTAC1")%>% drop_outliers("CRTAC1")%>% drop_outliers("CRTAC1")%>% drop_outliers("CRTAC1")%>% drop_outliers("CRTAC1")%>% drop_outliers("CRTAC1")

mydat=drop_outliers(mydat,"SEMA4G") %>% drop_outliers("SEMA4G")%>% drop_outliers("SEMA4G")%>% drop_outliers("SEMA4G")%>% drop_outliers("SEMA4G")%>% drop_outliers("SEMA4G")%>% drop_outliers("SEMA4G")%>% drop_outliers("SEMA4G")%>% drop_outliers("SEMA4G")%>% drop_outliers("SEMA4G")



mydat=gather(dat,key="Gene",value="Expression",WEE1:SEMA4G)
mydat=mydat[order(mydat[,1]),]


compare_means( Expression ~ Grade, data = mydat, 
               group.by = "Gene")
# Box plot facetted by "gene"
p=ggboxplot(mydat, x = "Grade", y = "Expression",
            color = "Grade", palette = "jco",
            add = "jitter",title = "TCGA LGGs",
            facet.by = "Gene", short.panel.labs = FALSE)+theme(plot.title=element_text(hjust=0.5))
# Use only p.format as label. Remove method name.
p + stat_compare_means()


ggboxplot(mydat, x = "gene", y = "Expression",
          color = "Grade", palette = "nejm",
          add = "jitter")
p + stat_compare_means(aes(Grade = Grade))








if(require('ggpubr')){
  library(ggpubr)
  
  # google search : ggpubr boxplot add p-value
  # http://www.sthda.com/english/articles/24-ggpubr-publication-ready-plots/76-add-p-values-and-significance-levels-to-ggplots/
  p1 <- ggboxplot(dat, x = "Grade", y = "WEE1",
                  color = "#00AFBB", palette = "jco",
                  add = "jitter")
  #  Add p-value
  p1 + stat_compare_means()
}

if(require('ggpubr')){
  library(ggpubr)
  
  # google search : ggpubr boxplot add p-value
  # http://www.sthda.com/english/articles/24-ggpubr-publication-ready-plots/76-add-p-values-and-significance-levels-to-ggplots/
  p2 <- ggboxplot(dat2, x = "Grade", y = "CRTAC1",
                  color = "#E7B800", palette = "jco",
                  add = "jitter")
  #  Add p-value
  p + stat_compare_means()
}

##########################
#########ROC


rm(list=ls())
options(stringsAsFactors = F)

Rdata_dir='./Rdata/'
Figure_dir='./figures/'

load( file = 
        file.path(Rdata_dir,'TCGA-LGG-RNAseq-511example.Rdata')
)
load( file = 
        file.path(Rdata_dir,'TCGA-LGG-RNAseq-example.Rdata')
)

multicox_meta=merge(meta,multicox,by="sample")

library(lars) 
library(glmnet) 
library(survival)
library(survminer)
library(timeROC)

multicox_input=merge(meta,multicox,by="sample")
mRNA=multicox_input
write.table(mRNA,file = "mRNA.txt",quote = F,sep = "\t",row.names = F)
miRNA<-read.table(file="mRNA.txt",header=T,sep="\t",row.names = 1,check.names = F,stringsAsFactors = F) 

colnames(miRNA)

####Select the selected genes for the rise score
fmla1 <- as.formula(Surv(OS.time,vital_status)~SEMA4G+CRTAC1
                    +WEE1)

mycox <- coxph(fmla1,data=miRNA)
meta$risk_score<-predict(mycox,type="risk",newdata=miRNA)

library("survivalROC")
cutoff <- 365
ROC1= survivalROC(Stime=meta$OS.time, 
                  status=meta$vital_status,      
                  marker = meta$risk_score,     
                  predict.time =  cutoff, method="KM")


cutoff <- 1095

ROC3= survivalROC(Stime=meta$OS.time, 
                  status=meta$vital_status,      
                  marker = meta$risk_score,     
                  predict.time =  cutoff, method="KM")

cutoff <- 1825
ROC5= survivalROC(Stime=meta$OS.time, 
                  status=meta$vital_status,      
                  marker = meta$risk_score,     
                  predict.time =  cutoff, method="KM")

plot(ROC1$FP, ROC1$TP, type="l",col="red", xlim=c(0,1), ylim=c(0,1),   
     xlab=paste( "FP", "\n", "AUC = ",round(ROC1$AUC,3)), 
     ylab="TP",main="Time dependent ROC")
abline(0,1)
lines(ROC3$FP, ROC3$TP, type="l",col="blue",xlim=c(0,1), ylim=c(0,1))
lines(ROC5$FP, ROC5$TP, type="l",col="yellow",xlim=c(0,1), ylim=c(0,1))
legend(0.5,0.4,c(paste("AUC at 1 year =",round(ROC1$AUC,3)),paste("AUC at 3 years =",round(ROC3$AUC,3)),paste("AUC at 5 years =",round(ROC5$AUC,3))),x.intersp=1, y.intersp=0.8,
       lty= 1, lwd= 2, col=c("red","blue","yellow"),
       bty = "n",
       seg.len=1, cex=0.8) #
#####################################################################Draw the ROC curve of the comparative literature
##Integrative Analysis of DNA Methylation and Gene Expression Identify a ThreeGene Signature for Predicting Prognosis in Lower-Grade Gliomas
meta$risk_score2=0.597*meta_expr$EMP3+0.825*meta_expr$GSX2+0.942*meta_expr$EMILIN3

cutoff <- 365
ROC1b= survivalROC(Stime=meta$OS.time, 
                   status=meta$vital_status,      
                   marker = meta$risk_score2,     
                   predict.time =  cutoff, method="KM")


cutoff <- 1095

ROC3b= survivalROC(Stime=meta$OS.time, 
                   status=meta$vital_status,      
                   marker = meta$risk_score2,     
                   predict.time =  cutoff, method="KM")

cutoff <- 1825
ROC5b= survivalROC(Stime=meta$OS.time, 
                   status=meta$vital_status,      
                   marker = meta$risk_score2,     
                   predict.time =  cutoff, method="KM")

plot(ROC1b$FP, ROC1b$TP, type="l",col="red", xlim=c(0,1), ylim=c(0,1),   
     xlab=paste( "FP", "\n", "AUC = ",round(ROC1$AUC,3)), 
     ylab="TP",main="Time dependent ROC")
abline(0,1)
lines(ROC3b$FP, ROC3b$TP, type="l",col="blue",xlim=c(0,1), ylim=c(0,1))
lines(ROC5b$FP, ROC5b$TP, type="l",col="yellow",xlim=c(0,1), ylim=c(0,1))
legend(0.6,0.4,c(paste("AUC at 1 year =",round(ROC1b$AUC,3)),paste("AUC at 3 years =",round(ROC3b$AUC,3)),paste("AUC at 5 years =",round(ROC5b$AUC,3))),x.intersp=1, y.intersp=0.8,
       lty= 1, lwd= 2, col=c("red","blue","yellow"),
       bty = "n",
       seg.len=1, cex=0.8) #
##############################################################Draw the ROC curve of the comparative literature
###Comprehensive analysis of genes based on chr1p/ 19q co-deletion reveals a robust 4-gene prognostic signature for lower grade glioma

meta$risk_score3=0.012*meta_expr$IFI44+0.068*meta_expr$KIF2C+0.035*meta_expr$GNG12+0.02*meta_expr$EMP3

cutoff <- 365
ROC1c= survivalROC(Stime=meta$OS.time, 
                   status=meta$vital_status,      
                   marker = meta$risk_score3,     
                   predict.time =  cutoff, method="KM")


cutoff <- 1095

ROC3c= survivalROC(Stime=meta$OS.time, 
                   status=meta$vital_status,      
                   marker = meta$risk_score3,     
                   predict.time =  cutoff, method="KM")

cutoff <- 1825
ROC5c= survivalROC(Stime=meta$OS.time, 
                   status=meta$vital_status,      
                   marker = meta$risk_score3,     
                   predict.time =  cutoff, method="KM")

plot(ROC1c$FP, ROC1c$TP, type="l",col="red", xlim=c(0,1), ylim=c(0,1),   
     xlab=paste( "FP", "\n", "AUC = ",round(ROC1$AUC,3)), 
     ylab="TP",main="Time dependent ROC")
abline(0,1)
lines(ROC3c$FP, ROC3c$TP, type="l",col="blue",xlim=c(0,1), ylim=c(0,1))
lines(ROC5c$FP, ROC5c$TP, type="l",col="yellow",xlim=c(0,1), ylim=c(0,1))
legend(0.6,0.4,c(paste("AUC at 1 year =",round(ROC1c$AUC,3)),paste("AUC at 3 years =",round(ROC3c$AUC,3)),paste("AUC at 5 years =",round(ROC5c$AUC,3))),x.intersp=1, y.intersp=0.8,
       lty= 1, lwd= 2, col=c("red","blue","yellow"),
       bty = "n",
       seg.len=1, cex=0.8) #

##############################################################################

###############Draw a comparison ROC
rm(list=ls())
options(stringsAsFactors = F)

Rdata_dir='./Rdata/'
Figure_dir='./figures/'

load( file = 
        file.path(Rdata_dir,'TCGA-LGG-RNAseq-511example.Rdata')
)
load( file = 
        file.path(Rdata_dir,'TCGA-LGG-RNAseq-example.Rdata')
)

multicox_meta=merge(meta,multicox,by="sample")

library(lars) 
library(glmnet) 
library(survival)
library(survminer)
library(timeROC)

multicox_input=merge(meta,multicox,by="sample")
mRNA=multicox_input
write.table(mRNA,file = "mRNA.txt",quote = F,sep = "\t",row.names = F)
miRNA<-read.table(file="mRNA.txt",header=T,sep="\t",row.names = 1,check.names = F,stringsAsFactors = F) 

colnames(miRNA)


fmla1 <- as.formula(Surv(OS.time,vital_status)~SEMA4G+CRTAC1
                    +WEE1)

mycox <- coxph(fmla1,data=miRNA)
meta$risk_score<-predict(mycox,type="risk",newdata=miRNA)
########################### 1 year ROC comparison
library("survivalROC")
cutoff <- 365
ROC1= survivalROC(Stime=meta$OS.time, 
                  status=meta$vital_status,      
                  marker = meta$risk_score,     
                  predict.time =  cutoff, method="KM")

meta$risk_score2=0.597*meta_expr$EMP3+0.825*meta_expr$GSX2+0.942*meta_expr$EMILIN3

cutoff <- 365
ROC1b= survivalROC(Stime=meta$OS.time, 
                   status=meta$vital_status,      
                   marker = meta$risk_score2,     
                   predict.time =  cutoff, method="KM")

meta$risk_score3=0.012*meta_expr$IFI44+0.068*meta_expr$KIF2C+0.035*meta_expr$GNG12+0.02*meta_expr$EMP3

cutoff <- 365
ROC1c= survivalROC(Stime=meta$OS.time, 
                   status=meta$vital_status,      
                   marker = meta$risk_score3,     
                   predict.time =  cutoff, method="KM")

plot(ROC1$FP, ROC1$TP, type="l",col="red", xlim=c(0,1), ylim=c(0,1),   
     xlab=paste( "FP", "\n", "AUC = ",round(ROC1$AUC,3)), 
     ylab="TP",main="Time dependent ROC 1 year")
abline(0,1)
lines(ROC1b$FP, ROC1b$TP, type="l",col="blue",xlim=c(0,1), ylim=c(0,1))
lines(ROC1c$FP, ROC1c$TP, type="l",col="yellow",xlim=c(0,1), ylim=c(0,1))
legend(0.5,0.4,c(paste("AUC at our study =",round(ROC1$AUC,3)),paste("AUC at Chen X.P et al =",round(ROC1b$AUC,3)),paste("AUC at Chuang Zhang et al =",round(ROC1c$AUC,3))),x.intersp=1, y.intersp=0.8,
       lty= 1, lwd= 2, col=c("red","blue","yellow"),
       bty = "n",
       seg.len=1, cex=0.8) 
####################5 years ROC comparison

cutoff <- 1825
ROC5= survivalROC(Stime=meta$OS.time, 
                  status=meta$vital_status,      
                  marker = meta$risk_score,     
                  predict.time =  cutoff, method="KM")

meta$risk_score2=0.597*meta_expr$EMP3+0.825*meta_expr$GSX2+0.942*meta_expr$EMILIN3

cutoff <- 1825
ROC5b= survivalROC(Stime=meta$OS.time, 
                   status=meta$vital_status,      
                   marker = meta$risk_score2,     
                   predict.time =  cutoff, method="KM")

meta$risk_score3=0.012*meta_expr$IFI44+0.068*meta_expr$KIF2C+0.035*meta_expr$GNG12+0.02*meta_expr$EMP3

cutoff <- 1825
ROC5c= survivalROC(Stime=meta$OS.time, 
                   status=meta$vital_status,      
                   marker = meta$risk_score3,     
                   predict.time =  cutoff, method="KM")

plot(ROC5$FP, ROC5$TP, type="l",col="red", xlim=c(0,1), ylim=c(0,1),   
     xlab=paste( "FP", "\n", "AUC = ",round(ROC1$AUC,3)), 
     ylab="TP",main="Time dependent ROC 5 years")
abline(0,1)
lines(ROC5b$FP, ROC5b$TP, type="l",col="blue",xlim=c(0,1), ylim=c(0,1))
lines(ROC5c$FP, ROC5c$TP, type="l",col="yellow",xlim=c(0,1), ylim=c(0,1))
legend(0.5,0.4,c(paste("AUC at our study =",round(ROC5$AUC,3)),paste("AUC at Chen X.P et al =",round(ROC5b$AUC,3)),paste("AUC at Chuang Zhang et al =",round(ROC5c$AUC,3))),x.intersp=1, y.intersp=0.8,
       lty= 1, lwd= 2, col=c("red","blue","yellow"),
       bty = "n",
       seg.len=1, cex=0.8) 
#########################################################3 years ROC comparison
cutoff <- 1095
ROC3= survivalROC(Stime=meta$OS.time, 
                  status=meta$vital_status,      
                  marker = meta$risk_score,     
                  predict.time =  cutoff, method="KM")

meta$risk_score2=0.597*meta_expr$EMP3+0.825*meta_expr$GSX2+0.942*meta_expr$EMILIN3

cutoff <- 1095
ROC3b= survivalROC(Stime=meta$OS.time, 
                   status=meta$vital_status,      
                   marker = meta$risk_score2,     
                   predict.time =  cutoff, method="KM")

meta$risk_score3=0.012*meta_expr$IFI44+0.068*meta_expr$KIF2C+0.035*meta_expr$GNG12+0.02*meta_expr$EMP3

cutoff <- 1095
ROC3c= survivalROC(Stime=meta$OS.time, 
                   status=meta$vital_status,      
                   marker = meta$risk_score3,     
                   predict.time =  cutoff, method="KM")

plot(ROC3$FP, ROC3$TP, type="l",col="red", xlim=c(0,1), ylim=c(0,1),   
     xlab=paste( "FP", "\n", "AUC = ",round(ROC1$AUC,3)), 
     ylab="TP",main="Time dependent ROC 3 years")
abline(0,1)
lines(ROC3b$FP, ROC3b$TP, type="l",col="blue",xlim=c(0,1), ylim=c(0,1))
lines(ROC3c$FP, ROC3c$TP, type="l",col="yellow",xlim=c(0,1), ylim=c(0,1))
legend(0.5,0.4,c(paste("AUC at our study =",round(ROC3$AUC,3)),paste("AUC at Chen X.P et al =",round(ROC3b$AUC,3)),paste("AUC at Chuang Zhang et al =",round(ROC3c$AUC,3))),x.intersp=1, y.intersp=0.8,
       lty= 1, lwd= 2, col=c("red","blue","yellow"),
       bty = "n",
       seg.len=1, cex=0.8) 

#############################################################WGCNA

rm(list=ls())
options(stringsAsFactors = F)


library(WGCNA)
options(stringsAsFactors = FALSE)
enableWGCNAThreads() 
enableWGCNAThreads()


Rdata_dir='./Rdata/'
Figure_dir='./figures/'
load( file =
        file.path(Rdata_dir,'TCGA-LGG-RNAseq-511example.Rdata')
)
load( file =
        file.path(Rdata_dir,'TCGA-LGG-RNAseq-example.Rdata')
)
expr_matirx=merge(meta,expr_matirx,by="sample")[,-(2:10)]
rownames(expr_matirx)=expr_matirx[,1]
expro=t(expr_matirx[,-1])
datTraits=data.frame(meta$vital_status,meta$gender,meta$grade,meta$type,meta$age_group)
rownames(datTraits)=meta[,1]
head(datTraits)

datTraits$gsm=meta[,1]



#######################################Screening for the top 25% of the variance gene
m.vars=apply(expro, 1,var)
expro.upper=expro[which(m.vars>quantile(m.vars, probs = seq( 0, 1, 0.5))[ 2]),]
dim(expro.upper)
datExpr= as.data.frame(t(expro.upper));
nGenes = ncol(datExpr)
nSamples = nrow(datExpr)
gsg = goodSamplesGenes(datExpr, verbose = 3);

gsg$allOK


sampleNames = rownames(datExpr);
traitRows = match(sampleNames, datTraits$gsm)  
rownames(datTraits) = datTraits[traitRows, 6]


powers = c(c(1:10), seq(from = 12, to=20, by=2))
# Call the network topology analysis function
sft = pickSoftThreshold(datExpr, powerVector = powers, verbose = 5)
# Plot the results:
par(mfrow = c(1,2));
cex1 = 0.9;
# Scale-free topology fit index as a function of the soft-thresholding power
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
     xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",
     main = paste("Scale independence"));
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
     labels=powers,cex=cex1,col="red");
# this line corresponds to using an R^2 cut-off of h
abline(h=0.90,col="blue")
# Mean connectivity as a function of the soft-thresholding power
plot(sft$fitIndices[,1], sft$fitIndices[,5],
     xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
     main = paste("Mean connectivity"))
text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")
#Select beta
best_beta=sft$powerEstimate
best_beta
[1] 10


net = blockwiseModules(datExpr, power = sft$powerEstimate,
                       maxBlockSize = 6000,TOMType = "unsigned", 
                       minModuleSize = 30,reassignThreshold = 0, mergeCutHeight = 0.25,
                       numericLabels = TRUE, pamRespectsDendro = FALSE,
                       saveTOMs = TRUE,
                       saveTOMFileBase = "AS-green-FPKM-TOM",
                       verbose = 3)

table(net$colors)

# Convert labels to colors for plotting
mergedColors = labels2colors(net$colors)
table(mergedColors)
# Plot the dendrogram and the module colors underneath
plotDendroAndColors(net$dendrograms[[1]], mergedColors[net$blockGenes[[1]]],
                    "Module colors",
                    dendroLabels = FALSE, hang = 0.03,
                    addGuide = TRUE, guideHang = 0.05)
## assign all of the gene to their corresponding module 
## hclust for the genes.

#Understand the number of samples and the number of genes
nGenes = ncol(datExpr)
nSamples = nrow(datExpr)
#First make a systematic clustering tree for the sample
datExpr_tree<-hclust(dist(datExpr), method = "average")
par(mar = c(0,5,2,0))
plot(datExpr_tree, main = "Sample clustering", sub="", xlab="", cex.lab = 2, 
     cex.axis = 1, cex.main = 1,cex.lab=1)
#Add the corresponding color to the sample matrix of the previous construction
sample_colors <- numbers2colors(as.numeric(factor(datTraits$meta.vital_status)), 
                                colors = c("green","blue"),signed = FALSE)
sample_colors <- numbers2colors(,signed = FALSE)
sample_colors <- numbers2colors(as.numeric(factor(datTraits$meta.grade)), 
                                colors = c("blue","red"),signed = FALSE)
sample_colors <- numbers2colors(as.numeric(factor(datTraits$meta.type)), 
                                colors = c("grey","blue","red"),signed = FALSE)
sample_colors <- numbers2colors(as.numeric(factor(datTraits$meta.age_group)), 
                                colors = c("red","green"),signed = FALSE)

#Constructing a system clustering tree and trait heat map of 10 samples
par(mar = c(1,4,3,1),cex=0.8)
plotDendroAndColors(datExpr_tree, sample_colors,
                    groupLabels = colnames(sample),
                    cex.dendroLabels = 0.8,
                    marAll = c(1, 4, 3, 1),
                    cex.rowText = 0.01,
                    main = "Sample dendrogram and trait heatmap")



moduleColors <- labels2colors(net$colors)
datTraits=data.frame(meta$vital_status,meta$gender,meta$grade,meta$type,meta$age_group)

datTraits=data.frame(meta$vital_status)


datTraits$meta.gender=gsub("FEMALE","0",datTraits$meta.gender)
datTraits$meta.gender=gsub("MALE","1",datTraits$meta.gender)

datTraits$meta.grade=gsub("III","3",datTraits$meta.grade)
datTraits$meta.grade=gsub("II","2",datTraits$meta.grade)

datTraits$meta.type=gsub("Astrocytoma","1",datTraits$meta.type)
datTraits$meta.type=gsub("Oligoastrocytoma","2",datTraits$meta.type)
datTraits$meta.type=gsub("Oligodendroglioma","3",datTraits$meta.type)

datTraits$meta.age_group=gsub("older","2",datTraits$meta.age_group)
datTraits$meta.age_group=gsub("younger","1",datTraits$meta.age_group)
colnames(datTraits)=c("vital_status","gender","grade","histologic type","age")

# Recalculate MEs with color labels
MEs0 = moduleEigengenes(datExpr, moduleColors)$eigengenes
MEs = orderMEs(MEs0); ##ME value matrix for modules of different colors (sample vs module)
moduleTraitCor = cor(MEs, datTraits , use = "p");
moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples)

sizeGrWindow(10,6)
# Will display correlations and their p-values
textMatrix = paste(signif(moduleTraitCor, 2), "\n(",
                   signif(moduleTraitPvalue, 1), ")", sep = "");
dim(textMatrix) = dim(moduleTraitCor)
par(mar = c(1,8.5, 3, 3));
# Display the correlation values within a heatmap plot




View(textMatrix)

moduleTraitCor=moduleTraitCor[,]
labeledHeatmap(Matrix = moduleTraitCor,cex.lab.x = 0.8,
               xLabels = names(datTraits),
               yLabels = names(MEs),
               ySymbols = names(MEs),
               colorLabels = FALSE,
               colors = blueWhiteRed(50),
               textMatrix = textMatrix,
               setStdMargins = FALSE,
               cex.text = 0.45,font.lab.x = 0.5,verticalSeparator.col = 1,verticalSeparator.lty = 0.3,
               zlim = c(-1,1),verticalSeparator.lwd = 0.3,
               main = paste("Module-trait relationships"))
?labeledHeatmap


modNames = substring(names(MEs), 3)
geneModuleMembership = as.data.frame(cor(datExpr, MEs, use = "p"));
## Calculate the Pearson correlation coefficient matrix for each module and gene
## MEs is the value of each module in each sample
## datExpr is the amount of expression of each gene in each sample.
MMPvalue = as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership), nSamples));
names(geneModuleMembership) = paste("MM", modNames, sep="");
names(MMPvalue) = paste("p.MM", modNames, sep="");
head(datTraits)



## Only continuous traits can only be calculated
## Here, the variable belonging to the vital_status phenotype is digitized with 0, 1.
status = as.data.frame(datTraits[,1]);
names(status) = "status"
geneTraitSignificance = as.data.frame(cor(datExpr, status, use = "p"));
GSPvalue = as.data.frame(corPvalueStudent(as.matrix(geneTraitSignificance), nSamples));
names(geneTraitSignificance) = paste("GS.", names(status), sep="");
names(GSPvalue) = paste("p.GS.", names(status), sep="")



module = "yellow"
column = match(module, modNames);
moduleGenes = moduleColors==module;
sizeGrWindow(7, 7);
par(mfrow = c(1,1));
verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]),
                   abs(geneTraitSignificance[moduleGenes, 1]),
                   xlab = paste("Module Membership in", module, "module"),
                   ylab = "Gene significance for status",
                   main = paste("Module membership vs. gene significance\n"),
                   cex.main = 1.2, cex.lab = 1.2, cex.axis = 1.2, col = module)




#First draw a heat map for all genes
nGenes = ncol(datExpr)
nSamples = nrow(datExpr)
geneTree = net$dendrograms[[1]]; 
dissTOM = 1-TOMsimilarityFromExpr(datExpr, power = sft$powerEstimate); # 设置power=sft$powerEstimate，最佳beta值，此处是3
plotTOM = dissTOM^7; 
diag(plotTOM) = NA; 
##TOMplot(plotTOM, geneTree, moduleColors, main = "Network heatmap plot, all genes")

#Then randomly select some genes for mapping 
nSelect = 400
# For reproducibility, we set the random seed
set.seed(10);
select = sample(nGenes, size = nSelect);
selectTOM = dissTOM[select, select];
# There’s no simple way of restricting a clustering tree to a subset of genes, so we must re-cluster.
selectTree = hclust(as.dist(selectTOM), method = "average")
selectColors = moduleColors[select];
# Open a graphical window
sizeGrWindow(9,9)
# Taking the dissimilarity to a power, say 10, makes the plot more informative by effectively changing
# the color palette; setting the diagonal to NA also improves the clarity of the plot
plotDiss = selectTOM^7;
diag(plotDiss) = NA;
TOMplot(plotDiss, selectTree, selectColors, main = "Network heatmap plot, selected genes")             




#Finally draw the relationship between modules and traits
# Recalculate module eigengenes
MEs = moduleEigengenes(datExpr, moduleColors)$eigengenes
## Only continuous traits can only be calculated
## Here, the variable belonging to the vital_status phenotype is digitized with 0, 1.
head(datTraits)
status = as.data.frame(datTraits[,1]);
names(status) = "status"
# Add the weight to existing module eigengenes
MET = orderMEs(cbind(MEs, status))
# Plot the relationships among the eigengenes and the trait
sizeGrWindow(5,7.5);
par(cex = 0.9)
plotEigengeneNetworks(MET, "", marDendro = c(0,4,1,2), marHeatmap = c(3,4,1,2), cex.lab = 0.8, xLabelsAngle
                      = 90)
# Plot the dendrogram
sizeGrWindow(6,6);
par(cex = 1.0)
## Module clustering diagram
plotEigengeneNetworks(MET, "Eigengene dendrogram", marDendro = c(0,4,2,0),
                      plotHeatmaps = FALSE)
# Plot the heatmap matrix (note: this plot will overwrite the dendrogram plot)
par(cex = 1.0)
## Traits and module heat map
plotEigengeneNetworks(MET, "Eigengene adjacency heatmap", marHeatmap = c(3,4,2,2),
                      plotDendrograms = FALSE, xLabelsAngle = 90)



########################################################## Select module
module = "yellow";
# Select module probes
probes = colnames(datExpr) ##  The probe in our example is the gene name.  CRTAC1
inModule = (moduleColors==module);
modProbes = probes[inModule];
View(modProbes)


module = "green";
# Select module probes
probes = colnames(datExpr) ##     WEE1
inModule = (moduleColors==module);
modProbes = probes[inModule];
View(modProbes)

module = "black";
# Select module probes
probes = colnames(datExpr) ##       SEMA4G
inModule = (moduleColors==module);
modProbes = probes[inModule];
View(modProbes)

# Recalculate topological overlap
TOM = TOMsimilarityFromExpr(datExpr, power = sft$powerEstimate); 
# Select module
module = "black";
# Select module probes
probes = colnames(datExpr) ##
View(probes)
inModule = (moduleColors==module);
modProbes = probes[inModule]; 
View(modProbes)
## Extract the gene name of the specified module
# Select the corresponding Topological Overlap
modTOM = TOM[inModule, inModule];
dimnames(modTOM) = list(modProbes, modProbes)



vis = exportNetworkToVisANT(modTOM,
                            file = paste("VisANTInput-", module, ".txt", sep=""),
                            weighted = TRUE,
                            threshold = 0)


cyt = exportNetworkToCytoscape(
  modTOM,
  edgeFile = paste("CytoscapeInputSEMA4G-edges-", paste(module, collapse="-"), ".txt", sep=""),
  nodeFile = paste("CytoscapeInputSEA4G-nodes-", paste(module, collapse="-"), ".txt", sep=""),
  weighted = TRUE,
  threshold = 0.02,
  nodeNames = modProbes, 
  nodeAttr = moduleColors[inModule]
);



#######
# (1) Intramodular connectivity

# moduleColors <- labels2colors(net$colors)
connet=abs(cor(datExpr,use="p"))^6
Alldegrees1=intramodularConnectivity(connet, moduleColors)
head(Alldegrees1)


#####Relationship between connectivity within modules and GENE saliency
# (2) Relationship between gene significance and intramodular connectivity
which.module="green"
status= as.data.frame(datTraits[,1]); # change specific 
names(status) = "status"
GS1 = as.numeric(cor(status,datExpr, use="p"))
GeneSignificance=abs(GS1)
colorlevels=unique(moduleColors)
sizeGrWindow(9,6)
par(mfrow=c(2,as.integer(0.5+length(colorlevels)/2)))
par(mar = c(4,5,3,1))
for (i in c(1:length(colorlevels)))
{
  whichmodule=colorlevels[[i]];
  restrict1 = (moduleColors==whichmodule);
  verboseScatterplot(Alldegrees1$kWithin[restrict1],
                     GeneSignificance[restrict1], col=moduleColors[restrict1],
                     main=whichmodule,
                     xlab = "Connectivity", ylab = "Gene Significance", abline = TRUE)
}


######Calculate the connectivity of all genes in the module, and screen hub genes
###(3) Generalizing intramodular connectivity for all genes on the array
datKME=signedKME(datExpr, MEs, outputColumnName="MM.")
# Display the first few rows of the data frame
head(datKME)
##Finding genes with high gene significance and high intramodular connectivity in
# interesting modules
# abs(GS1)> .9 You can adjust the parameters according to the actual situation.
# abs(datKME$MM.black)>.8 At least greater than >0.8
FilterGenes= abs(GS1)> .9 & abs(datKME$MM.black)>.8
table(FilterGenes)


#############################################################
plotMEpairs(MEs,y=datTraits$cellType)



#par(mfrow=c(3,1), mar=c(1, 2, 4, 1))
# for black module
which.module="green";
plotMat(t(scale(datExpr[,moduleColors==which.module ]) ),nrgcols=30,rlabels=T,
        clabels=T,rcols=which.module,
        title=which.module )



sizeGrWindow(8,7);
which.module="green"
ME=MEs[, paste("ME",which.module, sep="")]
par(mfrow=c(2,1), mar=c(0.3, 5.5, 3, 2))
plotMat(t(scale(datExpr[,moduleColors==which.module ]) ),
        nrgcols=30,rlabels=F,rcols=which.module,
        main=which.module, cex.main=2)
par(mar=c(5, 4.2, 0, 0.7))
barplot(ME, col=which.module, main="", cex.main=2,
        ylab="eigengene expression",xlab="MPP")


CRTAC1=read.table(file="CytoscapeInputCRTAC1-edges-yellow.txt",sep="\t",header = T,check.names = F)

a=CRTAC1[grep(pattern="CRTAC1",CRTAC1[,1]),]
b=CRTAC1[grep(pattern="CRTAC1",CRTAC1[,2]),]
c=CRTAC1[grep(pattern="WEE1",CRTAC1[,1]),]
d=CRTAC1[grep(pattern="WEE1",CRTAC1[,2]),]
e=CRTAC1[grep(pattern="SEMA4G",CRTAC1[,1]),]
f=CRTAC1[grep(pattern="SEMA4G",CRTAC1[,2]),]
CRTAC1_relate=rbind(a,b,c,d,e,f)[,c(1,2)]
CRTAC1_relate=c(CRTAC1_relate$fromNode,CRTAC1_relate$toNode)
CRTAC1_relate=as.data.frame(unique(CRTAC1_relate))
names(CRTAC1_relate)=c("symble")
write.table(CRTAC1_relate,"CRTAC1_relate.txt",quote = F,sep = "\t",col.names = T,row.names = F)


WEE1=read.table(file="CytoscapeInputWEE1-edges-green.txt",sep="\t",header = T,check.names = F)

a2=WEE1[grep(pattern="CRTAC1",WEE1[,1]),]
b2=WEE1[grep(pattern="CRTAC1",WEE1[,2]),]
c2=WEE1[grep(pattern="WEE1",WEE1[,1]),]
d2=WEE1[grep(pattern="WEE1",WEE1[,2]),]
e2=WEE1[grep(pattern="SEMA4G",WEE1[,1]),]
f2=WEE1[grep(pattern="SEMA4G",WEE1[,2]),]

WEE1_relate=rbind(a2,b2,c2,d2,e2,f2)[,c(1,2)]
WEE1_relate=c(WEE1_relate$fromNode,WEE1_relate$toNode)
WEE1_relate=as.data.frame(unique(WEE1_relate))
names(WEE1_relate)=c("symble")
write.table(WEE1_relate,"WEE1_relate.txt",quote = F,sep = "\t",col.names = T,row.names = F)



SEMA4G=read.table(file="CytoscapeInputSEMA4G-edges-black.txt",sep="\t",header = T,check.names = F)

a3=SEMA4G[grep(pattern="CRTAC1",SEMA4G[,1]),]
b3=SEMA4G[grep(pattern="CRTAC1",SEMA4G[,2]),]
c3=SEMA4G[grep(pattern="WEE1",SEMA4G[,1]),]
d3=SEMA4G[grep(pattern="WEE1",SEMA4G[,2]),]
e3=SEMA4G[grep(pattern="SEMA4G",SEMA4G[,1]),]
f3=SEMA4G[grep(pattern="SEMA4G",SEMA4G[,2]),]

SEMA4G_relate=rbind(a3,b3,c3,d3,e3,f3)[,c(1,2)]
SEMA4G_relate=c(SEMA4G_relate$fromNode,SEMA4G_relate$toNode)
SEMA4G_relate=as.data.frame(unique(SEMA4G_relate))
names(SEMA4G_relate)=c("symble")
write.table(SEMA4G_relate,"SEMA4G_relate.txt",quote = F,sep = "\t",col.names = T,row.names = F)

merge=rbind(a,b,c,d,e,f,a2,b2,c2,d2,e2,f2,a3,b3,c3,d3,e3,f3)

merge2=rbind(SEMA4G,WEE1,CRTAC1)

a4=merge2[grep(pattern="CRTAC1",merge2[,1]),]
b4=merge2[grep(pattern="CRTAC1",merge2[,2]),]
c4=merge2[grep(pattern="WEE1",merge2[,1]),]
d4=merge2[grep(pattern="WEE1",merge2[,2]),]
e4=merge2[grep(pattern="SEMA4G",merge2[,1]),]
f4=merge2[grep(pattern="SEMA4G",merge2[,2]),]

merge3=rbind(a4,b4,c4,d4,e4,f4)


write.table(merge,"merge.txt",quote = F,sep = "\t",col.names = T,row.names = F)

write.table(merge2,"merge2.txt",quote = F,sep = "\t",col.names = T,row.names = F)




##table 1 P
tableAge=matrix(c(232,86,224,73),nrow = 2,ncol = 2)
chisq.test(tableAge)

tableGender=matrix(c(210,63,246,96),nrow = 2,ncol = 2)
chisq.test(tableGender)

tableGrade=matrix(c(221,90,235,69),nrow = 2,ncol = 2)
chisq.test(tableGrade)


tableRisk=matrix(c(228,79,228,80),nrow = 2,ncol = 2)
chisq.test(tableRisk)

tableRT=matrix(c(280,141,176,18),nrow = 2,ncol = 2)
chisq.test(tableRT)


tableVS=matrix(c(341,77,115,82),nrow = 2,ncol = 2)
chisq.test(tableVS)


###########
t_value <- (43.4 - 40.7) / sqrt((((456-1)*(13.3^2) + (159-1)*(10.9^2)) / (456+159-2))*(1/456 + 1/159)) 
t_value

##
456+159-2


p <- pt(t_value, df = 613) 
p


t_value <- (998 - 2024.7) / sqrt((((456-1)*(953.8^2) + (159-1)*(1334.3^2)) / (456+159-2))*(1/456 + 1/159)) 
t_value

##
456+159-2


p <- pt(t_value, df = 613) 
p
#######################################GO/KEGG
rm(list = ls())
library(ggplot2)
BiocManager::install("clusterProfiler")
BiocManager::install("ggraph")
library(clusterProfiler)
library(org.Hs.eg.db)

BiocManager::install("cowplot")
BiocManager::install("GOplot")

library("customLayout")
library("GOplot")

a=read.table(file = "SEMA4G.txt",header = T,sep = "\t",check.names = F)
df <- bitr(unique(a$symbol), fromType = "SYMBOL",
           toType = c( "ENTREZID"),
           OrgDb = org.Hs.eg.db)
head(df)
save(df,file = 'anno_DEG.Rdata')


gene_up= df[,'ENTREZID'] 

grobs=list()
go1<- enrichGO(gene_up, OrgDb = org.Hs.eg.db, ont='CC',pAdjustMethod = 'BH',pvalueCutoff = 0.05, 
               qvalueCutoff = 0.05,keyType = 'ENTREZID')
head(go1)



barplot(go1,showCategory=5,title ="CC")
dotplot()

go2<- enrichGO(gene_up, OrgDb = org.Hs.eg.db, ont='BP',pAdjustMethod = 'BH',pvalueCutoff = 0.05, 
               qvalueCutoff = 0.05,keyType = 'ENTREZID')

barplot(go2,showCategory=5,title="BP")



grob=list()
go3<- enrichGO(gene_up, OrgDb = org.Hs.eg.db, ont='MF',pAdjustMethod = 'BH',pvalueCutoff = 0.05, 
               qvalueCutoff = 0.05,keyType = 'ENTREZID')
barplot(go3,showCategory=5,title = "MF")

dim(go)

dim(go[go$ONTOLOGY=='BP',])

dim(go[go$ONTOLOGY=='CC',])

dim(go[go$ONTOLOGY=='MF',])
#########################################################################
rm(list = ls())
library(ggplot2)
BiocManager::install("clusterProfiler")
BiocManager::install("ggraph")
library(clusterProfiler)
library(org.Hs.eg.db)

BiocManager::install("cowplot")
BiocManager::install("GOplot")

library("customLayout")
library("GOplot")


######### file "go-kegg.txt" is a collection of "SEMA4G_relate.txt"、“WEE1_relate.txt”、"CRTAC1_relate.txt" 
a=read.table(file = "go-kegg.txt",header = T,sep = "\t",check.names = F)
names(a)=c("symbol")
df <- bitr(unique(a$symbol), fromType = "SYMBOL",
           toType = c( "ENTREZID"),
           OrgDb = org.Hs.eg.db)
head(df)
save(df,file = 'anno_DEG-CRTAC1.Rdata')


gene_up= df[,'ENTREZID'] 

grobs=list()
go1<- enrichGO(gene_up, OrgDb = org.Hs.eg.db, ont='CC',pAdjustMethod = 'BH',pvalueCutoff = 0.05, 
               qvalueCutoff = 0.05,keyType = 'ENTREZID')
head(go)



barplot(go1,showCategory=5,title ="CC")
dotplot()

go2<- enrichGO(gene_up, OrgDb = org.Hs.eg.db, ont='BP',pAdjustMethod = 'BH',pvalueCutoff = 0.05, 
               qvalueCutoff = 0.05,keyType = 'ENTREZID')

barplot(go2,showCategory=5,title="BP")



grob=list()
go3<- enrichGO(gene_up, OrgDb = org.Hs.eg.db, ont='MF',pAdjustMethod = 'BH',pvalueCutoff = 0.05, 
               qvalueCutoff = 0.05,keyType = 'ENTREZID')
barplot(go3,showCategory=5,title = "MF")

dim(go)

dim(go[go$ONTOLOGY=='BP',])

dim(go[go$ONTOLOGY=='CC',])

dim(go[go$ONTOLOGY=='MF',])


kegg<- enrichKEGG(gene_up, organism = 'hsa', keyType = 'kegg', pvalueCutoff = 0.05,pAdjustMethod = 'BH', 
                  qvalueCutoff = 0.05,use_internal_data = FALSE)
head(kegg)
dotplot(kegg, showCategory=5,title = "KEGG")