rm( list = ls() )
gc()


{
  GSE = 'GSE200306' 
  disease = "lupus nephritis"
  platform = 'GPL21847' 
  
  setwd( "/Users/jiesbidy/Documents/total/20230730" ) 
  
}

#install.packages("devtools")
library(devtools)
#install.packages("Rcpp")  
library(Rcpp)
#install_github("hadley/readxl")

library(readxl) # 

library(showtext) # 
showtext_auto(enable=T) #


library(GEOquery) # 
getOption('timeout')
options(timeout=300)

gset2 = getGEO( GSE, AnnotGPL = T )

save(gset2, file = paste(GSE,"_eSet2.Rdata",sep = ""))

xxx=gset2[["GSE200306_series_matrix.txt.gz"]]@assayData[["exprs"]]


library(GEOquery) 
#gset3 = getGEO(filename = './GSE2372_series_matrix.txt.gz', AnnotGPL = T)  
gset4 = getGEO(filename = './GSE200306_series_matrix.txt', AnnotGPL = T)

save(gset4, file = paste(GSE,"_eSet3.Rdata",sep = "") )

#paste(GSE,"_eSet3.Rdata",sep = "") 


### 1.1 
#sampleinfo = pData(gset[[1]] ) #
#sampleinfo = pData(gset2[[1]] )#
#sampleinfo = pData(gset3 )
sampleinfo = pData(gset4 )

#sampleinfo = gset2[[1]]@phenoData@data
sampleinfo = gset4@phenoData@data

head(sampleinfo)


library(tidyverse)

term_x = sampleinfo1$source_name_ch1 
table(term_x)

#sort(c( "WT", "apoE" ) )

#level_x = c( "WT", "apoE" )
#contrast = c( "apoE-WT" )


library(openxlsx)
write.csv(sampleinfo, file = "sampleinfo.csv")
#write.xlsx(sampleinfo, file = "xxx.csv")


sampleinfo = sampleinfo %>% #
  mutate( Group = "HC" ) %>% #
  mutate( Group = ifelse( grepl('LN',term_x), "LN", Group ) ) %>% 
  
  mutate( Group = factor( Group, levels = level_x, ordered = T ) ) %>% # 
  
  mutate( GSE = GSE ) %>%
  mutate( Disease = disease ) %>%
  mutate( Platform = platform ) %>%
  arrange( Group ) # 

table(sampleinfo$Group )

save(sampleinfo, level_x, contrast, file = paste('1.1__',GSE,'.Rdata',sep = "") )


### 1.2 

# 
#exprset1 = exprs(gset[[1]] ) 
#exprset1 = exprs(gset2[[1]] ) # GEO querry 
#exprset1 = exprs(gset3 ) # 
exprset1 = exprs(gset4 ) # 

### 
summary( as.numeric( unlist(exprset1) ) )
table( is.na(exprset1) ) # 
table( exprset1 < 0 ) 
exprset1 <- na.omit(exprset1)
dim(exprset1)
#non_NA <- !is.na(exprset1[,1])
#exprset1<-exprset1[non_NA,]     #
#summary(non_NA)
# 

#dim(exprset1)
#non_NA <- !is.na(exprset1[,1])
#exprset1<-exprset1[non_NA,]     #
#summary(non_NA)


hist(unlist( exprset1 )) # 
hist(unlist( exprset1 ), breaks = 100 )


#hist(unlist( exprset2 ))
#hist(unlist( log2(exprset2) ))

# exprset1 = log2(exprset1+1) # 
# hist(unlist( log2(exprset1) ), breaks = 100 )


identical( colnames( exprset1), sampleinfo1$geo_accession ) # 

exprset1 = as.matrix( exprset1[ , sampleinfo1$geo_accession ] )  # 
identical( rownames(sampleinfo1), colnames(exprset1))
colnames(exprset1) = sampleinfo1$geo_accession


save( GSE,exprset1, file = paste('1.2__',GSE,'.Rdata',sep = ""))
rm( gset, eset, term_x, eset_rma, gset, gset2, gset3, gset4 )
write.csv(exprset1, file = paste('1.2__',GSE,'.csv',sep = ""))

#log2(exprset2+1) #

#write.csv(exprset1, file = ".csv") #


### 1.3 
boxplot( exprset1 )
exprset2=exprset1
# 
library(limma)
exprset2 = normalizeBetweenArrays( exprset2, method = "quantile" ) # normalizeBetweenArrays 
#exprset2 = exprset1[,-c(80:181)]    # 
  #write.csv(exprset2, file = "exprset2.csv")    
boxplot( exprset2 )

install.packages("magrittr") # 
library(magrittr) # needs to be run every time you start R and want to use %>%
library(ggsci)  #
#n_select = 5000   # 
sample( 1:nrow(exprset2))
expr_l = exprset2[ sample(1:nrow(exprset2)) , ] %>% # 
  as.data.frame(  ) %>% 
  gather(  )  %>%  # 
  setNames( c( 'Sample','Expression') ) %>% 
  mutate( Group = sampleinfo1$source_name_ch1[ match( Sample, sampleinfo1$geo_accession ) ] ) %>%  # 
  arrange( Group ) %>% # 
  mutate( Sample = factor(Sample, levels = unique(Sample), ordered = T ) ) # 

unique(expr_l$Sample) # 


#figure
p1 = ggplot(expr_l,aes(x= Sample, y= Expression, fill= Group, color = Group))+ 
  geom_boxplot(outlier.shape = 0.05,outlier.size = 0.05) +
  theme_light() +
  ggtitle("Expression levels of samples") +
  theme(
    plot.title = element_text( hjust = 0.5 ),
    axis.text.x = element_blank(),
    plot.background = element_rect(fill = "transparent",colour = NA)
    ) +
  scale_color_nejm(alpha = 1)+
  scale_fill_nejm(alpha = 0.8)
p1
ggsave( p1, filename = paste('1.3__',GSE,'.pdf',sep = ""), width = 6,height = 3)
ggsave( filename = paste('1.3__',GSE,'.pdf',sep = ""), width = 6,height = 3)
rm(n_select, expr_l)


### 1.4 

library(paletteer)
hclust1 <- hclust(dist(t(exprset2)), method = "complete") # hclust 
plot(hclust1, hang = -1)


library(ggfortify) 
library(ggtree)
cut_tree = length( level_5 )  # 
cut = cutree(hclust1, cut_tree) 
cut = data.frame(label = names(cut), 
                 member = factor(cut) )
cut$Group = sampleinfo1$Group

p2 = ggtree(hclust1, ladderize = FALSE, branch.length = "none") %<+% cut + 
  geom_tippoint(size=2, shape = 16, show.legend = T, alpha = 0.8,
                aes(color = Group)) + 
  # geom_tiplab(aes(label=Group), size=3, hjust=.5) +
  theme_light() +
  ggtitle("hClust of samples") +
  ylab("Sample") +
  theme(
    plot.title = element_text( hjust = 0.5 ),
    plot.background = element_rect(fill = "transparent",colour = NA)
  ) +
  coord_flip()+scale_x_reverse() +
  scale_color_nejm(alpha = 0.8) +
  guides(color = guide_legend(override.aes = list(size = 4)))
#p2
ggsave(p2, filename = paste("1.4__",GSE,".pdf",sep=""),width = 6, height = 3) 

rm(hclust1, cut_tree, cut )

  
### 1.5 PCA----
pca1 = prcomp( as.data.frame( t( exprset2 ) ) ) # prcomp

xxx = pca1[["x"]]

p3 = autoplot( pca1,
         data = sampleinfo1, colour = "source_name_ch1",
         size=2.5, frame = TRUE, frame.type = 'norm')+
  theme_light() +
  ggtitle("PCA of samples") +
  theme(
    plot.title = element_text( hjust = 0.5 ),
    plot.background = element_rect(fill = "transparent",colour = NA)
  ) +
  scale_color_nejm(alpha = 0.8) +
  scale_fill_nejm(alpha = 0.8) 
#p3
ggsave(p3, filename =paste("1.5_PCA_",GSE,".pdf",sep=""),width = 5.2, height = 4)

rm( pca1 )

### 1.6 
sd = exprset2 %>%
  as.data.frame() %>%
  mutate( SD = apply(., 1, sd ) ) %>%
  # arrange( desc(SD) ) %>%
  slice_max( SD, n= 400 ) %>% 
  dplyr::select( 1:ncol(exprset2) )


library(pheatmap) # 
library(ggplotify) # 
legend_col = data.frame( row.names = sampleinfo1$geo_accession,
                         Group = sampleinfo1$source_name_ch1 )  # 
bk = 2 # 
brk <- c(seq(-bk,-0.01,by=0.01),seq(0,bk,by=0.01))
heat1 = pheatmap(sd,
         scale = "row", # 
         annotation_col = legend_col, #
         color = c(colorRampPalette(colors = c("dodgerblue4","white"))(length(brk)/2),
                   colorRampPalette(colors = c("white","brown"))(length(brk)/2)),
         legend_breaks=seq(-bk,bk,1),
         breaks=brk,
         border_color = NA,
         show_rownames = F,
         show_colnames = F
         )
p4 = as.ggplot(heat1)+
  ggtitle('Heatmap of Total Gene')+  
  xlab('Sample') + ylab('Gene')+
  theme(
    plot.title = element_text(hjust = 0.4),
    plot.background = element_rect(fill = "transparent",colour = NA)
    ) 
p4
ggsave(p4, filename = paste("1.6__",GSE,".pdf",sep= ""),width = 5.2, height = 4)
rm(sd, legend_col, bk, brk, heat1 )

library(patchwork) # 
pp = (p1/p4)  + plot_annotation( tag_levels = "A" )#pp = (p1 / p2 / p3 ) | ( p4) + plot_annotation( tag_levels = "A" )
pp

rm( sd, pp, p1,p2,p3,p4)

#p3|p4 #
#p3/p4 #

ggsave(pp, filename = paste("1.6__",GSE,".pdf",sep= ""),width = 10, height = 8)


## 3、
library( limma )


### 3.1 
# 
contrast <- table(sampleinfo1$source_name_ch1) #sampleinfo$Group
contrast


# 
design <- model.matrix(~0+factor(sampleinfo1$source_name_ch1) ) %>%
  as.data.frame() %>% 
  setNames( unique(sampleinfo1$source_name_ch1) )
rownames(design) = sampleinfo1$geo_accession
design

#  c("A-B","C-B","A-C") #

contrast.matrix <-makeContrasts( contrasts = "HC-LN", levels = design ) # 
contrast.matrix
#

### 3.2 
degs_temp <- lmFit( exprset2, design ) %>% # Fit linear model
  contrasts.fit( contrast.matrix ) %>% # compute estimated coefficients and standard errors for a given set of contrasts
  eBayes %>% # compute moderated t-statistics, moderated F-statistic, and log-odds of differential expression
  topTable( coef = 1, n = Inf ) %>% # coef = 1 
  na.omit %>% # 
  setNames( c("log2FC", "Mean.Expr", "t", "P.value", "adj.P", "B" ) ) %>%
  mutate( probe_id = rownames(.) ) 

logFC_cut = with(degs_temp, mean(abs(log2FC))+2*sd(abs(log2FC))) # 


degs_temp = degs_temp %>%
  mutate( DEG = factor( ifelse( abs(degs_temp$log2FC) > logFC_cut & degs_temp$P.value < 0.05,
                               ifelse(degs_temp$log2FC > 0, 'up','down'),
                               'ns' ), 
                        levels = c("up", "ns", "down" ), ordered = T ) )
table( degs_temp$DEG )

save(degs_temp, file = paste('3.2__',GSE,'.Rdata',sep = ""))

rm( design, contrast.matrix )

del <- which(degs_temp$DEG=="ns") # 
degs_temp1 <- degs_temp[-del,]
save(degs_temp1, file = paste('3.2_significant_',GSE,'.Rdata',sep = ""))                    
write.csv(degs_temp1, file = "significant.csv") #

### 3.3 
p5 = ggplot(data = degs_temp,
       aes(x = log2FC, y = -log10(P.value), color = DEG)) +
  # ylim(0,25)+ #
  #  xlim(-0.005,0.005)+ #
  geom_point(size = 1, shape = 16, alpha = 0.9) +
  ggtitle('Overall distribution of DEGs') +
  theme_light() +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.background = element_rect(fill = "transparent",colour = NA)
  ) +
  scale_color_manual(values = c('brown','grey','dodgerblue3' ))+
  guides(color = guide_legend(override.aes = list(size = 4))) +
  geom_vline( xintercept = c(-logFC_cut, logFC_cut), linetype = "dashed" ) +
  geom_hline( yintercept = -log10(0.05), linetype = "dashed" )
p5
ggsave( p5, filename = paste("3.3_DEG_",GSE,".pdf",sep= ""),width = 4, height = 4)

rm( logFC_cut )


### 3.4 
# 
chs_x = degs_temp1 %>%
  slice_max( abs(log2FC), n = 522 ) 
  slice_min( P.value, n = 522 ) 

# 
heat_matrix = exprset2[ chs_x$probe_id, ]

legend_col = data.frame( row.names = sampleinfo1$geo_accession,
                         Group = sampleinfo1$source_name_ch1 )  # 
bk = 2 # 
brk <- c(seq(-bk,-0.01,by=0.01),seq(0,bk,by=0.01))
heat1 = pheatmap(heat_matrix,
                 scale = "row",
                 annotation_col = legend_col,
                 color = c(colorRampPalette(colors = c("dodgerblue4","white"))(length(brk)/2),
                           colorRampPalette(colors = c("white","brown"))(length(brk)/2)),
                 legend_breaks=seq(-bk,bk,1),
                 breaks=brk,
                 treeheight_row = 40,
                 treeheight_col = 40,
                 border_color = NA,
                 cluster_cols = F,
                 show_rownames = T,
                 show_colnames = F
)
p6 = as.ggplot(heat1)+
  ggtitle('Heatmap of significant DEGs')+  
  xlab('Sample') + ylab('Gene') +
  theme(
    plot.title = element_text(hjust = 0.4),
    plot.background = element_rect(fill = "transparent",colour = NA)
  )

p6

ggsave( p6, filename = paste("sig-DEG热图_",GSE,".pdf",sep= ""),width = 6, height = 3.0)
rm(chs_x, heat_matrix, legend_col, bk, brk, heat1 )


pp = (p5 | p6) + plot_annotation( tag_levels = "A" )
pp

rm(pp, p1,p2,p3,p4,p5,p6)


## 4、

### 4.1 
library(AnnoProbe) #

gpl_anno = getGEO(filename = './GPL21847_family.soft.gz', AnnotGPL = T) #

gpl_anno = idmap( "GPL2184" ) %>% #
  as.data.frame() %>%
  distinct( probe_id, .keep_all = T ) # 
rownames(gpl_anno) = gpl_anno$probe_id
head( gpl_anno )

# 
exprset2 = exprset2[rownames(exprset1) %in% gpl_anno$probe_id,] #  
dim(exprset2)
gpl_anno = gpl_anno[gpl_anno$probe_id %in% rownames(exprset2),] #  

# 
# xxx = checkGeneSymbols( gpl_anno$symbol, species="mouse")
# table(xxx$Approved == "FALSE" & xxx$Suggested.Symbol != "NA")
# identical(gpl_anno$symbol, xxx$x)
# pos = which(xxx$Approved == "FALSE" & xxx$Suggested.Symbol != "NA")
# gpl_anno$symbol[pos] = xxx$Suggested.Symbol[pos]

### 4.2 
identical( gpl_anno$probe_id, rownames(exprset2))
gpl_anno = gpl_anno[rownames(exprset2),] # 

identical( gpl_anno$probe_id, rownames(exprset2))
exprset2 = cbind(gpl_anno,exprset2)

save(exprset2,file = paste('4.2__',GSE,'.Rdata',sep = ""))
# write.csv(exprset2,file = paste('4.2__',GSE,'.csv',sep = ""))

###  
degs_anno = degs_temp %>% 
  filter( rownames(.) %in% gpl_anno$probe_id )
gpl_anno2 = gpl_anno[ degs_anno$probe_id, ] # 

identical( gpl_anno2$probe_id,rownames(degs_anno))
degs_anno = cbind( gpl_anno2,degs_anno) %>%
  dplyr::select(-1)

save( degs_anno,file = paste('4.2_DEG_',GSE,'.Rdata',sep = ""))
# write.csv(degs_anno,file = paste('4.2_DEGs__',GSE,'.csv',sep = ""))

### 4.4 
summary( degs_anno$Mean.Expr)
plot(table(sort(table(degs_anno$symbol))))
table( is.na(degs_anno$P.value) )
table( degs_anno$P.value == 0 )

# 
degs_final = degs_anno %>% 
  group_by( symbol ) %>%
  mutate( mean = mean(Mean.Expr) ) %>% 
  group_by( ) %>% # 
  filter( Mean.Expr >= mean | Mean.Expr >= mean(Mean.Expr) ) %>%
  arrange( P.value ) %>%
  distinct( symbol, .keep_all = T ) # 
  
save(degs_final,file = paste('4.4_DEG_',GSE,'.Rdata',sep = ""))
#write.csv(degs_final,file = paste('4.4_DEGs__',GSE,'.csv',sep = ""))
# 
exprset3 = exprset1[ degs_final$probe_id, ] 
identical( degs_final$probe_id, rownames(exprset3))
#
rownames(exprset3) = degs_final$symbol

save(exprset3,file = paste('4.4__',GSE,'.Rdata',sep = "") )

rm(pos, exprset1, exprset2, degs_temp, degs_anno, gpl_anno, gpl_anno2, xxx )


## 4.5 
load( paste('4.4_DEG_',GSE,'.Rdata',sep = "") )
summary(degs_final$Mean.Expr)
summary(abs(degs_final$log2FC))


# 
qu1 = quantile(degs_final$Mean.Expr, probs = 0.25 ) # 
fc1 = mean(abs(degs_final$log2FC))  # Log2FC：
qu1;fc1


### 
library(org.Hs.eg.db) # 
library(clusterProfiler) # 


#
degs_top = degs_temp1 
  #filter( Mean.Expr > qu1 & abs(log2FC) > fc1 & P.value < 0.05 ) %>%  # 
  #arrange( P.value ) %>% # 按 p 
  #arrange( desc( abs(log2FC) ) ) %>%
  #pull(probe_id ) %>% # 
  #degs_top=degs_top[,c(1,7)]
  genes <- as.vector(degs_top$probe_id ) 
  entrezIDs <- mget(genes, org.Hs.egSYMBOL2EG, ifnotfound=NA)    #?ҳ???????Ӧ??id
  entrezIDs <- as.character(entrezIDs)
  out=cbind(degs_top,entrezID=entrezIDs)
  write.table(out,file="entrezID.txt",sep="\t",quote=F,row.names=F)
  save(out,file = paste('4.5_DEG+entrezID_',GSE,'.Rdata',sep = ""))


# /////////////////////////----
# //  二、（enrichment）----
# /////////////////////////----


## e1.3
library(clusterProfiler) 
library(enrichplot) 
library("org.Hs.eg.db")
library("ggplot2")

#load( paste('4.5_Top300DEG_',GSE,'.Rdata',sep = "") )
### e1.3 GO
go_data <- enrichGO( gene = out$ENTREZID, # enrichGO 
                     OrgDb="org.Hs.eg.db", # 
                    ont ="ALL", # 
                   pvalueCutoff = 0.05, #
                    qvalueCutoff = 1,
                    readable= TRUE # 
                    ) %>% 
  clusterProfiler::simplify() %>% # 
  pairwise_termsim() # 

go_data = go_data@result
write.csv(go_data, file = "go_data.csv")


#GO
#install.packages("colorspace")
#install.packages("stringi")
#install.packages("ggplot2")

#if (!requireNamespace("BiocManager", quietly = TRUE))
#    install.packages("BiocManager")
#BiocManager::install("clusterProfiler")
#BiocManager::install("enrichplot")


#???ð?
library("clusterProfiler")
library("org.Hs.eg.db")
library("enrichplot")
library("ggplot2")
#out = out %>%
pvalueFilter=0.05         #pֵ????????
qvalueFilter=0.05         #????????pֵ????????

#setwd("/Users/jiesbidy/Desktop/GO")         #???ù???Ŀ¼
rt=read.table("entrezID.txt",sep="\t",header=T,check.names=F)      #??ȡid.txt?ļ?
rt=rt[is.na(rt[,"entrezID"])==F,]                            #ȥ??????idΪNA?Ļ???
gene=rt$entrezID
geneFC=rt$log2FC
#geneFC=2^geneFC
names(geneFC)=gene

#??????ɫ????
colorSel="qvalue"
if(qvalueFilter>0.05){
  colorSel="pvalue"
}

#GO????????
kk=enrichGO(gene = gene,OrgDb = org.Hs.eg.db, pvalueCutoff =1, qvalueCutoff = 1, ont="all", readable =T)
GO=as.data.frame(kk)
GO=GO[(GO$pvalue<pvalueFilter & GO$qvalue<qvalueFilter),]
#
write.table(GO,file="GO.txt",sep="\t",quote=F,row.names = F)

#??????ʾTerm??Ŀ
showNum=10
if(nrow(GO)<30){
  showNum=nrow(GO)
}

#??״ͼ
pdf(file="barplot.pdf",width = 9,height =10)
bar=barplot(kk, drop = TRUE, showCategory =showNum,split="ONTOLOGY",color = colorSel) + facet_grid(ONTOLOGY~., scale='free')
print(bar)
dev.off()

#????ͼ
pdf(file="bubble.pdf",width = 9,height =10)
bub=dotplot(kk,showCategory = showNum, orderBy = "GeneRatio",split="ONTOLOGY", color = colorSel) + facet_grid(ONTOLOGY~., scale='free')
print(bub)
dev.off()

### e1.4 KEGG
kegg_data <- enrichKEGG(gene = degs_top$ENTREZID, 
                        keyType = "kegg",
                        organism   = 'hsa', # mmu 为 mouse， hsa
                        pvalueCutoff = 1, # 
                        qvalueCutoff = 1,
                        use_internal_data = F #
                        ) %>%
  setReadable( ., OrgDb = 'org.Hs.eg.db', keyType = 'ENTREZID' )  %>% # 
  pairwise_termsim() # emap

save( go_data, kegg_data, file = paste('e1.4__',GSE,'.Rdata',sep = ""))

kegg_data=kegg_data@result
write.csv(kegg_data, file = "kegg_data.csv")

library(DOSE)
enrichDO()
enrichWP()

#KEGG
library("clusterProfiler")
library("org.Hs.eg.db")
library("enrichplot")
library("ggplot2")

pvalueFilter=0.05       
qvalueFilter=0.05        #????????pֵ????????

#setwd("/Users/jiesbidy/Desktop/KEGG")        
rt=read.table("entrezID.txt",sep="\t",header=T,check.names=F)    #??ȡid.txt?ļ?
rt=rt[is.na(rt[,"entrezID"])==F,]                          #ȥ??????idΪNA?Ļ???
colnames(rt)[1]="Gene"
gene=rt$entrezID
geneFC=rt$log2FC
#geneFC=2^geneFC
names(geneFC)=gene

#??????ɫ????
colorSel="qvalue"
if(qvalueFilter>0.05){
  colorSel="pvalue"
}

#kegg????????
kk <- enrichKEGG(gene = gene, organism = "hsa", pvalueCutoff =1, qvalueCutoff =1)
KEGG=as.data.frame(kk)
KEGG$geneID=as.character(sapply(KEGG$geneID,function(x)paste(rt$Gene[match(strsplit(x,"/")[[1]],as.character(rt$entrezID))],collapse="/")))
KEGG=KEGG[(KEGG$pvalue<pvalueFilter & KEGG$qvalue<qvalueFilter),]
#???渻??????
write.table(KEGG,file="KEGG.txt",sep="\t",quote=F,row.names = F)

#??????ʾTerm??Ŀ
showNum=30
if(nrow(KEGG)<showNum){
  showNum=nrow(KEGG)
}

#??״ͼ
pdf(file="barplot-kegg.pdf",width = 9,height = 7)
KEGGbar=barplot(kk, drop = TRUE, showCategory = showNum, color = colorSel)
print(KEGGbar)
dev.off()

#????ͼ
pdf(file="bubble-kegg.pdf",width = 9,height = 7)
KEGGbub=dotplot(kk, showCategory = showNum, orderBy = "GeneRatio",color = colorSel)
print(KEGGbub)
dev.off()

library(patchwork) # 

pp = (p1/p6)|(p5)+ plot_annotation( tag_levels = "A" )#pp = (p1 / p2 / p3 ) | ( p4) + plot_annotation( tag_levels = "A" )
pp1=pp/(bub|KEGGbub) 
ggsave( pp1, filename = paste("_",GSE,".pdf",sep= ""),width = 5.2, height = 3.5)
rm( sd, pp, p1,p2,p3,p4)


### e1.5 
dotplot(kegg_data, showCategory= 20, title="Top enriched terms") + 
  theme_light()

emapplot(go_data, showCategory = 20) +
  ggtitle("Top enriched terms")+
  theme_light()+
  theme(axis.text = element_blank(), axis.title = element_blank())+
  theme(plot.margin = unit(c(0.5,0.5,0.5,0.5), "cm"))

cnetplot(go_data,showCategory = 10, 
         circular=F, 
         colorEdge= T)+
  ggtitle("Terms and DEGs") +
  theme_light()+
  theme(axis.text = element_blank(), axis.title = element_blank())

rm(qu1,fc1, degs_top, go_data, kegg_data )

#barplot(go_data, drop = TRUE, showCategory = showNum, color = colorSel)
#dev.off()


## e2

### e2.1 
# go_gmt = read.gmt("/usr/local/lib/R/Rlib/c5.go.v7.2.symbols.gmt")
# kegg_gmt = read.gmt("/usr/local/lib/R/Rlib/c2.cp.kegg.v7.2.symbols.gmt")
btr_gsea_order = bitr( degs_temp1$symbol, 
                       fromType = "SYMBOL", toType = c("ENTREZID"),
                      OrgDb = org.Hs.eg.db) %>% # 
  mutate( log2FC = degs_temp1$log2FC[ match(SYMBOL, degs_temp1$symbol ) ] ) %>% # 
  arrange( desc( log2FC ) ) # log2FC 

table( is.na(btr_gsea_order$ENTREZID) )  # 
table( is.na(btr_gsea_order$log2FC) )  # 
head(btr_gsea_order) ; tail(btr_gsea_order) # 


gsea_list = setNames( btr_gsea_order$log2FC, btr_gsea_order$ENTREZID ) # 

head(gsea_list);tail(gsea_list)


### e2.2 GSEA 
gsea_go <- gseGO(gsea_list, 
                 ont = "ALL",
                 OrgDb = org.Hs.eg.db, # 
                 eps = 0 ) %>%
  clusterProfiler::simplify( )  


### e2.3 GSEA 
gsea_kegg <- gseKEGG(gsea_list, 
                     keyType = "kegg",
                     organism = 'hsa', # 
                     pvalueCutoff = 1,
                     eps = 0,
                     use_internal_data = F ) 

# gsea_wp <- gseWP(gsea_list, organism = 'Homo sapiens', eps = 0)
# gsea_do <- gseDO(gsea_list, by = "fgsea", eps = 0)

save( gsea_go, gsea_kegg, file = paste('e2.3_GSEA_',GSE,'.Rdata',sep = ""))


### e2.4 GSEA GO
ridgeplot(gsea_go,showCategory = 30)+
  theme_light()+
  ggtitle("Top GSEA-GO terms")+
  theme(
    plot.title = element_text(hjust = 0.4),
    plot.background = element_rect(fill = "transparent",colour = NA)
  ) +
  scale_fill_gradient( low = "brown", high = "blue" )

ridgeplot(gsea_kegg,showCategory = 30)+
  theme_light()+
  ggtitle("Top GSEA-KEGG terms")+
  theme(
    plot.title = element_text(hjust = 0.4),
    plot.background = element_rect(fill = "transparent",colour = NA)
  ) +
  scale_fill_gradient( low = "brown", high = "blue"  )

enrichplot::gseaplot2(gsea_go,1:5)+
  theme_light()+
  ggtitle("Top 5 GSEA-GO terms")+
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.background = element_rect(fill = "transparent",colour = NA)
  )

rm(btr_gsea_order, gsea_list, gsea_go, gsea_kegg )

# rm(list = ls())