# DEseq2 package
library(tidyverse)
library(DESeq2)
load(file = "TCGA_GBM_Count.Rda")
dataFilt[1:3,1:3]

# 1
coldata <- data.frame(c(rep("Normal", 41), rep("Tumor", 478)))
row.names(coldata) <- colnames(dataFilt)
colnames(coldata) <- "condition"
head(coldata)

dds_GBM <- DESeqDataSetFromMatrix(countData = dataFilt,                                 
                                  colData = coldata, 
                                  design = ~ condition) 
dds <- DESeq(dds_GBM, parallel = T) 
resultsNames(dds)
res <- results(dds, name = "condition_Tumor_vs_Normal")  
head(res)

# 2
res_GBM <- res %>%   
  as.data.frame() %>%  
  na.omit() %>%
  rownames_to_column("gene_name")
write_tsv(res_GBM, "res_GBM_all.txt")

res_GBM_diff <- res_GBM %>% 
  subset(abs(log2FoldChange) > 2 & padj < 0.05) 
write_tsv(res_GBM_diff, "res_GBM_diff.txt")

# Volcano map
library(ggplot2)
library(ggrepel)
colnames(res_GBM)
vocano_result <- res_GBM %>%
  mutate(Type = if_else(padj > 0.05 | abs(log2FoldChange) < 2, 
                        'Non_significant', 
                        if_else(log2FoldChange >= 2, 
                                'Up_Regulated', 
                                'Down_Regulated'))) 

ggplot(vocano_result,
       aes(log2FoldChange,
           -log10(padj))) +
  geom_point(size = 5, 
             alpha = 0.65,
             aes(color = Type),
             show.legend = T)  +
  geom_text_repel(data = subset(vocano_result, abs(log2FoldChange) > 6),
                  size = 5, 
                  box.padding = 0.5,
                  aes(label = gene_name)) +
  scale_color_manual(values = c("steelblue", "gray70", "darkred")) +
  ylim(0, 250) +
  xlim(-10, 12) +
  labs(x = "Log2(fold change)", y = "-log10(p-value)") +
  geom_hline(yintercept = -log10(0.05),  
             linetype = 'dotdash', 
             color = 'gray70') + 
  geom_vline(xintercept = c(-1, 1), 
             linetype = 'dotdash', 
             color = 'gray70')
ggsave("GBM_Volcano.pdf")

# Heat Map
GBM_Diff_matrix <- GBM_mRNA_Count[res_GBM_diff$gene_name,]
save(GBM_Diff_matrix, file = "GBM_Diff_matrix.Rda")

# Heat Map
library(pheatmap)
GBM_Diff_matrix_log <- log(GBM_Diff_matrix + 1)
range(GBM_Diff_matrix_log)
Type <- c(rep("Normal", 41),rep("Tumor", 478))
names(Type) <- colnames(GBM_Diff_matrix_log)
Type <- as.data.frame(Type)

pdf("GBM_Heatmap.pdf", 10, 8)
pheatmap(GBM_Diff_matrix_log, 
         scale = "column",
         annotation_col = Type,
         show_rownames = FALSE,
         show_colnames = FALSE,
         fontsize_row = 3,
         color = colorRampPalette(c("steelblue", "white","darkred"))(100),
         annotation_legend = TRUE,
         cluster_cols = FALSE,
         cluster_rows = TRUE,
         border_color = FALSE)
dev.off()


#GOKEGG-clusterProfiler
# Load R package
library(clusterProfiler)
library(org.Hs.eg.db)

# read genes list
input <- read.table("data/clusterProfiler_input.txt",sep="\t",header=T,check.names=F)
input <- input[is.na(input[,"ENTREZID"])==F,]
gene <- input$ENTREZID  
# change format
gene.df <- bitr(input$SYMBOL,
                fromType = "SYMBOL",
                toType = c("ENSEMBL", "ENTREZID"),
                OrgDb = org.Hs.eg.db)  # Hs is for human
head(gene.df)

########## GO ########## 
ego <- enrichGO(gene = gene,
                OrgDb = org.Hs.eg.db, 
                pvalueCutoff =0.05, 
                qvalueCutoff =0.05,
                readable = TRUE)
head(ego)
# write
write.table(ego,file="data/clusterProfiler_GO_result.txt",sep="\t",
            quote=F,row.names = F)

########## KEGG ########## 
kk <- enrichKEGG(gene = gene, 
                 organism = "hsa", 
                 pvalueCutoff =0.05, 
                 qvalueCutoff =1)
head(kk)
# write
write.table(kk,file="data/clusterProfiler_KEGG_result.txt",sep="\t",quote=F,row.names = F)

# save
save(ego, kk, file = "data/clusterProfiler_result.Rda")

########## GSEA ########## 
# clean
remove(list = ls())

# load r package
library(clusterProfiler)
library(org.Hs.eg.db)


input <- read.table("data/GSEA_input.txt",sep="\t",header=T,check.names=F)
head(input)
# 
# ENTREZID    logFC
# 1     4312 4.572613
# 2     8318 4.514594
# 3    10874 4.418218
# 4    55143 4.144075
# 5    55388 3.876258
# 6      991 3.677857
dim(input)

# sorting
## 1: 
geneList = input[,2]
## 2: 
names(geneList) = as.character(input[,1])
head(geneList)
## gene name：ENTREZID
# 4312     8318    10874    55143    55388      991 
## logFC
# 4.572613 4.514594 4.418218 4.144075 3.876258 3.677857 

## 3: 
geneList = sort(geneList, decreasing = TRUE)

##########gseGO ##########
gsego <- gseGO(geneList     = geneList,
               OrgDb        = org.Hs.eg.db,
               ont          = "CC",
               nPerm        = 1000,
               minGSSize    = 100,
               maxGSSize    = 500,
               pvalueCutoff = 0.05,
               verbose      = FALSE)
head(gsego)

# save
write.table(gsego,file="data/GSEA_GO_result.txt",sep="\t",
            quote=F,row.names = F)

########## gseKEGG ##########
gsekk <- gseKEGG(geneList     = geneList,
                 organism     = 'hsa',
                 nPerm        = 1000,
                 minGSSize    = 120,
                 pvalueCutoff = 0.05,
                 verbose      = FALSE)
head(gsekk)

# save
write.table(gsekk,file="data/GSEA_KEGG_result.txt",sep="\t",
            quote=F,row.names = F)

########## GSEA ##########
# MSigDb
# https://www.gsea-msigdb.org/gsea/downloads.jsp
# gmt
msigdb_GMTs <- "msigdb_v7.0_GMTs"


msigdb <- "msigdb.v7.0.entrez.gmt"

# read all 
all_msigdb <- read.gmt(file.path(msigdb_GMTs,msigdb))

gsegmt <- GSEA(geneList, TERM2GENE=all_msigdb, verbose=FALSE)
head(gsegmt)

# save
write.table(gsegmt,file="data/GSEA_MSigDb_result.txt",sep="\t",
            quote=F,row.names = F)

# 
save(geneList, gsego, gsekk, gsegmt, file = "data/gsea_result.Rda")


# clean
rm(list = ls())


immune <- read.table("data/pan_cancer_immune.txt", sep="\t", 
                     header=T, check.names=F)

# 
CellType <- as.vector(unique(immune$`Cell type`))

# 
geneset <- lapply(CellType, function(x){
  x <- as.vector(immune[immune$`Cell type`==x,1])
  return(x)
})
names(geneset)<- CellType

### ssGSEA score
# load GSVA包
library(GSVA)
# read 
input <- read.table("data/sample_input.txt", 
                    header=T, row.names = 1, check.names=F)


# change
exp <- as.matrix(input)

# ssGSEA
gsva_matrix<- gsva(exp, 
                   geneset,
                   method='ssgsea',#ssgsea
                   kcdf='Gaussian',
                   abs.ranking=TRUE)

normalize <- function(x){
  return((x-min(x))/(max(x)-min(x)))}

ssgseaScore <- normalize(gsva_matrix)
# write
write.table(ssgseaScore,file="data/ssgseaScore.txt",sep="\t", quote=F, col.names=T)

annotation_col <- data.frame(colnames(ssgseaScore))
colnames(annotation_col) <- "sample"
rownames(annotation_col) <- colnames(ssgseaScore)