# DEseq2 package library(tidyverse) library(DESeq2) load(file = "TCGA_GBM_Count.Rda") dataFilt[1:3,1:3] # 1 coldata <- data.frame(c(rep("Normal", 41), rep("Tumor", 478))) row.names(coldata) <- colnames(dataFilt) colnames(coldata) <- "condition" head(coldata) dds_GBM <- DESeqDataSetFromMatrix(countData = dataFilt, colData = coldata, design = ~ condition) dds <- DESeq(dds_GBM, parallel = T) resultsNames(dds) res <- results(dds, name = "condition_Tumor_vs_Normal") head(res) # 2 res_GBM <- res %>% as.data.frame() %>% na.omit() %>% rownames_to_column("gene_name") write_tsv(res_GBM, "res_GBM_all.txt") res_GBM_diff <- res_GBM %>% subset(abs(log2FoldChange) > 2 & padj < 0.05) write_tsv(res_GBM_diff, "res_GBM_diff.txt") # Volcano map library(ggplot2) library(ggrepel) colnames(res_GBM) vocano_result <- res_GBM %>% mutate(Type = if_else(padj > 0.05 | abs(log2FoldChange) < 2, 'Non_significant', if_else(log2FoldChange >= 2, 'Up_Regulated', 'Down_Regulated'))) ggplot(vocano_result, aes(log2FoldChange, -log10(padj))) + geom_point(size = 5, alpha = 0.65, aes(color = Type), show.legend = T) + geom_text_repel(data = subset(vocano_result, abs(log2FoldChange) > 6), size = 5, box.padding = 0.5, aes(label = gene_name)) + scale_color_manual(values = c("steelblue", "gray70", "darkred")) + ylim(0, 250) + xlim(-10, 12) + labs(x = "Log2(fold change)", y = "-log10(p-value)") + geom_hline(yintercept = -log10(0.05), linetype = 'dotdash', color = 'gray70') + geom_vline(xintercept = c(-1, 1), linetype = 'dotdash', color = 'gray70') ggsave("GBM_Volcano.pdf") # Heat Map GBM_Diff_matrix <- GBM_mRNA_Count[res_GBM_diff$gene_name,] save(GBM_Diff_matrix, file = "GBM_Diff_matrix.Rda") # Heat Map library(pheatmap) GBM_Diff_matrix_log <- log(GBM_Diff_matrix + 1) range(GBM_Diff_matrix_log) Type <- c(rep("Normal", 41),rep("Tumor", 478)) names(Type) <- colnames(GBM_Diff_matrix_log) Type <- as.data.frame(Type) pdf("GBM_Heatmap.pdf", 10, 8) pheatmap(GBM_Diff_matrix_log, scale = "column", annotation_col = Type, show_rownames = FALSE, show_colnames = FALSE, fontsize_row = 3, color = colorRampPalette(c("steelblue", "white","darkred"))(100), annotation_legend = TRUE, cluster_cols = FALSE, cluster_rows = TRUE, border_color = FALSE) dev.off() #GOKEGG-clusterProfiler # Load R package library(clusterProfiler) library(org.Hs.eg.db) # read genes list input <- read.table("data/clusterProfiler_input.txt",sep="\t",header=T,check.names=F) input <- input[is.na(input[,"ENTREZID"])==F,] gene <- input$ENTREZID # change format gene.df <- bitr(input$SYMBOL, fromType = "SYMBOL", toType = c("ENSEMBL", "ENTREZID"), OrgDb = org.Hs.eg.db) # Hs is for human head(gene.df) ########## GO ########## ego <- enrichGO(gene = gene, OrgDb = org.Hs.eg.db, pvalueCutoff =0.05, qvalueCutoff =0.05, readable = TRUE) head(ego) # write write.table(ego,file="data/clusterProfiler_GO_result.txt",sep="\t", quote=F,row.names = F) ########## KEGG ########## kk <- enrichKEGG(gene = gene, organism = "hsa", pvalueCutoff =0.05, qvalueCutoff =1) head(kk) # write write.table(kk,file="data/clusterProfiler_KEGG_result.txt",sep="\t",quote=F,row.names = F) # save save(ego, kk, file = "data/clusterProfiler_result.Rda") ########## GSEA ########## # clean remove(list = ls()) # load r package library(clusterProfiler) library(org.Hs.eg.db) input <- read.table("data/GSEA_input.txt",sep="\t",header=T,check.names=F) head(input) # # ENTREZID logFC # 1 4312 4.572613 # 2 8318 4.514594 # 3 10874 4.418218 # 4 55143 4.144075 # 5 55388 3.876258 # 6 991 3.677857 dim(input) # sorting ## 1: geneList = input[,2] ## 2: names(geneList) = as.character(input[,1]) head(geneList) ## gene name:ENTREZID # 4312 8318 10874 55143 55388 991 ## logFC # 4.572613 4.514594 4.418218 4.144075 3.876258 3.677857 ## 3: geneList = sort(geneList, decreasing = TRUE) ##########gseGO ########## gsego <- gseGO(geneList = geneList, OrgDb = org.Hs.eg.db, ont = "CC", nPerm = 1000, minGSSize = 100, maxGSSize = 500, pvalueCutoff = 0.05, verbose = FALSE) head(gsego) # save write.table(gsego,file="data/GSEA_GO_result.txt",sep="\t", quote=F,row.names = F) ########## gseKEGG ########## gsekk <- gseKEGG(geneList = geneList, organism = 'hsa', nPerm = 1000, minGSSize = 120, pvalueCutoff = 0.05, verbose = FALSE) head(gsekk) # save write.table(gsekk,file="data/GSEA_KEGG_result.txt",sep="\t", quote=F,row.names = F) ########## GSEA ########## # MSigDb # https://www.gsea-msigdb.org/gsea/downloads.jsp # gmt msigdb_GMTs <- "msigdb_v7.0_GMTs" msigdb <- "msigdb.v7.0.entrez.gmt" # read all all_msigdb <- read.gmt(file.path(msigdb_GMTs,msigdb)) gsegmt <- GSEA(geneList, TERM2GENE=all_msigdb, verbose=FALSE) head(gsegmt) # save write.table(gsegmt,file="data/GSEA_MSigDb_result.txt",sep="\t", quote=F,row.names = F) # save(geneList, gsego, gsekk, gsegmt, file = "data/gsea_result.Rda") # clean rm(list = ls()) immune <- read.table("data/pan_cancer_immune.txt", sep="\t", header=T, check.names=F) # CellType <- as.vector(unique(immune$`Cell type`)) # geneset <- lapply(CellType, function(x){ x <- as.vector(immune[immune$`Cell type`==x,1]) return(x) }) names(geneset)<- CellType ### ssGSEA score # load GSVA包 library(GSVA) # read input <- read.table("data/sample_input.txt", header=T, row.names = 1, check.names=F) # change exp <- as.matrix(input) # ssGSEA gsva_matrix<- gsva(exp, geneset, method='ssgsea',#ssgsea kcdf='Gaussian', abs.ranking=TRUE) normalize <- function(x){ return((x-min(x))/(max(x)-min(x)))} ssgseaScore <- normalize(gsva_matrix) # write write.table(ssgseaScore,file="data/ssgseaScore.txt",sep="\t", quote=F, col.names=T) annotation_col <- data.frame(colnames(ssgseaScore)) colnames(annotation_col) <- "sample" rownames(annotation_col) <- colnames(ssgseaScore)