# Clear the workspace rm(list = ls()) # Gene ID conversion library(data.table) library(org.Hs.eg.db) library(clusterProfiler) library(biomaRt) library(enrichplot) library(tidyverse) genelist_input <- fread(file = "interGene.txt", header = FALSE, sep = '\t', data.table = FALSE) genename <- as.character(genelist_input[, 1]) # Extract the gene names from the first column # Gene ID conversion using biomaRt gene_map <- biomaRt::select(org.Hs.eg.db, keys = genename, keytype = "SYMBOL", columns = c("ENTREZID")) gene_map write.csv(as.data.frame(gene_map), "GeneConversion.csv", row.names = FALSE) # Export the results to the default path genelist_input <- gene_map[, 2] head(genelist_input) genelist_input <- na.omit(genelist_input) # GO analysis - Biological Process (BP) Go_result_BP <- enrichGO(genelist_input, 'org.Hs.eg.db', ont = "BP", pvalueCutoff = 0.05) goplot(Go_result_BP, showCategory = 5) # GO topological plot p1 <- dotplot(Go_result_BP, showCategory = 20) # Bubble plot showing the top 20 terms p1 <- p1 + scale_y_discrete(labels = function(x) str_wrap(x, width = 50)) ggsave(p1, filename = 'GO_BP_Bubble_Plot.pdf', width = 7.23, height = 8) ggsave(p1, filename = 'GO_BP_Bubble_Plot.TIFF', width = 7.23, height = 8) barplot(Go_result_BP, showCategory = 20) # Bar plot showing the top 20 terms y <- as.data.frame(Go_result_BP) y$geneID <- as.character(sapply(y$geneID, function(x) paste(gene_map$SYMBOL[match(strsplit(x, "/")[[1]], as.character(gene_map$ENTREZID))], collapse = "/"))) write.csv(y, "GO-BP.csv", row.names = FALSE) # Export the results to the default path save(y, file = 'GO-BP.RDATA') # GO analysis - Cellular Component (CC) Go_result_CC <- enrichGO(genelist_input, 'org.Hs.eg.db', ont = "CC", pvalueCutoff = 0.05) goplot(Go_result_CC, showCategory = 5) # GO topological plot p1 <- dotplot(Go_result_CC, showCategory = 20) # Bubble plot showing the top 20 terms p1 <- p1 + scale_y_discrete(labels = function(x) str_wrap(x, width = 50)) ggsave(p1, filename = 'GO_CC_Bubble_Plot.pdf', width = 7.23, height = 8) ggsave(p1, filename = 'GO_CC_Bubble_Plot.TIFF', width = 7.23, height = 8) barplot(Go_result_CC, showCategory = 20) # Bar plot showing the top 20 terms y <- as.data.frame(Go_result_CC) y$geneID <- as.character(sapply(y$geneID, function(x) paste(gene_map$SYMBOL[match(strsplit(x, "/")[[1]], as.character(gene_map$ENTREZID))], collapse = "/"))) write.csv(y, "GO-CC.csv", row.names = FALSE) # Export the results to the default path save(y, file = 'GO-CC.RDATA') # GO analysis - Molecular Function (MF) Go_result_MF <- enrichGO(genelist_input, 'org.Hs.eg.db', ont = "MF", pvalueCutoff = 0.05) goplot(Go_result_MF, showCategory = 5) # GO topological plot p1 <- dotplot(Go_result_MF, showCategory = 20) # Bubble plot showing the top 20 terms p1 <- p1 + scale_y_discrete(labels = function(x) str_wrap(x, width = 50)) ggsave(p1, filename = 'GO_MF_Bubble_Plot.pdf', width = 7.23, height = 8) ggsave(p1, filename = 'GO_MF_Bubble_Plot.TIFF', width = 7.23, height = 8) barplot(Go_result_MF, showCategory = 20) # Bar plot showing the top 20 terms y <- as.data.frame(Go_result_MF) y$geneID <- as.character(sapply(y$geneID, function(x) paste(gene_map$SYMBOL[match(strsplit(x, "/")[[1]], as.character(gene_map$ENTREZID))], collapse = "/"))) write.csv(y, "GO-MF.csv", row.names = FALSE) # Export the results to the default path save(y, file = 'GO-MF.RDATA') # Combined GO analysis for BP, MF, and CC go <- enrichGO(genelist_input, OrgDb = "org.Hs.eg.db", ont = "all") library(ggplot2) p <- dotplot(go, split = "ONTOLOGY") + facet_grid(ONTOLOGY ~ ., scale = "free") p <- p + scale_y_discrete(labels = function(x) str_wrap(x, width = 50)) ggsave(p, filename = 'Combined_GO_Bubble_Plot.pdf', width = 7.23, height = 8) ggsave(p, filename = 'Combined_GO_Bubble_Plot.TIFF', width = 7.23, height = 8) write.csv(as.data.frame(go), "GO.csv", row.names = FALSE) # Export the results to the default path # KEGG analysis KEGG_result <- enrichKEGG(genelist_input, keyType = "kegg", pvalueCutoff = 1, qvalueCutoff = 1, pAdjustMethod = "BH", minGSSize = 5, maxGSSize = 500, organism = "hsa", use_internal_data = TRUE) barplot(KEGG_result, showCategory = 20) # Bar plot showing the top 20 KEGG pathways p1 <- dotplot(KEGG_result, showCategory = 20) # Bubble plot showing the top 20 KEGG pathways p1 <- p1 + scale_y_discrete(labels = function(x) str_wrap(x, width = 50)) ggsave(p1, filename = 'KEGG_Bubble_Plot.pdf', width = 7.23, height = 8) ggsave(p1, filename = 'KEGG_Bubble_Plot.TIFF', width = 7.23, height = 8) # Circular plot for KEGG pathways pdf(file = "KEGG_circos.pdf", width = 10, height = 7) kkx <- setReadable(KEGG_result, 'org.Hs.eg.db', 'ENTREZID') cnetplot(kkx, showCategory = 5, circular = TRUE, colorEdge = TRUE, node_label = "all") dev.off() x <- as.data.frame(KEGG_result) x$geneID <- as.character(sapply(x$geneID, function(x) paste(gene_map$SYMBOL[match(strsplit(x, "/")[[1]], as.character(gene_map$ENTREZID))], collapse = "/"))) write.csv(as.data.frame(x), "KEGG.csv", row.names = FALSE) # Export the results to the default path save(x, file = 'KEGG.RDATA') # Create a bubble plot for KEGG pathways with P-values, not adjusted y <- KEGG_result@result # Extract values from GeneRatio and convert to numeric forward <- as.numeric(sub("/\\d+$", "", y$GeneRatio)) backward <- as.numeric(sub("^\\d+/", "", y$GeneRatio)) y$GeneRatio <- forward / backward showCategory <- 20 font.size <- 12 library(ggplot2) library(forcats) library(dplyr) y %>% arrange(pvalue) %>% slice(1:showCategory) %>% ggplot(aes(GeneRatio, forcats::fct_reorder(Description, Count))) + geom_point(aes(color = pvalue, size = Count)) + scale_color_continuous(low = "red", high = "blue", guide = guide_colorbar(reverse = TRUE)) + scale_size_continuous(range = c(3, 8)) + labs(y = NULL) + ggtitle("") + theme_bw() + theme( axis.text.x = element_text(colour = "black", size = font.size, vjust = 1), axis.text.y = element_text(colour = "black", size = font.size, hjust = 1), axis.title = element_text(margin = margin(10, 5, 0, 0), color = "black", size = font.size), axis.title.y = element_text(angle = 90) ) ggsave(filename = 'KEGG_Bubble_Plot2.pdf', width = 10, height = 8) ggsave(filename = 'KEGG_Bubble_Plot2.TIFF', width = 10, height = 8) # Create a bar plot for KEGG pathways with P-values, not adjusted showCategory <- 20 font.size <- 12 y %>% arrange(pvalue) %>% slice(1:showCategory) %>% ggplot(aes(x = forcats::fct_reorder(Description, pvalue, .desc = TRUE), y = Count, fill = pvalue)) + geom_bar(stat = "identity") + coord_flip() + scale_fill_continuous(low = "red", high = "blue", guide = guide_colorbar(reverse = TRUE)) + labs(x = NULL, y = NULL) + ggtitle("") + theme_bw() + theme( axis.text.x = element_text(colour = "black", size = font.size, vjust = 1), axis.text.y = element_text(colour = "black", size = font.size, hjust = 1), axis.title = element_text(margin = margin(10, 5, 0, 0), color = "black", size = font.size), axis.title.y = element_text(angle = 90) ) # GO results sorted by P-value, without padjust y <- KEGG_result@result forward <- as.numeric(sub("/\\d+$", "", y$GeneRatio)) backward <- as.numeric(sub("^\\d+/", "", y$GeneRatio)) y$GeneRatio <- forward / backward showCategory <- 20 font.size <- 12 library(ggpubr) library(tidyverse) GODATA <- y GODATA$"-log10(Pvalue)" <- -log10(GODATA$pvalue) GODATA$yyy <- -log10(GODATA$pvalue) write.csv(GODATA, "GODATA.csv", row.names = FALSE) aaa <- filter(GODATA, ONTOLOGY == 'BP') aaaa <- aaa[1:10,] bbb <- filter(GODATA, ONTOLOGY == 'CC') bbbb <- bbb[1:10,] ccc <- filter(GODATA, ONTOLOGY == 'MF') cccc <- ccc[1:10,] drawdata <- rbind(aaaa, bbbb, cccc) ggbarplot( drawdata, x = "Description", y = 'yyy', fill = "ONTOLOGY", color = "white", palette = c('#5FB404', '#01DFD7', '#C238E5'), sort.val = "desc", sort.by.groups = FALSE, x.text.angle = 90 ) ggbarplot( drawdata, x = "Description", y = "yyy", fill = "ONTOLOGY", color = "white", palette = c('#5FB404', '#12B5EC', '#C238E5'), sort.val = "asc", sort.by.groups = TRUE, x.text.angle = 75, ylab = '-log10(P-Value)', xlab = 'Pathway' ) ggsave(filename = 'Combined_GO_Bubble_Plot2.pdf', width = 7.23, height = 10) ggsave(filename = 'Combined_GO_Bubble_Plot2.TIFF', width = 7.23, height = 10)