##############################################################################
# R code to run GOSemSim (gene semantic similarity) and a GO enrichment test #
##############################################################################

# Install the given Bioconductor packages only when they are missing, so that
# re-running the script does not trigger a reinstall every time.
install_if_missing <- function(pkgs) {
  if (!requireNamespace("BiocManager", quietly = TRUE)) {
    install.packages("BiocManager")
  }
  is_installed <- vapply(pkgs, requireNamespace, logical(1), quietly = TRUE)
  if (!all(is_installed)) {
    BiocManager::install(pkgs[!is_installed])
  }
  invisible(pkgs)
}

# Read a headerless, tab-separated file whose first column holds gene symbols
# and return the unique, non-empty symbols as a character vector.
read_gene_list <- function(path) {
  genes <- read.table(path, header = FALSE, sep = "\t",
                      stringsAsFactors = FALSE)
  genes <- unique(trimws(as.character(genes$V1)))
  genes[!is.na(genes) & nzchar(genes)]
}

install_if_missing(c("GOSemSim", "AnnotationDbi", "org.Hs.eg.db"))
library(GOSemSim)
library(AnnotationDbi)
library(org.Hs.eg.db)

# Analysis settings
gene_list_file <- "cytokine-receptor_84_list.txt"
ontology <- "BP" # You can change to "CC" or "MF" for different ontologies
pvalue_cutoff <- 0.05
qvalue_cutoff <- 0.1

# Load gene list (one column of gene symbols)
gene_list <- read_gene_list(gene_list_file)

# Calculate pairwise semantic similarity between the genes.
# godata() builds the annotation object the GOSemSim similarity functions take
# as `semData`; mgeneSim() is the gene-level function (mgoSim() compares sets
# of GO term IDs, not gene symbols). The "Wang" measure is graph-based and does
# not use information content, hence computeIC = FALSE.
sem_data <- GOSemSim::godata(
  OrgDb = "org.Hs.eg.db",
  keytype = "SYMBOL",
  ont = ontology,
  computeIC = FALSE
)
sim_matrix <- GOSemSim::mgeneSim(
  gene_list,
  semData = sem_data,
  measure = "Wang",
  combine = "BMA",
  verbose = FALSE
)

# Perform enrichment analysis.
# NOTE(review): GOSemSim itself has no enrichment function, so this is a plain
# hypergeometric over-representation test against all human gene symbols that
# carry at least one annotation in the chosen ontology. Swap in a dedicated
# enrichment package if one is available in your environment.
godb <- AnnotationDbi::select(
  org.Hs.eg.db,
  keys = AnnotationDbi::keys(org.Hs.eg.db, keytype = "SYMBOL"),
  keytype = "SYMBOL",
  columns = "GOALL" # GOALL includes annotations inherited from child terms
)
godb <- godb[
  !is.na(godb$GOALL) & godb$ONTOLOGYALL == ontology,
  c("SYMBOL", "GOALL")
]
godb <- unique(godb) # one row per gene/term pair regardless of evidence code

universe <- unique(godb$SYMBOL)
annotated_genes <- intersect(gene_list, universe)
if (length(annotated_genes) == 0) {
  stop("None of the genes in '", gene_list_file, "' has a GO ", ontology,
       " annotation; check that the file contains HGNC symbols.",
       call. = FALSE)
}

term_size <- table(godb$GOALL)
term_hits <- table(godb$GOALL[godb$SYMBOL %in% annotated_genes])
hits <- as.vector(term_hits)
size <- as.vector(term_size[names(term_hits)])

# P(X >= hits) when drawing length(annotated_genes) genes from the universe.
pvalue <- stats::phyper(
  hits - 1,
  size,
  length(universe) - size,
  length(annotated_genes),
  lower.tail = FALSE
)

enrich_result <- data.frame(
  go_id = names(term_hits),
  gene_count = hits,
  term_size = size,
  pvalue = pvalue,
  p_adjust = stats::p.adjust(pvalue, method = "BH"),
  stringsAsFactors = FALSE
)
# The q-value cutoff is applied to the Benjamini-Hochberg adjusted p-value.
keep <- enrich_result$pvalue < pvalue_cutoff &
  enrich_result$p_adjust < qvalue_cutoff
enrich_result <- enrich_result[keep, , drop = FALSE]
enrich_result <- enrich_result[order(enrich_result$pvalue), , drop = FALSE]

# View the enriched GO terms
head(enrich_result)

##############################################################################
# R code to use TCGAbiolinks for the TCGA pan-cancer SNV mutational analysis #
##############################################################################

install_if_missing("TCGAbiolinks")
library(TCGAbiolinks)

# Directory where the TCGA data is downloaded and stored. It is passed to
# GDCdownload()/GDCprepare() instead of calling setwd(), so the gene list below
# is still resolved against the original working directory.
tcga_dir <- file.path("TCGAbiolinks", "GDCdata")
dir.create(tcga_dir, showWarnings = FALSE, recursive = TRUE)

# Load the TCGA data.
# "TCGA-PANCAN" is not a GDC project id; a pan-cancer query is the set of all
# individual TCGA-* projects. This downloads one masked MAF per case across
# every TCGA project, so expect a long download.
tcga_projects <- grep(
  "^TCGA-",
  TCGAbiolinks::getGDCprojects()$project_id,
  value = TRUE
)
# NOTE(review): the workflow name below is the one used by recent GDC data
# releases (it replaced "MuTect2 Variant Aggregation and Masking"); confirm it
# against the TCGAbiolinks version you have installed.
query <- GDCquery(
  project = tcga_projects,
  data.category = "Simple Nucleotide Variation",
  data.type = "Masked Somatic Mutation",
  workflow.type = "Aliquot Ensemble Somatic Variant Merging and Masking",
  access = "open"
)

# Download the data
GDCdownload(query, directory = tcga_dir)

# Prepare the data for analysis (a MAF-style table, one row per mutation)
mutation_data <- GDCprepare(query, directory = tcga_dir)

# Read the cytokine-receptor gene list (one column of gene symbols)
gene_list <- read_gene_list(gene_list_file)

# Subset the mutation data for the specified genes.
# MAF tables identify genes by the Hugo_Symbol column.
subset_data <- mutation_data[mutation_data$Hugo_Symbol %in% gene_list, ]
if (nrow(subset_data) == 0) {
  stop("No mutations found for the genes in '", gene_list_file, "'.",
       call. = FALSE)
}

# Perform mutational analysis on the subset_data.
# Mutation frequency = fraction of samples with at least one mutation in the
# gene. The denominator counts samples present in the MAF table, i.e. samples
# with at least one reported mutation in any gene.
n_samples <- length(unique(mutation_data$Tumor_Sample_Barcode))
mutated_pairs <- unique(
  subset_data[, c("Hugo_Symbol", "Tumor_Sample_Barcode")]
)
samples_per_gene <- sort(table(mutated_pairs$Hugo_Symbol), decreasing = TRUE)
mutation_freq <- data.frame(
  gene_id = names(samples_per_gene),
  n_samples_mutated = as.vector(samples_per_gene),
  freq = as.vector(samples_per_gene) / n_samples,
  stringsAsFactors = FALSE
)

# To visualize the mutation frequencies
barplot(
  mutation_freq$freq,
  names.arg = mutation_freq$gene_id,
  las = 2,
  cex.names = 0.7
)