# Clear the workspace rm(list = ls()) # Load libraries library(Seurat) library(ggplot2) library(cowplot) library(Matrix) library(dplyr) library(ggsci) library(scater) library(SingleR) library(hdf5r) library(tidyverse) library(RColorBrewer) library(dplyr) library(Seurat) library(ComplexHeatmap) library(GSVA) library(GSEABase) library(limma) library(ggplot2) # Load the "merge.RDATA" file load("merge.RDATA") # Read the "score.txt" file score <- read.table("score.txt", header = TRUE, sep = "\t") # Import the GMT file, using Hallmark as an example # MsigDB link (https://www.gsea-msigdb.org/gsea/msigdb/) # When downloading the file, make sure to choose the appropriate gene format, typically gene symbol format for 10X results. genesets <- getGmt("h.all.v7.5.1.symbols.gmt") str(genesets) # Perform GSVA analysis gsvascore <- gsva(outTab, genesets, min.sz = 10, max.sz = 500, verbose = TRUE, parallel.sz = 1) # Prepare data for correlation analysis aaa <- as.data.frame(t(gsvascore)) aaa$id <- rownames(aaa) aaa <- inner_join(score, aaa, by = "id") # Correlation analysis with all genes gene <- "score" y <- as.numeric(aaa[, gene]) bbb <- aaa[, 3:52] cor_data_df <- data.frame(colnames(bbb)) for (a in 1:length(colnames(bbb))) { test <- cor.test(as.numeric(bbb[, a]), y, method = "pearson") cor_data_df[a, 2] <- test$estimate cor_data_df[a, 3] <- test$p.value } names(cor_data_df) <- c("symbol", "correlation", "pvalue") # Export correlation analysis results write.csv(cor_data_df, file = paste(gene, ' GSVA Correlation Analysis.csv', sep = ' '), row.names = FALSE) # Sort and select the top 15 genes for correlation analysis cor_data_df <- na.omit(cor_data_df) posneg <- cor_data_df %>% filter(symbol != gene) posneg$group <- NA posneg$group[posneg$pvalue < 0.05 & posneg$correlation < 0] <- "negative" posneg$group[posneg$pvalue >= 0.05] <- "pvalue>0.05" posneg$group[posneg$pvalue < 0.05 & posneg$correlation > 0] <- "positive" posneg$symbol <- str_replace_all(posneg$symbol, '_', ' ') posneg$symbol <- str_replace_all(posneg$symbol, 'HALLMARK ', '') # Create a correlation plot library(ggpubr) p <- ggbarplot(posneg, x = "symbol", y = "correlation", fill = "group", color = "white", palette = "jco", sort.val = "asc", sort.by.groups = FALSE, ylab = "GSVA correlation", xlab = "Term", legend.title = "group", lab.col = "black", rotate = TRUE, ggtheme = theme_minimal() ) # Save the correlation plot as a PDF ggsave(p, filename = paste(gene, ' GSVA Correlation Plot.pdf', sep = ' '), width = 10, height = 10)