# Copy this code to each folder as different datasets have different processing parameters rm(list = ls()) library(Seurat) library(ggplot2) library(cowplot) library(Matrix) library(dplyr) library(ggsci) library(scater) library(SingleR) library(hdf5r) library(tidyverse) library(RColorBrewer) library(dplyr) library(Seurat) library(ComplexHeatmap) library(GSVA) library(GSEABase) library(limma) library(ggplot2) load("CRC_GSE166555.RData") # Import gmt file, here using MsigDB's Hallmark as an example # MsigDB link (https://www.gsea-msigdb.org/gsea/msigdb/) # When downloading files, pay attention to choosing the corresponding gene format. Generally, the results from 10X are # gene symbol format. genesets <- getGmt("h.all.v7.5.1.symbols.gmt") str(genesets) # Get the matrix for GSVA analysis (after normalization) # df.data <- GetAssayData(object = pbmc, slot = "data") expr = as.matrix(pbmc@assays$RNA@data) expr = expr[rowSums(expr) > 0,] # Save the group information of the cells df.group <- data.frame(umi = names(Idents(pbmc)), cluster = pbmc@active.ident, stringsAsFactors = F) # Check what the result looks like head(df.group) str(expr) dim(expr) gsvascore <- gsva(expr, genesets, parallel.sz = 8, verbose = T, method = "zscore") # Because there are many genes with expression value of 0 in single-cell data, red warning messages may appear, but it's okay. # Check the result gsvascore[1, 1:5] # First use a heatmap to display the results of all 50 genesets in the hallmark ha.t <- HeatmapAnnotation(Cluster = df.group$cluster) aaa = as.data.frame(gsvascore) load("hallmarkname.RDATA") aaa = aaa[hallmarkname,] aaa = as.matrix(aaa) Heatmap(aaa, show_column_names = F, cluster_rows = T, cluster_columns = T, top_annotation = ha.t, column_split = df.group$cluster, row_names_gp = gpar(fontsize = 8), row_names_max_width = max_text_width(rownames(gsvascore), gp = gpar(fontsize = 8))) pdf(file = "7.hallmark_heatmap.pdf", width = 10, height = 8) Heatmap(aaa, show_column_names = F, cluster_rows = T, cluster_columns = T, top_annotation = ha.t, column_split = df.group$cluster, row_names_gp = gpar(fontsize = 8), row_names_max_width = max_text_width(rownames(gsvascore), gp = gpar(fontsize = 8))) dev.off() aaa = as.data.frame(t(gsvascore)) write.csv(aaa, "GSVAscore.csv", row.names = F) phe = aaa pbmc = AddMetaData(pbmc, phe, col.name = colnames(phe)) genesetname = "ICD" genesets = getGmt(paste0(genesetname, ".gmt")) str(genesets) genename = genesets@.Data[[1]]@geneIds pbmc <- SetIdent(pbmc, value = "celltype") table(pbmc@active.ident) genes_to_check = genename # Improved plotting method, gene names vertical p1 <- DotPlot(pbmc, features = genes_to_check, assay = 'RNA') + coord_flip() + theme_bw() + theme(panel.grid = element_blank(), axis.text.x = element_text(hjust = 0.5, vjust = 0.5)) + labs(x = NULL, y = NULL) + guides(size = guide_legend(order = 3)) + scale_color_gradientn(values = seq(0, 1, 0.2), colours = c('#330066', '#336699', '#66CC66', '#FFCC33')) ggsave(print(p1), filename = "8.specified_geneset_expression_in_cells.pdf", height = 10, width = 8) dev.off() # Here, choose the first pathway for plotting count <- gsvascore[genesetname, , drop = FALSE] count <- as.data.frame(t(count)) colnames(count) <- "geneset" count$cluster <- as.character(Idents(pbmc)) index <- vector() name <- vector() for (i in 1:length(unique(count$cluster))) { aaa <- dplyr::filter(count, cluster == unique(count$cluster)[i]) index <- c(mean(aaa[, "geneset"]), index) name <- c(unique(count$cluster)[i], name) } dat <- data.frame(name, index) value <- as.character(arrange(dat, index)$name) aaaa <- count aaaa$cluster <- factor(aaaa$cluster, levels = value) title.name = "ICD" p1 <- ggplot(data = aaaa, aes(x = cluster, y = geneset, fill = cluster)) + geom_boxplot() + ylab(label = title.name) + theme_classic() + theme(legend.position = 'none', axis.text.x = element_text(angle = 90, size = 8, hjust = 1, vjust = 0.5, colour = "black"), axis.text.y = element_text(colour = "black")) ggsave(p1, filename = paste0("9.", title.name, "_single_cell_expression.pdf"), height = 6, width = 8) pdf(file = "10.umap_plot.pdf", width = 8, height = 6) DimPlot(pbmc, reduction = "umap", label = TRUE, repel = TRUE) dev.off() colnames(count)[1] = genesetname pbmc = AddMetaData(pbmc, count, col.name = colnames(count)) pdf(file = "11.geneset_expression_umap_plot.pdf", width = 6, height = 5.5) FeaturePlot(pbmc, features = c(title.name), reduction = "umap", # label = TRUE, # ncol = 3, # Draw in three columns cols = rev(rainbow(2))) dev.off() p1 = DimPlot(pbmc, reduction = "umap", label = TRUE, group.by = "celltype", repel = TRUE) p3 = FeaturePlot(pbmc, features = c(title.name), reduction = "umap", # label = TRUE, # ncol = 3, # Draw in three columns cols = rev(rainbow(2))) pbmc@meta.data$group = ifelse(pbmc@meta.data[, title.name] > median(pbmc@meta.data[, title.name]), "High.score.group", "Low.score.group") pdf(file = "12.high_low_score_group_umap_plot.pdf", width = 6, height = 5.5) DimPlot(pbmc, reduction = "umap", group.by = "group", # column 'Diagnosis' in metadata cols = c("yellow2", "royalblue3"), pt.size = 1.5) dev.off() p4 = DimPlot(pbmc, reduction = "umap", group.by = "group", # column 'Diagnosis' in metadata cols = c("yellow2", "royalblue3"), pt.size = 1.5) p1 + p3 + p4 ggsave('13.umap_combined_plot.pdf', width = 16, height = 8) # pbmc@meta.data$celltype <- pbmc@active.ident data_plotC <- table(pbmc@meta.data$group, pbmc@meta.data$celltype) %>% reshape::melt() colnames(data_plotC) <- c("Sample", "CellType", "Number") colourCount = length(unique(pbmc@meta.data$celltype)) getPalette = colorRampPalette(brewer.pal(8, "Dark2")) celltype_colors <- getPalette(colourCount) dev.off() pC1 <- ggplot(data = data_plotC, aes(x = Sample, y = Number, fill = CellType)) + geom_bar(stat = "identity", width = 0.8, aes(group = CellType), position = "stack") + scale_fill_manual(values = celltype_colors) + theme_bw() + theme(panel.grid = element_blank()) + labs(x = "", y = "Average cell number") + theme(axis.text = element_text(size = 12, colour = "black")) + theme(axis.title.y = element_text(size = 12, colour = "black")) + theme(panel.border = element_rect(size = 1, linetype = "solid", colour = "black")) + theme(axis.text.x = element_text(angle = 45, hjust = 0.8, vjust = 0.6)) pC2 <- ggplot(data = data_plotC, aes(x = Sample, y = Number, fill = CellType)) + geom_bar(stat = "identity", width = 0.8, aes(group = CellType), position = "fill") + scale_fill_manual(values = celltype_colors) + theme_bw() + theme(panel.grid = element_blank()) + labs(x = "", y = "Cell proportion") + theme(panel.grid = element_blank()) + labs(x = "", y = "Cell proportion") + scale_fill_manual(values = celltype_colors) + theme_bw() + theme(panel.grid = element_blank()) + theme(axis.text = element_text(size = 12, colour = "black")) + theme(axis.title.y = element_text(size = 12, colour = "black")) + theme(panel.border = element_rect(size = 1, linetype = "solid", colour = "black")) + theme(axis.text.x = element_text(angle = 45, hjust = 0.8, vjust = 0.6)) aaa$group[aaa$score >= median(aaa$score)] = "High" aaa$group[aaa$score < median(aaa$score)] = "Low" table(aaa$group) rownames(aaa) = aaa$id pbmc = AddMetaData(pbmc, aaa, col.name = colnames(aaa)) pbmc <- SetIdent(pbmc, value = "group") # Save the group information of the cells df.group <- data.frame(umi = names(Idents(pbmc)), cluster = pbmc@active.ident, stringsAsFactors = F) # Check what the result looks like head(df.group) # Because there are many genes with expression value of 0 in single-cell data, red warning messages may appear, but it's okay. # Check the result aaa[1, 1:5] # First use a heatmap to display the results of all 50 genesets in the hallmark ha.t <- HeatmapAnnotation(Cluster = df.group$cluster) bbb = aaa[, 4:54] bbb = bbb[, -51] bbb = as.matrix(bbb) bbb = t(bbb) pdf(file = "19.group_segmented_hallmark_heatmap.pdf", width = 10, height = 8) Heatmap(bbb, show_column_names = F, cluster_rows = T, cluster_columns = T, top_annotation = ha.t, column_split = df.group$cluster, row_names_gp = gpar(fontsize = 8), row_names_max_width = max_text_width(rownames(bbb), gp = gpar(fontsize = 8))) dev.off() # Select integers num = floor(length(pbmc@meta.data$score) / 10) ccc = arrange(pbmc@meta.data, score) ccc = ccc[1:num, ] ccc = rownames(ccc) ddd = arrange(pbmc@meta.data, desc(score)) ddd = ddd[1:num, ] ddd = rownames(ddd) colnames(pbmc) pbmc1 = pbmc[, c(ccc, ddd)] table(pbmc1@meta.data$group) df.group <- data.frame(umi = names(Idents(pbmc1)), cluster = pbmc1@active.ident, stringsAsFactors = F) ha.t <- HeatmapAnnotation(Cluster = df.group$cluster) bbb = bbb[, c(ccc, ddd)] pdf(file = "19.group_top10_segmented_hallmark_heatmap.pdf", width = 10, height = 8) Heatmap(bbb, show_column_names = F, cluster_rows = T, cluster_columns = T, top_annotation = ha.t, column_split = df.group$cluster, row_names_gp = gpar(fontsize = 8), row_names_max_width = max_text_width(rownames(bbb), gp = gpar(fontsize = 8))) dev.off()