#Copy it to each folder together with the code, because different data sets have different processing parameters rm(list = ls()) library(Seurat) library(ggplot2) library(cowplot) library(Matrix) library(dplyr) library(ggsci) library(scater) library(SingleR) library(hdf5r) library(tidyverse) library(RColorBrewer) library(dplyr) library(Seurat) library(ComplexHeatmap) library(GSVA) library(GSEABase) library(limma) library(ggplot2) load("CRC_EMTAB8107.RData") # Import the gmt file. Here we take Hallmark in MsigDB as an example. # MsigDB link (https://www.gsea-msigdb.org/gsea/msigdb/) # When downloading files, please pay attention to selecting the corresponding gene format. Generally, the results of 10X are in gene symbol format. genesets <- getGmt("h.all.v7.5.1.symbols.gmt") str(genesets) #Get the matrix for GSVA analysis (normalized) #df.data <- GetAssayData(object = pbmc, slot = "data") expr=as.matrix(pbmc@assays$RNA@data) expr=expr[rowSums(expr)>0,] # Save the group information to which the cell belongs df.group <- data.frame(umi = names(Idents(pbmc)), cluster = pbmc@active.ident, stringsAsFactors = F) # See what the results look like head(df.group) str(expr) dim(expr) gsvascore <- gsva(expr, genesets, parallel.sz = 4,verbose=T,method = "zscore") # Because there are many gene expression values of 0 in the single-cell data, a red warning message will appear, which is not a big deal. # Check out the results gsvascore[1, 1:5] # First, use a heat map to show the results of all 50 genesets in hallmarker. ha.t <- HeatmapAnnotation(Cluster = df.group$cluster) aaa=as.data.frame(gsvascore) load("hallmarkname.RDATA") aaa=aaa[hallmarkname,] aaa=as.matrix(aaa) pdf(file = "7.hallmark heatmap.pdf",width = 10,height = 8) Heatmap(aaa, show_column_names = F, cluster_rows = T, cluster_columns = T, top_annotation = ha.t, column_split = df.group$cluster, row_names_gp = gpar(fontsize = 8), row_names_max_width = max_text_width(rownames(gsvascore), gp = gpar(fontsize = 8))) dev.off() aaa=as.data.frame(t(gsvascore)) write.csv(aaa,"GSVAscore.csv",row.names = F) phe=aaa pbmc=AddMetaData(pbmc, phe, col.name = colnames(phe)) genesetname="TLS" genesets=getGmt(paste0(genesetname,".gmt")) str(genesets) genename=genesets@.Data[[1]]@geneIds pbmc <- SetIdent(pbmc, value = "celltype") table(pbmc@active.ident) genes_to_check = genename # Improved mapping method, gene names vertically p1 <-DotPlot(pbmc, features = genes_to_check, assay='RNA' )+coord_flip()+ theme_bw()+ theme(panel.grid = element_blank(), axis.text.x=element_text(hjust = 0.5,vjust=0.5))+ labs(x=NULL,y=NULL)+guides(size=guide_legend(order=3))+ scale_color_gradientn(values = seq(0,1,0.2),colours = c('#330066','#336699','#66CC66','#FFCC33')) ggsave(print(p1),filename = "8.Specifies the gene set expressed in cells.pdf",height = 10,width =8) dev.off() # Select the first pathway here to draw the picture count <- gsvascore[genesetname, , drop = FALSE] count <- as.data.frame(t(count)) colnames(count) <- "geneset" count$cluster <- as.character(Idents(pbmc)) index<-vector() name<-vector() for (i in 1:length(unique(count$cluster))) { aaa<-dplyr::filter(count, cluster == unique(count$cluster)[i]) index<-c(mean(aaa[,"geneset"]),index) name<-c(unique(count$cluster)[i],name) } dat<-data.frame(name,index) value<-as.character(arrange(dat,index)$name) aaaa<-count aaaa$cluster<-factor(aaaa$cluster,levels =value ) title.name="TLS" p1<-ggplot(data=aaaa,aes(x=cluster,y=geneset,fill=cluster))+ geom_boxplot()+ ylab(label =title.name)+ theme_classic()+ theme(legend.position = 'none' ,axis.text.x =element_text(angle=90,size=8,hjust =1,vjust =0.5,colour = "black" ), axis.text.y =element_text(colour = "black" )) ggsave(p1,filename = paste0("9.",title.name,"Single cell expression.pdf"),height = 6,width = 8) pdf(file = "10.umap map.pdf",width=8,height=6) DimPlot(pbmc, reduction = "umap", label = TRUE, repel = TRUE) dev.off() colnames(count)[1]=genesetname pbmc=AddMetaData(pbmc, count, col.name = colnames(count)) pdf(file = "11.Gene set expression UMAP plots.pdf",width=6,height=5.5) FeaturePlot(pbmc, features = c(title.name), reduction = "umap", #label = TRUE, #ncol = 3, # Draw three columns cols = rev(rainbow(2))) dev.off() p1=DimPlot(pbmc, reduction = "umap", label = TRUE,group.by = "celltype", repel = TRUE) p3=FeaturePlot(pbmc, features = c(title.name), reduction = "umap", #label = TRUE, #ncol = 3, # Draw three columns cols = rev(rainbow(2))) pbmc@meta.data$group=ifelse(pbmc@meta.data[,title.name] > median(pbmc@meta.data[,title.name]),"High.score.group","Low.score.group") pdf(file = "12.High and low score group umap chart.pdf",width=6,height=5.5) DimPlot(pbmc, reduction = "umap", group.by = "group", cols = c("yellow2", "royalblue3"), pt.size = 1.5) dev.off() p4=DimPlot(pbmc, reduction = "umap", group.by = "group", cols = c("yellow2", "royalblue3"), pt.size = 1.5) p1+p3+p4 ggsave('13.umap Combination chart.pdf',width = 16,height=8) # pbmc@meta.data$celltype <- pbmc@active.ident data_plotC <- table(pbmc@meta.data$group, pbmc@meta.data$celltype) %>% reshape::melt() colnames(data_plotC) <- c("Sample", "CellType","Number") colourCount = length(unique(pbmc@meta.data$celltype)) getPalette = colorRampPalette(brewer.pal(8, "Dark2")) celltype_colors <- getPalette(colourCount) dev.off() pC1 <- ggplot(data = data_plotC, aes(x = Sample, y = Number, fill = CellType)) + geom_bar(stat = "identity", width=0.8,aes(group=CellType),position="stack")+ scale_fill_manual(values=celltype_colors) + theme_bw()+ theme(panel.grid =element_blank()) + labs(x="",y="Average cell number")+ theme(axis.text = element_text(size=12, colour = "black"))+ theme(axis.title.y = element_text(size=12, colour = "black"))+ theme(panel.border = element_rect(size = 1, linetype = "solid", colour = "black"))+ theme(axis.text.x = element_text(angle = 45,hjust = 0.8, vjust = 0.6)) pC2 <- ggplot(data = data_plotC, aes(x = Sample, y = Number, fill = CellType)) + geom_bar(stat = "identity", width=0.8,aes(group=CellType),position="fill")+ scale_fill_manual(values=celltype_colors) + theme_bw()+ theme(panel.grid =element_blank()) + labs(x="",y="Cell proportion")+ scale_y_continuous(labels = scales::percent)+ theme(axis.text = element_text(size=12, colour = "black"))+ theme(axis.title.y = element_text(size=12, colour = "black"))+ theme(panel.border = element_rect(size = 1, linetype = "solid", colour = "black"))+ theme(axis.text.x = element_text(angle = 45,hjust = 0.8, vjust = 0.6)) library(patchwork) pC <- pC1 + pC2 + plot_layout(ncol = 2, widths = c(1,1),guides = 'collect') pdf(file = "14.Ratio of high- and low-score immune cells.pdf",height=6,width=8) pC dev.off() top10 <- pbmc.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC) p4=DoHeatmap(pbmc, features = top10$gene) + NoLegend() ggsave(print(p4),filename = "15.Top 10 gene heat map.pdf",height = 6,width =8) head(df.group) gsvascore <- pbmc@meta.data[,hallmarkname] gsvascore[1, 1:5] a=colnames(pbmc) b=pbmc@meta.data$celltype c=pbmc@meta.data[,genesetname] drawdata=data.frame(id=a,celltype=b,score=c) drawdata$celltype=str_replace_all(drawdata$celltype,"/",".") d=as.data.frame(gsvascore) d$id=rownames(d) drawdata=inner_join(drawdata,d,by="id") write.csv(drawdata,"GSVAscore.csv",row.names = F) library(ggpubr) Index<-unique(drawdata$celltype) Index length(Index) gene="score" for (i in 1:length(Index)) { print(i) aaa<-drawdata%>% filter(celltype ==Index[[i]] ) y <- as.numeric(aaa[,gene]) bbb=aaa[,4:53] cor_data_df <- data.frame(colnames(bbb)) for (a in 1:length(colnames(bbb))){ test <- cor.test(as.numeric(bbb[,a]),y,method = "pearson") cor_data_df[a,2] <- test$estimate cor_data_df[a,3] <- test$p.value } names(cor_data_df) <- c("symbol","correlation","pvalue") write.csv (cor_data_df, file =paste("16.",Index[[i]], gene,'GSVA correlation analysis.csv',sep=' '), row.names =FALSE) cor_data_df<-na.omit(cor_data_df) posneg<-cor_data_df %>% filter(symbol != gene) posneg$group<-NA posneg$group[ posneg$pvalue<0.05& posneg$correlation<0]<-"negative" posneg$group[ posneg$pvalue>=0.05]<-"pvalue>0.05" posneg$group[ posneg$pvalue<0.05& posneg$correlation>0]<-"positive" posneg$symbol<-str_replace_all(posneg$symbol,'_',' ') posneg$symbol<-str_replace_all(posneg$symbol,'HALLMARK ','') p<-ggbarplot(posneg, x = "symbol", y = "correlation", fill = "group", # change fill color by mpg_level color = "white", # Set bar border colors to white palette = "jco", # jco journal color palett. see ?ggpar sort.val = "asc", # Sort the value in ascending order sort.by.groups = FALSE, # Don't sort inside each group #x.text.angle = 90, # Rotate vertically x axis texts ylab = "GSVA correlation", xlab = "Term", legend.title = "group", lab.col = "black", rotate = TRUE, ggtheme = theme_minimal() ) ggsave(p,filename = paste("16.",Index[[i]], gene,'GSVA Correlation Plot.pdf',sep=' '),width = 10,height = 10) } mergedata<-data.frame(symbol=as.character(),correlation=as.numeric(),pvalue=as.numeric()) for (i in 1:length(Index)) { assign(Index[i],read.csv(paste0("16. ",Index[i]," ",gene," GSVA correlation analysis.csv"))) aaa<-get(Index[i]) aaa$type<-Index[i] mergedata<-rbind(aaa,mergedata) } mergedata<-filter(mergedata,symbol!=gene) mergedata$symbol<-str_replace_all(mergedata$symbol,'_',' ') mergedata$symbol<-str_replace_all(mergedata$symbol,'HALLMARK ','') mergedata[mergedata[,"pvalue"]<=0.0001,"label"]<-"****" mergedata[mergedata[,"pvalue"]<= 0.001 & mergedata[,"pvalue"]>0.0001,"label"]<-"***" mergedata[mergedata[,"pvalue"]<= 0.01 & mergedata[,"pvalue"]>0.001,"label"]<-"**" mergedata[mergedata[,"pvalue"]<= 0.05 & mergedata[,"pvalue"]>0.01,"label"]<-"*" meandata<-data.frame(symbol= character(),meanvalue= numeric()) for (i in 1:50) { meandata1<-dplyr::filter(mergedata,symbol == mergedata$symbol[[i]] ) meandata[i,1]<-mergedata$symbol[[i]] meandata[i,2]<-mean(meandata1$correlation) } bbbbb<-as.character(arrange(meandata,meanvalue)$symbol) mergedata$symbol<-factor(mergedata$symbol,levels = bbbbb) p<-ggplot(mergedata,aes(x=type,y=symbol))+geom_tile(aes(fill=correlation))+ scale_fill_gradientn(colors=c("blue","white","red"),guide="colorbar")+ theme_void()+ theme(axis.text.x=element_text(angle=90,vjust=0.3,hjust = 1))+ theme(axis.text.y=element_text(angle=0,hjust=1))+ theme(axis.title.x=element_blank(),axis.title.y=element_blank())+ geom_text(aes(label = label),col='black',cex=2.5) ggsave(p,filename = '17.GSVA enrichment analysis correlation analysis heat map.pdf',width =6,height = 11) aaa<-drawdata y <- as.numeric(aaa[,gene]) bbb=aaa[,4:53] cor_data_df <- data.frame(colnames(bbb)) for (a in 1:length(colnames(bbb))){ test <- cor.test(as.numeric(bbb[,a]),y,method = "pearson") cor_data_df[a,2] <- test$estimate cor_data_df[a,3] <- test$p.value } names(cor_data_df) <- c("symbol","correlation","pvalue") write.csv (cor_data_df, file =paste( "18.ALL cell",gene,'GSVA correlation analysis.csv',sep=' '), row.names =FALSE) cor_data_df<-na.omit(cor_data_df) posneg<-cor_data_df %>% filter(symbol != gene) posneg$group<-NA posneg$group[ posneg$pvalue<0.05& posneg$correlation<0]<-"negative" posneg$group[ posneg$pvalue>=0.05]<-"pvalue>0.05" posneg$group[ posneg$pvalue<0.05& posneg$correlation>0]<-"positive" posneg$symbol<-str_replace_all(posneg$symbol,'_',' ') posneg$symbol<-str_replace_all(posneg$symbol,'HALLMARK ','') p<-ggbarplot(posneg, x = "symbol", y = "correlation", fill = "group", # change fill color by mpg_level color = "white", # Set bar border colors to white palette = "jco", # jco journal color palett. see ?ggpar sort.val = "asc", # Sort the value in ascending order sort.by.groups = FALSE, # Don't sort inside each group #x.text.angle = 90, # Rotate vertically x axis texts ylab = "GSVA correlation", xlab = "Term", legend.title = "group", lab.col = "black", rotate = TRUE, ggtheme = theme_minimal() ) ggsave(p,filename = paste("18.ALL cell", gene,'GSVA Correlation Plot.pdf',sep=' '),width = 10,height = 10) aaa=read.csv("GSVAscore.csv",header = T,sep = ",") aaa$group=NA aaa$group[aaa$score >= median(aaa$score)]="High" aaa$group[aaa$score < median(aaa$score)]="Low" table(aaa$group) rownames(aaa)=aaa$id pbmc=AddMetaData(pbmc, aaa, col.name = colnames(aaa)) pbmc <- SetIdent(pbmc, value = "group") df.group <- data.frame(umi = names(Idents(pbmc)), cluster = pbmc@active.ident, stringsAsFactors = F) head(df.group) aaa[1, 1:5] ha.t <- HeatmapAnnotation(Cluster = df.group$cluster) bbb=aaa[,4:54] bbb=bbb[,-51] bbb=as.matrix(bbb) bbb=t(bbb) pdf(file = "19.Group hallmark heat map.pdf",width = 10,height = 8) Heatmap(bbb, show_column_names = F, cluster_rows = T, cluster_columns = T, top_annotation = ha.t, column_split = df.group$cluster, row_names_gp = gpar(fontsize = 8), row_names_max_width = max_text_width(rownames(bbb), gp = gpar(fontsize = 8))) dev.off() num=floor(length(pbmc@meta.data$score)/10) ccc=arrange(pbmc@meta.data,score) ccc=ccc[1:num,] ccc=rownames(ccc) ddd=arrange(pbmc@meta.data,desc(score)) ddd=ddd[1:num,] ddd=rownames(ddd) colnames(pbmc) pbmc1=pbmc[,c(ccc,ddd)] table(pbmc1@meta.data$group) df.group <- data.frame(umi = names(Idents(pbmc1)), cluster = pbmc1@active.ident, stringsAsFactors = F) ha.t <- HeatmapAnnotation(Cluster = df.group$cluster) bbb=bbb[,c(ccc,ddd)] pdf(file = "19.group.top10 group hallmark heat map.pdf",width = 10,height = 8) Heatmap(bbb, show_column_names = F, cluster_rows = T, cluster_columns = T, top_annotation = ha.t, column_split = df.group$cluster, row_names_gp = gpar(fontsize = 8), row_names_max_width = max_text_width(rownames(bbb), gp = gpar(fontsize = 8))) dev.off()