#Part1 Identification of RCD-DEGs (RRDs) and enrichment functional analysis 
#Combination of GSE28750 and GSE95233----------------------------------------------------------------
setwd("D:\\AA研究\\RCD_sepsis_bulk_scrna")
rt1<-read.table("GSE28750_matrix.txt",header = T,sep = "\t")
rownames(rt1)<-rt1[,1]
rt1<-rt1[,-1]
rt2<-read.table("GSE95233_matrix.txt",header = T,sep = "\t")
rownames(rt2)<-rt2[,1]
rt2<-rt2[,-1]

table(rownames(rt1)%in%rownames(rt2))
#TRUE 
#54675 
length(intersect(rownames(rt1),rownames(rt2)))
#[1] 54675
rt1<-rt1[intersect(rownames(rt1),rownames(rt2)),]
rt2<-rt2[intersect(rownames(rt1),rownames(rt2)),]

cli1<-read.table("GSE28750_cli.txt",header = T,sep = "\t")
rownames(cli1)<-cli1[,1]
cli2<-read.table("GSE95233_cli.txt",header = T,sep = "\t")
cli2<-cli2[,-(2:4)]
rownames(cli2)<-cli2[,1]

identical(colnames(rt1),rownames(cli1))
#[1] TRUE
same2<-intersect(colnames(rt2),rownames(cli2))
rt2<-rt2[,same2]
cli2<-cli2[same2,]
identical(colnames(rt2),rownames(cli2))
#[1] TRUE
rt_merge<-cbind(rt1,rt2)
cli_merge<-rbind(cli1,cli2)

Group<-factor(cli_merge$Group,levels = c("Control","Sepsis"))
batch<-c(rep("GSE28750",nrow(cli1)),rep("GSE95233",nrow(cli2)))
mod<-model.matrix(~Group)
combat<-ComBat(rt_merge,batch = batch,mod = mod)

library(tinyarray)
boxplot(rt_merge,main="Orignal")
draw_pca(rt_merge,group_list = Group)

boxplot(combat,main="Batch corrected")
draw_pca(combat,group_list = Group)

save(combat,cli_merge,file = "GSE28750_GSE95233_Batch_corrected_input.Rdata")


setwd("D:\\AA研究\\RCD_sepsis_bulk_scrna")
load("DEG_input.Rdata")

gpl<-read.table("GPL570.txt",header = T,sep = "\t")
rownames(gpl)<-gpl[,1]

library(dplyr)
rt<-as.data.frame(combat)
rt$ID<-rownames(rt)
exp<-merge(rt,gpl,by.x="ID",by.y="ID")
exp<-exp[!duplicated(exp$Gene),]
rownames(exp)<-exp$Gene
exp<-exp[,-c(1,105)]
write.table(exp,file = "exp_id_trans.txt",sep = "\t",quote = F,col.names = NA)
write.table(cli_merge,file = "cli_merge.txt",sep = "\t",quote = F,col.names = NA)

#Differential analysis----------------------------------------------------------------
library(limma)
exp<-read.table("exp_id_trans.txt",header = T,sep = "\t")
rownames(exp)<-exp[,1]
exp<-exp[,-1]
cli<-read.table("cli_merge.txt",header = T,sep = "\t")
rownames(cli)<-cli[,1]
cli<-cli[,-1]
identical(rownames(cli),colnames(exp))
#[1] TRUE
table(cli$Group)
#Control  Sepsis 
#42      61 
group_list<-c(rep("Control",42),rep("Sepsis",61))
design <- model.matrix(~0+factor(group_list))
colnames(design)=levels(factor(group_list))
contrast.matrix<-makeContrasts("Sepsis-Control",
                               levels = design)
fit <- lmFit(exp,design)
fit2 <- contrasts.fit(fit, contrast.matrix) 
fit2 <- eBayes(fit2)
DEG<-topTable(fit2, coef=1, n=Inf,p.value = 0.05) %>% na.omit()  
DEG$Change<-NA
up<-subset(DEG,DEG$logFC>0.585)
up$Change<-"UP"
down<-subset(DEG,DEG$logFC< -0.585)
down$Change<-"DOWN"
stable<-subset(DEG,
               subset=DEG$logFC >= -0.585 & DEG$logFC <= 0.585)
stable$Change<-"STABLE"
DEG1<-rbind(up,down,stable)
table(DEG1$Change)
#DOWN STABLE     UP 
#501  6424    819 
diff<-rbind(up,down)
diffexp<-exp[rownames(diff),]
write.table(DEG1,file = "GSE28750_GSE95233_com_allgenediff.txt",sep = "\t",col.names = NA,quote = F)
write.table(diff,file = "GSE28750_GSE95233_com_diffgene.txt",sep = "\t",col.names = NA,quote = F)
write.table(diffexp,file = "GSE28750_GSE95233_com_diffgeneexp.txt",sep = "\t",col.names = NA,quote = F)

#Visualization----------------------------------------------------------------
DEG<-read.table("part1.GSE28750_GSE95233_DEGs/GSE28750_GSE95233_com_allgenediff.txt",header = T,sep = "\t")
rownames(DEG)<-DEG[,1]
DEG<-DEG[,-1]
DEG<-data.frame(DEG)
library(ggplot2)
library(ggrepel)
top_20 <-bind_rows(
  DEG %>%
    filter(Change == 'UP') %>%
    arrange(adj.P.Val, desc(abs(logFC))) %>%
    head(10),
  DEG %>%
    filter(Change == 'DOWN') %>%
    arrange(adj.P.Val, desc(abs(logFC))) %>%
    head(10))
top_20 %>% gt()
ggplot(DEG,aes(logFC,-log10(adj.P.Val),colour=Change))+
  geom_point()+
  labs(x=expression(Log[2]*"Fold Change"),
       y=expression(-Log[10]*"Adjust P-value"))+
  theme_bw()+
  scale_color_manual(values = c("green", "gray", "red"))+
  scale_x_continuous(limits = c(-2,2))+
  geom_vline(xintercept=-0.585, linetype=2, colour="gray30") +
  geom_vline(xintercept=0.585, linetype=2, colour="gray30") +
  geom_hline(yintercept=-log10(0.05), linetype=2, colour="gray30") +
  geom_label_repel(data = top_20,
                   aes(logFC, -log10(adj.P.Val), label = rownames(top_20)),
                   size = 4)+
  theme(axis.title = element_text(size = 16),
        legend.text = element_text(size = 16))

#GSEA----------------------------------------------------------------
library(clusterProfiler)
library(org.Hs.eg.db)
library(dplyr)
library(ggplot2)

deg<-read.table("part1.GSE28750_GSE95233_DEGs/GSE28750_GSE95233_com_diffgene.txt",header = T,sep = "\t")
deg1<-deg[,c(1,2)]
ids<-bitr(deg1$X,"SYMBOL","ENTREZID","org.Hs.eg.db")
colnames(deg1)[1]<-"SYMBOL"
data<-inner_join(ids,deg1,by="SYMBOL")
data<-data[order(data$logFC,decreasing = T),]
geneList = data$logFC
names(geneList) = data$ENTREZID

library(msigdbr)
m_t2g <- msigdbr(species = "Homo sapiens", category = "H") %>% 
  dplyr::select(gs_name, entrez_gene)
gsea_res <- GSEA(geneList, 
                 TERM2GENE = m_t2g,
                 minGSSize = 10,
                 maxGSSize = 500,
                 pvalueCutoff = 1,
                 pAdjustMethod = "BH"
)
gsea_res@result$Description <- gsub("HALLMARK_", "", gsea_res@result$Description)
ridgeplot(gsea_res,
          showCategory = 20,
          fill = "pvalue",
          core_enrichment = TRUE,
          label_format = 30,
          orderBy = "NES",
          decreasing = FALSE
) +
  theme(axis.text.y = element_text(size=12))+
  scale_fill_gradient(low = "red", high = "blue", 
                      name = "p - value")


#GO----------------------------------------------------------------
library(org.Hs.eg.db)
library(clusterProfiler)
genes <- read.delim('part1.GSE28750_GSE95233_DEGs/GSE28750_GSE95233_com_diffgene.txt', header = TRUE, stringsAsFactors = FALSE)[[1]]
ids<-bitr(genes,"SYMBOL","ENTREZID","org.Hs.eg.db")
enrich.go <- enrichGO(gene = ids$ENTREZID,  
                      OrgDb = org.Hs.eg.db, 
                      keyType = 'ENTREZID',  
                      ont = 'ALL',  #GO Ontology，
                      pAdjustMethod = 'fdr', 
                      pvalueCutoff = 0.05,  
                      qvalueCutoff = 0.2,  
                      readable = FALSE)
save(enrich.go,file = "GSE28750_GSE95233_com_diff_enrichGO.Rdata")


barplot(enrich.go, x = "GeneRatio", color = "p.adjust", 
        showCategory =5, 
        split="ONTOLOGY") + 
  facet_grid(ONTOLOGY~., scale='free') +
  scale_fill_gradient(low = "red", high = "blue", 
                      name = "p - value")+
  theme(axis.text.y = element_text(size = 16))

#KEGG----------------------------------------------------------------
enrich.kegg <- enrichKEGG(gene = ids$ENTREZID,  
                          organism = "hsa", 
                          keyType = 'kegg',  
                          pAdjustMethod = 'fdr',  
                          pvalueCutoff = 0.05,  
                          qvalueCutoff = 0.2)  
save(enrich.kegg,file = "GSE28750_GSE95233_com_diff_enrichkegg.Rdata")

barplot(enrich.kegg, x = "GeneRatio", color = "p.adjust", 
        showCategory =15) +
  scale_fill_gradient(low = "red", high = "blue", 
                      name = "p - value")+
  theme(axis.text.y = element_text(size = 16))

#RCD-DEGs---------------------------------------------------------
library(gplots)
rcd<-read.table("part3.RCD_DEGs_inter/RCDgenes.txt",header = T,sep = "\t")
c1<-subset(rcd,rcd$Type=="Netotic cell death")
c2<-subset(rcd,rcd$Type=="Oxeiptosis")
c3<-subset(rcd,rcd$Type=="Alkaliptosis")
c4<-subset(rcd,rcd$Type=="Anoikis")
c5<-subset(rcd,rcd$Type=="Mitotic cell death")
c6<-subset(rcd,rcd$Type=="Cuproptosis")
c7<-subset(rcd,rcd$Type=="Disulfidptosis")
c8<-subset(rcd,rcd$Type=="Lysosomal")
c9<-subset(rcd,rcd$Type=="Entotic cell death")
c10<-subset(rcd,rcd$Type=="Parthanatos")
c11<-subset(rcd,rcd$Type=="Autosis")
c12<-subset(rcd,rcd$Type=="Necroptosis")
c13<-subset(rcd,rcd$Type=="Pyrotosis")
c14<-subset(rcd,rcd$Type=="Mpt")
c15<-subset(rcd,rcd$Type=="Autophagy")
c16<-subset(rcd,rcd$Type=="Ferroptosis")
c17<-subset(rcd,rcd$Type=="Immunogenic cell death")
c18<-subset(rcd,rcd$Type=="Apoptosis")
set1<-c1$Gene
set2<-c2$Gene
set3<-c3$Gene
set4<-c4$Gene
set5<-c5$Gene
set6<-c6$Gene
set7<-c7$Gene
set8<-c8$Gene
set9<-c9$Gene
set10<-c10$Gene
set11<-c11$Gene
set12<-c12$Gene
set13<-c13$Gene
set14<-c14$Gene
set15<-c15$Gene
set16<-c16$Gene
set17<-c17$Gene
set18<-c18$Gene

s2 <- list(
  Netotic_cell_death = set1,
  Oxeiptosis = set2,
  Alkaliptosis = set3,
  Anoikis = set4,
  Mitotic_cell_death = set5,
  Cuproptosis = set6,
  Disulfidptosis = set7,
  Lysosomal = set8,
  Entotic_cell_death = set9,
  Parthanatos = set10,
  Autosis = set11,
  Necroptosis = set12,
  Pyrotosis = set13,
  Mpt = set14,
  Autophagy = set15,
  Ferroptosis = set16,
  Immunogenic_cell_death = set17,
  Apoptosis = set18)
#upset
library(UpSetR)
upset(fromList(s2), 
      nsets = 18,
      number.angles = 30,
      point.size = 2.5,
      line.size = 1.5,
      sets.x.label = "Gene Number", 
      text.scale=c(1.5,1.5,1.5,1.5,4,1), 
      order.by = "freq",
      decreasing = T,
      empty.intersections = "on")

setwd("D:\\AA研究\\RCD_sepsis_bulk_scrna")
library(VennDiagram)
#RCD----------------------------------------------------------------
rcd<-read.table("part3.RCD_DEGs_inter/RCDgenes.txt",header = T,sep = "\t")
list1<-rcd$Gene
#DEGs
deg<-read.table("part1.GSE28750_GSE95233_DEGs/GSE28750_GSE95233_com_diffgene.txt",header = T,sep = "\t")
list2<-deg$X
list<-list(RCD=list1,DEGs=list2)
p<-venn.diagram(list, filename = 'RCD_DEGs_venn.png', imagetype = 'png', scaled=F,
                fill = c('red', 'blue'), alpha = 0.50, cat.col = rep('black', 2), 
                col = 'black', cex = 1.5, fontfamily = 'serif', 
                cat.cex = 1.5, cat.fontfamily = 'serif',resolution = 300)

inter <- get.venn.partitions(list)
interset<-inter$..values..
interset<-interset[[1]]
write.csv(interset,file = "RCD_DEGs_inter.csv")
#630RCD-DEGs
rcddegs<-read.csv("RCD_DEGs_inter.csv")
rownames(rcddegs)<-rcddegs[,2]
rownames(deg)<-deg[,1]
same<-intersect(rownames(rcddegs),rownames(deg))
deg<-deg[same,]
deg<-deg[,-1]
write.table(deg,file = "RCD_DEGs_inter_diff.txt",sep = "\t",quote = F,col.names = NA)

#RCD-DEGs----------------------------------------------------------------
setwd("D:\\AA研究\\RCD_sepsis_bulk_scrna")
deg<-read.table("part3.RCD_DEGs_inter/RCD_DEGs_inter_diff.txt",header = T,sep = "\t")
table(deg$Group)
length(deg$Group)
#[1] 627
#Anoikis              Apoptosis              Autophagy         Disulfidptosis 
#2                    105                     38                      2 
#Ferroptosis Immunogenic cell death              Lysosomal     Mitotic cell death 
#13                     28                     30                     20 
#Mpt       Multi_sets_inter            Necroptosis         Two_sets_inter 
#1                    231                      4                    153 
values<-c(1,2,2,4,13,20,28,30,38,105,153,231)
names<-c("Mpt","Anoikis","Disulfidptosis","Necroptosis","Ferroptosis",
         "Mitotic cell death","Immunogenic cell death","Lysosomal","Autophagy","Apoptosis",
         "Two_sets_inter","Multi_sets_inter")
r<-cbind(names,values)
r<-as.data.frame(r)
r$Ratio<-as.numeric(r$values)/627

library(dplyr)
library(ggplot2)
legend<-c("Mpt 1(0.16%)","Anoikis 2(0.32%)","Disulfidptosis 2(0.32%)","Necroptosis 4(0.64%)","Ferroptosis 13(2.07%)",
          "Mitotic cell death 20(3.19%)","Immunogenic cell death 29(4.47%)","Lysosomal 30(4.78%)","Autophagy 38(6.06%)",
          "Apoptosis 105(16.75%)","Two_sets_inter 153(24.40%)","Multi_sets_inter 233(36.84%)")
colour=c("#DC143C","#20B2AA","#FFA500","#98FB98","#F08080","#1E90FF",  
         "#808000","#FF00FF","#FA8072","#87CEEB","#40E0D0","#FFE4B5")

pdf(file = "RCD_DEGs_Type_Ratio.pdf",width = 26,height = 16)
barplot(values,names.arg = names,legend.text = legend,col = colour,width = 2.5,space = 0.2,args.legend = list(x = "topleft", cex=4))
dev.off()

#RCD-DEGs-KEGG----------------------------------------------------------------
setwd("D:\\AA研究\\RCD_sepsis_bulk_scrna")
library(clusterProfiler)
library(org.Hs.eg.db)
library(enrichplot)
library(dplyr)
library(ggplot2)
deg<-read.table("part3.RCD_DEGs_inter/RCD_DEGs_inter_diff.txt",header = T,sep = "\t")
geneList<-deg$logFC
deg1<-deg[,c(1,2)]
ids<-bitr(deg1$X,"SYMBOL","ENTREZID","org.Hs.eg.db")
colnames(deg1)[1]<-"SYMBOL"
temp<-inner_join(ids,deg1,by="SYMBOL")
temp<-temp[,-1]
enrich.kegg <- enrichKEGG(gene = temp$ENTREZID,  
                          organism = "hsa", 
                          keyType = 'kegg',  
                          pAdjustMethod = 'fdr',  
                          pvalueCutoff = 0.05,  
                          qvalueCutoff = 0.2)  
enrich.kegg1<-setReadable(enrich.kegg,OrgDb = "org.Hs.eg.db",keyType = "ENTREZID")
new_colors <- c("Epstein-Barr virus infection" = "#1F77B4FF",
                "Hematopoietic cell lineage" = "#AEC7E8FF",
                "Inflammatory bowel disease" = "#FF7F0EFF",
                "NF-kappa B signaling pathway" = "#FFBB78FF",
                "PD-L1 expression and PD-1 checkpoint pa" = "#2CA02CFF",
                "Primary immunodeficiency" = "#98DF8AFF",
                "T cell receptor signaling pathway" = "#D62728FF",
                "Th1 and Th2 cell differentiation" = "#FF9896FF",
                "Th17 cell differentiation" = "#9467BDFF",
                "Toxoplasmosis" = "#C5B0D5FF")
colors<-c("#1F77B4FF" ,"#AEC7E8FF" ,"#FF7F0EFF","#FFBB78FF", "#2CA02CFF","#98DF8AFF", "#D62728FF", "#FF9896FF" ,"#9467BDFF" ,"#C5B0D5FF")
paletteer_d("ggthemes::Classic_20",n=10) 

cnetp1 <- cnetplot(enrich.kegg1,  
                   layout = "kk",
                   color.params = list(foldChange = geneList,
                                       edge = TRUE,
                                       category = new_colors),
                   showCategory = 10,
                   node_label = 'all',
                   cex.params = list(category_node = 1.5, 
                                     gene_node = 1,
                                     category_label = 1.5,
                                     gene_label = 1))

cnetp1 <- cnetplot(enrich.kegg1,  
                   layout="kk",
                   foldChange = geneList,
                   showCategory = 10,
                   colorEdge = T,
                   node_label = 'all',
                   color_category ='steelblue', 
                   cex.params = list(category_node = 1.5 # 控制标签和节点的大小
                                     , gene_node = 1
                                     , category_label = 1.5
                                     , gene_label = 1))
ggsave(cnetp1,filename ='RCD-DEGs-KEGG_cnetplot.pdf', width =20,height =16)
#RCD-DEGs-GO分析
enrich.go <- enrichGO(gene = temp$ENTREZID,  
                      OrgDb = org.Hs.eg.db, 
                      keyType = 'ENTREZID',  
                      ont = 'ALL',  
                      pAdjustMethod = 'fdr',  
                      pvalueCutoff = 0.05,  
                      qvalueCutoff = 0.2,  
                      readable = FALSE)
enrich.go1<-setReadable(enrich.go,OrgDb = "org.Hs.eg.db",keyType = "ENTREZID")
cnetp1 <- cnetplot(enrich.go1,  
                   layout="kk",
                   foldChange = geneList,
                   showCategory = 10,
                   colorEdge = T,
                   node_label = 'all',
                   color_category ='steelblue', 
                   cex.params = list(category_node = 1.5 
                                     , gene_node = 1
                                     , category_label = 1.5
                                     , gene_label = 1))
ggsave(cnetp1,filename ='RCD-DEGs-GO_cnetplot.pdf', width =20,height =16)


#RCD-DEGs-GSEA----------------------------------------------------------------
deg2<-deg1[order(deg1$logFC,decreasing = T),]
ids<-bitr(deg2$SYMBOL,"SYMBOL","ENTREZID","org.Hs.eg.db")
identical(deg2$SYMBOL,ids$SYMBOL) #[1] TRUE
geneList<-deg2$logFC #此处的必须是排序的
names(geneList) = ids$ENTREZID

library(msigdbr)
m_t2g <- msigdbr(species = "Homo sapiens", category = "H") %>% 
  dplyr::select(gs_name, entrez_gene)
gsea_res <- GSEA(geneList, 
                 TERM2GENE = m_t2g,
                 minGSSize = 10,
                 maxGSSize = 500,
                 pvalueCutoff = 1,
                 pAdjustMethod = "BH"
)
ridgeplot(gsea_res,
          showCategory = 20,
          fill = "pvalue", #填充色 "pvalue", "p.adjust", "qvalue" 
          core_enrichment = TRUE,#是否只使用 core_enriched gene
          label_format = 30,
          orderBy = "NES",
          decreasing = FALSE
) +
  theme(axis.text.y = element_text(size=8))

####Part2 Characterization of hub RCD pathways and RRDs####
#CIBERSORT----------------------------------------------------------------
library(CIBERSORT)
sig_matrix <- system.file("extdata", "LM22.txt", package = "CIBERSORT")
rt=read.table("exp_id_trans.txt" , header=T, sep="\t", check.names=F)
rt=as.matrix(rt)
rownames(rt)=rt[,1]
exp=rt[,2:ncol(rt)]
dimnames=list(rownames(exp),colnames(exp))data=matrix(as.numeric(as.matrix(exp)),nrow=nrow(exp),dimnames=dimnames)
data=avereps(data)
data=data[rowMeans(data)>0,]
#CIBERSORT
results <- cibersort(sig_matrix = LM22,mixture_file = data,perm = 1000,QN = F)
write.table(results,"GSE28750_GSE95233_CIBERSORT.txt",sep = "\t",quote = F,col.names = NA)

results<-read.table("GSE28750_GSE95233_CIBERSORT.txt",header = T,sep = "\t")
rownames(results)<-results[,1]
results<-results[,-1]
k <- apply(results,2,function(x) {sum(x == 0) < nrow(rt)/2})
results1<- as.data.frame(results[,k])
group<-read.table("cli_merge.txt",header = T,sep = "\t")
rownames(group)<-group[,1]
group<-group[,-1]
identical(rownames(results1),rownames(group))
#[1] TRUE
data<-cbind(results1[,-(23:25)],group=group$Group)
library(reshape2)
data$Sample<-rownames(data)
data1<-melt(data,id.vars =c("Sample","group"))
colnames(data1)<-c("Sample","Group","Celltype","Composition")
mode(data1$Composition)
#[1] "numeric"

library(ggplot2)
library(ggpubr)
library(RColorBrewer)
theme <- theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1,size=16), 
        axis.text.y = element_text(size = 20), 
        axis.title.x = element_blank(),
        axis.title.y = element_text(size = 18), 
        axis.line = element_line(size = 1),
        plot.title = element_blank(),
        legend.text = element_text(size = 20),
        legend.key = element_rect(fill = 'transparent'), 
        legend.background = element_rect(fill = 'transparent'), 
        legend.position = "top",
        legend.title = element_blank())
p <- ggplot(data1,aes(x = Celltype,y = Composition,fill = factor(Group))) +
  geom_violin(position = position_dodge(width = 1), scale = 'width') +
  geom_boxplot(position = position_dodge(width = 1), outlier.shape=NA, 
               width = 0.25, alpha = 0.2, show.legend = FALSE) +
  #scale_fill_manual(values = alpha(brewer.pal(8,"Set1")[1:2],0.6)) +
  scale_fill_manual(values =  c("#377EB8","#E41A1C"))+
  scale_y_continuous(expand = c(0,0),limits = c(0,0.8), breaks = seq(0,0.8,0.1),
                     labels = seq(0,0.8,0.1)) +
  theme +
  labs(y = 'Immune Cell Relative Proportion',fill = NULL)

Data_summary <- as.data.frame(compare_means(Composition~Group, data1, method = "wilcox.test", 
                                            paired = FALSE,group.by = "Celltype"))    

stat.test <- Data_summary[,-c(2,9)]
stat.test$xmin=c(1:21)-0.2  
stat.test$xmax=c(1:21)+0.2 
stat.test$p.signif[stat.test$p.signif == "ns"] <- NA

p2 <- p + geom_signif(xmin = stat.test$xmin,xmax = stat.test$xmax,
                      annotations = stat.test$p.signif,margin_top = 0.00,
                      y_position = 0.72,size = 0.5,textsize = 7.5,
                      tip_length = 0)


#Con-Sepsis：RCD-GSVA----------------------------------------------------------------
setwd("D:\\AA研究\\RCD_sepsis_bulk_scrna")
rcd<-read.table("RCDgenes.txt",header = T,sep = "\t")
c1<-subset(rcd,rcd$Type=="Netotic cell death")
c2<-subset(rcd,rcd$Type=="Oxeiptosis")
c3<-subset(rcd,rcd$Type=="Alkaliptosis")
c4<-subset(rcd,rcd$Type=="Anoikis")
c5<-subset(rcd,rcd$Type=="Mitotic cell death")
c6<-subset(rcd,rcd$Type=="Cuproptosis")
c7<-subset(rcd,rcd$Type=="Disulfidptosis")
c8<-subset(rcd,rcd$Type=="Lysosomal")
c9<-subset(rcd,rcd$Type=="Entotic cell death")
c10<-subset(rcd,rcd$Type=="Parthanatos")
c11<-subset(rcd,rcd$Type=="Autosis")
c12<-subset(rcd,rcd$Type=="Necroptosis")
c13<-subset(rcd,rcd$Type=="Pyrotosis")
c14<-subset(rcd,rcd$Type=="Mpt")
c15<-subset(rcd,rcd$Type=="Autophagy")
c16<-subset(rcd,rcd$Type=="Ferroptosis")
c17<-subset(rcd,rcd$Type=="Immunogenic cell death")
c18<-subset(rcd,rcd$Type=="Apoptosis")
set1<-c1$Gene
set2<-c2$Gene
set3<-c3$Gene
set4<-c4$Gene
set5<-c5$Gene
set6<-c6$Gene
set7<-c7$Gene
set8<-c8$Gene
set9<-c9$Gene
set10<-c10$Gene
set11<-c11$Gene
set12<-c12$Gene
set13<-c13$Gene
set14<-c14$Gene
set15<-c15$Gene
set16<-c16$Gene
set17<-c17$Gene
set18<-c18$Gene

s2 <- list(
  Netotic_cell_death = set1,
  Oxeiptosis = set2,
  Alkaliptosis = set3,
  Anoikis = set4,
  Mitotic_cell_death = set5,
  Cuproptosis = set6,
  Disulfidptosis = set7,
  Lysosomal = set8,
  Entotic_cell_death = set9,
  Parthanatos = set10,
  Autosis = set11,
  Necroptosis = set12,
  Pyrotosis = set13,
  Mpt = set14,
  Autophagy = set15,
  Ferroptosis = set16,
  Immunogenic_cell_death = set17,
  Apoptosis = set18)

rt=read.table("exp_id_trans.txt" , header=T, sep="\t", check.names=F)
#GSVA
library(GSVA)
rt<-as.data.frame(rt)
rownames(rt)<-rt[,1]
rt<-rt[,-1]
rt<-data.matrix(rt)
gsvaP<- ssgseaParam(
  exprData = rt,
  geneSets = s2,
  assay = NA_character_,
  annotation = NA_character_,
  minSize = 1,
  maxSize = Inf,
  alpha = 0.25,
  normalize = TRUE)
gsva<-gsva(gsvaP)
gsva<-as.matrix(gsva)

save(gsva,file = "GSE28750_GSE95233_GSVAscores.Rdata")

group<-read.table("cli_merge.txt",header = T,sep = "\t")
identical(colnames(gsva),group$Id)
#[1] TRUE
gsva<-t(gsva)
data<-cbind(gsva,group[,c(2,3)])
colnames(data)[[19]]<-"Sample"
rt=melt(data,id.vars=c("Sample","Group"),variable.names="RCD_type",value_name="Score")
colnames(rt)[[3]]<-"RCD_type"
colnames(rt)[[4]]<-"Score"

library(ggpubr)
ggplot(rt, aes(x = RCD_type, y = Score))+ 
  labs(y="Scores", x =  NULL,title = "GSVA scores of RCD_types")+  
  geom_boxplot(aes(fill = Group), position = position_dodge(0.5), width = 0.5, outlier.alpha = 0)+ 
  scale_fill_manual(values = c("blue", "red")) +
  theme_bw() + 
  theme(plot.title = element_text(size = 20,color="black",hjust = 0.5), 
        axis.text.x = element_text(angle = 45, hjust = 1 ,size = 12),
        panel.grid = element_blank(),
        legend.position = "top",
        legend.text = element_text(size= 18),
        legend.title= element_text(size= 14)) + 
  stat_compare_means(aes(group =  Group),
                     label = "p.signif",
                     method = "wilcox.test",
                     hide.ns = T)

#Correlation (CIBERSORT results and GSVA scores)----------------------------------------------------------------------------------------------------
load("GSE28750_GSE95233_GSVAscores.Rdata")
results<-read.table("GSE28750_GSE95233_CIBERSORT.txt",header = T,sep = "\t",check.names = F)
rownames(results)<-results[,1]
results<-results[,-1]

group<-read.table("cli_merge.txt",header = T,sep = "\t")
sep<-subset(group,group$Group=="Sepsis")
same1<-intersect(rownames(gsva),sep$Id)
same2<-intersect(rownames(results),sep$Id)
identical(same1,same2)
#[1] TRUE
gsva1<-gsva[same1,]
results1<-results[same2,]

library(psych)
cor<-corr.test(gsva1,results1,method = "pearson",adjust = "fdr")
length(cor$r>0.4)
#[1] 450
length(cor$p.adj<0.05)
#[1] 450
r<-cor$r
p<-cor$p.adj

save(r,p,cor,file = "CIBERSORT_GSVA_cor.Rdata")

setwd("D:\\AA研究\\RCD_sepsis_bulk_scrna")
load("CIBERSORT_GSVA_cor.Rdata")
write.table(p,file = "GSVA_CIBERSORT_P.txt",quote = F,sep = "\t",col.names = NA)
write.table(r,file = "GSVA_CIBERSORT_R.txt",quote = F,sep = "\t",col.names = NA)


#RCD-ROC ----------------------------------------------------------------------------------------------------
setwd("D:\\AA研究\\RCD_sepsis_bulk_scrna")
load("GSE28750_GSE95233_GSVAscores.Rdata")
group<-read.table("cli_merge.txt",header = T,sep = "\t")
group<-group[order(group$Group,decreasing = T),]
gsva<-gsva[group$X,]
identical(rownames(gsva),group$X) #[1] TRUE
data<-cbind(gsva,Survival=group$Group)
data<-data.frame(data)
roc1<-roc(data$Survival,as.numeric(data$Netotic_cell_death))
roc18<-roc(data$Survival,as.numeric(data$Apoptosis))
roc1$auc
roc18$auc
plot(smooth(roc1),col="red",legacy.axes=T)
plot(smooth(roc7),col="#808000",add=TRUE)
plot(smooth(roc14),col="#8A2BE2",add=TRUE)
plot(smooth(roc3),col="#7CFC00",add=TRUE)
plot(smooth(roc16),col="blue",add=TRUE)
plot(smooth(roc2),col="midnightblue",add=TRUE)
plot(smooth(roc10),col="pink",add=TRUE)
plot(smooth(roc8),col="#FF00FF",add=TRUE)
plot(smooth(roc11),col="#E9967A",add=TRUE)
plot(smooth(roc4),col="#6a5acd",add=TRUE)
plot(smooth(roc13),col="#228B22",add=TRUE)
legend("bottomright",legend=c("Netotic_CD-auc0.9824","Disulfidptosis-auc0.95",
                              "Mpt-auc0.9247","Alkaliptosis-auc0.9212",
                              "Ferroptosis-auc0.9212","Oxeiptosis-auc0.8989",
                              "Parthanatos-auc0.8888","Lysosomal-auc0.8646",
                              "Autosis-auc0.8458","Anoikis-auc0.8314",
                              "Pytoptosis-auc0.8177"),
       col=c("red","#808000","#8A2BE2","#7CFC00","blue","midnightblue","pink",
             "#ff00ff","#e9967a","#6a5acd","#228b22"),lty=1.5,cex =0.6)


#WGCNA-----------------------------------------------------------------------------------------------------------
library(WGCNA)
setwd("D:\\AA研究\\RCD_sepsis_bulk_scrna")
rt=read.table("exp_id_trans.txt" , header=T, sep="\t", check.names=F)
rownames(rt)<-rt[,1]
exp<-rt[,-1]
exp1 <- exp[order(apply(exp,1,mad), decreasing = T)[1:10000],]
exp2<-t(exp1)
gsg <- goodSamplesGenes(exp2,verbose = 3)
gsg$allOK  #[1] TRUE
sampleTree<-hclust(dist(exp2),method="average")
plot(sampleTree,main = "Sample Clustering to detect outliners",sub = "",xlab = "")
cutHeight=85
plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 2, 
     cex.axis = 1.5, cex.main = 2) +
  abline(h = cutHeight, col = "red") 
clust = cutreeStatic(sampleTree, cutHeight = cutHeight, minSize = 10)
keepSamples = (clust==1)
exp2=exp2[keepSamples,]     
sampleTree_final<-hclust(dist(exp2),method="average")
plot(sampleTree_final,main = "Sample Clustering to detect outliners",sub = "",xlab = "")

trait<-read.table("GSE28750_GSE95233_RCD_GSVA.txt",header = T,sep = "\t")
rownames(trait)<-trait[,1]
trait<-trait[,-1]
sample<-rownames(exp2)
trait<-trait[sample,]
identical(rownames(exp2),rownames(trait))
#[1] TRUE
dataTraits1<-trait
traitColors<-numbers2colors(dataTraits1,signed=FALSE) 

plotDendroAndColors(sampleTree_final,traitColors,groupLabels=names(dataTraits1),main="Sample dendrogram and trait heatmap",
                    cex.colorLabels=1.0,cex.dendroLabels=1.0,cex.rowText=2) 
nGenes<-ncol(exp2)
nSamples<-nrow(exp2)
save(nGenes,nSamples,exp2,dataTraits1,file="WGCNA_MADfilter1W_input.Rdata")

load("WGCNA_MADfilter1W_input.Rdata")
powers <- c(c(1:10), seq(from = 12, to = 20, by = 2))
sft <- pickSoftThreshold(exp2, powerVector = powers, verbose = 5)
sft$powerEstimate    #[1] 9
sft$fitIndices 
cex1=0.9

win.graph(width=6, height=5,pointsize=9) 
plot(sft$fitIndices[, 1], -sign(sft$fitIndices[, 3]) * sft$fitIndices[, 2],
     xlab = "Soft Threshold (power)", ylab = "Scale Free Topology Model Fit, signed R^2", type = "n",
     main = paste("Scale independence"))+
  text(sft$fitIndices[, 1], -sign(sft$fitIndices[, 3]) * sft$fitIndices[, 2],
       labels = powers, cex=cex1,col = "red")+
  abline(h=0.90,col="red") 

op <- par(mar = rep(0, 4))  
plot.new()
par(op)
plot(sft$fitIndices[, 1], sft$fitIndices[, 5],
     xlab = "Soft Threshold (power)", ylab = "Mean Connectivity", type = "n",
     main = paste("Mean connectivity"))+
  text(sft$fitIndices[, 1], sft$fitIndices[, 5], labels = powers, cex = cex1, col = "red")
save(sft,powers,file="WGCNA_MADfilter1W_power_value.Rdata")

load(file = "WGCNA_MADfilter1W_input.Rdata")
load(file = "WGCNA_MADfilter1W_power_value.Rdata")
cor<-WGCNA::cor
net <- blockwiseModules(exp2, power = 9, maxBlockSize = 10000,
                        TOMType ='unsigned', minModuleSize = 30,
                        reassignThreshold = 0, mergeCutHeight = 0.25,
                        numericLabels = TRUE, pamRespectsDendro = FALSE,
                        saveTOMs=TRUE, saveTOMFileBase = "drought",
                        verbose = 3)
table(net$colors)
#0    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15   16   17   18 
#2829 1423 1185 1152  577  465  375  290  269  210  200  163  162  153  152  145  115   85   50 
cor<-stats::cor
moduleLabels = net$colors
moduleColors = labels2colors(moduleLabels)
pdf("R.WGCNA_genes-modules.pdf",width = 8,height = 6)
plotDendroAndColors(net$dendrograms[[1]], moduleColors[net$blockGenes[[1]]],
                    "Module colors",
                    dendroLabels = FALSE, hang = 0.03,
                    addGuide = TRUE, guideHang = 0.05)
dev.off() 

MEs0 = moduleEigengenes(exp2, moduleColors)$eigengenes

MEs = orderMEs(MEs0)
moduleTraitCor = cor(MEs, dataTraits1 , use = "p");
moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples)
textMatrix = paste(signif(moduleTraitCor, 2), "\n(",
                   signif(moduleTraitPvalue, 1), ")", sep = "");
dim(textMatrix) = dim(moduleTraitCor)
pdf("WGCNA_Module-trait-relat.pdf",width = 14,height = 12)
par(mar = c(6, 8.5, 3, 3));
labeledHeatmap(Matrix = moduleTraitCor,
               xLabels = colnames(dataTraits1),
               yLabels = names(MEs),
               ySymbols = names(MEs),
               colorLabels = FALSE,
               colors = blueWhiteRed(50),
               textMatrix = textMatrix,
               setStdMargins = FALSE,
               cex.text = 0.5,
               zlim = c(-1,1),
               main = paste("Module-trait relationships"))
dev.off() 
save(moduleLabels,moduleColors,MEs,net,file = "WGCNA_MAD1W_genemodule.Rdata")

modNames <- substring(names(MEs), 3)  
modNames
#[1] "brown"        "red"          "black"        "midnightblue" "pink"         "salmon"       "turquoise"   
#[8] "lightcyan"    "greenyellow"  "magenta"      "yellow"       "green"        "lightgreen"   "grey60"      
#[15] "cyan"         "blue"         "purple"       "tan"          "grey"   

geneModuleMembership <- as.data.frame(cor(exp2, MEs, use = "p"))
MMPvalue <- as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership), nSamples)) 

names(geneModuleMembership) <- paste("MM", modNames, sep = "") 
names(MMPvalue) <- paste("p.MM", modNames, sep = "");   
geneModuleMembership[1:5, 1:5]
#MMbrown       MMred    MMblack MMmidnightblue      MMpink
#MMP8   -0.06804004 -0.07577908 0.28464986     0.14127134  0.22442648
#OLFM4   0.01135215 -0.16140578 0.04256177    -0.14831889 -0.05018467
#EIF1AY  0.23214715 -0.19342077 0.04425104    -0.05117022 -0.23419671
#HP     -0.02482097  0.01244376 0.41086743     0.36843596  0.39651030
#CD177   0.27007056 -0.22455997 0.15191872     0.33417018  0.32598869

geneTraitSignificance <- as.data.frame(cor(exp2, dataTraits1, use = "p"))
GSPvalue <- as.data.frame(corPvalueStudent(as.matrix(geneTraitSignificance), nSamples)) 
names(geneTraitSignificance) <- paste("GS.", colnames(dataTraits1), sep = "") 
names(GSPvalue) <- paste("p.GS.", colnames(dataTraits1), sep = "") 
head(geneTraitSignificance)
#GS.Netotic_cell_death GS.Oxeiptosis GS.Alkaliptosis   GS.Anoikis GS.Mitotic_cell_death GS.Cuproptosis
#MMP8              0.41259639   -0.04175572     0.075570773  0.337439441            0.47632051     0.01581552
#OLFM4             0.12514139   -0.26493062    -0.013302485  0.087060705            0.24994778    -0.10874859
#EIF1AY           -0.18288020   -0.22705509    -0.008992086 -0.049776462           -0.23656575    -0.27989992
#HP                0.18228337    0.05997374     0.186107781  0.346880874            0.44971106     0.03729882
#CD177             0.04734007    0.02589130     0.316662756 -0.003976755            0.04076045    -0.35843699
#LCN2              0.44105062   -0.14156008  
selectModule<-c("red","green","pink","midnightblue","black")
pdf("R.WGCNA_gene-Module-trait-significance.pdf",width=1.5*ncol(MEs), height=1.5*ncol(MEs))
par(mfrow=c(ceiling(length(selectModule)/2),2)) 
for(module in selectModule){
  column <- match(module,selectModule)  
  print(module)
  moduleGenes <- moduleColors==module  
  verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]),
                     abs(geneTraitSignificance[moduleGenes, 1]),
                     xlab = paste("Module Membership in", module, "module"),
                     ylab = "Gene significance for trait",
                     main = paste("Module membership vs. gene significance\n"),
                     cex.main = 1.2, cex.lab = 1.2, cex.axis = 1.2, col = module)}
dev.off()

save(selectModule,moduleGenes,column,file = "WGCNA_MAD1W_MM_GS.Rdata")

setwd("D:\\AA研究\\RCD_sepsis_bulk_scrna")
library(WGCNA)
load("WGCNA_MADfilter1W_input.Rdata")
load("WGCNA_MADfilter1W_power_value.Rdata")
load("WGCNA_MAD1W_genemodule.Rdata")
load("WGCNA_MAD1W_MM_GS.Rdata")

geneTree = net$dendrograms[[1]];  
dissTOM = 1-TOMsimilarityFromExpr(exp2, power = 9);   
plotTOM = dissTOM^7; 
diag(plotTOM) = NA;  
TOMplot(plotTOM, geneTree, moduleColors, main = "Network heatmap plot, all genes")
nSelect = 0.1*nGenes  
set.seed(123);  
select = sample(nGenes, size = nSelect);  
selectTOM = dissTOM[select, select];  
selectTree = hclust(as.dist(selectTOM), method = "average")  
selectColors = moduleColors[select];
sizeGrWindow(9,9)
plotDiss = selectTOM^7; 
diag(plotDiss) = NA; 
pdf("WGCNA_Network-heatmap.pdf",width = 12,height = 12)
TOMplot(plotDiss, selectTree, selectColors, 
        main = "Network heatmap plot, selected genes")
dev.off() 

SCM = as.data.frame(dataTraits1[,2]); 
names(SCM) = "SCM"
MET = orderMEs(cbind(MEs, SCM))
sizeGrWindow(5,7.5);
par(cex = 0.9)
pdf("WGCNA_MAD1W_Eigengene-dendrogram.pdf",width = 12,height = 8)
plotEigengeneNetworks(MET, "",  
                      marDendro =c(0,5,1,5),  
                      marHeatmap = c(5,6,1,2), cex.lab = 0.8, 
                      xLabelsAngle = 90)  
dev.off() 

setwd("D:\\AA研究\\RCD_sepsis_bulk_scrna")
library(WGCNA)
load("WGCNA_MADfilter1W_input.Rdata")
load("WGCNA_MADfilter1W_power_value.Rdata")
load("WGCNA_MAD1W_genemodule.Rdata")
load("WGCNA_MAD1W_MM_GS.Rdata")
modNames <- substring(names(MEs), 3) 
geneTraitSignificance <- as.data.frame(cor(exp2, dataTraits1, use = "p"))
geneModuleMembership <- as.data.frame(cor(exp2, MEs, use = "p"))

#pink
module_pink<-"pink"
column_pink<-match(module_pink,modNames)
moduleGenes<-moduleColors==module_pink   
pink_moduleGenes<-names(net$colors)[which(moduleColors==module_pink)]
write.csv(pink_moduleGenes,"WGCNA_MAD1W_module_pink.csv")
#black
module_black<-"black"
column_black<-match(module_black,modNames)
moduleGenes<-moduleColors==module_black   
black_moduleGenes<-names(net$colors)[which(moduleColors==module_black)]
write.csv(black_moduleGenes,"WGCNA_MAD1W_module_black.csv")
#green
module_green<-"green"
column_green<-match(module_green,modNames)
moduleGenes<-moduleColors==module_green   
green_moduleGenes<-names(net$colors)[which(moduleColors==module_green)]
write.csv(green_moduleGenes,"WGCNA_MAD1W_module_green.csv")
#midnightblue
module_midnightblue<-"midnightblue"
column_midnightblue<-match(module_midnightblue,modNames)
moduleGenes<-moduleColors==module_midnightblue   
midnightblue_moduleGenes<-names(net$colors)[which(moduleColors==module_midnightblue)]
write.csv(midnightblue_moduleGenes,"WGCNA_MAD1W_module_midnight.csv")
#turquoise
module_turquoise<-"turquoise"
column_turquoise<-match(module_turquoise,modNames)
moduleGenes<-moduleColors==module_turquoise   
turquoise_moduleGenes<-names(net$colors)[which(moduleColors==module_turquoise)]
write.csv(turquoise_moduleGenes,"WGCNA_MAD1W_module_turquoise.csv")
#red
module_red<-"red"
column_red<-match(module_red,modNames)
moduleGenes<-moduleColors==module_red   
red_moduleGenes<-names(net$colors)[which(moduleColors==module_red)]
write.csv(red_moduleGenes,"WGCNA_MAD1W_module_red.csv")
#blue
module_blue<-"blue"
column_blue<-match(module_blue,modNames)
moduleGenes<-moduleColors==module_blue  
blue_moduleGenes<-names(net$colors)[which(moduleColors==module_blue)]
write.csv(blue_moduleGenes,"WGCNA_MAD1W_module_blue.csv")
#tan
module_tan<-"tan"
column_tan<-match(module_tan,modNames)
moduleGenes<-moduleColors==module_tan   
tan_moduleGenes<-names(net$colors)[which(moduleColors==module_tan)]
write.csv(tan_moduleGenes,"WGCNA_MAD1W_module_tan.csv")


#pink-Oxeiptosis(55),midnigthblue-Lysosomal(10), green-Ferroptosis(44)，Black-anoikis(9)，Red-pyroptosis(32)

setwd("D:\\AA研究\\RCD_sepsis_bulk_scrna")
gene1<-read.csv("WGCNA_module_RCD_DEGs.csv",header = T,sep = ",")
gene<-c(gene1$RCD.DEGs.WGCNA_pink,gene1$RCD.DEGs.WGCNA_green,gene1$RCD.DEGs.WGCNA_red,
        gene1$RCD.DEGs.WGCNA_midnightblue,gene1$RCD.DEGs.WGCNA_black)

#WGCNA-RRDs-KEGG-----------------------------------------------------------------------------------------------------------
library(clusterProfiler)
library(org.Hs.eg.db)
library(enrichplot)
library(dplyr)
library(ggplot2)
ids<-bitr(gene,"SYMBOL","ENTREZID","org.Hs.eg.db")
enrich.kegg <- enrichKEGG(gene = ids$ENTREZID, organism = "hsa", keyType = 'kegg', 
                          pAdjustMethod = 'fdr', pvalueCutoff = 0.05, 
                          qvalueCutoff = 0.2) 
enrich.kegg1<-setReadable(enrich.kegg,OrgDb = "org.Hs.eg.db",keyType = "ENTREZID")
cnetp1 <- cnetplot(enrich.kegg1,layout="kk",foldChange = ids$ENTREZID,showCategory = 10,
                   colorEdge = T,node_label = 'all',color_category ='steelblue', 
                   cex.params = list(category_node = 1.5 , gene_node = 1,
                                     category_label = 1.5, gene_label = 1))
#WGCNA-RRDs-GO-----------------------------------------------------------------------------------------------------------
enrich.go <- enrichGO(gene = ids$ENTREZID,OrgDb = org.Hs.eg.db,keyType = 'ENTREZID',                       ont = 'ALL', pAdjustMethod = 'fdr', pvalueCutoff = 0.05,  
                      qvalueCutoff = 0.2,readable = FALSE)
enrich.go1<-setReadable(enrich.go,OrgDb = "org.Hs.eg.db",keyType = "ENTREZID")
cnetp1 <- cnetplot(enrich.go1, layout="kk",foldChange = ids$ENTREZID,showCategory = 10,
                   colorEdge = T,node_label = 'all',color_category ='steelblue', 
                   cex.params = list(category_node = 1.5, gene_node = 1,
                                     category_label = 1.5, gene_label = 1))

#WGCNA-RRDs-Variation Selection----------------------------------------------------------------------------------------------------
data<-read.table("exp_id_trans.txt",header = T,sep = "\t")
rownames(data)<-data[,1]
data<-data[gene,]
data<-na.omit(data)
data<-data[,-1]
data<-t(data)

group<-read.table("cli_merge.txt",header = T,sep = "\t")
rownames(group)<-group[,1]
group<-group[order(group$Group,decreasing = F),]

data<-data[rownames(group),]
identical(rownames(group),rownames(data)) #[1] TRUE
table(group$Group)
#Control  Sepsis 
#42      61 
type<-c(rep("Control",42),rep("Sepsis",61))
data<-data.frame(data)


#RF------------------------------------------------------------------------------------------------------------
library(randomForest)
rf<-randomForest(as.factor(type)~.,data=data, 
                 importance=TRUE,proximity=TRUE) 
print(rf)
rf.mds<-cmdscale(1-rf$proximity,eig = T)
plot(rf.mds$points,col=c("blue","red"),each=50)
varImpPlot(rf)
i<-data.frame(rf$importance)
save(rf,i,file = "WGCNA_RCDDEGs_RF.Rdata")

#LASSO-------------------------------------------------------------------------------------------------------------
library(glmnet)
x<-as.matrix(data)
y<-as.matrix(group$Group)
lasso_model <- glmnet(x, y, family = "binomial",alpha = 1) 
max(lasso_model$lambda)  #[1] 0.4551891
print(lasso_model) 

plot(lasso_model, xvar = "lambda")
cv_model <- cv.glmnet(x, y, family = "binomial",alpha = 1,nfolds = 10)
plot(cv_model)
lambda_min <- cv_model$lambda.min
lambda_min #[1] 0.004551891
lambda_1se <- cv_model$lambda.1se
lambda_1se 
best_model<-glmnet(x,y, family = "binomial",alpha = 1,lambda = lambda_min)
coef<-coef(best_model,s=lambda_min)
coef_idex<-which(coef!=0)
coef_active<-coef[coef_idex]
coef_name<-row.names(coef)[coef_idex]
LASSO<-cbind(Gene=coef_name,coef=coef_active)
save(best_model,LASSO,file = "WGCNA_RCDDEGs_LASSO.Rdata")

#SVM----------------------------------------------------------------------------------------------------------
r<-cbind(data,Group=group$Group)
library(mlbench)
library(caret)
set.seed(123)

control <- rfeControl(functions = caretFuncs, method = "cv", number = 5)  #cv 交叉验证次数5
results <- rfe(r[,1:150], as.numeric(as.factor(r[,151])), sizes = c(1:150), rfeControl = control,method = "svmRadial")

print(results)
#The top 5 variables (out of 19):
#  BLOC1S1, RAB32, CLIC1, CD59, GSTO1
predictors(results)
#[1] "BLOC1S1" "RAB32"   "CLIC1"   "CD59"    "GSTO1"   "TSPO"    "ZDHHC3" 
#[8] "CA4"     "CKAP4"   "IL1R2"   "QSOX1"   "DDAH2"   "MAP2K6"  "NLRC4"  
#[15] "RIT1"    "VNN1"    "GRB10"   "TLR5"    "ADAM9"   "ACER3"   "DYNLT1" 
#[22] "TCN1"    "SVIP"    "MAPK14"  "DACH1"  
plot(results, type=c("g", "o"))
save(results,file = "WGCNA_RCDDEGs_SVM.Rdata")

#RF,LASSO,SVM-Intersection--------------------------------------------------------------------------------------------------------
setwd("D:\\AA研究\\RCD_sepsis_bulk_scrna")
load("WGCNA_RCDDEGs_RF.Rdata")
load("WGCNA_RCDDEGs_LASSO.Rdata")
load("WGCNA_RCDDEGs_SVM.Rdata")
rf<-i[order(i$MeanDecreaseGini,decreasing = T),] #RF筛选的默认的前30个变量
lasso<-data.frame(LASSO[-1,])
library(caret)
svm<-predictors(results)
library(VennDiagram)
list<-list(RF=rownames(rf)[1:30],LASSO=lasso$Gene,SVM=svm)
venn.diagram(list, filename = 'WGCNA_RCDDEGs_RF_LASSO_SVM_venn.png', imagetype = 'png', scaled=F,
             fill = c('#E41aec', 'steelblue',"lightgreen"), alpha = 0.50, cat.col = rep('black', 3), 
             col = 'black', cex = 1.5, fontfamily = 'serif', 
             cat.cex = 1.5, cat.fontfamily = 'serif',resolution = 300)

inter <- get.venn.partitions(list)
interset<-inter$..values..
interset<-interset[[1]]
write.csv(interset,file = "WGCNA_RCDDEGs_RF_LASSO_SVM_venn.csv")
#"ZDHHC3","CLIC1","GSTO1","BLOC1S1","TLR5"

####Part3 Immune localization analysis for hub RRDs at single-cell level####
#GSE167363-------------------------------------------------------------------------------------------------------------
library(Seurat) 
library(tidyverse)  
setwd("D:\\AA研究\\RCD_sepsis_bulk_scrna\\scRNA_GSE167363")
samples<-list.files("D:/AA研究/RCD_sepsis_bulk_scrna/scRNA_GSE167363/GSE167363")
seurat_list <- list()
for (sample in samples) {
  data.path <- paste0("D:/AA研究/RCD_sepsis_bulk_scrna/scRNA_GSE167363/GSE167363/", sample)
  seurat_data <- Read10X(data.dir = data.path)
  seurat_obj <- CreateSeuratObject(counts = seurat_data,
                                   project = sample,
                                   min.features = 200, 
                                   min.cells = 3)   
  seurat_list <- append(seurat_list, seurat_obj)
}
seurat_list
seurat_combined <- merge(seurat_list[[1]], 
                         y = seurat_list[-1],
                         add.cell.ids = samples)
print(seurat_combined)
#An object of class Seurat 
#20005 features across 35888 samples within 1 assay 
#Active assay: RNA (20005 features, 0 variable features)
#7 layers present: counts.GSM5102900_HC, counts.GSM5102901_HC, counts.GSM5102902_NS, counts.GSM5102904_S, counts.GSM5511351_NS, counts.GSM5511353_S, counts.GSM5511355_S

head((seurat_combined@meta.data),3)
#                                  orig.ident     nCount_RNA   nFeature_RNA
#GSM5102900_HC_AAACCCACACCAAAGG-1 GSM5102900_HC       4399         1119
#GSM5102900_HC_AAACCCAGTCTGATAC-1 GSM5102900_HC       5040         1417
#GSM5102900_HC_AAACCCAGTGGTCAAG-1 GSM5102900_HC      13493         1871

seurat_combined$log10GenesPerUMI <- log10(seurat_combined$nFeature_RNA) / log10(seurat_combined$nCount_RNA)
seurat_combined$mitoRatio <- PercentageFeatureSet(object = seurat_combined, pattern = "^MT-")seurat_combined$mitoRatio <- seurat_combined@meta.data$mitoRatio / 100

metadata<-seurat_combined@meta.data
metadata$cells<-rownames(metadata)
metadata$sample<-NA
metadata$sample[which(str_detect(metadata$orig.ident, "^GSM5102900_HC"))] <- "control"
metadata$sample[which(str_detect(metadata$orig.ident, "^GSM5102901_HC"))] <- "control"
metadata$sample[which(str_detect(metadata$orig.ident, "^GSM5102902_NS"))] <- "Non Survivor"
metadata$sample[which(str_detect(metadata$orig.ident, "^GSM5511351_NS"))] <- "Non Survivor"
metadata$sample[which(str_detect(metadata$orig.ident, "^GSM55102904_S"))] <- "Survivor"
metadata$sample[which(str_detect(metadata$orig.ident, "^GSM5511353_S"))] <- "Survivor"
metadata$sample[which(str_detect(metadata$orig.ident, "^GSM5511355_S"))] <- "Survivor"

metadata <- metadata %>%
  dplyr::rename(seq_folder = orig.ident,
                nUMI = nCount_RNA,
                nGene = nFeature_RNA)
seurat_combined@meta.data<-metadata

VlnPlot(seurat_combined, features = c("nGene", "nUMI", "mitoRatio"), ncol = 3,group.by = "sample")
save(seurat_combined,file="seura_combined.Rdata") 

p1 <- FeatureScatter(seurat_combined, feature1 = "nUMI",feature2 = "nGene")
p2 <- FeatureScatter(seurat_combined, feature1 = "nUMI", feature2 = "mitoRatio")
p1 + p2
#0.89 -0.21
seurat_combined1<- subset(seurat_combined, 
                          subset = nUMI > 500 & nUMI < 25000 & 
                            nGene > 200 & nGene< 4000 & 
                            mitoRatio < 0.10)  
seurat_combined1
#An object of class Seurat 
#20005 features across 30192 samples within 1 assay 
#Active assay: RNA (20005 features, 0 variable features)
#7 layers present: counts.GSM5102900_HC, counts.GSM5102901_HC, counts.GSM5102902_NS, counts.GSM5102904_S, counts.GSM5511351_NS, counts.GSM5511353_S, counts.GSM5511355_S
VlnPlot(seurat_combined1, features = c("nGene", "nUMI", "mitoRatio"), ncol = 3,group.by = "sample")

seurat_combined1 <- NormalizeData(seurat_combined1, 
                                  normalization.method = 'LogNormalize',  
                                  scale.factor = 10000)
seurat_combined1 <- FindVariableFeatures(seurat_combined1,
                                         selection.method = "vst",
                                         nfeatures = 2000)
top10.hgvs <- head(VariableFeatures(seurat_combined1), 10)
# [1] "PPBP"   "PF4"    "NRGN"   "GNG11"  "GNLY"   "IGLC2"  "RGS18"  "CAVIN2" "CCL2"   "PTGDS" 
p3 <- VariableFeaturePlot(seurat_combined1)
p4 <- LabelPoints(plot = p3, points = top10.hgvs, repel = TRUE)
p5<-p3 + p4
ggsave("top10hgvs.pdf",plot = p5,width = 40,height = 40) 

all.genes <- rownames(seurat_combined1)
seurat_combined2 <- ScaleData(seurat_combined1, features = all.genes)

#PCA-----------------------------------------------------------------------------------------
seurat_combined2 <- RunPCA(seurat_combined2, features = VariableFeatures(object = seurat_combined2))#根据高变基因进行主成分分析
print(seurat_combined2[["pca"]], dims = 1:5, nfeatures = 5)
seurat_combined2 <- FindNeighbors(seurat_combined2, dims = 1:10)  

library(clustree)
res.used <- seq(0.1,1.0,by=0.1) 
res.used
for(i in res.used){
  seurat_combined2 <- FindClusters(object = seurat_combined2, verbose = TRUE,resolution = i) #
}
clus.tree.out <- clustree(seurat_combined2@meta.data, prefix="RNA_snn_res.") +
  theme(legend.position = "bottom") + 
  scale_color_brewer(palette = "Set1") +
  scale_edge_color_continuous(low = "grey80", high = "red")

clus.tree.out
ggsave("clustree_A.png", plot = clus.tree.out, width = 15, height = 7.5, dpi=300)

#resolution=0.5
seurat_combined2 <- FindClusters(seurat_combined2, resolution = 0.5)

head(Idents(seurat_combined2), 5)
table(seurat_combined2$seurat_clusters)
#   0    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15   16   17 
#4676 4068 3581 3120 2690 2649 1995 1857 1522  967  718  492  432  310  297  278  251  150 
#18 
#139 

#UMAP/TSNE--------------------------------------------------------------------
clusterCols <- c("#DC143C","#0000FF","#20B2AA","#FFA500","#9370DB","#98FB98","#F08080","#1E90FF","#7CFC00","#FFFF00",  
                 "#808000","#FF00FF","#FA8072","#7B68EE","#9400D3","#800080","#A0522D","#D2B48C","#D2691E","#87CEEB","#40E0D0","#5F9EA0",
                 "#FF1493","#0000CD","#008B8B","#FFE4B5","#8A2BE2","#228B22","#E9967A","#4682B4","#32CD32","#F0E68C","#FFFFE0","#EE82EE",
                 "#FF6347","#6A5ACD","#9932CC","#8B008B","#8B4513","#DEB887")
seurat_combined2 <- RunUMAP(seurat_combined2, dims = 1:10)
DimPlot(seurat_combined2, reduction = "umap",label = T,
        repel = T,cols = clusterCols,pt.size = 0.5,
        label.size = 5,label.box = T)
save(seurat_combined2,file = "seurat_combined2.Rdata")

seurat_combined2.markers <- FindAllMarkers(seurat_combined2, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)

top10markers<-seurat_combined2.markers%>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC)

cluster1.markers <- FindMarkers(seurat_combined2, ident.1 = 0, logfc.threshold = 0.25, test.use = "roc", only.pos = TRUE)
save(seurat_combined2.markers,top10markers,cluster1.markers,file = "findallmarkers.Rdata")

p<-DoHeatmap(seurat_combined2,features = top10markers$gene,size = 5,group.colors = clusterCols)

write.csv(top10markers,file = "top10markers.csv")
write.csv(seurat_combined2.markers,file = "seurat_combined2.markers.csv")
write.csv(cluster1.markers,file = "cluster1.markers_logFC0.25.csv")

library(SingleR)
library(celldex)
library(Seurat)

hpca.se <- HumanPrimaryCellAtlasData()
bpe.se <- BlueprintEncodeData()

save(hpca.se,file = "HumanPrimaryCellAtlas.Rdata")
save(bpe.se,file = "BlueprintEncodeData.Rdata")

sce_for_singler<-GetAssayData(seurat_combined2,slot ="data" )  
clusters<-seurat_combined2@meta.data$seurat_clusters         
pred.HCPA_BPE<-SingleR(test = sce_for_singler,
                       ref = list(hpca.se,bpe.se),              
                       labels = list(hpca.se$label.main,bpe.se$label.main),
                       method = "cluster", 
                       clusters = clusters,  
                       assay.type.test = "logcounts",
                       assay.type.ref = "logcounts")

as.matrix(table(pred.HCPA_BPE$labels))
#B-cells         2
#CD4+ T-cells    2
#CD8+ T-cells    2
#Erythrocytes    1
#Monocytes       6
#Neutrophils     1
#NK_cell         3
#Platelets       2

cellType<-data.frame(clusterID=levels(seurat_combined2@meta.data$seurat_clusters),celltype=pred.HCPA_BPE$labels)

seurat_combined2@meta.data$singleR<-cellType[match(clusters,cellType$clusterID),"celltype"]

P1<- DimPlot(seurat_combined2, reduction = "umap", group.by = "seurat_clusters",label = T,
             repel = T,cols = clusterCols,pt.size = 0.5,
             label.size = 5,label.box = T)
P2<- DimPlot(seurat_combined2, reduction = "umap", group.by = "singleR",label = T,
             repel = T,cols = clusterCols,pt.size = 0.5,
             label.size = 5,label.box = T)
P1+P2

save(seurat_combined2,file = "seurat_combined2.Rdata")
save(sce_for_singler,clusters,pred.HCPA_BPE,cellType,file = "singleR_HPCA_BPE.Rdata")

gene<-read.csv("D://AA研究//RCD_sepsis_bulk_scrna//WGCNA_RCDDEGs_RF_LASSO_SVM_venn.csv",header = T)
gene<-gene$x
#1.
DotPlot(seurat_combined2,group.by = "singleR",
        features = c("ZDHHC3","CLIC1","GSTO1","BLOC1S1","TLR5"))

#2.
library(Nebulosa)
P1<-plot_density(seurat_combined2,"ZDHHC3")
P2<-plot_density(seurat_combined2,"CLIC1")
P3<-plot_density(seurat_combined2,"GSTO1")
P4<-plot_density(seurat_combined2,"BLOC1S1")
P5<-plot_density(seurat_combined2,"TLR5")
P6<- P1+P2+P4+P3+P5
ggsave("Fivegene密度图.pdf",plot = P6,width =12 ,height = 8)

#3.
library(scCustomize)
p5<-FeaturePlot_scCustom(seurat_object = seurat_combined2, features = "TLR5", split.by = "sample",
                         num_columns = 3)
p6<-p1/p2/p3/p4/p5
ggsave("Fivegene气泡图.pdf",plot = p6,width =12 ,height = 16)

#4.堆叠小提琴图
colors <- c("dodgerblue", "forestgreen", "darkorange2", "darkorchid3","orange","#E64B35B2", "#4DBBD5B2" ,"#00A087B2", "#3C5488B2", "#F39B7FB2",
            "#8491B4B2", "#91D1C2B2", "#DC0000B2", "#7E6148B2", "#FF6347","#6A5ACD","#9932CC","#8B008B","#8B4513","#DEB887")
Stacked_VlnPlot(seurat_object = seurat_combined2, features = gene, x_lab_rotate = TRUE,
                colors_use = colors,group.by = "singleR")
#Five genes 共表达免疫细胞共定位分析------------------------------------------------------------------
#GSE167363
setwd("D:/AA研究/2.RCD_sepsis_bulk_scrna/part10.scRNA_GSE167363")
library(Seurat)
library(dplyr)
#读取seurat对象
load("seurat_combined2.Rdata")
table(seurat_combined2$singleR)
#Control-----------------------------------------------------------------------------------------------------
Idents(seurat_combined2)<-"sample"
Immunecell<-subset(seurat_combined2,idents = c("control"))
Idents(Immunecell)<-"singleR"
Immunecell<-subset(Immunecell,idents = c("B-cells","CD4+ T-cells","CD8+ T-cells","Monocytes","Neutrophils","NK_cell"))
Hubgene<-GetAssayData(Immunecell,assay = "RNA",slot = "data")[c("ZDHHC3","CLIC1","GSTO1","BLOC1S1","TLR5"),]
Hubgene1<-as.data.frame(t(Hubgene))
Median_thresholds <- apply(Hubgene1, 2, median) Hubgene1$Celltype<-Idents(Immunecell)
Average_expression <- Hubgene1 %>%
  group_by(Celltype) %>%
  summarise(across(all_of(c("ZDHHC3","CLIC1","GSTO1","BLOC1S1")), mean))
Median_thresholds
Hubgene1$CoExpressed <- apply(Hubgene1[, c("ZDHHC3", "CLIC1", "GSTO1", "BLOC1S1")], 1, function(x) {
  all(sapply(seq_along(x), function(i) x[i] > Median_thresholds[names(x)[i]]))})
Coexpression_ratio <- Hubgene1 %>%
  group_by(Celltype) %>%
  summarise(CoexpressionRatio = mean(CoExpressed)) 
Coexpression_ratio$Celltype<-factor(Coexpression_ratio$Celltype,levels = c("Monocytes","Neutrophils","NK_cell","B-cells","CD4+ T-cells","CD8+ T-cells"))
library(ggplot2)
ggplot(Coexpression_ratio, aes(x = Celltype, y = CoexpressionRatio)) +
  geom_col() +
  labs(x = "Cell Type", y = "Co-expression Ratio", title = "Control: Hub Gene Co-expression in Immune Cells") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

#Survivor-----------------------------------------------------------------------------------------------------------
Idents(seurat_combined2)<-"sample"
IMmunecell<-subset(seurat_combined2,idents = c("Survivor"))
Idents(IMmunecell)<-"singleR"
IMmunecell<-subset(IMmunecell,idents = c("B-cells","CD4+ T-cells","CD8+ T-cells","Monocytes","Neutrophils","NK_cell"))
HUbgene<-GetAssayData(IMmunecell,assay = "RNA",slot = "data")[c("ZDHHC3","CLIC1","GSTO1","BLOC1S1","TLR5"),]
HUbgene1<-as.data.frame(t(HUbgene))
HUbgene1$Celltype<-Idents(IMmunecell)
AVerage_expression <- HUbgene1 %>%
  group_by(Celltype) %>%
  summarise(across(all_of(c("ZDHHC3","CLIC1","GSTO1","BLOC1S1")), mean))
HUbgene1$CoExpressed <- apply(HUbgene1[, c("ZDHHC3", "CLIC1", "GSTO1", "BLOC1S1")], 1, function(x) {
  all(sapply(seq_along(x), function(i) x[i] > median_thresholds[names(x)[i]]))})
COexpression_ratio <- HUbgene1 %>%
  group_by(Celltype) %>%
  summarise(COexpressionRatio = mean(CoExpressed)) 
COexpression_ratio$Celltype<-factor(COexpression_ratio$Celltype,levels = c("Monocytes","Neutrophils","NK_cell","B-cells","CD4+ T-cells","CD8+ T-cells"))
library(ggplot2)
ggplot(COexpression_ratio, aes(x = Celltype, y = COexpressionRatio)) +
  geom_col() +
  labs(x = "Cell Type", y = "Co-expression Ratio", title = "Survivor: Hub Gene Co-expression in Immune Cells") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

#Non Survivor---------------------------------------------------------------------------------------------------
Idents(seurat_combined2)<-"sample"
IMMunecell<-subset(seurat_combined2,idents = c("Non Survivor"))
Idents(IMMunecell)<-"singleR"
IMMunecell<-subset(IMMunecell,idents = c("B-cells","CD4+ T-cells","CD8+ T-cells","Monocytes","Neutrophils","NK_cell"))
HUBgene<-GetAssayData(IMMunecell,assay = "RNA",slot = "data")[c("ZDHHC3","CLIC1","GSTO1","BLOC1S1","TLR5"),]
HUBgene1<-as.data.frame(t(HUBgene))
HUBgene1$Celltype<-Idents(IMMunecell)
AVErage_expression <- HUBgene1 %>%
  group_by(Celltype) %>%
  summarise(across(all_of(c("ZDHHC3","CLIC1","GSTO1","BLOC1S1")), mean))
HUBgene1$CoExpressed <- apply(HUBgene1[, c("ZDHHC3", "CLIC1", "GSTO1", "BLOC1S1")], 1, function(x) {
  all(sapply(seq_along(x), function(i) x[i] > median_thresholds[names(x)[i]]))})
COExpression_ratio <- HUBgene1 %>%
  group_by(Celltype) %>%
  summarise(COExpressionRatio = mean(CoExpressed)) 
COExpression_ratio$Celltype<-factor(COExpression_ratio$Celltype,levels = c("Monocytes","Neutrophils","NK_cell","B-cells","CD4+ T-cells","CD8+ T-cells"))
library(ggplot2)
ggplot(COExpression_ratio, aes(x = Celltype, y = COExpressionRatio)) +
  geom_col() +
  labs(x = "Cell Type", y = "Co-expression Ratio", title = "Non Survivor: Hub Gene Co-expression in Immune Cells") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
#合并数据可视化(共表达hub基因细胞比率)--------------------------------------------------------
Coexpression_ratio$Group<-"Control"
COexpression_ratio$Group<-"Survivor"
colnames(COexpression_ratio)[2]<-"CoexpressionRatio"
COExpression_ratio$Group<-"Non Survivor"
colnames(COExpression_ratio)[2]<-"CoexpressionRatio"
data<-rbind(Coexpression_ratio,COexpression_ratio,COExpression_ratio)
data$Group<-factor(data$Group,levels = c("Control","Survivor","Non Survivor"))
ggplot(data,aes(Group,CoexpressionRatio,color=Group,fill=Group))+
  geom_bar(stat="summary",fun=mean,position="dodge")+ 
  #stat_summary(geom = "errorbar",fun.data = 'mean_sd', width = 0.3)+
  labs(x="Groups",y="Co-expression Ratio",title = "Hub Gene Co-expression in Immune Cells")+
  theme_prism(palette = "candy_bright",
              base_fontface = "plain",
              base_family = "serif", 
              base_size = 16, 
              base_line_size = 0.8, 
              axis_text_angle = 45)+ 
  scale_fill_prism(palette = "candy_bright")+
  facet_grid(~Celltype,scales = 'free')

#每个hub基因在每种免疫细胞中的表达量比较--------------------------------------------------------------
Hubgene1$Group<-"Control"
HUBgene1$Group<-"Non Survivor"
HUbgene1$Group<-"Survivor"
all<-rbind(Hubgene1,HUBgene1,HUbgene1)
all$group<-ifelse(all$Group=="Control","Control","Sepsis")
table(all$Group)
#Control    Survivior       Non Survivor 
#13428         10611          4043 
#ZDHHC3-----------------------------------------------------------------------------------------------
zdhhc3<-all[,c(1,6,7,8)]
zdhhc3$Celltype<-factor(zdhhc3$Celltype,levels = c("Monocytes","Neutrophils","NK_cell","B-cells","CD4+ T-cells","CD8+ T-cells"))
#sepsis~control
ggplot(zdhhc3,aes(group,ZDHHC3,color=group,fill=group))+
  geom_bar(stat="summary",fun=mean,position="dodge")+ 
  #stat_summary(geom = "errorbar",fun.data = 'mean_sd', width = 0.3)+
  labs(title = "",
       x = "Group",
       y = "ZDHHC3 Expression")+
  facet_grid(~Celltype,scales = 'free')+
  scale_color_manual(values=c("#A5D1B0","#F7C9CF"))+
  scale_fill_manual(values=c("#A5D1B0","#F7C9CF"))+
  theme_prism(
    #palette = "candy_bright",
    base_fontface = "plain", 
    base_family = "serif", 
    base_size = 16,  
    base_line_size = 0.8, 
    axis_text_angle = 45)+#+ # 可选值有 0，45，90，270
  geom_signif(comparisons = list(c("Control","Sepsis" )),
              map_signif_level = TRUE,
              test = "t.test",
              y_position = 0.4,
              tip_length = 0.01,
              size = 0.8,
              color = "black")#+ 
zdhhc3_2<-subset(zdhhc3,zdhhc3$group=="Sepsis")
zdhhc3_2$Group<-factor(zdhhc3_2$Group,levels = c("Survivor","Non Survivor"))
ggplot(zdhhc3_2,aes(Group,ZDHHC3,color=Group,fill=Group))+
  geom_bar(stat="summary",fun=mean,position="dodge")+ 
  #stat_summary(geom = "errorbar",fun.data = 'mean_sd', width = 0.3)+ 
  labs(title = "",
       x = "Group",
       y = "ZDHHC3 Expression")+ 
  facet_grid(~Celltype,scales = 'free')+
  scale_color_manual(values=c("#7CA3B8","#CE8A8D"))+
  scale_fill_manual(values=c("#7CA3B8","#CE8A8D"))+
  theme_prism(
    base_fontface = "plain", 
    base_family = "serif", 
    base_size = 16, 
    base_line_size = 0.8,
    axis_text_angle = 45)+
  #scale_fill_prism(palette = "candy_bright")+
  geom_signif(comparisons = list(c("Non Survivor", "Survivor")),
              map_signif_level = TRUE,
              test = "t.test",
              y_position = 0.4,
              tip_length = 0.01,
              size = 0.8,
              color = "black")

#CLIC1------------------------------------------------------------------------------------------------
clic1<-all[,c(2,6,7,8)]
clic1$Celltype<-factor(clic1$Celltype,levels = c("Monocytes","Neutrophils","NK_cell","B-cells","CD4+ T-cells","CD8+ T-cells"))
#sepsis~control
ggplot(clic1,aes(group,CLIC1,color=group,fill=group))+
  geom_bar(stat="summary",fun=mean,position="dodge")+ 
  #stat_summary(geom = "errorbar",fun.data = 'mean_sd', width = 0.3)+
  labs(title = "",
       x = "Group",
       y = "CLIC1 Expression")+
  facet_grid(~Celltype,scales = 'free')+
  scale_color_manual(values=c("#A5D1B0","#F7C9CF"))+
  scale_fill_manual(values=c("#A5D1B0","#F7C9CF"))+
  theme_prism(
    base_fontface = "plain", 
    base_family = "serif", 
    base_size = 16,  
    base_line_size = 0.8, 
    axis_text_angle = 45)+
  geom_signif(comparisons = list(c("Control","Sepsis" )),
              map_signif_level = TRUE,
              test = "t.test",
              y_position = 2,
              tip_length = 0.01,
              size = 0.8,
              color = "black")

clic1_2<-subset(clic1,clic1$group=="Sepsis")
clic1_2$Group<-factor(clic1_2$Group,levels = c("Survivor","Non Survivor"))
ggplot(clic1_2,aes(Group,CLIC1,color=Group,fill=Group))+
  geom_bar(stat="summary",fun=mean,position="dodge")+
  #stat_summary(geom = "errorbar",fun.data = 'mean_sd', width = 0.3)+
  labs(title = "",
       x = "Group",
       y = "CLIC1 Expression")+
  facet_grid(~Celltype,scales = 'free')+
  scale_color_manual(values=c("#7CA3B8","#CE8A8D"))+
  scale_fill_manual(values=c("#7CA3B8","#CE8A8D"))+
  theme_prism(
    base_fontface = "plain", 
    base_family = "serif", 
    base_size = 16,  
    base_line_size = 0.8, 
    axis_text_angle = 45)+
  geom_signif(comparisons = list(c("Non Survivor", "Survivor")),
              map_signif_level = TRUE,
              test = "t.test",
              y_position = 2,
              tip_length = 0.01,
              size = 0.8,
              color = "black")

#GSTO1------------------------------------------------------------------------------------------------------------
gsto1<-all[,c(3,6,7,8)]
gsto1$Celltype<-factor(gsto1$Celltype,levels = c("Monocytes","Neutrophils","NK_cell","B-cells","CD4+ T-cells","CD8+ T-cells"))
#sepsis~control
ggplot(gsto1,aes(group,GSTO1,color=group,fill=group))+
  geom_bar(stat="summary",fun=mean,position="dodge")+ 
  #stat_summary(geom = "errorbar",fun.data = 'mean_sd', width = 0.3)+
  labs(title = "",
       x = "Group",
       y = "GSTO1 Expression")+
  facet_grid(~Celltype,scales = 'free')+
  scale_color_manual(values=c("#A5D1B0","#F7C9CF"))+
  scale_fill_manual(values=c("#A5D1B0","#F7C9CF"))+
  theme_prism(
    base_fontface = "plain", 
    base_family = "serif", 
    base_size = 16,  
    base_line_size = 0.8, 
    axis_text_angle = 45)+
  #scale_fill_prism(palette = "candy_bright")+
  geom_signif(comparisons = list(c("Control","Sepsis" )),
              map_signif_level = TRUE,
              test = "t.test",
              y_position = 2,
              tip_length = 0.01,
              size = 0.8,
              color = "black")

#Survivor~Non Survivor
gsto1_2<-subset(gsto1,gsto1$group=="Sepsis")
gsto1_2$Group<-factor(gsto1_2$Group,levels = c("Survivor","Non Survivor"))
ggplot(gsto1_2,aes(Group,GSTO1,color=Group,fill=Group))+
  geom_bar(stat="summary",fun=mean,position="dodge")+ 
  #stat_summary(geom = "errorbar",fun.data = 'mean_sd', width = 0.3)+
  labs(title = "",
       x = "Group",
       y = "GSTO1 Expression")+
  facet_grid(~Celltype,scales = 'free')+
  scale_color_manual(values=c("#7CA3B8","#CE8A8D"))+
  scale_fill_manual(values=c("#7CA3B8","#CE8A8D"))+
  theme_prism(
    base_fontface = "plain",
    base_family = "serif", 
    base_size = 16,  
    base_line_size = 0.8, 
    axis_text_angle = 45)+
  #scale_fill_prism(palette = "candy_bright")+
  geom_signif(comparisons = list(c("Non Survivor", "Survivor")),
              map_signif_level = TRUE,
              test = "t.test",
              y_position = 2,
              tip_length = 0.01,
              size = 0.8,
              color = "black")
#BLOC1S1--------------------------------------------------------------------------------------------------------
bloc1s1<-all[,c(4,6,7,8)]
bloc1s1$Celltype<-factor(bloc1s1$Celltype,levels = c("Monocytes","Neutrophils","NK_cell","B-cells","CD4+ T-cells","CD8+ T-cells"))
#sepsis~control
ggplot(bloc1s1,aes(group,BLOC1S1,color=group,fill=group))+
  geom_bar(stat="summary",fun=mean,position="dodge")+ 
  #stat_summary(geom = "errorbar",fun.data = 'mean_sd', width = 0.3)+
  labs(title = "",
       x = "Group",
       y = "BLOC1S1 Expression")+
  facet_grid(~Celltype,scales = 'free')+
  scale_color_manual(values=c("#A5D1B0","#F7C9CF"))+
  scale_fill_manual(values=c("#A5D1B0","#F7C9CF"))+
  theme_prism(
    base_fontface = "plain", 
    base_family = "serif", 
    base_size = 16,  
    base_line_size = 0.8, 
    axis_text_angle = 45)+
  #scale_fill_prism(palette = "candy_bright")+
  geom_signif(comparisons = list(c("Control","Sepsis" )),
              map_signif_level = TRUE,
              test = "t.test",
              y_position = 2,
              tip_length = 0.01,
              size = 0.8,
              color = "black")

#Survivor~Non Survivor
bloc1s1_2<-subset(bloc1s1,bloc1s1$group=="Sepsis")
bloc1s1_2$Group<-factor(bloc1s1_2$Group,levels = c("Survivor","Non Survivor"))
ggplot(bloc1s1_2,aes(Group,BLOC1S1,color=Group,fill=Group))+
  geom_bar(stat="summary",fun=mean,position="dodge")+
  #stat_summary(geom = "errorbar",fun.data = 'mean_sd', width = 0.3)+
  labs(title = "",
       x = "Group",
       y = "BLOC1S1 Expression")+
  facet_grid(~Celltype,scales = 'free')+
  scale_color_manual(values=c("#7CA3B8","#CE8A8D"))+
  scale_fill_manual(values=c("#7CA3B8","#CE8A8D"))+
  theme_prism(
    base_fontface = "plain",
    base_family = "serif",
    base_size = 16,  
    base_line_size = 0.8, 
    axis_text_angle = 45)+
  #scale_fill_prism(palette = "candy_bright")+
  geom_signif(comparisons = list(c("Non Survivor", "Survivor")),
              map_signif_level = TRUE,
              test = "t.test",
              y_position = 2,
              tip_length = 0.01,
              size = 0.8,
              color = "black")
#TLR5--------------------------------------------------------------------------------------------------------------
tlr5<-all[,c(5,6,7,8)]
tlr5$Celltype<-factor(tlr5$Celltype,levels = c("Monocytes","Neutrophils","NK_cell","B-cells","CD4+ T-cells","CD8+ T-cells"))
#sepsis~control
ggplot(tlr5,aes(group,TLR5,color=group,fill=group))+
  geom_bar(stat="summary",fun=mean,position="dodge")+
  #stat_summary(geom = "errorbar",fun.data = 'mean_sd', width = 0.3)+
  labs(title = "",
       x = "Group",
       y = "TLR5 Expression")+
  facet_grid(~Celltype,scales = 'free')+
  scale_color_manual(values=c("#A5D1B0","#F7C9CF"))+
  scale_fill_manual(values=c("#A5D1B0","#F7C9CF"))+
  theme_prism(
    base_fontface = "plain", 
    base_family = "serif",
    base_size = 16, 
    base_line_size = 0.8, 
    axis_text_angle = 45)+
  #scale_fill_prism(palette = "candy_bright")+
  geom_signif(comparisons = list(c("Control","Sepsis" )),
              map_signif_level = TRUE,
              test = "t.test",
              y_position = 0.05,
              tip_length = 0.001,
              size = 0.8,
              color = "black")

#Survivor~Non Survivor
tlr5_2<-subset(tlr5,tlr5$group=="Sepsis")
tlr5_2$Group<-factor(tlr5_2$Group,levels = c("Survivor","Non Survivor"))
ggplot(tlr5_2,aes(Group,TLR5,color=Group,fill=Group))+
  geom_bar(stat="summary",fun=mean,position="dodge")+ 
  #stat_summary(geom = "errorbar",fun.data = 'mean_sd', width = 0.3)+
  labs(title = "",
       x = "Group",
       y = "TLR5 Expression")+
  facet_grid(~Celltype,scales = 'free')+
  scale_color_manual(values=c("#7CA3B8","#CE8A8D"))+
  scale_fill_manual(values=c("#7CA3B8","#CE8A8D"))+
  theme_prism(
    base_fontface = "plain", 
    base_family = "serif", 
    base_size = 16,  
    base_line_size = 0.8, 
    axis_text_angle = 45)+
  #scale_fill_prism(palette = "candy_bright")+
  geom_signif(comparisons = list(c("Non Survivor", "Survivor")),
              map_signif_level = TRUE,
              test = "t.test",
              y_position = 0.05,
              tip_length = 0.001,
              size = 0.8,
              color = "black")

####Part4 Validation of hub RRDs and their GSEA, PPI, immune correlation analysis####
#GSE65682--------------------------------------------------------------------------------------------------------------
setwd("D:\\AA研究\\RCD_sepsis_bulk_scrna")
test_Data<-read.table("exp_idchange_GSE65682.txt",header = T,sep = "\t")
rownames(test_Data)<-test_Data[,1]
test_Data<-test_Data[,-1]
test_Data<-t(test_Data)
test_Group<-read.table("cli_GSE65682.txt",header = T,sep = "\t")
test_Group<-test_Group[,c(1,8,9,12)]
rownames(test_Group)<-test_Group[,1]

colnames(test_Group)[2]<-"fustat"
colnames(test_Group)[3]<-"futime"
test_Group<-test_Group[order(test_Group$Group,decreasing = F),]
test_Data1<-test_Data[rownames(test_Group),]
identical(rownames(test_Data1),rownames(test_Group))

table(test_Group$Group)
#Control  Sepsis 
#42        479
rt1<-cbind(test_Group,test_Data1)
gene<-read.csv("WGCNA_RCDDEGs_RF_LASSO_SVM_venn.csv",header = T)

five<-rt1[,c("ZDHHC3","CLIC1","GSTO1","BLOC1S1","TLR5","Group")]
five$Group<-as.factor(five$Group)
library(reshape2)
five$sample<-rownames(five)
meltfive<-melt(five,id.vars = c("sample","Group"),variable.name = "gene",value.name = "expression")
compare_means(expression~Group,data = meltfive,group.by = "gene")
ggboxplot(meltfive, x = "Group", y = "expression",ncol=5,
          color = "Group",add = "jitter", palette = "lancet",
          facet.by = "gene", short.panel.labs = F)+
  stat_compare_means(label = "p.signif",label.y = 11)

#ROC---------------------------------------------------------------------------------------------------------
ZDHHC3<-roc(five$Group,five$ZDHHC3)
CLIC1<-roc(five$Group,five$CLIC1)
GSTO1<-roc(five$Group,five$GSTO1)
BLOC1S1<-roc(five$Group,five$BLOC1S1)
TLR5<-roc(five$Group,five$TLR5)
#各个roc对象的AUC和下面单独分析是一样的
plot(smooth(ZDHHC3),col="red",legacy.axes=T)
plot(smooth(CLIC1),col="#808000",add=TRUE)
plot(smooth(GSTO1),col="#8A2BE2",add=TRUE)
plot(smooth(BLOC1S1),col="#7CFC00",add=TRUE)
plot(smooth(TLR5),col="blue",add=TRUE)
legend("bottomright",legend=c("ZDHHC3-auc0.9773","CLIC1-auc0.9508",
                              "GSTO1-auc0.9425","BLOC1S1-auc0.986",
                              "TLR5-auc0.984"),
       col=c("red","#808000","#8A2BE2","#7CFC00","blue"),lty=1.5,cex =0.8)

#GSE69528-----------------------------------------------------------------------------------
setwd("D:\\AA研究\\RCD_sepsis_bulk_scrna")
rt4<-read.table("RCD_sepsis_GEOorignaldata/GSE69528/GSE69528_matrix.txt",header = T,sep = "\t")
gpl<-read.table("RCD_sepsis_GEOorignaldata/GSE69528/GPL10558.txt",header=T,sep="\t")
library(dplyr)
rt4<-merge(rt4,gpl,by.x="ID_REF",by.y="ID")
rt4<-rt4[!duplicated(rt4$Symbol),]
rownames(rt4)<-rt4$Symbol
rt4<-rt4[,-c(1,140)]
rt4<-t(rt4)
write.table(rt4,file = "GSE69528_matrix.txt",quote = F,sep = "\t",col.names = NA)

test_Group4<-read.table("GSE69528_cli.txt",header = T,sep = "\t",row.names = 1)
rt4<-rt4[rownames(test_Group4),]
identical(rownames(rt4),rownames(test_Group4)) #[1] TRUE
rt4<-cbind(test_Group4,rt4)

gene<-read.csv("WGCNA_RCDDEGs_RF_LASSO_SVM_venn.csv",header = T)
five4<-rt4[,c("ZDHHC3","CLIC1","GSTO1","BLOC1S1","TLR5","Group")]
five4$Group<-as.factor(five4$Group)
library(reshape2)
five4$sample<-rownames(five4)
meltfive4<-melt(five4,id.vars = c("sample","Group"),variable.name = "gene",value.name = "expression")

library(ggpubr)
compare_means(expression~Group,data = meltfive4,group.by = "gene")
ggboxplot(meltfive4, x = "Group", y = "expression",
          color = "Group",add = "jitter", palette = "lancet",facet.by = "gene",ncol=5, short.panel.labs = FALSE)+
  stat_compare_means(label = "p.signif",label.y = 12)
#ROC
library(pROC)
ZDHHC3<-roc(five4$Group,five4$ZDHHC3)
CLIC1<-roc(five4$Group,five4$CLIC1)
GSTO1<-roc(five4$Group,five4$GSTO1)
BLOC1S1<-roc(five4$Group,five4$BLOC1S1)
TLR5<-roc(five4$Group,five4$TLR5)
#各个roc对象的AUC和下面单独分析是一样的
plot(smooth(ZDHHC3),col="red",legacy.axes=T)
plot(smooth(CLIC1),col="#808000",add=TRUE)
plot(smooth(GSTO1),col="#8A2BE2",add=TRUE)
plot(smooth(BLOC1S1),col="#7CFC00",add=TRUE)
plot(smooth(TLR5),col="blue",add=TRUE)
legend("bottomright",legend=c("ZDHHC3-auc0.9505","CLIC1-auc0.9542",
                              "GSTO1-auc0.9494","BLOC1S1-auc0.9576","TLR5-auc0.9677"),
       col=c("red","#808000","#8A2BE2","#7CFC00","blue"),lty=1.5,cex =0.8)

#GSE26440-------------------------------------------------------------------------------------------------
setwd("D:\\AA研究\\RCD_sepsis_bulk_scrna")
rt5<-read.table("RCD_sepsis_GEOorignaldata/GSE26440/GSE26440_matrix.txt",header = T,sep = "\t")
gpl<-read.table("GPL570.txt",header=T,sep="\t")
library(dplyr)
rt5<-merge(rt5,gpl,by.x="ID_REF",by.y="ID")
rt5<-rt5[!duplicated(rt5$Gene),]
rownames(rt5)<-rt5$Gene
rt5<-rt5[,-c(1,132)]
rt5<-t(rt5)
write.table(rt5,file = "GSE26440_matrix.txt",quote = F,sep = "\t",col.names = NA)

test_Group5<-read.table("GSE26440_cli.txt",header = T,sep = "\t",row.names = 1)
rt5<-rt5[rownames(test_Group5),]
identical(rownames(rt5),rownames(test_Group5)) #[1] TRUE
rt5<-cbind(test_Group5,rt5)

gene<-read.csv("WGCNA_RCDDEGs_RF_LASSO_SVM_venn.csv",header = T)
five5<-rt5[,c("ZDHHC3","CLIC1","GSTO1","BLOC1S1","TLR5","Group")]
five5$Group<-as.factor(five5$Group)
#转变长格式
library(reshape2)
five5$sample<-rownames(five5)
meltfive5<-melt(five5,id.vars = c("sample","Group"),variable.name = "gene",value.name = "expression")

library(ggpubr)
compare_means(expression~Group,data = meltfive5,group.by = "gene")
ggboxplot(meltfive5, x = "Group", y = "expression",
          color = "Group",add = "jitter", 
          palette = "lancet",facet.by = "gene", 
          ncol=5,short.panel.labs = FALSE)+
  stat_compare_means(label = "p.signif",label.y = 15)

library(pROC)
ZDHHC3<-roc(five5$Group,five5$ZDHHC3)
CLIC1<-roc(five5$Group,five5$CLIC1)
GSTO1<-roc(five5$Group,five5$GSTO1)
BLOC1S1<-roc(five5$Group,five5$BLOC1S1)
TLR5<-roc(five5$Group,five5$TLR5)
plot(smooth(ZDHHC3),col="red",legacy.axes=T)
plot(smooth(CLIC1),col="#808000",add=TRUE)
plot(smooth(GSTO1),col="#8A2BE2",add=TRUE)
plot(smooth(BLOC1S1),col="#7CFC00",add=TRUE)
plot(smooth(TLR5),col="blue",add=TRUE)
legend("bottomright",legend=c("ZDHHC3-auc0.9276","CLIC1-auc0.8371",
                              "GSTO1-auc0.8431","BLOC1S1-auc0.8881","TLR5-auc0.9684"),
       col=c("red","#808000","#8A2BE2","#7CFC00","blue"),lty=1.5,cex =0.8)

#GSE26378-----------------------------------------------------------------------
setwd("D:\\AA研究\\RCD_sepsis_bulk_scrna")
rt6<-read.table("RCD_sepsis_GEOorignaldata/GSE26378/GSE26378_matrix.txt",header = T,sep = "\t")
gpl<-read.table("GPL570.txt",header=T,sep="\t")
library(dplyr)
rt6<-merge(rt6,gpl,by.x="ID_REF",by.y="ID")
rt6<-rt6[!duplicated(rt6$Gene),]
rownames(rt6)<-rt6$Gene
rt6<-rt6[,-c(1,105)]
rt6<-t(rt6)
write.table(rt6,file = "GSE26378_matrix.txt",quote = F,sep = "\t",col.names = NA)

test_Group6<-read.table("GSE26378_cli.txt",header = T,sep = "\t",row.names = 1)
rt6<-rt6[rownames(test_Group6),]
identical(rownames(rt6),rownames(test_Group6)) #[1] TRUE
rt6<-cbind(test_Group6,rt6)

gene<-read.csv("WGCNA_RCDDEGs_RF_LASSO_SVM_venn.csv",header = T)
five6<-rt6[,c("ZDHHC3","CLIC1","GSTO1","BLOC1S1","TLR5","Group")]
five6$Group<-as.factor(five6$Group)
library(reshape2)
five6$sample<-rownames(five6)
meltfive6<-melt(five6,id.vars = c("sample","Group"),variable.name = "gene",value.name = "expression")

library(ggpubr)
compare_means(expression~Group,data = meltfive6,group.by = "gene")
ggboxplot(meltfive6, x = "Group", y = "expression",
          color = "Group",add = "jitter", palette = "lancet",facet.by = "gene", ncol=5,short.panel.labs = FALSE)+
  stat_compare_means(label = "p.signif",label.y = 14)
library(pROC)
ZDHHC3<-roc(five6$Group,five6$ZDHHC3)
CLIC1<-roc(five6$Group,five6$CLIC1)
GSTO1<-roc(five6$Group,five6$GSTO1)
BLOC1S1<-roc(five6$Group,five6$BLOC1S1)
TLR5<-roc(five6$Group,five6$TLR5)
plot(smooth(ZDHHC3),col="red",legacy.axes=T)
plot(smooth(CLIC1),col="#808000",add=TRUE)
plot(smooth(GSTO1),col="#8A2BE2",add=TRUE)
plot(smooth(BLOC1S1),col="#7CFC00",add=TRUE)
plot(smooth(TLR5),col="blue",add=TRUE)
legend("bottomright",legend=c("ZDHHC3-auc0.9361","CLIC1-auc0.9294",
                              "GSTO1-auc0.9347","BLOC1S1-auc0.9605","TLR5-auc0.9739"),
       col=c("red","#808000","#8A2BE2","#7CFC00","blue"),lty=1.5,cex =0.8)

#GSE13904-----------------------------------------------------------------------
setwd("D:\\AA研究\\RCD_sepsis_bulk_scrna")
rt7<-read.table("RCD_sepsis_GEOorignaldata/GSE13904/GSE13904_matrix.txt",header = T,sep = "\t")
gpl<-read.table("GPL570.txt",header=T,sep="\t")
library(dplyr)
rt7<-merge(rt7,gpl,by.x="ID_REF",by.y="ID")
rt7<-rt7[!duplicated(rt7$Gene),]
rownames(rt7)<-rt7$Gene
rt7<-rt7[,-c(1,229)]
rt7<-t(rt7)
write.table(rt7,file = "GSE13904_matrix.txt",quote = F,sep = "\t",col.names = NA)

test_Group7<-read.table("GSE13904_cli.txt",header = T,sep = "\t",row.names = 1)
rt7<-rt7[rownames(test_Group7),]
identical(rownames(rt7),rownames(test_Group7)) #[1] TRUE
rt7<-cbind(test_Group7,rt7)

gene<-read.csv("WGCNA_RCDDEGs_RF_LASSO_SVM_venn.csv",header = T)
five7<-rt7[,c("ZDHHC3","CLIC1","GSTO1","BLOC1S1","TLR5","Group")]
five7$Group<-as.factor(five7$Group)
library(reshape2)
five7$sample<-rownames(five7)
five7$Group2<-c(rep("Control",18),rep("Sepsis",158)) 
five7$Group2<-as.factor(five7$Group2)
meltfive7<-melt(five7,id.vars = c("sample","Group2","Group"),variable.name = "gene",value.name = "expression")

library(ggpubr)
compare_means(expression~Group2,data = meltfive7,group.by = "gene")
ggboxplot(meltfive7, x = "Group2", y = "expression",
          color = "Group2",add = "jitter", palette = "lancet",facet.by = "gene", ncol=5,short.panel.labs = FALSE)+
  stat_compare_means(label = "p.signif",label.y = 24)
library(pROC)
ZDHHC3<-roc(five7$Group2,five7$ZDHHC3)
CLIC1<-roc(five7$Group2,five7$CLIC1)
GSTO1<-roc(five7$Group2,five7$GSTO1)
BLOC1S1<-roc(five7$Group2,five7$BLOC1S1)
TLR5<-roc(five7$Group2,five7$TLR5)
plot(smooth(ZDHHC3),col="red",legacy.axes=T)
plot(smooth(CLIC1),col="#808000",add=TRUE)
plot(smooth(GSTO1),col="#8A2BE2",add=TRUE)
plot(smooth(BLOC1S1),col="#7CFC00",add=TRUE)
plot(smooth(TLR5),col="blue",add=TRUE)
legend("bottomright",legend=c("ZDHHC3-auc0.9181","CLIC1-auc0.8087",
                              "GSTO1-auc0.8136","BLOC1S1-auc0.8365","TLR5-auc0.9624"),
       col=c("red","#808000","#8A2BE2","#7CFC00","blue"),lty=1.5,cex =0.8)

#meta-----------------------------------------------------------------------------------------------------------

five1<-rt1[,c("ZDHHC3","CLIC1","GSTO1","BLOC1S1","TLR5","Group")]
five1_Control<-subset(five1,five1$Group=="Control")
five1_Control_ZDHCC3_mean<-mean(five1_Control$ZDHHC3)
five1_Control_CLIC1_mean<-mean(five1_Control$CLIC1)
five1_Control_GSTO1_mean<-mean(five1_Control$GSTO1)
five1_Control_BLOC1S1_mean<-mean(five1_Control$BLOC1S1)
five1_Control_TLR5_mean<-mean(five1_Control$TLR5)
five1_Control_ZDHCC3_sd<-sd(five1_Control$ZDHHC3)
five1_Control_CLIC1_sd<-sd(five1_Control$CLIC1)
five1_Control_GSTO1_sd<-sd(five1_Control$GSTO1)
five1_Control_BLOC1S1_sd<-sd(five1_Control$BLOC1S1)
five1_Control_TLR5_sd<-sd(five1_Control$TLR5)
five1_Sepsis<-subset(five1,five1$Group=="Sepsis")
five1_Sepsis_ZDHCC3_mean<-mean(five1_Sepsis$ZDHHC3)
five1_Sepsis_CLIC1_mean<-mean(five1_Sepsis$CLIC1)
five1_Sepsis_GSTO1_mean<-mean(five1_Sepsis$GSTO1)
five1_Sepsis_BLOC1S1_mean<-mean(five1_Sepsis$BLOC1S1)
five1_Sepsis_TLR5_mean<-mean(five1_Sepsis$TLR5)
five1_Sepsis_ZDHCC3_sd<-sd(five1_Sepsis$ZDHHC3)
five1_Sepsis_CLIC1_sd<-sd(five1_Sepsis$CLIC1)
five1_Sepsis_GSTO1_sd<-sd(five1_Sepsis$GSTO1)
five1_Sepsis_BLOC1S1_sd<-sd(five1_Sepsis$BLOC1S1)
five1_Sepsis_TLR5_sd<-sd(five1_Sepsis$TLR5)
#rt4为GSE69528矩阵
five4<-rt4[,c("ZDHHC3","CLIC1","GSTO1","BLOC1S1","TLR5","Group")]
five4_Control<-subset(five4,five4$Group=="Control")
five4_Sepsis<-subset(five4,five4$Group=="Sepsis")
#同上计算Mean和Sd
#rt5为GSE26440矩阵
five5<-rt5[,c("ZDHHC3","CLIC1","GSTO1","BLOC1S1","TLR5","Group")]
five5_Control<-subset(five5,five5$Group=="Control")
five5_Sepsis<-subset(five5,five5$Group=="Septic shock")
#同上代码
#rt6为GSE26378矩阵
five6<-rt6[,c("ZDHHC3","CLIC1","GSTO1","BLOC1S1","TLR5","Group")]
five6_Control<-subset(five6,five6$Group=="Control")
five6_Sepsis<-subset(five6,five6$Group=="Septic shock")
#rt7为GSE13904
five7<-rt7[,c("ZDHHC3","CLIC1","GSTO1","BLOC1S1","TLR5","Group")]
five7_Control<-subset(five7,five7$Group=="Control")
#同上代码
five7_Sepsis<-subset(five7,subset=five7$Group !="Control")

rnames<-c("GSE65682","GSE69528","GSE26440","GSE26378","GSE13904")
cnames<-c("n1","m1","sd1","n2","m2","sd2")
ZDHHC3<-c(42,five1_Control_ZDHCC3_mean,five1_Control_ZDHCC3_sd,479,five1_Sepsis_ZDHCC3_mean,five1_Sepsis_ZDHCC3_sd,
          28,five4_Control_ZDHCC3_mean,five4_Control_ZDHCC3_sd,83,five4_Sepsis_ZDHCC3_mean,five4_Sepsis_ZDHCC3_sd,
          32,five5_Control_ZDHCC3_mean,five5_Control_ZDHCC3_sd,98,five5_Sepsis_ZDHCC3_mean,five5_Sepsis_ZDHCC3_sd,
          21,five6_Control_ZDHCC3_mean,five6_Control_ZDHCC3_sd,82,five6_Sepsis_ZDHCC3_mean,five6_Sepsis_ZDHCC3_sd,
          18,five7_Control_ZDHCC3_mean,five7_Control_ZDHCC3_sd,158,five7_Sepsis_ZDHCC3_mean,five7_Sepsis_ZDHCC3_sd)
ZDHHC3 <- matrix(data = ZDHHC3, nrow = 5, ncol = 6, byrow = TRUE,dimnames = list(rnames, cnames))
ZDHHC3<-data.frame(ZDHHC3)
ZDHHC3$dataset<-rownames(ZDHHC3)

CLIC1<-c(42,five1_Control_CLIC1_mean,five1_Control_CLIC1_sd,479,five1_Sepsis_CLIC1_mean,five1_Sepsis_CLIC1_sd,
         28,five4_Control_CLIC1_mean,five4_Control_CLIC1_sd,83,five4_Sepsis_CLIC1_mean,five4_Sepsis_CLIC1_sd,
         32,five5_Control_CLIC1_mean,five5_Control_CLIC1_sd,98,five5_Sepsis_CLIC1_mean,five5_Sepsis_CLIC1_sd,
         21,five6_Control_CLIC1_mean,five6_Control_CLIC1_sd,82,five6_Sepsis_CLIC1_mean,five6_Sepsis_CLIC1_sd,
         18,five7_Control_CLIC1_mean,five7_Control_CLIC1_sd,158,five7_Sepsis_CLIC1_mean,five7_Sepsis_CLIC1_sd)
CLIC1 <- matrix(data = CLIC1, nrow = 5, ncol = 6, byrow = TRUE,dimnames = list(rnames, cnames))
CLIC1<-data.frame(CLIC1)
CLIC1$dataset<-rownames(CLIC1)

GSTO1<-c(42,five1_Control_GSTO1_mean,five1_Control_GSTO1_sd,479,five1_Sepsis_GSTO1_mean,five1_Sepsis_GSTO1_sd,
         28,five4_Control_GSTO1_mean,five4_Control_GSTO1_sd,83,five4_Sepsis_GSTO1_mean,five4_Sepsis_GSTO1_sd,
         32,five5_Control_GSTO1_mean,five5_Control_GSTO1_sd,98,five5_Sepsis_GSTO1_mean,five5_Sepsis_GSTO1_sd,
         21,five6_Control_GSTO1_mean,five6_Control_GSTO1_sd,82,five6_Sepsis_GSTO1_mean,five6_Sepsis_GSTO1_sd,
         18,five7_Control_GSTO1_mean,five7_Control_GSTO1_sd,158,five7_Sepsis_GSTO1_mean,five7_Sepsis_GSTO1_sd)
GSTO1 <- matrix(data = GSTO1, nrow = 5, ncol = 6, byrow = TRUE,dimnames = list(rnames, cnames))
GSTO1<-data.frame(GSTO1)
GSTO1$dataset<-rownames(GSTO1)

BLOC1S1<-c(42,five1_Control_BLOC1S1_mean,five1_Control_BLOC1S1_sd,479,five1_Sepsis_BLOC1S1_mean,five1_Sepsis_BLOC1S1_sd,
           28,five4_Control_BLOC1S1_mean,five4_Control_BLOC1S1_sd,83,five4_Sepsis_BLOC1S1_mean,five4_Sepsis_BLOC1S1_sd,
           32,five5_Control_BLOC1S1_mean,five5_Control_BLOC1S1_sd,98,five5_Sepsis_BLOC1S1_mean,five5_Sepsis_BLOC1S1_sd,
           21,five6_Control_BLOC1S1_mean,five6_Control_BLOC1S1_sd,82,five6_Sepsis_BLOC1S1_mean,five6_Sepsis_BLOC1S1_sd,
           18,five7_Control_BLOC1S1_mean,five7_Control_BLOC1S1_sd,158,five7_Sepsis_BLOC1S1_mean,five7_Sepsis_BLOC1S1_sd)
BLOC1S1<- matrix(data = BLOC1S1, nrow = 5, ncol = 6, byrow = TRUE,dimnames = list(rnames, cnames))
BLOC1S1<-data.frame(BLOC1S1)
BLOC1S1$dataset<-rownames(BLOC1S1)

TLR5<-c(42,five1_Control_TLR5_mean,five1_Control_TLR5_sd,479,five1_Sepsis_TLR5_mean,five1_Sepsis_TLR5_sd,
        28,five4_Control_TLR5_mean,five4_Control_TLR5_sd,83,five4_Sepsis_TLR5_mean,five4_Sepsis_TLR5_sd,
        32,five5_Control_TLR5_mean,five5_Control_TLR5_sd,98,five5_Sepsis_TLR5_mean,five5_Sepsis_TLR5_sd,
        21,five6_Control_TLR5_mean,five6_Control_TLR5_sd,82,five6_Sepsis_TLR5_mean,five6_Sepsis_TLR5_sd,
        18,five7_Control_TLR5_mean,five7_Control_TLR5_sd,158,five7_Sepsis_TLR5_mean,five7_Sepsis_TLR5_sd)
TLR5<- matrix(data = TLR5, nrow = 5, ncol = 6, byrow = TRUE,dimnames = list(rnames, cnames))
TLR5<-data.frame(TLR5)
TLR5$dataset<-rownames(TLR5)
save(ZDHHC3,CLIC1,GSTO1,BLOC1S1,TLR5,file = "验证集_meta.Rdata")

library(meta)
#ZDHHC3
meta1_ZDHHC3 <-metacont(n1,m1,sd1,n2,m2,sd2,studlab=paste(dataset),data=ZDHHC3,sm="SMD",label.e = "Control",label.c = "Sepsis")
summary(meta1_ZDHHC3 )  
#SMD             95%-CI %W(common) %W(random)
#GSE65682 -2.6130 [-2.9663; -2.2597]       32.4       21.9
#GSE69528 -1.7754 [-2.2652; -1.2856]       16.8       19.6
#GSE26440 -1.6411 [-2.0885; -1.1936]       20.2       20.3
#GSE26378 -1.4540 [-1.9742; -0.9337]       14.9       19.0
#GSE13904 -1.3614 [-1.8697; -0.8530]       15.6       19.2

#Number of studies: k = 5
#Number of observations: o = 1041 (o.e = 141, o.c = 900)

#SMD             95%-CI      z  p-value
#Common effect model  -1.9067 [-2.1078; -1.7057] -18.59 < 0.0001
#Random effects model -1.7906 [-2.2516; -1.3296]  -7.61 < 0.0001

#Quantifying heterogeneity:
#  tau^2 = 0.2203 [0.0476; 2.0098]; tau = 0.4694 [0.2183; 1.4177]
#I^2 = 83.5% [62.7%; 92.7%]; H = 2.47 [1.64; 3.71]

#Test of heterogeneity:
#  Q d.f.  p-value
#24.31    4 < 0.0001
forest(meta1_ZDHHC3)
funnel(meta1_ZDHHC3)

metainf(meta1_ZDHHC3) 
#Influential analysis (common effect model)
#SMD             95%-CI  p-value   tau^2     tau    I^2
#Omitting GSE65682 -1.5685 [-1.8130; -1.3240] < 0.0001  0.0000  0.0000   0.0%
#Omitting GSE69528 -1.9333 [-2.1538; -1.7129] < 0.0001  0.3018  0.5494  87.5%
#Omitting GSE26440 -1.9739 [-2.1990; -1.7489] < 0.0001  0.2941  0.5423  86.7%
#Omitting GSE26378 -1.9862 [-2.2042; -1.7682] < 0.0001  0.2575  0.5074  85.6%
#Omitting GSE13904 -2.0079 [-2.2268; -1.7890] < 0.0001  0.2313  0.4809  84.3%

#Pooled estimate   -1.9067 [-2.1078; -1.7057] < 0.0001  0.2203  0.4694  83.5%

#CLIC1
meta1_CLIC1 <-metacont(n1,m1,sd1,n2,m2,sd2,studlab=paste(dataset),data=CLIC1,sm="SMD",label.e = "Control",label.c = "Sepsis")
summary(meta1_CLIC1 ) 
#SMD             95%-CI %W(common) %W(random)
#GSE65682 -2.2277 [-2.5711; -1.8843]       33.7       21.7
#GSE69528 -2.3136 [-2.8421; -1.7851]       14.2       19.1
#GSE26440 -1.2220 [-1.6486; -0.7954]       21.9       20.6
#GSE26378 -1.6069 [-2.1359; -1.0780]       14.2       19.1
#GSE13904 -1.0499 [-1.5499; -0.5499]       15.9       19.5

#Number of studies: k = 5
#Number of observations: o = 1041 (o.e = 141, o.c = 900)

#SMD             95%-CI      z  p-value
#Common effect model  -1.7443 [-1.9437; -1.5448] -17.14 < 0.0001
#Random effects model -1.6887 [-2.1931; -1.1842]  -6.56 < 0.0001

#Quantifying heterogeneity:
#  tau^2 = 0.2743 [0.0634; 2.6548]; tau = 0.5237 [0.2518; 1.6294]
#I^2 = 84.3% [64.8%; 93.0%]; H = 2.52 [1.69; 3.78]

#Test of heterogeneity:
#  Q d.f.  p-value
#25.50    4 < 0.0001
forest(meta1_CLIC1) 
funnel(meta1_CLIC1)
metainf(meta1_CLIC1)
#SMD             95%-CI  p-value   tau^2     tau    I^2
#Omitting GSE65682 -1.4980 [-1.7431; -1.2529] < 0.0001  0.2435  0.4934  78.6%
#Omitting GSE69528 -1.6497 [-1.8651; -1.4343] < 0.0001  0.2420  0.4919  85.2%
#Omitting GSE26440 -1.8904 [-2.1161; -1.6648] < 0.0001  0.2877  0.5363  83.5%
#Omitting GSE26378 -1.7670 [-1.9824; -1.5516] < 0.0001  0.3769  0.6139  88.1%
#Omitting GSE13904 -1.8757 [-2.0932; -1.6581] < 0.0001  0.2219  0.4711  82.0%
#Pooled estimate   -1.7443 [-1.9437; -1.5448] < 0.0001  0.2743  0.5237  84.3%

#GSTO1
meta1_GSTO1 <-metacont(n1,m1,sd1,n2,m2,sd2,studlab=paste(dataset),data=GSTO1,sm="SMD",label.e = "Control",label.c = "Sepsis")
summary(meta1_GSTO1 ) 
#SMD             95%-CI %W(common) %W(random)
#GSE65682 -1.9312 [-2.2678; -1.5945]       33.7       22.8
#GSE69528 -1.8430 [-2.3373; -1.3487]       15.6       19.1
#GSE26440 -1.1378 [-1.5608; -0.7148]       21.3       20.8
#GSE26378 -1.4850 [-2.0070; -0.9630]       14.0       18.4
#GSE13904 -0.9230 [-1.4202; -0.4258]       15.4       19.0

#Number of studies: k = 5
#Number of observations: o = 1041 (o.e = 141, o.c = 900)

#SMD             95%-CI      z  p-value
#Common effect model  -1.5303 [-1.7256; -1.3350] -15.36 < 0.0001
#Random effects model -1.4762 [-1.8661; -1.0863]  -7.42 < 0.0001

#Quantifying heterogeneity:
#  tau^2 = 0.1441 [0.0196; 1.5203]; tau = 0.3796 [0.1401; 1.2330]
#I^2 = 75.1% [38.6%; 89.9%]; H = 2.00 [1.28; 3.14]

#Test of heterogeneity:
#  Q d.f. p-value
#16.05    4  0.0030
forest(meta1_GSTO1) 
funnel(meta1_GSTO1)
metainf(meta1_GSTO1) 
#SMD             95%-CI  p-value   tau^2     tau    I^2
#Omitting GSE65682 -1.3270 [-1.5668; -1.0873] < 0.0001  0.0990  0.3146  61.8%
#Omitting GSE69528 -1.4725 [-1.6851; -1.2599] < 0.0001  0.1601  0.4001  78.9%
#Omitting GSE26440 -1.6367 [-1.8568; -1.4165] < 0.0001  0.1557  0.3946  74.7%
#Omitting GSE26378 -1.5377 [-1.7483; -1.3271] < 0.0001  0.2033  0.4509  81.3%
#Omitting GSE13904 -1.6411 [-1.8535; -1.4288] < 0.0001  0.0952  0.3085  67.7%

#Pooled estimate   -1.5303 [-1.7256; -1.3350] < 0.0001  0.1441  0.3796  75.1%

#BLOC1S1
meta1_BLOC1S1 <-metacont(n1,m1,sd1,n2,m2,sd2,studlab=paste(dataset),data=BLOC1S1,sm="SMD",label.e = "Control",label.c = "Sepsis")
summary(meta1_BLOC1S1 ) 
forest(meta1_BLOC1S1) 
funnel(meta1_BLOC1S1)
metainf(meta1_BLOC1S1) 

#TLR5
meta1_TLR5 <-metacont(n1,m1,sd1,n2,m2,sd2,studlab=paste(dataset),data=TLR5,sm="SMD",label.e = "Control",label.c = "Sepsis")
summary(meta1_TLR5 ) 
forest(meta1_TLR5) 
funnel(meta1_TLR5)
metainf(meta1_TLR5) 

#GSEA between two groups with high- and low- hub genes expression----------------------------------
exp<-read.table("补充/GSE28750_GSE95233exp_id_trans.txt",header = T,sep = "\t")
rownames(exp)<-exp[,1]
exp<-exp[,-1]
cli<-read.table("补充/GSE28750_GSE95233cli_merge.txt",header = T,sep = "\t")
rownames(cli)<-cli[,1]
cli<-cli[,-1]
sep<-subset(cli,cli$Group=="Sepsis")
same<-intersect(colnames(exp),rownames(sep))
exp<-exp[,same]
exp<-data.frame(t(exp))
#ZDHHC3 --------------------------------------------------------------------------------------------------
group<-data.frame(id=rownames(exp),group=ifelse(exp$ZDHHC3>median(exp$ZDHHC3),"High","Low"))
rownames(group)<-group[,1]

library(limma)
exp<-data.frame(t(exp))
table(group$group)
#High  Low 
#30   31 
identical(rownames(group),colnames(exp))#[1] TRUE
group_list<-factor(group$group)
design <- model.matrix(~0+factor(group_list))
colnames(design)=levels(factor(group_list))
head(design)
contrast.matrix<-makeContrasts("High-Low",levels = design)
fit <- lmFit(exp,design)
fit2 <- contrasts.fit(fit, contrast.matrix) 
fit2 <- eBayes(fit2)
library(dplyr)
DEG<-topTable(fit2, coef=1, n=Inf) %>% na.omit()  #GSEA不应该进行FDR或FC值预先筛选基因
library(clusterProfiler)
library(org.Hs.eg.db)
library(dplyr)
library(ggplot2)
library(enrichplot)
ids<-bitr(rownames(DEG),"SYMBOL","ENTREZID","org.Hs.eg.db")
DEG$SYMBOL<-rownames(DEG)
data<-inner_join(ids,DEG,by="SYMBOL")
data<-data[order(data$logFC,decreasing = T),]
geneList <- data$logFC
names(geneList) <- data$ENTREZID
library(msigdbr)
m_t2g <- msigdbr(species = "Homo sapiens", category = "H") %>% 
  dplyr::select(gs_name, entrez_gene)
gsea_res <- GSEA(geneList, 
                 TERM2GENE = m_t2g,
                 minGSSize = 10,
                 maxGSSize = 500,
                 pvalueCutoff = 0.05,
                 pAdjustMethod = "BH")
pathways <- subset(gsea_res, gsea_res@result$NES > 0 )
pathways <- pathways[order(pathways$p.adjust), ]
top_5_up <- head(pathways, 5)
pathways1 <- subset(gsea_res, gsea_res@result$NES < 0 )
pathways1 <- pathways1[order(pathways1$p.adjust), ]
top_5_do <- head(pathways1, 5)
top5<-rbind(top_5_up,top_5_do)
library(ggsci)
col_gsea1<-pal_simpsons()(16)
num2=5
p<-gseaplot2(gsea_res,geneSetID = rownames(top5)[1:num2],
             title = "Up_regulated",
             color = col_gsea1[1:num2],
             base_size = 10,
             rel_heights = c(1, 0.2, 0.4),
             subplots = 1:3,
             pvalue_table = FALSE,
             ES_geom = "line"
)
p
num2=10
p2<-gseaplot2(gsea_res,geneSetID = rownames(top5)[6:num2],
              title = "Down_regulated",
              color = col_gsea1[6:num2],
              base_size = 10,#基础大小
              rel_heights = c(1, 0.2, 0.4),
              subplots = 1:3,#展示小图
              pvalue_table = FALSE,
              ES_geom = "line")
p2
#CLIC1 -------------------------------------------------------------------------------------------------------------
exp<-data.frame(t(exp))
group<-data.frame(id=rownames(exp),group=ifelse(exp$CLIC1>median(exp$CLIC1),"High","Low"))
rownames(group)<-group[,1]
exp<-data.frame(t(exp))
table(group$group)
#High  Low 
#30   31 
identical(rownames(group),colnames(exp))#[1] TRUE
group_list<-factor(group$group)
design <- model.matrix(~0+factor(group_list))
colnames(design)=levels(factor(group_list))
head(design)
contrast.matrix<-makeContrasts("High-Low",levels = design)
fit <- lmFit(exp,design)
fit2 <- contrasts.fit(fit, contrast.matrix) 
fit2 <- eBayes(fit2)
library(dplyr)
DEG<-topTable(fit2, coef=1, n=Inf) %>% na.omit()  
ids<-bitr(rownames(DEG),"SYMBOL","ENTREZID","org.Hs.eg.db")
DEG$SYMBOL<-rownames(DEG)
data<-inner_join(ids,DEG,by="SYMBOL")
data<-data[order(data$logFC,decreasing = T),]
geneList <- data$logFC
names(geneList) <- data$ENTREZID

library(msigdbr)
m_t2g <- msigdbr(species = "Homo sapiens", category = "H") %>% 
  dplyr::select(gs_name, entrez_gene)
gsea_res <- GSEA(geneList, 
                 TERM2GENE = m_t2g,
                 minGSSize = 10,
                 maxGSSize = 500,
                 pvalueCutoff = 0.05,
                 pAdjustMethod = "BH")
pathways <- subset(gsea_res, gsea_res@result$NES > 0 )
pathways <- pathways[order(pathways$p.adjust), ]
top_5_up <- head(pathways, 5)
pathways1 <- subset(gsea_res, gsea_res@result$NES < 0 )
pathways1 <- pathways1[order(pathways1$p.adjust), ]
top_5_do <- head(pathways1, 5)
top5<-rbind(top_5_up,top_5_do)
library(ggsci)
col_gsea1<-pal_simpsons()(16)
num2=5
p<-gseaplot2(gsea_res,geneSetID = rownames(top5)[1:num2],
             title = "Up_regulated",
             color = col_gsea1[1:num2],
             base_size = 10,#基础大小
             rel_heights = c(1, 0.2, 0.4),
             subplots = 1:3,
             pvalue_table = FALSE,
             ES_geom = "line"
)
p
num2=10
p2<-gseaplot2(gsea_res,geneSetID = rownames(top5)[6:num2],
              title = "Down_regulated",
              color = col_gsea1[6:num2],
              base_size = 10,#基础大小
              rel_heights = c(1, 0.2, 0.4),
              subplots = 1:3,
              pvalue_table = FALSE,
              ES_geom = "line")
p2
#GSTO1 ----------------------------------------------------------------------
exp<-data.frame(t(exp))
group<-data.frame(id=rownames(exp),group=ifelse(exp$GSTO1>median(exp$GSTO1),"High","Low"))
rownames(group)<-group[,1]
exp<-data.frame(t(exp))
table(group$group)
#High  Low 
#30   31 
identical(rownames(group),colnames(exp))#[1] TRUE
group_list<-factor(group$group)
design <- model.matrix(~0+factor(group_list))
colnames(design)=levels(factor(group_list))
head(design)
contrast.matrix<-makeContrasts("High-Low",levels = design)
fit <- lmFit(exp,design)
fit2 <- contrasts.fit(fit, contrast.matrix) 
fit2 <- eBayes(fit2)
library(dplyr)
DEG<-topTable(fit2, coef=1, n=Inf) %>% na.omit() 
ids<-bitr(rownames(DEG),"SYMBOL","ENTREZID","org.Hs.eg.db")
DEG$SYMBOL<-rownames(DEG)
data<-inner_join(ids,DEG,by="SYMBOL")
data<-data[order(data$logFC,decreasing = T),]
geneList <- data$logFC
names(geneList) <- data$ENTREZID

library(msigdbr)
m_t2g <- msigdbr(species = "Homo sapiens", category = "H") %>% 
  dplyr::select(gs_name, entrez_gene)
gsea_res <- GSEA(geneList, 
                 TERM2GENE = m_t2g,
                 minGSSize = 10,
                 maxGSSize = 500,
                 pvalueCutoff = 0.05,
                 pAdjustMethod = "BH")
pathways <- subset(gsea_res, gsea_res@result$NES > 0 )
pathways <- pathways[order(pathways$p.adjust), ]
top_5_up <- head(pathways, 5)
pathways1 <- subset(gsea_res, gsea_res@result$NES < 0 )
pathways1 <- pathways1[order(pathways1$p.adjust), ]
top_5_do <- head(pathways1, 5)
top5<-rbind(top_5_up,top_5_do)
library(ggsci)
col_gsea1<-pal_simpsons()(16)
num2=5
p<-gseaplot2(gsea_res,geneSetID = rownames(top5)[1:num2],
             title = "Up_regulated",
             color = col_gsea1[1:num2],
             base_size = 10,
             rel_heights = c(1, 0.2, 0.4),
             subplots = 1:3,
             pvalue_table = FALSE,
             ES_geom = "line"
)
p
num2=9
p2<-gseaplot2(gsea_res,geneSetID = rownames(top5)[6:num2],
              title = "Down_regulated",
              color = col_gsea1[6:num2],
              base_size = 10,#基础大小
              rel_heights = c(1, 0.2, 0.4),
              subplots = 1:3,
              pvalue_table = FALSE,
              ES_geom = "line")
p2


#BLOC1S1 -----------------------------------------------------------------------------------------------------
exp<-data.frame(t(exp))
group<-data.frame(id=rownames(exp),group=ifelse(exp$BLOC1S1>median(exp$BLOC1S1),"High","Low"))
rownames(group)<-group[,1]
exp<-data.frame(t(exp))
table(group$group)
#High  Low 
#30   31 
identical(rownames(group),colnames(exp))#[1] TRUE
group_list<-factor(group$group)
design <- model.matrix(~0+factor(group_list))
colnames(design)=levels(factor(group_list))
head(design)
contrast.matrix<-makeContrasts("High-Low",levels = design)
fit <- lmFit(exp,design)
fit2 <- contrasts.fit(fit, contrast.matrix) 
fit2 <- eBayes(fit2)
library(dplyr)
DEG<-topTable(fit2, coef=1, n=Inf) %>% na.omit()  
ids<-bitr(rownames(DEG),"SYMBOL","ENTREZID","org.Hs.eg.db")
DEG$SYMBOL<-rownames(DEG)
data<-inner_join(ids,DEG,by="SYMBOL")
data<-data[order(data$logFC,decreasing = T),]
geneList <- data$logFC
names(geneList) <- data$ENTREZID

library(msigdbr)
m_t2g <- msigdbr(species = "Homo sapiens", category = "H") %>% 
  dplyr::select(gs_name, entrez_gene)
gsea_res <- GSEA(geneList, 
                 TERM2GENE = m_t2g,
                 minGSSize = 10,
                 maxGSSize = 500,
                 pvalueCutoff = 0.05,
                 pAdjustMethod = "BH")

pathways <- subset(gsea_res, gsea_res@result$NES > 0 )
pathways <- pathways[order(pathways$p.adjust), ]
top_5_up <- head(pathways, 5)

pathways1 <- subset(gsea_res, gsea_res@result$NES < 0 )
pathways1 <- pathways1[order(pathways1$p.adjust), ]
top_5_do <- head(pathways1, 5)
top5<-rbind(top_5_up,top_5_do)

library(ggsci)
col_gsea1<-pal_simpsons()(16)
num2=5
p<-gseaplot2(gsea_res,geneSetID = rownames(top5)[1:num2],
             title = "Up_regulated",
             color = col_gsea1[1:num2],
             base_size = 10,#基础大小
             rel_heights = c(1, 0.2, 0.4),
             subplots = 1:3,
             pvalue_table = FALSE,
             ES_geom = "line"
)
p
num2=10
p2<-gseaplot2(gsea_res,geneSetID = rownames(top5)[6:num2],
              title = "Down_regulated",
              color = col_gsea1[6:num2],
              base_size = 10,#基础大小
              rel_heights = c(1, 0.2, 0.4),
              subplots = 1:3,
              pvalue_table = FALSE,
              ES_geom = "line")
p2

#TLR5------------------------------------------------------------------------------------------------------------------
exp<-data.frame(t(exp))
group<-data.frame(id=rownames(exp),group=ifelse(exp$TLR5>median(exp$TLR5),"High","Low"))
rownames(group)<-group[,1]
exp<-data.frame(t(exp))
table(group$group)
#High  Low 
#30   31 
identical(rownames(group),colnames(exp))#[1] TRUE
group_list<-factor(group$group)
design <- model.matrix(~0+factor(group_list))
colnames(design)=levels(factor(group_list))
head(design)
contrast.matrix<-makeContrasts("High-Low",levels = design)
fit <- lmFit(exp,design)
fit2 <- contrasts.fit(fit, contrast.matrix) 
fit2 <- eBayes(fit2)
library(dplyr)
DEG<-topTable(fit2, coef=1, n=Inf) %>% na.omit()  
ids<-bitr(rownames(DEG),"SYMBOL","ENTREZID","org.Hs.eg.db")
DEG$SYMBOL<-rownames(DEG)
data<-inner_join(ids,DEG,by="SYMBOL")
data<-data[order(data$logFC,decreasing = T),]
geneList <- data$logFC
names(geneList) <- data$ENTREZID

library(msigdbr)
m_t2g <- msigdbr(species = "Homo sapiens", category = "H") %>% 
  dplyr::select(gs_name, entrez_gene)
gsea_res <- GSEA(geneList, 
                 TERM2GENE = m_t2g,
                 minGSSize = 10,
                 maxGSSize = 500,
                 pvalueCutoff = 0.05,
                 pAdjustMethod = "BH")
pathways <- subset(gsea_res, gsea_res@result$NES > 0 )
pathways <- pathways[order(pathways$p.adjust), ]
top_5_up <- head(pathways, 5)
pathways1 <- subset(gsea_res, gsea_res@result$NES < 0 )
pathways1 <- pathways1[order(pathways1$p.adjust), ]
top_5_do <- head(pathways1, 5)
top5<-rbind(top_5_up,top_5_do)
library(ggsci)
col_gsea1<-pal_simpsons()(16)
num2=5
p<-gseaplot2(gsea_res,geneSetID = rownames(top5)[1:num2],
             title = "Up_regulated",
             color = col_gsea1[1:num2],
             base_size = 10,#基础大小
             rel_heights = c(1, 0.2, 0.4),
             subplots = 1:3,
             pvalue_table = FALSE,
             ES_geom = "line"
)
p
num2=10
p2<-gseaplot2(gsea_res,geneSetID = rownames(top5)[6:num2],
              title = "Down_regulated",
              color = col_gsea1[6:num2],
              base_size = 10,#基础大小
              rel_heights = c(1, 0.2, 0.4),
              subplots = 1:3,
              pvalue_table = FALSE,
              ES_geom = "line")
p2


# immune correlation analysis-------------------------------------------------------------------------------
results<-read.table("part2.GSE28750_GSE95233CIBERSORT_RCDGSVA/GSE28750_GSE95233_CIBERSORT.txt",header = T,sep = "\t",check.names = F)
rownames(results)<-results[,1]
results<-results[,-1]
exp<-read.table("part1.GSE28750_GSE95233_DEGs/exp_id_trans.txt",header = T,sep = "\t")
rownames(exp)<-exp[,1]
exp<-exp[,-1]
cli<-read.table("part1.GSE28750_GSE95233_DEGs/cli_merge.txt",header = T,sep = "\t")
rownames(cli)<-cli[,1]
cli<-cli[,-1]
sep<-rownames(cli[cli$Group=="Sepsis",])
sepexp<-exp[,sep]
sepexp<-t(sepexp)
results<-results[sep,]
results<-results[,-c(12,17,23:25)]
identical(rownames(sepexp),rownames(results)) #[1] TRUE

gene<-c("ZDHHC3","CLIC1","GSTO1","BLOC1S1","TLR5")
ZDHHC3<-as.matrix(sepexp[,"ZDHHC3"])
colnames(ZDHHC3)[1]<-"ZDHHC3"
CLIC1<-as.matrix(sepexp[,"CLIC1"])
colnames(CLIC1)[1]<-"CLIC1"
GSTO1<-as.matrix(sepexp[,"GSTO1"])
colnames(GSTO1)[1]<-"GSTO1"
BLOC1S1<-as.matrix(sepexp[,"BLOC1S1"])
colnames(BLOC1S1)[1]<-"BLOC1S1"
TLR5<-as.matrix(sepexp[,"TLR5"])
colnames(TLR5)[1]<-"TLR5"

#Visualization (TLR5换成相关基因)
library(Hmisc)
library(ggplot2)
cor_results <- rcorr(TLR5, as.matrix(results))
cor_coef <- cor_results$r
p_values <- cor_results$P
cor_df <- data.frame(
  Cell_Type = rownames(cor_coef),
  Correlation = cor_coef[, 1],
  P_Value = p_values[, 1]
)
cor_df<-cor_df[-1,]
ggplot(cor_df, aes(x = reorder(Cell_Type, Correlation), y = Correlation)) +
  geom_point(aes(size = abs(Correlation), color = P_Value)) +
  geom_segment(aes(x = Cell_Type, xend = Cell_Type, y = 0, yend = ifelse(Correlation > 0, Correlation - offset_factor * abs(Correlation), 
                                                                         Correlation + offset_factor * abs(Correlation))),size=1) +  scale_size_continuous(name = "abs(cor)") +
  scale_color_gradient(name = "p.value", low = "green", high = "purple",
                       breaks=c(0.05,0.25,0.50,0.75),
                       labels=c(0.05,0.25,0.50,0.75)) +
  labs(x = "TLR5", y = "Correlation") +
  coord_flip() +
  theme_minimal()+
  theme(axis.title = element_text(size = 14,color = "black"),
        axis.text = element_text(size = 12,color = "black"))





#单因素多因素分析(GSE65682)----------------------------------------------------------------------
setwd("D:\\AA研究\\RCD_sepsis_bulk_scrna")
test_Data<-read.table("exp_idchange_GSE65682.txt",header = T,sep = "\t")
rownames(test_Data)<-test_Data[,1]
test_Data<-test_Data[,-1]
test_Data<-t(test_Data)
test_Group<-read.table("cli_GSE65682.txt",header = T,sep = "\t")
test_Group<-test_Group[,c(1,2,3,8,9,12)]
rownames(test_Group)<-test_Group[,1]
#整理test_Group
colnames(test_Group)[4]<-"fustat"
colnames(test_Group)[5]<-"futime"
test_Group<-test_Group[order(test_Group$Group,decreasing = F),]
test_Data1<-test_Data[rownames(test_Group),]
identical(rownames(test_Data1),rownames(test_Group))
rt1<-cbind(test_Group,test_Data1)
rt1<-subset(rt1,rt1$Group=="Sepsis")
gene<-read.csv("WGCNA_RCDDEGs_RF_LASSO_SVM_venn.csv",header = T)
five<-rt1[,c("futime","fustat","gender","age","ZDHHC3","CLIC1","GSTO1","BLOC1S1","TLR5","Group")]
write.table(five,file = "GSE65682_sangerbox.txt",quote = F,sep = "\t",col.names = NA)


#2.基因和免疫细胞浸润的相关性分析GSE28750-GSE95233的CIBOERSORT评分结果和每个hubgene的------------------------------------------------------------
#免疫打分
immscore<-read.table("补充/GSE28750_GSE95233_CIBERSORT.txt",header = T,sep = "\t")
rownames(immscore)<-immscore[,1]
immscore<-immscore[,-1]
#hub基因表达矩阵
exp<-read.table("补充/GSE28750_GSE95233exp_id_trans.txt",header = T,sep = "\t")
rownames(exp)<-exp[,1]
exp<-exp[,-1]
exp<-as.data.frame(t(exp))
cli<-read.table("补充/GSE28750_GSE95233cli_merge.txt",header = T,sep = "\t")
rownames(cli)<-cli[,1]
cli<-cli[,-1]
sep<-subset(cli,cli$Group=="Sepsis")
same<-intersect(rownames(exp),rownames(sep))
exp<-exp[same,]
gene<-exp[,c("ZDHHC3","CLIC1","GSTO1","BLOC1S1","TLR5")]
#打分的矩阵
immscore<-immscore[same,]
identical(rownames(immscore),rownames(gene))

#相似性分析
library(psych)
cor<-corr.test(gene,immscore,method = "spearman",adjust = "fdr")
length(cor$r>0.4)
#[1] 450
length(cor$p.adj<0.05)
#[1] 450
r<-cor$r
r<-data.frame(t(r))
p<-cor$p.adj
p<-data.frame(t(p))
#可视化
# 相关性数据
r<-r[-c(12,17,23,24,25),]
r<-as.matrix(r)
# 模拟显著性数据（假设p值范围用于生成*标注）
p<-p[-c(12,17,23,24,25),]
p<-as.matrix(p)
significance_matrix <- ifelse(p < 0.001, "***", ifelse(p < 0.01, "**", ifelse(p < 0.05, "*", "")))
# 合并成一个数据框
data <- data.frame(Cell_Type = rep(rownames(r), each = 5),
                   Factor = rep(colnames(r), times = 20),
                   Correlation = as.numeric(r),
                   Significance = as.vector(significance_matrix))
library(ggplot2)
library(reshape2)
ggplot(data, aes(x = Factor, y = Cell_Type, fill = Correlation)) +
  geom_tile() +
  scale_fill_gradient2(low = "blue", mid = "white", high = "red", midpoint = 0,
                       limits = c(-0.8, 0.8), name = "correlation") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
#添加显著性标签
ggplot(data, aes(x = Factor, y = Cell_Type, fill = Correlation)) +
  geom_tile() +
  scale_fill_gradient2(low = "blue", mid = "white", high = "red", midpoint = 0,
                       limits = c(-0.8, 0.8), name = "correlation") +
  geom_text(aes(label = Significance)) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

#3.GSEA分析：基于GSE28750_GSE9523的合并表达矩阵数据，根据每个基因的表达分为两组，然后进行GSEA分析----------------------------------------------------
#表达矩阵（只需要sepsis样本的）
exp<-read.table("补充/GSE28750_GSE95233exp_id_trans.txt",header = T,sep = "\t")
rownames(exp)<-exp[,1]
exp<-exp[,-1]
cli<-read.table("补充/GSE28750_GSE95233cli_merge.txt",header = T,sep = "\t")
rownames(cli)<-cli[,1]
cli<-cli[,-1]
sep<-subset(cli,cli$Group=="Sepsis")
same<-intersect(colnames(exp),rownames(sep))
exp<-exp[,same]
exp<-data.frame(t(exp))
#3.1ZDHHC3 ----------------------------------------------------------------------
group<-data.frame(id=rownames(exp),group=ifelse(exp$ZDHHC3>median(exp$ZDHHC3),"High","Low"))
rownames(group)<-group[,1]
#limma差异分析
library(limma)
#读取文件
#将表达矩阵变成列名为样本名
exp<-data.frame(t(exp))
table(group$group)
#High  Low 
#30   31 
identical(rownames(group),colnames(exp))#[1] TRUE
#此处最好用比较矩阵
group_list<-factor(group$group)
design <- model.matrix(~0+factor(group_list))
colnames(design)=levels(factor(group_list))
head(design)
#按照我的定义Sepsis vs Control
contrast.matrix<-makeContrasts("High-Low",levels = design)
#拟合模型
fit <- lmFit(exp,design)
fit2 <- contrasts.fit(fit, contrast.matrix) 
fit2 <- eBayes(fit2)
library(dplyr)
DEG<-topTable(fit2, coef=1, n=Inf) %>% na.omit()  #GSEA不应该进行FDR或FC值预先筛选基因

#GSEA
library(clusterProfiler)
library(org.Hs.eg.db)
library(dplyr)
library(ggplot2)
library(enrichplot)
#读取DEGs
ids<-bitr(rownames(DEG),"SYMBOL","ENTREZID","org.Hs.eg.db")
DEG$SYMBOL<-rownames(DEG)
data<-inner_join(ids,DEG,by="SYMBOL")
data<-data[order(data$logFC,decreasing = T),]
geneList <- data$logFC
names(geneList) <- data$ENTREZID

library(msigdbr)
m_t2g <- msigdbr(species = "Homo sapiens", category = "H") %>% 
  dplyr::select(gs_name, entrez_gene)
gsea_res <- GSEA(geneList, 
                 TERM2GENE = m_t2g,
                 minGSSize = 10,
                 maxGSSize = 500,
                 pvalueCutoff = 0.05,
                 pAdjustMethod = "BH")
#选择前5条上调通路和下调通路
# 提取上调通路（ES > 0）并按p值排序，取前5个
pathways <- subset(gsea_res, gsea_res@result$NES > 0 )
pathways <- pathways[order(pathways$p.adjust), ]
top_5_up <- head(pathways, 5)
#提取下调的
pathways1 <- subset(gsea_res, gsea_res@result$NES < 0 )
pathways1 <- pathways1[order(pathways1$p.adjust), ]
top_5_do <- head(pathways1, 5)
top5<-rbind(top_5_up,top_5_do)
#可视化
library(ggsci)
col_gsea1<-pal_simpsons()(16)
#上调
num2=5
p<-gseaplot2(gsea_res,geneSetID = rownames(top5)[1:num2],
             title = "Up_regulated",#标题
             color = col_gsea1[1:num2],#颜色
             base_size = 10,#基础大小
             rel_heights = c(1, 0.2, 0.4),#小图相对高度
             subplots = 1:3,#展示小图
             pvalue_table = FALSE,#p值表格
             ES_geom = "line"#line or dot
)
p
#下调
num2=10
p2<-gseaplot2(gsea_res,geneSetID = rownames(top5)[6:num2],
              title = "Down_regulated",#标题
              color = col_gsea1[6:num2],#颜色
              base_size = 10,#基础大小
              rel_heights = c(1, 0.2, 0.4),#小图相对高度
              subplots = 1:3,#展示小图
              pvalue_table = FALSE,#p值表格
              ES_geom = "line")#line or dot
p2
#3.2CLIC1 ----------------------------------------------------------------------
exp<-data.frame(t(exp))
group<-data.frame(id=rownames(exp),group=ifelse(exp$CLIC1>median(exp$CLIC1),"High","Low"))
rownames(group)<-group[,1]
#limma差异分析
#将表达矩阵变成列名为样本名
exp<-data.frame(t(exp))
table(group$group)
#High  Low 
#30   31 
identical(rownames(group),colnames(exp))#[1] TRUE
#此处最好用比较矩阵
group_list<-factor(group$group)
design <- model.matrix(~0+factor(group_list))
colnames(design)=levels(factor(group_list))
head(design)
#按照我的定义
contrast.matrix<-makeContrasts("High-Low",levels = design)
#拟合模型
fit <- lmFit(exp,design)
fit2 <- contrasts.fit(fit, contrast.matrix) 
fit2 <- eBayes(fit2)
library(dplyr)
DEG<-topTable(fit2, coef=1, n=Inf) %>% na.omit()  #GSEA不应该进行FDR或FC值预先筛选基因

#GSEA
#读取DEGs
ids<-bitr(rownames(DEG),"SYMBOL","ENTREZID","org.Hs.eg.db")
DEG$SYMBOL<-rownames(DEG)
data<-inner_join(ids,DEG,by="SYMBOL")
data<-data[order(data$logFC,decreasing = T),]
geneList <- data$logFC
names(geneList) <- data$ENTREZID

library(msigdbr)
m_t2g <- msigdbr(species = "Homo sapiens", category = "H") %>% 
  dplyr::select(gs_name, entrez_gene)
gsea_res <- GSEA(geneList, 
                 TERM2GENE = m_t2g,
                 minGSSize = 10,
                 maxGSSize = 500,
                 pvalueCutoff = 0.05,
                 pAdjustMethod = "BH")
#选择前5条上调通路和下调通路
# 提取上调通路（ES > 0）并按p值排序，取前5个
pathways <- subset(gsea_res, gsea_res@result$NES > 0 )
pathways <- pathways[order(pathways$p.adjust), ]
top_5_up <- head(pathways, 5)
#提取下调的
pathways1 <- subset(gsea_res, gsea_res@result$NES < 0 )
pathways1 <- pathways1[order(pathways1$p.adjust), ]
top_5_do <- head(pathways1, 5)
top5<-rbind(top_5_up,top_5_do)
#可视化
library(ggsci)
col_gsea1<-pal_simpsons()(16)
#上调
num2=5
p<-gseaplot2(gsea_res,geneSetID = rownames(top5)[1:num2],
             title = "Up_regulated",#标题
             color = col_gsea1[1:num2],#颜色
             base_size = 10,#基础大小
             rel_heights = c(1, 0.2, 0.4),#小图相对高度
             subplots = 1:3,#展示小图
             pvalue_table = FALSE,#p值表格
             ES_geom = "line"#line or dot
)
p
#下调
num2=10
p2<-gseaplot2(gsea_res,geneSetID = rownames(top5)[6:num2],
              title = "Down_regulated",#标题
              color = col_gsea1[6:num2],#颜色
              base_size = 10,#基础大小
              rel_heights = c(1, 0.2, 0.4),#小图相对高度
              subplots = 1:3,#展示小图
              pvalue_table = FALSE,#p值表格
              ES_geom = "line")#line or dot
p2

#3.3GSTO1 ----------------------------------------------------------------------
exp<-data.frame(t(exp))
group<-data.frame(id=rownames(exp),group=ifelse(exp$GSTO1>median(exp$GSTO1),"High","Low"))
rownames(group)<-group[,1]
#limma差异分析
#将表达矩阵变成列名为样本名
exp<-data.frame(t(exp))
table(group$group)
#High  Low 
#30   31 
identical(rownames(group),colnames(exp))#[1] TRUE
#此处最好用比较矩阵
group_list<-factor(group$group)
design <- model.matrix(~0+factor(group_list))
colnames(design)=levels(factor(group_list))
head(design)
#按照我的定义
contrast.matrix<-makeContrasts("High-Low",levels = design)
#拟合模型
fit <- lmFit(exp,design)
fit2 <- contrasts.fit(fit, contrast.matrix) 
fit2 <- eBayes(fit2)
library(dplyr)
DEG<-topTable(fit2, coef=1, n=Inf) %>% na.omit()  #GSEA不应该进行FDR或FC值预先筛选基因

#GSEA
#读取DEGs
ids<-bitr(rownames(DEG),"SYMBOL","ENTREZID","org.Hs.eg.db")
DEG$SYMBOL<-rownames(DEG)
data<-inner_join(ids,DEG,by="SYMBOL")
data<-data[order(data$logFC,decreasing = T),]
geneList <- data$logFC
names(geneList) <- data$ENTREZID

library(msigdbr)
m_t2g <- msigdbr(species = "Homo sapiens", category = "H") %>% 
  dplyr::select(gs_name, entrez_gene)
gsea_res <- GSEA(geneList, 
                 TERM2GENE = m_t2g,
                 minGSSize = 10,
                 maxGSSize = 500,
                 pvalueCutoff = 0.05,
                 pAdjustMethod = "BH")
#选择前5条上调通路和下调通路
# 提取上调通路（ES > 0）并按p值排序，取前5个
pathways <- subset(gsea_res, gsea_res@result$NES > 0 )
pathways <- pathways[order(pathways$p.adjust), ]
top_5_up <- head(pathways, 5)
#提取下调的
pathways1 <- subset(gsea_res, gsea_res@result$NES < 0 )
pathways1 <- pathways1[order(pathways1$p.adjust), ]
top_5_do <- head(pathways1, 5)
top5<-rbind(top_5_up,top_5_do)
#可视化
library(ggsci)
col_gsea1<-pal_simpsons()(16)
#上调
num2=5
p<-gseaplot2(gsea_res,geneSetID = rownames(top5)[1:num2],
             title = "Up_regulated",#标题
             color = col_gsea1[1:num2],#颜色
             base_size = 10,#基础大小
             rel_heights = c(1, 0.2, 0.4),#小图相对高度
             subplots = 1:3,#展示小图
             pvalue_table = FALSE,#p值表格
             ES_geom = "line"#line or dot
)
p
#下调
num2=9
p2<-gseaplot2(gsea_res,geneSetID = rownames(top5)[6:num2],
              title = "Down_regulated",#标题
              color = col_gsea1[6:num2],#颜色
              base_size = 10,#基础大小
              rel_heights = c(1, 0.2, 0.4),#小图相对高度
              subplots = 1:3,#展示小图
              pvalue_table = FALSE,#p值表格
              ES_geom = "line")#line or dot
p2


#3.4BLOC1S1 ----------------------------------------------------------------------
exp<-data.frame(t(exp))
group<-data.frame(id=rownames(exp),group=ifelse(exp$BLOC1S1>median(exp$BLOC1S1),"High","Low"))
rownames(group)<-group[,1]
#limma差异分析
#将表达矩阵变成列名为样本名
exp<-data.frame(t(exp))
table(group$group)
#High  Low 
#30   31 
identical(rownames(group),colnames(exp))#[1] TRUE
#此处最好用比较矩阵
group_list<-factor(group$group)
design <- model.matrix(~0+factor(group_list))
colnames(design)=levels(factor(group_list))
head(design)
#按照我的定义
contrast.matrix<-makeContrasts("High-Low",levels = design)
#拟合模型
fit <- lmFit(exp,design)
fit2 <- contrasts.fit(fit, contrast.matrix) 
fit2 <- eBayes(fit2)
library(dplyr)
DEG<-topTable(fit2, coef=1, n=Inf) %>% na.omit()  #GSEA不应该进行FDR或FC值预先筛选基因

#GSEA
#读取DEGs
ids<-bitr(rownames(DEG),"SYMBOL","ENTREZID","org.Hs.eg.db")
DEG$SYMBOL<-rownames(DEG)
data<-inner_join(ids,DEG,by="SYMBOL")
data<-data[order(data$logFC,decreasing = T),]
geneList <- data$logFC
names(geneList) <- data$ENTREZID

library(msigdbr)
m_t2g <- msigdbr(species = "Homo sapiens", category = "H") %>% 
  dplyr::select(gs_name, entrez_gene)
gsea_res <- GSEA(geneList, 
                 TERM2GENE = m_t2g,
                 minGSSize = 10,
                 maxGSSize = 500,
                 pvalueCutoff = 0.05,
                 pAdjustMethod = "BH")
#选择前5条上调通路和下调通路
# 提取上调通路（ES > 0）并按p值排序，取前5个
pathways <- subset(gsea_res, gsea_res@result$NES > 0 )
pathways <- pathways[order(pathways$p.adjust), ]
top_5_up <- head(pathways, 5)
#提取下调的
pathways1 <- subset(gsea_res, gsea_res@result$NES < 0 )
pathways1 <- pathways1[order(pathways1$p.adjust), ]
top_5_do <- head(pathways1, 5)
top5<-rbind(top_5_up,top_5_do)
#可视化
library(ggsci)
col_gsea1<-pal_simpsons()(16)
#上调
num2=5
p<-gseaplot2(gsea_res,geneSetID = rownames(top5)[1:num2],
             title = "Up_regulated",#标题
             color = col_gsea1[1:num2],#颜色
             base_size = 10,#基础大小
             rel_heights = c(1, 0.2, 0.4),#小图相对高度
             subplots = 1:3,#展示小图
             pvalue_table = FALSE,#p值表格
             ES_geom = "line"#line or dot
)
p
#下调
num2=10
p2<-gseaplot2(gsea_res,geneSetID = rownames(top5)[6:num2],
              title = "Down_regulated",#标题
              color = col_gsea1[6:num2],#颜色
              base_size = 10,#基础大小
              rel_heights = c(1, 0.2, 0.4),#小图相对高度
              subplots = 1:3,#展示小图
              pvalue_table = FALSE,#p值表格
              ES_geom = "line")#line or dot
p2

#3.5TLR5----------------------------------------------------------------------
exp<-data.frame(t(exp))
group<-data.frame(id=rownames(exp),group=ifelse(exp$TLR5>median(exp$TLR5),"High","Low"))
rownames(group)<-group[,1]
#limma差异分析
#将表达矩阵变成列名为样本名
exp<-data.frame(t(exp))
table(group$group)
#High  Low 
#30   31 
identical(rownames(group),colnames(exp))#[1] TRUE
#此处最好用比较矩阵
group_list<-factor(group$group)
design <- model.matrix(~0+factor(group_list))
colnames(design)=levels(factor(group_list))
head(design)
#按照我的定义
contrast.matrix<-makeContrasts("High-Low",levels = design)
#拟合模型
fit <- lmFit(exp,design)
fit2 <- contrasts.fit(fit, contrast.matrix) 
fit2 <- eBayes(fit2)
library(dplyr)
DEG<-topTable(fit2, coef=1, n=Inf) %>% na.omit()  #GSEA不应该进行FDR或FC值预先筛选基因

#GSEA
#读取DEGs
ids<-bitr(rownames(DEG),"SYMBOL","ENTREZID","org.Hs.eg.db")
DEG$SYMBOL<-rownames(DEG)
data<-inner_join(ids,DEG,by="SYMBOL")
data<-data[order(data$logFC,decreasing = T),]
geneList <- data$logFC
names(geneList) <- data$ENTREZID

library(msigdbr)
m_t2g <- msigdbr(species = "Homo sapiens", category = "H") %>% 
  dplyr::select(gs_name, entrez_gene)
gsea_res <- GSEA(geneList, 
                 TERM2GENE = m_t2g,
                 minGSSize = 10,
                 maxGSSize = 500,
                 pvalueCutoff = 0.05,
                 pAdjustMethod = "BH")
#选择前5条上调通路和下调通路
# 提取上调通路（ES > 0）并按p值排序，取前5个
pathways <- subset(gsea_res, gsea_res@result$NES > 0 )
pathways <- pathways[order(pathways$p.adjust), ]
top_5_up <- head(pathways, 5)
#提取下调的
pathways1 <- subset(gsea_res, gsea_res@result$NES < 0 )
pathways1 <- pathways1[order(pathways1$p.adjust), ]
top_5_do <- head(pathways1, 5)
top5<-rbind(top_5_up,top_5_do)
#可视化
library(ggsci)
col_gsea1<-pal_simpsons()(16)
#上调
num2=5
p<-gseaplot2(gsea_res,geneSetID = rownames(top5)[1:num2],
             title = "Up_regulated",#标题
             color = col_gsea1[1:num2],#颜色
             base_size = 10,#基础大小
             rel_heights = c(1, 0.2, 0.4),#小图相对高度
             subplots = 1:3,#展示小图
             pvalue_table = FALSE,#p值表格
             ES_geom = "line"#line or dot
)
p
#下调
num2=10
p2<-gseaplot2(gsea_res,geneSetID = rownames(top5)[6:num2],
              title = "Down_regulated",#标题
              color = col_gsea1[6:num2],#颜色
              base_size = 10,#基础大小
              rel_heights = c(1, 0.2, 0.4),#小图相对高度
              subplots = 1:3,#展示小图
              pvalue_table = FALSE,#p值表格
              ES_geom = "line")#line or dot
p2