library("affyPLM") library(limma) Sys.setenv("VROOM_CONNECTION_SIZE"=131072*600) library("GEOquery") gse <- getGEO("GSE96804",destdir = ".", getGPL = T) exp <- exprs(gse[[1]]) cli <- pData(gse[[1]]) cli$group <- ifelse(grepl("DN",cli$title),"DN","control") exp <- as.data.frame(exp) exp$ID <- row.names(exp) #gpl <- fData(gse[[1]]) gpl <- read.csv("annotation.csv") gpl <- gpl[,c(1,2,8,9,10,11)] gpl$'gene symble' <- sapply(strsplit(gpl$gene_assignment,"//"),"[",2) gpl1 <- gpl[,c(2,9)] colnames(gpl1) <- c("ID",'gene symbol') symbol <- merge(exp,gpl1,by="ID") symbol <- na.omit(symbol) table(duplicated(symbol$`gene symbol`)) library(limma) symbol_unique <- avereps(symbol[,-c(1,ncol(symbol))], ID=symbol$`gene symbol`) table(duplicated(row.names(symbol_unique))) save(cli,exp,symbol_unique,gpl,gpl1,gse,file = "GSE96804.Rdata") write.csv(symbol_unique,"GSE96804_unique.CSV") write.table(symbol_unique,"expMatrix.txt") write.csv(group,"sample.csv") group <- cli[,c(2,ncol(cli))] DN <- group[group$group=='DN',] control <- group[group$group=="control",] exp_DN <-symbol_unique[,which(colnames(symbol_unique)%in%DN$geo_accession)] exp_control <- symbol_unique[,which(colnames(symbol_unique)%in%control$geo_accession)] rm(list = ls()) load("GSE96804.Rdata") boxplot(symbol_unique) options(stringsAsFactors = F) library("limma") group <- group$group design <- model.matrix(~0+factor(group)) colnames(design) <- levels(factor(group)) rownames(design) <- colnames(symbol_unique) contrast.matrix <- makeContrasts(DN-control,levels = design) contrast.matrix fit <- lmFit(symbol_unique,design) fit2 <- contrasts.fit(fit,contrast.matrix) fit2 <- eBayes(fit2) options(digits = 4) DEG <- topTable(fit2,coef = 1,n=Inf) DEG$group <- ifelse(DEG$adj.P.Val>0.05,"no_change", ifelse(DEG$logFC> 1,"up", ifelse(DEG$logFC< -1,"down" ,"no_change"))) table(DEG$group) DEG$gene <- rownames(DEG) save(DEG,group,change,file = "DEG96804.Rdata") write.csv(DEG,"DEG96804.csv") #========================================================== library("ggplot2") aging_EEG$threshold = factor(ifelse(aging_EEG$adj.P.Val < 0.05 & abs(aging_EEG$logFC) >= 1, ifelse(aging_EEG$logFC>= 1 ,'Up','Down'),'NoSignifi'),levels=c('Up','Down','NoSignifi')) pp<- ggplot(aging_EEG,aes(x=logFC,y=-log10(adj.P.Val),color=threshold))+ geom_point()+ scale_color_manual(values=c("#00008B","#808080","#DC143C"))+ geom_text_repel( data = aging_EEG[aging_EEG$adj.P.Val<0.05&abs(aging_EEG$logFC)>1,], aes(label = gene), size = 3, segment.color = "black", show.legend = FALSE )+ theme_bw()+ theme( legend.title = element_blank() )+ ylab('-log10 (p-adj)')+ xlab('log2 (FoldChange)')+ geom_hline(yintercept = -log10(0.05),lty=3,col="black",lwd=0.5)+ geom_vline(xintercept=0,lty=5) ggsave( plot = pp, filename = " Volcano plot.pdf", width = 16, height = 12, units = "cm") #===================================================================================== library(raster) rownames(symbol_unique) <- trim(rownames(symbol_unique)) ae <- symbol_unique[rownames(symbol_unique)%in%aging_DEG$gene,] aging_exp <- t(ae) data1 <- as.data.frame(aging_exp) data1$name <- rownames(data1) group$name <- rownames(group) data <- merge(data1,group,by="name") data2 <- data[,-1] #============================================ library(reshape2) data3 <- melt(data2) names(data3) <- c("Group","Gene_symbol","Gene_expression") library(ggplot2) library(ggpubr) data3$Group <- ifelse(data3$Group=="DN","DKD","Control") data3$Group <- factor(data3$Group,levels = c("DKD","Control")) a <- ggplot(data3, aes(x = Gene_symbol, y = Gene_expression, fill = Group)) + geom_boxplot(outlier.size = 1,width=0.4,outlier.colour = "grey") + theme(panel.grid = element_blank(), panel.background = element_rect(fill = 'transparent', color = 'black'), legend.title = element_blank(), legend.key = element_blank(), axis.text.x = element_text(angle = 45, hjust = 0.5, vjust = 0.5,size=9), axis.text.y = element_text(size=9), legend.text=element_text(size=9))+ labs(x = '', y = 'Gene Expression')+ scale_fill_manual(values = c("#ED0000E5","#00468BE5")) pp <- a+stat_compare_means(aes(group = Group),method = "t.test",label = "p.signif") ggsave( plot = pp, filename = "15gene Box plots.pdf", width = 18, height = 11, units = "cm") write.csv(aging_DEG,"aging_DEG15.csv") #===================================================================================== library(raster) rownames(symbol_unique) <- trim(rownames(symbol_unique)) ae <- symbol_unique[rownames(symbol_unique)%in%aging_DEG$gene,] aging_exp <- t(ae) data1 <- as.data.frame(aging_exp) data1$name <- rownames(data1) group$name <- rownames(group) data <- merge(data1,group,by="name") data <- na.omit(data) data$DN <- ifelse(data$group=="DN",1,0) data2 <- data[data$DN==1,] datafile <- data1[,-16] #datafile <- datafile[rownames(datafile)%in%data2$name,]dataPearson <- round(cor(datafile,method = c("pearson")) , 2)library(Hmisc) dataPearson_rcorr <- rcorr(as.matrix(datafile)) library(corrplot) p <- quickcor(dataPearson_rcorr, cor.test =TRUE,type = "upper")+ ## cor.test geom_square(data =get_data(type ="upper", show.diag =FALSE))+ geom_mark(r = NA,sig.thres = 0.05, size = 5, colour = "grey90")+ geom_abline(slope =-1, intercept =16,colour = "grey90")+ scale_fill_gradient2(midpoint = 0, low = "#56B4E9", mid = "white",high = "#DC143C", space = "Lab" )+ scale_color_manual(values=c("#DC143C", "#E69F00", "#56B4E9")) ggsave(p,filename = "pearson-15gene.tiff",width =7 ,height=7,dpi = 150) #BiocManager::install("ConsensusClusterPlus") library(ConsensusClusterPlus) library(limma) maxk=9 title=setwd("D:/data-R/DN202204/") results = ConsensusClusterPlus(exp,maxK=maxk, reps=50,pItem=0.8, pFeature=1, title=title, clusterAlg="hc",distance="pearson", seed=1262118388.71279,plot="png", writeTable = F) results[1] #K=2 results[[2]][["consensusMatrix"]][1:5,1:5] results[[2]][["consensusTree"]] agegroup9_159 <- as.data.frame(results[[3]][["consensusClass"]]) names(agegroup9_159) <- "group" agegroup9_159$name <- rownames(agegroup9_159) agegroup9_159$group <- ifelse(agegroup9_159$group=="1","group1", ifelse(agegroup9_159$group=="2","group2", "group3")) icl <- calcICL(results, title = title, plot = "png") dim(icl[["clusterConsensus"]]) icl[["clusterConsensus"]] dim(icl[["itemConsensus"]]) icl[["itemConsensus"]][1:5,] #========================================================== group <- agegroup9_159 colnames(group) <- "group" group$group <- ifelse(group$group=="1","group1", ifelse(group$group=="2","group2", "group3")) group$name <- rownames(group) exp3 <- as.data.frame(exp3) exp3$name <- rownames(exp3) exp <- merge(exp3,group,by="name") library(reshape2) data2 <- melt(exp) head(data2) colnames(data2) <- c("ID","Group","gene","gene_expression") library(ggplot2) ggplot(data2, aes(x = gene, y = gene_expression, fill = Group)) + geom_boxplot(outlier.size = 1) + theme(panel.grid = element_blank(), panel.background = element_rect(fill = 'transparent', color = 'black'), legend.title = element_blank(), legend.key = element_blank()) + labs(x = '', y = 'gene expression') library(ggpubr) data2$Group <- factor(data2$Group,levels = c("group1","group2","group3")) data3 <- data2[data2$Group!="group3",] library(ggplot2) ggplot(data3, aes(x = gene, y = gene_expression, fill = Group)) + geom_boxplot(outlier.size = 1) + theme(panel.grid = element_blank(), panel.background = element_rect(fill = 'transparent', color = 'black'), legend.title = element_blank(), legend.key = element_blank()) + labs(x = '', y = 'gene expression') library(ggpubr) data3$Group <- factor(data3$Group,levels = c("group1","group2")) a <- ggplot(data3, aes(x = gene, y = gene_expression, fill = Group)) + geom_boxplot(outlier.size = 1,width=0.4,outlier.colour = "grey") + theme(panel.grid = element_blank(), panel.background = element_rect(fill = 'transparent', color = 'black'), legend.title = element_blank(), legend.key = element_blank(), axis.text.x = element_text(family = "Times",angle = 45, hjust = 0.5, vjust = 0.5,size=16), axis.text.y = element_text(size=16), legend.text=element_text(size=16))+ labs(x = '', y = 'Gene Expression')+ labs_pubr(base_size = 14)+ scale_fill_manual(values = c("#8ECFC9","#FFBE7A")) pp <- a+stat_compare_means(aes(group = Group),method = "t.test",label = "p.signif", hide.ns = T) pp ggsave( plot = pp, filename = "15gene Box plots of 2 clusters.pdf", width = 18, height = 12, units = "cm") #========================================================== r1 <- read.csv("Cibersort/result3_perm1000/CIBERSORTx_Job11_Results.csv") names(r1) r2 <- r1[,1:23] names(r2) rownames(r2) <- r2$Mixture colnames(r2)[1] <- "name" r3 <- r2[r2$name%in%group$name,] r4 <- merge(r3,group,by="name") library(reshape2) data <- melt(r4) head(data) colnames(data) <- c("ID","Group","Cell_name","Fraction") library(ggpubr) data$Group <- factor(data$Group,levels = c("group1","group2","group3")) data3 <- data[data$Group!="group3",] library(ggplot2) ggplot(data3, aes(x = Cell_name, y = Fraction, fill = Group)) + geom_boxplot(outlier.size = 1) + theme(panel.grid = element_blank(), panel.background = element_rect(fill = 'transparent', color = 'black'), legend.title = element_blank(), legend.key = element_blank()) + labs(x = '', y = 'Fraction') library(ggpubr) data3$Group <- factor(data3$Group,levels = c("group1","group2")) a <- ggplot(data3, aes(x = Cell_name, y = Fraction, fill = Group)) + geom_boxplot(outlier.size = 1,width=0.6,outlier.colour = "grey") + theme(panel.grid = element_blank(), panel.background = element_rect(fill = 'transparent', color = 'black'), legend.title = element_blank(), legend.key = element_blank(), axis.text.x = element_text(angle = -45, hjust = 0.1, vjust = 0.1,size=14,colour = "black"), axis.text.y = element_text(size=14), legend.text=element_text(size=14))+ labs(x = '', y = 'Relative Fraction')+ labs_pubr(base_size = 18)+ scale_fill_manual(values = c("#228B22","#FFE384")) p3 <- a+stat_compare_means(aes(group = Group),method = "t.test",label = "p.signif", hide.ns = T) #========================================================== library(ggplot2) a <- ggplot(data, aes(x = Cell_name, y = Fraction, fill = Group)) + geom_violin(cex=0.2,position = position_dodge(width = 0.5), scale = 'width', alpha = 0.8) + theme(panel.grid = element_blank(), panel.background = element_rect(fill = 'transparent', color = 'black'), legend.title = element_blank(), legend.key = element_blank(), axis.text.x = element_text(angle = -45, hjust = 0.1, vjust = 0.1,size=14,colour = "black"), axis.text.y = element_text(size=14), slegend.text=element_text(size=14)) + theme_classic(base_size = 20)+ labs(x = '', y = 'Fraction')+ scale_fill_manual(values = c("#228B22","#FFE384","#A066D3"))+ coord_flip()+ geom_boxplot(width = 0.4, outlier.shape = NA, alpha = 0.3) a+stat_compare_means(aes(group = Group),method = "t.test",label = "p.signif", hide.ns = T) library(tidyverse) distributions5 <- ggplot(data = data, aes(x = Cell_name, y = Fraction, fill = Group)) + geom_flat_violin(aes(x = Cell_name, y = Fraction, fill = Group), position = position_nudge(x = 0.2, y = 0), alpha = 0.8) + geom_point(aes(y = Fraction, color = Group), position = position_jitter(width = 0.15), size = 1, alpha = 0.1) + geom_boxplot(width = 0.4, outlier.shape = NA, alpha = 0.3) + scale_fill_manual(values = c("#5A4A6F", "#E47250", "#EBB261")) + scale_colour_manual(values = c("#5A4A6F", "#E47250", "#EBB261"))+ coord_flip()+ theme(panel.grid = element_blank(), panel.background = element_rect(fill = 'transparent', color = 'black'), legend.title = element_blank(), legend.key = element_blank(), axis.text.x = element_text(angle = -45, hjust = 0.1, vjust = 0.1,size=14,colour = "black"), axis.text.y = element_text(size=14), legend.text=element_text(size=14)) distributions5 r1 <- read.csv("Cibersort/result3_perm1000/CIBERSORTx_Job11_Results.csv") names(r1) r2 <- r1[,1:23] names(r2) rownames(r2) <- r2$Mixture r2 <- r2[,-1] r3 <- t(r2) library("pheatmap") exp1 <- exp[,2:16] group1 <- agegroup9_159 group1$name <- rownames(group1) group3 <- group1[order(group1[,1]),] colnames(group3)[1] <- "group" group3$group <- ifelse(group3$group=="1","group1", ifelse(group3$group=="2","group2", "group3")) group<- as.data.frame(group3[,1]) rownames(group) <- rownames(group3) colnames(group) <- "group" group r1 <- read.csv("Cibersort/result3_perm1000/CIBERSORTx_Job11_Results.csv") names(r1) r2 <- r1[,1:23] names(r2) rownames(r2) <- r2$Mixture r2 <- r2[,-1] r3 <- t(r2) r4 <- r3[,colnames(r3)%in%rownames(group)] a <- match(rownames(group),colnames(r4)) r4 <- r4[,a] r5 <- r4 r5 <- r5[-8,] rownames(r4)[8] #T.cells.follicular.helper All the data in this row is 0, and it is eliminated pheatmap(r5, show_rownames = T, show_colnames = T, cluster_cols = F, scale="row", color = colorRampPalette(c("blue", "white", "red"))(50), cluster_rows = T, annotation_col = group) r6 <- r4[c(3,4,12,15,16,17,20),] ann_colors <- c("#228B22","#FFE384","#A066D3") names(ann_colors) <- c("group1","group2","group3") ann_colors <- list(group=ann_colors) library(RColorBrewer) display.brewer.pal(11,"PiYG") barplot(1:50,col=colorRampPalette(brewer.pal(11, "PiYG"))(50)) coul <- colorRampPalette(brewer.pal(11, "PiYG"))(50) pheatmap(r6, show_rownames = T, show_colnames = T, cluster_cols = F, scale="row", color = coul, cluster_rows = T, annotation_col = group, annotation_colors = ann_colors) display.brewer.pal(11,"PRGn") barplot(1:50,col=colorRampPalette(brewer.pal(11, "PRGn"))(50)) coul2 <- colorRampPalette(brewer.pal(11, "PRGn"))(50) pheatmap(r6, show_rownames = T, show_colnames = T, cluster_cols = F, scale="row", color = coul2, cluster_rows = T, annotation_col = group, annotation_colors = ann_colors) coul3 <- colorRampPalette(brewer.pal(11, "BrBG"))(50) p4 <- pheatmap(r6, show_rownames = T, show_colnames = T, cluster_cols = F, scale="row", color = coul3, cluster_rows = T, annotation_col = group, annotation_colors = ann_colors) r7 <- r4[c(3,12,15,16,17),1:39] group1 <- group group1$name <- rownames(group1) group1 <- group1[1:39,] group2 <- as.data.frame(group1[,-2]) rownames(group2) <- rownames(group1) colnames(group2) <- "group" ann_colors <- c("#228B22","#FFE384") names(ann_colors) <- c("group1","group2") ann_colors <- list(group=ann_colors) coul3 <- colorRampPalette(brewer.pal(11, "BrBG"))(50) p5 <- pheatmap(r7, show_rownames = T, show_colnames = T, cluster_cols = F, scale="row" , color = coul3, cluster_rows = T, annotation_col = group2, annotation_colors = ann_colors) immune_exp_all <- r2 immune_exp_DN <- r4 different_immune_exp_age159.3clusters <- r6 different_immune_exp_age159.3_1and2_clusters <- r7 sig_matrix <- "LM22.txt" mixture_file = 'GSE96804EXP.txt' res_cibersort <- CIBERSORT(sig_matrix, mixture_file, perm=10, QN=FALSE) #===================================================================================== r1 <- read.csv("result3_perm1000/CIBERSORTx_Job11_Results.csv") names(r1) r2 <- r1[,1:23] names(r2) rownames(r2) <- r2$Mixture r2 <- r2[,-1] setwd("D:/data-R/DN202204/") load("DEG96804.Rdata") setwd("D:/data-R/DN202204/Cibersort/") r3 <- cbind(r2,group) r3$ID <- rownames(r3) data1 <- as.data.frame(r3) library(reshape2) data2 <- melt(data1) names(data2) <- c("Group","ID","cell_type","cell_composition") #============================================ library(ggplot2) ggplot(data2, aes(x = cell_type, y = cell_composition, fill = Group)) + geom_boxplot(outlier.size = 1) + theme(panel.grid = element_blank(), panel.background = element_rect(fill = 'transparent', color = 'black'), legend.title = element_blank(), legend.key = element_blank()) + labs(x = '', y = 'cell composition') library(ggpubr) data2$Group <- ifelse(data2$Group=="DN","DKD","Control") data2$Group <- factor(data2$Group,levels = c("DKD","Control")) col <- c("black","red","black","red","black","black","black","black","black","black","black","black","black","black","red","red","red","black","red","red","black","red") a <- ggplot(data2, aes(x = cell_type, y = cell_composition, fill = Group)) + geom_boxplot(outlier.size = 1,width=0.6,outlier.colour = "grey") + theme(panel.grid = element_blank(), panel.background = element_rect(fill = 'transparent', color = 'black'), legend.title = element_blank(), legend.key = element_blank(), axis.text.x = element_text(family = "Times",angle = -45, hjust = 0.1, vjust = 0.1,size=18,colour = "black"), axis.text.y = element_text(size=18), legend.text=element_text(size=18))+ labs(x = '', y = 'Relative Fraction')+ scale_fill_manual(values = c("#ED0000E5","#00468BE5")) a <- ggplot(data2, aes(x = cell_type, y = cell_composition, fill = Group)) + geom_boxplot(outlier.size = 1,width=0.6,outlier.colour = "grey") + theme(panel.grid = element_blank(), panel.background = element_rect(fill = 'transparent', color = 'black'), legend.title = element_blank(), legend.key = element_blank(), axis.text.x = element_text(family = "Times",angle = -45, hjust = 0.1, vjust = 0.1,size=18,colour = "black"), axis.text.y = element_text(size=18), legend.text=element_text(size=18))+ labs(x = '', y = 'Relative Fraction')+ labs_pubr(base_size = 18)+ scale_fill_manual(values = c("#ED0000E5","#00468BE5")) p3 <- a+stat_compare_means(aes(group = Group),method = "t.test",label = "p.signif", hide.ns = T) pp <- a+stat_compare_means(aes(group = Group),method = "t.test",label = "p.signif", hide.ns = T) p3 pp ggsave( plot = p3, filename = "immune Box plots of DKD_Control.pdf", width = 30, height = 15, units = "cm") #========================================================== library("pheatmap") r22 <- t(r2) pheatmap(r22, show_rownames = T, show_colnames = F, cluster_cols = F, scale="row", color = colorRampPalette(c("blue", "white", "red"))(50), cluster_rows = T, annotation_col = group) r22 <- as.data.frame(r22) r22d <- r22[c(2,4,15,16,17,19,20,22),] pp <- pheatmap(r22d, show_rownames = T, show_colnames = F, angle_col = '90', cluster_rows = T, cluster_cols = F, scale="row", color = colorRampPalette(c("blue", "white", "red"))(50), fontsize = 10, fontsize_row =10, annotation_col = group) ggsave( plot = pp, filename = "immune pheatmap of DKD_Control.pdf", width = 25, height = 6, units = "cm") #========================================================== p <- ggplot(data2, aes(x=ID, y=cell_composition,fill=cell_type)) p + geom_bar(stat="identity", position="fill")+ labs(x = '', y = 'Gene composition')+ theme(axis.text.x = element_text(angle = 90, hjust = 0.5, vjust = 0.5,size=9,colour = "red")) library(reshape2) library(ggplot2) ggplot(data2, aes(x=ID, y=cell_composition,fill=cell_type))+ geom_col(position = 'fill', width = 0.8) + facet_wrap(~Group, scales = 'free_x', ncol = 2) + labs(x = '', y = 'Gene composition') + theme(axis.text.x = element_text(angle = 90, hjust = 0.5, vjust = 0.5,size=9), panel.grid = element_blank(), panel.background = element_rect(color = 'black', fill = 'transparent')) ggplot(data2, aes(x=ID, y=cell_composition,fill=cell_type))+ geom_col(position = 'fill', width = 0.9) + facet_grid(.~Group,scales= "free" ,space= "free" ) + labs(x = '', y = 'Gene composition') + theme(axis.text.x = element_text(angle = 90, hjust = 0.5, vjust = 0.5,size=12), panel.grid = element_blank(), panel.background = element_rect(color = 'black', fill = 'transparent')) #ggsave('ggplot2_plot.pdf', p, width = 8, height = 6) #===================================================================================== rm(list=ls()) setwd("D:/data-R/DN202204/") load("DEG96804.Rdata") load("GSE96804.Rdata") library(raster) rownames(symbol_unique) <- trim(rownames(symbol_unique)) ae <- symbol_unique[rownames(symbol_unique)%in%aging_DEG$gene,] aging_exp <- t(ae) data1 <- as.data.frame(aging_exp) data1$name <- rownames(data1) group$name <- rownames(group) data <- merge(data1,group,by="name") data <- na.omit(data) data$DN <- ifelse(data$group=="DN",1,0) library(glmnet) #install.packages("caret") library(caret) X <- as.matrix(data[,2:16]) Y <- as.matrix(data[,18]) lasso <- glmnet(X,Y,alpha = 1,family = "binomial",nlambda = 1000 ) View(print(lasso)) NROW(lasso$lambda) lasso$lambda[NROW(lasso$lambda)] lasso$df[NROW(lasso$lambda)] plot(lasso,lwd=2,label=TRUE) plot(lasso,lwd=2,label=TRUE,xvar = "lambda") plot(lasso,lwd=2,xvar = 'dev',label=TRUE) coef(lasso,s=0.0002314995) lambdas <- seq(0,0.5,length.out=200) set.seed(123) cv.lasso <- cv.glmnet(X,Y,alpha=1,lambda=lambdas,nfolds=10 ,family="binomial") plot(cv.lasso) lasso_1se <- cv.lasso$lambda.1se lasso_1se lasso.coef <- coef(cv.lasso$glmnet.fit,s=lasso_1se,exact = F) lasso.coef library(glmnet) model_lasso_1se <- glmnet(x=X, y=Y, alpha = 1, lambda=cv.lasso$lambda.1se) choose_gene_1se=rownames(model_lasso_1se$beta)[as.numeric(model_lasso_1se$beta)!=0] length(choose_gene_1se) choose_gene_1se model_lasso_min <- glmnet(x=X, y=Y, alpha = 1, lambda=cv.lasso$lambda.min) choose_gene_min=rownames(model_lasso_min$beta)[as.numeric(model_lasso_min$beta)!=0] length(choose_gene_min) choose_gene_min #================== X30528a9 <- as.matrix(data_30528a9[,2:16]) Y30528a9 <- as.matrix(data_30528a9[,17]) rownames(X30528a9) <- data3$name lasso.prob <- predict(cv.lasso, newx=X30528a9 , s=c(cv.lasso$lambda.min,cv.lasso$lambda.1se) ) lasso.prob re=cbind(Y30528a9,lasso.prob) re re=as.data.frame(re) colnames(re)=c('DN','prob_min','prob_1se') re$DN=as.factor(re$DN) library(ggpubr) p1 = ggboxplot(re, x = "DN", y = "prob_min", color = "DN", palette = "jco", add = "jitter")+ stat_compare_means() #install.packages("ROCR") library(ROCR) pred_min <- prediction(re[,3], re[,1]) auc_min = performance(pred_min,"auc")@y.values[[1]] perf_min <- performance(pred_min,"tpr","fpr") plot(perf_min,colorize=FALSE, col="blue") lines(c(0,1),c(0,1),col = "gray", lty = 4 ) text(0.8,0.2, labels = paste0("AUC = ",round(auc_min,3))) pred_min <- prediction(re[,2], re[,1]) perf_min <- performance(pred_min,"tpr","fpr") auc_min = performance(pred_min,"auc")@y.values[[1]] pred_1se <- prediction(re[,3], re[,1]) perf_1se <- performance(pred_1se,"tpr","fpr") auc_1se = performance(pred_1se,"auc")@y.values[[1]] plot(perf_min,colorize=FALSE, col="blue") plot(perf_1se,colorize=FALSE, col="red",add = T) lines(c(0,1),c(0,1),col = "gray", lty = 4 ) text(0.8,0.3, labels = paste0("AUC.min = ",round(auc_min,3)),col = "blue") text(0.8,0.2, labels = paste0("AUC.lse= ",round(auc_1se,3)),col = "red") p <- plot(perf_min,colorize=FALSE, col="blue") + lines(c(0,1),c(0,1),col = "gray", lty = 4 )+ text(0.8,0.3, labels = paste0("AUC = ",round(auc_min,3)),col = "blue") #===================================================================================== a <- rownames(aging_change) library(raster) a <- trim(a) annot$symbol <- trim(annot$symbol) aging_annot <- annot[annot$symbol%in%a,] library("clusterProfiler") library("org.Hs.eg.db") aging_GO <- enrichGO(aging_annot$To, keyType="ENTREZID", ont="ALL", org.Hs.eg.db, pvalueCutoff=0.0001, qvalueCutoff=0.2) aging_go <- as.data.frame(aging_GO) aging_GOBP <- enrichGO(aging_annot$To, keyType="ENTREZID", ont="BP", org.Hs.eg.db, pvalueCutoff=0.0001, qvalueCutoff=0.2) aging_goBP <- as.data.frame(aging_GOBP) aging_GOCC <- enrichGO(aging_annot$To, keyType="ENTREZID", ont="CC", org.Hs.eg.db, pvalueCutoff=0.0001, qvalueCutoff=0.2) aging_goCC <- as.data.frame(aging_GOCC) aging_GOMF <- enrichGO(aging_annot$To, keyType="ENTREZID", ont="MF", org.Hs.eg.db, pvalueCutoff=0.0001, qvalueCutoff=0.2) aging_goMF <- as.data.frame(aging_GOMF) library(ggplot2) library(clusterProfiler) library("DOSE") aging_GOx <- mutate(aging_GO, FoldEnrichment=parse_ratio(GeneRatio)/parse_ratio(BgRatio)) aging_GOx2 <- ggplot(aging_GOx,aes(x=FoldEnrichment,y=Description))+ geom_point(aes(color=p.adjust, size=Count))+ scale_color_gradient(low="red",high="blue")+ xlab("Fold Enrichment")+ theme_bw()+ ylim(rev(aging_GOx$Description))+ guides(color=guide_colorbar(reverse = T)) library(stringr) aging_GOx2 + scale_y_discrete(labels=function(x) str_wrap(x, width=45))+ ggtitle("aging-baced DN gene FoldEnrichment GO") + theme(plot.title = element_text(hjust = 0.5)) library(enrichplot) GOall <- barplot(aging_GO,showCategory=10)+ ggtitle("GO") + theme(plot.title = element_text(hjust = 0.5)) GOBP <- barplot(aging_GOBP,showCategory=10, font.size=7, label_format=100)+ ggtitle("BP")+ theme(plot.title = element_text(hjust = 0.5)) GOCC <- barplot(aging_GOCC,showCategory=10, font.size=7, label_format=100)+ ggtitle("CC") + theme(plot.title = element_text(hjust = 0.5)) GOMF <- barplot(aging_GOMF,showCategory=10, font.size=7, label_format=100)+ ggtitle("MF") + theme(plot.title = element_text(hjust = 0.5)) #library(patchwork) library(clusterProfiler) KEGG <-enrichKEGG(aging_annot$To, organism = "hsa", pvalueCutoff = 0.05) kegg <- as.data.frame(KEGG) dotplot(KEGG) barplot(KEGG,showCategory=20,font.size=15,label_format=30) #========================================================== data <- t(Data[order(apply(Data, 1, mad), decreasing = T)[1:10000],]) # Load the WGCNA package library(WGCNA); enableWGCNAThreads() # The following setting is important, do not omit. options(stringsAsFactors = FALSE); #Read in the female liver data set library(raster) colnames(data) <- trim(colnames(data)) datExpr0 = data rm(Data) names(datExpr0) rownames(datExpr0) gsg = goodSamplesGenes(datExpr0, verbose = 3); gsg$allOK if (!gsg$allOK) { # Optionally, print the gene and sample names that were removed: if (sum(!gsg$goodGenes)>0) printFlush(paste("Removing genes:", paste(names(datExpr0)[!gsg$goodGenes], collapse = ", "))); if (sum(!gsg$goodSamples)>0) printFlush(paste("Removing samples:", paste(rownames(datExpr0)[!gsg$goodSamples], collapse = ", "))); # Remove the offending genes and samples from the data: datExpr0 = datExpr0[gsg$goodSamples, gsg$goodGenes] } sampleTree = hclust(dist(datExpr0), method = "average"); sizeGrWindow(12,9) #pdf(file = "Plots/sampleClustering.pdf", width = 12, height = 9); par(cex = 1); par(mar = c(0,4,2,0)) plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5, cex.axis = 1.5, cex.main = 2) abline(h = 95, col = "red"); clust = cutreeStatic(sampleTree, cutHeight = 95, minSize = 10) table(clust) keepSamples = (clust==1) datExpr = datExpr0[keepSamples, ] nGenes = ncol(datExpr) nSamples = nrow(datExpr) group <- agegroup9_159 group$group1 <- ifelse(group$group=="group1",1,0) group$group2 <- ifelse(group$group=="group2",1,0) group$group3 <- ifelse(group$group=="group3",1,0) group <- group[,3:5] traitData <- group dim(traitData) names(traitData) allTraits <- traitData Samples = rownames(datExpr); datTraits=allTraits[rownames(allTraits)%in%Samples,]; traitRows = match(Samples, rownames(datTraits)); collectGarbage(); sampleTree2 = hclust(dist(datExpr), method = "average") traitColors = numbers2colors(datTraits, signed = FALSE); plotDendroAndColors(sampleTree2, traitColors, groupLabels = "group1", main = "Sample dendrogram and trait heatmap") sampleTree = hclust(dist(datExpr0), method = "average") traitColors = numbers2colors(group, signed = FALSE); plotDendroAndColors(sampleTree, traitColors, groupLabels = c("group1","group2","group3"), main = "Sample dendrogram and trait heatmap") datTraits <- datTraits[,-3] traitColors = numbers2colors(datTraits, signed = FALSE,commonLim = FALSE); plotDendroAndColors(sampleTree2, traitColors, groupLabels = c("group1","group2"), main = "Sample dendrogram and trait heatmap") rm(list=ls()) library(WGCNA) # The following setting is important, do not omit. options(stringsAsFactors = FALSE); # Allow multi-threading within WGCNA. This helps speed up certain calculations. # At present this call is necessary for the code to work. # Any error here may be ignored but you may want to update WGCNA if you see one. # Caution: skip this line if you run RStudio or other third-party R environments. # See note above. enableWGCNAThreads() # Load the data saved in the first part lnames = load(file = "GSE96804 WGCNAInput.RData"); powers = c(c(1:10), seq(from = 12, to=20, by=2)) sft = pickSoftThreshold(datExpr, powerVector = powers, verbose = 5) sizeGrWindow(9, 5) par(mfrow = c(1,2)); plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2], xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n", main = paste("Scale independence")); text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2], labels=powers,cex=cex1,col="red"); abline(h=0.87,col="red"); plot(sft$fitIndices[,1], sft$fitIndices[,5], xlab="Soft Threshold (power)",ylab="Mean Connectivity",type="n", main = paste("Mean connectivity")); text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red") softPower = 3; adjacency = adjacency(datExpr, power = softPower); TOM = TOMsimilarity(adjacency); dissTOM = 1-TOM geneTree = hclust(as.dist(dissTOM), method = "average"); sizeGrWindow(12,9) plot(geneTree, xlab="", sub="", main = "Gene clustering on TOM-based dissimilarity", labels = FALSE, hang = 0.04); minModuleSize = 20; dynamicMods = cutreeDynamic(dendro = geneTree, distM = dissTOM, deepSplit = 2, pamRespectsDendro = FALSE, minClusterSize = minModuleSize); table(dynamicMods) dynamicColors = labels2colors(dynamicMods) table(dynamicColors) sizeGrWindow(8,6) plotDendroAndColors(geneTree, dynamicColors, "Dynamic Tree Cut", dendroLabels = FALSE, hang = 0.03, addGuide = TRUE, guideHang = 0.05, main = "Gene dendrogram and module colors") MEList = moduleEigengenes(datExpr, colors = dynamicColors) MEs = MEList$eigengenes MEDiss = 1-cor(MEs); METree = hclust(as.dist(MEDiss), method = "average"); sizeGrWindow(7, 6) plot(METree, main = "Clustering of module eigengenes", xlab = "", sub = "") MEDissThres = 0.25 abline(h=MEDissThres, col = "red") merge = mergeCloseModules(datExpr, dynamicColors, cutHeight = MEDissThres, verbose = 3) mergedColors = merge$colors; mergedMEs = merge$newMEs; sizeGrWindow(12, 9) pdf(file = "WGCNA10000/geneDendro-3.pdf", wi = 9, he = 6) plotDendroAndColors(geneTree, cbind(dynamicColors, mergedColors), c("Dynamic Tree Cut", "Merged dynamic"), dendroLabels = FALSE, hang = 0.03, addGuide = TRUE, guideHang = 0.05) dev.off() moduleColors = mergedColors colorOrder = c("grey", standardColors(50)); moduleLabels = match(moduleColors, colorOrder)-1; MEs = mergedMEs; library(WGCNA) # The following setting is important, do not omit. options(stringsAsFactors = FALSE); # Load the expression and trait data saved in the first part lnames = load(file = "GSE96804_ WGCNAInput.RData"); #The variable lnames contains the names of loaded variables. lnames # Load network data saved in the second part. lnames = load(file = "GSE96804159_ WGCNA2.RData"); lnames options("scipen"=7, "digits"=7) nGenes = ncol(datExpr); nSamples = nrow(datExpr); MEs0 = moduleEigengenes(datExpr, moduleColors)$eigengenes MEs = orderMEs(MEs0) moduleTraitCor = cor(MEs, datTraits, use = "p"); moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples); sizeGrWindow(10,6) textMatrix = paste(signif(moduleTraitCor, 2), "\n(", signif(moduleTraitPvalue, 1), ")", sep = ""); dim(textMatrix) = dim(moduleTraitCor) par(mar = c(6, 8.5, 3, 3)); labeledHeatmap(Matrix = moduleTraitCor, xLabels = colnames(datTraits), yLabels = names(MEs), ySymbols = names(MEs), colorLabels = FALSE, colors = blueWhiteRed(50), textMatrix = textMatrix, setStdMargins = FALSE, cex.text = 1, zlim = c(-1,1), xLabelsAngle=0, xLabelsAdj=0.5, main = paste("Module-trait relationships")) datTraits <- as.data.frame(datTraits) group = as.data.frame(datTraits$group1); names(group) = "group" modNames = substring(names(MEs), 3) geneModuleMembership = as.data.frame(cor(datExpr, MEs, use = "p")); MMPvalue = as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership), nSamples)); names(geneModuleMembership) = paste("MM", modNames, sep=""); names(MMPvalue) = paste("p.MM", modNames, sep=""); geneTraitSignificance = as.data.frame(cor(datExpr, group, use = "p")); GSPvalue = as.data.frame(corPvalueStudent(as.matrix(geneTraitSignificance), nSamples)); names(geneTraitSignificance) = paste("GS.", names(group), sep=""); names(GSPvalue) = paste("p.GS.", names(group), sep=""); module = "turquoise" column = match(module, modNames); moduleGenes = moduleColors==module; sizeGrWindow(7, 7); par(mfrow = c(1,1)); verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]), abs(geneTraitSignificance[moduleGenes, 1]), xlab = paste("Module Membership in", module, "module"), ylab = "Gene significance for group1 and group2", main = paste("Module membership vs. gene significance\n"), cex.main = 1.2, cex.lab = 1.2, cex.axis = 1.2, col = module) abline(h = 0.7, col = "red") abline(v=0.8,col = "red") colnames(datExpr) colnames(datExpr)[moduleColors=="turquoise"] setwd("D:/data-R/DN202204/") load("GSE96804_annot.Rdata") setwd("D:/data-R/DN202204/WGCNA10000") annot = annot dim(annot) names(annot) probes = colnames(datExpr) library(raster) probes <- trim(probes) probes2annot = match(probes, annot$symbol) sum(is.na(probes2annot)) # Should return 0. geneInfo0 = data.frame(substanceBXH = probes, geneSymbol = annot$symbol[probes2annot], LocusLinkID = annot$To[probes2annot], moduleColor = moduleColors, geneTraitSignificance, GSPvalue) modOrder = order(-abs(cor(MEs, group, use = "p"))); for (mod in 1:ncol(geneModuleMembership)) { oldNames = names(geneInfo0) geneInfo0 = data.frame(geneInfo0, geneModuleMembership[, modOrder[mod]], MMPvalue[, modOrder[mod]]); names(geneInfo0) = c(oldNames, paste("MM.", modNames[modOrder[mod]], sep=""), paste("p.MM.", modNames[modOrder[mod]], sep="")) } geneOrder = order(geneInfo0$moduleColor, -abs(geneInfo0$GS.group)); geneInfo = geneInfo0[geneOrder, ] write.csv(geneInfo, file = "GSE96804_DN WGCNA_geneInfo.csv") # Display the current working directory getwd(); # If necessary, change the path below to the directory where the data files are stored. # "." means current directory. On Windows use a forward slash / instead of the usual \. setwd("D:/data-R/DN202204/WGCNA10000") # Load the WGCNA package library(WGCNA) # The following setting is important, do not omit. options(stringsAsFactors = FALSE); # Load the expression and trait data saved in the first part lnames = load(file = "GSE96804_DN WGCNAInput.RData"); #The variable lnames contains the names of loaded variables. lnames # Load network data saved in the second part. lnames = load(file = "GSE96804159_DN WGCNA2.RData" ); lnames annot = read.csv(file = "GSE96804_DN WGCNA_geneInfo.csv"); probes = colnames(datExpr) library(raster) probes <- trim(probes) probes2annot = match(probes, annot$substanceBXH) allLLIDs = annot$LocusLinkID[probes2annot]; intModules = c("turquoise", "blue", "yellow") for (module in intModules) { modGenes = (moduleColors==module) modLLIDs = allLLIDs[modGenes]; fileName = paste("LocusLinkIDs-", module, ".txt", sep=""); write.table(as.data.frame(modLLIDs), file = fileName, row.names = FALSE, col.names = FALSE) } fileName = paste("LocusLinkIDs-all.txt", sep=""); write.table(as.data.frame(allLLIDs), file = fileName, row.names = FALSE, col.names = FALSE) library(org.Hs.eg.db) GOenr = GOenrichmentAnalysis(moduleColors, allLLIDs, organism = "human", nBestP = 10); tab = GOenr$bestPTerms[[4]]$enrichment names(tab) keepCols = c(1, 2, 5, 6, 7, 12, 13); screenTab = tab[, keepCols]; #names(screenTab)°üº¬£º"module" "modSize" "enrichment P""Bonferoni P""nModGenesInTerm""termOntology" "termName" numCols = c(3, 4); screenTab[, numCols] = signif(apply(screenTab[, numCols], 2, as.numeric), 2) screenTab[, 7] = substring(screenTab[, 7], 1, 40) colnames(screenTab) = c("module", "size", "p-val", "Bonf", "nInTerm", "ont", "term name"); rownames(screenTab) = NULL; options(width=95) screenTab workingDir = "D:/data-R/DN202204/WGCNA10000/" setwd(workingDir); library(WGCNA) options(stringsAsFactors = FALSE); # Load the expression and trait data saved in the first part lnames = load(file = "GSE96804_DN WGCNAInput.RData"); #The variable lnames contains the names of loaded variables. lnames # Load network data saved in the second part. lnames = load(file = "GSE96804159_DN WGCNA2.RData"); lnames nGenes = ncol(datExpr) nSamples = nrow(datExpr) enableWGCNAThreads() load("10000TOM.Rdata") dissTOM = 1-TOM; plotTOM = dissTOM^7; diag(plotTOM) = NA; sizeGrWindow(9,9) save(plotTOM,geneTree, moduleColors,file = "Network_heatmap_plot") pdf(file = "Network_heatmap_plot.pdf") TOMplot(plotTOM, geneTree, moduleColors, main = "Network heatmap plot") dev.off() MEs = moduleEigengenes(datExpr, moduleColors)$eigengenes datTraits <- as.data.frame(datTraits) group = as.data.frame(datTraits$group1); names(group) = "group" MET = orderMEs(cbind(MEs, group)) sizeGrWindow(5,7.5); tiff(filename = "WGCNA.tiff") par(cex = 1.3) plotEigengeneNetworks(MET, "", marDendro = c(0,4,4,2), marHeatmap = c(3,4,1,2), cex.lab = 1, xLabelsAngle = 0) dev.off() sizeGrWindow(6,6); par(cex = 1.0) plotEigengeneNetworks(MET, "Eigengene dendrogram", marDendro = c(0,4,2,0), plotHeatmaps = FALSE) par(cex = 1.0) plotEigengeneNetworks(MET, "Eigengene adjacency heatmap", marHeatmap = c(3,4,2,2), plotDendrograms = FALSE, xLabelsAngle = 90) rm(list = ) # Display the current working directory getwd(); # If necessary, change the path below to the directory where the data files are stored. # "." means current directory. On Windows use a forward slash / instead of the usual \. workingDir = "D:/data-R/DN202204/"; setwd(workingDir); # Load the WGCNA package library(WGCNA) # The following setting is important, do not omit. options(stringsAsFactors = FALSE); # Load the expression and trait data saved in the first part lnames = load(file = "GSE96804DN159 WGCNAInput.RData"); #The variable lnames contains the names of loaded variables. lnames lnames = load(file = "GSE96804159DN159 WGCNA.RData"); lnames TOM = TOMsimilarityFromExpr(datExpr, power = 3); annot = read.csv(file = "GeneAnnotation.csv"); module = "brown"; probes = names(datExpr) inModule = (moduleColors==module); modProbes = probes[inModule]; modTOM = TOM[inModule, inModule]; dimnames(modTOM) = list(modProbes, modProbes) vis = exportNetworkToVisANT(modTOM, file = paste("VisANTInput-", module, ".txt", sep=""), weighted = TRUE, threshold = 0, probeToGene = data.frame(annot$substanceBXH, annot$gene_symbol) ) nTop = 30; IMConn = softConnectivity(datExpr[, modProbes]); top = (rank(-IMConn) <= nTop) vis = exportNetworkToVisANT(modTOM[top, top], file = paste("VisANTInput-", module, "-top30.txt", sep=""), weighted = TRUE, threshold = 0, probeToGene = data.frame(annot$substanceBXH, annot$gene_symbol) ) TOM = TOMsimilarityFromExpr(datExpr, power = 6); # Read in the annotation file annot = read.csv(file = "GeneAnnotation.csv"); # Select modules modules = c("brown", "red"); # Select module probes probes = names(datExpr) inModule = is.finite(match(moduleColors, modules)); modProbes = probes[inModule]; modGenes = annot$gene_symbol[match(modProbes, annot$substanceBXH)]; # Select the corresponding Topological Overlap modTOM = TOM[inModule, inModule]; dimnames(modTOM) = list(modProbes, modProbes) # Export the network into edge and node list files Cytoscape can read cyt = exportNetworkToCytoscape(modTOM, edgeFile = paste("CytoscapeInput-edges-", paste(modules, collapse="-"), ".txt", sep=""), nodeFile = paste("CytoscapeInput-nodes-", paste(modules, collapse="-"), ".txt", sep=""), weighted = TRUE, threshold = 0.02, nodeNames = modProbes, altNodeNames = modGenes, nodeAttr = moduleColors[inModule]); #========================================================== library(Seurat) library(tidyverse) geneID <- readr::read_delim("~/UbuntuData/Rtools/Biomart/human_ ensembl_symbol.txt") ensg2symbol <- function(data) { # data <- con1 dat <- as.data.frame(as.matrix(data$readcount$exon$all)) dat <- dat[rowSums(dat>0)>3, colSums(dat>0)>500] dat <- dat %>% rownames_to_column(var = "Gene stable ID") %>% left_join(geneID, by = "Gene stable ID") %>% filter(!is.na(`Gene name`)) dat2 <- apply(dat[, !colnames(dat) %in%colnames(geneID)], 2, FUN = function(x) { tapply(x, dat$`Gene name`, FUN = median, simplify = TRUE) }) return(dat2) } con1 <- readr::read_rds("~/UbuntuData/PROJECT/20230717_SC_du/Data/GSM3823939_control.s1.dgecounts.rds") con1 <- ensg2symbol(con1) Con1 <- CreateSeuratObject(counts = con1, project = "con1", min.cells = 3, min.features = 500) Con1$source<- "con1" Con1$condition <- "control" rm(con1) con2 <- readr::read_rds("~/UbuntuData/PROJECT/20230717_SC_du/Data/GSM3823940_control.s2.dgecounts.rds") con2 <- ensg2symbol(con2) Con2 <- CreateSeuratObject(counts = con2, project = "con2", min.cells = 3, min.features = 500) Con2$source <- "con2" Con2$condition <- "control" rm(con2) con3 <- readr::read_rds("~/UbuntuData/PROJECT/20230717_SC_du/Data/GSM3823941_control.s3.dgecounts.rds") con3 <- ensg2symbol(con3) Con3 <- CreateSeuratObject(counts = con3, project = "con3", min.cells = 3, min.features = 500) Con3$source <- "con3" Con3$condition <- "control" rm(con3) case1 <- readr::read_rds("~/UbuntuData/PROJECT/20230717_SC_du/Data/GSM3823942_diabetes.s1.dgecounts.rds") case1 <- ensg2symbol(case1) Case1 <- CreateSeuratObject(counts = case1, project = "case1", min.cells = 3, min.features = 500) Case1$source <- "case1" Case1$condition <- "case" rm(case1) case2 <- readr::read_rds("~/UbuntuData/PROJECT/20230717_SC_du/Data/GSM3823943_diabetes.s2.dgecounts.rds") case2 <- ensg2symbol(case2) Case2 <- CreateSeuratObject(counts = case2, project = "case2", min.cells = 3, min.features = 500) Case2$source <- "case2" Case2$condition <- "case" rm(case2) case3 <- readr::read_rds("~/UbuntuData/PROJECT/20230717_SC_du/Data/GSM3823944_diabetes.s3.dgecounts.rds") case3 <- ensg2symbol(case3) Case3 <- CreateSeuratObject(counts = case3, project = "case3", min.cells = 3, min.features = 500) Case3$source <- "case3" Case3$condition <- "case" rm(case3) readr::write_rds(Con1, "~/UbuntuData/PROJECT/20230717_SC_du/Data/Con1.rds") readr::write_rds(Con2, "~/UbuntuData/PROJECT/20230717_SC_du/Data/Con2.rds") readr::write_rds(Con3, "~/UbuntuData/PROJECT/20230717_SC_du/Data/Con3.rds") readr::write_rds(Case1, "~/UbuntuData/PROJECT/20230717_SC_du/Data/Case1.rds") readr::write_rds(Case2, "~/UbuntuData/PROJECT/20230717_SC_du/Data/Case2.rds") readr::write_rds(Case3, "~/UbuntuData/PROJECT/20230717_SC_du/Data/Case3.rds") library(Seurat) library(tidyverse) Con1 <- readr::read_rds("~/UbuntuData/PROJECT/20230717_SC_du/Data/Con1.rds") Con2 <- readr::read_rds("~/UbuntuData/PROJECT/20230717_SC_du/Data/Con2.rds") Con3 <- readr::read_rds("~/UbuntuData/PROJECT/20230717_SC_du/Data/Con3.rds") Case1 <- readr::read_rds("~/UbuntuData/PROJECT/20230717_SC_du/Data/Case1.rds") Case2 <- readr::read_rds("~/UbuntuData/PROJECT/20230717_SC_du/Data/Case2.rds") Case3 <-readr::read_rds("~/UbuntuData/PROJECT/20230717_SC_du/Data/Case3.rds") ifnb.list <- list( con1 = Con1, con2 = Con2, con3 = Con3, case1 = Case1, case2 = Case2, case3 = Case3 ) dim(Con3) ifnb.list <- lapply(X = ifnb.list, FUN = function(x) { x[["percent.mt"]] <- PercentageFeatureSet(x, pattern = "^MT-") x <- SCTransform(x, , vars.to.regress = "percent.mt", vst.flavor = "v2") %>% RunPCA(npcs = 30) }) features <- SelectIntegrationFeatures(object.list = ifnb.list, nfeatures = 5000) ifnb.list <- PrepSCTIntegration(object.list = ifnb.list, anchor.features = features) anchors <- FindIntegrationAnchors( object.list = ifnb.list, normalization.method = "SCT", anchor.features = features ) combined_sct <- IntegrateData(anchorset = anchors, normalization.method = "SCT") qs::qsave(combined_sct, "~/UbuntuData/PROJECT/20230717_SC_du/Data/combind.qs") library(Seurat) library(tidyverse) Diabetic <- qs::qread("~/UbuntuData/PROJECT/20230717_SC_du/Data/combind.qs") DefaultAssay(Diabetic) Diabetic <- SCTransform(Diabetic, vars.to.regress = "percent.mt",variable.features.n = 5000, vst.flavor = "v2") %>% RunPCA(npcs = 30) %>% RunUMAP(reduction = "pca", dims = 1:30) %>% FindNeighbors(reduction = "pca", dims = 1:30) %>% FindClusters(resolution = seq(0,1.5,0.1)) library(patchwork) d1 <- DimPlot(Diabetic, raster = TRUE, group.by = "source") d2 <- DimPlot(Diabetic, raster = TRUE, group.by = "condition") d1 + d2 ggsave( filename = "01Source_Condition_detect.pdf", device = cairo_pdf, path = "~/UbuntuData/PROJECT/20230717_SC_du/Plot/", width = 40, height = 18, units = "cm", family = "arial" ) library(clustree) clustree(Diabetic) Idents(Diabetic) <- "SCT_snn_res.1.1" DimPlot(Diabetic, raster = TRUE, label = TRUE) # the best resolution is 1.1 PCT <- c("CUBN","LRP2","SLC34A1","SLC5A12","SLC5A2","ALDOB") CFH <- c("CFH") LOH <- "SLC12A1" DCT <- c("SLC12A3","SLC12A2") `DCT/CT` <- "SLC8A1" `CD-PC`<- "AQP2" `CD-ICA`<- c("AQP6", "KIT", "ATP6V0D2") `CD-ICB`<- "SLC26A4" PODO <- c("NPHS1","NPHS2") ENDO <- c("PECAM1","FLT1") MES <- c("ITGA8","PDGFRB") LEUK <- "PTPRC" marker.gene <- c(PCT,CFH,LOH,DCT,`DCT/CT`,`CD-PC`,`CD-ICA`,`CD-ICB`,PODO,ENDO,MES,LEUK) Idents(Diabetic) <- Diabetic$SCT_snn_res.1.1 features <- intersect(marker.gene, rownames(Diabetic@assays$SCT@scale.data)) ht <- DoHeatmap(Diabetic, features = features, raster = TRUE) ggsave( plot = ht, filename = "markerHeatmap.pdf", device = cairo_pdf, path = "~/UbuntuData/PROJECT/20230717_SC_du/Plot/", width = 50, height = 20, units = "cm", family = "arial" ) # library(patchwork) Diabetic$CellType_course <- as.character(Diabetic$SCT_snn_res.1.1) Diabetic$CellType_course[Diabetic$CellType_course %in% c(0,4,14)] <- "PCT" Diabetic$CellType_course[Diabetic$CellType_course %in% c(9,16,23)] <- "CFH" Diabetic$CellType_course[Diabetic$CellType_course %in% c(5,7,10,19,22)] <- "LOH" Diabetic$CellType_course[Diabetic$CellType_course %in% c(1,6,24)] <- "DCT" Diabetic$CellType_course[Diabetic$CellType_course %in% 3] <- "DCT/CT" Diabetic$CellType_course[Diabetic$CellType_course %in% c(2,11,18,20)] <- "CD-PC" Diabetic$CellType_course[Diabetic$CellType_course %in% c(8,13)] <- "CD-ICA" Diabetic$CellType_course[Diabetic$CellType_course %in% c(17)] <- "CD-ICB" Diabetic$CellType_course[Diabetic$CellType_course %in% 15] <- "PODO" Diabetic$CellType_course[Diabetic$CellType_course %in% 12] <- "ENDO" Diabetic$CellType_course[Diabetic$CellType_course %in% 21] <- "MES" Diabetic$CellType_course[Diabetic$CellType_course %in% 25] <- "LEUK" table(Diabetic$CellType_course) Diabetic$CellType_course <- factor(Diabetic$CellType_course, levels = c( "PCT","CFH","LOH","DCT","DCT/CT","CD-PC","CD-ICA","CD-ICB","PODO","ENDO","MES","LEUK")) d3 <- DimPlot(Diabetic , group.by = "CellType_course",raster = TRUE, label = TRUE) d4 <- DimPlot(Diabetic , group.by = "SCT_snn_res.1.1", raster = TRUE, label = TRUE) d3 + d4 ggsave( filename = "02Type_Cluster_detect.pdf", device = cairo_pdf, path = "~/UbuntuData/PROJECT/20230717_SC_du/Plot/", width = 40, height = 18, units = "cm", family = "arial" ) # Function DimPlot_my <- function(object = NULL, group.by = NULL, cols = NULL, title = NULL, raster = TRUE) { UMAP <- DimPlot( object = object, group.by = group.by, label = FALSE, reduction = "umap", cols = cols, raster = raster, pt.size = .5 ) + labs(title = title) + theme_test() + theme( axis.title = element_blank(), axis.ticks = element_blank(), axis.text = element_blank(), legend.text = element_text(size = 8), legend.justification = c(1, 0) ) } DotPlot_my <- function(object = NULL, marker = NULLL, group.by = NULL, cols = NULL) { dotplot <- DotPlot(object = object, features = marker, group.by = group.by) + coord_flip() + scale_color_gradient( low = "grey90", high = "black", guide = guide_colorbar(ticks.colour = "white", frame.colour = "white") ) + scale_size(range = c(0, 4)) + theme_test() + theme( axis.title = element_blank(), axis.text.x.bottom = element_text( angle = 45, hjust = 1, vjust = 1, colour = cols, size = 8 ), legend.justification = c(1, 0), axis.text.y = element_text(size = 8), legend.title = element_text(size = 8), legend.text = element_text(size = 8) ) } unique(Diabetic$CellType_course) library(ggplot2) library(paletteer) CellType_course_color <- paletteer_d("ggsci::category20_d3")[c(1:12)] celltype_course_UMAP <- DimPlot_my( object = Diabetic, group.by = "CellType_course", cols = CellType_course_color, title = "landscape umap", raster = FALSE) celltype_course_UMAP ggsave( plot = celltype_course_UMAP, filename = "03Celltype_coures_UMAP.pdf", device = cairo_pdf, path = "~/UbuntuData/PROJECT/20230717_SC_du/Plot/", width = 12, height = 10, units = "cm", family = "arial" ) ### Dotplot PCT <- c("CUBN","SLC5A2","ALDOB") CFH <- c("CFH") LOH <- "SLC12A1" DCT <- c("SLC12A3","SLC12A2") `DCT/CT` <- "SLC8A1" `CD-PC` <- "AQP2" `CD-ICA` <- c("AQP6", "KIT", "ATP6V0D2") `CD-ICB` <- "SLC26A4" PODO <- c("NPHS1","NPHS2") ENDO <- c("PECAM1","FLT1") MES <- c("ITGA8","PDGFRB") LEUK <- "PTPRC" marker <- c(PCT,CFH,LOH,DCT,`DCT/CT`,`CD-PC`,`CD-ICA`,`CD-ICB`,PODO,ENDO,MES,LEUK) CellType_course_Dotplot <- DotPlot_my( object = Diabetic, marker = marker, group.by = "CellType_course", cols = CellType_course_color ) CellType_course_Dotplot ggsave( plot = CellType_course_Dotplot, filename = "04Celltype_coures_Dotplot.pdf", device = cairo_pdf, path = "~/UbuntuData/PROJECT/20230717_SC_du/Plot/", width = 15, height = 10, units = "cm", family = "arial" ) LEUK <- subset(Diabetic, CellType_course == "LEUK") LEUK <- SCTransform(LEUK, vars.to.regress = "percent.mt",variable.features.n = 5000, vst.flavor = "v2") %>% RunPCA(npcs = 30) %>% RunUMAP(reduction = "pca", dims = 1:30) %>% FindNeighbors(reduction = "pca", dims = 1:30) %>% FindClusters(resolution = seq(0,1.5,0.1)) clustree(LEUK) Idents(LEUK) <- "SCT_snn_res.1" DimPlot(LEUK,label = TRUE) T_cell <- c("CD247","CD96","CD28") Monocytes <- c("FCGR2A","CSF1R","CSF2RA","CD86") B_cell <- c("MS4A1","PAX5") Plasma <- c("CD38","SDC1") marker.gene <- c(T_cell, Monocytes, B_cell, Plasma) Idents(LEUK) <- LEUK$SCT_snn_res.1 features <- intersect(marker.gene, rownames(LEUK@assays$SCT@scale.data)) DoHeatmap(LEUK, features = features, raster = TRUE) LEUK$CellType <- as.character(LEUK$SCT_snn_res.1) LEUK$CellType[LEUK$CellType == 0] <- "T cells" LEUK$CellType[LEUK$CellType == 1] <- "Monocytes" LEUK$CellType[LEUK$CellType == 2] <- "B cells" LEUK$CellType[LEUK$CellType == 3] <- "Plasma" LEUK$CellType <- factor(LEUK$CellType, levels = c("T cells", "Monocytes", "B cells", "Plasma")) table(LEUK$CellType) LEUK_meta <- LEUK@meta.data gg <- ggplot(LEUK_meta, aes(x = CellType, fill = condition)) + geom_bar(stat = "count", position = "fill", width = .8) + scale_y_continuous(labels = scales::percent) + theme_test() + theme( axis.title.x = element_blank(), axis.title.y = element_blank(), axis.text.x = element_text(size = 8), axis.text.y = element_text(size = 8), legend.title = element_text(size = 8), legend.text = element_text(size = 8), legend.key.size = unit(.2, "cm") ) gg ggsave( plot = gg, filename = "05Immue_celltype_stack.pdf", device = cairo_pdf, path = "~/UbuntuData/PROJECT/20230717_SC_du/Plot/", width = 10, height = 10, units = "cm", family = "arial" ) wilcox_My <- function(dat = NULL, metadata = NULL, thread = 20) { dat <- as.data.frame(t(dat)) parallel::detectCores() cl <- parallel::makeCluster(thread) doParallel::registerDoParallel(cl) library(foreach) result <- foreach(cell = unique(metadata), .combine = rbind, .errorhandling = "pass") %do% { index <- as.character(metadata) index[index != cell] <- "other" # nolint Res <- foreach(gene = colnames(dat), .combine = rbind, .errorhandling = "pass") %dopar% { # gene <- colnames(dat)[1] cellMean <- mean(dat[[gene]][index == cell]) # nolint otherMean <- mean(dat[[gene]][index != cell]) # nolint log2FoldChange <- log2(cellMean+1) - log2(otherMean+1) pct.1 <- sum(dat[[gene]][index == cell] > 0) / sum(index == cell) pct.2 <- sum(dat[[gene]][index != cell] > 0) / sum(index != cell) test <- wilcox.test(dat[[gene]] ~ index, data = dat) res <- data.frame( cell = cell, gene = gene, mean.1 = cellMean, mean.2 = otherMean, pct.1 = pct.1, pct.2 = pct.2, lfc = log2FoldChange, p.value = test$p.value ) return(res) } Res$p.adj <- p.adjust(Res$p.value, method = "BH") return(Res) } parallel::stopCluster(cl) return(result) } library(ggsignif) Cell <- "MES" # PODO,ENDO,MES,PCT cell.object <- subset(Diabetic, CellType_course == Cell) DefaultAssay(cell.object) <- "RNA" cell.object <- NormalizeData(cell.object) %>% FindVariableFeatures(selection.method = "vst", nfeatures = nrow(cell.object)) %>% ScaleData(features = rownames(cell.object)) cell.expr <- as.data.frame(as.matrix(GetAssayData(cell.object, slot = "data"))) DEG <- wilcox_My(dat = cell.expr, metadata = cell.object$condition) readr::write_csv(DEG, paste0("~/UbuntuData/PROJECT/20230717_SC_du/Plot/",Cell,"_DEG.csv")) gene <- c("HSPA1A","IGF1","FOS","CETP","PCK1") gene.object <- cell.object[gene, ] gene.expr <- as.data.frame(t(as.matrix(GetAssayData(gene.object)))) %>% rownames_to_column(var = "cell") gene.meta <- cell.object@meta.data dat <- gene.meta %>% rownames_to_column(var = "cell") %>% select(cell, condition) %>% left_join(gene.expr, by = "cell") %>% pivot_longer(cols = -c(1:2), names_to = "gene") gg <- ggplot(dat, aes(x = condition, y = value)) + geom_violin(aes(fill = condition)) + geom_boxplot(outlier.size = 0, width = .1) + geom_signif(comparisons = list(c(1:2)), map_signif_level = TRUE,textsize = 8/.pt) + facet_wrap(~gene, nrow = 1, scales = "free") + theme_test() + theme( axis.title.x = element_blank(), axis.title.y = element_blank(), axis.text.x = element_text(size = 8), axis.text.y = element_text(size = 8), legend.title = element_text(size = 8), legend.text = element_text(size = 8), legend.key.size = unit(.2, "cm") ) gg ggsave( plot = gg, filename = paste0(Cell,"_gene_vln.pdf"), device = cairo_pdf, path = "~/UbuntuData/PROJECT/20230717_SC_du/Plot/", width = 2 + 8* length(unique(dat$gene)), height = 10, units = "cm", family = "arial" ) readr::write_csv(dat, paste0("~/UbuntuData/PROJECT/20230717_SC_du/Plot/",Cell,"_gene_vln.csv")) DefaultAssay(Diabetic) <- "RNA" Diabetic <- NormalizeData(Diabetic) %>% FindVariableFeatures(selection.method = "vst", nfeatures = nrow(Diabetic)) %>% ScaleData(features = rownames(Diabetic)) gene <- c("HSPA1A","IGF1","FOS","CETP","PCK1") gene.Diabetic <- Diabetic[gene, ] gene.expr <- as.data.frame(t(as.matrix(GetAssayData(gene.Diabetic)))) %>% rownames_to_column(var = "cell") gene.meta <- gene.Diabetic@meta.data dat <- gene.meta %>% rownames_to_column(var = "cell") %>% select(cell, CellType_course,condition) %>% left_join(gene.expr, by = "cell") readr::write_csv(dat, "~/UbuntuData/PROJECT/20230717_SC_du/Plot/gene_all.csv") #========================================================== exp <- read.csv("PCR.csv") colnames(exp)[1] <- "Sample" bartlett.test(exp$Value~exp$Sample_Group) exp1 <- exp[exp$Gene=="Hspa1a",] exp2 <- exp[exp$Gene=="Igf1",] exp3 <- exp[exp$Gene=="Pck1",] exp4 <- exp[exp$Gene=="Fos",] shapiro.test(exp1[which(exp$Sample_Group=="Control"),"Value"]) shapiro.test(exp1[which(exp$Sample_Group=="DKD"),"Value"]) shapiro.test(exp2[which(exp$Sample_Group=="Control"),"Value"]) shapiro.test(exp2[which(exp$Sample_Group=="DKD"),"Value"]) shapiro.test(exp3[which(exp$Sample_Group=="Control"),"Value"]) shapiro.test(exp3[which(exp$Sample_Group=="DKD"),"Value"]) shapiro.test(exp4[which(exp$Sample_Group=="Control"),"Value"]) shapiro.test(exp4[which(exp$Sample_Group=="DKD"),"Value"]) bartlett.test(exp1$Value~exp1$Sample_Group) bartlett.test(exp2$Value~exp2$Sample_Group) bartlett.test(exp3$Value~exp3$Sample_Group) bartlett.test(exp4$Value~exp4$Sample_Group) wilcox.test(exp1[which(exp1$Sample_Group=="Control"),"Value"],exp1[which(exp1$Sample_Group=="DKD"),"Value"]) t.test(exp2[which(exp2$Sample_Group=="Control"),"Value"],exp2[which(exp2$Sample_Group=="DKD"),"Value"],paired=F,var.equal=T) t.test(exp3[which(exp3$Sample_Group=="Control"),"Value"],exp3[which(exp3$Sample_Group=="DKD"),"Value"],paired=F,var.equal=T) t.test(exp4[which(exp4$Sample_Group=="Control"),"Value"],exp4[which(exp4$Sample_Group=="DKD"),"Value"],paired=F,var.equal=T) #Hspa1a library(ggpubr) library(ggplot2) datamin <- exp1 datamin$group <- datamin$Sample_Group library(ggpubr) library(ggplot2) datamin$group <- factor(datamin$Sample_Group,levels = c("Control","DKD")) a <- ggplot(datamin, aes(x = group, y = Value, fill = group,color=group)) + geom_boxplot(outlier.size = 1,width=0.3,outlier.colour = "grey",alpha=0.6) + theme(panel.grid = element_blank(), panel.background = element_rect(fill = 'transparent', color = 'grey'), legend.title = element_blank(), legend.key = element_blank(), axis.text.x = element_text(angle = 45, hjust = 0.5, vjust = 0.5,size=12), axis.text.y = element_text(size=12), legend.text=element_text(size=12))+ labs(x = '', y = '2^-¦¤¦¤CT')+ scale_fill_manual(values = c("#3f60aa","#cc340c"))+ scale_color_manual(values = c("#3f60aa","#cc340c"))+ labs(title="Hspa1a")+ theme(plot.title = element_text(hjust = 0.5,size = 16)) p1 <- a+stat_compare_means(aes(group = group),method = "kruskal",label = "p.signif", label.x = 1.5, label.y = 1.9) p1 ggsave(p1,filename = "ͼ/Hspa1a_DKD.png",width =3.5 ,height=3.5,dpi = 250) # Pck1 datamin <- exp3 datamin$group <- datamin$Sample_Group datamin$group <- factor(datamin$Sample_Group,levels = c("Control","DKD")) a <- ggplot(datamin, aes(x = group, y = Value, fill = group,color=group)) + geom_boxplot(outlier.size = 1,width=0.3,outlier.colour = "grey",alpha=0.6) + theme(panel.grid = element_blank(), panel.background = element_rect(fill = 'transparent', color = 'grey'), legend.title = element_blank(), legend.key = element_blank(), axis.text.x = element_text(angle = 45, hjust = 0.5, vjust = 0.5,size=12), axis.text.y = element_text(size=12), legend.text=element_text(size=12))+ labs(x = '', y = '2^-¦¤¦¤CT')+ scale_fill_manual(values = c("#3f60aa","#cc340c"))+ scale_color_manual(values = c("#3f60aa","#cc340c"))+ labs(title="Pck1")+ theme(plot.title = element_text(hjust = 0.5,size = 16)) p2 <- a+stat_compare_means(aes(group = group),method = "t.test",label = "p.signif", label.x = 1.5, label.y = 2.7) p2 ggsave(p2,filename = "ͼ/Pck1_DKD.png",width =3.5 ,height=3.5,dpi = 250) #Fos library(ggpubr) library(ggplot2) datamin <- exp4 datamin$group <- datamin$Sample_Group datamin$group <- factor(datamin$Sample_Group,levels = c("Control","DKD")) a <- ggplot(datamin, aes(x = group, y = Value, fill = group,color=group)) + geom_boxplot(outlier.size = 1,width=0.3,outlier.colour = "grey",alpha=0.6) + theme(panel.grid = element_blank(), panel.background = element_rect(fill = 'transparent', color = 'grey'), legend.title = element_blank(), legend.key = element_blank(), axis.text.x = element_text(angle = 45, hjust = 0.5, vjust = 0.5,size=12), axis.text.y = element_text(size=12), legend.text=element_text(size=12))+ labs(x = '', y = '2^-¦¤¦¤CT')+ scale_fill_manual(values = c("#3f60aa","#cc340c"))+ scale_color_manual(values = c("#3f60aa","#cc340c"))+ labs(title="Fos")+ theme(plot.title = element_text(hjust = 0.5,size = 16)) p1 <- a+stat_compare_means(aes(group = group),method = "t.test",label = "p.signif", label.x = 1.5, label.y = 1.56) p1 ggsave(p1,filename = "ͼ/Fos_DKD.png",width =3.5 ,height=3.5,dpi = 250) # Igf1 datamin <- exp2 datamin$group <- datamin$Sample_Group datamin$group <- factor(datamin$Sample_Group,levels = c("Control","DKD")) a <- ggplot(datamin, aes(x = group, y = Value, fill = group,color=group)) + geom_boxplot(outlier.size = 1,width=0.3,outlier.colour = "grey",alpha=0.6) + theme(panel.grid = element_blank(), panel.background = element_rect(fill = 'transparent', color = 'grey'), legend.title = element_blank(), legend.key = element_blank(), axis.text.x = element_text(angle = 45, hjust = 0.5, vjust = 0.5,size=12), axis.text.y = element_text(size=12), legend.text=element_text(size=12))+ labs(x = '', y = '2^-¦¤¦¤CT')+ scale_fill_manual(values = c("#3f60aa","#cc340c"))+ scale_color_manual(values = c("#3f60aa","#cc340c"))+ labs(title="Igf1")+ theme(plot.title = element_text(hjust = 0.5,size = 16)) p2 <- a+stat_compare_means(aes(group = group),method = "t.test",label = "p.signif", label.x = 1.5, label.y = 1.3) p2 #==========================================================