rm(list = ls()) options(stringsAsFactors = F) library(stringr) library(GEOquery) library(AnnoProbe) library(limma) library(ggplot2) library(tinyplanet) library(dplyr) library(WGCNA) library(data.table) library(tidyverse) library(readxl) exp = read.table(file = "data/GSE137943.txt") exp[1:4,1:4] png(filename = "1-raw-sample-stat-boxplot.png") par(mai = c(1.3,1,0.1,1) ) boxplot(exp,las=2) dev.off() pd <- read.table(file = "trait-2.txt",row.names = 1, sep = "\t", header = T) pd <- data.frame(pd) head(pd) exp = exp[,colnames(exp) %in% rownames(pd)] p = identical(rownames(pd),colnames(exp));p exp=log2(exp+1) library(limma) exp=normalizeBetweenArrays(exp) png(filename = "2-normal-sample-stat-boxplot.png") par(mai = c(1.3,1,0.1,1) ) boxplot(exp,las=2) dev.off() names(pd) datTraits = pd gsg = goodSamplesGenes(exp, verbose = 3) gsg$allOK;table(!is.na(exp)) datExpr0 <- exp datExpr0 <- t(datExpr0[order(apply(datExpr0,1,mad), decreasing = T)[1:8000],]) dim(datExpr0) library(data.table) ids <- read.table('data/Bos_taurus_ARS-UCD1_2_101.txt',na.strings = "",sep = " ") ids[1:4,1:4] ids=ids[,c(6,7)] head(ids) colnames(ids)=c('probe_id','symbol') ids = na.omit(ids) ids$probe_id=as.character(ids$probe_id) ids=ids[ids$probe_id %in% colnames(datExpr0),] datExpr0 = t(datExpr0) datExpr0[1:4,1:4] datExpr0=datExpr0[ids$probe_id,] ids$median=apply(datExpr0,1,median) ids=ids[order(ids$symbol,ids$median,decreasing = T),] ids=ids[!duplicated(ids$symbol),] datExpr0=datExpr0[ids$probe_id,] rownames(datExpr0)=ids$symbol datExpr0[1:4,1:4] datExpr0 = t(datExpr0) sampleTree = hclust(dist(datExpr0), method = "average") plot(sampleTree) # Plot the sample tree: Open a graphic output window of size 12 by 9 inches # The user should change the dimensions if the window is too large or too small. dir.create('output') sizeGrWindow(12,9) pdf(file = "./output/01_sampleTree.pdf", width = 12, height = 9); par(cex = 0.6); par(mar = c(0,4,2,0)) plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5, cex.axis = 1.5, cex.main = 2) dev.off() datExpr=datExpr0 nGenes = colnames(datExpr) nSamples = rownames(datExpr) collectGarbage() femaleSamples = rownames(datExpr) traitRows = match(femaleSamples, rownames(datTraits)) datTraits = datTraits[traitRows,] # Re-cluster samples sampleTree2 = hclust(dist(datExpr), method = "average") # Convert traits to a color representation: white means low, red means high, grey means missing entry datTraits = pd traitColors = numbers2colors(datTraits[,3], signed = FALSE,colors = c("#FF5252", "#8BC34A", "#FF0000", "#FF00FF", "#8600FF", "#4A4AFF", "#00FFFF", "#28FF28") ) # Plot the sample dendrogram and the colors underneath. pdf(file = "output/011_sampleClustering_addTraits.pdf", width = 14, height = 9); plotDendroAndColors(sampleTree2, traitColors, groupLabels = names(datTraits), main = "Sample dendrogram and trait heatmap") dev.off() dir.create('raw_Rdata1') save(datExpr,exp, ids, datTraits,pd,file = "./raw_Rdata1/01_data.Rdata") ##################################################################################################### rm(list=ls()) # The following setting is important, do not omit. options(stringsAsFactors = FALSE) # Load the WGCNA package library(WGCNA) load("./raw_Rdata/01_data.Rdata") # Choose a set of soft-thresholding powers powers = c(c(1:10), seq(from = 12, to=30, by=2)) # Call the network topology analysis function sft = pickSoftThreshold(datExpr, powerVector = powers, verbose = 5) po <- sft$powerEstimate;po pdf('./output/02_powers.pdf',width = 9,height =5) par(mfrow = c(1,2)) cex1 = 0.9 # Scale-free topology fit index as a function of the soft-thresholding power plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2], xlab="Soft Threshold (power)", ylab="Scale Free Topology Model Fit,signed R^2", type="n", main = paste("Scale independence")); text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2], labels=powers, cex=cex1,col="red"); # this line corresponds to using an R^2 cut-off of h abline(h=0.8,col="red") # Mean connectivity as a function of the soft-thresholding power plot(sft$fitIndices[,1], sft$fitIndices[,5], xlab="Soft Threshold (power)", ylab="Mean Connectivity", type="n", main = paste("Mean connectivity")) text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red") dev.off() ########################## cor <- WGCNA::cor net = blockwiseModules(datExpr, power = 8, TOMType = "unsigned", minModuleSize = 30, reassignThreshold = 0, mergeCutHeight = 0.3, numericLabels = T, pamRespectsDendro = FALSE, saveTOMs = TRUE, saveTOMFileBase = "Bos_taurus_TOM", verbose = 3) ??blockwiseModules cor<-stats::cor class(net) names(net) table(net$colors) moduleLabels = net$colors moduleColors = labels2colors(net$colors) table(moduleColors) MEs = net$MEs library(ggplot2) pd = read.table("trait-2.txt",row.names = 1, sep = "\t", header = T) MEs$groups = pd$sample_title library(ggplot2) p = ggplot(data = MEs) + geom_bar(mapping = aes(x = rownames(MEs), y = ME4, color = groups,fill = groups), stat = "identity")+ theme_bw()+ theme(axis.text.x = element_blank())+ labs(x = " ", y = "MEmagenta eigengene" ) tiff('./output/MEmagenta eigengene.tiff',width = 8,height =4,units = "in",res = 300,pointsize = 3 ) print(p) dev.off() geneTree = net$dendrograms[[1]] pdf('./output/03_networkmodule.pdf',width = 9,height =5) par(mfrow = c(1,2)) cex1 = 0.9 # Plot the dendrogram and the module colors underneath plotDendroAndColors(net$dendrograms[[1]], moduleColors[net$blockGenes[[1]]], "Module colors", dendroLabels = FALSE, hang = 0.03, addGuide = TRUE, guideHang = 0.05) dev.off() save(MEs, moduleLabels, moduleColors, po, geneTree, file = "raw_Rdata1/02_networkConstruction.RData") table(moduleColors) #################################################################################################### rm(list=ls()) # Load the WGCNA package library(WGCNA) # The following setting is important, do not omit. options(stringsAsFactors = FALSE) #0.导入数据---- load(file = "./raw_Rdata1/01_data.Rdata") load(file = "./raw_Rdata1/02_networkConstruction.RData") nGenes = ncol(datExpr) nSamples = nrow(datExpr) # Calculate topological overlap anew: this could be done more efficiently by saving the TOM # calculated during module detection, but let us do it again here. dissTOM = 1-TOMsimilarityFromExpr(datExpr, power = 8) # Transform dissTOM with a power to make moderately strong connections more visible in the heatmap plotTOM = dissTOM^7; plotTOM[1:6,1:6] # Set diagonal to NA for a nicer plot diag(plotTOM) = NA; # Call the plot function sizeGrWindow(9,9) #TOMplot(plotTOM, geneTree, moduleColors, main = "Network heatmap plot, all genes") nSelect = 500 # For reproducibility, we set the random seed set.seed(10); select = sample(nGenes, size = nSelect); selectTOM = dissTOM[select, select]; dim(selectTOM) # There's no simple way of restricting a clustering tree to a subset of genes, so we must re-cluster. selectTree = hclust(as.dist(selectTOM), method = "average") selectColors = moduleColors[select]; # Open a graphical window sizeGrWindow(9,9) pdf('./output/04_TOM.pdf',width = 9,height = 9) par(mfrow = c(1,2)) cex1 = 0.9 # Taking the dissimilarity to a power, say 10, makes the plot more informative by effectively changing # the color palette; setting the diagonal to NA also improves the clarity of the plot plotDiss = selectTOM^7; diag(plotDiss) = NA; TOMplot(plotDiss, selectTree, selectColors, #main = "Network heatmap plot, selected genes", col=gplots::colorpanel(250,'red',"orange",'lemonchiffon')) dev.off() # Recalculate module eigengenes MEs = moduleEigengenes(datExpr, moduleColors)$eigengenes pdf('./output/04_Eigengene.pdf',width = 6 ,height =5) par(mfrow = c(1,2)) cex1 = 0.9 par(cex = 0.6) plotEigengeneNetworks(MEs, "", marDendro = c(0,4,1,2), marHeatmap = c(3,4,1,2), cex.lab = 0.8, xLabelsAngle= 90) dev.off() # Plot the dendrogram sizeGrWindow(6,6); pdf('./output/04_Eigengene_2.pdf',width = 5 ,height =5) par(cex = 0.8) plotEigengeneNetworks(MEs, "Eigengene dendrogram", marDendro = c(0,4,2,0), plotHeatmaps = FALSE) # Plot the heatmap matrix (note: this plot will overwrite the dendrogram plot) par(cex = 0.8) plotEigengeneNetworks(MEs, "Eigengene adjacency heatmap", marHeatmap = c(3,4,2,2), plotDendrograms = FALSE, xLabelsAngle = 90) dev.off() head(datTraits) rownames(datTraits) <- rownames(pd) names(datTraits) # Add the weight to existing module eigengenes MET = orderMEs(cbind(MEs, datTraits)) # Plot the relationships among the eigengenes and the trait #sizeGrWindow(7,7); pdf('./output/04_Eigengene_3.pdf',width = 5 ,height =5) par(mai = c(3,3,0.1,0.1) ) plotEigengeneNetworks(MET, "", marDendro = c(1,9,1,3), marHeatmap = c(9,9,0,1), cex.lab = 0.8, xLabelsAngle= 90) dev.off() #################################################################################################### rm(list=ls()) options(stringsAsFactors = FALSE) library(WGCNA) load("./raw_Rdata1/01_data.Rdata") load("./raw_Rdata1/02_networkConstruction.RData") #1.Modules-traits relationships---- # Define numbers of genes and samples nGenes = ncol(datExpr) nSamples = nrow(datExpr) # Recalculate MEs with color labels MEs0 = moduleEigengenes(datExpr, moduleColors)$eigengenes MEs0[1:6,1:6] MEs = orderMEs(MEs0) MEs[1:6,1:6] moduleTraitCor = cor(MEs, datTraits, use = "p"); moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples); sizeGrWindow(10,6) # Will display correlations and their p-values textMatrix = paste(signif(moduleTraitCor, 2),"\n(",signif(moduleTraitPvalue, 1),")", sep = ""); dim(textMatrix) = dim(moduleTraitCor) textMatrix[1:6,1:2] tiff('./output/05_correlation.tiff',width = 10,height =12,units = "in",res = 300,pointsize = 3 ) par(cex=3) par(mai = c(0.1,0.1,0.1,0.1)) par(mar = c(9,10, 3, 5)) # Display the correlation values within a heatmap plot names(datTraits) labeledHeatmap(Matrix = moduleTraitCor, xLabels = names(datTraits), yLabels = names(MEs), ySymbols = names(MEs), colorLabels = FALSE, colors = blueWhiteRed(50), textMatrix = textMatrix, setStdMargins = FALSE, cex.text = 1.2, zlim = c(-1,1), main = paste("Module-trait relationships")) dev.off() #3.Module membership---- # names (colors) of the modules modNames = substring(names(MEs), 3) geneModuleMembership = as.data.frame(cor(datExpr, MEs, use = "p")); MMPvalue = as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership), nSamples)); names(geneModuleMembership) = paste("MM", modNames, sep="") names(MMPvalue) = paste("p.MM", modNames, sep="") # Define variable weight containing the weight column of datTrait i = 6 h = 3 gp <- as.data.frame(datTraits[,i]) names(gp) = colnames(datTraits)[i] geneTraitSignificance = as.data.frame(cor(datExpr, gp, use = "p")) GSPvalue = as.data.frame(corPvalueStudent(as.matrix(geneTraitSignificance), nSamples)) names(geneTraitSignificance) = paste("GS.", names(gp), sep="") names(GSPvalue) = paste("p.GS.", names(gp), sep="") module = modNames[h] column = match(module, modNames); moduleGenes = moduleColors==module; #sizeGrWindow(7, 7) tiff(file = paste0("./output/",module,"_",names(gp),".tiff"),width = 6,height =5,units = "in",res = 300,pointsize = 3 ) par(cex = 3) par(mar = c(5,6,3,3) ) verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]), abs(geneTraitSignificance[moduleGenes, 1]), xlab = paste("Module Membership in", module, "module"), ylab = "Gene significance for Control", main = paste("Module membership vs. gene significance\n"), cex.main = 1.2, cex.lab = 1.2, cex.axis = 1.2, col = module) abline(h=0.8, v = 0.8, col="black", lty=2) dev.off() target_gene <- data.frame(geneTraitSignificance$GS.Muscle, geneModuleMembership$MMmagenta) rownames(target_gene) <- rownames(geneTraitSignificance) hub_gene <- target_gene[abs(target_gene[1]) > 0.79 & abs(target_gene[2]) > 0.8,] library(ComplexHeatmap) library(ggplot2) module = modNames[h] load("./raw_Rdata1/01_data.Rdata") dat <- t(datExpr) target_exp <- datExpr[,colnames(datExpr) %in% rownames(target_gene)] target_exp[1:4,1:4] hub_exp <- datExpr[,colnames(datExpr) %in% rownames(hub_gene)] hub_cor <- cor(hub_exp) Heatmap(hub_cor) library(corrplot) library(paletteer) my_color = rev(paletteer_d("RColorBrewer::RdYlBu")) my_color = colorRampPalette(my_color)(10) M = cor(hub_exp) testRes = cor.mtest(hub_exp, conf.level = 0.95) tiff('./output/heatmap-muscle-60.tiff',width = 20,height =15,units = "in",res = 300,pointsize = 3 ) par(cex = 4) corrplot(M, order = 'AOE', method = "square", type = 'upper', tl.pos = 'd', p.mat = testRes$p, insig = "label_sig", pch.cex = .9, tl.col="black", col = my_color) dev.off() corrplot(M, col = my_color, add = TRUE, type = 'lower', method = 'number', order = 'AOE', diag = FALSE, tl.pos = 'n', cl.pos = 'n') dev.off() library(corrplot) M<-cor(hub_exp) library(paletteer) p.mat <- cor.mtest(hub_exp)$p tiff('./output/muscle__heatmap_60.tiff',width = 10,height =12,units = "in",res = 600,pointsize = 3 ) par(cex = 1.5) col <- rev(colorRampPalette(c("#BB4444", "#EE9988", "#FFFFFF", "#77AADD", "#4477AA"))(200)) corrplot(M, method="color", #tl.pos = "d", col=col, type="lower", order="hclust", addCoef.col = "black", tl.col="black", tl.srt=45, p.mat = p.mat, sig.level = 0.05, insig = "blank", diag=FALSE ) dev.off() #?heatmap genes_all <- data.frame(module=moduleColors, genes=colnames(datExpr)) head(genes_all) write.csv(genes_all,file = './output/genes-all-modules.csv') module = modNames[h] genes_go <- genes_all[genes_all$module == module,] write.csv(genes_go, file = paste0("./output/",module,"_muscle_genes.csv")) write.csv(rownames(hub_gene), file = paste0("./output/",module,"_",names(datTraits)[i],"_hub_genes_20.csv")) length(modNames) save(MEs,geneModuleMembership, geneTraitSignificance, geneModuleMembership, target_exp, target_gene, hub_gene, hub_exp, file = "./raw_Rdata1/03_Modules-fat-traits.Rdata")