install.packages("WGCNA") install.packages("stringr") install.packages(c("AnnotationDbi","impute","GO.db","preprocessCore")) install.packages("AnnotationDbi") if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager") BiocManager::install("AnnotationDbi", version = "3.8") if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager") BiocManager::install("impute", version = "3.8") if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager") BiocManager::install("GO.db", version = "3.8") #set workdictionary setwd('D:\\Bioinformatics\\WGCNA\\WGCNA') # The following setting is important, do not omit. options(stringsAsFactors = FALSE); expro=read.csv('normalize.txt',sep = '\t',row.names = 1) dim(expro) m.vars=apply(expro,1,var) expro.upper=expro[which(m.vars>quantile(m.vars, probs = seq(0, 1, 0.25))[4]),] dim(expro.upper) write.table(expro.upper,file="geneInput_variancetop0.25.txt",sep='\t ',quote=F,row.names=T) datExpr0=as.data.frame(t(expro.upper)); library(WGCNA) gsg = goodSamplesGenes(datExpr0, verbose = 3); gsg$allOK #optional if (!gsg$allOK) { # Optionally, print the gene and sample names that were removed: if (sum(!gsg$goodGenes)>0) printFlush(paste("Removing genes:", paste(names(datExpr0)[! gsg$goodGenes], collapse = ", "))); if (sum(!gsg$goodSamples)>0) printFlush(paste("Removing samples:", paste(rownames(datExpr0)[! gsg$goodSamples], collapse = ", "))); # Remove the offending genes and samples from the data: datExpr0 = datExpr0[gsg$goodSamples, gsg$goodGenes] } sampleTree = hclust(dist(datExpr0), method = "average"); # Plot the sample tree: Open a graphic output window of size 12 by 9 inches # The user should change the dimensions if the window is too large or too small. sizeGrWindow(12,9) #pdf(file = "Plots/sampleClustering.pdf", width = 12, height = 9); par(cex = 0.45); par(mar = c(0,4,2,0)) plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5, cex.axis = 1.5, cex.main = 2) # Plot a line to show the cut abline(h = 65, col = "red"); # Determine cluster under the line clust = cutreeStatic(sampleTree, cutHeight = 5050000, minSize = 10) table(clust) # clust 1 contains the samples we want to keep. keepSamples = (clust==1) datExpr = datExpr0[keepSamples, ] dim(datExpr0) dim(datExpr) nGenes = ncol(datExpr) nSamples = nrow(datExpr) # Re-cluster samples dim(datExpr) sampleTree2 = hclust(dist(datExpr), method = "average") sizeGrWindow(12,9) #pdf(file = "Plots/sampleClustering.pdf", width = 12, height = 9); par(cex = 0.45); par(mar = c(0,4,2,0)) pdf(file = "Plots/sampleClustering.pdf", width = 12, height = 9); plot(sampleTree2, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5, cex.axis = 1.5, cex.main = 2) library(WGCNA) # The following setting is important, do not omit. options(stringsAsFactors = FALSE); # Allow multi-threading within WGCNA. At present this call is necessary. # Any error here may be ignored but you may want to update WGCNA if you see one. # Caution: skip this line if you run RStudio or other third-party R environments. # See note above. enableWGCNAThreads(8) # Load the data saved in the first part lnames = load(file = "G-01-dataInput.RData"); #The variable lnames contains the names of loaded variables. lnames # Choose a set of soft-thresholding powers powers = c(c(1:10), seq(from = 12, to=20, by=2)) # Call the network topology analysis function sft = pickSoftThreshold(datExpr, powerVector = powers, verbose = 5) # Plot the results: sizeGrWindow(9, 5) par(mfrow = c(1,2)); cex1 = 0.9; # Scale-free topology fit index as a function of the soft- thresholding power plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[, 2], xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n", main = paste("Scale independence")); text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[, 2], labels=powers,cex=cex1,col="red"); # this line corresponds to using an R^2 cut-off of h abline(h=0.90,col="red") sft$powerEstimate Spower=sft$powerEstimate # Mean connectivity as a function of the soft-thresholding power plot(sft$fitIndices[,1], sft$fitIndices[,5], xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n", main = paste("Mean connectivity")) text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red") ADJ1=abs(cor(datExpr,use="p"))^Spower # When you have relatively few genes (<5000) use the following code k=as.vector(apply(ADJ1,2,sum, na.rm=T)) # When you have a lot of genes use the following code k=softConnectivity(datE=datExpr,power=Spower) # Plot a histogram of k and a scale free topology plot sizeGrWindow(10,5) par(mfrow=c(1,2)) hist(k) scaleFreePlot(k, main="Check scale free topology\n") softPower = Spower; adjacency = adjacency(datExpr, power = softPower) TOM = TOMsimilarity(adjacency); dissTOM = 1-TOM geneTree = hclust(as.dist(dissTOM), method = "average"); # Plot the resulting clustering tree (dendrogram) sizeGrWindow(12,12) plot(geneTree, xlab="", sub="", main = "Gene clustering on TOM-based dissimilarity", labels = FALSE, hang = 0.04); minModuleSize = 30; # Module identification using dynamic tree cut: dynamicMods = cutreeDynamic(dendro = geneTree, distM = dissTOM, deepSplit = 2, pamRespectsDendro =FALSE, minClusterSize = minModuleSize); table(dynamicMods) dynamicColors = labels2colors(dynamicMods) table(dynamicColors) sizeGrWindow(8,12) plotDendroAndColors(geneTree, dynamicColors, "Dynamic Tree Cut", dendroLabels = FALSE, hang = 0.03, addGuide = TRUE, guideHang = 0.05, main = "Gene dendrogram and module colors") MEList = moduleEigengenes(datExpr, colors = dynamicColors) MEs = MEList$eigengenes MEDiss = 1-cor(MEs); # Cluster module eigengenes METree = hclust(as.dist(MEDiss), method = "average"); # Plot the result sizeGrWindow(7, 6) plot(METree, main = "Clustering of module eigengenes", xlab = "", sub = "") MEDissThres = 0.25 #abline=0.25 abline(h=MEDissThres, col = "red") merge = mergeCloseModules(datExpr, dynamicColors, cutHeight = MEDissThres, verbose = 3) # The merged module colors mergedColors = merge$colors; # Eigengenes of the new merged modules: mergedMEs = merge$newMEs; sizeGrWindow(12, 9) #pdf(file = "Plots/geneDendro-3.pdf", wi = 9, he = 6) plotDendroAndColors(geneTree, cbind(dynamicColors, mergedColors), c("Dynamic Tree Cut", "Merged dynamic"), dendroLabels = FALSE, hang = 0.03, addGuide = TRUE, guideHang = 0.05) #dev.off() # Rename to moduleColors moduleColors = mergedColors colorOrder = c("grey", standardColors(50)); moduleLabels = match(moduleColors, colorOrder)-1; MEs = mergedMEs; # Save module colors and labels for use in subsequent parts save(MEs, moduleLabels, moduleColors, geneTree, file = "G-02-networkConstruction-StepByStep.RData") # Select module table(mergedColors) module = "yellow"; #manually enter colors of each module and then run the rest of the code every time you change the module color # Select module probes probes = names(datExpr) inModule = (moduleColors==module); modProbes = probes[inModule]; IMConn = softConnectivity(datExpr[, modProbes],power=Spower); dat1=datExpr[inModule] datExp_IMConn <-data.frame(IMConn,t(dat1)) datExp_IMConn=data.frame(datExp_IMConn) write.table(datExp_IMConn, file =paste("Intramodule_connectivity-",module," .txt"),sep='\t') nTop = 1000; top = (rank(-IMConn) <= nTop) dat2=t(datExp_IMConn) dat2<-data.frame(dat2) dat3<-dat2[top] dat3<-t(dat3) dat3<-data.frame(dat3) write.table(dat3, file = paste("Intramodule_connectivity-",module,"- top1000.txt"),sep='\t') # Calculate topological overlap anew: this could be done more efficiently by saving the TOM # calculated during module detection, but let us do it again here. dissTOM = 1-TOMsimilarityFromExpr(datExpr, power = Spower); # Transform dissTOM with a power to make moderately strong connections more visible in the heatmap plotTOM = dissTOM^Spower; # Set diagonal to NA for a nicer plot diag(plotTOM) = NA; # Call the plot function sizeGrWindow(9,9) TOMplot(plotTOM, geneTree, moduleColors, main = "Network heatmap plot, all genes") setwd('D:\\Bioinformatics\\WGCNA\\WGCNA'); # Load the WGCNA package library(WGCNA) # The following setting is important, do not omit. options(stringsAsFactors = FALSE); # Load the expression and trait data saved in the first part lnames = load(file = "GBM-01-dataInput.RData"); lnames = load(file = "G-01-dataInput.RData"); #The variable lnames contains the names of loaded variables. lnames # Load network data saved in the second part. lnames = load(file = "GBM-02-networkConstruction-StepByStep.RData"); lnames = load(file = "G-02-networkConstruction-StepByStep.RData"); lnames # Recalculate topological overlap if needed TOM = TOMsimilarityFromExpr(datExpr, power = Spower); # Select mbodules modules = c("turquoise"); # Select module probes probes = names(datExpr) inModule = is.finite(match(moduleColors, modules)); modProbes = probes[inModule]; modGenes = modProbes; # Select the corresponding Topological Overlap modTOM = TOM[inModule, inModule]; dimnames(modTOM) = list(modProbes, modProbes) nTop = 1000; IMConn = softConnectivity(datExpr[, modProbes]); top = (rank(-IMConn) <= nTop) # Export the network into edge and node list files Cytoscape can read cyt = exportNetworkToCytoscape(modTOM[top, top], edgeFile = paste("CytoscapeInput- edges-", paste(modules, collapse="-"), ".txt", sep=""), nodeFile = paste("CytoscapeInput- nodes-", paste(modules, collapse="-"), ".txt", sep=""), weighted = TRUE, threshold = 0.02, nodeNames = modProbes, altNodeNames = modGenes, nodeAttr = moduleColors[inModule])