library(WGCNA) library(RColorBrewer) options(stringsAsFactors=FALSE) allowWGCNAThreads() femData <- read.csv("genes.fpkm.anno.csv",header=T,sep=",",check.names=F) dim( femData ) head(femData[,1:25]) femData <- femData[femData$Symbol!="-",] datExpr0 <- femData[,-c(1,20:25)] rownames(datExpr0) <- femData$gene_id datExpr0 <- t(datExpr0) head(datExpr0[,1:25]) gsg <- goodSamplesGenes(datExpr0) gsg$allOK dim(datExpr0) sampleTree <- hclust(dist(datExpr0),method="average") pdf("01.samples_cluster_tree.pdf",width=25,height=8) par(mar=c(0,4,2,0),cex=2) plot(sampleTree,main="Sample clustering to detect outliers",xaxt='n',sub="10",xlab="5",lwd=3) cutHeight <- 3000#According to my needs abline(h=cutHeight,col="red") dev.off() dim(datExpr0) datExpr <- datExpr0[-9,]#Which row to delete according to the sample to be deleted dim(datExpr) head(datExpr[,1:5]) text=datExpr write.table(text,file="01.datExpr.xls",quote=F,sep="\t",row.names=T) #Filter the appropriate phenotype data in the phenotype data, and only keep the filtered data of the sample traitData <- read.csv("ClinicalTraits.csv",header=T,sep=",",check.names=F) head(traitData) allTraits <- traitData[,c(3:9)] #allTraits <- traitData rownames(allTraits) <- traitData[,2] femaleSamples <- rownames(datExpr) datTraits <- allTraits[femaleSamples,] #Visualize the data sampleTree2 <- hclust(dist(datExpr),method="average") traitColors <- numbers2colors(datTraits,colors=blueWhiteRed(50)) pdf("01.cluster_trait.pdf",width=25,height=10) plotDendroAndColors(sampleTree2,traitColors,groupLabels=names(datTraits),main="Sample dendrogram and trait heatmap",cex.main=3,cex.lab=2,cex.axis=2,cex.dendroLabels = 2,cex.rowText = 2, cex.colorLabels = 2,guideCount = 100) dev.off() #02 Screening soft threshold β #Call the pickSoftThreshold function #Note: Need to specify the network type or adjacency function type #Automatically provide the number of available soft thresholds sft$powerEstimate powers <- c(c(1:10),seq(from=12,to=30,by=2)) networkType <- "signed" sft <- pickSoftThreshold(datExpr,powerVector=powers,networkType="signed",verbose=5) sft$powerEstimate write.table(sft,file="02.Soft Threshold(power).xls",quote=F,sep="\t",row.names=F) pdf(file="02.soft_threshold.pdf",width=8,height=4.5) par(mfrow=c(1,2)) cex1 <- 0.9 plot(sft$fitIndices[,1],-sign(sft$fitIndices[,3])*sft$fitIndices[,2], xlab="Soft threshold (power)", ylab="Scale free topology model fit, signed R^2", type="n", main=paste("Scale independence")) text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2], labels=powers,cex=cex1,col="red") abline(h=0.8870,col="red") plot(sft$fitIndices[,1],sft$fitIndices[,5], xlab="Soft threshold (power)",ylab="Mean connectivity", type="n",main=paste("Mean connectivity")) text(sft$fitIndices[,1],sft$fitIndices[,5],labels=powers,cex=cex1,col="red") dev.off() #03 Complete co-expression network and module identification in one step #blockwiseModules function softPower <- 10 networkType <- "signed" net <- blockwiseModules(datExpr,power=softPower,networkType=networkType,numericLabels=TRUE,mergeCutHeight=0.25,minModuleSize=50,maxBlockSize=30000,saveTOMs=TRUE,saveTOMFileBase="WGCNA_TOM",verbose=5) table(net$colors) #Visualize according to the node #The minimum number of nodes is set to 30 moduleLabels <- net$colors moduleColors <- labels2colors(moduleLabels) pdf(file="03.auto_modules_color.pdf",width=12,height=4) plotDendroAndColors(net$dendrograms[[1]],moduleColors[net$blockGenes[[1]]],"Module colors",dendroLabels=FALSE,addGuide=TRUE) dev.off() save(datExpr,datExpr0,sft,softPower,networkType,net,moduleColors,file="WGCNA_2.RData") #05ME (Module Eigengenes) calculation #That is, the numerical value of the number expressed by the calculation module in each sample MEs0 <- moduleEigengenes(datExpr[,net$goodGenes],moduleColors[net$blockGenes[[1]]])$eigengenes MEs <- orderMEs(MEs0) rownames(MEs) <- rownames(datExpr[,net$goodGenes]) text <- cbind(rownames(MEs),MEs) colnames(text)[1] <- "samples" write.table(text,file="05.module_eigengenes.xls",quote=F,sep="\t",row.names=F) #Drawing module hierarchical clustering tree based on ME names(MEs) <- substring(names(MEs),3) MEDiss <- 1-cor(MEs) METree <- hclust(as.dist(MEDiss),method="average") pdf(file="05.modules_cluster_tree.pdf",width=7,height=5) plot(METree,main="Clustering of module eigengenes",xlab="",sub="") dev.off() #Draw module ME and node gene expression heat map dir.create("05.expression_ME") for(i in 1:(ncol(MEs)-1)){ which.module <- labels2colors(i) dir <- "05.expression_ME/" pdf(file=paste(dir,"05.expression_ME_",which.module,".pdf",sep=""),25,10) ME <- MEs[,which.module] ME <- t(as.matrix(MEs[,which.module])) colnames(ME) <- rownames(datExpr[,net$goodGenes]) layout(matrix(c(1,2)),heights=c(1.5,3)) par(mar=c(0.3,9,3,5),cex=2) plotMat(t(scale(datExpr[,net$goodGenes][,moduleColors[net$blockGenes[[1]]]==which.module])),nrgcols=30,rlabels=F,rcols=which.module,main=paste(which.module),cex.main=1) par(mar=c(8,4,0,0),cex=2) barplot(ME,col=which.module,main="",cex.names=1,cex.axis=1,ylab="module eigengenes",las=3) dev.off() } #06Associated module and other information (sample attributes/characters) #Associate the module with the sample sample_cor <- cor(t(datExpr[,net$goodGenes]),t(datExpr[,net$goodGenes]),use='pairwise.complete.obs') moduleSampleCor <- cor(MEs,sample_cor,use="p") nSamples <- nrow(datExpr[,net$goodGenes]) moduleSamplePvalue <- corPvalueStudent(moduleSampleCor,nSamples) textMatrix <- paste(signif(moduleSampleCor,2),"\n(",signif(moduleSamplePvalue,1),")",sep="") dim(textMatrix) <- dim(moduleSampleCor) rowLabels <- paste("ME",names(MEs),sep="") #Due to too many samples, the following drawing does not use the textMatrix function pdf(file="06.modules_samples_relationship.pdf",22,6) par(mar=c(7,9,2,2)) labeledHeatmap(Matrix=moduleSampleCor,xLabels=colnames(sample_cor),yLabels=rowLabels,ySymbols=names(MEs),colorLabels=TRUE,colors=blueWhiteRed(50),setStdMargins=FALSE,xLabelsAngle=90,zlim=c(-1,1)) dev.off() #Save module and sample correlation information text <- paste("cor=",round(moduleSampleCor,4),";p-value=",round(moduleSamplePvalue,4),sep="") dim(text) <- dim(moduleSampleCor) rownames(text) <- rownames(moduleSampleCor) colnames(text) <- colnames(moduleSampleCor) text <- cbind(rownames(text),text) colnames(text)[1] <- "modules" write.table(text,file="06.modules_samples_relationships.xls",quote=F,sep="\t",row.names=F) #Associate module with phenotype moduleTraitCor <- cor(MEs,datTraits,use="p") nSamples <- nrow(datExpr[,net$goodGenes]) moduleTraitPvalue <- corPvalueStudent(moduleTraitCor,nSamples) textMatrix <- paste(signif(moduleTraitCor,2), "\n(",signif(moduleTraitPvalue,1),")",sep="") dim(textMatrix) <- dim(moduleTraitCor) rowLabels <- paste("ME",names(MEs),sep="") pdf(file="06.modules_traits_relationships.pdf",15,8) par(mar=c(7,12,1,2),cex=0.75) labeledHeatmap(Matrix=moduleTraitCor, textMatrix=textMatrix, xLabels=colnames(datTraits), yLabels=rowLabels,ySymbols=names(MEs), colorLabels=TRUE,colors=blueWhiteRed(50), setStdMargins=FALSE,xLabelsAngle=90,zlim=c(-1,1)) dev.off() #Save module and phenotype correlation information text <- paste("cor=",round(moduleTraitCor,4), ";p-value=",round(moduleTraitPvalue,4),sep="") dim(text) <- dim(moduleTraitCor) rownames(text) <- rownames(moduleTraitCor) colnames(text) <- colnames(moduleTraitCor) text <- cbind(rownames(text),text) colnames(text)[1] <- "modules" write.table(text,file="06.modules_traits_relationships.xls", quote=F,sep="\t",row.names=F) #07Associated nodes and other information #Associate the node gene with the sample modNames <- names(MEs) geneModuleMembership <- cor(datExpr[,net$goodGenes],MEs,use="p") nSamples <- nrow(datExpr[,net$goodGenes]) MMPvalue <- corPvalueStudent(geneModuleMembership,nSamples) colnames(geneModuleMembership) <- paste("MM",modNames,sep="") colnames(MMPvalue) <- paste("p.MM",modNames,sep="") text <- paste("cor=",round(geneModuleMembership,4), ";p-value=",round(MMPvalue,4),sep="") dim(text) <- dim(geneModuleMembership) rownames(text) <- rownames(geneModuleMembership) colnames(text) <- colnames(geneModuleMembership) text <- cbind(rownames(text),text) colnames(text)[1] <- "modules" write.table(text,file="07.genes_module_membership.xls", quote=F,sep="\t",row.names=F) #Associate node genes with phenotypes (such as UR) modNames <- names(MEs) UR <- as.data.frame(datTraits$UR) names(UR) <- "UR" geneTraitSignificance <- as.data.frame(cor(datExpr[,net$goodGenes],UR,use="p")) GSPvalue <- as.data.frame(corPvalueStudent(as.matrix(geneTraitSignificance),nSamples)) names(geneTraitSignificance) <- paste("GS.",names(UR),sep="") names(GSPvalue) <- paste("p.GS.",names(UR),sep="") head(geneTraitSignificance) head(GSPvalue) #Draw the correlation diagram of each module node gene and phenotype one by one #X-axis direction shows the result of the association between the node and the ME #Y-axis shows the results of the association between the node and the phenotype dir.create("07.MM_vs_UR") for(i in 1:(ncol(MEs)-1)) { which.module <- labels2colors(i) column <- match(which.module,modNames) moduleGenes <- moduleColors[net$blockGenes[[1]]]==which.module dir <- "07.MM_vs_UR/" pdf(file=paste(dir,"07.",which.module,"_MM_vs_UR.pdf",sep=""),6,6) verboseScatterplot(geneModuleMembership[moduleGenes,column], geneTraitSignificance[moduleGenes,1], xlab=paste("Module membership (MM) in",which.module,"module"), ylab="Gene significance for UR", main=paste("Module membership vs. gene significance\n"), col=which.module) dev.off() } #Save the results of the correlation between node genes and phenotypes text <- cbind(geneTraitSignificance,GSPvalue) text <- cbind(rownames(text),text) colnames(text)[1] <- "genes" write.table(text,file="07.genes_trait_significance.xls", quote=F,sep="\t",row.names=F) save(datExpr,sft,softPower,networkType,net,moduleColors,MEs, moduleSampleCor,moduleSamplePvalue, moduleTraitCor,moduleTraitPvalue, geneModuleMembership,MMPvalue, file="WGCNA_2.RData") dir.create("07.MM_vs_G1-48") for(i in 1:(ncol(MEs)-1)) { which.module <- labels2colors(i) column <- match(which.module,modNames) moduleGenes <- moduleColors[net$blockGenes[[1]]]==which.module dir <- "07.MM_vs_G1-48/" pdf(file=paste(dir,"07.",which.module,"_MM_vs_G1-48.pdf",sep=""),6,6) verboseScatterplot(geneModuleMembership[moduleGenes,column], geneTraitSignificance[moduleGenes,1], xlab=paste("Module membership (MM) in",which.module,"module"), ylab="Gene significance for G1-48", main=paste("Module membership vs. gene significance\n"), col=which.module) dev.off() } text <- cbind(geneTraitSignificance,GSPvalue) text <- cbind(rownames(text),text) colnames(text)[1] <- "genes" write.table(text,file="07.genes_trait_significance G1-48.xls", quote=F,sep="\t",row.names=F) save(datExpr,sft,softPower,networkType,net,moduleColors,MEs, moduleSampleCor,moduleSamplePvalue, moduleTraitCor,moduleTraitPvalue, geneModuleMembership,MMPvalue, file="WGCNA_2.RData") dir.create("07.MM_vs_G2-48") for(i in 1:(ncol(MEs)-1)) { which.module <- labels2colors(i) column <- match(which.module,modNames) moduleGenes <- moduleColors[net$blockGenes[[1]]]==which.module dir <- "07.MM_vs_G2-48/" pdf(file=paste(dir,"07.",which.module,"_MM_vs_G2-48.pdf",sep=""),6,6) verboseScatterplot(geneModuleMembership[moduleGenes,column], geneTraitSignificance[moduleGenes,1], xlab=paste("Module membership (MM) in",which.module,"module"), ylab="Gene significance for G2-48", main=paste("Module membership vs. gene significance\n"), col=which.module) dev.off() } text <- cbind(geneTraitSignificance,GSPvalue) text <- cbind(rownames(text),text) colnames(text)[1] <- "genes" write.table(text,file="07.genes_trait_significance G2-48.xls", quote=F,sep="\t",row.names=F) save(datExpr,sft,softPower,networkType,net,moduleColors,MEs, moduleSampleCor,moduleSamplePvalue, moduleTraitCor,moduleTraitPvalue, geneModuleMembership,MMPvalue, file="WGCNA_2.RData") dir.create("07.MM_vs_S-48") for(i in 1:(ncol(MEs)-1)) { which.module <- labels2colors(i) column <- match(which.module,modNames) moduleGenes <- moduleColors[net$blockGenes[[1]]]==which.module dir <- "07.MM_vs_S-48/" pdf(file=paste(dir,"07.",which.module,"_MM_vs_S-48.pdf",sep=""),6,6) verboseScatterplot(geneModuleMembership[moduleGenes,column], geneTraitSignificance[moduleGenes,1], xlab=paste("Module membership (MM) in",which.module,"module"), ylab="Gene significance for S-48", main=paste("Module membership vs. gene significance\n"), col=which.module) dev.off() } text <- cbind(geneTraitSignificance,GSPvalue) text <- cbind(rownames(text),text) colnames(text)[1] <- "genes" write.table(text,file="07.genes_trait_significance S-48.xls", quote=F,sep="\t",row.names=F) save(datExpr,sft,softPower,networkType,net,moduleColors,MEs, moduleSampleCor,moduleSamplePvalue, moduleTraitCor,moduleTraitPvalue, geneModuleMembership,MMPvalue, file="WGCNA_2.RData") #08Export co-expression network #Load TOM matrix #Export the topological overlap matrix done before #Here, because there is too much data in the topological overlapping matrix, it cannot operate normally, so the matrix is split into 4 small matrices, and then combined load(file="WGCNA_TOM-block.1.RData") ATOM <- as.matrix(TOM) TOM1 <- ATOM[1:round((nrow(ATOM)/2)),1:round((nrow(ATOM)/2))] TOM2 <- ATOM[(round(nrow(ATOM)/2)+1):nrow(ATOM),1:round((nrow(ATOM)/2))] TOM3 <- ATOM[1:round((nrow(ATOM)/2)),(round(nrow(ATOM)/2)+1):nrow(ATOM)] TOM4 <- ATOM[(round(nrow(ATOM)/2)+1):nrow(ATOM),(round(nrow(ATOM)/2)+1):nrow(ATOM)] #module <- labels2colors(i) #Modify the current memory memory.limit(size=10240) dir.create("11.module_result") setwd("11.module_result") for(i in 1:(ncol(MEs)-1)) { module ="red" inModule <- moduleColors[net$blockGenes[[1]]]==module genename <- colnames(datExpr[,net$goodGenes]) modGenes <- genename[inModule] modTOM1 <- TOM1[inModule[1:round((nrow(ATOM)/2))], inModule[1:round((nrow(ATOM)/2))]] modTOM2 <- TOM2[inModule[(round(nrow(ATOM)/2)+1):nrow(ATOM)], inModule[1:round((nrow(ATOM)/2))]] modTOM3 <- TOM3[inModule[1:round((nrow(ATOM)/2))], inModule[(round(nrow(ATOM)/2)+1):nrow(ATOM)]] modTOM4 <- TOM4[inModule[(round(nrow(ATOM)/2)+1):nrow(ATOM)], inModule[(round(nrow(ATOM)/2)+1):nrow(ATOM)]] modTOM <- rbind(cbind(modTOM1,modTOM3),cbind(modTOM2,modTOM4)) #Save node gene pair information as Cytoscape input file cyt1 <- exportNetworkToCytoscape(modTOM, edgeFile=paste("CytoscapeInput-edges-",paste(module,collapse="-"),".txt",sep=""), nodeFile=paste("CytoscapeInput-nodes-",paste(module,collapse="-"),".txt",sep=""), weighted=TRUE,threshold=0.02, nodeNames=modGenes,altNodeNames=modGenes, nodeAttr=moduleColors[net$blockGenes[[1]]][inModule]) #Save a module node gene and hub node information, it is generally believed that the first 5 to 15% of the genes can be considered as hub genes IMConn <- softConnectivity(datExpr[,net$goodGenes][,modGenes]) out <- cbind(modGenes,IMConn) colnames(out) <- c("gene","connectivity") out <- out[order(as.numeric(out[,2]),decreasing=T),] write.table(out,paste(module,"-module-gene.txt",sep=""), sep="\t",quote=F,row.names=F) nTop <- 0.05*length(modGenes) top <- (rank(-IMConn) <= nTop) out <- cbind(modGenes[top],IMConn[top]) colnames(out) <- c("gene","connectivity") out <- out[order(as.numeric(out[,2]),decreasing=T),] write.table(out,paste(module,"-5%hubgene.txt",sep=""), sep="\t",quote=F,row.names=F) nTop <- 0.1*length(modGenes) top <- (rank(-IMConn) <= nTop) out <- cbind(modGenes[top],IMConn[top]) colnames(out) <- c("gene","connectivity") out <- out[order(as.numeric(out[,2]),decreasing=T),] write.table(out,paste(module,"-10%hubgene.txt",sep=""), sep="\t",quote=F,row.names=F) nTop <- 10 top <- (rank(-IMConn) <= nTop) out <- cbind(modGenes[top],IMConn[top]) colnames(out) <- c("gene","connectivity") out <- out[order(as.numeric(out[,2]),decreasing=T),] write.table(out,paste(module,"-top5_hubgene.txt",sep=""), sep="\t",quote=F,row.names=F) } genename <- colnames(datExpr[,net$goodGenes]) modGenes <- genename IMConn <- softConnectivity(datExpr[,net$goodGenes][,modGenes]) cyt1 <- exportNetworkToCytoscape(ATOM, edgeFile=paste("CytoscapeInput-edges-overall.txt",sep=""), nodeFile=paste("CytoscapeInput-nodes-overall.txt",sep=""), weighted=TRUE,threshold=0.02, nodeNames=modGenes,altNodeNames=modGenes, nodeAttr=moduleColors[net$blockGenes[[1]]]) out <- cbind(modGenes,IMConn) colnames(out) <- c("gene","connectivity") out <- out[order(as.numeric(out[,2]),decreasing=T),] write.table(out,paste("overall_gene_connectivity.txt",sep=""), sep="\t",quote=F,row.names=F) IntraMConn <- intramodularConnectivity.fromExpr(datExpr[,net$goodGenes], moduleColors[net$blockGenes[[1]]], networkType=networkType, power=softPower) merged_IntraMConn <- cbind(colnames(datExpr[,net$goodGenes]), moduleColors[net$blockGenes[[1]]],IntraMConn) colnames(merged_IntraMConn)[1:2] <- c("gene","module") head(merged_IntraMConn)