##WGCNA process fpkm=read.table("GSE43696_series_matrix",comment.char = "!",stringsAsFactors = F,header=T,sep = "\t") duiying=read.table("duiying.txt",stringsAsFactors = F,header=T,sep = "\t") duiying <-duiying[,-2] rownames(fpkm)<-fpkm[,1] fpkm <-fpkm[,-1] RNAseq<-fpkm RNAseq = RNAseq[match(duiying$ID,rownames(RNAseq)),] RNAseq = cbind(duiying$Gene.symbol,RNAseq) RNAseq<-RNAseq[-1,] colnames(RNAseq)[1]<-"symbol" RNAseq1<-with(RNAseq,aggregate.data.frame(RNAseq[,2:109],by = list(symbol),FUN=mean)) rownames(RNAseq1)<-RNAseq1[,1] RNAseq1 <-RNAseq1[,-1] WGCNA_matrix = t(RNAseq1[order(apply(RNAseq1,1,mad), decreasing = T)[1:5000],]) library(WGCNA) s = abs(bicor(WGCNA_matrix)) powers = c(c(1:10), seq(from = 12, to=20, by=2)) sft = pickSoftThreshold(WGCNA_matrix, powerVector = powers, verbose = 5) plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2], xlab='Soft Threshold (power)',ylab='Scale Free Topology Model Fit,signed R^2', type='n', main = paste('Scale independence')); text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2], labels=powers,cex=1,col='red'); abline(h=0.90,col='red') #calculation of adjacency matrix beta = 8 a = s^beta #dissimilarity measure w = 1-a geneTree = hclust(as.dist(w), method = 'average') modules = cutreeDynamic(dendro = geneTree, distM = w, deepSplit = 4, pamRespectsDendro = FALSE, minClusterSize = 30) module.colours = labels2colors(modules) table(module.colours) plotDendroAndColors(geneTree, module.colours, 'Module colours', dendroLabels = FALSE, hang = 0.03, addGuide = TRUE, guideHang =1, main='') library(ape) MEs = moduleEigengenes(WGCNA_matrix, colors = module.colours, excludeGrey = FALSE)$eigengenes MEDiss = 1-cor(MEs); METree = hclust(as.dist(MEDiss), method = 'average'); par(mar = c(2, 2, 2, 2)) plot(METree,main = "Clustering of module eigengenes",xlab = "", sub = "") MEDissThres = 0.25 abline(h=MEDissThres, col = "red") merge = mergeCloseModules(WGCNA_matrix, module.colours, cutHeight = MEDissThres, verbose = 3) mergedColors = merge$colors; mergedMEs = merge$newMEs; plotDendroAndColors(geneTree, cbind(module.colours, mergedColors), c("Dynamic Tree Cut", "Merged dynamic"), dendroLabels = FALSE, hang = 0.03, addGuide = TRUE, guideHang = 0.05) ######### lscore= rep(NA,length(RNAseq1)) lscore[1:20] = 0 lscore[21:70] = 1 lscore[71:108] = 2 options(scipen=6) GS_lscore =as.matrix(sapply(1:ncol(WGCNA_matrix),function(x) (kruskal.test(WGCNA_matrix[,x]~lscore)$p.value))) rownames(GS_lscore) = colnames(WGCNA_matrix) ref_genes = colnames(WGCNA_matrix) library(org.Hs.eg.db) GO = toTable(org.Hs.egGO); SYMBOL = toTable(org.Hs.egSYMBOL) GO_data_frame = data.frame(GO$go_id, GO$Evidence,SYMBOL$symbol[match(GO$gene_id,SYMBOL$gene_id)]) library(AnnotationDbi) GO_ALLFrame = GOAllFrame(GOFrame(GO_data_frame, organism = 'Homo sapiens')) library(GSEABase) gsc <- GeneSetCollection(GO_ALLFrame, setType = GOCollection()) #perform GO enrichment analysis and save results to list - this make take several minutes library(GOstats) GSEAGO = vector('list',length(unique(modules))) for(i in 0:(length(unique(modules))-1)){ GSEAGO[[i+1]] = summary(hyperGTest(GSEAGOHyperGParams(name = 'Homo sapiens GO', geneSetCollection = gsc, geneIds = rownames(RNAseq1)[modules==i], universeGeneIds = ref_genes, ontology = 'BP', pvalueCutoff = 0.05, conditional = FALSE, testDirection = 'over'))) print(i) } cutoff_size = 100 GO_module_name = rep(NA,length(unique(modules))) for (i in 1:length(unique(modules))){ GO_module_name[i] = GSEAGO[[i]][GSEAGO[[i]]$Sizemean(pval)) colnames(modules_3)<-c("modules","pval") modules_5<-modules_m[which(modules_3$pval==TRUE),]