#------------------------------------------------------------------------------------------------ setwd("~") data1<-ReadAffy() Pset<-fitPLM(data1) image(Pset,type="weights",which=2,main="weights") image(Pset,type="resids",which=1,main="residuals") image(Pset,type="sign.resids",which=1,main="residuals.sign") library(RColorBrewer) colors<-brewer.pal(12,"Set3") Mbox(Pset,ylim=c(-1,1),color=colors,main="RLE",las=3)#RLE boxplot(Pset,ylim=c(0.95,1.22),color=colors,main="NUSE",las=3)#NUSE data.deg<-AffyRNAdeg(data1) plotAffyRNAdeg(data.deg)# legend("topleft",sampleNames(data1),lwd=1,inset = 0.05,cex=0.2) #------------------------------------------------------------------------------------------------ #------------------------------------------------------------------------------------------------ setwd("~") data4<-ReadAffy() eset.rma<-rma(data4) group_tumor_exprs<-exprs(eset.rma) probeid<-rownames(group_tumor_exprs) group_tumor_exprs<-cbind(probeid,group_tumor_exprs) write.table(group_tumor_exprs,file = "group.tumor.expres.txt",sep = "\t",quote=F,row.names = F) setwd("~") data5<-ReadAffy() eset.rma<-rma(data5) group_normal_exprs<-exprs(eset.rma) probeid<-rownames(group_normal_exprs) group_normal_exprs<-cbind(probeid,group_normal_exprs) write.table(group_normal_exprs,file = "group.normal.expres.txt",sep = "\t",quote=F,row.names = F) setwd("~") group_tumor_exprs<-read.table("group.tumor.expres.txt",header = T,sep = "\t") group_normal_exprs<-read.table("group.normal.expres.txt",header = T,sep = "\t") probe_expres<-merge(group_tumor_exprs,group_normal_exprs,by="probeid") write.table(probe_expres,file = "twogroups.probeid.expres.txt",sep = "\t",quote=F,row.names = F) ###############============================================================================================ setwd("~") probe_exp<-read.table("twochips.probeid.exprs.combat.txt",header = T,sep = "\t",row.names = 1) probeid_geneid<-read.table("GPL570-55999.txt",header = T,sep = "\t") probe_name<-rownames(probe_exp) loc<-match(probeid_geneid[,1],probe_name) probe_exp<-probe_exp[loc,] raw_geneid<-as.numeric(as.matrix(probeid_geneid[,3])) index<-which(!is.na(raw_geneid)) geneid<-raw_geneid[index] exp_matrix<-probe_exp[index,] geneidfactor<-factor(geneid) gene_exp_matrix<-apply(exp_matrix,2,function(x) tapply(x,geneidfactor,mean)) rownames(gene_exp_matrix)<-levels(geneidfactor) geneid<-rownames(gene_exp_matrix) gene_exp_matrix2<-cbind(geneid,gene_exp_matrix) write.table(gene_exp_matrix2,file = "geneid.expres.txt",sep = "\t",quote=F,row.names = F) loc<-match(rownames(gene_exp_matrix),probeid_geneid[,3]) rownames(gene_exp_matrix)=probeid_geneid[loc,2] genesymbol<-rownames(gene_exp_matrix) gene_exp_matrix3<-cbind(genesymbol,gene_exp_matrix) write.table(gene_exp_matrix3,file = "genesymbol.expres.txt",sep = "\t",quote=F,row.names = F) library(impute) gene_exp_matrix<-read.table("genesymbol.expres.txt",header=T,sep="\t",row.names=1) gene_exp_matrix<-as.matrix(gene_exp_matrix) imputed_gene_exp<-impute.knn(gene_exp_matrix,k=10,rowmax=0.5,colmax=0.8,maxp=1500,rng.seed=362436069) GeneExp<-imputed_gene_exp$data genesymbol<-rownames(GeneExp) GeneExp<-cbind(genesymbol,GeneExp) write.table(GeneExp,file="genesyb.exprs.afterimpute.txt",sep='\t',quote=F,row.names=F) ###############============================================================================================ ###************************************************* ###############============================================================================================ # followed by WGCNA analysis ###############============================================================================================ setwd("~") expro.upper<-read.table("geneInput_variancetop0.25.txt",header=T,sep="\t",row.names=1) datExpr_training=as.data.frame(t(expro.upper)); library(WGCNA) gsg = goodSamplesGenes(datExpr_training, verbose = 3); gsg$allOK ###----------------------------------------------------------------------------------------- sampleTree = hclust(dist(datExpr_training), method = "average"); sizeGrWindow(12,9) par(cex = 0.45); par(mar = c(0,4,2,0)) plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5, cex.axis = 1.5, cex.main = 2) datExpr_training.1<-sampleTree ###------------------------------------------------------------------------------------------ traitData = read.csv('clinical traits-1.txt',sep = '\t'); dim(traitData) names(traitData) library(stringr) tumorSamples = datExpr_training.1$labels; traitRows = match(tumorSamples, traitData$Accession); datTraits = traitData[traitRows, -1]; datTraitsColor_training <- numbers2colors(datTraits, signed = FALSE); sizeGrWindow(15,11) plotDendroAndColors(sampleTree, datTraitsColor_training, groupLabels = names(datTraits), colorHeight = 0.2, colorHeightBase = 0.2, colorHeightMax = 0.4, rowWidths = NULL, dendroLabels = NULL, addGuide = FALSE, guideAll = FALSE, guideCount = 50, guideHang = 0.2, addTextGuide = FALSE, cex.colorLabels = 0.8, cex.dendroLabels = 0.7, cex.rowText = 0.8, marAll = c(1, 5, 3, 1), saveMar = TRUE, main = "Sample dendrogram and trait heatmap") save(datExpr_training.1, datTraits,file = "training-01-dataInput.RData") ###========================================================================== ###========================================================================== ###------------------------------------------------------------------------------------------ ###------------------------------------------------------------------------------------------ powers = c(c(1:10), seq(from = 12, to=30, by=2)) nGenes = ncol(datExpr_training) nSamples = nrow(datExpr_training) sft = pickSoftThreshold(datExpr_training, powerVector = powers, verbose = 24)# sizeGrWindow(9, 8) par(mfrow = c(1,2)); cex1 = 0.9; # Scale-free topology fit index as a function of the softthresholding power plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2], xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n", main = paste("Scale independence")); text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2], labels=powers,cex=cex1,col="red"); # this line corresponds to using an R^2 cut-off of h abline(h=0.90,col="red") ###------------------------------------------------------------------------------------------ # Mean connectivity as a function of the soft-thresholding power plot(sft$fitIndices[,1], sft$fitIndices[,5], xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n", main = paste("Mean connectivity")) text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red") ###------------------------------------------------------------------------------------------ #detect whether the network is scale free network ADJ1=abs(cor(datExpr_training,use="p"))^4 k=as.vector(apply(ADJ1,2,sum, na.rm=T)) sizeGrWindow(10,5) par(mfrow=c(1,2)) hist(k) scaleFreePlot(k, main="Check scale free topology\n") ###------------------------------------------------------------------------------------------ softPower = 4; adjacency = adjacency(datExpr_training, power = softPower) ###------------------------------------------------------------------------------------------ TOM = TOMsimilarity(adjacency); dissTOM = 1-TOM ###------------------------------------------------------------------------------------------ geneTree = hclust(as.dist(dissTOM), method = "average"); # Plot the resulting clustering tree (dendrogram) sizeGrWindow(12,12) plot(geneTree, xlab="", sub="", main = "Gene clustering on TOM-based dissimilarity", labels = FALSE, hang = 0.04); ###------------------------------------------------------------------------------------------ minModuleSize = 30; # Module identification using dynamic tree cut: dynamicMods_training = cutreeDynamic(dendro = geneTree, distM = dissTOM, deepSplit = 2, pamRespectsDendro = FALSE, minClusterSize = minModuleSize); table(dynamicMods_training) dynamicColors_training = labels2colors(dynamicMods_training) table(dynamicColors_training) sizeGrWindow(8,12) plotDendroAndColors(geneTree, dynamicColors_training, "Dynamic Tree Cut", dendroLabels = FALSE, hang = 0.03, addGuide = TRUE, guideHang = 0.05, main = "Gene dendrogram and module colors") #=================================================================== #=================================================================== # eigengenes MEList = moduleEigengenes(datExpr_training, colors = dynamicColors_training) MEs = MEList$eigengenes MEDiss = 1-cor(MEs); # Cluster module eigengenes METree = hclust(as.dist(MEDiss), method = "average"); # Plot the result sizeGrWindow(7, 6) plot(METree, main = "Clustering of module eigengenes", xlab = "", sub = "") #=================================================================== MEDissThres = 0.25 abline(h=MEDissThres, col = "red") merge = mergeCloseModules(datExpr_training, dynamicColors_training, cutHeight = MEDissThres, verbose = 4) mergedColors = merge$colors; mergedMEs = merge$newMEs; #=================================================================== sizeGrWindow(12, 9) plotDendroAndColors(geneTree, cbind(dynamicColors_training, mergedColors), c("Dynamic Tree Cut", "Merged dynamic"), dendroLabels = FALSE, hang = 0.03, addGuide = TRUE, guideHang = 0.05) # 9.±£´æ #=================================================================== moduleColors = mergedColors colorOrder = c("grey", standardColors(50)); moduleLabels = match(moduleColors, colorOrder)-1; MEs = mergedMEs; save(MEs, moduleLabels, moduleColors, geneTree, file = "training-02-networkConstruction-StepByStep.RData") ###========================================================================== ###========================================================================== ###------------------------------------------------------------------------------------------ nGenes = ncol(datExpr_training); nSamples = nrow(datExpr_training); MEs0 = moduleEigengenes(datExpr_training, moduleColors)$eigengenes MEs = orderMEs(MEs0) DataForLogis<-cbind(MEs,datTraits) DataForLogis1<-DataForLogis[,-13] write.table(DataForLogis1,file = "DataForLogis1.txt",sep = "\t",quote=F,row.names = F) DataForLogis1<-read.table("DataForLogis1.txt",header=T,sep="\t") fit<-glm(type~DataForLogis1$MEgrey,family = binomial,data=DataForLogis1) summary(fit) log10(pvalue) a<-read.table("regression p value.txt",sep="\t",header=T,check.names = T) barplot(height=a$absolute.log10.p.., names.arg =a$modules, main="Screening significant modules",ylab = "log10 transformation of the P-value", xlab = "Modules",ylim =c(0,20),border = TRUE,width=10,col=c("black","turquoise","brown","cyan","grey","purple","yellow","pink","green","salmon","greenyellow","blue")) #============================================================== #============================================================== Surtime = as.data.frame(datTraits$type); names(Surtime) = "Surtime" modNames = substring(names(MEs), 3) geneModuleMembership = as.data.frame(cor(datExpr_training, MEs, use = "p")); MMPvalue = as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership), nSamples)); names(geneModuleMembership) = paste("MM", modNames, sep=""); names(MMPvalue) = paste("p.MM", modNames, sep=""); #========================================================== ###************************************************* ###========================================================================== setwd("~") library(affy) library(affyPLM) data4<-ReadAffy() eset.rma<-rma(data4) group_tumor_exprs<-exprs(eset.rma) probeid<-rownames(group_tumor_exprs) group_tumor_exprs<-cbind(probeid,group_tumor_exprs) write.table(group_tumor_exprs,file = "probeid.expres.txt",sep = "\t",quote=F,row.names = F) probe_exp<-read.table("probeid.expres.txt",header = T,sep = "\t",row.names = 1) probeid_geneid<-read.table("GPL570-55999.txt",header = T,sep = "\t") probe_name<-rownames(probe_exp) loc<-match(probeid_geneid[,1],probe_name) probe_exp<-probe_exp[loc,] raw_geneid<-as.numeric(as.matrix(probeid_geneid[,3])) index<-which(!is.na(raw_geneid)) geneid<-raw_geneid[index] exp_matrix<-probe_exp[index,] geneidfactor<-factor(geneid) gene_exp_matrix<-apply(exp_matrix,2,function(x) tapply(x,geneidfactor,mean)) rownames(gene_exp_matrix)<-levels(geneidfactor) geneid<-rownames(gene_exp_matrix) gene_exp_matrix2<-cbind(geneid,gene_exp_matrix) write.table(gene_exp_matrix2,file = "geneid.expres.txt",sep = "\t",quote=F,row.names = F) loc<-match(rownames(gene_exp_matrix),probeid_geneid[,3]) rownames(gene_exp_matrix)=probeid_geneid[loc,2] genesymbol<-rownames(gene_exp_matrix) gene_exp_matrix3<-cbind(genesymbol,gene_exp_matrix) write.table(gene_exp_matrix3,file = "genesymbol.expres.txt",sep = "\t",quote=F,row.names = F) library(impute) gene_exp_matrix<-read.table("genesymbol.expres.txt",header=T,sep="\t",row.names=1) gene_exp_matrix<-as.matrix(gene_exp_matrix) imputed_gene_exp<-impute.knn(gene_exp_matrix,k=10,rowmax=0.5,colmax=0.8,maxp=3000,rng.seed=362436069) GeneExp<-imputed_gene_exp$data genesymbol<-rownames(GeneExp) GeneExp<-cbind(genesymbol,GeneExp) write.table(GeneExp,file="genesyb.exprs.afterimpute.txt",sep='\t',quote=F,row.names=F) ###========================================================================== setwd("~") a<-read.table("5115genes.txt",header=T,sep="\t") b<-read.table("genesyb.exprs.afterimpute.txt",header=T,sep="\t") c<-merge(a,b,by="genesymbol",all.x=TRUE) write.table(c,file="geneInput wgcna.txt",sep='\t',quote=F,row.names=T) options(stringsAsFactors = FALSE); expro.upper<-read.table("geneInput wgcna.txt",header=T,sep="\t",row.names=1) datExpr_51725=as.data.frame(t(expro.upper)); library(WGCNA) gsg = goodSamplesGenes(datExpr_51725, verbose = 4); gsg$allOK ###----------------------------------------------------------------------------------------- sampleTree = hclust(dist(datExpr_51725), method = "average"); sizeGrWindow(12,9) par(cex = 0.45); par(mar = c(0,4,2,0)) plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5, cex.axis = 1.5, cex.main = 2) datExpr_51725.1<-sampleTree ###------------------------------------------------------------------------------------------ traitData = read.csv('clinical traits.txt',sep = '\t'); dim(traitData) names(traitData) library(stringr) tumorSamples = datExpr_51725.1$labels; traitRows = match(tumorSamples, traitData$Accession) datTraits = traitData[traitRows, -1]; datTraitsColor_51725<- numbers2colors(as.numeric(datTraits), signed = FALSE); sizeGrWindow(15,11) plotDendroAndColors(sampleTree, datTraitsColor_51725, groupLabels = names(datTraits), colorHeight = 0.2, colorHeightBase = 0.2, colorHeightMax = 0.4, rowWidths = NULL, dendroLabels = NULL, addGuide = FALSE, guideAll = FALSE, guideCount = 50, guideHang = 0.2, addTextGuide = FALSE, cex.colorLabels = 0.8, cex.dendroLabels = 0.7, cex.rowText = 0.8, marAll = c(1, 5, 3, 1), saveMar = TRUE, main = "Sample dendrogram and trait heatmap") save(datExpr_51725.1, datTraits,file = "51725-01-dataInput.RData") ###========================================================================== ###========================================================================== ###------------------------------------------------------------------------------------------ powers = c(c(1:10), seq(from = 12, to=20, by=2)) nGenes = ncol(datExpr_51725) nSamples = nrow(datExpr_51725) sft = pickSoftThreshold(datExpr_51725, powerVector = powers, verbose = 16) sizeGrWindow(9, 5) par(mfrow = c(1,2)); cex1 = 0.9; plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2], xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n", main = paste("Scale independence")); text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2], labels=powers,cex=cex1,col="red"); abline(h=0.90,col="red") ###------------------------------------------------------------------------------------------ plot(sft$fitIndices[,1], sft$fitIndices[,5], xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n", main = paste("Mean connectivity")) text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red") ###------------------------------------------------------------------------------------------ ADJ1=abs(cor(datExpr_51725,use="p"))^9 k=as.vector(apply(ADJ1,2,sum, na.rm=T))#?????Ñ¡Ò»???? sizeGrWindow(9,5) par(mfrow=c(1,2)) hist(k) scaleFreePlot(k, main="Check scale free topology\n") ###------------------------------------------------------------------------------------------ softPower = 9; adjacency = adjacency(datExpr_51725, power = softPower) ###------------------------------------------------------------------------------------------ TOM = TOMsimilarity(adjacency); dissTOM = 1-TOM ###------------------------------------------------------------------------------------------ geneTree = hclust(as.dist(dissTOM), method = "average"); sizeGrWindow(12,12) plot(geneTree, xlab="", sub="", main = "Gene clustering on TOM-based dissimilarity", labels = FALSE, hang = 0.04); ###------------------------------------------------------------------------------------------ minModuleSize = 30; sizeGrWindow(8,12) plotDendroAndColors(geneTree, mergedColors, "Dynamic Tree Cut", dendroLabels = FALSE, hang = 0.03, addGuide = TRUE, guideHang = 0.05, main = "Gene dendrogram and module colors") save(mergedColors, geneTree, file = "51725-02-dynamic tree cut.RData") ###========================================================================== ###========================================================================== ###------------------------------------------------------------------------------------------ save(datExpr_51725,datExpr_training,mergedColors,file = "training VS 51725 expression.RData") lnames=load("training VS 51725 expression.RData") lnames #calculating of modules preservation setLabels=c("training","GSE51725"); multiExpr=list(training=list(data=datExpr_training),GSE51725=list(data=datExpr_51725)); multiColor=list(training=mergedColors,GSE51725=mergedColors); a<-multiColor[["GSE51725"]] write.table(a,file="GSE13911-module color.txt",sep='\t',quote=F,row.names=T) nSets=2; system.time({ mp=modulePreservation(multiExpr,multiColor, referenceNetworks=c(1:2), nPermutations=200, randomSeed=1, verbose=3, maxModuleSize = 3000,maxGoldModuleSize = 3000) }) save(mp,file="trainingOnly-modulePreservation.RData") load(file="trainingOnly-modulePreservation.RData") #display the preservation results ref=1 test=2 statsObs=cbind(mp$quality$observed[[ref]][[test]][,-1],mp$preservation$observed[[ref]][[test]][,-1]) statsZ=cbind(mp$quality$Z[[ref]][[test]][,-1],mp$preservation$Z[[ref]][[test]][,-1]) print(cbind(statsObs[,c("medianRank.pres","medianRank.qual")], signif(statsZ[,c("Zsummary.pres","Zsummary.qual")],2))) #display the preservation figure modColors=rownames(mp$preservation$observed[[ref]][[test]]) moduleSizes=mp$preservation$Z[[ref]][[test]][,1]; #leave grey and gold modules out plotMods=!(modColors %in% c("grey","gold")); #Text labels for points text=modColors[plotMods]; #Auxiliary covenience variable plotData=cbind(mp$preservation$observed[[ref]][[test]][,2],mp$preservation$Z[[ref]][[test]][,2]) #Main titles for the plot mains=c("Preservation Median rank","Preservation Zsummary"); #start the plot sizeGrWindow(10,5) pdf(file = "modulepreservation-2-2.pdf",height = 10,width=20) par(mfrow=c(1,2)) par(mar=c(4.5,4.5,2.5,1)) for(p in 1:2) { min=min(plotData[,p],na.rm=TRUE); max=max(plotData[,p],na.rm = TRUE); #adjust ploting ranges appropriately if(p==2) { if (min>-max/10) min=-max/10 ylim=c(min-0.1*(max-min),max+0.1*(max-min)) } else ylim=c(max+0.1*(max-min),min-0.1*(max-min)) plot(moduleSizes[plotMods],plotData[plotMods,p],col=1,bg=modColors[plotMods],pch=21, main=mains[p], cex=3.5, ylab=mains[p],xlab="Module size",log="x", ylim=ylim, xlim=c(10,2300),cex.lab=2,cex.axis=2,cex.main=2) labelPoints(moduleSizes[plotMods],plotData[plotMods,p],text,cex=1.3,offs=0.08); # for Zsummary, add threshold lines if(p==2) { abline(h=0) abline(h=2,col="blue",lty=2) abline(h=10,col="darkgreen",lty=2) } } dev.off() ###========================================================================== ###************************************************* ###========================================================================== ###========================================================================= setwd("~") library(limma) rt<-read.table("geneInput_variancetop0.25.txt",header = T, sep = "\t", row.names = "genesymbol") class<-c(rep("tumor",300),rep("normal",100)) design<-model.matrix(~factor(class)) colnames(design)<-c("tumor","normal") fit<-lmFit(rt,design) fit2<-eBayes(fit) allDiff=topTable(fit2,adjust="fdr",coef = 2,number = 20000) write.table(allDiff,file = "limmaTab.txt",sep = "\t",quote = F) diffLab<-allDiff[with(allDiff,((logFC>1)|logFC<(-1))&adj.P.Val<0.05),] diffExpLevel<-rt[rownames(diffLab),] qvalue=allDiff[rownames(diffLab),]$adj.P.Val diffExpQvalue=cbind(qvalue,diffExpLevel) write.table(diffExpQvalue,file = "diffExpLevel.xls",sep = "\t",quote=F) a<-read.table("candidate genes 20201112.txt",header=T,check.names=F,sep="\t") b<-read.table("diffExpLevel.txt",header=T,check.names=F,sep="\t") c<-merge(a,b,by="genesymbol") write.table(c,file = "overlap-candidate genes 20201112.txt",sep = "\t",quote=F) setwd("~") diffcandidate<-read.table("training set-original 20201111.txt",row.names=1,header=T,sep="\t") diffcandidate<-as.matrix(diffcandidate) annotation<-read.table("sample type for heatmap.txt",row.names=1,header=T,sep="\t") install.packages("pheatmap") library(pheatmap) library(heatmap) library(RColorBrewer) annotationcol = factor(c(rep("blue", 300), rep("pink", 100))) pheatmap(diffcandidate,annotation_col = annotation, cluster_cols = FALSE) data1<-read.table("expression-candidate genes 20201112 for check outcome final.txt",header=T,check.names=F,sep="\t") library(glmnet) x<-model.matrix(outcome~.,data1)[,-1] y<-data1$outcome cv.fit <- cv.glmnet(x, y, family="binomial",alpha=1,type.measure = "mse", nfolds = 10) cv.fit$lambda.min fit<-glmnet(x, y, family="binomial",alpha=1) Coefficients <- coef(fit, s = cv.fit$lambda.min) Active.Index <- which(Coefficients != 0) Active.Coefficients <- Coefficients[Active.Index] Coefficients exp(Coefficients) ###========================================================================== ###************************************************* ###========================================================================== setwd("C:/Users/vivian/Desktop/GC/revise20200325/testing set/validation-GS0.6 MM0.8-DEG") a<-read.table("hub genes20201111.txt",header = T,sep = "\t") b<-read.table("genesyb.exprs.afterimpute-PC.txt",header = T,sep = "\t") c<-merge(a,b,by="genesymbol") write.table(c,file="testing set-PC-logis 20201211.txt",sep = "\t",quote=F,row.names = F) a<-read.table("hub genes20201111.txt",header = T,sep = "\t") b<-read.table("genesyb.exprs.afterimpute-CC.txt",header = T,sep = "\t") c<-merge(a,b,by="genesymbol") write.table(c,file="testing set-CC-logis 20201211.txt",sep = "\t",quote=F,row.names = F) #hub genes identifications ###========================================================================== # ANN library(neuralnet) train_dataset<-read.table("training set-logis 20201111.txt",header = T,sep = "\t") train_dataset<-as.data.frame(train_dataset) test_dataset<-read.table("testing set-logis 20201111.txt",header = T,sep = "\t") test_dataset<-as.data.frame(test_dataset) train_model<-neuralnet(outcome~ADIPOQ+ARHGAP39+ATAD3A+C1orf95+CWH43+INHBA+GRIK3+LYVE1+RDH12+SIGLEC11+SCNN1G, data=train_dataset,hidden = 1) model_results<-neuralnet::compute(train_model,test_dataset[1:11]) predicted_results<-as.numeric(model_results$net.result) predicted_results1<-as.data.frame(predicted_results1) test_dataset1<-as.data.frame(test_dataset$outcome) a<-predicted_results1-test_dataset1 table(a) date_roc2 <- roc(test_dataset$outcome, predicted_results) rocobj1 <- plot.roc(date_roc2,main="AUC Curves", col="blue", print.auc = TRUE,print.auc.x=0.2,print.auc.y=0.2,legacy.axes = T) ###========================================================================== ###========================================================================== #hub genes identifications IN PC ###========================================================================== # ANN library(neuralnet) train_dataset<-read.table("training set-logis 20201111.txt",header = T,sep = "\t") train_dataset<-as.data.frame(train_dataset) test_dataset<-read.table("testing set-PC-logis 20201211.txt",header = T,sep = "\t") test_dataset<-as.data.frame(test_dataset) train_model<-neuralnet(outcome~ADIPOQ+ARHGAP39+ATAD3A+C1orf95+CWH43+GRIK3+INHBA+LYVE1+RDH12+SIGLEC11+SCNN1G, data=train_dataset,hidden = 2) model_results<-neuralnet::compute(train_model,test_dataset[1:11]) predicted_results<-as.numeric(model_results$net.result) predicted_results1<-as.data.frame(predicted_results1) test_dataset1<-as.data.frame(test_dataset$outcome) a<-predicted_results1-test_dataset1 table(a) date_roc2 <- roc(test_dataset$outcome, predicted_results) rocobj1 <- plot.roc(date_roc2,main="AUC Curves", col="blue", print.auc = TRUE,print.auc.x=0.2,print.auc.y=0.2,legacy.axes = T) ###========================================================================== ###========================================================================== #hub genes identifications IN CC ###========================================================================== # ANN library(neuralnet) train_dataset<-read.table("training set-logis 20200923.txt",header = T,sep = "\t") train_dataset<-as.data.frame(train_dataset) test_dataset<-read.table("testing set-CC-logis 20200923.txt",header = T,sep = "\t") test_dataset<-as.data.frame(test_dataset) train_model<-neuralnet(outcome~ACADL+ADIPOQ+ARHGAP39+ATAD3A+C1orf95+CCKBR+GRIK3+LYVE1+SIGLEC11+SCNN1G+TXLNB, data=train_dataset,hidden = 2) model_results<-neuralnet::compute(train_model,test_dataset[1:11]) predicted_results<-as.numeric(model_results$net.result) predicted_results1<-as.data.frame(predicted_results1) test_dataset1<-as.data.frame(test_dataset$outcome) a<-predicted_results1-test_dataset1 table(a) date_roc2 <- roc(test_dataset$outcome, predicted_results) rocobj1 <- plot.roc(date_roc2,main="AUC Curves", col="blue", print.auc = TRUE,print.auc.x=0.2,print.auc.y=0.2,legacy.axes = T) ###========================================================================== ###========================================================================== #GO analysis ###========================================================================== #---------------------------------------------------------------------- source("http://www.bioconductor.org/biocLite.R") library(GOplot) setwd("~") gene_blue<-read.table("GeneList symb_Blue.txt",header = T,sep = "\t") diffExp<-read.table("limmaTab.txt",header = T,sep = "\t") diffExp_blue<-merge(gene_blue,diffExp,by="genesymbol") write.table(diffExp_blue,file="diffExp_blue.txt",sep = "\t",quote=F,row.names = F) gene_brown<-read.table("GeneList symb_Black.txt",header = T,sep = "\t") diffExp<-read.table("limmaTab.txt",header = T,sep = "\t") diffExp_brown<-merge(gene_brown,diffExp,by="genesymbol") write.table(diffExp_brown,file="diffExp_black.txt",sep = "\t",quote=F,row.names = F) gene_brown<-read.table("GeneList symb_Greenyellow.txt",header = T,sep = "\t") diffExp<-read.table("limmaTab.txt",header = T,sep = "\t") diffExp_brown<-merge(gene_brown,diffExp,by="genesymbol") write.table(diffExp_brown,file="diffExp_greenyellow.txt",sep = "\t",quote=F,row.names = F) gene_brown<-read.table("GeneList symb_Turquoise.txt",header = T,sep = "\t") diffExp<-read.table("limmaTab.txt",header = T,sep = "\t") diffExp_brown<-merge(gene_brown,diffExp,by="genesymbol") write.table(diffExp_brown,file="diffExp_turquoise.txt",sep = "\t",quote=F,row.names = F) gene_brown<-read.table("GeneList symb_Salmon.txt",header = T,sep = "\t") diffExp<-read.table("limmaTab.txt",header = T,sep = "\t") diffExp_brown<-merge(gene_brown,diffExp,by="genesymbol") write.table(diffExp_brown,file="diffExp_salmon.txt",sep = "\t",quote=F,row.names = F) #---------------------------------------------------------------------- david<-read.table("blue GO BP_input.txt",header = T,sep = "\t") david<-as.data.frame(david) diffEXp<-read.table("diffExp_blue-1.txt",header = T,sep = "\t") diffEXp<-as.data.frame(diffEXp) circ <- circle_dat(david, diffEXp) write.table(circ,file = "circ_blue.txt",sep = "\t",quote=F,row.names = F) circ <- read.table("circ_blue.txt",header = T,sep = "\t") GOBubble(circ,table.col = T, table.legend = F,labels = 17) #---------------------------------------------------------------------- #---------------------------------------------------------------------- david<-read.table("black GO BP_input.txt",header = T,sep = "\t") david<-as.data.frame(david) diffEXp1<-read.table("diffExp_black-1.txt",header = T,sep = "\t") diffEXp1<-as.data.frame(diffEXp1) circ <- circle_dat(david, diffEXp1) write.table(circ,file = "circ-black.txt",sep = "\t",quote=F,row.names = F) circ <- read.table("circ-black.txt",header = T,sep = "\t") GOBubble(circ,table.col = T, table.legend = F,labels = 2.8) #---------------------------------------------------------------------- #---------------------------------------------------------------------- david<-read.table("greenyellow GO BP_input.txt",header = T,sep = "\t") david<-as.data.frame(david) diffEXp1<-read.table("diffExp_greenyellow-1.txt",header = T,sep = "\t") diffEXp1<-as.data.frame(diffEXp1) circ <- circle_dat(david, diffEXp1) write.table(circ,file = "circ_greenyellow.txt",sep = "\t",quote=F,row.names = F) circ <- read.table("circ_greenyellow 20200928.txt",header = T,sep = "\t") GOBubble(circ,table.col = T, table.legend = F,labels = 14) #---------------------------------------------------------------------- #---------------------------------------------------------------------- david<-read.table("turquoise GO BP_input.txt",header = T,sep = "\t") david<-as.data.frame(david) diffEXp1<-read.table("diffExp_turquoise-1.txt",header = T,sep = "\t") diffEXp1<-as.data.frame(diffEXp1) circ <- circle_dat(david, diffEXp1) write.table(circ,file = "circ_turquoise.txt",sep = "\t",quote=F,row.names = F) circ <- read.table("circ_turquoise 20200928.txt",header = T,sep = "\t") GOBubble(circ,table.col = T, table.legend = F,labels = 5) #---------------------------------------------------------------------- #---------------------------------------------------------------------- david<-read.table("salmon GO BP_input.txt",header = T,sep = "\t") david<-as.data.frame(david) diffEXp1<-read.table("diffExp_salmon-1.txt",header = T,sep = "\t") diffEXp1<-as.data.frame(diffEXp1) circ <- circle_dat(david, diffEXp1) write.table(circ,file = "circ_salmon.txt",sep = "\t",quote=F,row.names = F) circ <- read.table("circ_salmon 20201111.txt",header = T,sep = "\t") GOBubble(circ,table.col = T, table.legend = F,labels = 8) #---------------------------------------------------------------------- ###************************************************* ###========================================================================== #KEGG analysis ###========================================================================== #---------------------------------------------------------------------- setwd("~") library(ggplot2) KEGG<-read.table("blue KEGG 20200928.txt",header = T,sep = "\t") p<-ggplot(KEGG,aes(-log10(PValue),Term)) q<-p+geom_point(aes(size=Count,col=-log10(PValue))) q+scale_colour_gradient(low="blue",high="red") KEGG<-read.table("black KEGG 20200928.txt",header = T,sep = "\t") p<-ggplot(KEGG,aes(-log10(PValue),Term)) q<-p+geom_point(aes(size=Count,col=-log10(PValue))) q+scale_colour_gradient(low="blue",high="red") KEGG<-read.table("greenyellow KEGG 20200928.txt",header = T,sep = "\t") p<-ggplot(KEGG,aes(-log10(PValue),Term)) q<-p+geom_point(aes(size=Count,col=-log10(PValue))) q+scale_colour_gradient(low="blue",high="red") KEGG<-read.table("turquoise KEGG 20200928.txt",header = T,sep = "\t") p<-ggplot(KEGG,aes(-log10(PValue),Term)) q<-p+geom_point(aes(size=Count,col=-log10(PValue))) q+scale_colour_gradient(low="blue",high="red") KEGG<-read.table("salmon KEGG 20201112.txt",header = T,sep = "\t") p<-ggplot(KEGG,aes(-log10(PValue),Term)) q<-p+geom_point(aes(size=Count,col=-log10(PValue))) q+scale_colour_gradient(low="blue",high="red")