#------------------------------------------------------------------------------------------------
setwd("~")
data1<-ReadAffy()
Pset<-fitPLM(data1)
image(Pset,type="weights",which=2,main="weights")
image(Pset,type="resids",which=1,main="residuals")
image(Pset,type="sign.resids",which=1,main="residuals.sign")
library(RColorBrewer)
colors<-brewer.pal(12,"Set3")
Mbox(Pset,ylim=c(-1,1),color=colors,main="RLE",las=3)#RLE
boxplot(Pset,ylim=c(0.95,1.22),color=colors,main="NUSE",las=3)#NUSE
data.deg<-AffyRNAdeg(data1)
plotAffyRNAdeg(data.deg)#
legend("topleft",sampleNames(data1),lwd=1,inset = 0.05,cex=0.2)
#------------------------------------------------------------------------------------------------

#------------------------------------------------------------------------------------------------
setwd("~")
data4<-ReadAffy()
eset.rma<-rma(data4)
group_tumor_exprs<-exprs(eset.rma)
probeid<-rownames(group_tumor_exprs)
group_tumor_exprs<-cbind(probeid,group_tumor_exprs)
write.table(group_tumor_exprs,file = "group.tumor.expres.txt",sep = "\t",quote=F,row.names = F)


setwd("~")
data5<-ReadAffy()
eset.rma<-rma(data5)
group_normal_exprs<-exprs(eset.rma)
probeid<-rownames(group_normal_exprs)
group_normal_exprs<-cbind(probeid,group_normal_exprs)
write.table(group_normal_exprs,file = "group.normal.expres.txt",sep = "\t",quote=F,row.names = F)


setwd("~")
group_tumor_exprs<-read.table("group.tumor.expres.txt",header = T,sep = "\t")
group_normal_exprs<-read.table("group.normal.expres.txt",header = T,sep = "\t")
probe_expres<-merge(group_tumor_exprs,group_normal_exprs,by="probeid")
write.table(probe_expres,file = "twogroups.probeid.expres.txt",sep = "\t",quote=F,row.names = F)

###############============================================================================================
setwd("~")
probe_exp<-read.table("twochips.probeid.exprs.combat.txt",header = T,sep = "\t",row.names = 1)
probeid_geneid<-read.table("GPL570-55999.txt",header = T,sep = "\t")
probe_name<-rownames(probe_exp)
loc<-match(probeid_geneid[,1],probe_name)
probe_exp<-probe_exp[loc,]
raw_geneid<-as.numeric(as.matrix(probeid_geneid[,3]))
index<-which(!is.na(raw_geneid))
geneid<-raw_geneid[index]
exp_matrix<-probe_exp[index,]
geneidfactor<-factor(geneid)
gene_exp_matrix<-apply(exp_matrix,2,function(x) tapply(x,geneidfactor,mean))
rownames(gene_exp_matrix)<-levels(geneidfactor)
geneid<-rownames(gene_exp_matrix)
gene_exp_matrix2<-cbind(geneid,gene_exp_matrix)
write.table(gene_exp_matrix2,file = "geneid.expres.txt",sep = "\t",quote=F,row.names = F)
loc<-match(rownames(gene_exp_matrix),probeid_geneid[,3])
rownames(gene_exp_matrix)=probeid_geneid[loc,2]
genesymbol<-rownames(gene_exp_matrix)
gene_exp_matrix3<-cbind(genesymbol,gene_exp_matrix)
write.table(gene_exp_matrix3,file = "genesymbol.expres.txt",sep = "\t",quote=F,row.names = F)

library(impute)
gene_exp_matrix<-read.table("genesymbol.expres.txt",header=T,sep="\t",row.names=1)
gene_exp_matrix<-as.matrix(gene_exp_matrix)
imputed_gene_exp<-impute.knn(gene_exp_matrix,k=10,rowmax=0.5,colmax=0.8,maxp=1500,rng.seed=362436069)
GeneExp<-imputed_gene_exp$data
genesymbol<-rownames(GeneExp)
GeneExp<-cbind(genesymbol,GeneExp)
write.table(GeneExp,file="genesyb.exprs.afterimpute.txt",sep='\t',quote=F,row.names=F)
###############============================================================================================



###*************************************************
###############============================================================================================
# followed by WGCNA analysis
###############============================================================================================
setwd("~")
expro.upper<-read.table("geneInput_variancetop0.25.txt",header=T,sep="\t",row.names=1)
datExpr_training=as.data.frame(t(expro.upper));
library(WGCNA)
gsg = goodSamplesGenes(datExpr_training, verbose = 3);
gsg$allOK

###-----------------------------------------------------------------------------------------
sampleTree = hclust(dist(datExpr_training), method = "average");
sizeGrWindow(12,9)
par(cex = 0.45);
par(mar = c(0,4,2,0))
plot(sampleTree, main = "Sample clustering to detect outliers",
     sub="", xlab="",
     cex.lab = 1.5, cex.axis = 1.5, cex.main = 2)
datExpr_training.1<-sampleTree

###------------------------------------------------------------------------------------------
traitData = read.csv('clinical traits-1.txt',sep = '\t');
dim(traitData)
names(traitData)
library(stringr)
tumorSamples = datExpr_training.1$labels;
traitRows = match(tumorSamples, traitData$Accession);
datTraits = traitData[traitRows, -1];

datTraitsColor_training <- numbers2colors(datTraits, signed = FALSE);
sizeGrWindow(15,11)
plotDendroAndColors(sampleTree, datTraitsColor_training,
                    groupLabels = names(datTraits),
                    colorHeight = 0.2,
                    colorHeightBase = 0.2,
                    colorHeightMax = 0.4,
                    rowWidths = NULL,
                    dendroLabels = NULL,
                    addGuide = FALSE, guideAll = FALSE,
                    guideCount = 50, guideHang = 0.2,
                    addTextGuide = FALSE,
                    cex.colorLabels = 0.8,
                    cex.dendroLabels = 0.7, 
                    cex.rowText = 0.8,
                    marAll = c(1, 5, 3, 1), saveMar = TRUE,
                    main = "Sample dendrogram and trait heatmap")
save(datExpr_training.1, datTraits,file = "training-01-dataInput.RData")

###==========================================================================
###==========================================================================
###------------------------------------------------------------------------------------------

###------------------------------------------------------------------------------------------
powers = c(c(1:10), seq(from = 12, to=30, by=2))
nGenes = ncol(datExpr_training)
nSamples = nrow(datExpr_training)
sft = pickSoftThreshold(datExpr_training, powerVector = powers, verbose = 24)#
sizeGrWindow(9, 8)
par(mfrow = c(1,2));
cex1 = 0.9;
# Scale-free topology fit index as a function of the softthresholding power
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
     xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",
     main = paste("Scale independence"));
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
     labels=powers,cex=cex1,col="red");
# this line corresponds to using an R^2 cut-off of h
abline(h=0.90,col="red")

###------------------------------------------------------------------------------------------
# Mean connectivity as a function of the soft-thresholding power
plot(sft$fitIndices[,1], sft$fitIndices[,5],
     xlab="Soft Threshold (power)",ylab="Mean Connectivity",
     type="n",
     main = paste("Mean connectivity"))
text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers,
     cex=cex1,col="red")
###------------------------------------------------------------------------------------------
#detect whether the network is scale free network
ADJ1=abs(cor(datExpr_training,use="p"))^4
k=as.vector(apply(ADJ1,2,sum, na.rm=T))
sizeGrWindow(10,5)
par(mfrow=c(1,2))
hist(k)
scaleFreePlot(k, main="Check scale free topology\n")

###------------------------------------------------------------------------------------------
softPower = 4;
adjacency = adjacency(datExpr_training, power = softPower)

###------------------------------------------------------------------------------------------
TOM = TOMsimilarity(adjacency);
dissTOM = 1-TOM

###------------------------------------------------------------------------------------------
geneTree = hclust(as.dist(dissTOM), method = "average");
# Plot the resulting clustering tree (dendrogram)
sizeGrWindow(12,12)
plot(geneTree, xlab="", sub="", main = "Gene clustering on TOM-based
     dissimilarity",
     labels = FALSE, hang = 0.04);

###------------------------------------------------------------------------------------------
minModuleSize = 30;
# Module identification using dynamic tree cut:
dynamicMods_training = cutreeDynamic(dendro = geneTree, distM = dissTOM,
                                 deepSplit = 2, pamRespectsDendro =
                                   FALSE,
                                 minClusterSize = minModuleSize);
table(dynamicMods_training)
dynamicColors_training = labels2colors(dynamicMods_training)
table(dynamicColors_training)
sizeGrWindow(8,12)
plotDendroAndColors(geneTree, dynamicColors_training, "Dynamic Tree Cut",
                    dendroLabels = FALSE, hang = 0.03,
                    addGuide = TRUE, guideHang = 0.05,
                    main = "Gene dendrogram and module colors")

#===================================================================
  #===================================================================
# eigengenes
  MEList = moduleEigengenes(datExpr_training, colors = dynamicColors_training)
MEs = MEList$eigengenes
MEDiss = 1-cor(MEs);
# Cluster module eigengenes
METree = hclust(as.dist(MEDiss), method = "average");
# Plot the result
sizeGrWindow(7, 6)
plot(METree, main = "Clustering of module eigengenes",
     xlab = "", sub = "")


 #===================================================================
 MEDissThres = 0.25
abline(h=MEDissThres, col = "red")
merge = mergeCloseModules(datExpr_training, dynamicColors_training, cutHeight =
                            MEDissThres, verbose = 4)
mergedColors = merge$colors;
mergedMEs = merge$newMEs;

  #===================================================================
  sizeGrWindow(12, 9)
plotDendroAndColors(geneTree, cbind(dynamicColors_training, mergedColors),
                    c("Dynamic Tree Cut", "Merged dynamic"),
                    dendroLabels = FALSE, hang = 0.03,
                    addGuide = TRUE, guideHang = 0.05)

# 9.괏닸
#===================================================================
moduleColors = mergedColors
colorOrder = c("grey", standardColors(50));
moduleLabels = match(moduleColors, colorOrder)-1;
MEs = mergedMEs;
save(MEs, moduleLabels, moduleColors, geneTree, file = "training-02-networkConstruction-StepByStep.RData")

###==========================================================================
###==========================================================================
###------------------------------------------------------------------------------------------
nGenes = ncol(datExpr_training);
nSamples = nrow(datExpr_training);
MEs0 = moduleEigengenes(datExpr_training, moduleColors)$eigengenes
MEs = orderMEs(MEs0)

DataForLogis<-cbind(MEs,datTraits)
DataForLogis1<-DataForLogis[,-13]
write.table(DataForLogis1,file = "DataForLogis1.txt",sep = "\t",quote=F,row.names = F)

DataForLogis1<-read.table("DataForLogis1.txt",header=T,sep="\t")
fit<-glm(type~DataForLogis1$MEgrey,family = binomial,data=DataForLogis1)
summary(fit)
log10(pvalue)

a<-read.table("regression p value.txt",sep="\t",header=T,check.names = T)
barplot(height=a$absolute.log10.p.., names.arg =a$modules, main="Screening significant modules",ylab = "log10 transformation of the P-value",
        xlab = "Modules",ylim =c(0,20),border = TRUE,width=10,col=c("black","turquoise","brown","cyan","grey","purple","yellow","pink","green","salmon","greenyellow","blue"))

#==============================================================
#==============================================================
Surtime = as.data.frame(datTraits$type);
names(Surtime) = "Surtime"
modNames = substring(names(MEs), 3)
geneModuleMembership = as.data.frame(cor(datExpr_training, MEs, use = "p"));
MMPvalue =
  as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership),
                                 nSamples));
names(geneModuleMembership) = paste("MM", modNames, sep="");
names(MMPvalue) = paste("p.MM", modNames, sep="");
#==========================================================



###*************************************************
###==========================================================================
setwd("~")
library(affy)
library(affyPLM)
data4<-ReadAffy()
eset.rma<-rma(data4)
group_tumor_exprs<-exprs(eset.rma)
probeid<-rownames(group_tumor_exprs)
group_tumor_exprs<-cbind(probeid,group_tumor_exprs)
write.table(group_tumor_exprs,file = "probeid.expres.txt",sep = "\t",quote=F,row.names = F)

probe_exp<-read.table("probeid.expres.txt",header = T,sep = "\t",row.names = 1)
probeid_geneid<-read.table("GPL570-55999.txt",header = T,sep = "\t")
probe_name<-rownames(probe_exp)
loc<-match(probeid_geneid[,1],probe_name)
probe_exp<-probe_exp[loc,]
raw_geneid<-as.numeric(as.matrix(probeid_geneid[,3]))
index<-which(!is.na(raw_geneid))
geneid<-raw_geneid[index]
exp_matrix<-probe_exp[index,]
geneidfactor<-factor(geneid)
gene_exp_matrix<-apply(exp_matrix,2,function(x) tapply(x,geneidfactor,mean))
rownames(gene_exp_matrix)<-levels(geneidfactor)
geneid<-rownames(gene_exp_matrix)
gene_exp_matrix2<-cbind(geneid,gene_exp_matrix)
write.table(gene_exp_matrix2,file = "geneid.expres.txt",sep = "\t",quote=F,row.names = F)
loc<-match(rownames(gene_exp_matrix),probeid_geneid[,3])
rownames(gene_exp_matrix)=probeid_geneid[loc,2]
genesymbol<-rownames(gene_exp_matrix)
gene_exp_matrix3<-cbind(genesymbol,gene_exp_matrix)
write.table(gene_exp_matrix3,file = "genesymbol.expres.txt",sep = "\t",quote=F,row.names = F)

library(impute)
gene_exp_matrix<-read.table("genesymbol.expres.txt",header=T,sep="\t",row.names=1)
gene_exp_matrix<-as.matrix(gene_exp_matrix)
imputed_gene_exp<-impute.knn(gene_exp_matrix,k=10,rowmax=0.5,colmax=0.8,maxp=3000,rng.seed=362436069)
GeneExp<-imputed_gene_exp$data
genesymbol<-rownames(GeneExp)
GeneExp<-cbind(genesymbol,GeneExp)
write.table(GeneExp,file="genesyb.exprs.afterimpute.txt",sep='\t',quote=F,row.names=F)
###==========================================================================

setwd("~")
a<-read.table("5115genes.txt",header=T,sep="\t")
b<-read.table("genesyb.exprs.afterimpute.txt",header=T,sep="\t")
c<-merge(a,b,by="genesymbol",all.x=TRUE)
write.table(c,file="geneInput wgcna.txt",sep='\t',quote=F,row.names=T)

options(stringsAsFactors = FALSE);
expro.upper<-read.table("geneInput wgcna.txt",header=T,sep="\t",row.names=1)
datExpr_51725=as.data.frame(t(expro.upper));
library(WGCNA)
gsg = goodSamplesGenes(datExpr_51725, verbose = 4);
gsg$allOK

###-----------------------------------------------------------------------------------------
sampleTree = hclust(dist(datExpr_51725), method = "average");
sizeGrWindow(12,9)
par(cex = 0.45);
par(mar = c(0,4,2,0))
plot(sampleTree, main = "Sample clustering to detect outliers",
     sub="", xlab="",
     cex.lab = 1.5, cex.axis = 1.5, cex.main = 2)
datExpr_51725.1<-sampleTree

###------------------------------------------------------------------------------------------
traitData = read.csv('clinical traits.txt',sep = '\t');
dim(traitData)
names(traitData)
library(stringr)
tumorSamples = datExpr_51725.1$labels;
traitRows = match(tumorSamples, traitData$Accession)
datTraits = traitData[traitRows, -1];

datTraitsColor_51725<- numbers2colors(as.numeric(datTraits), signed = FALSE);
sizeGrWindow(15,11)
plotDendroAndColors(sampleTree, datTraitsColor_51725,
                    groupLabels = names(datTraits),
                    colorHeight = 0.2,
                    colorHeightBase = 0.2,
                    colorHeightMax = 0.4,
                    rowWidths = NULL,
                    dendroLabels = NULL,
                    addGuide = FALSE, guideAll = FALSE,
                    guideCount = 50, guideHang = 0.2,
                    addTextGuide = FALSE,
                    cex.colorLabels = 0.8,
                    cex.dendroLabels = 0.7, 
                    cex.rowText = 0.8,
                    marAll = c(1, 5, 3, 1), saveMar = TRUE,
                    main = "Sample dendrogram and trait heatmap")
save(datExpr_51725.1, datTraits,file = "51725-01-dataInput.RData")

###==========================================================================
###==========================================================================
###------------------------------------------------------------------------------------------
powers = c(c(1:10), seq(from = 12, to=20, by=2))
nGenes = ncol(datExpr_51725)
nSamples = nrow(datExpr_51725)
sft = pickSoftThreshold(datExpr_51725, powerVector = powers, verbose = 16)
sizeGrWindow(9, 5)
par(mfrow = c(1,2));
cex1 = 0.9;
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
     xlab="Soft Threshold (power)",ylab="Scale Free Topology Model
     Fit,signed R^2",type="n",
     main = paste("Scale independence"));
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
     labels=powers,cex=cex1,col="red");
abline(h=0.90,col="red")

###------------------------------------------------------------------------------------------
plot(sft$fitIndices[,1], sft$fitIndices[,5],
     xlab="Soft Threshold (power)",ylab="Mean Connectivity",
     type="n",
     main = paste("Mean connectivity"))
text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers,
     cex=cex1,col="red")
###------------------------------------------------------------------------------------------
ADJ1=abs(cor(datExpr_51725,use="p"))^9
k=as.vector(apply(ADJ1,2,sum, na.rm=T))#?????朞寧????
sizeGrWindow(9,5)
par(mfrow=c(1,2))
hist(k)
scaleFreePlot(k, main="Check scale free topology\n")

###------------------------------------------------------------------------------------------
softPower = 9;
adjacency = adjacency(datExpr_51725, power = softPower)

###------------------------------------------------------------------------------------------
TOM = TOMsimilarity(adjacency);
dissTOM = 1-TOM

###------------------------------------------------------------------------------------------
geneTree = hclust(as.dist(dissTOM), method = "average");
sizeGrWindow(12,12)
plot(geneTree, xlab="", sub="", main = "Gene clustering on TOM-based
     dissimilarity",
     labels = FALSE, hang = 0.04);

###------------------------------------------------------------------------------------------
minModuleSize = 30;

sizeGrWindow(8,12)
plotDendroAndColors(geneTree, mergedColors, "Dynamic Tree Cut",
                    dendroLabels = FALSE, hang = 0.03,
                    addGuide = TRUE, guideHang = 0.05,
                    main = "Gene dendrogram and module colors")
save(mergedColors, geneTree, file = "51725-02-dynamic tree cut.RData")

###==========================================================================
###==========================================================================
###------------------------------------------------------------------------------------------
save(datExpr_51725,datExpr_training,mergedColors,file = "training VS 51725 expression.RData")
lnames=load("training VS 51725 expression.RData")
lnames

#calculating of modules preservation
setLabels=c("training","GSE51725");
multiExpr=list(training=list(data=datExpr_training),GSE51725=list(data=datExpr_51725));
multiColor=list(training=mergedColors,GSE51725=mergedColors);
a<-multiColor[["GSE51725"]]
write.table(a,file="GSE13911-module color.txt",sep='\t',quote=F,row.names=T)
nSets=2;

system.time({
  mp=modulePreservation(multiExpr,multiColor,
                        referenceNetworks=c(1:2),
                        nPermutations=200,
                        randomSeed=1,
                        verbose=3, maxModuleSize = 3000,maxGoldModuleSize = 3000)
})
save(mp,file="trainingOnly-modulePreservation.RData")


load(file="trainingOnly-modulePreservation.RData")
#display the preservation results
ref=1
test=2
statsObs=cbind(mp$quality$observed[[ref]][[test]][,-1],mp$preservation$observed[[ref]][[test]][,-1])
statsZ=cbind(mp$quality$Z[[ref]][[test]][,-1],mp$preservation$Z[[ref]][[test]][,-1])
print(cbind(statsObs[,c("medianRank.pres","medianRank.qual")],
             signif(statsZ[,c("Zsummary.pres","Zsummary.qual")],2)))

#display the preservation figure
modColors=rownames(mp$preservation$observed[[ref]][[test]])
moduleSizes=mp$preservation$Z[[ref]][[test]][,1];
#leave grey and gold modules out
plotMods=!(modColors %in% c("grey","gold"));
#Text labels for points
text=modColors[plotMods];
#Auxiliary covenience variable
plotData=cbind(mp$preservation$observed[[ref]][[test]][,2],mp$preservation$Z[[ref]][[test]][,2])
#Main titles for the plot
mains=c("Preservation Median rank","Preservation Zsummary");
#start the plot
sizeGrWindow(10,5)
pdf(file = "modulepreservation-2-2.pdf",height = 10,width=20)
par(mfrow=c(1,2))
par(mar=c(4.5,4.5,2.5,1))
for(p in 1:2)
{
  min=min(plotData[,p],na.rm=TRUE);
  max=max(plotData[,p],na.rm = TRUE);
  #adjust ploting ranges appropriately
  if(p==2)
  {
  if (min>-max/10) min=-max/10
  ylim=c(min-0.1*(max-min),max+0.1*(max-min))
} else
  ylim=c(max+0.1*(max-min),min-0.1*(max-min))
plot(moduleSizes[plotMods],plotData[plotMods,p],col=1,bg=modColors[plotMods],pch=21,
  main=mains[p],
  cex=3.5,
  ylab=mains[p],xlab="Module size",log="x",
  ylim=ylim,
  xlim=c(10,2300),cex.lab=2,cex.axis=2,cex.main=2)
labelPoints(moduleSizes[plotMods],plotData[plotMods,p],text,cex=1.3,offs=0.08);
# for Zsummary, add threshold lines
if(p==2)
{
  abline(h=0)
  abline(h=2,col="blue",lty=2)
  abline(h=10,col="darkgreen",lty=2)
}
}
dev.off()

###==========================================================================





###*************************************************
###==========================================================================
###=========================================================================
setwd("~")
library(limma)
rt<-read.table("geneInput_variancetop0.25.txt",header = T, sep = "\t", row.names = "genesymbol")
class<-c(rep("tumor",300),rep("normal",100))
design<-model.matrix(~factor(class))
colnames(design)<-c("tumor","normal")
fit<-lmFit(rt,design)
fit2<-eBayes(fit)
allDiff=topTable(fit2,adjust="fdr",coef = 2,number = 20000)
write.table(allDiff,file = "limmaTab.txt",sep = "\t",quote = F)
diffLab<-allDiff[with(allDiff,((logFC>1)|logFC<(-1))&adj.P.Val<0.05),]
diffExpLevel<-rt[rownames(diffLab),]
qvalue=allDiff[rownames(diffLab),]$adj.P.Val
diffExpQvalue=cbind(qvalue,diffExpLevel)
write.table(diffExpQvalue,file = "diffExpLevel.xls",sep = "\t",quote=F)


a<-read.table("candidate genes 20201112.txt",header=T,check.names=F,sep="\t")
b<-read.table("diffExpLevel.txt",header=T,check.names=F,sep="\t")
c<-merge(a,b,by="genesymbol")
write.table(c,file = "overlap-candidate genes 20201112.txt",sep = "\t",quote=F)


setwd("~")
diffcandidate<-read.table("training set-original 20201111.txt",row.names=1,header=T,sep="\t")
diffcandidate<-as.matrix(diffcandidate)
annotation<-read.table("sample type for heatmap.txt",row.names=1,header=T,sep="\t")

install.packages("pheatmap")
library(pheatmap)
library(heatmap)
library(RColorBrewer)
annotationcol = factor(c(rep("blue", 300), rep("pink", 100)))
pheatmap(diffcandidate,annotation_col = annotation, cluster_cols = FALSE)
data1<-read.table("expression-candidate genes 20201112 for check outcome final.txt",header=T,check.names=F,sep="\t")


library(glmnet)
x<-model.matrix(outcome~.,data1)[,-1]
y<-data1$outcome

cv.fit <- cv.glmnet(x, y, family="binomial",alpha=1,type.measure = "mse", nfolds = 10)
cv.fit$lambda.min
fit<-glmnet(x, y, family="binomial",alpha=1)

Coefficients <- coef(fit, s = cv.fit$lambda.min)
Active.Index <- which(Coefficients != 0)
Active.Coefficients <- Coefficients[Active.Index]
Coefficients
exp(Coefficients)
###==========================================================================


###*************************************************
###==========================================================================
setwd("C:/Users/vivian/Desktop/GC/revise20200325/testing set/validation-GS0.6 MM0.8-DEG")
a<-read.table("hub genes20201111.txt",header = T,sep = "\t")
b<-read.table("genesyb.exprs.afterimpute-PC.txt",header = T,sep = "\t")
c<-merge(a,b,by="genesymbol")
write.table(c,file="testing set-PC-logis 20201211.txt",sep = "\t",quote=F,row.names = F)

a<-read.table("hub genes20201111.txt",header = T,sep = "\t")
b<-read.table("genesyb.exprs.afterimpute-CC.txt",header = T,sep = "\t")
c<-merge(a,b,by="genesymbol")
write.table(c,file="testing set-CC-logis 20201211.txt",sep = "\t",quote=F,row.names = F)

#hub genes identifications 
###==========================================================================
#  ANN
library(neuralnet)
train_dataset<-read.table("training set-logis 20201111.txt",header = T,sep = "\t")
train_dataset<-as.data.frame(train_dataset)
test_dataset<-read.table("testing set-logis 20201111.txt",header = T,sep = "\t")
test_dataset<-as.data.frame(test_dataset)
train_model<-neuralnet(outcome~ADIPOQ+ARHGAP39+ATAD3A+C1orf95+CWH43+INHBA+GRIK3+LYVE1+RDH12+SIGLEC11+SCNN1G,
                       data=train_dataset,hidden = 1)
model_results<-neuralnet::compute(train_model,test_dataset[1:11])
predicted_results<-as.numeric(model_results$net.result)
predicted_results1<-as.data.frame(predicted_results1)
test_dataset1<-as.data.frame(test_dataset$outcome)
a<-predicted_results1-test_dataset1
table(a)
date_roc2 <- roc(test_dataset$outcome, predicted_results)
rocobj1 <- plot.roc(date_roc2,main="AUC Curves", col="blue",
                    print.auc = TRUE,print.auc.x=0.2,print.auc.y=0.2,legacy.axes = T)
###==========================================================================


###==========================================================================
#hub genes identifications IN PC 
###==========================================================================
# ANN
library(neuralnet)
train_dataset<-read.table("training set-logis 20201111.txt",header = T,sep = "\t")
train_dataset<-as.data.frame(train_dataset)
test_dataset<-read.table("testing set-PC-logis 20201211.txt",header = T,sep = "\t")
test_dataset<-as.data.frame(test_dataset)
train_model<-neuralnet(outcome~ADIPOQ+ARHGAP39+ATAD3A+C1orf95+CWH43+GRIK3+INHBA+LYVE1+RDH12+SIGLEC11+SCNN1G,
                       data=train_dataset,hidden = 2)
model_results<-neuralnet::compute(train_model,test_dataset[1:11])
predicted_results<-as.numeric(model_results$net.result)
predicted_results1<-as.data.frame(predicted_results1)
test_dataset1<-as.data.frame(test_dataset$outcome)
a<-predicted_results1-test_dataset1
table(a)
date_roc2 <- roc(test_dataset$outcome, predicted_results)
rocobj1 <- plot.roc(date_roc2,main="AUC Curves", col="blue",
                    print.auc = TRUE,print.auc.x=0.2,print.auc.y=0.2,legacy.axes = T)
###==========================================================================


###==========================================================================
#hub genes identifications IN CC 
###==========================================================================
# ANN
library(neuralnet)
train_dataset<-read.table("training set-logis 20200923.txt",header = T,sep = "\t")
train_dataset<-as.data.frame(train_dataset)
test_dataset<-read.table("testing set-CC-logis 20200923.txt",header = T,sep = "\t")
test_dataset<-as.data.frame(test_dataset)
train_model<-neuralnet(outcome~ACADL+ADIPOQ+ARHGAP39+ATAD3A+C1orf95+CCKBR+GRIK3+LYVE1+SIGLEC11+SCNN1G+TXLNB,
                       data=train_dataset,hidden = 2)
model_results<-neuralnet::compute(train_model,test_dataset[1:11])
predicted_results<-as.numeric(model_results$net.result)
predicted_results1<-as.data.frame(predicted_results1)
test_dataset1<-as.data.frame(test_dataset$outcome)
a<-predicted_results1-test_dataset1
table(a)
date_roc2 <- roc(test_dataset$outcome, predicted_results)
rocobj1 <- plot.roc(date_roc2,main="AUC Curves", col="blue",
                    print.auc = TRUE,print.auc.x=0.2,print.auc.y=0.2,legacy.axes = T)
###==========================================================================

###==========================================================================
#GO analysis
###==========================================================================
#----------------------------------------------------------------------
source("http://www.bioconductor.org/biocLite.R")
library(GOplot)
setwd("~")
gene_blue<-read.table("GeneList symb_Blue.txt",header = T,sep = "\t")
diffExp<-read.table("limmaTab.txt",header = T,sep = "\t")
diffExp_blue<-merge(gene_blue,diffExp,by="genesymbol")
write.table(diffExp_blue,file="diffExp_blue.txt",sep = "\t",quote=F,row.names = F)

gene_brown<-read.table("GeneList symb_Black.txt",header = T,sep = "\t")
diffExp<-read.table("limmaTab.txt",header = T,sep = "\t")
diffExp_brown<-merge(gene_brown,diffExp,by="genesymbol")
write.table(diffExp_brown,file="diffExp_black.txt",sep = "\t",quote=F,row.names = F)

gene_brown<-read.table("GeneList symb_Greenyellow.txt",header = T,sep = "\t")
diffExp<-read.table("limmaTab.txt",header = T,sep = "\t")
diffExp_brown<-merge(gene_brown,diffExp,by="genesymbol")
write.table(diffExp_brown,file="diffExp_greenyellow.txt",sep = "\t",quote=F,row.names = F)

gene_brown<-read.table("GeneList symb_Turquoise.txt",header = T,sep = "\t")
diffExp<-read.table("limmaTab.txt",header = T,sep = "\t")
diffExp_brown<-merge(gene_brown,diffExp,by="genesymbol")
write.table(diffExp_brown,file="diffExp_turquoise.txt",sep = "\t",quote=F,row.names = F)

gene_brown<-read.table("GeneList symb_Salmon.txt",header = T,sep = "\t")
diffExp<-read.table("limmaTab.txt",header = T,sep = "\t")
diffExp_brown<-merge(gene_brown,diffExp,by="genesymbol")
write.table(diffExp_brown,file="diffExp_salmon.txt",sep = "\t",quote=F,row.names = F)

#----------------------------------------------------------------------
david<-read.table("blue GO BP_input.txt",header = T,sep = "\t")
david<-as.data.frame(david)
diffEXp<-read.table("diffExp_blue-1.txt",header = T,sep = "\t")
diffEXp<-as.data.frame(diffEXp)
circ <- circle_dat(david, diffEXp)
write.table(circ,file = "circ_blue.txt",sep = "\t",quote=F,row.names = F)

circ <- read.table("circ_blue.txt",header = T,sep = "\t")
GOBubble(circ,table.col = T, table.legend = F,labels = 17)
#----------------------------------------------------------------------

#----------------------------------------------------------------------
david<-read.table("black GO BP_input.txt",header = T,sep = "\t")
david<-as.data.frame(david)
diffEXp1<-read.table("diffExp_black-1.txt",header = T,sep = "\t")
diffEXp1<-as.data.frame(diffEXp1)
circ <- circle_dat(david, diffEXp1)
write.table(circ,file = "circ-black.txt",sep = "\t",quote=F,row.names = F)

circ <- read.table("circ-black.txt",header = T,sep = "\t")
GOBubble(circ,table.col = T, table.legend = F,labels = 2.8)
#----------------------------------------------------------------------

#----------------------------------------------------------------------
david<-read.table("greenyellow GO BP_input.txt",header = T,sep = "\t")
david<-as.data.frame(david)
diffEXp1<-read.table("diffExp_greenyellow-1.txt",header = T,sep = "\t")
diffEXp1<-as.data.frame(diffEXp1)
circ <- circle_dat(david, diffEXp1)
write.table(circ,file = "circ_greenyellow.txt",sep = "\t",quote=F,row.names = F)

circ <- read.table("circ_greenyellow 20200928.txt",header = T,sep = "\t")
GOBubble(circ,table.col = T, table.legend = F,labels = 14)
#----------------------------------------------------------------------

#----------------------------------------------------------------------
david<-read.table("turquoise GO BP_input.txt",header = T,sep = "\t")
david<-as.data.frame(david)
diffEXp1<-read.table("diffExp_turquoise-1.txt",header = T,sep = "\t")
diffEXp1<-as.data.frame(diffEXp1)
circ <- circle_dat(david, diffEXp1)
write.table(circ,file = "circ_turquoise.txt",sep = "\t",quote=F,row.names = F)

circ <- read.table("circ_turquoise 20200928.txt",header = T,sep = "\t")
GOBubble(circ,table.col = T, table.legend = F,labels = 5)
#----------------------------------------------------------------------

#----------------------------------------------------------------------
david<-read.table("salmon GO BP_input.txt",header = T,sep = "\t")
david<-as.data.frame(david)
diffEXp1<-read.table("diffExp_salmon-1.txt",header = T,sep = "\t")
diffEXp1<-as.data.frame(diffEXp1)
circ <- circle_dat(david, diffEXp1)
write.table(circ,file = "circ_salmon.txt",sep = "\t",quote=F,row.names = F)

circ <- read.table("circ_salmon 20201111.txt",header = T,sep = "\t")
GOBubble(circ,table.col = T, table.legend = F,labels = 8)
#----------------------------------------------------------------------

###*************************************************
###==========================================================================
#KEGG analysis
###==========================================================================
#----------------------------------------------------------------------
setwd("~")
library(ggplot2)
KEGG<-read.table("blue KEGG 20200928.txt",header = T,sep = "\t")
p<-ggplot(KEGG,aes(-log10(PValue),Term))
q<-p+geom_point(aes(size=Count,col=-log10(PValue)))
q+scale_colour_gradient(low="blue",high="red")

KEGG<-read.table("black KEGG 20200928.txt",header = T,sep = "\t")
p<-ggplot(KEGG,aes(-log10(PValue),Term))
q<-p+geom_point(aes(size=Count,col=-log10(PValue)))
q+scale_colour_gradient(low="blue",high="red")

KEGG<-read.table("greenyellow KEGG 20200928.txt",header = T,sep = "\t")
p<-ggplot(KEGG,aes(-log10(PValue),Term))
q<-p+geom_point(aes(size=Count,col=-log10(PValue)))
q+scale_colour_gradient(low="blue",high="red")

KEGG<-read.table("turquoise KEGG 20200928.txt",header = T,sep = "\t")
p<-ggplot(KEGG,aes(-log10(PValue),Term))
q<-p+geom_point(aes(size=Count,col=-log10(PValue)))
q+scale_colour_gradient(low="blue",high="red")

KEGG<-read.table("salmon KEGG 20201112.txt",header = T,sep = "\t")
p<-ggplot(KEGG,aes(-log10(PValue),Term))
q<-p+geom_point(aes(size=Count,col=-log10(PValue)))
q+scale_colour_gradient(low="blue",high="red")