1.Identification and Enrichment Analysis of DEGs (1) #install.packages("RobustRankAggreg") #install.packages("pheatmap") rm(list = ls()) library(RobustRankAggreg) library(pheatmap) logFCfilter=1 fdrFilter=0.05 setwd("C:/Users/hp/Desktop/Organized data diff") files=dir() files=grep(".all.txt", files, value=T) upList=list() downList=list() allFCList=list() for(inputFile in files){ rt=read.table(inputFile, header=T, sep="\t", check.names=F) rt=rt[order(rt$logFC),] header=unlist(strsplit(inputFile, "\\.")) downList[[header[1]]]=as.vector(rt[,1]) upList[[header[1]]]=rev(as.vector(rt[,1])) fcCol=rt[,1:2] colnames(fcCol)=c("Gene", header[[1]]) allFCList[[header[1]]]=fcCol } mergeLe=function(x,y){merge(x,y,by="Gene",all=T)} newTab=Reduce(mergeLe, allFCList) rownames(newTab)=newTab[,1] newTab=newTab[,2:ncol(newTab)] newTab[is.na(newTab)]=0 write.table(newTab, file="newTab.xls", sep="\t", quote=F, row.names=F) upMatrix=rankMatrix(upList) upAR=aggregateRanks(rmat=upMatrix) colnames(upAR)=c("Name", "pvalue") upAdj=p.adjust(upAR$pvalue, method="fdr") upXls=cbind(upAR, fdr=upAdj) upFC=newTab[as.vector(upXls[,1]),] upXls=cbind(upXls, logFC=rowMeans(upFC)) write.table(upXls, file="up.all.xls", sep="\t", quote=F, row.names=F) upSig=upXls[(upXls$pvaluelogFCfilter),] downMatrix=rankMatrix(downList) downAR=aggregateRanks(rmat=downMatrix) colnames(downAR)=c("Name", "pvalue") downAdj=p.adjust(downAR$pvalue, method="fdr") downXls=cbind(downAR, fdr=downAdj) downFC=newTab[as.vector(downXls[,1]),] downXls=cbind(downXls, logFC=rowMeans(downFC)) write.table(downXls, file="down.all.xls", sep="\t", quote=F, row.names=F) downSig=downXls[(downXls$pvalue1 & P.Value < 0.05 )), ] diffSigOut=rbind(id=colnames(diffSig),diffSig) write.table(diffSigOut, file="Batch.diff.txt", sep="\t", quote=F, col.names=F) diffGeneExp=data[row.names(diffSig),] diffGeneExpOut=rbind(id=paste0(colnames(diffGeneExp),"_",Type), diffGeneExp) write.table(diffGeneExpOut, file="diffGeneExp.txt", sep="\t", quote=F, col.names=F) geneNum=30 diffSig=diffSig[order(as.numeric(as.vector(diffSig$logFC))),] diffGeneName=as.vector(rownames(diffSig)) diffLength=length(diffGeneName) hmGene=c() if(diffLength>(2*geneNum)){ hmGene=diffGeneName[c(1:geneNum,(diffLength-geneNum+1):diffLength)] }else{ hmGene=diffGeneName } hmExp=data[hmGene,] names(Type)=colnames(data) Type=as.data.frame(Type) Type=cbind(Project, Type) pdf(file="Batch.heatmap.pdf", width=18, height=16) pheatmap(hmExp, annotation=Type, color = colorRampPalette(c("blue", "white", "red"))(50), cluster_cols =F, show_colnames = F, scale="row", fontsize = 8, fontsize_row=6, fontsize_col=8) dev.off() (5) #install.packages("VennDiagram") rm(list =ls()) library(VennDiagram) library(grid) library(futile.logger) setwd("C:/Users/hp/Desktop/veen") geneList=list() rt=read.table("Batch.diff.txt", header=T, sep="\t", check.names=F) geneNames=as.vector(rt[,1]) geneNames=gsub("^ | $","",geneNames) uniqGene=unique(geneNames) geneList[["DEG"]]=uniqGene rt=read.table("gene.txt", header=F, sep="\t", check.names=F) geneNames=as.vector(rt[,1]) geneNames=gsub("^ | $","",geneNames) uniqGene=unique(geneNames) geneList[["Autophagy"]]=uniqGene venn.plot=venn.diagram(geneList,filename=NULL,fill=c("cornflowerblue", "darkorchid1"),scaled=FALSE,cat.pos=c(-1,1),cat.col = c("cornflowerblue", "darkorchid1"),cat.cex = 1.2) pdf(file="venn.pdf", width=5, height=5) grid.draw(venn.plot) dev.off() interGenes=Reduce(intersect, geneList) write.table(interGenes, file="interGene.txt", sep="\t", quote=F, col.names=F, row.names=F) 2.Machine Learning Analysis to Identify Core Genes (1)SVM-REF rm(list =ls()) setwd("C:/Users/hp/Desktop/1.SVM-REF") library(tidyverse) library(glmnet) source('msvmRFE.R') library(VennDiagram) library(sigFeature) library(e1071) library(caret) library(randomForest) library(limma) inputFile="merge.normalzie.txt" rt=read.table(inputFile, header=T, sep="\t", check.names=F) rt=as.matrix(rt) rownames(rt)=rt[,1] exp=rt[,2:ncol(rt)] dimnames=list(rownames(exp),colnames(exp)) data=matrix(as.numeric(as.matrix(exp)),nrow=nrow(exp),dimnames=dimnames) data=avereps(data) data=t(data) data=data[,read.table("interGene.txt", header=F, sep="\t", check.names=F)[,1]] sample=read.table("1_1.txt",sep="\t",header=F,check.names=F,row.names = 1) data=data[rownames(sample),] afcon=as.matrix(table(sample[,1]))[1,1] afcon=as.vector(afcon) group=c(rep("0",afcon),rep("1",nrow(data)-afcon)) group=as.matrix(as.numeric(group)) rownames(group)=rownames(data) colnames(group)="Type" input <- as.data.frame(cbind(group,data)) input$Type=as.factor(input$Type) svmRFE(input, k = 10, halve.above = 100) nfold = 10 nrows = nrow(input) folds = rep(1:nfold, len=nrows)[sample(nrows)] folds = lapply(1:nfold, function(x) which(folds == x)) results = lapply(folds, svmRFE.wrap, input, k=10, halve.above=100) top.features = WriteFeatures(results, input, save=F) head(top.features) featsweep = lapply(1:15, FeatSweep.wrap, results, input) no.info = min(prop.table(table(input[,1]))) errors = sapply(featsweep, function(x) ifelse(is.null(x), NA, x$error)) #dev.new(width=4, height=4, bg='white') pdf("svm-error.pdf",width = 8,height = 8) PlotErrors(errors, no.info=no.info) dev.off() pdf("svm-accuracy.pdf",width = 8,height = 8) Plotaccuracy(1-errors,no.info=no.info) dev.off() which.min(errors) (2)lasso library(survival) library(glmnet) library(ggplot2) library(ggsci) library(patchwork) library(limma) inputFile="merge.normalzie.txt" setwd("C:/Users/hp/Desktop/lasso") rt=read.table(inputFile, header=T, sep="\t", check.names=F) rt=as.matrix(rt) rownames(rt)=rt[,1] exp=rt[,2:ncol(rt)] dimnames=list(rownames(exp),colnames(exp)) data=matrix(as.numeric(as.matrix(exp)),nrow=nrow(exp),dimnames=dimnames) data=avereps(data) data=t(data) data=data[,read.table("interGene.txt", header=F, sep="\t", check.names=F)[,1]] sample=read.table("1_1.txt",sep="\t",header=F,check.names=F,row.names = 1) data=data[rownames(sample),] x=as.matrix(data) afcon=as.matrix(table(sample[,1]))[1,1] afcon=as.vector(afcon) group=c(rep("0",afcon),rep("1",nrow(data)-afcon)) group=as.matrix(group) rownames(group)=rownames(data) y=as.matrix(group[,1]) set.seed(123) cvfit = cv.glmnet(x, y,family = "binomial", nlambda=100, alpha=1,nfolds = 10) fit <- glmnet(x,y,family = "binomial") cvfit$lambda.min coef <- coef(fit, s = cvfit$lambda.min) index <- which(coef != 0) actCoef <- coef[index] lassoGene=row.names(coef)[index] geneCoef=cbind(Gene=lassoGene, Coef=actCoef) write.table(geneCoef, file="geneCoef.xls", sep="\t", quote=F, row.names=F) write.table(file="lassoset.txt",lassoGene,sep="\t",quote=F,col.names=F,row.names=F) (3)RF rm(list =ls()) library(randomForest) library(limma) library(ggpubr) set.seed(123) inputFile="merge.normalzie.txt" setwd("C:/Users/hp/Desktop/randomForest") rt=read.table(inputFile, header=T, sep="\t", check.names=F) rt=as.matrix(rt) rownames(rt)=rt[,1] exp=rt[,2:ncol(rt)] dimnames=list(rownames(exp),colnames(exp)) data=matrix(as.numeric(as.matrix(exp)),nrow=nrow(exp),dimnames=dimnames) data=avereps(data) data=t(data) data=data[,read.table("interGene.txt", header=F, sep="\t", check.names=F)[,1]] sample=read.table("1_1.txt",sep="\t",header=F,check.names=F,row.names = 1) data=data[rownames(sample),] colnames(data)=gsub("-", "afaf", colnames(data)) afcon=as.matrix(table(sample[,1]))[1,1] afcon=as.vector(afcon) group=c(rep("con",afcon),rep("treat",nrow(data)-afcon)) rf=randomForest(as.factor(group)~., data=data, ntree=500) pdf(file="forest.pdf", width=6, height=6) plot(rf, main="Random forest", lwd=2) dev.off() optionTrees=which.min(rf$err.rate[,1]) optionTrees rf2=randomForest(as.factor(group)~., data=data, ntree=21) #ae3=rf["err.rate"] #ae3=as.data.frame(ae3) importance=importance(x=rf2) importance=as.data.frame(importance) importance$size=rownames(importance) importance=importance[,c(2,1)] names(importance)=c("Gene","importance") af=importance[1:10,] p=ggdotchart(af, x = "Gene", y = "importance", color = "importance", # Custom color palette sorting = "descending", # Sort value in descending order add = "segments", # Add segments from y = 0 to dots add.params = list(color = "lightgray", size = 2), # Change segment color and size dot.size = 6, # Add mpg values as dot labels font.label = list(color = "white", size = 9, vjust = 0.5), # Adjust label parameters ggtheme = theme_bw() , # ggplot2 theme rotate=TRUE p1=p+ geom_hline(yintercept = 0, linetype = 2, color = "lightgray")+ gradient_color(palette =c(ggsci::pal_npg()(2)[2],ggsci::pal_npg()(2)[1]) grids() pdf(file="importance.pdf", width=6, height=6) print(p1) dev.off() rfGenes=importance[order(importance[,"importance"], decreasing = TRUE),] write.table(rfGenes, file="rfGenes.xls", sep="\t", quote=F, col.names=T, row.names=F) (4)ROC #install.packages("glmnet") #install.packages("pROC") rm(list =ls()) library(glmnet) library(pROC) expFile="GSE61739_normalize.txt" geneFile="61739.txt" setwd("C:/Users/hp/Desktop/ROC") rt=read.table(expFile, header=T, sep="\t", check.names=F, row.names=1) y=gsub("(.*)\\_(.*)", "\\2", colnames(rt)) y=ifelse(y=="Control", 0, 1) geneRT=read.table(geneFile, header=F, sep="\t", check.names=F) bioCol=rainbow(nrow(geneRT), s=0.9, v=0.9) aucText=c() k=0 for(x in as.vector(geneRT[,1])){ k=k+1 roc1=roc(y, as.numeric(rt[x,])) if(k==1){ pdf(file="ROC.genes.pdf", width=5, height=4.75) plot(roc1, print.auc=F, col=bioCol[k], legacy.axes=T, main="") aucText=c(aucText, paste0(x,", AUC=",sprintf("%.3f",roc1$auc[1]))) }else{ plot(roc1, print.auc=F, col=bioCol[k], legacy.axes=T, main="", add=TRUE) aucText=c(aucText, paste0(x,", AUC=",sprintf("%.3f",roc1$auc[1]))) } } legend("bottomright", aucText, lwd=2, bty="n", col=bioCol[1:(ncol(rt)-1)]) dev.off() rt=rt[as.vector(geneRT[,1]),] rt=as.data.frame(t(rt)) logit=glm(y ~ ., family=binomial(link='logit'), data=rt) pred=predict(logit, newx=rt) roc1=roc(y, as.numeric(pred)) ci1=ci.auc(roc1, method="bootstrap") ciVec=as.numeric(ci1) pdf(file="ROC.model.pdf", width=5, height=4.75) plot(roc1, print.auc=TRUE, col="red", legacy.axes=T, main="Model") text(0.39, 0.43, paste0("95% CI: ",sprintf("%.03f",ciVec[1]),"-",sprintf("%.03f",ciVec[3])), col="red") dev.off() aucText1=as.data.frame(aucText) write.table(aucText1, file="AUC.txt", sep="\t", quote=F, row.names=F) 3.Establishment and Evaluation of a Nomogram Model for Diagnosing Key Genes and Assessing Efficacy (1) rm(list =ls()) library(dplyr) library(pROC) library(ggplot2) library(survival) library(regplot) library(rms) library(ggsci) library(survminer) library(timeROC) library(ggDCA) library(limma) setwd("C:/Users/hp/Desktop/Nomogram") inputFile="merge.normalzie.txt" hub="LASSO1.txt" rt=read.table(inputFile, header=T, sep="\t", check.names=F) rt=as.matrix(rt) rownames(rt)=rt[,1] exp=rt[,2:ncol(rt)] dimnames=list(rownames(exp),colnames(exp)) data=matrix(as.numeric(as.matrix(exp)),nrow=nrow(exp),dimnames=dimnames) data=avereps(data) data=t(data) sample=read.table("1_1.txt",sep="\t",header=F,check.names=F) colnames(sample)=c("ID","Type") data=data[sample$ID,] aSAH1=data[,read.table(hub, header=F, sep="\t", check.names=F)[,1]] aSAH=cbind(sample,aSAH1) aflist=roc(Type~MALAT1+ITM2A+IL18+ASF1A, data = aSAH) g3 <- ggroc(aflist, size = 1.2,alpha=.6,) g5=g3+ggsci::scale_color_lancet() print(g5) dd=datadist(aSAH) options(datadist="dd") fit <- lrm(formula = Type ~ MALAT1+ITM2A+IL18+ASF1A, data =aSAH) print(fit) coef=as.data.frame(fit$coefficients)[-1,,drop=F] coefout=cbind(ID=rownames(coef),coef) write.table(coefout,file="coefficients.txt",sep="\t",quote=F,row.names = F) pdf(file="nomogram.pdf", width=9, height=7.5) plot(nomogram(fit,fun.at = seq(0.05,0.95,0.05)),funlabel = "nomogram model") dev.off() plot(regplot(fit,plots=c("density","boxes"), observation=T, title="Prediction Nomogram", clickable=T, points=TRUE,droplines=TRUE)) nomoscore=predict(fit, data=t(aSAH)) aSAH$nomoscore=nomoscore write.table(aSAH,file="nomoscore.txt",sep="\t",quote=F,row.names = F) 4.Immune Cell Infiltration Analysis (1) #install.packages('e1071') #if (!requireNamespace("BiocManager", quietly = TRUE)) # install.packages("BiocManager") #BiocManager::install("preprocessCore") rm(list =ls()) inputFile="normalzie.txt" setwd("C:/Users/hp/Desktop/CIBERSORT") source("geoCRG11.CIBERSORT.R") outTab=CIBERSORT("ref.txt", inputFile, perm=1000, QN=T) outTab1=outTab[outTab] outTab1=as.matrix(outTab[,1:(ncol(outTab)-3)]) outTab2=rbind(id=colnames(outTab1),outTab1) write.table(outTab2, file="CIBERSORT-Results.txt", sep="\t", quote=F, col.names=F) outTab=outTab[outTab[,"P-value"]<0.05,] outTab=as.matrix(outTab[,1:(ncol(outTab)-3)]) outTab=rbind(id=colnames(outTab),outTab) write.table(outTab, file="CIBERSORT-Results.txt", sep="\t", quote=F, col.names=F) write.table(outTab, file="CIBERSORT-Results1.txt", sep="\t", quote=F, col.names=F) (2) #install.packages("vioplot") library(vioplot) inputFile="CIBERSORT-Results1.txt" setwd("C:/Users/hp/Desktop/vioplot") rt=read.table(inputFile, header=T, sep="\t", check.names=F, row.names=1) con=grepl("_Control", rownames(rt), ignore.case=T) treat=grepl("_Treat", rownames(rt), ignore.case=T) conData=rt[con,] treatData=rt[treat,] conNum=nrow(conData) treatNum=nrow(treatData) rt=rbind(conData,treatData) outTab=data.frame() pdf(file="vioplot.pdf", width=13, height=8) par(las=1,mar=c(10,6,3,3)) x=c(1:ncol(rt)) y=c(1:ncol(rt)) plot(x, y, xlim=c(0,63), ylim=c(min(rt), max(rt)+0.05), xlab="", ylab="Fraction", main="", pch=21, col="white", xaxt="n") for(i in 1:ncol(rt)){ if(sd(rt[1:conNum,i])==0){ rt[1,i]=0.00001 } if(sd(rt[(conNum+1):(conNum+treatNum),i])==0){ rt[(conNum+1),i]=0.00001 } conData=rt[1:conNum,i] treatData=rt[(conNum+1):(conNum+treatNum),i] vioplot(conData,at=3*(i-1),lty=1,add = T,col = 'blue') vioplot(treatData,at=3*(i-1)+1,lty=1,add = T,col = 'red') wilcoxTest=wilcox.test(conData,treatData) p=wilcoxTest$p.value if(p<0.05){ cellPvalue=cbind(Cell=colnames(rt)[i],pvalue=p) outTab=rbind(outTab,cellPvalue) } mx=max(c(conData,treatData)) lines(c(x=3*(i-1)+0.2,x=3*(i-1)+0.8),c(mx,mx)) text(x=3*(i-1)+0.5, y=mx+0.02, labels=ifelse(p<0.001, paste0("p<0.001"), paste0("p=",sprintf("%.03f",p))), cex = 0.8) } legend("topright", c("Control", "Treat"), lwd=3,bty="n",cex=1, col=c("blue","red")) text(seq(1,64,3),-0.04,xpd = NA,labels=colnames(rt),cex = 1,srt = 45,pos=2) dev.off() write.table(outTab,file="immuneDiff.txt",sep="\t",row.names=F,quote=F) 5.Specific Signaling Pathways Associated with FIZ1 and FBXO21 #if (!requireNamespace("BiocManager", quietly = TRUE)) # install.packages("BiocManager") #BiocManager::install("limma") #BiocManager::install("GSEABase") #BiocManager::install("GSVA") #install.packages("ggpubr") library(reshape2) library(ggpubr) library(limma) library(GSEABase) library(GSVA) gene="TMPO" expFile="merge.normalzie.txt" gmtFile="c2.cp.kegg.symbols.gmt" setwd("C:/Users/hp/Desktop/GSVA") rt=read.table(expFile, header=T, sep="\t", check.names=F) rt=as.matrix(rt) rownames(rt)=rt[,1] exp=rt[,2:ncol(rt)] dimnames=list(rownames(exp),colnames(exp)) data=matrix(as.numeric(as.matrix(exp)),nrow=nrow(exp),dimnames=dimnames) data=avereps(data) group=gsub("(.*)\\_(.*)", "\\2", colnames(data)) data=data[,group=="Treat",drop=F] geneSets=getGmt(gmtFile, geneIdType=SymbolIdentifier()) ssgseaScore=gsva(data, geneSets, method='ssgsea', kcdf='Gaussian', abs.ranking=TRUE) normalize=function(x){ return((x-min(x))/(max(x)-min(x)))} ssgseaScore=normalize(ssgseaScore) ssgseaScore=ssgseaScore[order(apply(ssgseaScore,1,sd),decreasing=T),] ssgseaScore=ssgseaScore[1:50,] lowName=colnames(data)[data[gene,]=median(data[gene,])] lowScore=ssgseaScore[,lowName] highScore=ssgseaScore[,highName] data=cbind(lowScore, highScore) conNum=ncol(lowScore) treatNum=ncol(highScore) Type=c(rep("Control",conNum), rep("Treat",treatNum)) outTab=data.frame() for(i in row.names(data)){ test=t.test(data[i,] ~ Type) pvalue=test$p.value t=test$statistic Sig=ifelse(pvalue>0.05, "Not", ifelse(t>0,"Up","Down")) outTab=rbind(outTab, cbind(Pathway=i, t, pvalue, Sig)) } pdf(file="barplot.pdf", width=12, height=9) outTab$t=as.numeric(outTab$t) outTab$Sig=factor(outTab$Sig, levels=c("Down", "Not", "Up")) gg1=ggbarplot(outTab, x="Pathway", y="t", fill = "Sig", color = "white", palette=c("green3","grey","red3"), sort.val = "asc", sort.by.groups = T, rotate=TRUE, legend="right", title=gene, xlab="Term", ylab="t value of GSVA score", legend.title="Group", x.text.angle=60) print(gg1) dev.off()