library(data.table) ########################################Sample grouping################################### expr <- fread("geneMatrix.txt",sep = "\t",header = T,stringsAsFactors = F,check.names = F) # use fast read expr[1:3,1:3] expr <- as.data.frame(expr) sampleID <- read.table("sample.txt",sep = "\t",check.names = F,stringsAsFactors = F,header = T,row.names = NULL) com_sam <- intersect(sampleID$SampleID,colnames(expr)) out.expr <- expr[,c("geneNames",com_sam)] out.expr[1:3,1:3] write.table(out.expr,file = "out.expr.txt",sep = "\t",row.names = F,quote = F) #####Identification of DEGs ################################### logFoldChange=0.585 P=0.05 library(limma) rt=read.table("out.expr.txt",sep="\t",header=T,check.names=F) rt=as.matrix(rt) rownames(rt)=rt[,1] exp=rt[,2:ncol(rt)] dimnames=list(rownames(exp),colnames(exp)) rt=matrix(as.numeric(as.matrix(exp)),nrow=nrow(exp),dimnames=dimnames) rt=avereps(rt) rt=normalizeBetweenArrays(as.matrix(rt)) modType=c(rep("con",8),rep("treat",24)) design <- model.matrix(~0+factor(modType)) colnames(design) <- c("con","treat") fit <- lmFit(rt,design) cont.matrix<-makeContrasts(treat-con,levels=design) fit2 <- contrasts.fit(fit, cont.matrix) fit2 <- eBayes(fit2) allDiff=topTable(fit2,adjust='fdr',number=200000) write.table(allDiff,file="limmaTab.xls",sep="\t",quote=F) #write table diffSig <- allDiff[with(allDiff, (abs(logFC)>logFoldChange &P.Value < adjustP )), ] write.table(diffSig,file="diff.txt",sep="\t",quote=F) diffUp <- allDiff[with(allDiff, (logFC>logFoldChange & P.Value < adjustP )), ] write.table(diffUp,file="up.txt",sep="\t",quote=F) diffDown <- allDiff[with(allDiff, (logFC<(-logFoldChange) & P.Value < adjustP )), ] write.table(diffDown,file="down.txt",sep="\t",quote=F) hmExp=rt[rownames(diffSig),] diffExp=rbind(id=colnames(hmExp),hmExp) write.table(diffExp,file="heatmap.txt",sep="\t",quote=F,col.names=F) tiff(file="vol.tiff", width = 12, height =12, units ="cm", compression="lzw", bg="white", res=600) xMax=max(abs(allDiff$logFC)) yMax=max(-log10(allDiff$P.Value)) plot(allDiff$logFC, -log10(allDiff$P.Value), xlab="log2FC",ylab="-log10(P.Value)", main="Volcano",xlim=c(-xMax,xMax),ylim=c(0,yMax),yaxs="i",pch=20, cex=0.8) diffSub=subset(allDiff, P.ValuelogFoldChange) points(diffSub$logFC, -log10(diffSub$P.Value), pch=20, col="palevioletred1",cex=0.8) diffSub=subset(allDiff, P.Value 1) { tmp <- as.matrix(tpms[tum.sam,rownames(lasso_coef.hr)]) risk.score <- apply(tmp,1,function(x) {x %*% lasso_coef.hr$coef}) risk.score.train <- risk.score tmp <- tpms[names(risk.score),1:2] tmp$risk.score <- as.numeric(risk.score) tmp$RiskGroup <- ifelse(tmp$risk.score > median(risk.score) ,"HRisk","LRisk") risk <- rbind.data.frame(risk, data.frame(samID = tum.sam, riskscore = tmp$risk.score, riskgroup = tmp$RiskGroup, cohort = "GEO training", stringsAsFactors = F), stringsAsFactors = F) fit <- glm(Tissue ~ risk.score, data=tmp, family = "gaussian") predicted <- predict(fit, tmp, type = "response") rocobj <- roc(tmp$Tissue,tmp$risk.score) auc <- round(auc(tmp$Tissue,tmp$risk.score), 4) tmp.validate <- as.matrix(geo1.tpms[,rownames(lasso_coef.hr)]) risk.score <- apply(tmp.validate,1,function(x) {x %*% lasso_coef.hr$coef}) risk.score.validate <- risk.score tmp.validate <- geo1.tpms[names(risk.score),1:2] tmp.validate$risk.score <- as.numeric(risk.score) tmp.validate$RiskGroup <- ifelse(tmp.validate$risk.score > median(risk.score) ,"HRisk","LRisk") risk <- rbind.data.frame(risk, data.frame(samID = rownames(geo1.tpms), riskscore = tmp.validate$risk.score, riskgroup = tmp.validate$RiskGroup, cohort = "GEO test", stringsAsFactors = F), stringsAsFactors = F) predicted.test <- predict(fit, tmp.validate, type = "response") rocobj.test <- roc(tmp.validate$Tissue,tmp.validate$risk.score) auc.test <- round(auc(tmp.validate$Tissue,tmp.validate$risk.score), 4) if(auc > 0.5 & auc.test > 0.5) { cat(paste0("seed=",seed,"; auc.train=",auc,"; auc.test=",auc.test,"\n")) cat("\n") outTab <- rbind.data.frame(outTab,data.frame(seed=seed, modelgene.num=nrow(lasso_coef.hr), auc.rain=auc, auc.test=auc.test, modelgene=paste(gsub("-","_",rownames(lasso_coef.hr)),collapse = ","), stringsAsFactors = F), stringsAsFactors = F) p1 <- ggroc(rocobj,color = "red",linetype = 1, size = 1, alpha =1,legacy.axes = T) + geom_abline(intercept=0,slope=1,color="grey",size=1,linetype=1) + labs(x="Specificity", y="Sensivity", title = "GEO train") + annotate("text",x=.8,y=.1,label=paste0("AUC = ",auc), size =5,family="serif") + coord_cartesian(xlim=c(0,1),ylim=c(0,1)) + theme_bw() + theme(panel.background = element_rect(fill='transparent'), axis.ticks.length = unit(0.4,"lines"), axis.ticks = element_line(color='black'), axis.line = element_line(size=.5, colour='black'), axis.title = element_text(colour='black',size=12,face="bold"), axis.text = element_text(colour='black',size=10,face="bold"), text = element_text(colour='black',size=8,family="serif")) p2 <- ggroc(rocobj.test,color = "red",linetype = 1, size = 1, alpha =1,legacy.axes = T) + geom_abline(intercept=0,slope=1,color="grey",size=1,linetype=1) + labs(x="Specificity", y="Sensivity", title = "GEO test") + annotate("text",x=.8,y=.1,label=paste0("AUC = ",auc.test), size =5,family="serif") + coord_cartesian(xlim=c(0,1),ylim=c(0,1)) + theme_bw() + theme(panel.background = element_rect(fill='transparent'), axis.ticks.length = unit(0.4,"lines"), axis.ticks = element_line(color='black'), axis.line = element_line(size=.5, colour='black'), axis.title = element_text(colour='black',size=12,face="bold"), axis.text = element_text(colour='black',size=10,face="bold"), text = element_text(colour='black',size=8,family="serif")) pdf(file = paste0("survivalROC for training dataset",seed,".pdf"),width = 4,height = 4) print(p1) dev.off() pdf(file = paste0("survivalROC for validation dataset",seed,".pdf"),width = 4,height = 4) print(p2) dev.off() } } write.table(outTab,paste0("outTab",seed,".txt"),sep = "\t",row.names = F,quote = F) } write.table(outTab,"outTab.txt",sep = "\t",row.names = F,quote = F) write.table(risk,"risk.txt",sep = "\t",row.names = F,quote = F) darkred <- "#F2042C" darkblue <- "#21498D" cutoff <- 0.1 lasso_coef.hr$gene <- gsub("_","-",lasso_coef.hr$gene) lasso_coef.hr$group <- as.character(cut(lasso_coef.hr$coef, breaks = c(-Inf, -cutoff, cutoff, Inf),labels = c("#21498D","#EABF00","#21498D"))) pdf("lasso_coef_hr.pdf",width = 5,height = 2.5) par(bty="n", mgp = c(1.7,.33,0),mar=c(2.5,2.7,1,1)+.1, las=1, tcl=-.25,xpd = T) a <- barplot(lasso_coef.hr$coef,col = lasso_coef.hr$group,border = NA, horiz = T,xlim = c(-1,1),add=F,xaxt = "n") axis(side = 1, at = c(-1,-0.8,-0.6,-0.4,-0.2,0,0.2,0.4,0.6,0.8,1), labels = c("-1","-.8","-.6","-.4","-.2","0",".2",".4",".6",".8","1")) for (i in 1:nrow(lasso_coef.hr)) { text(y = a[,1][i],x = ifelse(lasso_coef.hr$coef[i] > 0,-0.0001,0.0001),pos = ifelse(lasso_coef.hr$coef[i] > 0,2,4),labels = lasso_coef.hr$gene[i],adj = ifelse(lasso_coef.hr$coef[i]>0,0,1)) } points(0.6,1,pch = 19, cex = 1.5) text(0.6,1,"Coefficient",pos = 4) invisible(dev.off()) write.table(lasso_coef.hr[,1:2], "lasso coefficient.txt",sep = "\t",row.names = F,col.names = T,quote = F) pdf("lasso.pdf",width = 4.5,height = 4) par(bty="o", mgp = c(1.9,.33,0), mar=c(4.1,4.1,2.1,2.1)+.1, las=1, tcl=-.25,xpd = F) plot(cvfit$glmnet.fit, "lambda", label=F) abline(v=log(cvfit$lambda.min),lwd=2,col="grey60",lty=4) invisible(dev.off()) pdf("cvfit.pdf",width = 4.5,height = 4) par(bty="o", mgp = c(1.9,.33,0), mar=c(4.1,4.1,2.1,2.1)+.1, las=1, tcl=-.25) plot(cvfit) abline(h=min(cvfit$cvm),lwd=2,col="black",lty=4) points(log(cvfit$lambda.min),min(cvfit$cvm),pch=18,cex=2,col="black") points(log(cvfit$lambda.min),min(cvfit$cvm),pch=18,cex=1.5,col="#008B8A") invisible(dev.off()) save.image("Heng.RData") ##############Correlation analysis of key genes and immune factors########################################## library(ggplot2) library(stringr) a_1 <- read.table("symbol.txt",header = T,row.names = 1,sep = "\t", quote = "",fill = T,check.names=F) dim(a_1) head(a_1[,1:3]) a_2 <- as.data.frame(t(a_1)) dim(a_2) head(a_2[,1:3]) a_3 <- a_1 a_3$Id <- rownames(a_3) dim(a_3) head(a_3[,1:3]) b_1 <- read.table("Immunomodulator_and_chemokines.txt",header = T,sep = "\t", quote = "",fill = T) dim(b_1) head(b_1) #b_2 <- b_1[b_1$type == "Immunoinhibitor",] b_2 <- b_1[b_1$type == "chemokine",] dim(b_2) head(b_2) data1 <- dplyr::inner_join(b_2,a_3,by="Id") dim(data1) head(data1[,1:6]) data2 <- a_2[,c("C12orf54","FOS","GPR1","OR9A4","MYO5B","RAB39B","KLHL4",data1$Id)] dim(data2) head(data2[,1:5]) library(Hmisc) CorMatrix <- function(cor,p) { ut <- upper.tri(cor) data.frame(row = rownames(cor)[row(cor)[ut]] , column = rownames(cor)[col(cor)[ut]], cor =(cor)[ut], p = p[ut] ) } res <- rcorr(as.matrix(data2),type = "pearson") result_1 <- CorMatrix(res$r, res$P) head(result_1) dim(result_1) result_2 <- result_1[result_1$row == "C12orf54" |result_1$row == "FOS"|result_1$row == "GPR1"|result_1$row == "OR9A4"|result_1$row == "MYO5B"|result_1$row == "RAB39B"|result_1$row == "KLHL4",] dim(result_2) head(b_2) b_2$column <- b_2$Id head(b_2) result_3 <- dplyr::inner_join(result_2,b_2,by="column") dim(result_3) result1 <- result_3[,1:4] head(result1) dim(result1) result1$Regulation <- result1$cor result1[,5][result1[,5] > 0] <- c("postive") result1[,5][result1[,5] < 0] <- c("negative") head(result1) colnames(result1) <- c("gene", "immuneGene", "cor", "pvalue", "Regulation") write.table(result1,file="chemokine.xls",sep="\t",quote=F,col.names=T,row.names = F) a1 <- read.table("chemokine.xls",header = T,sep = "\t", quote = "",fill = T) head(a1) data2 <- a1 library(ggpubr) data2$pvalue <- ifelse(data2$pvalue < 0.05, ifelse(data2$pvalue < 0.01,"**","*"), "") data2$pvalue[1:20] data2$type <- data2$cor summary(data2) data3 <- data2[order(data2$immuneGene,data2$cor),] head(data3) dim(data3) data4 <- data3[data3$pvalue < 0.05,] dim(data4) summary(data4) p <- ggplot(data4,aes(x=gene,y=immuneGene)) + geom_point(aes(colour = cor, size=pvalue)) + labs(x="",y="chemokine") p <- p + scale_colour_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0, limit = c(-1, 1), space = "Lab", name="Pearson\nCorrelation") p <- p + theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + theme(axis.text=element_text(size = 15)) + theme(axis.text.x=element_text(colour = "black",angle=0,hjust=0.5,size = 15)) + theme(axis.text.y=element_text(colour = "black", vjust=0,size = 15)) + theme(axis.title =element_text(size = 20)) + theme(text = element_text(size = 15)) p+rotate_x_text(45) ggsave("chemokine.pdf") ###############Visualization of significant relationship pairs############################### library(ggplot2) library(ggExtra) rt=read.table("symbol.txt",sep="\t",header=T,check.names=F,row.names = 1) dat<-as.data.frame(t(rt)) corr_eqn <- function(x,y,digits=3) { test <- cor.test(x,y,type="pearson") paste(paste0("n = ",length(x)), paste0("r = ",round(test$estimate,digits),"(Pearson)"), paste0("p.value= ",round(test$p.value,digits)), sep = ", ") } gene<-as.numeric(dat$IL6) imucell<-dat$CBS corr_eqn(gene,imucell) gg<-ggplot(dat, aes(x=gene, y=imucell)) + geom_point(color = "black") + geom_smooth(method="loess", se=F,color="blue") + labs( y="CCL15", x="CGREF1", title="Scatterplot")+ labs(title = paste0(corr_eqn(gene,imucell)))+ theme_bw() gg gg2 <- ggMarginal(gg, type="density") gg2 <- ggMarginal(gg, type="density",xparams = list(fill ="orange"), yparams = list(fill ="skyblue")) #####################Visualization of GSEA pathways############################################## files=grep(".tsv",dir(),value=T) data = lapply(files,read.delim) names(data) = files dataSet = ldply(data, data.frame) dataSet$pathway = gsub(".tsv","",dataSet$.id) gseaCol=c("#58CDD9","#7A142C","#5D90BA","#431A3D","#91612D","#6E568C","#E0367A","#D8D155","#64495D","#7CC767","#223D6C","#D20A13","#FFD121","#088247","#11AA4D") pGsea=ggplot(dataSet,aes(x=RANK.IN.GENE.LIST,y=RUNNING.ES,colour=pathway,group=pathway))+ geom_line(size = 1.5) + scale_color_manual(values = gseaCol[1:nrow(dataSet)]) + labs(x = "", y = "Enrichment Score", title = "") + scale_x_continuous(expand = c(0, 0)) + scale_y_continuous(expand = c(0, 0),limits = c(min(dataSet$RUNNING.ES - 0.02), max(dataSet$RUNNING.ES + 0.02))) + theme_bw() + theme(panel.grid = element_blank()) + theme(panel.border = element_blank()) + theme(axis.line = element_line(colour = "black")) + theme(axis.line.x = element_blank(),axis.ticks.x = element_blank(),axis.text.x = element_blank()) + geom_hline(yintercept = 0) + theme(legend.position = c(0,0),legend.justification = c(0,0)) + #legendע?͵?λֵ guides(colour = guide_legend(title = NULL)) + theme(legend.background = element_blank()) + theme(legend.key = element_blank())+theme(legend.key.size=unit(0.5,'cm')) pGene=ggplot(dataSet,aes(RANK.IN.GENE.LIST,pathway,colour=pathway))+geom_tile()+ scale_color_manual(values = gseaCol[1:nrow(dataSet)]) + labs(x = "high expression<----------->low expression", y = "", title = "") + scale_x_discrete(expand = c(0, 0)) + scale_y_discrete(expand = c(0, 0)) + theme_bw() + theme(panel.grid = element_blank()) + theme(panel.border = element_blank()) + theme(axis.line = element_line(colour = "black"))+ theme(axis.line.y = element_blank(),axis.ticks.y = element_blank(),axis.text.y = element_blank())+ guides(color=FALSE) gGsea = ggplot_gtable(ggplot_build(pGsea)) gGene = ggplot_gtable(ggplot_build(pGene)) maxWidth = grid::unit.pmax(gGsea$widths, gGene$widths) gGsea$widths = as.list(maxWidth) gGene$widths = as.list(maxWidth) dev.off() pdf('multipleGSEA.pdf', width=7, height=5.5) par(mar=c(5,5,2,5)) grid.arrange(arrangeGrob(gGsea,gGene,nrow=2,heights=c(.8,.3))) dev.off() ###################The correlation between key genes and OSA disease regulatory genes####################################### library(ggpubr) pFilter=0.99 rt=read.table("ARGexp.txt",sep="\t",header=T,row.names=1,check.names=F) #??ȡ?ļ? rt=log2(rt+1) data=rt Type=read.table("cluster.Immunity.txt",sep="\t",check.names=F,row.names=1,header=F) Type=Type[row.names(data),] colnames(Type)=c("cluster","Subtype") outTab=data.frame() data=cbind(data,Type) for(i in colnames(data[,1:(ncol(data)-2)])){ rt1=data[,c(i,"Subtype")] colnames(rt1)=c("expression","Subtype") ksTest<-kruskal.test(expression ~ Subtype, data = rt1) pValue=ksTest$p.value if(pValue% filter(Subtype %in% c("Normal","OSA")) %>% ggplot(aes(x= gene, y= expression, fill = Subtype, color = Subtype))+ geom_boxplot(alpha=0.3)+ scale_fill_manual(name= "Subtype", values = c("deepskyblue", "hotpink"))+ scale_color_manual(name = "Subtype", values = c("dodgerblue", "plum3"))+ theme_bw()+labs(x="", y="Expression")+ theme(axis.text.x = element_text( vjust = 1,size = 12, hjust = 1,colour = "black"),legend.position="top")+ rotate_x_text(45)+stat_compare_means(aes(group=Subtype),symnum.args=list(cutpoints = c(0, 0.001, 0.01, 0.05, 1), symbols = c("***", "**", "*", "ns")),label = "p.signif",method="anova") library(ggplot2) library(stringr) a_1 <- read.table("symbol.txt",header = T,row.names = 1,sep = "\t", quote = "",fill = T,check.names=F) dim(a_1) head(a_1[,1:3]) a_2 <- as.data.frame(t(a_1)) dim(a_2) head(a_2[,1:3]) a_3 <- a_1 a_3$Id <- rownames(a_3) dim(a_3) head(a_3[,1:3]) b_1 <- read.table("111.txt",header = T,sep = "\t", quote = "",fill = T) dim(b_1) head(b_1) #b_2 <- b_1[b_1$type == "Immunoinhibitor",] b_2 <- b_1[b_1$type == "Disease",] dim(b_2) head(b_2) data1 <- dplyr::inner_join(b_2,a_3,by="Id") dim(data1) head(data1[,1:6]) data2 <- a_2[,c("C12orf54", "FOS","GPR1","OR9A4","MYO5B","RAB39B","KLHL4",data1$Id)] dim(data2) head(data2[,1:5]) library(Hmisc) CorMatrix <- function(cor,p) { ut <- upper.tri(cor) data.frame(row = rownames(cor)[row(cor)[ut]] , column = rownames(cor)[col(cor)[ut]], cor =(cor)[ut], p = p[ut] ) } res <- rcorr(as.matrix(data2),type = "pearson") result_1 <- CorMatrix(res$r, res$P) head(result_1) dim(result_1) result_2 <- result_1[result_1$row == "C12orf54" |result_1$row == "FOS" |result_1$row == "GPR1"|result_1$row == "OR9A4"|result_1$row == "MYO5B"|result_1$row == "RAB39B"|result_1$row == "KLHL4",] dim(result_2) head(b_2) b_2$column <- b_2$Id head(b_2) result_3 <- dplyr::inner_join(result_2,b_2,by="column") dim(result_3) result1 <- result_3[,1:4] head(result1) dim(result1) result1$Regulation <- result1$cor result1[,5][result1[,5] > 0] <- c("postive") result1[,5][result1[,5] < 0] <- c("negative") head(result1) colnames(result1) <- c("gene", "immuneGene", "cor", "pvalue", "Regulation") write.table(result1,file="obstructive sleep apnea.xls",sep="\t",quote=F,col.names=T,row.names = F) a1 <- read.table("obstructive sleep apnea.xls",header = T,sep = "\t", quote = "",fill = T) head(a1) data2 <- a1 library(ggpubr) data2$pvalue <- ifelse(data2$pvalue < 0.05, ifelse(data2$pvalue < 0.01,"**","*"), "") data2$pvalue[1:20] data2$type <- data2$cor summary(data2) data3 <- data2[order(data2$immuneGene,data2$cor),] head(data3) dim(data3) data4 <- data3[data3$pvalue < 0.05,] dim(data4) summary(data4) p <- ggplot(data4,aes(x=gene,y=immuneGene)) + geom_point(aes(colour = cor, size=pvalue)) + labs(x="",y="obstructive sleep apnea genes") p <- p + scale_colour_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0, limit = c(-1, 1), space = "Lab", name="Pearson\nCorrelation") p <- p + theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + theme(axis.text=element_text(size = 15)) + theme(axis.text.x=element_text(colour = "black",angle=0,hjust=0.5,size = 15)) + theme(axis.text.y=element_text(colour = "black", vjust=0,size = 15)) + theme(axis.title =element_text(size = 20)) + theme(text = element_text(size = 15)) p+rotate_x_text(45) ggsave("obstructive sleep apnea.pdf") library(ggplot2) library(ggExtra) rt=read.table("symbol.txt",sep="\t",header=T,check.names=F,row.names = 1) dat<-as.data.frame(t(rt)) corr_eqn <- function(x,y,digits=3) { test <- cor.test(x,y,type="pearson") paste(paste0("n = ",length(x)), paste0("r = ",round(test$estimate,digits),"(Pearson)"), paste0("p.value= ",round(test$p.value,digits)), sep = ", ") } gene<-as.numeric(dat$OR9A4) imucell<-dat$ADIPOQ corr_eqn(gene,imucell) gg<-ggplot(dat, aes(x=gene, y=imucell)) + geom_point(color = "black") + geom_smooth(method="loess", se=F,color="blue") + labs( y="ADIPOQ", x="OR9A4", title="Scatterplot")+ #caption = "Source: midwest")+ labs(title = paste0(corr_eqn(gene,imucell)))+ theme_bw() gg gg2 <- ggMarginal(gg, type="density") gg2 <- ggMarginal(gg, type="density",xparams = list(fill ="orange"), yparams = list(fill ="skyblue")) save.image("1.data")