############################################## ##### All Rights Reserved ###### ##### Beijing PlantTech Technologies ###### ##### Email:tech@planttech.com.cn ###### ############################################## #============差异基因ID预处理======================= # 已知差异基因ID及所有基因列表,如Bra021453,Zm00001d000665等等 # 需要先将该ID转换为KEGG特有的编号形式,如K11532,K01835等 # 转换方法:整理所有基因的序列(氨基酸序列,fasta格式),上传至https://www.kegg.jp/ghostkoala/即可转换 # 得到所有基因的KEGG ID之后,再使用Excel的vlookup功能,将差异基因对应的KEGG ID匹配出来 #==================================================== #===未安装R2HTML的,先使用以下命令安装R2HTML=== options("repos" = c(CRAN="https://mirrors.tuna.tsinghua.edu.cn/CRAN/")) options(BioC_mirror="https://mirrors.tuna.tsinghua.edu.cn/bioconductor") if (!requireNamespace("R2HTML", quietly = TRUE)) install.packages('R2HTML',update=F) #===加载R2HTML软件包,设置工作目录================= library('R2HTML') rm(list=ls()) setwd('E:/H:/3/R相关/R相关文件/KEGG R') #==============设定读入与输出的文件名称============ pathwayfile="kegg.pathway.id" #不变 allfile="all.txt" # 所有基因的ID degfile="deg1.txt" # 差异基因的ID outfile="kegg.enrichment" #输出文件 #==========读入文件=============================== pathinfo=read.delim(pathwayfile,header=T,sep="\t",stringsAsFactors = F) #取所有的pathway信息 pathway=unique(pathinfo[,c('PID','PATHWAY')]) #读取背景基因(所有注释到KEGG的基因)和差异基因列表,以K号表示 allgene=read.delim(allfile,header=F,sep="\t",stringsAsFactors = F) deggene=read.delim(degfile,header=F,sep="\t",stringsAsFactors = F) allgene=unique(allgene$V1) deggene=unique(deggene$V1) #=======将基因与Pathway中有注释的基因取交集======== deginfo=pathinfo[pathinfo[,1]%in%deggene,] allinfo=pathinfo[pathinfo[,1]%in%allgene,] k=length(unique(deginfo[,1])) #差异基因总数 N=length(unique(allinfo[,1])) #总基因数 #=========对每个通路基于超几何分布计算富集p值======= p<-data.frame() urlColor<-NULL # used to color the genes in pathway geneLink<-NULL # used to set hyperlink to DEGs in pathway for (i in seq(1,nrow(pathway))) { pid=pathway[i,'PID'] allInPath=allinfo[allinfo[,'PID']%in%pid,] m=length(unique(allInPath[,1])) #该Pathway基因总数 degInPath=deginfo[deginfo[,'PID']%in%pid,] x=length(unique(degInPath[,1])) #该Pathway中差异基因数 p[i,1]=x p[i,2]=m p[i,3]=phyper(x-1,m,N-m,k,lower.tail=FALSE) #lower.tail=FALSE,计算的是P[X>(x-1)]的概率 urlColor[i]=apply(as.matrix(paste("/",t(degInPath[,1]),'%09red',sep="")),2,paste,collapse="") Link=paste('', degInPath[,1],'',sep="") if(x==0){ geneLink[i]="--" }else{ geneLink[i]=apply(as.matrix(Link),2,paste,collapse=", ") } } #============计算FDR,整理结果============== fdr=p.adjust(p[,3],method="BH") output=cbind(pathway,p,fdr) colnames(output)=c('ID','Pathway','DEGsInPathway','GenesInPathway','Pvalue','FDR') ind=order(output[,"Pvalue"])#按照P值从小到大对结果排序 output=output[ind,] #用于输出到表格文件 urlColor=urlColor[ind] DEGs=geneLink[ind] output2=cbind(output,DEGs) #用于输出到HTML文件 #==============导出到表格文件================ write.table(output,file=paste(outfile,".xls",sep=""),quote = F,row.names = F,col.names = T,sep="\t") #==============导出结果到HTML文件============ urlTitles <- output[,'ID'] url=paste('https://www.genome.jp/kegg-bin/show_pathway?',gsub("ko", "map", urlTitles),urlColor,sep="") htmlOUT <- transform(output2, ID = paste('', urlTitles, '')) target <- HTMLInitFile(Title = "KEGG Pathway Ernichment Results",outdir=getwd(),filename=outfile, BackGroundColor="#f8f8f8",useGrid = T,useLaTeX = F,HTMLframe = F) write("",target,append = T) HTML(as.title("KEGG Pathway Ernichment Results"),file=target) HTML(htmlOUT,file=target,innerBorder = 1,row.names = F,digits=3) HTMLEndFile() #============================================