##############################################
##### All Rights Reserved ######
##### Beijing PlantTech Technologies ######
##### Email:tech@planttech.com.cn ######
##############################################
#============差异基因ID预处理=======================
# 已知差异基因ID及所有基因列表,如Bra021453,Zm00001d000665等等
# 需要先将该ID转换为KEGG特有的编号形式,如K11532,K01835等
# 转换方法:整理所有基因的序列(氨基酸序列,fasta格式),上传至https://www.kegg.jp/ghostkoala/即可转换
# 得到所有基因的KEGG ID之后,再使用Excel的vlookup功能,将差异基因对应的KEGG ID匹配出来
#====================================================
#===未安装R2HTML的,先使用以下命令安装R2HTML===
options("repos" = c(CRAN="https://mirrors.tuna.tsinghua.edu.cn/CRAN/"))
options(BioC_mirror="https://mirrors.tuna.tsinghua.edu.cn/bioconductor")
if (!requireNamespace("R2HTML", quietly = TRUE)) install.packages('R2HTML',update=F)
#===加载R2HTML软件包,设置工作目录=================
library('R2HTML')
rm(list=ls())
setwd('E:/H:/3/R相关/R相关文件/KEGG R')
#==============设定读入与输出的文件名称============
pathwayfile="kegg.pathway.id" #不变
allfile="all.txt" # 所有基因的ID
degfile="deg1.txt" # 差异基因的ID
outfile="kegg.enrichment" #输出文件
#==========读入文件===============================
pathinfo=read.delim(pathwayfile,header=T,sep="\t",stringsAsFactors = F)
#取所有的pathway信息
pathway=unique(pathinfo[,c('PID','PATHWAY')])
#读取背景基因(所有注释到KEGG的基因)和差异基因列表,以K号表示
allgene=read.delim(allfile,header=F,sep="\t",stringsAsFactors = F)
deggene=read.delim(degfile,header=F,sep="\t",stringsAsFactors = F)
allgene=unique(allgene$V1)
deggene=unique(deggene$V1)
#=======将基因与Pathway中有注释的基因取交集========
deginfo=pathinfo[pathinfo[,1]%in%deggene,]
allinfo=pathinfo[pathinfo[,1]%in%allgene,]
k=length(unique(deginfo[,1])) #差异基因总数
N=length(unique(allinfo[,1])) #总基因数
#=========对每个通路基于超几何分布计算富集p值=======
p<-data.frame()
urlColor<-NULL # used to color the genes in pathway
geneLink<-NULL # used to set hyperlink to DEGs in pathway
for (i in seq(1,nrow(pathway))) {
pid=pathway[i,'PID']
allInPath=allinfo[allinfo[,'PID']%in%pid,]
m=length(unique(allInPath[,1])) #该Pathway基因总数
degInPath=deginfo[deginfo[,'PID']%in%pid,]
x=length(unique(degInPath[,1])) #该Pathway中差异基因数
p[i,1]=x
p[i,2]=m
p[i,3]=phyper(x-1,m,N-m,k,lower.tail=FALSE) #lower.tail=FALSE,计算的是P[X>(x-1)]的概率
urlColor[i]=apply(as.matrix(paste("/",t(degInPath[,1]),'%09red',sep="")),2,paste,collapse="")
Link=paste('', degInPath[,1],'',sep="")
if(x==0){
geneLink[i]="--"
}else{
geneLink[i]=apply(as.matrix(Link),2,paste,collapse=", ")
}
}
#============计算FDR,整理结果==============
fdr=p.adjust(p[,3],method="BH")
output=cbind(pathway,p,fdr)
colnames(output)=c('ID','Pathway','DEGsInPathway','GenesInPathway','Pvalue','FDR')
ind=order(output[,"Pvalue"])#按照P值从小到大对结果排序
output=output[ind,] #用于输出到表格文件
urlColor=urlColor[ind]
DEGs=geneLink[ind]
output2=cbind(output,DEGs) #用于输出到HTML文件
#==============导出到表格文件================
write.table(output,file=paste(outfile,".xls",sep=""),quote = F,row.names = F,col.names = T,sep="\t")
#==============导出结果到HTML文件============
urlTitles <- output[,'ID']
url=paste('https://www.genome.jp/kegg-bin/show_pathway?',gsub("ko", "map", urlTitles),urlColor,sep="")
htmlOUT <- transform(output2, ID = paste('', urlTitles, ''))
target <- HTMLInitFile(Title = "KEGG Pathway Ernichment Results",outdir=getwd(),filename=outfile, BackGroundColor="#f8f8f8",useGrid = T,useLaTeX = F,HTMLframe = F)
write("",target,append = T)
HTML(as.title("KEGG Pathway Ernichment Results"),file=target)
HTML(htmlOUT,file=target,innerBorder = 1,row.names = F,digits=3)
HTMLEndFile()
#============================================