library("glmnet") library("survival") setwd("D:\\biowolf\\metabolism\\15.lasso") #设置工作目录 rt=read.table("tcgaUniSigExp.txt",header=T,sep="\t",row.names=1) #读取文件 rt$futime=rt$futime/365 #构建模型 x=as.matrix(rt[,c(3:ncol(rt))]) y=data.matrix(Surv(rt$futime,rt$fustat)) fit=glmnet(x, y, family = "cox", maxit = 1000) cvfit=cv.glmnet(x, y, family="cox", maxit = 1000) #输出相关基因系数 coef=coef(fit, s = cvfit$lambda.min) index=which(coef != 0) actCoef=coef[index] lassoGene=row.names(coef)[index] geneCoef=cbind(Gene=lassoGene,Coef=actCoef) write.table(geneCoef,file="geneCoef.txt",sep="\t",quote=F,row.names=F) #输出train组风险值 trainFinalGeneExp=rt[,lassoGene] myFun=function(x){crossprod(as.numeric(x),actCoef)} trainScore=apply(trainFinalGeneExp,1,myFun) outCol=c("futime","fustat",lassoGene) risk=as.vector(ifelse(trainScore>median(trainScore),"high","low")) outTab=cbind(rt[,outCol],riskScore=as.vector(trainScore),risk) write.table(cbind(id=rownames(outTab),outTab),file="tcgaRisk.txt",sep="\t",quote=F,row.names=F) #输出test组风险值 rt=read.table("geoExpTime.txt",header=T,sep="\t",row.names=1) rt[,3:ncol(rt)][rt[,3:ncol(rt)]<0]=0 rt$futime=rt$futime/365 testFinalGeneExp=rt[,lassoGene] testScore=apply(testFinalGeneExp,1,myFun) outCol=c("futime","fustat",lassoGene) risk=as.vector(ifelse(testScore>median(trainScore),"high","low")) outTab=cbind(rt[,outCol],riskScore=as.vector(testScore),risk) write.table(cbind(id=rownames(outTab),outTab),file="geoRisk.txt",sep="\t",quote=F,row.names=F)