#normalize samples for CIBERSORT #limma was originally used, i.e., for microarray analyses and microarray normalisation, and i highly recommend Professor John Quackenbush's great paper on this: http://www.cs.cmu.edu/~zivbj/class05/reading/norm.pdf library("limma") setwd() rt=read.table("symbol.txt",sep="\t",header=T,check.names=F) rt=as.matrix(rt) rownames(rt)=rt[,1] exp=rt[,2:ncol(rt)] dimnames=list(rownames(exp),colnames(exp)) data=matrix(as.numeric(as.matrix(exp)),nrow=nrow(exp),dimnames=dimnames) data=avereps(data) data=data[rowMeans(data)>0,] group=c(rep("normal",normalCount),rep("tumor",tumorCount)) design <- model.matrix(~factor(group)) colnames(design)=levels(factor(group)) rownames(design)=colnames(data) v <-voom(data, design = design, plot = F, save.plot = F) out=v$E out=rbind(ID=colnames(out),out) write.table(out,file="uniq.symbol.txt",sep="\t",quote=F,col.names=F) #CIBERSORT #CIBERSORT is an analytical tool developed by Newman et al. to provide an estimation of the abundances of member cell types in a mixed cell population, using gene expression data. #install.packages('e1071') #install.packages('parallel') #if (!requireNamespace("BiocManager", quietly = TRUE)) # install.packages("BiocManager") #BiocManager::install("preprocessCore", version = "3.8") setwd() source("CIBERSORT.R") results=CIBERSORT("ref.txt", "uniq.symbol.txt", perm=1000, QN=TRUE) #estimate #From within an R session, type the following: #library(utils) #rforge <- "http://r-forge.r-project.org" #install.packages("estimate", repos=rforge, dependencies=TRUE) #Note that R-Forge only provides binary packages for the current R release; if you need a package for an older version of R, try installing its corresponding source package instead. #Execute the following within the R environment to view the man pages. #help(package="estimate") filterCommonGenes(input.f="uniqsymbol.txt", output.f="commonGenes.gct", id="GeneSymbol") estimateScore(input.ds = "commonGenes.gct", output.ds="estimateScore.gct", platform="illumina") plotPurity(scores="estimateScore.gct", samples= "all_samples", platform="illumina", output.dir = "plots") scores=read.table("estimateScore.gct",skip = 2,header = T) rownames(scores)=scores[,1] scores=t(scores[,3:ncol(scores)]) rownames(scores)=gsub("\\.","\\-",rownames(scores)) out=rbind(ID=colnames(scores),scores) write.table(out,file="scores.txt",sep="\t",quote=F,col.names=F) # lasso regression model #When alpha=1 this is a group-lasso penalty, and otherwise it mixes with quadratic just like elasticnet. A small detail in the Cox model: if death times are tied with censored times, we assume the censored times occurred just before the death times in computing the Breslow approximation; if users prefer the usual convention of after, they can add a small number to all censoring times to achieve this effect. #Lasso #Glmnet webpage with four vignettes https://glmnet.stanford.edu install.packages("glmnet") library(glmnet) dat=read.table("timefilter.txt",header=T,sep="\t",check.names=F) dim(dat) dat.outcome=cbind(time = dat$futime, status = dat$fustat) dat.X = as.matrix(dat[,-c(1:3)]) set.seed(12345) fit5 = glmnet(dat.X, dat.outcome, family="cox") plot(fit5) fit5=cv.glmnet(dat.X, dat.outcome, family="cox",type="class") plot(fit5, xvar="lambda", label=TRUE) coef.min = coef(fit5, s = fit5$lambda.min) active.min = which(coef.min != 0) index.min = coef.min[active.min] index.min coef.min row.names(coef.min)[active.min] predict(cvfit, newx=x[1:5,], type="response", s="lambda.1se") #nomogram library(foreign) library(rms) #Data packaging newdata<-read.csv("risk5.csv") dd<-datadist(newdata) options(datadist="dd") #Generator function f1<-psm(Surv(time,status)~Gender+Age+IPI+clinical stage+PIS,data=newdata,dist="lognormal") med<-Quantile(f1) surv<-Survival(f1) nom<-nomogram(f1,fun=list(function(x) surv(365,x),function(x) surv(1095,x)),funlabel=c("1-year OS","3-year OS")) plot(nom,xfrac=0.2, cex.axis= 1.05) ## coxpbc<-cph(formula = Surv(time,status) ~ Gender+Age+BRAF+riskScore,data= newdata,x=T,y=T,surv = T,na.action=na.delete) #,time.inc =2920 print(coxpbc) ## f2 f2 <- psm(Surv(time,status) ~ Gender+Age+BRAF+riskScore, data = newdata, x=T, y=T, dist='lognormal') ## c-index rcorrcens(Surv(time,status) ~ predict(f2), data = newdata) ## CURVE cal1 <- calibrate(f2, cmethod='KM', method="boot", u=1095, m=140, B=480) plot(cal1)