#normalize samples for CIBERSORT
#limma was originally used, i.e., for microarray analyses and microarray normalisation, and i highly recommend Professor John Quackenbush's great paper on this: http://www.cs.cmu.edu/~zivbj/class05/reading/norm.pdf
library("limma")
setwd()         
rt=read.table("symbol.txt",sep="\t",header=T,check.names=F)         
rt=as.matrix(rt)
rownames(rt)=rt[,1]
exp=rt[,2:ncol(rt)]
dimnames=list(rownames(exp),colnames(exp))
data=matrix(as.numeric(as.matrix(exp)),nrow=nrow(exp),dimnames=dimnames)
data=avereps(data)
data=data[rowMeans(data)>0,]
group=c(rep("normal",normalCount),rep("tumor",tumorCount))
design <- model.matrix(~factor(group))
colnames(design)=levels(factor(group))
rownames(design)=colnames(data)
v <-voom(data, design = design, plot = F, save.plot = F)
out=v$E
out=rbind(ID=colnames(out),out)
write.table(out,file="uniq.symbol.txt",sep="\t",quote=F,col.names=F)        

#CIBERSORT
#CIBERSORT is an analytical tool developed by Newman et al. to provide an estimation of the abundances of member cell types in a mixed cell population, using gene expression data.
#install.packages('e1071')
#install.packages('parallel')
#if (!requireNamespace("BiocManager", quietly = TRUE))
#    install.packages("BiocManager")
#BiocManager::install("preprocessCore", version = "3.8")
setwd()
source("CIBERSORT.R")
results=CIBERSORT("ref.txt", "uniq.symbol.txt", perm=1000, QN=TRUE)

#estimate
#From within an R session, type the following:
#library(utils)
#rforge <- "http://r-forge.r-project.org"
#install.packages("estimate", repos=rforge, dependencies=TRUE)
#Note that R-Forge only provides binary packages for the current R release; if you need a package for an older version of R, try installing its corresponding source package instead.
#Execute the following within the R environment to view the man pages.
#help(package="estimate")
filterCommonGenes(input.f="uniqsymbol.txt", 
                  output.f="commonGenes.gct", 
                  id="GeneSymbol")

estimateScore(input.ds = "commonGenes.gct",
              output.ds="estimateScore.gct", 
              platform="illumina")
plotPurity(scores="estimateScore.gct", 
           samples= "all_samples", 
           platform="illumina", output.dir = "plots")
scores=read.table("estimateScore.gct",skip = 2,header = T)
rownames(scores)=scores[,1]
scores=t(scores[,3:ncol(scores)])
rownames(scores)=gsub("\\.","\\-",rownames(scores))
out=rbind(ID=colnames(scores),scores)
write.table(out,file="scores.txt",sep="\t",quote=F,col.names=F)

# lasso regression model
#When alpha=1 this is a group-lasso penalty, and otherwise it mixes with quadratic just like elasticnet. A small detail in the Cox model: if death times are tied with censored times, we assume the censored times occurred just before the death times in computing the Breslow approximation; if users prefer the usual convention of after, they can add a small number to all censoring times to achieve this effect.
#Lasso
#Glmnet webpage with four vignettes https://glmnet.stanford.edu
install.packages("glmnet")
library(glmnet)
dat=read.table("timefilter.txt",header=T,sep="\t",check.names=F)
dim(dat)
dat.outcome=cbind(time = dat$futime, status = dat$fustat)
dat.X = as.matrix(dat[,-c(1:3)])
set.seed(12345)
fit5 = glmnet(dat.X, dat.outcome, family="cox")
plot(fit5)
fit5=cv.glmnet(dat.X, dat.outcome, family="cox",type="class")
plot(fit5, xvar="lambda", label=TRUE)
coef.min = coef(fit5, s = fit5$lambda.min)
active.min = which(coef.min != 0)
index.min = coef.min[active.min]
index.min
coef.min
row.names(coef.min)[active.min]
predict(cvfit, newx=x[1:5,], type="response", s="lambda.1se") 

#nomogram
library(foreign)
library(rms)  
#Data packaging
newdata<-read.csv("risk5.csv")
dd<-datadist(newdata)
options(datadist="dd")
#Generator function
f1<-psm(Surv(time,status)~Gender+Age+IPI+clinical stage+PIS,data=newdata,dist="lognormal")
med<-Quantile(f1)
surv<-Survival(f1)
nom<-nomogram(f1,fun=list(function(x) surv(365,x),function(x) surv(1095,x)),funlabel=c("1-year OS","3-year OS"))
plot(nom,xfrac=0.2, cex.axis= 1.05)
##
coxpbc<-cph(formula = Surv(time,status) ~  Gender+Age+BRAF+riskScore,data= newdata,x=T,y=T,surv = T,na.action=na.delete)  #,time.inc =2920
print(coxpbc)
## f2
f2 <- psm(Surv(time,status) ~ Gender+Age+BRAF+riskScore, data =  newdata, x=T, y=T, dist='lognormal') 
## c-index
rcorrcens(Surv(time,status) ~ predict(f2), data =  newdata)
## CURVE
cal1 <- calibrate(f2, cmethod='KM', method="boot", u=1095, m=140, B=480)
plot(cal1)