lncRNA.BRCA=read.table("TCGA-BRCA-rnaexpr/TCGA-BRCA-rnaexpr.tsv",header = T,sep = '\t',quote = '') dim(lncRNA.BRCA) lncRNA.BRCA[1:4,1:4] rownames(lncRNA.BRCA)=lncRNA.BRCA$Gene_ID lncRNA.BRCA=lncRNA.BRCA[,-1] lncRNA.BRCA.normal=lncRNA.BRCA[,1:105] colnames(lncRNA.BRCA.normal)=substr(colnames(lncRNA.BRCA.normal),13,24) lncRNA.BRCA.tumor=lncRNA.BRCA[,106:942] colnames(lncRNA.BRCA.tumor)=substr(colnames(lncRNA.BRCA.tumor),12,24) lncRNA.BRCA.paried=cbind(lncRNA.BRCA.normal,lncRNA.BRCA.tumor[,colnames(lncRNA.BRCA.normal)]) dim(lncRNA.BRCA.paried) sample.id=as.data.frame(colnames(lncRNA.BRCA.paried)) head(sample.id) sample.id$type=rep(c("BRCA.Normal","BRCA.Tumor"),c(105,105)) table(sample.id$type) sample.id$id=paste(sample.id$type,sample.id$`colnames(lncRNA.BRCA.paried)`, sep = ".") colnames(lncRNA.BRCA.paried)=sample.id$id lncRNA.BRCA.paried[1:4,1:5] dim(lncRNA.BRCA.paried) # clin=read.table("tcga.breat/BRCA_clinicalMatrix.gz",header = T,sep = '\t',quote = '') head(clin) pdata=data.frame(clin$sampleID,clin$OS.time,clin$OS,clin$RFS.time,clin$RFS) head(pdata) # pdata$clin.sampleID=gsub("-",".",pdata[,1]) head(pdata) # pdata$type=factor(ifelse(as.numeric(substr(pdata$clin.sampleID,14,15)) < 10,'BRCA.Tumor','BRCA.Normal')) head(pdata) pdata$id=substr(pdata$clin.sampleID, 1,12) head(pdata) pdata$Type=paste(pdata$type,pdata$id,sep = ".") sub.pdata=pdata[which(pdata$type=="BRCA.Tumor"),] head(sub.pdata) sub.pdata=sub.pdata[!duplicated(sub.pdata$id),] rownames(sub.pdata)=sub.pdata$id # o.id=intersect(colnames(lncRNA.BRCA.tumor),sub.pdata$id) length(o.id) sel.pdata=sub.pdata[o.id,] head(sel.pdata) dim(sel.pdata) ##########filter using survival################## sur_data=lncRNA.BRCA.tumor #write.csv(lncRNA.BRCA.tumor,"TCGA-BRCA-rnaexpr/lncRNA.BRCA.tumor.csv") dfm=NULL for ( n in 1:nrow(sur_data) ){ x=sum(sur_data[n,]==0) y=rownames(sur_data)[n] df= as.data.frame(x,y) dfm <- rbind(dfm,df) } colnames(dfm)="counts" head(dfm) dim(dfm) dfm$lncRNA_id=rownames(dfm) sel_lnc_id=dfm[dfm$counts<167,] dim(sel_lnc_id)##############______4537 lncRNA selected head(sel_lnc_id) ####################################### sur_data=sur_data[rownames(sel_lnc_id),] dim(sur_data) sur_data[1:4,1:4] sur.da=sel.pdata[,2:3] #sur.da=sel.pdata[,4:5] sur.da=na.omit(sur.da) dim(sur.da) head(sur.da) #sur_data=(lncRNA.BRCA.tumor - rowMeans(lncRNA.BRCA.tumor))/apply(lncRNA.BRCA.tumor,1,sd) sur_data=sur_data[,rownames(sur.da)] dim(sur_data) dfs.status=sur.da$clin.OS dfs=sur.da$clin.OS.time #dfs.status=sur.da$clin.RFS #dfs=sur.da$clin.RFS.time log_rank_p <- apply(sur_data, 1, function(values1){ group=ifelse(values1>median(values1),'high','low') survival_dat <- data.frame(group=group,dfs=dfs,dfs.status=dfs.status,stringsAsFactors = F) library(survival) my.surv <- Surv(survival_dat$dfs,survival_dat$dfs.status) kmfit2 <- survfit(my.surv~survival_dat$group) data.survdiff=survdiff(my.surv~group,data = survival_dat) p.val = 1 - pchisq(data.survdiff$chisq, length(data.survdiff$n) - 1) }) names(log_rank_p[log_rank_p<0.05]) length((log_rank_p[log_rank_p<0.05])) sig.list=names(log_rank_p[log_rank_p<0.05]) # cox_results <- apply(sur_data , 1, function(values1){ group=ifelse(values1>median(values1),'Migh','low') survival_dat <- data.frame(group=group,dfs=dfs,dfs.status=dfs.status,stringsAsFactors = F) library(survival) my.surv <- Surv(survival_dat$dfs,survival_dat$dfs.status) m=coxph(my.surv ~ group, data = survival_dat) beta <- coef(m) se <- sqrt(diag(vcov(m))) HR <- exp(beta) HRse <- HR * se #summary(m) tmp <- round(cbind(coef = beta, se = se, z = beta/se, p = 1 - pchisq((beta/se)^2, 1), HR = HR, HRse = HRse, HRz = (HR - 1) / HRse, HRp = 1 - pchisq(((HR - 1)/HRse)^2, 1), HRCILL = exp(beta - qnorm(.975, 0, 1) * se), HRCIUL = exp(beta + qnorm(.975, 0, 1) * se)), 3) return(tmp['groupMigh',]) }) cox_results[,cox_results[4,]<0.05] Results=cox_results[,cox_results[4,]<0.05] Results[,1:5] dim(Results) write.csv(as.data.frame(t(Results)),"TCGA-BRCA-rnaexpr/deg.381.Results.csv") ov.sur.id=intersect(sig.list,colnames(Results)) length(ov.sur.id) # ##################degs limma ######### library(limma) ########################################## exp=lncRNA.BRCA.paried exp[1:4,1:4] EXP=(exp - rowMeans(exp))/apply(exp,1,sd) EXP=EXP[rownames(sel_lnc_id),] dim(EXP) design2=model.matrix(~0+factor(sample.id$type)) colnames(design2)=c('normal','tumor') head(design2) fit=lmFit(EXP,design2) cont.matrix=makeContrasts('tumor-normal',levels = design2) fit3=contrasts.fit(fit,cont.matrix) fit3=eBayes(fit3) options(digits = 4) topTable(fit3,adjust='BH') sig_fit_2<- topTable(fit3, adjust.method="BH", coef=1,number = Inf,resort.by="p") sig_fit_2$FC=2^(sig_fit_2$logFC) head(sig_fit_2) DEG_2=sig_fit_2 sel_2=DEG_2[which(DEG_2$adj.P.Val <= 0.0001 & abs(DEG_2$logFC) >= 1),] ######__398 lncRNA selected write.csv(sel_2,"TCGA-BRCA-rnaexpr/deg.398.Results.csv") #sel_2=DEG_2[which(DEG_2$adj.P.Val <= 0.0001 & DEG_2$FC>2),] dim(sel_2) head(sel_2) sel_exp=EXP[rownames(sel_2),] dim(sel_exp) # library(ComplexHeatmap) library(circlize) mycol <- colorRamp2(c(-1, 0, 1), c("blue", "white", "red")) annot_df <- data.frame(group=sample.id$type) # Define colors for each levels of qualitative variables # Define gradient color for continuous variable (mpg) col = list(group = c("BRCA.Normal" = "green", "BRCA.Tumor" = "purple") ) # Create the heatmap annotation ha <- HeatmapAnnotation(annot_df, col = col) # Combine the heatmap and the annotation Heatmap(sel_exp, name = "Group", col = mycol, top_annotation = ha,cluster_columns = , show_row_names = T,show_column_names = FALSE) ######################################################### deg.sur.id=intersect(colnames(Results),rownames(sel_2)) length(deg.sur.id)######################################____48 deg and sur lncRNA selected # Combine the heatmap and the annotation sel.sel_exp=sel_exp[deg.sur.id,] Heatmap(sel.sel_exp, name = "Group", col = mycol, top_annotation = ha,cluster_columns = F, show_row_names = T,show_column_names = FALSE) # deg.sur.Results=Results[,deg.sur.id] deg.sur.Results=as.data.frame(t(deg.sur.Results)) head(deg.sur.Results) write.csv(deg.sur.Results,"TCGA-BRCA-rnaexpr/deg.sur.os.Results.csv") # bad.lncRNA=subset(deg.sur.Results,p<0.01) dim(bad.lncRNA) head(bad.lncRNA) # can.lnc=as.data.frame(t(sur_data[rownames(deg.sur.Results),])) dim(can.lnc) dim(sur.da) sur.da=sur.da[rownames(can.lnc),] sur.lnc.comb=cbind(sur.da,can.lnc) sur.lnc.comb[1:3,1:5] write.csv(sur.lnc.comb,"TCGA-BRCA-rnaexpr/sur.lnc.comb.csv") sur.lnc.comb=read.csv("TCGA-BRCA-rnaexpr/sur.lnc.comb.csv",header = T,row.names = 1) sur.lnc.comb$group=ifelse(sur.lnc.comb$ENSG00000230838.1>median(sur.lnc.comb$ENSG00000230838.1),"2","1") #colnames(sur.lnc.comb)="ENSG00000230838.1" #####################________curve_HR library(survival) library(survminer) sur=sur.lnc.comb #ss=Surv(data$DFS, data$DFS_status) ss=Surv(sur$clin.OS.time, sur$clin.OS) cox = summary(coxph(ss~sur$group)) pvalue=cox$sctest['pvalue']; hr = round(cox$conf.int[1],2) hr_left = round(cox$conf.int[3],2) hr_right = round(cox$conf.int[4],2) conf_int = paste(" (", hr_left, " - ", hr_right, ")", sep=""); txt = paste("HR = ", hr, conf_int, "\nlogrank P = ", signif(pvalue, 2), sep="") text(grconvertX(0.98, "npc"), grconvertY(.97, "npc"), labels=txt, adj=c(1, 1)) list(pvalue, hr, hr_left, hr_right) ####################################################### fit2 <- survfit(Surv(sur$clin.OS.time, sur$clin.OS) ~ group, data = sur) ggsurvplot(fit2, data = sur, title =paste(colnames(sur.lnc.comb)[16],"HR","=", hr," (", hr_left, " - ", hr_right, ")"," logrank","P = ", pvalue, sep=""), pval = TRUE, pval.method = TRUE, # Add p-value & method name surv.median.line = "hv", # Add median survival lines legend.title = "group", # Change legend titles legend.labs = c("low","high"), # Change legend labels palette = c("#27408B","#CD2626"), # Use JCO journal color palette risk.table = TRUE, # Add No at risk table cumevents = TRUE, # Add cumulative No of events table tables.height = 0.15, # Specify tables height tables.theme = theme_cleantable(), # Clean theme for tables tables.y.text = FALSE, # Hide tables y axis text ylab=" Overall survival probability", xlab="Time(days)" ) ########################################### write.table(lncRNA.BRCA.paried,"TCGA-BRCA-rnaexpr/lncRNA.BRCA.paried.txt",quote = F,sep = "\t") lncRNA.BRCA.paried[1:4,1:4] dim(lncRNA.BRCA.paried) exp.lnc=as.data.frame(t(EXP[975,])) colnames(exp.lnc)="lnc.ENSG00000230838.1" head(exp.lnc) head(sample.id) bax.plot.data=cbind(sample.id,exp.lnc) head(bax.plot.data) # library(dplyr) library(ggplot2) library(ggpubr) theme_set(theme_pubclean()) e <- ggplot(bax.plot.data, aes(x = type, y =lnc.ENSG00000230838.1 )) e + geom_boxplot( aes(fill = type), position = position_dodge(0.9) ) + scale_fill_manual(values = c("#27408B","#CD2626"))+ stat_compare_means(method = "t.test") ggpaired(bax.plot.data, x = "type", y = "lnc.ENSG00000230838.1", color = "type", line.color = "gray", line.size = 0.4, palette = "lancet")+ stat_compare_means(paired = TRUE) ############ # clin.sum=data.frame( clin$sampleID, clin$OS.time, clin$OS, clin$RFS.time, clin$RFS, clin$age_at_initial_pathologic_diagnosis, clin$gender, clin$pathologic_M, clin$pathologic_N, clin$pathologic_stage, clin$pathologic_T, clin$ER_Status_nature2012, clin$PR_Status_nature2012, clin$HER2_Final_Status_nature2012, clin$additional_pharmaceutical_therapy, clin$lymph_node_examined_count, clin$number_of_lymphnodes_positive_by_he, clin$PAM50_mRNA_nature2012, clin$PAM50Call_RNAseq ) rownames(clin.sum)=clin.sum$clin.sampleID head(clin.sum) clin.sum.sel=cbind(sel.pdata$id,clin.sum[sel.pdata$clin.sampleID,]) dim(clin.sum.sel) dim(sel.pdata) colnames(clin.sum.sel)[1]="id" rownames(clin.sum.sel)=clin.sum.sel$id ########## clin.sum.sel[1:4,1:4] dim(sur.lnc.comb) sur.lnc.comb[1:4,1:4] clin.sum.824=cbind(sur.lnc.comb$group,clin.sum.sel[rownames(sur.lnc.comb),]) clin.sum.824=cbind(clin.sum.824,sur.lnc.comb$ENSG00000230838.1) dim(clin.sum.824) colnames(clin.sum.824)[1]="group.lnc" colnames(clin.sum.824)[22]="ENSG00000230838.1" clin.sum.824$group.LNC=ifelse(clin.sum.824$group.lnc=="1","low","high") clin.sum.824$age=ifelse(clin.sum.824$clin.age_at_initial_pathologic_diagnosis <50, "less.50","lager.50") head(clin.sum.824) write.csv(clin.sum.824,"TCGA-BRCA-rnaexpr/clin.sum.824.samples.csv") #################__cox regression analysis############################### clin.data=read.csv("TCGA-BRCA-rnaexpr/clin.sum.824.samples1.csv",header = T,row.names = 1) head(clin.data) colnames(clin.data)[1]="group" clin.data$LNR=clin.data$clin.number_of_lymphnodes_positive_by_he/clin.data$clin.lymph_node_examined_count head(clin.data) ss = Surv(clin.data$clin.OS.time, clin.data$clin.OS) cox = summary(coxph(ss~clin.data$group+clin.data$clin.age_at_initial_pathologic_diagnosis+ as.factor(clin.data$clin.pathologic_stage)+ clin.data$clin.PAM50Call_RNAseq+ clin.data$clin.ER_Status_nature2012+clin.data$clin.PR_Status_nature2012+ clin.data$clin.HER2_Final_Status_nature2012,data=clin.data)) cox #########################_________Forestplot library(forestplot) # Cochrane data from the 'rmeta'-package cochrane_from_rmeta <- structure(list( HR = c(NA, NA,1.88, 1.04, NA, 1.88, 3.72, 7.24, NA, 3.31, 1.56, 1.86, 2.53, 0.67, 0.56, 0.39 ), lower = c(NA, NA,1.19, 1.02, NA, 0.93, 1.77, 2.94, NA, 1.20, 0.64, 0.76, 0.64, 0.32, 0.29, 0.17), upper = c(NA, NA,2.97, 1.06, NA, 3.80, 7.79, 17.86, NA, 9.13, 3.79, 4.60, 9.98, 1.42, 1.08, 0.89)), .Names = c("HR", "lower", "upper"), row.names = c(NA, -16L), class = "data.frame") tabletext<-cbind( c("", "Multivariate risk factors", "risk_group","age","stage I (ref)","stage II","stage III","stage IV","PAM50.Basal(ref)","PAM50.Her2","PAM50.LumA","PAM50.LumB","PAM50.Normal","ER.status","PR.status","HER2.status"), c("", "HR", "1.88", "1.04", NA, "1.88", "3.72", "7.24", NA, "3.31", "1.56", "1.86", "2.53", "0.67", "0.56", "0.39" ), c("", "95%CI", "(1.19-2.97)","(1.02-1.06)",NA,"(0.93-3.79)","(1.77-7.79)","(2.94-17.86)",NA,"(1.20-9.13)","(0.64-3.79)","(0.76-4.60)","(0.64-9.98)","(0.32-1.42)","(0.29-1.08)","(0.17-0.89)"), c("", "P value","0.007", "<0.0001", NA, "0.077", "0.001", "<0.0001", NA, "0.021", "0.325", "0.177", "0.184", "0.301", "0.083", "0.026" )) forestplot(tabletext, cochrane_from_rmeta, clip=c(0.1,Inf), lwd.zero=2,ci.vertices.height=0.05, xlog=TRUE, boxsize = .3,lwd.ci=3,lty.ci=1,ci.vertices=TRUE, col=fpColors(box="#27408B",line="#CD2626", summary="royalblue")) ########################################################## cox = summary(coxph(ss~clin.data$clin.HER2_Final_Status_nature2012,data=clin.data)) cox # cochrane_from_rmeta <- structure(list( HR = c(NA, NA,1.63, 1.03, NA, 1.41, 2.72, 10.03, NA, 2.66, 1.22, 2.04, 1.70, 1.00, 0.93, 0.96 ), lower = c(NA, NA,1.13, 1.02, NA, 0.80, 1.48, 4.58, NA, 1.31, 0.71, 1.14, 0.63, 0.65, 0.63, 0.53 ), upper = c(NA, NA,2.33, 1.05, NA, 2.50, 5.00, 21.90, NA, 5.39, 2.08, 3.67, 4.60, 1.55, 1.37, 1.72 )), .Names = c("HR", "lower", "upper"), row.names = c(NA, -16L), class = "data.frame") tabletext<-cbind( c("", "Uniivariate risk factors", "risk_group","age","stage I (ref)","stage II","stage III","stage IV","PAM50.Basal(ref)","PAM50.Her2","PAM50.LumA","PAM50.LumB","PAM50.Normal","ER.status","PR.status","HER2.status"), c("", "HR", "1.63","1.03", NA, "1.41", "2.72", "10.03", NA, "2.66", "1.22", "2.04", "1.70", "1.00", "0.93", "0.96" ), c("", "95%CI", "(1.13-2.33)","(1.02-1.05)",NA,"(0.80-2.50)","(1.48-5.00)","(4.58-21.90)",NA,"(1.31-5.39)","(0.71-2.08)","(1.14-3.67)","(0.63-4.60)","(0.65-1.55)","(0.63-1.37)","(0.53-1.72)"), c("", "P value","0.008", "<0.0001", NA, "0.236", "0.001", "<0.0001", NA, "0.007", "0.472", "0.017", "0.295", "0.980", "0.700", "0.880" )) forestplot(tabletext, cochrane_from_rmeta, clip=c(0.1,Inf), lwd.zero=2,ci.vertices.height=0.05, xlog=TRUE, boxsize = .3,lwd.ci=3,lty.ci=1,ci.vertices=TRUE, col=fpColors(box="#27408B",line="#CD2626", summary="royalblue")) ########################survival subgroup analysis########## ########### sub=subset(clin.data,clin.data$clin.ER_Status_nature2012=="Negative"&clin.data$clin.PR_Status_nature2012=="Negative"&clin.data$clin.HER2_Final_Status_nature2012=="Positive") dim(sub) ss=Surv(sub$clin.OS.time, sub$clin.OS) plot(survfit(ss~sub$group.LNC), col=c("#FF0000","#27408B"), main="survival-lnc.ENSG00000230838.1-ER.N-PR.N-HER2.P", cex.main=.9, xlab="Overall survival [days]", ylab="Overall Survival Probability",mark.time=TRUE, lwd=3,las=1) cox = summary(coxph(ss~sub$group)) pvalue=cox$sctest['pvalue']; hr = round(cox$conf.int[1],2) hr_left = round(cox$conf.int[3],2) hr_right = round(cox$conf.int[4],2) conf_int = paste(" (", hr_left, " - ", hr_right, ")", sep=""); txt = paste("HR = ", hr, conf_int, "\nlogrank P = ", signif(pvalue, 2), sep="") text(grconvertX(0.98, "npc"), grconvertY(.97, "npc"), labels=txt, adj=c(1, 1)) list(pvalue, hr, hr_left, hr_right) legendtext = paste(levels(sub$group.LNC),": ",table(sub$group.LNC), "(",table(sub$group.LNC[sub$clin.OS==1]),")",sep="") legend("bottomleft",col=c("#FF0000","#27408B"), legend=legendtext,lty=1,lwd=3) ##################__deg difference between pr, er and her3 status ################################# colnames(clin.data) mean(clin.data$ENSG00000230838.1) sd(clin.data$ENSG00000230838.1) clin.data$z.ENSG00000230838.1=(clin.data$ENSG00000230838.1-6.642)/6.801 # sub=subset(clin.data,clin.data$clin.HER2_Final_Status_nature2012=="Positive"|clin.data$clin.HER2_Final_Status_nature2012=="Negative") e <- ggplot(sub, aes(x = clin.HER2_Final_Status_nature2012, y =z.ENSG00000230838.1 )) e + geom_boxplot( aes(fill = clin.HER2_Final_Status_nature2012), position = position_dodge(0.9) ) + scale_fill_manual(values = c("#27408B","#CD2626"))+stat_compare_means(method = "t.test") ###################___pROC___######### library(pROC) roc.data=read.csv("TCGA-BRCA-rnaexpr/roc.csv",header = T, row.names = 1) head(roc.data) #roc.data=clin.data[which(clin.data$clin.OS.time