library(parallel) library(edgeR) library(limma) library(airway) # https://www.ncbi.nlm.nih.gov/sra/?term=PRJNA669277 data=read.csv("~/bioinfomatrics/salmon_output/SRR12959455_output/quant.sf",sep = "\t") data$Name=as.character(data$Name) data$type=data$Name data$Name=lapply(data$Name,function(x){ x=as.character(x) str=unlist(strsplit(x,"\\|")) geneName=str[6] x=geneName }) data$type=lapply(data$type,function(x){ x=as.character(x) str=unlist(strsplit(x,"\\|")) gengType=str[8] x=gengType }) data$Name=as.character(data$Name) data$type=as.character(data$type) data=data[data$type=="protein_coding",] library(dplyr) data=data[order(data$TPM,decreasing = T),] data=data[!duplicated(data$Name),] # write.csv(data,"~/bioinfomatrics/R_workingSpace/data.csv") ############################################# group_list=c("ss","ss","ss","con","con","con") exprSet=total exprSet=log2(exprSet+1) suppressMessages(library(limma)) design <- model.matrix(~0+factor(group_list)) colnames(design)=levels(factor(group_list)) rownames(design)=colnames(exprSet) fit <- lmFit(exprSet, design) cont.matrix=makeContrasts(contrasts=c('ss-con'),levels = design) fit2=contrasts.fit(fit,cont.matrix) fit2=eBayes(fit2) tempOutput = topTable(fit2, coef='ss-con', n=Inf) DEG = na.omit(tempOutput) choose_DEG=DEG[abs(DEG$logFC)>=1 & DEG$P.Value<=0.05,] # save(total,DEG,choose_DEG,file = "~/bioinfomatrics/R_workingSpace/LT_data.RData") load(file = "~/bioinfomatrics/R_workingSpace/extra_analysis/data.RData") choose_DEG=DEG[abs(DEG$logFC)>=1.5 & DEG$P.Value<=0.05,] write.csv(choose_DEG,"~/bioinfomatrics/R_workingSpace/extra_analysis/choose_DEG.csv") { data=read.table("~/bioinfomatrics/TCGA/rnaSeq/TCGA-LUAD-Counts.csv",header = T,sep = ",") geneName<- data.frame(ENSEMBL=data$X) df <- bitr(unique(geneName$ENSEMBL), fromType = "ENSEMBL", toType = c( "SYMBOL"), OrgDb = org.Hs.eg.db) data=merge(data,df,by.x="X",by.y="ENSEMBL") expr=data expr=expr[!duplicated(expr$SYMBOL),] rownames(expr)=expr$SYMBOL expr=expr[,c(-1,-553)] group_list=ifelse(as.numeric(substr(colnames(expr),14,15)) < 10,'tumor','normal') exprSet=expr suppressMessages(library(limma)) design <- model.matrix(~0+factor(group_list)) colnames(design)=levels(factor(group_list)) rownames(design)=colnames(exprSet) dge <- DGEList(counts=exprSet,group=group_list) dge <- calcNormFactors(dge) logCPM <- cpm(dge, log=TRUE, prior.count=2) #v <- voom(exprSet,design,plot=TRUE) fit <- lmFit(logCPM, design) cont.matrix=makeContrasts(contrasts=c('tumor-normal'),levels = design) fit2=contrasts.fit(fit,cont.matrix) fit2=eBayes(fit2) tempOutput = topTable(fit2, coef='tumor-normal', n=Inf) Dvoom = na.omit(tempOutput) } Dvoom=Dvoom[!str_detect(rownames(Dvoom),"LINC|LOC|MIR"),] save(Dvoom_LUAD,file="LUAD_deg.RData") choose_LUAD=Dvoom[abs(Dvoom$logFC)>=2 & Dvoom$P.Value<=0.05,] load(file = "SS_deg.RData") load(file = "LUAD_deg.RData") pred_target=read.csv("PM_targets.csv") pred_target$Gene=toupper(pred_target$Gene) choose_ss=DEG_ss[abs(DEG_ss$logFC)>=1 & DEG_ss$P.Value<=0.05,] choose_ss$gene=rownames(choose_ss) choose_LUAD=Dvoom_LUAD[abs(Dvoom_LUAD$logFC)>=1 & Dvoom_LUAD$P.Value<=0.05,] choose_LUAD$gene=rownames(choose_LUAD) cor_gene=intersect(choose_ss$gene,choose_LUAD$gene) target=intersect(cor_gene,pred_target$Gene) print(target) NSCLC_DEG=a_gene solasonine_DEG=b_gene predict_target=c_gene venn.diagram(list(predict_target=pred_target, solasonine_DEG=choose_ss,NSCLC_DEG=choose_LUAD), filename="ss_analysis.tiff", lwd=1, lty=1, col=c('#0099CC','#FF6666','#FFCC99'), fill=c('#0099CC','#FF6666','#FFCC99'), cat.col=c('#0099CC','#FF6666','#FFCC99'), cat.cex = 1, rotation.degree = 0, main = "", main.cex = 2, sub.cex = 2, cex=1.5, alpha = 0.5, reverse=TRUE) data=read.table("~/bioinfomatrics/TCGA/rnaSeq/TCGA-LUAD-Counts.csv",header = T,sep = ",") geneName<- data.frame(ENSEMBL=data$X) df <- bitr(unique(geneName$ENSEMBL), fromType = "ENSEMBL", toType = c( "SYMBOL"), OrgDb = org.Hs.eg.db) data=merge(data,df,by.x="X",by.y="ENSEMBL") expr=data expr=expr[!duplicated(expr$SYMBOL),] rownames(expr)=expr$SYMBOL expr=expr[,c(-1,-553)] expr=log(expr+1) group_list=ifelse(as.numeric(substr(colnames(expr),14,15)) < 10,'tumor','normal') showData=data.frame("gene"=t(expr["ACHE",]),"group"=group_list) colnames(showData)=c("gene","group") ggboxplot(showData, x="group", y="gene", color = "group", palette = "jco", add = "jitter")+stat_compare_means() data=read.csv("~/bioinfomatrics/TCGA/depMap/CCLE_expression.csv",header = T,row.names = 1) effect=read.csv("~/bioinfomatrics/TCGA/depMap/Achilles_gene_effect.csv",header = T,row.names = 1) sample_info <- read.csv("~/bioinfomatrics/TCGA/depMap/sample_info.csv") choose_sample=sample_info[sample_info$lineage=="lung" & sample_info$lineage_subtype!="NSCLC",] choose_data=data[rownames(data) %in% choose_sample$DepMap_ID,] colnames(choose_data)=lapply(colnames(choose_data),function(x) unlist(str_split(x,"\\."))[1]) choose_data$cell=rownames(choose_data) choose_effect=effect[rownames(effect) %in% rownames(choose_data),] colnames(choose_effect)=lapply(colnames(choose_effect),function(x) unlist(str_split(x,"\\."))[1]) choose_effect$cell=rownames(choose_effect) gene_effect=data.frame("effect"=choose_effect$ACHE,"effect_cell"=choose_effect$cell) gene_expr=data.frame("expr"=choose_data$ACHE,"expr_cell"=choose_data$cell) total=merge(gene_expr,gene_effect,by.x="expr_cell",by.y="effect_cell") library(ggpubr) ggscatter(total, x = "effect", y = "expr", add = "reg.line", conf.int = TRUE, cor.coef = TRUE, cor.method = "pearson", xlab = "CRISPR_effect", ylab = "expr") ggplot(total, aes(x=effect, y=expr)) + geom_hex(col = "white")+ theme_bw()+ scale_fill_gradient(low="#00BCF2", high="#00188F") + geom_smooth(method=lm , color="black", se=FALSE)+ theme( panel.grid.major = element_blank(), panel.grid.minor = element_blank(), legend.position = c(0.9,0.2)) RNAi_effect=read.csv("~/bioinfomatrics/TCGA/depMap/Achilles_gene_dependency.csv",header = T,row.names = 1) sample_info <- read.csv("~/bioinfomatrics/TCGA/depMap/sample_info.csv") data=read.csv("~/bioinfomatrics/TCGA/depMap/CCLE_expression.csv",header = T,row.names = 1) choose_sample=sample_info[sample_info$lineage=="lung" & sample_info$lineage_subtype=="NSCLC",] gene="NGLY1" choose_data=RNAi_effect[rownames(RNAi_effect) %in% choose_sample$DepMap_ID,] colnames(choose_data)=lapply(colnames(choose_data),function(x) unlist(str_split(x,"\\."))[1]) choose_data$cell=rownames(choose_data) choose_data=choose_data[,c(gene,"cell")] colnames(choose_data)=c("effect","cell") choose_expr=data[rownames(data) %in% choose_sample$DepMap_ID,] colnames(choose_expr)=lapply(colnames(choose_expr),function(x) unlist(str_split(x,"\\."))[1]) choose_expr$cell=rownames(choose_expr) choose_expr=choose_expr[,c(gene,"cell")] colnames(choose_expr)=c("expr","cell") RNAi_total=merge(choose_data,choose_expr,by="cell") RNAi_total=na.omit(RNAi_total) ggscatter(RNAi_total, x = "effect", y = "expr", add = "reg.line", conf.int = TRUE, cor.coef = TRUE, cor.method = "pearson", xlab = "RNAi_effect", ylab = "expr") ggplot(RNAi_total, aes(x=effect, y=expr)) + geom_hex(col = "white")+ theme_bw()+ scale_fill_gradient(low="#00BCF2", high="#00188F") + geom_smooth(method=lm , color="black", se=FALSE)+ theme( panel.grid.major = element_blank(), panel.grid.minor = element_blank(), legend.position = c(0.9,0.2)) all_medicines=read.csv("~/bioinfomatrics/R_workingSpace/HUWE1_mutation/analysis_data/screened_compunds_rel_8.2.csv",header = T) cell_line_detail=read.csv("~/bioinfomatrics/R_workingSpace/HUWE1_mutation/analysis_data/cell_lines_detail.csv",header = T) medicine_total_GDSC=read.csv("~/bioinfomatrics/R_workingSpace/HUWE1_mutation/analysis_data/medicine_total_GDSC.csv",header = T) cell_expr=read.csv("~/bioinfomatrics/R_workingSpace/HUWE1_mutation/analysis_data/GDSC_expr_deal.csv",header = T) ACHE_expr=cell_expr[cell_expr$X=="ACHE",] ACHE_expr=ACHE_expr[,2:1019] ACHE_expr=data.frame("cell"=as.character(colnames(ACHE_expr)),"value"=as.numeric(ACHE_expr[1,])) ACHE_expr$cell=unlist(lapply(ACHE_expr$cell,function(x) gsub("\\.","-",x))) choose_line=cell_line_detail[cell_line_detail$Site=="lung",] choose_ACHE=ACHE_expr[ACHE_expr$cell %in% choose_line$Line,] choose_ACHE$expr_status=unlist(lapply(choose_ACHE$value,function(x) ifelse(x>median(choose_ACHE$value),1,0))) res=data.frame() for(i in 1:nrow(all_medicines)){ choose_med=medicine_total_GDSC[medicine_total_GDSC$DRUG_NAME==all_medicines[i,]$DRUG_NAME,] medic_1=choose_med #medic_1=merge(medic_1,choose_ACHE,by.x="CELL_LINE_NAME",by.y="cell") medic_1$expr_status=0 medic_1=medic_1[medic_1$CELL_LINE_NAME %in% choose_ACHE$cell,] if(nrow(medic_1)==0){ next } medic_1[(medic_1$CELL_LINE_NAME %in% choose_ACHE[choose_ACHE$expr_status==1,]$cell),]$expr_status=1 #ggboxplot(medic_1, x="expr_status", y="LN_IC50", color = "expr_status", # palette = "jco", add = "jitter")+stat_compare_means() info=as.data.frame(compare_means(LN_IC50~expr_status, data=medic_1, method = "t.test" )) #info=as.data.frame(compare_means(AUC~expr_status, data=medic_1, method = "t.test" )) info$medicine=all_medicines[i,]$DRUG_NAME info$pathway=all_medicines[i,]$TARGET_PATHWAY res=rbind(res,info) print(paste("total_num is",nrow(all_medicines),"; now dealing is",i,sep = " ")) } choose_IC50=res[res$p.adj<=0.05,] choose_AUC=res[res$p.adj<=0.05,] interset_medic=intersect(choose_IC50$medicine,choose_AUC$medicine) interset_IC50=choose_IC50[choose_IC50$medicine %in% interset_medic,] interset_AUC=choose_AUC[choose_AUC$medicine %in% interset_medic,] save(interset_IC50,interset_AUC,file="GDSC_ACHE.RData") #write.csv(interset_IC50,"interset_IC50.csv") #write.csv(interset_AUC,"interset_AUC.csv") load(file="GDSC_ACHE.RData") library(survival) library(survminer) data=read.csv("~/bioinfomatrics/TCGA/rnaSeq/TCGA-LUAD-Counts.csv",header = T,row.names = 1) data$X=rownames(data) geneName<- data.frame(ENSEMBL=data$X) df <- bitr(unique(geneName$ENSEMBL), fromType = "ENSEMBL", toType = c( "SYMBOL"), OrgDb = org.Hs.eg.db) data=merge(data,df,by.x="X",by.y="ENSEMBL") expr=data expr=expr[!duplicated(expr$SYMBOL),] rownames(expr)=expr$SYMBOL group_list=ifelse(as.numeric(substr(colnames(expr),14,15)) < 10,'tumor','normal') expr=expr[,group_list=="tumor"] expr_data=log2(cpm(expr)+1) meta=read.table("~/bioinfomatrics/TCGA/clinical/TCGA-LUAD-clinical.csv",header = T,sep = ",") meta$days_to_death[is.na(meta$days_to_death)]=0 meta$days_to_last_follow_up[is.na(meta$days_to_last_follow_up)]=0 meta$days=as.numeric(meta$days_to_death)+as.numeric(meta$days_to_last_follow_up) meta$event=ifelse(meta$vital_status=="Alive",0,1) meta$age=meta$age_at_index meta$ID=meta$bcr_patient_barcode meta$time=meta$days/30 meta$ID=lapply(meta$ID,function(x) x=gsub("-",".",x)) clinical=meta[,colnames(meta) %in% c("ID","event","age","days","time","gender","tumor_stage","pack_years_smoked")] clinical$pack_years_smoked[is.na(clinical$pack_years_smoked)]=0 clinical= na.omit(clinical) clinical$ID=as.character(clinical$ID) clinical=clinical[clinical$tumor_stage!="not reported",] clinical$tumor_stage=as.character(clinical$tumor_stage) clinical$tumor_stage=lapply(clinical$tumor_stage,function(x) ifelse(x=="stage i"|x=="stage ia"|x=="stage ib","I", ifelse(x=="stage ii"|x=="stage iia"|x=="stage iib","II", ifelse(x=="stage iii"|x=="stage iiia"|x=="stage iiib"|x=="stage iiic","III", ifelse(x=="stage iv"|x=="stage ivb","IV")))) ) clinical$tumor_stage=as.character(clinical$tumor_stage) clinical=demo gene_name="ACHE" chooseData=data.frame("value"=expr_data[c(gene_name),]) chooseData$sample=substr(rownames(chooseData),1,12) clinical=merge(clinical,chooseData,by.x="ID",by.y="sample") clinical$age=ifelse(clinical$age> median(clinical$age),1,0) clinical$pack_years_smoked=ifelse(clinical$pack_years_smoked> median(clinical$pack_years_smoked[clinical$pack_years_smoked!=0]),1,0) print(median(clinical$value)) clinical$expr_status=ifelse(clinical$value> 2.5,1,0) sfit <- survfit(Surv(time, event)~expr_status, data=clinical) ggsurvplot(sfit, pval = TRUE, #palette = c("blue", "red","green"), legend.title = "", #legend.labs=c("ClusterA", "ClusterB"), font.legend = c(17, 'plain', 'black'), # conf.int = TRUE, # surv.median.line = "hv", ggtheme=theme_survminer()+ theme(axis.title.y=element_text(size=20),axis.title.x=element_text(size=20), axis.text.x =element_text(size=17),axis.text.y =element_text(size=17), ), xlab ="Months", linetype = 1,size=1 ) # choose=clinical[clinical$tumor_stage %in% c("III","II"),] data.survdiff=survdiff(Surv(time, event)~expr_status, data=clinical) log_rank_p = 1 - pchisq(data.survdiff$chisq, length(data.survdiff$n) - 1) model=coxph(Surv(time, event)~expr_status, data=clinical) beta <- coef(model) se <- sqrt(diag(vcov(model))) HR <- exp(beta) Cox_p = 1 - pchisq((beta/se)^2, 1) HR_low = exp(beta - qnorm(.975, 0, 1) * se) HR_high = exp(beta + qnorm(.975, 0, 1) * se) x<- summary(model) p.value<-signif(x$wald["pvalue"], digits=2) data.survdiff=survdiff(Surv(time, event)~expr_status+gender+age+pack_years_smoked, data=clinical) log_rank_p = 1 - pchisq(data.survdiff$chisq, length(data.survdiff$n) - 1) model=coxph(Surv(time, event)~tumor_stage, data=choose) ggforest(model, data =choose, main = "Hazard ratio", cpositions = c(0.10, 0.22, 0.4), fontsize = 1.0, refLabel = "1", noDigits = 4) { library(survival) library(survminer) data=read.csv("~/bioinfomatrics/TCGA/rnaSeq/TCGA-LUAD-Counts.csv",header = T,row.names = 1) data$X=rownames(data) geneName<- data.frame(ENSEMBL=data$X) df <- bitr(unique(geneName$ENSEMBL), fromType = "ENSEMBL", toType = c( "SYMBOL"), OrgDb = org.Hs.eg.db) data=merge(data,df,by.x="X",by.y="ENSEMBL") expr=data expr=expr[!duplicated(expr$SYMBOL),] rownames(expr)=expr$SYMBOL group_list=ifelse(as.numeric(substr(colnames(expr),14,15)) < 10,'tumor','normal') expr=expr[,group_list=="tumor"] expr_data=log2(cpm(expr)+1) meta=read.table("~/bioinfomatrics/TCGA/clinical/LUAD_survival.txt.gz",sep = "\t",header = T) meta$OS.time=meta$OS.time/30 meta$DSS.time=meta$DSS.time/30 meta$PFI.time=meta$PFI.time/30 allGenes=rownames(expr_data) survival_res=data.frame() gene_name="ACHE" chooseData=data.frame("value"=expr_data[c(gene_name),]) chooseData$sample=gsub("\\.","-",substr(rownames(chooseData),1,12)) clinical=merge(meta,chooseData,by.x="X_PATIENT",by.y="sample") # median(chooseData$value) clinical$expr_status=ifelse(clinical$value> 6.6,1,0) sfit <- survfit(Surv(PFI.time, PFI)~expr_status, data=clinical) ggsurvplot(sfit, pval = TRUE, legend.title = "", font.legend = c(17, 'plain', 'black'), ggtheme=theme_survminer()+ theme(axis.title.y=element_text(size=20),axis.title.x=element_text(size=20), axis.text.x =element_text(size=17),axis.text.y =element_text(size=17), ), xlab ="Months", linetype = 1,size=1 ) } ####################################################################### data=read.table("~/bioinfomatrics/TCGA/rnaSeq/TCGA-LUAD-Counts.csv",header = T,sep = ",") geneName<- data.frame(ENSEMBL=data$X) df <- bitr(unique(geneName$ENSEMBL), fromType = "ENSEMBL", toType = c( "ENTREZID"), OrgDb = org.Hs.eg.db) data=merge(data,df,by.x="X",by.y="ENSEMBL") expr=data expr=expr[!duplicated(expr$ENTREZID),] rownames(expr)=expr$ENTREZID group_list=ifelse(as.numeric(substr(colnames(expr),14,15)) < 10,'tumor','normal') expr=log2(cpm(expr)+1) tumor_data=expr[,group_list=="tumor"] group=unlist(lapply(tumor_data['43',], function(x) ifelse(x>median(tumor_data['43',]),"H","L"))) table(group) library(aracne.networks) library(biomaRt) sigtureExplist <- list(expre = as.matrix(tumor_data), high = colnames(tumor_data)[group=="H"], low = colnames(tumor_data)[group=="L"]) sigfinalexp <- sigtureExplist[[1]] allHigh <- sigtureExplist[[2]] allLow<- sigtureExplist[[3]] signature <- rowTtest(tumor_data[,allHigh],tumor_data[,allLow]) signature <- (qnorm(signature$p.value/2, lower.tail = FALSE) * sign(signature$statistic))[, 1] nullmodel <- ttestNull(tumor_data[,allHigh],tumor_data[,allLow], per = 100) mra <- msviper(signature, regulonluad, nullmodel = nullmodel) choose_viper=data.frame("pvalue"=mra$es$p.value) choose_viper$gene=rownames(choose_viper) choose_df=df[df$ENTREZID %in% choose_viper$gene,] choose_viper=merge(choose_viper,choose_df,by.x="gene",by.y="ENTREZID") choose_viper=choose_viper[choose_viper$pvalue<=0.01,] # write.csv(choose_viper,"choose_viper.csv") data=read.csv("choose_viper.csv") ########################################################################################### BP_info=read.table("BP_GO.txt",header = T,sep = "\t") KEGG_info=read.table("KEGG.txt",header = T,sep = "\t") info=BP_info info=KEGG_info res=info res$Genes=lapply(res$Genes,function(x) { x=gsub(", ",",",x) }) res$ID=lapply(res$Term,function(x){ x=as.character(x) x=strsplit(x,split = "~")[[1]][1] }) res$Term=lapply(res$Term,function(x){ x=as.character(x) x=strsplit(x,split = "~")[[1]][2] }) res$Term=as.character(res$Term) res=res[res$Term %in% choose_terms,] allGenes=c() for(i in 1:length(res[,1])){ allGenes=c(allGenes,strsplit(as.character(res[i,]$Genes),split = ",")[[1]]) } table(allGenes) allGenes=as.data.frame(table(allGenes)) # allGenes=allGenes[allGenes$Freq>1,] library(GOplot) david=data.frame(Category="BP",ID=as.character(res$ID),Term=as.character(res$Term), genes=as.character(res$Genes),adj_pval=as.character(res$FDR)) len=nrow(allGenes) genelist=data.frame(ID=as.character(allGenes$allGenes),logFC=runif(len,min = 2,max = 3),AveExpr=runif(len,min = 0,max = 5), t=rep(10,len),P.Value=runif(len,min = 1e-8,max = 3e-2),B=rep(40,len)) circ <- circle_dat(david, genelist) chord <- chord_dat(data = circ, genes = genelist, david$Term) GOChord(chord, space = 0.02, gene.space = 0.25, gene.size = 5) library(ggplot2) data = res data$Term=as.character(data$Term) data$GeneRatio=data$Count/sum(data$Count) p = ggplot(data,aes(GeneRatio,Term)) p=p + geom_point() p=p + geom_point(aes(size=Count)) p=p + geom_point(aes(size=PValue)) pbubble = p+ geom_point(aes(size=Count,color=-1*log10(PValue))) pr = pbubble+scale_color_gradient(low="green",high = "red") pr = pr+labs(color=expression(-log[10](PValue)),size="Count", x="GeneRatio",y="Pathway name",title="Pathway enrichment") pr + theme_bw()