# Comment: Set options and libraries rm(list = ls()) library(GSEABase) library(GSVAdata) library(Biobase) library(genefilter) library(limma) library(RColorBrewer) library(GSVA) library(clusterProfiler) library(pheatmap) library(tidyverse) library(survminer) library(survival) library(limma) library(ggpubr) library(stringr) library(RColorBrewer) library(export) library(survivalROC) library(pheatmap) library(data.table) library(org.Hs.eg.db) library(clusterProfiler) library(biomaRt) library(enrichplot) library(ReactomePA) library(igraph) library(ggraph) library(forestplot) library(ggradar) library(fmsb) library(circlize) library(ggsci) library(parallel) library(maftools) library(patchwork) library(ComplexHeatmap) # Comment: Disable converting strings to factors options(stringsAsFactors = FALSE) Sys.setenv(LANGUAGE = "en") # Comment: Set gene name gene = "Score" # Comment: Create directories dir.create(paste0("Y:\\lcd_code\\TCGA_random\\TCGA_any\\advanced_version\\advanced/", gene)) dir.create(paste0("Y:\\lcd_code\\TCGA_random\\TCGA_any\\advanced_version\\advanced/", gene, "/5_Immune_Infiltration_Analysis")) dir.create(paste0("Y:\\lcd_code\\TCGA_random\\TCGA_any\\advanced_version\\advanced/", gene, "/5_Immune_Infiltration_Analysis/6_Immunotherapy")) # Comment: Set working directory setwd("Y:\\lcd_code\\TCGA_random\\TCGA_any\\advanced_version\\advanced\\raw_data\\immunotherapy_data") # Comment: Read gene list input genelist_input <- fread(file = "GSE32894.csv", header = TRUE, sep = ',', data.table = FALSE) genename <- as.character(genelist_input[, 1]) # Comment: Map gene names to ENTREZID gene_map <- biomaRt::select(org.Hs.eg.db, keys = genename, keytype = "ENTREZID", columns = c("SYMBOL")) colnames(gene_map)[1] <- "ID" colnames(genelist_input)[1] <- "ID" genelist_input$ID <- as.character(genelist_input$ID) immudata <- inner_join(gene_map, genelist_input, by = "ID") immudata <- immudata[, -1] immudata <- na.omit(immudata) rownames(immudata) <- immudata$SYMBOL immudata <- immudata[, -1] immudata <- as.data.frame(t(immudata)) # Comment: Read additional data (CCC) CCC <- read.table("uniSigGeneExp.txt", header = TRUE, sep = "\t") CCC = CCC$id CCC = intersect(colnames(immudata), CCC) AAA = immudata[, CCC] # Comment: Perform PCA analysis pca = prcomp(AAA, scale = TRUE) value = predict(pca) score = value[, 1] - value[, 2] score = as.data.frame(score) score$ID = rownames(score) immudata$ID = row.names(immudata) immudata = inner_join(immudata, score, by = "ID") colnames(immudata)[which(colnames(immudata) == "score")] = gene immudata <- immudata %>% dplyr::select(ID, gene, everything()) # Comment: Read clinical data clindata <- read.csv("GSE32894_clin.csv", header = TRUE, sep = ",") colnames(clindata)[1] <- "ID" AAA <- intersect(clindata$ID, immudata$ID) immudata <- dplyr::filter(immudata, ID %in% AAA) clindata <- dplyr::filter(clindata, ID %in% AAA) rt <- inner_join(clindata, immudata, by = "ID") # Comment: Prepare data for survival analysis rt$time <- rt$time/12 rt$status <- as.numeric(rt$status) # Comment: Perform survival analysis setwd(paste0("Y:\\lcd_code\\TCGA_random\\TCGA_any\\advanced_version\\advanced/", gene, "/5_Immune_Infiltration_Analysis/6_Immunotherapy")) test <- rt test <- na.omit(test) res.cut <- surv_cutpoint(test, time = "time", event = "status", variables = gene) summary(res.cut) # Comment: Categorize variables res.cat <- surv_categorize(res.cut) head(res.cat) # Comment: Fit survival curves and visualize fit <- survfit(Surv(time, status) ~ get(gene), data = res.cat) # Comment: Create survival plot pdf(file = "GSE32894_best-cutoff-DFS-survival-analysis.pdf", width = 5.6, height = 5.34, onefile = FALSE) ggsurvplot(fit, data = res.cat, risk.table = TRUE, risk.table.col = 'strata', risk.table.y.text = FALSE, risk.table.title = 'Number at risk', pval = TRUE, xlab = 'Time (years)', ggtheme = theme_light(), surv.median.line = 'hv', title = paste(gene, 'Survival', sep = ' ')) dev.off() # Comment: Create a barplot res.cat$ID <- test$ID res.cat$Progression <- test$Progression res.cat$Group <- res.cat[, gene] # Comment: Prepare data for the percentage barplot dat = dplyr::count(res.cat, Group, Progression) dat = dat %>% group_by(Group) %>% dplyr::summarise(Progression = Progression, n = n/sum(n)) dat$Progression = factor(dat$Progression, levels = c("no", "yes")) # Comment: Calculate percentages dat = ddply(dat, .(Group), transform, percent = n/sum(n) * 100) # Comment: Calculate position for percentage labels dat = ddply(dat, .(Group), transform, pos = (cumsum(n) - 0.5 * n)) dat$label = paste0(sprintf("%.0f", dat$percent), "%") # Comment: Create the percentage barplot bioCol = c("#f87669", "#2fa1dd") p = ggplot(dat, aes(x = factor(Group), y = percent, fill = Progression # Comment: Loop to plot additional ROC curves for (i in 3:ncol(df)){ x <- plot.roc(df[, 1], df[, i], add = TRUE, # Add to the previous plot smooth = TRUE, ci = TRUE, col = mycol[i], lwd = 2, legacy.axes = TRUE) ci.lower <- round(as.numeric(x$ci[1]), 3) ci.upper <- round(as.numeric(x$ci[3]), 3) auc.ci <- c(colnames(df)[i], round(as.numeric(x$auc), 3), paste(ci.lower, ci.upper, sep = "-")) auc.out <- rbind(auc.out, auc.ci) } # Comment: Create a data frame with AUC values auc.out <- as.data.frame(auc.out) colnames(auc.out) <- c("Name", "AUC", "AUC CI") # Comment: Output p-values to a file # Comment: Create a legend legend.name <- paste(colnames(df)[2:length(df)], "AUC", auc.out$AUC, sep = " ") legend("bottomright", legend = legend.name, col = mycol[2:length(df)], lwd = 2, bty = "n") dev.off() } ################################################################################### rm(list = ls()) gene = "Score" setwd("Y:\\lcd_code\\TCGA_random\\TCGA_any\\advanced_version\\advanced\\raw_data\\immunotherapy_data") genelist_input <- fread(file = "GSE35640.csv", header = TRUE, sep = ',', data.table = FALSE) genename <- as.character(genelist_input[, 1]) # Comment: Map gene names to ENTREZID gene_map <- biomaRt::select(org.Hs.eg.db, keys = genename, keytype = "ENTREZID", columns = c("SYMBOL")) colnames(gene_map)[1] <- "ID" colnames(genelist_input)[1] <- "ID" genelist_input$ID <- as.character(genelist_input$ID) immudata <- inner_join(gene_map, genelist_input, by = "ID") immudata <- immudata[, -1] immudata <- na.omit(immudata) rownames(immudata) <- immudata$SYMBOL immudata <- immudata[, -1] immudata <- as.data.frame(t(immudata)) # Comment: Read additional data (CCC) CCC <- read.table("uniSigGeneExp.txt", header = TRUE, sep = "\t") CCC = CCC$id CCC = intersect(colnames(immudata), CCC) AAA = immudata[, CCC] # Comment: Perform PCA analysis pca = prcomp(AAA, scale = TRUE) value = predict(pca) score = value[, 1] - value[, 2] score = as.data.frame(score) score$ID = rownames(score) immudata$ID = row.names(immudata) immudata = inner_join(immudata, score, by = "ID") colnames(immudata)[which(colnames(immudata) == "score")] = gene immudata <- immudata %>% dplyr::select(ID, gene, everything()) clindata <- read.csv("GSE35640_clin.csv", header = TRUE, sep = ",") colnames(clindata)[1] <- "ID" AAA <- intersect(clindata$ID, immudata$ID) immudata <- dplyr::filter(immudata, ID %in% AAA) clindata <- dplyr::filter(clindata, ID %in% AAA) rt <- inner_join(clindata, immudata, by = "ID") test <- rt test$Response <- factor(test$Response, levels = c('non-responder', 'responder')) my_comparisons <- list(c("non-responder", "responder")) # Comment: Perform differential gene expression analysis setwd(paste0("Y:\\lcd_code\\TCGA_random\\TCGA_any\\advanced_version\\advanced/", gene, "/5_Immune_Infiltration_Analysis/6_Immunotherapy")) p <- ggboxplot(test, x = "Response", y = gene, fill = "Response", legend = FALSE, palette = c("#E7B800", "#00AFBB"), bxp.errorbar = TRUE) + theme(legend.position = 'none') + ylab(label = gene) + stat_compare_means(comparisons = my_comparisons, method = 't.test', aes(label = ..p.signif..)) ggsave(p, filename = paste("GSE35640", gene, 'DifferentialExpression.pdf', sep = ' '), width = 5.6, height = 4.22) # Comment: Predictive ROC Curve library("pROC") df <- dplyr::select(rt, Response, gene, PDCD1, CTLA4, CD274) # Comment: Define a sufficient number of colors for plotting mycol <- c("slateblue", "seagreen3", "dodgerblue", "firebrick1", "lightgoldenrod", "magenta", "orange2") if (TRUE) { pdf("GSE35640_ROC.pdf", height = 6, width = 6) auc.out <- c() # Comment: Plot the first ROC curve for miRNA1 x <- plot.roc(df[, 1], df[, 2], ylim = c(0, 1), xlim = c(1, 0), smooth = TRUE, # Smooth ROC curve ci = TRUE, main = "", col = mycol[2], # Line color lwd = 2, # Line width legacy.axes = TRUE) # Use legacy axes ci.lower <- round(as.numeric(x$ci[1]), 3) # Confidence interval lower limit ci.upper <- round(as.numeric(x$ci[3]), 3) # Confidence interval upper limit auc.ci <- c(colnames(df)[2], round(as.numeric(x$auc), 3), paste(ci.lower, ci.upper, sep = "-")) auc.out <- rbind(auc.out, auc.ci) # Comment: Plot additional ROC curves in a loop for (i in 3:ncol(df)) { x <- plot.roc(df[, 1], df[, i], add = TRUE, # Add to the previous plot smooth = TRUE, ci = TRUE, col = mycol[i], lwd = 2, legacy.axes = TRUE) ci.lower <- round(as.numeric(x$ci[1]), 3) ci.upper <- round(as.numeric(x$ci[3]), 3) auc.ci <- c(colnames(df)[i], round(as.numeric(x$auc), 3), paste(ci.lower, ci.upper, sep = "-")) auc.out <- rbind(auc.out, auc.ci) } # Comment: Compare multiple ROC curves # In the "method=" parameter, there are three methods to choose from: "delong," "bootstrap," or "venkatraman" to calculate p-values auc.out <- as.data.frame(auc.out) colnames(auc.out) <- c("Name", "AUC", "AUC CI") # Output p-values to a file # Comment: Create a legend legend.name <- paste(colnames(df)[2:length(df)], "AUC", auc.out$AUC, sep = " ") legend("bottomright", legend = legend.name, col = mycol[2:length(df)], lwd = 2, bty = "n") dev.off() } ################################################################################### rm(list = ls()) gene <- "Score" setwd("Y:\\lcd_code\\TCGA_random\\TCGA_any\\advanced_version\\advanced\\raw_data\\immunotherapy_data") genelist_input <- fread(file = "GSE61676.csv", header = TRUE, sep = ',', data.table = FALSE) immudata <- genelist_input rownames(immudata) <- immudata$ID immudata <- immudata[, -1] immudata <- as.matrix(immudata) immudata <- as.data.frame(t(immudata)) # Comment: Read additional data (CCC) CCC <- read.table("uniSigGeneExp.txt", header = TRUE, sep = "\t") CCC = CCC$id CCC = intersect(colnames(immudata), CCC) AAA = immudata[, CCC] # Comment: Perform PCA analysis pca = prcomp(AAA, scale = TRUE) value = predict(pca) score = value[, 1] - value[, 2] score = as.data.frame(score) score$ID = rownames(score) immudata$ID = row.names(immudata) immudata = inner_join(immudata, score, by = "ID") colnames(immudata)[which(colnames(immudata) == "score")] = gene immudata <- immudata %>% dplyr::select(ID, gene, everything()) clindata <- read.csv("GSE61676_clin.csv", header = TRUE, sep = ",") colnames(clindata)[1] <- "ID" AAA <- intersect(clindata$ID, immudata$ID) immudata <- dplyr::filter(immudata, ID %in% AAA) clindata <- dplyr::filter(clindata, ID %in% AAA) rt <- inner_join(clindata, immudata, by = "ID") rt$time <- rt$time / 12 rt$status <- as.numeric(rt$status) # Comment: Survival analysis setwd(paste0("Y:\\lcd_code\\TCGA_random\\TCGA_any\\advanced_version\\advanced/", gene, "/5_Immune_Infiltration_Analysis/6_Immunotherapy")) test <- rt res.cut <- surv_cutpoint(test, time = "time", event = "status", variables = gene) summary(res.cut) # Categorize variables res.cat <- surv_categorize(res.cut) head(res.cat) # Fit survival curves and visualize fit <- survfit(Surv(time, status) ~ get(gene), data = res.cat) pdf(file = "GSE61676_best_cutoff_survival_analysis.pdf", width = 5.6, height = 5.34, onefile = FALSE) ggsurvplot(fit, data = res.cat, risk.table = TRUE, risk.table.col = 'strata', risk.table.y.text = FALSE, risk.table.title = 'Number at risk', pval = TRUE, xlab = 'Time (years)', ggtheme = theme_light(), surv.median.line = 'hv', title = paste(gene, 'Survival', sep = ' ')) dev.off() # Percentage Plot res.cat$ID <- test$ID res.cat$Response <- test$Response res.cat$Group <- res.cat[, gene] library(dplyr) library(plyr) library(ggplot2) library(ggpubr) # Count frequencies and calculate percentages dat <- dplyr::count(res.cat, Group, Response) dat <- dat %>% group_by(Group) %>% dplyr::summarise(Response = Response, n = n/sum(n)) dat$Response <- factor(dat$Response, levels = c("Non-responder", "Responder")) # Calculate percentages dat <- ddply(dat, .(Group), transform, percent = n/sum(n) * 100) # Calculate position for labels dat <- ddply(dat, .(Group), transform, pos = (cumsum(n) - 0.5 * n)) dat$label <- paste0(sprintf("%.0f", dat$percent), "%") bioCol <- c("#f87669", "#2fa1dd") p <- ggplot(dat, aes(x = factor(Group), y = percent, fill = Response)) + geom_bar(position = position_stack(), stat = "identity", width = 0.7) + scale_fill_manual(values = bioCol) + xlab("Group") + ylab("Percent weight") + guides(fill = guide_legend(title = "Response")) + geom_text(aes(label = label), position = position_stack(vjust = 0.5), size = 3) + theme_bw() pdf(file = "GSE61676_barplot.pdf", width = 4, height = 5) print(p) dev.off() # Update Response levels test$Response <- factor(test$Response, levels = c('Non-responder', 'Responder')) my_comparisons <- list(c("Non-responder", "Responder")) # Differential expression analysis p <- ggboxplot(test, x = "Response", y = gene, fill = "Response", legend = FALSE, palette = c("#E7B800", "#00AFBB"), bxp.errorbar = TRUE) + theme(legend.position = 'none') + ylab(label = gene) + stat_compare_means(comparisons = my_comparisons, method = 't.test', aes(label = ..p.signif..)) ggsave(p, filename = paste("GSE61676", gene, 'DifferentialExpression.pdf', sep = ' '), width = 5.6, height = 4.22) # Predictive ROC Curve library("pROC") df <- dplyr::select(rt, Response, gene, CTLA4, CD274) df$Response[df$Response == "Responder"] <- 1 df$Response[df$Response == "Non-responder"] <- 0 # Define a sufficient number of colors for plotting mycol <- c("slateblue", "seagreen3", "dodgerblue", "firebrick1", "lightgoldenrod", "magenta", "orange2") if (TRUE) { pdf("GSE61676_ROC.pdf", height = 6, width = 6) auc.out <- c() # Plot the first ROC curve for miRNA1 x <- plot.roc(df[, 1], df[, 2], ylim = c(0, 1), xlim = c(1, 0), smooth = TRUE, # Smooth ROC curve ci = TRUE, main = "", col = mycol[2], # Line color lwd = 2, # Line width legacy.axes = TRUE) # Use legacy axes ci.lower <- round(as.numeric(x$ci[1]), 3) # Confidence interval lower limit ci.upper <- round(as.numeric(x$ci[3]), 3) # Confidence interval upper limit auc.ci <- c(colnames(df)[2], round(as.numeric(x$auc), 3), paste(ci.lower, ci.upper, sep = "-")) auc.out <- rbind(auc.out, auc.ci) # Plot additional ROC curves in a loop for (i in 3:ncol(df)) { x <- plot.roc(df[, 1], df[, i], add = TRUE, # Add to the previous plot smooth = TRUE, ci = TRUE, col = mycol[i], lwd = 2, legacy.axes = TRUE) ci.lower <- round(as.numeric(x$ci[1]), 3) ci.upper <- round(as.numeric(x$ci[3]), 3) auc.ci <- c(colnames(df)[i], round(as.numeric(x$auc), 3), paste(ci.lower, ci.upper, sep = "-")) auc.out <- rbind(auc.out, auc.ci) } auc.out <- as.data.frame(auc.out) colnames(auc.out) <- c("Name", "AUC", "AUC CI") # Create a legend legend.name <- paste(colnames(df)[2:length(df)], "AUC", auc.out$AUC, sep = " ") legend("bottomright", legend = legend.name, col = mycol[2:length(df)], lwd = 2, bty = "n") dev.off() } ################################################################################### rm(list = ls()) gene <- "Score" setwd("Y:\\lcd_code\\TCGA_random\\TCGA_any\\advanced_version\\advanced\\raw_data\\immunotherapy_data") genelist_input <- fread(file = "IMvigor210CoreBiologies.csv", header = TRUE, sep = ',', data.table = FALSE) immudata <- genelist_input # Build a boolean vector for indexing index <- duplicated(immudata$symbol) # Filter data immudata <- immudata[!index,] # Select non-duplicated data immudata <- na.omit(immudata) rownames(immudata) <- immudata$symbol immudata <- immudata[, -1] immudata <- log2(immudata + 1) immudata <- as.matrix(immudata) immudata <- as.data.frame(t(immudata)) CCC <- read.table("uniSigGeneExp.txt", header = TRUE, sep = "\t") CCC <- CCC$id CCC <- intersect(colnames(immudata), CCC) AAA <- immudata[, CCC] # PCA analysis pca <- prcomp(AAA, scale = TRUE) value <- predict(pca) score <- value[, 1] - value[, 2] score <- as.data.frame(score) score$ID <- rownames(score) immudata$ID <- row.names(immudata) immudata <- inner_join(immudata, score, by = "ID") colnames(immudata)[which(colnames(immudata) == "score")] <- gene immudata <- immudata %>% dplyr::select(ID, gene, everything()) clindata <- read.csv("IMvigor210CoreBiologies_clin.csv", header = TRUE, sep = ",") colnames(clindata)[1] <- "ID" AAA <- intersect(clindata$ID, immudata$ID) immudata <- dplyr::filter(immudata, ID %in% AAA) clindata <- dplyr::filter(clindata, ID %in% AAA) rt <- inner_join(clindata, immudata, by = "ID") rt$futime <- rt$futime / 12 rt$fustat <- as.numeric(rt$fustat) # Survival Analysis setwd(paste0("Y:\\lcd code\\TCGA advanced\\", gene, "\\5 Immune Infiltration Analysis\\6 Immunotherapy")) test <- rt res.cut <- surv_cutpoint(test, time = "futime", event = "fustat", variables = gene) summary(res.cut) # Categorize variables res.cat <- surv_categorize(res.cut) head(res.cat) # Fit survival curves and visualize fit <- survfit(Surv(futime, fustat) ~ get(gene), data = res.cat) pdf(file = "IMvigor210CoreBiologies Best Cutoff Survival Analysis.pdf", width = 5.6, height = 5.34, onefile = FALSE) ggsurvplot(fit, data = res.cat, risk.table = TRUE, risk.table.col = 'strata', risk.table.y.text = FALSE, risk.table.title = 'Number at risk', pval = TRUE, xlab = 'Time (years)', ggtheme = theme_light(), surv.median.line = 'hv', title = paste(gene, 'Survival', sep = ' ')) dev.off() # Percentage Plot res.cat$ID <- test$ID res.cat$Response <- test$Response res.cat$Group <- res.cat[, gene] res.cat <- na.omit(res.cat) library(dplyr) library(plyr) library(ggplot2) library(ggpubr) dat <- dplyr::count(res.cat, Group, Response) dat <- dat %>% group_by(Group) %>% dplyr::summarise(Response = Response, n = n/sum(n)) dat$Response = factor(dat$Response, levels = c("CR/PR", "SD/PD")) # Calculate percentages dat = ddply(dat, .(Group), transform, percent = n/sum(n) * 100) # Calculate position for labels dat = ddply(dat, .(Group), transform, pos = (cumsum(n) - 0.5 * n)) dat$label = paste0(sprintf("%.0f", dat$percent), "%") bioCol = c("#f87669", "#2fa1dd") p = ggplot(dat, aes(x = factor(Group), y = percent, fill = Response)) + geom_bar(position = position_stack(), stat = "identity", width = .7) + scale_fill_manual(values = bioCol) + xlab("Group") + ylab("Percent weight") + guides(fill = guide_legend(title = "Response")) + geom_text(aes(label = label), position = position_stack(vjust = 0.5), size = 3) + theme_bw() pdf(file = "IMvigor210CoreBiologies_barplot.pdf", width = 4, height = 5) print(p) dev.off() test$Response <- factor(test$Response, levels = c('CR/PR', 'SD/PD')) my_comparisons <- list(c("CR/PR", "SD/PD")) # Differential expression analysis test <- na.omit(test) p <- ggboxplot(test, x = "Response", y = gene, fill = "Response", legend = FALSE, palette = c("#E7B800", "#00AFBB"), bxp.errorbar = TRUE) + theme(legend.position = 'none') + ylab(label = gene) + stat_compare_means(comparisons = my_comparisons, method = 't.test', aes(label = ..p.signif..)) ggsave(p, filename = paste("IMvigor210CoreBiologies", gene, 'DifferentialExpression.pdf', sep = ' '), width = 5.6, height = 4.22) # Predictive ROC Curve library("pROC") df <- dplyr::select(rt, Response, gene, CTLA4, CD274, PDCD1) df$Response[df$Response == "SD/PD"] = 1 df$Response[df$Response == "CR/PR"] = 0 # Define a sufficient number of colors for plotting mycol <- c("slateblue", "seagreen3", "dodgerblue", "firebrick1", "lightgoldenrod", "magenta", "orange2") if (TRUE) { pdf("IMvigor210CoreBiologies_ROC.pdf", height = 6, width = 6) auc.out <- c() # Plot the first ROC curve for miRNA1 x <- plot.roc(df[, 1], df[, 2], ylim = c(0, 1), xlim = c(1, 0), smooth = TRUE, # Smooth ROC curve ci = TRUE, main = "", col = mycol[2], # Line color lwd = 2, # Line width legacy.axes = TRUE) # Use legacy axes ci.lower <- round(as.numeric(x$ci[1]), 3) # Confidence interval lower limit ci.upper <- round(as.numeric(x$ci[3]), 3) # Confidence interval upper limit auc.ci <- c(colnames(df)[2], round(as.numeric(x$auc), 3), paste(ci.lower, ci.upper, sep = "-")) auc.out <- rbind(auc.out, auc.ci) # Plot additional ROC curves in a loop for (i in 3:ncol(df)) { x <- plot.roc(df[, 1], df[, i], add = TRUE, # Add to the previous plot smooth = TRUE, ci = TRUE, col = mycol[i], lwd = 2, legacy.axes = TRUE) ci.lower <- round(as.numeric(x$ci[1]), 3) ci.upper <- round(as.numeric(x$ci[3]), 3) auc.ci <- c(colnames(df)[i], round(as.numeric(x$auc), 3), paste(ci.lower, ci.upper, sep = "-")) auc.out <- rbind(auc.out, auc.ci) } auc.out <- as.data.frame(auc.out) colnames(auc.out) <- c("Name", "AUC", "AUC CI") # Create a legend legend.name <- paste(colnames(df)[2:length(df)], "AUC", auc.out$AUC, sep = " ") legend("bottomright", legend = legend.name, col = mycol[2:length(df)], lwd = 2, bty = "n") dev.off() } # Clear the workspace rm(list = ls()) gene = "Score" # Set the working directory to the immunotherapy data folder setwd("Y:\\lcd code\\TCGA advanced\\Original Data\\Immunotherapy Data") # Read gene expression data from a CSV file genelist_input <- fread(file = "GSE78220.csv", header = TRUE, sep = ',', data.table = FALSE) immudata <- genelist_input # Create a boolean vector to index duplicated genes index <- duplicated(immudata$Gene) # Filter out duplicated data immudata <- immudata[!index,] # Remove rows with missing values immudata <- na.omit(immudata) # Set gene names as row names rownames(immudata) <- immudata$Gene # Remove the Gene column immudata <- immudata[, -1] # Transpose the data immudata <- as.matrix(immudata) immudata <- as.data.frame(t(immudata)) # Read a list of genes of interest CCC <- read.table("uniSigGeneExp.txt", header = TRUE, sep = "\t") CCC <- CCC$id # Filter genes that are present in both datasets CCC <- intersect(colnames(immudata), CCC) # Select columns corresponding to the genes of interest AAA <- immudata[, CCC] # Perform Principal Component Analysis (PCA) pca <- prcomp(AAA, scale = TRUE) value <- predict(pca) score <- value[, 1] - value[, 2] score <- as.data.frame(score) # Assign the ID column score$ID <- rownames(score) # Update the column name to 'gene' colnames(score)[which(colnames(score) == "score")] <- gene # Select relevant columns immudata <- immudata %>% dplyr::select(ID, gene, everything()) # Read clinical data clindata <- read.csv("GSE78220_clin.csv", header = TRUE, sep = ",") # Rename the first column to 'ID' colnames(clindata)[1] <- "ID" # Find common IDs between clinical and gene expression data AAA <- intersect(clindata$ID, immudata$ID) # Filter data based on common IDs immudata <- dplyr::filter(immudata, ID %in% AAA) clindata <- dplyr::filter(clindata, ID %in% AAA) # Prepare data for survival analysis rt <- inner_join(clindata, immudata, by = "ID") # Convert OS values to years and Status to numeric rt$OS <- rt$OS/365 rt$Status <- as.numeric(rt$Status) # Set the working directory for survival analysis setwd(paste0("Y:\\lcd code\\TCGA advanced\\", gene, "\\5 Immune Infiltration Analysis\\6 Immunotherapy")) test <- rt # Remove rows with missing values test <- na.omit(test) # Determine optimal cutpoints for survival analysis res.cut <- surv_cutpoint(test, time = "OS", event = "Status", variables = gene) summary(res.cut) # Categorize variables based on the optimal cutpoints res.cat <- surv_categorize(res.cut) head(res.cat) # Fit survival curves and create visualizations fit <- survfit(Surv(OS, Status) ~ get(gene), data = res.cat) # Generate a PDF file for survival analysis results pdf(file = "GSE78220 Best Cutoff Survival Analysis.pdf", width = 5.6, height = 5.34, onefile = FALSE) # Create a survival plot ggsurvplot(fit, data = res.cat, risk.table = TRUE, risk.table.col = 'strata', risk.table.y.text = FALSE, risk.table.title = 'Number at risk', pval = TRUE, xlab = 'Time (years)', ggtheme = theme_light(), surv.median.line = 'hv', title = paste(gene, 'Survival', sep = ' ')) # Close the PDF device dev.off() # Percentage Plot res.cat$ID <- test$ID res.cat$Response <- test$Response res.cat$Group <- res.cat[, gene] # Load necessary libraries library(dplyr) library(plyr) library(ggplot2) library(ggpubr) # Count frequencies of responses and calculate percentages dat <- dplyr::count(res.cat, Group, Response) dat <- dat %>% group_by(Group) %>% dplyr::summarise(Response = Response, n = n/sum(n)) dat$Response = factor(dat$Response, levels = c("PD/SD", "PR/CR")) # Calculate percentages dat <- ddply(dat, .(Group), transform, percent = n/sum(n) * 100) # Calculate positions for labels dat <- ddply(dat, .(Group), transform, pos = (cumsum(n) - 0.5 * n)) dat$label <- paste0(sprintf("%.0f", dat$percent), "%") # Define color palette bioCol <- c("#f87669", "#2fa1dd") # Create a percentage plot p <- ggplot(dat, aes(x = factor(Group), y = percent, fill = Response)) + geom_bar(position = position_stack(), stat = "identity", width = 0.7) + scale_fill_manual(values = bioCol) + xlab("Group") + ylab("Percent weight") + guides(fill = guide_legend(title = "Response")) + geom_text(aes(label = label), position = position_stack(vjust = 0.5), size = 3) + theme_bw() # Generate a PDF file for the percentage plot pdf(file = "GSE78220_barplot.pdf", width = 4, height = 5) print(p) dev.off() # Set Response levels test$Response <- factor(test$Response, levels = c('PR/CR', 'PD/SD')) my_comparisons <- list(c("PR/CR", "PD/SD")) # Differential expression analysis plot test <- na.omit(test) p <- ggboxplot(test, x = "Response", y = gene, fill = "Response", legend = FALSE, palette = c("#E7B800", "#00AFBB"), bxp.errorbar = TRUE) + theme(legend.position = 'none') + ylab(label = gene) + stat_compare_means(comparisons = my_comparisons, method = 't.test', aes(label = ..p.signif..)) # Save the differential expression plot as a PDF file ggsave(p, filename = paste("GSE78220", gene, 'DifferentialExpression.pdf', sep = ' '), width = 5.6, height = 4.22) # Predictive ROC Curve library("pROC") # Select relevant columns for ROC analysis df <- dplyr::select(rt, Response, gene, CTLA4, CD274, PDCD1) df$Response[df$Response == "SD/PD"] = 1 df$Response[df$Response == "CR/PR"] = 0 # Define a sufficient number of colors for plotting mycol <- c("slateblue", "seagreen3", "dodgerblue", "firebrick1", "lightgoldenrod", "magenta", "orange2") if (TRUE) { # Generate a PDF file for ROC analysis pdf("GSE78220_ROC.pdf", height = 6, width = 6) auc.out <- c() # Plot the first ROC curve x <- plot.roc(df[, 1], df[, 2], ylim = c(0, 1), xlim = c(1, 0), smooth = TRUE, ci = TRUE, main = "", col = mycol[2], lwd = 2, legacy.axes = TRUE) ci.lower <- round(as.numeric(x$ci[1]), 3) ci.upper <- round(as.numeric(x$ci[3]), 3) auc.ci <- c(colnames(df)[2], round(as.numeric(x$auc), 3), paste(ci.lower, ci.upper, sep = "-")) auc.out <- rbind(auc.out, auc.ci) # Plot additional ROC curves for (i in 3:ncol(df)) { x <- plot.roc(df[, 1], df[, i], add = TRUE, smooth = TRUE, ci = TRUE, col = mycol[i], lwd = 2, legacy.axes = TRUE) ci.lower <- round(as.numeric(x$ci[1]), 3) ci.upper <- round(as.numeric(x$ci[3]), 3) auc.ci <- c(colnames(df)[i], round(as.numeric(x$auc), 3), paste(ci.lower, ci.upper, sep = "-")) auc.out <- rbind(auc.out, auc.ci) } # Create a legend for the ROC curves auc.out <- as.data.frame(auc.out) colnames(auc.out) <- c("Name", "AUC", "AUC CI") legend.name <- paste(colnames(df)[2:length(df)], "AUC", auc.out$AUC, sep = " ") legend("bottomright", legend = legend.name, col = mycol[2:length(df)], lwd = 2, bty = "n") dev.off() } # Clear the current workspace rm(list = ls()) # Define the gene of interest gene <- "Score" # Set the working directory to the location of immunotherapy data setwd("Y:\\lcd code\\TCGA advanced\\TCGA arbitrary\\Advanced Version\\Advanced\\Raw Data\\Immunotherapy Data") # Read gene expression data from a CSV file named "ICB.Riaz2017_Nivolumab_Melanoma_Naive.csv" genelist_input <- fread(file = "ICB.Riaz2017_Nivolumab_Melanoma_Naive.csv", header = TRUE, sep = ',', data.table = FALSE) genename <- as.character(genelist_input[, 1]) # Extract gene names # Use biomaRt to map gene IDs to gene symbols (ENTREZID to SYMBOL) gene_map <- biomaRt::select(org.Hs.eg.db, keys = genename, keytype = "ENTREZID", columns = c("SYMBOL")) colnames(gene_map)[1] <- "ID" colnames(genelist_input)[1] <- "ID" genelist_input$ID <- as.character(genelist_input$ID) # Merge gene expression data with gene symbols immudata <- inner_join(gene_map, genelist_input, by = "ID") immudata <- immudata[, -1] immudata <- na.omit(immudata) rownames(immudata) <- immudata$SYMBOL immudata <- immudata[, -1] immudata <- as.matrix(immudata) immudata <- as.data.frame(t(immudata)) # Read a list of genes of interest from "uniSigGeneExp.txt" and select relevant columns CCC <- read.table("uniSigGeneExp.txt", header = TRUE, sep = "\t") CCC <- CCC$id CCC <- intersect(colnames(immudata), CCC) AAA <- immudata[, CCC] # Perform Principal Component Analysis (PCA) pca <- prcomp(AAA, scale = TRUE) value <- predict(pca) score <- value[, 1] - value[, 2] score <- as.data.frame(score) score$ID <- rownames(score) immudata$ID <- row.names(immudata) immudata <- inner_join(immudata, score, by = "ID") colnames(immudata)[which(colnames(immudata) == "score")] <- gene immudata <- immudata %>% dplyr::select(ID, gene, everything()) # Read clinical data clindata <- read.csv("ICB.Riaz2017_Nivolumab_Melanoma_Naive_clin.csv", header = TRUE, sep = ",") colnames(clindata)[1] <- "ID" # Filter data to include only common IDs AAA <- intersect(clindata$ID, immudata$ID) immudata <- dplyr::filter(immudata, ID %in% AAA) clindata <- dplyr::filter(clindata, ID %in% AAA) # Convert OS (Overall Survival) values to years and OS.Event to numeric rt <- inner_join(clindata, immudata, by = "ID") rt$OS <- rt$OS / 365 rt$OS.Event <- as.numeric(rt$OS.Event) # Set the working directory for survival analysis setwd(paste0("Y:\\lcd code\\TCGA advanced\\TCGA arbitrary\\Advanced Version\\Advanced/", gene, "/5 Immune Infiltration Analysis/6 Immunotherapy")) test <- rt test <- na.omit(test) # Perform survival analysis by determining optimal cutpoints for the specified gene res.cut <- surv_cutpoint(test, time = "OS", event = "OS.Event", variables = gene) summary(res.cut) # Categorize variables based on optimal cutpoints res.cat <- surv_categorize(res.cut) head(res.cat) # Fit survival curves and visualize fit <- survfit(Surv(OS, OS.Event) ~ get(gene), data = res.cat) # Generate a PDF file containing survival analysis plots pdf(file = "ICB.Riaz2017_Nivolumab_Melanoma_Naive_OS_Survival_Analysis.pdf", width = 5.6, height = 5.34, onefile = FALSE) ggsurvplot(fit, data = res.cat, risk.table = TRUE, risk.table.col = 'strata', risk.table.y.text = FALSE, risk.table.title = 'Number at risk', pval = TRUE, xlab = 'Time (years)', ggtheme = theme_light(), surv.median.line = 'hv', title = paste(gene, 'Survival', sep = ' ')) dev.off() # Create percentage bar plots based on the response variable res.cat$ID <- test$ID res.cat$Response <- test$Response res.cat$Response[res.cat$Response == 1] <- "Yes" res.cat$Response[res.cat$Response == 0] <- "No" res.cat$Group <- res.cat[, gene] library(dplyr) library(plyr) library(ggplot2) library(ggpubr) dat <- dplyr::count(res.cat, Group, Response) dat <- dat %>% group_by(Group) %>% dplyr::summarise(Response = Response, n = n/sum(n)) dat$Response <- factor(dat$Response, levels = c("No", "Yes")) # Calculate percentages dat <- ddply(dat, .(Group), transform, percent = n/sum(n) * 100) # Calculate position for labels dat <- ddply(dat, .(Group), transform, pos = (cumsum(n) - 0.5 * n)) dat$label <- paste0(sprintf("%.0f", dat$percent), "%") bioCol <- c("#f87669", "#2fa1dd") p <- ggplot(dat, aes(x = factor(Group), y = percent, fill = Response)) + geom_bar(position = position_stack(), stat = "identity", width = 0.7) + scale_fill_manual(values = bioCol) + xlab("Group") + ylab("Percent weight") + guides(fill = guide_legend(title = "Response")) + geom_text(aes(label = label), position = position_stack(vjust = 0.5), size = 3) + theme_bw() # Generate a PDF file containing percentage bar plots pdf(file = "ICB.Riaz2017_Nivolumab_Melanoma_Naive_Barplot.pdf", width = 4, height = 5) print(p) dev.off() # Update the response variable and conduct differential expression analysis test <- rt test <- na.omit(test) test$Response[test$Response == 1] <- "Yes" test$Response[test$Response == 0] <- "No" test$Response <- factor(test$Response, levels = c('No', 'Yes')) my_comparisons <- list(c("No", "Yes")) # Generate a PDF file containing differential expression analysis plots p <- ggboxplot(test, x = "Response", y = gene, fill = "Response", legend = FALSE, palette = c("#E7B800", "#00AFBB"), bxp.errorbar = TRUE) + theme(legend.position = 'none') + ylab(label = gene) + stat_compare_means(comparisons = my_comparisons, method = 't.test', aes(label = ..p.signif..)) ggsave(p, filename = paste("ICB.Riaz2017_Nivolumab_Melanoma_Naive", gene, '_Differential_Expression.pdf', sep = ' '), width = 5.6, height = 4.22) # Perform ROC analysis to predict responsiveness to immunotherapy library("pROC") df <- dplyr::select(rt, Response, gene, PDCD1, CTLA4, CD274) # Define a set of colors for ROC curves mycol <- c("slateblue", "seagreen3", "dodgerblue", "firebrick1", "lightgoldenrod", "magenta", "orange2") if(TRUE){ pdf("ICB.Riaz2017_Nivolumab_Melanoma_Naive_ROC.pdf", height = 6, width = 6) auc.out <- c() # Plot the first ROC curve (Response vs. PDCD1) x <- plot.roc(df[, 1], df[, 2], ylim = c(0, 1), xlim = c(1, 0), smooth = TRUE, ci = TRUE, main = "", col = mycol[2], lwd = 2, legacy.axes = TRUE) ci.lower <- round(as.numeric(x$ci[1]), 3) ci.upper <- round(as.numeric(x$ci[3]), 3) auc.ci <- c(colnames(df)[2], round(as.numeric(x$auc), 3), paste(ci.lower, ci.upper, sep = "-")) auc.out <- rbind(auc.out, auc.ci) # Plot additional ROC curves for (i in 3:ncol(df)){ x <- plot.roc(df[, 1], df[, i], add = TRUE, smooth = TRUE, ci = TRUE, col = mycol[i], lwd = 2, legacy.axes = TRUE) ci.lower <- round(as.numeric(x$ci[1]), 3) ci.upper <- round(as.numeric(x$ci[3]), 3) auc.ci <- c(colnames(df)[i], round(as.numeric(x$auc), 3), paste(ci.lower, ci.upper, sep = "-")) auc.out <- rbind(auc.out, auc.ci) } auc.out <- as.data.frame(auc.out) colnames(auc.out) <- c("Name", "AUC", "AUC CI") # Generate a legend for the ROC curves legend.name <- paste(colnames(df)[2:length(df)], "AUC", auc.out$AUC, sep = " ") legend("bottomright", legend = legend.name, col = mycol[2:length(df)], lwd = 2, bty = "n") dev.off() } # Survival Analysis setwd(paste0("Y:\\lcd code\\TCGA advanced\\TCGA arbitrary\\Advanced Version\\Advanced/", gene, "/5 Immune Infiltration Analysis/6 Immunotherapy")) test <- rt test <- na.omit(test) res.cut <- surv_cutpoint(test, time = "PFS", event = "PFS.Event", variables = gene) summary(res.cut) # Categorize variables res.cat <- surv_categorize(res.cut) head(res.cat) # Fit survival curves and visualize fit <- survfit(Surv(PFS, PFS.Event) ~ get(gene), data = res.cat) pdf(file = "ICB.Riaz2017_Nivolumab_Melanoma_Naive_PFS_Survival_Analysis.pdf", width = 5.6, height = 5.34, onefile = FALSE) ggsurvplot(fit, data = res.cat, risk.table = TRUE, risk.table.col = 'strata', risk.table.y.text = FALSE, risk.table.title = 'Number at risk', pval = TRUE, xlab = 'Time (years)', ggtheme = theme_light(), surv.median.line = 'hv', title = paste(gene, 'Survival', sep = ' ')) dev.off() ################################################################################### rm(list = ls()) gene = "Score" setwd("Y:\\lcd code\\TCGA advanced\\TCGA arbitrary\\Advanced Version\\Advanced\\Raw Data\\Immunotherapy Data") genelist_input <- fread(file = "ICB.Riaz2017_Nivolumab_Melanoma_Prog.csv", header = TRUE, sep = ',', data.table = FALSE) genename <- as.character(genelist_input[, 1]) # Extract the first column of gene names # Use biomaRt to map gene IDs to gene symbols (ENTREZID to SYMBOL) gene_map <- biomaRt::select(org.Hs.eg.db, keys = genename, keytype = "ENTREZID", columns = c("SYMBOL")) colnames(gene_map)[1] <- "ID" colnames(genelist_input)[1] <- "ID" genelist_input$ID <- as.character(genelist_input$ID) immudata <- inner_join(gene_map, genelist_input, by = "ID") immudata <- immudata[, -1] immudata <- na.omit(immudata) rownames(immudata) <- immudata$SYMBOL immudata <- immudata[, -1] immudata <- as.matrix(immudata) immudata <- as.data.frame(t(immudata)) CCC <- read.table("uniSigGeneExp.txt", header = TRUE, sep = "\t") CCC <- CCC$id CCC <- intersect(colnames(immudata), CCC) AAA <- immudata[, CCC] # Perform PCA analysis pca <- prcomp(AAA, scale = TRUE) value <- predict(pca) score <- value[, 1] - value[, 2] score <- as.data.frame(score) score$ID <- rownames(score) immudata$ID <- row.names(immudata) immudata <- inner_join(immudata, score, by = "ID") colnames(immudata)[which(colnames(immudata) == "score")] <- gene immudata <- immudata %>% dplyr::select(ID, gene, everything()) clindata <- read.csv("ICB.Riaz2017_Nivolumab_Melanoma_Prog_clin.csv", header = TRUE, sep = ",") colnames(clindata)[1] <- "ID" AAA <- intersect(clindata$ID, immudata$ID) immudata <- dplyr::filter(immudata, ID %in% AAA) clindata <- dplyr::filter(clindata, ID %in% AAA) rt <- inner_join(clindata, immudata, by = "ID") rt$OS <- rt$OS / 365 rt$OS.Event <- as.numeric(rt$OS.Event) # Survival Analysis setwd(paste0("Y:\\lcd code\\TCGA advanced\\TCGA arbitrary\\Advanced Version\\Advanced/", gene, "/5 Immune Infiltration Analysis/6 Immunotherapy")) test <- rt test <- na.omit(test) res.cut <- surv_cutpoint(test, time = "OS", event = "OS.Event", variables = gene) summary(res.cut) # Categorize variables res.cat <- surv_categorize(res.cut) head(res.cat) # Fit survival curves and visualize fit <- survfit(Surv(OS, OS.Event) ~ get(gene), data = res.cat) pdf(file = "ICB.Riaz2017_Nivolumab_Melanoma_Prog_OS_Survival_Analysis.pdf", width = 5.6, height = 5.34, onefile = FALSE) ggsurvplot(fit, data = res.cat, risk.table = TRUE, risk.table.col = 'strata', risk.table.y.text = FALSE, risk.table.title = 'Number at risk', pval = TRUE, xlab = 'Time (years)', ggtheme = theme_light(), surv.median.line = 'hv', title = paste(gene, 'Survival', sep = ' ')) dev.off() # Percentage Bar Plot res.cat$ID <- test$ID res.cat$Response <- test$Response res.cat$Response[res.cat$Response == 1] <- "Yes" res.cat$Response[res.cat$Response == 0] <- "No" res.cat$Group <- res.cat[, gene] library(dplyr) library(plyr) library(ggplot2) library(ggpubr) dat <- dplyr::count(res.cat, Group, Response) dat <- dat %>% group_by(Group) %>% dplyr::summarise(Response = Response, n = n/sum(n)) dat$Response <- factor(dat$Response, levels = c("No", "Yes")) # Calculate percentages dat <- ddply(dat, .(Group), transform, percent = n/sum(n) * 100) # Calculate position for labels dat <- ddply(dat, .(Group), transform, pos = (cumsum(n) - 0.5 * n)) dat$label <- paste0(sprintf("%.0f", dat$percent), "%") bioCol <- c("#f87669", "#2fa1dd") p <- ggplot(dat, aes(x = factor(Group), y = percent, fill = Response)) + geom_bar(position = position_stack(), stat = "identity", width = 0.7) + scale_fill_manual(values = bioCol) + xlab("Group") + ylab("Percent weight") + guides(fill = guide_legend(title = "Response")) + geom_text(aes(label = label), position = position_stack(vjust = 0.5), size = 3) + theme_bw() pdf(file = "ICB.Riaz2017_Nivolumab_Melanoma_Prog_Barplot.pdf", width = 4, height = 5) print(p) dev.off() test <- rt test <- na.omit(test) test$Response[test$Response == 1] <- "Yes" test$Response[test$Response == 0] <- "No" test$Response <- factor(test$Response, levels = c('No', 'Yes')) my_comparisons <- list(c("No", "Yes")) # Differential Expression Boxplot p <- ggboxplot(test, x = "Response", y = gene, fill = "Response", legend = FALSE, palette = c("#E7B800", "#00AFBB"), bxp.errorbar = TRUE) + theme(legend.position = 'none') + ylab(label = gene) + stat_compare_means(comparisons = my_comparisons, method = 't.test', aes(label = ..p.signif..)) ggsave(p, filename = paste("ICB.Riaz2017_Nivolumab_Melanoma_Prog", gene, '_Differential_Expression.pdf', sep = ' '), width = 5.6, height = 4.22) # ROC Analysis to Predict Responsiveness to Immunotherapy library("pROC") df <- dplyr::select(rt, Response, gene, PDCD1, CTLA4, CD274) # Define a set of colors for ROC curves mycol <- c("slateblue", "seagreen3", "dodgerblue", "firebrick1", "lightgoldenrod", "magenta", "orange2") if (TRUE) { pdf("ICB.Riaz2017_Nivolumab_Melanoma_Prog_ROC.pdf", height = 6, width = 6) auc.out <- c() # Plot the first ROC curve (Response vs. PDCD1) x <- plot.roc(df[, 1], df[, 2], ylim = c(0, 1), xlim = c(1, 0), smooth = TRUE, ci = TRUE, main = "", col = mycol[2], lwd = 2, legacy.axes = TRUE) ci.lower <- round(as.numeric(x$ci[1]), 3) ci.upper <- round as.numeric(x$ci[3]), 3) auc.ci <- c(colnames(df)[2], round(as.numeric(x$auc), 3), paste(ci.lower, ci.upper, sep = "-")) auc.out <- rbind(auc.out, auc.ci) # Plot additional ROC curves for (i in 3:ncol(df)) { x <- plot.roc(df[, 1], df[, i], add = TRUE, smooth = TRUE, ci = TRUE, col = mycol[i], lwd = 2, legacy.axes = TRUE) ci.lower <- round(as.numeric(x$ci[1]), 3) ci.upper <- round(as.numeric(x$ci[3]), 3) auc.ci <- c(colnames(df)[i], round(as.numeric(x$auc), 3), paste(ci.lower, ci.upper, sep = "-")) auc.out <- rbind(auc.out, auc.ci) } auc.out <- as.data.frame(auc.out) colnames(auc.out) <- c("Name", "AUC", "AUC CI") # Generate a legend for the ROC curves legend.name <- paste(colnames(df)[2:length(df)], "AUC", auc.out$AUC, sep = " ") legend("bottomright", legend = legend.name, col = mycol[2:length(df)], lwd = 2, bty = "n") dev.off() } # Compare multiple curves # There are three methods available for calculating p-values in the "method=" parameter: "delong", "bootstrap", or "venkatraman" p.out <- c() for (i in 2:(ncol(df) - 1)) { for (j in (i + 1):ncol(df)) { p <- roc.test(df[, 1], df[, i], df[, j], method = "bootstrap") p.tmp <- c(colnames(df)[i], colnames(df)[j], p$p.value) p.out <- rbind(p.out, p.tmp) } } p.out <- as.data.frame(p.out) colnames(p.out) <- c("ROC1", "ROC2", "p.value") auc.out <- as.data.frame(auc.out) colnames(auc.out) <- c("Name", "AUC", "AUC CI") # Output p-values to a file # Generate legend legend.name <- paste(colnames(df)[2:length(df)], "AUC", auc.out$AUC, sep = " ") legend( "bottomright", legend = legend.name, col = mycol[2:length(df)], lwd = 2, bty = "n" ) dev.off() } # Survival analysis setwd(paste0("Y:\\lcd code\\TCGA advanced\\TCGA arbitrary\\Advanced Version\\Advanced/", gene, "/5 Immune Infiltration Analysis/6 Immunotherapy")) rt$PFS <- rt$PFS / 365 rt$PFS.Event <- as.numeric(rt$PFS.Event) test <- rt test <- na.omit(test) res.cut <- surv_cutpoint(test, time = "PFS", event = "PFS.Event", variables = gene) summary(res.cut) # Categorize variables res.cat <- surv_categorize(res.cut) head(res.cat) # Fit survival curves and visualize fit <- survfit(Surv(PFS, PFS.Event) ~ get(gene), data = res.cat) pdf( file = "ICB.Riaz2017_Nivolumab_Melanoma_Prog_PFS Survival Analysis.pdf", width = 5.6, height = 5.34, onefile = FALSE ) ggsurvplot( fit, data = res.cat, risk.table = TRUE, risk.table.col = 'strata', risk.table.y.text = FALSE, risk.table.title = 'Number at risk', pval = TRUE, xlab = 'Time (years)', ggtheme = theme_light(), surv.median.line = 'hv', title = paste(gene, 'Survival', sep = ' ') ) dev.off() ################################################################################### rm(list = ls()) gene = "Score" setwd("Y:\\lcd code\\TCGA advanced\\TCGA arbitrary\\Advanced Version\\Advanced\\Raw Data\\Immunotherapy Data") genelist_input <- fread(file = "GSE135222.csv", header = TRUE, sep = ',', data.table = FALSE) genelist_input <- separate(genelist_input, gene_id, c("ID", NA), sep = "\\.") genename <- as.character(genelist_input[, 1]) # Extract the first column of gene names # x - Genomic annotation R package # keys - List of genes to be converted # keytype - Gene name type # columns - Desired gene name type data to return, such as returning NCBI gene ID using ENTREZID gene_map <- biomaRt::select(org.Hs.eg.db, keys = genename, keytype = "ENSEMBL", columns = c("SYMBOL")) colnames(gene_map)[1] <- "ID" colnames(genelist_input)[1] <- "ID" genelist_input$ID <- as.character(genelist_input$ID) immudata <- inner_join(gene_map, genelist_input, by = "ID") immudata <- immudata[, -1] immudata <- na.omit(immudata) # Build a boolean vector, index index <- duplicated(immudata$SYMBOL) index # Filter data immudata <- immudata[!index, ] # Select non-duplicated data immudata <- na.omit(immudata) rownames(immudata) <- immudata$SYMBOL immudata <- immudata[, -1] immudata <- as.matrix(immudata) immudata <- as.data.frame(t(immudata)) CCC <- read.table("uniSigGeneExp.txt", header = TRUE, sep = "\t") CCC <- CCC$id CCC <- intersect(colnames(immudata), CCC) AAA <- immudata[, CCC] # PCA analysis pca <- prcomp(AAA, scale = TRUE) value <- predict(pca) score <- value[, 1] - value[, 2] score <- as.data.frame(score) score$ID <- rownames(score) immudata$ID <- row.names(immudata) immudata <- inner_join(immudata, score, by = "ID") colnames(immudata)[which(colnames(immudata) == "score")] = gene immudata <- immudata %>% dplyr::select(ID, gene, everything()) clindata <- read.csv("GSE135222_clin.csv", header = TRUE, sep = ",") colnames(clindata)[1] <- "ID" AAA <- intersect(clindata$ID, immudata$ID) immudata <- dplyr::filter(immudata, ID %in% AAA) clindata <- dplyr::filter(clindata, ID %in% AAA) rt <- inner_join(clindata, immudata, by = "ID") rt$PFS <- rt$PFS / 365 rt$PFS.Event <- as.numeric(rt$PFS.Event) # Survival Analysis setwd(paste0("Y:\\lcd code\\TCGA advanced\\TCGA arbitrary\\Advanced Version\\Advanced/", gene, "/5 Immune Infiltration Analysis/6 Immunotherapy")) test <- rt test <- na.omit(test) res.cut <- surv_cutpoint(test, time = "PFS", event = "PFS.Event", variables = gene) summary(res.cut) # Categorize variables res.cat <- surv_categorize(res.cut) head(res.cat) # Fit survival curves and visualize fit <- survfit(Surv(PFS, PFS.Event) ~ get(gene), data = res.cat) pdf( file = "GSE135222_PFS Survival Analysis.pdf", width = 5.6, height = 5.34, onefile = FALSE ) ggsurvplot( fit, data = res.cat, risk.table = TRUE, risk.table.col = 'strata', risk.table.y.text = FALSE, risk.table.title = 'Number at risk', pval = TRUE, xlab = 'Time (years)', ggtheme = theme_light(), surv.median.line = 'hv', title = paste(gene, 'Survival', sep = ' ') ) dev.off() # Percentage Plot res.cat$ID <- test$ID res.cat$Response <- test$Response res.cat$Group <- res.cat[, gene] res.cat <- na.omit(res.cat) library(dplyr) library(plyr) library(ggplot2) library(ggpubr) dat <- dplyr::count(res.cat, Group, Response) dat <- dat %>% group_by(Group) %>% dplyr::summarise(Response = Response, n = n/sum(n)) dat$Response = factor(dat$Response, levels = c("CR/PR", "PD/SD")) # Calculate percentages dat <- ddply(dat, .(Group), transform, percent = n/sum(n) * 100) # Percentage position dat <- ddply(dat, .(Group), transform, pos = (cumsum(n) - 0.5 * n)) dat$label = paste0(sprintf("%.0f", dat$percent), "%") bioCol = c("#f87669", "#2fa1dd") p = ggplot(dat, aes(x = factor(Group), y = percent, fill = Response)) + geom_bar(position = position_stack(), stat = "identity", width = 0.7) + scale_fill_manual(values = bioCol) + xlab("Group") + ylab("Percent weight") + guides(fill = guide_legend(title = "Response")) + geom_text(aes(label = label), position = position_stack(vjust = 0.5), size = 3) + #coord_flip()+ theme_bw() pdf(file = "GSE176307_barplot.pdf", width = 4, height = 5) print(p) dev.off() test <- dplyr::select(rt, ID, Response, gene) test <- na.omit(test) test$Response <- factor(test$Response, levels = c('CR/PR', 'PD/SD')) my_comparisons <- list(c("CR/PR", "PD/SD")) # Differential Expression Analysis p <- ggboxplot(test, x = "Response", y = gene, fill = "Response", legend = FALSE, palette = c("#E7B800", "#00AFBB"), bxp.errorbar = TRUE) + theme(legend.position = 'none') + ylab(label = gene) + stat_compare_means(comparisons = my_comparisons, method = 't.test', aes(label = ..p.signif..)) ggsave(p, filename = paste("GSE176307", gene, 'Differential_Expression.pdf', sep = ' '), width = 5.6, height = 4.22) test <- dplyr::select(rt, PFS, Progression, gene) res.cut <- surv_cutpoint(test, time = "PFS", event = "Progression", variables = gene) summary(res.cut) # Categorize Variables res.cat <- surv_categorize(res.cut) head(res.cat) # Fit Survival Curves and Visualize fit <- survfit(Surv(PFS, Progression) ~ get(gene), data = res.cat) pdf(file = "GSE176307_PFS_Survival_Analysis.pdf", width = 5.6, height = 5.34, onefile = FALSE) ggsurvplot(fit, data = res.cat, risk.table = TRUE, risk.table.col = 'strata', risk.table.y.text = FALSE, risk.table.title = 'Number at risk', pval = TRUE, xlab = 'Time (years)', ggtheme = theme_light(), surv.median.line = 'hv', title = paste(gene, 'Survival Analysis', sep = ' ')) dev.off() # Predict Responsiveness ROC Curve library("pROC") df <- dplyr::select(rt, Response, gene, PDCD1, CTLA4, CD274) df <- na.omit(df) df$Response[df$Response == "PD/SD"] <- 1 df$Response[df$Response == "CR/PR"] <- 0 # Define a sufficient number of colors for later use when drawing lines mycol <- c("slateblue", "seagreen3", "dodgerblue", "firebrick1", "lightgoldenrod", "magenta", "orange2") if (TRUE) { pdf("GSE176307_ROC.pdf", height = 6, width = 6) auc.out <- c() # Start by drawing the first curve, in this case, it's miRNA1 x <- plot.roc(df[, 1], df[, 2], ylim = c(0, 1), xlim = c(1, 0), smooth = TRUE, # Draw a smooth curve ci = TRUE, main = "", # print.thres = "best", # Display the threshold on the graph where sensitivity + specificity is maximized col = mycol[2], # Line color lwd = 2, # Line thickness legacy.axes = TRUE) # Use axes style commonly seen in papers, with the x-axis as "1-specificity" from 0 to 1 ci.lower <- round(as.numeric(x$ci[1]), 3) # Confidence interval lower limit ci.upper <- round(as.numeric(x$ci[3]), 3) # Confidence interval upper limit auc.ci <- c(colnames(df)[2], round(as.numeric(x$auc), 3), paste(ci.lower, ci.upper, sep = "-")) auc.out <- rbind(auc.out, auc.ci) # Next, use a loop to draw the second and subsequent curves for (i in 3:ncol(df)) { x <- plot.roc(df[, 1], df[, i], add = TRUE, # Add to the previous plot smooth = TRUE, ci = TRUE, col = mycol[i], lwd = 2, legacy.axes = TRUE) ci.lower <- round(as.numeric(x$ci[1]), 3) ci.upper <- round(as.numeric(x$ci[3]), 3) auc.ci <- c(colnames(df)[i], round(as.numeric(x$auc), 3), paste(ci.lower, ci.upper, sep = "-")) auc.out <- rbind(auc.out, auc.ci) } # Comparing multiple ROC curves # There are three methods available for calculating p-values after `method=`, namely "delong", "bootstrap", or "venkatraman" # Initialize an empty data frame for storing p-values p.out <- data.frame() # Loop through pairs of columns in the data frame df for (i in 2:(ncol(df) - 1)) { for (j in (i + 1):ncol(df)) { # Calculate p-value using the roc.test function with method="bootstrap" p <- roc.test(df[, 1], df[, i], df[, j], method = "bootstrap") # Create a temporary data frame containing ROC1, ROC2, and p-value p.tmp <- c(colnames(df)[i], colnames(df)[j], p$p.value) # Append the temporary data frame to p.out p.out <- rbind(p.out, p.tmp) } } # Rename the columns in p.out colnames(p.out) <- c("ROC1", "ROC2", "p.value") # Convert auc.out to a data frame if it's not already auc.out <- as.data.frame(auc.out) # Rename the columns in auc.out colnames(auc.out) <- c("Name", "AUC", "AUC CI") # Output p-values to a file # (Note: The code for writing p-values to a file is missing here; you can add it as needed.) # Plotting the legend legend.name <- paste(colnames(df)[2:length(df)], "AUC", auc.out$AUC, sep=" ") legend("bottomright", legend = legend.name, col = mycol[2:length(df)], lwd = 2, bty = "n") dev.off() } # Cutting survival data res.cut <- surv_cutpoint(test2, time = "PFS.time", event = "PFS", variables = gene) summary(res.cut) # Categorizing variables res.cat <- surv_categorize(res.cut) head(res.cat) # Fitting survival curves and visualization fit <- survfit(Surv(PFS.time, PFS) ~ get(gene), data = res.cat) # Saving survival analysis plot to a PDF file pdf(file = "NCT02684006_Sunitinib_PFS_Survival_Analysis.pdf", width = 5.6, height = 5.34, onefile = FALSE) ggsurvplot(fit, data = res.cat, risk.table = TRUE, risk.table.col = 'strata', risk.table.y.text = FALSE, risk.table.title = 'Number at risk', pval = TRUE, xlab = 'Time (years)', ggtheme = theme_light(), surv.median.line = 'hv', title = paste(gene, 'Survival', sep = ' ')) dev.off() # Creating a percentage plot res.cat$ID <- test2$ID res.cat$Progression <- test2$PFS res.cat$Group <- res.cat[, gene] res.cat <- na.omit(res.cat) res.cat$Progression[res.cat$PFS == 1] <- "Yes" res.cat$Progression[res.cat$PFS == 0] <- "No" library(dplyr) library(plyr) library(ggplot2) library(ggpubr) dat <- dplyr::count(res.cat, Group, Progression) dat <- dat %>% group_by(Group) %>% dplyr::summarise(Progression = Progression, n = n/sum(n)) dat$Progression <- factor(dat$Progression, levels = c("Yes", "No")) # Calculating percentages dat <- ddply(dat, .(Group), transform, percent = n/sum(n) * 100) # Percentage positions dat <- ddply(dat, .(Group), transform, pos = (cumsum(n) - 0.5 * n)) dat$label <- paste0(sprintf("%.0f", dat$percent), "%") bioCol <- c("#f87669", "#2fa1dd") p <- ggplot(dat, aes(x = factor(Group), y = percent, fill = Progression)) + geom_bar(position = position_stack(), stat = "identity", width = 0.7) + scale_fill_manual(values = bioCol) + xlab("Group") + ylab("Percent weight") + guides(fill = guide_legend(title = "Progression")) + geom_text(aes(label = label), position = position_stack(vjust = 0.5), size = 3) + theme_bw() # Saving the percentage plot to a PDF file pdf(file = "NCT02684006_Sunitinib_Percentage_Plot.pdf", width = 4, height = 5) print(p) dev.off() # Preparing data for boxplot DAT <- dplyr::select(test2, ID, PFS, gene) DAT <- na.omit(DAT) colnames(DAT)[2] <- "Progression" DAT$Progression[DAT$Progression == 1] <- "Yes" DAT$Progression[DAT$Progression == 0] <- "No" DAT$Progression <- factor(DAT$Progression, levels = c('Yes', 'No')) my_comparisons <- list(c("Yes", "No")) # Creating a boxplot for differential expression p <- ggboxplot(DAT, x = "Progression", y = gene, fill = "Progression", legend = FALSE, palette = c("#E7B800", "#00AFBB"), bxp.errorbar = TRUE) + theme(legend.position = 'none') + ylab(label = gene) + stat_compare_means(comparisons = my_comparisons, method = 't.test', aes(label = ..p.signif..)) # Saving the boxplot to a PDF file ggsave(p, filename = paste("NCT02684006_Sunitinib", gene, 'Differential_Expression.pdf', sep = ' '), width = 5.6, height = 4.22) # Predicting responsiveness ROC curves library("pROC") df <- rt %>% dplyr::filter(Treatment == "Sunitinib") %>% dplyr::select(ID, PFS, gene, CTLA4, CD274) df <- na.omit(df) colnames(df)[2] <- "Progression" df <- df[, -1] # Defining a sufficient number of colors for later use in line plotting mycol <- c("slateblue", "seagreen3", "dodgerblue", "firebrick1", "lightgoldenrod", "magenta", "orange2") if (TRUE) { pdf("NCT02684006_Sunitinib_ROC.pdf", height = 6, width = 6) auc.out <- c() # Plotting the first line (miRNA1) x <- plot.roc(df[, 1], df[, 2], ylim = c(0, 1), xlim = c(1, 0), smooth = TRUE, ci = TRUE, main = "", col = mycol[2], lwd = 2, legacy.axes = TRUE) # Using typical paper-style plotting with "1-specificity" on the x-axis ci.lower <- round(as.numeric(x$ci[1]), 3) # Confidence interval lower limit ci.upper <- round(as.numeric(x$ci[3]), 3) # Confidence interval upper limit auc.ci <- c(colnames(df)[2], round(as.numeric(x$auc), 3), paste(ci.lower, ci.upper, sep = "-")) auc.out <- rbind(auc.out, auc.ci) # Plotting additional ROC curves in a loop for (i in 3:ncol(df)) { x <- plot.roc(df[, 1], df[, i], add = TRUE, smooth = TRUE, ci = TRUE, col = mycol[i], lwd = 2, legacy.axes = TRUE) ci.lower <- round(as.numeric(x$ci[1]), 3) ci.upper <- round(as.numeric(x$ci[3]), 3) auc.ci <- c(colnames(df)[i], round(as.numeric(x$auc), 3), paste(ci.lower, ci.upper, sep = "-")) auc.out <- rbind(auc.out, auc.ci) } } # Comparing multiple ROC curves # You can choose from three methods in the 'method' parameter: "delong," "bootstrap," or "venkatraman" to calculate p-values # p.out <- c() # for (i in 2:(ncol(df)-1)) { # for (j in (i+1):ncol(df)) { # p <- roc.test(df[,1], df[,i], df[,j], method = "bootstrap") # p.tmp <- c(colnames(df)[i], colnames(df)[j], p$p.value) # p.out <- rbind(p.out, p.tmp) # } # } # p.out <- as.data.frame(p.out) # colnames(p.out) <- c("ROC1", "ROC2", "p.value") auc.out <- as.data.frame(auc.out) colnames(auc.out) <- c("Name", "AUC", "AUC CI") # Output p-values to a file # Plotting the legend legend.name <- paste(colnames(df)[2:length(df)], "AUC", auc.out$AUC, sep = " ") legend("bottomright", legend = legend.name, col = mycol[2:length(df)], lwd = 2, bty = "n") dev.off() ################################################################################################### ################################################################################### rm(list = ls()) gene = "Score" setwd("Y:\\lcd_code\\TCGA_Any\\TCGA_Advanced\\Advanced\\Raw_Data\\Immune_Therapy_Data") genelist_input <- fread(file = "PMID32472114.csv", header = TRUE, sep = ',', data.table = FALSE) rownames(genelist_input) <- genelist_input$gene_name genelist_input <- genelist_input[, -1] immudata <- na.omit(genelist_input) range(immudata) # immudata <- log2(immudata + 1) immudata <- as.matrix(immudata) immudata <- as.data.frame(t(immudata)) CCC = read.table("uniSigGeneExp.txt", header = TRUE, sep = "\t") CCC = CCC$id CCC = intersect(colnames(immudata), CCC) AAA = immudata[, CCC] # PCA analysis pca = prcomp(AAA, scale = TRUE) value = predict(pca) score = value[, 1] - value[, 2] score = as.data.frame(score) score$ID = rownames(score) immudata$ID = row.names(immudata) immudata = inner_join(immudata, score, by = "ID") colnames(immudata)[which(colnames(immudata) == "score")] = gene immudata <- immudata %>% dplyr::select(ID, gene, everything()) clindata <- read.csv("PMID32472114_clin.csv", header = TRUE, sep = ",") colnames(clindata)[1] <- "ID" AAA <- intersect(clindata$ID, immudata$ID) immudata <- dplyr::filter(immudata, ID %in% AAA) clindata <- dplyr::filter(clindata, ID %in% AAA) rt <- inner_join(clindata, immudata, by = "ID") rt$PFS.time <- rt$PFS.time / 12 rt$PFS <- as.numeric(rt$PFS) rt$OS.time <- rt$OS.time / 12 rt$OS <- as.numeric(rt$OS) # Survival analysis setwd(paste0("Y:\\lcd_code\\TCGA_Any\\TCGA_Advanced\\Advanced/", gene, "/5_Immune_Infiltration_Analysis/6_Immune_Therapy")) test <- dplyr::select(rt, ID, OS.time, OS, gene, Treatment, ORR) test1 <- dplyr::filter(test, Treatment == "Nivolumab") test2 <- dplyr::filter(test, Treatment == "Everolimus") res.cut <- surv_cutpoint(test1, time = "OS.time", event = "OS", variables = gene) summary(res.cut) # Categorize variables res.cat <- surv_categorize(res.cut) head(res.cat) # Fit survival curves and visualize fit <- survfit(Surv(OS.time, OS) ~ get(gene), data = res.cat) pdf(file = "PMID32472114_Immune_Therapy_OS_Survival_Analysis.pdf", width = 5.6, height = 5.34, onefile = FALSE) ggsurvplot(fit, data = res.cat, risk.table = TRUE, risk.table.col = 'strata', risk.table.y.text = FALSE, risk.table.title = 'Number at risk', pval = TRUE, #conf.int = TRUE, xlab = 'Time (years)', ggtheme = theme_light(), surv.median.line = 'hv', title = paste(gene, 'Survival', sep = ' ')) dev.off() # Percentage plot res.cat$ID <- test1$ID res.cat$Response <- test1$ORR res.cat$Group <- res.cat[, gene] res.cat <- na.omit(res.cat) library(dplyr) library(plyr) library(ggplot2) library(ggpubr) dat <- dplyr::count(res.cat, Group, Response) dat <- dat %>% group_by(Group) %>% dplyr::summarise(Response = Response, n = n / sum(n)) dat$Response = factor(dat$Response, levels = c("CR/PR", "PD/SD")) # Calculate percentages dat = ddply(dat, .(Group), transform, percent = n / sum(n) * 100) # Percentage positions dat = ddply(dat, .(Group), transform, pos = (cumsum(n) - 0.5 * n)) dat$label = paste0(sprintf("%.0f", dat$percent), "%") bioCol = c("#f87669", "#2fa1dd") p = ggplot(dat, aes(x = factor(Group), y = percent, fill = Response)) + geom_bar(position = position_stack(), stat = "identity", width = 0.7) + scale_fill_manual(values = bioCol) + xlab("Group") + ylab("Percent weight") + guides(fill = guide_legend(title = "Response")) + geom_text(aes(label = label), position = position_stack(vjust = 0.5), size = 3) + #coord_flip()+ theme_bw() pdf(file = "PMID32472114_Immune_Therapy_barplot.pdf", width = 4, height = 5) print(p) dev.off() DAT <- dplyr::select(test1, ID, ORR, gene) DAT <- na.omit(DAT) colnames(DAT)[2] <- "Response" DAT$Response <- factor(DAT$Response, levels = c('CR/PR', 'PD/SD')) my_comparisons <- list(c("CR/PR", "PD/SD")) # Differential expression for responders and non-responders p <- ggboxplot(DAT, x = "Response", y = gene, fill = "Response", legend = FALSE, palette = c("#E7B800", "#00AFBB"), bxp.errorbar = TRUE) + theme(legend.position = 'none') + ylab(label = gene) + stat_compare_means(comparisons = my_comparisons, method = 't.test', aes(label = ..p.signif..)) ggsave(p, filename = paste("PMID32472114_Immune_Therapy", gene, 'Differential_Expression.pdf', sep = ' '), width = 5.6, height = 4.22) # Predictive ROC curves library("pROC") df <- rt %>% dplyr::filter(Treatment == "Nivolumab") %>% dplyr::select(ID, ORR, gene, PDCD1, CTLA4, CD274) %>% na.omit() df$ORR[df$ORR == "PD/SD"] = 1 df$ORR[df$ORR == "CR/PR"] = 0 df <- df[, -1] # Define enough colors for selecting colors when drawing lines later mycol <- c("slateblue", "seagreen3", "dodgerblue", "firebrick1", "lightgoldenrod", "magenta", "orange2") if (TRUE) { pdf("PMID32472114_Immune_Therapy_ROC.pdf", height = 6, width = 6) auc.out <- c() # Start by drawing the first line, which is miRNA1 in this case x <- plot.roc(df[, 1], df[, 2], ylim = c(0, 1), xlim = c(1, 0), smooth = TRUE, # Draw a smooth curve ci = TRUE, main = "", #print.thres = "best", # Put the threshold on the graph where sensitivity + specificity is maximum col = mycol[2], # Line color lwd = 2, # Line thickness legacy.axes = TRUE) # Use the common style for axes where x-axis is "1-specificity" ranging from 0 to 1 ci.lower <- round(as.numeric(x$ci[1]), 3) # Confidence interval lower bound ci.upper <- round(as.numeric(x$ci[3]), 3) # Confidence interval upper bound auc.ci <- c(colnames(df)[2], round(as.numeric(x$auc), 3), paste(ci.lower, ci.upper, sep = "-")) auc.out <- rbind(auc.out, auc.ci) # Next, use a loop to draw the second and subsequent curves for (i in 3:ncol(df)) { x <- plot.roc(df[, 1], df[, i], add = TRUE, # Add to the previously drawn graph smooth = TRUE, ci = TRUE, col = mycol[i], lwd = 2, legacy.axes = TRUE) ci.lower <- round(as.numeric(x$ci[1]), 3) ci.upper <- round(as.numeric(x$ci[3]), 3) auc.ci <- c(colnames(df)[i], round(as.numeric(x$auc), 3), paste(ci.lower, ci.upper, sep = "-")) auc.out <- rbind(auc.out, auc.ci) } # Compare multiple curves # There are three methods available for calculating p-values after "method=": "delong", "bootstrap", or "venkatraman" p.out <- c() for (i in 2:(ncol(df) - 1)) { for (j in (i + 1):ncol(df)) { p <- roc.test(df[, 1], df[, i], df[, j], method = "bootstrap") p.tmp <- c(colnames(df)[i], colnames(df)[j], p$p.value) p.out <- rbind(p.out, p.tmp) } } p.out <- as.data.frame(p.out) colnames(p.out) <- c("ROC1", "ROC2", "p.value") auc.out <- as.data.frame(auc.out) colnames(auc.out) <- c("Name", "AUC", "AUC CI") # Output p-values to a file # Draw legend legend.name <- paste(colnames(df)[2:length(df)], "AUC", auc.out$AUC, sep = " ") legend("bottomright", legend = legend.name, col = mycol[2:length(df)], lwd = 2, bty = "n") dev.off() } res.cut <- surv_cutpoint(test2, time = "OS.time", event = "OS", variables = gene) summary(res.cut) # Categorize variables res.cat <- surv_categorize(res.cut) head(res.cat) # Fit survival curves and visualize fit <- survfit(Surv(OS.time, OS) ~ get(gene), data = res.cat) pdf(file = "PMID32472114_Everolimus_OS_Survival_Analysis.pdf", width = 5.6, height = 5.34, onefile = FALSE) ggsurvplot(fit, data = res.cat, risk.table = TRUE, risk.table.col = 'strata', risk.table.y.text = FALSE, risk.table.title = 'Number at risk', pval = TRUE, #conf.int = TRUE, xlab = 'Time (years)', ggtheme = theme_light(), surv.median.line = 'hv', title = paste(gene, 'Survival', sep = ' ')) dev.off() # Percentage plot res.cat$ID <- test2$ID res.cat$Progression <- test2$ORR res.cat$Group <- res.cat[, gene] res.cat <- na.omit(res.cat) library(dplyr) library(plyr) library(ggplot2) library(ggpubr) dat <- dplyr::count(res.cat, Group, Progression) dat <- dat %>% group_by(Group) %>% dplyr::summarise(Progression = Progression, n = n / sum(n)) dat$Progression = factor(dat$Progression, levels = c("CR/PR", "PD/SD")) # Calculate percentages dat = ddply(dat, .(Group), transform, percent = n / sum(n) * 100) # Percentage positions dat = ddply(dat, .(Group), transform, pos = (cumsum(n) - 0.5 * n)) dat$label = paste0(sprintf("%.0f", dat$percent), "%") bioCol = c("#f87669", "#2fa1dd") p = ggplot(dat, aes(x = factor(Group), y = percent, fill = Progression)) + geom_bar(position = position_stack(), stat = "identity", width = 0.7) + scale_fill_manual(values = bioCol) + xlab("Group") + ylab("Percent weight") + guides(fill = guide_legend(title = "Progression")) + geom_text(aes(label = label), position = position_stack(vjust = 0.5), size = 3) + #coord_flip()+ theme_bw() pdf(file = "PMID32472114_Everolimus_barplot.pdf", width = 4, height = 5) print(p) dev.off() DAT <- dplyr::select(test2, ID, ORR, gene) DAT <- na.omit(DAT) colnames(DAT)[2] <- "Progression" DAT$Progression <- factor(DAT$Progression, levels = c('CR/PR', 'PD/SD')) my_comparisons <- list(c("CR/PR", "PD/SD")) # Differential expression for responders and non-responders p <- ggboxplot(DAT, x = "Progression", y = gene, fill = "Progression", legend = FALSE, palette = c("#E7B800", "#00AFBB"), bxp.errorbar = TRUE) + theme(legend.position = 'none') + ylab(label = gene) + stat_compare_means(comparisons = my_comparisons, method = 't.test', aes(label = ..p.signif..)) ggsave(p, filename = paste("PMID32472114_Everolimus", gene, 'Differential_Expression.pdf', sep = ' '), width = 5.6, height = 4.22) # Predictive ROC curves library("pROC") df <- rt %>% dplyr::filter(Treatment == "Everolimus") %>% dplyr::select(ID, ORR, gene, CTLA4, CD274) df <- na.omit(df) colnames(df)[2] <- "Progression" df$Progression[df$Progression == "PD/SD"] = 1 df$Progression[df$Progression == "CR/PR"] = 0 df <- df[, -1] # Define enough colors for selecting colors when drawing lines later mycol <- c("slateblue", "seagreen3", "dodgerblue", "firebrick1", "lightgoldenrod", "magenta", "orange2") if (TRUE) { pdf("PMID32472114_Everolimus_ROC.pdf", height = 6, width = 6) auc.out <- c() # Start by drawing the first line, which is miRNA1 in this case x <- plot.roc(df[, 1], df[, 2], ylim = c(0, 1), xlim = c(1, 0), smooth = TRUE, # Draw a smooth curve ci = TRUE, main = "", #print.thres = "best", # Put the threshold on the graph where sensitivity + specificity is maximum col = mycol[2], # Line color lwd = 2, # Line thickness legacy.axes = TRUE) # Use the common style for axes where x-axis is "1-specificity" ranging from 0 to 1 ci.lower <- round(as.numeric(x$ci[1]), 3) # Confidence interval lower bound ci.upper <- round(as.numeric(x$ci[3]), 3) # Confidence interval upper bound auc.ci <- c(colnames(df)[2], round(as.numeric(x$auc), 3), paste(ci.lower, ci.upper, sep = "-")) auc.out <- rbind(auc.out, auc.ci) # Next, use a loop to draw the second and subsequent curves for (i in 3:ncol(df)) { x <- plot.roc(df[, 1], df[, i], add = TRUE, # Add to the previously drawn graph smooth = TRUE, ci = TRUE, col = mycol[i], lwd = 2, legacy.axes = TRUE) ci.lower <- round(as.numeric(x$ci[1]), 3) ci.upper <- round(as.numeric(x$ci[3]), 3) auc.ci <- c(colnames(df)[i], round(as.numeric(x$auc), 3), paste(ci.lower, ci.upper, sep = "-")) auc.out <- rbind(auc.out, auc.ci) } } # # Compare multiple curves # # After the "method=" parameter, there are three methods available: "delong," "bootstrap," or "venkatraman" to calculate p-values # p.out <- c() # for (i in 2:(ncol(df) - 1)) { # for (j in (i + 1):ncol(df)) { # p <- roc.test(df[, 1], df[, i], df[, j], method = "bootstrap") # p.tmp <- c(colnames(df)[i], colnames(df)[j], p$p.value) # p.out <- rbind(p.out, p.tmp) # } # } # p.out <- as.data.frame(p.out) # colnames(p.out) <- c("ROC1", "ROC2", "p.value") auc.out <- as.data.frame(auc.out) colnames(auc.out) <- c("Name", "AUC", "AUC CI") # Output p-values to a file # Draw a legend legend.name <- paste(colnames(df)[2:length(df)], "AUC", auc.out$AUC, sep = " ") legend("bottomright", legend = legend.name, col = mycol[2:length(df)], lwd = 2, bty = "n") dev.off() } # Select relevant data columns test <- dplyr::select(rt, ID, PFS.time, PFS, gene, Treatment, ORR) # Filter data for 'Nivolumab' treatment test1 <- dplyr::filter(test, Treatment == "Nivolumab") # Filter data for 'Everolimus' treatment test2 <- dplyr::filter(test, Treatment == "Everolimus") # Perform survival analysis for 'Nivolumab' treatment res.cut <- surv_cutpoint(test1, time = "PFS.time", event = "PFS", variables = gene) summary(res.cut) # Categorize variables for 'Nivolumab' treatment res.cat <- surv_categorize(res.cut) head(res.cat) # Fit survival curves and visualize for 'Nivolumab' treatment fit <- survfit(Surv(PFS.time, PFS) ~ get(gene), data = res.cat) # Create a PDF file for 'Nivolumab' PFS survival analysis pdf(file = "PMID32472114_Immunotherapy_Nivolumab_PFS_Survival_Analysis.pdf", width = 5.6, height = 5.34, onefile = FALSE) # Generate survival plot for 'Nivolumab' treatment ggsurvplot(fit, data = res.cat, risk.table = TRUE, risk.table.col = 'strata', risk.table.y.text = FALSE, risk.table.title = 'Number at risk', pval = TRUE, #conf.int = TRUE, xlab = 'Time (years)', ggtheme = theme_light(), surv.median.line = 'hv', title = paste(gene, 'Survival', sep = ' ')) # Close the PDF device dev.off() # Perform survival analysis for 'Everolimus' treatment res.cut <- surv_cutpoint(test2, time = "PFS.time", event = "PFS", variables = gene) summary(res.cut) # Categorize variables for 'Everolimus' treatment res.cat <- surv_categorize(res.cut) head(res.cat) # Fit survival curves and visualize for 'Everolimus' treatment fit <- survfit(Surv(PFS.time, PFS) ~ get(gene), data = res.cat) # Create a PDF file for 'Everolimus' PFS survival analysis pdf(file = "PMID32472114_Everolimus_PFS_Survival_Analysis.pdf", width = 5.6, height = 5.34, onefile = FALSE) # Generate survival plot for 'Everolimus' treatment ggsurvplot(fit, data = res.cat, risk.table = TRUE, risk.table.col = 'strata', risk.table.y.text = FALSE, risk.table.title = 'Number at risk', pval = TRUE, #conf.int = TRUE, xlab = 'Time (years)', ggtheme = theme_light(), surv.median.line = 'hv', title = paste(gene, 'Survival', sep = ' ')) # Close the PDF device dev.off()