# Comment: Set options and libraries
rm(list = ls())
library(GSEABase)
library(GSVAdata)
library(Biobase)
library(genefilter)
library(limma)
library(RColorBrewer)
library(GSVA)
library(clusterProfiler)
library(pheatmap)
library(tidyverse)
library(survminer)
library(survival)
library(limma)
library(ggpubr)
library(stringr)
library(RColorBrewer)
library(export)
library(survivalROC)
library(pheatmap)
library(data.table)
library(org.Hs.eg.db)
library(clusterProfiler)
library(biomaRt)
library(enrichplot)
library(ReactomePA)
library(igraph)
library(ggraph)
library(forestplot)
library(ggradar)
library(fmsb)
library(circlize)
library(ggsci)
library(parallel)
library(maftools)
library(patchwork)
library(ComplexHeatmap)

# Comment: Disable converting strings to factors
options(stringsAsFactors = FALSE)
Sys.setenv(LANGUAGE = "en")

# Comment: Set gene name
gene = "Score"

# Comment: Create directories
dir.create(paste0("Y:\\lcd_code\\TCGA_random\\TCGA_any\\advanced_version\\advanced/", gene))
dir.create(paste0("Y:\\lcd_code\\TCGA_random\\TCGA_any\\advanced_version\\advanced/", gene, "/5_Immune_Infiltration_Analysis"))
dir.create(paste0("Y:\\lcd_code\\TCGA_random\\TCGA_any\\advanced_version\\advanced/", gene, "/5_Immune_Infiltration_Analysis/6_Immunotherapy"))

# Comment: Set working directory
setwd("Y:\\lcd_code\\TCGA_random\\TCGA_any\\advanced_version\\advanced\\raw_data\\immunotherapy_data")


# Comment: Read gene list input
genelist_input <- fread(file = "GSE32894.csv", header = TRUE, sep = ',', data.table = FALSE)
genename <- as.character(genelist_input[, 1])

# Comment: Map gene names to ENTREZID
gene_map <- biomaRt::select(org.Hs.eg.db, keys = genename, keytype = "ENTREZID", columns = c("SYMBOL"))
colnames(gene_map)[1] <- "ID"
colnames(genelist_input)[1] <- "ID"
genelist_input$ID <- as.character(genelist_input$ID)
immudata <- inner_join(gene_map, genelist_input, by = "ID")
immudata <- immudata[, -1]
immudata <- na.omit(immudata)
rownames(immudata) <- immudata$SYMBOL
immudata <- immudata[, -1]
immudata <- as.data.frame(t(immudata))

# Comment: Read additional data (CCC)
CCC <- read.table("uniSigGeneExp.txt", header = TRUE, sep = "\t")
CCC = CCC$id
CCC = intersect(colnames(immudata), CCC)
AAA = immudata[, CCC]

# Comment: Perform PCA analysis
pca = prcomp(AAA, scale = TRUE)
value = predict(pca)
score = value[, 1] - value[, 2]
score = as.data.frame(score)
score$ID = rownames(score)
immudata$ID = row.names(immudata)
immudata = inner_join(immudata, score, by = "ID")
colnames(immudata)[which(colnames(immudata) == "score")] = gene
immudata <- immudata %>%
  dplyr::select(ID, gene, everything())

# Comment: Read clinical data
clindata <- read.csv("GSE32894_clin.csv", header = TRUE, sep = ",")
colnames(clindata)[1] <- "ID"
AAA <- intersect(clindata$ID, immudata$ID)
immudata <- dplyr::filter(immudata, ID %in% AAA)
clindata <- dplyr::filter(clindata, ID %in% AAA)
rt <- inner_join(clindata, immudata, by = "ID")

# Comment: Prepare data for survival analysis
rt$time <- rt$time/12
rt$status <- as.numeric(rt$status)

# Comment: Perform survival analysis
setwd(paste0("Y:\\lcd_code\\TCGA_random\\TCGA_any\\advanced_version\\advanced/", gene, "/5_Immune_Infiltration_Analysis/6_Immunotherapy"))
test <- rt
test <- na.omit(test)
res.cut <- surv_cutpoint(test, time = "time", event = "status", variables = gene)
summary(res.cut)


# Comment: Categorize variables
res.cat <- surv_categorize(res.cut)
head(res.cat)

# Comment: Fit survival curves and visualize
fit <- survfit(Surv(time, status) ~ get(gene), data = res.cat)

# Comment: Create survival plot
pdf(file = "GSE32894_best-cutoff-DFS-survival-analysis.pdf", width = 5.6, height = 5.34, onefile = FALSE)

ggsurvplot(fit,
           data = res.cat,
           risk.table = TRUE,
           risk.table.col = 'strata',
           risk.table.y.text = FALSE,
           risk.table.title = 'Number at risk',
           pval = TRUE,
           xlab = 'Time (years)',
           ggtheme = theme_light(),
           surv.median.line = 'hv',
           title = paste(gene, 'Survival', sep = ' '))

dev.off()

# Comment: Create a barplot
res.cat$ID <- test$ID
res.cat$Progression <- test$Progression
res.cat$Group <- res.cat[, gene]

# Comment: Prepare data for the percentage barplot
dat = dplyr::count(res.cat, Group, Progression)
dat = dat %>% group_by(Group) %>%
  dplyr::summarise(Progression = Progression, n = n/sum(n))
dat$Progression = factor(dat$Progression, levels = c("no", "yes"))

# Comment: Calculate percentages
dat = ddply(dat, .(Group), transform, percent = n/sum(n) * 100)

# Comment: Calculate position for percentage labels
dat = ddply(dat, .(Group), transform, pos = (cumsum(n) - 0.5 * n))
dat$label = paste0(sprintf("%.0f", dat$percent), "%")

# Comment: Create the percentage barplot
bioCol = c("#f87669", "#2fa1dd")
p = ggplot(dat, aes(x = factor(Group), y = percent, fill = Progression
                    # Comment: Loop to plot additional ROC curves
                    for (i in 3:ncol(df)){
                      x <- plot.roc(df[, 1], df[, i],
                                    add = TRUE, # Add to the previous plot
                                    smooth = TRUE,
                                    ci = TRUE,
                                    col = mycol[i],
                                    lwd = 2,
                                    legacy.axes = TRUE)
                      
                      ci.lower <- round(as.numeric(x$ci[1]), 3)
                      ci.upper <- round(as.numeric(x$ci[3]), 3)
                      
                      auc.ci <- c(colnames(df)[i], round(as.numeric(x$auc), 3), paste(ci.lower, ci.upper, sep = "-"))
                      auc.out <- rbind(auc.out, auc.ci)
                    }
                    
                    # Comment: Create a data frame with AUC values
                    auc.out <- as.data.frame(auc.out)
                    colnames(auc.out) <- c("Name", "AUC", "AUC CI")
                    
                    # Comment: Output p-values to a file
                    
                    # Comment: Create a legend
                    legend.name <- paste(colnames(df)[2:length(df)], "AUC", auc.out$AUC, sep = " ")
                    legend("bottomright", 
                           legend = legend.name,
                           col = mycol[2:length(df)],
                           lwd = 2,
                           bty = "n")
                    dev.off()
                    }

###################################################################################
rm(list = ls())
gene = "Score"
setwd("Y:\\lcd_code\\TCGA_random\\TCGA_any\\advanced_version\\advanced\\raw_data\\immunotherapy_data")
genelist_input <- fread(file = "GSE35640.csv", header = TRUE, sep = ',', data.table = FALSE)
genename <- as.character(genelist_input[, 1])


# Comment: Map gene names to ENTREZID
gene_map <- biomaRt::select(org.Hs.eg.db, keys = genename, keytype = "ENTREZID", columns = c("SYMBOL"))
colnames(gene_map)[1] <- "ID"
colnames(genelist_input)[1] <- "ID"
genelist_input$ID <- as.character(genelist_input$ID)
immudata <- inner_join(gene_map, genelist_input, by = "ID")
immudata <- immudata[, -1]
immudata <- na.omit(immudata)
rownames(immudata) <- immudata$SYMBOL
immudata <- immudata[, -1]
immudata <- as.data.frame(t(immudata))

# Comment: Read additional data (CCC)
CCC <- read.table("uniSigGeneExp.txt", header = TRUE, sep = "\t")
CCC = CCC$id
CCC = intersect(colnames(immudata), CCC)
AAA = immudata[, CCC]

# Comment: Perform PCA analysis
pca = prcomp(AAA, scale = TRUE)
value = predict(pca)
score = value[, 1] - value[, 2]
score = as.data.frame(score)
score$ID = rownames(score)
immudata$ID = row.names(immudata)
immudata = inner_join(immudata, score, by = "ID")
colnames(immudata)[which(colnames(immudata) == "score")] = gene
immudata <- immudata %>%
  dplyr::select(ID, gene, everything())

clindata <- read.csv("GSE35640_clin.csv", header = TRUE, sep = ",")
colnames(clindata)[1] <- "ID"
AAA <- intersect(clindata$ID, immudata$ID)
immudata <- dplyr::filter(immudata, ID %in% AAA)
clindata <- dplyr::filter(clindata, ID %in% AAA)
rt <- inner_join(clindata, immudata, by = "ID")

test <- rt
test$Response <- factor(test$Response, levels = c('non-responder', 'responder'))
my_comparisons <- list(c("non-responder", "responder"))

# Comment: Perform differential gene expression analysis
setwd(paste0("Y:\\lcd_code\\TCGA_random\\TCGA_any\\advanced_version\\advanced/", gene, "/5_Immune_Infiltration_Analysis/6_Immunotherapy"))

p <- ggboxplot(test, x = "Response", y = gene,
               fill = "Response", legend = FALSE, palette = c("#E7B800", "#00AFBB"), bxp.errorbar = TRUE) +
  theme(legend.position = 'none') +
  ylab(label = gene) +
  stat_compare_means(comparisons = my_comparisons, method = 't.test', aes(label = ..p.signif..))
ggsave(p, filename = paste("GSE35640", gene, 'DifferentialExpression.pdf', sep = ' '), width = 5.6, height = 4.22)
# Comment: Predictive ROC Curve
library("pROC")
df <- dplyr::select(rt, Response, gene, PDCD1, CTLA4, CD274)

# Comment: Define a sufficient number of colors for plotting
mycol <- c("slateblue", "seagreen3", "dodgerblue", "firebrick1", "lightgoldenrod", "magenta", "orange2")

if (TRUE) {
  pdf("GSE35640_ROC.pdf", height = 6, width = 6)
  auc.out <- c()
  
  # Comment: Plot the first ROC curve for miRNA1
  x <- plot.roc(df[, 1], df[, 2], ylim = c(0, 1), xlim = c(1, 0),
                smooth = TRUE,  # Smooth ROC curve
                ci = TRUE, 
                main = "",
                col = mycol[2],  # Line color
                lwd = 2,  # Line width
                legacy.axes = TRUE)  # Use legacy axes
  
  ci.lower <- round(as.numeric(x$ci[1]), 3)  # Confidence interval lower limit
  ci.upper <- round(as.numeric(x$ci[3]), 3)  # Confidence interval upper limit
  
  auc.ci <- c(colnames(df)[2], round(as.numeric(x$auc), 3), paste(ci.lower, ci.upper, sep = "-"))
  auc.out <- rbind(auc.out, auc.ci)
  
  # Comment: Plot additional ROC curves in a loop
  for (i in 3:ncol(df)) {
    x <- plot.roc(df[, 1], df[, i],
                  add = TRUE,  # Add to the previous plot
                  smooth = TRUE,
                  ci = TRUE,
                  col = mycol[i],
                  lwd = 2,
                  legacy.axes = TRUE)
    
    ci.lower <- round(as.numeric(x$ci[1]), 3)
    ci.upper <- round(as.numeric(x$ci[3]), 3)
    
    auc.ci <- c(colnames(df)[i], round(as.numeric(x$auc), 3), paste(ci.lower, ci.upper, sep = "-"))
    auc.out <- rbind(auc.out, auc.ci)
  }
  
  # Comment: Compare multiple ROC curves
  # In the "method=" parameter, there are three methods to choose from: "delong," "bootstrap," or "venkatraman" to calculate p-values
  auc.out <- as.data.frame(auc.out)
  colnames(auc.out) <- c("Name", "AUC", "AUC CI")
  # Output p-values to a file
  
  # Comment: Create a legend
  legend.name <- paste(colnames(df)[2:length(df)], "AUC", auc.out$AUC, sep = " ")
  legend("bottomright", 
         legend = legend.name,
         col = mycol[2:length(df)],
         lwd = 2,
         bty = "n")
  dev.off()
}

###################################################################################
rm(list = ls())
gene <- "Score"
setwd("Y:\\lcd_code\\TCGA_random\\TCGA_any\\advanced_version\\advanced\\raw_data\\immunotherapy_data")
genelist_input <- fread(file = "GSE61676.csv", header = TRUE, sep = ',', data.table = FALSE)
immudata <- genelist_input
rownames(immudata) <- immudata$ID
immudata <- immudata[, -1]
immudata <- as.matrix(immudata)
immudata <- as.data.frame(t(immudata))

# Comment: Read additional data (CCC)
CCC <- read.table("uniSigGeneExp.txt", header = TRUE, sep = "\t")
CCC = CCC$id
CCC = intersect(colnames(immudata), CCC)
AAA = immudata[, CCC]

# Comment: Perform PCA analysis
pca = prcomp(AAA, scale = TRUE)
value = predict(pca)
score = value[, 1] - value[, 2]
score = as.data.frame(score)
score$ID = rownames(score)
immudata$ID = row.names(immudata)
immudata = inner_join(immudata, score, by = "ID")
colnames(immudata)[which(colnames(immudata) == "score")] = gene
immudata <- immudata %>%
  dplyr::select(ID, gene, everything())

clindata <- read.csv("GSE61676_clin.csv", header = TRUE, sep = ",")
colnames(clindata)[1] <- "ID"
AAA <- intersect(clindata$ID, immudata$ID)
immudata <- dplyr::filter(immudata, ID %in% AAA)
clindata <- dplyr::filter(clindata, ID %in% AAA)
rt <- inner_join(clindata, immudata, by = "ID")

rt$time <- rt$time / 12
rt$status <- as.numeric(rt$status)

# Comment: Survival analysis
setwd(paste0("Y:\\lcd_code\\TCGA_random\\TCGA_any\\advanced_version\\advanced/", gene, "/5_Immune_Infiltration_Analysis/6_Immunotherapy"))
test <- rt
res.cut <- surv_cutpoint(test, time = "time", event = "status",
                         variables = gene)
summary(res.cut)
# Categorize variables
res.cat <- surv_categorize(res.cut)
head(res.cat)
# Fit survival curves and visualize
fit <- survfit(Surv(time, status) ~ get(gene), data = res.cat)

pdf(file = "GSE61676_best_cutoff_survival_analysis.pdf", width = 5.6, height = 5.34, onefile = FALSE)

ggsurvplot(fit,
           data = res.cat,
           risk.table = TRUE,
           risk.table.col = 'strata',
           risk.table.y.text = FALSE,
           risk.table.title = 'Number at risk',
           pval = TRUE,
           xlab = 'Time (years)',
           ggtheme = theme_light(),
           surv.median.line = 'hv',
           title = paste(gene, 'Survival', sep = ' '))

dev.off()
# Percentage Plot
res.cat$ID <- test$ID
res.cat$Response <- test$Response
res.cat$Group <- res.cat[, gene]
library(dplyr)
library(plyr)
library(ggplot2)
library(ggpubr)

# Count frequencies and calculate percentages
dat <- dplyr::count(res.cat, Group, Response)
dat <- dat %>% group_by(Group) %>% 
  dplyr::summarise(Response = Response, n = n/sum(n))
dat$Response <- factor(dat$Response, levels = c("Non-responder", "Responder"))

# Calculate percentages
dat <- ddply(dat, .(Group), transform, percent = n/sum(n) * 100)

# Calculate position for labels
dat <- ddply(dat, .(Group), transform, pos = (cumsum(n) - 0.5 * n))
dat$label <- paste0(sprintf("%.0f", dat$percent), "%")

bioCol <- c("#f87669", "#2fa1dd")
p <- ggplot(dat, aes(x = factor(Group), y = percent, fill = Response)) +
  geom_bar(position = position_stack(), stat = "identity", width = 0.7) +
  scale_fill_manual(values = bioCol) +
  xlab("Group") + ylab("Percent weight") + guides(fill = guide_legend(title = "Response")) +
  geom_text(aes(label = label), position = position_stack(vjust = 0.5), size = 3) +
  theme_bw()

pdf(file = "GSE61676_barplot.pdf", width = 4, height = 5)
print(p)
dev.off()

# Update Response levels
test$Response <- factor(test$Response, levels = c('Non-responder', 'Responder'))
my_comparisons <- list(c("Non-responder", "Responder"))

# Differential expression analysis
p <- ggboxplot(test, x = "Response", y = gene,
               fill = "Response", legend = FALSE, palette = c("#E7B800", "#00AFBB"), bxp.errorbar = TRUE) +
  theme(legend.position = 'none') +
  ylab(label = gene) +
  stat_compare_means(comparisons = my_comparisons, method = 't.test', aes(label = ..p.signif..))
ggsave(p, filename = paste("GSE61676", gene, 'DifferentialExpression.pdf', sep = ' '), width = 5.6, height = 4.22)

# Predictive ROC Curve
library("pROC")
df <- dplyr::select(rt, Response, gene, CTLA4, CD274)
df$Response[df$Response == "Responder"] <- 1
df$Response[df$Response == "Non-responder"] <- 0

# Define a sufficient number of colors for plotting
mycol <- c("slateblue", "seagreen3", "dodgerblue", "firebrick1", "lightgoldenrod", "magenta", "orange2")

if (TRUE) {
  pdf("GSE61676_ROC.pdf", height = 6, width = 6)
  auc.out <- c()
  
  # Plot the first ROC curve for miRNA1
  x <- plot.roc(df[, 1], df[, 2], ylim = c(0, 1), xlim = c(1, 0),
                smooth = TRUE,  # Smooth ROC curve
                ci = TRUE, 
                main = "",
                col = mycol[2],  # Line color
                lwd = 2,  # Line width
                legacy.axes = TRUE)  # Use legacy axes
  
  ci.lower <- round(as.numeric(x$ci[1]), 3)  # Confidence interval lower limit
  ci.upper <- round(as.numeric(x$ci[3]), 3)  # Confidence interval upper limit
  
  auc.ci <- c(colnames(df)[2], round(as.numeric(x$auc), 3), paste(ci.lower, ci.upper, sep = "-"))
  auc.out <- rbind(auc.out, auc.ci)
  
  # Plot additional ROC curves in a loop
  for (i in 3:ncol(df)) {
    x <- plot.roc(df[, 1], df[, i],
                  add = TRUE,  # Add to the previous plot
                  smooth = TRUE,
                  ci = TRUE,
                  col = mycol[i],
                  lwd = 2,
                  legacy.axes = TRUE)
    
    ci.lower <- round(as.numeric(x$ci[1]), 3)
    ci.upper <- round(as.numeric(x$ci[3]), 3)
    
    auc.ci <- c(colnames(df)[i], round(as.numeric(x$auc), 3), paste(ci.lower, ci.upper, sep = "-"))
    auc.out <- rbind(auc.out, auc.ci)
  }
  
  auc.out <- as.data.frame(auc.out)
  colnames(auc.out) <- c("Name", "AUC", "AUC CI")
  
  # Create a legend
  legend.name <- paste(colnames(df)[2:length(df)], "AUC", auc.out$AUC, sep = " ")
  legend("bottomright", 
         legend = legend.name,
         col = mycol[2:length(df)],
         lwd = 2,
         bty = "n")
  dev.off()
}

###################################################################################
rm(list = ls())
gene <- "Score"
setwd("Y:\\lcd_code\\TCGA_random\\TCGA_any\\advanced_version\\advanced\\raw_data\\immunotherapy_data")
genelist_input <- fread(file = "IMvigor210CoreBiologies.csv", header = TRUE, sep = ',', data.table = FALSE)
immudata <- genelist_input

# Build a boolean vector for indexing
index <- duplicated(immudata$symbol)

# Filter data
immudata <- immudata[!index,]  # Select non-duplicated data
immudata <- na.omit(immudata)

rownames(immudata) <- immudata$symbol
immudata <- immudata[, -1]
immudata <- log2(immudata + 1)
immudata <- as.matrix(immudata)
immudata <- as.data.frame(t(immudata))

CCC <- read.table("uniSigGeneExp.txt", header = TRUE, sep = "\t")
CCC <- CCC$id
CCC <- intersect(colnames(immudata), CCC)
AAA <- immudata[, CCC]

# PCA analysis
pca <- prcomp(AAA, scale = TRUE)
value <- predict(pca)
score <- value[, 1] - value[, 2]
score <- as.data.frame(score)
score$ID <- rownames(score)
immudata$ID <- row.names(immudata)
immudata <- inner_join(immudata, score, by = "ID")
colnames(immudata)[which(colnames(immudata) == "score")] <- gene
immudata <- immudata %>%
  dplyr::select(ID, gene, everything())

clindata <- read.csv("IMvigor210CoreBiologies_clin.csv", header = TRUE, sep = ",")
colnames(clindata)[1] <- "ID"
AAA <- intersect(clindata$ID, immudata$ID)
immudata <- dplyr::filter(immudata, ID %in% AAA)
clindata <- dplyr::filter(clindata, ID %in% AAA)
rt <- inner_join(clindata, immudata, by = "ID")

rt$futime <- rt$futime / 12
rt$fustat <- as.numeric(rt$fustat)
# Survival Analysis
setwd(paste0("Y:\\lcd code\\TCGA advanced\\", gene, "\\5 Immune Infiltration Analysis\\6 Immunotherapy"))
test <- rt

res.cut <- surv_cutpoint(test, time = "futime", event = "fustat",
                         variables = gene)
summary(res.cut)
# Categorize variables
res.cat <- surv_categorize(res.cut)
head(res.cat)
# Fit survival curves and visualize
fit <- survfit(Surv(futime, fustat) ~ get(gene), data = res.cat)

pdf(file = "IMvigor210CoreBiologies Best Cutoff Survival Analysis.pdf", width = 5.6, height = 5.34, onefile = FALSE)

ggsurvplot(fit,
           data = res.cat,
           risk.table = TRUE,
           risk.table.col = 'strata',
           risk.table.y.text = FALSE,
           risk.table.title = 'Number at risk',
           pval = TRUE,
           xlab = 'Time (years)',
           ggtheme = theme_light(),
           surv.median.line = 'hv',
           title = paste(gene, 'Survival', sep = ' '))

dev.off()

# Percentage Plot
res.cat$ID <- test$ID
res.cat$Response <- test$Response
res.cat$Group <- res.cat[, gene]
res.cat <- na.omit(res.cat)
library(dplyr)
library(plyr)
library(ggplot2)
library(ggpubr)
dat <- dplyr::count(res.cat, Group, Response)
dat <- dat %>% group_by(Group) %>% 
  dplyr::summarise(Response = Response, n = n/sum(n))
dat$Response = factor(dat$Response, levels = c("CR/PR", "SD/PD"))

# Calculate percentages
dat = ddply(dat, .(Group), transform, percent = n/sum(n) * 100)
# Calculate position for labels
dat = ddply(dat, .(Group), transform, pos = (cumsum(n) - 0.5 * n))
dat$label = paste0(sprintf("%.0f", dat$percent), "%")

bioCol = c("#f87669", "#2fa1dd")
p = ggplot(dat, aes(x = factor(Group), y = percent, fill = Response)) +
  geom_bar(position = position_stack(), stat = "identity", width = .7) +
  scale_fill_manual(values = bioCol) +
  xlab("Group") + ylab("Percent weight") +  guides(fill = guide_legend(title = "Response")) +
  geom_text(aes(label = label), position = position_stack(vjust = 0.5), size = 3) +
  theme_bw()
pdf(file = "IMvigor210CoreBiologies_barplot.pdf", width = 4, height = 5)
print(p)
dev.off()

test$Response <- factor(test$Response, levels = c('CR/PR', 'SD/PD'))
my_comparisons <- list(c("CR/PR", "SD/PD"))
# Differential expression analysis
test <- na.omit(test)
p <- ggboxplot(test, x = "Response", y = gene,
               fill = "Response", legend = FALSE, palette = c("#E7B800", "#00AFBB"), bxp.errorbar = TRUE) +
  theme(legend.position = 'none') +
  ylab(label = gene) +
  stat_compare_means(comparisons = my_comparisons, method = 't.test', aes(label = ..p.signif..))
ggsave(p, filename = paste("IMvigor210CoreBiologies", gene, 'DifferentialExpression.pdf', sep = ' '), width = 5.6, height = 4.22)

# Predictive ROC Curve
library("pROC")
df <- dplyr::select(rt, Response, gene, CTLA4, CD274, PDCD1)
df$Response[df$Response == "SD/PD"] = 1
df$Response[df$Response == "CR/PR"] = 0

# Define a sufficient number of colors for plotting
mycol <- c("slateblue", "seagreen3", "dodgerblue", "firebrick1", "lightgoldenrod", "magenta", "orange2")

if (TRUE) {
  pdf("IMvigor210CoreBiologies_ROC.pdf", height = 6, width = 6)
  auc.out <- c()
  
  # Plot the first ROC curve for miRNA1
  x <- plot.roc(df[, 1], df[, 2], ylim = c(0, 1), xlim = c(1, 0),
                smooth = TRUE,  # Smooth ROC curve
                ci = TRUE, 
                main = "",
                col = mycol[2],  # Line color
                lwd = 2,  # Line width
                legacy.axes = TRUE)  # Use legacy axes
  
  ci.lower <- round(as.numeric(x$ci[1]), 3)  # Confidence interval lower limit
  ci.upper <- round(as.numeric(x$ci[3]), 3)  # Confidence interval upper limit
  
  auc.ci <- c(colnames(df)[2], round(as.numeric(x$auc), 3), paste(ci.lower, ci.upper, sep = "-"))
  auc.out <- rbind(auc.out, auc.ci)
  
  # Plot additional ROC curves in a loop
  for (i in 3:ncol(df)) {
    x <- plot.roc(df[, 1], df[, i],
                  add = TRUE,  # Add to the previous plot
                  smooth = TRUE,
                  ci = TRUE,
                  col = mycol[i],
                  lwd = 2,
                  legacy.axes = TRUE)
    
    ci.lower <- round(as.numeric(x$ci[1]), 3)
    ci.upper <- round(as.numeric(x$ci[3]), 3)
    
    auc.ci <- c(colnames(df)[i], round(as.numeric(x$auc), 3), paste(ci.lower, ci.upper, sep = "-"))
    auc.out <- rbind(auc.out, auc.ci)
  }
  
  auc.out <- as.data.frame(auc.out)
  colnames(auc.out) <- c("Name", "AUC", "AUC CI")
  
  # Create a legend
  legend.name <- paste(colnames(df)[2:length(df)], "AUC", auc.out$AUC, sep = " ")
  legend("bottomright", 
         legend = legend.name,
         col = mycol[2:length(df)],
         lwd = 2,
         bty = "n")
  dev.off()
}
# Clear the workspace
rm(list = ls())
gene = "Score"

# Set the working directory to the immunotherapy data folder
setwd("Y:\\lcd code\\TCGA advanced\\Original Data\\Immunotherapy Data")

# Read gene expression data from a CSV file
genelist_input <- fread(file = "GSE78220.csv", header = TRUE, sep = ',', data.table = FALSE)
immudata <- genelist_input

# Create a boolean vector to index duplicated genes
index <- duplicated(immudata$Gene)

# Filter out duplicated data
immudata <- immudata[!index,]

# Remove rows with missing values
immudata <- na.omit(immudata)

# Set gene names as row names
rownames(immudata) <- immudata$Gene

# Remove the Gene column
immudata <- immudata[, -1]

# Transpose the data
immudata <- as.matrix(immudata)
immudata <- as.data.frame(t(immudata))

# Read a list of genes of interest
CCC <- read.table("uniSigGeneExp.txt", header = TRUE, sep = "\t")
CCC <- CCC$id

# Filter genes that are present in both datasets
CCC <- intersect(colnames(immudata), CCC)

# Select columns corresponding to the genes of interest
AAA <- immudata[, CCC]

# Perform Principal Component Analysis (PCA)
pca <- prcomp(AAA, scale = TRUE)
value <- predict(pca)
score <- value[, 1] - value[, 2]
score <- as.data.frame(score)

# Assign the ID column
score$ID <- rownames(score)

# Update the column name to 'gene'
colnames(score)[which(colnames(score) == "score")] <- gene

# Select relevant columns
immudata <- immudata %>%
  dplyr::select(ID, gene, everything())

# Read clinical data
clindata <- read.csv("GSE78220_clin.csv", header = TRUE, sep = ",")

# Rename the first column to 'ID'
colnames(clindata)[1] <- "ID"

# Find common IDs between clinical and gene expression data
AAA <- intersect(clindata$ID, immudata$ID)

# Filter data based on common IDs
immudata <- dplyr::filter(immudata, ID %in% AAA)
clindata <- dplyr::filter(clindata, ID %in% AAA)

# Prepare data for survival analysis
rt <- inner_join(clindata, immudata, by = "ID")

# Convert OS values to years and Status to numeric
rt$OS <- rt$OS/365
rt$Status <- as.numeric(rt$Status)

# Set the working directory for survival analysis
setwd(paste0("Y:\\lcd code\\TCGA advanced\\", gene, "\\5 Immune Infiltration Analysis\\6 Immunotherapy"))
test <- rt

# Remove rows with missing values
test <- na.omit(test)

# Determine optimal cutpoints for survival analysis
res.cut <- surv_cutpoint(test, time = "OS", event = "Status", variables = gene)
summary(res.cut)

# Categorize variables based on the optimal cutpoints
res.cat <- surv_categorize(res.cut)
head(res.cat)

# Fit survival curves and create visualizations
fit <- survfit(Surv(OS, Status) ~ get(gene), data = res.cat)

# Generate a PDF file for survival analysis results
pdf(file = "GSE78220 Best Cutoff Survival Analysis.pdf", width = 5.6, height = 5.34, onefile = FALSE)

# Create a survival plot
ggsurvplot(fit,
           data = res.cat,
           risk.table = TRUE,
           risk.table.col = 'strata',
           risk.table.y.text = FALSE,
           risk.table.title = 'Number at risk',
           pval = TRUE,
           xlab = 'Time (years)',
           ggtheme = theme_light(),
           surv.median.line = 'hv',
           title = paste(gene, 'Survival', sep = ' '))

# Close the PDF device
dev.off()

# Percentage Plot
res.cat$ID <- test$ID
res.cat$Response <- test$Response
res.cat$Group <- res.cat[, gene]

# Load necessary libraries
library(dplyr)
library(plyr)
library(ggplot2)
library(ggpubr)

# Count frequencies of responses and calculate percentages
dat <- dplyr::count(res.cat, Group, Response)
dat <- dat %>% group_by(Group) %>% 
  dplyr::summarise(Response = Response, n = n/sum(n))
dat$Response = factor(dat$Response, levels = c("PD/SD", "PR/CR"))

# Calculate percentages
dat <- ddply(dat, .(Group), transform, percent = n/sum(n) * 100)

# Calculate positions for labels
dat <- ddply(dat, .(Group), transform, pos = (cumsum(n) - 0.5 * n))
dat$label <- paste0(sprintf("%.0f", dat$percent), "%")

# Define color palette
bioCol <- c("#f87669", "#2fa1dd")

# Create a percentage plot
p <- ggplot(dat, aes(x = factor(Group), y = percent, fill = Response)) +
  geom_bar(position = position_stack(), stat = "identity", width = 0.7) +
  scale_fill_manual(values = bioCol) +
  xlab("Group") + ylab("Percent weight") + guides(fill = guide_legend(title = "Response")) +
  geom_text(aes(label = label), position = position_stack(vjust = 0.5), size = 3) +
  theme_bw()

# Generate a PDF file for the percentage plot
pdf(file = "GSE78220_barplot.pdf", width = 4, height = 5)
print(p)
dev.off()

# Set Response levels
test$Response <- factor(test$Response, levels = c('PR/CR', 'PD/SD'))
my_comparisons <- list(c("PR/CR", "PD/SD"))

# Differential expression analysis plot
test <- na.omit(test)
p <- ggboxplot(test, x = "Response", y = gene,
               fill = "Response", legend = FALSE, palette = c("#E7B800", "#00AFBB"), bxp.errorbar = TRUE) +
  theme(legend.position = 'none') +
  ylab(label = gene) +
  stat_compare_means(comparisons = my_comparisons, method = 't.test', aes(label = ..p.signif..))

# Save the differential expression plot as a PDF file
ggsave(p, filename = paste("GSE78220", gene, 'DifferentialExpression.pdf', sep = ' '), width = 5.6, height = 4.22)

# Predictive ROC Curve
library("pROC")

# Select relevant columns for ROC analysis
df <- dplyr::select(rt, Response, gene, CTLA4, CD274, PDCD1)
df$Response[df$Response == "SD/PD"] = 1
df$Response[df$Response == "CR/PR"] = 0

# Define a sufficient number of colors for plotting
mycol <- c("slateblue", "seagreen3", "dodgerblue", "firebrick1", "lightgoldenrod", "magenta", "orange2")

if (TRUE) {
  # Generate a PDF file for ROC analysis
  pdf("GSE78220_ROC.pdf", height = 6, width = 6)
  auc.out <- c()
  
  # Plot the first ROC curve
  x <- plot.roc(df[, 1], df[, 2], ylim = c(0, 1), xlim = c(1, 0),
                smooth = TRUE, 
                ci = TRUE, 
                main = "",
                col = mycol[2], 
                lwd = 2, 
                legacy.axes = TRUE)
  
  ci.lower <- round(as.numeric(x$ci[1]), 3) 
  ci.upper <- round(as.numeric(x$ci[3]), 3) 
  
  auc.ci <- c(colnames(df)[2], round(as.numeric(x$auc), 3), paste(ci.lower, ci.upper, sep = "-"))
  auc.out <- rbind(auc.out, auc.ci)
  
  # Plot additional ROC curves
  for (i in 3:ncol(df)) {
    x <- plot.roc(df[, 1], df[, i],
                  add = TRUE, 
                  smooth = TRUE,
                  ci = TRUE,
                  col = mycol[i],
                  lwd = 2,
                  legacy.axes = TRUE)
    
    ci.lower <- round(as.numeric(x$ci[1]), 3)
    ci.upper <- round(as.numeric(x$ci[3]), 3)
    
    auc.ci <- c(colnames(df)[i], round(as.numeric(x$auc), 3), paste(ci.lower, ci.upper, sep = "-"))
    auc.out <- rbind(auc.out, auc.ci)
  }
  
  # Create a legend for the ROC curves
  auc.out <- as.data.frame(auc.out)
  colnames(auc.out) <- c("Name", "AUC", "AUC CI")
  legend.name <- paste(colnames(df)[2:length(df)], "AUC", auc.out$AUC, sep = " ")
  legend("bottomright", 
         legend = legend.name,
         col = mycol[2:length(df)],
         lwd = 2,
         bty = "n")
  dev.off()
}
# Clear the current workspace
rm(list = ls())

# Define the gene of interest
gene <- "Score"

# Set the working directory to the location of immunotherapy data
setwd("Y:\\lcd code\\TCGA advanced\\TCGA arbitrary\\Advanced Version\\Advanced\\Raw Data\\Immunotherapy Data")

# Read gene expression data from a CSV file named "ICB.Riaz2017_Nivolumab_Melanoma_Naive.csv"
genelist_input <- fread(file = "ICB.Riaz2017_Nivolumab_Melanoma_Naive.csv", header = TRUE, sep = ',', data.table = FALSE)
genename <- as.character(genelist_input[, 1])  # Extract gene names

# Use biomaRt to map gene IDs to gene symbols (ENTREZID to SYMBOL)
gene_map <- biomaRt::select(org.Hs.eg.db, keys = genename, keytype = "ENTREZID", columns = c("SYMBOL"))
colnames(gene_map)[1] <- "ID"
colnames(genelist_input)[1] <- "ID"
genelist_input$ID <- as.character(genelist_input$ID)

# Merge gene expression data with gene symbols
immudata <- inner_join(gene_map, genelist_input, by = "ID")
immudata <- immudata[, -1]
immudata <- na.omit(immudata)
rownames(immudata) <- immudata$SYMBOL
immudata <- immudata[, -1]
immudata <- as.matrix(immudata)
immudata <- as.data.frame(t(immudata))

# Read a list of genes of interest from "uniSigGeneExp.txt" and select relevant columns
CCC <- read.table("uniSigGeneExp.txt", header = TRUE, sep = "\t")
CCC <- CCC$id
CCC <- intersect(colnames(immudata), CCC)
AAA <- immudata[, CCC]

# Perform Principal Component Analysis (PCA)
pca <- prcomp(AAA, scale = TRUE)
value <- predict(pca)
score <- value[, 1] - value[, 2]
score <- as.data.frame(score)
score$ID <- rownames(score)
immudata$ID <- row.names(immudata)
immudata <- inner_join(immudata, score, by = "ID")
colnames(immudata)[which(colnames(immudata) == "score")] <- gene
immudata <- immudata %>%
  dplyr::select(ID, gene, everything())

# Read clinical data
clindata <- read.csv("ICB.Riaz2017_Nivolumab_Melanoma_Naive_clin.csv", header = TRUE, sep = ",")
colnames(clindata)[1] <- "ID"

# Filter data to include only common IDs
AAA <- intersect(clindata$ID, immudata$ID)
immudata <- dplyr::filter(immudata, ID %in% AAA)
clindata <- dplyr::filter(clindata, ID %in% AAA)

# Convert OS (Overall Survival) values to years and OS.Event to numeric
rt <- inner_join(clindata, immudata, by = "ID")
rt$OS <- rt$OS / 365
rt$OS.Event <- as.numeric(rt$OS.Event)

# Set the working directory for survival analysis
setwd(paste0("Y:\\lcd code\\TCGA advanced\\TCGA arbitrary\\Advanced Version\\Advanced/", gene, "/5 Immune Infiltration Analysis/6 Immunotherapy"))
test <- rt
test <- na.omit(test)

# Perform survival analysis by determining optimal cutpoints for the specified gene
res.cut <- surv_cutpoint(test, time = "OS", event = "OS.Event", variables = gene)
summary(res.cut)

# Categorize variables based on optimal cutpoints
res.cat <- surv_categorize(res.cut)
head(res.cat)

# Fit survival curves and visualize
fit <- survfit(Surv(OS, OS.Event) ~ get(gene), data = res.cat)

# Generate a PDF file containing survival analysis plots
pdf(file = "ICB.Riaz2017_Nivolumab_Melanoma_Naive_OS_Survival_Analysis.pdf", width = 5.6, height = 5.34, onefile = FALSE)
ggsurvplot(fit,
           data = res.cat,
           risk.table = TRUE,
           risk.table.col = 'strata',
           risk.table.y.text = FALSE,
           risk.table.title = 'Number at risk',
           pval = TRUE,
           xlab = 'Time (years)',
           ggtheme = theme_light(),
           surv.median.line = 'hv',
           title = paste(gene, 'Survival', sep = ' '))
dev.off()

# Create percentage bar plots based on the response variable
res.cat$ID <- test$ID
res.cat$Response <- test$Response
res.cat$Response[res.cat$Response == 1] <- "Yes"
res.cat$Response[res.cat$Response == 0] <- "No"
res.cat$Group <- res.cat[, gene]

library(dplyr)
library(plyr)
library(ggplot2)
library(ggpubr)

dat <- dplyr::count(res.cat, Group, Response)
dat <- dat %>% group_by(Group) %>% 
  dplyr::summarise(Response = Response, n = n/sum(n))
dat$Response <- factor(dat$Response, levels = c("No", "Yes"))

# Calculate percentages
dat <- ddply(dat, .(Group), transform, percent = n/sum(n) * 100)

# Calculate position for labels
dat <- ddply(dat, .(Group), transform, pos = (cumsum(n) - 0.5 * n))
dat$label <- paste0(sprintf("%.0f", dat$percent), "%")

bioCol <- c("#f87669", "#2fa1dd")
p <- ggplot(dat, aes(x = factor(Group), y = percent, fill = Response)) +
  geom_bar(position = position_stack(), stat = "identity", width = 0.7) +
  scale_fill_manual(values = bioCol) +
  xlab("Group") + ylab("Percent weight") +  guides(fill = guide_legend(title = "Response")) +
  geom_text(aes(label = label), position = position_stack(vjust = 0.5), size = 3) +
  theme_bw()

# Generate a PDF file containing percentage bar plots
pdf(file = "ICB.Riaz2017_Nivolumab_Melanoma_Naive_Barplot.pdf", width = 4, height = 5)
print(p)
dev.off()

# Update the response variable and conduct differential expression analysis
test <- rt
test <- na.omit(test)
test$Response[test$Response == 1] <- "Yes"
test$Response[test$Response == 0] <- "No"
test$Response <- factor(test$Response, levels = c('No', 'Yes'))
my_comparisons <- list(c("No", "Yes"))

# Generate a PDF file containing differential expression analysis plots
p <- ggboxplot(test, x = "Response", y = gene,
               fill = "Response", legend = FALSE, palette = c("#E7B800", "#00AFBB"), bxp.errorbar = TRUE) +
  theme(legend.position = 'none') +
  ylab(label = gene) +
  stat_compare_means(comparisons = my_comparisons, method = 't.test', aes(label = ..p.signif..))
ggsave(p, filename = paste("ICB.Riaz2017_Nivolumab_Melanoma_Naive", gene, '_Differential_Expression.pdf', sep = ' '), width = 5.6, height = 4.22)

# Perform ROC analysis to predict responsiveness to immunotherapy
library("pROC")
df <- dplyr::select(rt, Response, gene, PDCD1, CTLA4, CD274)

# Define a set of colors for ROC curves
mycol <- c("slateblue", "seagreen3", "dodgerblue", "firebrick1", "lightgoldenrod", "magenta", "orange2")

if(TRUE){
  pdf("ICB.Riaz2017_Nivolumab_Melanoma_Naive_ROC.pdf", height = 6, width = 6)
  auc.out <- c()
  
  # Plot the first ROC curve (Response vs. PDCD1)
  x <- plot.roc(df[, 1], df[, 2], ylim = c(0, 1), xlim = c(1, 0),
                smooth = TRUE, ci = TRUE, main = "",
                col = mycol[2], lwd = 2, legacy.axes = TRUE)
  
  ci.lower <- round(as.numeric(x$ci[1]), 3)
  ci.upper <- round(as.numeric(x$ci[3]), 3)
  
  auc.ci <- c(colnames(df)[2], round(as.numeric(x$auc), 3), paste(ci.lower, ci.upper, sep = "-"))
  auc.out <- rbind(auc.out, auc.ci)
  
  # Plot additional ROC curves
  for (i in 3:ncol(df)){
    x <- plot.roc(df[, 1], df[, i], add = TRUE, smooth = TRUE, ci = TRUE,
                  col = mycol[i], lwd = 2, legacy.axes = TRUE)
    
    ci.lower <- round(as.numeric(x$ci[1]), 3)
    ci.upper <- round(as.numeric(x$ci[3]), 3)
    
    auc.ci <- c(colnames(df)[i], round(as.numeric(x$auc), 3), paste(ci.lower, ci.upper, sep = "-"))
    auc.out <- rbind(auc.out, auc.ci)
  }
  
  auc.out <- as.data.frame(auc.out)
  colnames(auc.out) <- c("Name", "AUC", "AUC CI")
  
  # Generate a legend for the ROC curves
  legend.name <- paste(colnames(df)[2:length(df)], "AUC", auc.out$AUC, sep = " ")
  legend("bottomright", legend = legend.name, col = mycol[2:length(df)], lwd = 2, bty = "n")
  
  dev.off()
}
# Survival Analysis
setwd(paste0("Y:\\lcd code\\TCGA advanced\\TCGA arbitrary\\Advanced Version\\Advanced/", gene, "/5 Immune Infiltration Analysis/6 Immunotherapy"))
test <- rt
test <- na.omit(test)
res.cut <- surv_cutpoint(test, time = "PFS", event = "PFS.Event", variables = gene)
summary(res.cut)

# Categorize variables
res.cat <- surv_categorize(res.cut)
head(res.cat)

# Fit survival curves and visualize
fit <- survfit(Surv(PFS, PFS.Event) ~ get(gene), data = res.cat)

pdf(file = "ICB.Riaz2017_Nivolumab_Melanoma_Naive_PFS_Survival_Analysis.pdf", width = 5.6, height = 5.34, onefile = FALSE)

ggsurvplot(fit,
           data = res.cat,
           risk.table = TRUE,
           risk.table.col = 'strata',
           risk.table.y.text = FALSE,
           risk.table.title = 'Number at risk',
           pval = TRUE,
           xlab = 'Time (years)',
           ggtheme = theme_light(),
           surv.median.line = 'hv',
           title = paste(gene, 'Survival', sep = ' '))

dev.off()

###################################################################################
rm(list = ls())
gene = "Score"
setwd("Y:\\lcd code\\TCGA advanced\\TCGA arbitrary\\Advanced Version\\Advanced\\Raw Data\\Immunotherapy Data")
genelist_input <- fread(file = "ICB.Riaz2017_Nivolumab_Melanoma_Prog.csv", header = TRUE, sep = ',', data.table = FALSE)
genename <- as.character(genelist_input[, 1])  # Extract the first column of gene names

# Use biomaRt to map gene IDs to gene symbols (ENTREZID to SYMBOL)
gene_map <- biomaRt::select(org.Hs.eg.db, keys = genename, keytype = "ENTREZID", columns = c("SYMBOL"))
colnames(gene_map)[1] <- "ID"
colnames(genelist_input)[1] <- "ID"
genelist_input$ID <- as.character(genelist_input$ID)
immudata <- inner_join(gene_map, genelist_input, by = "ID")
immudata <- immudata[, -1]
immudata <- na.omit(immudata)
rownames(immudata) <- immudata$SYMBOL
immudata <- immudata[, -1]
immudata <- as.matrix(immudata)
immudata <- as.data.frame(t(immudata))

CCC <- read.table("uniSigGeneExp.txt", header = TRUE, sep = "\t")
CCC <- CCC$id
CCC <- intersect(colnames(immudata), CCC)
AAA <- immudata[, CCC]

# Perform PCA analysis
pca <- prcomp(AAA, scale = TRUE)
value <- predict(pca)
score <- value[, 1] - value[, 2]
score <- as.data.frame(score)
score$ID <- rownames(score)
immudata$ID <- row.names(immudata)
immudata <- inner_join(immudata, score, by = "ID")
colnames(immudata)[which(colnames(immudata) == "score")] <- gene
immudata <- immudata %>%
  dplyr::select(ID, gene, everything())

clindata <- read.csv("ICB.Riaz2017_Nivolumab_Melanoma_Prog_clin.csv", header = TRUE, sep = ",")
colnames(clindata)[1] <- "ID"
AAA <- intersect(clindata$ID, immudata$ID)
immudata <- dplyr::filter(immudata, ID %in% AAA)
clindata <- dplyr::filter(clindata, ID %in% AAA)
rt <- inner_join(clindata, immudata, by = "ID")

rt$OS <- rt$OS / 365
rt$OS.Event <- as.numeric(rt$OS.Event)

# Survival Analysis
setwd(paste0("Y:\\lcd code\\TCGA advanced\\TCGA arbitrary\\Advanced Version\\Advanced/", gene, "/5 Immune Infiltration Analysis/6 Immunotherapy"))
test <- rt
test <- na.omit(test)
res.cut <- surv_cutpoint(test, time = "OS", event = "OS.Event", variables = gene)
summary(res.cut)

# Categorize variables
res.cat <- surv_categorize(res.cut)
head(res.cat)

# Fit survival curves and visualize
fit <- survfit(Surv(OS, OS.Event) ~ get(gene), data = res.cat)

pdf(file = "ICB.Riaz2017_Nivolumab_Melanoma_Prog_OS_Survival_Analysis.pdf", width = 5.6, height = 5.34, onefile = FALSE)

ggsurvplot(fit,
           data = res.cat,
           risk.table = TRUE,
           risk.table.col = 'strata',
           risk.table.y.text = FALSE,
           risk.table.title = 'Number at risk',
           pval = TRUE,
           xlab = 'Time (years)',
           ggtheme = theme_light(),
           surv.median.line = 'hv',
           title = paste(gene, 'Survival', sep = ' '))

dev.off()

# Percentage Bar Plot
res.cat$ID <- test$ID
res.cat$Response <- test$Response
res.cat$Response[res.cat$Response == 1] <- "Yes"
res.cat$Response[res.cat$Response == 0] <- "No"
res.cat$Group <- res.cat[, gene]

library(dplyr)
library(plyr)
library(ggplot2)
library(ggpubr)

dat <- dplyr::count(res.cat, Group, Response)
dat <- dat %>% group_by(Group) %>% 
  dplyr::summarise(Response = Response, n = n/sum(n))
dat$Response <- factor(dat$Response, levels = c("No", "Yes"))

# Calculate percentages
dat <- ddply(dat, .(Group), transform, percent = n/sum(n) * 100)

# Calculate position for labels
dat <- ddply(dat, .(Group), transform, pos = (cumsum(n) - 0.5 * n))
dat$label <- paste0(sprintf("%.0f", dat$percent), "%")

bioCol <- c("#f87669", "#2fa1dd")
p <- ggplot(dat, aes(x = factor(Group), y = percent, fill = Response)) +
  geom_bar(position = position_stack(), stat = "identity", width = 0.7) +
  scale_fill_manual(values = bioCol) +
  xlab("Group") + ylab("Percent weight") +  guides(fill = guide_legend(title = "Response")) +
  geom_text(aes(label = label), position = position_stack(vjust = 0.5), size = 3) +
  theme_bw()

pdf(file = "ICB.Riaz2017_Nivolumab_Melanoma_Prog_Barplot.pdf", width = 4, height = 5)
print(p)
dev.off()

test <- rt
test <- na.omit(test)
test$Response[test$Response == 1] <- "Yes"
test$Response[test$Response == 0] <- "No"
test$Response <- factor(test$Response, levels = c('No', 'Yes'))
my_comparisons <- list(c("No", "Yes"))

# Differential Expression Boxplot
p <- ggboxplot(test, x = "Response", y = gene,
               fill = "Response", legend = FALSE, palette = c("#E7B800", "#00AFBB"), bxp.errorbar = TRUE) +
  theme(legend.position = 'none') +
  ylab(label = gene) +
  stat_compare_means(comparisons = my_comparisons, method = 't.test', aes(label = ..p.signif..))
ggsave(p, filename = paste("ICB.Riaz2017_Nivolumab_Melanoma_Prog", gene, '_Differential_Expression.pdf', sep = ' '), width = 5.6, height = 4.22)

# ROC Analysis to Predict Responsiveness to Immunotherapy
library("pROC")
df <- dplyr::select(rt, Response, gene, PDCD1, CTLA4, CD274)

# Define a set of colors for ROC curves
mycol <- c("slateblue", "seagreen3", "dodgerblue", "firebrick1", "lightgoldenrod", "magenta", "orange2")

if (TRUE) {
  pdf("ICB.Riaz2017_Nivolumab_Melanoma_Prog_ROC.pdf", height = 6, width = 6)
  auc.out <- c()
  
  # Plot the first ROC curve (Response vs. PDCD1)
  x <- plot.roc(df[, 1], df[, 2], ylim = c(0, 1), xlim = c(1, 0),
                smooth = TRUE, ci = TRUE, main = "",
                col = mycol[2], lwd = 2, legacy.axes = TRUE)
  
  ci.lower <- round(as.numeric(x$ci[1]), 3)
  ci.upper <- round as.numeric(x$ci[3]), 3)

auc.ci <- c(colnames(df)[2], round(as.numeric(x$auc), 3), paste(ci.lower, ci.upper, sep = "-"))
auc.out <- rbind(auc.out, auc.ci)

# Plot additional ROC curves
for (i in 3:ncol(df)) {
  x <- plot.roc(df[, 1], df[, i], add = TRUE, smooth = TRUE, ci = TRUE,
                col = mycol[i], lwd = 2, legacy.axes = TRUE)
  
  ci.lower <- round(as.numeric(x$ci[1]), 3)
  ci.upper <- round(as.numeric(x$ci[3]), 3)
  
  auc.ci <- c(colnames(df)[i], round(as.numeric(x$auc), 3), paste(ci.lower, ci.upper, sep = "-"))
  auc.out <- rbind(auc.out, auc.ci)
}

auc.out <- as.data.frame(auc.out)
colnames(auc.out) <- c("Name", "AUC", "AUC CI")

# Generate a legend for the ROC curves
legend.name <- paste(colnames(df)[2:length(df)], "AUC", auc.out$AUC, sep = " ")
legend("bottomright", legend = legend.name, col = mycol[2:length(df)], lwd = 2, bty = "n")

dev.off()
}
# Compare multiple curves
# There are three methods available for calculating p-values in the "method=" parameter: "delong", "bootstrap", or "venkatraman"
p.out <- c()
for (i in 2:(ncol(df) - 1)) {
  for (j in (i + 1):ncol(df)) {
    p <- roc.test(df[, 1], df[, i], df[, j], method = "bootstrap")
    p.tmp <- c(colnames(df)[i], colnames(df)[j], p$p.value)
    p.out <- rbind(p.out, p.tmp)
  }
}
p.out <- as.data.frame(p.out)
colnames(p.out) <- c("ROC1", "ROC2", "p.value")
auc.out <- as.data.frame(auc.out)
colnames(auc.out) <- c("Name", "AUC", "AUC CI")
# Output p-values to a file

# Generate legend
legend.name <- paste(colnames(df)[2:length(df)], "AUC", auc.out$AUC, sep = " ")
legend(
  "bottomright",
  legend = legend.name,
  col = mycol[2:length(df)],
  lwd = 2,
  bty = "n"
)
dev.off()
}

# Survival analysis
setwd(paste0("Y:\\lcd code\\TCGA advanced\\TCGA arbitrary\\Advanced Version\\Advanced/", gene, "/5 Immune Infiltration Analysis/6 Immunotherapy"))
rt$PFS <- rt$PFS / 365
rt$PFS.Event <- as.numeric(rt$PFS.Event)
test <- rt
test <- na.omit(test)
res.cut <- surv_cutpoint(test, time = "PFS", event = "PFS.Event", variables = gene)
summary(res.cut)
# Categorize variables
res.cat <- surv_categorize(res.cut)
head(res.cat)
# Fit survival curves and visualize
fit <- survfit(Surv(PFS, PFS.Event) ~ get(gene), data = res.cat)

pdf(
  file = "ICB.Riaz2017_Nivolumab_Melanoma_Prog_PFS Survival Analysis.pdf",
  width = 5.6,
  height = 5.34,
  onefile = FALSE
)

ggsurvplot(
  fit,
  data = res.cat,
  risk.table = TRUE,
  risk.table.col = 'strata',
  risk.table.y.text = FALSE,
  risk.table.title = 'Number at risk',
  pval = TRUE,
  xlab = 'Time (years)',
  ggtheme = theme_light(),
  surv.median.line = 'hv',
  title = paste(gene, 'Survival', sep = ' ')
)

dev.off()

###################################################################################
rm(list = ls())
gene = "Score"
setwd("Y:\\lcd code\\TCGA advanced\\TCGA arbitrary\\Advanced Version\\Advanced\\Raw Data\\Immunotherapy Data")
genelist_input <- fread(file = "GSE135222.csv", header = TRUE, sep = ',', data.table = FALSE)
genelist_input <- separate(genelist_input, gene_id, c("ID", NA), sep = "\\.")
genename <- as.character(genelist_input[, 1])  # Extract the first column of gene names
# x - Genomic annotation R package
# keys - List of genes to be converted
# keytype - Gene name type
# columns - Desired gene name type data to return, such as returning NCBI gene ID using ENTREZID
gene_map <- biomaRt::select(org.Hs.eg.db, keys = genename, keytype = "ENSEMBL", columns = c("SYMBOL"))
colnames(gene_map)[1] <- "ID"
colnames(genelist_input)[1] <- "ID"
genelist_input$ID <- as.character(genelist_input$ID)
immudata <- inner_join(gene_map, genelist_input, by = "ID")
immudata <- immudata[, -1]
immudata <- na.omit(immudata)

# Build a boolean vector, index
index <- duplicated(immudata$SYMBOL)
index
# Filter data
immudata <- immudata[!index, ]  # Select non-duplicated data
immudata <- na.omit(immudata)
rownames(immudata) <- immudata$SYMBOL
immudata <- immudata[, -1]
immudata <- as.matrix(immudata)
immudata <- as.data.frame(t(immudata))

CCC <- read.table("uniSigGeneExp.txt", header = TRUE, sep = "\t")
CCC <- CCC$id
CCC <- intersect(colnames(immudata), CCC)

AAA <- immudata[, CCC]
# PCA analysis
pca <- prcomp(AAA, scale = TRUE)
value <- predict(pca)
score <- value[, 1] - value[, 2]
score <- as.data.frame(score)
score$ID <- rownames(score)
immudata$ID <- row.names(immudata)
immudata <- inner_join(immudata, score, by = "ID")
colnames(immudata)[which(colnames(immudata) == "score")] = gene
immudata <- immudata %>%
  dplyr::select(ID, gene, everything())

clindata <- read.csv("GSE135222_clin.csv", header = TRUE, sep = ",")
colnames(clindata)[1] <- "ID"
AAA <- intersect(clindata$ID, immudata$ID)
immudata <- dplyr::filter(immudata, ID %in% AAA)
clindata <- dplyr::filter(clindata, ID %in% AAA)
rt <- inner_join(clindata, immudata, by = "ID")

rt$PFS <- rt$PFS / 365
rt$PFS.Event <- as.numeric(rt$PFS.Event)

# Survival Analysis
setwd(paste0("Y:\\lcd code\\TCGA advanced\\TCGA arbitrary\\Advanced Version\\Advanced/", gene, "/5 Immune Infiltration Analysis/6 Immunotherapy"))
test <- rt
test <- na.omit(test)
res.cut <- surv_cutpoint(test, time = "PFS", event = "PFS.Event", variables = gene)
summary(res.cut)
# Categorize variables
res.cat <- surv_categorize(res.cut)
head(res.cat)
# Fit survival curves and visualize
fit <- survfit(Surv(PFS, PFS.Event) ~ get(gene), data = res.cat)

pdf(
  file = "GSE135222_PFS Survival Analysis.pdf",
  width = 5.6,
  height = 5.34,
  onefile = FALSE
)

ggsurvplot(
  fit,
  data = res.cat,
  risk.table = TRUE,
  risk.table.col = 'strata',
  risk.table.y.text = FALSE,
  risk.table.title = 'Number at risk',
  pval = TRUE,
  xlab = 'Time (years)',
  ggtheme = theme_light(),
  surv.median.line = 'hv',
  title = paste(gene, 'Survival', sep = ' ')
)

dev.off()


# Percentage Plot
res.cat$ID <- test$ID
res.cat$Response <- test$Response
res.cat$Group <- res.cat[, gene]
res.cat <- na.omit(res.cat)
library(dplyr)
library(plyr)
library(ggplot2)
library(ggpubr)
dat <- dplyr::count(res.cat, Group, Response)
dat <- dat %>% group_by(Group) %>%
  dplyr::summarise(Response = Response, n = n/sum(n))
dat$Response = factor(dat$Response, levels = c("CR/PR", "PD/SD"))

# Calculate percentages
dat <- ddply(dat, .(Group), transform, percent = n/sum(n) * 100)

# Percentage position
dat <- ddply(dat, .(Group), transform, pos = (cumsum(n) - 0.5 * n))
dat$label = paste0(sprintf("%.0f", dat$percent), "%")

bioCol = c("#f87669", "#2fa1dd")
p = ggplot(dat, aes(x = factor(Group), y = percent, fill = Response)) +
  geom_bar(position = position_stack(), stat = "identity", width = 0.7) +
  scale_fill_manual(values = bioCol) +
  xlab("Group") + ylab("Percent weight") + guides(fill = guide_legend(title = "Response")) +
  geom_text(aes(label = label), position = position_stack(vjust = 0.5), size = 3) +
  #coord_flip()+
  theme_bw()
pdf(file = "GSE176307_barplot.pdf", width = 4, height = 5)
print(p)
dev.off()

test <- dplyr::select(rt, ID, Response, gene)
test <- na.omit(test)
test$Response <- factor(test$Response, levels = c('CR/PR', 'PD/SD'))
my_comparisons <- list(c("CR/PR", "PD/SD"))

# Differential Expression Analysis
p <- ggboxplot(test, x = "Response", y = gene,
               fill = "Response", legend = FALSE, palette = c("#E7B800", "#00AFBB"), bxp.errorbar = TRUE) +
  theme(legend.position = 'none') +
  ylab(label = gene) +
  stat_compare_means(comparisons = my_comparisons, method = 't.test', aes(label = ..p.signif..))
ggsave(p, filename = paste("GSE176307", gene, 'Differential_Expression.pdf', sep = ' '), width = 5.6, height = 4.22)

test <- dplyr::select(rt, PFS, Progression, gene)
res.cut <- surv_cutpoint(test, time = "PFS", event = "Progression",
                         variables = gene)
summary(res.cut)

# Categorize Variables
res.cat <- surv_categorize(res.cut)
head(res.cat)

# Fit Survival Curves and Visualize
fit <- survfit(Surv(PFS, Progression) ~ get(gene), data = res.cat)

pdf(file = "GSE176307_PFS_Survival_Analysis.pdf", width = 5.6, height = 5.34, onefile = FALSE)

ggsurvplot(fit,
           data = res.cat,
           risk.table = TRUE,
           risk.table.col = 'strata',
           risk.table.y.text = FALSE,
           risk.table.title = 'Number at risk',
           pval = TRUE,
           xlab = 'Time (years)',
           ggtheme = theme_light(),
           surv.median.line = 'hv',
           title = paste(gene, 'Survival Analysis', sep = ' '))

dev.off()
# Predict Responsiveness ROC Curve
library("pROC")
df <- dplyr::select(rt, Response, gene, PDCD1, CTLA4, CD274)
df <- na.omit(df)
df$Response[df$Response == "PD/SD"] <- 1
df$Response[df$Response == "CR/PR"] <- 0

# Define a sufficient number of colors for later use when drawing lines
mycol <- c("slateblue", "seagreen3", "dodgerblue", "firebrick1", "lightgoldenrod", "magenta", "orange2")

if (TRUE) {
  pdf("GSE176307_ROC.pdf", height = 6, width = 6)
  auc.out <- c()
  
  # Start by drawing the first curve, in this case, it's miRNA1
  x <- plot.roc(df[, 1], df[, 2], ylim = c(0, 1), xlim = c(1, 0),
                smooth = TRUE,  # Draw a smooth curve
                ci = TRUE, 
                main = "",
                # print.thres = "best", # Display the threshold on the graph where sensitivity + specificity is maximized
                col = mycol[2],  # Line color
                lwd = 2,  # Line thickness
                legacy.axes = TRUE)  # Use axes style commonly seen in papers, with the x-axis as "1-specificity" from 0 to 1
  
  ci.lower <- round(as.numeric(x$ci[1]), 3)  # Confidence interval lower limit
  ci.upper <- round(as.numeric(x$ci[3]), 3)  # Confidence interval upper limit
  
  auc.ci <- c(colnames(df)[2], round(as.numeric(x$auc), 3), paste(ci.lower, ci.upper, sep = "-"))
  auc.out <- rbind(auc.out, auc.ci)
  
  # Next, use a loop to draw the second and subsequent curves
  for (i in 3:ncol(df)) {
    x <- plot.roc(df[, 1], df[, i],
                  add = TRUE,  # Add to the previous plot
                  smooth = TRUE,
                  ci = TRUE,
                  col = mycol[i],
                  lwd = 2,
                  legacy.axes = TRUE)
    
    ci.lower <- round(as.numeric(x$ci[1]), 3)
    ci.upper <- round(as.numeric(x$ci[3]), 3)
    
    auc.ci <- c(colnames(df)[i], round(as.numeric(x$auc), 3), paste(ci.lower, ci.upper, sep = "-"))
    auc.out <- rbind(auc.out, auc.ci)
  }
  
  # Comparing multiple ROC curves
  # There are three methods available for calculating p-values after `method=`, namely "delong", "bootstrap", or "venkatraman"
  
  # Initialize an empty data frame for storing p-values
  p.out <- data.frame()
  
  # Loop through pairs of columns in the data frame df
  for (i in 2:(ncol(df) - 1)) {
    for (j in (i + 1):ncol(df)) {
      # Calculate p-value using the roc.test function with method="bootstrap"
      p <- roc.test(df[, 1], df[, i], df[, j], method = "bootstrap")
      
      # Create a temporary data frame containing ROC1, ROC2, and p-value
      p.tmp <- c(colnames(df)[i], colnames(df)[j], p$p.value)
      
      # Append the temporary data frame to p.out
      p.out <- rbind(p.out, p.tmp)
    }
  }
  
  # Rename the columns in p.out
  colnames(p.out) <- c("ROC1", "ROC2", "p.value")
  
  # Convert auc.out to a data frame if it's not already
  auc.out <- as.data.frame(auc.out)
  
  # Rename the columns in auc.out
  colnames(auc.out) <- c("Name", "AUC", "AUC CI")
  
  # Output p-values to a file
  # (Note: The code for writing p-values to a file is missing here; you can add it as needed.)
  # Plotting the legend
  legend.name <- paste(colnames(df)[2:length(df)], "AUC", auc.out$AUC, sep=" ")
  legend("bottomright", 
         legend = legend.name,
         col = mycol[2:length(df)],
         lwd = 2,
         bty = "n")
  dev.off()
}

# Cutting survival data
res.cut <- surv_cutpoint(test2, time = "PFS.time", event = "PFS",
                         variables = gene)
summary(res.cut)
# Categorizing variables
res.cat <- surv_categorize(res.cut)
head(res.cat)
# Fitting survival curves and visualization
fit <- survfit(Surv(PFS.time, PFS) ~ get(gene), data = res.cat)

# Saving survival analysis plot to a PDF file
pdf(file = "NCT02684006_Sunitinib_PFS_Survival_Analysis.pdf", width = 5.6, height = 5.34, onefile = FALSE)

ggsurvplot(fit,
           data = res.cat,
           risk.table = TRUE,
           risk.table.col = 'strata',
           risk.table.y.text = FALSE,
           risk.table.title = 'Number at risk',
           pval = TRUE,
           xlab = 'Time (years)',
           ggtheme = theme_light(),
           surv.median.line = 'hv',
           title = paste(gene, 'Survival', sep = ' '))

dev.off()

# Creating a percentage plot
res.cat$ID <- test2$ID
res.cat$Progression <- test2$PFS
res.cat$Group <- res.cat[, gene]
res.cat <- na.omit(res.cat)
res.cat$Progression[res.cat$PFS == 1] <- "Yes"
res.cat$Progression[res.cat$PFS == 0] <- "No"
library(dplyr)
library(plyr)
library(ggplot2)
library(ggpubr)
dat <- dplyr::count(res.cat, Group, Progression)
dat <- dat %>% group_by(Group) %>% 
  dplyr::summarise(Progression = Progression, n = n/sum(n))
dat$Progression <- factor(dat$Progression, levels = c("Yes", "No"))

# Calculating percentages
dat <- ddply(dat, .(Group), transform, percent = n/sum(n) * 100)
# Percentage positions
dat <- ddply(dat, .(Group), transform, pos = (cumsum(n) - 0.5 * n))
dat$label <- paste0(sprintf("%.0f", dat$percent), "%")

bioCol <- c("#f87669", "#2fa1dd")
p <- ggplot(dat, aes(x = factor(Group), y = percent, fill = Progression)) +
  geom_bar(position = position_stack(), stat = "identity", width = 0.7) +
  scale_fill_manual(values = bioCol) +
  xlab("Group") + ylab("Percent weight") +  guides(fill = guide_legend(title = "Progression")) +
  geom_text(aes(label = label), position = position_stack(vjust = 0.5), size = 3) +
  theme_bw()

# Saving the percentage plot to a PDF file
pdf(file = "NCT02684006_Sunitinib_Percentage_Plot.pdf", width = 4, height = 5)
print(p)
dev.off()

# Preparing data for boxplot
DAT <- dplyr::select(test2, ID, PFS, gene)
DAT <- na.omit(DAT)
colnames(DAT)[2] <- "Progression"
DAT$Progression[DAT$Progression == 1] <- "Yes"
DAT$Progression[DAT$Progression == 0] <- "No"

DAT$Progression <- factor(DAT$Progression, levels = c('Yes', 'No'))
my_comparisons <- list(c("Yes", "No"))

# Creating a boxplot for differential expression
p <- ggboxplot(DAT, x = "Progression", y = gene,
               fill = "Progression", legend = FALSE, palette = c("#E7B800", "#00AFBB"), bxp.errorbar = TRUE) +
  theme(legend.position = 'none') +
  ylab(label = gene) +
  stat_compare_means(comparisons = my_comparisons, method = 't.test', aes(label = ..p.signif..))

# Saving the boxplot to a PDF file
ggsave(p, filename = paste("NCT02684006_Sunitinib", gene, 'Differential_Expression.pdf', sep = ' '), width = 5.6, height = 4.22)

# Predicting responsiveness ROC curves
library("pROC")
df <- rt %>%
  dplyr::filter(Treatment == "Sunitinib") %>%
  dplyr::select(ID, PFS, gene, CTLA4, CD274)

df <- na.omit(df)
colnames(df)[2] <- "Progression"
df <- df[, -1]

# Defining a sufficient number of colors for later use in line plotting
mycol <- c("slateblue", "seagreen3", "dodgerblue", "firebrick1", "lightgoldenrod", "magenta", "orange2")

if (TRUE) {
  pdf("NCT02684006_Sunitinib_ROC.pdf", height = 6, width = 6)
  auc.out <- c()
  
  # Plotting the first line (miRNA1)
  x <- plot.roc(df[, 1], df[, 2], ylim = c(0, 1), xlim = c(1, 0),
                smooth = TRUE, ci = TRUE, main = "",
                col = mycol[2], lwd = 2, legacy.axes = TRUE)  # Using typical paper-style plotting with "1-specificity" on the x-axis
  
  ci.lower <- round(as.numeric(x$ci[1]), 3)  # Confidence interval lower limit
  ci.upper <- round(as.numeric(x$ci[3]), 3)  # Confidence interval upper limit
  
  auc.ci <- c(colnames(df)[2], round(as.numeric(x$auc), 3), paste(ci.lower, ci.upper, sep = "-"))
  auc.out <- rbind(auc.out, auc.ci)
  
  # Plotting additional ROC curves in a loop
  for (i in 3:ncol(df)) {
    x <- plot.roc(df[, 1], df[, i], add = TRUE, smooth = TRUE, ci = TRUE,
                  col = mycol[i], lwd = 2, legacy.axes = TRUE)
    
    ci.lower <- round(as.numeric(x$ci[1]), 3)
    ci.upper <- round(as.numeric(x$ci[3]), 3)
    
    auc.ci <- c(colnames(df)[i], round(as.numeric(x$auc), 3), paste(ci.lower, ci.upper, sep = "-"))
    auc.out <- rbind(auc.out, auc.ci)
  }
}
# Comparing multiple ROC curves
# You can choose from three methods in the 'method' parameter: "delong," "bootstrap," or "venkatraman" to calculate p-values
# p.out <- c()
# for (i in 2:(ncol(df)-1)) {
#   for (j in (i+1):ncol(df)) {
#     p <- roc.test(df[,1], df[,i], df[,j], method = "bootstrap")
#     p.tmp <- c(colnames(df)[i], colnames(df)[j], p$p.value)
#     p.out <- rbind(p.out, p.tmp)
#   }
# }
# p.out <- as.data.frame(p.out)
# colnames(p.out) <- c("ROC1", "ROC2", "p.value")
auc.out <- as.data.frame(auc.out)
colnames(auc.out) <- c("Name", "AUC", "AUC CI")
# Output p-values to a file

# Plotting the legend
legend.name <- paste(colnames(df)[2:length(df)], "AUC", auc.out$AUC, sep = " ")
legend("bottomright", 
       legend = legend.name,
       col = mycol[2:length(df)],
       lwd = 2,
       bty = "n")
dev.off()
###################################################################################################
###################################################################################
rm(list = ls())
gene = "Score"

setwd("Y:\\lcd_code\\TCGA_Any\\TCGA_Advanced\\Advanced\\Raw_Data\\Immune_Therapy_Data")
genelist_input <- fread(file = "PMID32472114.csv", header = TRUE, sep = ',', data.table = FALSE)
rownames(genelist_input) <- genelist_input$gene_name
genelist_input <- genelist_input[, -1]
immudata <- na.omit(genelist_input)
range(immudata)
# immudata <- log2(immudata + 1)
immudata <- as.matrix(immudata)
immudata <- as.data.frame(t(immudata))

CCC = read.table("uniSigGeneExp.txt", header = TRUE, sep = "\t")
CCC = CCC$id
CCC = intersect(colnames(immudata), CCC)

AAA = immudata[, CCC]
# PCA analysis
pca = prcomp(AAA, scale = TRUE)
value = predict(pca)
score = value[, 1] - value[, 2]
score = as.data.frame(score)
score$ID = rownames(score)
immudata$ID = row.names(immudata)
immudata = inner_join(immudata, score, by = "ID")
colnames(immudata)[which(colnames(immudata) == "score")] = gene
immudata <- immudata %>%
  dplyr::select(ID, gene, everything())

clindata <- read.csv("PMID32472114_clin.csv", header = TRUE, sep = ",")
colnames(clindata)[1] <- "ID"
AAA <- intersect(clindata$ID, immudata$ID)
immudata <- dplyr::filter(immudata, ID %in% AAA)
clindata <- dplyr::filter(clindata, ID %in% AAA)
rt <- inner_join(clindata, immudata, by = "ID")

rt$PFS.time <- rt$PFS.time / 12
rt$PFS <- as.numeric(rt$PFS)
rt$OS.time <- rt$OS.time / 12
rt$OS <- as.numeric(rt$OS)
# Survival analysis
setwd(paste0("Y:\\lcd_code\\TCGA_Any\\TCGA_Advanced\\Advanced/", gene, "/5_Immune_Infiltration_Analysis/6_Immune_Therapy"))

test <- dplyr::select(rt, ID, OS.time, OS, gene, Treatment, ORR)
test1 <- dplyr::filter(test, Treatment == "Nivolumab")
test2 <- dplyr::filter(test, Treatment == "Everolimus")

res.cut <- surv_cutpoint(test1, time = "OS.time", event = "OS",
                         variables = gene)
summary(res.cut)
# Categorize variables
res.cat <- surv_categorize(res.cut)
head(res.cat)
# Fit survival curves and visualize
fit <- survfit(Surv(OS.time, OS) ~ get(gene), data = res.cat)

pdf(file = "PMID32472114_Immune_Therapy_OS_Survival_Analysis.pdf", width = 5.6, height = 5.34, onefile = FALSE)

ggsurvplot(fit,
           data = res.cat,
           risk.table = TRUE,
           risk.table.col = 'strata',
           risk.table.y.text = FALSE,
           risk.table.title = 'Number at risk',
           pval = TRUE,
           #conf.int = TRUE,
           xlab = 'Time (years)',
           ggtheme = theme_light(),
           surv.median.line = 'hv',
           title = paste(gene, 'Survival', sep = ' '))

dev.off()
# Percentage plot
res.cat$ID <- test1$ID
res.cat$Response <- test1$ORR
res.cat$Group <- res.cat[, gene]
res.cat <- na.omit(res.cat)
library(dplyr)
library(plyr)
library(ggplot2)
library(ggpubr)
dat <- dplyr::count(res.cat, Group, Response)
dat <- dat %>% group_by(Group) %>% 
  dplyr::summarise(Response = Response, n = n / sum(n))
dat$Response = factor(dat$Response, levels = c("CR/PR", "PD/SD"))

# Calculate percentages
dat = ddply(dat, .(Group), transform, percent = n / sum(n) * 100)
# Percentage positions
dat = ddply(dat, .(Group), transform, pos = (cumsum(n) - 0.5 * n))
dat$label = paste0(sprintf("%.0f", dat$percent), "%")

bioCol = c("#f87669", "#2fa1dd")
p = ggplot(dat, aes(x = factor(Group), y = percent, fill = Response)) +
  geom_bar(position = position_stack(), stat = "identity", width = 0.7) +
  scale_fill_manual(values = bioCol) +
  xlab("Group") + ylab("Percent weight") + guides(fill = guide_legend(title = "Response")) +
  geom_text(aes(label = label), position = position_stack(vjust = 0.5), size = 3) +
  #coord_flip()+
  theme_bw()
pdf(file = "PMID32472114_Immune_Therapy_barplot.pdf", width = 4, height = 5)
print(p)
dev.off()

DAT <- dplyr::select(test1, ID, ORR, gene)
DAT <- na.omit(DAT)
colnames(DAT)[2] <- "Response"

DAT$Response <- factor(DAT$Response, levels = c('CR/PR', 'PD/SD'))
my_comparisons <- list(c("CR/PR", "PD/SD"))
# Differential expression for responders and non-responders
p <- ggboxplot(DAT, x = "Response", y = gene,
               fill = "Response", legend = FALSE, palette = c("#E7B800", "#00AFBB"), bxp.errorbar = TRUE) +
  theme(legend.position = 'none') +
  ylab(label = gene) +
  stat_compare_means(comparisons = my_comparisons, method = 't.test', aes(label = ..p.signif..))
ggsave(p, filename = paste("PMID32472114_Immune_Therapy", gene, 'Differential_Expression.pdf', sep = ' '), width = 5.6, height = 4.22)

# Predictive ROC curves
library("pROC")
df <- rt %>%
  dplyr::filter(Treatment == "Nivolumab") %>%
  dplyr::select(ID, ORR, gene, PDCD1, CTLA4, CD274) %>%
  na.omit()

df$ORR[df$ORR == "PD/SD"] = 1
df$ORR[df$ORR == "CR/PR"] = 0
df <- df[, -1]
# Define enough colors for selecting colors when drawing lines later
mycol <- c("slateblue", "seagreen3", "dodgerblue", "firebrick1", "lightgoldenrod", "magenta", "orange2")

if (TRUE) {
  pdf("PMID32472114_Immune_Therapy_ROC.pdf", height = 6, width = 6)
  auc.out <- c()
  
  # Start by drawing the first line, which is miRNA1 in this case
  x <- plot.roc(df[, 1], df[, 2], ylim = c(0, 1), xlim = c(1, 0),
                smooth = TRUE, # Draw a smooth curve
                ci = TRUE, 
                main = "",
                #print.thres = "best", # Put the threshold on the graph where sensitivity + specificity is maximum
                col = mycol[2], # Line color
                lwd = 2, # Line thickness
                legacy.axes = TRUE) # Use the common style for axes where x-axis is "1-specificity" ranging from 0 to 1
  
  ci.lower <- round(as.numeric(x$ci[1]), 3) # Confidence interval lower bound
  ci.upper <- round(as.numeric(x$ci[3]), 3) # Confidence interval upper bound
  
  auc.ci <- c(colnames(df)[2], round(as.numeric(x$auc), 3), paste(ci.lower, ci.upper, sep = "-"))
  auc.out <- rbind(auc.out, auc.ci)
  
  # Next, use a loop to draw the second and subsequent curves
  for (i in 3:ncol(df)) {
    x <- plot.roc(df[, 1], df[, i],
                  add = TRUE, # Add to the previously drawn graph
                  smooth = TRUE,
                  ci = TRUE,
                  col = mycol[i],
                  lwd = 2,
                  legacy.axes = TRUE)
    
    ci.lower <- round(as.numeric(x$ci[1]), 3)
    ci.upper <- round(as.numeric(x$ci[3]), 3)
    
    auc.ci <- c(colnames(df)[i], round(as.numeric(x$auc), 3), paste(ci.lower, ci.upper, sep = "-"))
    auc.out <- rbind(auc.out, auc.ci)
  }
  # Compare multiple curves
  # There are three methods available for calculating p-values after "method=": "delong", "bootstrap", or "venkatraman"
  p.out <- c()
  for (i in 2:(ncol(df) - 1)) {
    for (j in (i + 1):ncol(df)) {
      p <- roc.test(df[, 1], df[, i], df[, j], method = "bootstrap")
      p.tmp <- c(colnames(df)[i], colnames(df)[j], p$p.value)
      p.out <- rbind(p.out, p.tmp)
    }
  }
  p.out <- as.data.frame(p.out)
  colnames(p.out) <- c("ROC1", "ROC2", "p.value")
  auc.out <- as.data.frame(auc.out)
  colnames(auc.out) <- c("Name", "AUC", "AUC CI")
  # Output p-values to a file
  
  # Draw legend
  legend.name <- paste(colnames(df)[2:length(df)], "AUC", auc.out$AUC, sep = " ")
  legend("bottomright", 
         legend = legend.name,
         col = mycol[2:length(df)],
         lwd = 2,
         bty = "n")
  dev.off()
}

res.cut <- surv_cutpoint(test2, time = "OS.time", event = "OS",
                         variables = gene)
summary(res.cut)
# Categorize variables
res.cat <- surv_categorize(res.cut)
head(res.cat)
# Fit survival curves and visualize
fit <- survfit(Surv(OS.time, OS) ~ get(gene), data = res.cat)

pdf(file = "PMID32472114_Everolimus_OS_Survival_Analysis.pdf", width = 5.6, height = 5.34, onefile = FALSE)

ggsurvplot(fit,
           data = res.cat,
           risk.table = TRUE,
           risk.table.col = 'strata',
           risk.table.y.text = FALSE,
           risk.table.title = 'Number at risk',
           pval = TRUE,
           #conf.int = TRUE,
           xlab = 'Time (years)',
           ggtheme = theme_light(),
           surv.median.line = 'hv',
           title = paste(gene, 'Survival', sep = ' '))

dev.off()

# Percentage plot
res.cat$ID <- test2$ID
res.cat$Progression <- test2$ORR
res.cat$Group <- res.cat[, gene]
res.cat <- na.omit(res.cat)

library(dplyr)
library(plyr)
library(ggplot2)
library(ggpubr)
dat <- dplyr::count(res.cat, Group, Progression)
dat <- dat %>% group_by(Group) %>% 
  dplyr::summarise(Progression = Progression, n = n / sum(n))
dat$Progression = factor(dat$Progression, levels = c("CR/PR", "PD/SD"))

# Calculate percentages
dat = ddply(dat, .(Group), transform, percent = n / sum(n) * 100)
# Percentage positions
dat = ddply(dat, .(Group), transform, pos = (cumsum(n) - 0.5 * n))
dat$label = paste0(sprintf("%.0f", dat$percent), "%")

bioCol = c("#f87669", "#2fa1dd")
p = ggplot(dat, aes(x = factor(Group), y = percent, fill = Progression)) +
  geom_bar(position = position_stack(), stat = "identity", width = 0.7) +
  scale_fill_manual(values = bioCol) +
  xlab("Group") + ylab("Percent weight") + guides(fill = guide_legend(title = "Progression")) +
  geom_text(aes(label = label), position = position_stack(vjust = 0.5), size = 3) +
  #coord_flip()+
  theme_bw()
pdf(file = "PMID32472114_Everolimus_barplot.pdf", width = 4, height = 5)
print(p)
dev.off()

DAT <- dplyr::select(test2, ID, ORR, gene)
DAT <- na.omit(DAT)
colnames(DAT)[2] <- "Progression"

DAT$Progression <- factor(DAT$Progression, levels = c('CR/PR', 'PD/SD'))
my_comparisons <- list(c("CR/PR", "PD/SD"))
# Differential expression for responders and non-responders
p <- ggboxplot(DAT, x = "Progression", y = gene,
               fill = "Progression", legend = FALSE, palette = c("#E7B800", "#00AFBB"), bxp.errorbar = TRUE) +
  theme(legend.position = 'none') +
  ylab(label = gene) +
  stat_compare_means(comparisons = my_comparisons, method = 't.test', aes(label = ..p.signif..))
ggsave(p, filename = paste("PMID32472114_Everolimus", gene, 'Differential_Expression.pdf', sep = ' '), width = 5.6, height = 4.22)

# Predictive ROC curves
library("pROC")
df <- rt %>%
  dplyr::filter(Treatment == "Everolimus") %>%
  dplyr::select(ID, ORR, gene, CTLA4, CD274)

df <- na.omit(df)
colnames(df)[2] <- "Progression"
df$Progression[df$Progression == "PD/SD"] = 1
df$Progression[df$Progression == "CR/PR"] = 0
df <- df[, -1]
# Define enough colors for selecting colors when drawing lines later
mycol <- c("slateblue", "seagreen3", "dodgerblue", "firebrick1", "lightgoldenrod", "magenta", "orange2")

if (TRUE) {
  pdf("PMID32472114_Everolimus_ROC.pdf", height = 6, width = 6)
  auc.out <- c()
  
  # Start by drawing the first line, which is miRNA1 in this case
  x <- plot.roc(df[, 1], df[, 2], ylim = c(0, 1), xlim = c(1, 0),
                smooth = TRUE, # Draw a smooth curve
                ci = TRUE, 
                main = "",
                #print.thres = "best", # Put the threshold on the graph where sensitivity + specificity is maximum
                col = mycol[2], # Line color
                lwd = 2, # Line thickness
                legacy.axes = TRUE) # Use the common style for axes where x-axis is "1-specificity" ranging from 0 to 1
  
  ci.lower <- round(as.numeric(x$ci[1]), 3) # Confidence interval lower bound
  ci.upper <- round(as.numeric(x$ci[3]), 3) # Confidence interval upper bound
  
  auc.ci <- c(colnames(df)[2], round(as.numeric(x$auc), 3), paste(ci.lower, ci.upper, sep = "-"))
  auc.out <- rbind(auc.out, auc.ci)
  
  # Next, use a loop to draw the second and subsequent curves
  for (i in 3:ncol(df)) {
    x <- plot.roc(df[, 1], df[, i],
                  add = TRUE, # Add to the previously drawn graph
                  smooth = TRUE,
                  ci = TRUE,
                  col = mycol[i],
                  lwd = 2,
                  legacy.axes = TRUE)
    
    ci.lower <- round(as.numeric(x$ci[1]), 3)
    ci.upper <- round(as.numeric(x$ci[3]), 3)
    
    auc.ci <- c(colnames(df)[i], round(as.numeric(x$auc), 3), paste(ci.lower, ci.upper, sep = "-"))
    auc.out <- rbind(auc.out, auc.ci)
  }
}
# # Compare multiple curves
# # After the "method=" parameter, there are three methods available: "delong," "bootstrap," or "venkatraman" to calculate p-values
# p.out <- c()
# for (i in 2:(ncol(df) - 1)) {
#   for (j in (i + 1):ncol(df)) {
#     p <- roc.test(df[, 1], df[, i], df[, j], method = "bootstrap")
#     p.tmp <- c(colnames(df)[i], colnames(df)[j], p$p.value)
#     p.out <- rbind(p.out, p.tmp)
#   }
# }
# p.out <- as.data.frame(p.out)
# colnames(p.out) <- c("ROC1", "ROC2", "p.value")
auc.out <- as.data.frame(auc.out)
colnames(auc.out) <- c("Name", "AUC", "AUC CI")
# Output p-values to a file

# Draw a legend
legend.name <- paste(colnames(df)[2:length(df)], "AUC", auc.out$AUC, sep = " ")
legend("bottomright", 
       legend = legend.name,
       col = mycol[2:length(df)],
       lwd = 2,
       bty = "n")
dev.off()
}
# Select relevant data columns
test <- dplyr::select(rt, ID, PFS.time, PFS, gene, Treatment, ORR)

# Filter data for 'Nivolumab' treatment
test1 <- dplyr::filter(test, Treatment == "Nivolumab")

# Filter data for 'Everolimus' treatment
test2 <- dplyr::filter(test, Treatment == "Everolimus")

# Perform survival analysis for 'Nivolumab' treatment
res.cut <- surv_cutpoint(test1, time = "PFS.time", event = "PFS", variables = gene)
summary(res.cut)

# Categorize variables for 'Nivolumab' treatment
res.cat <- surv_categorize(res.cut)
head(res.cat)

# Fit survival curves and visualize for 'Nivolumab' treatment
fit <- survfit(Surv(PFS.time, PFS) ~ get(gene), data = res.cat)

# Create a PDF file for 'Nivolumab' PFS survival analysis
pdf(file = "PMID32472114_Immunotherapy_Nivolumab_PFS_Survival_Analysis.pdf", width = 5.6, height = 5.34, onefile = FALSE)

# Generate survival plot for 'Nivolumab' treatment
ggsurvplot(fit,
           data = res.cat,
           risk.table = TRUE,
           risk.table.col = 'strata',
           risk.table.y.text = FALSE,
           risk.table.title = 'Number at risk',
           pval = TRUE,
           #conf.int = TRUE,
           xlab = 'Time (years)',
           ggtheme = theme_light(),
           surv.median.line = 'hv',
           title = paste(gene, 'Survival', sep = ' '))

# Close the PDF device
dev.off()

# Perform survival analysis for 'Everolimus' treatment
res.cut <- surv_cutpoint(test2, time = "PFS.time", event = "PFS", variables = gene)
summary(res.cut)

# Categorize variables for 'Everolimus' treatment
res.cat <- surv_categorize(res.cut)
head(res.cat)

# Fit survival curves and visualize for 'Everolimus' treatment
fit <- survfit(Surv(PFS.time, PFS) ~ get(gene), data = res.cat)

# Create a PDF file for 'Everolimus' PFS survival analysis
pdf(file = "PMID32472114_Everolimus_PFS_Survival_Analysis.pdf", width = 5.6, height = 5.34, onefile = FALSE)

# Generate survival plot for 'Everolimus' treatment
ggsurvplot(fit,
           data = res.cat,
           risk.table = TRUE,
           risk.table.col = 'strata',
           risk.table.y.text = FALSE,
           risk.table.title = 'Number at risk',
           pval = TRUE,
           #conf.int = TRUE,
           xlab = 'Time (years)',
           ggtheme = theme_light(),
           surv.median.line = 'hv',
           title = paste(gene, 'Survival', sep = ' '))

# Close the PDF device
dev.off()