# Clear the workspace rm(list = ls()) expFile <- "uniSigGeneExp.txt" # Expression input file # Read the input file data <- read.table(expFile, header = TRUE, sep = "\t", check.names = FALSE, row.names = 1) data <- t(data) # Perform PCA analysis pca <- prcomp(data, scale = TRUE) value <- predict(pca) score <- value[, 1] - value[, 2] score <- as.data.frame(score) scoreOut <- rbind(id = colnames(score), score) write.table(scoreOut, file = "score.txt", sep = "\t", quote = FALSE, col.names = FALSE) # Load libraries library(survival) library(survminer) scoreFile <- "score.txt" # Score file cliFile <- "clinicaldata.txt" # Survival data file # Read the input files score <- read.table(scoreFile, header = TRUE, sep = "\t", check.names = FALSE, row.names = 1) sampleType <- gsub("(.*?)\\_.*", "\\1", row.names(score)) score <- cbind(score, sampleType) library(tidyverse) rownames(score) <- str_replace_all(rownames(score), "TCGA_", "") rownames(score) <- str_replace_all(rownames(score), "GSE17538_", "") rownames(score) <- str_replace_all(rownames(score), "GSE39582_", "") cli <- read.table(cliFile, header = TRUE, sep = "\t", check.names = FALSE, row.names = 1) cli <- cli[, 1:2] cli$futime <- cli$futime / 365 # Merge the data sameSample <- intersect(row.names(score), row.names(cli)) data <- cbind(cli[sameSample,], score[sameSample,]) # Get the optimal cutoff res.cut <- surv_cutpoint(data, time = "futime", event = "fustat", variables = c("score")) cutoff <- as.numeric(res.cut$cutpoint[1]) print(cutoff) Type <- ifelse(data[,"score"] <= cutoff, "Low", "High") data$group <- Type outTab <- rbind(id = colnames(data), data) write.table(outTab, file = "score.group.txt", sep = "\t", quote = FALSE, col.names = FALSE) # Calculate survival differences between high and low-risk groups data$group <- factor(data$group, levels = c("Low", "High")) diff <- survdiff(Surv(futime, fustat) ~ group, data = data) length <- length(levels(factor(data[,"group"]))) pValue <- 1 - pchisq(diff$chisq, df = length - 1) if (pValue < 0.001) { pValue <- "p<0.001" } else { pValue <- paste0("p=", sprintf("%.03f", pValue)) } fit <- survfit(Surv(futime, fustat) ~ group, data = data) # Plot survival curves bioCol <- c("#0066FF", "#FF0000", "#6E568C", "#7CC767", "#223D6C", "#D20A13", "#FFD121", "#088247", "#11AA4D") bioCol <- bioCol[1:length] surPlot <- ggsurvplot(fit, data = data, conf.int = FALSE, pval = pValue, pval.size = 6, legend.title = "score", legend.labs = levels(factor(data[,"group"])), legend = c(0.8, 0.8), font.legend = 12, xlab = "Time (years)", break.time.by = 1, palette = bioCol, surv.median.line = "hv", risk.table = TRUE, cumevents = FALSE, risk.table.height = 0.25) # Save the plot as a PDF pdf(file = "survival.pdf", onefile = FALSE, width = 7.5, height = 5.5) print(surPlot) dev.off()