# Clear the current workspace rm(list = ls()) # Load the merged data from a previous session load("merge.RDATA") Merge <- outTab genename <- read.table("ICD.txt", header = TRUE, sep = "\t") # Get the intersection of gene names with merged data AAA <- intersect(genename[, 1], rownames(Merge)) geneexpr <- Merge[AAA, ] # Save the gene expression data to a file write.table(geneexpr, "geneexpr.txt", quote = FALSE, row.names = TRUE, sep = "\t") save(geneexpr, file = "geneexpr.RDATA") # Load required library for ConsensusClusterPlus library(ConsensusClusterPlus) # Read the gene expression data data <- geneexpr data <- as.matrix(data) # Perform clustering using ConsensusClusterPlus maxK <- 9 results <- ConsensusClusterPlus(data, maxK = maxK, reps = 50, pItem = 0.8, pFeature = 1, clusterAlg = "pam", distance = "euclidean", seed = 123456, plot = "png") # Output clustering results clusterNum <- 2 # Number of clusters to consider based on the criteria cluster <- results[[clusterNum]][["consensusClass"]] cluster <- as.data.frame(cluster) colnames(cluster) <- c("cluster") letter <- c("A", "B", "C", "D", "E", "F", "G") uniqClu <- levels(factor(cluster$cluster)) cluster$cluster <- letter[match(cluster$cluster, uniqClu)] clusterOut <- rbind(ID = colnames(cluster), cluster) write.table(clusterOut, file = "Cluster.txt", sep = "\t", quote = FALSE, col.names = FALSE) # Load required libraries for survival analysis library(survival) library(survminer) library(tidyverse) clusterFile <- "Cluster.txt" # Cluster file cliFile <- "clinicaldata.txt" # Clinical data file # Read the cluster file and preprocess it cluster <- read.table(clusterFile, header = TRUE, sep = "\t", check.names = FALSE, row.names = 1) rownames(cluster) <- str_replace_all(rownames(cluster), "TCGA_", "") rownames(cluster) <- str_replace_all(rownames(cluster), "GSE17538_", "") rownames(cluster) <- str_replace_all(rownames(cluster), "GSE39582_", "") # Read the clinical data file cli <- read.table(cliFile, header = TRUE, sep = "\t", check.names = FALSE, row.names = 1) cli <- cli[, 1:2] cli$futime <- cli$futime / 365 # Merge the cluster and clinical data sameSample <- intersect(row.names(cluster), row.names(cli)) rt <- cbind(cli[sameSample, , drop = FALSE], cluster[sameSample, , drop = FALSE]) # Perform survival analysis length <- length(levels(factor(rt$cluster))) diff <- survdiff(Surv(futime, fustat) ~ cluster, data = rt) pValue <- 1 - pchisq(diff$chisq, df = length - 1) if (pValue < 0.001) { pValue <- "p<0.001" } else { pValue <- paste0("p=", sprintf("%.03f", pValue)) } fit <- survfit(Surv(futime, fustat) ~ cluster, data = rt) # Generate survival plots bioCol <- c("#0066FF", "#FF9900", "#FF0000", "#6E568C", "#7CC767", "#223D6C", "#D20A13", "#FFD121", "#088247", "#11AA4D") bioCol <- bioCol[1:length] surPlot <- ggsurvplot(fit, data = rt, conf.int = FALSE, pval = pValue, pval.size = 6, legend.title = "cluster", legend.labs = levels(factor(rt[,"cluster"])), legend = c(0.8, 0.8), font.legend = 10, xlab = "Time (years)", break.time.by = 1, palette = bioCol, surv.median.line = "hv", risk.table = TRUE, cumevents = FALSE, risk.table.height = 0.25) # Print the survival plot and save it as a PDF file print(surPlot) pdf(file = "survival.pdf", onefile = FALSE, width = 7, height = 5.5) print(surPlot) dev.off()