# Clear the environment rm(list = ls()) # Load required packages library(limma) library(sva) library(ggplot2) library(factoextra) library(FactoMineR) # Define input files expFile <- "merge.txt" # Expression input file geneFile <- "ICD.txt" # Gene list file # Read input files and process the data load("merge.RDATA") rt <- outTab data <- outTab gene <- read.table(geneFile, header = TRUE, sep = "\t", check.names = FALSE) setdiff(as.vector(gene[, 1]), rownames(data)) sameGene <- intersect(as.vector(gene[, 1]), rownames(data)) geneExp <- data[sameGene, ] setdiff(gene[, 1], rownames(data)) # Output the results out <- rbind(ID = colnames(geneExp), geneExp) write.table(out, file = "GeneExp.txt", sep = "\t", quote = FALSE, col.names = FALSE) # Load required packages library(limma) library(survival) library(survminer) # Define input files expFile <- "GeneExp.txt" # Expression data file cliFile <- "clinicaldata.txt" # Survival data file # Read expression data and preprocess it rt <- read.table(expFile, header = TRUE, sep = "\t", check.names = FALSE) rt <- as.matrix(rt) rownames(rt) <- rt[, 1] exp <- rt[, 2:ncol(rt)] dimnames <- list(rownames(exp), colnames(exp)) data <- matrix(as.numeric(as.matrix(exp)), nrow = nrow(exp), dimnames = dimnames) data <- avereps(data) data <- t(data) library(tidyverse) rownames(data) <- str_replace_all(rownames(data), "TCGA_", "") rownames(data) <- str_replace_all(rownames(data), "GSE17538_", "") rownames(data) <- str_replace_all(rownames(data), "GSE39582_", "") # Read survival data cli <- read.table(cliFile, header = TRUE, sep = "\t", check.names = FALSE, row.names = 1) cli <- cli[, 1:2] cli$futime <- cli$futime / 365 # Merge the data sameSample <- intersect(row.names(data), row.names(cli)) data <- data[sameSample, ] cli <- cli[sameSample, ] rt <- cbind(cli, data) # Iterate through genes to find prognostic genes outTab <- data.frame() km <- c() gene <- c() for (i in colnames(rt[, 3:ncol(rt)])) { # Cox analysis cox <- coxph(Surv(futime, fustat) ~ rt[, i], data = rt) coxSummary <- summary(cox) coxP <- coxSummary$coefficients[, "Pr(>|z|)"] outTab <- rbind(outTab, cbind(id = i, HR = coxSummary$conf.int[, "exp(coef)"], HR.95L = coxSummary$conf.int[, "lower .95"], HR.95H = coxSummary$conf.int[, "upper .95"], pvalue = coxSummary$coefficients[, "Pr(>|z|)"]) ) # Kaplan-Meier analysis data <- rt[, c("futime", "fustat", i)] colnames(data) <- c("futime", "fustat", "gene") # Get optimal cutoff res.cut <- surv_cutpoint(data, time = "futime", event = "fustat", variables = c("gene")) res.cat <- surv_categorize(res.cut) fit <- survfit(Surv(futime, fustat) ~ gene, data = res.cat) diff <- survdiff(Surv(futime, fustat) ~ gene, data = res.cat) pValue <- 1 - pchisq(diff$chisq, df = 1) km <- c(km, pValue) # Plot survival curves for genes with p-value < 0.05 if (pValue < 0.05) { gene <- c(gene, i) if (pValue < 0.001) { pValue <- "p < 0.001" } else { pValue <- paste0("p = ", sprintf("%.03f", pValue)) } # Generate survival plots surPlot <- ggsurvplot(fit, data = res.cat, pval = pValue, pval.size = 6, legend.title = i, legend.labs = c("high", "low"), xlab = "Time (years)", ylab = "Overall survival", palette = c("red", "blue"), break.time.by = 1, conf.int = TRUE, risk.table = TRUE, risk.table.title = "", risk.table.height = 0.25) pdf(file = paste0("sur.", i, ".pdf"), onefile = FALSE, width = 6, # Image width height = 5) # Image height print(surPlot) dev.off() } } # Output results for single-factor analysis outTab <- cbind(outTab, km) write.table(outTab, file = "uniCox.txt", sep = "\t", row.names = FALSE, quote = FALSE) # Load required packages library(igraph) library(psych) library(reshape2) library("RColorBrewer") # Define input files GeneExpfile <- "GeneExp.txt" # Expression data file Coxfile <- "uniCox.txt" # Prognostic results file # Read input files gene.group <- read.table("uniCox.txt", header = TRUE, sep = "\t") gene.group <- dplyr::filter(gene.group, pvalue < 0.05) gene.group <- as.data.frame(gene.group[, 1]) colnames(gene.group) <- c('id') gene.group$group <- "ICD genes" gene.exp <- read.table(GeneExpfile, header = TRUE, sep = "\t", row.names = 1) gene.cox <- read.table(Coxfile, header = TRUE, sep = "\t") # Get the intersection of genes genelist <- intersect(gene.group$id, gene.cox$id) genelist <- intersect(genelist, rownames(gene.exp)) gene.group <- gene.group[match(genelist, gene.group$id), ] gene.group <- gene.group[order(gene.group$group), ] gene.exp <- gene.exp[match(gene.group$id, rownames(gene.exp)), ] gene.cox <- gene.cox[match(gene.group$id, gene.cox$id), ] # Prepare network files gene.cor <- corr.test(t(gene.exp)) gene.cor.cor <- gene.cor$r gene.cor.pvalue <- gene.cor$p # Process the correlation data gene.cor.cor[upper.tri(gene.cor.cor)] <- NA gene.cor.pvalue[upper.tri(gene.cor.pvalue)] <- NA gene.cor.cor.melt <- melt(gene.cor.cor) # Gene1 \t Gene2 \t Correlation gene.cor.pvalue.melt <- melt(gene.cor.pvalue) gene.melt <- data.frame(from = gene.cor.cor.melt$Var2, to = gene.cor.cor.melt$Var1, cor = gene.cor.cor.melt$value, pvalue = gene.cor.pvalue.melt$value) gene.melt <- gene.melt[gene.melt$from != gene.melt$to & !is.na(gene.melt$pvalue), , drop = FALSE] # Select relationships with p-value < 0.0001 gene.edge <- gene.melt[gene.melt$pvalue < 0.0001, , drop = FALSE] gene.edge$color <- ifelse(gene.edge$cor > 0, 'pink', '#6495ED') gene.edge$weight <- abs(gene.edge$cor) * 6 # Prepare node attributes gene.node <- gene.group group.color <- colorRampPalette(brewer.pal(9, "Set1"))(length(unique(gene.node$group))) gene.node$color <- group.color[as.numeric(as.factor(gene.node$group))] gene.node$shape <- "circle" gene.node$frame <- ifelse(gene.cox$HR > 1, 'purple', "green") gene.node$pvalue <- gene.cox$pvalue # Define p-value size categories pvalue.breaks <- c(0, 0.0001, 0.001, 0.01, 0.05, 1) pvalue.size <- c(16, 14, 12, 10, 8) cutpvalue <- cut(gene.node$pvalue, breaks = pvalue.breaks) gene.node$size <- pvalue.size[as.numeric(cutpvalue)] # Define output files nodefile <- "network.node.txt" edgefile <- "network.edge.txt" # Write node and edge data to files write.table(gene.node, nodefile, sep = "\t", col.names = TRUE, row.names = FALSE, quote = FALSE) write.table(gene.edge, edgefile, sep = "\t", col.names = TRUE, row.names = FALSE, quote = FALSE) # Create the network graph node <- read.table(nodefile, header = TRUE, sep = "\t", comment.char = "") edge <- read.table(edgefile, header = TRUE, sep = "\t", comment.char = "") g <- graph.data.frame(edge, directed = FALSE) node <- node[match(names(components(g)$membership), node$id), ] # Set node attributes if (!is.na(match('color', colnames(node)))) V(g)$color <- node$color if (!is.na(match('size', colnames(node)))) V(g)$size <- node$size if (!is.na(match('shape', colnames(node)))) V(g)$shape <- node$shape if (!is.na(match('frame', colnames(node)))) V(g)$frame <- node$frame # Output the network graph to a PDF file pdf(file = "network.pdf", width = 15, height = 12) par(mar = c(0, 0, 0, 0)) layout(matrix(c(1, 1, 4, 2, 3, 4), nc = 2), height = c(4, 4, 2), width = c(8, 3)) # Define node coordinates coord <- layout_in_circle(g) degree.x <- acos(coord[, 1]) degree.y <- asin(coord[, 2]) degree.alpha <- c() for (i in 1:length(degree.x)) { if (degree.y[i] < 0) degree.alpha <- c(degree.alpha, 2 * pi - degree.x[i]) else degree.alpha <- c(degree.alpha, degree.x[i]) } degree.cut.group <- (0:8) / 4 * pi degree.cut.group[1] <- -0.0001 degree.cut <- cut(degree.alpha, degree.cut.group) degree.degree <- c(-pi / 4, -pi / 4, -pi / 2, -pi / 2, pi / 2, pi / 2, pi / 2, pi / 4) degree <- degree.degree[as.numeric(degree.cut)] # Define pie chart values values <- lapply(node$id, function(x) c(1, 1)) V(g)$pie.color <- lapply(1:nrow(node), function(x) c(node$color[x], node$frame[x])) V(g)$frame <- NA # Draw the network graph plot(g, layout = layout_in_circle, vertex.shape = "pie", vertex.pie = values, vertex.label.cex = V(g)$lable.cex, edge.width = E(g)$weight, edge.arrow.size = 0, vertex.label.color = V(g)$color, vertex.frame.color = V(g)$frame, edge.color = E(g)$color, vertex.label.cex = 2, vertex.label.font = 2, vertex.size = V(g)$size, edge.curved = 0.4, vertex.color = V(g)$color, vertex.label.dist = 1, vertex.label.degree = degree) # Add legend for node attributes par(mar = c(0, 0, 0, 0)) plot(1, type = "n", xlab = "", ylab = "", axes = FALSE) groupinfo <- unique(data.frame(group = node$group, color = node$color)) legend("left", legend = groupinfo$group, col = groupinfo$color, pch = 16, bty = "n", cex = 3) # Add legend for risk factors par(mar = c(0, 0, 0, 0)) plot(1, type = "n", xlab = "", ylab = "", axes = FALSE) legend("left", legend = c('Risk factors', 'Favorable factors'), col = c('purple', 'green'), pch = 16, bty = "n", cex = 2.5) # Add legend for p-values par(mar = c(0, 0, 0, 0)) plot(1, type = "n", xlab = "", axes = FALSE, ylab = "") legend("top", legend = c('Positive correlation with P < 0.0001', 'Negative correlation with P < 0.0001'), lty = 1, lwd = 4, col = c('pink', '#6495ED'), bty = "n", cex = 2.2) legend('bottom', legend = c(0.0001, 0.001, 0.01, 0.05, 1), pch = 16, pt.cex = c(1.6, 1.4, 1.2, 1, 0.8) * 6, bty = "n", ncol = 5, cex = 2.2, col = "black", title = "Cox test, pvalue") # Save the network graph as a PDF file dev.off()