rm(list = ls()) # Load libraries library(plyr) library(ggplot2) library(ggpubr) scoreFile = "score.group.txt" # Scoring data file cliFile = "clinicaldata.txt" # Clinical data file trait = "Tomor_type" # Clinical trait # Read input files score = read.table(scoreFile, header = TRUE, sep = "\t", check.names = FALSE, row.names = 1) cli = read.table(cliFile, header = TRUE, sep = "\t", check.names = FALSE, row.names = 1) # Identify common samples between scoring and clinical data sameSample = intersect(row.names(score), row.names(cli)) rt = cbind(score[sameSample,,drop=FALSE], cli[sameSample,,drop=FALSE]) # Replace "fustat" values with "Dead" and "Alive" rt$fustat[rt$fustat == 1] = "Dead" rt$fustat[rt$fustat == 0] = "Alive" # Define colors for clinical traits bioCol = c("#0066FF","#FF0000","#FF9900","#6E568C","#7CC767","#223D6C","#D20A13","#FFD121","#088247","#11AA4D") bioCol = bioCol[1:length(unique(rt[,trait]))] # Count the number of patients in low and high scoring groups for each trait rt1 = rt[,c(trait, "group")] rt1 <- na.omit(rt1) colnames(rt1) = c("trait", "group") df = as.data.frame(table(rt1)) # Calculate the percentage of patients in each group df = ddply(df, .(group), transform, percent = Freq/sum(Freq) * 100) # Calculate position for percentage labels df = ddply(df, .(group), transform, pos = (cumsum(Freq) - 0.5 * Freq)) df$label = paste0(sprintf("%.0f", df$percent), "%") df$group = factor(df$group, levels = c("Low", "High")) # Create a bar plot to visualize the percentage of patients in low and high score groups for each clinical trait p = ggplot(df, aes(x = factor(group), y = percent, fill = trait)) + geom_bar(position = position_stack(), stat = "identity", width = 0.7) + scale_fill_manual(values = bioCol) + xlab("score") + ylab("Percent weight") + guides(fill = guide_legend(title = trait)) + geom_text(aes(label = label), position = position_stack(vjust = 0.5), size = 3) + theme_bw() # Save the bar plot to a PDF file pdf(file = "Tomor_type_barplot.pdf", width = 4, height = 5) print(p) dev.off() # Set up comparisons between clinical traits rt2 = rt[,c(trait, "score")] rt2 <- na.omit(rt2) colnames(rt2) = c("trait", "score") type = levels(factor(rt2[,"trait"])) rt2$trait = factor(rt2$trait, levels = type) comp = combn(type, 2) my_comparisons = list() for(i in 1:ncol(comp)){ my_comparisons[[i]] = comp[,i] } # Create a box plot to visualize the distribution of scores for different traits and include trait group comparisons boxplot = ggboxplot(rt2, x = "trait", y = "score", fill = "trait", xlab = trait, ylab = "score", legend.title = trait, palette = bioCol ) + stat_compare_means(comparisons = my_comparisons) # Save the box plot to a PDF file pdf(file = "Tomor_type_boxplot.pdf", width = 5, height = 4.5) print(boxplot) dev.off() # Print column names of the 'rt' data frame colnames(rt)