# KEGG enrichment bubble plot.
#
# Reads a tab-separated enrichment table, converts the GeneRatio / Count
# columns to numeric, adds a -log10(pvalue) column, orders the pathway
# descriptions, and draws a bubble plot (point size = Count, colour =
# -log10 p-value) that is saved to "LUADKEGG20.pdf" and printed.

library(plyr)
library(stringr)
library(ggplot2)
library(grid)

# ---- Load data ----
enrichment_sxbd <- read.table(
  "~/KEGGTOP20.txt",
  header = TRUE,
  row.names = NULL,
  sep = "\t",
  quote = ""
)
head(enrichment_sxbd)

# ---- Helpers ----

# Convert strings holding whole numbers ("3"), decimals ("1.5"),
# fractions ("2/3") or mixed numbers ("1 1/2") to numeric values.
#
# x: a vector; it is coerced with as.character() first, so factor,
#    integer and character columns are all accepted.
# Returns an unnamed numeric vector of the same length as `x`
# (numeric(0) for empty input).
# Stops with an error if any element matches none of the four formats
# (this includes NA).
mixedToFloat <- function(x) {
  x <- as.character(x)

  is_whole <- grepl("^-?\\d+$", x)
  is_fraction <- grepl("^-?\\d+\\/\\d+$", x)
  is_decimal <- grepl("^-?\\d+\\.\\d+$", x)
  is_mixed <- grepl("^-?\\d+ \\d+\\/\\d+$", x)

  recognised <- is_whole | is_fraction | is_decimal | is_mixed
  if (!all(recognised)) {
    stop(
      "mixedToFloat: unsupported number format: ",
      paste(unique(x[!recognised]), collapse = ", "),
      call. = FALSE
    )
  }

  # Pull the i-th piece of every split string as a character vector.
  piece <- function(parts, i) {
    as.numeric(vapply(parts, `[`, character(1), i))
  }

  out <- numeric(length(x))

  is_plain <- is_whole | is_decimal
  out[is_plain] <- as.numeric(x[is_plain])

  fraction_parts <- strsplit(x[is_fraction], "/", fixed = TRUE)
  out[is_fraction] <- piece(fraction_parts, 1L) / piece(fraction_parts, 2L)

  mixed_parts <- strsplit(x[is_mixed], "[ /]")
  # The leading minus applies to the whole mixed number, so "-1 1/2" is
  # -(1 + 1/2) = -1.5. The sign is read from the text (not from the
  # parsed whole part) so that "-0 1/2" is handled as well.
  mixed_sign <- ifelse(startsWith(x[is_mixed], "-"), -1, 1)
  out[is_mixed] <- mixed_sign * (
    abs(piece(mixed_parts, 1L)) +
      piece(mixed_parts, 2L) / piece(mixed_parts, 3L)
  )

  out
}

# Quick sanity check of the helper (auto-printed when the script runs).
mixedToFloat(c("1 1/2", "2 3/4", "2/3", "11 1/4", "1"))

# ---- Prepare columns ----
enrichment_sxbd$GeneRatio <- mixedToFloat(enrichment_sxbd$GeneRatio)
enrichment_sxbd$Count <- mixedToFloat(enrichment_sxbd$Count)

# Add the -log10(pvalue) column under its final name directly instead of
# adding a placeholder column and then overwriting every column name.
log_name <- paste0("negLog10_", "pvalue")
enrichment_sxbd[[log_name]] <- -log10(enrichment_sxbd$pvalue)

# ---- Order the pathway descriptions ----
# Count how often each description occurs, then sort by that frequency,
# GeneRatio and -log10 p-value to get the y-axis order.
enrichment_sxbd_freq <- as.data.frame(table(enrichment_sxbd$Description))
colnames(enrichment_sxbd_freq) <- c("Description", "IDctct")
head(enrichment_sxbd_freq)

enrichment_sxbd2 <- merge(
  enrichment_sxbd,
  enrichment_sxbd_freq,
  by = "Description"
)
enrichment_sxbd3 <- enrichment_sxbd2[
  order(
    enrichment_sxbd2$IDctct,
    enrichment_sxbd2$GeneRatio,
    enrichment_sxbd2$negLog10_pvalue
  ),
]
term_order <- unique(enrichment_sxbd3$Description)
enrichment_sxbd$Description <- factor(
  enrichment_sxbd$Description,
  levels = term_order,
  ordered = TRUE
)

# ---- Build the plot ----
color_v <- c("green", "red")

# NOTE(review): the x-axis label says "(%)" but GeneRatio is plotted as
# returned by mixedToFloat (no multiplication by 100) -- confirm intent.
p <- ggplot(enrichment_sxbd, aes(x = GeneRatio, y = Description)) +
  labs(x = "GeneRatio (%)", y = "KEGG Pathway") +
  labs(title = "")
p <- p +
  geom_point(aes(size = Count, color = negLog10_pvalue)) +
  scale_colour_gradient(
    low = color_v[1],
    high = color_v[2],
    name = "negLog10_pvalue"
  )
# Wrap long pathway names so the y-axis labels stay readable.
p <- p + scale_y_discrete(labels = function(x) str_wrap(x, width = 60))
p <- p +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )

# Legend position choices; `legend_pos_par` selects the one in use.
top <- "top"
bottom <- "bottom"
left <- "left"
right <- "right"
none <- "none"
legend_pos_par <- right

# ---- Figure size ----
# A value of 0 for either dimension means "derive the size from the
# number of distinct descriptions".
uwid <- 0
vhig <- 12
if (uwid == 0 || vhig == 0) {
  x_len <- length(unique(enrichment_sxbd$Description))
  if (x_len < 10) {
    vhig <- 10
  } else if (x_len < 20) {
    vhig <- 10 + (x_len - 10) / 3
  } else if (x_len < 100) {
    vhig <- 13 + (x_len - 20) / 5
  } else {
    vhig <- 40
  }
  uwid <- vhig
  # Leave extra horizontal room when the legend sits beside the panel.
  if (legend_pos_par %in% c("left", "right")) {
    uwid <- 1.5 * uwid
  }
}

p <- p + theme(legend.position = legend_pos_par)
# NOTE(review): `size` for line elements is deprecated in favour of
# `linewidth` in recent ggplot2 releases; it is kept here so the script
# still runs on older ggplot2 versions -- confirm the target version.
p <- p +
  theme(
    panel.grid = element_blank(),
    panel.border = element_blank(),
    legend.background = element_blank(),
    axis.line.x = element_line(size = 0.4, colour = "black", linetype = "solid"),
    axis.line.y = element_line(size = 0.4, colour = "black", linetype = "solid"),
    axis.ticks = element_line(size = 0.4)
  )

# ---- Save and show ----
ggsave(
  filename = "LUADKEGG20.pdf",
  plot = p,
  dpi = 300,
  width = uwid,
  height = vhig,
  units = "cm"
)
p