# Clear the current workspace rm(list = ls()) # Load required packages library(limma) library(ggplot2) # Define input files expFile <- "geneexpr.txt" # Expression input file clusterFile <- "Cluster.txt" # Clustering file # Read the input files and preprocess rt <- read.table(expFile, header = TRUE, sep = "\t", check.names = FALSE) rt <- as.matrix(rt) exp <- rt dimnames <- list(rownames(exp), colnames(exp)) data <- matrix(as.numeric(as.matrix(exp)), nrow = nrow(exp), dimnames = dimnames) data <- avereps(data) data <- data[rowMeans(data) > 0, ] data <- t(data) # Perform PCA analysis data.pca <- prcomp(data, scale. = TRUE) pcaPredict <- predict(data.pca) write.table(pcaPredict, file = "newTab.xls", quote = FALSE, sep = "\t") # Read the clustering file cluster <- read.table(clusterFile, header = TRUE, sep = "\t", check.names = FALSE, row.names = 1) cluster <- as.vector(cluster[, 1]) # Set colors bioCol <- c("#0066FF", "red", "#FF0000", "#6E568C", "#7CC767", "#223D6C", "#D20A13", "#FFD121", "#088247", "#11AA4D") CluCol <- bioCol[1:length(levels(factor(cluster)))] # Visualization PCA <- data.frame(PC1 = pcaPredict[, 1], PC2 = pcaPredict[, 2], cluster = cluster) PCA.mean <- aggregate(PCA[, 1:2], list(cluster = PCA$cluster), mean) pdf(file = "PCA.pdf", height = 5, width = 6.5) ggplot(data = PCA, aes(PC1, PC2)) + geom_point(aes(color = cluster)) + scale_colour_manual(name = "cluster", values = CluCol) + theme_bw() + theme(plot.margin = unit(rep(1.5, 4), 'lines')) + annotate("text", x = PCA.mean$PC1, y = PCA.mean$PC2, label = PCA.mean$cluster, cex = 7) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) dev.off()