Data Import

suppressPackageStartupMessages({
  require(ape)
  require(DT)
  require(ggplot2)
  require(plyr)
  require(reshape2)
  require(rjson)
  require(slam)
  require(vegan)
})
## Warning: package 'ape' was built under R version 3.3.2
## Warning: package 'ggplot2' was built under R version 3.3.2
## Warning: package 'slam' was built under R version 3.3.2
## Warning: package 'vegan' was built under R version 3.3.2
metadata   <- read.delim("MetaData_Ecig_Final.txt")
json       <- rjson::fromJSON(file="Data_Ecig_Final.biom")
WeightedDM <- as.matrix(read.csv("Weighted_UniFrac_Ecig.csv", row.names = 1))

TaxaIDs   <- sapply(json$rows,    function (x) x$id)
SampleIDs <- sapply(json$columns, function (x) x$id)

# Parse the BIOM file (sparse abundance matrix encoded in JSON)
otus <- slam::simple_triplet_matrix(
          i        = sapply(json$data, function (x) x[[1]]) + 1,
          j        = sapply(json$data, function (x) x[[2]]) + 1,
          v        = sapply(json$data, function (x) x[[3]]),
          nrow     = length(TaxaIDs),
          ncol     = length(SampleIDs),
          dimnames = list(TaxaIDs, SampleIDs))

rownames(metadata)          <- metadata[['Sample']]
metadata                    <- metadata[colnames(otus),]
metadata[['Observed OTUs']] <- colapply_simple_triplet_matrix(otus, function (x) { sum(x>0) })
metadata[['Shannon']]       <- colapply_simple_triplet_matrix(otus, function (x) {
                                 x <- x[x>0]; -1 * sum((x / sum(x)) * log(x / sum(x))) })

# Roll up OTU abundances by genus and convert to relative abundance (%)
otu2genus <- setNames(
  nm     = sapply(json$rows, function (x) { x$id }),
  object = sapply(json$rows, function (x) {
    paste(collapse = "; ", sub("^_+", "", x$metadata$taxonomy))
  })
)
otu2genus <- factor(unname(otu2genus[rownames(otus)]))
genera    <- slam::rollup(otus, 1L, otu2genus, sum)
genera$v  <- genera$v / col_sums(genera)[genera$j]


remove("otus", "otu2genus", "json", "TaxaIDs", "SampleIDs")

Figure 1

long <- melt(
  data          = metadata[,c("SampleType", "Group", "Observed OTUs", "Shannon")], 
  id.vars       = c("SampleType", "Group"), 
  variable.name = "Metric", 
  value.name    = "Diversity" )

for (i in levels(long[['SampleType']])) {
  df <- subset(long, SampleType==i)
  
  gg <- ggplot(df, aes(x=Group, y=Diversity, color=Group, fill=Group)) +
    geom_boxplot(position=position_dodge(.79), outlier.shape=NA, alpha=.3, size=1) +
    geom_point(position=position_dodge(.79), alpha=.5, size=1.8) +
    facet_wrap('Metric', scales = "free_y") + ylim(0, NA) + 
    scale_fill_manual( values = c("Con"="#e69f00", "EC"="#56b4e9", "TS"="#009e73")) + 
    scale_color_manual(values = c("Con"="#e69f00", "EC"="#56b4e9", "TS"="#009e73")) + 
    labs(x=NULL, y=i) + 
    theme(text = element_text(size=20), legend.position = 'none')
  print(gg)
}