# Libraries ---------------------------------------------------------------

library(beadplexr)
library(ggplot2)
library(cowplot)
library(dplyr)
library(purrr)
library(tidyr)
library(readr)
library(stringr)


# Plot defaults -----------------------------------------------------------

theme_paper <- function(){
  theme_cowplot(font_size = 6, line_size = 0.2) + 
    theme(axis.line.x = element_line(),
          axis.line.y = element_line()) + 
    theme(legend.background = element_blank(),
          legend.key.size = unit(0.6, "lines"),
          legend.margin = margin(),
          legend.title = element_text(size = 5),
          legend.text = element_text(size = 5))
}
theme_set(theme_paper())

update_geom_defaults("point", list(size = 1, stroke = 0.2))
update_geom_defaults("boxplot", list(size = 0.3))
update_geom_defaults("density", list(size = 0.3))
update_geom_defaults("line", list(size = 0.3))
update_geom_defaults("hline", list(size = 0.2, colour = "black"))
update_geom_defaults("smooth", list(colour = "black", linetype = "dashed", size = 0.3))
update_geom_defaults("text", list(size = 1.75))


# Load data ---------------------------------------------------------------

data(lplex)
# Load one of the panels distributed with the package, see ?load_panel() for
# the included panels
panel_info <- load_panel(.panel_name = "Human Growth Factor Panel (13-plex)")


# Create figure 2 ---------------------------------------------------------

example_set <- lplex[["K3-C0-1.fcs"]]

# Create a monochrome plot and make the axis labels more descriptive
fcs_plot <- example_set %>% 
  facs_plot(.x = "FSC-A", .y = "SSC-A") + 
  labs(x = "Size (FSC-A)", y = "Granularity (SSC-A)")

# Create a pseudo-color plot and make the axis labels more descriptive
fcs_hex <- example_set %>% 
  facs_hexbin(.x = "FSC-A", .y = "SSC-A") + 
  labs(x = "Size (FSC-A)", y = "Granularity (SSC-A)")

plot_grid(fcs_plot,
          fcs_hex,
          labels = "AUTO", label_size = 8) 

ggsave("Figure_02.pdf", width = 6, height = 2, title = "Figure 2")


# Identify analytes -------------------------------------------------------

# The function `identify_legendplex_analyte()` used here is convenience
# around the clustering work horse `identify_analyte`. The
# `identify_legendplex_analyte()` identifies the bead populations according
# to size and granularity, and for each of the two populations the individual
# bead populations are identified
#
# The function requires a named list with analytes from the Panel
# Information, and a list with a list of key-value pairs giving the arguments
# for the bead indentification on the forward and side scatter, and a list of
# key-value pairs giving arguments for the bead identification in each
# subpopulation in the APC channel.
#
# The argument .trim gives the fraction of events furthest from the centers of
# the groups that should be removed. The population center is found by a
# Gaussian kernel estimate. In this case we remove 1% and 3% of the of the
# events based on their distance to the group center. 
#
# The inner lists can be named, but this is not required.
args_ident_analyte <- list(fs = list(.parameter = c("FSC-A", "SSC-A"),
                                     .column_name = "Bead group",
                                     .trim = 0.01),
                           analyte = list(.parameter = "FL6-H",
                                           .column_name = "Analyte ID",
                                           .trim = 0.03))

# The FCS-data is a list of samples, which we combine before cluster
# identification.
analytes_identified <- lplex %>% 
  bind_rows(.id = "Sample") %>%
  identify_legendplex_analyte(.analytes = panel_info$analytes, 
                              .method_args = args_ident_analyte) 


# Create figure 3 ---------------------------------------------------------

# Add factors to `Analyte ID` for pretty plot
example_set <- analytes_identified %>% 
  filter(Sample == "K3-C0-1.fcs") %>%
  mutate(`Analyte ID` =
           factor(`Analyte ID`, levels = names(
             c(panel_info$analytes$A, panel_info$analytes$B)
           )))

# A dotplot as in figure 1, with the bead populations identified, and each
# event categorized
beads_fsc_ssc <- 
  example_set %>% 
  # The NAs dissapear when we plot, so we turn them to string
  mutate(`Bead group` = if_else(is.na(`Bead group`), "NA", `Bead group`)) %>% 
  facs_scatter(.x = "FSC-A", .y = "SSC-A", .beads = "Bead group") +
  scale_color_grey() +
  scale_shape_manual(values = c("A" = 19, "B" = 19, "NA" = 1)) +
  labs(x = "Size (FSC-A)", y = "Granularity (SSC-A)")

# Density plot of the analytes in bead population A
bead_a_dens <- example_set %>% 
  filter(`Bead group` == "A") %>% 
  facs_density1d(.x = "FL6-H", .beads = "Analyte ID")+
  labs(y = "Density", title = "Group A") +
  scale_fill_grey(na.value = "white") +
  labs(x = "APC fluorescence (FL6-H)")

# Density plot of the analytes in bead population B
bead_b_dens <- example_set %>% 
  filter(`Bead group` == "B") %>% 
  facs_density1d(.x = "FL6-H", .beads = "Analyte ID")+
  labs(y = "Density", title = "Group B") +
  scale_fill_grey(na.value = "white") +
  labs(x = "APC fluorescence (FL6-H)")

plot_grid(beads_fsc_ssc,
          bead_a_dens,
          bead_b_dens,
          labels = "AUTO", label_size = 8, ncol = 3) 

ggsave("Figure_03.pdf", width = 6, height = 2, title = "Figure 3")


# Calculate analyte MFI ---------------------------------------------------

# The mean fluorescence intensity is calculated for each sample and analyte.
# The function `calc_analyte_mfi()` provides three ways of calculating the
# MFI: geometric, harmonic, and arithmetic mean.
analyte_mfi <- analytes_identified %>% 
  filter(!is.na(`Analyte ID`)) %>% 
  # Call `calc_analyte_mfi()` for each sample
  group_by(Sample) %>% 
  do(calc_analyte_mfi(., .parameter = "FL2-H", 
                   .column_name = "Analyte ID", 
                   .mean_fun = "geometric")) %>% 
  # Later we will fit the standard curve on a log-log scale, so we transform
  # here
  mutate(`FL2-H` = log10(`FL2-H`))


# Helper function to extract the sample number ----------------------------

#' Cast sample ID to numeric
#'
#' @param .s A string with the sample ID pattern to be cast
#' @param .pattern A string giving the pattern
#'
#' @return
#' A numeric
#' 
as_numeric_sample_id <- function(.s, .pattern = c("C[0-9]+", "S[0-9]+")){
  .pattern <- match.arg(.pattern)

  # Extract the pattern defined just above, remove the first element, and
  # cast to a numeric
  .s %>%
    str_extract(.pattern) %>% 
    str_sub(start = -1L) %>% 
    as.numeric()
} 


# Split in standard and sample --------------------------------------------

# We need to fit a standard curve on the standard samples, and use this curve
# to calculate the concentration of the experimental samples. Here we split the data
# set in two: one with the standard samples and one with the experimental samples.
#
# We need to order the standard samples from high to low in order to
# calculate the concentration of the analytes in the standard sample.
# Incorporating the information into the sample name in terms of an easily
# parsable pattern is a good practice.

# All standard samples have the pattern C[number]
standard_data <- 
  analyte_mfi %>% 
  ungroup() %>% 
  filter(str_detect(Sample, "C[0-9]+")) %>% 
  mutate(`Sample number` = as_numeric_sample_id(Sample, .pattern = "C")) %>% 
  select(-Sample) 

# All non-standards are experimental samples... we could also filter on
# S[number]
experiment_data <- analyte_mfi %>% 
  ungroup() %>% 
  filter(!str_detect(Sample, "C[0-9]+")) %>% 
  mutate(`Sample number` = as_numeric_sample_id(Sample, .pattern = "S")) %>% 
  select(-Sample) 


# To the standard data we have to add additional information such the start
# concentration of each standard analyte and the dilution factor, as well as
# as the analyte names (analyte IDs by themselves do not make much sense).
#
# The concentration of the standard samples is calculated using
# `calc_std_conc()`, which take a vector of sample numbers for ordering, a
# start concentration and a dilution factor.
standard_data <- standard_data %>% 
  left_join(as_data_frame_analyte(panel_info$analytes), by = "Analyte ID") %>%
  rename(`Analyte name` = name) %>% 
  group_by(`Analyte ID`, `Analyte name`) %>%
  mutate(
    Concentration = calc_std_conc(
      `Sample number`,
      concentration,
      .dilution_factor = panel_info$std_dilution
    )
  ) %>% 
  # Later we will fit the standard curve on a log-log scale, so we transform
  # here
  mutate(Concentration = log10(Concentration)) %>%
  select(-concentration, -`Bead group`)

# Nest and combine standard and experimental data ---------------------------------

# Nested data.frames is a great way of combining and working with complex
# data structures.
#
# First we pack all the standard data in to a data.frame with a set of
# data.frames
standard_data <- standard_data %>% 
  nest(-`Analyte ID`, .key = "Standard data")

# The the same for all the experimental data
experiment_data <- experiment_data %>% 
  nest(-`Analyte ID`, .key = "Experimental data")

# Since both structures are data.frames we can easily combine them
plex_data <- inner_join(standard_data, experiment_data, by = "Analyte ID")


# Calculate standard curves -----------------------------------------------

# For each of the analytes we calculate the standard curve. Working with
# nested data.frames means that we have to loop over each row to calculate
# the standard curve using the data.frame in "Standard data"
#
# When clustering is performed with mclust, the package mclust is
# loaded in the background (an unfortunate necessity). The mclust
# package also has a function called `map`, so an unlucky side effect
# of clustering with mclust, is that we need to be specify which map
# function we use.

plex_data <- plex_data %>% 
  group_by(`Analyte ID`) %>% 
  mutate(`Model fit` = purrr::map(`Standard data`, 
                                  fit_standard_curve, 
                                  .parameter = "FL2-H"))


# Calculate experimental sample concentrations ------------------------------------

# Using the standard curve just calculated, we can back-calculate the
# concentration of the standard concentrations, and more importantly the
# concentration of the experimental samples
plex_data <- plex_data %>% 
mutate(`Standard data` = 
         purrr::map2(`Standard data`, `Model fit`, 
                     calculate_concentration, .parameter = "FL2-H")) %>% 
  mutate(`Experimental data` = 
           purrr::map2(`Experimental data`, `Model fit`, 
                       calculate_concentration, .parameter = "FL2-H"))


# Add concentration plots -------------------------------------------------

# We can also loop over each row and add plots to the data.frame
plex_data <-  plex_data %>% 
  mutate(`Std curve` =
           purrr::pmap(list(.data = `Standard data`, 
                            .model = `Model fit`, 
                            .title = `Analyte name`),
                       plot_std_curve, .parameter = "FL2-H")) %>% 
  mutate(`Std conc` =
           purrr::map(`Standard data`,
                      plot_target_est_conc)) %>% 
  mutate(`Est curve` = 
           purrr::pmap(list(`Experimental data`, 
                            `Standard data`, 
                            `Model fit`, 
                            `Analyte name`), 
                       plot_estimate, .parameter = "FL2-H"))


# Create figure 4 ---------------------------------------------------------

# The analyte Angiopoietin-2 is on the second row of the nested data.frame.
# Each nested element in a nested data.frame is actually a list, so we select
# the element

std_fit <- plex_data$`Std curve`[[2]] + 
  labs(x = "Standard concentration (log10[pg/ml])", 
       y = "PE (FL2-H; log10[MFI])", 
       title = NULL) 

std_back_calc <- plex_data$`Std conc`[[2]] + 
  labs(x = "Standard concentration (log10[pg/ml])", 
       y = "Calculated concentration (log10[pg/ml])", 
       title = NULL) 

sample_est <- plex_data$`Est curve`[[2]] + 
  labs(x = "Standard concentration (log10[pg/ml])", 
       y = "PE (FL2-H; log10[MFI])", 
       title = NULL) +
  scale_linetype_identity()


plot_grid(std_fit,
          std_back_calc,
          sample_est,
          labels = "AUTO", label_size = 8, ncol = 3) 

ggsave("Figure_04.pdf", width = 6, height = 2, title = "Figure 4")


# Extract analyte concentration -------------------------------------------

plex_data %>% 
  unnest(`Experimental data`) %>% 
  # Make the names a little more telling and transform them back to useful
  # concentrations
  rename(`Concentration (pg/ml)` = Calc.conc, 
         `Concentration error` = `Calc.conc error`) %>% 
  mutate(`Concentration (pg/ml)` = 10^ `Concentration (pg/ml)`,
         `Concentration error` = 10^`Concentration error` )