# Libraries --------------------------------------------------------------- library(beadplexr) library(ggplot2) library(cowplot) library(dplyr) library(purrr) library(tidyr) library(readr) library(stringr) # Plot defaults ----------------------------------------------------------- theme_paper <- function(){ theme_cowplot(font_size = 6, line_size = 0.2) + theme(axis.line.x = element_line(), axis.line.y = element_line()) + theme(legend.background = element_blank(), legend.key.size = unit(0.6, "lines"), legend.margin = margin(), legend.title = element_text(size = 5), legend.text = element_text(size = 5)) } theme_set(theme_paper()) update_geom_defaults("point", list(size = 1, stroke = 0.2)) update_geom_defaults("boxplot", list(size = 0.3)) update_geom_defaults("density", list(size = 0.3)) update_geom_defaults("line", list(size = 0.3)) update_geom_defaults("hline", list(size = 0.2, colour = "black")) update_geom_defaults("smooth", list(colour = "black", linetype = "dashed", size = 0.3)) update_geom_defaults("text", list(size = 1.75)) # Load data --------------------------------------------------------------- data(lplex) # Load one of the panels distributed with the package, see ?load_panel() for # the included panels panel_info <- load_panel(.panel_name = "Human Growth Factor Panel (13-plex)") # Create figure 2 --------------------------------------------------------- example_set <- lplex[["K3-C0-1.fcs"]] # Create a monochrome plot and make the axis labels more descriptive fcs_plot <- example_set %>% facs_plot(.x = "FSC-A", .y = "SSC-A") + labs(x = "Size (FSC-A)", y = "Granularity (SSC-A)") # Create a pseudo-color plot and make the axis labels more descriptive fcs_hex <- example_set %>% facs_hexbin(.x = "FSC-A", .y = "SSC-A") + labs(x = "Size (FSC-A)", y = "Granularity (SSC-A)") plot_grid(fcs_plot, fcs_hex, labels = "AUTO", label_size = 8) ggsave("Figure_02.pdf", width = 6, height = 2, title = "Figure 2") # Identify analytes ------------------------------------------------------- # The function `identify_legendplex_analyte()` used here is convenience # around the clustering work horse `identify_analyte`. The # `identify_legendplex_analyte()` identifies the bead populations according # to size and granularity, and for each of the two populations the individual # bead populations are identified # # The function requires a named list with analytes from the Panel # Information, and a list with a list of key-value pairs giving the arguments # for the bead indentification on the forward and side scatter, and a list of # key-value pairs giving arguments for the bead identification in each # subpopulation in the APC channel. # # The argument .trim gives the fraction of events furthest from the centers of # the groups that should be removed. The population center is found by a # Gaussian kernel estimate. In this case we remove 1% and 3% of the of the # events based on their distance to the group center. # # The inner lists can be named, but this is not required. args_ident_analyte <- list(fs = list(.parameter = c("FSC-A", "SSC-A"), .column_name = "Bead group", .trim = 0.01), analyte = list(.parameter = "FL6-H", .column_name = "Analyte ID", .trim = 0.03)) # The FCS-data is a list of samples, which we combine before cluster # identification. analytes_identified <- lplex %>% bind_rows(.id = "Sample") %>% identify_legendplex_analyte(.analytes = panel_info$analytes, .method_args = args_ident_analyte) # Create figure 3 --------------------------------------------------------- # Add factors to `Analyte ID` for pretty plot example_set <- analytes_identified %>% filter(Sample == "K3-C0-1.fcs") %>% mutate(`Analyte ID` = factor(`Analyte ID`, levels = names( c(panel_info$analytes$A, panel_info$analytes$B) ))) # A dotplot as in figure 1, with the bead populations identified, and each # event categorized beads_fsc_ssc <- example_set %>% # The NAs dissapear when we plot, so we turn them to string mutate(`Bead group` = if_else(is.na(`Bead group`), "NA", `Bead group`)) %>% facs_scatter(.x = "FSC-A", .y = "SSC-A", .beads = "Bead group") + scale_color_grey() + scale_shape_manual(values = c("A" = 19, "B" = 19, "NA" = 1)) + labs(x = "Size (FSC-A)", y = "Granularity (SSC-A)") # Density plot of the analytes in bead population A bead_a_dens <- example_set %>% filter(`Bead group` == "A") %>% facs_density1d(.x = "FL6-H", .beads = "Analyte ID")+ labs(y = "Density", title = "Group A") + scale_fill_grey(na.value = "white") + labs(x = "APC fluorescence (FL6-H)") # Density plot of the analytes in bead population B bead_b_dens <- example_set %>% filter(`Bead group` == "B") %>% facs_density1d(.x = "FL6-H", .beads = "Analyte ID")+ labs(y = "Density", title = "Group B") + scale_fill_grey(na.value = "white") + labs(x = "APC fluorescence (FL6-H)") plot_grid(beads_fsc_ssc, bead_a_dens, bead_b_dens, labels = "AUTO", label_size = 8, ncol = 3) ggsave("Figure_03.pdf", width = 6, height = 2, title = "Figure 3") # Calculate analyte MFI --------------------------------------------------- # The mean fluorescence intensity is calculated for each sample and analyte. # The function `calc_analyte_mfi()` provides three ways of calculating the # MFI: geometric, harmonic, and arithmetic mean. analyte_mfi <- analytes_identified %>% filter(!is.na(`Analyte ID`)) %>% # Call `calc_analyte_mfi()` for each sample group_by(Sample) %>% do(calc_analyte_mfi(., .parameter = "FL2-H", .column_name = "Analyte ID", .mean_fun = "geometric")) %>% # Later we will fit the standard curve on a log-log scale, so we transform # here mutate(`FL2-H` = log10(`FL2-H`)) # Helper function to extract the sample number ---------------------------- #' Cast sample ID to numeric #' #' @param .s A string with the sample ID pattern to be cast #' @param .pattern A string giving the pattern #' #' @return #' A numeric #' as_numeric_sample_id <- function(.s, .pattern = c("C[0-9]+", "S[0-9]+")){ .pattern <- match.arg(.pattern) # Extract the pattern defined just above, remove the first element, and # cast to a numeric .s %>% str_extract(.pattern) %>% str_sub(start = -1L) %>% as.numeric() } # Split in standard and sample -------------------------------------------- # We need to fit a standard curve on the standard samples, and use this curve # to calculate the concentration of the experimental samples. Here we split the data # set in two: one with the standard samples and one with the experimental samples. # # We need to order the standard samples from high to low in order to # calculate the concentration of the analytes in the standard sample. # Incorporating the information into the sample name in terms of an easily # parsable pattern is a good practice. # All standard samples have the pattern C[number] standard_data <- analyte_mfi %>% ungroup() %>% filter(str_detect(Sample, "C[0-9]+")) %>% mutate(`Sample number` = as_numeric_sample_id(Sample, .pattern = "C")) %>% select(-Sample) # All non-standards are experimental samples... we could also filter on # S[number] experiment_data <- analyte_mfi %>% ungroup() %>% filter(!str_detect(Sample, "C[0-9]+")) %>% mutate(`Sample number` = as_numeric_sample_id(Sample, .pattern = "S")) %>% select(-Sample) # To the standard data we have to add additional information such the start # concentration of each standard analyte and the dilution factor, as well as # as the analyte names (analyte IDs by themselves do not make much sense). # # The concentration of the standard samples is calculated using # `calc_std_conc()`, which take a vector of sample numbers for ordering, a # start concentration and a dilution factor. standard_data <- standard_data %>% left_join(as_data_frame_analyte(panel_info$analytes), by = "Analyte ID") %>% rename(`Analyte name` = name) %>% group_by(`Analyte ID`, `Analyte name`) %>% mutate( Concentration = calc_std_conc( `Sample number`, concentration, .dilution_factor = panel_info$std_dilution ) ) %>% # Later we will fit the standard curve on a log-log scale, so we transform # here mutate(Concentration = log10(Concentration)) %>% select(-concentration, -`Bead group`) # Nest and combine standard and experimental data --------------------------------- # Nested data.frames is a great way of combining and working with complex # data structures. # # First we pack all the standard data in to a data.frame with a set of # data.frames standard_data <- standard_data %>% nest(-`Analyte ID`, .key = "Standard data") # The the same for all the experimental data experiment_data <- experiment_data %>% nest(-`Analyte ID`, .key = "Experimental data") # Since both structures are data.frames we can easily combine them plex_data <- inner_join(standard_data, experiment_data, by = "Analyte ID") # Calculate standard curves ----------------------------------------------- # For each of the analytes we calculate the standard curve. Working with # nested data.frames means that we have to loop over each row to calculate # the standard curve using the data.frame in "Standard data" # # When clustering is performed with mclust, the package mclust is # loaded in the background (an unfortunate necessity). The mclust # package also has a function called `map`, so an unlucky side effect # of clustering with mclust, is that we need to be specify which map # function we use. plex_data <- plex_data %>% group_by(`Analyte ID`) %>% mutate(`Model fit` = purrr::map(`Standard data`, fit_standard_curve, .parameter = "FL2-H")) # Calculate experimental sample concentrations ------------------------------------ # Using the standard curve just calculated, we can back-calculate the # concentration of the standard concentrations, and more importantly the # concentration of the experimental samples plex_data <- plex_data %>% mutate(`Standard data` = purrr::map2(`Standard data`, `Model fit`, calculate_concentration, .parameter = "FL2-H")) %>% mutate(`Experimental data` = purrr::map2(`Experimental data`, `Model fit`, calculate_concentration, .parameter = "FL2-H")) # Add concentration plots ------------------------------------------------- # We can also loop over each row and add plots to the data.frame plex_data <- plex_data %>% mutate(`Std curve` = purrr::pmap(list(.data = `Standard data`, .model = `Model fit`, .title = `Analyte name`), plot_std_curve, .parameter = "FL2-H")) %>% mutate(`Std conc` = purrr::map(`Standard data`, plot_target_est_conc)) %>% mutate(`Est curve` = purrr::pmap(list(`Experimental data`, `Standard data`, `Model fit`, `Analyte name`), plot_estimate, .parameter = "FL2-H")) # Create figure 4 --------------------------------------------------------- # The analyte Angiopoietin-2 is on the second row of the nested data.frame. # Each nested element in a nested data.frame is actually a list, so we select # the element std_fit <- plex_data$`Std curve`[[2]] + labs(x = "Standard concentration (log10[pg/ml])", y = "PE (FL2-H; log10[MFI])", title = NULL) std_back_calc <- plex_data$`Std conc`[[2]] + labs(x = "Standard concentration (log10[pg/ml])", y = "Calculated concentration (log10[pg/ml])", title = NULL) sample_est <- plex_data$`Est curve`[[2]] + labs(x = "Standard concentration (log10[pg/ml])", y = "PE (FL2-H; log10[MFI])", title = NULL) + scale_linetype_identity() plot_grid(std_fit, std_back_calc, sample_est, labels = "AUTO", label_size = 8, ncol = 3) ggsave("Figure_04.pdf", width = 6, height = 2, title = "Figure 4") # Extract analyte concentration ------------------------------------------- plex_data %>% unnest(`Experimental data`) %>% # Make the names a little more telling and transform them back to useful # concentrations rename(`Concentration (pg/ml)` = Calc.conc, `Concentration error` = `Calc.conc error`) %>% mutate(`Concentration (pg/ml)` = 10^ `Concentration (pg/ml)`, `Concentration error` = 10^`Concentration error` )