###Importing and cleaning the data, and running the Bayesian model using Stan # Loading necessary packages ---------------------------------------------- library(tidyverse) library(cmdstanr) #The user must have CmdStan installed # Importing data ---------------------------------------------------------- ##Note: The data required for this analysis can be downloaded from Dryad ##digital repository (https://doi.org/10.5061/dryad.76hdr7stg). The Stan ##model file is included in the PeerJ manuscript supplementary material. The ##Stan model will be compiled in a later section of the script below. ##Importing the raw data (in this case, from a subdirectory folder named "Data") raw_data <- read_csv("Data/JAV_-_parasite_and_host_data.csv") # Initial data cleaning and restructuring --------------------------------- ##Initial cleaning ptarm_data <- raw_data %>% mutate(across(where(is.character), str_trim), #Removing starting and trailing #whitespace from character #strings Year = factor(CollYear)) %>% #Converting collection year to a factor rename("E. muta" = em_n, #Converting species codes to explicit names "E. rjupa" = er_n, "C. caudinflata" = cc_n, "T. tenuis" = tt_n, "P. serpentulus" = ps_n, "M. c. tetrathyridia" = mctet, "T. lagopi (presence)" = tlinf, "T. lagopi" = tltot, "S. holoaspis" = shtot, "M. islandicus" = mitot, "M. borealis" = mbyof, "M. lagopus (score)" = ml_14, "M. lagopus" = ml_fi, #From filter "G. lagopi" = gltot, "L. affinis" = latot, "A. lagopi" = altot, "O. chloropus" = oc_n, "O. chloropus (pupae)" = ocpup, "C. garei (flea)" = Flea) %>% filter(Phase == "early") %>% #Removing late-sampled birds, as that different #sampling protocol does not ensure accurate #quantification of the ectoparasites dplyr::select(-c( #Removing columns for parasites whose prevalences were too #low, or quantification methods were only proxies (see #explanation in manuscript at doi: 10.1111/jav.02472) "P. serpentulus", "M. c. tetrathyridia", "T. lagopi (presence)", "M. lagopus (score)", "O. chloropus (pupae)", "C. garei (flea)" ), -CollYear, -Phase) #Removing other unnecessary columns ##Converting main data to long format (each observation/row becomes one ##parasite abundance in one host) long_data <- ptarm_data %>% pivot_longer(`E. muta`:`O. chloropus`, names_to = "Parasite", values_to = "Abundance") %>% drop_na() %>% #Removing empty parasite abundance numbers mutate(Endo_ecto = ifelse(Parasite %in% c("E. muta", "E. rjupa", "C. caudinflata", "T. tenuis"), "Endoparasite", "Ectoparasite"), #Specifying endoparasite vs. #ectoparasite Par_group = case_when( #Broad parasite grouping Parasite %in% c("E. muta", "E. rjupa") ~ "Coccidian", Parasite %in% c("C. caudinflata", "T. tenuis") ~ "Nematode", Parasite %in% c("A. lagopi", "L. affinis", "G. lagopi", "O. chloropus") ~ "Insect", TRUE ~ "Mite" )) %>% mutate(across(where(is.character), factor)) #Converting character strings to #factors # Preparing data for modeling --------------------------------------------- ##Function for Poulin's D calculation d_calc <- function(x) { x_sorted <- sort(x) 1 - (2 * sum(cumsum(x_sorted))) / (mean(x) * length(x) * (length(x) + 1)) } ##Summarizing sample data for modeling d_summ_data <- long_data %>% group_by(Parasite, Endo_ecto, Par_group, Year) %>% summarize(D = d_calc(Abundance), #Quantifying aggregation Mean_abund = mean(Abundance)) #Quantifying mean abundances ##Conversion of factors to numeric (for Stan), and transforming/ ##standardizing mean abundances d_prep_data <- d_summ_data %>% ungroup() %>% filter(across(where(is.numeric), ~ is.finite(.x))) %>% mutate(across(where(is.factor), ~ as.numeric(.x)), Mean_abund = (log2(Mean_abund) - mean(log2(Mean_abund))) / sd(log2(Mean_abund))) ##Final conversion to list, and providing vector lengths as variables for Stan d_mod_data <- d_prep_data %>% as.list() %>% c(n = length(.$Mean_abund), n_Year = max(.$Year), n_Parasite = max(.$Parasite), n_Endo_ecto = max(.$Endo_ecto), n_Par_group = max(.$Par_group)) # Fitting Stan model ------------------------------------------------------ ##Compiling the Stan model (here, saved in a subdirectory folder named ##"Stan model"), and loading in R stan_mod <- cmdstan_model("Stan model/Beta regression - best-fitting.stan") ##Running the MCMC chains in Stan and saving the model output model_fit <- stan_mod$sample(data = d_mod_data, seed = 3, chains = 3, parallel_chains = 3, iter_warmup = 2500, iter_sampling = 1500, max_treedepth = 10, adapt_delta = 0.99) ##Fitted model summary ##(Note that the parameters may not be listed in the exact same order as they ##appear in Table 1 of the manuscript, and that here phi is equivalent ##to what is listed as nu in the manuscript.) model_fit$summary(variables = c("theta", "beta", "phi")) #-------------------------------------------------------------------------