Kakapo Fertility Projpred modelling
set.seed(1111)
<- setwd(dirname(rstudioapi::getSourceEditorContext()$path))
curr_dir <- file.path(curr_dir, "./Supplemental_Data_S1.csv")
clutch_file <- file.path(curr_dir, './Supplemental_Data_S2.csv')
kakapo_demog_file
<- "./figure/"
outdir <- 1.00 # proportion of posterior which intersects with ROPE.
ropeci <- 4
nchains <- 30000 # iterations for brm niter
1 Credits
- Markdown from
readthedown
template, from packagermdformats
. - Code written by Alejandro Catalina and Andrew Digby.
2 Purpose
Code and results for the publication Hidden impacts of conservation management on fertility of the critically endangered kakapo
. Model validation and diagnostics not included.
- Use projected predictive variable selection package
projpred
to assess factors influencing kakapo fertility using a Bayesian model. - Compare multiple copulation frequency with kakapo density and sex ratio
3 Requirements
- Requires the
workflow
branch ofprojpred
.
4 Data
Input data:
- Clutch fertility data frame from file Supplemental_Data_S1.csv.
- Kakapo numbers per island per year from Supplemental_Data_S2.csv.
5 Fertility model
5.1 Load data
<- function(fit, newdata = NULL) {
ref_predfun return(t(posterior_linpred(fit,
newdata = newdata,
transform = FALSE
)))
}
# Retrieve model data
<- function(object, newdata = NULL, wrhs = NULL,
extract_model_data orhs = NULL, extract_y = TRUE) {
if (!extract_y) {
<- NULL
resp_form else {
} <- ~.y
resp_form
}
if (is.null(newdata)) {
<- data
newdata
}
if (is.null(wrhs) && !is.null(object) &&
!is.null(object$weights) && length(object$weights) != 0) {
<- ~weights
wrhs <- cbind(newdata, weights = object$weights)
newdata
}
if (is.null(orhs) && !is.null(object) &&
!is.null(object$offset) && length(object$offset) != 0) {
<- ~offset
orhs <- cbind(newdata, offset = object$offset)
newdata
}
<- nlist(object, newdata, wrhs, orhs, resp_form)
args return(do_call(projpred:::.extract_model_data, args))
}
# For more readable parameter names in plots
<- function(string){
fn_labels <- sub('b_','',string)
string <- gsub('scmother_age','Mother age',string)
string <- gsub('scfather_age','Father age',string)
string <- gsub('schandrear.mother','Mother HR',string)
string <- gsub('schandrear.father','Father HR',string)
string <- gsub('handrear.mother','Mother HR',string)
string <- gsub('handrear.father','Father HR',string)
string <- gsub('HRTRUE', 'HR', string)
string <- gsub('scnumber.of.copulations','No. copulations',string)
string <- gsub('scnumber.of.males','No. males',string)
string <- gsub('sccopulations.mates','Mother copulations: ',string)
string <- gsub('copulations.mates','Mother copulations: ',string)
string <- gsub('Intercept','Intercept',string)
string <- gsub('Differentmales','Different males',string)
string <- gsub('2Pcopulations','1 male, >1 copulation',string)
string <- gsub('2\\+ copulations','1 male, >1 copulation',string)
string <- gsub('sckin','Mother/father kinship',string)
string <- gsub("scprev.male.copulations", "Father previous copulations", string)
string <- gsub("scprev.female.copulations", "Mother previous copulations", string)
string <- gsub("sd_mother__Intercept", "SD(Mother intercept)", string)
string <- gsub("sd_father__Intercept", "SD(Father intercept)", string)
string <- gsub("sd_year__Intercept", "SD(Year intercept)", string)
string <- gsub(": $","",string) # remove colon at end of string (to remove "Mating:" if no subsequent variable)
string
return(string)
}
- Remove observations with missing values
- Scale and centre continuous predictors to mean of zero and SD=0.5 (see http://www.stat.columbia.edu/~gelman/research/published/priors11.pdf)
# Read clutch data file:
<- read.csv(clutch_file, header = T, stringsAsFactors = T) %>%
full_data mutate(year = as.factor(year),
handrear.mother = as.factor(handrear.mother),
handrear.father = as.factor(handrear.father)) %>%
rename_with(~ gsub("\\.","_", .x))
cat(sprintf("Loaded %d records from %s\n", nrow(full_data), basename(clutch_file)))
Loaded 225 records from Supplemental_Data_S1.csv
# Retain only variables of interest and drop rows with missing values
<- full_data %>% dplyr::select(clutch_fert, mother_age, handrear_mother, father_age,
data %>%
handrear_father, copulations_mates, prev_male_copulations, prev_female_copulations, kin, father, mother, year) ::drop_na()
tidyr
# Scale and centre inputs to mean of zero & SD = 0.5
<- data %>% mutate(
data scmother_age = 0.5*scale(mother_age, center=T, scale=T) ,
scfather_age = 0.5*scale(father_age, center=T, scale=T),
scprev_male_copulations = 0.5*scale(prev_male_copulations, center=T, scale=T),
scprev_female_copulations = 0.5*scale(prev_female_copulations, center=T, scale=T),
sckin = 0.5*scale(kin, center=T, scale=T),
)
# Convert matrix columns to vectors (result of using scale)
<- data %>% mutate(scmother_age = as.vector(scmother_age),
data scfather_age = as.vector(scfather_age),
sckin = as.vector(sckin),
scprev_male_copulations = as.vector(scprev_male_copulations),
scprev_female_copulations = as.vector(scprev_female_copulations))
Show predictor distributions for numeric variables
<- c('mother_age'='Mother age (years)', 'father_age' = 'Father age (years)', 'prev_male_copulations' = 'Father previous copulations', 'prev_female_copulations' = 'Mother previous copulations', 'kin' = 'Mother/father kinship')
facetlabs <- data %>% dplyr::select(mother_age, father_age, prev_male_copulations, prev_female_copulations, kin)
df <- df %>% pivot_longer(everything(), names_to='variable') %>%
dfl mutate(variable = factor(variable, levels=c('mother_age', 'father_age', 'prev_male_copulations', 'prev_female_copulations', 'kin')))
<- brewer.pal(name='Set2',n=3)[1]
fill_col1 <- brewer.pal(name='Set2',n=3)[2]
fill_col2 <- brewer.pal(name='Set2',n=3)[3]
fill_col3 ggplot(dfl, aes(x=value)) +
# geom_histogram(data=subset(dfl, variable %in% c('mother_age', 'father_age', 'prev_male_copulations')), binwidth = 2, closed = 'left') +
geom_histogram(data=subset(dfl, variable %in% c('mother_age', 'father_age')), breaks = seq(0, 50, 2), closed = 'right', fill = fill_col1, colour = 'black' ) +
geom_histogram(data=subset(dfl, variable %in% c('prev_male_copulations')), breaks = seq(0,40,2), closed = 'right', fill = fill_col2, colour = 'black' ) +
geom_histogram(data=subset(dfl, variable %in% c('prev_female_copulations')), breaks = seq(0,20,1), closed = 'right', fill = fill_col2, colour = 'black' ) +
geom_histogram(data=subset(dfl, variable %in% c('kin')), breaks = seq(0, 0.3, 0.01), closed = 'right', fill = fill_col3, colour = 'black' ) +
facet_wrap(~variable, scales='free', labeller=labeller(variable=facetlabs), ncol=2) +
scale_x_continuous(expand = c(0.01,0)) +
labs(y = 'Frequency', x = '') +
guides(fill = 'none')
ggsave("./figure/figure_s2_predictor_distributions.pdf", height = 7, width = 8)
# Random variables:
<- c("father", "mother", "year")
grouping_variables
# Predictors:
<- c(
predictors "scmother_age", "handrear_mother", "scfather_age",
"handrear_father", "copulations_mates", "scprev_male_copulations",
"scprev_female_copulations", "sckin"
)
5.2 Reference model
Fit full reference model
# Formula for model
<- paste(
formula "clutch_fert ~", paste0(predictors, collapse = " + "), " + ",
paste(paste0("(1 | ", grouping_variables, ")"), collapse = " + "),
"+ handrear_mother:handrear_father"
%>%
) as.formula()
# Fit original model with {brms}
<- brm(formula,
fit_original data = data, family = bernoulli(),
prior = set_prior(horseshoe()),
iter = niter,
chains = nchains,
seed = 1111,
control = list(adapt_delta = 0.99)
) fit_original
Family: bernoulli
Links: mu = logit
Formula: clutch_fert ~ scmother_age + handrear_mother + scfather_age + handrear_father + copulations_mates + scprev_male_copulations + scprev_female_copulations + sckin + (1 | father) + (1 | mother) + (1 | year) + handrear_mother:handrear_father
Data: data (Number of observations: 217)
Draws: 4 chains, each with iter = 30000; warmup = 15000; thin = 1;
total post-warmup draws = 60000
Group-Level Effects:
~father (Number of levels: 50)
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept) 1.31 0.47 0.45 2.34 1.00 11905 11570
~mother (Number of levels: 60)
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept) 0.66 0.43 0.03 1.64 1.00 9430 17488
~year (Number of levels: 16)
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept) 0.65 0.51 0.03 1.91 1.00 13467 22681
Population-Level Effects:
Estimate Est.Error l-95% CI u-95% CI
Intercept 0.34 0.50 -0.67 1.33
scmother_age -0.11 0.37 -1.01 0.58
handrear_motherTRUE -0.18 0.42 -1.21 0.52
scfather_age 0.25 0.46 -0.47 1.39
handrear_fatherTRUE -0.54 0.66 -2.10 0.36
copulations_mates2Pcopulations 0.50 0.53 -0.19 1.70
copulations_matesDifferentmales 1.04 0.60 -0.01 2.23
scprev_male_copulations -0.18 0.40 -1.20 0.46
scprev_female_copulations -0.24 0.41 -1.26 0.37
sckin -0.21 0.34 -1.03 0.30
handrear_motherTRUE:handrear_fatherTRUE -0.25 0.58 -1.69 0.73
Rhat Bulk_ESS Tail_ESS
Intercept 1.00 26170 34085
scmother_age 1.00 54597 52449
handrear_motherTRUE 1.00 47172 54689
scfather_age 1.00 41212 47335
handrear_fatherTRUE 1.00 29749 48986
copulations_mates2Pcopulations 1.00 24999 48199
copulations_matesDifferentmales 1.00 22274 14814
scprev_male_copulations 1.00 35644 47661
scprev_female_copulations 1.00 40404 51617
sckin 1.00 33317 45872
handrear_motherTRUE:handrear_fatherTRUE 1.00 51141 53152
Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
5.3 Restricted variable selection
# Set up variable selection
## run variable selection on the unconstrained latent space
<- brms:::get_refmodel.brmsfit(fit_original, seed = 1111)
ref_original <- colMeans(posterior_linpred(fit_original, transform = FALSE))
f_latent ".y"]] <- f_latent
data[[<- rep(1, 2*niter)
dis_latent
# Get reference model
<- init_refmodel(fit_original,
ref data = data, formula = formula, family = gaussian(),
ref_predfun = ref_predfun, div_minimizer = projpred:::linear_multilevel_mle,
proj_predfun = projpred:::linear_multilevel_proj_predfun, dis = dis_latent,
extract_model_data = extract_model_data, cvfun = ref_original$cvfun,
seed = 1111
)
Constrain the variable selection to select the fixed terms first, then the random terms.
# Order variables for selection: first fixed terms, then random terms
<- c(
search_terms "1",
"handrear_mother + handrear_father + handrear_mother:handrear_father",
"scmother_age",
"handrear_mother",
"scfather_age",
"handrear_father",
"copulations_mates",
"scprev_male_copulations",
"scprev_female_copulations",
"sckin",
paste(
"scmother_age + handrear_mother + scfather_age + handrear_father +",
"copulations_mates + scprev_male_copulations + scprev_female_copulations + sckin +",
"handrear_mother:handrear_father + (1 | father)"
),paste(
"scmother_age + handrear_mother + scfather_age + handrear_father +",
"copulations_mates + scprev_male_copulations + scprev_female_copulations + sckin +",
"handrear_mother:handrear_father + (1 | mother)"
),paste(
"scmother_age + handrear_mother + scfather_age + handrear_father +",
"copulations_mates + scprev_male_copulations + scprev_female_copulations + sckin +",
"handrear_mother:handrear_father + (1 | year)"
) )
<- varsel(ref, search_terms = search_terms, seed=1111) vs_restricted
[1] "10% of terms selected."
[1] "20% of terms selected."
[1] "30% of terms selected."
[1] "40% of terms selected."
[1] "50% of terms selected."
[1] "60% of terms selected."
[1] "70% of terms selected."
[1] "80% of terms selected."
[1] "90% of terms selected."
[1] "100% of terms selected."
summary(vs_restricted, stats = c('elpd'), type = c('mean', 'se', 'lower', 'upper', 'diff', 'diff_se'), deltas = T)
Family: gaussian
Link function: identity
Formula: clutch_fert ~ scmother_age + handrear_mother + scfather_age +
handrear_father + copulations_mates + scprev_male_copulations +
scprev_female_copulations + sckin + (1 | father) + (1 | mother) +
(1 | year) + handrear_mother:handrear_father
<environment: 0x10e7435f0>
Observations: 217
Search method: forward, maximum number of terms 12
Draws used for selection: 20, in 20 clusters
Draws used for prediction: 400
Suggested Projection Size: 11
Selection Summary:
size solution_terms elpd se lower upper
0 <NA> -101.2 3.4 -104.6 -97.8
1 handrear_father -79.3 2.9 -82.2 -76.4
2 copulations_mates -65.6 2.3 -67.9 -63.3
3 handrear_mother -61.4 2.4 -63.8 -59.0
4 scfather_age -59.6 2.5 -62.0 -57.1
5 scprev_female_copulations -57.7 2.6 -60.3 -55.1
6 handrear_father:handrear_mother -57.4 2.6 -60.0 -54.9
7 scmother_age -57.3 2.5 -59.8 -54.8
8 scprev_male_copulations -57.2 2.5 -59.7 -54.7
9 sckin -57.1 2.5 -59.7 -54.6
10 (1 | father) -5.3 0.5 -5.9 -4.8
11 (1 | mother) -0.1 0.4 -0.5 0.3
12 (1 | year) 0.0 0.3 -0.3 0.3
<- vs_restricted
vs_binom
# Project onto latent space and then back to binomial: get better results than if just projecting original full model.
<- ref_original$family$ll_fun
ll_fun <- ref_original$family$linkinv
linkinv for (k in seq_along(vs_restricted$summaries$sub)) {
$summaries$sub[[k]]$mu <- linkinv(vs_restricted$summaries$sub[[k]]$mu)
vs_binom$summaries$sub[[k]]$draws <-
vs_binomlinkinv(vs_restricted$summaries$sub[[k]]$draws)
<- ll_fun(
lppd mu=vs_binom$summaries$sub[[k]]$draws,
dis=NULL, y=as.numeric(as.matrix(data[, "clutch_fert"])),
weights = ref$wobs
)$summaries$sub[[k]]$lppd <- apply(
vs_binom1, projpred:::log_weighted_mean_exp, rep(1 / NCOL(lppd), NCOL(lppd))
lppd,
)
}
# Transform back to binomial space:
$summaries$ref$mu <- linkinv(vs_restricted$summaries$ref$mu)
vs_binom$summaries$ref$draws <- linkinv(vs_restricted$summaries$ref$draws)
vs_binom<- ll_fun(
lppd $summaries$ref$draws,
vs_binomNULL, as.numeric(as.matrix(data[, "clutch_fert"])),
weights = ref$wobs
)$summaries$ref$lppd <- apply(
vs_binom1, projpred:::log_weighted_mean_exp, ref$wsample
lppd, )
## Plot variable selection results
<- function(string){
elpd_replace <- paste(toupper(string), " difference vs reference model")
string
}plot(vs_binom, stats = c('elpd'), deltas=T, alpha=0.32) +
scale_x_continuous(breaks = c(0, seq_along(vs_restricted$solution_terms)),
labels = c("Intercept", fn_labels(vs_restricted$solution_terms))) +
scale_y_continuous(expand = c(0,0)) +
expand_limits(y=c(-60,1)) +
facet_wrap(statistic ~ ., strip.position = "left", labeller = labeller(statistic=elpd_replace)) +
theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1),
axis.ticks = element_line(),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank(),
panel.grid.major.y = element_blank(),
panel.grid.minor.y = element_blank(),
strip.text = element_blank(), # no facet text
strip.placement = "outside",
strip.background = element_blank(),
panel.border = element_rect(fill = NA, colour = 'black')) +
labs(x='Terms in the submodel', y = 'ELPD difference from the reference model')
ggsave(file.path(outdir,"figure_1_varsel.pdf"), height = 6, width = 8)
# Makek table of variablel selection results
<- summary(vs_binom, type=c('mean', 'se', 'diff', 'lower', 'upper'), stats=c('elpd'), deltas=T)
d
# Add contribution of difference to ELPD for each term, from the previous term:
<- d$selection$elpd[nrow(d$selection)] - d$selection$elpd[1]
tot_elpd <- d$selection %>% mutate(elpd_diff = elpd - lag(elpd),
dsel elpd_diff_pc = 100 * elpd_diff / tot_elpd)
format(dsel, digits=2)
size solution_terms elpd se lower upper elpd_diff
1 0 NA -52.00 4.97 -56.95 -47.1 NA
2 1 handrear_father -44.45 5.13 -49.55 -39.3 7.5529
3 2 copulations_mates -38.98 4.77 -43.73 -34.2 5.4651
4 3 handrear_mother -37.76 4.60 -42.33 -33.2 1.2217
5 4 scfather_age -37.52 4.54 -42.03 -33.0 0.2401
6 5 scprev_female_copulations -36.42 4.49 -40.88 -32.0 1.1045
7 6 handrear_father:handrear_mother -36.41 4.50 -40.89 -31.9 0.0035
8 7 scmother_age -36.37 4.45 -40.80 -31.9 0.0427
9 8 scprev_male_copulations -36.35 4.41 -40.74 -32.0 0.0193
10 9 sckin -36.20 4.40 -40.58 -31.8 0.1503
11 10 (1 | father) -10.46 1.56 -12.01 -8.9 25.7451
12 11 (1 | mother) -4.30 0.92 -5.22 -3.4 6.1542
13 12 (1 | year) -0.16 0.27 -0.43 0.1 4.1392
elpd_diff_pc
1 NA
2 14.5701
3 10.5425
4 2.3568
5 0.4631
6 2.1307
7 0.0068
8 0.0823
9 0.0373
10 0.2900
11 49.6639
12 11.8718
13 7.9848
# Random term contributions to ELPD:
cat(sprintf("Contribution of random model terms to ELPD difference = %.1f%%", dsel %>% filter(grepl("|", solution_terms, fixed=T)) %>% summarise(tot_elpd_diff_pc = sum(elpd_diff_pc)) %>% as.numeric()))
Contribution of random model terms to ELPD difference = 69.5%
5.3.1 Projection - full model
Projected marginals of the full model
# Set the solution terms - all terms
if (suggest_size(vs_binom) > length(vs_binom$solution_terms)){
<- vs_binom$solution_terms[seq_len(suggest_size(vs_binom)-1)]
solution_terms else {
} <- vs_binom$solution_terms[seq_len(suggest_size(vs_binom))]
solution_terms
}<- projpred::project(vs_restricted, solution_terms = solution_terms, seed=1111)
projection
#Project onto latent space and then back to binomial: get better results than if just projecting original full model.
<- ref_original$family$ll_fun
ll_fun <- ref_original$family$linkinv
linkinv for (k in seq_along(projection$summaries$sub)) {
$summaries$sub[[k]]$mu <- linkinv(projection$summaries$sub[[k]]$mu)
projection$summaries$sub[[k]]$draws <-
projectionlinkinv(projection$summaries$sub[[k]]$draws)
<- ll_fun(
lppd $summaries$sub[[k]]$draws,
projectionNULL, as.numeric(as.matrix(data[, "clutch_fert"])),
weights = ref$wobs
)$summaries$sub[[k]]$lppd <- apply(
projection1, projpred:::log_weighted_mean_exp, rep(1 / NCOL(lppd), NCOL(lppd))
lppd,
)
}
# Project back to binomial space
$summaries$ref$mu <- linkinv(projection$summaries$ref$mu)
projection$summaries$ref$draws <- vs_binom$summaries$ref$draws
projection<- ll_fun(
lppd $summaries$ref$draws,
projectionNULL, as.numeric(as.matrix(data[, "clutch_fert"])),
weights = ref$wobs
)$summaries$ref$lppd <- apply(
projection1, projpred:::log_weighted_mean_exp, ref$wsample
lppd, )
R^2 for original model:
# Compare with brms::bayes_R2 function
bayes_R2(fit_original)
Estimate Est.Error Q2.5 Q97.5
R2 0.3124318 0.07100593 0.1709913 0.4513358
R^2 for full projection:
# Modified function to calculate R2 for projected model
# Modified from https://avehtari.github.io/bayes_R2/bayes_R2.html#1_Introduction
<- function(proj) {
bayes_R2_res <- get_y(proj$refmodel$fit)
y # Use proj_linpred instead of rstanarm::posterior_epred
# use $pred to get values in response space
<- proj_linpred(proj, transform = T)$pred
ypred_latent # Apply inverse link function to output of proj_linpred to convert from latent function
<- ref_original$family$linkinv(ypred_latent)
ypred
if (proj$refmodel$fit$family$family == "binomial" && NCOL(y) == 2) {
<- rowSums(y)
trials <- y[, 1]
y <- ypred %*% diag(trials)
ypred
}<- -1 * sweep(ypred, 2, y)
e <- apply(ypred, 1, var)
var_ypred <- apply(e, 1, var)
var_e <- var_ypred / (var_ypred + var_e)
r2 %>% as.data.frame(.) %>%
r2 summarise(Mean = mean(r2), SD = sd(r2), Q2.5 = quantile(r2, 0.025), Q97.5 = quantile(r2, 0.975))
}<- bayes_R2_res(projection)) (R2_res
Mean SD Q2.5 Q97.5
1 0.3114775 0.07144011 0.1651088 0.4516667
ROPE plot and posterior statistics. Use rope_range
to specify range of ROPE, which is -0.1813799, 0.1813799 for logistic regression.
# Plot ROPE
<- as.data.frame(as.matrix(projection)) %>%
mp select(!starts_with("r_"), -sigma) # don't plot random factors or sigma
# Add median and HDI
<- pivot_longer(mp %>% select(-b_Intercept), cols=everything(), names_to='variable')
gmp
# Retain order of variables for plot:
$variable <- fct_relevel(gmp$variable, names(mp))
gmp$y <- gmp$variable
gmp<- plot(rope(mp,ci=ropeci, range=rope_range(fit_original))) +
(p stat_pointinterval(data=gmp, aes(y=factor(variable, levels=names(mp)), x=value, height=NULL,fill=NULL), point_interval=median_hdi, .width=c(0.5, 0.95), normalize='xy',point_size=3, point_colour='black', interval_colour='blue', shape=21, point_fill='yellow', alpha=0.5) +
scale_y_discrete(labels=fn_labels, expand = expansion(add = c(-0,1.2))) +
scale_x_continuous(labels = function(x)x/2) + # to convert posterior scale from 0.5 SD to SD
labs(title='', y = 'Variable', x = "Possible values (SD)") +
guides(fill = 'none')
)
ggsave(file.path(outdir, "figure_2_posterior_full.pdf"), height = 6, width = 8)
# Posterior statistics
names(mp) <- fn_labels(names(mp))
describe_posterior(mp, ci_method='hdi', ci=0.95, rope_ci=ropeci, rope_range=rope_range(fit_original), centrality = "median",test = c("p_direction", "p_significance", "rope", 'equivalence_test'))
Summary of Posterior Distribution
Parameter | Median | 95% CI | pd | ps | ROPE | % in ROPE | Equivalence (ROPE)
-----------------------------------------------------------------------------------------------------------------------------------
Intercept | 0.43 | [-0.44, 1.25] | 85.25% | 0.73 | [-0.18, 0.18] | 19.25% | Undecided
Father HR | -0.76 | [-2.00, 0.19] | 94.00% | 0.87 | [-0.18, 0.18] | 10.75% | Undecided
Mother copulations: 1 male, >1 copulation | 0.35 | [-0.26, 1.64] | 81.75% | 0.61 | [-0.18, 0.18] | 35.25% | Undecided
Mother copulations: Different males | 1.11 | [-0.03, 2.05] | 96.25% | 0.90 | [-0.18, 0.18] | 9.50% | Undecided
Mother HR | -0.16 | [-1.24, 0.56] | 71.00% | 0.48 | [-0.18, 0.18] | 39.00% | Undecided
Father age | 0.11 | [-0.62, 1.16] | 64.75% | 0.43 | [-0.18, 0.18] | 43.75% | Undecided
Mother previous copulations | -0.12 | [-1.06, 0.48] | 72.50% | 0.43 | [-0.18, 0.18] | 50.00% | Undecided
Mother age | -0.06 | [-0.98, 0.45] | 64.75% | 0.34 | [-0.18, 0.18] | 51.25% | Undecided
Father previous copulations | -0.04 | [-1.00, 0.58] | 63.00% | 0.34 | [-0.18, 0.18] | 55.00% | Undecided
Mother/father kinship | -0.12 | [-1.01, 0.31] | 79.25% | 0.45 | [-0.18, 0.18] | 50.00% | Undecided
Father HR:Mother HR | -0.06 | [-1.48, 0.75] | 64.00% | 0.36 | [-0.18, 0.18] | 51.75% | Undecided
SD(Mother intercept) | 0.53 | [ 0.00, 1.17] | 100% | 0.85 | [-0.18, 0.18] | 14.75% | Undecided
SD(Father intercept) | 0.85 | [ 0.32, 1.67] | 100% | 0.99 | [-0.18, 0.18] | 1.00% | Rejected
SD(Year intercept) | 0.41 | [ 0.00, 1.00] | 100% | 0.83 | [-0.18, 0.18] | 17.25% | Undecided
5.3.2 Projection - reduced model
Project only the copulations
and hand-rearing
fixed predictors, since these contribute the most to the variance and have non-zero posteriors in the full model, plus the random terms
# Set solution terms: best two fixed terms and all random terms
<- c(vs_binom$solution_terms[1:2],
solution_terms $solution_terms[10:12])
vs_binom<- projpred::project(vs_restricted, solution_terms = solution_terms, seed=1111)
projection
# Project onto latent space and then back to binomial: get better results than if just projecting original full model.
<- ref_original$family$ll_fun
ll_fun <- ref_original$family$linkinv
linkinv for (k in seq_along(projection$summaries$sub)) {
$summaries$sub[[k]]$mu <- linkinv(projection$summaries$sub[[k]]$mu)
projection$summaries$sub[[k]]$draws <-
projectionlinkinv(projection$summaries$sub[[k]]$draws)
<- ll_fun(
lppd $summaries$sub[[k]]$draws,
projectionNULL, as.numeric(as.matrix(data[, "clutch_fert"])),
weights = ref$wobs
)$summaries$sub[[k]]$lppd <- apply(
projection1, projpred:::log_weighted_mean_exp, rep(1 / NCOL(lppd), NCOL(lppd))
lppd,
)
}
# Project back to binomial
$summaries$ref$mu <- linkinv(projection$summaries$ref$mu)
projection$summaries$ref$draws <- vs_binom$summaries$ref$draws
projection<- ll_fun(
lppd $summaries$ref$draws,
projectionNULL, as.numeric(as.matrix(data[, "clutch_fert"])),
weights = ref$wobs
)$summaries$ref$lppd <- apply(
projection1, projpred:::log_weighted_mean_exp, ref$wsample
lppd, )
R2 of projected reduced model:
<- bayes_R2_res(projection)) (R2_res
Mean SD Q2.5 Q97.5
1 0.3065322 0.07057051 0.1618977 0.4478909
ROPE plot and posterior statistics:
# Plot ROPE
<- as.data.frame(as.matrix(projection))%>%
mp select(!starts_with("r_"), -sigma) # don't plot random factors or sigma
# Add median and HDI
<- pivot_longer(mp %>% select(-b_Intercept), cols=everything(), names_to='variable')
gmp
# Retain order of variables:
$variable <- fct_relevel(gmp$variable, names(mp))
gmp$y <- gmp$variable
gmpplot(rope(mp,ci=ropeci, range=rope_range(fit_original))) +
stat_pointinterval(data=gmp, aes(y=factor(variable, levels=names(mp)), x=value, height=NULL,fill=NULL), point_interval=median_hdi, .width=c(0.5, 0.95), normalize='xy',point_size=3, point_colour='black', interval_colour='blue', shape=21,point_fill='yellow', alpha=0.5) +
scale_y_discrete(labels=fn_labels, expand = expansion(add = c(-0,1.2))) +
scale_x_continuous(labels = function(x)x/2) + # to convert posterior scale from 0.5 SD to SD
labs(title='', y = 'Variable', x = "Possible values (SD)") +
guides(fill='none')
ggsave(file.path(outdir, "figure_3_posterior_reduced.pdf"), height = 6, width = 8)
# Posterior stats:
names(mp) <- fn_labels(names(mp))
describe_posterior(mp, ci_method='hdi', ci=0.95, rope_ci=ropeci, rope_range=rope_range(fit_original), centrality = "median",test = c("p_direction", "p_significance", "rope", 'equivalence_test'))
Summary of Posterior Distribution
Parameter | Median | 95% CI | pd | ps | ROPE | % in ROPE | Equivalence (ROPE)
-----------------------------------------------------------------------------------------------------------------------------------
Intercept | 0.58 | [-0.22, 1.51] | 92.25% | 0.84 | [-0.18, 0.18] | 13.50% | Undecided
Father HR | -1.03 | [-2.08, 0.16] | 97.50% | 0.93 | [-0.18, 0.18] | 6.50% | Undecided
Mother copulations: 1 male, >1 copulation | 0.33 | [-0.24, 1.56] | 81.00% | 0.60 | [-0.18, 0.18] | 35.75% | Undecided
Mother copulations: Different males | 1.14 | [-0.03, 2.06] | 96.75% | 0.92 | [-0.18, 0.18] | 8.50% | Undecided
SD(Mother intercept) | 0.65 | [ 0.05, 1.36] | 100% | 0.95 | [-0.18, 0.18] | 5.25% | Undecided
SD(Father intercept) | 1.27 | [ 0.30, 2.18] | 100% | 1.00 | [-0.18, 0.18] | 0.25% | Rejected
SD(Year intercept) | 0.63 | [ 0.00, 1.61] | 98.75% | 0.92 | [-0.18, 0.18] | 8.25% | Undecided
5.3.2.1 Marginal means
Evaluate interaction plot of estimated marginal means from the projected posterior for hand-rearing and number of copulations combined:
# Create projection dataframe
<- as.data.frame(as.matrix(projection)) %>% select(starts_with("b_")) %>% rename_with(.fn=function(x) gsub("b_","",x), .cols=everything()) %>% as.matrix(.)
mpb
# Grid for copulations_mates
<- qdrg(~ copulations_mates + handrear_father , data=fit_original$data, mcmc=mpb[, c('Intercept','copulations_mates2+ copulations', 'copulations_matesDifferent males', 'handrear_fatherTRUE')], link='logit')
grd_hr2
# Plot without data:
<- emmip(grd_hr2, formula("~copulations_mates + handrear_father"), type='response', plotit=F) %>%
proj.int mutate(father_rear= case_when(handrear_father=='TRUE' ~ 'Hand-reared',
=='FALSE' ~ 'Wild-reared'))
handrear_father<- ggplot() +
ghr_mate geom_pointrange(data=proj.int, aes(x=copulations_mates, y=yvar, ymin=LCL, ymax=UCL, colour=father_rear, shape = father_rear), position=position_dodge(width=0.1)) +
labs(y='Probability of clutch fertility', x='', colour='Father rearing', shape = 'Father rearing') +
scale_x_discrete(labels=fn_labels) +
scale_colour_brewer(palette = 'Set2', direction = -1) # colour-blind friendly
ggsave(file.path(outdir, "figure_4_predict.pdf"), ghr_mate, height = 6, width = 8)
# Plot with data:
+
ghr_mate geom_jitter(data=fit_original$data %>%
mutate(father_rear= case_when(
=='TRUE' ~ 'Hand-reared',
handrear_father=='FALSE' ~ 'Wild-reared')),
handrear_fatheraes(x=copulations_mates, y=clutch_fert, colour=father_rear, shape = father_rear), alpha=0.5, width=0.1, height=0.05)
ggsave(file.path(outdir, "figure_4_predict_data.pdf"), height = 6, width = 8)
# Results table:
%>% rename(clutch.fertility = yvar) %>%
proj.int mutate(father_rear= case_when(handrear_father=='TRUE' ~ 'Hand-reared',
=='FALSE' ~ 'Wild-reared')) %>%
handrear_fatherselect(copulations_mates, father_rear, clutch.fertility, LCL, UCL) %>%
format(., digits=3)
copulations_mates father_rear clutch.fertility LCL UCL
1 1 copulation Wild-reared 0.641 0.468 0.837
2 2+ copulations Wild-reared 0.718 0.524 0.924
3 Different males Wild-reared 0.842 0.666 0.972
4 1 copulation Hand-reared 0.385 0.154 0.732
5 2+ copulations Hand-reared 0.504 0.202 0.833
6 Different males Hand-reared 0.658 0.348 0.932
6 Multiple copulation and kakapo density
# Read number of kakapo per island per year
<- read.csv(kakapo_demog_file, header = T)
nk
# Number of copulations/males per island per year per number of matings/mates
<- table(full_data$Island, full_data$year, full_data$copulations_mates)
tc <- data.frame(tc) %>% rename(Island = Var1, year = Var2, copulations = Var3) %>% mutate(year = as.numeric(as.character(year)))
dtc
# Total clutches per island:
<- dtc %>% group_by(year, Island) %>% summarise(totclutch=sum(Freq))
dtc_tot <- left_join(dtc, dtc_tot, by=c('year', 'Island'))
dtc
# Combine dataframes: number of kakapo per island and copulations proportions
<- left_join(dtc, nk %>% filter(ageClass=='Adult'), by=c('year', 'Island'), suffix = c('.cop', '.kak')) %>%
nk2 mutate(prop.kak = Freq.cop / Freq.kak,
prop.clutch = Freq.cop / totclutch) %>%
filter(!is.na(Sex) & !is.na(Freq.kak) & Freq.kak > 0)
Combine repeated copulations with the same male and copulations with different males to compare single vs multiple copulations.
Only consider Whenua Hou from 1990 onwards.
<- nk2 %>% mutate(copulations_comb = fct_collapse(copulations,
nk2 single = "1 copulation",
multiple = c("2+ copulations", "Different males")))
# Totals:
<- nk2 %>%
nkg group_by(Island, year, Sex, copulations_comb) %>%
summarise(Freq.cop=sum(Freq.cop, na.rm=T), Freq.kak = mean(Freq.kak, na.rm = T), totclutch = mean(totclutch, na.rm = T)) %>%
mutate(prop.clutch = Freq.cop / totclutch)
# Only Whenua Hou from 1990:
<- nkg %>% filter(Island=='Whenua Hou' & year>=1990 & copulations_comb == 'multiple') nkgsub
6.1 Number of copulations
Show table number of copulations vs number of mates
table(`Number of mates` = full_data$number_of_males, `Number of copulations` = full_data$number_of_copulations)
Number of copulations
Number of mates 1 2 3 4
1 105 46 4 0
2 0 45 14 2
3 0 0 1 1
6.2 Number of kakapo
Multiple copulation proportion vs number of males and females
# Calculate correlation:
<- nkgsub %>% ungroup(.) %>% dplyr::select(Sex, Freq.kak, prop.clutch) %>% group_by( Sex) %>% correlation()) (cor
# Correlation Matrix (pearson-method)
Group | Parameter1 | Parameter2 | r | 95% CI | t(8) | p
---------------------------------------------------------------------------
Female | Freq.kak | prop.clutch | 0.93 | [ 0.74, 0.98] | 7.44 | < .001***
Male | Freq.kak | prop.clutch | 0.61 | [-0.02, 0.90] | 2.20 | 0.059
p-value adjustment method: Holm (1979)
Observations: 10
$p cor
[1] 7.319028e-05 5.875909e-02
# Base plot:
<- brewer.pal(name='Set1',n=3)[2]
col <- ggplot(nkgsub %>% filter(!is.na(prop.clutch)), aes(x= Freq.kak, y = prop.clutch)) +
gwh1990_comb geom_point(size=2, colour=col) +
geom_smooth(aes(shape=NULL), fill=col, method=lm, alpha=0.2) +
labs(y = "Proportion of clutches with multiple copulations", x = 'Number of adult kakapo') +
facet_wrap(~Sex, scales='free_x')
6.3 Sex ratio
Multiple copulation proportion vs sex ratio:
# Calculate sex ratio
<- nkgsub %>%
srg pivot_wider(id_cols = c(Island, year, copulations_comb, Freq.cop, totclutch, prop.clutch), names_from = 'Sex', values_from="Freq.kak") %>%
mutate(sex.ratio = Female/Male)
# Calculation correlation:
<- srg %>% ungroup(.) %>% dplyr::select(sex.ratio, prop.clutch) %>% correlation(bayesian=F)) (corsex
# Correlation Matrix (pearson-method)
Parameter1 | Parameter2 | r | 95% CI | t(8) | p
-----------------------------------------------------------------
sex.ratio | prop.clutch | 0.92 | [0.71, 0.98] | 6.88 | < .001***
p-value adjustment method: Holm (1979)
Observations: 10
$p corsex
[1] 0.0001274216
<- brewer.pal(name='Set1',n=3)[1]
col # Plot of multiple copulation frequency vs sex ratio
<- ggplot(srg %>% filter(copulations_comb=='multiple'), aes(x= sex.ratio, y=prop.clutch)) +
gswh1990comb geom_point(size=2, colour=col) + geom_smooth(aes(shape=NULL), method=lm, alpha=0.2, colour=col, fill=col) +
labs(y = "Prop. of clutches with multiple copulations", x = 'Female:male sex ratio')
6.4 Number and sex ratio combined
Combine number of male/female and sex ratio plots:
# Separate panels for females, males and sex ratio
# Multiple copulation frequency vs number of females
<- brewer.pal(name='Set2',n=3)[1] # colour-blind friendly
col <- ggplot(nkgsub %>% filter(!is.na(prop.clutch) & Sex == 'Female'), aes(x= Freq.kak, y = prop.clutch)) +
gwh1990_female geom_point(size=2, colour=col) +
geom_smooth(aes(shape=NULL), colour = col, fill=col, method=lm, alpha=0.2) +
labs(y = "Proportion of clutches", x = 'Number of females') +
coord_cartesian(ylim = c(0,1)) +
expand_limits(x=40)
# Multiple copulation frequency vs number of males
<- brewer.pal(name='Set2',n=3)[2]
col <- ggplot(nkgsub %>% filter(!is.na(prop.clutch) & Sex == 'Male'), aes(x= Freq.kak, y = prop.clutch)) +
gwh1990_male geom_point(size=2, colour=col) +
geom_smooth(aes(shape=NULL), fill=col, method=lm, alpha=0.2, colour=col, fill=col) +
labs(y = "", x = 'Number of males') +
coord_cartesian(ylim = c(0,1)) +
expand_limits(x=c(15,30))
# Multiple copulation frequency vs sex ratio
<- brewer.pal(name='Set2',n=3)[3]
col <- ggplot(srg %>% filter(copulations_comb=='multiple'), aes(x= sex.ratio, y=prop.clutch)) +
gwh1990_sexratio geom_point(size=2, colour=col) +
geom_smooth(aes(shape=NULL), method=lm, alpha=0.2, colour=col, fill=col) +
labs(y = "", x = 'Female:male sex ratio') +
coord_cartesian(ylim = c(0,1)) +
expand_limits(x=0.4)
ggarrange(gwh1990_female,
gwh1990_male,
gwh1990_sexratio,common.legend = T ,
labels = c("A", "B", "C"),
label.x = 0.25,
label.y = 0.98,
ncol = 3,
nrow = 1,
widths=c(1,1,1))
# For publication:
ggsave(filename = file.path(outdir, 'figure_5_copulation_sex_ratio.pdf'), plot=last_plot(), device = 'pdf', width=10, height=6 )
6.5 Change over time
Show the change in Whenua Hou population and sex ratio over time (note that these data include some years in which there was no breeding):
# Total population
ggplot(srg %>% filter(!is.na(sex.ratio)) , aes(x = year, y = Female + Male)) +
geom_point() +
geom_line() +
scale_x_continuous(breaks = seq(min(srg$year), max(srg$year), 5), minor_breaks = c(min(srg$year): max(srg$year))) +
scale_y_continuous(breaks = seq(0,1,0.2)) +
labs(y = 'Number of adult kakapo')
# Male and female population
ggplot(srg %>% filter(!is.na(sex.ratio) & !is.nan(prop.clutch)) %>% pivot_longer(cols = Female:Male, names_to = 'variable') , aes(x = year, y = value, colour = variable)) +
geom_point() +
geom_line() +
facet_wrap(~variable, scales = 'fixed') +
guides(colour = 'none') +
scale_x_continuous(breaks = seq(min(srg$year), max(srg$year), 5), minor_breaks = c(min(srg$year): max(srg$year))) +
labs(y = 'Number of adult kakapo')
# Sex ratio
ggplot(srg %>% filter(!is.na(sex.ratio)& !is.nan(prop.clutch)) , aes(x = year, y = sex.ratio)) +
geom_point() +
geom_line() +
scale_x_continuous(breaks = seq(min(srg$year), max(srg$year), 5), minor_breaks = c(min(srg$year): max(srg$year))) +
labs(y = 'F:M adult sex ratio')
Change in multiple copulation rate over time:
# Sex ratio
ggplot(srg %>% filter(!is.na(sex.ratio)& !is.nan(prop.clutch)) , aes(x = year, y = prop.clutch)) +
geom_point() +
geom_line() +
scale_x_continuous(breaks = seq(min(srg$year), max(srg$year), 5), minor_breaks = c(min(srg$year): max(srg$year))) +
scale_y_continuous(breaks = seq(0,1,0.2)) +
labs(y = 'Proportion of multiple copulations')
7 Session information
sessionInfo()
R version 4.1.2 (2021-11-01)
Platform: aarch64-apple-darwin20 (64-bit)
Running under: macOS Monterey 12.6.2
Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/4.1-arm64/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.1-arm64/Resources/lib/libRlapack.dylib
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] ggpubr_0.4.0 RColorBrewer_1.1-3 correlation_0.8.0
[4] emmeans_1.7.3 loo_2.5.1 brms_2.16.3
[7] Rcpp_1.0.9 doRNG_1.8.2 rngtools_1.5.2
[10] doFuture_0.12.1 future_1.24.0 foreach_1.5.2
[13] ggdist_3.1.1 see_0.6.9 bayestestR_0.11.5
[16] bayesplot_1.9.0 lme4_1.1-29 Matrix_1.4-1
[19] rstan_2.21.5 StanHeaders_2.21.0-7 forcats_0.5.1
[22] stringr_1.4.0 dplyr_1.0.9 purrr_0.3.4
[25] readr_2.1.1 tidyr_1.1.4 tibble_3.1.7
[28] ggplot2_3.3.6 tidyverse_1.3.2 projpred_2.0.5.9000
loaded via a namespace (and not attached):
[1] readxl_1.3.1 backports_1.4.1 plyr_1.8.7
[4] igraph_1.2.11 splines_4.1.2 crosstalk_1.2.0
[7] listenv_0.8.0 optimx_2021-10.12 rstantools_2.2.0
[10] inline_0.3.19 digest_0.6.29 htmltools_0.5.2
[13] fansi_1.0.3 magrittr_2.0.3 checkmate_2.1.0
[16] googlesheets4_1.0.0 tzdb_0.2.0 globals_0.14.0
[19] modelr_0.1.8 RcppParallel_5.1.5 matrixStats_0.62.0
[22] xts_0.12.1 rmdformats_1.0.3 prettyunits_1.1.1
[25] colorspace_2.0-3 rvest_1.0.2 haven_2.4.3
[28] xfun_0.28 callr_3.7.0 crayon_1.5.1
[31] jsonlite_1.8.0 zoo_1.8-9 iterators_1.0.14
[34] glue_1.6.2 gtable_0.3.0 gargle_1.2.0
[37] distributional_0.3.0 car_3.0-12 pkgbuild_1.3.0
[40] abind_1.4-5 scales_1.2.0 mvtnorm_1.1-3
[43] DBI_1.1.1 rstatix_0.7.0 miniUI_0.1.1.1
[46] xtable_1.8-4 HDInterval_0.2.2 DT_0.20
[49] stats4_4.1.2 datawizard_0.4.0 htmlwidgets_1.5.4
[52] httr_1.4.2 threejs_0.3.3 posterior_1.2.1
[55] ellipsis_0.3.2 pkgconfig_2.0.3 farver_2.1.0
[58] sass_0.4.0 dbplyr_2.1.1 utf8_1.2.2
[61] labeling_0.4.2 reshape2_1.4.4 tidyselect_1.1.2
[64] rlang_1.0.2 later_1.3.0 munsell_0.5.0
[67] cellranger_1.1.0 tools_4.1.2 cli_3.3.0
[70] generics_0.1.2 broom_0.7.10 ggridges_0.5.3
[73] evaluate_0.15 fastmap_1.1.0 yaml_2.3.5
[76] processx_3.5.3 knitr_1.36 fs_1.5.2
[79] nlme_3.1-153 mime_0.12 xml2_1.3.3
[82] shinythemes_1.2.0 compiler_4.1.2 rstudioapi_0.13
[85] gamm4_0.2-6 ggsignif_0.6.3 reprex_2.0.1
[88] bslib_0.3.1 stringi_1.7.6 parameters_0.17.0
[91] highr_0.9 ps_1.7.0 Brobdingnag_1.2-8
[94] lattice_0.20-45 markdown_1.1 nloptr_2.0.0
[97] shinyjs_2.1.0 tensorA_0.36.2 vctrs_0.4.1
[100] pillar_1.7.0 lifecycle_1.0.1 jquerylib_0.1.4
[103] bridgesampling_1.1-2 estimability_1.3 cowplot_1.1.1
[106] insight_0.17.0 httpuv_1.6.5 R6_2.5.1
[109] bookdown_0.24 promises_1.2.0.1 gridExtra_2.3
[112] parallelly_1.30.0 codetools_0.2-18 gtools_3.9.2
[115] colourpicker_1.1.1 boot_1.3-28 MASS_7.3-54
[118] assertthat_0.2.1 withr_2.5.0 shinystan_2.6.0
[121] mgcv_1.8-38 parallel_4.1.2 hms_1.1.1
[124] grid_4.1.2 coda_0.19-4 minqa_1.2.4
[127] rmarkdown_2.11 carData_3.0-5 googledrive_2.0.0
[130] numDeriv_2016.8-1.1 shiny_1.7.1 lubridate_1.8.0
[133] base64enc_0.1-3 dygraphs_1.1.1.6