--- title: Supplementary information for the paper 'Individual behavioral type captured by a Bayesian model comparison in cap making by sponge crab' --- This document is the supplementary information for the paper 'Individual behavioral type captured by a Bayesian framework in cap making by sponge crab' by Harada and Kagaya. The basic conceptural framework for data analysis in the paper is a Bayesian framework based on the mathematical theory founded by Dr. Akaike and Dr. Watanabe (Watanabe, 'Mathematical Theory of Bayesian Statistics', 2018). The author of this document wrote an example of model selection by Widely Applicable Information Criterion (WAIC) using the data of 'artificial case' (http://rpubs.com/katzkagaya/460937). It may be of help for clarifying what the analysis of this document is doing, by using numerically generated 'artificial' data and several relatively simple hierarchical models. In this study, we deal with the behavioral data of a 'natural case'. Therefore, we do not know the true distribution generating the data. Here, we applied 26 statistical models in total for each behavioral aspect of the crab and compare the performances of models in terms of predictatility by WAIC. The models are categorized into four observed variables as random response variables. 1. size of chosen sponge --- 6 models 2. trimmed area of the sponge which is not used for making cap --- 8 models 3. cavity area size excavated for the cap --- 6 models 4. time for making the cap --- 6 models We construct generalized linear models for these random variables as response variables. Additionally, a key feature of the dataset is that in several cases we repeatedly observed the data for each crab, because we are interested in the 'individual behavioral type' of each crab. Therefore, we parametrize this point as hierarchical structures into the models. After building up each predictive distributions, we will compare the appropriateness of each model using WAIC including the hierarchical model. The best performed models for the four behaviours is described in the Materials and Methods section in the main text. We here walk through all of the models, with those implementations in Stan and plots for the main figures with its R codes. # load libraries Load packages used in this analysis. ```{r, message=FALSE} library(tidyverse) library(rstan) library(forcats) library(ggforce) library(lubridate) library(tictoc) library(viridis) ``` # utilities We define several utility functions used in this analysis. ```{r, message=FALSE} nth_dens <- function(pdens, n){ # sample n from the highest density in decreasing order # n / 512 points dens <- density(pdens) nth_x <- c() for (i in 1:n){ nth_x <- c(nth_x, dens$y %>% order(decreasing=T) %>% nth(i) %>% dens$x[.] ) } nth_x } waic <- function(log_lik) { # calculate WAIC in the 'focus' of this study, cf. http://rpubs.com/katzkagaya/460937 # Shortly, the definition of WAIC is different when the focus of the prediction change Tn <- - mean(log(colMeans(exp(log_lik)))) V_div_N <- mean(colMeans(log_lik^2) - colMeans(log_lik)^2) waic <- Tn + V_div_N return(waic) } check_rhat <- function(fit){ # We are not using this function in this document, but you can check Rhat all(summary(fit)$summary[,"Rhat"] <= 1.10, na.rm=T) } add_chosen_col_choice <- function(d){ # needed for the preprocessing the data for the analysis of sponge choice (#1) d <- d %>% arrange(id, choice) x <- paste(d$id, d$choice, sep="") %>% factor() %>% fct_inorder() xx <- table(x) chosen_ind <- names(xx) chosen_num <- c() for (i in xx){ chosen_num <- c(chosen_num, 1:i) } d$chosen_num <- chosen_num d } add_chosen_col_days <- function(d){ # needed for the preprocessing the data for the analysis of the 'days' for making cap (#4) d <- d %>% arrange(id, days_to_choice) x <- paste(d$id, d$days_to_choice, sep="") %>% factor() %>% fct_inorder() xx <- table(x) chosen_ind <- names(xx) chosen_num <- c() for (i in xx){ chosen_num <- c(chosen_num, 1:i) } d$chosen_num <- chosen_num d } make_xyz <- function(x, y, n=200){ # z is density value for fill=z in the geom_raster plot z <- MASS::kde2d(x, y, n=n) d <- matrix(nrow=n*n, ncol=3) k = 1 for (i in 1:n){ for (j in 1:n){ d[k,] <- c(z$x[i], z$y[j], z$z[i,j]) k <- k + 1 } } d <- as_tibble(d) colnames(d) <- c("x", "y", "z") d } make_xyzy <- function(x=data$C_width_new, ymc=ms$pred_Y, yn=200, ylim=c(0.7, 3.3)){ x_len <- dim(ymc)[2] zout <- matrix(nrow=x_len, ncol=yn) yout <- matrix(nrow=yn, ncol=x_len) k <- 1 for (i in 1:x_len) { ymc_i <- ymc[,i] #ymc[,i] %>% .[.>ylim[1] & .ylim[1] & ymc_i% dplyr::select(c('shell_width', 'choice', #'whole_px', 'cm_per_px', 'id', 'leg_lack' )) d <- d[complete.cases(d), ] # renumber animal id from 1 id <- as.factor(d$id) levels(id) <- as.character(1:length(levels(id))) d$id <- as.integer(id) d$choice <- as.factor(d$choice) levels(d$choice) <- c(2,1,3) # label M,L,No, as 1,2,3 d$choice <- as.integer(as.character(d$choice)) d$leg_lack <- as.factor(d$leg_lack) levels(d$leg_lack) <- c(2,3,1) # label no_leg_lack, A, C, as 1, 2, 3 d$leg_lack <- as.integer(as.character(d$leg_lack)) C_width_new <- seq(min(d$shell_width), max(d$shell_width), length.out=600) data <- list(N=nrow(d), K=max(d$choice), L=max(d$id), Y=d$choice, Np=500, C_width_p = seq(min(d$shell_width), max(d$shell_width), length.out=100), #cap_ex_size=d$cap_ex_size, C_width=d$shell_width, C_width_new=C_width_new, Leg_lack=d$leg_lack, ID=d$id) ``` $N_{animal}=$ `r data$L`, $N_{act}=$ `r data$N`. ## choice, model 1_1 ### Stan code ```{stan, output.var='dump', eval=FALSE} // model 1_1 in Harada and Kagaya, 2018 functions { real f(real mu, real s, real x){ return(normal_lpdf(x | mu, s)); } real log_lik_Simpson(real mu, real s, real a, real b, int M) { vector[M+1] lp; real h; h = (b-a)/M; lp[1] = f(mu, s, a); for (m in 1:(M/2)) lp[2*m] = log(4) + f(mu, s, a + h*(2*m-1)); for (m in 1:(M/2-1)) lp[2*m+1] = log(2) + f(mu, s, a + h*2*m); lp[M+1] = f(mu, s, b); return(log(h/3) + log_sum_exp(lp)); } real f2(int Y, real cwl, real cwno, real ll, real lno, real C_width, real Leg_lack, real bias_no0, real bias_l_K){ vector[3] mu; mu[1] = 0; mu[2] = bias_l_K + cwl*C_width + ll*Leg_lack; mu[3] = bias_no0 + cwno*C_width + lno*Leg_lack; return(categorical_logit_lpmf(Y | mu)); } real log_lik_Simpson_rest(int Y, real cwl, real cwno, real ll, real lno, real C_width, real Leg_lack, real bias_no0, real bias_l_lower, real bias_l_upper, int M){ vector[M+1] lp; real h; h = (bias_l_upper - bias_l_lower)/M; lp[1] = f2(Y, cwl, cwno, ll, lno, C_width, Leg_lack, bias_no0, bias_l_lower); for (m in 1:(M/2)) lp[2*m] = log(4) + f2(Y, cwl, cwno, ll, lno, C_width, Leg_lack, bias_no0, bias_l_lower + h*(2*m-1)); for (m in 1:(M/2-1)) lp[2*m+1] = log(2) + f2(Y, cwl, cwno, ll, lno, C_width, Leg_lack, bias_no0, bias_l_lower + h*2*m); lp[M+1] = f2(Y, cwl, cwno, ll, lno, C_width, Leg_lack, bias_no0, bias_l_upper); return(log(h/3) + log_sum_exp(lp)); } } data { int N; int K; int L; int Y[N]; real C_width[N]; real C_width_new[600]; int Leg_lack[N]; int ID[N]; } parameters { real bias_l[L]; real bias_l0; real ll; real cwl; real s_bias_l; real cwno; real bias_no0; real lno; } transformed parameters { matrix[N,K] mu; for (n in 1:N){ mu[n,1] = 0; mu[n,2] = bias_l[ID[n]] + cwl*C_width[n] + ll*Leg_lack[n]; mu[n,3] = bias_no0 + cwno*C_width[n] + lno*Leg_lack[n]; } } model { for (l in 1:L){ bias_l[l] ~ normal(bias_l0, s_bias_l); } for (n in 1:N){ Y[n] ~ categorical_logit(mu[n,]'); } } generated quantities { vector[N] log_lik; real log_lik_l; int pred_Y[600]; matrix[600,3] mu_new; real bias_l_new; // for making computing fast, separately compute loglik unrelated to Y[n] and other parameters log_lik_l = log_lik_Simpson(bias_l0, s_bias_l, bias_l0 - 5*s_bias_l, bias_l0 + 5*s_bias_l, 100); for (n in 1:N){ log_lik[n] = log_lik_l + log_lik_Simpson_rest(Y[n], cwl, cwno, ll, lno, C_width[n], Leg_lack[n], bias_no0, bias_l0 - 5*s_bias_l, bias_l0 + 5*s_bias_l, 100); } bias_l_new = normal_rng(bias_l0, s_bias_l); for ( k in 1:600) { mu_new[k,1] = 0; mu_new[k,2] = bias_l_new + cwl*C_width_new[k] + ll; // Leg_lack is fixed to 1 mu_new[k,3] = bias_no0 + cwno*C_width_new[k] + lno; // Leg_lack is fixed to 1 pred_Y[k] = categorical_logit_rng(mu_new[k,]'); } } ``` Note that the parameters $bias\_l[l], l=1,...,L$ are integrated out in the 'generated quantities' block using Simpson's rule to compute the WAIC in the focus of our prediction. ### sampling ```{r} # model <- stan_model("choice_RE_random_intercept3_cw_leg.stan") # fit <- sampling(model, ### # data=data, seed=1234, # chains=4, iter=6000, warmup=1000, thin=1) # # save(fit, file= "choice_RE_random_intercept3_cw_leg.rdata") load("choice_RE_random_intercept3_cw_leg.rdata") ``` Please save the Stan code above as 'choice_RE_random_intercept3_cw_leg.stan' and uncomment the region to perform sampling in your environment. Here we just load the result saved as 'choice_RE_random_intercept3_cw_leg.rdata' in our local environment (the session information is added the last part of this document). Do the same thing for all of the models below. ### WAIC ```{r} ms <- rstan::extract(fit) ms$log_lik %>% waic() -> w11 w11 ``` ### plot posterior distributions of the parameters ```{r} stan_plot(fit, c("cwl", "cwno", "ll", "lno")) ``` # preparing data for figure 5 (model 1_1 prediction) ```{r} line_n <- 10 bias_l_new <- nth_dens(ms$bias_l_new, line_n) cw_l <- nth_dens(ms$cwl, line_n) bias_no0 <- nth_dens(ms$bias_no0, line_n) cw_no <- nth_dens(ms$cwno, line_n) mu2 <- mu3 <- theta2 <- theta3 <- matrix(nrow=500, ncol=line_n) new_C_width <- seq(min(d$shell_width), max(d$shell_width), length.out=500) for (j in 1:line_n) { for (i in 1:500){ mu2[i, j] <- bias_l_new[j] + cw_l[j]*new_C_width[i] mu3[i, j] <- bias_no0[j] + cw_no[j]*new_C_width[i] theta2[i, j] <- 1 + exp(mu2[i, j])/(1 + exp(mu2[i, j]) + exp(mu3[i, j])) theta3[i, j] <- 1 + 2 * exp(mu3[i, j])/(1 + exp(mu2[i, j]) + exp(mu3[i, j])) } } pred <- tibble() for (j in 1:line_n) { pred <- rbind(pred, tibble(n=j, carapace_width=new_C_width, theta2=theta2[,j], theta3=theta3[,j])) } # pred <- cbind(pred, col=rep(viridis_pal()(line_n), each=500)) gid <- levels(d$id)[table(d$id) > 1] mul <- factor(rep("s", dim(d)[1]), levels=c("g", "s")) for (i in 1:dim(d)[1]){ if (d$id[i] %in% gid){ mul[i] <- c("g") } } d$mul <- mul dd <- d dd$choice <- as.integer(d$choice) d_seg <- dd %>% group_by(id) %>% summarise( choice_min=min(choice), choice_max=max(choice), shell_width=first(shell_width)) %>% drop_na() d <- add_chosen_col_choice(d) # min_x <- min(data$C_width_new) - 0.2 # max_x <- max(data$C_width_new) + 0.2 # tic() # ddd <- make_xyz(c(rep(data$C_width_new, each=20000), min_x, max_x), # y=c(as.vector(ms$pred_Y), 0.8, 3.2)) # adding two points for adjusting plot range of y axis # toc() # take some time tic() ddd <- make_xyzy(data$C_width_new, ms$pred_Y, yn = 200, ylim=c(0.7, 3.3)) toc() ``` # plot figure 5 ```{r} plt_fig5_model1_1 <- ggplot(ddd) + geom_raster(aes(x=x, y=y, fill=z)) + geom_line(aes(carapace_width, theta2, group=n, alpha=1/n), color="white", data=pred) + geom_line(aes(carapace_width, theta3, group=n, alpha=1/n), color="white", data=pred) + geom_point(aes(shell_width, choice, size=chosen_num), pch=0,col="white", alpha=0.8, data=d) + geom_segment(aes(x = shell_width, y = choice_min, xend = shell_width, yend = choice_max), data = d_seg, alpha = 0.5, lty=3, col='white') + scale_x_continuous(expand = c(0, 0)) + scale_y_continuous(expand = c(0, 0)) + ggtitle(paste("model 1_1", "WAIC:", w11)) + theme(legend.position='none') + xlab("carapace width (cm)") + ylab("sponge choice") + scale_fill_viridis() + scale_colour_viridis_c() save(plt_fig5_model1_1, file="plt_fig5_model1_1.rdata") ggsave(plt_fig5_model1_1, file="plt_fig5_model1_1.pdf", width=4*3/2, height=3*3/2, useDingbats=F) plt_fig5_model1_1 ``` ## choice, model 1_2 ### Stan code ```{stan, output.var='dump', eval=FALSE} functions { real f(real mu, real s, real x){ return(normal_lpdf(x | mu, s)); } real log_lik_Simpson(real mu, real s, real a, real b, int M) { vector[M+1] lp; real h; h = (b-a)/M; lp[1] = f(mu, s, a); for (m in 1:(M/2)) lp[2*m] = log(4) + f(mu, s, a + h*(2*m-1)); for (m in 1:(M/2-1)) lp[2*m+1] = log(2) + f(mu, s, a + h*2*m); lp[M+1] = f(mu, s, b); return(log(h/3) + log_sum_exp(lp)); } real f2(int Y, real cw_l, real cw_no, real C_width, real bias_no0, real bias_l_K){ vector[3] mu; mu[1] = 0; mu[2] = bias_l_K + cw_l*C_width; mu[3] = bias_no0 + cw_no*C_width; return(categorical_logit_lpmf(Y | mu)); } real log_lik_Simpson_rest(int Y, real cw_l, real cw_no, real C_width, real bias_no0, real bias_l_lower, real bias_l_upper, int M){ vector[M+1] lp; real h; h = (bias_l_upper - bias_l_lower)/M; lp[1] = f2(Y, cw_l, cw_no, C_width, bias_no0, bias_l_lower); for (m in 1:(M/2)) lp[2*m] = log(4) + f2(Y, cw_l, cw_no, C_width, bias_no0, bias_l_lower + h*(2*m-1)); for (m in 1:(M/2-1)) lp[2*m+1] = log(2) + f2(Y, cw_l, cw_no, C_width, bias_no0, bias_l_lower + h*2*m); lp[M+1] = f2(Y, cw_l, cw_no, C_width, bias_no0, bias_l_upper); return(log(h/3) + log_sum_exp(lp)); } } data { int N; int K; int L; int Y[N]; real C_width[N]; int Leg_lack[N]; int ID[N]; } parameters { real bias_l[L]; real bias_l0; real cw_l; real s_bias_l; real bias_no0; real cw_no; } transformed parameters { matrix[N,K] mu; for (n in 1:N){ mu[n,1] = 0; mu[n,2] = bias_l[ID[n]] + cw_l*C_width[n]; mu[n,3] = bias_no0 + cw_no*C_width[n]; } } model { for (l in 1:L){ bias_l[l] ~ normal(bias_l0, s_bias_l); } for (n in 1:N){ Y[n] ~ categorical_logit(mu[n,]'); } } generated quantities { vector[N] log_lik; real log_lik_l; log_lik_l = log_lik_Simpson(bias_l0, s_bias_l, bias_l0 - 5*s_bias_l, bias_l0 + 5*s_bias_l, 100); for (n in 1:N){ log_lik[n] = log_lik_l + log_lik_Simpson_rest(Y[n], cw_l, cw_no, C_width[n], bias_no0, bias_l0 - 5*s_bias_l, bias_l0 + 5*s_bias_l, 100); } } ``` ### sampling ```{r} # fit <- stan(file='choice_RE_random_intercept3.stan', ### # data=data, seed=1234, # chains=4, iter=6000, warmup=1000) # save(fit, file="choice_RE_random_intercept3.rdata") load("choice_RE_random_intercept3.rdata") ``` ```{r} ms <- rstan::extract(fit) ms$log_lik %>% waic() -> w12 w12 ``` ## choice, model 1_3 ### Stan code ```{stan, stan, output.var='dump', eval=FALSE} functions { real f(real mu, real s, real x){ return(normal_lpdf(x | mu, s)); } real log_lik_Simpson(real mu, real s, real a, real b, int M) { vector[M+1] lp; real h; h = (b-a)/M; lp[1] = f(mu, s, a); for (m in 1:(M/2)) lp[2*m] = log(4) + f(mu, s, a + h*(2*m-1)); for (m in 1:(M/2-1)) lp[2*m+1] = log(2) + f(mu, s, a + h*2*m); lp[M+1] = f(mu, s, b); return(log(h/3) + log_sum_exp(lp)); } real f2(int Y, real bias_no0, real bias_l_K){ vector[3] mu; mu[1] = 0; mu[2] = bias_l_K; mu[3] = bias_no0; return(categorical_logit_lpmf(Y | mu)); } real log_lik_Simpson_rest(int Y, real bias_no0, real bias_l_lower, real bias_l_upper, int M){ vector[M+1] lp; real h; h = (bias_l_upper - bias_l_lower)/M; lp[1] = f2(Y, bias_no0, bias_l_lower); for (m in 1:(M/2)) lp[2*m] = log(4) + f2(Y, bias_no0, bias_l_lower + h*(2*m-1)); for (m in 1:(M/2-1)) lp[2*m+1] = log(2) + f2(Y, bias_no0, bias_l_lower + h*2*m); lp[M+1] = f2(Y, bias_no0, bias_l_upper); return(log(h/3) + log_sum_exp(lp)); } } data { int N; int K; int L; int Y[N]; real C_width[N]; int Leg_lack[N]; int ID[N]; } parameters { real bias_l[L]; real bias_l0; real s_bias_l; real bias_no0; } transformed parameters { matrix[N,K] mu; for (n in 1:N){ mu[n,1] = 0; mu[n,2] = bias_l[ID[n]]; mu[n,3] = bias_no0; } } model { for (l in 1:L){ bias_l[l] ~ normal(bias_l0, s_bias_l); } for (n in 1:N){ Y[n] ~ categorical_logit(mu[n,]'); } } generated quantities { vector[N] log_lik; real log_lik_l; log_lik_l = log_lik_Simpson(bias_l0, s_bias_l, bias_l0 - 5*s_bias_l, bias_l0 + 5*s_bias_l, 100); for (n in 1:N){ log_lik[n] = log_lik_l + log_lik_Simpson_rest(Y[n], bias_no0, bias_l0 - 5*s_bias_l, bias_l0 + 5*s_bias_l, 100); } } ``` ### sampling ```{r} # fit <- stan(file='choice_RE_random_intercept3_only.stan', ### # data=data, seed=1234, # chains=4, iter=6000, warmup=1000, thin=1) # save(fit, file="choice_RE_random_intercept3_only.rdata") load("choice_RE_random_intercept3_only.rdata") ``` ### WAIC ```{r} ms <- rstan::extract(fit) ms$log_lik %>% waic() -> w13 w13 ``` ## choice, model 1_4 ### Stan code ```{stan, output.var='dump', eval=FALSE} functions { real f(real mu, real s, real x){ return(normal_lpdf(x | mu, s)); } real log_lik_Simpson(real mu, real s, real a, real b, int M) { vector[M+1] lp; real h; h = (b-a)/M; lp[1] = f(mu, s, a); for (m in 1:(M/2)) lp[2*m] = log(4) + f(mu, s, a + h*(2*m-1)); for (m in 1:(M/2-1)) lp[2*m+1] = log(2) + f(mu, s, a + h*2*m); lp[M+1] = f(mu, s, b); return(log(h/3) + log_sum_exp(lp)); } real f2(int Y, real ll, real lno, real Leg_lack, real bias_no0, real bias_l_K){ vector[3] mu; mu[1] = 0; mu[2] = bias_l_K + ll*Leg_lack; mu[3] = bias_no0 + lno*Leg_lack; return(categorical_logit_lpmf(Y | mu)); } real log_lik_Simpson_rest(int Y, real ll, real lno, real Leg_lack, real bias_no0, real bias_l_lower, real bias_l_upper, int M){ vector[M+1] lp; real h; h = (bias_l_upper - bias_l_lower)/M; lp[1] = f2(Y, ll, lno, Leg_lack, bias_no0, bias_l_lower); for (m in 1:(M/2)) lp[2*m] = log(4) + f2(Y, ll, lno, Leg_lack, bias_no0, bias_l_lower + h*(2*m-1)); for (m in 1:(M/2-1)) lp[2*m+1] = log(2) + f2(Y, ll, lno, Leg_lack, bias_no0, bias_l_lower + h*2*m); lp[M+1] = f2(Y, ll, lno, Leg_lack, bias_no0, bias_l_upper); return(log(h/3) + log_sum_exp(lp)); } } data { int N; int K; int L; int Y[N]; real C_width[N]; int Leg_lack[N]; int ID[N]; } parameters { real bias_l[L]; real bias_l0; real ll; real s_bias_l; real bias_no0; real lno; } transformed parameters { matrix[N,K] mu; for (n in 1:N){ mu[n,1] = 0; mu[n,2] = bias_l[ID[n]] + ll*Leg_lack[n]; mu[n,3] = bias_no0 + lno*Leg_lack[n]; } } model { for (l in 1:L){ bias_l[l] ~ normal(bias_l0, s_bias_l); } for (n in 1:N){ Y[n] ~ categorical_logit(mu[n,]'); } } generated quantities { vector[N] log_lik; real log_lik_l; log_lik_l = log_lik_Simpson(bias_l0, s_bias_l, bias_l0 - 5*s_bias_l, bias_l0 + 5*s_bias_l, 100); for (n in 1:N){ log_lik[n] = log_lik_l + log_lik_Simpson_rest(Y[n], ll, lno, Leg_lack[n], bias_no0, bias_l0 - 5*s_bias_l, bias_l0 + 5*s_bias_l, 100); } } ``` ### sampling ```{r} # fit <- stan(file='choice_RE_random_intercept3_leg.stan', ### # data=data, seed=1234, # chains=4, iter=6000, warmup=1000, thin=1) # save(fit, "choice_RE_random_intercept3_leg.rdata") load("choice_RE_random_intercept3_leg.rdata") ``` ### WAIC ```{r} ms <- rstan::extract(fit) ms$log_lik %>% waic() -> w14 w14 ``` ## choice, model 1_5 ### Stan code ```{stan, output.var='dump', eval=FALSE} data { int N; int K; int L; int Y[N]; real C_width[N]; int Leg_lack[N]; int ID[N]; } parameters { real bias_l0; real cw_l; real bias_no0; real cw_no; } transformed parameters { matrix[N,K] mu; for (n in 1:N){ mu[n,1] = 0; mu[n,2] = bias_l0 + cw_l*C_width[n]; mu[n,3] = bias_no0 + cw_no*C_width[n]; } } model { for (n in 1:N){ Y[n] ~ categorical_logit(mu[n,]'); } } generated quantities { vector[N] log_lik; for (n in 1:N){ log_lik[n] = categorical_logit_lpmf(Y[n] | mu[n,]'); } } ``` ### sampling ```{r} # fit <- stan(file='choice_no_RE3.stan', ### # data=data, seed=1234, # chains=4, iter=6000, warmup=1000) # save(fit, file="choice_no_RE3.rdata") load("choice_no_RE3.rdata") ``` ### WAIC ```{r} ms <- rstan::extract(fit) ms$log_lik %>% waic() -> w15 w15 ``` ## choice, model 1_6 ### Stan code ```{stan, output.var='dump', eval=FALSE} data { int N; int K; int L; int Y[N]; real C_width[N]; int Leg_lack[N]; } parameters { real bias_l0; real cw_l; real ll_l; real bias_no0; real cw_no; real ll_no; } transformed parameters { matrix[N,K] mu; for (n in 1:N){ mu[n,1] = 0; mu[n,2] = bias_l0 + cw_l*C_width[n] + ll_l*Leg_lack[n]; mu[n,3] = bias_no0 + cw_no*C_width[n] + ll_no*Leg_lack[n]; } } model { for (n in 1:N){ Y[n] ~ categorical_logit(mu[n,]'); } } generated quantities { real log_lik[N]; for (n in 1:N){ log_lik[n] = categorical_logit_lpmf(Y[n] | mu[n,]'); } } ``` ### sampling ```{r} # fit <- stan(file='choice_no_RE.stan', # data=data, seed=1234, # chains=4, iter=2000, warmup=500, thin=1) # save(fit, file="choice_no_RE.rdata") load("choice_no_RE.rdata") ``` ### WAIC ```{r} ms <- rstan::extract(fit) ms$log_lik %>% waic() -> w16 w16 ``` ## preparing for plot 1_6 ```{r} line_n <- 10 bias_l0 <- nth_dens(ms$bias_l0, line_n) cw_l <- nth_dens(ms$cwl, line_n) bias_no0 <- nth_dens(ms$bias_no0, line_n) cw_no <- nth_dens(ms$cwno, line_n) mu2 <- mu3 <- theta2 <- theta3 <- matrix(nrow=500, ncol=line_n) new_C_width <- seq(min(d$shell_width), max(d$shell_width), length.out=500) for (j in 1:line_n) { for (i in 1:500){ mu2[i, j] <- bias_l0[j] + cw_l[j]*new_C_width[i] mu3[i, j] <- bias_no0[j] + cw_no[j]*new_C_width[i] theta2[i, j] <- 1 + exp(mu2[i, j])/(1 + exp(mu2[i, j]) + exp(mu3[i, j])) theta3[i, j] <- 1 + 2 * exp(mu3[i, j])/(1 + exp(mu2[i, j]) + exp(mu3[i, j])) } } pred <- tibble() for (j in 1:line_n) { pred <- rbind(pred, tibble(n=j, carapace_width=new_C_width, theta2=theta2[,j], theta3=theta3[,j])) } d <- add_chosen_col_choice(d) # tic() # ddd <- make_xyz(c(rep(data$C_width_new, each=6000), min_x, max_x), # y=c(as.vector(ms$pred_Y), 0.8, 3.2)) # adding two points for adjusting plot range of y axis # toc() # # take some time ddd <- make_xyzy(data$C_width_new, ms$pred_Y, yn = 200, ylim=c(0.7, 3.3)) ``` ## plot for model 1_6 ```{r} plt_fig5_model1_6 <- ggplot(ddd) + geom_raster(aes(x=x, y=y, fill=z)) + geom_line(aes(carapace_width, theta2, group=n, alpha=1/n), color="white", data=pred) + geom_line(aes(carapace_width, theta3, group=n, alpha=1/n), color="white", data=pred) + geom_point(aes(shell_width, choice, size=chosen_num), pch=0,col="white", alpha=0.8, data=d) + geom_segment(aes(x = shell_width, y = choice_min, xend = shell_width, yend = choice_max), data = d_seg, alpha = 0.5, lty=3, col='white') + scale_x_continuous(expand = c(0, 0)) + scale_y_continuous(expand = c(0, 0)) + ggtitle(paste("model 1_6", "WAIC:", w16)) + theme(legend.position='none') + xlab("carapace width (cm)") + ylab("sponge choice") + scale_fill_viridis() + scale_colour_viridis_c() save(plt_fig5_model1_6, file="plt_fig5_model1_6.rdata") ggsave(plt_fig5_model1_6, file="plt_fig5_model1_6.pdf", width=4*3/2, height=3*3/2, useDingbats=F) plt_fig5_model1_6 ``` ## comparing WAIC values in AIC scale ```{r} waicv_m1 <- c(w11, w12, w13, w14, w15, w16) * data$N waicv_m1 <- waicv_m1 - min(waicv_m1) plot(waicv_m1, ylab=c("WAICs for models for sponge choice")) ``` # 2. Trimming ## preprocessing ```{r} d <- read_csv(file='Dromiidae.csv') d <- d %>% dplyr::select(c('shell_width', 'choice', 'whole_px', 'cm_per_px', 'id', 'leg_lack', 'cutting' )) d <- d[complete.cases(d), ] ## calculate exterior cap size d$cap_ex_size <- d$whole_px * d$cm_per_px^2 / 1.15 # renumber animal id from 1 id <- as.factor(d$id) levels(id) <- as.character(1:length(levels(id))) d$id <- as.integer(id) d$choice <- as.factor(d$choice) levels(d$choice) <- c(1,0) # label M,L, as 0,1 # 3 is deleted because no cap_ex_size d$choice <- as.integer(as.character(d$choice)) d$leg_lack <- as.factor(d$leg_lack) levels(d$leg_lack) <- c(2,3,1) # label no_leg_lack, A, C, as 1, 2, 3 d$leg_lack <- as.integer(as.character(d$leg_lack)) d$cutting <- as.integer(as.factor(d$cutting)) dd <- d %>% mutate(cap_ex_size=if_else(choice==0 & cutting == "1", 51, cap_ex_size)) dd <- dd %>% mutate(cap_ex_size=if_else(choice==0 & cap_ex_size > 51, 51, cap_ex_size)) dd <- dd %>% mutate(cap_ex_size=if_else(choice==1 & cutting == "1", 210, cap_ex_size)) dd <- dd %>% mutate(cap_ex_size=if_else(choice==1 & cap_ex_size > 210, 210, cap_ex_size)) ddd <- dd %>% mutate(removed_size = if_else(choice==0, 51 - cap_ex_size, if_else(choice==1, 210 - cap_ex_size, 1000))) ddd$removed_size <- round(ddd$removed_size) data <- list(N=nrow(ddd), K=max(ddd$id), Choice=ddd$choice, C_width=d$shell_width, C_width_new=seq(min(d$shell_width), max(d$shell_width), length.out=600), ID=as.integer(ddd$id), Removed=ddd$removed_size) ``` The sample size of animals and acts are, $N_{animal}=$ `r data$K` and $N_{act}=$ `r data$N`, respectively. ## trimming, model 2_1 ### Stan code ```{stan, output.var='dump', eval=FALSE} functions { real ZIP_lpmf(int Removed, real q, real lambda) { if (Removed == 0) { return log_sum_exp( bernoulli_lpmf(0 | q), bernoulli_lpmf(1 | q) + poisson_log_lpmf(0 | lambda) ); } else { return bernoulli_lpmf(1 | q) + poisson_log_lpmf(Removed | lambda); } } int myZIP_rng(real q, real lambda) { int choice; choice = bernoulli_rng(q); if (choice==0) { return( 0 ); } else { return( poisson_log_rng(lambda) ); } } real f(real mu, real s, real x){ return(normal_lpdf(x | mu, s)); } real log_lik_Simpson(real mu, real s, real a, real b, int M) { vector[M+1] lp; real h; h = (b-a)/M; lp[1] = f(mu, s, a); for (m in 1:(M/2)) lp[2*m] = log(4) + f(mu, s, a + h*(2*m-1)); for (m in 1:(M/2-1)) lp[2*m+1] = log(2) + f(mu, s, a + h*2*m); lp[M+1] = f(mu, s, b); return(log(h/3) + log_sum_exp(lp)); } real f2(int Removed, real a1, real a3, real b1, real b3, real C_w, real Choice, real ak, real bk){ real q; real lambda; q = inv_logit(ak + a1*C_w + a3*Choice); lambda = bk + b1*C_w + b3*Choice; if (Removed == 0) { return log_sum_exp( bernoulli_lpmf(0 | q), bernoulli_lpmf(1 | q) + poisson_log_lpmf(0 | lambda) ); } else { return bernoulli_lpmf(1 | q) + poisson_log_lpmf(Removed | lambda); } } real log_lik_Simpson_ak(int Removed, real a1, real a3, real b1, real b3, real C_w, real Choice, real a_lower, real a_upper, real bk, int M){ vector[M+1] lp; real h; h = (a_upper - a_lower)/M; lp[1] = f2(Removed, a1, a3, b1, b3, C_w, Choice, a_lower, bk); for (m in 1:(M/2)) lp[2*m] = log(4) + f2(Removed, a1, a3, b1, b3, C_w, Choice, a_lower + h*(2*m-1), bk); for (m in 1:(M/2-1)) lp[2*m+1] = log(2) + f2(Removed, a1, a3, b1, b3, C_w, Choice, a_lower + h*2*m, bk); lp[M+1] = f2(Removed, a1, a3, b1, b3, C_w, Choice, a_upper, bk); return(log(h/3) + log_sum_exp(lp)); } real log_lik_Simpson_rest(int Removed, real a1, real a3, real b1, real b3, real C_w, real Choice, real a_lower, real a_upper, real b_lower, real b_upper, int M){ vector[M+1] lp; real h; h = (b_upper - b_lower)/M; lp[1] = log_lik_Simpson_ak(Removed, a1, a3, b1, b3, C_w, Choice, a_lower, a_upper, b_lower, M); for (m in 1:(M/2)) lp[2*m] = log(4) + log_lik_Simpson_ak(Removed, a1, a3, b1, b3, C_w, Choice, a_lower, a_upper, b_lower + h*(2*m-1), M); for (m in 1:(M/2-1)) lp[2*m+1] = log(2) + log_lik_Simpson_ak(Removed, a1, a3, b1, b3, C_w, Choice, a_lower, a_upper, b_lower + h*2*m, M); lp[M+1] = log_lik_Simpson_ak(Removed, a1, a3, b1, b3, C_w, Choice, a_lower, a_upper, b_upper, M); return(log(h/3) + log_sum_exp(lp)); } } data { int N; int K; // number of individuals int Removed[N]; real Choice[N]; real C_width[N]; int ID[N]; real C_width_new[600]; } parameters { real a1; real a2_0; vector[K] a2; // real a2w[K]; real a2_s; real a3; real b1; real b2_0; vector[K] b2; // real b2w[K]; real b2_s; real b3; } transformed parameters { real q[N]; real lambda[N]; for (n in 1:N){ q[n] = inv_logit(a2[ID[n]] + a1*C_width[n] + a3*Choice[n]); lambda[n] = b2[ID[n]] + b1*C_width[n] + b3*Choice[n]; } } model { a2_s ~ student_t(4, 0, 10); b2_s ~ student_t(4, 0, 10); a2 ~ normal(a2_0, a2_s); b2 ~ normal(b2_0, b2_s); for (n in 1:N){ Removed[n] ~ ZIP(q[n], lambda[n]); } } generated quantities { real log_lik[N]; real log_lik_a; real log_lik_b; int pRemoved[600]; real pp[600]; real plambda[600]; real pa2; real pb2; // here we have two local parameters to be marginalized out log_lik_a = log_lik_Simpson(a2_0, a2_s, a2_0 - 5*a2_s, a2_0 + 5*a2_s, 100); log_lik_b = log_lik_Simpson(b2_0, b2_s, b2_0 - 5*b2_s, b2_0 + 5*b2_s, 100); for (n in 1:N){ log_lik[n] = log_lik_a + log_lik_b + log_lik_Simpson_rest(Removed[n], a1, a3, b1, b3, C_width[n], Choice[n], a2_0 - 5*a2_s, a2_0 + 5*a2_s, b2_0 - 5*b2_s, b2_0 + 5*b2_s, 100); } pa2 = normal_rng(a2_0, a2_s); pb2 = normal_rng(b2_0, b2_s); for (n in 1:600){ pp[n] = inv_logit(pa2 + a1*C_width_new[n] + a3); // fixed to choice L plambda[n] = pb2 + b1*C_width_new[n] + b3; // fixed to choice L pRemoved[n] = myZIP_rng(pp[n], plambda[n]); } } ``` ### sampling ```{r} # fit <- stan(file='choice_and_cutting2_2.stan', ### # data=data, seed=2134, # chains=4, iter=6000, warmup=2000, thin=1 # ) # save(fit, file="choice_and_cutting2_2.rdata") load(file="choice_and_cutting2_2.rdata") ``` ### WAIC ```{r} ms <- rstan::extract(fit) ms$log_lik %>% waic() -> w21 w21 ``` ### plot posterior distribution of the parameters ```{r} stan_plot(fit, c("a1", "a3", "b1", "b3")) # b_{cut}, c_{cut}, e_{cut}, f_{cut} ``` ### plot figure For the figure 6. #### preprocessing data for figure 6 ```{r} pdens_lambda <- make_xyzy(data$C_width_new, ms$plambda, ylim=c(-1,10)) pdens_pRemoved <- make_xyzy(data$C_width_new, ms$pRemoved, ylim=c(-5, max(data$Removed))) pdens_pRemoved2 <- make_xyzy(data$C_width_new, ms$pRemoved, ylim=c(0, max(data$Removed))) ``` ```{r} plt_fig6_model2_1 <- ggplot(pdens_lambda) + geom_raster(aes(x, y, fill=z)) + scale_fill_viridis() + scale_x_continuous(expand = c(0, 0)) + scale_y_continuous(expand = c(0, 0), limits=c(-1, 10)) + geom_point(aes(shell_width, log(removed_size)), pch=1, col="white",size=2, data=ddd %>% dplyr::filter(removed_size != 0)) + theme(legend.position='none') + xlab("carapace width (cm)") + ylab("removed size of sponge (log(cm^2))") + ggtitle(paste("model 2_1 WAIC:", w21)) ggsave(plt_fig6_model2_1, file="plt_fig6_model2_1.pdf", width=4*3/2, height=3*3/2, useDingbats=F) plt_fig6_model2_1 ``` ```{r} line_n <- 50 # pb2 + b1*C_width_new[n] + b3 pb2 <- nth_dens(ms$pb2, line_n) b1 <- nth_dens(ms$b1, line_n) b3 <- nth_dens(ms$b3, line_n) plambda <- matrix(nrow=line_n, ncol=600) for (i in 1:line_n){ plambda[i,] <- pb2[i] + b1[i]*data$C_width_new + b3[i] } pred <- tibble() for(i in 1:line_n) { pred <- rbind(pred, tibble(n=i, C_width_new=data$C_width_new, plambda=plambda[i,])) } plot(NA, xlim=c(min(data$C_width_new), max(data$C_width_new)), ylim=c(0, 200)) for (i in 1:line_n) {lines(data$C_width_new, exp(plambda[i,]))} ``` ```{r} d_seg <- ddd %>% group_by(id) %>% summarise( removed_min=min(removed_size), removed_max=max(removed_size), shell_width=first(shell_width)) %>% drop_na() plt_fig6_model2_1_removed <- ggplot(pdens_pRemoved) + geom_raster(aes(x, y, fill=z)) + scale_fill_viridis() + scale_x_continuous(expand = c(0, 0)) + scale_y_continuous(expand = c(0, 0), limits=c(-5, max(data$Removed))) + geom_point(aes(shell_width, removed_size, shape=as.factor(choice)), col="white",size=2, data=ddd) + geom_line(aes(C_width_new, exp(plambda), group=n, alpha=1/n), col="white", data=pred) + geom_segment( aes(x = shell_width, y = removed_min, xend = shell_width, yend = removed_max), data = d_seg, alpha = 0.5, lty=3, col="white") + theme(legend.position='none') + scale_shape_manual(values=c(0,1)) + xlab("carapace width (cm)") + ylab("removed size of sponge (cm^2)") + ggtitle(paste("model 2_1 WAIC:", w21)) ggsave(plt_fig6_model2_1_removed, file="plt_fig6_model2_1_removed.pdf", width=4*3/2, height=3*3/2, useDingbats=F) plt_fig6_model2_1_removed ``` ```{r} plt_fig6_model2_1_removed2 <- ggplot(pdens_pRemoved2) + geom_raster(aes(x, y, fill=z)) + scale_fill_viridis() + scale_x_continuous(expand = c(0, 0)) + scale_y_continuous(expand = c(0, 0), limits=c(-5, max(data$Removed))) + geom_point(aes(shell_width, removed_size, shape=as.factor(choice)), col="white",size=2, data=ddd %>% filter(removed_size != 0)) + geom_line(aes(C_width_new, exp(plambda), group=n, alpha=1/n), col="white", data=pred) + geom_segment( aes(x = shell_width, y = removed_min, xend = shell_width, yend = removed_max), data = d_seg, alpha = 0.5, lty=3, col="white") + theme(legend.position='none') + scale_shape_manual(values=c(0, 1)) + xlab("carapace width (cm)") + ylab("removed size of sponge (cm^2)") + ggtitle(paste("model 2_1 2 WAIC:", w21)) ggsave(plt_fig6_model2_1_removed2, file="plt_fig6_model2_1_removed2.pdf", width=4*3/2, height=3*3/2, useDingbats=F) plt_fig6_model2_1_removed2 ``` ## trimming, model 2_2 ### Stan code ```{stan, output.var='dump', eval=FALSE} functions { real ZIP_lpmf(int Removed, real q, real lambda) { if (Removed == 0) { return log_sum_exp( bernoulli_lpmf(0 | q), bernoulli_lpmf(1 | q) + poisson_log_lpmf(0 | lambda) ); } else { return bernoulli_lpmf(1 | q) + poisson_log_lpmf(Removed | lambda); } } int myZIP_rng(real q, real lambda) { int choice; choice = bernoulli_rng(q); if (choice==0) { return( 0 ); } else { return( poisson_log_rng(lambda) ); } } real f(real mu, real s, real x){ return(normal_lpdf(x | mu, s)); } real log_lik_Simpson(real mu, real s, real a, real b, int M) { vector[M+1] lp; real h; h = (b-a)/M; lp[1] = f(mu, s, a); for (m in 1:(M/2)) lp[2*m] = log(4) + f(mu, s, a + h*(2*m-1)); for (m in 1:(M/2-1)) lp[2*m+1] = log(2) + f(mu, s, a + h*2*m); lp[M+1] = f(mu, s, b); return(log(h/3) + log_sum_exp(lp)); } real f2(int Removed, real a1, real a3, real b3, int Choice, real bk){ real q; real lambda; q = inv_logit(a1 + a3*Choice); lambda = bk + b3*Choice; return(ZIP_lpmf(Removed | q, lambda)); } real log_lik_Simpson_rest(int Removed, real a1, real a3, real b3, int Choice, real b1_lower, real b1_upper, int M){ vector[M+1] lp; real h; h = (b1_upper - b1_lower)/M; lp[1] = f2(Removed, a1, a3, b3, Choice, b1_lower); for (m in 1:(M/2)) lp[2*m] = log(4) + f2(Removed, a1, a3, b3, Choice, b1_lower + h*(2*m-1)); for (m in 1:(M/2-1)) lp[2*m+1] = log(2) + f2(Removed, a1, a3, b3, Choice, b1_lower + h*2*m); lp[M+1] = f2(Removed, a1, a3, b3, Choice, b1_upper); return(log(h/3) + log_sum_exp(lp)); } } data { int N; int K; // number of individuals int Removed[N]; int Choice[N]; real C_width[N]; int ID[N]; } parameters { real a1; real a3; real b3; real b1_0; vector[K] b1; real b1_s; } transformed parameters { real q[N]; real lambda[N]; for (n in 1:N){ q[n] = inv_logit(a1 + a3*Choice[n]); lambda[n] = b1[ID[n]] + b3*Choice[n]; } } model { b1_s ~ student_t(4, 0, 10); b1 ~ normal(b1_0, b1_s); for (n in 1:N){ Removed[n] ~ ZIP(q[n], lambda[n]); } } generated quantities { real log_lik_fix; vector[N] log_lik; real pb1; real plambda[100]; pb1 = normal_rng(b1_0, b1_s); log_lik_fix = log_lik_Simpson(b1_0, b1_s, b1_0-5*b1_s, b1_0+5*b1_s, 100); for (n in 1:N) { log_lik[n] = log_lik_fix + log_lik_Simpson_rest(Removed[n], a1, a3, b3, Choice[n], b1_0-5*b1_s, b1_0+5*b1_s, 100); } for (n in 1:100) { plambda[n] = pb1 + b3; } } ``` ### sampling ```{r} # fit <- stan(file='choice_and_cutting_waic_no_Cw.stan', ### # data=data, seed=1234, # chains=4, iter=4000, warmup=1000, thin=1 # ) # save(fit, file="choice_and_cutting_waic_no_Cw.rdata") load("choice_and_cutting_waic_no_Cw.rdata") ``` ### WAIC ```{r} ms <- rstan::extract(fit) ms$log_lik %>% waic() -> w22 w22 ``` ## trimming, model 2_3 ### Stan code ```{stan, output.var='dump', eval=FALSE} functions { real ZIP_lpmf(int Removed, real q, real lambda) { if (Removed == 0) { return log_sum_exp( bernoulli_lpmf(0 | q), bernoulli_lpmf(1 | q) + poisson_log_lpmf(0 | lambda) ); } else { return bernoulli_lpmf(1 | q) + poisson_log_lpmf(Removed | lambda); } } int myZIP_rng(real q, real lambda) { int choice; choice = bernoulli_rng(q); if (choice==0) { return( 0 ); } else { return( poisson_log_rng(lambda) ); } } real f(real mu, real s, real x){ return(normal_lpdf(x | mu, s)); } real log_lik_Simpson(real mu, real s, real a, real b, int M) { vector[M+1] lp; real h; h = (b-a)/M; lp[1] = f(mu, s, a); for (m in 1:(M/2)) lp[2*m] = log(4) + f(mu, s, a + h*(2*m-1)); for (m in 1:(M/2-1)) lp[2*m+1] = log(2) + f(mu, s, a + h*2*m); lp[M+1] = f(mu, s, b); return(log(h/3) + log_sum_exp(lp)); } real f2(int Removed, real a1, real a2, real a3, real b2, real b3, real C_width, int Choice, real bk){ real q; real lambda; q = inv_logit(a1 + a2*C_width + a3*Choice); lambda = bk + b2*C_width + b3*Choice; return(ZIP_lpmf(Removed | q, lambda)); } real log_lik_Simpson_rest(int Removed, real a1, real a2, real a3, real b2, real b3, real C_width, int Choice, real b1_lower, real b1_upper, int M){ vector[M+1] lp; real h; h = (b1_upper - b1_lower)/M; lp[1] = f2(Removed, a1, a2, a3, b2, b3, C_width, Choice, b1_lower); for (m in 1:(M/2)) lp[2*m] = log(4) + f2(Removed, a1, a2, a3, b2, b3, C_width, Choice, b1_lower + h*(2*m-1)); for (m in 1:(M/2-1)) lp[2*m+1] = log(2) + f2(Removed, a1, a2, a3, b2, b3, C_width, Choice, b1_lower + h*2*m); lp[M+1] = f2(Removed, a1, a2, a3, b2, b3, C_width, Choice, b1_upper); return(log(h/3) + log_sum_exp(lp)); } } data { int N; int K; // number of individuals int Removed[N]; int Choice[N]; real C_width[N]; int ID[N]; real C_width_new[100]; } parameters { real a1; real a2; real a3; real b2; real b3; real b1_0; vector[K] b1; real b1_s; } transformed parameters { real q[N]; real lambda[N]; for (n in 1:N){ q[n] = inv_logit(a1 + a2*C_width[n] + a3*Choice[n]); lambda[n] = b1[ID[n]] + b2*C_width[n] + b3*Choice[n]; } } model { b1 ~ normal(b1_0, b1_s); for (n in 1:N){ Removed[n] ~ ZIP(q[n], lambda[n]); } } generated quantities { real log_lik_fix; vector[N] log_lik; real pb1; real plambda[100]; log_lik_fix = log_lik_Simpson(b1_0, b1_s, b1_0-5*b1_s, b1_0+5*b1_s, 100); for (n in 1:N) { log_lik[n] = log_lik_fix + log_lik_Simpson_rest(Removed[n], a1, a2, a3, b2, b3, C_width[n], Choice[n], b1_0-5*b1_s, b1_0+5*b1_s, 100); } pb1 = normal_rng(b1_0, b1_s); for (n in 1:100){ plambda[n] = pb1 + b2*C_width_new[100] + b3; } } ``` ### sampling ```{r} # fit <- stan(file='choice_and_cutting_waic.stan', ### # data=data, seed=1234, # chains=4, iter=3000, warmup=1000, thin=1 # ) # save(fit, file="choice_and_cutting_waic.rdata") load("choice_and_cutting_waic.rdata") ``` ### WAIC ```{r} ms <- rstan::extract(fit) ms$log_lik %>% waic() -> w23 w23 ``` ## trimming, model 2_4 ### Stan code ```{stan, output.var='dump', eval=FALSE} functions { real ZIP_lpmf(int Removed, real q, real lambda) { if (Removed == 0) { return log_sum_exp( bernoulli_lpmf(0 | q), bernoulli_lpmf(1 | q) + poisson_log_lpmf(0 | lambda) ); } else { return bernoulli_lpmf(1 | q) + poisson_log_lpmf(Removed | lambda); } } real f(real mu, real s, real x){ return(normal_lpdf(x | mu, s)); } real log_lik_Simpson(real mu, real s, real a, real b, int M) { vector[M+1] lp; real h; h = (b-a)/M; lp[1] = f(mu, s, a); for (m in 1:(M/2)) lp[2*m] = log(4) + f(mu, s, a + h*(2*m-1)); for (m in 1:(M/2-1)) lp[2*m+1] = log(2) + f(mu, s, a + h*2*m); lp[M+1] = f(mu, s, b); return(log(h/3) + log_sum_exp(lp)); } real f2(int Removed, real a1, real bk){ real q; real lambda; q = inv_logit(a1); lambda = bk; return(ZIP_lpmf(Removed | q, lambda)); } real log_lik_Simpson_rest(int Removed, real a1, real b1_lower, real b1_upper, int M){ vector[M+1] lp; real h; h = (b1_upper - b1_lower)/M; lp[1] = f2(Removed, a1, b1_lower); for (m in 1:(M/2)) lp[2*m] = log(4) + f2(Removed, a1, b1_lower + h*(2*m-1)); for (m in 1:(M/2-1)) lp[2*m+1] = log(2) + f2(Removed, a1, b1_lower + h*2*m); lp[M+1] = f2(Removed, a1, b1_upper); return(log(h/3) + log_sum_exp(lp)); } } data { int N; int K; // number of individuals int Removed[N]; int Choice[N]; real C_width[N]; int ID[N]; } parameters { real a1; real b1_0; vector[K] b1; // real b1w[K]; real b1_s; } transformed parameters { real q[N]; real lambda[N]; for (n in 1:N){ q[n] = inv_logit(a1); lambda[n] = b1[ID[n]]; } } model { b1 ~ normal(b1_0, b1_s); for (n in 1:N){ Removed[n] ~ ZIP(q[n], lambda[n]); } } generated quantities { real log_lik_fix; vector[N] log_lik; log_lik_fix = log_lik_Simpson(b1_0, b1_s, b1_0-5*b1_s, b1_0+5*b1_s, 100); for (n in 1:N) { log_lik[n] = log_lik_fix + log_lik_Simpson_rest(Removed[n], a1, b1_0-5*b1_s, b1_0+5*b1_s, 100); } } ``` ### sampling ```{r} # fit <- stan(file='choice_and_cutting_waic_no_Cw_no_Choice.stan', ### # data=data, seed=1234, # chains=4, iter=4000, warmup=1000, thin=1 # ) # save(fit, file="choice_and_cutting_waic_no_Cw_no_Choice.rdata") load("choice_and_cutting_waic_no_Cw_no_Choice.rdata") ``` ### WAIC ```{r} ms <- rstan::extract(fit) ms$log_lik %>% waic() -> w24 w24 ``` ## trimming, mode 2_5 ### Stan code ```{stan, output.var='dump', eval=FALSE} functions { real ZIP_lpmf(int Removed, real q, real lambda) { if (Removed == 0) { return log_sum_exp( bernoulli_lpmf(0 | q), bernoulli_lpmf(1 | q) + poisson_log_lpmf(0 | lambda) ); } else { return bernoulli_lpmf(1 | q) + poisson_log_lpmf(Removed | lambda); } } real f(real mu, real s, real x){ return(normal_lpdf(x | mu, s)); } real log_lik_Simpson(real mu, real s, real a, real b, int M) { vector[M+1] lp; real h; h = (b-a)/M; lp[1] = f(mu, s, a); for (m in 1:(M/2)) lp[2*m] = log(4) + f(mu, s, a + h*(2*m-1)); for (m in 1:(M/2-1)) lp[2*m+1] = log(2) + f(mu, s, a + h*2*m); lp[M+1] = f(mu, s, b); return(log(h/3) + log_sum_exp(lp)); } real f2(int Removed, real a1, real a2, real b2, real C_width, real bk){ real q; real lambda; q = inv_logit(a1 + a2*C_width); lambda = bk + b2*C_width; return(ZIP_lpmf(Removed | q, lambda)); } real log_lik_Simpson_rest(int Removed, real a1, real a2, real b2, real C_width, real b1_lower, real b1_upper, int M){ vector[M+1] lp; real h; h = (b1_upper - b1_lower)/M; lp[1] = f2(Removed, a1, a2, b2, C_width, b1_lower); for (m in 1:(M/2)) lp[2*m] = log(4) + f2(Removed, a1, a2, b2, C_width, b1_lower + h*(2*m-1)); for (m in 1:(M/2-1)) lp[2*m+1] = log(2) + f2(Removed, a1, a2, b2, C_width, b1_lower + h*2*m); lp[M+1] = f2(Removed, a1, a2, b2, C_width, b1_upper); return(log(h/3) + log_sum_exp(lp)); } } data { int N; int K; // number of individuals int Removed[N]; int Choice[N]; real C_width[N]; int ID[N]; } parameters { real a1; real a2; real b2; vector[K] b1; real b1_0; real b1_s; } transformed parameters { real q[N]; real lambda[N]; for (n in 1:N){ q[n] = inv_logit(a1 + a2*C_width[n]); lambda[n] = b1[ID[n]] + b2*C_width[n]; } } model { b1 ~ normal(b1_0, b1_s); for (n in 1:N){ Removed[n] ~ ZIP(q[n], lambda[n]); } } generated quantities { real log_lik_fix; vector[N] log_lik; log_lik_fix = log_lik_Simpson(b1_0, b1_s, b1_0-5*b1_s, b1_0+5*b1_s, 100); for (n in 1:N) { log_lik[n] = log_lik_fix + log_lik_Simpson_rest(Removed[n], a1, a2, b2, C_width[n], b1_0-5*b1_s, b1_0+5*b1_s, 100); } } ``` ### sampling ```{r} # fit <- stan(file='choice_and_cutting_waic_no_Choice.stan', ### # data=data, seed=1234, # chains=4, iter=4000, warmup=1000, thin=1 # ) # save(fit, file="choice_and_cutting_waic_no_Choice.rdata") load("choice_and_cutting_waic_no_Choice.rdata") ``` ### WAIC ```{r} ms <- rstan::extract(fit) ms$log_lik %>% waic() -> w25 w25 ``` ## trimming, model 2_6 ### Stan code ```{stan, output.var='dump', eval=FALSE} functions { real ZIP_lpmf(int Removed, real q, real lambda) { if (Removed == 0) { return log_sum_exp( bernoulli_lpmf(0 | q), bernoulli_lpmf(1 | q) + poisson_log_lpmf(0 | lambda) ); } else { return bernoulli_lpmf(1 | q) + poisson_log_lpmf(Removed | lambda); } } int myZIP_rng(real q, real lambda) { int choice; choice = bernoulli_rng(q); if (choice==0) { return( 0 ); } else { return( poisson_log_rng(lambda) ); } } } data { int N; int K; // number of individuals int Removed[N]; real Choice[N]; real C_width[N]; int ID[N]; real C_width_new[100]; } parameters { real a1; real a2; real a3; real b1; real b2; real b3; } transformed parameters { real q[N]; real lambda[N]; for (n in 1:N){ q[n] = inv_logit(a1 + a2*C_width[n] + a3*Choice[n]); lambda[n] = b1 + b2*C_width[n] + b3*Choice[n]; } } model { for (n in 1:N){ Removed[n] ~ ZIP(q[n], lambda[n]); } } generated quantities { vector[N] log_lik; real plambda[100]; real pRemoved[100]; real pq[100]; for (n in 1:N) { log_lik[n] = ZIP_lpmf(Removed[n] | q[n], lambda[n]); } for (n in 1:100){ pq[n] = inv_logit(a1 + a2*C_width_new[n] + a3); plambda[n] = b1 + b2*C_width_new[n] + b3; pRemoved[n] = myZIP_rng(pq[n], plambda[n]); } } ``` ### sampling ```{r} # fit <- stan(file='choice_and_cutting_no_RE.stan', ### # data=data, seed=1234, # chains=4, iter=4000, warmup=1000, thin=1 # #control = list(max_treedepth = 16, # # adapt_delta=0.99) # ) # save(fit, file="choice_and_cutting_no_RE.rdata") load("choice_and_cutting_no_RE.rdata") ``` ### WAIC ```{r} ms <- rstan::extract(fit) ms$log_lik %>% waic() -> w26 w26 ``` ### # plot for figure 6 model 2_6 ```{r} pdens_lambda <- make_xyzy(data$C_width_new, ms$plambda, ylim=c(-1, 10)) pdens_pRemoved <- make_xyzy(data$C_width_new, ms$pRemoved, ylim=c(-5, max(data$Removed))) pdens_pRemoved2 <- make_xyzy(data$C_width_new, ms$pRemoved, ylim=c(0, max(data$Removed))) ``` ```{r} line_n <- 50 b1 <- nth_dens(ms$b1, line_n) b2 <- nth_dens(ms$b2, line_n) b3 <- nth_dens(ms$b3, line_n) plambda <- matrix(nrow=line_n, ncol=600) for (i in 1:line_n){ plambda[i,] <- b1[i] + b2[i]*data$C_width_new + b3[i] } pred <- tibble() for(i in 1:line_n) { pred <- rbind(pred, tibble(n=i, C_width_new=data$C_width_new, plambda=plambda[i,])) } plot(NA, xlim=c(min(data$C_width_new), max(data$C_width_new)), ylim=c(0, 200)) for (i in 1:line_n) {lines(data$C_width_new, exp(plambda[i,]))} ``` ```{r} plt_fig6_model2_6 <- ggplot(pdens_lambda) + geom_raster(aes(x, y, fill=z)) + scale_fill_viridis() + scale_x_continuous(expand = c(0, 0)) + scale_y_continuous(expand = c(0, 0), limits=c(-1, 10)) + geom_point(aes(shell_width, log(removed_size)), pch=1, col="white",size=2, data=ddd %>% dplyr::filter(removed_size != 0)) + geom_point(aes(shell_width, log(removed_size), col=as.factor(id)), size=1, alpha=0.5, data=ddd %>% dplyr::filter(removed_size != 0)) + theme(legend.position = 'none') + xlab("carapace width (cm)") + ylab("removed size of sponge (log(cm^2))") + ggtitle(paste("model 2_6 WAIC:", w26)) ggsave(plt_fig6_model2_6, file="plt_fig6_model2_6.pdf", width=4*3/2, height=3*3/2, useDingbats=F) plt_fig6_model2_6 ``` ```{r} plt_fig6_model2_6_removed <- ggplot(pdens_pRemoved) + geom_raster(aes(x, y, fill=z)) + scale_fill_viridis() + scale_x_continuous(expand = c(0, 0)) + scale_y_continuous(expand = c(0, 0), limits=c(-5, max(data$Removed))) + geom_point(aes(shell_width, removed_size, shape=as.factor(choice)), col="white",size=2, data=ddd) + geom_line(aes(C_width_new, exp(plambda), alpha=1/n, group=n), col="white", data=pred) + geom_segment( aes(x = shell_width, y = removed_min, xend = shell_width, yend = removed_max), data = d_seg, alpha = 0.5, lty=3, col="white") + theme(legend.position='none') + scale_shape_manual(values=c(0, 1)) + xlab("carapace width (cm)") + ylab("removed size of sponge (cm^2)") + ggtitle(paste("model 2_6 WAIC:", w26)) ggsave(plt_fig6_model2_6_removed, file="plt_fig6_model2_6_removed.pdf", width=4*3/2, height=3*3/2, useDingbats=F) plt_fig6_model2_6_removed ``` ```{r} plt_fig6_model2_6_removed2 <- ggplot(pdens_pRemoved2) + geom_raster(aes(x, y, fill=z)) + scale_fill_viridis() + scale_x_continuous(expand = c(0, 0)) + scale_y_continuous(expand = c(0, 0)) + geom_point(aes(shell_width, removed_size, shape=as.factor(cutting)), col="white",size=2, data=ddd %>% dplyr::filter(removed_size != 0)) + geom_segment( aes(x = shell_width, y = removed_min, xend = shell_width, yend = removed_max), data = d_seg, alpha = 0.5, lty=3, col="white") + theme(legend.position='none') + scale_shape_manual(values=c(0, 1)) + xlab("carapace width (cm)") + ylab("removed size of sponge (cm^2)") + ggtitle(paste("model 2_6 WAIC:", w26)) ggsave(plt_fig6_model2_6_removed2, file="plt_fig6_model2_6_removed2.pdf", width=4*3/2, height=3*3/2, useDingbats=F) plt_fig6_model2_6_removed2 ``` ## trimming, model 2_7 ### Stan code ```{stan, output.var='dump', eval=FALSE} functions { real ZIP_lpmf(int Removed, real q, real lambda) { if (Removed == 0) { return log_sum_exp( bernoulli_lpmf(0 | q), bernoulli_lpmf(1 | q) + poisson_log_lpmf(0 | lambda) ); } else { return bernoulli_lpmf(1 | q) + poisson_log_lpmf(Removed | lambda); } } } data { int N; int K; // number of individuals int Removed[N]; real Choice[N]; real C_width[N]; int ID[N]; } parameters { real a1; real a2; real b1; real b2; } transformed parameters { real q[N]; real lambda[N]; for (n in 1:N){ q[n] = inv_logit(a1 + a2*C_width[n]); lambda[n] = b1 + b2*C_width[n]; } } model { for (n in 1:N){ Removed[n] ~ ZIP(q[n], lambda[n]); } } generated quantities { vector[N] log_lik; for (n in 1:N) { log_lik[n] = ZIP_lpmf(Removed[n] | q[n], lambda[n]); } } ``` ### sampling ```{r} # fit <- stan(file='choice_and_cutting_no_RE_C_width.stan', ### # data=data, seed=1234, # chains=4, iter=3000, warmup=1000, thin=1 # ) # save(fit, file="choice_and_cutting_no_RE_C_width.rdata") load("choice_and_cutting_no_RE_C_width.rdata") ``` ### WAIC ```{r} ms <- rstan::extract(fit) ms$log_lik %>% waic() -> w27 w27 ``` ## trimming, model 2_8 ### Stan code ```{stan, output.var='dump', eval=FALSE} functions { real ZIP_lpmf(int Removed, real q, real lambda) { if (Removed == 0) { return log_sum_exp( bernoulli_lpmf(0 | q), bernoulli_lpmf(1 | q) + poisson_log_lpmf(0 | lambda) ); } else { return bernoulli_lpmf(1 | q) + poisson_log_lpmf(Removed | lambda); } } } data { int N; int K; // number of individuals int Removed[N]; real Choice[N]; real C_width[N]; int ID[N]; } parameters { real a1; real b1; } transformed parameters { real q[N]; real lambda[N]; for (n in 1:N){ q[n] = inv_logit(a1); lambda[n] = b1; } } model { for (n in 1:N){ Removed[n] ~ ZIP(q[n], lambda[n]); } } generated quantities { vector[N] log_lik; for (n in 1:N) { log_lik[n] = ZIP_lpmf(Removed[n] | q[n], lambda[n]); } } ``` ### sampling ```{r} # fit <- stan(file='choice_and_cutting_no_RE_constant.stan', ### # data=data, seed=1234, # chains=4, iter=3000, warmup=1000, thin=1 # ) # save(fit, file="choice_and_cutting_no_RE_constant.rdata") load("choice_and_cutting_no_RE_constant.rdata") ``` ```{r} ms <- rstan::extract(fit) ms$log_lik %>% waic() -> w28 w28 ``` ## comparing WAIC values in AIC scale ```{r} waicv_m2 <- c(w21, w22, w23, w24, w25, w26) * data$N waicv_m2<- waicv_m2 - min(waicv_m2) plot(waicv_m2, ylab=c("WAICs for models for sponge choice")) ``` # 3. cavity making ## preprocessing ```{r} d <- read_csv(file='Dromiidae.csv') d <- d %>% dplyr::select(shell_width, hole_px, id, cm_per_px) # select(c('shell_width', 'hole_px', 'id', 'cm_per_px')) d <- d[complete.cases(d), ] # calculate hole size d$hole_size <- d$hole_px * d$cm_per_px^2 / 1.15 # renumber animal id from 1 id <- as.factor(d$id) levels(id) <- as.character(1:length(levels(id))) d$id <- as.integer(id) data <- list(N=nrow(d), K=max(d$id), C_width=d$shell_width, C_width_new=seq(min(d$shell_width)-0.5, max(d$shell_width)+0.5, length.out=600), Hole_size=d$hole_size, ID=d$id) ``` The sample size of animals and acts are, $N_{animal}=$ `r data$K` and $N_{act}=$ `r data$N`, respectively. ## model 3_1 ### Stan code ```{stan, output.var='dump', eval=FALSE} functions { real f(real mu, real s, real x){ return(normal_lpdf(x | mu, s)); } real log_lik_Simpson(real mu, real s, real a, real b, int M) { vector[M+1] lp; real h; h = (b-a)/M; lp[1] = f(mu, s, a); for (m in 1:(M/2)) lp[2*m] = log(4) + f(mu, s, a + h*(2*m-1)); for (m in 1:(M/2-1)) lp[2*m+1] = log(2) + f(mu, s, a + h*2*m); lp[M+1] = f(mu, s, b); return(log(h/3) + log_sum_exp(lp)); } real f2(real Hole_size, real shape, real b, real C_w, real ak){ return(gamma_lpdf(Hole_size | shape, shape/exp(ak + b*C_w))); } real log_lik_Simpson_rest(real Hole_size, real shape, real b, real C_w, real ak_lower, real ak_upper, int M){ vector[M+1] lp; real h; h = (ak_upper - ak_lower)/M; lp[1] = f2(Hole_size, shape, b, C_w, ak_lower); for (m in 1:(M/2)) lp[2*m] = log(4) + f2(Hole_size, shape, b, C_w, ak_lower + h*(2*m-1)); for (m in 1:(M/2-1)) lp[2*m+1] = log(2) + f2(Hole_size, shape, b, C_w, ak_lower + h*2*m); lp[M+1] = f2(Hole_size, shape, b, C_w, ak_upper); return(log(h/3) + log_sum_exp(lp)); } } data { int N; int K; real C_width[N]; real C_width_new[600]; real Hole_size[N]; int ID[N]; } parameters { real shape; real a0; real b; real a_id[K]; real s_a; } transformed parameters { real a[K]; for (k in 1:K){ a[k] = a0 + a_id[k]; } } model { for (k in 1:K){ a_id[k] ~ normal(0, s_a); } for (n in 1:N){ Hole_size[n] ~ gamma(shape, shape / exp(a[ID[n]] + b*C_width[n])); } } generated quantities { real log_lik[N]; real log_lik_a; real pred_Y[600]; real paid; log_lik_a = log_lik_Simpson(a0, s_a, a0-5*s_a, a0+5*s_a, 100); paid = normal_rng(a0, s_a); for (n in 1:N) { log_lik[n] = log_lik_a + log_lik_Simpson_rest(Hole_size[n], shape, b, C_width[n], a0-5*s_a, a0+5*s_a, 100); } for (n in 1:600) { pred_Y[n] = gamma_rng(shape, shape / exp(paid + b*C_width_new[n])); } } ``` ### sampling ```{r} # fit <- stan(file='shell_hole2.stan', ### # data=data, seed=1234, # chains=4, iter=4000, warmup=1000, thin=2) # save(fit, file="shell_hole2.rdata") load("shell_hole2.rdata") ``` ### WAIC ```{r} ms <- rstan::extract(fit) ms$log_lik %>% waic() -> w31 w31 ``` ### plot posterior ```{r} stan_plot(fit, c("b")) ``` ### plot prediction This is the code for the figure 7. # revised plot ```{r} tic() ddd <- make_xyzy(data$C_width_new, ms$pred_Y, ylim=c(-1, max(data$Hole_size)+5), yn=500) toc() d_seg <- d %>% group_by(id) %>% summarise( hole_size_min=min(hole_size), hole_size_max=max(hole_size), shell_width=first(shell_width)) %>% drop_na() ``` ```{r} line_n <- 50 paid <- nth_dens(ms$paid, line_n) b <- nth_dens(ms$b, line_n) shape <- nth_dens(ms$shape, line_n) mu <- matrix(nrow=line_n, ncol=600) for (i in 1:line_n){ for (j in 1:600) { mu[i, j] <- exp(paid[i] + b[i]*data$C_width_new[j]) } } plot(mu[1,], type="l") for (i in 1:line_n) lines(mu[i,]) pred <- tibble() for (i in 1:line_n) { pred <- rbind(pred, tibble(n=i, mu=mu[i,], C_width_new=data$C_width_new)) } ``` ```{r} plt_fig7_model3_1 <- ggplot(ddd) + geom_raster(aes(x, y, fill=z)) + geom_point(aes(shell_width, hole_size), pch=1, col="white", data=d) + geom_line(aes(C_width_new, mu, group=n, alpha=1/n), col="white", data=pred) + # geom_point(aes(shell_width, hole_size, col=as.factor(id)), data=d) + geom_segment( aes(x = shell_width, y = hole_size_min, xend = shell_width, yend = hole_size_max), data = d_seg, alpha = 0.5, lty=3, col="white") + scale_x_continuous(expand=c(0, 0), limits=c(min(data$C_width_new), max(data$C_width_new))) + scale_y_continuous(expand=c(0, 0), limits=c(-1, max(data$Hole_size)+5)) + xlab("carapace width (cm)") + ylab("hole size (cm^2)") + scale_fill_viridis() + ggtitle(paste("model 3_1 WAIC:", w31)) + theme(legend.position = 'none') ggsave(plt_fig7_model3_1, file="plt_fig7_model3_1.pdf", width=4*3/2, height=3*3/2, useDingbats=F) plt_fig7_model3_1 ``` ## model 3_2 ### Stan code ```{stan, output.var='dump', eval=FALSE} data { int N; real C_width[N]; real Hole_size[N]; real C_width_new[600]; } parameters { real shape; real a0; real b0; } model { for (n in 1:N){ Hole_size[n] ~ gamma(shape, shape / exp(a0 + b0*C_width[n])); } } generated quantities { real log_lik[N]; real pred_Y[100]; for (n in 1:N){ log_lik[n] = gamma_lpdf(Hole_size[n] | shape, shape/exp(a0+b0*C_width[n])); } for (n in 1:600) { pred_Y[n] = gamma_rng(shape, shape / exp(a0 + b0*C_width_new[n])); } } ``` ### sampling ```{r} # fit <- stan(file='shell_hole_gamma_no_RF.stan', ### # data=data, seed=1234, # chains=4, iter=6000, warmup=1000, thin=1) # save(fit, file="shell_hole_gamma_no_RF.rdata") load("shell_hole_gamma_no_RF.rdata") ``` ### WAIC ```{r} ms <- rstan::extract(fit) ms$log_lik %>% waic() -> w32 w32 ``` ```{r} tic() ddd <- make_xyzy(data$C_width_new, ms$pred_Y, ylim=c(0, max(data$Hole_size)+5), yn=500) toc() ``` ```{r} line_n <- 50 a <- nth_dens(ms$a0, line_n) b <- nth_dens(ms$b0, line_n) shape <- nth_dens(ms$shape, line_n) mu <- matrix(nrow=line_n, ncol=600) for (i in 1:line_n){ for (j in 1:600) { mu[i, j] <- exp(a[i] + b[i]*data$C_width_new[j]) } } plot(mu[1,], type="l") for (i in 1:line_n) lines(mu[i,]) pred <- tibble() for (i in 1:line_n) { pred <- rbind(pred, tibble(n=i, mu=mu[i,], C_width_new=data$C_width_new)) } ``` ```{r} plt_fig7_model3_2 <- ggplot(ddd) + geom_raster(aes(x, y, fill=z)) + geom_point(aes(shell_width, hole_size), pch=1, col="white", data=d) + geom_line(aes(C_width_new, mu, alpha=1/n, group=n), col="white", data=pred) + # geom_point(aes(shell_width, hole_size, col=as.factor(id)), data=d) + # geom_segment( # aes(x = shell_width, # y = hole_size_min, # xend = shell_width, # yend = hole_size_max), data = d_seg, alpha = 0.5, lty=3, col="white") + scale_x_continuous(expand=c(0, 0), limits=c(min(data$C_width_new), max(data$C_width_new))) + scale_y_continuous(expand=c(0, 0), limits=c(0, max(data$Hole_size)+5)) + scale_fill_viridis() + ggtitle(paste("model 3_2 WAIC:", w32)) + theme(legend.position = 'none') ggsave(plt_fig7_model3_2, file="plt_fig7_model3_2.pdf", width=4*3/2, height=3*3/2, useDingbats=F) plt_fig7_model3_2 ``` ## model 3_3 ### Stan code ```{stan, output.var='dump', eval=FALSE} data { int N; real shell_width[N]; real hole_size[N]; real gender[N]; } parameters { real shape; real a0; real b0; real c0; } model { for (n in 1:N){ hole_size[n] ~ gamma(shape, shape / exp(a0 + b0*shell_width[n] + c0*gender[n])); } } generated quantities { real y_base_new[N]; real y_new[N]; vector[N] log_lik; for (n in 1:N){ y_base_new[n] = exp(a0 + b0 * shell_width[n] + c0*gender[n]); y_new[n] = gamma_rng(shape, shape / y_base_new[n]); } for (n in 1:N){ log_lik[n] = gamma_lpdf(hole_size[n]|shape, shape / exp(a0 + b0 * shell_width[n] + c0*gender[n])); } } ``` ### sampling ```{r} # fit <- stan(file='shell_hole_no_RF.stan', ### # data=data, seed=1234, # chains=4, iter=4000, warmup=1000, thin=1) # save(fit, file='shell_hole_no_RF.rdata') ### load('shell_hole_no_RF.rdata') ### ``` ### WAIC ```{r} ms <- rstan::extract(fit) ms$log_lik %>% waic() -> w33 w33 ``` ## model 3_4 ### Stan code ```{stan, output.var='dump', eval=FALSE} functions { real f(real Y, real[] theta, real x){ return(normal_lpdf(x | theta[1], theta[2]) + gamma_lpdf(Y | theta[3], theta[3]/exp(x))); } real log_lik_Simpson(real Y, real[] theta, real a, real b, int M) { vector[M+1] lp; real h; h = (b-a)/M; lp[1] = f(Y, theta, a); for (m in 1:(M/2)) lp[2*m] = log(4) + f(Y, theta, a + h*(2*m-1)); for (m in 1:(M/2-1)) lp[2*m+1] = log(2) + f(Y, theta, a + h*2*m); lp[M+1] = f(Y, theta, b); return(log(h/3) + log_sum_exp(lp)); } } data { int N; int K; real hole_size[N]; int id[N]; } parameters { real shape; real a0; real a_id[K]; real s_a; } transformed parameters { real a[K]; real theta[3]; for (k in 1:K){ a[k] = a0 + a_id[k]; } theta[1] = a0; theta[2] = s_a; theta[3] = shape; } model { for (k in 1:K){ a_id[k] ~ normal(0, s_a); } for (n in 1:N){ hole_size[n] ~ gamma(shape, shape / exp(a[id[n]])); } } generated quantities { real log_lik[N]; for (n in 1:N){ log_lik[n] = log_lik_Simpson(hole_size[n], theta, a0-5*s_a, a0+5*s_a, 100); } } ``` ### sampling ```{r} # fit <- stan(file='shell_hole_constant.stan', ### # data=data, seed=1234, # chains=4, iter=2000, warmup=500, thin=1) # save(fit, file='shell_hole_constant.rdata') load('shell_hole_constant.rdata') ``` ### WAIC ```{r} ms <- rstan::extract(fit) ms$log_lik %>% waic() -> w34 w34 ``` ## model 3_5 ### Stan code ```{stan, output.var='dump', eval=FALSE} data { int N; real C_width[N]; real Hole_size[N]; } parameters { real a; real b; real sigma; } model { for (n in 1:N){ Hole_size[n] ~ normal(a + b * C_width[n], sigma); } } generated quantities { real log_lik[N]; for (n in 1:N){ log_lik[n] = normal_lpdf(Hole_size[n] | a + b*C_width[n], sigma); } } ``` ### sampling ```{r} # fit <- stan(file='shell_hole_linear.stan', ### # data=data, seed=1234, # chains=4, iter=4000, warmup=1000, thin=1) # save(fit, file='shell_hole_linear.rdata') load('shell_hole_linear.rdata') ``` ### WAIC ```{r} ms <- rstan::extract(fit) ms$log_lik %>% waic() -> w35 w35 ``` ## model 3_6 ### Stan code ```{stan, output.var='dump', eval=FALSE} functions { real f(real mu, real s, real x){ return(normal_lpdf(x | mu, s)); } real log_lik_Simpson(real mu, real s, real a, real b, int M) { vector[M+1] lp; real h; h = (b-a)/M; lp[1] = f(mu, s, a); for (m in 1:(M/2)) lp[2*m] = log(4) + f(mu, s, a + h*(2*m-1)); for (m in 1:(M/2-1)) lp[2*m+1] = log(2) + f(mu, s, a + h*2*m); lp[M+1] = f(mu, s, b); return(log(h/3) + log_sum_exp(lp)); } real f2(real Hole_size, real shape, real C_w, real ak, real bk){ return(gamma_lpdf(Hole_size | shape, shape/exp(ak + bk*C_w))); } real log_lik_Simpson_ak(real Hole_size, real shape, real C_w, real ak_lower, real ak_upper, real bk, int M){ vector[M+1] lp; real h; h = (ak_upper - ak_lower)/M; lp[1] = f2(Hole_size, shape, C_w, ak_lower, bk); for (m in 1:(M/2)) lp[2*m] = log(4) + f2(Hole_size, shape, C_w, ak_lower + h*(2*m-1), bk); for (m in 1:(M/2-1)) lp[2*m+1] = log(2) + f2(Hole_size, shape, C_w, ak_lower + h*2*m, bk); lp[M+1] = f2(Hole_size, shape, C_w, ak_upper, bk); return(log(h/3) + log_sum_exp(lp)); } real log_lik_Simpson_rest(real Hole_size, real shape, real C_w, real ak_lower, real ak_upper, real bk_lower, real bk_upper, int M){ vector[M+1] lp; real h; h = (bk_upper - bk_lower)/M; lp[1] = log_lik_Simpson_ak(Hole_size, shape, C_w, ak_lower, ak_upper, bk_lower, M); for (m in 1:(M/2)) lp[2*m] = log(4) + log_lik_Simpson_ak(Hole_size, shape, C_w, ak_lower, ak_upper, bk_lower + h*(2*m-1), M); for (m in 1:(M/2-1)) lp[2*m+1] = log(2) + log_lik_Simpson_ak(Hole_size, shape, C_w, ak_lower, ak_upper, bk_lower + h*2*m, M); lp[M+1] = log_lik_Simpson_ak(Hole_size, shape, C_w, ak_lower, ak_upper, bk_upper, M); return(log(h/3) + log_sum_exp(lp)); } } data { int N; int K; real C_width[N]; real C_width_new[600]; real Hole_size[N]; int ID[N]; } parameters { real shape; real a0; real b0; vector[K] a; vector[K] b; real s_a; real s_b; } model { s_a ~ student_t(4, 0, 5); s_b ~ student_t(4, 0, 5); a ~ normal(a0, s_a); b ~ normal(b0, s_b); for (n in 1:N){ Hole_size[n] ~ gamma(shape, shape / exp(a[ID[n]] + b[ID[n]]*C_width[n])); } } generated quantities { real log_lik[N]; real log_lik_a; real log_lik_b; real pa; real pb; real pHole_size[600]; log_lik_a = log_lik_Simpson(a0, s_a, a0-5*s_a, a0+5*s_a, 100); log_lik_b = log_lik_Simpson(b0, s_b, b0-5*s_b, b0+5*s_b, 100); pa = normal_rng(a0, s_a); pb = normal_rng(b0, s_b); for (n in 1:600) { pHole_size[n] = gamma_rng(shape, shape/exp(pa + pb*C_width_new[n])); } for (n in 1:N) { log_lik[n] = log_lik_a + log_lik_b + log_lik_Simpson_rest(Hole_size[n], shape, C_width[n], a0-5*s_a, a0+5*s_a, b0-5*s_b, b0+5*s_b, 100); } } ``` ### sampling ```{r} # fit <- stan(file='shell_hole.stan', ### # data=data, seed=1234, # chains=4, iter=14000, warmup=4000, thin=2) # # save(fit, file="shell_hole.rdata") load("shell_hole.rdata") ``` ### WAIC ```{r} ms <- rstan::extract(fit) ms$log_lik %>% waic() -> w36 w36 ``` # plot ```{r} tic() ddd <- make_xyzy(data$C_width_new, ms$pHole_size, ylim=c(0, max(data$Hole_size)+5), yn=500) toc() ``` ```{r} plt_fig7_model3_6 <- ggplot(ddd) + geom_raster(aes(x, y, fill=z)) + geom_point(aes(shell_width, hole_size), pch=1, col="white", data=d) + # geom_point(aes(shell_width, hole_size, col=as.factor(id)), data=d) + geom_segment( aes(x = shell_width, y = hole_size_min, xend = shell_width, yend = hole_size_max), data = d_seg, alpha = 0.5, lty=3, col="white") + scale_x_continuous(expand=c(0, 0), limits=c(min(data$C_width_new), max(data$C_width_new))) + scale_y_continuous(expand=c(0, 0), limits=c(0, max(data$Hole_size)+5)) + scale_fill_viridis() + ggtitle(paste("model 3_6 WAIC:", w36)) + theme(legend.position = 'none') ggsave(plt_fig7_model3_6, file="plt_fig7_model3_6.pdf", width=4*3/2, height=3*3/2, useDingbats=F) plt_fig7_model3_6 ``` ## comparing WAIC values in AIC scale ```{r} waicv_m3 <- c(w31, w32, w33, w34, w35, w36) * data$N waicv_m3<- waicv_m3 - min(waicv_m3) plot(waicv_m3, ylab=c("WAICs for models for sponge choice")) ``` # 4. days to carrying ## preprocessing ```{r} d <- read_csv("Dromiidae.csv") d$start_date <- as_date(d$start_date) d$end_date <- as_date(d$end_date) d <- d %>% mutate(days_to_choice=end_date-start_date) %>% dplyr::select("id", "gender", "days_to_choice", "shell_width", "choice") %>% dplyr::filter(choice != "no_choice") d$choice <- as.factor(d$choice) levels(d$choice) <- c(2,1,3) # label M,L,No, as 1,2,3 d$choice <- as.integer(as.character(d$choice)) d$gender <- as.integer(as.factor(d$gender)) # renumber animal id from 1 id <- as.factor(d$id) levels(id) <- as.character(1:length(levels(id))) d$id <- as.integer(id) d$days_to_choice <- as.integer(d$days_to_choice) - 1 d <- add_chosen_col_days(d) data <- list(N=nrow(d), K=length(unique(d$id)), Days=d$days_to_choice, C_width=d$shell_width, C_width_new=seq(min(d$shell_width), max(d$shell_width), length.out=600), Choice=d$choice, ID=d$id) ``` The sample size of animals and acts are, $N_{animal}=$ `r data$K` and $N_{act}=$ `r data$N`, respectively. ## model 4_1 ### Stan code ```{stan, output.var='dump', eval=FALSE} functions { real ZIP_lpmf(int Removed, real q, real lambda) { if (Removed == 0) { return log_sum_exp( bernoulli_lpmf(0 | q), bernoulli_lpmf(1 | q) + poisson_log_lpmf(0 | lambda) ); } else { return bernoulli_lpmf(1 | q) + poisson_log_lpmf(Removed | lambda); } } int myZIP_rng(real q, real lambda) { int choice; choice = bernoulli_rng(q); if (choice==0) { return( 0 ); } else { return( poisson_log_rng(lambda) ); } } real f(real mu, real s, real x){ return(normal_lpdf(x | mu, s)); } real log_lik_Simpson(real mu, real s, real a, real b, int M) { vector[M+1] lp; real h; h = (b-a)/M; lp[1] = f(mu, s, a); for (m in 1:(M/2)) lp[2*m] = log(4) + f(mu, s, a + h*(2*m-1)); for (m in 1:(M/2-1)) lp[2*m+1] = log(2) + f(mu, s, a + h*2*m); lp[M+1] = f(mu, s, b); return(log(h/3) + log_sum_exp(lp)); } real f2(int Days, real a, real bk){ real q; real lambda; q = inv_logit(a); lambda = bk; if (Days == 0) { return log_sum_exp( bernoulli_lpmf(0 | q), bernoulli_lpmf(1 | q) + poisson_log_lpmf(0 | lambda) ); } else { return bernoulli_lpmf(1 | q) + poisson_log_lpmf(Days | lambda); } } real log_lik_Simpson_rest(int Days, real a, real bk_lower, real bk_upper, int M) { vector[M+1] lp; real h; h = (bk_upper - bk_lower)/M; lp[1] = f2(Days, a, bk_lower); for (m in 1:(M/2)) lp[2*m] = log(4) + f2(Days, a, bk_lower + h*(2*m-1)); for (m in 1:(M/2-1)) lp[2*m+1] = log(2) + f2(Days, a, bk_lower + h*2*m); lp[M+1] = f2(Days, a, bk_upper); return(log(h/3) + log_sum_exp(lp)); } } data { int N; int K; real C_width[N]; // real C_width_new[600]; int Days[N]; int Choice[N]; int ID[N]; } parameters { real a; real b0; real rb[K]; real s_b; } transformed parameters { real q[N]; real lambda[N]; real b[K]; for (k in 1:K){ b[k] = b0 + rb[k]; } for (n in 1:N) { q[n] = inv_logit(a); lambda[n] = b[ID[n]]; } } model { for (k in 1:K) { rb[k] ~ normal(0, s_b); } for (n in 1:N) { Days[n] ~ ZIP(q[n], lambda[n]); } } generated quantities { real log_lik[N]; real log_lik_b; real pp[600]; real pb; real plambda[600]; real pDays[600]; log_lik_b = log_lik_Simpson(b0, s_b, b0-5*s_b, b0+5*s_b, 100); for (n in 1:N){ log_lik[n] = log_lik_b + log_lik_Simpson_rest(Days[n], a, b0-5*s_b, b0+5*s_b, 100); } pb = normal_rng(b0, s_b); for (n in 1:600){ pp[n] = inv_logit(a); plambda[n] = pb; pDays[n] = myZIP_rng(pp[n], plambda[n]); } } ``` ### sampling ```{r} # fit <- stan(file='days_constant2.stan', ### # data=data, seed=1234, # chains=4, iter=6000, warmup=2000, thin=1) # save(fit, file="days_constant2.rdata") load("days_constant2.rdata") ``` ### WAIC ```{r} ms <- rstan::extract(fit) ms$log_lik %>% waic() -> w41 w41 ``` ```{r} pdays <- make_xyzy(seq(min(data$C_width), max(data$C_width), length.out=600), ms$pDays, ylim=c(-0.2, 6)) ``` ```{r} d_seg <- d %>% group_by(id) %>% summarise( dates_min=min(days_to_choice), dates_max=max(days_to_choice), shell_width=first(shell_width)) %>% drop_na() line_n <- 500 plambda <- nth_dens(ms$pb, line_n) pred <- tibble() for (i in 1:line_n) { pred <- rbind(pred, tibble(n=i, C_width_new=data$C_width_new, plambda=plambda[i])) } ``` ```{r} plt_fig8_model4_1 <- ggplot(pdays) + geom_raster(aes(x, y, fill=z)) + geom_point(aes(shell_width, days_to_choice, size=chosen_num ), alpha=1, pch=0, col="white", data=d) + geom_segment(aes(x=shell_width, xend=shell_width, y=dates_min, yend=dates_max), lty=3, col="white", data=d_seg) + # geom_line(aes(C_width_new, exp(plambda), group=n, alpha=1/n), col="white", lty=2, data=pred) + scale_x_continuous(expand=c(0,0)) + scale_y_continuous(expand=c(0,0), limits=c(-0.2, 6)) + xlab("carapace width (cm)") + ylab("time for making cap (days)") + theme(legend.position='none') + ggtitle(paste("model 4_1 WAIC:", w41)) + scale_fill_viridis() ggsave(plt_fig8_model4_1, file="plt_fig8_model4_1.pdf", width=4*3/2, height=3*3/2, useDingbats=F) plt_fig8_model4_1 ``` ## model 4_2 ### Stan code ```{stan, output.var='dump', eval=FALSE} functions { real ZIP_lpmf(int Removed, real q, real lambda) { if (Removed == 0) { return log_sum_exp( bernoulli_lpmf(0 | q), bernoulli_lpmf(1 | q) + poisson_log_lpmf(0 | lambda) ); } else { return bernoulli_lpmf(1 | q) + poisson_log_lpmf(Removed | lambda); } } int myZIP_rng(real q, real lambda) { int choice; choice = bernoulli_rng(q); if (choice==0) { return( 0 ); } else { return( poisson_log_rng(lambda) ); } } } data { int N; int K; real C_width[N]; int Days[N]; int Choice[N]; } parameters { real a; real b; } transformed parameters { real q[N]; real lambda[N]; for (n in 1:N) { q[n] = inv_logit(a); lambda[n] = b; } } model { for (n in 1:N) { Days[n] ~ ZIP(q[n], lambda[n]); } } generated quantities { real log_lik[N]; int pDays[600]; for (n in 1:N){ log_lik[n] = ZIP_lpmf(Days[n] | q[n], lambda[n]); } for (n in 1:600) { pDays[n] = myZIP_rng(inv_logit(a), b); } } ``` ### sampling ```{r} # fit <- stan(file='days_no_RE_constant2.stan', ### # data=data, seed=1234, # chains=4, iter=4000, warmup=1000, thin=1) # save(fit, file="days_no_RE_constant2.rdata") load(file="days_no_RE_constant2.rdata") ``` ### WAIC ```{r} ms <- rstan::extract(fit) ms$log_lik %>% waic() -> w42 w42 ``` ```{r} pdays <- make_xyzy(seq(min(data$C_width), max(data$C_width), length.out=600), ms$pDays, ylim=c(-0.2, 6)) ``` ```{r} d_seg <- d %>% group_by(id) %>% summarise( dates_min=min(days_to_choice), dates_max=max(days_to_choice), shell_width=first(shell_width)) %>% drop_na() ``` ```{r} plt_fig8_model4_2 <- ggplot(pdays) + geom_raster(aes(x, y, fill=z)) + geom_point(aes(shell_width, days_to_choice, size=chosen_num ), alpha=1, pch=0, col="white", data=d) + # geom_segment(aes(x=shell_width, xend=shell_width, # y=dates_min, yend=dates_max), lty=3, col="white", data=d_seg) + scale_x_continuous(expand=c(0,0)) + scale_y_continuous(expand=c(0,0)) + xlab("carapace width (cm)") + ylab("time for making cap (days)") + theme(legend.position='none') + ggtitle(paste("model 4_2 WAIC:", w42)) + scale_fill_viridis() ggsave(plt_fig8_model4_2, file="plt_fig8_model4_2.pdf", width=4*3/2, height=3*3/2, useDingbats=F) plt_fig8_model4_2 ``` ## model 4_3 ### Stan code ```{stan, output.var='dump', eval=FALSE} functions { real ZIP_lpmf(int Removed, real q, real lambda) { if (Removed == 0) { return log_sum_exp( bernoulli_lpmf(0 | q), bernoulli_lpmf(1 | q) + poisson_log_lpmf(0 | lambda) ); } else { return bernoulli_lpmf(1 | q) + poisson_log_lpmf(Removed | lambda); } } } data { int N; int K; real C_width[N]; int Days[N]; int Choice[N]; } parameters { real a; real b1; real b2; } transformed parameters { real q[N]; real lambda[N]; for (n in 1:N) { q[n] = inv_logit(a); lambda[n] = b1 + b2*Choice[n]; } } model { for (n in 1:N) { Days[n] ~ ZIP(q[n], lambda[n]); } } generated quantities { real log_lik[N]; for (n in 1:N){ log_lik[n] = ZIP_lpmf(Days[n] | q[n], lambda[n]); } } ``` ### sampling ```{r} # fit <- stan(file='days_no_RE_no_C_width2.stan', ### # data=data, seed=1234, # chains=4, iter=4000, warmup=1000, thin=1) # save(fit, file="days_no_RE_no_C_width2.rdata") load("days_no_RE_no_C_width2.rdata") ``` ### WAIC ```{r} ms <- rstan::extract(fit) ms$log_lik %>% waic() -> w43 w43 ``` ## model 4_4 ### Stan code ```{stan, output.var='dump', eval=FALSE} functions { real ZIP_lpmf(int Removed, real q, real lambda) { if (Removed == 0) { return log_sum_exp( bernoulli_lpmf(0 | q), bernoulli_lpmf(1 | q) + poisson_log_lpmf(0 | lambda) ); } else { return bernoulli_lpmf(1 | q) + poisson_log_lpmf(Removed | lambda); } } } data { int N; int K; real C_width[N]; int Days[N]; int Choice[N]; } parameters { real a; real b1; real b2; } transformed parameters { real q[N]; real lambda[N]; for (n in 1:N) { q[n] = inv_logit(a); lambda[n] = b1 + b2*C_width[n]; } } model { for (n in 1:N) { Days[n] ~ ZIP(q[n], lambda[n]); } } generated quantities { real log_lik[N]; for (n in 1:N){ log_lik[n] = ZIP_lpmf(Days[n] | q[n], lambda[n]); } } ``` ### sampling ```{r} # fit <- stan(file='days_no_RE_C_width2.stan', ### # data=data, seed=1234, # chains=4, iter=4000, warmup=1000, thin=1) # save(fit, file="days_no_RE_C_width2.rdata") load("days_no_RE_C_width2.rdata") ``` ### WAIC ```{r} ms <- rstan::extract(fit) ms$log_lik %>% waic() -> w44 w44 ``` ## model 4_5 ### Stan code ```{stan, output.var='dump', eval=FALSE} functions { real ZIP_lpmf(int Removed, real q, real lambda) { if (Removed == 0) { return log_sum_exp( bernoulli_lpmf(0 | q), bernoulli_lpmf(1 | q) + poisson_log_lpmf(0 | lambda) ); } else { return bernoulli_lpmf(1 | q) + poisson_log_lpmf(Removed | lambda); } } } data { int N; int K; real C_width[N]; int Days[N]; int Choice[N]; } parameters { real a1; real a2; real a3; real b1; real b2; real b3; } transformed parameters { real q[N]; real lambda[N]; for (n in 1:N) { q[n] = inv_logit(a1 + a2*C_width[n] + a3*Choice[n]); lambda[n] = b1 + b2*C_width[n] + b3*Choice[n]; } } model { for (n in 1:N) { Days[n] ~ ZIP(q[n], lambda[n]); } } generated quantities { real log_lik[N]; for (n in 1:N){ log_lik[n] = ZIP_lpmf(Days[n] | q[n], lambda[n]); } } ``` ### sampling ```{r} # fit <- stan(file='days_no_RE2.stan', ### # data=data, seed=1234, # chains=4, iter=4000, warmup=1000, thin=1) # save(fit, file="days_no_RE2.rdata") load("days_no_RE2.rdata") ``` ### WAIC ```{r} ms <- rstan::extract(fit) ms$log_lik %>% waic() -> w45 w45 ``` ## mode 4_6 ### Stan code ```{stan, output.var='dump', eval=FALSE} functions { real ZIP_lpmf(int Removed, real q, real lambda) { if (Removed == 0) { return log_sum_exp( bernoulli_lpmf(0 | q), bernoulli_lpmf(1 | q) + poisson_log_lpmf(0 | lambda) ); } else { return bernoulli_lpmf(1 | q) + poisson_log_lpmf(Removed | lambda); } } int myZIP_rng(real q, real lambda) { int choice; choice = bernoulli_rng(q); if (choice==0) { return( 0 ); } else { return( poisson_log_rng(lambda) ); } } real f(real mu, real s, real x){ return(normal_lpdf(x | mu, s)); } real log_lik_Simpson(real mu, real s, real a, real b, int M) { vector[M+1] lp; real h; h = (b-a)/M; lp[1] = f(mu, s, a); for (m in 1:(M/2)) lp[2*m] = log(4) + f(mu, s, a + h*(2*m-1)); for (m in 1:(M/2-1)) lp[2*m+1] = log(2) + f(mu, s, a + h*2*m); lp[M+1] = f(mu, s, b); return(log(h/3) + log_sum_exp(lp)); } real f2(int Days, real a, real b2, real C_width, real bk){ real q; real lambda; q = inv_logit(a); lambda = bk + b2*C_width; if (Days == 0) { return log_sum_exp( bernoulli_lpmf(0 | q), bernoulli_lpmf(1 | q) + poisson_log_lpmf(0 | lambda) ); } else { return bernoulli_lpmf(1 | q) + poisson_log_lpmf(Days | lambda); } } real log_lik_Simpson_rest(int Days, real a, real b2, real C_width, real bk_lower, real bk_upper, int M) { vector[M+1] lp; real h; h = (bk_upper - bk_lower)/M; lp[1] = f2(Days, a, b2, C_width, bk_lower); for (m in 1:(M/2)) lp[2*m] = log(4) + f2(Days, a, b2, C_width, bk_lower + h*(2*m-1)); for (m in 1:(M/2-1)) lp[2*m+1] = log(2) + f2(Days, a, b2, C_width, bk_lower + h*2*m); lp[M+1] = f2(Days, a, b2, C_width, bk_upper); return(log(h/3) + log_sum_exp(lp)); } } data { int N; int K; real C_width[N]; int Days[N]; int Choice[N]; int ID[N]; real C_width_new[600]; } parameters { real a; vector[K] b1w; real b0; real s_b; real b2; } transformed parameters { real q[N]; real lambda[N]; vector[K] b1; b1 = b0+b1w*s_b; for (n in 1:N) { q[n] = inv_logit(a); lambda[n] = b1[ID[n]] + b2*C_width[n]; } } model { b1w ~ normal(0, 1); for (n in 1:N) { Days[n] ~ ZIP(q[n], lambda[n]); } } generated quantities { real log_lik[N]; real log_lik_b; real pp[600]; real pb; real plambda[600]; real pDays[600]; log_lik_b = log_lik_Simpson(b0, s_b, b0-5*s_b, b0+5*s_b, 100); for (n in 1:N){ log_lik[n] = log_lik_b + log_lik_Simpson_rest(Days[n], a, b2, C_width[n], b0-5*s_b, b0+5*s_b, 100); } pb = normal_rng(b0, s_b); for (n in 1:600){ pp[n] = inv_logit(a); plambda[n] = pb + b2*C_width_new[n]; pDays[n] = myZIP_rng(pp[n], plambda[n]); } } ``` ### sampling ```{r} # fit <- stan(file='days_C_width2.stan', ### # data=data, seed=1234, # chains=4, iter=4000, warmup=1000, thin=1) # save(fit, file="days_C_width2.rdata") load("days_C_width2.rdata") ``` ### WAIC ```{r} ms <- rstan::extract(fit) ms$log_lik %>% waic() -> w46 w46 ``` ## comparing WAIC values in AIC scale ```{r} waicv_m4 <- c(w41, w42, w43, w44, w45, w46) * data$N waicv_m4<- waicv_m4 - min(waicv_m4) plot(waicv_m4, ylab=c("WAICs for models for sponge choice")) ``` # session info ```{r} sessionInfo() ```