# R-code used for statistical analyses # in "Honey bees communicate distance via non-linear waggle duration functions" # by Patrick L. Kohl and Benjamin Rutschmann # renaming of the data set ("data_figure3" provided with publication): Feedertraining_Steigerwald <- data_figure3 # packages needed: library(segmented) library(bbmle) library(ggplot2) library(cowplot) #### Analysis of waggle duration vs. feeder distance ## the linear model: wd_lm <- lm(waggle_duration ~ distance_km, data=Feedertraining_Steigerwald) summary(wd_lm) coef(wd_lm) ## the non-linear model based on von Frisch and Kratky (1962) wd_Frischformula <- nls(waggle_duration ~ a+ (b/c)*(1- exp(-c*distance_km)),start=list(a=0.5,b=2,c=1), data=Feedertraining_Steigerwald) summary(wd_Frischformula) coef(wd_Frischformula) ## the break-point model, starting value of the break-point set to 0.9 km: wd_step <- segmented(wd_lm, seg.Z = ~ distance_km, psi = 0.9) summary(wd_step) #The slopes of the two lines of the break-point model: slope(wd_step) #$distance_km #Est. St.Err. t value CI(95%).l CI(95%).u #slope1 1.42820 0.14121 10.1140 1.14490 1.7116 #slope2 0.66826 0.17928 3.7274 0.30851 1.0280 #The break-point: wd_step$psi #Initial Est. St.Err #psi1.distance_km 0.9 1.032837 0.1512069 # Calculation of the y-intercept of the second line: # formula line one: y=b0 + b1 * x # formula line two: y=c0 + c1 * x # intercept of line two: c0 = b0 + b1 * breakpoint_x - c1 * breakpoint_x # 1.076674 = 0.2917599+ 1.4282198*1.032837 - 0.6682608*1.032837 # Formulas: # line one: y= 0.2917599+ 1.4282198x # line two: y= 1.076674 + 0.6682608x # breakpoint: x= 1.032837 y= 1.766878 ## Plots of fitted values versus residuals to check for prediction biases: wd_check <- data.frame(wd_lm_fitted=fitted(wd_lm), wd_lm_residuals=residuals(wd_lm), wd_Frischformula_fitted=fitted(wd_Frischformula), wd_Frischformula_residuals=residuals(wd_Frischformula), wd_step_fitted=fitted(wd_step), wd_step_residuals=residuals(wd_step)) ggplot(wd_check, aes(x=wd_lm_fitted, y=wd_lm_residuals)) + geom_point()+ geom_smooth() ggplot(wd_check, aes(x=wd_Frischformula_fitted, y=wd_Frischformula_residuals)) + geom_point()+ geom_smooth() ggplot(wd_check, aes(x=wd_step_fitted, y=wd_step_residuals)) + geom_point()+ geom_smooth() ## Calculation of Pearson's r to compare goodness-of-fit: wd_fit_comparison <- data.frame(wd=Feedertraining_Steigerwald$waggle_duration, distance_km = Feedertraining_Steigerwald$distance_km, predict_lm = predict(wd_lm), predict_Frischformula = predict(wd_Frischformula), predict_step = predict(wd_step)) cor(wd_fit_comparison$predict_lm, wd_fit_comparison$wd) #[1] 0.935186 cor(wd_fit_comparison$predict_Frischformula, wd_fit_comparison$wd) #[1] 0.946075 cor(wd_fit_comparison$predict_step, wd_fit_comparison$wd) #[1] 0.946944 ## Comparison of AICc values: AICctab(wd_step, wd_lm, wd_Frischformula) #dAICc df #wd_Frischformula 0.0 4 #wd_step 1.5 5 #wd_lm_nls 7.7 3 #### Analysis of return duarion versus feeder distance: ## the linear model: rd_lm <- lm(return_duration ~ distance_km, data=Feedertraining_Steigerwald) summary(rd_lm) coef(rd_lm) ## the non-linear model based on von Frisch and Kratky (1962): rd_Frischformula <- nls(return_duration ~ a + (b/c)*(1- exp(-c*distance_km)),start=list(a=0.5,b=2,c=1), data=Feedertraining_Steigerwald) summary(rd_Frischformula) coef(rd_Frischformula) ## the break-point model, starting value of the break-point set to 0.9 km: rd_step <- segmented(rd_lm, seg.Z = ~ distance_km, psi = 0.9) summary(rd_step) #The slopes of the two lines of the break-point model: slope(rd_step) #$distance_km #Est. St.Err. t value CI(95%).l CI(95%).u #slope1 0.86134 0.61238 1.4065 -0.367490 2.09020 #slope2 0.37249 0.17724 2.1016 0.016826 0.72815 #The break-point: rd_step$psi #Initial Est. St.Err #psi1.distance_km 0.9 0.7000177 0.5850156 # Calculation of the y-intercept of the second line: # formula line one: y=b0 + b1 * x # formula line two: y=c0 + c1 * x # intercept of line two: c0 = b0 + b1 * breakpoint_x - c1 * breakpoint_x # 1.572504 = 1.2303 + 0.86134 * 0.7000177 - 0.37249 * 0.7000177 # Formulas: # line one: y= 1.2303 + 0.86134 x # line two: y= 1.572504 + 0.37249 x # breakpoint: x= 0.7000177 y= 1.833253 ## Plots of fitted values versus residuals to check for prediction biases: rd_check <- data.frame(rd_lm_fitted=fitted(rd_lm), rd_lm_residuals=residuals(rd_lm), rd_Frischformula_fitted=fitted(rd_Frischformula), rd_Frischformula_residuals=residuals(rd_Frischformula), rd_step_fitted=fitted(rd_step), rd_step_residuals=residuals(rd_step)) ggplot(rd_check, aes(x=rd_lm_fitted, y=rd_lm_residuals)) + geom_point()+ geom_smooth() ggplot(rd_check, aes(x=rd_Frischformula_fitted, y=rd_Frischformula_residuals)) + geom_point()+ geom_smooth() ggplot(rd_check, aes(x=rd_step_fitted, y=rd_step_residuals)) + geom_point()+ geom_smooth() ## Calculation of Pearson's r to compare goodness-of-fit: rd_fit_comparison <- data.frame(rd=Feedertraining_Steigerwald$return_duration, distance_km = Feedertraining_Steigerwald$distance_km, predict_lm = predict(rd_lm), predict_step = predict(rd_step), predict_Frischformula = predict(rd_Frischformula)) cor(rd_fit_comparison$predict_lm, rd_fit_comparison$rd) #[1] 0.5824713 cor(rd_fit_comparison$predict_Frischformula, rd_fit_comparison$rd) #[1] 0.5871784 cor(rd_fit_comparison$predict_step, rd_fit_comparison$rd) #[1] 0.5958169 ##Comparison of AICc values: AICctab(rd_lm, rd_Frischformula, rd_step) #dAICc df #rd_lm 0.0 3 #rd_Frischformula 1.9 4 #rd_step 3.4 5 #### Analysis of circuit duarion versus feeder distance ## the linear model: cd_lm <- lm(circuit_duration ~ distance_km, data=Feedertraining_Steigerwald) summary(cd_lm) coef(cd_lm) ## the non-linear model based on von Frisch and Kratky (1962): cd_Frischformula <- nls(circuit_duration ~ a + (b/c)*(1- exp(-c*distance_km)),start=list(a=0.5,b=2,c=1), data=Feedertraining_Steigerwald) summary(cd_Frischformula) coef(cd_Frischformula) ## the break-point model, starting value of the break-point set to 0.9 km (since the break-point of the waggle duration model was estimated to be at 1 km): cd_step <- segmented(cd_lm, seg.Z = ~ distance_km, psi = 0.9) summary(cd_step) #The slopes of the two lines of the break-point model: slope(cd_step) #Est. St.Err. t value CI(95%).l CI(95%).u #slope1 2.4201 0.38388 6.3043 1.64980 3.1904 #slope2 1.2889 0.26706 4.8263 0.75301 1.8248 #The break-point: cd_step$psi # Initial Est. St.Err #psi1.distance_km 0.9 0.7000006 0.2037305 # Calculation of the y-intercept of the second line: # formula line one: y=b0 + b1 * x # formula line two: y=c0 + c1 * x # intercept of line two: c0 = b0 + b1 * breakpoint_x - c1 * breakpoint_x # 2.284411 = 1.492571+ 2.420096*0.7000006 - 1.288897*0.7000006 #Formulas: # line one: y= 1.492571 + 2.420096x # line two: y= 2.284411 + 1.288897x # breakpoint x= 0.7000006 y= 3.18664 ## Plots of fitted values versus residuals to check for prediction biases: cd_check <- data.frame(cd_lm_fitted=fitted(cd_lm), cd_lm_residuals=residuals(cd_lm), cd_Frischformula_fitted=fitted(cd_Frischformula), cd_Frischformula_residuals=residuals(cd_Frischformula), cd_step_fitted=fitted(cd_step), cd_step_residuals=residuals(cd_step)) ggplot(cd_check, aes(x=cd_lm_fitted, y=cd_lm_residuals)) + geom_point()+ geom_smooth() ggplot(cd_check, aes(x=cd_Frischformula_fitted, y=cd_Frischformula_residuals)) + geom_point()+ geom_smooth() ggplot(cd_check, aes(x=cd_step_fitted, y=cd_step_residuals)) + geom_point()+ geom_smooth() ## Calculation of Pearson's r to compare goodness-of-fit: cd_fit_comparison <- data.frame(cd=Feedertraining_Steigerwald$circuit_duration, distance_km = Feedertraining_Steigerwald$distance_km, predict_lm = predict(cd_lm), predict_Frischformula = predict(cd_Frischformula), predict_step = predict(cd_step)) cor(cd_fit_comparison$predict_lm, cd_fit_comparison$cd) #[1] 0.8943298 cor(cd_fit_comparison$predict_Frischformula, cd_fit_comparison$cd) #[1] 0.9037041 cor(cd_fit_comparison$predict_step, cd_fit_comparison$cd) #[1] 0.9055953 ## Comparison of AICc values: AICctab(cd_lm, cd_Frischformula, cd_step) #dAICc df #cd_Frischformula 0.0 4 #cd_step 1.4 5 #cd_lm 2.6 3 #### Composite figure of the data ("figure 3"): prediction_wd <- data.frame(distance_km=seq(0.1, 1.7, length.out=100)) prediction_wd$waggle_duration <- predict(wd_Frischformula, prediction_wd) prediction_rd <- data.frame(distance_km=seq(0.1, 1.7, length.out=100)) prediction_rd$return_duration <- predict(rd_lm, prediction_rd) prediction_cd <- data.frame(distance_km=seq(0.1, 1.7, length.out=100)) prediction_cd$circuit_duration <- predict(cd_Frischformula, prediction_cd) # auxiliary function to make tick marks without labels: every_nth <- function(x, nth, empty = TRUE, inverse = FALSE) { if (!inverse) { if(empty) { x[1:nth == 1] <- "" x } else { x[1:nth != 1] } } else { if(empty) { x[1:nth != 1] <- "" x } else { x[1:nth == 1] } } } wd_plot <- ggplot(Feedertraining_Steigerwald, aes(x = distance_km, y = waggle_duration)) + geom_point(aes(x=distance_km, y= mean_waggle_duration), size=2.5)+ geom_point(colour="#F24D29", shape=1, size=1.5, position=position_jitter(width=.005, height=0))+ labs(x="", y="Duration [s]")+ theme_classic()+ scale_x_continuous(breaks= seq(0.1,1.7, by=0.2),labels=every_nth(seq(0.1,1.7, by=0.2),2,inverse = TRUE), expand=c(0,0))+ scale_y_continuous(breaks= seq(0,5, by=1),expand=c(0,0))+ coord_cartesian(xlim = c(0,1.75), ylim = c(0,5.15))+ geom_line(data=prediction_wd, lwd=0.7, lineend="round", colour="#F24D29")+ annotation_raster(wd_comic, 0.1, 0.653, 4, 5) rd_plot <- ggplot(Feedertraining_Steigerwald, aes(x = distance_km, y = return_duration)) + geom_point(aes(x=distance_km, y= mean_return_duration), size=2.5)+ geom_point(colour="#1C366B", shape=1, size=1.5, position=position_jitter(width=.005, height=0))+ labs(x="Feeder distance [km]", y="")+ theme_classic()+ scale_x_continuous(breaks= seq(0.1,1.7, by=0.2),labels=every_nth(seq(0.1,1.7, by=0.2),2,inverse = TRUE), expand=c(0,0))+ scale_y_continuous(breaks= seq(0,5, by=1),expand=c(0,0))+ coord_cartesian(xlim = c(0,1.75), ylim = c(0,5.15))+ geom_line(data=prediction_rd, lwd=0.7, lineend="round", colour="#1C366B")+ annotation_raster(rd_comic, 0.1, 0.653, 4, 5) cd_plot <- ggplot(Feedertraining_Steigerwald, aes(x = distance_km, y = circuit_duration)) + geom_point(aes(x=distance_km, y= mean_circuit_duration), size=2.5)+ geom_point(colour="#1DACE8", shape=1, size=1.5, position=position_jitter(width=.005, height=0))+ labs(x="", y="")+ theme_classic()+ scale_x_continuous(breaks= seq(0.1,1.7, by=0.2),labels=every_nth(seq(0.1,1.7, by=0.2),2,inverse = TRUE), expand=c(0,0))+ scale_y_continuous(breaks= seq(0,5, by=1),expand=c(0,0))+ coord_cartesian(xlim = c(0,1.75), ylim = c(0,5.15))+ geom_line(data=prediction_cd, lwd=0.7, lineend="round", colour="#1DACE8")+ annotation_raster(cd_comic, 0.1, 0.653, 4, 5) plot_grid(wd_plot, rd_plot, cd_plot, ncol = 3, nrow = 1, align = "h", rel_widths = c(1, 1, 1), labels=c("A","B","C")) ggsave("Figure_3.pdf", width=18, height=9, unit="cm", dpi=600)