# R-code used for statistical analyses 
# in "Honey bees communicate distance via non-linear waggle duration functions"
# by Patrick L. Kohl and Benjamin Rutschmann

# renaming of the data set ("data_figure3" provided with publication):
Feedertraining_Steigerwald <- data_figure3

# packages needed:
library(segmented)
library(bbmle)
library(ggplot2)
library(cowplot)

#### Analysis of waggle duration vs. feeder distance

## the linear model:
wd_lm <- lm(waggle_duration ~ distance_km, data=Feedertraining_Steigerwald)
summary(wd_lm)
coef(wd_lm)

## the non-linear model based on von Frisch and Kratky (1962)
wd_Frischformula <- nls(waggle_duration ~ a+ (b/c)*(1- exp(-c*distance_km)),start=list(a=0.5,b=2,c=1), data=Feedertraining_Steigerwald)
summary(wd_Frischformula)
coef(wd_Frischformula)

## the break-point model, starting value of the break-point set to 0.9 km:
wd_step <- segmented(wd_lm, seg.Z = ~ distance_km, psi = 0.9)
summary(wd_step)

#The slopes of the two lines of the break-point model:
slope(wd_step)
#$distance_km
#Est. St.Err. t value CI(95%).l CI(95%).u
#slope1 1.42820 0.14121 10.1140   1.14490    1.7116
#slope2 0.66826 0.17928  3.7274   0.30851    1.0280

#The break-point:
wd_step$psi
#Initial     Est.    St.Err
#psi1.distance_km     0.9 1.032837 0.1512069

# Calculation of the y-intercept of the second line:
# formula line one: y=b0 + b1 * x
# formula line two: y=c0 + c1 * x
# intercept of line two: c0 = b0 + b1 * breakpoint_x - c1 * breakpoint_x
# 1.076674 = 0.2917599+ 1.4282198*1.032837 - 0.6682608*1.032837

# Formulas:
# line one: y= 0.2917599+ 1.4282198x
# line two: y= 1.076674 + 0.6682608x
# breakpoint: x= 1.032837 y= 1.766878

## Plots of fitted values versus residuals to check for prediction biases:

wd_check <- data.frame(wd_lm_fitted=fitted(wd_lm),
                       wd_lm_residuals=residuals(wd_lm),
                       wd_Frischformula_fitted=fitted(wd_Frischformula), 
                       wd_Frischformula_residuals=residuals(wd_Frischformula),
                       wd_step_fitted=fitted(wd_step),
                       wd_step_residuals=residuals(wd_step))

ggplot(wd_check, aes(x=wd_lm_fitted, y=wd_lm_residuals)) + geom_point()+ geom_smooth()
ggplot(wd_check, aes(x=wd_Frischformula_fitted, y=wd_Frischformula_residuals)) + geom_point()+ geom_smooth()
ggplot(wd_check, aes(x=wd_step_fitted, y=wd_step_residuals)) + geom_point()+ geom_smooth()

## Calculation of Pearson's r to compare goodness-of-fit:

wd_fit_comparison <- data.frame(wd=Feedertraining_Steigerwald$waggle_duration,
                                distance_km = Feedertraining_Steigerwald$distance_km,
                                predict_lm = predict(wd_lm),
                                predict_Frischformula = predict(wd_Frischformula),
                                predict_step = predict(wd_step))
 
cor(wd_fit_comparison$predict_lm, wd_fit_comparison$wd)
#[1] 0.935186

cor(wd_fit_comparison$predict_Frischformula, wd_fit_comparison$wd)
#[1] 0.946075

cor(wd_fit_comparison$predict_step, wd_fit_comparison$wd)
#[1] 0.946944

## Comparison of AICc values:

AICctab(wd_step, wd_lm,  wd_Frischformula)
#dAICc df
#wd_Frischformula 0.0   4 
#wd_step          1.5   5 
#wd_lm_nls        7.7   3 



#### Analysis of return duarion versus feeder distance:

## the linear model:
rd_lm <- lm(return_duration ~ distance_km, data=Feedertraining_Steigerwald)
summary(rd_lm)
coef(rd_lm)

## the non-linear model based on von Frisch and Kratky (1962):
rd_Frischformula <- nls(return_duration ~ a + (b/c)*(1- exp(-c*distance_km)),start=list(a=0.5,b=2,c=1), data=Feedertraining_Steigerwald)
summary(rd_Frischformula)
coef(rd_Frischformula)

## the break-point model, starting value of the break-point set to 0.9 km:
rd_step <- segmented(rd_lm, seg.Z = ~ distance_km, psi = 0.9)
summary(rd_step)

#The slopes of the two lines of the break-point model:
slope(rd_step)
#$distance_km
#Est. St.Err. t value CI(95%).l CI(95%).u
#slope1 0.86134 0.61238  1.4065 -0.367490   2.09020
#slope2 0.37249 0.17724  2.1016  0.016826   0.72815

#The break-point:
rd_step$psi
#Initial      Est.    St.Err
#psi1.distance_km     0.9 0.7000177 0.5850156

# Calculation of the y-intercept of the second line:
# formula line one: y=b0 + b1 * x
# formula line two: y=c0 + c1 * x
# intercept of line two: c0 = b0 + b1 * breakpoint_x - c1 * breakpoint_x
# 1.572504 = 1.2303 + 0.86134 * 0.7000177 - 0.37249 * 0.7000177

# Formulas:
# line one: y= 1.2303 + 0.86134 x
# line two: y= 1.572504 + 0.37249 x
# breakpoint: x= 0.7000177 y= 1.833253

## Plots of fitted values versus residuals to check for prediction biases:

rd_check <- data.frame(rd_lm_fitted=fitted(rd_lm),
                       rd_lm_residuals=residuals(rd_lm),
                       rd_Frischformula_fitted=fitted(rd_Frischformula), 
                       rd_Frischformula_residuals=residuals(rd_Frischformula),
                       rd_step_fitted=fitted(rd_step),
                       rd_step_residuals=residuals(rd_step))

ggplot(rd_check, aes(x=rd_lm_fitted, y=rd_lm_residuals)) + geom_point()+ geom_smooth()
ggplot(rd_check, aes(x=rd_Frischformula_fitted, y=rd_Frischformula_residuals)) + geom_point()+ geom_smooth()
ggplot(rd_check, aes(x=rd_step_fitted, y=rd_step_residuals)) + geom_point()+ geom_smooth()


## Calculation of Pearson's r to compare goodness-of-fit:

rd_fit_comparison <- data.frame(rd=Feedertraining_Steigerwald$return_duration,
                                distance_km = Feedertraining_Steigerwald$distance_km,
                                predict_lm = predict(rd_lm),
                                predict_step = predict(rd_step),
                                predict_Frischformula = predict(rd_Frischformula))

cor(rd_fit_comparison$predict_lm, rd_fit_comparison$rd)
#[1] 0.5824713

cor(rd_fit_comparison$predict_Frischformula, rd_fit_comparison$rd)
#[1] 0.5871784

cor(rd_fit_comparison$predict_step, rd_fit_comparison$rd)
#[1] 0.5958169

##Comparison of AICc values:

AICctab(rd_lm, rd_Frischformula, rd_step)
#dAICc df
#rd_lm            0.0   3 
#rd_Frischformula 1.9   4 
#rd_step          3.4   5 



#### Analysis of circuit duarion versus feeder distance

## the linear model:
cd_lm <- lm(circuit_duration ~ distance_km, data=Feedertraining_Steigerwald)
summary(cd_lm)
coef(cd_lm)

## the non-linear model based on von Frisch and Kratky (1962):
cd_Frischformula <- nls(circuit_duration ~ a + (b/c)*(1- exp(-c*distance_km)),start=list(a=0.5,b=2,c=1), data=Feedertraining_Steigerwald)
summary(cd_Frischformula)
coef(cd_Frischformula)

## the break-point model, starting value of the break-point set to 0.9 km (since the break-point of the waggle duration model was estimated to be at 1 km):
cd_step <- segmented(cd_lm, seg.Z = ~ distance_km, psi = 0.9)
summary(cd_step)

#The slopes of the two lines of the break-point model:
slope(cd_step)
#Est. St.Err. t value CI(95%).l CI(95%).u
#slope1 2.4201 0.38388  6.3043   1.64980    3.1904
#slope2 1.2889 0.26706  4.8263   0.75301    1.8248

#The break-point:
cd_step$psi
#                  Initial Est.    St.Err
#psi1.distance_km     0.9 0.7000006 0.2037305

# Calculation of the y-intercept of the second line:
# formula line one: y=b0 + b1 * x
# formula line two: y=c0 + c1 * x
# intercept of line two: c0 = b0 + b1 * breakpoint_x - c1 * breakpoint_x
# 2.284411 = 1.492571+ 2.420096*0.7000006 - 1.288897*0.7000006

#Formulas:
# line one: y= 1.492571 + 2.420096x
# line two: y= 2.284411 + 1.288897x
# breakpoint x= 0.7000006 y= 3.18664

## Plots of fitted values versus residuals to check for prediction biases:

cd_check <- data.frame(cd_lm_fitted=fitted(cd_lm),
                       cd_lm_residuals=residuals(cd_lm),
                       cd_Frischformula_fitted=fitted(cd_Frischformula), 
                       cd_Frischformula_residuals=residuals(cd_Frischformula),
                       cd_step_fitted=fitted(cd_step),
                       cd_step_residuals=residuals(cd_step))

ggplot(cd_check, aes(x=cd_lm_fitted, y=cd_lm_residuals)) + geom_point()+ geom_smooth()
ggplot(cd_check, aes(x=cd_Frischformula_fitted, y=cd_Frischformula_residuals)) + geom_point()+ geom_smooth()
ggplot(cd_check, aes(x=cd_step_fitted, y=cd_step_residuals)) + geom_point()+ geom_smooth()


## Calculation of Pearson's r to compare goodness-of-fit:

cd_fit_comparison <- data.frame(cd=Feedertraining_Steigerwald$circuit_duration,
                                distance_km = Feedertraining_Steigerwald$distance_km,
                                predict_lm = predict(cd_lm),
                                predict_Frischformula = predict(cd_Frischformula),
                                predict_step = predict(cd_step))

cor(cd_fit_comparison$predict_lm, cd_fit_comparison$cd)
#[1] 0.8943298

cor(cd_fit_comparison$predict_Frischformula, cd_fit_comparison$cd)
#[1] 0.9037041

cor(cd_fit_comparison$predict_step, cd_fit_comparison$cd)
#[1] 0.9055953

## Comparison of AICc values:

AICctab(cd_lm, cd_Frischformula, cd_step)
#dAICc df
#cd_Frischformula 0.0   4 
#cd_step          1.4   5 
#cd_lm            2.6   3 


#### Composite figure of the data ("figure 3"):

prediction_wd <- data.frame(distance_km=seq(0.1, 1.7, length.out=100))
prediction_wd$waggle_duration <- predict(wd_Frischformula, prediction_wd)

prediction_rd <- data.frame(distance_km=seq(0.1, 1.7, length.out=100))
prediction_rd$return_duration <- predict(rd_lm, prediction_rd)

prediction_cd <- data.frame(distance_km=seq(0.1, 1.7, length.out=100))
prediction_cd$circuit_duration <- predict(cd_Frischformula, prediction_cd)

# auxiliary function to make tick marks without labels:
every_nth <- function(x, nth, empty = TRUE, inverse = FALSE)
{
  if (!inverse) { 
    if(empty) {
      x[1:nth == 1] <- ""
      x
    } else {
      x[1:nth != 1]
    }
  } else {
    if(empty) {
      x[1:nth != 1] <- ""
      x
    } else {
      x[1:nth == 1]
    }
  }
}

  
wd_plot <- ggplot(Feedertraining_Steigerwald, aes(x = distance_km, y = waggle_duration)) + 
  geom_point(aes(x=distance_km, y= mean_waggle_duration), size=2.5)+
  geom_point(colour="#F24D29", shape=1, size=1.5, position=position_jitter(width=.005, height=0))+
  labs(x="", y="Duration [s]")+
  theme_classic()+
  scale_x_continuous(breaks= seq(0.1,1.7, by=0.2),labels=every_nth(seq(0.1,1.7, by=0.2),2,inverse = TRUE), expand=c(0,0))+
  scale_y_continuous(breaks= seq(0,5, by=1),expand=c(0,0))+
  coord_cartesian(xlim = c(0,1.75), ylim = c(0,5.15))+
  geom_line(data=prediction_wd, lwd=0.7, lineend="round", colour="#F24D29")+
  annotation_raster(wd_comic, 0.1, 0.653, 4, 5)

rd_plot <- ggplot(Feedertraining_Steigerwald, aes(x = distance_km, y = return_duration)) + 
  geom_point(aes(x=distance_km, y= mean_return_duration), size=2.5)+
  geom_point(colour="#1C366B", shape=1, size=1.5, position=position_jitter(width=.005, height=0))+
  labs(x="Feeder distance [km]", y="")+
  theme_classic()+
  scale_x_continuous(breaks= seq(0.1,1.7, by=0.2),labels=every_nth(seq(0.1,1.7, by=0.2),2,inverse = TRUE), expand=c(0,0))+
  scale_y_continuous(breaks= seq(0,5, by=1),expand=c(0,0))+
  coord_cartesian(xlim = c(0,1.75), ylim = c(0,5.15))+
  geom_line(data=prediction_rd, lwd=0.7, lineend="round", colour="#1C366B")+
  annotation_raster(rd_comic, 0.1, 0.653, 4, 5)

cd_plot <- ggplot(Feedertraining_Steigerwald, aes(x = distance_km, y = circuit_duration)) + 
  geom_point(aes(x=distance_km, y= mean_circuit_duration), size=2.5)+
  geom_point(colour="#1DACE8", shape=1, size=1.5, position=position_jitter(width=.005, height=0))+
  labs(x="", y="")+
  theme_classic()+
  scale_x_continuous(breaks= seq(0.1,1.7, by=0.2),labels=every_nth(seq(0.1,1.7, by=0.2),2,inverse = TRUE), expand=c(0,0))+
  scale_y_continuous(breaks= seq(0,5, by=1),expand=c(0,0))+
  coord_cartesian(xlim = c(0,1.75), ylim = c(0,5.15))+
  geom_line(data=prediction_cd, lwd=0.7, lineend="round", colour="#1DACE8")+
  annotation_raster(cd_comic, 0.1, 0.653, 4, 5)

plot_grid(wd_plot, rd_plot, cd_plot,
          ncol = 3, nrow = 1, align = "h", rel_widths = c(1, 1, 1), labels=c("A","B","C"))

ggsave("Figure_3.pdf", width=18, height=9, unit="cm", dpi=600)