library(psych)
library(sjstats)
library(tidyverse)
library(ggfortify)
library(pwr)
library(summarytools)
library(emmeans)
library(Rmisc)
library(car)
library(outliers)
library(cowplot)

##################################
### FIELD DATA
#################################
rm(list = ls()) # clears R's memory

fieldfo <- read.csv("RDuckField_6.30.csv", na.strings = c("", "NA"), header = TRUE,
                    fileEncoding = 'UTF-8-BOM') # opens file
str(fieldfo)

# assigning different vars to factors
fieldfo$Date_Num <- as.factor(fieldfo$Date_Num)
fieldfo$Sex <- as.factor(fieldfo$Sex)
fieldfo$Speed <- as.factor(fieldfo$Speed)
fieldfo$Light <- as.factor(fieldfo$Light)
fieldfo$Brake <- as.factor(fieldfo$Brake)
fieldfo$Brake_Type <- as.factor(fieldfo$Brake_Type)
# checking that the changes were made
str(fieldfo)
View(fieldfo)

# we quickly look at whether body mass varies with sex
bmvssex <- lm(data = fieldfo, Weight ~ Sex)
# quick summary of the model
summary(bmvssex)
# direction of the effect
emmeans(bmvssex, ~Sex)
# Males are larger than females; we continue with body mass to maximize degrees of freedom

# Looking at associations between environmental independent continuous factors
# grouping all independent continuous factors into a single object to facilitate the correlations
ind.cont <- fieldfo[c("Temp", "Wind_kmh", "Wind_Dir")]
# runs the pairwise correlations between the independent continuous factors
corr.test(ind.cont, use = "pairwise", method = "pearson", adjust = "none")
# because of the high association between temperature and wind speed, we drop wind speed;
# also, Wind_Dir has only 3 values and should not be included as a continuous confounding factor

# Looking at associations between distances
str(fieldfo)
ind.cont <- fieldfo[c("VP_Dist", "Cage_Dist")]
corr.test(ind.cont, use = "pairwise", method = "pearson", adjust = "none")
pairs(~fieldfo$VP_Dist + fieldfo$Cage_Dist, pch = 19, lower.panel = NULL)
View(fieldfo)

# subsetting to analyze responses that involved escape
fieldfos1 <- fieldfo[which(fieldfo$Brake_Bin == 0), ]
View(fieldfos1)
str(fieldfos1)

########### General linear models based on the fieldfos1 dataframe

######### Dependent: FID
fidmodel1 <- lm(data = fieldfos1, FID ~ Speed + Light + Speed:Light + Weight + Temp)
summary(fidmodel1)
Anova(fidmodel1, type = 3)
emmeans(fidmodel1, ~Speed)

# GLM assumption checking starts here
# checking the homogeneity of variance (bottom-left graph, labelled Scale-Location) and
# normality of residuals (top-right graph, labelled Normal Q-Q) assumptions
autoplot(fidmodel1, smooth.color = NA)
# we found deviations from both the homogeneity of variances and the normality of the residuals

# checking the additivity/linearity assumption via scatterplots of the association between
# the dependent factor and each of the independent continuous factors
ggplot(data = fieldfos1, aes(x = Weight, y = FID)) + geom_point() +
  geom_smooth(method = "lm", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
ggplot(data = fieldfos1, aes(x = Weight, y = FID)) + geom_point() +
  geom_smooth(method = "loess", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
ggplot(data = fieldfos1, aes(x = Temp, y = FID)) + geom_point() +
  geom_smooth(method = "lm", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
ggplot(data = fieldfos1, aes(x = Temp, y = FID)) + geom_point() +
  geom_smooth(method = "loess", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
# overall, the additivity assumption is met

# the next assumption needs checking only if you have more than 1 independent continuous factor
# checking for collinearity via Pearson product-moment correlations between all pairs of
# independent continuous factors
ind.cont <- fieldfos1[c("Weight", "Temp")]
corr.test(ind.cont, use = "pairwise", method = "pearson", adjust = "none")
# scatterplots of all pairs of independent continuous factors can be made quickly with:
pairs(~fieldfos1$Weight + fieldfos1$Temp, pch = 19, lower.panel = NULL)
# the collinearity assumption is met
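# A complementary collinearity check (our addition, a sketch, not part of the original
# analysis): variance inflation factors from the already-loaded car package. We compute
# them on an additive version of fidmodel1 because VIFs are hard to interpret when
# interaction terms are present. Values near 1 indicate little collinearity; values
# above roughly 5-10 are usually taken as problematic.
vif(lm(data = fieldfos1, FID ~ Speed + Light + Weight + Temp))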
# what follows is about transformations, given that two of the assumptions are not met very well
# we focus the transformations on the continuous (dependent or independent) factors of interest;
# thus, we will transform FID
# for the log transformation, remember that if there are 0 values you should use
# log(name_variable_to_transform + 1)
# the square root transformation does not change 0 values; if you had 0 values, you would
# add 0.5 to each value before transforming: sqrt(name_variable_to_transform + 0.5)

# we transform
fieldfos1 <- mutate(fieldfos1, log_FID = log(FID + 1))
fieldfos1 <- mutate(fieldfos1, sqrt_FID = sqrt(FID + 0.5))
# we check that the transformations took place
str(fieldfos1)

# now comparing the skewness and kurtosis of the untransformed vs. transformed variables
descr(fieldfos1$FID, stats = c("Skewness", "Kurtosis"), style = "rmarkdown")
descr(fieldfos1$log_FID, stats = c("Skewness", "Kurtosis"), style = "rmarkdown")
descr(fieldfos1$sqrt_FID, stats = c("Skewness", "Kurtosis"), style = "rmarkdown")
# based mostly on the skewness, we decide to use log_FID

# we also log-transform weight
fieldfos1 <- mutate(fieldfos1, log_bodymass = log(Weight))
str(fieldfos1)

######### Dependent: log_FID
fidmodel2 <- lm(data = fieldfos1, log_FID ~ Speed + Light + Speed:Light + log_bodymass + Temp)
summary(fidmodel2)

# GLM assumption checking starts here
# checking the homogeneity of variance (Scale-Location graph) and normality of residuals
# (Normal Q-Q graph) assumptions
autoplot(fidmodel2, smooth.color = NA)
# vs. the previous model
autoplot(fidmodel1, smooth.color = NA)
# the fit to both the homogeneity of variances and the normality of the residuals has improved

# checking the additivity/linearity assumption
ggplot(data = fieldfos1, aes(x = log_bodymass, y = log_FID)) + geom_point() +
  geom_smooth(method = "lm", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
ggplot(data = fieldfos1, aes(x = log_bodymass, y = log_FID)) + geom_point() +
  geom_smooth(method = "loess", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
ggplot(data = fieldfos1, aes(x = Temp, y = log_FID)) + geom_point() +
  geom_smooth(method = "lm", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
ggplot(data = fieldfos1, aes(x = Temp, y = log_FID)) + geom_point() +
  geom_smooth(method = "loess", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
# overall, the additivity assumption is met

# checking for collinearity between the independent continuous factors
ind.cont <- fieldfos1[c("log_bodymass", "Temp")]
corr.test(ind.cont, use = "pairwise", method = "pearson", adjust = "none")
pairs(~fieldfos1$log_bodymass + fieldfos1$Temp, pch = 19, lower.panel = NULL)
# the collinearity assumption is met

# start checking for outliers
# box plots show the median as the line within the box; the box spans the 25th to 75th
# percentiles (the interquartile range, IQR), and the whiskers extend to the most extreme
# observations within 1.5 x IQR of the box. Points beyond the whiskers may or may not be outliers
# the following are box plots of the continuous factors (dependent or independent)
boxplot(fieldfos1$log_FID, ylab = "log_FID")
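# A quick numeric companion to the box plot (a sketch added for convenience):
# boxplot.stats() returns the observations falling beyond the whiskers, i.e., more than
# 1.5 x IQR past the quartiles, which are the candidate outliers worth examining further.
boxplot.stats(fieldfos1$log_FID)$out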
# we now test whether the maximum or minimum value in the distribution is an outlier
# null hypothesis = no outlier, so a P value < 0.05 means that value is a significant outlier
# type = 10 is a test for one outlier
grubbs.test(fieldfos1$log_FID, type = 10)
# the side of the distribution being tested is detected automatically and can be reversed
# with the opposite argument
# testing whether the value on the opposite side of the distribution is an outlier
grubbs.test(fieldfos1$log_FID, type = 10, opposite = TRUE)
# No outliers detected
# if one is identified, there are further algorithms that can be used to go deeper into the
# role of those outliers

# we now interpret the results of the model with transformations
# we make sure we use Type 3 sums of squares when multiple independent factors are included
Anova(fidmodel2, type = 3)
emmeans(fidmodel2, ~Speed)
emmeans(fidmodel2, ~Light)
emmeans(fidmodel2, ~Speed:Light)
table(fieldfos1$Speed, fieldfos1$Light)
# coefficients of the model (these are conditional coefficients: each focuses on one
# variable holding the others constant)
coefficients(fidmodel2)
# effect size (we are reporting Omega squared)
anova_stats(fidmodel2, digits = 3)

# we calculate the means of the independent factor that turned out to be significant in order
# to plot them (note we skip the model-definition step because the model is already stored
# in the object fidmodel2)

# Speed effects
means_speed <- emmeans(fidmodel2, ~Speed) # means within the model for the significant categorical factor
means_speed_df <- data.frame(means_speed) # dataframe used to run the ggplot
# the graph using confidence intervals (font size and y-axis label changed during revision)
ggplot(data = means_speed_df, aes(x = Speed, y = emmean)) +
  geom_point(size = 5) +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.2) +
  ylim(0, 5.0) +
  theme_classic() +
  scale_color_brewer(palette = "Dark2") +
  xlab("Speed (km/h)") +
  ylab("log(Flight initiation distance + 1)") +
  theme(axis.text.x = element_text(size = 22, color = "black", face = "bold"),
        axis.text.y = element_text(size = 22, color = "black", face = "bold")) +
  theme(axis.title.x = element_text(size = 24, color = "black", face = "bold"),
        axis.title.y = element_text(size = 24, color = "black", face = "bold"))

# Light effects, which were not significant, but plotted to see the variation
means_light <- emmeans(fidmodel2, ~Light)
means_light_df <- data.frame(means_light)
# the graph using confidence intervals
ggplot(data = means_light_df, aes(x = Light, y = emmean)) +
  geom_point(size = 5) +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.2) +
  theme_classic() +
  scale_color_brewer(palette = "Dark2") +
  xlab("Light") +
  ylab("log(FID + 1)") +
  theme(axis.text.x = element_text(size = 14, color = "black", face = "bold"),
        axis.text.y = element_text(size = 14, color = "black", face = "bold")) +
  theme(axis.title.x = element_text(size = 16, color = "black", face = "bold"),
        axis.title.y = element_text(size = 16, color = "black", face = "bold"))
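# The FID means above are on the log(FID + 1) scale. A sketch of one way to report them
# back on the original scale: because log_FID was created with mutate() rather than inside
# the model formula, emmeans cannot auto-detect the transformation, but it can be declared
# with make.tran ("genlog" with parameter 1 should correspond to log(y + 1), per the
# emmeans documentation). The object name rg_fid is ours, not from the original script.
rg_fid <- update(ref_grid(fidmodel2), tran = make.tran("genlog", 1))
emmeans(rg_fid, ~Speed, type = "response")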
str(fieldfos1)

######### Dependent: TTC
ttcmodel1 <- lm(data = fieldfos1, TTC ~ Speed + Light + Speed:Light + log_bodymass + Temp)
summary(ttcmodel1)
Anova(ttcmodel1, type = 3)

# GLM assumption checking starts here
# checking the homogeneity of variance (Scale-Location graph) and normality of residuals
# (Normal Q-Q graph) assumptions
autoplot(ttcmodel1, smooth.color = NA)
# we found deviations from both the homogeneity of variances and the normality of the residuals

# checking the additivity/linearity assumption
ggplot(data = fieldfos1, aes(x = log_bodymass, y = TTC)) + geom_point() +
  geom_smooth(method = "lm", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
ggplot(data = fieldfos1, aes(x = log_bodymass, y = TTC)) + geom_point() +
  geom_smooth(method = "loess", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
ggplot(data = fieldfos1, aes(x = Temp, y = TTC)) + geom_point() +
  geom_smooth(method = "lm", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
ggplot(data = fieldfos1, aes(x = Temp, y = TTC)) + geom_point() +
  geom_smooth(method = "loess", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
# overall, the additivity assumption is met

# checking for collinearity between the independent continuous factors
ind.cont <- fieldfos1[c("log_bodymass", "Temp")]
corr.test(ind.cont, use = "pairwise", method = "pearson", adjust = "none")
pairs(~fieldfos1$log_bodymass + fieldfos1$Temp, pch = 19, lower.panel = NULL)
# the collinearity assumption is met

# transformations, given that two of the assumptions are not met very well;
# we will transform TTC (see the notes above about transforming variables with 0 values)
fieldfos1 <- mutate(fieldfos1, log_TTC = log(TTC + 1))
fieldfos1 <- mutate(fieldfos1, sqrt_TTC = sqrt(TTC + 0.5))
# we check that the transformations took place
str(fieldfos1)

# now comparing the skewness and kurtosis of the untransformed vs. transformed variables
descr(fieldfos1$TTC, stats = c("Skewness", "Kurtosis"), style = "rmarkdown")
descr(fieldfos1$log_TTC, stats = c("Skewness", "Kurtosis"), style = "rmarkdown")
descr(fieldfos1$sqrt_TTC, stats = c("Skewness", "Kurtosis"), style = "rmarkdown")
# based mostly on the skewness, we decide to use log_TTC
str(fieldfos1)

######### Dependent: log_TTC
ttcmodel2 <- lm(data = fieldfos1, log_TTC ~ Speed + Light + Speed:Light + log_bodymass + Temp)
summary(ttcmodel2)
na.action(ttcmodel2)

# GLM assumption checking starts here
# checking the homogeneity of variance (Scale-Location graph) and normality of residuals
# (Normal Q-Q graph) assumptions
autoplot(ttcmodel2, smooth.color = NA)
# vs. the previous model
autoplot(ttcmodel1, smooth.color = NA)
# the fit to both the homogeneity of variances and the normality of the residuals has improved

# checking the additivity/linearity assumption
ggplot(data = fieldfos1, aes(x = log_bodymass, y = log_TTC)) + geom_point() +
  geom_smooth(method = "lm", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
ggplot(data = fieldfos1, aes(x = log_bodymass, y = log_TTC)) + geom_point() +
  geom_smooth(method = "loess", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
ggplot(data = fieldfos1, aes(x = Temp, y = log_TTC)) + geom_point() +
  geom_smooth(method = "lm", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
ggplot(data = fieldfos1, aes(x = Temp, y = log_TTC)) + geom_point() +
  geom_smooth(method = "loess", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
# overall, the additivity assumption is met
# we previously checked the collinearity assumption and it is met

# start checking for outliers (see the box plot notes above)
boxplot(fieldfos1$log_TTC, ylab = "log_TTC")
# we now test whether the maximum or minimum value in the distribution is an outlier
# null hypothesis = no outlier, so a P value < 0.05 means that value is a significant outlier
# type = 10 is a test for one outlier
grubbs.test(fieldfos1$log_TTC, type = 10)
# the side of the distribution being tested is detected automatically and can be reversed
# with the opposite argument
grubbs.test(fieldfos1$log_TTC, type = 10, opposite = TRUE)
# No outliers detected
# if one is identified, there are further algorithms to examine its role

# we now interpret the results of the model with transformations, using Type 3 sums of squares
Anova(ttcmodel2, type = 3)
emmeans(ttcmodel2, ~Speed)
emmeans(ttcmodel2, ~Light)
emmeans(ttcmodel2, ~Speed:Light)
# conditional coefficients of the model (each focuses on one variable holding the others constant)
coefficients(ttcmodel2)
# effect size (we are reporting Omega squared)
anova_stats(ttcmodel2, digits = 3)

# Speed effects
means_speed <- emmeans(ttcmodel2, ~Speed)
means_speed_df <- data.frame(means_speed)
# the graph using confidence intervals
ggplot(data = means_speed_df, aes(x = Speed, y = emmean)) +
  geom_point(size = 5) +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.2) +
  ylim(0, 5.0) +
  theme_classic() +
  scale_color_brewer(palette = "Dark2") +
  xlab("Speed (km/h)") +
  ylab("log(Time-to-collision + 1)") +
  theme(axis.text.x = element_text(size = 22, color = "black", face = "bold"),
        axis.text.y = element_text(size = 22, color = "black", face = "bold")) +
  theme(axis.title.x = element_text(size = 24, color = "black", face = "bold"),
        axis.title.y = element_text(size = 24, color = "black", face = "bold"))

# Light effects, which were not significant, but plotted to see the variation
means_light <- emmeans(ttcmodel2, ~Light)
means_light_df <- data.frame(means_light)
# the graph using confidence intervals
ggplot(data = means_light_df, aes(x = Light, y = emmean)) +
  geom_point(size = 5) +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.2) +
  theme_classic() +
  scale_color_brewer(palette = "Dark2") +
  xlab("Light") +
  ylab("log(TTC + 1)") +
  theme(axis.text.x = element_text(size = 14, color = "black", face = "bold"),
        axis.text.y = element_text(size = 14, color = "black", face = "bold")) +
  theme(axis.title.x = element_text(size = 16, color = "black", face = "bold"),
        axis.title.y = element_text(size = 16, color = "black", face = "bold"))
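# Optional (a sketch added for convenience): the Speed and Light panels can be combined
# into a single figure with cowplot, which is already loaded. The object names p_speed
# and p_light are ours; simplified themes are used here for brevity.
p_speed <- ggplot(data = means_speed_df, aes(x = Speed, y = emmean)) +
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.2) +
  xlab("Speed (km/h)") + ylab("log(Time-to-collision + 1)") + theme_classic()
p_light <- ggplot(data = means_light_df, aes(x = Light, y = emmean)) +
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.2) +
  xlab("Light") + ylab("log(Time-to-collision + 1)") + theme_classic()
plot_grid(p_speed, p_light, labels = c("A", "B"))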
######### Dependent: Distance to Vehicle Path (VP_Dist)
vp_distmodel1 <- lm(data = fieldfos1, VP_Dist ~ Speed + Light + Speed:Light + log_bodymass + Temp)
summary(vp_distmodel1)

# GLM assumption checking starts here
# checking the homogeneity of variance (Scale-Location graph) and normality of residuals
# (Normal Q-Q graph) assumptions
autoplot(vp_distmodel1, smooth.color = NA)
# we found deviations from the homogeneity of variances

# checking the additivity/linearity assumption
ggplot(data = fieldfos1, aes(x = log_bodymass, y = VP_Dist)) + geom_point() +
  geom_smooth(method = "lm", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
ggplot(data = fieldfos1, aes(x = log_bodymass, y = VP_Dist)) + geom_point() +
  geom_smooth(method = "loess", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
ggplot(data = fieldfos1, aes(x = Temp, y = VP_Dist)) + geom_point() +
  geom_smooth(method = "lm", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
ggplot(data = fieldfos1, aes(x = Temp, y = VP_Dist)) + geom_point() +
  geom_smooth(method = "loess", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
# overall, the additivity assumption is met
# we checked the collinearity assumption before and it is met

# transformations, given the deviation from the homogeneity of variance assumption;
# we will transform VP_Dist (see the transformation notes above)
View(fieldfo)
fieldfos1 <- mutate(fieldfos1, log_VP_Dist = log(VP_Dist + 1))
fieldfos1 <- mutate(fieldfos1, sqrt_VP_Dist = sqrt(VP_Dist + 0.5))
# we check that the transformations took place
str(fieldfos1)

# now comparing the skewness and kurtosis of the untransformed vs. transformed variables
descr(fieldfos1$VP_Dist, stats = c("Skewness", "Kurtosis"), style = "rmarkdown")
descr(fieldfos1$log_VP_Dist, stats = c("Skewness", "Kurtosis"), style = "rmarkdown")
descr(fieldfos1$sqrt_VP_Dist, stats = c("Skewness", "Kurtosis"), style = "rmarkdown")
# based mostly on the skewness, we decide to use sqrt_VP_Dist

# new model for VP_Dist with the sqrt transformation
vp_distmodel2 <- lm(data = fieldfos1, sqrt_VP_Dist ~ Speed + Light + Speed:Light + log_bodymass + Temp)
summary(vp_distmodel2)

# GLM assumption checking starts here
autoplot(vp_distmodel2, smooth.color = NA)
# compared to
autoplot(vp_distmodel1, smooth.color = NA)
# we found no improvement in the homogeneity of variance assumption; therefore, we keep the
# original variable despite the slight deviation, which facilitates interpretation of the results

# start checking for outliers (see the box plot notes above)
boxplot(fieldfos1$sqrt_VP_Dist, ylab = "sqrt_VP_Dist")
# we now test whether the maximum or minimum value in the distribution is an outlier
# (null hypothesis = no outlier; type = 10 tests for one outlier)
grubbs.test(fieldfos1$sqrt_VP_Dist, type = 10)
# the side being tested is detected automatically; the opposite argument tests the value
# on the opposite side of the distribution
grubbs.test(fieldfos1$sqrt_VP_Dist, type = 10, opposite = TRUE)
# No outliers detected

# we now interpret the results of the model without transformations, using Type 3 sums of squares
Anova(vp_distmodel1, type = 3)
emmeans(vp_distmodel1, ~Speed)
emmeans(vp_distmodel1, ~Light)
emmeans(vp_distmodel1, ~Speed:Light)
# conditional coefficients of the model (each focuses on one variable holding the others constant)
coefficients(vp_distmodel1)
# effect size (we are reporting partial Omega squared)
anova_stats(vp_distmodel1, digits = 3)

# Speed effects
means_speed <- emmeans(vp_distmodel1, ~Speed)
means_speed_df <- data.frame(means_speed)
# the graph using confidence intervals
ggplot(data = means_speed_df, aes(x = Speed, y = emmean)) +
  geom_point(size = 5) +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.2) +
  theme_classic() +
  scale_color_brewer(palette = "Dark2") +
  xlab("Speed (km/h)") +
  ylab("Distance to vehicle path") +
  theme(axis.text.x = element_text(size = 14, color = "black", face = "bold"),
        axis.text.y = element_text(size = 14, color = "black", face = "bold")) +
  theme(axis.title.x = element_text(size = 16, color = "black", face = "bold"),
        axis.title.y = element_text(size = 16, color = "black", face = "bold"))

# Interaction between speed and light
means_speedlight <- emmeans(vp_distmodel1, ~Speed:Light)
means_speedlight_df <- data.frame(means_speedlight)
# the graph using confidence intervals (font size changed during revision)
ggplot(data = means_speedlight_df, aes(x = Speed, y = emmean, group = Light, shape = Light)) +
  geom_point(size = 5, position = position_dodge(width = 0.2)) +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.2,
                position = position_dodge(width = 0.2)) +
  scale_color_brewer(palette = "Dark2") +
  theme_classic(base_size = 18) +
  xlab("Speed (km/h)") +
  ylab("Distance to vehicle path (m)") +
  theme(axis.text.x = element_text(size = 22, color = "black", face = "bold"),
        axis.text.y = element_text(size = 22, color = "black", face = "bold")) +
  theme(axis.title.x = element_text(size = 24, color = "black", face = "bold"),
        axis.title.y = element_text(size = 24, color = "black", face = "bold"))
# calculating means for the interaction
emm_int <- emmeans(vp_distmodel1, "Speed", by = c("Light"))
emm_int
# post-hoc pairwise comparisons
pairs(emm_int)

#### We are now running generalized linear models, so we go back to the fieldfo dataframe
str(fieldfo)
fieldfo$Brake_Bin <- as.factor(fieldfo$Brake_Bin)
fieldfo$Preflight_Brake <- as.factor(fieldfo$Preflight_Brake)
fieldfo$Postflight_Brake <- as.factor(fieldfo$Postflight_Brake)
# generating log_bodymass
fieldfo <- mutate(fieldfo, log_bodymass = log(Weight))

######### Dependent: Flight_Bin
probfid_m1 <- glm(data = fieldfo, Flight_Bin ~ Speed + Light + Speed:Light + Brake_Bin +
                    log_bodymass + Temp, family = binomial(link = "logit"))
probfid_m2 <- glm(data = fieldfo, Flight_Bin ~ Speed + Light + Speed:Light +
                    log_bodymass + Temp, family = binomial(link = "logit"))
# summaries of the models
summary(probfid_m1)
summary(probfid_m2)
# coefficients of the models
coef(probfid_m1)
coef(probfid_m2)
# looking at whether each independent factor significantly explains variation in the dependent variable
anova(probfid_m1, test = "Chisq")
anova(probfid_m2, test = "Chisq")

# We calculate the significance of the overall model (which is different from the significance
# of each individual factor calculated above). The following test asks whether the model with
# ALL the predictors fits significantly better than an intercept-only (null) model. The test
# statistic is the difference in residual deviance between the two models, distributed as
# chi-squared with degrees of freedom equal to the number of predictor terms in the model.
# the difference in deviance between the two models (i.e., the test statistic):
with(probfid_m1, null.deviance - deviance)
with(probfid_m2, null.deviance - deviance)
# the degrees of freedom (the number of predictor terms in the model):
with(probfid_m1, df.null - df.residual)
with(probfid_m2, df.null - df.residual)
# the P value of the whole-model statistic:
with(probfid_m1, pchisq(null.deviance - deviance, df.null - df.residual, lower.tail = FALSE))
with(probfid_m2, pchisq(null.deviance - deviance, df.null - df.residual, lower.tail = FALSE))
# The model without braking (probfid_m2) is not significant (P = 0.51), so it is not worth
# reporting in the manuscript.
# This is sometimes called a likelihood ratio test (the deviance is -2 * log-likelihood).
# To see the model's log-likelihood:
logLik(probfid_m1)
# For overall model significance, we report the Chi2, df, and P value, and also the log-likelihood.
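# The same overall test can be run by explicitly fitting an intercept-only model and
# comparing the two fits (a sketch added for illustration; it should reproduce the manual
# pchisq() result as long as both models are fit to exactly the same rows, so watch for
# NAs in the predictors). The object name null_m is ours.
null_m <- glm(data = fieldfo, Flight_Bin ~ 1, family = binomial(link = "logit"))
anova(null_m, probfid_m1, test = "Chisq")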
# we get a pseudo estimate of the proportion of variability explained by the model (pseudo-R2)
objects(probfid_m1) # lists the components of the model object
pseudoR2 <- (probfid_m1$null.deviance - probfid_m1$deviance) / probfid_m1$null.deviance
pseudoR2

# estimated marginal means on the response (probability) scale
meanprobbrake <- emmeans(probfid_m1, "Brake_Bin", type = "response")
meanprobbrake
meanprobbrake_df <- data.frame(meanprobbrake)
meanprobSpeedField <- emmeans(probfid_m1, "Speed", type = "response")
meanprobSpeedField
meanprobLightField <- emmeans(probfid_m1, "Light", type = "response")
meanprobLightField
emmeans(probfid_m1, ~Speed:Light, type = "response")
meanprobspeed <- emmeans(probfid_m2, "Speed", type = "response")
meanprobspeed

ggplot(data = meanprobbrake_df, aes(x = Brake_Bin, y = prob)) +
  geom_point(size = 2) +
  geom_errorbar(aes(ymin = prob - SE, ymax = prob + SE), width = 0) +
  scale_color_brewer(palette = "Dark2") +
  guides(colour = FALSE) +
  theme_classic(base_size = 16) +
  xlab("Braking") +
  ylab("Probability of taking flight") +
  geom_point(data = fieldfo, aes(x = Brake_Bin, y = Flight_Bin), colour = "grey",
             alpha = .4, position = position_jitter(height = 0, width = 0.3)) +
  theme_cowplot(font_size = 11)

######### Dependent: Success_Bin_TTC (probability of success = TTC > 1.0 sec)
probsucttc_m1 <- glm(data = fieldfo, Success_Bin_TTC ~ Speed + Light + Speed:Light +
                       Preflight_Brake + log_bodymass + Temp, family = binomial(link = "logit"))
# summary of the model
summary(probsucttc_m1)
# coefficients of the model
coef(probsucttc_m1)
# looking at whether each independent factor significantly explains variation in the dependent variable
anova(probsucttc_m1, test = "Chisq")

# overall model significance (same likelihood ratio test as described above):
with(probsucttc_m1, null.deviance - deviance)
with(probsucttc_m1, df.null - df.residual)
with(probsucttc_m1, pchisq(null.deviance - deviance, df.null - df.residual, lower.tail = FALSE))
logLik(probsucttc_m1)
# For overall model significance, we report the Chi2, df, and P value, and also the log-likelihood.
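# Logistic-regression coefficients are on the log-odds scale; a common companion for
# reporting (a sketch, not part of the original analysis) is odds ratios with
# profile-likelihood confidence intervals:
exp(cbind(OR = coef(probsucttc_m1), confint(probsucttc_m1)))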
# pseudo-R2 for the model
objects(probsucttc_m1)
pseudoR2 <- (probsucttc_m1$null.deviance - probsucttc_m1$deviance) / probsucttc_m1$null.deviance
pseudoR2

# estimated marginal means on the response scale
meanprobspeed <- emmeans(probsucttc_m1, "Speed", type = "response")
meanprobspeed
meanprobspeed_df <- data.frame(meanprobspeed)
meanproblight <- emmeans(probsucttc_m1, "Light", type = "response")
meanproblight
emmeans(probsucttc_m1, ~Speed:Light, type = "response")

ggplot(data = meanprobspeed_df, aes(x = Speed, y = prob)) +
  geom_point(size = 2) +
  geom_errorbar(aes(ymin = prob - SE, ymax = prob + SE), width = 0) +
  scale_color_brewer(palette = "Dark2") +
  guides(colour = FALSE) +
  theme_classic(base_size = 16) +
  xlab("Speed (km/h)") +
  ylab("Theoretical probability of successful avoidance") +
  geom_point(data = fieldfo, aes(x = Speed, y = Success_Bin_TTC), colour = "grey",
             alpha = .4, position = position_jitter(height = 0, width = 0.3)) +
  theme_cowplot(font_size = 11)

######### Dependent: Live_Duck (probability of success = presence in vehicle path at collision)
probliveduck_m1 <- glm(data = fieldfo, Live_Duck ~ Speed + Light + Speed:Light +
                         Postflight_Brake + log_bodymass + Temp, family = binomial(link = "logit"))
liveduck_m2 <- glm(data = fieldfo, Live_Duck ~ Speed + Light + Speed:Light +
                     log_bodymass + Temp, family = binomial(link = "logit"))
# summaries of the models
summary(probliveduck_m1)
summary(liveduck_m2)
# coefficients of the model
coef(probliveduck_m1)
# looking at whether each independent factor significantly explains variation in the dependent variable
anova(probliveduck_m1, test = "Chisq")
anova(liveduck_m2, test = "Chisq")

# overall model significance (same likelihood ratio test as described above):
with(probliveduck_m1, null.deviance - deviance)
with(probliveduck_m1, df.null - df.residual)
with(probliveduck_m1, pchisq(null.deviance - deviance, df.null - df.residual, lower.tail = FALSE))
logLik(probliveduck_m1)
# For overall model significance, we report the Chi2, df, and P value, and also the log-likelihood.
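# Cross-check (a sketch, not part of the original analysis): the deviance-based pseudo-R2
# computed next is McFadden's, and for a binary logistic model it can equivalently be
# obtained from log-likelihoods. update() refits an intercept-only model; if any
# predictors have missing values, the two fits may use different rows and the match will
# only be approximate.
1 - as.numeric(logLik(probliveduck_m1)) / as.numeric(logLik(update(probliveduck_m1, . ~ 1)))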
# pseudo-R2 for the model
objects(probliveduck_m1)
pseudoR2 <- (probliveduck_m1$null.deviance - probliveduck_m1$deviance) / probliveduck_m1$null.deviance
pseudoR2

meanprobspeed <- emmeans(probliveduck_m1, "Postflight_Brake", type = "response")
meanprobspeed
emmeans(liveduck_m2, ~Speed, type = "response")
emmeans(liveduck_m2, ~Light, type = "response")
emmeans(liveduck_m2, ~Speed:Light, type = "response")

##################################
### LAB DATA
#################################
setwd("C:/Rfiles") # sets the working folder
rm(list = ls()) # clears R's memory

labfo <- read.csv("RDuckVideo_7.07.csv", na.strings = c("", "NA"), header = TRUE,
                  fileEncoding = 'UTF-8-BOM') # opens file
str(labfo)
View(labfo)

# assigning different vars to factors
labfo$Sex <- as.factor(labfo$Sex)
labfo$Speed <- as.factor(labfo$Speed)
labfo$Light <- as.factor(labfo$Light)
# checking that the changes were made
str(labfo)

# we quickly look at whether body mass varies with sex
bmvssex <- lm(data = labfo, Weight ~ Sex)
# quick summary of the model
summary(bmvssex)
# direction of the effect
emmeans(bmvssex, ~Sex)
# Males are larger than females, so we cannot include both sex and body mass in the models
# because they are associated; we include the continuous independent factor to minimize
# using up degrees of freedom

# subsetting to analyze responses that involved escape
labfos1 <- labfo[which(labfo$Flight_Bin == 1), ]
View(labfos1)
str(labfos1)

# getting SDs of the raw data
sd(labfos1$FID, na.rm = TRUE)
sd(labfos1$TTC, na.rm = TRUE)
labfos1_noTTCoutlier <- labfos1[-which(labfos1$TTC > 5), ]
sd(labfos1_noTTCoutlier$TTC, na.rm = TRUE)

# quick exploratory plot of mean FID by speed with standard-error bars
ggplot(data = labfos1, aes(x = Speed, y = FID)) +
  stat_summary(fun = mean, geom = "bar") +
  stat_summary(fun.data = mean_se, geom = "errorbar", width = 0.2) +
  theme_classic()

# we log-transform weight
labfos1 <- mutate(labfos1, log_bodymass = log(Weight))

######### Dependent: FID
fidm1 <- lm(data = labfos1, FID ~ Speed + Light + Speed:Light + log_bodymass)
summary(fidm1)
Anova(fidm1, type = 3)

# GLM assumption checking starts here
# checking the homogeneity of variance (Scale-Location graph) and normality of residuals
# (Normal Q-Q graph) assumptions
autoplot(fidm1, smooth.color = NA)
# we found deviations from both the homogeneity of variances and the normality of the residuals

# checking the additivity/linearity assumption
ggplot(data = labfos1, aes(x = log_bodymass, y = FID)) + geom_point() +
  geom_smooth(method = "lm", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
ggplot(data = labfos1, aes(x = log_bodymass, y = FID)) + geom_point() +
  geom_smooth(method = "loess", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
# the additivity assumption seems to be met

# we transform
labfos1 <- mutate(labfos1, log_FID = log(FID + 1))
labfos1 <- mutate(labfos1, sqrt_FID = sqrt(FID + 0.5))
# we check that the transformations took place
str(labfos1)

# now comparing the skewness and kurtosis of the untransformed vs. transformed variables
descr(labfos1$FID, stats = c("Skewness", "Kurtosis"), style = "rmarkdown")
descr(labfos1$log_FID, stats = c("Skewness", "Kurtosis"), style = "rmarkdown")
descr(labfos1$sqrt_FID, stats = c("Skewness", "Kurtosis"), style = "rmarkdown")
# based mostly on the skewness, we decide to use sqrt_FID

######### Dependent: sqrt_FID
fidm2 <- lm(data = labfos1, sqrt_FID ~ Speed + Light + Speed:Light + log_bodymass)
summary(fidm2)

# GLM assumption checking starts here
autoplot(fidm2, smooth.color = NA)
# compared to...
autoplot(fidm1, smooth.color = NA)
# the sqrt transformation slightly improved the homogeneity of variances and the normality
# of the residuals, but the assumptions are not fully met, so results should be interpreted with care

# checking the additivity/linearity assumption
ggplot(data = labfos1, aes(x = log_bodymass, y = sqrt_FID)) + geom_point() +
  geom_smooth(method = "lm", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
ggplot(data = labfos1, aes(x = log_bodymass, y = sqrt_FID)) + geom_point() +
  geom_smooth(method = "loess", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
# the additivity assumption seems to be met

# start checking for outliers (see the box plot notes above)
boxplot(labfos1$sqrt_FID, ylab = "sqrt_FID")
# we now test whether the maximum or minimum value in the distribution is an outlier
# null hypothesis = no outlier, so a P value < 0.05 means that value is a significant outlier
# type = 10 is a test for one outlier
grubbs.test(labfos1$sqrt_FID, type = 10)
# the side of the distribution being tested is detected automatically and can be reversed
# with the opposite argument, which tests the value on the opposite side of the distribution
grubbs.test(labfos1$sqrt_FID, type = 10, opposite = TRUE)
# No outliers detected
# if one is identified, there are further algorithms to examine its role

# we use Type 3 sums of squares when multiple independent factors are included
Anova(fidm2, type = 3)
emmeans(fidm2, ~Speed, type = "response")
emmeans(fidm2, ~Light)
emmeans(fidm2, ~Speed:Light)
# conditional coefficients of the model (each focuses on one variable holding the others constant)
coefficients(fidm2)
# effect size (we are reporting Omega squared)
anova_stats(fidm2, digits = 3)

######### Dependent: TTC
ttcm1 <- lm(data = labfos1, TTC ~ Speed + Light + Speed:Light + log_bodymass)
summary(ttcm1)

# GLM assumption checking starts here
autoplot(ttcm1, smooth.color = NA)
# we found deviations from both the homogeneity of variances and the normality of the residuals

# checking the additivity/linearity assumption
ggplot(data = labfos1, aes(x = log_bodymass, y = TTC)) + geom_point() +
  geom_smooth(method = "lm", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
ggplot(data = labfos1, aes(x = log_bodymass, y = TTC)) + geom_point() +
  geom_smooth(method = "loess", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
# the additivity assumption seems to be met

# we transform
labfos1 <- mutate(labfos1, log_TTC = log(TTC + 1))
labfos1 <- mutate(labfos1, sqrt_TTC = sqrt(TTC + 0.5))
# we check that the transformations took place
str(labfos1)

# now comparing the skewness and kurtosis of the untransformed vs. transformed variables
descr(labfos1$TTC, stats = c("Skewness", "Kurtosis"), style = "rmarkdown")
descr(labfos1$log_TTC, stats = c("Skewness", "Kurtosis"), style = "rmarkdown")
descr(labfos1$sqrt_TTC, stats = c("Skewness", "Kurtosis"), style = "rmarkdown")
# based mostly on the skewness, we decide to use log_TTC

######### Dependent: log_TTC
ttcm2 <- lm(data = labfos1, log_TTC ~ Speed + Light + Speed:Light + log_bodymass)
summary(ttcm2)

# GLM assumption checking starts here
autoplot(ttcm2, smooth.color = NA)
# compared to...
autoplot(ttcm1, smooth.color = NA)
# good improvement in meeting the assumptions with the log_TTC transformation

# checking the additivity/linearity assumption
ggplot(data = labfos1, aes(x = log_bodymass, y = log_TTC)) + geom_point() +
  geom_smooth(method = "lm", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
ggplot(data = labfos1, aes(x = log_bodymass, y = log_TTC)) + geom_point() +
  geom_smooth(method = "loess", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
# the additivity assumption seems to be met

# start checking for outliers (see the box plot notes above)
boxplot(labfos1$log_TTC, ylab = "log_TTC")
# we now test whether the maximum or minimum value in the distribution is an outlier
# (null hypothesis = no outlier; type = 10 tests for one outlier)
grubbs.test(labfos1$log_TTC, type = 10)
# the side being tested is detected automatically; the opposite argument tests the value
# on the opposite side of the distribution
grubbs.test(labfos1$log_TTC, type = 10, opposite = TRUE)
# An outlier was detected: 2.75544256340481
# We therefore present two sets of results: with the outlier and without it

#### Results with the outlier
# we use Type 3 sums of squares when multiple independent factors are included
Anova(ttcm2, type = 3)
emmeans(ttcm2, ~Speed)
emmeans(ttcm2, ~Light)
emmeans(ttcm2, ~Speed:Light)
# conditional coefficients of the model (each focuses on one variable holding the others constant)
coefficients(ttcm2)
# effect size (we are reporting Omega squared)
anova_stats(ttcm2, digits = 3)

# Light effects
means_light2 <- emmeans(ttcm2, ~Light)
means_light_df2 <- data.frame(means_light2)
# the graph using confidence intervals
ggplot(data = means_light_df2, aes(x = Light, y = emmean)) +
  geom_point(size = 5) +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.2) +
  ylim(0.6, 1.6) +
  theme_classic() +
  scale_color_brewer(palette = "Dark2") +
  xlab("Time of day") +
  ylab("log(TTC + 1)") +
  theme(axis.text.x = element_text(size = 14, color = "black", face = "bold"),
        axis.text.y = element_text(size = 14, color = "black", face = "bold")) +
  theme(axis.title.x = element_text(size = 16, color = "black", face = "bold"),
        axis.title.y = element_text(size = 16, color = "black", face = "bold"))

#### Results without the outlier
View(labfos1)
ttcm3 <- lm(data = labfos1[-which(labfos1$log_TTC > 2.75), ],
            log_TTC ~ Speed + Light + Speed:Light + log_bodymass)
summary(ttcm3)
Anova(ttcm3, type = 3)
emmeans(ttcm3, ~Speed)
emmeans(ttcm3, ~Light)
emmeans(ttcm3, ~Speed:Light)
# conditional coefficients of the model (each focuses on one variable holding the others constant)
coefficients(ttcm3)
# effect size (we are reporting Omega squared)
anova_stats(ttcm3, digits = 3)

# Light effects
means_light <- emmeans(ttcm3, ~Light)
means_light_df <- data.frame(means_light)
# the graph using confidence intervals (y-axis title, range, and font size changed during revision)
ggplot(data = means_light_df, aes(x = Light, y = emmean)) +
  geom_point(size = 8) +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.4) +
  ylim(0.0, 1.6) +
  theme_classic() +
  scale_color_brewer(palette = "Dark2") +
  xlab("Time of day") +
  ylab("log(Time-to-collision + 1)") +
  theme(axis.text.x = element_text(size = 24, color = "black", face = "bold"),
        axis.text.y = element_text(size = 24, color = "black", face = "bold")) +
  theme(axis.title.x = element_text(size = 26, color = "black", face = "bold"),
        axis.title.y = element_text(size = 26, color = "black", face = "bold"))

#### We are now running generalized linear models, so we go back to the labfo dataframe
str(labfo)
# generating log_bodymass
labfo <- mutate(labfo, log_bodymass = log(Weight))
View(labfo)

######### Dependent: Flight_Bin
probfid_m1 <- glm(data = labfo, Flight_Bin ~ Speed + Light + Speed:Light + log_bodymass,
                  family = binomial(link = "logit"))
# summary of the model
summary(probfid_m1)
# coefficients of the model
coef(probfid_m1)
# looking at whether each independent factor significantly explains variation in the dependent variable
anova(probfid_m1, test = "Chisq")

# overall model significance (same likelihood ratio test as in the field analyses):
with(probfid_m1, null.deviance - deviance)
with(probfid_m1, df.null - df.residual)
with(probfid_m1, pchisq(null.deviance - deviance, df.null - df.residual, lower.tail = FALSE))
logLik(probfid_m1)
# For overall model significance, we report the Chi2, df, and P value, and also the log-likelihood.
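# Note (a sketch added for context): anova(probfid_m1, test = "Chisq") above is
# sequential (Type I), so the result for each term depends on its position in the
# formula. An order-independent alternative is a likelihood-ratio test of each term via
# car::Anova, with the usual caveats about Type III tests and factor contrasts in mind:
Anova(probfid_m1, type = 3, test.statistic = "LR")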
# pseudo-R2 for the model
objects(probfid_m1)
pseudoR2 <- (probfid_m1$null.deviance - probfid_m1$deviance) / probfid_m1$null.deviance
pseudoR2

#### Marginal means
meanproblight <- emmeans(probfid_m1, "Light", type = "response")
meanproblight
meanproblight_df <- data.frame(meanproblight)
meanprobspeed1 <- emmeans(probfid_m1, "Speed", type = "response")
meanprobspeed1
emmeans(probfid_m1, ~Speed:Light, type = "response")
emmeans(probfid_m1, ~log_bodymass, type = "response")

dev.off() # closes the current graphics device (if one is open) before the next figure
ggplot(data = meanproblight_df, aes(x = Light, y = prob)) +
  geom_point(size = 2) +
  geom_errorbar(aes(ymin = prob - SE, ymax = prob + SE), width = 0) +
  scale_color_brewer(palette = "Dark2") +
  guides(colour = FALSE) +
  theme_classic(base_size = 16) +
  xlab("Time of day") +
  ylab("Probability of flight response") +
  geom_point(data = labfo, aes(x = Light, y = Flight_Bin), colour = "grey",
             alpha = .4, position = position_jitter(height = 0, width = 0.3)) +
  theme_cowplot(font_size = 11)
str(labfo)

######### Dependent: Success_Bin
probsucc_m1 <- glm(data = labfo, Success_Bin ~ Speed + Light + Speed:Light + log_bodymass,
                   family = binomial(link = "logit"))
# summary of the model
summary(probsucc_m1)
# coefficients of the model
coef(probsucc_m1)
# looking at whether each independent factor significantly explains variation in the dependent variable
anova(probsucc_m1, test = "Chisq")

# overall model significance (same likelihood ratio test as described above):
with(probsucc_m1, null.deviance - deviance)
with(probsucc_m1, df.null - df.residual)
with(probsucc_m1, pchisq(null.deviance - deviance, df.null - df.residual, lower.tail = FALSE))
logLik(probsucc_m1)
# For overall model significance, we report the Chi2, df, and P value, and also the log-likelihood.
# pseudo-R2 for the model
objects(probsucc_m1)
pseudoR2 <- (probsucc_m1$null.deviance - probsucc_m1$deviance) / probsucc_m1$null.deviance
pseudoR2

#### Marginal means
meansucclight <- emmeans(probsucc_m1, "Light", type = "response")
meansucclight
meansuccspeed1 <- emmeans(probsucc_m1, "Speed", type = "response")
meansuccspeed1
emmeans(probsucc_m1, ~Speed:Light, type = "response")
meanprobspeed <- emmeans(probsucc_m1, "Speed", type = "response")
meanprobspeed
pairs(meanprobspeed)
meanprobspeed_df <- data.frame(meanprobspeed)

# Figure to interpret the results (point size and font size changed during revision)
ggplot(data = meanprobspeed_df, aes(x = Speed, y = prob)) +
  geom_point(size = 5) +
  geom_errorbar(aes(ymin = prob - SE, ymax = prob + SE), width = 0) +
  scale_color_brewer(palette = "Dark2") +
  guides(colour = FALSE) +
  theme_classic(base_size = 16) +
  xlab("Speed (km/h)") +
  ylab("Probability of successful avoidance") +
  geom_point(data = labfo, aes(x = Speed, y = Success_Bin), colour = "grey",
             alpha = .4, position = position_jitter(height = 0, width = 0.3)) +
  theme_cowplot(font_size = 22)
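# To export the final figure at manuscript resolution (a sketch; the file name and
# dimensions are placeholders, not taken from the original analysis), ggsave() writes
# the most recently displayed plot:
ggsave("prob_success_by_speed.png", width = 7, height = 5, dpi = 300)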