library(psych)
library(sjstats)
library(tidyverse)
library(ggfortify)
library(pwr)
library(summarytools)
library(emmeans)
library(Rmisc)
library(car)
library(outliers)
library(cowplot)

##################################
### FIELD DATA
#################################
rm(list = ls()) # clears R's memory

fieldfo <- read.csv("RDuckField_6.30.csv", na.strings = c("", "NA"), header = TRUE,
                    fileEncoding = 'UTF-8-BOM') # opens file
str(fieldfo)

# assigning different vars to factors
fieldfo$Date_Num <- as.factor(fieldfo$Date_Num)
fieldfo$Sex <- as.factor(fieldfo$Sex)
fieldfo$Speed <- as.factor(fieldfo$Speed)
fieldfo$Light <- as.factor(fieldfo$Light)
fieldfo$Brake <- as.factor(fieldfo$Brake)
fieldfo$Brake_Type <- as.factor(fieldfo$Brake_Type)
# checking that the changes were made
str(fieldfo)
View(fieldfo)

# we quickly look at whether body mass varies with sex
bmvssex <- lm(data = fieldfo, Weight ~ Sex)
# quick summary of the model
summary(bmvssex)
# direction of the effect
emmeans(bmvssex, ~Sex)
# Males are larger than females; we continue with body mass to maximize degrees of freedom

# Looking at associations between environmental independent continuous factors
# grouping all independent continuous factors into a single object to facilitate the correlations
ind.cont <- fieldfo[c("Temp", "Wind_kmh", "Wind_Dir")]
# runs the pairwise correlations between the independent continuous factors
corr.test(ind.cont, use = "pairwise", method = "pearson", adjust = "none")
# because of the high association between temperature and wind speed, we drop wind speed;
# also, Wind_Dir has only 3 values and should not be included as a continuous confounding factor

# Looking at associations between distances
str(fieldfo)
ind.cont <- fieldfo[c("VP_Dist", "Cage_Dist")]
corr.test(ind.cont, use = "pairwise", method = "pearson", adjust = "none")
pairs(~fieldfo$VP_Dist + fieldfo$Cage_Dist, pch = 19, lower.panel = NULL)
View(fieldfo)

# subsetting to analyze responses that involved escape
fieldfos1 <- fieldfo[which(fieldfo$Brake_Bin == 0), ]
View(fieldfos1)
str(fieldfos1)

########### General linear models based on the fieldfos1 dataframe

######### Dependent: FID
fidmodel1 <- lm(data = fieldfos1, FID ~ Speed + Light + Speed:Light + Weight + Temp)
summary(fidmodel1)
Anova(fidmodel1, type = 3)
emmeans(fidmodel1, ~Speed)

# GLM assumption checking starts here
# checking the homogeneity of variance (bottom-left graph, labelled Scale-Location) and
# normality of residuals (top-right graph, labelled Normal Q-Q) assumptions
autoplot(fidmodel1, smooth.color = NA)
# we found deviations from both the homogeneity of variances and the normality of the residuals

# checking the additivity/linearity assumption via scatterplots of the association between
# the dependent factor and each of the independent continuous factors
ggplot(data = fieldfos1, aes(x = Weight, y = FID)) + geom_point() +
  geom_smooth(method = "lm", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
ggplot(data = fieldfos1, aes(x = Weight, y = FID)) + geom_point() +
  geom_smooth(method = "loess", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
ggplot(data = fieldfos1, aes(x = Temp, y = FID)) + geom_point() +
  geom_smooth(method = "lm", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
ggplot(data = fieldfos1, aes(x = Temp, y = FID)) + geom_point() +
  geom_smooth(method = "loess", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
# overall, the additivity assumption is met

# the next assumption needs checking only if you have more than 1 independent continuous factor
# checking for collinearity via Pearson product-moment correlations between all pairs of
# independent continuous factors
ind.cont <- fieldfos1[c("Weight", "Temp")]
corr.test(ind.cont, use = "pairwise", method = "pearson", adjust = "none")
# scatterplots of all pairs of independent continuous factors can be made quickly with:
pairs(~fieldfos1$Weight + fieldfos1$Temp, pch = 19, lower.panel = NULL)
# the collinearity assumption is met
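# A complementary collinearity check (our addition, a sketch, not part of the original
# analysis): variance inflation factors from the already-loaded car package. We compute
# them on an additive version of fidmodel1 because VIFs are hard to interpret when
# interaction terms are present. Values near 1 indicate little collinearity; values
# above roughly 5-10 are usually taken as problematic.
vif(lm(data = fieldfos1, FID ~ Speed + Light + Weight + Temp))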
# what follows is about transformations, given that two of the assumptions are not met very well
# we focus the transformations on the continuous (dependent or independent) factors of interest;
# thus, we will transform FID
# for the log transformation, remember that if there are 0 values you should use
# log(name_variable_to_transform + 1)
# the square root transformation does not change 0 values; if you had 0 values, you would
# add 0.5 to each value before transforming: sqrt(name_variable_to_transform + 0.5)

# we transform
fieldfos1 <- mutate(fieldfos1, log_FID = log(FID + 1))
fieldfos1 <- mutate(fieldfos1, sqrt_FID = sqrt(FID + 0.5))
# we check that the transformations took place
str(fieldfos1)

# now comparing the skewness and kurtosis of the untransformed vs. transformed variables
descr(fieldfos1$FID, stats = c("Skewness", "Kurtosis"), style = "rmarkdown")
descr(fieldfos1$log_FID, stats = c("Skewness", "Kurtosis"), style = "rmarkdown")
descr(fieldfos1$sqrt_FID, stats = c("Skewness", "Kurtosis"), style = "rmarkdown")
# based mostly on the skewness, we decide to use log_FID

# we also log-transform weight
fieldfos1 <- mutate(fieldfos1, log_bodymass = log(Weight))
str(fieldfos1)

######### Dependent: log_FID
fidmodel2 <- lm(data = fieldfos1, log_FID ~ Speed + Light + Speed:Light + log_bodymass + Temp)
summary(fidmodel2)

# GLM assumption checking starts here
# checking the homogeneity of variance (Scale-Location graph) and normality of residuals
# (Normal Q-Q graph) assumptions
autoplot(fidmodel2, smooth.color = NA)
# vs. the previous model
autoplot(fidmodel1, smooth.color = NA)
# the fit to both the homogeneity of variances and the normality of the residuals has improved

# checking the additivity/linearity assumption
ggplot(data = fieldfos1, aes(x = log_bodymass, y = log_FID)) + geom_point() +
  geom_smooth(method = "lm", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
ggplot(data = fieldfos1, aes(x = log_bodymass, y = log_FID)) + geom_point() +
  geom_smooth(method = "loess", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
ggplot(data = fieldfos1, aes(x = Temp, y = log_FID)) + geom_point() +
  geom_smooth(method = "lm", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
ggplot(data = fieldfos1, aes(x = Temp, y = log_FID)) + geom_point() +
  geom_smooth(method = "loess", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
# overall, the additivity assumption is met

# checking for collinearity between the independent continuous factors
ind.cont <- fieldfos1[c("log_bodymass", "Temp")]
corr.test(ind.cont, use = "pairwise", method = "pearson", adjust = "none")
pairs(~fieldfos1$log_bodymass + fieldfos1$Temp, pch = 19, lower.panel = NULL)
# the collinearity assumption is met

# start checking for outliers
# box plots show the median as the line within the box; the box spans the 25th to 75th
# percentiles (the interquartile range, IQR), and the whiskers extend to the most extreme
# observations within 1.5 x IQR of the box. Points beyond the whiskers may or may not be outliers
# the following are box plots of the continuous factors (dependent or independent)
boxplot(fieldfos1$log_FID, ylab = "log_FID")
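# A quick numeric companion to the box plot (a sketch added for convenience):
# boxplot.stats() returns the observations falling beyond the whiskers, i.e., more than
# 1.5 x IQR past the quartiles, which are the candidate outliers worth examining further.
boxplot.stats(fieldfos1$log_FID)$out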
# we now test whether the maximum or minimum value in the distribution is an outlier
# null hypothesis = no outlier, so a P value < 0.05 means that value is a significant outlier
# type = 10 is a test for one outlier
grubbs.test(fieldfos1$log_FID, type = 10)
# the side of the distribution being tested is detected automatically and can be reversed
# with the opposite argument
# testing whether the value on the opposite side of the distribution is an outlier
grubbs.test(fieldfos1$log_FID, type = 10, opposite = TRUE)
# No outliers detected
# if one is identified, there are further algorithms that can be used to go deeper into the
# role of those outliers

# we now interpret the results of the model with transformations
# we make sure we use Type 3 sums of squares when multiple independent factors are included
Anova(fidmodel2, type = 3)
emmeans(fidmodel2, ~Speed)
emmeans(fidmodel2, ~Light)
emmeans(fidmodel2, ~Speed:Light)
table(fieldfos1$Speed, fieldfos1$Light)
# coefficients of the model (these are conditional coefficients: each focuses on one
# variable holding the others constant)
coefficients(fidmodel2)
# effect size (we are reporting Omega squared)
anova_stats(fidmodel2, digits = 3)

# we calculate the means of the independent factor that turned out to be significant in order
# to plot them (note we skip the model-definition step because the model is already stored
# in the object fidmodel2)

# Speed effects
means_speed <- emmeans(fidmodel2, ~Speed) # means within the model for the significant categorical factor
means_speed_df <- data.frame(means_speed) # dataframe used to run the ggplot
# the graph using confidence intervals (font size and y-axis label changed during revision)
ggplot(data = means_speed_df, aes(x = Speed, y = emmean)) +
  geom_point(size = 5) +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.2) +
  ylim(0, 5.0) +
  theme_classic() +
  scale_color_brewer(palette = "Dark2") +
  xlab("Speed (km/h)") +
  ylab("log(Flight initiation distance + 1)") +
  theme(axis.text.x = element_text(size = 22, color = "black", face = "bold"),
        axis.text.y = element_text(size = 22, color = "black", face = "bold")) +
  theme(axis.title.x = element_text(size = 24, color = "black", face = "bold"),
        axis.title.y = element_text(size = 24, color = "black", face = "bold"))

# Light effects, which were not significant, but plotted to see the variation
means_light <- emmeans(fidmodel2, ~Light)
means_light_df <- data.frame(means_light)
# the graph using confidence intervals
ggplot(data = means_light_df, aes(x = Light, y = emmean)) +
  geom_point(size = 5) +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.2) +
  theme_classic() +
  scale_color_brewer(palette = "Dark2") +
  xlab("Light") +
  ylab("log(FID + 1)") +
  theme(axis.text.x = element_text(size = 14, color = "black", face = "bold"),
        axis.text.y = element_text(size = 14, color = "black", face = "bold")) +
  theme(axis.title.x = element_text(size = 16, color = "black", face = "bold"),
        axis.title.y = element_text(size = 16, color = "black", face = "bold"))
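# The FID means above are on the log(FID + 1) scale. A sketch of one way to report them
# back on the original scale: because log_FID was created with mutate() rather than inside
# the model formula, emmeans cannot auto-detect the transformation, but it can be declared
# with make.tran ("genlog" with parameter 1 should correspond to log(y + 1), per the
# emmeans documentation). The object name rg_fid is ours, not from the original script.
rg_fid <- update(ref_grid(fidmodel2), tran = make.tran("genlog", 1))
emmeans(rg_fid, ~Speed, type = "response")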
str(fieldfos1)

######### Dependent: TTC
ttcmodel1 <- lm(data = fieldfos1, TTC ~ Speed + Light + Speed:Light + log_bodymass + Temp)
summary(ttcmodel1)
Anova(ttcmodel1, type = 3)

# GLM assumption checking starts here
# checking the homogeneity of variance (Scale-Location graph) and normality of residuals
# (Normal Q-Q graph) assumptions
autoplot(ttcmodel1, smooth.color = NA)
# we found deviations from both the homogeneity of variances and the normality of the residuals

# checking the additivity/linearity assumption
ggplot(data = fieldfos1, aes(x = log_bodymass, y = TTC)) + geom_point() +
  geom_smooth(method = "lm", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
ggplot(data = fieldfos1, aes(x = log_bodymass, y = TTC)) + geom_point() +
  geom_smooth(method = "loess", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
ggplot(data = fieldfos1, aes(x = Temp, y = TTC)) + geom_point() +
  geom_smooth(method = "lm", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
ggplot(data = fieldfos1, aes(x = Temp, y = TTC)) + geom_point() +
  geom_smooth(method = "loess", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
# overall, the additivity assumption is met

# checking for collinearity between the independent continuous factors
ind.cont <- fieldfos1[c("log_bodymass", "Temp")]
corr.test(ind.cont, use = "pairwise", method = "pearson", adjust = "none")
pairs(~fieldfos1$log_bodymass + fieldfos1$Temp, pch = 19, lower.panel = NULL)
# the collinearity assumption is met

# transformations, given that two of the assumptions are not met very well;
# we will transform TTC (see the notes above about transforming variables with 0 values)
fieldfos1 <- mutate(fieldfos1, log_TTC = log(TTC + 1))
fieldfos1 <- mutate(fieldfos1, sqrt_TTC = sqrt(TTC + 0.5))
# we check that the transformations took place
str(fieldfos1)

# now comparing the skewness and kurtosis of the untransformed vs. transformed variables
descr(fieldfos1$TTC, stats = c("Skewness", "Kurtosis"), style = "rmarkdown")
descr(fieldfos1$log_TTC, stats = c("Skewness", "Kurtosis"), style = "rmarkdown")
descr(fieldfos1$sqrt_TTC, stats = c("Skewness", "Kurtosis"), style = "rmarkdown")
# based mostly on the skewness, we decide to use log_TTC
str(fieldfos1)

######### Dependent: log_TTC
ttcmodel2 <- lm(data = fieldfos1, log_TTC ~ Speed + Light + Speed:Light + log_bodymass + Temp)
summary(ttcmodel2)
na.action(ttcmodel2)

# GLM assumption checking starts here
# checking the homogeneity of variance (Scale-Location graph) and normality of residuals
# (Normal Q-Q graph) assumptions
autoplot(ttcmodel2, smooth.color = NA)
# vs. the previous model
autoplot(ttcmodel1, smooth.color = NA)
# the fit to both the homogeneity of variances and the normality of the residuals has improved

# checking the additivity/linearity assumption
ggplot(data = fieldfos1, aes(x = log_bodymass, y = log_TTC)) + geom_point() +
  geom_smooth(method = "lm", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
ggplot(data = fieldfos1, aes(x = log_bodymass, y = log_TTC)) + geom_point() +
  geom_smooth(method = "loess", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
ggplot(data = fieldfos1, aes(x = Temp, y = log_TTC)) + geom_point() +
  geom_smooth(method = "lm", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
ggplot(data = fieldfos1, aes(x = Temp, y = log_TTC)) + geom_point() +
  geom_smooth(method = "loess", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
# overall, the additivity assumption is met
# we previously checked the collinearity assumption and it is met

# start checking for outliers (see the box plot notes above)
boxplot(fieldfos1$log_TTC, ylab = "log_TTC")
# we now test whether the maximum or minimum value in the distribution is an outlier
# null hypothesis = no outlier, so a P value < 0.05 means that value is a significant outlier
# type = 10 is a test for one outlier
grubbs.test(fieldfos1$log_TTC, type = 10)
# the side of the distribution being tested is detected automatically and can be reversed
# with the opposite argument
grubbs.test(fieldfos1$log_TTC, type = 10, opposite = TRUE)
# No outliers detected
# if one is identified, there are further algorithms to examine its role

# we now interpret the results of the model with transformations, using Type 3 sums of squares
Anova(ttcmodel2, type = 3)
emmeans(ttcmodel2, ~Speed)
emmeans(ttcmodel2, ~Light)
emmeans(ttcmodel2, ~Speed:Light)
# conditional coefficients of the model (each focuses on one variable holding the others constant)
coefficients(ttcmodel2)
# effect size (we are reporting Omega squared)
anova_stats(ttcmodel2, digits = 3)

# Speed effects
means_speed <- emmeans(ttcmodel2, ~Speed)
means_speed_df <- data.frame(means_speed)
# the graph using confidence intervals
ggplot(data = means_speed_df, aes(x = Speed, y = emmean)) +
  geom_point(size = 5) +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.2) +
  ylim(0, 5.0) +
  theme_classic() +
  scale_color_brewer(palette = "Dark2") +
  xlab("Speed (km/h)") +
  ylab("log(Time-to-collision + 1)") +
  theme(axis.text.x = element_text(size = 22, color = "black", face = "bold"),
        axis.text.y = element_text(size = 22, color = "black", face = "bold")) +
  theme(axis.title.x = element_text(size = 24, color = "black", face = "bold"),
        axis.title.y = element_text(size = 24, color = "black", face = "bold"))

# Light effects, which were not significant, but plotted to see the variation
means_light <- emmeans(ttcmodel2, ~Light)
means_light_df <- data.frame(means_light)
# the graph using confidence intervals
ggplot(data = means_light_df, aes(x = Light, y = emmean)) +
  geom_point(size = 5) +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.2) +
  theme_classic() +
  scale_color_brewer(palette = "Dark2") +
  xlab("Light") +
  ylab("log(TTC + 1)") +
  theme(axis.text.x = element_text(size = 14, color = "black", face = "bold"),
        axis.text.y = element_text(size = 14, color = "black", face = "bold")) +
  theme(axis.title.x = element_text(size = 16, color = "black", face = "bold"),
        axis.title.y = element_text(size = 16, color = "black", face = "bold"))
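# Optional (a sketch added for convenience): the Speed and Light panels can be combined
# into a single figure with cowplot, which is already loaded. The object names p_speed
# and p_light are ours; simplified themes are used here for brevity.
p_speed <- ggplot(data = means_speed_df, aes(x = Speed, y = emmean)) +
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.2) +
  xlab("Speed (km/h)") + ylab("log(Time-to-collision + 1)") + theme_classic()
p_light <- ggplot(data = means_light_df, aes(x = Light, y = emmean)) +
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.2) +
  xlab("Light") + ylab("log(Time-to-collision + 1)") + theme_classic()
plot_grid(p_speed, p_light, labels = c("A", "B"))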
######### Dependent: Distance to Vehicle Path (VP_Dist)
vp_distmodel1 <- lm(data = fieldfos1, VP_Dist ~ Speed + Light + Speed:Light + log_bodymass + Temp)
summary(vp_distmodel1)

# GLM assumption checking starts here
# checking the homogeneity of variance (Scale-Location graph) and normality of residuals
# (Normal Q-Q graph) assumptions
autoplot(vp_distmodel1, smooth.color = NA)
# we found deviations from the homogeneity of variances

# checking the additivity/linearity assumption
ggplot(data = fieldfos1, aes(x = log_bodymass, y = VP_Dist)) + geom_point() +
  geom_smooth(method = "lm", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
ggplot(data = fieldfos1, aes(x = log_bodymass, y = VP_Dist)) + geom_point() +
  geom_smooth(method = "loess", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
ggplot(data = fieldfos1, aes(x = Temp, y = VP_Dist)) + geom_point() +
  geom_smooth(method = "lm", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
ggplot(data = fieldfos1, aes(x = Temp, y = VP_Dist)) + geom_point() +
  geom_smooth(method = "loess", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
# overall, the additivity assumption is met
# we checked the collinearity assumption before and it is met

# transformations, given the deviation from the homogeneity of variance assumption;
# we will transform VP_Dist (see the transformation notes above)
View(fieldfo)
fieldfos1 <- mutate(fieldfos1, log_VP_Dist = log(VP_Dist + 1))
fieldfos1 <- mutate(fieldfos1, sqrt_VP_Dist = sqrt(VP_Dist + 0.5))
# we check that the transformations took place
str(fieldfos1)

# now comparing the skewness and kurtosis of the untransformed vs. transformed variables
descr(fieldfos1$VP_Dist, stats = c("Skewness", "Kurtosis"), style = "rmarkdown")
descr(fieldfos1$log_VP_Dist, stats = c("Skewness", "Kurtosis"), style = "rmarkdown")
descr(fieldfos1$sqrt_VP_Dist, stats = c("Skewness", "Kurtosis"), style = "rmarkdown")
# based mostly on the skewness, we decide to use sqrt_VP_Dist

# new model for VP_Dist with the sqrt transformation
vp_distmodel2 <- lm(data = fieldfos1, sqrt_VP_Dist ~ Speed + Light + Speed:Light + log_bodymass + Temp)
summary(vp_distmodel2)

# GLM assumption checking starts here
autoplot(vp_distmodel2, smooth.color = NA)
# compared to
autoplot(vp_distmodel1, smooth.color = NA)
# we found no improvement in the homogeneity of variance assumption; therefore, we keep the
# original variable despite the slight deviation, which facilitates interpretation of the results

# start checking for outliers (see the box plot notes above)
boxplot(fieldfos1$sqrt_VP_Dist, ylab = "sqrt_VP_Dist")
# we now test whether the maximum or minimum value in the distribution is an outlier
# (null hypothesis = no outlier; type = 10 tests for one outlier)
grubbs.test(fieldfos1$sqrt_VP_Dist, type = 10)
# the side being tested is detected automatically; the opposite argument tests the value
# on the opposite side of the distribution
grubbs.test(fieldfos1$sqrt_VP_Dist, type = 10, opposite = TRUE)
# No outliers detected

# we now interpret the results of the model without transformations, using Type 3 sums of squares
Anova(vp_distmodel1, type = 3)
emmeans(vp_distmodel1, ~Speed)
emmeans(vp_distmodel1, ~Light)
emmeans(vp_distmodel1, ~Speed:Light)
# conditional coefficients of the model (each focuses on one variable holding the others constant)
coefficients(vp_distmodel1)
# effect size (we are reporting partial Omega squared)
anova_stats(vp_distmodel1, digits = 3)

# Speed effects
means_speed <- emmeans(vp_distmodel1, ~Speed)
means_speed_df <- data.frame(means_speed)
# the graph using confidence intervals
ggplot(data = means_speed_df, aes(x = Speed, y = emmean)) +
  geom_point(size = 5) +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.2) +
  theme_classic() +
  scale_color_brewer(palette = "Dark2") +
  xlab("Speed (km/h)") +
  ylab("Distance to vehicle path") +
  theme(axis.text.x = element_text(size = 14, color = "black", face = "bold"),
        axis.text.y = element_text(size = 14, color = "black", face = "bold")) +
  theme(axis.title.x = element_text(size = 16, color = "black", face = "bold"),
        axis.title.y = element_text(size = 16, color = "black", face = "bold"))

# Interaction between speed and light
means_speedlight <- emmeans(vp_distmodel1, ~Speed:Light)
means_speedlight_df <- data.frame(means_speedlight)
# the graph using confidence intervals (font size changed during revision)
ggplot(data = means_speedlight_df, aes(x = Speed, y = emmean, group = Light, shape = Light)) +
  geom_point(size = 5, position = position_dodge(width = 0.2)) +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.2,
                position = position_dodge(width = 0.2)) +
  scale_color_brewer(palette = "Dark2") +
  theme_classic(base_size = 18) +
  xlab("Speed (km/h)") +
  ylab("Distance to vehicle path (m)") +
  theme(axis.text.x = element_text(size = 22, color = "black", face = "bold"),
        axis.text.y = element_text(size = 22, color = "black", face = "bold")) +
  theme(axis.title.x = element_text(size = 24, color = "black", face = "bold"),
        axis.title.y = element_text(size = 24, color = "black", face = "bold"))
# calculating means for the interaction
emm_int <- emmeans(vp_distmodel1, "Speed", by = c("Light"))
emm_int
# post-hoc pairwise comparisons
pairs(emm_int)

#### We are now running generalized linear models, so we go back to the fieldfo dataframe
str(fieldfo)
fieldfo$Brake_Bin <- as.factor(fieldfo$Brake_Bin)
fieldfo$Preflight_Brake <- as.factor(fieldfo$Preflight_Brake)
fieldfo$Postflight_Brake <- as.factor(fieldfo$Postflight_Brake)
# generating log_bodymass
fieldfo <- mutate(fieldfo, log_bodymass = log(Weight))

######### Dependent: Flight_Bin
probfid_m1 <- glm(data = fieldfo, Flight_Bin ~ Speed + Light + Speed:Light + Brake_Bin +
                    log_bodymass + Temp, family = binomial(link = "logit"))
probfid_m2 <- glm(data = fieldfo, Flight_Bin ~ Speed + Light + Speed:Light +
                    log_bodymass + Temp, family = binomial(link = "logit"))
# summaries of the models
summary(probfid_m1)
summary(probfid_m2)
# coefficients of the models
coef(probfid_m1)
coef(probfid_m2)
# looking at whether each independent factor significantly explains variation in the dependent variable
anova(probfid_m1, test = "Chisq")
anova(probfid_m2, test = "Chisq")

# We calculate the significance of the overall model (which is different from the significance
# of each individual factor calculated above). The following test asks whether the model with
# ALL the predictors fits significantly better than an intercept-only (null) model. The test
# statistic is the difference in residual deviance between the two models, distributed as
# chi-squared with degrees of freedom equal to the number of predictor terms in the model.
# the difference in deviance between the two models (i.e., the test statistic):
with(probfid_m1, null.deviance - deviance)
with(probfid_m2, null.deviance - deviance)
# the degrees of freedom (the number of predictor terms in the model):
with(probfid_m1, df.null - df.residual)
with(probfid_m2, df.null - df.residual)
# the P value of the whole-model statistic:
with(probfid_m1, pchisq(null.deviance - deviance, df.null - df.residual, lower.tail = FALSE))
with(probfid_m2, pchisq(null.deviance - deviance, df.null - df.residual, lower.tail = FALSE))
# The model without braking (probfid_m2) is not significant (P = 0.51), so it is not worth
# reporting in the manuscript.
# This is sometimes called a likelihood ratio test (the deviance is -2 * log-likelihood).
# To see the model's log-likelihood:
logLik(probfid_m1)
# For overall model significance, we report the Chi2, df, and P value, and also the log-likelihood.
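# The same overall test can be run by explicitly fitting an intercept-only model and
# comparing the two fits (a sketch added for illustration; it should reproduce the manual
# pchisq() result as long as both models are fit to exactly the same rows, so watch for
# NAs in the predictors). The object name null_m is ours.
null_m <- glm(data = fieldfo, Flight_Bin ~ 1, family = binomial(link = "logit"))
anova(null_m, probfid_m1, test = "Chisq")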
# we get a pseudo estimate of the proportion of variability explained by the model (pseudo-R2)
objects(probfid_m1) # lists the components of the model object
pseudoR2 <- (probfid_m1$null.deviance - probfid_m1$deviance) / probfid_m1$null.deviance
pseudoR2

# estimated marginal means on the response (probability) scale
meanprobbrake <- emmeans(probfid_m1, "Brake_Bin", type = "response")
meanprobbrake
meanprobbrake_df <- data.frame(meanprobbrake)
meanprobSpeedField <- emmeans(probfid_m1, "Speed", type = "response")
meanprobSpeedField
meanprobLightField <- emmeans(probfid_m1, "Light", type = "response")
meanprobLightField
emmeans(probfid_m1, ~Speed:Light, type = "response")
meanprobspeed <- emmeans(probfid_m2, "Speed", type = "response")
meanprobspeed

ggplot(data = meanprobbrake_df, aes(x = Brake_Bin, y = prob)) +
  geom_point(size = 2) +
  geom_errorbar(aes(ymin = prob - SE, ymax = prob + SE), width = 0) +
  scale_color_brewer(palette = "Dark2") +
  guides(colour = FALSE) +
  theme_classic(base_size = 16) +
  xlab("Braking") +
  ylab("Probability of taking flight") +
  geom_point(data = fieldfo, aes(x = Brake_Bin, y = Flight_Bin), colour = "grey",
             alpha = .4, position = position_jitter(height = 0, width = 0.3)) +
  theme_cowplot(font_size = 11)

######### Dependent: Success_Bin_TTC (probability of success = TTC > 1.0 sec)
probsucttc_m1 <- glm(data = fieldfo, Success_Bin_TTC ~ Speed + Light + Speed:Light +
                       Preflight_Brake + log_bodymass + Temp, family = binomial(link = "logit"))
# summary of the model
summary(probsucttc_m1)
# coefficients of the model
coef(probsucttc_m1)
# looking at whether each independent factor significantly explains variation in the dependent variable
anova(probsucttc_m1, test = "Chisq")

# overall model significance (same likelihood ratio test as described above):
with(probsucttc_m1, null.deviance - deviance)
with(probsucttc_m1, df.null - df.residual)
with(probsucttc_m1, pchisq(null.deviance - deviance, df.null - df.residual, lower.tail = FALSE))
logLik(probsucttc_m1)
# For overall model significance, we report the Chi2, df, and P value, and also the log-likelihood.
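# Logistic-regression coefficients are on the log-odds scale; a common companion for
# reporting (a sketch, not part of the original analysis) is odds ratios with
# profile-likelihood confidence intervals:
exp(cbind(OR = coef(probsucttc_m1), confint(probsucttc_m1)))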
# pseudo-R2 for the model
objects(probsucttc_m1)
pseudoR2 <- (probsucttc_m1$null.deviance - probsucttc_m1$deviance) / probsucttc_m1$null.deviance
pseudoR2

# estimated marginal means on the response scale
meanprobspeed <- emmeans(probsucttc_m1, "Speed", type = "response")
meanprobspeed
meanprobspeed_df <- data.frame(meanprobspeed)
meanproblight <- emmeans(probsucttc_m1, "Light", type = "response")
meanproblight
emmeans(probsucttc_m1, ~Speed:Light, type = "response")

ggplot(data = meanprobspeed_df, aes(x = Speed, y = prob)) +
  geom_point(size = 2) +
  geom_errorbar(aes(ymin = prob - SE, ymax = prob + SE), width = 0) +
  scale_color_brewer(palette = "Dark2") +
  guides(colour = FALSE) +
  theme_classic(base_size = 16) +
  xlab("Speed (km/h)") +
  ylab("Theoretical probability of successful avoidance") +
  geom_point(data = fieldfo, aes(x = Speed, y = Success_Bin_TTC), colour = "grey",
             alpha = .4, position = position_jitter(height = 0, width = 0.3)) +
  theme_cowplot(font_size = 11)

######### Dependent: Live_Duck (probability of success = presence in vehicle path at collision)
probliveduck_m1 <- glm(data = fieldfo, Live_Duck ~ Speed + Light + Speed:Light +
                         Postflight_Brake + log_bodymass + Temp, family = binomial(link = "logit"))
liveduck_m2 <- glm(data = fieldfo, Live_Duck ~ Speed + Light + Speed:Light +
                     log_bodymass + Temp, family = binomial(link = "logit"))
# summaries of the models
summary(probliveduck_m1)
summary(liveduck_m2)
# coefficients of the model
coef(probliveduck_m1)
# looking at whether each independent factor significantly explains variation in the dependent variable
anova(probliveduck_m1, test = "Chisq")
anova(liveduck_m2, test = "Chisq")

# overall model significance (same likelihood ratio test as described above):
with(probliveduck_m1, null.deviance - deviance)
with(probliveduck_m1, df.null - df.residual)
with(probliveduck_m1, pchisq(null.deviance - deviance, df.null - df.residual, lower.tail = FALSE))
logLik(probliveduck_m1)
# For overall model significance, we report the Chi2, df, and P value, and also the log-likelihood.
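# Cross-check (a sketch, not part of the original analysis): the deviance-based pseudo-R2
# computed next is McFadden's, and for a binary logistic model it can equivalently be
# obtained from log-likelihoods. update() refits an intercept-only model; if any
# predictors have missing values, the two fits may use different rows and the match will
# only be approximate.
1 - as.numeric(logLik(probliveduck_m1)) / as.numeric(logLik(update(probliveduck_m1, . ~ 1)))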
# pseudo-R2 for the model
objects(probliveduck_m1)
pseudoR2 <- (probliveduck_m1$null.deviance - probliveduck_m1$deviance) / probliveduck_m1$null.deviance
pseudoR2

meanprobspeed <- emmeans(probliveduck_m1, "Postflight_Brake", type = "response")
meanprobspeed
emmeans(liveduck_m2, ~Speed, type = "response")
emmeans(liveduck_m2, ~Light, type = "response")
emmeans(liveduck_m2, ~Speed:Light, type = "response")

##################################
### LAB DATA
#################################
setwd("C:/Rfiles") # sets the working folder
rm(list = ls()) # clears R's memory

labfo <- read.csv("RDuckVideo_7.07.csv", na.strings = c("", "NA"), header = TRUE,
                  fileEncoding = 'UTF-8-BOM') # opens file
str(labfo)
View(labfo)

# assigning different vars to factors
labfo$Sex <- as.factor(labfo$Sex)
labfo$Speed <- as.factor(labfo$Speed)
labfo$Light <- as.factor(labfo$Light)
# checking that the changes were made
str(labfo)

# we quickly look at whether body mass varies with sex
bmvssex <- lm(data = labfo, Weight ~ Sex)
# quick summary of the model
summary(bmvssex)
# direction of the effect
emmeans(bmvssex, ~Sex)
# Males are larger than females, so we cannot include both sex and body mass in the models
# because they are associated; we include the continuous independent factor to minimize
# using up degrees of freedom

# subsetting to analyze responses that involved escape
labfos1 <- labfo[which(labfo$Flight_Bin == 1), ]
View(labfos1)
str(labfos1)

# getting SDs of the raw data
sd(labfos1$FID, na.rm = TRUE)
sd(labfos1$TTC, na.rm = TRUE)
labfos1_noTTCoutlier <- labfos1[-which(labfos1$TTC > 5), ]
sd(labfos1_noTTCoutlier$TTC, na.rm = TRUE)

# quick exploratory plot of mean FID by speed with standard-error bars
ggplot(data = labfos1, aes(x = Speed, y = FID)) +
  stat_summary(fun = mean, geom = "bar") +
  stat_summary(fun.data = mean_se, geom = "errorbar", width = 0.2) +
  theme_classic()

# we log-transform weight
labfos1 <- mutate(labfos1, log_bodymass = log(Weight))

######### Dependent: FID
fidm1 <- lm(data = labfos1, FID ~ Speed + Light + Speed:Light + log_bodymass)
summary(fidm1)
Anova(fidm1, type = 3)

# GLM assumption checking starts here
# checking the homogeneity of variance (Scale-Location graph) and normality of residuals
# (Normal Q-Q graph) assumptions
autoplot(fidm1, smooth.color = NA)
# we found deviations from both the homogeneity of variances and the normality of the residuals

# checking the additivity/linearity assumption
ggplot(data = labfos1, aes(x = log_bodymass, y = FID)) + geom_point() +
  geom_smooth(method = "lm", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
ggplot(data = labfos1, aes(x = log_bodymass, y = FID)) + geom_point() +
  geom_smooth(method = "loess", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
# the additivity assumption seems to be met

# we transform
labfos1 <- mutate(labfos1, log_FID = log(FID + 1))
labfos1 <- mutate(labfos1, sqrt_FID = sqrt(FID + 0.5))
# we check that the transformations took place
str(labfos1)

# now comparing the skewness and kurtosis of the untransformed vs. transformed variables
descr(labfos1$FID, stats = c("Skewness", "Kurtosis"), style = "rmarkdown")
descr(labfos1$log_FID, stats = c("Skewness", "Kurtosis"), style = "rmarkdown")
descr(labfos1$sqrt_FID, stats = c("Skewness", "Kurtosis"), style = "rmarkdown")
# based mostly on the skewness, we decide to use sqrt_FID

######### Dependent: sqrt_FID
fidm2 <- lm(data = labfos1, sqrt_FID ~ Speed + Light + Speed:Light + log_bodymass)
summary(fidm2)

# GLM assumption checking starts here
autoplot(fidm2, smooth.color = NA)
# compared to...
autoplot(fidm1, smooth.color = NA)
# the sqrt transformation slightly improved the homogeneity of variances and the normality
# of the residuals, but the assumptions are not fully met, so results should be interpreted with care

# checking the additivity/linearity assumption
ggplot(data = labfos1, aes(x = log_bodymass, y = sqrt_FID)) + geom_point() +
  geom_smooth(method = "lm", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
ggplot(data = labfos1, aes(x = log_bodymass, y = sqrt_FID)) + geom_point() +
  geom_smooth(method = "loess", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
# the additivity assumption seems to be met

# start checking for outliers (see the box plot notes above)
boxplot(labfos1$sqrt_FID, ylab = "sqrt_FID")
# we now test whether the maximum or minimum value in the distribution is an outlier
# null hypothesis = no outlier, so a P value < 0.05 means that value is a significant outlier
# type = 10 is a test for one outlier
grubbs.test(labfos1$sqrt_FID, type = 10)
# the side of the distribution being tested is detected automatically and can be reversed
# with the opposite argument, which tests the value on the opposite side of the distribution
grubbs.test(labfos1$sqrt_FID, type = 10, opposite = TRUE)
# No outliers detected
# if one is identified, there are further algorithms to examine its role

# we use Type 3 sums of squares when multiple independent factors are included
Anova(fidm2, type = 3)
emmeans(fidm2, ~Speed, type = "response")
emmeans(fidm2, ~Light)
emmeans(fidm2, ~Speed:Light)
# conditional coefficients of the model (each focuses on one variable holding the others constant)
coefficients(fidm2)
# effect size (we are reporting Omega squared)
anova_stats(fidm2, digits = 3)

######### Dependent: TTC
ttcm1 <- lm(data = labfos1, TTC ~ Speed + Light + Speed:Light + log_bodymass)
summary(ttcm1)

# GLM assumption checking starts here
autoplot(ttcm1, smooth.color = NA)
# we found deviations from both the homogeneity of variances and the normality of the residuals

# checking the additivity/linearity assumption
ggplot(data = labfos1, aes(x = log_bodymass, y = TTC)) + geom_point() +
  geom_smooth(method = "lm", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
ggplot(data = labfos1, aes(x = log_bodymass, y = TTC)) + geom_point() +
  geom_smooth(method = "loess", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
# the additivity assumption seems to be met

# we transform
labfos1 <- mutate(labfos1, log_TTC = log(TTC + 1))
labfos1 <- mutate(labfos1, sqrt_TTC = sqrt(TTC + 0.5))
# we check that the transformations took place
str(labfos1)

# now comparing the skewness and kurtosis of the untransformed vs. transformed variables
descr(labfos1$TTC, stats = c("Skewness", "Kurtosis"), style = "rmarkdown")
descr(labfos1$log_TTC, stats = c("Skewness", "Kurtosis"), style = "rmarkdown")
descr(labfos1$sqrt_TTC, stats = c("Skewness", "Kurtosis"), style = "rmarkdown")
# based mostly on the skewness, we decide to use log_TTC

######### Dependent: log_TTC
ttcm2 <- lm(data = labfos1, log_TTC ~ Speed + Light + Speed:Light + log_bodymass)
summary(ttcm2)

# GLM assumption checking starts here
autoplot(ttcm2, smooth.color = NA)
# compared to...
autoplot(ttcm1, smooth.color = NA)
# good improvement in meeting the assumptions with the log_TTC transformation

# checking the additivity/linearity assumption
ggplot(data = labfos1, aes(x = log_bodymass, y = log_TTC)) + geom_point() +
  geom_smooth(method = "lm", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
ggplot(data = labfos1, aes(x = log_bodymass, y = log_TTC)) + geom_point() +
  geom_smooth(method = "loess", se = FALSE) + scale_color_brewer(palette = "Dark2") + theme_classic()
# the additivity assumption seems to be met

# start checking for outliers (see the box plot notes above)
boxplot(labfos1$log_TTC, ylab = "log_TTC")
# we now test whether the maximum or minimum value in the distribution is an outlier
# (null hypothesis = no outlier; type = 10 tests for one outlier)
grubbs.test(labfos1$log_TTC, type = 10)
# the side being tested is detected automatically; the opposite argument tests the value
# on the opposite side of the distribution
grubbs.test(labfos1$log_TTC, type = 10, opposite = TRUE)
# An outlier was detected: 2.75544256340481
# We therefore present two sets of results: with the outlier and without it

#### Results with the outlier
# we use Type 3 sums of squares when multiple independent factors are included
Anova(ttcm2, type = 3)
emmeans(ttcm2, ~Speed)
emmeans(ttcm2, ~Light)
emmeans(ttcm2, ~Speed:Light)
# conditional coefficients of the model (each focuses on one variable holding the others constant)
coefficients(ttcm2)
# effect size (we are reporting Omega squared)
anova_stats(ttcm2, digits = 3)

# Light effects
means_light2 <- emmeans(ttcm2, ~Light)
means_light_df2 <- data.frame(means_light2)
# the graph using confidence intervals
ggplot(data = means_light_df2, aes(x = Light, y = emmean)) +
  geom_point(size = 5) +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.2) +
  ylim(0.6, 1.6) +
  theme_classic() +
  scale_color_brewer(palette = "Dark2") +
  xlab("Time of day") +
  ylab("log(TTC + 1)") +
  theme(axis.text.x = element_text(size = 14, color = "black", face = "bold"),
        axis.text.y = element_text(size = 14, color = "black", face = "bold")) +
  theme(axis.title.x = element_text(size = 16, color = "black", face = "bold"),
        axis.title.y = element_text(size = 16, color = "black", face = "bold"))

#### Results without the outlier
View(labfos1)
ttcm3 <- lm(data = labfos1[-which(labfos1$log_TTC > 2.75), ],
            log_TTC ~ Speed + Light + Speed:Light + log_bodymass)
summary(ttcm3)
Anova(ttcm3, type = 3)
emmeans(ttcm3, ~Speed)
emmeans(ttcm3, ~Light)
emmeans(ttcm3, ~Speed:Light)
# conditional coefficients of the model (each focuses on one variable holding the others constant)
coefficients(ttcm3)
# effect size (we are reporting Omega squared)
anova_stats(ttcm3, digits = 3)

# Light effects
means_light <- emmeans(ttcm3, ~Light)
means_light_df <- data.frame(means_light)
# the graph using confidence intervals (y-axis title, range, and font size changed during revision)
ggplot(data = means_light_df, aes(x = Light, y = emmean)) +
  geom_point(size = 8) +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.4) +
  ylim(0.0, 1.6) +
  theme_classic() +
  scale_color_brewer(palette = "Dark2") +
  xlab("Time of day") +
  ylab("log(Time-to-collision + 1)") +
  theme(axis.text.x = element_text(size = 24, color = "black", face = "bold"),
        axis.text.y = element_text(size = 24, color = "black", face = "bold")) +
  theme(axis.title.x = element_text(size = 26, color = "black", face = "bold"),
        axis.title.y = element_text(size = 26, color = "black", face = "bold"))

#### We are now running generalized linear models, so we go back to the labfo dataframe
str(labfo)
# generating log_bodymass
labfo <- mutate(labfo, log_bodymass = log(Weight))
View(labfo)

######### Dependent: Flight_Bin
probfid_m1 <- glm(data = labfo, Flight_Bin ~ Speed + Light + Speed:Light + log_bodymass,
                  family = binomial(link = "logit"))
# summary of the model
summary(probfid_m1)
# coefficients of the model
coef(probfid_m1)
# looking at whether each independent factor significantly explains variation in the dependent variable
anova(probfid_m1, test = "Chisq")

# overall model significance (same likelihood ratio test as in the field analyses):
with(probfid_m1, null.deviance - deviance)
with(probfid_m1, df.null - df.residual)
with(probfid_m1, pchisq(null.deviance - deviance, df.null - df.residual, lower.tail = FALSE))
logLik(probfid_m1)
# For overall model significance, we report the Chi2, df, and P value, and also the log-likelihood.
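# Note (a sketch added for context): anova(probfid_m1, test = "Chisq") above is
# sequential (Type I), so the result for each term depends on its position in the
# formula. An order-independent alternative is a likelihood-ratio test of each term via
# car::Anova, with the usual caveats about Type III tests and factor contrasts in mind:
Anova(probfid_m1, type = 3, test.statistic = "LR")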
# pseudo-R2 for the model
objects(probfid_m1)
pseudoR2 <- (probfid_m1$null.deviance - probfid_m1$deviance) / probfid_m1$null.deviance
pseudoR2

#### Marginal means
meanproblight <- emmeans(probfid_m1, "Light", type = "response")
meanproblight
meanproblight_df <- data.frame(meanproblight)
meanprobspeed1 <- emmeans(probfid_m1, "Speed", type = "response")
meanprobspeed1
emmeans(probfid_m1, ~Speed:Light, type = "response")
emmeans(probfid_m1, ~log_bodymass, type = "response")

dev.off() # closes the current graphics device (if one is open) before the next figure
ggplot(data = meanproblight_df, aes(x = Light, y = prob)) +
  geom_point(size = 2) +
  geom_errorbar(aes(ymin = prob - SE, ymax = prob + SE), width = 0) +
  scale_color_brewer(palette = "Dark2") +
  guides(colour = FALSE) +
  theme_classic(base_size = 16) +
  xlab("Time of day") +
  ylab("Probability of flight response") +
  geom_point(data = labfo, aes(x = Light, y = Flight_Bin), colour = "grey",
             alpha = .4, position = position_jitter(height = 0, width = 0.3)) +
  theme_cowplot(font_size = 11)
str(labfo)

######### Dependent: Success_Bin
probsucc_m1 <- glm(data = labfo, Success_Bin ~ Speed + Light + Speed:Light + log_bodymass,
                   family = binomial(link = "logit"))
# summary of the model
summary(probsucc_m1)
# coefficients of the model
coef(probsucc_m1)
# looking at whether each independent factor significantly explains variation in the dependent variable
anova(probsucc_m1, test = "Chisq")

# overall model significance (same likelihood ratio test as described above):
with(probsucc_m1, null.deviance - deviance)
with(probsucc_m1, df.null - df.residual)
with(probsucc_m1, pchisq(null.deviance - deviance, df.null - df.residual, lower.tail = FALSE))
logLik(probsucc_m1)
# For overall model significance, we report the Chi2, df, and P value, and also the log-likelihood.
# pseudo-R2 for the model
objects(probsucc_m1)
pseudoR2 <- (probsucc_m1$null.deviance - probsucc_m1$deviance) / probsucc_m1$null.deviance
pseudoR2

#### Marginal means
meansucclight <- emmeans(probsucc_m1, "Light", type = "response")
meansucclight
meansuccspeed1 <- emmeans(probsucc_m1, "Speed", type = "response")
meansuccspeed1
emmeans(probsucc_m1, ~Speed:Light, type = "response")
meanprobspeed <- emmeans(probsucc_m1, "Speed", type = "response")
meanprobspeed
pairs(meanprobspeed)
meanprobspeed_df <- data.frame(meanprobspeed)

# Figure to interpret the results (point size and font size changed during revision)
ggplot(data = meanprobspeed_df, aes(x = Speed, y = prob)) +
  geom_point(size = 5) +
  geom_errorbar(aes(ymin = prob - SE, ymax = prob + SE), width = 0) +
  scale_color_brewer(palette = "Dark2") +
  guides(colour = FALSE) +
  theme_classic(base_size = 16) +
  xlab("Speed (km/h)") +
  ylab("Probability of successful avoidance") +
  geom_point(data = labfo, aes(x = Speed, y = Success_Bin), colour = "grey",
             alpha = .4, position = position_jitter(height = 0, width = 0.3)) +
  theme_cowplot(font_size = 22)
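# To export the final figure at manuscript resolution (a sketch; the file name and
# dimensions are placeholders, not taken from the original analysis), ggsave() writes
# the most recently displayed plot:
ggsave("prob_success_by_speed.png", width = 7, height = 5, dpi = 300)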