--- title: "DATA S1 - Tixier et al - killer whale - blue-eye trevalla" author: "Paul Tixier" date: "16 May 2018" output: word_document: default pdf_document: default html_document: default --- # Analysis of the occurence of killer whale interactions ### Code for Generalised Linear Models fitted to the presence/absence of killer whales per fishing day Manuscript # 25457 " Killer whale (Orcinus orca) interactions with blue-eye trevalla (Hyperoglyphe antarctica) longline fisheries." Paul Tixier1, Mary-Anne Lea2, Mark A. Hindell2, Christophe Guinet3, Nicolas Gasco4, Guy Duhamel4, John P.Y. Arnould1 1 Centre for Integrative Ecology, School of Life and Environmental Sciences, Deakin University, Burwood, Victoria, Australia 2 Ecology and Biodiversity Centre, Institute for Marine and Antarctic Studies, University of Tasmania, Hobart, Tasmania, Australia 3 Centre d'Etudes Biologiques de Chizé (CEBC), UMR 7372 Université de la Rochelle-CNRS, Villiers-en-Bois, France 4 Département Adaptations du vivant, UMR BOREA, Museum National d'Histoire Naturelle, Paris, France Package Loading ```{r setup, include=FALSE} library(SDMTools) library(mgcv) library(fields) library(maps) library(spatstat) library(maptools) library(sp) library(trip) library(rgdal) library(ade4) library(tcltk) library(tkrplot) library(adehabitat) library(grid) library(gridExtra) library(ggplot2) library(gridBase) library(marmap) library(mapdata) library(Grid2Polygons) library(mapproj) library(Rmisc) library(dplyr) library(effects) library(tidyr) library(plyr) library(ggpubr) library(MuMIn) ``` Data input ```{r} setwd("C:/Users/LES_research211/Documents/003_ANALYSES/011_BLUE_EYE_KIW") d1=read.csv("data_Tixier_et_al_killer_whale_blue_eye_trevalla_fisheries_interactions.csv",h=T,sep=",") ``` # Analysis ### Prepare data for modelling Convert year as a continuous term ```{r} d1$year=as.numeric(as.character(d1$year)) with(d1, table(year, area)) ``` Convert latitudes into positive values ```{r} d1$lat = d1$Y d1$long = d1$X d1$lat <- -(d1$lat) ``` # GLMs using stepwise forward selection # Amsterdam/St Paul Select data from Amsterdam/St Paul ```{r} d1_fr <- subset(d1, d1$area=="FRA") ``` Range/histogram of mean latitudes per day ```{r} range(d1_fr$lat) hist(d1_fr$lat) ``` Range/histogram of mean longitudes per day ```{r} range(d1_fr$long) hist(d1_fr$long) ``` Range/histogram of mean depths per day ```{r} range(d1_fr$depth) hist(d1_fr$depth) ``` Range/histogram of effort per day ```{r} range(d1_fr$effort) hist(d1_fr$effort) ``` Range/histogram of size area fished per day ```{r} range(d1_fr$area_size) hist(d1_fr$area_size) ``` Range/histogram of distances from previous day ```{r} range(d1_fr$distance) hist(d1_fr$distance) ``` rescale all continuous terms ```{r} datsc_fr <- d1_fr datsc_fr$distance <- as.numeric(scale(datsc_fr$distance)) datsc_fr$lat <- as.numeric(scale(datsc_fr$lat)) datsc_fr$year <- as.numeric(scale(datsc_fr$year)) datsc_fr$long <- as.numeric(scale(datsc_fr$long)) datsc_fr$depth <- as.numeric(scale(datsc_fr$depth)) datsc_fr$area_size <- as.numeric(scale(datsc_fr$area_size)) datsc_fr$effort <- as.numeric(scale(datsc_fr$effort)) ``` Null model ```{r} glm_null_0 <- glm(kiw_pres ~ 1, family=binomial(link = "logit"), data=datsc_fr) summary(glm_null_0) ``` Check for temporal autocorrelation ```{r} acf(resid(glm_null_0, type="pearson"), plot=F, type="correlation") acf(resid(glm_null_0, type="pearson"), plot=T, type="correlation") ``` Null model with presence of killer whales during previous day in interaction with the distance travelled by vessels as a structural interaction term in the null model ```{r} glm_null <- glm(kiw_pres ~ kiw_pres_lag1*distance, family=binomial(link = "logit"), data=datsc_fr) summary(glm_null) ``` Check for temporal autocorrelation ```{r} acf(resid(glm_null, type="pearson"), plot=F, type="correlation") acf(resid(glm_null, type="pearson"), plot=T, type="correlation") ``` # Single term models Add the year effect ```{r} glm_1.1 <- glm(kiw_pres ~ kiw_pres_lag1*distance + year, family=binomial(link = "logit"), data=datsc_fr) summary(glm_1.1) ``` Add the season effect ```{r} glm_1.2 <- glm(kiw_pres ~ kiw_pres_lag1*distance + season, family=binomial(link = "logit"), data=datsc_fr) summary(glm_1.2) ``` Add the depth effect ```{r} glm_1.3 <- glm(kiw_pres ~ kiw_pres_lag1*distance + depth, family=binomial(link = "logit"), data=datsc_fr) summary(glm_1.3) ``` Add the latitude effect ```{r} glm_1.4 <- glm(kiw_pres ~ kiw_pres_lag1*distance + lat, family=binomial(link = "logit"), data=datsc_fr) summary(glm_1.4) ``` Add the longitude effect ```{r} glm_1.5 <- glm(kiw_pres ~ kiw_pres_lag1*distance + long, family=binomial(link = "logit"), data=datsc_fr) summary(glm_1.5) ``` Add the total effort ```{r} glm_1.6 <- glm(kiw_pres ~ kiw_pres_lag1*distance + effort, family=binomial(link = "logit"), data=datsc_fr) summary(glm_1.6) ``` Add the size of fished area ```{r} glm_1.7 <- glm(kiw_pres ~ kiw_pres_lag1*distance + area_size, family=binomial(link = "logit"), data=datsc_fr) summary(glm_1.7) ``` Model with the observer iD ```{r} glm_1.8 <- glm(kiw_pres ~ kiw_pres_lag1*distance + observer, family=binomial(link = "logit"), data=datsc_fr) summary(glm_1.8) ``` ### AIC summary ```{r} model.sel(list(glm_null, glm_1.1, glm_1.2, glm_1.3, glm_1.4, glm_1.5,glm_1.6,glm_1.7,glm_1.8)) ``` Model with the latitude with lowest AIC # Two terms models ```{r} glm_2.1 <- glm(kiw_pres ~ kiw_pres_lag1*distance + lat, family=binomial(link = "logit"), data=datsc_fr) summary(glm_2.1) ``` Model with the year ```{r} glm_2.2 <- glm(kiw_pres ~ kiw_pres_lag1*distance + lat + year, family=binomial(link = "logit"), data=datsc_fr) summary(glm_2.2) ``` Model with the season ```{r} glm_2.3 <- glm(kiw_pres ~ kiw_pres_lag1*distance + lat + season, family=binomial(link = "logit"), data=datsc_fr) summary(glm_2.3) ``` Model with the depth ```{r} glm_2.4 <- glm(kiw_pres ~ kiw_pres_lag1*distance + lat + depth, family=binomial(link = "logit"), data=datsc_fr) summary(glm_2.4) ``` Model with the longitude ```{r} glm_2.5 <- glm(kiw_pres ~ kiw_pres_lag1*distance + lat + long, family=binomial(link = "logit"), data=datsc_fr) summary(glm_2.5) ``` Model with the total effort ```{r} glm_2.6 <- glm(kiw_pres ~ kiw_pres_lag1*distance + lat + effort, family=binomial(link = "logit"), data=datsc_fr) summary(glm_2.6) ``` Model with the size of fished area ```{r} glm_2.7 <- glm(kiw_pres ~ kiw_pres_lag1*distance + lat + area_size, family=binomial(link = "logit"), data=datsc_fr) summary(glm_2.7) ``` Model with the observer ```{r} glm_2.8 <- glm(kiw_pres ~ kiw_pres_lag1*distance + lat + observer, family=binomial(link = "logit"), data=datsc_fr) summary(glm_2.8) ``` ### AIC summary ```{r} model.sel(list(glm_2.1, glm_2.2, glm_2.3, glm_2.4, glm_2.5,glm_2.6,glm_2.7,glm_2.8)) ``` Model with the year with lowest AIC # Three terms models ```{r} glm_3.1 <- glm(kiw_pres ~ kiw_pres_lag1*distance + lat+year, family=binomial(link = "logit"), data=datsc_fr) summary(glm_3.1) ``` Model with the season ```{r} glm_3.2 <- glm(kiw_pres ~ kiw_pres_lag1*distance + lat+year+season, family=binomial(link = "logit"), data=datsc_fr) summary(glm_3.2) ``` Model with the depth ```{r} glm_3.3 <- glm(kiw_pres ~ kiw_pres_lag1*distance + lat+year+depth, family=binomial(link = "logit"), data=datsc_fr) summary(glm_3.3) ``` Model with the longitude ```{r} glm_3.4 <- glm(kiw_pres ~ kiw_pres_lag1*distance + lat+year+long, family=binomial(link = "logit"), data=datsc_fr) summary(glm_3.4) ``` Model with the effort ```{r} glm_3.5 <- glm(kiw_pres ~ kiw_pres_lag1*distance + lat+year+effort, family=binomial(link = "logit"), data=datsc_fr) summary(glm_3.5) ``` Model with the size of area fished ```{r} glm_3.6 <- glm(kiw_pres ~ kiw_pres_lag1*distance + lat+year+area_size, family=binomial(link = "logit"), data=datsc_fr) summary(glm_3.6) ``` Model with the observer iD ```{r} glm_3.7 <- glm(kiw_pres ~ kiw_pres_lag1*distance + lat+year+observer, family=binomial(link = "logit"), data=datsc_fr) summary(glm_3.7) ``` ### AIC summary ```{r} model.sel(list(glm_3.1, glm_3.2, glm_3.3, glm_3.4, glm_3.5,glm_3.6,glm_3.7)) ``` None of the 3 terms models further improved the AIC The final model should be ```{r} glm_3.1 <- glm(kiw_pres ~ kiw_pres_lag1*distance + lat+year, family=binomial(link = "logit"), data=datsc_fr) summary(glm_3.1) ``` # Model validation plots of residuals versus fitted values for check of linearity and homoscedasticity of variance ```{r} plot(fitted(glm_3.1), resid(glm_3.1), main="Residual Plot for linearity and homoscedasticity of variance",xlab="Fitted value",ylab="Residuals") ``` test for normality of the residuals ```{r} hist(resid(glm_3.1),xlab="Residuals",main="Normality of the Residuals") ``` plot the residuals against each explanatory variables to test independency ```{r} d1_fr2 <- subset(d1_fr, !is.na(d1_fr$distance)) plot(resid(glm_3.1)~d1_fr2$distance,pch=20,main="Residual Plot for independency 1",xlab="Distance (km)",ylab="Residuals") plot(resid(glm_3.1)~d1_fr2$lat,pch=20,main="Residual Plot for independency 2",xlab="Latitude",ylab="Residuals") ``` Check for temporal autocorrelation ```{r} acf(resid(glm_3.1, type="pearson"), plot=T, type="correlation") ``` # SE Australia Select data from SE Australia ```{r} d1_au <- subset(d1, d1$area=="AUS") ``` Range/histogram of mean latitudes per day ```{r} range(d1_au$lat) hist(d1_au$lat) ``` Range/histogram of mean longitudes per day ```{r} range(d1_au$long) hist(d1_au$long) ``` Range/histogram of mean depths per day ```{r} range(d1_au$depth, na.rm=TRUE) hist(d1_au$depth) ``` Range/histogram of effort per day ```{r} range(d1_au$effort) hist(d1_au$effort) ``` Range/histogram of size area fished per trip ```{r} range(d1_au$area_size) hist(d1_au$area_size) ``` Range/histogram of distances from previous day ```{r} range(d1_au$distance,na.rm=TRUE) hist(d1_au$distance) ``` One extreme value of distance is removed from the data used for modeling ```{r} d1_au <- subset(d1_au, d1_au$distance<=800) range(d1_au$distance,na.rm=TRUE) ``` rescale all continuous terms ```{r} datsc_au <- d1_au datsc_au$distance <- as.numeric(scale(datsc_au$distance)) datsc_au$lat <- as.numeric(scale(datsc_au$lat)) datsc_au$year <- as.numeric(scale(datsc_au$year)) datsc_au$long <- as.numeric(scale(datsc_au$long)) datsc_au$depth <- as.numeric(scale(datsc_au$depth)) datsc_au$area_size <- as.numeric(scale(datsc_au$area_size)) datsc_au$effort <- as.numeric(scale(datsc_au$effort)) ``` Null model ```{r} glm_null_0 <- glm(kiw_pres ~ 1, family=binomial(link = "logit"), data=datsc_au) summary(glm_null_0) ``` Check for temporal autocorrelation ```{r} acf(resid(glm_null_0, type="pearson"), plot=F, type="correlation") acf(resid(glm_null_0, type="pearson"), plot=T, type="correlation") ``` Presence of killer whales during previous day in interaction with the distance travelled by vessels as a structural interaction term in the null model ```{r} glm_null <- glm(kiw_pres ~ kiw_pres_lag1*distance, family=binomial(link = "logit"), data=datsc_au) summary(glm_null) ``` Check for temporal autocorrelation ```{r} acf(resid(glm_null, type="pearson"), plot=F, type="correlation") acf(resid(glm_null, type="pearson"), plot=T, type="correlation") ``` # Single term models Add the year effect ```{r} glm_1.1 <- glm(kiw_pres ~ kiw_pres_lag1*distance + year, family=binomial(link = "logit"), data=datsc_au) summary(glm_1.1) ``` Add the season effect ```{r} glm_1.2 <- glm(kiw_pres ~ kiw_pres_lag1*distance + season, family=binomial(link = "logit"), data=datsc_au) summary(glm_1.2) ``` Add the depth effect ```{r} glm_1.3 <- glm(kiw_pres ~ kiw_pres_lag1*distance + depth, family=binomial(link = "logit"), data=datsc_au) summary(glm_1.3) ``` Add the latitude effect ```{r} glm_1.4 <- glm(kiw_pres ~ kiw_pres_lag1*distance + lat, family=binomial(link = "logit"), data=datsc_au) summary(glm_1.4) ``` Add the longitude effect ```{r} glm_1.5 <- glm(kiw_pres ~ kiw_pres_lag1*distance + long, family=binomial(link = "logit"), data=datsc_au) summary(glm_1.5) ``` Add the total effort ```{r} glm_1.6 <- glm(kiw_pres ~ kiw_pres_lag1*distance + effort, family=binomial(link = "logit"), data=datsc_au) summary(glm_1.6) ``` Add the size of fished area ```{r} glm_1.7 <- glm(kiw_pres ~ kiw_pres_lag1*distance + area_size, family=binomial(link = "logit"), data=datsc_au) summary(glm_1.7) ``` Add the captain ```{r} glm_1.8 <- glm(kiw_pres ~ kiw_pres_lag1*distance + captain, family=binomial(link = "logit"), data=datsc_au) summary(glm_1.8) ``` ### AIC summary ```{r} model.sel(list(glm_null, glm_1.1, glm_1.2, glm_1.3, glm_1.4, glm_1.5,glm_1.6,glm_1.7,glm_1.8)) ``` Model with the captain with lowest AIC # Two terms models ```{r} glm_2.1 <- glm(kiw_pres ~ kiw_pres_lag1*distance + captain, family=binomial(link = "logit"), data=datsc_au) summary(glm_2.1) ``` Model with the year ```{r} glm_2.2 <- glm(kiw_pres ~ kiw_pres_lag1*distance + captain + year, family=binomial(link = "logit"), data=datsc_au) summary(glm_2.2) ``` Model with the season ```{r} glm_2.3 <- glm(kiw_pres ~ kiw_pres_lag1*distance + captain + season, family=binomial(link = "logit"), data=datsc_au) summary(glm_2.3) ``` Model with the depth ```{r} glm_2.4 <- glm(kiw_pres ~ kiw_pres_lag1*distance + captain + depth, family=binomial(link = "logit"), data=datsc_au) summary(glm_2.4) ``` Model with the longitude ```{r} glm_2.5 <- glm(kiw_pres ~ kiw_pres_lag1*distance + captain + long, family=binomial(link = "logit"), data=datsc_au) summary(glm_2.5) ``` Model with the total effort ```{r} glm_2.6 <- glm(kiw_pres ~ kiw_pres_lag1*distance + captain + effort, family=binomial(link = "logit"), data=datsc_au) summary(glm_2.6) ``` Model with the size of fished area ```{r} glm_2.7 <- glm(kiw_pres ~ kiw_pres_lag1*distance + captain + area_size, family=binomial(link = "logit"), data=datsc_au) summary(glm_2.7) ``` Model with the latitude ```{r} glm_2.8 <- glm(kiw_pres ~ kiw_pres_lag1*distance + captain + lat, family=binomial(link = "logit"), data=datsc_au) summary(glm_2.8) ``` ### AIC summary ```{r} model.sel(list(glm_2.1, glm_2.2, glm_2.3, glm_2.4, glm_2.5,glm_2.6,glm_2.7,glm_2.8)) ``` Model with the effort with lowest AIC # Three terms models ```{r} glm_3.1 <- glm(kiw_pres ~ kiw_pres_lag1*distance + captain+effort, family=binomial(link = "logit"), data=datsc_au) summary(glm_3.1) ``` Model with the season ```{r} glm_3.2 <- glm(kiw_pres ~ kiw_pres_lag1*distance + captain+effort+ season, family=binomial(link = "logit"), data=datsc_au) summary(glm_3.2) ``` Model with the depth ```{r} glm_3.3 <- glm(kiw_pres ~ kiw_pres_lag1*distance +captain+effort+depth, family=binomial(link = "logit"), data=datsc_au) summary(glm_3.3) ``` Model with the longitude ```{r} glm_3.4 <- glm(kiw_pres ~ kiw_pres_lag1*distance + captain+effort+long, family=binomial(link = "logit"), data=datsc_au) summary(glm_3.4) ``` Model with the latitude ```{r} glm_3.5 <- glm(kiw_pres ~ kiw_pres_lag1*distance + captain+effort+lat, family=binomial(link = "logit"), data=datsc_au) summary(glm_3.5) ``` Model with the size of area fished ```{r} glm_3.6 <- glm(kiw_pres ~ kiw_pres_lag1*distance + captain+effort+area_size, family=binomial(link = "logit"), data=datsc_au) summary(glm_3.6) ``` Model with the year ```{r} glm_3.7 <- glm(kiw_pres ~ kiw_pres_lag1*distance + captain+effort+year, family=binomial(link = "logit"), data=datsc_au) summary(glm_3.7) ``` ### AIC summary ```{r} model.sel(list(glm_3.1, glm_3.2, glm_3.3, glm_3.4, glm_3.5,glm_3.6,glm_3.7)) ``` Model with the season with lowest AIC # Four terms models ```{r} glm_4.1 <- glm(kiw_pres ~ kiw_pres_lag1*distance + captain+effort+season, family=binomial(link = "logit"), data=datsc_au) summary(glm_4.1) ``` Model with the year ```{r} glm_4.2 <- glm(kiw_pres ~ kiw_pres_lag1*distance + captain+effort+season+year, family=binomial(link = "logit"), data=datsc_au) summary(glm_4.2) ``` Model with the depth ```{r} glm_4.3 <- glm(kiw_pres ~ kiw_pres_lag1*distance + captain+effort+season+depth, family=binomial(link = "logit"), data=datsc_au) summary(glm_4.3) ``` Model with the longitude ```{r} glm_4.4 <- glm(kiw_pres ~ kiw_pres_lag1*distance + captain+effort+season+long, family=binomial(link = "logit"), data=datsc_au) summary(glm_4.4) ``` Model with the latitude ```{r} glm_4.5 <- glm(kiw_pres ~ kiw_pres_lag1*distance + captain+effort+season+lat, family=binomial(link = "logit"), data=datsc_au) summary(glm_4.5) ``` Model with the size of area fished ```{r} glm_4.6 <- glm(kiw_pres ~ kiw_pres_lag1*distance + captain+effort+season+area_size, family=binomial(link = "logit"), data=datsc_au) summary(glm_4.6) ``` ### AIC summary ```{r} model.sel(list(glm_4.1, glm_4.2, glm_4.3, glm_4.4, glm_4.5,glm_4.6)) ``` None of the 4 terms models further improved the AIC the final model should be ```{r} glm_4.1 <- glm(kiw_pres ~ kiw_pres_lag1*distance + captain+effort+season, family=binomial(link = "logit"), data=datsc_au) summary(glm_4.1) ``` # Model validation plots of residuals versus fitted values for check of linearity and homoscedasticity of variance ```{r} plot(fitted(glm_4.1), resid(glm_4.1), main="Residual Plot for linearity and homoscedasticity of variance",xlab="Fitted value",ylab="Residuals") # ``` test for normality of the residuals ```{r} hist(resid(glm_4.1),xlab="Residuals",main="Normality of the Residuals") ``` plot the residuals against each explanatory variables to test independency ```{r} plot(resid(glm_4.1)~d1_au$distance,pch=20,main="Residual Plot for independency 1",xlab="Distance (km)",ylab="Residuals") plot(resid(glm_4.1)~d1_au$effort,pch=20,main="Residual Plot for independency 2",xlab="Effort (hooks)",ylab="Residuals") ``` Check for temporal autocorrelation ```{r} acf(resid(glm_4.1, type="pearson"), plot=T, type="correlation") ```