####################################### ####################################### ##### R Script for ##### ##### Identifying rafting ##### ##### from GPS data, ##### ##### data conversions and ##### ##### and statistical analyses ##### ##### ##### ####################################### ####################################### # Set the working directory: setwd(" ") # Load packages: library(multirich); library(reshape2); library(dplyr); library(plyr); library(ggplot2); library(extrafont); library(extrafontdb) # ________________________________________________________________________________ # ________________________________________________________________________________ # Code order: # # 1. Read in 83 csv files containing Manx shearwater GPS data; # 2. For each GPS file (1-83) calculate the bearings between each GPS fix; # 3. Make each dataframe into a list, combine all lists, and save as "all_tracks"; # 4. Extract the rafting data from the all_tracks dataframe; # 5. Use the Vincente equation to calculate the distance between successive GPS fixes; # 6. Calculate the speed of Manx shearwaters from GPS data; # 7. Identifying the cut off between rafting and flying; # 8. Calculate the distance from Skomer for all and retain only those >200 m and <5000 m; # 9. Combine the weather and GPS data ready for circular analysis; # 10. Statistics # ________________________________________________________________________________ # ________________________________________________________________________________ # ________________________________________________________________________________ # 1. Read in 83 .csv files containing GPS data from individual birds: # ________________________________________________________________________________ dat1 <- read.csv("1.csv", header= TRUE) dat2 <- read.csv("2.csv", header= TRUE) dat3 <- read.csv("3.csv", header= TRUE) dat4 <- read.csv("4.csv", header= TRUE) dat5 <- read.csv("5.csv", header= TRUE) dat6 <- read.csv("6.csv", header= TRUE) dat7 <- read.csv("7.csv", header= TRUE) dat8 <- read.csv("8.csv", header= TRUE) dat9 <- read.csv("9.csv", header= TRUE) dat10 <- read.csv("10.csv", header= TRUE) dat11 <- read.csv("11.csv", header= TRUE) dat12 <- read.csv("12.csv", header= TRUE) dat13 <- read.csv("13.csv", header= TRUE) dat14 <- read.csv("14.csv", header= TRUE) dat15 <- read.csv("15.csv", header= TRUE) dat16 <- read.csv("16.csv", header= TRUE) dat17 <- read.csv("17.csv", header= TRUE) dat18 <- read.csv("18.csv", header= TRUE) dat19 <- read.csv("19.csv", header= TRUE) dat20 <- read.csv("20.csv", header= TRUE) dat21 <- read.csv("21.csv", header= TRUE) dat22 <- read.csv("22.csv", header= TRUE) dat23 <- read.csv("23.csv", header= TRUE) dat24 <- read.csv("24.csv", header= TRUE) dat25 <- read.csv("25.csv", header= TRUE) dat26 <- read.csv("26.csv", header= TRUE) dat27 <- read.csv("27.csv", header= TRUE) dat28 <- read.csv("28.csv", header= TRUE) dat29 <- read.csv("29.csv", header= TRUE) dat30 <- read.csv("30.csv", header= TRUE) dat31 <- read.csv("31.csv", header= TRUE) dat32 <- read.csv("32.csv", header= TRUE) dat33 <- read.csv("33.csv", header= TRUE) dat34 <- read.csv("34.csv", header= TRUE) dat35 <- read.csv("35.csv", header= TRUE) dat36 <- read.csv("36.csv", header= TRUE) dat37 <- read.csv("37.csv", header= TRUE) dat38 <- read.csv("38.csv", header= TRUE) dat39 <- read.csv("39.csv", header= TRUE) dat40 <- read.csv("40.csv", header= TRUE) dat41 <- read.csv("41.csv", header= TRUE) dat42 <- read.csv("42.csv", header= TRUE) dat43 <- read.csv("43.csv", header= TRUE) dat44 <- read.csv("44.csv", header= TRUE) dat45 <- read.csv("45.csv", header= TRUE) dat46 <- read.csv("46.csv", header= TRUE) dat47 <- read.csv("47.csv", header= TRUE) dat48 <- read.csv("48.csv", header= TRUE) dat49 <- read.csv("49.csv", header= TRUE) dat50 <- read.csv("50.csv", header= TRUE) dat51 <- read.csv("51.csv", header= TRUE) dat52 <- read.csv("52.csv", header= TRUE) dat53 <- read.csv("53.csv", header= TRUE) dat55 <- read.csv("55.csv", header= TRUE) dat56 <- read.csv("56.csv", header= TRUE) dat57 <- read.csv("57.csv", header= TRUE) dat58 <- read.csv("58.csv", header= TRUE) dat59 <- read.csv("59.csv", header= TRUE) dat60 <- read.csv("60.csv", header= TRUE) dat61 <- read.csv("61.csv", header= TRUE) dat62 <- read.csv("62.csv", header= TRUE) dat63 <- read.csv("63.csv", header= TRUE) dat64 <- read.csv("64.csv", header= TRUE) dat66 <- read.csv("66.csv", header= TRUE) dat67 <- read.csv("67.csv", header= TRUE) dat68 <- read.csv("68.csv", header= TRUE) dat69 <- read.csv("69.csv", header= TRUE) dat70 <- read.csv("70.csv", header= TRUE) dat71 <- read.csv("71.csv", header= TRUE) dat72 <- read.csv("72.csv", header= TRUE) dat73 <- read.csv("73.csv", header= TRUE) dat74 <- read.csv("74.csv", header= TRUE) dat75 <- read.csv("75.csv", header= TRUE) dat76 <- read.csv("76.csv", header= TRUE) dat77 <- read.csv("77.csv", header= TRUE) dat78 <- read.csv("78.csv", header= TRUE) dat79 <- read.csv("79.csv", header= TRUE) dat80 <- read.csv("80.csv", header= TRUE) dat81 <- read.csv("81.csv", header= TRUE) dat82 <- read.csv("82.csv", header= TRUE) dat83 <- read.csv("83.csv", header= TRUE) # ________________________________________________________________________________ # 2. For each GPS file (1-83) calculate the bearings between each GPS fix: # ________________________________________________________________________________ deg2rad <- function(deg) return(deg*pi/180) rad2deg <- function(rad) {(rad * 180) / (pi)} dat1$Lat_rad <- deg2rad(dat1$Latitude) dat1$Long_rad <- deg2rad(dat1$Longitude) dat1$Lat_rad2 <- c(dat1$Lat_rad[2:nrow(dat1)], NA) dat1$Long_rad2 <- c(dat1$Long_rad[2:nrow(dat1)], NA) dat1$bearing <- atan2(sin(dat1$Long_rad2-dat1$Long_rad)*cos(dat1$Lat_rad2), cos(dat1$Lat_rad)*sin(dat1$Lat_rad2)-sin(dat1$Lat_rad)*cos(dat1$Lat_rad2)*cos(dat1$Long_rad2-dat1$Long_rad)) dat1$bearing <- rad2deg(dat1$bearing) dat1$bearing <- abs(dat1$bearing) # ________________________________________________________________________________ # 3. Make each csv into a list: # ________________________________________________________________________________ list<- list(dat1, dat2,dat3,dat4,dat5,dat6,dat7,dat8,dat9,dat10,dat11, dat12,dat13,dat14,dat15,dat16,dat17,dat18,dat19,dat20,dat21, dat22,dat23,dat24,dat25,dat26,dat27,dat28,dat29,dat30,dat31, dat32,dat33,dat34,dat35,dat36,dat37,dat38,dat39,dat40,dat41, dat42,dat43,dat44,dat45,dat46,dat47,dat48,dat49,dat50,dat51, dat52,dat53,dat55,dat56,dat57,dat58,dat59,dat60,dat61,dat62, dat63,dat64,dat66,dat67,dat68,dat69,dat70,dat71,dat72,dat73, dat74,dat75,dat76,dat77,dat78,dat79,dat80,dat81,dat82,dat83) # Assign each list a bird ID: list[[1]]$ID <- "bird 1" list[[2]]$ID <- "bird 2" list[[3]]$ID <- "bird 3" list[[4]]$ID <- "bird 4" list[[5]]$ID <- "bird 5" list[[6]]$ID <- "bird 6" list[[7]]$ID <- "bird 7" list[[8]]$ID <- "bird 8" list[[9]]$ID <- "bird 9" list[[10]]$ID <- "bird 10" list[[11]]$ID <- "bird 11" list[[12]]$ID <- "bird 12" list[[13]]$ID <- "bird 13" list[[14]]$ID <- "bird 14" list[[15]]$ID <- "bird 15" list[[16]]$ID <- "bird 16" list[[17]]$ID <- "bird 17" list[[18]]$ID <- "bird 18" list[[19]]$ID <- "bird 19" list[[20]]$ID <- "bird 20" list[[21]]$ID <- "bird 21" list[[22]]$ID <- "bird 22" list[[23]]$ID <- "bird 23" list[[24]]$ID <- "bird 24" list[[25]]$ID <- "bird 25" list[[26]]$ID <- "bird 26" list[[27]]$ID <- "bird 27" list[[28]]$ID <- "bird 28" list[[29]]$ID <- "bird 29" list[[30]]$ID <- "bird 30" list[[31]]$ID <- "bird 31" list[[32]]$ID <- "bird 32" list[[33]]$ID <- "bird 33" list[[34]]$ID <- "bird 34" list[[35]]$ID <- "bird 35" list[[36]]$ID <- "bird 36" list[[37]]$ID <- "bird 37" list[[38]]$ID <- "bird 38" list[[39]]$ID <- "bird 39" list[[40]]$ID <- "bird 40" list[[41]]$ID <- "bird 41" list[[42]]$ID <- "bird 42" list[[43]]$ID <- "bird 43" list[[44]]$ID <- "bird 44" list[[45]]$ID <- "bird 45" list[[46]]$ID <- "bird 46" list[[47]]$ID <- "bird 47" list[[48]]$ID <- "bird 48" list[[49]]$ID <- "bird 49" list[[50]]$ID <- "bird 50" list[[51]]$ID <- "bird 51" list[[52]]$ID <- "bird 52" list[[53]]$ID <- "bird 53" list[[54]]$ID <- "bird 54" list[[55]]$ID <- "bird 55" list[[56]]$ID <- "bird 56" list[[57]]$ID <- "bird 57" list[[58]]$ID <- "bird 58" list[[59]]$ID <- "bird 59" list[[60]]$ID <- "bird 60" list[[61]]$ID <- "bird 61" list[[62]]$ID <- "bird 62" list[[63]]$ID <- "bird 63" list[[64]]$ID <- "bird 64" list[[65]]$ID <- "bird 65" list[[66]]$ID <- "bird 66" list[[67]]$ID <- "bird 67" list[[68]]$ID <- "bird 68" list[[69]]$ID <- "bird 69" list[[70]]$ID <- "bird 70" list[[71]]$ID <- "bird 71" list[[72]]$ID <- "bird 72" list[[73]]$ID <- "bird 73" list[[74]]$ID <- "bird 74" list[[75]]$ID <- "bird 75" list[[76]]$ID <- "bird 76" list[[77]]$ID <- "bird 77" list[[78]]$ID <- "bird 78" list[[79]]$ID <- "bird 79" list[[80]]$ID <- "bird 80" list[[81]]$ID <- "bird 81" list[[82]]$ID <- "bird 82" list[[83]]$ID <- "bird 83" # Bind all lists into one dataframe "all_tracks": all_tracks <- do.call("rbind", list) head(all_tracks) setwd("C:/Users/cerre/Desktop/Manxie Rafts/GPS SECTION") write.csv(all_tracks, "all_tracks.csv") # ________________________________________________________________________________ # 4. Extract the rafting data from the all_tracks dataframe # ________________________________________________________________________________ # Rename the data: data <- all_tracks # Add new column with combined date and time: data$date_time <- paste(data$Date, data$Time, sep = " ") # Add a new column which makes the date_time column in POSIXct format: data$POSIXct <- as.POSIXct(data$date_time, format = "%Y/%m/%d %H:%M:%S", tz = "gmt") head(data) # Add two columns which change the Latitude and Longitude columns to radians: data$Lat_rad <- deg2rad(data$Latitude) data$Long_rad <- deg2rad(data$Longitude) # Add two columns which deletes the first row of the Lat_rad and Long_rad columns # and replaces the last row with "NA": data$Lat_rad2 <- c(data$Lat_rad[2:nrow(data)], NA) data$Long_rad2 <- c(data$Long_rad[2:nrow(data)], NA) # ________________________________________________________________________________ # 5. Use the Vincente equation to calculate the distance between successive GPS fixes. # Adds new column "distance" for distance between fixes in km: # Vincente Code from: # Pineda-Krch, M. 2010, Great-circle distance calculations in R, [Online], # Available: https://www.r-bloggers.com/great-circle-distance-calculations-in-r/ [accessed 2016, September 17th]. # ________________________________________________________________________________ gcd.vif <- function(long1, lat1, long2, lat2) { # WGS-84 ellipsoid parameters a <- 6378137 # length of major axis of the ellipsoid (radius at equator) b <- 6356752.314245 # ength of minor axis of the ellipsoid (radius at the poles) f <- 1/298.257223563 # flattening of the ellipsoid L <- long2-long1 # difference in longitude U1 <- atan((1-f) * tan(lat1)) # reduced latitude U2 <- atan((1-f) * tan(lat2)) # reduced latitude sinU1 <- sin(U1) cosU1 <- cos(U1) sinU2 <- sin(U2) cosU2 <- cos(U2) cosSqAlpha <- NULL sinSigma <- NULL cosSigma <- NULL cos2SigmaM <- NULL sigma <- NULL lambda <- L lambdaP <- 0 iterLimit <- 100 while (abs(lambda-lambdaP) > 1e-12 & iterLimit>0) { sinLambda <- sin(lambda) cosLambda <- cos(lambda) sinSigma <- sqrt( (cosU2*sinLambda) * (cosU2*sinLambda) + (cosU1*sinU2-sinU1*cosU2*cosLambda) * (cosU1*sinU2-sinU1*cosU2*cosLambda) ) if (sinSigma==0) return(0) # Co-incident points cosSigma <- sinU1*sinU2 + cosU1*cosU2*cosLambda sigma <- atan2(sinSigma, cosSigma) sinAlpha <- cosU1 * cosU2 * sinLambda / sinSigma cosSqAlpha <- 1 - sinAlpha*sinAlpha cos2SigmaM <- cosSigma - 2*sinU1*sinU2/cosSqAlpha if (is.na(cos2SigmaM)) cos2SigmaM <- 0 # Equatorial line: cosSqAlpha=0 C <- f/16*cosSqAlpha*(4+f*(4-3*cosSqAlpha)) lambdaP <- lambda lambda <- L + (1-C) * f * sinAlpha * (sigma + C*sinSigma*(cos2SigmaM+C*cosSigma*(-1+2*cos2SigmaM*cos2SigmaM))) iterLimit <- iterLimit - 1 } if (iterLimit==0) return(NA) # formula failed to converge uSq <- cosSqAlpha * (a*a - b*b) / (b*b) A <- 1 + uSq/16384*(4096+uSq*(-768+uSq*(320-175*uSq))) B <- uSq/1024 * (256+uSq*(-128+uSq*(74-47*uSq))) deltaSigma = B*sinSigma*(cos2SigmaM+B/4*(cosSigma*(-1+2*cos2SigmaM^2) - B/6*cos2SigmaM*(-3+4*sinSigma^2)*(-3+4*cos2SigmaM^2))) s <- b*A*(sigma-deltaSigma) / 1000 return(s) # Distance in km } data$distance <- NA isEmpty <- function(x) { return(length(x)==0) } for(i in 1:(nrow(data)-1)){ if(isEmpty(gcd.vif(data$Long_rad[i], data$Lat_rad[i], data$Long_rad2[i], data$Lat_rad2[i]))){ data$distance[i] <- 0 } else { data$distance[i] <- gcd.vif(data$Long_rad[i], data$Lat_rad[i], data$Long_rad2[i], data$Lat_rad2[i]) } } data$distance <- as.numeric(data$distance) # ________________________________________________________________________________ # 6. Calculate the speed of Manx shearwaters from GPS data. # ________________________________________________________________________________ #Create a second POSIXct column to allow calculate the time between two GPS fixes: data$POSIXct2 <- as.POSIXct(c(as.character(data$POSIXct)[2:nrow(data)], NA), tz = "gmt") head(data) data$time_diff <- difftime(data$POSIXct2, data$POSIXct, units = "secs") # Change from km to m: data$distance_m <- data$distance*1000 # Calculate speed in m/s: data$calc_speed <- data$distance_m/as.numeric(data$time_diff) head(data) # Remove erroneous GPS fixes that exceed 30 m/s data <- data[which(data$calc_speed < 30),] # ________________________________________________________________________________ # 7. Identifying the cut off between rafting and flying. # ________________________________________________________________________________ library(mixtools) mix <- normalmixEM(data$calc_speed, k = 2) plot(mix, which = 2) # Speed distributions showing ~2.5 m/s is the cut off between rafting and flying, so # will now use this to separate the two behaviours: data$b_state <- ifelse(data$calc_speed > 2.5, 2, 1) # Create new dataframe only containing rafting behaviours: rafting<-data[which(data$b_state=='1'),] # ________________________________________________________________________________ # 8. Calculate the distance from Skomer colony for all and retain only # those >200m and <5000m # ________________________________________________________________________________ # Lat and long for tracked Manx shearwater colony on Skomer Island: lat1<- deg2rad(51.73724783452497) long1<- deg2rad(-5.2821654081344604) # Change lat and long to radians rafting$lat2<- deg2rad(rafting$Latitude) rafting$long2<- deg2rad(rafting$Longitude) # Spherical trigonometry to calculate the distance of rafts from the colony rafting$a<- sin((rafting$lat2-lat1)/2)^2+cos(lat1)*cos(rafting$lat2)*sin((rafting$long2-long1)/2)^2 rafting$c <- 2*atan2(sqrt(rafting$a), sqrt(1-rafting$a)) rafting$dist_from_skomer <- 6371000*rafting$c # Assign a cut off for values closer and further away than 5 km rafting$five_km_cut_off <- ifelse(rafting$dist_from_skomer > 5000, 2, 1) # Remove values within 200 m of the island # Note: This removes majority of GPS fixes on land. All remaining fixes were manually removed. rafting$behaviour2<- ifelse(rafting$dist_from_skomer > 200, 2, 1) # Create new dataframe with only fixes >200 m and <5000 m from the Skomer colony GPS_5km <-rafting[which(rafting$five_km_cut_off=='1' & rafting$behaviour2=='2'),] # ________________________________________________________________________________ # 9. Combine the weather and GPS data ready for circular analysis # ________________________________________________________________________________ # Read in weather data from Wooltack Point: weather<- read.csv("weather.csv", header = TRUE) # Add in an extra date and time column, and it's POSIXct form for GPS and weather dataframes: ## For GPS: GPS_5km$date_time2 <- paste(GPS_5km$Date, GPS_5km$Time, sep = " ") GPS_5km$POSIXct3 <- as.POSIXct(GPS_5km$date_time2, format = "%d/%m/%Y %H:%M:%S", tz = "gmt") ## For weather: weather$date_time <- paste(weather$Date, weather$Time, sep = " ") weather$POSIXct <- as.POSIXct(weather$date_time2, format = "%d/%m/%Y %H:%M:%S", tz = "gmt") # Identify the closest date and time from the weather dataframe to the GPS dataframe: closest_date <- c() for(i in 1:length(GPS_5km$POSIXct3)){ closest_date[i] <- weather$POSIXct[which.min(abs(GPS_5km$POSIXct3[i] - weather$POSIXct))] } # Add the closest date column to the GPS_5km dataframe: GPS_5km$closest_date <- closest_date # Match the weather GPS and weather data weather$numbers <- as.numeric(weather$POSIXct) GPS_5km$match<- match(GPS_5km$closest_date, weather$numbers) GPS_5km$wind <- weather$WindDir[match(GPS_5km$closest_date, weather$numbers)] # Save file: write.csv(file="GPS_5km.csv", GPS_5km, row.names = FALSE) # ________________________________________________________________________________ # 10. Statistics # ________________________________________________________________________________ rafts <- read.csv("Supplemental Data S3.csv") #read in behavioural observation data GPS_5km <- read.csv("Supplemental Data S4.csv") #read in GPS data # ________________________________________________________________________________ # Jammalamadaka-Sarma Circular Correlations # ________________________________________________________________________________ library(circular) deg2rad<- function(deg) return(deg*pi/180) # To change values from degrees to radians #For behavioural obervations: #______________________________ rafts$bearing_rad <- deg2rad(rafts$bearing) # Change raft bearings from degrees to radians rafts$wind_rad <- deg2rad(rafts$wind_dir) # Change wind direction bearings from degrees to radians bearing_rad<- circular(rafts$bearing_rad) # Make rafting values circular wind_rad<- circular(rafts$wind_rad) # Make wind values circular cor.circular(y=bearing_rad, x=wind_rad, test=TRUE) # Run the Jammalamadaka-Sarma Circular Correlation and give p-value #For GPS tracks: #____________________ GPS_5km$bearing_rad <- deg2rad(GPS_5km$bearing) # Change GPS raft bearings from degrees to radians GPS_5km$wind_rad <- deg2rad(GPS_5km$wind) # Change wind direction bearings from degrees to radians bearing_rad<- circular(GPS_5km$bearing_rad) # Make rafting values circular wind_rad<- circular(GPS_5km$wind_rad) # Make wind values circular cor.circular(y=bearing_rad, x=wind_rad, test=TRUE) # Run the Jammalamadaka-Sarma Circular Correlation and give p-value # ________________________________________________________________________________ # Model selection procedure for number of birds in rafts (count) ~ wind speed # ________________________________________________________________________________ library(nlme) library(MASS) m1 <- glm.nb(count~wind_speed, data = rafts) plot(m1) #poor fit to the data (negative binomial) m2 <- lme(count~wind_speed, random=~1|location, method = "ML", data = rafts) #poor fit to the data so must transform qqnorm(m2) plot(m2) hist(resid(m2)) m3 <- lme(log(count)~wind_speed, random=~1|location, method = "ML", data = rafts) qqnorm(m3) plot(m3) hist(resid(m3)) m4 <- gls(log(count)~wind_speed, method = "ML", data = rafts) qqnorm(m4) plot(m4) hist(resid(m4)) rafts$julian<- strptime(rafts$date, "%d/%m/%Y")$yday # converted days to Julian days m5 <-gls(log(count)~wind_speed,cor=corARMA(q=4,form=~1|julian), method="ML", data = rafts) #inclusion of autocorrelation reduced the AIC score qqnorm(m5) plot(m5) hist(resid(m5)) #meets assumptions of the model and is a good fit m6 <-gls(log(count)~1,cor=corARMA(q=4,form=~1|julian), method="ML", data = rafts) #the model is a significant improvement from the null model anova(m5, m6) m7 <-gls(log(count)~wind_speed,cor=corARMA(q=5,form=~1|julian), method="ML", data = rafts) m8 <-gls(log(count)~1,cor=corARMA(q=5,form=~1|julian), method="ML", data = rafts) anova(m7, m8) # Increasing the autocorrelation to q = 5 reduced the model AIC score, however, the model (m7) # was not a significant improvement from the null model (m8) therefore an autocorrelation of # q = 4 (m5) was selected because it offered a significant improvement from the null model (m6). #including location has no effect for count and does not improve the model, therefore will not be included m9 <-gls(log(count)~wind_speed*location,cor=corARMA(q=4,form=~1|julian), method="ML", data = rafts) anova(m5, m9) #Final accepted model = m5 summary(m5) # ________________________________________________________________________________ # GAMMs to describe the trends in raft distance from shore through time # and number of birds in nearshore waters through time # ________________________________________________________________________________ # For raft distance from shore through time: #_______________________________________________________ m10<- gamm(dist_from_skomer~s(hour), data = GPS_5km, random = list(ID=~1), family = poisson) plot(m10$gam) # For number of birds in nearshore water through time: #_______________________________________________________ library(dplyr) num_birds <- GPS_5km %>% group_by(hour) %>% count(ID) num_birds <- num_birds %>% group_by(hour) %>% summarise(no_rows = length(ID)) #summarise number of birds per hour # must add the hours that have no birds library(tidyverse) num_birds <- add_row(num_birds, hour = "13", no_rows = 0) num_birds <- add_row(num_birds, hour = "8", no_rows = 0) num_birds <- add_row(num_birds, hour = "9", no_rows = 0) num_birds <- add_row(num_birds, hour = "11", no_rows = 0) colnames(num_birds)<-c("hour", "numbirds") m11<- gamm(numbirds~s(hour), data = num_birds, family = poisson) plot(m11$gam)