###########################################################################
#Script used to calculate phenological trends in migratory birds passing thoriugh Manoment Inc. in fall and fall using quantile regression. 
#This script runs one model and outputs three files (summary, trace plots, list of draws)
#Case example: Hierarchical Cormack-Jolly-Seber model using fall Swamp Sparrow data
#Text files of code for both group-effects and hierarchical models available in supplemental materials
#Code written by N.N.Dorian
#Last revised: December 17, 2019
############################################################################

#you may need to use install.packages("package name") first
library(quantreg) #run quantile regression
library(lubridate) #handle dates more easily

##set working directory and load data
setwd()
data<-read.csv("OVEN.csv")

##clean data and format columns
data$Date<-as.Date(data$Date, format = "%m/%d/%Y") #year must be 4 digits
data$Time<-hm(as.character(data$Time))
data$Year<-year(data$Date)#year column
data$DOY<-yday(data$Date) #day of year
data$Hour<-hour(data$Time)#hour of day
data$Weight<-as.numeric(as.character(data$Weight))#make weight continuous 
data<-data[data$Band != 0,] #remove absent bands

##subset data by first captures only and filter to between 1970 and 2015
year_bounds<-c(1970, 2015)
data_first<-data[data$Repeat == "1",]
data_first$Year<-as.numeric(as.character(data_first$Year))
data_first<-data_first[data_first$Year >= 1970 & data_first$Year <= 2015,]

##create spring and fall datasets for separate analyses
spring_dates<-c(105, 166)
fall_dates<-c(227, 319)

data_spring<-data_first[data_first$DOY >= spring_dates[1] & data_first$DOY <= spring_dates[2],]
data_fall<-data_first[data_first$DOY >= fall_dates[1] & data_first$DOY <= fall_dates[2],]

##sample size
nrow(data_spring)
nrow(data_fall)

##median date of capture
median(data_spring$DOY, na.rm =T)
median(data_fall$DOY, na.rm = T)

##average length of migration
quantile(data_spring$DOY, probs = 0.85)-quantile(data_spring$DOY, probs = 0.15)
quantile(data_fall$DOY, probs = 0.85)-quantile(data_fall$DOY, probs = 0.15)

#center year variable
data_spring$Year_scale<-data_spring$Year-1970
data_fall$Year_scale<-data_fall$Year-1970

##run quantile regression models
qr1585_spring<-rq(DOY ~ Year_scale, tau = c(0.15,0.5, 0.85), data = data_spring)
qr1585_fall<-rq(DOY ~ Year_scale, tau = c(0.15,0.5, 0.85), data = data_fall)

##calculate confidence intervals
qr1585_spring_boot<-summary(qr1585_spring, se = "boot", bsmethod = "xy")
qr1585_fall_boot<-summary(qr1585_fall ,se = "boot", bsmethod = "xy")

##summarize model results for spring
#slopes and CIS for 0.15, 0.5, and 0.85 quantiles
#If coefficients are super close to zero (1e-16), see bottom of script for solution.
as.matrix(
  rbind(
    c(coef(qr1585_spring_boot[[1]])[2,1], 
      coef(qr1585_spring_boot[[1]])[2,1]-1.96*coef(qr1585_spring_boot[[1]])[2,2], 
      coef(qr1585_spring_boot[[1]])[2,1]+1.96*coef(qr1585_spring_boot[[1]])[2,2]),
    
    c(coef(qr1585_spring_boot[[2]])[2,1], 
      coef(qr1585_spring_boot[[2]])[2,1]-1.96*coef(qr1585_spring_boot[[2]])[2,2], 
      coef(qr1585_spring_boot[[2]])[2,1]+1.96*coef(qr1585_spring_boot[[2]])[2,2]),
    
    c(coef(qr1585_spring_boot[[3]])[2,1], 
      coef(qr1585_spring_boot[[3]])[2,1]-1.96*coef(qr1585_spring_boot[[3]])[2,2], 
      coef(qr1585_spring_boot[[3]])[2,1]+1.96*coef(qr1585_spring_boot[[3]])[2,2])))

##summarize model results for fall
#slopes and CIS for 0.15, 0.5, and 0.85 quantiles
#If coefficients are super close to zero (1e-16), see bottom of script for solution.
as.matrix(
  rbind(
    c(coef(qr1585_fall_boot[[1]])[2,1], 
      coef(qr1585_fall_boot[[1]])[2,1]-1.96*coef(qr1585_fall_boot[[1]])[2,2], 
      coef(qr1585_fall_boot[[1]])[2,1]+1.96*coef(qr1585_fall_boot[[1]])[2,2]),
    
    c(coef(qr1585_fall_boot[[2]])[2,1], 
      coef(qr1585_fall_boot[[2]])[2,1]-1.96*coef(qr1585_fall_boot[[2]])[2,2], 
      coef(qr1585_fall_boot[[2]])[2,1]+1.96*coef(qr1585_fall_boot[[2]])[2,2]),
    
    c(coef(qr1585_fall_boot[[3]])[2,1], 
      coef(qr1585_fall_boot[[3]])[2,1]-1.96*coef(qr1585_fall_boot[[3]])[2,2], 
      coef(qr1585_fall_boot[[3]])[2,1]+1.96*coef(qr1585_fall_boot[[3]])[2,2])
  ))


##calculate rate of change in migration duration (i.e. asymmetry) 
diff_spring<-coef(qr1585_spring)[2,3]-coef(qr1585_spring)[2,1]
diff_fall<-coef(qr1585_fall)[2,3]-coef(qr1585_fall)[2,1]
print(c(diff_spring, diff_fall))

##bootstrap CIs around asymmetry values
#spring dataset
boot_dat_spring<-c()
for(i in 1:1000){
  boot_doy<-sample(data_spring$DOY,size = length(data_spring$DOY), replace = T)
  boot_year<-sample(data_spring$Year_scale, size = length(data_spring$Year_scale), replace = T)
  boot_qr<-rq(boot_doy ~ boot_year, tau = c(0.15, 0.85))
  boot_dat_spring[i]<-coef(boot_qr)[2,2]-coef(boot_qr)[2,1]
}

diff_spring-sd(boot_dat_spring)*1.96
diff_spring+sd(boot_dat_spring)*1.96

#fall dataset
boot_dat_fall<-c()

for(i in 1:1000){
  boot_doy<-sample(data_fall$DOY,size = length(data_fall$DOY), replace = T)
  boot_year<-sample(data_fall$Year_scale, size = length(data_fall$Year_scale), replace = T)
  boot_qr<-rq(boot_doy ~ boot_year, tau = c(0.15, 0.85))
  boot_dat_fall[i]<-coef(boot_qr)[2,2]-coef(boot_qr)[2,1]
}

diff_fall-sd(boot_dat_fall)*1.96
diff_fall+sd(boot_dat_fall)*1.96

#plots
par(mar = c(6, 6, 2, 2), mfrow = c(1,1))
year_vals<-seq(0,50, by = 1)
spring_lim = c(100, 150)
fall_lim = c(220, 300)
spring_seq = seq(100, 150, by = 10)
fall_seq= seq(220, 300, by = 20)

#plot data
plot(data_fall$Year-1970, data_fall$DOY, xaxt = "n", xlab = "", yaxt = "n", ylab = "", bty = "n", ylim = fall_lim, pch = 1, col = "gray70") 

#plot median
lines(year_vals, coef(qr1585_fall_boot[[2]])[1,1]+coef(qr1585_fall_boot[[2]])[2,1]*year_vals, lwd = 3, col = "red")

#plot 10%  
lines(year_vals, coef(qr1585_fall_boot[[1]])[1,1]+coef(qr1585_fall_boot[[1]])[2,1]*year_vals, lwd =2, col = "red", lty = 2)

#plot 90%
lines(year_vals, coef(qr1585_fall_boot[[3]])[1,1]+coef(qr1585_fall_boot[[3]])[2,1]*year_vals, lwd = 2, col = "red", lty = 2)

#axes
axis(side = 1, labels = seq(1970, 2015, by = 5), at = seq(0, 45, by = 5), cex.axis = 1.25)
axis(side = 2, labels = fall_seq, at = fall_seq, las = 1, cex.axis = 1.25)

#axis labels
mtext("Year", 1, 3.5, cex = 1.75)
mtext("Day of Year", 2, 3.5, cex = 1.75)
#text(41, 170, "Ovenbird, fall", cex = 1.5)
#text(42, 330, "Ovenbird, spring", cex = 1.5)


##If coefficients are super close to zero (1e-16),data may need to be jittered (= dithered) to better estimate parameters. This means that you need to add a small amount of noise, generally Uniform(0,1), to each data point. This need for this arises from the fact that quantile regression works best on continuous data, but phenology data (though continuous in a sense) are discrete, e.g., DOY 255 or 256 but not 255.5. For additional information, see:  Machado and Santo Silva (2005) Journal of the American Statistical Association. 100: 1226-1237
#The following code adds a small amount of noise to each data point and loops over the jittering 100 times to ensure that parameter estimates are not biased by random sampling. 

jitt_coefs<-matrix(NA, nrow = 100, ncol = 6)
for(i in 1:100){
  data_spring$DOY_jitt<-data_spring$DOY+runif(length(data_spring$DOY), -0.5, 0.5) #jitter data, change to fall if need be
  m1<-rq(DOY_jitt ~ Year_scale, tau = c(0.15, 0.5, 0.85), data = data_spring) #re-run model on jittered data
  temp<-summary(m1, se = "boot", bsmethod = "xy") #calc new std. errors
  temp15<-coef(temp[[1]])[2,1] #extract coefs and std. errors for each quantile
  temp15se<-coef(temp[[1]])[2,2]
  temp50<-coef(temp[[2]])[2,1]
  temp50se<-coef(temp[[2]])[2,2]
  temp85<-coef(temp[[3]])[2,1]
  temp85se<-coef(temp[[3]])[2,2]
  
  jitt_coefs[i,]<-c(temp15,temp15se, temp50, temp50se, temp85, temp85se)
}

#print out jittered mean and confidence intervals for each quantile 
mean(jitt_coefs[,1])
mean(jitt_coefs[,1]+1.96*jitt_coefs[,2])
mean(jitt_coefs[,1]-1.96*jitt_coefs[,2])

mean(jitt_coefs[,3])
mean(jitt_coefs[,3]+1.96*jitt_coefs[,4])
mean(jitt_coefs[,3]-1.96*jitt_coefs[,4])

mean(jitt_coefs[,5])
mean(jitt_coefs[,5]+1.96*jitt_coefs[,6])
mean(jitt_coefs[,5]-1.96*jitt_coefs[,6])

mean(jitt_coefs[,5]-jitt_coefs[,1]) #print out jittered asymmetry


##boostrap confidence intervals around jittered asymmetry 
boot_dat_jitt<-c()
jitt_coefs_temp15<-c()
jitt_coefs_temp85<-c()

#bootstrap first by sampling randomly from existing data and then calculating mean asymmetry from jittered data
for(i in 1:1000){
  boot_doy<-sample(data_spring$DOY,size = length(data_spring$DOY), replace = T) 
  boot_year<-sample(data_spring$Year_scale, size = length(data_spring$Year_scale), replace = T)
  
  for(j in 1:100){
    boot_doy_jitt<-boot_doy+runif(length(boot_doy), -0.5, 0.5)
    temp<-coef(rq(boot_doy ~ boot_year, tau = c(0.15, 0.85)))[2,]
    jitt_coefs_temp15<-temp[1]
    jitt_coefs_temp85<-temp[2]
  }
  
  boot_dat_jitt[i]<-mean(jitt_coefs_temp85-jitt_coefs_temp15)
}

sdbootdat<-sd(boot_dat_jitt) #calculate standard error of bootstrapped asymmetry
mean(jitt_coefs[,5]-jitt_coefs[,1])+1.96*sdbootdat#ui
mean(jitt_coefs[,5]-jitt_coefs[,1])-1.96*sdbootdat#li