#Set working directory
setwd("~/Desktop/Analysis")

#Save corresponding excel sheets as csv files in working directory and add to global environment
A <- read.csv("Representation.csv")
S <- read.csv("Yearly Summary.csv")
Pairs <- read.csv("Pair ID.csv")
Data <- read.csv("Age and Familiarity.csv")
library(arm)

# Determining if sampled koalas/sample pairing data is a good representation of the whole population/complete pairing data
CS <- cbind(A$Copulations, A$Pairings-A$Copulations)
BS <- cbind(A$Offspring, A$Copulations-A$Offspring)
OS <- cbind(A$Surviving.Offspring, A$Offspring-A$Surviving.Offspring)
M1 <- glm(CS ~ A$Type + A$Year, family = "binomial")
summary(M1)
M2 <- glm(BS ~ A$Type + A$Year, family = "binomial")
summary(M2)
M3 <- glm(OS ~ A$Type + A$Year, family = "binomial")
summary(M3)
#Type does not differ therefore rest of analyses are performed on complete pairing data

# Determining if number of pairings has significantly changed over time
Model1 <- glm(S$Total.Pairings ~ S$Year, family = "poisson")
summary(Model1)
# Yes - determine intercept and slope for graph
intercept1 <- coef(Model1) [1]
slope1 <- coef(Model1) [2]
x1 <- 1984:2012
y1 <- exp(slope1*x1+intercept1)

# Determining if number of breeding recommendations has significantly changed over time
Model2 <- glm(S$Breeding.Recommendations ~ S$Year, family = "poisson")
summary(Model2)

# Graph of Pairings and Breeding Recommendations over time
plot(x = S$Year, y = S$Total.Pairings, ylim = c(0,100),
     xlab ="Year", ylab = "Pairings", type = "o", family = "sans",
     cex.lab = 1.2)
points(x = x1, y = y1, type = "l")
points(x = S$Year, y = S$Breeding.Recommendations, type = "o",
       pch = 19)

# Determining if copulation success has significantly changed over time
S$Year.No <- S$Year-1984
CopSuc <- cbind(S$Copulations, S$Total.Pairings - S$Copulations)
Model3 <- glm(CopSuc ~ S$Year.No, family = "binomial")
summary(Model3)
# Yes - determine intercept + slope + 95% CI
intercept3 <- coef(Model3) [1]
slope3 <- coef(Model3) [2]
x3 <- seq(0,28,1)
y3 <- invlogit(slope3*x3+intercept3)*100
intercept3.e <- summary(Model3)$coef[1,2]
slope3.e <- summary(Model3)$coef[2,2]
boots.i3 <- rnorm(1000, mean=intercept3, sd=intercept3.e)
boots.s3 <- rnorm(1000, mean=slope3, sd=slope3.e)
hist(boots.i3 + boots.s3, main= "Bootstrapped Coef")
abline(v = (intercept3 + slope3), col="red", lwd=2)
errors3 <- array(NA, dim = c(length(boots.i3), length(x3)))
for (i in 1:length(x3)) errors3[,i] <- boots.i3 + boots.s3*x3[i]
errors3.t <- invlogit(errors3)
uppers3 <- rep(NA, length(x3))
for (i in 1:length(x3)) uppers3[i] <- quantile(errors3.t[,i], probs = 0.975)
lowers3 <- rep(NA, length(x3))
for (i in 1:length(x3)) lowers3[i] <- quantile(errors3.t[,i], probs = 0.025)


# Determining if breeding success has significantly changed over time
BreedSuc <- cbind(S$Offspring, S$Copulations - S$Offspring)
Model4 <- glm(BreedSuc ~ S$Year.No, family = "binomial")
summary(Model4)
# Yes - determine intercept + slope + 95% CI
intercept4 <- coef(Model4) [1]
slope4 <- coef(Model4) [2]
x4 <- seq(0,28,1)
y4 <- invlogit(slope4*x4+intercept4)*100
intercept4.e <- summary(Model4)$coef[1,2]
slope4.e <- summary(Model4)$coef[2,2]
boots.i4 <- rnorm(1000, mean=intercept4, sd=intercept4.e)
boots.s4 <- rnorm(1000, mean=slope4, sd=slope4.e)
hist(boots.i4 + boots.s4, main= "Bootstrapped Coef")
abline(v = (intercept4 + slope4), col="red", lwd=2)
errors4 <- array(NA, dim = c(length(boots.i4), length(x4)))
for (i in 1:length(x4)) errors4[,i] <- boots.i4 + boots.s4*x4[i]
errors4.t <- invlogit(errors4)
uppers4 <- rep(NA, length(x4))
for (i in 1:length(x4)) uppers4[i] <- quantile(errors4.t[,i], probs = 0.975)
lowers4 <- rep(NA, length(x4))
for (i in 1:length(x4)) lowers4[i] <- quantile(errors4.t[,i], probs = 0.025)

# Determining if offspring survival has significantly changed over time
OffSuc <- cbind(S$Surviving.Offspring, S$Offspring - S$Surviving.Offspring)
Model5 <- glm(OffSuc ~ S$Year.No, family = "binomial")
summary(Model5)

#Graphs of mating success over time
par(mfrow = c(3,1), mar =  c(5, 5, 2, 2), family = "sans",
    mar =  c(2, 2, 2, 2), oma = c(4,4,4,0), xpd = NA)
plot(x = S$Year.No, y = S$Copulation.Suc,
     cex = (S$Total.Pairings/20), xaxt = "n", xlab = NA,
     ylab = "Copulation Success", ylim = c(0,100), 
     cex.axis = 2, cex.lab = 2.2)
axis(side = 1, at = c(1, 6, 11, 16, 21, 26), labels = FALSE)
points(x = x3, y = y3, type = "l")
points(x = x3, y = uppers3*100, type = "l", lty = "dotted")
points(x = x3, y = lowers3*100, type = "l", lty = "dotted")
plot(x = S$Year.No, y = S$Breeding.Suc, 
     cex = (S$Copulations/10), xaxt = "n", xlab = NA,
     ylab = "Breeding Success", ylim = c(0,100),
     cex.axis = 2, cex.lab = 2.2)
axis(side = 1, at = c(1, 6, 11, 16, 21, 26), labels = FALSE)
points(x = x4, y = y4, type = "l")
points(x = x4, y = uppers4*100, type = "l", lty = "dotted")
points(x = x4, y = lowers4*100, type = "l", lty = "dotted")
plot(x = S$Year.No, y = S$Offspring.Suc, 
     cex = (S$Offspring/5), xlab ="Year", xaxt = "n",
     ylab = "Offspring Success", ylim = c(0,100),
     cex.axis = 2, cex.lab = 2.2)
axis(side = 1, at = c(1, 6, 11, 16, 21, 26), labels = c(1985, 1990, 1995, 2000, 2005, 2010), cex.axis = 2)

# Testing correlation between predictor variables 
cor.test(Data$Year, Data$Male.Age, method = "spearman")
# NOTE: swap different variables in above formula to determine correlation between each combination of predictor variables

#Showing correlation between Age and Age difference
par(mfrow = c(2,1), family = "sans", mar = c(4,4,0.5,0.5), oma = c(2,2,2,2), cex.axis = 1, cex.lab = 1.2)
plot(x = Data$Age.Difference, y = Data$Male.Age, col = rgb(0,0,0,0.2), 
     xaxt = "n", xlab = NA, ylab = "Male Age")
abline(lm(Data$Male.Age ~ Data$Age.Difference), lwd = 3)
axis(side = 1, labels = FALSE)
plot(x = Data$Age.Difference, y = Data$Female.Age, col = rgb(0,0,0,0.2), 
     xlab = "Age Difference", ylab = "Female Age")
abline(lm(Data$Female.Age ~ Data$Age.Difference), lwd = 3)

# Determining multicolinearity for predictor variables
library(fmsb)
VIF(lm(Data$Year ~
         Data$Female.Age +
         Data$Male.Age +
         Data$Familiarity))
VIF(lm(Data$Female.Age ~
         Data$Year +
         Data$Male.Age +
         Data$Familiarity))
VIF(lm(Data$Male.Age ~
         Data$Year +
         Data$Female.Age +
         Data$Familiarity))
VIF(lm(Data$Familiarity ~
         Data$Year +
         Data$Male.Age +
         Data$Female.Age))

# Determining whether Pair ID needs to be a random factor in analysis
# Pair representation out of the total pairings
hist(Pairs$N.Years)
abline(v = mean(Pairs$N.Years), lwd = 2) # adds a "v"ertical line at the mean
sd(Pairs$N.Years)
length(which(Pairs$N.Years == 1)) / length(Pairs$N.Years) # percent of values that == 1

# Pair representation out of the total copulations
hist(Pairs$N.Years.Cop)
abline(v = mean(Pairs$N.Years.Cop), lwd = 2) # adds a "v"ertical line at the mean
sd(Pairs$N.Years.Cop)
length(which(Pairs$N.Years.Cop == 1)) / (length(Pairs$N.Years.Cop) - length(which(Pairs$N.Years.Cop == 0))) # percent of values that == 1

# Pair representation out of the total copulations resulting in offspring
hist(Pairs$N.Years.Off)
abline(v = mean(Pairs$N.Years.Off), lwd = 2) # adds a "v"ertical line at the mean
sd(Pairs$N.Years.Off)
length(which(Pairs$N.Years.Off == 1)) / (length(Pairs$N.Years.Off) - length(which(Pairs$N.Years.Off == 0))) # percent of values that == 1

# Plotting the above histograms together
par(mfrow = c(3,1), mar =  c(5, 5, 2, 2), family = "sans",
    mar =  c(6, 6, 2, 2), oma = c(2,2,0,0),
    cex.axis = 2, cex.lab = 2.2)
hist(Pairs$N.Years, main = NA, xlab = "Pairings")
abline(v = mean(Pairs$N.Years), lwd = 2)
hist(Pairs$N.Years.Cop, main = NA, xlab = "Copulations")
abline(v = mean(Pairs$N.Years.Cop), lwd = 2)
hist(Pairs$N.Years.Off, main = NA, xlab = "Offspring")
abline(v = mean(Pairs$N.Years.Off), lwd = 2)

# Most pairs are only represented in 1 year of the dataset therefore not needed as a random factor

#Determining what factors may be influencing changes in copulation success 
Model6 <- glm(Copulation.Success ~ Year + 
                I(Female.Age^2) + Female.Age +
                I(Male.Age^2) + Male.Age +
                Familiarity, family = "binomial", data = Data)
summary(Model6)
# Male age and familiarity influence copulation success

s.Model6 <- standardize(Model6) #Standardise predictor variables by mean and 2 SD
summary(s.Model6)

write.csv(data.frame(summary(s.Model6)$coefficients), file="Cop.csv") #Save results

#Plotting Familiarity

m.Familiarity <- mean(Data$Familiarity)
sd.Familiarity <- sd(Data$Familiarity) 

intercept1 <- coef(s.Model6) [1]
slope1 <- coef(s.Model6) [7]
x1 <- seq(0, max(Data$Familiarity), 0.25) # Get x values you want to plot
x1.m <- (x1 - m.Familiarity)/(2*sd.Familiarity) # Get x values from standardised model
y1 <- invlogit(intercept1 + slope1*x1.m) # Get y values
intercept1.e <- summary(s.Model6)$coef[1,2]
slope1.e <- summary(s.Model6)$coef[7,2]
boots.i1 <- rnorm(1000, mean=intercept1, sd=intercept1.e)
boots.s1 <- rnorm(1000, mean=slope1, sd=slope1.e)
hist(boots.i1 + boots.s1, main= "Bootstrapped Coef")
abline(v = (intercept1 + slope1), col="red", lwd=2)
errors1 <- array(NA, dim = c(length(boots.i1), length(x1)))
for (i in 1:length(x1)) errors1[,i] <- boots.i1 + boots.s1*x1.m[i] #Use model x values
errors1.t <- invlogit(errors1)
uppers1 <- rep(NA, length(x1))
for (i in 1:length(x1)) uppers1[i] <- quantile(errors1.t[,i], probs = 0.975)
lowers1 <- rep(NA, length(x1))
for (i in 1:length(x1)) lowers1[i] <- quantile(errors1.t[,i], probs = 0.025)

plot(x = Data$Familiarity, y = jitter(Data$Copulation.Success, factor = 0.3),
     ylab = "Copulation Success", yaxt = "n", xlab = "Familiarity",
     family = "sans", col = rgb(0,0,0,0.2), pch = 19, cex.lab = 1.2) #Plot the graph
axis(side = 2, at = seq(0,1))
points(x = x1, y = y1, ylim = c(0,1), type = "l")
points(x = x1, y = uppers1, type = "l", lty = "dotted")
points(x = x1, y = lowers1, type = "l", lty = "dotted")

# Plotting Male Age
m.Male.Age <- mean(Data$Male.Age)
sd.Male.Age <- sd(Data$Male.Age) 

intercept1 <- coef(s.Model6) [1]
slope1 <- coef(s.Model6) [5]
slope2 <- coef(s.Model6) [6]
x1 <- seq(1, max(Data$Male.Age), 0.25) # Get x values you want to plot
x1.m <- (x1 - m.Male.Age)/(2*sd.Male.Age) # Get x values from standardised model
y1 <- invlogit(slope1*(x1.m^2) + slope2*(x1.m) + intercept1) # Get y values
intercept1.e <- summary(s.Model6)$coef[1,2]
slope1.e <- summary(s.Model6)$coef[7,2]
boots.i1 <- rnorm(1000, mean=intercept1, sd=intercept1.e)
boots.s1 <- rnorm(1000, mean=slope1, sd=slope1.e)
hist(boots.i1 + boots.s1, main= "Bootstrapped Coef")
abline(v = (intercept1 + slope1), col="red", lwd=2)
errors1 <- array(NA, dim = c(length(boots.i1), length(x1)))
for (i in 1:length(x1)) errors1[,i] <- boots.i1 + boots.s1*x1.m[i] #Use model x values
errors1.t <- invlogit(errors1)
uppers1 <- rep(NA, length(x1))
for (i in 1:length(x1)) uppers1[i] <- quantile(errors1.t[,i], probs = 0.975)
lowers1 <- rep(NA, length(x1))
for (i in 1:length(x1)) lowers1[i] <- quantile(errors1.t[,i], probs = 0.025)

plot(x = Data$Male.Age, y = jitter(Data$Copulation.Success, factor = 0.3),
     ylab = "Copulation Success", yaxt = "n", xlab = "Male Age",
     family = "sans", col = rgb(0,0,0,0.2), pch = 19, cex.lab = 1.2) #Plot the graph
axis(side = 2, at = seq(0,1))
points(x = x1, y = y1, ylim = c(0,1), type = "l")

#Determining what factors may be influencing changes in breeding success
Data2 <- Data[Data$Copulation.Success == 1, ]
Model7 <- glm(Breeding.Success ~ Year + 
                I(Female.Age^2) + Female.Age +
                I(Male.Age^2) + Male.Age +
                Familiarity, family = "binomial", data = Data2)
summary(Model7)
# No factors influence breeding success

s.Model7 <- standardize(Model7) #Standardise predictor variables by mean and 2 SD
summary(s.Model7)

write.csv(data.frame(summary(s.Model7)$coefficients), file="Breed.csv") #Save results


#Determining what factors may be influencing changes in offspring success 
Data3 <- Data[Data$Breeding.Success == 1, ]
Model8 <- glm(Offspring.Success ~ Year + 
                I(Female.Age^2) + Female.Age +
                I(Male.Age^2) + Male.Age +
                Familiarity, family = "binomial", data = Data3)
summary(Model8)
# No factors influence offspring success

s.Model8 <- standardize(Model8) #Standardise predictor variables by mean and 2 SD
summary(s.Model8)

write.csv(data.frame(summary(s.Model8)$coefficients), file="Off.csv") #Save results