### R Scripts to accompany "The implementation of rare events logistic regression to predict the distribution of mesophotic hard corals across the main Hawaiian Islands" (Veazey et al. 2016). 
### All code assembled by Lindsay Veazey (lindsayv@hawaii.edu). Adapt freely, but please give credit if you feel it is due!
### Please note that the code below is a selection of code used for our study. We have included code using Montipora and Leptoseris where helpful, but generally will only present our code for Leptoseris.

###### Correlation Scatterplot ######

# To determine correlations between predictor variables and between each covariate and the response variable, we adapted code from the R Cookbook (page 155) by Paul Teetor (2011). This code produces a useful scatterplot matrix.

Lep <- read.csv("Leptoseris.csv") # All Leptoseris data

panel.cor <- function(x, y, digits=2, prefix="", cex.cor, ...) {
usr <- par("usr")
on.exit(par(usr))
par(usr = c(0, 1, 0, 1))
r <- abs(cor(x, y, use="complete.obs"))
txt <- format(c(r, 0.123456789), digits=digits)[1]
txt <- paste(prefix, txt, sep="")
if(missing(cex.cor)) cex.cor <- 1.0/strwidth(txt)
text(0.5, 0.5, txt, cex = cex.cor * (1 + r) / 2)
}
# To show histograms of each variable along the diagonal, we’ll define  panel.hist :
panel.hist <- function(x, ...) {
usr <- par("usr")
on.exit(par(usr))
par(usr = c(usr[1:2], 0, 1.5) )
h <- hist(x, plot = FALSE)
breaks <- h$breaks
nB <- length(breaks)
y <- h$counts
y <- y/max(y)
rect(breaks[-nB], 0, breaks[-1], y, col="white", ...)
}

### Checking the correlation between the oceanographic variables (current/modeled estimates)
pairs(Lep[,5:19], upper.panel = panel.cor,
diag.panel = panel.hist,
lower.panel = panel.smooth)

### Repeat the process for the Montipora dataset. We excluded potential covariates that 1) were very weakly (< 5%) correlated with the response variable, 2) displayed no clear distribution pattern (= noise), 3) and/or were too strongly correlated with one another (> 70%; only winter significant wave height (SWHW) and summer significant wave height (SWHS) exceeded this limit; the variable less correlated with the response variable was removed).

###### glmulti ######

# We randomly divided each Lep. and Mon. dataset (for leave-one-out cross-validation) and then used 'glmulti' to exhaustively iterate through all possible GLM combinations of predictors.

### Leptoseris

LepMB <- read.csv("LeptoserisModelBuild.csv")

all.model <- glm(Leptoseris ~ 
		Depth + 
		DepthSq +
		Slope +
		Rugosity +
		MCVEWinter +
		MCVNSummer +
		SWHW,
	data = LepMB, family = binomial)

best.model <- glmulti(all.model,
			level = 1, # looking at main effects 
			crit = "aicc")

summary(best.model)

weightable(best.model)

### Montipora

MonMB <- read.csv("MontiporaModelBuild.csv")

all.model <- glm(Montipora ~ 
		Depth + 
		DepthSq +
		Rugosity +
		SubstrateHardness +
		MCVNWinter +
		SWHW,
	data = MonMB, family = binomial)

best.model <- glmulti(all.model,
			level = 1, # looking at main effects 
			crit = "aicc")

summary(best.model)

weightable(best.model)

###### GLM ######

### Now that we have identified the set of covariates to be included in our GLM, we can run one (ordinary logistic regression, or OLR):

# Best Leptoseris GLM: Leptoseris ~ Depth + DepthSq + MCVNSummer + MCVEWinter + Slope + Rugosity

LepOLR <- glm(Leptoseris ~ Depth + DepthSq + MCVNSummer + MCVEWinter + Slope + Rugosity, data = LepMB, family = binomial)

summary(LepOLR)

na.omit(LepMB) # Remove all rows with NAs to check spatial autocorrelation

# Check autocorrelation
library(ncf)
correlog1 <- correlog(LepMB$Longitude, LepMB$Latitude, residuals(LepOLR), na.rm=T, increment= .5, resamp=0)

# Plotted the first X distance classes
par(mar=c(5,5,0.1,0.1))
plot(correlog1$correlation[1:6], type = 'b', pch=16, cex=1.5, lwd=1.5, xlab="Distance class", ylab ="Moran's I", cex.lab = 2, cex.axis=1.5); abline(h=0)

# Make a map of residuals
plot(LepMB$Longitude, LepMB$Latitude, col=c("blue", "red")[sign(resid(LepOLR))/2+1.5], pch=19, cex=abs(resid(LepOLR))/max(resid(LepOLR))*2, xlab="Longitude", ylab = "Latitude")

# Calculate Moran's I Values explicitly for a certain distance and test for significance
library(spdep)
Lep.nb <- dnearneigh(as.matrix(LepMB[1:2]), 0, 1)

# Turns neighborhood object into weighted list
Lep.list <- nb2listw(Lep.nb)
MT1 <- moran.test(residuals(LepOLR), listw=Lep.list)

# Calculate Geary's C (a test of local spatial autocorrelation)
# The value of Geary's C lies between 0 and 2. 1 means no spatial autocorrelation. Values lower than 1 demonstrate increasing positive spatial autocorrelation, whilst values higher than 1 illustrate increasing negative spatial autocorrelation.
GC1 <- geary.test(residuals(LepOLR), listw=Lep.list)
GC1

### Repeat for Montipora model building dataset

###### Rare Events Regression ######

### We proceed with a rare events logistic regression using package 'Zelig' (Imai et al. 2010):
library(Zelig)

# Specify proportion of 1s to 0s in full dataset
tau = 620/11929

rare.mod <- zelig(Leptoseris ~ Depth + I(Depth^2) + MCVNSummer + MCVEWinter + Slope + Rugosity, data = LepMB, model = "relogit", tau = tau)

summary(rare.mod)

###### Rare Events Regression Confidence Intervals ######

# Let's produce some figures displaying the confidence intervals associated with our coefficient (beta) estimates. Here, we use the depth covariate as an example:

Depth.range <- c(30, 180) # Tell R what range to display across your x axis
x.Depth <- setx(rare.mod, Depth = Depth.range)
s.out <- sim(rare.mod, x = x.Depth)

xl <- "Depth (m)" # x axis 
yl <- "Probability of Leptoseris Occurrence" # y axis
ma <- "Predicted Probability of Leptoseris Occurrence Across Depth" # main title
su <- " " # subtitle
plot.ci(s.out, qi = "pr", xlab = xl, ylab = yl, main = ma, leg = 4)

###### Model Assessment ######

### We need to examine which theta threshold level allows the most useful interpretation of our model output. Here, for thresholds 0.005 - 0.095, we can examine how our ratio of correctly identified pixels to incorrectly identified pixels within our model building dataset changes as the threshold level changes. True Positives (TP) are coded "1"; True Negatives (TN) are coded "999"; False Positives (FP) are coded "2"; False Negatives (FN) are coded "0".

LepMB$thresh.005 <- NA
for (i in 1:nrow(Lep)) {
if (LepMB$theta[i] < .005 && LepMB$Leptoseris[i] == 1) {
LepMB$thresh.005[i] = "0"}  
else { 
if (LepMB$theta[i] < .005 && LepMB$Leptoseris[i] == 0) {
LepMB$thresh.005[i] = "999"}  
else { 

if (LepMB$theta[i] >= .005 && LepMB$Leptoseris[i] == 1) {
LepMB$thresh.005[i] = "1"}  
else { 
if (LepMB$theta[i] >= .005 && LepMB$Leptoseris[i] == 0) {
LepMB$thresh.005[i] = "2"}  
else { 
LepMB$thresh.005[i] = "NA"
}}}}}

##########.01 threshold##############
LepMB$thresh.01 <- NA
for (i in 1:nrow(Lep)) {
if (LepMB$theta[i] < .01 && LepMB$Leptoseris[i] == 1) {
LepMB$thresh.01[i] = "0"}  
else { 
if (LepMB$theta[i] < .01 && LepMB$Leptoseris[i] == 0) {
LepMB$thresh.01[i] = "999"}  
else { 
if (LepMB$theta[i] >= .01 && LepMB$Leptoseris[i] == 1) {
LepMB$thresh.01[i] = "1"}  
else { 
if (LepMB$theta[i] >= .01 && LepMB$Leptoseris[i] == 0) {
LepMB$thresh.01[i] = "2"}  
else { 
LepMB$thresh.01[i] = "NA"
}}}}}

##########.015 threshold##############
LepMB$thresh.015 <- NA
for (i in 1:nrow(Lep)) {
if (LepMB$theta[i] < .015 && LepMB$Leptoseris[i] == 1) {
LepMB$thresh.015[i] = "0"}  
else { 
if (LepMB$theta[i] < .015 && LepMB$Leptoseris[i] == 0) {
LepMB$thresh.015[i] = "999"}  
else { 
if (LepMB$theta[i] >= .015 && LepMB$Leptoseris[i] == 1) {
LepMB$thresh.015[i] = "1"}  
else { 
if (LepMB$theta[i] >= .015 && LepMB$Leptoseris[i] == 0) {
LepMB$thresh.015[i] = "2"}  
else { 
LepMB$thresh.015[i] = "NA"
}}}}}

##########.02 threshold##############
LepMB$thresh.02 <- NA
for (i in 1:nrow(Lep)) {
if (LepMB$theta[i] < .02 && LepMB$Leptoseris[i] == 1) {
LepMB$thresh.02[i] = "0"}  
else { 
if (LepMB$theta[i] < .02 && LepMB$Leptoseris[i] == 0) {
LepMB$thresh.02[i] = "999"}  
else { 
if (LepMB$theta[i] >= .02 && LepMB$Leptoseris[i] == 1) {
LepMB$thresh.02[i] = "1"}  
else { 
if (LepMB$theta[i] >= .02 && LepMB$Leptoseris[i] == 0) {
LepMB$thresh.02[i] = "2"}  
else { 
LepMB$thresh.02[i] = "NA"
}}}}}

##########.025 threshold##############
LepMB$thresh.025 <- NA
for (i in 1:nrow(Lep)) {
if (LepMB$theta[i] < .025 && LepMB$Leptoseris[i] == 1) {
LepMB$thresh.025[i] = "0"}  
else { 
if (LepMB$theta[i] < .025 && LepMB$Leptoseris[i] == 0) {
LepMB$thresh.025[i] = "999"}  
else { 
if (LepMB$theta[i] >= .025 && LepMB$Leptoseris[i] == 1) {
LepMB$thresh.025[i] = "1"}  
else { 
if (LepMB$theta[i] >= .025 && LepMB$Leptoseris[i] == 0) {
LepMB$thresh.025[i] = "2"}  
else { 
LepMB$thresh.025[i] = "NA"
}}}}}

##########.03 threshold##############
LepMB$thresh.03 <- NA
for (i in 1:nrow(Lep)) {
if (LepMB$theta[i] < .03 && LepMB$Leptoseris[i] == 1) {
LepMB$thresh.03[i] = "0"}  
else { 
if (LepMB$theta[i] < .03 && LepMB$Leptoseris[i] == 0) {
LepMB$thresh.03[i] = "999"}  
else { 
if (LepMB$theta[i] >= .03 && LepMB$Leptoseris[i] == 1) {
LepMB$thresh.03[i] = "1"}  
else { 
if (LepMB$theta[i] >= .03 && LepMB$Leptoseris[i] == 0) {
LepMB$thresh.03[i] = "2"}  
else { 
LepMB$thresh.03[i] = "NA"
}}}}}

##########.035 threshold##############
LepMB$thresh.035 <- NA
for (i in 1:nrow(Lep)) {
if (LepMB$theta[i] < .035 && LepMB$Leptoseris[i] == 1) {
LepMB$thresh.035[i] = "0"}  
else { 
if (LepMB$theta[i] < .035 && LepMB$Leptoseris[i] == 0) {
LepMB$thresh.035[i] = "999"}  
else { 
if (LepMB$theta[i] >= .035 && LepMB$Leptoseris[i] == 1) {
LepMB$thresh.035[i] = "1"}  
else { 
if (LepMB$theta[i] >= .035 && LepMB$Leptoseris[i] == 0) {
LepMB$thresh.035[i] = "2"}  
else { 
LepMB$thresh.035[i] = "NA"
}}}}}

##########.04 threshold##############
LepMB$thresh.04 <- NA
for (i in 1:nrow(Lep)) {
if (LepMB$theta[i] < .04 && LepMB$Leptoseris[i] == 1) {
LepMB$thresh.04[i] = "0"}  
else { 
if (LepMB$theta[i] < .04 && LepMB$Leptoseris[i] == 0) {
LepMB$thresh.04[i] = "999"}  
else { 
if (LepMB$theta[i] >= .04 && LepMB$Leptoseris[i] == 1) {
LepMB$thresh.04[i] = "1"}  
else { 
if (LepMB$theta[i] >= .04 && LepMB$Leptoseris[i] == 0) {
LepMB$thresh.04[i] = "2"}  
else { 
LepMB$thresh.04[i] = "NA"
}}}}}

##########.045 threshold##############
LepMB$thresh.045 <- NA
for (i in 1:nrow(Lep)) {
if (LepMB$theta[i] < .045 && LepMB$Leptoseris[i] == 1) {
LepMB$thresh.045[i] = "0"}  
else { 
if (LepMB$theta[i] < .045 && LepMB$Leptoseris[i] == 0) {
LepMB$thresh.045[i] = "999"}  
else { 
if (LepMB$theta[i] >= .045 && LepMB$Leptoseris[i] == 1) {
LepMB$thresh.045[i] = "1"}  
else { 
if (LepMB$theta[i] >= .045 && LepMB$Leptoseris[i] == 0) {
LepMB$thresh.045[i] = "2"}  
else { 
LepMB$thresh.045[i] = "NA"
}}}}}

##########.05 threshold##############
LepMB$thresh.05 <- NA
for (i in 1:nrow(Lep)) {
if (LepMB$theta[i] < .05 && LepMB$Leptoseris[i] == 1) {
LepMB$thresh.05[i] = "0"}  
else { 
if (LepMB$theta[i] < .05 && LepMB$Leptoseris[i] == 0) {
LepMB$thresh.05[i] = "999"}  
else { 
if (LepMB$theta[i] >= .05 && LepMB$Leptoseris[i] == 1) {
LepMB$thresh.05[i] = "1"}  
else { 
if (LepMB$theta[i] >= .05 && LepMB$Leptoseris[i] == 0) {
LepMB$thresh.05[i] = "2"}  
else { 
LepMB$thresh.05[i] = "NA"
}}}}}

##########.055 threshold##############
LepMB$thresh.055 <- NA
for (i in 1:nrow(Lep)) {
if (LepMB$theta[i] < .055 && LepMB$Leptoseris[i] == 1) {
LepMB$thresh.055[i] = "0"}  
else { 
if (LepMB$theta[i] < .055 && LepMB$Leptoseris[i] == 0) {
LepMB$thresh.055[i] = "999"}  
else { 
if (LepMB$theta[i] >= .055 && LepMB$Leptoseris[i] == 1) {
LepMB$thresh.055[i] = "1"}  
else { 
if (LepMB$theta[i] >= .055 && LepMB$Leptoseris[i] == 0) {
LepMB$thresh.055[i] = "2"}  
else { 
LepMB$thresh.055[i] = "NA"
}}}}}

##########.06 threshold##############
LepMB$thresh.06 <- NA
for (i in 1:nrow(Lep)) {
if (LepMB$theta[i] < .06 && LepMB$Leptoseris[i] == 1) {
LepMB$thresh.06[i] = "0"}  
else { 
if (LepMB$theta[i] < .06 && LepMB$Leptoseris[i] == 0) {
LepMB$thresh.06[i] = "999"}  
else { 
if (LepMB$theta[i] >= .06 && LepMB$Leptoseris[i] == 1) {
LepMB$thresh.06[i] = "1"}  
else { 
if (LepMB$theta[i] >= .06 && LepMB$Leptoseris[i] == 0) {
LepMB$thresh.06[i] = "2"}  
else { 
LepMB$thresh.06[i] = "NA"
}}}}}

##########.065 threshold##############
LepMB$thresh.065 <- NA
for (i in 1:nrow(Lep)) {
if (LepMB$theta[i] < .065 && LepMB$Leptoseris[i] == 1) {
LepMB$thresh.065[i] = "0"}  
else { 
if (LepMB$theta[i] < .065 && LepMB$Leptoseris[i] == 0) {
LepMB$thresh.065[i] = "999"}  
else { 
if (LepMB$theta[i] >= .065 && LepMB$Leptoseris[i] == 1) {
LepMB$thresh.065[i] = "1"}  
else { 
if (LepMB$theta[i] >= .065 && LepMB$Leptoseris[i] == 0) {
LepMB$thresh.065[i] = "2"}  
else { 
LepMB$thresh.065[i] = "NA"
}}}}}

##########.07 threshold##############
LepMB$thresh.07 <- NA
for (i in 1:nrow(Lep)) {
if (LepMB$theta[i] < .07 && LepMB$Leptoseris[i] == 1) {
LepMB$thresh.07[i] = "0"}  
else { 
if (LepMB$theta[i] < .07 && LepMB$Leptoseris[i] == 0) {
LepMB$thresh.07[i] = "999"}  
else { 
if (LepMB$theta[i] >= .07 && LepMB$Leptoseris[i] == 1) {
LepMB$thresh.07[i] = "1"}  
else { 
if (LepMB$theta[i] >= .07 && LepMB$Leptoseris[i] == 0) {
LepMB$thresh.07[i] = "2"}  
else { 
LepMB$thresh.07[i] = "NA"
}}}}}

##########.075 threshold##############
LepMB$thresh.075 <- NA
for (i in 1:nrow(Lep)) {
if (LepMB$theta[i] < .075 && LepMB$Leptoseris[i] == 1) {
LepMB$thresh.075[i] = "0"}  
else { 
if (LepMB$theta[i] < .075 && LepMB$Leptoseris[i] == 0) {
LepMB$thresh.075[i] = "999"}  
else { 
if (LepMB$theta[i] >= .075 && LepMB$Leptoseris[i] == 1) {
LepMB$thresh.075[i] = "1"}  
else { 
if (LepMB$theta[i] >= .075 && LepMB$Leptoseris[i] == 0) {
LepMB$thresh.075[i] = "2"}  
else { 
LepMB$thresh.075[i] = "NA"
}}}}}

##########.08 threshold##############
LepMB$thresh.08 <- NA
for (i in 1:nrow(Lep)) {
if (LepMB$theta[i] < .08 && LepMB$Leptoseris[i] == 1) {
LepMB$thresh.08[i] = "0"}  
else { 
if (LepMB$theta[i] < .08 && LepMB$Leptoseris[i] == 0) {
LepMB$thresh.08[i] = "999"}  
else { 
if (LepMB$theta[i] >= .08 && LepMB$Leptoseris[i] == 1) {
LepMB$thresh.08[i] = "1"}  
else { 
if (LepMB$theta[i] >= .08 && LepMB$Leptoseris[i] == 0) {
LepMB$thresh.08[i] = "2"}  
else { 
LepMB$thresh.08[i] = "NA"
}}}}}

##########.085 threshold##############
LepMB$thresh.085 <- NA
for (i in 1:nrow(Lep)) {
if (LepMB$theta[i] < .085 && LepMB$Leptoseris[i] == 1) {
LepMB$thresh.085[i] = "0"}  
else { 
if (LepMB$theta[i] < .085 && LepMB$Leptoseris[i] == 0) {
LepMB$thresh.085[i] = "999"}  
else { 
if (LepMB$theta[i] >= .085 && LepMB$Leptoseris[i] == 1) {
LepMB$thresh.085[i] = "1"}  
else { 
if (LepMB$theta[i] >= .085 && LepMB$Leptoseris[i] == 0) {
LepMB$thresh.085[i] = "2"}  
else { 
LepMB$thresh.085[i] = "NA"
}}}}}

##########.09 threshold##############
LepMB$thresh.09 <- NA
for (i in 1:nrow(Lep)) {
if (LepMB$theta[i] < .09 && LepMB$Leptoseris[i] == 1) {
LepMB$thresh.09[i] = "0"}  
else { 
if (LepMB$theta[i] < .09 && LepMB$Leptoseris[i] == 0) {
LepMB$thresh.09[i] = "999"}  
else { 
if (LepMB$theta[i] >= .09 && LepMB$Leptoseris[i] == 1) {
LepMB$thresh.09[i] = "1"}  
else { 
if (LepMB$theta[i] >= .09 && LepMB$Leptoseris[i] == 0) {
LepMB$thresh.09[i] = "2"}  
else { 
LepMB$thresh.09[i] = "NA"
}}}}}

##########.095 threshold##############
LepMB$thresh.095 <- NA
for (i in 1:nrow(Lep)) {
if (LepMB$theta[i] < .095 && LepMB$Leptoseris[i] == 1) {
LepMB$thresh.095[i] = "0"}  
else { 
if (LepMB$theta[i] < .095 && LepMB$Leptoseris[i] == 0) {
LepMB$thresh.095[i] = "999"}  
else { 
if (LepMB$theta[i] >= .095 && LepMB$Leptoseris[i] == 1) {
LepMB$thresh.095[i] = "1"}  
else { 
if (LepMB$theta[i] >= .095 && LepMB$Leptoseris[i] == 0) {
LepMB$thresh.095[i] = "2"}  
else { 
LepMB$thresh.095[i] = "NA"
}}}}}

######Calculate Sensitivity#######

### Sensitivity: 1/(1+0), or the number of correctly identified positives divided by all positive observations in the dataset.

# threshold = .005: 
TP <- subset(Lep, LepMB$thresh.005 == 1)
FN <- subset(Lep, LepMB$thresh.005 == 0)
Sens.005 = (nrow(TP))/(nrow(TP) + nrow(FN))
Sens.005 

# threshold = .01: 
TP <- subset(Lep, LepMB$thresh.01 == 1)
FN <- subset(Lep, LepMB$thresh.01 == 0)
Sens.01 = (nrow(TP))/(nrow(TP) + nrow(FN))
Sens.01 

# threshold = .015: 
TP <- subset(Lep, LepMB$thresh.015 == 1)
FN <- subset(Lep, LepMB$thresh.015 == 0)
Sens.015 = (nrow(TP))/(nrow(TP) + nrow(FN))
Sens.015 

# threshold = .02: 
TP <- subset(Lep, LepMB$thresh.02 == 1)
FN <- subset(Lep, LepMB$thresh.02 == 0)
Sens.02 = (nrow(TP))/(nrow(TP) + nrow(FN))
Sens.02 

# threshold = .025: 
TP <- subset(Lep, LepMB$thresh.025 == 1)
FN <- subset(Lep, LepMB$thresh.025 == 0)
Sens.025 = (nrow(TP))/(nrow(TP) + nrow(FN))
Sens.025 

# threshold = .03: 
TP <- subset(Lep, LepMB$thresh.03 == 1)
FN <- subset(Lep, LepMB$thresh.03 == 0)
Sens.03 = (nrow(TP))/(nrow(TP) + nrow(FN))
Sens.03 

# threshold = .035: 
TP <- subset(Lep, LepMB$thresh.035 == 1)
FN <- subset(Lep, LepMB$thresh.035 == 0)
Sens.035 = (nrow(TP))/(nrow(TP) + nrow(FN))
Sens.035 

# threshold = .04: 
TP <- subset(Lep, LepMB$thresh.04 == 1)
FN <- subset(Lep, LepMB$thresh.04 == 0)
Sens.04 = (nrow(TP))/(nrow(TP) + nrow(FN))
Sens.04 

# threshold = .045: 
TP <- subset(Lep, LepMB$thresh.045 == 1)
FN <- subset(Lep, LepMB$thresh.045 == 0)
Sens.045 = (nrow(TP))/(nrow(TP) + nrow(FN))
Sens.045 

# threshold = .05: 
TP <- subset(Lep, LepMB$thresh.05 == 1)
FN <- subset(Lep, LepMB$thresh.05 == 0)
Sens.05 = (nrow(TP))/(nrow(TP) + nrow(FN))
Sens.05 

# threshold = .055: 
TP <- subset(Lep, LepMB$thresh.055 == 1)
FN <- subset(Lep, LepMB$thresh.055 == 0)
Sens.055 = (nrow(TP))/(nrow(TP) + nrow(FN))
Sens.055 

# threshold = .06: 
TP <- subset(Lep, LepMB$thresh.06 == 1)
FN <- subset(Lep, LepMB$thresh.06 == 0)
Sens.06 = (nrow(TP))/(nrow(TP) + nrow(FN))
Sens.06 

# threshold = .065: 
TP <- subset(Lep, LepMB$thresh.065 == 1)
FN <- subset(Lep, LepMB$thresh.065 == 0)
Sens.065 = (nrow(TP))/(nrow(TP) + nrow(FN))
Sens.065 

# threshold = .07: 
TP <- subset(Lep, LepMB$thresh.07 == 1)
FN <- subset(Lep, LepMB$thresh.07 == 0)
Sens.07 = (nrow(TP))/(nrow(TP) + nrow(FN))
Sens.07 

# threshold = .075: 
TP <- subset(Lep, LepMB$thresh.075 == 1)
FN <- subset(Lep, LepMB$thresh.075 == 0)
Sens.075 = (nrow(TP))/(nrow(TP) + nrow(FN))
Sens.075 

# threshold = .08: 
TP <- subset(Lep, LepMB$thresh.08 == 1)
FN <- subset(Lep, LepMB$thresh.08 == 0)
Sens.08 = (nrow(TP))/(nrow(TP) + nrow(FN))
Sens.08 

# threshold = .085: 
TP <- subset(Lep, LepMB$thresh.085 == 1)
FN <- subset(Lep, LepMB$thresh.085 == 0)
Sens.085 = (nrow(TP))/(nrow(TP) + nrow(FN))
Sens.085 

# threshold = .09: 
TP <- subset(Lep, LepMB$thresh.09 == 1)
FN <- subset(Lep, LepMB$thresh.09 == 0)
Sens.09 = (nrow(TP))/(nrow(TP) + nrow(FN))
Sens.09 

# threshold = .095: 
TP <- subset(Lep, LepMB$thresh.095 == 1)
FN <- subset(Lep, LepMB$thresh.095 == 0)
Sens.095 = (nrow(TP))/(nrow(TP) + nrow(FN))
Sens.095 

######Calculate Specificity######

### Specificity: 999/(999 + 2), or the number of correctly identified negatives divided by all negative observations in the dataset.

# threshold = .005: 
TN <- subset(Lep, LepMB$thresh.005 == 999)
FP <- subset(Lep, LepMB$thresh.005 == 2)
Spec.005 = (nrow(TN))/(nrow(TN) + nrow(FP))
Spec.005 

# threshold = .01: 
TN <- subset(Lep, LepMB$thresh.01 == 999)
FP <- subset(Lep, LepMB$thresh.01 == 2)
Spec.01 = (nrow(TN))/(nrow(TN) + nrow(FP))
Spec.01 

# threshold = .015: 
TN <- subset(Lep, LepMB$thresh.015 == 999)
FP <- subset(Lep, LepMB$thresh.015 == 2)
Spec.015 = (nrow(TN))/(nrow(TN) + nrow(FP))
Spec.015 

# threshold = .02: 
TN <- subset(Lep, LepMB$thresh.02 == 999)
FP <- subset(Lep, LepMB$thresh.02 == 2)
Spec.02 = (nrow(TN))/(nrow(TN) + nrow(FP))
Spec.02 

# threshold = .025: 
TN <- subset(Lep, LepMB$thresh.025 == 999)
FP <- subset(Lep, LepMB$thresh.025 == 2)
Spec.025 = (nrow(TN))/(nrow(TN) + nrow(FP))
Spec.025 

# threshold = .03: 
TN <- subset(Lep, LepMB$thresh.03 == 999)
FP <- subset(Lep, LepMB$thresh.03 == 2)
Spec.03 = (nrow(TN))/(nrow(TN) + nrow(FP))
Spec.03 

# threshold = .035: 
TN <- subset(Lep, LepMB$thresh.035 == 999)
FP <- subset(Lep, LepMB$thresh.035 == 2)
Spec.035 = (nrow(TN))/(nrow(TN) + nrow(FP))
Spec.035 

# threshold = .04: 
TN <- subset(Lep, LepMB$thresh.04 == 999)
FP <- subset(Lep, LepMB$thresh.04 == 2)
Spec.04 = (nrow(TN))/(nrow(TN) + nrow(FP))
Spec.04 

# threshold = .045: 
TN <- subset(Lep, LepMB$thresh.045 == 999)
FP <- subset(Lep, LepMB$thresh.045 == 2)
Spec.045 = (nrow(TN))/(nrow(TN) + nrow(FP))
Spec.045 

# threshold = .05: 
TN <- subset(Lep, LepMB$thresh.05 == 999)
FP <- subset(Lep, LepMB$thresh.05 == 2)
Spec.05 = (nrow(TN))/(nrow(TN) + nrow(FP))
Spec.05

# threshold = .055: 
TN <- subset(Lep, LepMB$thresh.055 == 999)
FP <- subset(Lep, LepMB$thresh.055 == 2)
Spec.055 = (nrow(TN))/(nrow(TN) + nrow(FP))
Spec.055 

# threshold = .06: 
TN <- subset(Lep, LepMB$thresh.06 == 999)
FP <- subset(Lep, LepMB$thresh.06 == 2)
Spec.06 = (nrow(TN))/(nrow(TN) + nrow(FP))
Spec.06 

# threshold = .065: 
TN <- subset(Lep, LepMB$thresh.065 == 999)
FP <- subset(Lep, LepMB$thresh.065 == 2)
Spec.065 = (nrow(TN))/(nrow(TN) + nrow(FP))
Spec.065 

# threshold = .07: 
TN <- subset(Lep, LepMB$thresh.07 == 999)
FP <- subset(Lep, LepMB$thresh.07 == 2)
Spec.07 = (nrow(TN))/(nrow(TN) + nrow(FP))
Spec.07 

# threshold = .075: 
TN <- subset(Lep, LepMB$thresh.075 == 999)
FP <- subset(Lep, LepMB$thresh.075 == 2)
Spec.075 = (nrow(TN))/(nrow(TN) + nrow(FP))
Spec.075 

# threshold = .08: 
TN <- subset(Lep, LepMB$thresh.08 == 999)
FP <- subset(Lep, LepMB$thresh.08 == 2)
Spec.08 = (nrow(TN))/(nrow(TN) + nrow(FP))
Spec.08 

# threshold = .085: 
TN <- subset(Lep, LepMB$thresh.085 == 999)
FP <- subset(Lep, LepMB$thresh.085 == 2)
Spec.085 = (nrow(TN))/(nrow(TN) + nrow(FP))
Spec.085

# threshold = .09: 
TN <- subset(Lep, LepMB$thresh.09 == 999)
FP <- subset(Lep, LepMB$thresh.09 == 2)
Spec.09 = (nrow(TN))/(nrow(TN) + nrow(FP))
Spec.09 

# threshold = .095: 
TN <- subset(Lep, LepMB$thresh.095 == 999)
FP <- subset(Lep, LepMB$thresh.095 == 2)
Spec.095 = (nrow(TN))/(nrow(TN) + nrow(FP))
Spec.095 

### Repeat the steps above for the Montipora model building dataset. Perform cross validation by using the selected theta threshold value to check the Leptoseris and Montipora model testing datasets.

######ROC and AUC######

### We create a receiver operating characteristics (ROC) curve to display how changing theta thresholds changes our interpretations of our model output. 
library(ggplot2)
ROC <- read.csv("ROC.csv") # Load sensitivity/specificity values for all models and all genera
ggplot(ROC, aes(x=X1.Specificity, y=Sensitivity, shape = Model, color = Model, )) + geom_point() + geom_line(aes(color=Model)) + xlab("1-Specificity (False Positive Rate)") + ylab("Sensitivity (True Positive Rate)") + ggtitle("ROC curves") + geom_abline(intercept = 0, slope = 1, colour = 'black', size = 0.8, linetype = 'dotted')

### Using our calculated sensitivity and specificity values at different thresholds, we can calculate area under the curve (AUC). 

sens <- c(1, Sens.05, Sens.1, Sens.15, Sens.2, Sens.25, Sens.3, Sens.35, Sens.4, Sens.45, Sens.5, Sens.55, Sens.6, Sens.65, Sens.7, Sens.75, Sens.8, Sens.85, Sens.9, Sens.95, 0)

omspec <- c(1, 1-Spec.05, 1-Spec.1, 1-Spec.15, 1-Spec.2, 1-Spec.25, 1-Spec.3, 1-Spec.35, 1-Spec.4, 1-Spec.45, 1-Spec.5, 1-Spec.55, 1-Spec.6, 1-Spec.65, 1-Spec.7, 1-Spec.75, 1-Spec.8, 1-Spec.85, 1-Spec.9, 1-Spec.95, 0) # One minus specificity

# Calculate AUC for each model
height = (sens[-1]+sens[-length(sens)])/2
width = -diff(omspec)
sum(height*width)

#######Overall Prediction Success######

# For each model, simply divide the number of correctly identified pixels (TP + TN) by the number of incorrectly identified pixels (FP + FN) within that dataset at your chosen theta threshold.