### R Scripts to accompany "The implementation of rare events logistic regression to predict the distribution of mesophotic hard corals across the main Hawaiian Islands" (Veazey et al. 2016). ### All code assembled by Lindsay Veazey (lindsayv@hawaii.edu). Adapt freely, but please give credit if you feel it is due! ### Please note that the code below is a selection of code used for our study. We have included code using Montipora and Leptoseris where helpful, but generally will only present our code for Leptoseris. ###### Correlation Scatterplot ###### # To determine correlations between predictor variables and between each covariate and the response variable, we adapted code from the R Cookbook (page 155) by Paul Teetor (2011). This code produces a useful scatterplot matrix. Lep <- read.csv("Leptoseris.csv") # All Leptoseris data panel.cor <- function(x, y, digits=2, prefix="", cex.cor, ...) { usr <- par("usr") on.exit(par(usr)) par(usr = c(0, 1, 0, 1)) r <- abs(cor(x, y, use="complete.obs")) txt <- format(c(r, 0.123456789), digits=digits)[1] txt <- paste(prefix, txt, sep="") if(missing(cex.cor)) cex.cor <- 1.0/strwidth(txt) text(0.5, 0.5, txt, cex = cex.cor * (1 + r) / 2) } # To show histograms of each variable along the diagonal, we’ll define panel.hist : panel.hist <- function(x, ...) { usr <- par("usr") on.exit(par(usr)) par(usr = c(usr[1:2], 0, 1.5) ) h <- hist(x, plot = FALSE) breaks <- h$breaks nB <- length(breaks) y <- h$counts y <- y/max(y) rect(breaks[-nB], 0, breaks[-1], y, col="white", ...) } ### Checking the correlation between the oceanographic variables (current/modeled estimates) pairs(Lep[,5:19], upper.panel = panel.cor, diag.panel = panel.hist, lower.panel = panel.smooth) ### Repeat the process for the Montipora dataset. We excluded potential covariates that 1) were very weakly (< 5%) correlated with the response variable, 2) displayed no clear distribution pattern (= noise), 3) and/or were too strongly correlated with one another (> 70%; only winter significant wave height (SWHW) and summer significant wave height (SWHS) exceeded this limit; the variable less correlated with the response variable was removed). ###### glmulti ###### # We randomly divided each Lep. and Mon. dataset (for leave-one-out cross-validation) and then used 'glmulti' to exhaustively iterate through all possible GLM combinations of predictors. ### Leptoseris LepMB <- read.csv("LeptoserisModelBuild.csv") all.model <- glm(Leptoseris ~ Depth + DepthSq + Slope + Rugosity + MCVEWinter + MCVNSummer + SWHW, data = LepMB, family = binomial) best.model <- glmulti(all.model, level = 1, # looking at main effects crit = "aicc") summary(best.model) weightable(best.model) ### Montipora MonMB <- read.csv("MontiporaModelBuild.csv") all.model <- glm(Montipora ~ Depth + DepthSq + Rugosity + SubstrateHardness + MCVNWinter + SWHW, data = MonMB, family = binomial) best.model <- glmulti(all.model, level = 1, # looking at main effects crit = "aicc") summary(best.model) weightable(best.model) ###### GLM ###### ### Now that we have identified the set of covariates to be included in our GLM, we can run one (ordinary logistic regression, or OLR): # Best Leptoseris GLM: Leptoseris ~ Depth + DepthSq + MCVNSummer + MCVEWinter + Slope + Rugosity LepOLR <- glm(Leptoseris ~ Depth + DepthSq + MCVNSummer + MCVEWinter + Slope + Rugosity, data = LepMB, family = binomial) summary(LepOLR) na.omit(LepMB) # Remove all rows with NAs to check spatial autocorrelation # Check autocorrelation library(ncf) correlog1 <- correlog(LepMB$Longitude, LepMB$Latitude, residuals(LepOLR), na.rm=T, increment= .5, resamp=0) # Plotted the first X distance classes par(mar=c(5,5,0.1,0.1)) plot(correlog1$correlation[1:6], type = 'b', pch=16, cex=1.5, lwd=1.5, xlab="Distance class", ylab ="Moran's I", cex.lab = 2, cex.axis=1.5); abline(h=0) # Make a map of residuals plot(LepMB$Longitude, LepMB$Latitude, col=c("blue", "red")[sign(resid(LepOLR))/2+1.5], pch=19, cex=abs(resid(LepOLR))/max(resid(LepOLR))*2, xlab="Longitude", ylab = "Latitude") # Calculate Moran's I Values explicitly for a certain distance and test for significance library(spdep) Lep.nb <- dnearneigh(as.matrix(LepMB[1:2]), 0, 1) # Turns neighborhood object into weighted list Lep.list <- nb2listw(Lep.nb) MT1 <- moran.test(residuals(LepOLR), listw=Lep.list) # Calculate Geary's C (a test of local spatial autocorrelation) # The value of Geary's C lies between 0 and 2. 1 means no spatial autocorrelation. Values lower than 1 demonstrate increasing positive spatial autocorrelation, whilst values higher than 1 illustrate increasing negative spatial autocorrelation. GC1 <- geary.test(residuals(LepOLR), listw=Lep.list) GC1 ### Repeat for Montipora model building dataset ###### Rare Events Regression ###### ### We proceed with a rare events logistic regression using package 'Zelig' (Imai et al. 2010): library(Zelig) # Specify proportion of 1s to 0s in full dataset tau = 620/11929 rare.mod <- zelig(Leptoseris ~ Depth + I(Depth^2) + MCVNSummer + MCVEWinter + Slope + Rugosity, data = LepMB, model = "relogit", tau = tau) summary(rare.mod) ###### Rare Events Regression Confidence Intervals ###### # Let's produce some figures displaying the confidence intervals associated with our coefficient (beta) estimates. Here, we use the depth covariate as an example: Depth.range <- c(30, 180) # Tell R what range to display across your x axis x.Depth <- setx(rare.mod, Depth = Depth.range) s.out <- sim(rare.mod, x = x.Depth) xl <- "Depth (m)" # x axis yl <- "Probability of Leptoseris Occurrence" # y axis ma <- "Predicted Probability of Leptoseris Occurrence Across Depth" # main title su <- " " # subtitle plot.ci(s.out, qi = "pr", xlab = xl, ylab = yl, main = ma, leg = 4) ###### Model Assessment ###### ### We need to examine which theta threshold level allows the most useful interpretation of our model output. Here, for thresholds 0.005 - 0.095, we can examine how our ratio of correctly identified pixels to incorrectly identified pixels within our model building dataset changes as the threshold level changes. True Positives (TP) are coded "1"; True Negatives (TN) are coded "999"; False Positives (FP) are coded "2"; False Negatives (FN) are coded "0". LepMB$thresh.005 <- NA for (i in 1:nrow(Lep)) { if (LepMB$theta[i] < .005 && LepMB$Leptoseris[i] == 1) { LepMB$thresh.005[i] = "0"} else { if (LepMB$theta[i] < .005 && LepMB$Leptoseris[i] == 0) { LepMB$thresh.005[i] = "999"} else { if (LepMB$theta[i] >= .005 && LepMB$Leptoseris[i] == 1) { LepMB$thresh.005[i] = "1"} else { if (LepMB$theta[i] >= .005 && LepMB$Leptoseris[i] == 0) { LepMB$thresh.005[i] = "2"} else { LepMB$thresh.005[i] = "NA" }}}}} ##########.01 threshold############## LepMB$thresh.01 <- NA for (i in 1:nrow(Lep)) { if (LepMB$theta[i] < .01 && LepMB$Leptoseris[i] == 1) { LepMB$thresh.01[i] = "0"} else { if (LepMB$theta[i] < .01 && LepMB$Leptoseris[i] == 0) { LepMB$thresh.01[i] = "999"} else { if (LepMB$theta[i] >= .01 && LepMB$Leptoseris[i] == 1) { LepMB$thresh.01[i] = "1"} else { if (LepMB$theta[i] >= .01 && LepMB$Leptoseris[i] == 0) { LepMB$thresh.01[i] = "2"} else { LepMB$thresh.01[i] = "NA" }}}}} ##########.015 threshold############## LepMB$thresh.015 <- NA for (i in 1:nrow(Lep)) { if (LepMB$theta[i] < .015 && LepMB$Leptoseris[i] == 1) { LepMB$thresh.015[i] = "0"} else { if (LepMB$theta[i] < .015 && LepMB$Leptoseris[i] == 0) { LepMB$thresh.015[i] = "999"} else { if (LepMB$theta[i] >= .015 && LepMB$Leptoseris[i] == 1) { LepMB$thresh.015[i] = "1"} else { if (LepMB$theta[i] >= .015 && LepMB$Leptoseris[i] == 0) { LepMB$thresh.015[i] = "2"} else { LepMB$thresh.015[i] = "NA" }}}}} ##########.02 threshold############## LepMB$thresh.02 <- NA for (i in 1:nrow(Lep)) { if (LepMB$theta[i] < .02 && LepMB$Leptoseris[i] == 1) { LepMB$thresh.02[i] = "0"} else { if (LepMB$theta[i] < .02 && LepMB$Leptoseris[i] == 0) { LepMB$thresh.02[i] = "999"} else { if (LepMB$theta[i] >= .02 && LepMB$Leptoseris[i] == 1) { LepMB$thresh.02[i] = "1"} else { if (LepMB$theta[i] >= .02 && LepMB$Leptoseris[i] == 0) { LepMB$thresh.02[i] = "2"} else { LepMB$thresh.02[i] = "NA" }}}}} ##########.025 threshold############## LepMB$thresh.025 <- NA for (i in 1:nrow(Lep)) { if (LepMB$theta[i] < .025 && LepMB$Leptoseris[i] == 1) { LepMB$thresh.025[i] = "0"} else { if (LepMB$theta[i] < .025 && LepMB$Leptoseris[i] == 0) { LepMB$thresh.025[i] = "999"} else { if (LepMB$theta[i] >= .025 && LepMB$Leptoseris[i] == 1) { LepMB$thresh.025[i] = "1"} else { if (LepMB$theta[i] >= .025 && LepMB$Leptoseris[i] == 0) { LepMB$thresh.025[i] = "2"} else { LepMB$thresh.025[i] = "NA" }}}}} ##########.03 threshold############## LepMB$thresh.03 <- NA for (i in 1:nrow(Lep)) { if (LepMB$theta[i] < .03 && LepMB$Leptoseris[i] == 1) { LepMB$thresh.03[i] = "0"} else { if (LepMB$theta[i] < .03 && LepMB$Leptoseris[i] == 0) { LepMB$thresh.03[i] = "999"} else { if (LepMB$theta[i] >= .03 && LepMB$Leptoseris[i] == 1) { LepMB$thresh.03[i] = "1"} else { if (LepMB$theta[i] >= .03 && LepMB$Leptoseris[i] == 0) { LepMB$thresh.03[i] = "2"} else { LepMB$thresh.03[i] = "NA" }}}}} ##########.035 threshold############## LepMB$thresh.035 <- NA for (i in 1:nrow(Lep)) { if (LepMB$theta[i] < .035 && LepMB$Leptoseris[i] == 1) { LepMB$thresh.035[i] = "0"} else { if (LepMB$theta[i] < .035 && LepMB$Leptoseris[i] == 0) { LepMB$thresh.035[i] = "999"} else { if (LepMB$theta[i] >= .035 && LepMB$Leptoseris[i] == 1) { LepMB$thresh.035[i] = "1"} else { if (LepMB$theta[i] >= .035 && LepMB$Leptoseris[i] == 0) { LepMB$thresh.035[i] = "2"} else { LepMB$thresh.035[i] = "NA" }}}}} ##########.04 threshold############## LepMB$thresh.04 <- NA for (i in 1:nrow(Lep)) { if (LepMB$theta[i] < .04 && LepMB$Leptoseris[i] == 1) { LepMB$thresh.04[i] = "0"} else { if (LepMB$theta[i] < .04 && LepMB$Leptoseris[i] == 0) { LepMB$thresh.04[i] = "999"} else { if (LepMB$theta[i] >= .04 && LepMB$Leptoseris[i] == 1) { LepMB$thresh.04[i] = "1"} else { if (LepMB$theta[i] >= .04 && LepMB$Leptoseris[i] == 0) { LepMB$thresh.04[i] = "2"} else { LepMB$thresh.04[i] = "NA" }}}}} ##########.045 threshold############## LepMB$thresh.045 <- NA for (i in 1:nrow(Lep)) { if (LepMB$theta[i] < .045 && LepMB$Leptoseris[i] == 1) { LepMB$thresh.045[i] = "0"} else { if (LepMB$theta[i] < .045 && LepMB$Leptoseris[i] == 0) { LepMB$thresh.045[i] = "999"} else { if (LepMB$theta[i] >= .045 && LepMB$Leptoseris[i] == 1) { LepMB$thresh.045[i] = "1"} else { if (LepMB$theta[i] >= .045 && LepMB$Leptoseris[i] == 0) { LepMB$thresh.045[i] = "2"} else { LepMB$thresh.045[i] = "NA" }}}}} ##########.05 threshold############## LepMB$thresh.05 <- NA for (i in 1:nrow(Lep)) { if (LepMB$theta[i] < .05 && LepMB$Leptoseris[i] == 1) { LepMB$thresh.05[i] = "0"} else { if (LepMB$theta[i] < .05 && LepMB$Leptoseris[i] == 0) { LepMB$thresh.05[i] = "999"} else { if (LepMB$theta[i] >= .05 && LepMB$Leptoseris[i] == 1) { LepMB$thresh.05[i] = "1"} else { if (LepMB$theta[i] >= .05 && LepMB$Leptoseris[i] == 0) { LepMB$thresh.05[i] = "2"} else { LepMB$thresh.05[i] = "NA" }}}}} ##########.055 threshold############## LepMB$thresh.055 <- NA for (i in 1:nrow(Lep)) { if (LepMB$theta[i] < .055 && LepMB$Leptoseris[i] == 1) { LepMB$thresh.055[i] = "0"} else { if (LepMB$theta[i] < .055 && LepMB$Leptoseris[i] == 0) { LepMB$thresh.055[i] = "999"} else { if (LepMB$theta[i] >= .055 && LepMB$Leptoseris[i] == 1) { LepMB$thresh.055[i] = "1"} else { if (LepMB$theta[i] >= .055 && LepMB$Leptoseris[i] == 0) { LepMB$thresh.055[i] = "2"} else { LepMB$thresh.055[i] = "NA" }}}}} ##########.06 threshold############## LepMB$thresh.06 <- NA for (i in 1:nrow(Lep)) { if (LepMB$theta[i] < .06 && LepMB$Leptoseris[i] == 1) { LepMB$thresh.06[i] = "0"} else { if (LepMB$theta[i] < .06 && LepMB$Leptoseris[i] == 0) { LepMB$thresh.06[i] = "999"} else { if (LepMB$theta[i] >= .06 && LepMB$Leptoseris[i] == 1) { LepMB$thresh.06[i] = "1"} else { if (LepMB$theta[i] >= .06 && LepMB$Leptoseris[i] == 0) { LepMB$thresh.06[i] = "2"} else { LepMB$thresh.06[i] = "NA" }}}}} ##########.065 threshold############## LepMB$thresh.065 <- NA for (i in 1:nrow(Lep)) { if (LepMB$theta[i] < .065 && LepMB$Leptoseris[i] == 1) { LepMB$thresh.065[i] = "0"} else { if (LepMB$theta[i] < .065 && LepMB$Leptoseris[i] == 0) { LepMB$thresh.065[i] = "999"} else { if (LepMB$theta[i] >= .065 && LepMB$Leptoseris[i] == 1) { LepMB$thresh.065[i] = "1"} else { if (LepMB$theta[i] >= .065 && LepMB$Leptoseris[i] == 0) { LepMB$thresh.065[i] = "2"} else { LepMB$thresh.065[i] = "NA" }}}}} ##########.07 threshold############## LepMB$thresh.07 <- NA for (i in 1:nrow(Lep)) { if (LepMB$theta[i] < .07 && LepMB$Leptoseris[i] == 1) { LepMB$thresh.07[i] = "0"} else { if (LepMB$theta[i] < .07 && LepMB$Leptoseris[i] == 0) { LepMB$thresh.07[i] = "999"} else { if (LepMB$theta[i] >= .07 && LepMB$Leptoseris[i] == 1) { LepMB$thresh.07[i] = "1"} else { if (LepMB$theta[i] >= .07 && LepMB$Leptoseris[i] == 0) { LepMB$thresh.07[i] = "2"} else { LepMB$thresh.07[i] = "NA" }}}}} ##########.075 threshold############## LepMB$thresh.075 <- NA for (i in 1:nrow(Lep)) { if (LepMB$theta[i] < .075 && LepMB$Leptoseris[i] == 1) { LepMB$thresh.075[i] = "0"} else { if (LepMB$theta[i] < .075 && LepMB$Leptoseris[i] == 0) { LepMB$thresh.075[i] = "999"} else { if (LepMB$theta[i] >= .075 && LepMB$Leptoseris[i] == 1) { LepMB$thresh.075[i] = "1"} else { if (LepMB$theta[i] >= .075 && LepMB$Leptoseris[i] == 0) { LepMB$thresh.075[i] = "2"} else { LepMB$thresh.075[i] = "NA" }}}}} ##########.08 threshold############## LepMB$thresh.08 <- NA for (i in 1:nrow(Lep)) { if (LepMB$theta[i] < .08 && LepMB$Leptoseris[i] == 1) { LepMB$thresh.08[i] = "0"} else { if (LepMB$theta[i] < .08 && LepMB$Leptoseris[i] == 0) { LepMB$thresh.08[i] = "999"} else { if (LepMB$theta[i] >= .08 && LepMB$Leptoseris[i] == 1) { LepMB$thresh.08[i] = "1"} else { if (LepMB$theta[i] >= .08 && LepMB$Leptoseris[i] == 0) { LepMB$thresh.08[i] = "2"} else { LepMB$thresh.08[i] = "NA" }}}}} ##########.085 threshold############## LepMB$thresh.085 <- NA for (i in 1:nrow(Lep)) { if (LepMB$theta[i] < .085 && LepMB$Leptoseris[i] == 1) { LepMB$thresh.085[i] = "0"} else { if (LepMB$theta[i] < .085 && LepMB$Leptoseris[i] == 0) { LepMB$thresh.085[i] = "999"} else { if (LepMB$theta[i] >= .085 && LepMB$Leptoseris[i] == 1) { LepMB$thresh.085[i] = "1"} else { if (LepMB$theta[i] >= .085 && LepMB$Leptoseris[i] == 0) { LepMB$thresh.085[i] = "2"} else { LepMB$thresh.085[i] = "NA" }}}}} ##########.09 threshold############## LepMB$thresh.09 <- NA for (i in 1:nrow(Lep)) { if (LepMB$theta[i] < .09 && LepMB$Leptoseris[i] == 1) { LepMB$thresh.09[i] = "0"} else { if (LepMB$theta[i] < .09 && LepMB$Leptoseris[i] == 0) { LepMB$thresh.09[i] = "999"} else { if (LepMB$theta[i] >= .09 && LepMB$Leptoseris[i] == 1) { LepMB$thresh.09[i] = "1"} else { if (LepMB$theta[i] >= .09 && LepMB$Leptoseris[i] == 0) { LepMB$thresh.09[i] = "2"} else { LepMB$thresh.09[i] = "NA" }}}}} ##########.095 threshold############## LepMB$thresh.095 <- NA for (i in 1:nrow(Lep)) { if (LepMB$theta[i] < .095 && LepMB$Leptoseris[i] == 1) { LepMB$thresh.095[i] = "0"} else { if (LepMB$theta[i] < .095 && LepMB$Leptoseris[i] == 0) { LepMB$thresh.095[i] = "999"} else { if (LepMB$theta[i] >= .095 && LepMB$Leptoseris[i] == 1) { LepMB$thresh.095[i] = "1"} else { if (LepMB$theta[i] >= .095 && LepMB$Leptoseris[i] == 0) { LepMB$thresh.095[i] = "2"} else { LepMB$thresh.095[i] = "NA" }}}}} ######Calculate Sensitivity####### ### Sensitivity: 1/(1+0), or the number of correctly identified positives divided by all positive observations in the dataset. # threshold = .005: TP <- subset(Lep, LepMB$thresh.005 == 1) FN <- subset(Lep, LepMB$thresh.005 == 0) Sens.005 = (nrow(TP))/(nrow(TP) + nrow(FN)) Sens.005 # threshold = .01: TP <- subset(Lep, LepMB$thresh.01 == 1) FN <- subset(Lep, LepMB$thresh.01 == 0) Sens.01 = (nrow(TP))/(nrow(TP) + nrow(FN)) Sens.01 # threshold = .015: TP <- subset(Lep, LepMB$thresh.015 == 1) FN <- subset(Lep, LepMB$thresh.015 == 0) Sens.015 = (nrow(TP))/(nrow(TP) + nrow(FN)) Sens.015 # threshold = .02: TP <- subset(Lep, LepMB$thresh.02 == 1) FN <- subset(Lep, LepMB$thresh.02 == 0) Sens.02 = (nrow(TP))/(nrow(TP) + nrow(FN)) Sens.02 # threshold = .025: TP <- subset(Lep, LepMB$thresh.025 == 1) FN <- subset(Lep, LepMB$thresh.025 == 0) Sens.025 = (nrow(TP))/(nrow(TP) + nrow(FN)) Sens.025 # threshold = .03: TP <- subset(Lep, LepMB$thresh.03 == 1) FN <- subset(Lep, LepMB$thresh.03 == 0) Sens.03 = (nrow(TP))/(nrow(TP) + nrow(FN)) Sens.03 # threshold = .035: TP <- subset(Lep, LepMB$thresh.035 == 1) FN <- subset(Lep, LepMB$thresh.035 == 0) Sens.035 = (nrow(TP))/(nrow(TP) + nrow(FN)) Sens.035 # threshold = .04: TP <- subset(Lep, LepMB$thresh.04 == 1) FN <- subset(Lep, LepMB$thresh.04 == 0) Sens.04 = (nrow(TP))/(nrow(TP) + nrow(FN)) Sens.04 # threshold = .045: TP <- subset(Lep, LepMB$thresh.045 == 1) FN <- subset(Lep, LepMB$thresh.045 == 0) Sens.045 = (nrow(TP))/(nrow(TP) + nrow(FN)) Sens.045 # threshold = .05: TP <- subset(Lep, LepMB$thresh.05 == 1) FN <- subset(Lep, LepMB$thresh.05 == 0) Sens.05 = (nrow(TP))/(nrow(TP) + nrow(FN)) Sens.05 # threshold = .055: TP <- subset(Lep, LepMB$thresh.055 == 1) FN <- subset(Lep, LepMB$thresh.055 == 0) Sens.055 = (nrow(TP))/(nrow(TP) + nrow(FN)) Sens.055 # threshold = .06: TP <- subset(Lep, LepMB$thresh.06 == 1) FN <- subset(Lep, LepMB$thresh.06 == 0) Sens.06 = (nrow(TP))/(nrow(TP) + nrow(FN)) Sens.06 # threshold = .065: TP <- subset(Lep, LepMB$thresh.065 == 1) FN <- subset(Lep, LepMB$thresh.065 == 0) Sens.065 = (nrow(TP))/(nrow(TP) + nrow(FN)) Sens.065 # threshold = .07: TP <- subset(Lep, LepMB$thresh.07 == 1) FN <- subset(Lep, LepMB$thresh.07 == 0) Sens.07 = (nrow(TP))/(nrow(TP) + nrow(FN)) Sens.07 # threshold = .075: TP <- subset(Lep, LepMB$thresh.075 == 1) FN <- subset(Lep, LepMB$thresh.075 == 0) Sens.075 = (nrow(TP))/(nrow(TP) + nrow(FN)) Sens.075 # threshold = .08: TP <- subset(Lep, LepMB$thresh.08 == 1) FN <- subset(Lep, LepMB$thresh.08 == 0) Sens.08 = (nrow(TP))/(nrow(TP) + nrow(FN)) Sens.08 # threshold = .085: TP <- subset(Lep, LepMB$thresh.085 == 1) FN <- subset(Lep, LepMB$thresh.085 == 0) Sens.085 = (nrow(TP))/(nrow(TP) + nrow(FN)) Sens.085 # threshold = .09: TP <- subset(Lep, LepMB$thresh.09 == 1) FN <- subset(Lep, LepMB$thresh.09 == 0) Sens.09 = (nrow(TP))/(nrow(TP) + nrow(FN)) Sens.09 # threshold = .095: TP <- subset(Lep, LepMB$thresh.095 == 1) FN <- subset(Lep, LepMB$thresh.095 == 0) Sens.095 = (nrow(TP))/(nrow(TP) + nrow(FN)) Sens.095 ######Calculate Specificity###### ### Specificity: 999/(999 + 2), or the number of correctly identified negatives divided by all negative observations in the dataset. # threshold = .005: TN <- subset(Lep, LepMB$thresh.005 == 999) FP <- subset(Lep, LepMB$thresh.005 == 2) Spec.005 = (nrow(TN))/(nrow(TN) + nrow(FP)) Spec.005 # threshold = .01: TN <- subset(Lep, LepMB$thresh.01 == 999) FP <- subset(Lep, LepMB$thresh.01 == 2) Spec.01 = (nrow(TN))/(nrow(TN) + nrow(FP)) Spec.01 # threshold = .015: TN <- subset(Lep, LepMB$thresh.015 == 999) FP <- subset(Lep, LepMB$thresh.015 == 2) Spec.015 = (nrow(TN))/(nrow(TN) + nrow(FP)) Spec.015 # threshold = .02: TN <- subset(Lep, LepMB$thresh.02 == 999) FP <- subset(Lep, LepMB$thresh.02 == 2) Spec.02 = (nrow(TN))/(nrow(TN) + nrow(FP)) Spec.02 # threshold = .025: TN <- subset(Lep, LepMB$thresh.025 == 999) FP <- subset(Lep, LepMB$thresh.025 == 2) Spec.025 = (nrow(TN))/(nrow(TN) + nrow(FP)) Spec.025 # threshold = .03: TN <- subset(Lep, LepMB$thresh.03 == 999) FP <- subset(Lep, LepMB$thresh.03 == 2) Spec.03 = (nrow(TN))/(nrow(TN) + nrow(FP)) Spec.03 # threshold = .035: TN <- subset(Lep, LepMB$thresh.035 == 999) FP <- subset(Lep, LepMB$thresh.035 == 2) Spec.035 = (nrow(TN))/(nrow(TN) + nrow(FP)) Spec.035 # threshold = .04: TN <- subset(Lep, LepMB$thresh.04 == 999) FP <- subset(Lep, LepMB$thresh.04 == 2) Spec.04 = (nrow(TN))/(nrow(TN) + nrow(FP)) Spec.04 # threshold = .045: TN <- subset(Lep, LepMB$thresh.045 == 999) FP <- subset(Lep, LepMB$thresh.045 == 2) Spec.045 = (nrow(TN))/(nrow(TN) + nrow(FP)) Spec.045 # threshold = .05: TN <- subset(Lep, LepMB$thresh.05 == 999) FP <- subset(Lep, LepMB$thresh.05 == 2) Spec.05 = (nrow(TN))/(nrow(TN) + nrow(FP)) Spec.05 # threshold = .055: TN <- subset(Lep, LepMB$thresh.055 == 999) FP <- subset(Lep, LepMB$thresh.055 == 2) Spec.055 = (nrow(TN))/(nrow(TN) + nrow(FP)) Spec.055 # threshold = .06: TN <- subset(Lep, LepMB$thresh.06 == 999) FP <- subset(Lep, LepMB$thresh.06 == 2) Spec.06 = (nrow(TN))/(nrow(TN) + nrow(FP)) Spec.06 # threshold = .065: TN <- subset(Lep, LepMB$thresh.065 == 999) FP <- subset(Lep, LepMB$thresh.065 == 2) Spec.065 = (nrow(TN))/(nrow(TN) + nrow(FP)) Spec.065 # threshold = .07: TN <- subset(Lep, LepMB$thresh.07 == 999) FP <- subset(Lep, LepMB$thresh.07 == 2) Spec.07 = (nrow(TN))/(nrow(TN) + nrow(FP)) Spec.07 # threshold = .075: TN <- subset(Lep, LepMB$thresh.075 == 999) FP <- subset(Lep, LepMB$thresh.075 == 2) Spec.075 = (nrow(TN))/(nrow(TN) + nrow(FP)) Spec.075 # threshold = .08: TN <- subset(Lep, LepMB$thresh.08 == 999) FP <- subset(Lep, LepMB$thresh.08 == 2) Spec.08 = (nrow(TN))/(nrow(TN) + nrow(FP)) Spec.08 # threshold = .085: TN <- subset(Lep, LepMB$thresh.085 == 999) FP <- subset(Lep, LepMB$thresh.085 == 2) Spec.085 = (nrow(TN))/(nrow(TN) + nrow(FP)) Spec.085 # threshold = .09: TN <- subset(Lep, LepMB$thresh.09 == 999) FP <- subset(Lep, LepMB$thresh.09 == 2) Spec.09 = (nrow(TN))/(nrow(TN) + nrow(FP)) Spec.09 # threshold = .095: TN <- subset(Lep, LepMB$thresh.095 == 999) FP <- subset(Lep, LepMB$thresh.095 == 2) Spec.095 = (nrow(TN))/(nrow(TN) + nrow(FP)) Spec.095 ### Repeat the steps above for the Montipora model building dataset. Perform cross validation by using the selected theta threshold value to check the Leptoseris and Montipora model testing datasets. ######ROC and AUC###### ### We create a receiver operating characteristics (ROC) curve to display how changing theta thresholds changes our interpretations of our model output. library(ggplot2) ROC <- read.csv("ROC.csv") # Load sensitivity/specificity values for all models and all genera ggplot(ROC, aes(x=X1.Specificity, y=Sensitivity, shape = Model, color = Model, )) + geom_point() + geom_line(aes(color=Model)) + xlab("1-Specificity (False Positive Rate)") + ylab("Sensitivity (True Positive Rate)") + ggtitle("ROC curves") + geom_abline(intercept = 0, slope = 1, colour = 'black', size = 0.8, linetype = 'dotted') ### Using our calculated sensitivity and specificity values at different thresholds, we can calculate area under the curve (AUC). sens <- c(1, Sens.05, Sens.1, Sens.15, Sens.2, Sens.25, Sens.3, Sens.35, Sens.4, Sens.45, Sens.5, Sens.55, Sens.6, Sens.65, Sens.7, Sens.75, Sens.8, Sens.85, Sens.9, Sens.95, 0) omspec <- c(1, 1-Spec.05, 1-Spec.1, 1-Spec.15, 1-Spec.2, 1-Spec.25, 1-Spec.3, 1-Spec.35, 1-Spec.4, 1-Spec.45, 1-Spec.5, 1-Spec.55, 1-Spec.6, 1-Spec.65, 1-Spec.7, 1-Spec.75, 1-Spec.8, 1-Spec.85, 1-Spec.9, 1-Spec.95, 0) # One minus specificity # Calculate AUC for each model height = (sens[-1]+sens[-length(sens)])/2 width = -diff(omspec) sum(height*width) #######Overall Prediction Success###### # For each model, simply divide the number of correctly identified pixels (TP + TN) by the number of incorrectly identified pixels (FP + FN) within that dataset at your chosen theta threshold.