####################################################################################################### ## Bogdan Cristescu. R Studio Version 1.0.143 Script ## Cristescu B, Domokos C, Teichman KJ, Nielsen SE. Large carnivore habitat suitability modelling ## for Romania and associated predictions for protected areas. PeerJ. ####################################################################################################### ## BEAR getwd() setwd("C:/ProtArea/GWR") warnings() ## Merge shapefile and .csv to obtain spatial data object # library(raster) # bear1 <- shapefile("C:/ProtArea/GWR/bear_distribution.shp") # bear2 <- read.csv("C:/ProtArea/GWR/Raw_bear.csv") # RawBear <- merge(bear1, bear2, by.bear1="WMU", by.bear2="WMU") # RawBear ## Save as shapefile again: # shapefile(RawBear, "C:/ProtArea/GWR/RawBearAll.shp") ## Import shapefile #####################################################library(rgdal) library(rgdal) RawBear <- readOGR(".", "RawBearAll") summary(RawBear) ## Map a component of the shapefile, in this case tribmn library(tmap) tm_shape(RawBear) + tm_polygons(style="quantile", col = "presence") + tm_legend(outside = TRUE, text.size = .4) ## Display missing data RawBear@data[!complete.cases(RawBear@data),] ## Function to remove missing data in the SpatialPolygonsDataFrame class object ## "margin" removes rows (1) or columns (2) sp.na.omit <- function(RawBear, margin=1) { if (!inherits(RawBear, "SpatialPointsDataFrame") & !inherits(RawBear, "SpatialPolygonsDataFrame")) stop("MUST BE SpatialPointsDataFrame OR SpatialPolygonsDataFrame CLASS OBJECT") na.index <- unique(as.data.frame(which(is.na(RawBear@data),arr.ind=TRUE))[,margin]) if(margin == 1) { cat("DELETING ROWS: ", na.index, "\n") return( RawBear[-na.index,] ) } if(margin == 2) { cat("DELETING COLUMNS: ", na.index, "\n") return( RawBear[,-na.index] ) } } ## Delete missing values ("NA") and show changes using "dim" RawBear2 <- sp.na.omit(RawBear) tm_shape(RawBear2) + tm_polygons(style="quantile", col = "presence") + tm_legend(outside = TRUE, text.size = .4) dim(RawBear) dim(RawBear2) ## Plot deleted polygons in red plot(RawBear, col="red", pch=20) plot(RawBear2, col="black", pch=20, add=TRUE) ## Covariate correlation matrix, building a dataframe ("CovariateDF") first library(corrplot) source("C:/ProtArea/GWR/Function_rquery_cormat.r") # source the rquery.cormat function CovariateDFB <- as.data.frame(RawBear2) head(CovariateDFB) dim(CovariateDFB) CovariateDFB[, c(-(1:16),-22,-(33:34),-37)] # inspect columns to include in correlation matrix rquery.cormat(CovariateDFB[, c(-(1:16),-22,-(33:34),-37)]) ## Not used: Export the data frame to Excel # write.csv(RawBear2,'GWR_Bear.csv') ## Standardize variables for regression analyses SDagricbmn <- (RawBear2$agricbmn - mean(RawBear2$agricbmn))/sd(RawBear2$agricbmn) SDagricbmn2 <- (RawBear2$agricbmn2 - mean(RawBear2$agricbmn2))/sd(RawBear2$agricbmn2) SDartifbmn <- (RawBear2$artifbmn - mean(RawBear2$artifbmn))/sd(RawBear2$artifbmn) SDnatrdbmn <- (RawBear2$natrdbmn - mean(RawBear2$natrdbmn))/sd(RawBear2$natrdbmn) SDnatrdbmn2 <- (RawBear2$natrdbmn2 - mean(RawBear2$natrdbmn2))/sd(RawBear2$natrdbmn2) SDmixedbmn <- (RawBear2$mixedbmn - mean(RawBear2$mixedbmn))/sd(RawBear2$mixedbmn) SDshrbmn <- (RawBear2$shrbmn - mean(RawBear2$shrbmn))/sd(RawBear2$shrbmn) SDtribmn <- (RawBear2$tribmn - mean(RawBear2$tribmn))/sd(RawBear2$tribmn) SDtribmn2 <- (RawBear2$tribmn2 - mean(RawBear2$tribmn2))/sd(RawBear2$tribmn2) SDconifbmn <- (RawBear2$conifbmn - mean(RawBear2$conifbmn))/sd(RawBear2$conifbmn) SDshrbmn <- (RawBear2$shrbmn - mean(RawBear2$shrbmn))/sd(RawBear2$shrbmn) SDcomrdbmn <- (RawBear2$comrdbmn - mean(RawBear2$comrdbmn))/sd(RawBear2$comrdbmn) SDcomrdbmn2 <- (RawBear2$comrdbmn2 - mean(RawBear2$comrdbmn2))/sd(RawBear2$comrdbmn2) SDcourdbmn <- (RawBear2$courdbmn - mean(RawBear2$courdbmn))/sd(RawBear2$courdbmn) SDcourdbmn2 <- (RawBear2$courdbmn2 - mean(RawBear2$courdbmn2))/sd(RawBear2$courdbmn2) SDbrdlfbmn <- (RawBear2$brdlfbmn - mean(RawBear2$brdlfbmn))/sd(RawBear2$brdlfbmn) SDpastbmn <- (RawBear2$pastbmn - mean(RawBear2$pastbmn))/sd(RawBear2$pastbmn) SDpastbmn2 <- (RawBear2$pastbmn2 - mean(RawBear2$pastbmn2))/sd(RawBear2$pastbmn2) ## Attach standardized variables to data frame RawBear2$SDagricbmn <- SDagricbmn RawBear2$SDagricbmn2 <- SDagricbmn2 RawBear2$SDartifbmn <- SDartifbmn RawBear2$SDnatrdbmn <- SDnatrdbmn RawBear2$SDnatrdbmn2 <- SDnatrdbmn2 RawBear2$SDmixedbmn <- SDmixedbmn RawBear2$SDshrbmn <- SDshrbmn RawBear2$SDtribmn <- SDtribmn RawBear2$SDtribmn2 <- SDtribmn2 RawBear2$SDconifbmn <- SDconifbmn RawBear2$SDshrbmn <- SDshrbmn RawBear2$SDcomrdbmn <- SDcomrdbmn RawBear2$SDcomrdbmn2 <- SDcomrdbmn2 RawBear2$SDcourdbmn <- SDcourdbmn RawBear2$SDcourdbmn2 <- SDcourdbmn2 RawBear2$SDbrdlfbmn <- SDbrdlfbmn RawBear2$SDpastbmn <- SDpastbmn RawBear2$SDpastbmn2 <- SDpastbmn2 names(RawBear2) ## Driscoll and Kraay (1998) robust standard errors for cross-sectional autocorrelation ## Robust standard error calculations applied after logistic regression ## lag length set to: floor(4 * (T / 100)^(2/9)) ## rule of thumb proposed by Hoechle (2007) based on Newey & West (1994) library(sandwich) H0 <- glm(presence~1, family = binomial(logit), data = RawBear2) H0 anova(H0) summary(H0) sqrt(diag(vcovPL(H0, lag = "NW1994"))) H1 <- glm(presence~SDtribmn+SDtribmn2, family = binomial(logit), data = RawBear2) H1 anova(H1) summary(H1) sqrt(diag(vcovPL(H1, lag = "NW1994"))) H2 <- glm(presence~SDbrdlfbmn+SDmixedbmn+SDconifbmn, family = binomial(logit), data = RawBear2) H2 anova(H2) summary(H2) sqrt(diag(vcovPL(H2, lag = "NW1994"))) H3 <- glm(presence~SDbrdlfbmn+SDmixedbmn+SDconifbmn+SDshrbmn, family = binomial(logit), data = RawBear2) H3 anova(H3) summary(H3) sqrt(diag(vcovPL(H3, lag = "NW1994"))) H4 <- glm(presence~SDbrdlfbmn+SDconifbmn+SDtribmn+SDtribmn2, family = binomial(logit), data = RawBear2) H4 anova(H4) summary(H4) sqrt(diag(vcovPL(H4, lag = "NW1994"))) H5 <- glm(presence~SDpastbmn+SDpastbmn2+SDagricbmn+SDagricbmn2+SDartifbmn, family = binomial(logit), data = RawBear2) H5 anova(H5) summary(H5) sqrt(diag(vcovPL(H5, lag = "NW1994"))) H6 <- glm(presence~SDnatrdbmn+SDnatrdbmn2+SDcourdbmn+SDcourdbmn2+SDcomrdbmn+SDcomrdbmn2, family = binomial(logit), data = RawBear2) H6 anova(H6) summary(H6) sqrt(diag(vcovPL(H6, lag = "NW1994"))) H7 <- glm(presence~SDnatrdbmn+SDnatrdbmn2+SDcourdbmn+SDcourdbmn2+SDcomrdbmn+SDcomrdbmn2+SDartifbmn, family = binomial(logit), data = RawBear2) H7 anova(H7) summary(H7) sqrt(diag(vcovPL(H7, lag = "NW1994"))) H8 <- glm(presence~SDnatrdbmn+SDnatrdbmn2+SDcourdbmn+SDcourdbmn2+SDcomrdbmn+SDcomrdbmn2+SDpastbmn+SDpastbmn2+SDagricbmn+SDagricbmn2+SDartifbmn, family = binomial(logit), data = RawBear2) H8 anova(H8) summary(H8) sqrt(diag(vcovPL(H8, lag = "NW1994"))) H9 <- glm(presence~SDartifbmn+SDtribmn+SDtribmn2, family = binomial(logit), data = RawBear2) H9 anova(H9) summary(H9) sqrt(diag(vcovPL(H9, lag = "NW1994"))) H10 <- glm(presence~SDpastbmn+SDpastbmn2+SDartifbmn+SDtribmn+SDtribmn2, family = binomial(logit), data = RawBear2) H10 anova(H10) summary(H10) sqrt(diag(vcovPL(H10, lag = "NW1994"))) H11 <- glm(presence~SDnatrdbmn+SDnatrdbmn2+SDcourdbmn+SDcourdbmn2+SDcomrdbmn+SDcomrdbmn2+SDtribmn+SDtribmn2, family = binomial(logit), data = RawBear2) H11 anova(H11) summary(H11) sqrt(diag(vcovPL(H11, lag = "NW1994"))) H12 <- glm(presence~SDpastbmn+SDpastbmn2+SDartifbmn+SDnatrdbmn+SDnatrdbmn2+SDcourdbmn+SDcourdbmn2+SDcomrdbmn+SDcomrdbmn2+SDtribmn+SDtribmn2, family = binomial(logit), data = RawBear2) H12 anova(H12) summary(H12) sqrt(diag(vcovPL(H12, lag = "NW1994"))) H13 <- glm(presence~SDbrdlfbmn+SDnatrdbmn+SDnatrdbmn2+SDcourdbmn+SDcourdbmn2+SDcomrdbmn+SDcomrdbmn2, family = binomial(logit), data = RawBear2) H13 anova(H13) summary(H13) sqrt(diag(vcovPL(H13, lag = "NW1994"))) H14 <- glm(presence~SDbrdlfbmn+SDmixedbmn+SDconifbmn+SDnatrdbmn+SDnatrdbmn2+SDcourdbmn+SDcourdbmn2+SDcomrdbmn+SDcomrdbmn2, family = binomial(logit), data = RawBear2) H14 anova(H14) summary(H14) sqrt(diag(vcovPL(H14, lag = "NW1994"))) H15 <- glm(presence~SDbrdlfbmn+SDmixedbmn+SDconifbmn+SDshrbmn+SDnatrdbmn+SDnatrdbmn2+SDcourdbmn+SDcourdbmn2+SDcomrdbmn+SDcomrdbmn2, family = binomial(logit), data = RawBear2) H15 anova(H15) summary(H15) sqrt(diag(vcovPL(H15, lag = "NW1994"))) H16 <- glm(presence~SDmixedbmn+SDconifbmn+SDnatrdbmn+SDnatrdbmn2+SDcourdbmn+SDcourdbmn2+SDcomrdbmn+SDcomrdbmn2+SDpastbmn+SDpastbmn2+SDagricbmn+SDagricbmn2+SDartifbmn, family = binomial(logit), data = RawBear2) H16 anova(H16) summary(H16) sqrt(diag(vcovPL(H16, lag = "NW1994"))) ## H16 is the top bear model based on AIC ranking. # Obtain odds ratios library(broom) library(magrittr) library(dplyr) modelH16.df <- tidy(H16) print.data.frame(modelH16.df) modelH16.df %>% mutate(or = exp(estimate), # Odds ratio/gradient options(pillar.sigfig=7)) # Increase number of decimals visible ## Check multicollinearity between independent variables library(car) vif(H16) ## H16 is the top bear model based on AIC ranking. Check residual plots library(boot) glm.diag.plots(H16) ## Attach deviance residuals and fitted values (probabilities) to the SpatialPolygonsDataFrame class object dim(RawBear2) head(RawBear2) RawBear2$DevRes <- resid(H16, type = c("deviance")) RawBear2$RelProb <- fitted(H16, fitted = fitted(H16)) dim(RawBear2) head(RawBear2) summary(RawBear2$RelProb) ## Convert fitted values to presence/absence based on cutoff 0.5 RawBear2$PrPres <- ifelse(RawBear2$RelProb > 0.5, 1, 0) head(RawBear2) ## Export as shapefile writeOGR(obj=RawBear2, dsn="C:/ProtArea/GWR", layer = "Bear_Final", driver="ESRI Shapefile") ## Generate predicted probabilities for the top bear model (H16) library(ggplot2) ## SDmixedbmn ## Generate a dataframe with all variables except the variable of interest kept at mean level DFSDmixedbmn <- with(CovariateDFB, data.frame(mixedbmn, SDmixedbmn, SDconifbmn = mean(SDconifbmn), SDnatrdbmn = mean(SDnatrdbmn), SDnatrdbmn2 = mean(SDnatrdbmn2), SDcourdbmn = mean(SDcourdbmn), SDcourdbmn2 = mean(SDcourdbmn2), SDcomrdbmn = mean(SDcomrdbmn), SDcomrdbmn2 = mean(SDcomrdbmn2), SDpastbmn = mean(SDpastbmn), SDpastbmn2 = mean(SDpastbmn2), SDagricbmn = mean(SDagricbmn), SDagricbmn2 = mean(SDagricbmn2), SDartifbmn = mean(SDartifbmn))) head(DFSDmixedbmn) summary(DFSDmixedbmn$mixedbmn) summary(DFSDmixedbmn$SDmixedbmn) ## Obtain predicted probabilities DFSDmixedbmn$PredProb <- predict(H16, newdata = DFSDmixedbmn, type = "response") DFSDmixedbmn ## Plot predicted probabilities ggplot(DFSDmixedbmn, aes(x = mixedbmn, y = PredProb)) + xlim(c(0, 1)) + ylim(c(0, 1)) + geom_line(aes(colour = mixedbmn), size = 5) + labs(x = "Mixed forest", y = "Predicted probability") + theme(plot.margin = unit(c(0.5, 0.5, 0.5, 0.5), "cm"), legend.position="none", text = element_text(size=36), axis.title.y = element_text(margin = margin(t = 0, r = 15, b = 10, l = 0))) ## SDconifbmn DFSDconifbmn <- with(CovariateDFB, data.frame(SDmixedbmn = mean(SDmixedbmn), conifbmn, SDconifbmn, SDnatrdbmn = mean(SDnatrdbmn), SDnatrdbmn2 = mean(SDnatrdbmn2), SDcourdbmn = mean(SDcourdbmn), SDcourdbmn2 = mean(SDcourdbmn2), SDcomrdbmn = mean(SDcomrdbmn), SDcomrdbmn2 = mean(SDcomrdbmn2), SDpastbmn = mean(SDpastbmn), SDpastbmn2 = mean(SDpastbmn2), SDagricbmn = mean(SDagricbmn), SDagricbmn2 = mean(SDagricbmn2), SDartifbmn = mean(SDartifbmn))) head(DFSDconifbmn) summary(DFSDconifbmn$conifbmn) summary(DFSDconifbmn$SDconifbmn) ## Obtain predicted probabilities DFSDconifbmn$PredProb <- predict(H16, newdata = DFSDconifbmn, type = "response") DFSDconifbmn ## Plot predicted probabilities ggplot(DFSDconifbmn, aes(x = conifbmn, y = PredProb)) + xlim(c(0, 1)) + ylim(c(0, 1)) + geom_line(aes(colour = conifbmn), size = 5) + labs(x = "Conifer forest", y = "Predicted probability") + theme(plot.margin = unit(c(0.5, 0.5, 0.5, 0.5), "cm"), legend.position="none", text = element_text(size=36), axis.title.y = element_text(margin = margin(t = 0, r = 15, b = 10, l = 0))) ## SDnatrdbmn DFSDnatrdbmn <- with(CovariateDFB, data.frame(SDmixedbmn = mean(SDmixedbmn), SDconifbmn = mean(SDconifbmn), natrdbmn, SDnatrdbmn, SDnatrdbmn2 = mean(SDnatrdbmn2), SDcourdbmn = mean(SDcourdbmn), SDcourdbmn2 = mean(SDcourdbmn2), SDcomrdbmn = mean(SDcomrdbmn), SDcomrdbmn2 = mean(SDcomrdbmn2), SDpastbmn = mean(SDpastbmn), SDpastbmn2 = mean(SDpastbmn2), SDagricbmn = mean(SDagricbmn), SDagricbmn2 = mean(SDagricbmn2), SDartifbmn = mean(SDartifbmn))) head(DFSDnatrdbmn) summary(DFSDnatrdbmn$natrdbmn) summary(DFSDnatrdbmn$SDnatrdbmn) ## Obtain predicted probabilities DFSDnatrdbmn$PredProb <- predict(H16, newdata = DFSDnatrdbmn, type = "response") DFSDnatrdbmn ## Plot predicted probabilities ggplot(DFSDnatrdbmn, aes(x = natrdbmn, y = PredProb)) + xlim(c(0, 0.15)) + ylim(c(0, 1)) + geom_line(aes(colour = natrdbmn), size = 5) + labs(x = "National road", y = "Predicted probability") + theme(plot.margin = unit(c(0.5, 0.5, 0.5, 0.5), "cm"), legend.position="none", text = element_text(size=36), axis.title.y = element_text(margin = margin(t = 0, r = 15, b = 10, l = 0))) ## SDcourdbmn DFSDcourdbmn <- with(CovariateDFB, data.frame(SDmixedbmn = mean(SDmixedbmn), SDconifbmn = mean(SDconifbmn), SDnatrdbmn = mean(SDnatrdbmn), SDnatrdbmn2 = mean(SDnatrdbmn2), courdbmn, SDcourdbmn, SDcourdbmn2 = mean(SDcourdbmn2), SDcomrdbmn = mean(SDcomrdbmn), SDcomrdbmn2 = mean(SDcomrdbmn2), SDpastbmn = mean(SDpastbmn), SDpastbmn2 = mean(SDpastbmn2), SDagricbmn = mean(SDagricbmn), SDagricbmn2 = mean(SDagricbmn2), SDartifbmn = mean(SDartifbmn))) head(DFSDcourdbmn) summary(DFSDcourdbmn$courdbmn) summary(DFSDcourdbmn$SDcourdbmn) ## Obtain predicted probabilities DFSDcourdbmn$PredProb <- predict(H16, newdata = DFSDcourdbmn, type = "response") DFSDcourdbmn ## Plot predicted probabilities ggplot(DFSDcourdbmn, aes(x = courdbmn, y = PredProb)) + xlim(c(0, 0.35)) + ylim(c(0, 1)) + geom_line(aes(colour = courdbmn), size = 5) + labs(x = "County road", y = "Predicted probability") + theme(plot.margin = unit(c(0.5, 0.5, 0.5, 0.5), "cm"), legend.position="none", text = element_text(size=36), axis.title.y = element_text(margin = margin(t = 0, r = 15, b = 10, l = 0))) ## SDcomrdbmn DFSDcomrdbmn <- with(CovariateDFB, data.frame(SDmixedbmn = mean(SDmixedbmn), SDconifbmn = mean(SDconifbmn), SDnatrdbmn = mean(SDnatrdbmn), SDnatrdbmn2 = mean(SDnatrdbmn2), SDcourdbmn = mean(SDcourdbmn), SDcourdbmn2 = mean(SDcourdbmn2), comrdbmn, SDcomrdbmn, SDcomrdbmn2 = mean(SDcomrdbmn2), SDpastbmn = mean(SDpastbmn), SDpastbmn2 = mean(SDpastbmn2), SDagricbmn = mean(SDagricbmn), SDagricbmn2 = mean(SDagricbmn2), SDartifbmn = mean(SDartifbmn))) head(DFSDcomrdbmn) summary(DFSDcomrdbmn$comrdbmn) summary(DFSDcomrdbmn$SDcomrdbmn) ## Obtain predicted probabilities DFSDcomrdbmn$PredProb <- predict(H16, newdata = DFSDcomrdbmn, type = "response") DFSDcomrdbmn ## Plot predicted probabilities ggplot(DFSDcomrdbmn, aes(x = comrdbmn, y = PredProb)) + xlim(c(0, 0.8)) + ylim(c(0, 1)) + geom_line(aes(colour = comrdbmn), size = 5) + labs(x = "Communal road", y = "Predicted probability") + theme(plot.margin = unit(c(0.5, 0.5, 0.5, 0.5), "cm"), legend.position="none", text = element_text(size=36), axis.title.y = element_text(margin = margin(t = 0, r = 15, b = 10, l = 0))) ## SDpastbmn DFSDpastbmn <- with(CovariateDFB, data.frame(SDmixedbmn = mean(SDmixedbmn), SDconifbmn = mean(SDconifbmn), SDnatrdbmn = mean(SDnatrdbmn), SDnatrdbmn2 = mean(SDnatrdbmn2), SDcourdbmn = mean(SDcourdbmn), SDcourdbmn2 = mean(SDcourdbmn2), SDcomrdbmn = mean(SDcomrdbmn), SDcomrdbmn2 = mean(SDcomrdbmn2), pastbmn, SDpastbmn, SDpastbmn2 = mean(SDpastbmn2), SDagricbmn = mean(SDagricbmn), SDagricbmn2 = mean(SDagricbmn2), SDartifbmn = mean(SDartifbmn))) head(DFSDpastbmn) summary(DFSDpastbmn$pastbmn) summary(DFSDpastbmn$SDpastbmn) ## Obtain predicted probabilities DFSDpastbmn$PredProb <- predict(H16, newdata = DFSDpastbmn, type = "response") DFSDpastbmn ## Plot predicted probabilities ggplot(DFSDpastbmn, aes(x = pastbmn, y = PredProb)) + xlim(c(0, 0.6)) + ylim(c(0, 1)) + geom_line(aes(colour = pastbmn), size = 5) + labs(x = "Pasture", y = "Predicted probability") + theme(plot.margin = unit(c(0.5, 0.5, 0.5, 0.5), "cm"), legend.position="none", text = element_text(size=36), axis.title.y = element_text(margin = margin(t = 0, r = 15, b = 10, l = 0))) ## SDagricbmn DFSDagricbmn <- with(CovariateDFB, data.frame(SDmixedbmn = mean(SDmixedbmn), SDconifbmn = mean(SDconifbmn), SDnatrdbmn = mean(SDnatrdbmn), SDnatrdbmn2 = mean(SDnatrdbmn2), SDcourdbmn = mean(SDcourdbmn), SDcourdbmn2 = mean(SDcourdbmn2), SDcomrdbmn = mean(SDcomrdbmn), SDcomrdbmn2 = mean(SDcomrdbmn2), SDpastbmn = mean(SDpastbmn), SDpastbmn2 = mean(SDpastbmn2), agricbmn, SDagricbmn, SDagricbmn2 = mean(SDagricbmn2), SDartifbmn = mean(SDartifbmn))) head(DFSDagricbmn) summary(DFSDagricbmn$agricbmn) summary(DFSDagricbmn$SDagricbmn) ## Obtain predicted probabilities DFSDagricbmn$PredProb <- predict(H16, newdata = DFSDagricbmn, type = "response") DFSDagricbmn ## Plot predicted probabilities ggplot(DFSDagricbmn, aes(x = agricbmn, y = PredProb)) + xlim(c(0, 1)) + ylim(c(0, 1)) + geom_line(aes(colour = agricbmn), size = 5) + labs(x = "Cultivation", y = "Predicted probability") + theme(plot.margin = unit(c(0.5, 0.5, 0.5, 0.5), "cm"), legend.position="none", text = element_text(size=36), axis.title.y = element_text(margin = margin(t = 0, r = 15, b = 10, l = 0))) ## SDartifbmn DFSDartifbmn <- with(CovariateDFB, data.frame(SDmixedbmn = mean(SDmixedbmn), SDconifbmn = mean(SDconifbmn), SDnatrdbmn = mean(SDnatrdbmn), SDnatrdbmn2 = mean(SDnatrdbmn2), SDcourdbmn = mean(SDcourdbmn), SDcourdbmn2 = mean(SDcourdbmn2), SDcomrdbmn = mean(SDcomrdbmn), SDcomrdbmn2 = mean(SDcomrdbmn2), SDpastbmn = mean(SDpastbmn), SDpastbmn2 = mean(SDpastbmn2), SDagricbmn = mean(SDagricbmn), SDagricbmn2 = mean(SDagricbmn2), artifbmn, SDartifbmn)) head(DFSDartifbmn) summary(DFSDartifbmn$artifbmn) summary(DFSDartifbmn$SDartifbmn) ## Obtain predicted probabilities DFSDartifbmn$PredProb <- predict(H16, newdata = DFSDartifbmn, type = "response") DFSDartifbmn ## Plot predicted probabilities ggplot(DFSDartifbmn, aes(x = artifbmn, y = PredProb)) + xlim(c(0, 0.6)) + ylim(c(0, 1)) + geom_line(aes(colour = artifbmn), size = 5) + labs(x = "Artificial", y = "Predicted probability") + theme(plot.margin = unit(c(0.5, 0.5, 0.5, 0.5), "cm"), legend.position="none", text = element_text(size=36), axis.title.y = element_text(margin = margin(t = 0, r = 15, b = 10, l = 0))) ## K-fold cross-validation ## Huberty's Rule percentTestData <- function(nvar) { # v1.1 (28 Mar 2013) # heuristic ('rule of thumb') of Huberty (1994; in Fielding & Bell 1997) to determine the test/training data ratio for presence-absence models # nvar: number of predictor variables huberty <- 1 / (1 + sqrt(nvar - 1)) return(round(100 * huberty, 1)) } percentTestData(14) # 14 represents the number of parameters in the model ## K-fold library(boot) cv.errorB <- cv.glm(RawBear2, H16, K=5)$delta[1] 1-cv.errorB dim(CovariateDFB) H17 <- glm(presence~SDbrdlfbmn+SDnatrdbmn+SDnatrdbmn2+SDcourdbmn+SDcourdbmn2+SDcomrdbmn+SDcomrdbmn2+SDtribmn+SDtribmn2, family = binomial(logit), data = RawBear2) H17 anova(H17) summary(H17) sqrt(diag(vcovPL(H17, lag = "NW1994"))) H18 <- glm(presence~SDbrdlfbmn+SDconifbmn+SDnatrdbmn+SDnatrdbmn2+SDcourdbmn+SDcourdbmn2+SDcomrdbmn+SDcomrdbmn2+SDtribmn+SDtribmn2, family = binomial(logit), data = RawBear2) H18 anova(H18) summary(H18) sqrt(diag(vcovPL(H18, lag = "NW1994"))) H19 <- glm(presence~SDbrdlfbmn+SDnatrdbmn+SDnatrdbmn2+SDcourdbmn+SDcourdbmn2+SDcomrdbmn+SDcomrdbmn2+SDpastbmn+SDpastbmn2+SDartifbmn+SDtribmn+SDtribmn2, family = binomial(logit), data = RawBear2) H19 anova(H19) summary(H19) sqrt(diag(vcovPL(H19, lag = "NW1994"))) H20 <- glm(presence~SDbrdlfbmn+SDconifbmn+SDnatrdbmn+SDnatrdbmn2+SDcourdbmn+SDcourdbmn2+SDcomrdbmn+SDcomrdbmn2+SDpastbmn+SDpastbmn2+SDartifbmn+SDtribmn+SDtribmn2, family = binomial(logit), data = RawBear2) H20 anova(H20) summary(H20) sqrt(diag(vcovPL(H20, lag = "NW1994"))) ## Full memory clean-up rm(list = ls()) # remove gc() # garbage collection .rs.restartR() ####################################################################################################### ## WOLF getwd() setwd("C:/ProtArea/GWR") ## Merge shapefile and .csv to obtain spatial data object # library(raster) # wolf1 <- shapefile("C:/ProtArea/GWR/wolf_distribution.shp") # wolf2 <- read.csv("C:/ProtArea/GWR/Raw_wolf.csv") # RawWolf <- merge(wolf1, wolf2, by.wolf1="WMU", by.wolf2="WMU") # RawWolf ## Save as shapefile again: # shapefile(RawWolf, "C:/ProtArea/GWR/RawWolfAll.shp") ## Import shapefile library(rgdal) RawWolf <- readOGR(".", "RawWolfAll") summary(RawWolf) ## Map a component of the shapefile, in this case triwmn library(tmap) tm_shape(RawWolf) + tm_polygons(style="quantile", col = "presence") + tm_legend(outside = TRUE, text.size = .4) ## Display missing data RawWolf@data[!complete.cases(RawWolf@data),] ## Function to remove missing data in the SpatialPolygonsDataFrame class object ## "margin" removes rows (1) or columns (2) sp.na.omit <- function(RawWolf, margin=1) { if (!inherits(RawWolf, "SpatialPointsDataFrame") & !inherits(RawWolf, "SpatialPolygonsDataFrame")) stop("MUST BE SpatialPointsDataFrame OR SpatialPolygonsDataFrame CLASS OBJECT") na.index <- unique(as.data.frame(which(is.na(RawWolf@data),arr.ind=TRUE))[,margin]) if(margin == 1) { cat("DELETING ROWS: ", na.index, "\n") return( RawWolf[-na.index,] ) } if(margin == 2) { cat("DELETING COLUMNS: ", na.index, "\n") return( RawWolf[,-na.index] ) } } ## Delete missing values ("NA") and show changes using "dim" RawWolf2 <- sp.na.omit(RawWolf) tm_shape(RawWolf2) + tm_polygons(style="quantile", col = "presence") + tm_legend(outside = TRUE, text.size = .4) dim(RawWolf) dim(RawWolf2) ## Plot deleted polygons in red plot(RawWolf, col="red", pch=20) plot(RawWolf2, col="black", pch=20, add=TRUE) ## Covariate correlation matrix, building a dataframe ("CovariateDF") first library(corrplot) source("C:/ProtArea/GWR/Function_rquery_cormat.r") # source the rquery.cormat function CovariateDFW <- as.data.frame(RawWolf2) head(CovariateDFW) dim(CovariateDFW) CovariateDFW[, c(-(1:16),-22,-(33:34))] # inspect columns to include in correlation matrix rquery.cormat(CovariateDFW[, c(-(1:16),-22,-(33:34))]) ## Not used: Export the data frame to Excel # write.csv(RawWolf,'GWR_Wolf.csv') ## Standardize variables for regression analyses SDagricwmn <- (RawWolf2$agricwmn - mean(RawWolf2$agricwmn))/sd(RawWolf2$agricwmn) SDagricwmn2 <- (RawWolf2$agricwmn2 - mean(RawWolf2$agricwmn2))/sd(RawWolf2$agricwmn2) SDartifwmn <- (RawWolf2$artifwmn - mean(RawWolf2$artifwmn))/sd(RawWolf2$artifwmn) SDnatrdwmn <- (RawWolf2$natrdwmn - mean(RawWolf2$natrdwmn))/sd(RawWolf2$natrdwmn) SDnatrdwmn2 <- (RawWolf2$natrdwmn2 - mean(RawWolf2$natrdwmn2))/sd(RawWolf2$natrdwmn2) SDmixedwmn <- (RawWolf2$mixedwmn - mean(RawWolf2$mixedwmn))/sd(RawWolf2$mixedwmn) SDshrwmn <- (RawWolf2$shrwmn - mean(RawWolf2$shrwmn))/sd(RawWolf2$shrwmn) SDtriwmn <- (RawWolf2$triwmn - mean(RawWolf2$triwmn))/sd(RawWolf2$triwmn) SDtriwmn2 <- (RawWolf2$triwmn2 - mean(RawWolf2$triwmn2))/sd(RawWolf2$triwmn2) SDconifwmn <- (RawWolf2$conifwmn - mean(RawWolf2$conifwmn))/sd(RawWolf2$conifwmn) SDshrwmn <- (RawWolf2$shrwmn - mean(RawWolf2$shrwmn))/sd(RawWolf2$shrwmn) SDcomrdwmn <- (RawWolf2$comrdwmn - mean(RawWolf2$comrdwmn))/sd(RawWolf2$comrdwmn) SDcomrdwmn2 <- (RawWolf2$comrdwmn2 - mean(RawWolf2$comrdwmn2))/sd(RawWolf2$comrdwmn2) SDcourdwmn <- (RawWolf2$courdwmn - mean(RawWolf2$courdwmn))/sd(RawWolf2$courdwmn) SDcourdwmn2 <- (RawWolf2$courdwmn2 - mean(RawWolf2$courdwmn2))/sd(RawWolf2$courdwmn2) SDbrdlfwmn <- (RawWolf2$brdlfwmn - mean(RawWolf2$brdlfwmn))/sd(RawWolf2$brdlfwmn) SDpastwmn <- (RawWolf2$pastwmn - mean(RawWolf2$pastwmn))/sd(RawWolf2$pastwmn) SDpastwmn2 <- (RawWolf2$pastwmn2 - mean(RawWolf2$pastwmn2))/sd(RawWolf2$pastwmn2) ## Attach standardized variables to data frame RawWolf2$SDagricwmn <- SDagricwmn RawWolf2$SDagricwmn2 <- SDagricwmn2 RawWolf2$SDartifwmn <- SDartifwmn RawWolf2$SDnatrdwmn <- SDnatrdwmn RawWolf2$SDnatrdwmn2 <- SDnatrdwmn2 RawWolf2$SDmixedwmn <- SDmixedwmn RawWolf2$SDshrwmn <- SDshrwmn RawWolf2$SDtriwmn <- SDtriwmn RawWolf2$SDtriwmn2 <- SDtriwmn2 RawWolf2$SDconifwmn <- SDconifwmn RawWolf2$SDshrwmn <- SDshrwmn RawWolf2$SDcomrdwmn <- SDcomrdwmn RawWolf2$SDcomrdwmn2 <- SDcomrdwmn2 RawWolf2$SDcourdwmn <- SDcourdwmn RawWolf2$SDcourdwmn2 <- SDcourdwmn2 RawWolf2$SDbrdlfwmn <- SDbrdlfwmn RawWolf2$SDpastwmn <- SDpastwmn RawWolf2$SDpastwmn2 <- SDpastwmn2 names(RawWolf2) ## Driscoll and Kraay (1998) robust standard errors for cross-sectional autocorrelation ## Robust standard error calculations applied after logistic regression ## lag length set to: floor(4 * (T / 100)^(2/9)) ## rule of thumb proposed by Hoechle (2007) based on Newey & West (1994) library(sandwich) H0 <- glm(presence~1, family = binomial(logit), data = RawWolf2) H0 anova(H0) summary(H0) sqrt(diag(vcovPL(H0, lag = "NW1994"))) H1 <- glm(presence~SDtriwmn+SDtriwmn2, family = binomial(logit), data = RawWolf2) H1 anova(H1) summary(H1) sqrt(diag(vcovPL(H1, lag = "NW1994"))) H2 <- glm(presence~SDbrdlfwmn+SDmixedwmn+SDconifwmn, family = binomial(logit), data = RawWolf2) H2 anova(H2) summary(H2) sqrt(diag(vcovPL(H2, lag = "NW1994"))) H3 <- glm(presence~SDbrdlfwmn+SDmixedwmn+SDconifwmn+SDshrwmn, family = binomial(logit), data = RawWolf2) H3 anova(H3) summary(H3) sqrt(diag(vcovPL(H3, lag = "NW1994"))) H4 <- glm(presence~SDbrdlfwmn+SDconifwmn+SDtriwmn+SDtriwmn2, family = binomial(logit), data = RawWolf2) H4 anova(H4) summary(H4) sqrt(diag(vcovPL(H4, lag = "NW1994"))) H5 <- glm(presence~SDpastwmn+SDpastwmn2+SDagricwmn+SDagricwmn2+SDartifwmn, family = binomial(logit), data = RawWolf2) H5 anova(H5) summary(H5) sqrt(diag(vcovPL(H5, lag = "NW1994"))) H6 <- glm(presence~SDnatrdwmn+SDnatrdwmn2+SDcourdwmn+SDcourdwmn2+SDcomrdwmn+SDcomrdwmn2, family = binomial(logit), data = RawWolf2) H6 anova(H6) summary(H6) sqrt(diag(vcovPL(H6, lag = "NW1994"))) H7 <- glm(presence~SDnatrdwmn+SDnatrdwmn2+SDcourdwmn+SDcourdwmn2+SDcomrdwmn+SDcomrdwmn2+SDartifwmn, family = binomial(logit), data = RawWolf2) H7 anova(H7) summary(H7) sqrt(diag(vcovPL(H7, lag = "NW1994"))) H8 <- glm(presence~SDnatrdwmn+SDnatrdwmn2+SDcourdwmn+SDcourdwmn2+SDcomrdwmn+SDcomrdwmn2+SDpastwmn+SDpastwmn2+SDagricwmn+SDagricwmn2+SDartifwmn, family = binomial(logit), data = RawWolf2) H8 anova(H8) summary(H8) sqrt(diag(vcovPL(H8, lag = "NW1994"))) H9 <- glm(presence~SDartifwmn+SDtriwmn+SDtriwmn2, family = binomial(logit), data = RawWolf2) H9 anova(H9) summary(H9) sqrt(diag(vcovPL(H9, lag = "NW1994"))) H10 <- glm(presence~SDpastwmn+SDpastwmn2+SDartifwmn+SDtriwmn+SDtriwmn2, family = binomial(logit), data = RawWolf2) H10 anova(H10) summary(H10) sqrt(diag(vcovPL(H10, lag = "NW1994"))) H11 <- glm(presence~SDnatrdwmn+SDnatrdwmn2+SDcourdwmn+SDcourdwmn2+SDcomrdwmn+SDcomrdwmn2+SDtriwmn+SDtriwmn2, family = binomial(logit), data = RawWolf2) H11 anova(H11) summary(H11) sqrt(diag(vcovPL(H11, lag = "NW1994"))) H12 <- glm(presence~SDpastwmn+SDpastwmn2+SDartifwmn+SDnatrdwmn+SDnatrdwmn2+SDcourdwmn+SDcourdwmn2+SDcomrdwmn+SDcomrdwmn2+SDtriwmn+SDtriwmn2, family = binomial(logit), data = RawWolf2) H12 anova(H12) summary(H12) sqrt(diag(vcovPL(H12, lag = "NW1994"))) H13 <- glm(presence~SDbrdlfwmn+SDnatrdwmn+SDnatrdwmn2+SDcourdwmn+SDcourdwmn2+SDcomrdwmn+SDcomrdwmn2, family = binomial(logit), data = RawWolf2) H13 anova(H13) summary(H13) sqrt(diag(vcovPL(H13, lag = "NW1994"))) H14 <- glm(presence~SDbrdlfwmn+SDmixedwmn+SDconifwmn+SDnatrdwmn+SDnatrdwmn2+SDcourdwmn+SDcourdwmn2+SDcomrdwmn+SDcomrdwmn2, family = binomial(logit), data = RawWolf2) H14 anova(H14) summary(H14) sqrt(diag(vcovPL(H14, lag = "NW1994"))) H15 <- glm(presence~SDbrdlfwmn+SDmixedwmn+SDconifwmn+SDshrwmn+SDnatrdwmn+SDnatrdwmn2+SDcourdwmn+SDcourdwmn2+SDcomrdwmn+SDcomrdwmn2, family = binomial(logit), data = RawWolf2) H15 anova(H15) summary(H15) sqrt(diag(vcovPL(H15, lag = "NW1994"))) H16 <- glm(presence~SDmixedwmn+SDconifwmn+SDnatrdwmn+SDnatrdwmn2+SDcourdwmn+SDcourdwmn2+SDcomrdwmn+SDcomrdwmn2+SDpastwmn+SDpastwmn2+SDagricwmn+SDagricwmn2+SDartifwmn, family = binomial(logit), data = RawWolf2) H16 anova(H16) summary(H16) sqrt(diag(vcovPL(H16, lag = "NW1994"))) H17 <- glm(presence~SDbrdlfwmn+SDnatrdwmn+SDnatrdwmn2+SDcourdwmn+SDcourdwmn2+SDcomrdwmn+SDcomrdwmn2+SDtriwmn+SDtriwmn2, family = binomial(logit), data = RawWolf2) H17 anova(H17) summary(H17) sqrt(diag(vcovPL(H17, lag = "NW1994"))) H18 <- glm(presence~SDbrdlfwmn+SDconifwmn+SDnatrdwmn+SDnatrdwmn2+SDcourdwmn+SDcourdwmn2+SDcomrdwmn+SDcomrdwmn2+SDtriwmn+SDtriwmn2, family = binomial(logit), data = RawWolf2) H18 anova(H18) summary(H18) sqrt(diag(vcovPL(H18, lag = "NW1994"))) H19 <- glm(presence~SDbrdlfwmn+SDnatrdwmn+SDnatrdwmn2+SDcourdwmn+SDcourdwmn2+SDcomrdwmn+SDcomrdwmn2+SDpastwmn+SDpastwmn2+SDartifwmn+SDtriwmn+SDtriwmn2, family = binomial(logit), data = RawWolf2) H19 anova(H19) summary(H19) sqrt(diag(vcovPL(H19, lag = "NW1994"))) H20 <- glm(presence~SDbrdlfwmn+SDconifwmn+SDnatrdwmn+SDnatrdwmn2+SDcourdwmn+SDcourdwmn2+SDcomrdwmn+SDcomrdwmn2+SDpastwmn+SDpastwmn2+SDartifwmn+SDtriwmn+SDtriwmn2, family = binomial(logit), data = RawWolf2) H20 anova(H20) summary(H20) sqrt(diag(vcovPL(H20, lag = "NW1994"))) ## H20 is the top wolf model based on AIC ranking. # Obtain odds ratios library(broom) library(magrittr) library(dplyr) modelH20.df <- tidy(H20) print.data.frame(modelH20.df) modelH20.df %>% mutate(or = exp(estimate), # Odds ratio/gradient options(pillar.sigfig=7)) # Increase number of decimals visible ## Check multicollinearity between independent variables library(car) vif(H20) ## H20 is the top wolf model based on AIC ranking. Check residual plots library(boot) glm.diag.plots(H20) ## Attach deviance residuals and fitted values (probabilities) to the SpatialPolygonsDataFrame class object dim(RawWolf2) head(RawWolf2) RawWolf2$DevRes <- resid(H20, type = c("deviance")) RawWolf2$RelProb <- fitted(H20, fitted = fitted(H20)) dim(RawWolf2) head(RawWolf2) summary(RawWolf2$RelProb) ## Convert fitted values to presence/absence based on cutoff 0.5 RawWolf2$PrPres <- ifelse(RawWolf2$RelProb > 0.5, 1, 0) head(RawWolf2) ## Export as shapefile writeOGR(obj=RawWolf2, dsn="C:/ProtArea/GWR", layer = "Wolf_Final", driver="ESRI Shapefile") ## Generate predicted probabilities for the top wolf model (H20) library(ggplot2) ## SDtriwmn DFSDtriwmn <- with(CovariateDFW, data.frame(triwmn, SDtriwmn, SDtriwmn2 = mean(SDtriwmn2), SDbrdlfwmn = mean(SDbrdlfwmn), SDconifwmn = mean(SDconifwmn), SDnatrdwmn = mean(SDnatrdwmn), SDnatrdwmn2 = mean(SDnatrdwmn2), SDcourdwmn = mean(SDcourdwmn), SDcourdwmn2 = mean(SDcourdwmn2), SDcomrdwmn = mean(SDcomrdwmn), SDcomrdwmn2 = mean(SDcomrdwmn2), SDpastwmn = mean(SDpastwmn), SDpastwmn2 = mean(SDpastwmn2), SDartifwmn = mean(SDartifwmn))) head(DFSDtriwmn) summary(DFSDtriwmn$triwmn) summary(DFSDtriwmn$SDtriwmn) ## Obtain predicted probabilities DFSDtriwmn$PredProb <- predict(H20, newdata = DFSDtriwmn, type = "response") DFSDtriwmn ## Plot predicted probabilities ggplot(DFSDtriwmn, aes(x = triwmn, y = PredProb)) + xlim(c(0, 100)) + ylim(c(0, 1)) + geom_line(aes(colour = triwmn), size = 5) + labs(x = "Ruggedness", y = "Predicted probability") + theme(plot.margin = unit(c(0.5, 0.5, 0.5, 0.5), "cm"), legend.position="none", text = element_text(size=36), axis.title.y = element_text(margin = margin(t = 0, r = 15, b = 10, l = 0))) ## SDbrdlfwmn ## Generate a dataframe with all variables except the variable of interest kept at mean level DFSDbrdlfwmn <- with(CovariateDFW, data.frame(SDtriwmn = mean(SDtriwmn), SDtriwmn2 = mean(SDtriwmn2), brdlfwmn, SDbrdlfwmn, SDconifwmn = mean(SDconifwmn), SDnatrdwmn = mean(SDnatrdwmn), SDnatrdwmn2 = mean(SDnatrdwmn2), SDcourdwmn = mean(SDcourdwmn), SDcourdwmn2 = mean(SDcourdwmn2), SDcomrdwmn = mean(SDcomrdwmn), SDcomrdwmn2 = mean(SDcomrdwmn2), SDpastwmn = mean(SDpastwmn), SDpastwmn2 = mean(SDpastwmn2), SDartifwmn = mean(SDartifwmn))) head(DFSDbrdlfwmn) summary(DFSDbrdlfwmn$brdlfwmn) summary(DFSDbrdlfwmn$SDbrdlfwmn) ## Obtain predicted probabilities DFSDbrdlfwmn$PredProb <- predict(H20, newdata = DFSDbrdlfwmn, type = "response") DFSDbrdlfwmn ## Plot predicted probabilities ggplot(DFSDbrdlfwmn, aes(x = brdlfwmn, y = PredProb)) + xlim(c(0, 1)) + ylim(c(0, 1)) + geom_line(aes(colour = brdlfwmn), size = 5) + labs(x = "Broadleaf forest", y = "Predicted probability") + theme(plot.margin = unit(c(0.5, 0.5, 0.5, 0.5), "cm"), legend.position="none", text = element_text(size=36), axis.title.y = element_text(margin = margin(t = 0, r = 15, b = 10, l = 0))) ## SDconifwmn DFSDconifwmn <- with(CovariateDFW, data.frame(SDtriwmn = mean(SDtriwmn), SDtriwmn2 = mean(SDtriwmn2), SDbrdlfwmn = mean(SDbrdlfwmn), conifwmn, SDconifwmn, SDnatrdwmn = mean(SDnatrdwmn), SDnatrdwmn2 = mean(SDnatrdwmn2), SDcourdwmn = mean(SDcourdwmn), SDcourdwmn2 = mean(SDcourdwmn2), SDcomrdwmn = mean(SDcomrdwmn), SDcomrdwmn2 = mean(SDcomrdwmn2), SDpastwmn = mean(SDpastwmn), SDpastwmn2 = mean(SDpastwmn2), SDartifwmn = mean(SDartifwmn))) head(DFSDconifwmn) summary(DFSDconifwmn$conifwmn) summary(DFSDconifwmn$SDconifwmn) ## Obtain predicted probabilities DFSDconifwmn$PredProb <- predict(H20, newdata = DFSDconifwmn, type = "response") DFSDconifwmn ## Plot predicted probabilities ggplot(DFSDconifwmn, aes(x = conifwmn, y = PredProb)) + xlim(c(0, 1)) + ylim(c(0, 1)) + geom_line(aes(colour = conifwmn), size = 5) + labs(x = "Conifer forest", y = "Predicted probability") + theme(plot.margin = unit(c(0.5, 0.5, 0.5, 0.5), "cm"), legend.position="none", text = element_text(size=36), axis.title.y = element_text(margin = margin(t = 0, r = 15, b = 10, l = 0))) ## SDnatrdwmn DFSDnatrdwmn <- with(CovariateDFW, data.frame(SDtriwmn = mean(SDtriwmn), SDtriwmn2 = mean(SDtriwmn2), SDbrdlfwmn = mean(SDbrdlfwmn), SDconifwmn = mean(SDconifwmn), natrdwmn, SDnatrdwmn, SDnatrdwmn2 = mean(SDnatrdwmn2), SDcourdwmn = mean(SDcourdwmn), SDcourdwmn2 = mean(SDcourdwmn2), SDcomrdwmn = mean(SDcomrdwmn), SDcomrdwmn2 = mean(SDcomrdwmn2), SDpastwmn = mean(SDpastwmn), SDpastwmn2 = mean(SDpastwmn2), SDartifwmn = mean(SDartifwmn))) head(DFSDnatrdwmn) summary(DFSDnatrdwmn$natrwbmn) summary(DFSDnatrdwmn$SDnatrdwmn) ## Obtain predicted probabilities DFSDnatrdwmn$PredProb <- predict(H20, newdata = DFSDnatrdwmn, type = "response") DFSDnatrdwmn ## Plot predicted probabilities ggplot(DFSDnatrdwmn, aes(x = natrdwmn, y = PredProb)) + xlim(c(0, 0.15)) + ylim(c(0, 1)) + geom_line(aes(colour = natrdwmn), size = 5) + labs(x = "National road", y = "Predicted probability") + theme(plot.margin = unit(c(0.5, 0.5, 0.5, 0.5), "cm"), legend.position="none", text = element_text(size=36), axis.title.y = element_text(margin = margin(t = 0, r = 15, b = 10, l = 0))) ## SDcourdwmn DFSDcourdwmn <- with(CovariateDFW, data.frame(SDtriwmn = mean(SDtriwmn), SDtriwmn2 = mean(SDtriwmn2), SDbrdlfwmn = mean(SDbrdlfwmn), SDconifwmn = mean(SDconifwmn), SDnatrdwmn = mean(SDnatrdwmn), SDnatrdwmn2 = mean(SDnatrdwmn2), courdwmn, SDcourdwmn, SDcourdwmn2 = mean(SDcourdwmn2), SDcomrdwmn = mean(SDcomrdwmn), SDcomrdwmn2 = mean(SDcomrdwmn2), SDpastwmn = mean(SDpastwmn), SDpastwmn2 = mean(SDpastwmn2), SDartifwmn = mean(SDartifwmn))) head(DFSDcourdwmn) summary(DFSDcourdwmn$courdwmn) summary(DFSDcourdwmn$SDcourdwmn) ## Obtain predicted probabilities DFSDcourdwmn$PredProb <- predict(H20, newdata = DFSDcourdwmn, type = "response") DFSDcourdwmn ## Plot predicted probabilities ggplot(DFSDcourdwmn, aes(x = courdwmn, y = PredProb)) + xlim(c(0, 0.35)) + ylim(c(0, 1)) + geom_line(aes(colour = courdwmn), size = 5) + labs(x = "County road", y = "Predicted probability") + theme(plot.margin = unit(c(0.5, 0.5, 0.5, 0.5), "cm"), legend.position="none", text = element_text(size=36), axis.title.y = element_text(margin = margin(t = 0, r = 15, b = 10, l = 0))) ## SDcomrdwmn DFSDcomrdwmn <- with(CovariateDFW, data.frame(SDtriwmn = mean(SDtriwmn), SDtriwmn2 = mean(SDtriwmn2), SDbrdlfwmn = mean(SDbrdlfwmn), SDconifwmn = mean(SDconifwmn), SDnatrdwmn = mean(SDnatrdwmn), SDnatrdwmn2 = mean(SDnatrdwmn2), SDcourdwmn = mean(SDcourdwmn), SDcourdwmn2 = mean(SDcourdwmn2), comrdwmn, SDcomrdwmn, SDcomrdwmn2 = mean(SDcomrdwmn2), SDpastwmn = mean(SDpastwmn), SDpastwmn2 = mean(SDpastwmn2), SDartifwmn = mean(SDartifwmn))) head(DFSDcomrdwmn) summary(DFSDcomrdwmn$comrdwmn) summary(DFSDcomrdwmn$SDcomrdwmn) ## Obtain predicted probabilities DFSDcomrdwmn$PredProb <- predict(H20, newdata = DFSDcomrdwmn, type = "response") DFSDcomrdwmn ## Plot predicted probabilities ggplot(DFSDcomrdwmn, aes(x = comrdwmn, y = PredProb)) + xlim(c(0, 0.8)) + ylim(c(0, 1)) + geom_line(aes(colour = comrdwmn), size = 5) + labs(x = "Communal road", y = "Predicted probability") + theme(plot.margin = unit(c(0.5, 0.5, 0.5, 0.5), "cm"), legend.position="none", text = element_text(size=36), axis.title.y = element_text(margin = margin(t = 0, r = 15, b = 10, l = 0))) ## SDpastwmn DFSDpastwmn <- with(CovariateDFW, data.frame(SDtriwmn = mean(SDtriwmn), SDtriwmn2 = mean(SDtriwmn2), SDbrdlfwmn = mean(SDbrdlfwmn), SDconifwmn = mean(SDconifwmn), SDnatrdwmn = mean(SDnatrdwmn), SDnatrdwmn2 = mean(SDnatrdwmn2), SDcourdwmn = mean(SDcourdwmn), SDcourdwmn2 = mean(SDcourdwmn2), SDcomrdwmn = mean(SDcomrdwmn), SDcomrdwmn2 = mean(SDcomrdwmn2), pastwmn, SDpastwmn, SDpastwmn2 = mean(SDpastwmn2), SDartifwmn = mean(SDartifwmn))) head(DFSDpastwmn) summary(DFSDpastwmn$pastwmn) summary(DFSDpastwmn$SDpastwmn) ## Obtain predicted probabilities DFSDpastwmn$PredProb <- predict(H20, newdata = DFSDpastwmn, type = "response") DFSDpastwmn ## Plot predicted probabilities ggplot(DFSDpastwmn, aes(x = pastwmn, y = PredProb)) + xlim(c(0, 0.6)) + ylim(c(0, 1)) + geom_line(aes(colour = pastwmn), size = 5) + labs(x = "Pasture", y = "Predicted probability") + theme(plot.margin = unit(c(0.5, 0.5, 0.5, 0.5), "cm"), legend.position="none", text = element_text(size=36), axis.title.y = element_text(margin = margin(t = 0, r = 15, b = 10, l = 0))) ## SDartifwmn DFSDartifwmn <- with(CovariateDFW, data.frame(SDtriwmn = mean(SDtriwmn), SDtriwmn2 = mean(SDtriwmn2), SDbrdlfwmn = mean(SDbrdlfwmn), SDconifwmn = mean(SDconifwmn), SDnatrdwmn = mean(SDnatrdwmn), SDnatrdwmn2 = mean(SDnatrdwmn2), SDcourdwmn = mean(SDcourdwmn), SDcourdwmn2 = mean(SDcourdwmn2), SDcomrdwmn = mean(SDcomrdwmn), SDcomrdwmn2 = mean(SDcomrdwmn2), SDpastwmn = mean(SDpastwmn), SDpastwmn2 = mean(SDpastwmn2), artifwmn, SDartifwmn)) head(DFSDartifwmn) summary(DFSDartifwmn$artifwmn) summary(DFSDartifwmn$SDartifwmn) ## Obtain predicted probabilities DFSDartifwmn$PredProb <- predict(H20, newdata = DFSDartifwmn, type = "response") DFSDartifwmn ## Plot predicted probabilities ggplot(DFSDartifwmn, aes(x = artifwmn, y = PredProb)) + xlim(c(0, 0.6)) + ylim(c(0, 1)) + geom_line(aes(colour = artifwmn), size = 5) + labs(x = "Artificial", y = "Predicted probability") + theme(plot.margin = unit(c(0.5, 0.5, 0.5, 0.5), "cm"), legend.position="none", text = element_text(size=36), axis.title.y = element_text(margin = margin(t = 0, r = 15, b = 10, l = 0))) ## K-fold cross-validation ## Huberty's Rule percentTestData <- function(nvar) { # v1.1 (28 Mar 2013) # heuristic ('rule of thumb') of Huberty (1994; in Fielding & Bell 1997) to determine the test/training data ratio for presence-absence models # nvar: number of predictor variables huberty <- 1 / (1 + sqrt(nvar - 1)) return(round(100 * huberty, 1)) } percentTestData(14) # 14 represents the number of parameters in the model ## K-fold library(boot) cv.errorW <- cv.glm(RawWolf2, H20, K=5)$delta[1] 1-cv.errorW dim(CovariateDFW) ## Full memory clean-up rm(list = ls()) # remove gc() # garbage collection .rs.restartR() ####################################################################################################### ## LYNX getwd() setwd("C:/ProtArea/GWR") ## Merge shapefile and .csv to obtain spatial data object # library(raster) # lynx1 <- shapefile("C:/ProtArea/GWR/lynx_distribution.shp") # lynx2 <- read.csv("C:/ProtArea/GWR/Raw_lynx.csv") # RawLynx <- merge(lynx1, lynx2, by.lynx1="WMU", by.lynx2="WMU") # RawLynx ## Save as shapefile again: # shapefile(RawLynx, "C:/ProtArea/GWR/RawLynxAll.shp") ## Import shapefile library(rgdal) RawLynx <- readOGR(".", "RawLynxAll") summary(RawLynx) ## Map a component of the shapefile, in this case trilmn library(tmap) tm_shape(RawLynx) + tm_polygons(style="quantile", col = "presence") + tm_legend(outside = TRUE, text.size = .4) ## Display missing data RawLynx@data[!complete.cases(RawLynx@data),] ## Function to remove missing data in the SpatialPolygonsDataFrame class object ## "margin" removes rows (1) or columns (2) sp.na.omit <- function(RawLynx, margin=1) { if (!inherits(RawLynx, "SpatialPointsDataFrame") & !inherits(RawLynx, "SpatialPolygonsDataFrame")) stop("MUST BE SpatialPointsDataFrame OR SpatialPolygonsDataFrame CLASS OBJECT") na.index <- unique(as.data.frame(which(is.na(RawLynx@data),arr.ind=TRUE))[,margin]) if(margin == 1) { cat("DELETING ROWS: ", na.index, "\n") return( RawLynx[-na.index,] ) } if(margin == 2) { cat("DELETING COLUMNS: ", na.index, "\n") return( RawLynx[,-na.index] ) } } ## Delete missing values ("NA") and show changes using "dim" RawLynx2 <- sp.na.omit(RawLynx) dim(RawLynx) dim(RawLynx2) ## Plot deleted polygons in red plot(RawLynx, col="red", pch=20) plot(RawLynx2, col="black", pch=20, add=TRUE) ## Covariate correlation matrix, building a dataframe ("CovariateDF") first library(corrplot) source("C:/ProtArea/GWR/Function_rquery_cormat.r") # source the rquery.cormat function CovariateDFL <- as.data.frame(RawLynx2) head(CovariateDFL) dim(CovariateDFL) CovariateDFL[, c(-(1:16),-22,-(33:34))] # inspect columns to include in correlation matrix rquery.cormat(CovariateDFL[, c(-(1:16),-22,-(33:34))]) ## Not used: Export the data frame to Excel # write.csv(RawLynx,'GWR_Lynx.csv') ## Standardize variables for regression analyses SDagriclmn <- (RawLynx2$agriclmn - mean(RawLynx2$agriclmn))/sd(RawLynx2$agriclmn) SDagriclmn2 <- (RawLynx2$agriclmn2 - mean(RawLynx2$agriclmn2))/sd(RawLynx2$agriclmn2) SDartiflmn <- (RawLynx2$artiflmn - mean(RawLynx2$artiflmn))/sd(RawLynx2$artiflmn) SDnatrdlmn <- (RawLynx2$natrdlmn - mean(RawLynx2$natrdlmn))/sd(RawLynx2$natrdlmn) SDnatrdlmn2 <- (RawLynx2$natrdlmn2 - mean(RawLynx2$natrdlmn2))/sd(RawLynx2$natrdlmn2) SDmixedlmn <- (RawLynx2$mixedlmn - mean(RawLynx2$mixedlmn))/sd(RawLynx2$mixedlmn) SDshrlmn <- (RawLynx2$shrlmn - mean(RawLynx2$shrlmn))/sd(RawLynx2$shrlmn) SDtrilmn <- (RawLynx2$trilmn - mean(RawLynx2$trilmn))/sd(RawLynx2$trilmn) SDtrilmn2 <- (RawLynx2$trilmn2 - mean(RawLynx2$trilmn2))/sd(RawLynx2$trilmn2) SDconiflmn <- (RawLynx2$coniflmn - mean(RawLynx2$coniflmn))/sd(RawLynx2$coniflmn) SDshrlmn <- (RawLynx2$shrlmn - mean(RawLynx2$shrlmn))/sd(RawLynx2$shrlmn) SDcomrdlmn <- (RawLynx2$comrdlmn - mean(RawLynx2$comrdlmn))/sd(RawLynx2$comrdlmn) SDcomrdlmn2 <- (RawLynx2$comrdlmn2 - mean(RawLynx2$comrdlmn2))/sd(RawLynx2$comrdlmn2) SDcourdlmn <- (RawLynx2$courdlmn - mean(RawLynx2$courdlmn))/sd(RawLynx2$courdlmn) SDcourdlmn2 <- (RawLynx2$courdlmn2 - mean(RawLynx2$courdlmn2))/sd(RawLynx2$courdlmn2) SDbrdlflmn <- (RawLynx2$brdlflmn - mean(RawLynx2$brdlflmn))/sd(RawLynx2$brdlflmn) SDpastlmn <- (RawLynx2$pastlmn - mean(RawLynx2$pastlmn))/sd(RawLynx2$pastlmn) SDpastlmn2 <- (RawLynx2$pastlmn2 - mean(RawLynx2$pastlmn2))/sd(RawLynx2$pastlmn2) ## Attach standardized variables to data frame RawLynx2$SDagriclmn <- SDagriclmn RawLynx2$SDagriclmn2 <- SDagriclmn2 RawLynx2$SDartiflmn <- SDartiflmn RawLynx2$SDnatrdlmn <- SDnatrdlmn RawLynx2$SDnatrdlmn2 <- SDnatrdlmn2 RawLynx2$SDmixedlmn <- SDmixedlmn RawLynx2$SDshrlmn <- SDshrlmn RawLynx2$SDtrilmn <- SDtrilmn RawLynx2$SDtrilmn2 <- SDtrilmn2 RawLynx2$SDconiflmn <- SDconiflmn RawLynx2$SDshrlmn <- SDshrlmn RawLynx2$SDcomrdlmn <- SDcomrdlmn RawLynx2$SDcomrdlmn2 <- SDcomrdlmn2 RawLynx2$SDcourdlmn <- SDcourdlmn RawLynx2$SDcourdlmn2 <- SDcourdlmn2 RawLynx2$SDbrdlflmn <- SDbrdlflmn RawLynx2$SDpastlmn <- SDpastlmn RawLynx2$SDpastlmn2 <- SDpastlmn2 names(RawLynx2) ## Driscoll and Kraay (1998) robust standard errors for cross-sectional autocorrelation ## Robust standard error calculations applied after logistic regression ## lag length set to: floor(4 * (T / 100)^(2/9)) ## rule of thumb proposed by Hoechle (2007) based on Newey & West (1994) library(sandwich) H0 <- glm(presence~1, family = binomial(logit), data = RawLynx2) H0 anova(H0) summary(H0) sqrt(diag(vcovPL(H0, lag = "NW1994"))) H1 <- glm(presence~SDtrilmn+SDtrilmn2, family = binomial(logit), data = RawLynx2) H1 anova(H1) summary(H1) sqrt(diag(vcovPL(H1, lag = "NW1994"))) H2 <- glm(presence~SDbrdlflmn+SDmixedlmn+SDconiflmn, family = binomial(logit), data = RawLynx2) H2 anova(H2) summary(H2) sqrt(diag(vcovPL(H2, lag = "NW1994"))) H3 <- glm(presence~SDbrdlflmn+SDmixedlmn+SDconiflmn+SDshrlmn, family = binomial(logit), data = RawLynx2) H3 anova(H3) summary(H3) sqrt(diag(vcovPL(H3, lag = "NW1994"))) H4 <- glm(presence~SDbrdlflmn+SDconiflmn+SDtrilmn+SDtrilmn2, family = binomial(logit), data = RawLynx2) H4 anova(H4) summary(H4) sqrt(diag(vcovPL(H4, lag = "NW1994"))) H5 <- glm(presence~SDpastlmn+SDpastlmn2+SDagriclmn+SDagriclmn2+SDartiflmn, family = binomial(logit), data = RawLynx2) H5 anova(H5) summary(H5) sqrt(diag(vcovPL(H5, lag = "NW1994"))) H6 <- glm(presence~SDnatrdlmn+SDnatrdlmn2+SDcourdlmn+SDcourdlmn2+SDcomrdlmn+SDcomrdlmn2, family = binomial(logit), data = RawLynx2) H6 anova(H6) summary(H6) sqrt(diag(vcovPL(H6, lag = "NW1994"))) H7 <- glm(presence~SDnatrdlmn+SDnatrdlmn2+SDcourdlmn+SDcourdlmn2+SDcomrdlmn+SDcomrdlmn2+SDartiflmn, family = binomial(logit), data = RawLynx2) H7 anova(H7) summary(H7) sqrt(diag(vcovPL(H7, lag = "NW1994"))) H8 <- glm(presence~SDnatrdlmn+SDnatrdlmn2+SDcourdlmn+SDcourdlmn2+SDcomrdlmn+SDcomrdlmn2+SDpastlmn+SDpastlmn2+SDagriclmn+SDagriclmn2+SDartiflmn, family = binomial(logit), data = RawLynx2) H8 anova(H8) summary(H8) sqrt(diag(vcovPL(H8, lag = "NW1994"))) H9 <- glm(presence~SDartiflmn+SDtrilmn+SDtrilmn2, family = binomial(logit), data = RawLynx2) H9 anova(H9) summary(H9) sqrt(diag(vcovPL(H9, lag = "NW1994"))) H10 <- glm(presence~SDpastlmn+SDpastlmn2+SDartiflmn+SDtrilmn+SDtrilmn2, family = binomial(logit), data = RawLynx2) H10 anova(H10) summary(H10) sqrt(diag(vcovPL(H10, lag = "NW1994"))) H11 <- glm(presence~SDnatrdlmn+SDnatrdlmn2+SDcourdlmn+SDcourdlmn2+SDcomrdlmn+SDcomrdlmn2+SDtrilmn+SDtrilmn2, family = binomial(logit), data = RawLynx2) H11 anova(H11) summary(H11) sqrt(diag(vcovPL(H11, lag = "NW1994"))) H12 <- glm(presence~SDpastlmn+SDpastlmn2+SDartiflmn+SDnatrdlmn+SDnatrdlmn2+SDcourdlmn+SDcourdlmn2+SDcomrdlmn+SDcomrdlmn2+SDtrilmn+SDtrilmn2, family = binomial(logit), data = RawLynx2) H12 anova(H12) summary(H12) sqrt(diag(vcovPL(H12, lag = "NW1994"))) H13 <- glm(presence~SDbrdlflmn+SDnatrdlmn+SDnatrdlmn2+SDcourdlmn+SDcourdlmn2+SDcomrdlmn+SDcomrdlmn2, family = binomial(logit), data = RawLynx2) H13 anova(H13) summary(H13) sqrt(diag(vcovPL(H13, lag = "NW1994"))) H14 <- glm(presence~SDbrdlflmn+SDmixedlmn+SDconiflmn+SDnatrdlmn+SDnatrdlmn2+SDcourdlmn+SDcourdlmn2+SDcomrdlmn+SDcomrdlmn2, family = binomial(logit), data = RawLynx2) H14 anova(H14) summary(H14) sqrt(diag(vcovPL(H14, lag = "NW1994"))) H15 <- glm(presence~SDbrdlflmn+SDmixedlmn+SDconiflmn+SDshrlmn+SDnatrdlmn+SDnatrdlmn2+SDcourdlmn+SDcourdlmn2+SDcomrdlmn+SDcomrdlmn2, family = binomial(logit), data = RawLynx2) H15 anova(H15) summary(H15) sqrt(diag(vcovPL(H15, lag = "NW1994"))) H16 <- glm(presence~SDmixedlmn+SDconiflmn+SDnatrdlmn+SDnatrdlmn2+SDcourdlmn+SDcourdlmn2+SDcomrdlmn+SDcomrdlmn2+SDpastlmn+SDpastlmn2+SDagriclmn+SDagriclmn2+SDartiflmn, family = binomial(logit), data = RawLynx2) H16 anova(H16) summary(H16) sqrt(diag(vcovPL(H16, lag = "NW1994"))) ## H16 is the top lynx model based on AIC ranking. # Obtain odds ratios library(broom) library(magrittr) library(dplyr) modelH16.df <- tidy(H16) print.data.frame(modelH16.df) modelH16.df %>% mutate(or = exp(estimate), # Odds ratio/gradient options(pillar.sigfig=7)) # Increase number of decimals visible ## Check multicollinearity between independent variables library(car) vif(H16) ## H16 is the top lynx model based on AIC ranking. Check residual plots library(boot) glm.diag.plots(H16) ## Attach deviance residuals and fitted values (probabilities) to the SpatialPolygonsDataFrame class object dim(RawLynx2) head(RawLynx2) RawLynx2$DevRes <- resid(H16, type = c("deviance")) RawLynx2$RelProb <- fitted(H16, fitted = fitted(H16)) dim(RawLynx2) head(RawLynx2) ## Convert fitted values to presence/absence based on cutoff 0.5 RawLynx2$PrPres <- ifelse(RawLynx2$RelProb > 0.5, 1, 0) head(RawLynx2) ## Export as shapefile writeOGR(obj=RawLynx2, dsn="C:/ProtArea/GWR", layer = "Lynx_Final", driver="ESRI Shapefile") ## Generate predicted probabilities for the top lynx model (H16) library(ggplot2) ## SDmixedlmn ## Generate a dataframe with all variables except the variable of interest kept at mean level DFSDmixedlmn <- with(CovariateDFL, data.frame(mixedlmn, SDmixedlmn, SDconiflmn = mean(SDconiflmn), SDnatrdlmn = mean(SDnatrdlmn), SDnatrdlmn2 = mean(SDnatrdlmn2), SDcourdlmn = mean(SDcourdlmn), SDcourdlmn2 = mean(SDcourdlmn2), SDcomrdlmn = mean(SDcomrdlmn), SDcomrdlmn2 = mean(SDcomrdlmn2), SDpastlmn = mean(SDpastlmn), SDpastlmn2 = mean(SDpastlmn2), SDagriclmn = mean(SDagriclmn), SDagriclmn2 = mean(SDagriclmn2), SDartiflmn = mean(SDartiflmn))) head(DFSDmixedlmn) summary(DFSDmixedlmn$mixedlmn) summary(DFSDmixedlmn$SDmixedlmn) ## Obtain predicted probabilities DFSDmixedlmn$PredProb <- predict(H16, newdata = DFSDmixedlmn, type = "response") DFSDmixedlmn ## Plot predicted probabilities ggplot(DFSDmixedlmn, aes(x = mixedlmn, y = PredProb)) + xlim(c(0, 1)) + ylim(c(0, 1)) + geom_line(aes(colour = mixedlmn), size = 5) + labs(x = "Mixed forest", y = "Predicted probability") + theme(plot.margin = unit(c(0.5, 0.5, 0.5, 0.5), "cm"), legend.position="none", text = element_text(size=36), axis.title.y = element_text(margin = margin(t = 0, r = 15, b = 10, l = 0))) ## SDconiflmn DFSDconiflmn <- with(CovariateDFL, data.frame(SDmixedlmn = mean(SDmixedlmn), coniflmn, SDconiflmn, SDnatrdlmn = mean(SDnatrdlmn), SDnatrdlmn2 = mean(SDnatrdlmn2), SDcourdlmn = mean(SDcourdlmn), SDcourdlmn2 = mean(SDcourdlmn2), SDcomrdlmn = mean(SDcomrdlmn), SDcomrdlmn2 = mean(SDcomrdlmn2), SDpastlmn = mean(SDpastlmn), SDpastlmn2 = mean(SDpastlmn2), SDagriclmn = mean(SDagriclmn), SDagriclmn2 = mean(SDagriclmn2), SDartiflmn = mean(SDartiflmn))) head(DFSDconiflmn) summary(DFSDconiflmn$coniflmn) summary(DFSDconiflmn$SDconiflmn) ## Obtain predicted probabilities DFSDconiflmn$PredProb <- predict(H16, newdata = DFSDconiflmn, type = "response") DFSDconiflmn ## Plot predicted probabilities ggplot(DFSDconiflmn, aes(x = coniflmn, y = PredProb)) + xlim(c(0, 1)) + ylim(c(0, 1)) + geom_line(aes(colour = coniflmn), size = 5) + labs(x = "Conifer forest", y = "Predicted probability") + theme(plot.margin = unit(c(0.5, 0.5, 0.5, 0.5), "cm"), legend.position="none", text = element_text(size=36), axis.title.y = element_text(margin = margin(t = 0, r = 15, b = 10, l = 0))) ## SDnatrdlmn DFSDnatrdlmn <- with(CovariateDFL, data.frame(SDmixedlmn = mean(SDmixedlmn), SDconiflmn = mean(SDconiflmn), natrdlmn, SDnatrdlmn, SDnatrdlmn2 = mean(SDnatrdlmn2), SDcourdlmn = mean(SDcourdlmn), SDcourdlmn2 = mean(SDcourdlmn2), SDcomrdlmn = mean(SDcomrdlmn), SDcomrdlmn2 = mean(SDcomrdlmn2), SDpastlmn = mean(SDpastlmn), SDpastlmn2 = mean(SDpastlmn2), SDagriclmn = mean(SDagriclmn), SDagriclmn2 = mean(SDagriclmn2), SDartiflmn = mean(SDartiflmn))) head(DFSDnatrdlmn) summary(DFSDnatrdlmn$natrdlmn) summary(DFSDnatrdlmn$SDnatrdlmn) ## Obtain predicted probabilities DFSDnatrdlmn$PredProb <- predict(H16, newdata = DFSDnatrdlmn, type = "response") DFSDnatrdlmn ## Plot predicted probabilities ggplot(DFSDnatrdlmn, aes(x = natrdlmn, y = PredProb)) + xlim(c(0, 0.15)) + ylim(c(0, 1)) + geom_line(aes(colour = natrdlmn), size = 5) + labs(x = "National road", y = "Predicted probability") + theme(plot.margin = unit(c(0.5, 0.5, 0.5, 0.5), "cm"), legend.position="none", text = element_text(size=36), axis.title.y = element_text(margin = margin(t = 0, r = 15, b = 10, l = 0))) ## SDcourdlmn DFSDcourdlmn <- with(CovariateDFL, data.frame(SDmixedlmn = mean(SDmixedlmn), SDconiflmn = mean(SDconiflmn), SDnatrdlmn = mean(SDnatrdlmn), SDnatrdlmn2 = mean(SDnatrdlmn2), courdlmn, SDcourdlmn, SDcourdlmn2 = mean(SDcourdlmn2), SDcomrdlmn = mean(SDcomrdlmn), SDcomrdlmn2 = mean(SDcomrdlmn2), SDpastlmn = mean(SDpastlmn), SDpastlmn2 = mean(SDpastlmn2), SDagriclmn = mean(SDagriclmn), SDagriclmn2 = mean(SDagriclmn2), SDartiflmn = mean(SDartiflmn))) head(DFSDcourdlmn) summary(DFSDcourdlmn$courdlmn) summary(DFSDcourdlmn$SDcourdlmn) ## Obtain predicted probabilities DFSDcourdlmn$PredProb <- predict(H16, newdata = DFSDcourdlmn, type = "response") DFSDcourdlmn ## Plot predicted probabilities ggplot(DFSDcourdlmn, aes(x = courdlmn, y = PredProb)) + xlim(c(0, 0.35)) + ylim(c(0, 1)) + geom_line(aes(colour = courdlmn), size = 5) + labs(x = "County road", y = "Predicted probability") + theme(plot.margin = unit(c(0.5, 0.5, 0.5, 0.5), "cm"), legend.position="none", text = element_text(size=36), axis.title.y = element_text(margin = margin(t = 0, r = 15, b = 10, l = 0))) ## SDcomrdlmn DFSDcomrdlmn <- with(CovariateDFL, data.frame(SDmixedlmn = mean(SDmixedlmn), SDconiflmn = mean(SDconiflmn), SDnatrdlmn = mean(SDnatrdlmn), SDnatrdlmn2 = mean(SDnatrdlmn2), SDcourdlmn = mean(SDcourdlmn), SDcourdlmn2 = mean(SDcourdlmn2), comrdlmn, SDcomrdlmn, SDcomrdlmn2 = mean(SDcomrdlmn2), SDpastlmn = mean(SDpastlmn), SDpastlmn2 = mean(SDpastlmn2), SDagriclmn = mean(SDagriclmn), SDagriclmn2 = mean(SDagriclmn2), SDartiflmn = mean(SDartiflmn))) head(DFSDcomrdlmn) summary(DFSDcomrdlmn$comrdlmn) summary(DFSDcomrdlmn$SDcomrdlmn) ## Obtain predicted probabilities DFSDcomrdlmn$PredProb <- predict(H16, newdata = DFSDcomrdlmn, type = "response") DFSDcomrdlmn ## Plot predicted probabilities ggplot(DFSDcomrdlmn, aes(x = comrdlmn, y = PredProb)) + xlim(c(0, 0.8)) + ylim(c(0, 1)) + geom_line(aes(colour = comrdlmn), size = 5) + labs(x = "Communal road", y = "Predicted probability") + theme(plot.margin = unit(c(0.5, 0.5, 0.5, 0.5), "cm"), legend.position="none", text = element_text(size=36), axis.title.y = element_text(margin = margin(t = 0, r = 15, b = 10, l = 0))) ## SDpastlmn DFSDpastlmn <- with(CovariateDFL, data.frame(SDmixedlmn = mean(SDmixedlmn), SDconiflmn = mean(SDconiflmn), SDnatrdlmn = mean(SDnatrdlmn), SDnatrdlmn2 = mean(SDnatrdlmn2), SDcourdlmn = mean(SDcourdlmn), SDcourdlmn2 = mean(SDcourdlmn2), SDcomrdlmn = mean(SDcomrdlmn), SDcomrdlmn2 = mean(SDcomrdlmn2), pastlmn, SDpastlmn, SDpastlmn2 = mean(SDpastlmn2), SDagriclmn = mean(SDagriclmn), SDagriclmn2 = mean(SDagriclmn2), SDartiflmn = mean(SDartiflmn))) head(DFSDpastlmn) summary(DFSDpastlmn$pastlmn) summary(DFSDpastlmn$SDpastlmn) ## Obtain predicted probabilities DFSDpastlmn$PredProb <- predict(H16, newdata = DFSDpastlmn, type = "response") DFSDpastlmn ## Plot predicted probabilities ggplot(DFSDpastlmn, aes(x = pastlmn, y = PredProb)) + xlim(c(0, 0.6)) + ylim(c(0, 1)) + geom_line(aes(colour = pastlmn), size = 5) + labs(x = "Pasture", y = "Predicted probability") + theme(plot.margin = unit(c(0.5, 0.5, 0.5, 0.5), "cm"), legend.position="none", text = element_text(size=36), axis.title.y = element_text(margin = margin(t = 0, r = 15, b = 10, l = 0))) ## SDagriclmn DFSDagriclmn <- with(CovariateDFL, data.frame(SDmixedlmn = mean(SDmixedlmn), SDconiflmn = mean(SDconiflmn), SDnatrdlmn = mean(SDnatrdlmn), SDnatrdlmn2 = mean(SDnatrdlmn2), SDcourdlmn = mean(SDcourdlmn), SDcourdlmn2 = mean(SDcourdlmn2), SDcomrdlmn = mean(SDcomrdlmn), SDcomrdlmn2 = mean(SDcomrdlmn2), SDpastlmn = mean(SDpastlmn), SDpastlmn2 = mean(SDpastlmn2), agriclmn, SDagriclmn, SDagriclmn2 = mean(SDagriclmn2), SDartiflmn = mean(SDartiflmn))) head(DFSDagriclmn) summary(DFSDagriclmn$agriclmn) summary(DFSDagriclmn$SDagriclmn) ## Obtain predicted probabilities DFSDagriclmn$PredProb <- predict(H16, newdata = DFSDagriclmn, type = "response") DFSDagriclmn ## Plot predicted probabilities ggplot(DFSDagriclmn, aes(x = agriclmn, y = PredProb)) + xlim(c(0, 1)) + ylim(c(0, 1)) + geom_line(aes(colour = agriclmn), size = 5) + labs(x = "Cultivation", y = "Predicted probability") + theme(plot.margin = unit(c(0.5, 0.5, 0.5, 0.5), "cm"), legend.position="none", text = element_text(size=36), axis.title.y = element_text(margin = margin(t = 0, r = 15, b = 10, l = 0))) ## SDartiflmn DFSDartiflmn <- with(CovariateDFL, data.frame(SDmixedlmn = mean(SDmixedlmn), SDconiflmn = mean(SDconiflmn), SDnatrdlmn = mean(SDnatrdlmn), SDnatrdlmn2 = mean(SDnatrdlmn2), SDcourdlmn = mean(SDcourdlmn), SDcourdlmn2 = mean(SDcourdlmn2), SDcomrdlmn = mean(SDcomrdlmn), SDcomrdlmn2 = mean(SDcomrdlmn2), SDpastlmn = mean(SDpastlmn), SDpastlmn2 = mean(SDpastlmn2), SDagriclmn = mean(SDagriclmn), SDagriclmn2 = mean(SDagriclmn2), artiflmn, SDartiflmn)) head(DFSDartiflmn) summary(DFSDartiflmn$artiflmn) summary(DFSDartiflmn$SDartiflmn) ## Obtain predicted probabilities DFSDartiflmn$PredProb <- predict(H16, newdata = DFSDartiflmn, type = "response") DFSDartiflmn ## Plot predicted probabilities ggplot(DFSDartiflmn, aes(x = artiflmn, y = PredProb)) + xlim(c(0, 0.6)) + ylim(c(0, 1)) + geom_line(aes(colour = artiflmn), size = 5) + labs(x = "Artificial", y = "Predicted probability") + theme(plot.margin = unit(c(0.5, 0.5, 0.5, 0.5), "cm"), legend.position="none", text = element_text(size=36), axis.title.y = element_text(margin = margin(t = 0, r = 15, b = 10, l = 0))) ## K-fold cross-validation ## Huberty's Rule percentTestData <- function(nvar) { # v1.1 (28 Mar 2013) # heuristic ('rule of thumb') of Huberty (1994; in Fielding & Bell 1997) to determine the test/training data ratio for presence-absence models # nvar: number of predictor variables huberty <- 1 / (1 + sqrt(nvar - 1)) return(round(100 * huberty, 1)) } percentTestData(14) # 14 represents the number of parameters in the model ## K-fold library(boot) cv.errorL <- cv.glm(RawLynx2, H16, K=5)$delta[1] 1-cv.errorL H17 <- glm(presence~SDbrdlflmn+SDnatrdlmn+SDnatrdlmn2+SDcourdlmn+SDcourdlmn2+SDcomrdlmn+SDcomrdlmn2+SDtrilmn+SDtrilmn2, family = binomial(logit), data = RawLynx2) H17 anova(H17) summary(H17) sqrt(diag(vcovPL(H17, lag = "NW1994"))) H18 <- glm(presence~SDbrdlflmn+SDconiflmn+SDnatrdlmn+SDnatrdlmn2+SDcourdlmn+SDcourdlmn2+SDcomrdlmn+SDcomrdlmn2+SDtrilmn+SDtrilmn2, family = binomial(logit), data = RawLynx2) H18 anova(H18) summary(H18) sqrt(diag(vcovPL(H18, lag = "NW1994"))) H19 <- glm(presence~SDbrdlflmn+SDnatrdlmn+SDnatrdlmn2+SDcourdlmn+SDcourdlmn2+SDcomrdlmn+SDcomrdlmn2+SDpastlmn+SDpastlmn2+SDartiflmn+SDtrilmn+SDtrilmn2, family = binomial(logit), data = RawLynx2) H19 anova(H19) summary(H19) sqrt(diag(vcovPL(H19, lag = "NW1994"))) H20 <- glm(presence~SDbrdlflmn+SDconiflmn+SDnatrdlmn+SDnatrdlmn2+SDcourdlmn+SDcourdlmn2+SDcomrdlmn+SDcomrdlmn2+SDpastlmn+SDpastlmn2+SDartiflmn+SDtrilmn+SDtrilmn2, family = binomial(logit), data = RawLynx2) H20 anova(H20) summary(H20) sqrt(diag(vcovPL(H20, lag = "NW1994"))) ## Full memory clean-up rm(list = ls()) # remove gc() # garbage collection .rs.restartR()