library(ff)
library(things)
library(rmaxent)
library(raster)
library(dismo)
library(sp)
library(latticeExtra)
library(data.table)
library(rgdal)
library(rgeos)
library(gdalUtils)

setwd("C:/Users/Yasmin/Desktop/Manuscripts/for_maxent_analysis")

# Load occurrence data
occ <- fread('current/spp.csv')
coordinates(occ) <- ~Longitude+Latitude
proj4string(occ) <- '+init=epsg:4326' # WGS84
occ_albers <- spTransform(occ, CRS('+init=epsg:3577'))

# Load background points
bg5000_albers <- readRDS('current/background_all_plants_oeh_avh_with_ibra.rds')

# Define the common extent from one of the CSIRO soil grids.
# spTransform() does not accept an Extent object, so coerce it to SpatialPolygons
# first (this assumes the source grid is in geographic coordinates, EPSG:4326).
e <- extent(raster('f:/Manuscripts/CSIRO_soil_data/bulk_density/BDW_000_005_05_N_P_AU_NAT_C_20140801.tif'))
e_poly <- as(e, 'SpatialPolygons')
proj4string(e_poly) <- '+init=epsg:4326'
e_albers <- spTransform(e_poly, CRS('+init=epsg:3577'))

# Project env rasters
## First clip to the common extent
ff <- list.files('env', '\\.tif$', full.names=TRUE)
sapply(ff, function(f) {
  gdalwarp(f, sub(dirname(f), 'env_clipped', f), r='bilinear', multi=TRUE,
           dstnodata=-9999, te=c(bbox(e)))
})

## Then project the climate grids to Albers (EPSG:3577) at 5000 m resolution
ff <- list.files('env_clipped', '\\.tif$', full.names=TRUE)
sapply(ff, function(f) {
  gdalwarp(f, sub(dirname(f), 'env_albers', f), t_srs='+init=epsg:3577', r='bilinear',
           tr=c(5000, 5000), multi=TRUE, dstnodata=-9999)
})

## ...and the soil grids too, snapped to the extent of the projected climate grids
ff2 <- list.files('C:/Users/Yasmin/Desktop/Manuscripts/CSIRO_soil_data', '20140801\\.tif$',
                  recursive=TRUE, full.names=TRUE)
sapply(ff2, function(f) {
  outfile <- sub(dirname(f), 'env_albers', f)
  if(!file.exists(outfile)) {
    gdalwarp(f, outfile, t_srs='+init=epsg:3577', r='bilinear', tr=c(5000, 5000),
             multi=TRUE, dstnodata=-9999, verbose=TRUE,
             te=c(bbox(raster('env_albers/coldp.tif'))))
  }
})

# Simplify the names of the soil raster files (e.g. BDW_..._20140801.tif -> bdw.tif)
ff <- list.files('env_albers/', '20140801\\.tif$', full.names=TRUE)
file.rename(ff, tolower(sub('/([^_]+)_.*', '/\\1.tif', ff)))

############################################################################################

# Load a template raster (geometry only) to determine cell numbers for points;
# choose the template matching the predictor set being modelled
r5000 <- raster(raster('C:/Users/Yasmin/Desktop/Manuscripts/for_maxent_analysis/env_albers/bdw.tif'))
#r5000 <- raster(raster('C:/Users/Yasmin/Desktop/Manuscripts/for_maxent_analysis/env_albers_clim/coldp.tif'))

# Predictor set (choose one)
predictors_current <- list.files('env_albers/', '\\.tif$', full.names=TRUE)
#predictors_current <- list.files('env_albers_soil/', '\\.tif$', full.names=TRUE)
#predictors_current <- list.files('env_albers_clim/', '\\.tif$', full.names=TRUE)

head(occ_albers)
occ_by_sp <- split(occ_albers, occ_albers$Species)
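## Optional sanity check (illustrative addition, not part of the original workflow):
## confirm that every projected grid in `predictors_current` matches the geometry of
## the template raster (extent, dimensions, CRS, resolution) before the stack/ff
## steps below. compareRaster() stops with an error on any mismatch.
invisible(sapply(predictors_current, function(f) {
  compareRaster(raster(f), r5000, extent=TRUE, rowcol=TRUE, crs=TRUE, res=TRUE)
}))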
######################################################################################

# Set the base output directory. Change to an external hard drive if space on the
# computer's hard drives is low.
outdir_base <- "maxent_models/output_soil_product"
#outdir_base <- "maxent_models/output_clim_product"
#outdir_base <- "maxent_models/output_clim_soil_product"

# Stack all current predictor rasters and store their cell values in an ff matrix
# on disk (rows = cells, columns = predictors)
s <- stack(predictors_current)
s_ff <- ff(vmode="double", dim=c(ncell(s), nlayers(s)),
           filename='data/climsoil/current_climsoil_Australia_5000m.ffdata')
for(i in 1:nlayers(s)) {
  s_ff[, i] <- s[[i]][]
}
colnames(s_ff) <- names(s)
saveRDS(s_ff, file='data/climsoil/current_climsoil_Australia_ff_5000m.rds')
rm(s); gc()

# To read previously stacked data (choose the file matching the predictor set)
s_ff <- readRDS('C:/Users/Yasmin/Desktop/Manuscripts/for_maxent_analysis/data/climsoil/current_climsoil_Australia_ff_5000m.rds')
#s_ff <- readRDS('data/clim/current_clim_Australia_ff_5000m.rds')
#s_ff <- readRDS('C:/Users/Yasmin/Desktop/Manuscripts/for_maxent_analysis/data/soil/current_soil_Australia_ff_5000m.rds')

#sp_name <- list.files('C:/Users/Yasmin/Desktop/Manuscripts/for_maxent_analysis/maxent_models/output_clim_soil')

# Fit a 5-fold cross-validated Maxent model for each species
lapply(seq_along(occ_by_sp), function(i) {
  require(ff)
  sp_name <- names(occ_by_sp)[i]
  message('Doing species: ', sp_name)

  # Set and create (if necessary) the output directory
  outdir_sp <- file.path(outdir_base, sp_name)
  if(!file.exists(outdir_sp)) dir.create(outdir_sp, recursive=TRUE)

  # Subset the occurrence SpatialPointsDataFrame to the current species
  sp <- subset(occ_albers, Species == sp_name)

  # Plot the background polygons (IBRA regions adjacent to the occurrences)
  png(file.path(outdir_sp, 'bg_polys.png'), type='cairo')
  sp_bg_polys <- adjacent_ibra(sp, plot_output=TRUE, min_n=1,
                               include_type='polygon', expand_type='polygon')
  dev.off()

  # Get unique cell numbers for species occurrences, dropping duplicate cells and NAs
  # (points outside the extent of the predictor data)
  sp_cells <- cellFromXY(r5000, sp)
  sp_not_dupes <- which(!duplicated(sp_cells) & !is.na(sp_cells))
  sp <- sp[sp_not_dupes, ]
  sp_cells <- sp_cells[sp_not_dupes]
  message(nrow(sp), ' occurrence records (unique cells).')

  # Subset the background to the selected polygons, again dropping duplicate cells
  # and NAs (points outside the extent of the predictor data)
  sp_bg <- subset(bg5000_albers, POLY_ID %in% unique(sp_bg_polys$POLY_ID))
  sp_bg_cells <- cellFromXY(r5000, sp_bg)
  sp_bg_not_dupes <- which(!duplicated(sp_bg_cells) & !is.na(sp_bg_cells))
  sp_bg <- sp_bg[sp_bg_not_dupes, ]
  sp_bg_cells <- sp_bg_cells[sp_bg_not_dupes]

  # Save objects for future reference
  saveRDS(sp_bg_polys, file.path(outdir_sp, 'bg_polys.rds'))
  writeOGR(sp_bg_polys, outdir_sp, 'bg_polys', 'ESRI Shapefile', overwrite_layer=TRUE)
  saveRDS(sp_bg, file.path(outdir_sp, 'bg.rds'))
  writeOGR(sp_bg, outdir_sp, 'bg', 'ESRI Shapefile', overwrite_layer=TRUE)
  saveRDS(sp, file.path(outdir_sp, 'occ.rds'))
  writeOGR(sp, outdir_sp, 'occ', 'ESRI Shapefile', overwrite_layer=TRUE)

  if(length(sp) < 5) {
    warning('Fewer occurrence records than the number of cross-validation replicates ',
            'for species ', sp_name, '. Model not fit for this species.')
    return(NULL)
  }

  # Sample predictors at occurrence and background points (SWD format)
  swd_occ <- s_ff[sp_cells, ]
  swd_occ <- SpatialPointsDataFrame(coordinates(sp), as.data.frame(swd_occ),
                                    proj4string=CRS('+init=epsg:3577'))
  saveRDS(swd_occ, file.path(outdir_sp, 'occ_swd.rds'))
  writeOGR(swd_occ, outdir_sp, 'occ_swd', 'ESRI Shapefile', overwrite_layer=TRUE)
  swd_bg <- s_ff[sp_bg_cells, ]
  swd_bg <- SpatialPointsDataFrame(coordinates(sp_bg), as.data.frame(swd_bg),
                                   proj4string=CRS('+init=epsg:3577'))
  saveRDS(swd_bg, file.path(outdir_sp, 'bg_swd.rds'))
  writeOGR(swd_bg, outdir_sp, 'bg_swd', 'ESRI Shapefile', overwrite_layer=TRUE)

  # Combine occurrence and background SWD data
  swd <- as.data.frame(rbind(swd_occ@data, swd_bg@data))
  saveRDS(swd, file.path(outdir_sp, 'swd.rds'))
  pa <- rep(1:0, c(nrow(swd_occ), nrow(swd_bg)))

  # Fit the Maxent model with 5-fold cross-validation
  me <- maxent(swd, pa, path=outdir_sp, args=c(
    'threshold=false',
    'responsecurves=true',
    'hinge=false',
    'jackknife=false',
    'replicates=5',
    'writeplotdata=true',
    'writebackgroundpredictions=true',
    'writemess=true'))

  # Save the fitted model object and the model-fitting data
  saveRDS(list(me=me, swd=swd, pa=pa), file.path(outdir_sp, 'maxent_fitted.rds'))
  return(invisible(NULL))
})
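## Optional summary (illustrative addition, not part of the original workflow):
## collate mean cross-validated test AUC per species from the maxentResults.csv files
## that Maxent writes into each species' output folder. Assumes the usual
## maxentResults.csv layout (a 'Test AUC' column, read as `Test.AUC` by read.csv);
## the summary file name test_auc_summary.csv is illustrative.
res_files <- list.files(outdir_base, 'maxentResults\\.csv$', recursive=TRUE, full.names=TRUE)
auc <- data.frame(
  species = basename(dirname(res_files)),
  test_auc = unname(sapply(res_files, function(f) {
    d <- read.csv(f)
    mean(d$Test.AUC, na.rm=TRUE)  # mean over the cross-validation folds
  })))
write.csv(auc, file.path(outdir_base, 'test_auc_summary.csv'), row.names=FALSE)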
#############################################################################
## Project models

library(ff)
library(raster)
library(data.table)
library(dismo)
library(things)
library(rmaxent)

# Predictor set to project with (choose one)
#dirs <- 'env_albers_clim'
#dirs <- 'env_albers_soil'
dirs <- 'env_albers'

outdir_base <- "maxent_models/output_soil_product"
#outdir_base <- "maxent_models/output_clim_product"
#outdir_base <- "maxent_models/output_clim_soil_product"

# Create `locs5000`, containing cell numbers and corresponding coordinates of
# non-NA cells, plus a values-free template raster.
## Load the template 5000 m raster (summing the stack makes a cell NA if it is NA
## in any layer)
r5000 <- sum(stack(list.files(dirs[1], full.names=TRUE)))
locs5000 <- which(!is.na(values(r5000)))
saveRDS(raster(r5000), 'maxent_models/r5000_template.rds')  # geometry only
locs5000 <- cbind(cell=locs5000, xyFromCell(r5000, locs5000))
saveRDS(locs5000, 'maxent_models/locs5000.rds')

locs5000 <- readRDS('maxent_models/locs5000.rds')
cells <- locs5000[, 'cell']
rm(locs5000)
r5000 <- readRDS('maxent_models/r5000_template.rds')
gc()

# Using ff_matrix objects (this will make calculating the mean and sd a lot easier).
# Loop over the predictor sets, holding each set's values in memory while looping
# over the fitted SDMs.
models <- list.files(outdir_base, '^maxent_fitted\\.rds$', recursive=TRUE, full.names=TRUE)

lapply(dirs, function(d) {
  require(ff)
  files <- list.files(d, '\\.tif$', full.names=TRUE)
  scen <- gsub('/', '_', sub('.*complete/', '', d))
  message('Preparing enviro data for ', scen)
  s <- raster::stack(files)

  # Create an empty ff_matrix (rows = non-NA cells, columns = predictors)...
  s_ff <- ff(vmode="double", dim=c(length(cells), nlayers(s)),
             filename=ff_swd <- tempfile(fileext='.ff'))
  # ...and fill it with the predictor values at the non-NA cells
  for(i in 1:nlayers(s)) {
    s_ff[, i] <- s[[i]][][cells]
  }
  colnames(s_ff) <- names(s)
  rm(s); gc()

  lapply(models, function(model) {
    species <- basename(dirname(model))
    outfile <- sprintf('maxent_projections/soil_product/%s_%s_5000m_prediction.ff',
    #outfile <- sprintf('maxent_projections/clim_product/%s_%s_5000m_prediction.ff',
    #outfile <- sprintf('maxent_projections/clim_soil_product/%s_%s_5000m_prediction.ff',
                       gsub(' +', '_', species), scen)
    occ <- readRDS(sub('maxent_fitted\\.rds$', 'occ.rds', model))
    if(!file.exists(outfile)) {
      # Ensure the projection output directory exists
      dir.create(dirname(outfile), recursive=TRUE, showWarnings=FALSE)

      r_mean <- r_mean_raw <- r_sd <- r5000
      mm <- readRDS(model)$me
      message('Doing species ', species)
      nfolds <- length(mm@models)

      # Per-fold logistic predictions
      preds_ff <- ff(vmode="double", dim=c(length(cells), nfolds), filename=tempfile(),
                     dimnames=list(NULL, paste0('fold', seq_len(nfolds))))
      # Mean and sd of the logistic predictions across folds
      out_ff <- ff(vmode="double", dim=c(length(cells), 2), filename=outfile,
                   dimnames=list(NULL, c('avg', 'sd')))
      finalizer(out_ff) <- 'close'
      # Per-fold raw predictions
      preds_ff_raw <- ff(vmode="double", dim=c(length(cells), nfolds), filename=tempfile(),
                         dimnames=list(NULL, paste0('fold', seq_len(nfolds))))
      finalizer(preds_ff_raw) <- 'close'
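      ## The step that fills `preds_ff` and `preds_ff_raw` with per-fold predictions was
      ## not included in this script; the loop below is a minimal sketch of one way to do
      ## it, assuming rmaxent::project(), which takes a fitted dismo MaxEnt model and a
      ## data.frame of predictor values and returns `$prediction_logistic` and
      ## `$prediction_raw`. It loads all predictor values into memory at once; chunk over
      ## rows if memory is limited.
      newdata <- as.data.frame(s_ff[, ])  # predictor values at the non-NA cells
      for(fold in seq_len(nfolds)) {
        p <- rmaxent::project(mm@models[[fold]], newdata)
        preds_ff[, fold] <- p$prediction_logistic
        preds_ff_raw[, fold] <- p$prediction_raw
      }
      rm(newdata); gc()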
      # Logistic output mean
      message('Calculating mean over folds')
      out_ff[, 1] <- rowMeans(preds_ff[, seq_len(nfolds)])
      r_mean[cells] <- out_ff[, 1]
      writeRaster(r_mean, sub('\\.ff$', '_mean.tif', outfile))
      writeRaster(r_mean, sub('\\.ff$', '_mean.asc', outfile))
      png(sub('\\.ff$', '_mean.png', outfile), 4, 4, units='in', res=300, type='cairo')
      library(viridis)
      plot(r_mean, col=viridis(100), zlim=c(0, 1))
      dev.off()

      # Logistic output sd
      message('Calculating sd over folds')
      out_ff[, 2] <- sqrt(rowSums(sweep(preds_ff[, seq_len(nfolds)], 1, out_ff[, 1])^2) /
                            (nfolds - 1))
      r_sd[cells] <- out_ff[, 2]
      writeRaster(r_sd, sub('\\.ff$', '_sd.tif', outfile))

      # Raw output mean
      message('Calculating mean over folds (raw)')
      r_mean_raw[cells] <- rowMeans(preds_ff_raw[, seq_len(nfolds)])
      writeRaster(r_mean_raw, sub('\\.ff$', '_mean_raw.tif', outfile))

      saveRDS(out_ff, extension(outfile, 'rds'))
      close(out_ff)
      delete(preds_ff)
      delete(preds_ff_raw)
      rm(preds_ff, preds_ff_raw, out_ff, r_mean, r_mean_raw, r_sd)
    }
  })
  delete(s_ff)
  rm(s_ff)
  gc()
})

##############################################################################################################
# Calculate maximum TSS (true skill statistic) for each species from the Maxent
# omission files. With presence-background data, TSS at a given threshold is
# approximated as 1 - (test omission rate + fractional predicted area).
maxtss2 <- function(x) {
  ff <- list.files(x, 'omission\\.csv$', full.names=TRUE)
  tss_max <- sapply(ff, function(f) {
    d <- read.csv(f)
    1 - min(d$Test.omission + d$Fractional.area)
  })
  c(mean=mean(tss_max), sd=sd(tss_max))
}

# Climate only
dirs <- dirname(list.files('D:/Manuscripts/for_maxent_analysis/maxent_models/output_clim_product',
                           'species_0_omission\\.csv$', recursive=TRUE, full.names=TRUE))
tss <- t(sapply(dirs, maxtss2))
write.csv(tss, 'D:/Manuscripts/for_maxent_analysis/maxent_models/output_clim_product/max_tss.csv')

# Climate + soil
dirs <- dirname(list.files('D:/Manuscripts/for_maxent_analysis/maxent_models/output_clim_soil_product',
                           'species_0_omission\\.csv$', recursive=TRUE, full.names=TRUE))
tss <- t(sapply(dirs, maxtss2))
write.csv(tss, 'D:/Manuscripts/for_maxent_analysis/maxent_models/output_clim_soil_product/max_tss.csv')

# Soil only
dirs <- dirname(list.files('D:/Manuscripts/for_maxent_analysis/maxent_models/output_soil_product',
                           'species_0_omission\\.csv$', recursive=TRUE, full.names=TRUE))
tss <- t(sapply(dirs, maxtss2))
write.csv(tss, 'D:/Manuscripts/for_maxent_analysis/maxent_models/output_soil_product/max_tss.csv')

################################################################################################################
# Calculate change in habitat area between binary (thresholded) predictions

library(raster)

# Function to calculate change summaries between two binary rasters: counts of cells
# (scaled by a per-cell area factor of 29) that remain suitable, remain unsuitable,
# are gained or are lost, plus the corresponding percentages.
dist_change_binary <- function(from, to) {
  require(raster)
  s <- stack(from, to)
  d <- data.frame(
    from = from,
    to = to,
    count_from = sum(values(s[[1]])*29, na.rm=TRUE),
    count_to = sum(values(s[[2]])*29, na.rm=TRUE),
    stay_good = sum(values(s[[1]] & s[[2]])*29, na.rm=TRUE),
    stay_bad = sum(values(!(s[[1]] | s[[2]]))*29, na.rm=TRUE),
    gained = sum(values(!s[[1]] & s[[2]])*29, na.rm=TRUE),
    lost = sum(values(s[[1]] & !s[[2]])*29, na.rm=TRUE))
  within(d, {
    percent_from_lost <- lost/count_from*100
    percent_to_gained <- gained/count_to*100
    percent_change <- (count_to - count_from)/count_from*100
    percent_stay_good <- stay_good/count_from*100
  })
}

ff <- list.files("C:/Users/Yasmin/Desktop/Manuscripts/for_maxent_analysis/binary/all",
                 full.names=TRUE)
# Group each species' binary rasters so that the *_clim.tif and *_climsoil.tif
# predictions fall in the same group
ff_list <- split(ff, sub('_clim.tif', '_climsoil.tif', ff))
change_summaries <- lapply(ff_list, function(x) {
  # Compare the first raster of each group against each of the others
  do.call(rbind, lapply(seq_along(x)[-1], function(y) {
    dist_change_binary(from=x[1], to=x[y])
  }))
})
change_summaries_mat <- do.call(rbind, change_summaries)
write.csv(change_summaries_mat, 'distr_diff.csv', row.names=FALSE)

##################################################################################################
# Extract the number of fitted parameters (non-zero lambdas) from the Maxent .lambdas files

library(rmaxent)
library(dplyr)

ff <- list.files('D:/Manuscripts/for_maxent_analysis/maxent_models/output_soil_product',
                 '\\.lambdas$', recursive=TRUE, full.names=TRUE)
# One group of .lambdas files (one per cross-validation fold) per species folder
k <- lapply(split(ff, basename(dirname(ff))), function(sp) {
  # Features with non-zero lambdas are those retained in the fitted model
  lam <- lapply(sp, function(x) subset(parse_lambdas(x)$lambdas, lambda != 0))
  lam <- lapply(lam, function(x) {
    x$type <- sub('.*product', 'product', x$type)
    x
  })
  all <- mean(sapply(lam, nrow))
  k <- unlist(lapply(lam, function(x) table(x$type)))
  k_mean <- rbind(list('all', all), aggregate(k, list(type=names(k)), mean))
  c(setNames(k_mean$x, k_mean$type))
})
k <- bind_rows(lapply(k, as.list), .id='species')
write.csv(k, 'parameters_soil_product.csv', row.names=FALSE)
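## Optional comparison (illustrative addition, not part of the original workflow):
## combine the three max_tss.csv files written above into one table so that maximum
## TSS can be compared across the climate-only, soil-only and climate+soil models.
## Assumes those files exist at the paths used above; the output file name
## max_tss_comparison.csv is illustrative.
base <- 'D:/Manuscripts/for_maxent_analysis/maxent_models'
sets <- c(clim='output_clim_product', soil='output_soil_product',
          climsoil='output_clim_soil_product')
tss_all <- do.call(rbind, lapply(names(sets), function(nm) {
  d <- read.csv(file.path(base, sets[nm], 'max_tss.csv'), row.names=1)
  data.frame(predictor_set=nm, species=basename(rownames(d)),
             max_tss_mean=d$mean, max_tss_sd=d$sd, row.names=NULL)
}))
write.csv(tss_all, file.path(base, 'max_tss_comparison.csv'), row.names=FALSE)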