library(ff)
library(things)
library(rmaxent)
library(raster)
library(dismo)
library(sp)
library(latticeExtra)
library(data.table)
library(rgdal)
library(rgeos)
library(gdalUtils)

setwd("C:/Users/Yasmin/Desktop/Manuscripts/for_maxent_analysis")

# Load occurrence data
occ <- fread('current/spp.csv')
coordinates(occ) <- ~Longitude+Latitude
proj4string(occ) <- '+init=epsg:4326' # WGS84
occ_albers <- spTransform(occ, CRS('+init=epsg:3577'))

# Load background points
bg5000_albers <- readRDS('current/background_all_plants_oeh_avh_with_ibra.rds')

# Define the common extent from one of the CSIRO soil grids.
# spTransform() does not accept an Extent object, so coerce it to SpatialPolygons
# first (this assumes the source grid is in geographic coordinates, EPSG:4326).
e <- extent(raster('f:/Manuscripts/CSIRO_soil_data/bulk_density/BDW_000_005_05_N_P_AU_NAT_C_20140801.tif'))
e_poly <- as(e, 'SpatialPolygons')
proj4string(e_poly) <- '+init=epsg:4326'
e_albers <- spTransform(e_poly, CRS('+init=epsg:3577'))

# Project env rasters
## First clip to the common extent
ff <- list.files('env', '\\.tif$', full.names=TRUE)
sapply(ff, function(f) {
  gdalwarp(f, sub(dirname(f), 'env_clipped', f), r='bilinear', multi=TRUE,
           dstnodata=-9999, te=c(bbox(e)))
})

## Then project the climate grids to Albers (EPSG:3577) at 5000 m resolution
ff <- list.files('env_clipped', '\\.tif$', full.names=TRUE)
sapply(ff, function(f) {
  gdalwarp(f, sub(dirname(f), 'env_albers', f), t_srs='+init=epsg:3577', r='bilinear',
           tr=c(5000, 5000), multi=TRUE, dstnodata=-9999)
})

## ...and the soil grids too, snapped to the extent of the projected climate grids
ff2 <- list.files('C:/Users/Yasmin/Desktop/Manuscripts/CSIRO_soil_data', '20140801\\.tif$',
                  recursive=TRUE, full.names=TRUE)
sapply(ff2, function(f) {
  outfile <- sub(dirname(f), 'env_albers', f)
  if(!file.exists(outfile)) {
    gdalwarp(f, outfile, t_srs='+init=epsg:3577', r='bilinear', tr=c(5000, 5000),
             multi=TRUE, dstnodata=-9999, verbose=TRUE,
             te=c(bbox(raster('env_albers/coldp.tif'))))
  }
})

# Simplify the names of the soil raster files (e.g. BDW_..._20140801.tif -> bdw.tif)
ff <- list.files('env_albers/', '20140801\\.tif$', full.names=TRUE)
file.rename(ff, tolower(sub('/([^_]+)_.*', '/\\1.tif', ff)))

############################################################################################

# Load a template raster (geometry only) to determine cell numbers for points;
# choose the template matching the predictor set being modelled
r5000 <- raster(raster('C:/Users/Yasmin/Desktop/Manuscripts/for_maxent_analysis/env_albers/bdw.tif'))
#r5000 <- raster(raster('C:/Users/Yasmin/Desktop/Manuscripts/for_maxent_analysis/env_albers_clim/coldp.tif'))

# Predictor set (choose one)
predictors_current <- list.files('env_albers/', '\\.tif$', full.names=TRUE)
#predictors_current <- list.files('env_albers_soil/', '\\.tif$', full.names=TRUE)
#predictors_current <- list.files('env_albers_clim/', '\\.tif$', full.names=TRUE)

head(occ_albers)
occ_by_sp <- split(occ_albers, occ_albers$Species)
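## Optional sanity check (illustrative addition, not part of the original workflow):
## confirm that every projected grid in `predictors_current` matches the geometry of
## the template raster (extent, dimensions, CRS, resolution) before the stack/ff
## steps below. compareRaster() stops with an error on any mismatch.
invisible(sapply(predictors_current, function(f) {
  compareRaster(raster(f), r5000, extent=TRUE, rowcol=TRUE, crs=TRUE, res=TRUE)
}))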
######################################################################################

# Set the base output directory. Change to an external hard drive if space on the
# computer's hard drives is low.
outdir_base <- "maxent_models/output_soil_product"
#outdir_base <- "maxent_models/output_clim_product"
#outdir_base <- "maxent_models/output_clim_soil_product"

# Stack all current predictor rasters and store their cell values in an ff matrix
# on disk (rows = cells, columns = predictors)
s <- stack(predictors_current)
s_ff <- ff(vmode="double", dim=c(ncell(s), nlayers(s)),
           filename='data/climsoil/current_climsoil_Australia_5000m.ffdata')
for(i in 1:nlayers(s)) {
  s_ff[, i] <- s[[i]][]
}
colnames(s_ff) <- names(s)
saveRDS(s_ff, file='data/climsoil/current_climsoil_Australia_ff_5000m.rds')
rm(s); gc()

# To read previously stacked data (choose the file matching the predictor set)
s_ff <- readRDS('C:/Users/Yasmin/Desktop/Manuscripts/for_maxent_analysis/data/climsoil/current_climsoil_Australia_ff_5000m.rds')
#s_ff <- readRDS('data/clim/current_clim_Australia_ff_5000m.rds')
#s_ff <- readRDS('C:/Users/Yasmin/Desktop/Manuscripts/for_maxent_analysis/data/soil/current_soil_Australia_ff_5000m.rds')

#sp_name <- list.files('C:/Users/Yasmin/Desktop/Manuscripts/for_maxent_analysis/maxent_models/output_clim_soil')

# Fit a 5-fold cross-validated Maxent model for each species
lapply(seq_along(occ_by_sp), function(i) {
  require(ff)
  sp_name <- names(occ_by_sp)[i]
  message('Doing species: ', sp_name)

  # Set and create (if necessary) the output directory
  outdir_sp <- file.path(outdir_base, sp_name)
  if(!file.exists(outdir_sp)) dir.create(outdir_sp, recursive=TRUE)

  # Subset the occurrence SpatialPointsDataFrame to the current species
  sp <- subset(occ_albers, Species == sp_name)

  # Plot the background polygons (IBRA regions adjacent to the occurrences)
  png(file.path(outdir_sp, 'bg_polys.png'), type='cairo')
  sp_bg_polys <- adjacent_ibra(sp, plot_output=TRUE, min_n=1,
                               include_type='polygon', expand_type='polygon')
  dev.off()

  # Get unique cell numbers for species occurrences, dropping duplicate cells and NAs
  # (points outside the extent of the predictor data)
  sp_cells <- cellFromXY(r5000, sp)
  sp_not_dupes <- which(!duplicated(sp_cells) & !is.na(sp_cells))
  sp <- sp[sp_not_dupes, ]
  sp_cells <- sp_cells[sp_not_dupes]
  message(nrow(sp), ' occurrence records (unique cells).')

  # Subset the background to the selected polygons, again dropping duplicate cells
  # and NAs (points outside the extent of the predictor data)
  sp_bg <- subset(bg5000_albers, POLY_ID %in% unique(sp_bg_polys$POLY_ID))
  sp_bg_cells <- cellFromXY(r5000, sp_bg)
  sp_bg_not_dupes <- which(!duplicated(sp_bg_cells) & !is.na(sp_bg_cells))
  sp_bg <- sp_bg[sp_bg_not_dupes, ]
  sp_bg_cells <- sp_bg_cells[sp_bg_not_dupes]

  # Save objects for future reference
  saveRDS(sp_bg_polys, file.path(outdir_sp, 'bg_polys.rds'))
  writeOGR(sp_bg_polys, outdir_sp, 'bg_polys', 'ESRI Shapefile', overwrite_layer=TRUE)
  saveRDS(sp_bg, file.path(outdir_sp, 'bg.rds'))
  writeOGR(sp_bg, outdir_sp, 'bg', 'ESRI Shapefile', overwrite_layer=TRUE)
  saveRDS(sp, file.path(outdir_sp, 'occ.rds'))
  writeOGR(sp, outdir_sp, 'occ', 'ESRI Shapefile', overwrite_layer=TRUE)

  if(length(sp) < 5) {
    warning('Fewer occurrence records than the number of cross-validation replicates ',
            'for species ', sp_name, '. Model not fit for this species.')
    return(NULL)
  }

  # Sample predictors at occurrence and background points (SWD format)
  swd_occ <- s_ff[sp_cells, ]
  swd_occ <- SpatialPointsDataFrame(coordinates(sp), as.data.frame(swd_occ),
                                    proj4string=CRS('+init=epsg:3577'))
  saveRDS(swd_occ, file.path(outdir_sp, 'occ_swd.rds'))
  writeOGR(swd_occ, outdir_sp, 'occ_swd', 'ESRI Shapefile', overwrite_layer=TRUE)
  swd_bg <- s_ff[sp_bg_cells, ]
  swd_bg <- SpatialPointsDataFrame(coordinates(sp_bg), as.data.frame(swd_bg),
                                   proj4string=CRS('+init=epsg:3577'))
  saveRDS(swd_bg, file.path(outdir_sp, 'bg_swd.rds'))
  writeOGR(swd_bg, outdir_sp, 'bg_swd', 'ESRI Shapefile', overwrite_layer=TRUE)

  # Combine occurrence and background SWD data
  swd <- as.data.frame(rbind(swd_occ@data, swd_bg@data))
  saveRDS(swd, file.path(outdir_sp, 'swd.rds'))
  pa <- rep(1:0, c(nrow(swd_occ), nrow(swd_bg)))

  # Fit the Maxent model with 5-fold cross-validation
  me <- maxent(swd, pa, path=outdir_sp, args=c(
    'threshold=false',
    'responsecurves=true',
    'hinge=false',
    'jackknife=false',
    'replicates=5',
    'writeplotdata=true',
    'writebackgroundpredictions=true',
    'writemess=true'))

  # Save the fitted model object and the model-fitting data
  saveRDS(list(me=me, swd=swd, pa=pa), file.path(outdir_sp, 'maxent_fitted.rds'))
  return(invisible(NULL))
})
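## Optional summary (illustrative addition, not part of the original workflow):
## collate mean cross-validated test AUC per species from the maxentResults.csv files
## that Maxent writes into each species' output folder. Assumes the usual
## maxentResults.csv layout (a 'Test AUC' column, read as `Test.AUC` by read.csv);
## the summary file name test_auc_summary.csv is illustrative.
res_files <- list.files(outdir_base, 'maxentResults\\.csv$', recursive=TRUE, full.names=TRUE)
auc <- data.frame(
  species = basename(dirname(res_files)),
  test_auc = unname(sapply(res_files, function(f) {
    d <- read.csv(f)
    mean(d$Test.AUC, na.rm=TRUE)  # mean over the cross-validation folds
  })))
write.csv(auc, file.path(outdir_base, 'test_auc_summary.csv'), row.names=FALSE)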
#############################################################################
## Project models

library(ff)
library(raster)
library(data.table)
library(dismo)
library(things)
library(rmaxent)

# Predictor set to project with (choose one)
#dirs <- 'env_albers_clim'
#dirs <- 'env_albers_soil'
dirs <- 'env_albers'

outdir_base <- "maxent_models/output_soil_product"
#outdir_base <- "maxent_models/output_clim_product"
#outdir_base <- "maxent_models/output_clim_soil_product"

# Create `locs5000`, containing cell numbers and corresponding coordinates of
# non-NA cells, plus a values-free template raster.
## Load the template 5000 m raster (summing the stack makes a cell NA if it is NA
## in any layer)
r5000 <- sum(stack(list.files(dirs[1], full.names=TRUE)))
locs5000 <- which(!is.na(values(r5000)))
saveRDS(raster(r5000), 'maxent_models/r5000_template.rds')  # geometry only
locs5000 <- cbind(cell=locs5000, xyFromCell(r5000, locs5000))
saveRDS(locs5000, 'maxent_models/locs5000.rds')

locs5000 <- readRDS('maxent_models/locs5000.rds')
cells <- locs5000[, 'cell']
rm(locs5000)
r5000 <- readRDS('maxent_models/r5000_template.rds')
gc()

# Using ff_matrix objects (this will make calculating the mean and sd a lot easier).
# Loop over the predictor sets, holding each set's values in memory while looping
# over the fitted SDMs.
models <- list.files(outdir_base, '^maxent_fitted\\.rds$', recursive=TRUE, full.names=TRUE)

lapply(dirs, function(d) {
  require(ff)
  files <- list.files(d, '\\.tif$', full.names=TRUE)
  scen <- gsub('/', '_', sub('.*complete/', '', d))
  message('Preparing enviro data for ', scen)
  s <- raster::stack(files)

  # Create an empty ff_matrix (rows = non-NA cells, columns = predictors)...
  s_ff <- ff(vmode="double", dim=c(length(cells), nlayers(s)),
             filename=ff_swd <- tempfile(fileext='.ff'))
  # ...and fill it with the predictor values at the non-NA cells
  for(i in 1:nlayers(s)) {
    s_ff[, i] <- s[[i]][][cells]
  }
  colnames(s_ff) <- names(s)
  rm(s); gc()

  lapply(models, function(model) {
    species <- basename(dirname(model))
    outfile <- sprintf('maxent_projections/soil_product/%s_%s_5000m_prediction.ff',
    #outfile <- sprintf('maxent_projections/clim_product/%s_%s_5000m_prediction.ff',
    #outfile <- sprintf('maxent_projections/clim_soil_product/%s_%s_5000m_prediction.ff',
                       gsub(' +', '_', species), scen)
    occ <- readRDS(sub('maxent_fitted\\.rds$', 'occ.rds', model))
    if(!file.exists(outfile)) {
      # Ensure the projection output directory exists
      dir.create(dirname(outfile), recursive=TRUE, showWarnings=FALSE)

      r_mean <- r_mean_raw <- r_sd <- r5000
      mm <- readRDS(model)$me
      message('Doing species ', species)
      nfolds <- length(mm@models)

      # Per-fold logistic predictions
      preds_ff <- ff(vmode="double", dim=c(length(cells), nfolds), filename=tempfile(),
                     dimnames=list(NULL, paste0('fold', seq_len(nfolds))))
      # Mean and sd of the logistic predictions across folds
      out_ff <- ff(vmode="double", dim=c(length(cells), 2), filename=outfile,
                   dimnames=list(NULL, c('avg', 'sd')))
      finalizer(out_ff) <- 'close'
      # Per-fold raw predictions
      preds_ff_raw <- ff(vmode="double", dim=c(length(cells), nfolds), filename=tempfile(),
                         dimnames=list(NULL, paste0('fold', seq_len(nfolds))))
      finalizer(preds_ff_raw) <- 'close'
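      ## The step that fills `preds_ff` and `preds_ff_raw` with per-fold predictions was
      ## not included in this script; the loop below is a minimal sketch of one way to do
      ## it, assuming rmaxent::project(), which takes a fitted dismo MaxEnt model and a
      ## data.frame of predictor values and returns `$prediction_logistic` and
      ## `$prediction_raw`. It loads all predictor values into memory at once; chunk over
      ## rows if memory is limited.
      newdata <- as.data.frame(s_ff[, ])  # predictor values at the non-NA cells
      for(fold in seq_len(nfolds)) {
        p <- rmaxent::project(mm@models[[fold]], newdata)
        preds_ff[, fold] <- p$prediction_logistic
        preds_ff_raw[, fold] <- p$prediction_raw
      }
      rm(newdata); gc()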
      # Logistic output mean
      message('Calculating mean over folds')
      out_ff[, 1] <- rowMeans(preds_ff[, seq_len(nfolds)])
      r_mean[cells] <- out_ff[, 1]
      writeRaster(r_mean, sub('\\.ff$', '_mean.tif', outfile))
      writeRaster(r_mean, sub('\\.ff$', '_mean.asc', outfile))
      png(sub('\\.ff$', '_mean.png', outfile), 4, 4, units='in', res=300, type='cairo')
      library(viridis)
      plot(r_mean, col=viridis(100), zlim=c(0, 1))
      dev.off()

      # Logistic output sd
      message('Calculating sd over folds')
      out_ff[, 2] <- sqrt(rowSums(sweep(preds_ff[, seq_len(nfolds)], 1, out_ff[, 1])^2) /
                            (nfolds - 1))
      r_sd[cells] <- out_ff[, 2]
      writeRaster(r_sd, sub('\\.ff$', '_sd.tif', outfile))

      # Raw output mean
      message('Calculating mean over folds (raw)')
      r_mean_raw[cells] <- rowMeans(preds_ff_raw[, seq_len(nfolds)])
      writeRaster(r_mean_raw, sub('\\.ff$', '_mean_raw.tif', outfile))

      saveRDS(out_ff, extension(outfile, 'rds'))
      close(out_ff)
      delete(preds_ff)
      delete(preds_ff_raw)
      rm(preds_ff, preds_ff_raw, out_ff, r_mean, r_mean_raw, r_sd)
    }
  })
  delete(s_ff)
  rm(s_ff)
  gc()
})

##############################################################################################################
# Calculate maximum TSS (true skill statistic) for each species from the Maxent
# omission files. With presence-background data, TSS at a given threshold is
# approximated as 1 - (test omission rate + fractional predicted area).
maxtss2 <- function(x) {
  ff <- list.files(x, 'omission\\.csv$', full.names=TRUE)
  tss_max <- sapply(ff, function(f) {
    d <- read.csv(f)
    1 - min(d$Test.omission + d$Fractional.area)
  })
  c(mean=mean(tss_max), sd=sd(tss_max))
}

# Climate only
dirs <- dirname(list.files('D:/Manuscripts/for_maxent_analysis/maxent_models/output_clim_product',
                           'species_0_omission\\.csv$', recursive=TRUE, full.names=TRUE))
tss <- t(sapply(dirs, maxtss2))
write.csv(tss, 'D:/Manuscripts/for_maxent_analysis/maxent_models/output_clim_product/max_tss.csv')

# Climate + soil
dirs <- dirname(list.files('D:/Manuscripts/for_maxent_analysis/maxent_models/output_clim_soil_product',
                           'species_0_omission\\.csv$', recursive=TRUE, full.names=TRUE))
tss <- t(sapply(dirs, maxtss2))
write.csv(tss, 'D:/Manuscripts/for_maxent_analysis/maxent_models/output_clim_soil_product/max_tss.csv')

# Soil only
dirs <- dirname(list.files('D:/Manuscripts/for_maxent_analysis/maxent_models/output_soil_product',
                           'species_0_omission\\.csv$', recursive=TRUE, full.names=TRUE))
tss <- t(sapply(dirs, maxtss2))
write.csv(tss, 'D:/Manuscripts/for_maxent_analysis/maxent_models/output_soil_product/max_tss.csv')

################################################################################################################
# Calculate change in habitat area between binary (thresholded) predictions

library(raster)

# Function to calculate change summaries between two binary rasters: counts of cells
# (scaled by a per-cell area factor of 29) that remain suitable, remain unsuitable,
# are gained or are lost, plus the corresponding percentages.
dist_change_binary <- function(from, to) {
  require(raster)
  s <- stack(from, to)
  d <- data.frame(
    from = from,
    to = to,
    count_from = sum(values(s[[1]])*29, na.rm=TRUE),
    count_to = sum(values(s[[2]])*29, na.rm=TRUE),
    stay_good = sum(values(s[[1]] & s[[2]])*29, na.rm=TRUE),
    stay_bad = sum(values(!(s[[1]] | s[[2]]))*29, na.rm=TRUE),
    gained = sum(values(!s[[1]] & s[[2]])*29, na.rm=TRUE),
    lost = sum(values(s[[1]] & !s[[2]])*29, na.rm=TRUE))
  within(d, {
    percent_from_lost <- lost/count_from*100
    percent_to_gained <- gained/count_to*100
    percent_change <- (count_to - count_from)/count_from*100
    percent_stay_good <- stay_good/count_from*100
  })
}

ff <- list.files("C:/Users/Yasmin/Desktop/Manuscripts/for_maxent_analysis/binary/all",
                 full.names=TRUE)
# Group each species' binary rasters so that the *_clim.tif and *_climsoil.tif
# predictions fall in the same group
ff_list <- split(ff, sub('_clim.tif', '_climsoil.tif', ff))
change_summaries <- lapply(ff_list, function(x) {
  # Compare the first raster of each group against each of the others
  do.call(rbind, lapply(seq_along(x)[-1], function(y) {
    dist_change_binary(from=x[1], to=x[y])
  }))
})
change_summaries_mat <- do.call(rbind, change_summaries)
write.csv(change_summaries_mat, 'distr_diff.csv', row.names=FALSE)

##################################################################################################
# Extract the number of fitted parameters (non-zero lambdas) from the Maxent .lambdas files

library(rmaxent)
library(dplyr)

ff <- list.files('D:/Manuscripts/for_maxent_analysis/maxent_models/output_soil_product',
                 '\\.lambdas$', recursive=TRUE, full.names=TRUE)
# One group of .lambdas files (one per cross-validation fold) per species folder
k <- lapply(split(ff, basename(dirname(ff))), function(sp) {
  # Features with non-zero lambdas are those retained in the fitted model
  lam <- lapply(sp, function(x) subset(parse_lambdas(x)$lambdas, lambda != 0))
  lam <- lapply(lam, function(x) {
    x$type <- sub('.*product', 'product', x$type)
    x
  })
  all <- mean(sapply(lam, nrow))
  k <- unlist(lapply(lam, function(x) table(x$type)))
  k_mean <- rbind(list('all', all), aggregate(k, list(type=names(k)), mean))
  c(setNames(k_mean$x, k_mean$type))
})
k <- bind_rows(lapply(k, as.list), .id='species')
write.csv(k, 'parameters_soil_product.csv', row.names=FALSE)
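## Optional comparison (illustrative addition, not part of the original workflow):
## combine the three max_tss.csv files written above into one table so that maximum
## TSS can be compared across the climate-only, soil-only and climate+soil models.
## Assumes those files exist at the paths used above; the output file name
## max_tss_comparison.csv is illustrative.
base <- 'D:/Manuscripts/for_maxent_analysis/maxent_models'
sets <- c(clim='output_clim_product', soil='output_soil_product',
          climsoil='output_clim_soil_product')
tss_all <- do.call(rbind, lapply(names(sets), function(nm) {
  d <- read.csv(file.path(base, sets[nm], 'max_tss.csv'), row.names=1)
  data.frame(predictor_set=nm, species=basename(rownames(d)),
             max_tss_mean=d$mean, max_tss_sd=d$sd, row.names=NULL)
}))
write.csv(tss_all, file.path(base, 'max_tss_comparison.csv'), row.names=FALSE)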