# ------- all utility functions ------------------------------------------ # ---------------------- only for SSDM ---------------------------------- # --- collection utility functions to extract relevant objects and metrics -------- # ----- get mean distribution map by algorithm --------------------------- get_mean_dist_model <- function(model_objects){ al_sdms = model_objects[[1]]@sdms mod_list = sapply(model_objects[[1]]@sdms,function(x) x@name,simplify = TRUE) sel_models = str_remove_all(mod_list,'.SDM')%>%unique() pred_dist_list <- vector('list',length = length(sel_models)) names(pred_dist_list) <- sel_models for(i in 1:length(sel_models)){ sel_mod_index <- sapply(al_sdms,function(x) str_detect(x@name,sel_models[i])) sel_mods <- al_sdms[sel_mod_index] al_projs <- lapply(sel_mods, function(x) x@projection) al_proj_stk <- raster::stack(al_projs) pred_dist_list[[i]] <- mean(al_proj_stk,na.rm=TRUE) } pred_dist_stk <- raster::stack(pred_dist_list) pred_dist_stk } # --------- get mean variable importance ---------------------------------- get_mean_var_imp_model <- function(model_objects){ al_sdms = model_objects[[1]]@sdms mod_list = sapply(model_objects[[1]]@sdms,function(x) x@name,simplify = TRUE) sel_models = str_remove_all(mod_list,'.SDM')%>%unique() v_imp_list <- vector('list',length = length(sel_models)) names(v_imp_list) <- sel_models for(i in 1:length(sel_models)){ sel_mod_index <- sapply(al_sdms,function(x) str_detect(x@name,sel_models[i])) sel_mods <- al_sdms[sel_mod_index] if(sel_models[i]=='MARS'){ sel_mod_index_tmp <- sapply(al_sdms,function(x) str_detect(x@name,sel_models[i+1])) sel_mods_tmp <- al_sdms[sel_mod_index_tmp] sel_mod_index <- sapply(al_sdms,function(x) str_detect(x@name,sel_models[i])) sel_mods <- al_sdms[sel_mod_index] v_imp_tmp <-sapply(sel_mods_tmp,function(x) x@variable.importance) v_imp <-sapply(sel_mods,function(x) x@variable.importance) rownames(v_imp) <- rownames(v_imp_tmp) proc_v_imp <-v_imp%>%as.data.frame()%>%rownames_to_column(var = 'predictors')%>% pivot_longer(cols = starts_with('V'),names_to = 'reps',values_to = 'values')%>% unnest(cols = values) }else{ v_imp <-sapply(sel_mods,function(x) x@variable.importance) proc_v_imp <-v_imp%>%as.data.frame()%>%rownames_to_column(var = 'predictors')%>% pivot_longer(cols = starts_with('V'),names_to = 'reps',values_to = 'values')%>% unnest(cols = values) } mean_imp <- proc_v_imp%>%group_by(predictors)%>%summarise(m_imp = mean(values,na.rm = TRUE)) v_imp_list[[i]] <- mean_imp%>%mutate(model = sel_models[i]) } v_imp_df <- do.call('rbind',v_imp_list) v_imp_df } # ------- get mean model performance ------------------------------------- get_mean_perf_model <- function(model_objects){ al_sdms = model_objects[[1]]@sdms mod_list = sapply(model_objects[[1]]@sdms,function(x) x@name,simplify = TRUE) sel_models = str_remove_all(mod_list,'.SDM')%>%unique() mod_eval_list <- vector('list',length = length(sel_models)) names(mod_eval_list) <- sel_models for(i in 1:length(sel_models)){ sel_mod_index <- sapply(al_sdms,function(x) str_detect(x@name,sel_models[i])) sel_mods <- al_sdms[sel_mod_index] mod_eval <-sapply(sel_mods,function(x) x@evaluation) proc_mod_eval<-mod_eval%>%as.data.frame()%>%rownames_to_column(var = 'predictors')%>% pivot_longer(cols = tidyr::starts_with('V'),names_to = 'reps',values_to = 'values')%>% unnest(cols = 'values') mean_eval <- proc_mod_eval%>%group_by(predictors)%>%summarise(m_imp = mean(values,na.rm = TRUE)) mod_eval_list[[i]] <- mean_eval%>%mutate(model = sel_models[i]) } mod_eval_df <- do.call('rbind',mod_eval_list) mod_eval_df } # ---------------------- get partial effects ---------------------------- get_partial_effect <- function(model_object){ # proces model output al_sdms = model_object@sdms mod_list = sapply(model_object@sdms,function(x) x@name,simplify = TRUE) sel_models = str_remove_all(mod_list,'.SDM')%>%unique() mod_part_eff_list <- vector('list',length = length(sel_models)) names(mod_part_eff_list) <- sel_models for(i in 1:length(sel_models)){ sel_mod_index <- sapply(al_sdms,function(x) str_detect(x@name,sel_models[i])) sel_mods <- al_sdms[sel_mod_index] #cat('first stage \n') al_mods <-lapply(sel_mods,function(x) SSDM:::get_model(x)) al_dats <- lapply(sel_mods,function(x) x@data) sel_dats = lapply(al_dats,function(x) x%>%dplyr::select(-c(X,Y,Presence,train))) comb_dat = do.call('rbind',sel_dats) name_vars= names(comb_dat) pred_dats = vector('list',length = length(sel_dats)) if(sel_models[i]=='GBM'){ cat('n.trees for GBM is set at 1500. change if different in the modelling step \n') } for(j in 1:length(sel_dats)){ #tmp_dat = sel_dats[[j]] #cat('second stage \n') # change this part ma n_sel = comb_dat%>% pivot_longer(cols = all_of(name_vars),names_to = 'vars',values_to = 'value')%>% arrange(rev(desc(vars)))%>%group_by(vars)%>% summarise(n_vals =list(seq(min(value),max(value),length.out = 100)))%>% dplyr::select(vars,n_vals)%>%unnest(n_vals) n_sel2 = n_sel%>% mutate(preds = NA,reps=j) for(k in 1:ncol(comb_dat)){ #cat('last stage \n') new_dat = n_sel%>%pivot_wider(names_from = vars,values_from = n_vals)%>%unnest() new_dat[,-k] = apply(comb_dat[,-(k)],2,function(x) mean(x,na.rm = TRUE))%>% mefa:::rep.data.frame(each=nrow(new_dat)) if(sel_models[i]=='GBM'){ part_preds = predict(al_mods[[j]],newdata = new_dat,n.trees=1500) }else{ part_preds = predict(al_mods[[j]],newdata = new_dat) } part_preds[part_preds<0]=0 n_sel2$preds[n_sel2$vars==names(comb_dat)[k]] = part_preds } pred_dats[[j]] =n_sel2 } mod_part_eff_list[[i]] = do.call('rbind',pred_dats)%>% group_by(vars,n_vals)%>% summarise(m_pred = mean(preds,na.rm=TRUE))%>% ungroup() } mod_part_eff_list } # --------- get all occurrence and background data ---------------------- get_occ_background <- function(model_object){ # --- process models ----------------------------------------------------- al_sdms = model_object@sdms mod_list = sapply(model_object@sdms,function(x) x@name,simplify = TRUE) sel_models = str_remove_all(mod_list,'.SDM')%>%unique() mod_occ_bkgnd_list <- vector('list',length = length(sel_models)) names(mod_occ_bkgnd_list) <- sel_models for(i in 1:length(sel_models)){ sel_mod_index <- sapply(al_sdms,function(x) str_detect(x@name,sel_models[i])) sel_mods <- al_sdms[sel_mod_index] al_dats <- lapply(sel_mods,function(x) x@data) #sel_dats = lapply(al_dats,function(x) x%>%mutate(reps)) #comb_dat = do.call('rbind',sel_dats) set.seed(1234) sIndex =sample(1:10, size = 4,replace = FALSE)%>%sort() sel_dats = al_dats[sIndex] comb_dat = purrr::map_df(sel_dats,.f = rbind,.id = 'model_index') mod_occ_bkgnd_list[[i]] = comb_dat } mod_occ_bkgnd_list } # ----------- function to prepare occurence locations with base map--------------------------------- prep_base_map <- function(env_rast, bathy_loc,base_map,occ_data){ samp <- raster::sampleRegular(env_rast[[bathy_loc]], 5e+05, asRaster = TRUE) map_df <- raster::as.data.frame(samp, xy = TRUE, centroids = TRUE, na.rm = TRUE) colnames(map_df) <- c("longitude", "latitude", "depth") mid <- stats::median(map_df$depth) base_1 <- ggplot() + geom_tile(data = map_df, aes(x =longitude, y = latitude, fill = depth)) + scale_fill_gradient2(low = "darkred",mid = "yellow", high = "darkgreen", midpoint = mid) + guides(fill = FALSE) + labs(x = "longitude", y = "latitude")+ geom_sf(data=base_map,fill='gray50')+ theme_bw() base_1 }