# -------  all utility functions ------------------------------------------

# ----------------------  only for SSDM  ----------------------------------
# --- collection utility functions to extract relevant  objects and metrics --------

# -----  get mean distribution map by algorithm ---------------------------
get_mean_dist_model <- function(model_objects){
  
  al_sdms =  model_objects[[1]]@sdms
  mod_list = sapply(model_objects[[1]]@sdms,function(x) x@name,simplify = TRUE)
  
  sel_models  = str_remove_all(mod_list,'.SDM')%>%unique()
  
  pred_dist_list <- vector('list',length = length(sel_models))
  names(pred_dist_list) <- sel_models
  
  for(i in 1:length(sel_models)){
    sel_mod_index <- sapply(al_sdms,function(x) str_detect(x@name,sel_models[i]))
    sel_mods <- al_sdms[sel_mod_index]
    al_projs <- lapply(sel_mods, function(x) x@projection)
    al_proj_stk <- raster::stack(al_projs)  
    pred_dist_list[[i]]  <- mean(al_proj_stk,na.rm=TRUE)
  }
  pred_dist_stk <- raster::stack(pred_dist_list)
  pred_dist_stk
}

# --------- get mean variable importance ----------------------------------
get_mean_var_imp_model <- function(model_objects){
  
  
  al_sdms =  model_objects[[1]]@sdms
  mod_list = sapply(model_objects[[1]]@sdms,function(x) x@name,simplify = TRUE)
  sel_models  = str_remove_all(mod_list,'.SDM')%>%unique()
  
  v_imp_list <- vector('list',length = length(sel_models))
  names(v_imp_list) <- sel_models
  
  for(i in 1:length(sel_models)){
    sel_mod_index <- sapply(al_sdms,function(x) str_detect(x@name,sel_models[i]))
    sel_mods <- al_sdms[sel_mod_index]
    if(sel_models[i]=='MARS'){
      sel_mod_index_tmp <- sapply(al_sdms,function(x) str_detect(x@name,sel_models[i+1]))
      sel_mods_tmp <- al_sdms[sel_mod_index_tmp]
      
      sel_mod_index <- sapply(al_sdms,function(x) str_detect(x@name,sel_models[i]))
      sel_mods <- al_sdms[sel_mod_index]
      
      v_imp_tmp <-sapply(sel_mods_tmp,function(x) x@variable.importance)
      
      v_imp <-sapply(sel_mods,function(x) x@variable.importance)
      rownames(v_imp) <- rownames(v_imp_tmp)
      
      proc_v_imp <-v_imp%>%as.data.frame()%>%rownames_to_column(var = 'predictors')%>%
        pivot_longer(cols = starts_with('V'),names_to = 'reps',values_to = 'values')%>%
        unnest(cols = values)
      
      
    }else{
      v_imp <-sapply(sel_mods,function(x) x@variable.importance)
      proc_v_imp <-v_imp%>%as.data.frame()%>%rownames_to_column(var = 'predictors')%>%
        pivot_longer(cols = starts_with('V'),names_to = 'reps',values_to = 'values')%>%
        unnest(cols = values)
    }
    
    
    mean_imp <- proc_v_imp%>%group_by(predictors)%>%summarise(m_imp = mean(values,na.rm = TRUE))
    v_imp_list[[i]]  <- mean_imp%>%mutate(model = sel_models[i])
  }
  v_imp_df <- do.call('rbind',v_imp_list)
  
  v_imp_df
}


# ------- get mean model performance  -------------------------------------
get_mean_perf_model <- function(model_objects){
  
  al_sdms =  model_objects[[1]]@sdms
  mod_list = sapply(model_objects[[1]]@sdms,function(x) x@name,simplify = TRUE)
  
  sel_models  = str_remove_all(mod_list,'.SDM')%>%unique()
  
  mod_eval_list <- vector('list',length = length(sel_models))
  names(mod_eval_list) <- sel_models
  
  for(i in 1:length(sel_models)){
    sel_mod_index <- sapply(al_sdms,function(x) str_detect(x@name,sel_models[i]))
    sel_mods <- al_sdms[sel_mod_index]
    mod_eval <-sapply(sel_mods,function(x) x@evaluation)
    proc_mod_eval<-mod_eval%>%as.data.frame()%>%rownames_to_column(var = 'predictors')%>%
      pivot_longer(cols = tidyr::starts_with('V'),names_to = 'reps',values_to = 'values')%>%
      unnest(cols = 'values')
    mean_eval <- proc_mod_eval%>%group_by(predictors)%>%summarise(m_imp = mean(values,na.rm = TRUE))
    mod_eval_list[[i]]  <- mean_eval%>%mutate(model = sel_models[i])
  }
  mod_eval_df <- do.call('rbind',mod_eval_list)
  
  mod_eval_df

}


# ----------------------  get partial effects  ----------------------------
get_partial_effect <- function(model_object){
  # proces model output
  
  al_sdms =  model_object@sdms
  mod_list = sapply(model_object@sdms,function(x) x@name,simplify = TRUE)
  
  sel_models  = str_remove_all(mod_list,'.SDM')%>%unique()
  
  mod_part_eff_list <- vector('list',length = length(sel_models))
  names(mod_part_eff_list) <- sel_models
  
  
  for(i in 1:length(sel_models)){
    sel_mod_index <- sapply(al_sdms,function(x) str_detect(x@name,sel_models[i]))
    sel_mods <- al_sdms[sel_mod_index]
    
    #cat('first stage \n')
    al_mods <-lapply(sel_mods,function(x) SSDM:::get_model(x))
    al_dats <- lapply(sel_mods,function(x) x@data)
    sel_dats  = lapply(al_dats,function(x) x%>%dplyr::select(-c(X,Y,Presence,train)))
    comb_dat =  do.call('rbind',sel_dats)
    name_vars= names(comb_dat)
    
    pred_dats  = vector('list',length = length(sel_dats))
    
    if(sel_models[i]=='GBM'){
     cat('n.trees for GBM is set at 1500. change if different in the modelling step \n')
    }  
      
    for(j in 1:length(sel_dats)){
      #tmp_dat = sel_dats[[j]]
      #cat('second stage \n')
      # change this part ma
      n_sel = comb_dat%>%
        pivot_longer(cols = all_of(name_vars),names_to = 'vars',values_to = 'value')%>%
        arrange(rev(desc(vars)))%>%group_by(vars)%>%
        summarise(n_vals =list(seq(min(value),max(value),length.out = 100)))%>%
        dplyr::select(vars,n_vals)%>%unnest(n_vals)
      
      n_sel2 = n_sel%>%
        mutate(preds = NA,reps=j)
      
      for(k in 1:ncol(comb_dat)){
        #cat('last stage \n')
        new_dat = n_sel%>%pivot_wider(names_from = vars,values_from = n_vals)%>%unnest()
        new_dat[,-k] = apply(comb_dat[,-(k)],2,function(x) mean(x,na.rm = TRUE))%>%
          mefa:::rep.data.frame(each=nrow(new_dat))
        if(sel_models[i]=='GBM'){
          part_preds = predict(al_mods[[j]],newdata = new_dat,n.trees=1500)
        }else{
          part_preds = predict(al_mods[[j]],newdata = new_dat)
        }
      
        
        part_preds[part_preds<0]=0
        
       n_sel2$preds[n_sel2$vars==names(comb_dat)[k]] = part_preds
       
      }
      
      pred_dats[[j]] =n_sel2
    
    }
  mod_part_eff_list[[i]] =  do.call('rbind',pred_dats)%>%
    group_by(vars,n_vals)%>%
    summarise(m_pred = mean(preds,na.rm=TRUE))%>%
    ungroup()
  }
mod_part_eff_list
}

#   --------- get all occurrence and background data ----------------------
get_occ_background <- function(model_object){

    # --- process models  -----------------------------------------------------
  al_sdms =  model_object@sdms
  mod_list = sapply(model_object@sdms,function(x) x@name,simplify = TRUE)
  
  sel_models  = str_remove_all(mod_list,'.SDM')%>%unique()
  
  mod_occ_bkgnd_list <- vector('list',length = length(sel_models))
  names(mod_occ_bkgnd_list) <- sel_models
  
  for(i in 1:length(sel_models)){
    sel_mod_index <- sapply(al_sdms,function(x) str_detect(x@name,sel_models[i]))
    sel_mods <- al_sdms[sel_mod_index]
    
    
    al_dats <- lapply(sel_mods,function(x) x@data)
    #sel_dats  = lapply(al_dats,function(x) x%>%mutate(reps))
    #comb_dat =  do.call('rbind',sel_dats)
    set.seed(1234)
    sIndex =sample(1:10, size = 4,replace = FALSE)%>%sort()
    
    sel_dats =  al_dats[sIndex]
    comb_dat = purrr::map_df(sel_dats,.f = rbind,.id = 'model_index')
    
    mod_occ_bkgnd_list[[i]] = comb_dat
    
  }
  
  mod_occ_bkgnd_list
  
}


# ----------- function to prepare occurence locations with base map---------------------------------
prep_base_map <- function(env_rast, bathy_loc,base_map,occ_data){
  samp <- raster::sampleRegular(env_rast[[bathy_loc]], 
                                5e+05, asRaster = TRUE)
  map_df <- raster::as.data.frame(samp, xy = TRUE, 
                                  centroids = TRUE, na.rm = TRUE)
  colnames(map_df) <- c("longitude", "latitude", 
                        "depth")

  mid <- stats::median(map_df$depth)
  base_1 <- ggplot() + 
    geom_tile(data = map_df, aes(x =longitude, y = latitude, fill = depth)) + 
    scale_fill_gradient2(low = "darkred",mid = "yellow", high = "darkgreen", midpoint = mid) + 
    guides(fill = FALSE) + labs(x = "longitude", y = "latitude")+
    geom_sf(data=base_map,fill='gray50')+
    theme_bw()
  

  base_1
}