# ------ to be used to process raw occurrence data -----------------------
#
# Reads every CSV in data/raw, harmonises column names, keeps a fixed set of
# columns, writes one processed file per input plus a combined file, and
# finally writes a reduced Longitude/Latitude/species_season table for SSDM.

# ---- load required libraries -------------------------------------------
libs <- c("tidyverse", "lubridate", "purrr", "here")
# invisible() keeps lapply()'s return value out of the console/log.
invisible(lapply(libs, library, character.only = TRUE))

# ------- load raw occurrence data ----------------------------------------
# All paths are anchored with here::here() so the script behaves the same
# regardless of the current working directory.
raw_dir <- here::here("data", "raw")
tidy_dir <- here::here("data", "tidy")

# `pattern` is a regular expression: escape the dot and anchor at the end so
# only files whose names end in ".csv" are picked up.
all_csv_files <- list.files(raw_dir, pattern = "\\.csv$")
raw_csv_paths <- file.path(raw_dir, all_csv_files)

# ------- process occurrence data -----------------------------------------
# Column names of every raw file (for inspection). n_max = 0 reads only the
# header instead of parsing each file in full.
col_names_files <- purrr::map(
  raw_csv_paths,
  ~ names(readr::read_csv(.x, n_max = 0))
)

# Read one raw occurrence CSV and return it with harmonised column names.
#
# fls:    path of the CSV file to read.
# f_name: file name used to label the rows; a trailing ".csv" is removed and
#         the result is stored in the `f_name` column.
#
# Columns ending in "_Typ" have "_Typ" replaced by "_Type", and columns
# containing "SW" have "SW" replaced by "Summer_Winter". The result holds
# exactly the columns listed in `tmp_names`, in that order, followed by
# `f_name`. select(all_of()) errors if any of those columns is missing.
cust_reorder_names <- function(fls, f_name) {
  tmp_names <- c(
    "Record_Type", "Platform", "ID_Number", "IDCR_code", "dates",
    "Year", "Month", "Day", "Latitude", "Longitude", "Summer_Winter"
  )
  source_label <- stringr::str_remove(f_name, "\\.csv$")

  readr::read_csv(fls) %>%
    dplyr::rename_with(
      ~ stringr::str_replace_all(.x, pattern = "_Typ", replacement = "_Type"),
      .cols = dplyr::ends_with("_Typ")
    ) %>%
    dplyr::rename_with(
      ~ stringr::str_replace_all(
        .x,
        pattern = "SW",
        replacement = "Summer_Winter"
      ),
      .cols = dplyr::matches("SW")
    ) %>%
    dplyr::select(dplyr::all_of(tmp_names)) %>%
    dplyr::mutate(f_name = source_label)
}

all_dat_sorted <- purrr::map2(
  .x = raw_csv_paths,
  .y = all_csv_files,
  .f = ~ cust_reorder_names(fls = .x, f_name = .y)
)

# Quick check that every file ended up with the same columns.
lapply(all_dat_sorted, names)

# write processed data to a file ------------------------------------------
dir.create(tidy_dir, recursive = TRUE, showWarnings = FALSE)

processed_file_names <- stringr::str_replace(
  all_csv_files,
  pattern = "\\.csv$",
  replacement = "_processed.csv"
)

# walk2() is used because only the side effect (writing files) matters.
# The output file is passed positionally so the call works with readr
# versions that name this argument either `path` or `file`.
purrr::walk2(
  .x = all_dat_sorted,
  .y = file.path(tidy_dir, processed_file_names),
  .f = ~ readr::write_csv(.x, .y)
)

# ------ combine all species data and write to a file ---------------------
# Species and season are derived from the source file name; rows from files
# matching none of the patterns get NA (case_when() default).
all_data_combined <- dplyr::bind_rows(all_dat_sorted) %>%
  dplyr::mutate(
    species = dplyr::case_when(
      stringr::str_detect(string = f_name, pattern = "^BW") ~ "Bryde whale",
      stringr::str_detect(string = f_name, pattern = "^HBW") ~ "Humpback whale",
      stringr::str_detect(string = f_name, pattern = "^SRW") ~
        "Southern right whale",
      stringr::str_detect(string = f_name, pattern = "^SW") ~ "Sperm whale"
    ),
    season = dplyr::case_when(
      stringr::str_detect(f_name, "summer") ~ "Summer",
      stringr::str_detect(f_name, "winter") ~ "Winter"
    )
  )

# NOTE: the file name keeps its original spelling ("occurence") so anything
# already reading this file continues to find it.
readr::write_csv(
  all_data_combined,
  file.path(tidy_dir, "processed_combined_all_species_occurence.csv")
)

# ------- prepare data just for SSDM --------------------------------------
# Keep rows where at least one numeric column is not NA, then collapse
# species and season into a single "species_season" label.
# NOTE(review): a row with only one of Longitude/Latitude present still
# passes this filter - confirm whether both coordinates should be required.
all_dat_ssdm <- all_data_combined %>%
  dplyr::select(species, season, Longitude, Latitude) %>%
  dplyr::filter(dplyr::if_any(where(is.numeric), ~ !is.na(.x))) %>%
  tidyr::unite("species_season", species:season, remove = FALSE, sep = "_") %>%
  dplyr::select(Longitude, Latitude, species_season)

# ----- write processed data for ssdm ------------------------------------
readr::write_csv(
  all_dat_ssdm,
  file.path(tidy_dir, "processed_combined_all_species_for_ssdm.csv")
)