###### Script for CPDI Doughnut Effect Paper - Producing analysis results and figures for publication
#### Written by: Stephen R. Scherrer
#### 26 January 2017

####### Cleaning workspace and setting directories ----
### Clearing Workspace
rm(list = ls()) # Clear workspace

### Starting Script Timer
script_timer <- proc.time()

### Linking to Project Directories
project_dir = getwd()
data_dir = file.path(project_dir, 'data')
results_dir = file.path(project_dir, 'results')
figure_dir = file.path(project_dir, 'figures')
source_dir = file.path(project_dir, 'code')

### Setting Project Directory
setwd(project_dir)

### Establishing History
savehistory(file = file.path(results_dir, "Rhistory"))

####### Importing principal dependencies ----
# install.packages('geosphere')
library('geosphere') # distGeo() Note: wrapped in old lldist function
# install.packages('reshape')
library('reshape') # melt()
# install.packages('MuMIn')
library('MuMIn') # AICc()
# install.packages('dplyr')
library('dplyr') # filter()
# install.packages('doParallel')
library('doParallel')
# install.packages('lubridate')
library('lubridate') # floor_date()
# install.packages('mgcv')
library('mgcv') # gam()
## install.packages('beepr')
library('beepr') # beep()
## install.packages('notifyR')
library('notifyR') # send_push()
## install.packages('ggplot2')
library('ggplot2') # geom_bar()

### Sourcing Mechanistic CPDI Model
source(file.path(source_dir, 'Mechanistic CPDI Model.R')) # predict_cpdi_interference(), plot_heat_map()

####### Setting Up Parallel Environment ----
### Setting up parallel processing for later analysis using pdredge() and foreach()
## Setting number of cores
n_cores = detectCores() # Default to the number of cores available on the computer
## Creating a cluster
clust = makeCluster(n_cores)
clusterEvalQ(clust, library('MuMIn')) # loading MuMIn package to clusters for using the pdredge function
clusterEvalQ(clust, library('mgcv'))  # loading mgcv package to clusters for using the gam function

####### Utility Functions ----
datenum2posix = function(x, timez = "HST") {
  ## Function to convert a MATLAB datenum to an R POSIX date
  # Author: Luke Miller Feb 20, 2011
  # Convert a numeric MATLAB datenum (days since 0000-1-1 00:00) to seconds in
  # the Unix epoch (seconds since 1970-1-1 00:00). Specify a time zone if the
  # input datenum is anything other than the GMT/UTC time zone.
  days = x - 719529   # 719529 = days from 1-1-0000 to 1-1-1970
  secs = days * 86400 # 86400 seconds in a day
  # This next string of functions is a complete disaster, but it works.
  # It tries to outsmart R by converting the secs value to a POSIXct value
  # in the UTC time zone, then converts that to a time/date string that
  # should lose the time zone, and then it performs a second as.POSIXct()
  # conversion on the time/date string to get a POSIXct value in the user's
  # specified timezone. Time zones are a goddamned nightmare.
  return(as.POSIXct(strftime(as.POSIXct(x = secs, units = 'secs', origin = '1970-1-1', tz = 'UTC'),
                             format = '%Y-%m-%d %H:%M:%S', tz = 'UTC', usetz = FALSE), tz = timez))
}

convert_tz = function(datetime, new.tz = 'HST'){
  ## Function to convert GMT/UTC times to HST time
  datetime.new.tz = strptime(datetime, format = '%Y-%m-%d %H:%M:%S', tz = new.tz)
  dateoffset = datetime - datetime.new.tz
  datetime.new.tz = datetime.new.tz + dateoffset
  return(datetime.new.tz)
}
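## Quick sanity check of the two converters above (a minimal, hypothetical
## example; datenum 735000 is an arbitrary MATLAB date, not from the data):
# datenum2posix(735000, timez = 'HST')                       # a mid-2012 date, rendered in HST
# convert_tz(as.POSIXct('2014-06-07 18:00:00', tz = 'GMT'))  # same instant relabeled as 08:00 HST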
convert_lat_lon = function(ll_deg, ll_min = FALSE){
  ## Converts latitude and longitude between ll minutes and ll decimal degrees
  # 2 usages:
  #   Convert decimal degrees to degree minutes - 1 argument
  #     ll_deg is a single argument of latitude or longitude in decimal degrees
  #     Returns a prefix and decimal for that argument
  #   Convert degree minutes to decimal degrees - 2 arguments
  #     ll_deg is the latitude or longitude's degree
  #     ll_min is the degree minutes
  #     Returns a single float of ll in decimal degrees
  if (ll_min[1] == FALSE){ # then we are going from one number to two
    ll_deg = as.numeric(as.character(ll_deg))
    ll_bin = matrix(0, length(ll_deg), 2)
    for (r in 1:length(ll_deg)){
      if (isTRUE(ll_deg[r] >= 0)){
        ll_dec = ll_deg[r] - floor(ll_deg[r])
        ll_bin[r, ] = c(floor(ll_deg[r]), (ll_dec) * 60)
      } else {
        ll_dec = (ll_deg[r] - ceiling(ll_deg[r])) * -1
        ll_bin[r, ] = c(ceiling(ll_deg[r]), (ll_dec) * 60)
      }
    }
  } else { # if we are converting from two numbers to one
    ll_deg = as.numeric(as.character(ll_deg))
    ll_min = as.numeric(as.character(ll_min))
    ll_bin = matrix(0, length(ll_deg), 1)
    for (r in 1:length(ll_deg)){
      ll_dec_deg = abs(ll_deg[r]) + (abs(ll_min[r]) / 60)
      if (isTRUE(ll_deg[r] < 0)){
        ll_dec_deg = ll_dec_deg * (-1)
      }
      ll_bin[r] = ll_dec_deg
    }
  }
  return(ll_bin)
}

lldist = function(point1, point2){
  ## Wrapper function to calculate the distance (km) between two lat lon points on a geodesic sphere
  # Requires distGeo() from the geosphere package
  distance = distGeo(p1 = point1, p2 = point2) / 1000
  return(distance)
}

clean_receiver = function(receiver){
  ## Returns receiver serial number as a factor, removing the 'VR2W-' prefix
  cleaned_receiver = as.factor(substring(receiver, 6))
  return(cleaned_receiver)
}

load_receiver = function(filename, filepath = FALSE){
  ## Function to load in receiver data .csv file
  proj_dir = getwd()
  if (filepath != FALSE){
    setwd(filepath)
  }
  receiver_dates = receiver_col_names(read.csv(filename))
  receiver_dates$deployment_date = strptime(receiver_dates$deployment_date, format = '%m/%d/%y %H:%M', tz = 'HST')
  receiver_dates$recovery_date = strptime(receiver_dates$recovery_date, format = '%m/%d/%y %H:%M', tz = 'HST')
  receiver_dates$lat = convert_lat_lon(receiver_dates$lat_deg, receiver_dates$lat_min)
  receiver_dates$lon = convert_lat_lon(receiver_dates$lon_deg, receiver_dates$lon_min)
  setwd(proj_dir)
  return(receiver_dates)
}

receiver_col_names = function(receiver_file_raw){
  ## Function that renames columns for an imported receiver data file
  receiver_file = receiver_file_raw
  colnames(receiver_file)[1:23] = c('serviced', 'station_name', 'consecutive_deployment_number',
                                    'deployment_date', 'recovery_date', 'recovered', 'in_data_set',
                                    'lat_deg', 'lat_min', 'lon_deg', 'lon_min', 'depth',
                                    'vr2w_serial', 'acoustic_release_serial',
                                    'acoustic_release_battery_life',
                                    'acoustic_release_voltage_at_deployment',
                                    'acoustic_release_serial_code', 'temperature_logger_serial',
                                    'location_code', 'deployed_by', 'recovered_by',
                                    'comments_deployment', 'comments_recovery')
  return(receiver_file)
}

clean_vue_lat_lon = function(vue_data_df, receiver_data_df){
  ## Function that replaces the lat lon positions a receiver may have been initialized with
  ## with field records of coordinates from the actual deployment location
  station = rep(NA, times = length(vue_data_df$datetime))
  for (i in 1:length(receiver_data_df$station_name)){
    receiver_subset_index = which(vue_data_df$receiver == receiver_data_df$vr2w_serial[i])
    deploy_subset_index = which(vue_data_df$datetime >= receiver_data_df$deployment_date[i])
    recover_subset_index = which(vue_data_df$datetime < na.omit(receiver_data_df$recovery_date[i]))
    ind = Reduce(intersect, list(receiver_subset_index, deploy_subset_index, recover_subset_index))
    vue_data_df$lat[ind] = receiver_data_df$lat[i]
    vue_data_df$lon[ind] = receiver_data_df$lon[i]
    station[ind] = as.character(receiver_data_df$station_name[i])
  }
  vue_data_df$station = as.factor(station)
  return(vue_data_df)
}

clean_tag_id = function(tag_id){
  ## Function that returns tag ID number as a factor, removing the 'A69-####-' prefix
  cleaned_id = as.factor(substring(tag_id, 10))
  return(cleaned_id)
}

vue_col_names = function(vue_data_raw){
  ## Function that renames columns for an imported Vemco data set
  colnames(vue_data_raw)[1:10] = c('datetime', 'receiver', 'tag_id', 'name', 'tag_serial',
                                   'sensor_value', 'sensor.unit', 'station', 'lat', 'lon')
  return(vue_data_raw)
}

load_vemco = function(filename, filepath = FALSE, format = '%Y-%m-%d %H:%M:%S'){
  ## Function that loads in a Vemco database from a VUE export file
  proj_dir = getwd()
  if (isTRUE(filepath != FALSE)) {setwd(filepath)}
  vue_data_raw = read.csv(filename)
  vue_data_cleaned = vue_col_names(vue_data_raw)
  vue_data_cleaned$datetime = strptime(vue_data_cleaned$datetime, format = format, tz = "GMT")
  vue_data_cleaned$datetime = convert_tz(vue_data_cleaned$datetime, new.tz = 'HST')
  vue_data_cleaned$tag_id = clean_tag_id(vue_data_cleaned$tag_id)
  vue_data_cleaned$receiver = clean_receiver(vue_data_cleaned$receiver)
  setwd(proj_dir)
  return(vue_data_cleaned)
}

compute_standard_error = function(numeric_vector){
  stderror = sd(numeric_vector) / sqrt(length(numeric_vector))
  return(stderror)
}

as.number = function(x){
  if(class(x) == 'factor'){
    x = levels(x)[x]
  }
  as.numeric(x)
}
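## Worked example of the coordinate helpers above (hypothetical coordinates,
## not from the data): 21 deg 16.5 min converts to 21 + 16.5/60 = 21.275
## decimal degrees, and lldist() returns geodesic distance in km:
# convert_lat_lon(21, 16.5)  # 21.275
# convert_lat_lon(21.275)    # c(21, 16.5)
# lldist(point1 = c(-157.85, 21.3), point2 = c(-157.85, 21.4)) # ~11.1 km north-south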
####### COMPONENT 1: Deep Water Range Test ----
#### Loading In and Cleaning Data Files ----
setwd(data_dir)

### Loading Receiver Data
receiver_data = load_receiver(filename = 'DEPLOYMENT_RECOVERY_LOG.csv')
# dim(receiver_data) # 217 28

### Loading VUE data
vue_data = load_vemco(filename = 'Range_Test_June_2014_All_Receivers.csv', format = '%m/%d/%y %H:%M')
# dim(vue_data) # 130284 10

### Importing windspeed data from NOAA Honolulu station, Station ID 1612340
# http://www.ndbc.noaa.gov/view_text_file.php?filename=oouh1h2014.txt.gz&dir=data/historical/stdmet/
# parent: http://www.ndbc.noaa.gov/station_history.php?station=oouh1
# Data dates range between 2014-01-01 00:00:00 and 2014-12-31 23:54:00
# with the following format:
#   YY MM DD hh mm WDIR WSPD GST WVHT DPD APD MWD PRES ATMP WTMP DEWP VIS TIDE
# with the following units:
#   yr mo dy hr mn degT m/s m/s m sec sec degT hPa degC degC degC mi ft
wind_data = as.data.frame(read.table("oouh1h2014.txt"), header = TRUE, sep = " ")
# dim(wind_data) # 85864 18
colnames(wind_data) = c("YY", "MM", "DD", "hh", "mm", "WDIR", "WSPD", "GST", "WVHT", "DPD",
                        "APD", "MWD", "PRES", "ATMP", "WTMP", "DEWP", "VIS", "TIDE")
### Convert dates and times to POSIXct format (station data times are reported in UTC)
wind_data$datetime = as.POSIXct(paste(wind_data$YY, "-", wind_data$MM, "-", wind_data$DD, " ",
                                      wind_data$hh, ":", wind_data$mm, sep = ""),
                                format = "%Y-%m-%d %H:%M", tz = "GMT")

### Convert datetime to HST
wind_data$datetime = strftime(wind_data$datetime, format = "%Y-%m-%d %H:%M", tz = "HST")
wind_data$hourly = strftime(wind_data$datetime, format = "%Y-%m-%d %H")

### Grouping windspeed data into hourly means (mean_wspd)
wind_data_grouped = group_by(wind_data, hourly)
wspd_by_hour = summarize(wind_data_grouped, mean_wspd = mean(WSPD))
wspd_by_hour$hourly = as.POSIXct(wspd_by_hour$hourly, format = "%Y-%m-%d %H")

### Grouping windgust data into hourly means (mean_gst)
gst_by_hour = summarize(wind_data_grouped, mean_gst = mean(GST))
gst_by_hour$hourly = as.POSIXct(gst_by_hour$hourly, format = "%Y-%m-%d %H")

### Importing tide data from NOAA Honolulu station, accessed 28 OCT 2015
# http://tidesandcurrents.noaa.gov/waterlevels.html?id=1612340&units=standard&bdate=20110930&edate=20100930&timezone=LST&datum=MLLW&interval=h&action=
# Data accessed for dates ranging between 2013-09-30 00:00:00 HST and 2014-09-30 13:00:00 HST
# Data is already in HST
tide_data = read.csv('noaa tide data 2013-2014.csv')
# dim(tide_data) # 8774 5
colnames(tide_data) = c("hour_bin", "water_level", 'sigma', 'I', "L")
tide_data$hour_bin = as.POSIXct(tide_data$hour_bin, format = "%Y-%m-%d %H:%M")

### Determining if tide was going in or out
## First assume all data is headed in
tide_data$direction = "In"
## Then loop through data. If the current water level is less than the previous water level, the tide is going out.
for(i in 2:length(tide_data$direction)){
  if(tide_data$water_level[i-1] > tide_data$water_level[i]){
    tide_data$direction[i] = "Out"
  }
}
tide_data$direction = as.factor(tide_data$direction)
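## Note: the tide-direction loop above could be vectorized with diff() (a
## sketch producing the same "In"/"Out" labels):
# tide_data$direction = factor(c("In", ifelse(diff(tide_data$water_level) >= 0, "In", "Out")))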
### Importing Tag Deployment Meta Data File
tag_meta_data = read.csv('Range Test June 2014 - Tag Meta Data Locations and Depth.csv')
# dim(tag_meta_data) # 12 9

### Converting tag_meta_data lat lons from degree minutes to decimal degrees
tag_meta_data$lat = convert_lat_lon(tag_meta_data$lat_deg, tag_meta_data$lat_min)
tag_meta_data$lon = convert_lat_lon(tag_meta_data$lon_deg, tag_meta_data$lon_min)

### Determining distance of each tag from the receiver string using lon and lat waypoints from GPS
for (i in 1:length(tag_meta_data$distance)){
  tag_meta_data$distance[i] = round(1000 * (lldist(point1 = c(tag_meta_data$lon[i], tag_meta_data$lat[i]),
                                                   point2 = c(receiver_data$lon[receiver_data$station_name == 'Range Test - June 2014 - Diamond Head 1m'][1],
                                                              receiver_data$lat[receiver_data$station_name == 'Range Test - June 2014 - Diamond Head 1m'][1]))))
}
unique(tag_meta_data$distance) # 0 199 399 578 766 959

### Removing any other receivers in the database
receiver_1m = 123736  # serial number of receiver 1 m off the bottom
receiver_30m = 123732 # serial number of receiver 30 m off the bottom
vue_data = vue_data[which(vue_data$receiver %in% c(receiver_1m, receiver_30m)), ]

### Removing detections prior to the experiment (occurred during experiment set up on the boat) and determining an end to the experiment
start_date = as.POSIXct('2014-06-07 8:00:00', tz = "HST") # "2014-06-07 08:00:00 HST"
end_date = as.POSIXct("2014-06-26 05:00:00", format = "%Y-%m-%d %H:%M:%S", tz = "HST") # "2014-06-26 05:00:00 HST"
vue_data = vue_data[which(vue_data$datetime >= start_date & vue_data$datetime < end_date), ]

### Rounding off the final partial hour to account for transmissions received while gear was recovered
vue_data = vue_data[which(vue_data$datetime >= ceiling_date(min(vue_data$datetime), unit = 'hour') &
                            vue_data$datetime < floor_date(max(vue_data$datetime), unit = 'hour')), ]
range(vue_data$datetime) # "2014-06-07 09:00:00 HST" "2014-06-26 03:59:00 HST"

## How many total detections occurred during the course of the experiment? (includes tags not part of range test)
detections_all = dim(vue_data)[1] # 129622

### Removing tags not in the experiment. These could be tagged fish that swam by or false detections
tags_0m = c(18236, 18237)
tags_200m = c(18238, 18239)
tags_400m = c(18240, 18241)
tags_600m = c(18242, 18243)
tags_800m = c(18244, 18245)
tags_1000m = c(18246, 18247)
tag_ids = c(tags_0m, tags_200m, tags_400m, tags_600m, tags_800m, tags_1000m)
vue_data = vue_data[as.numeric(levels(vue_data$tag_id))[vue_data$tag_id] %in% tag_ids, ]

## How many detections were from tags that were part of the experiment?
detections_experiment = dim(vue_data)[1] # 128178
detections_experiment / detections_all * 100 # 98.88599 %

## How many detections were from tags that were not part of the experiment?
detections_other = detections_all - detections_experiment # 1444
detections_other / detections_all * 100 # 1.114008 %

### Assigning lon lat positions to vue_data
vue_data = clean_vue_lat_lon(vue_data_df = vue_data, receiver_data_df = receiver_data)

### Getting estimated distances between receivers and tags based on deployment coordinates
for (i in 1:length(tag_meta_data$lon)){
  tag_meta_data$distance[i] = lldist(point1 = c(unique(vue_data$lon)[1], unique(vue_data$lat)[1]),
                                     point2 = c((tag_meta_data$lon)[i], (tag_meta_data$lat)[i])) * 1000 # m/km
}
round(unique(tag_meta_data$distance)) # 0 199 399 578 766 959

#### Hourly Detection Data Analysis ----
### Separating data by tag, receiver, and hour bin
## First assign each datum to a date and hour bin
vue_data$hour_bin = floor_date(vue_data$datetime, unit = "hour")
## Then loop through combinations of date/hour, tag id, and receiver to get the number of detections from a tag at a receiver each hour
detection_df_1 = data.frame()
registerDoParallel(cores = n_cores)
detection_df_1 = foreach(b = c(1:length(unique(vue_data$hour_bin))), .combine = rbind) %:%
  foreach(i = c(1:length(unique(vue_data$tag_id))), .combine = rbind) %:%
  foreach(r = c(1:length(unique(vue_data$receiver))), .combine = rbind) %dopar% {
    filtered = filter(vue_data,
                      hour_bin == unique(vue_data$hour_bin)[b],
                      tag_id == unique(vue_data$tag_id)[i],
                      receiver == unique(vue_data$receiver)[r])
    write_line = cbind(as.character(unique(vue_data$hour_bin))[b],
                       as.character(unique(vue_data$tag_id))[i],
                       as.character(unique(vue_data$receiver))[r],
                       dim(filtered)[1])
    return(write_line)
  }

## Cleaning detection_df_1
detection_df_1 = as.data.frame(detection_df_1)
colnames(detection_df_1) = c('hour_bin', 'tag_id', 'receiver', 'detections')
detection_df_1$detections = as.numeric(as.character(detection_df_1$detections))
# dim(detection_df_1) # 10824 4
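## Note: an equivalent (and typically faster) tabulation to the foreach loop
## above could be done without parallelism (a sketch only; assumes the tidyr
## package, which is not loaded in this script, for filling in zero-detection hours):
# detection_df_1 = vue_data %>%
#   count(hour_bin, tag_id, receiver) %>%
#   tidyr::complete(hour_bin, tag_id, receiver, fill = list(n = 0)) %>%
#   rename(detections = n) %>%
#   as.data.frame()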
### Combining detection dataframe with distance, bottom depth, height condition, lat, and lon from tag_meta_data
detection_df_1 = merge(detection_df_1, tag_meta_data,
                       by.x = colnames(detection_df_1) == 'tag_id',
                       by.y = colnames(tag_meta_data) == 'tag_id')
detection_df_1$height_off_bottom = as.factor(detection_df_1$height_off_bottom)
detection_df_1$day = as.factor(strftime(detection_df_1$hour_bin, format = "%Y-%m-%d"))
detection_df_1$time = as.factor(strftime(detection_df_1$hour_bin, format = "%H:%M:%S"))
colnames(detection_df_1)[colnames(detection_df_1) == 'height_off_bottom'] = "height_of_tag_off_bottom"
detection_df_1$hour_bin = as.POSIXct(detection_df_1$hour_bin, format = "%Y-%m-%d %H:%M:%S")

### Combining detection data with wind speed and wind gust data
detection_df_1 = merge(detection_df_1, wspd_by_hour, by.x = colnames(detection_df_1) == 'hour_bin',
                       by.y = colnames(wspd_by_hour) == 'hourly')
detection_df_1 = merge(detection_df_1, gst_by_hour, by.x = colnames(detection_df_1) == 'hour_bin',
                       by.y = colnames(gst_by_hour) == 'hourly')
detection_df_1 = merge(detection_df_1, tide_data, by.x = colnames(detection_df_1) == 'hour_bin',
                       by.y = colnames(tide_data) == 'hour_bin')
detection_df_1$hour_bin = as.factor(detection_df_1$hour_bin)

### Assigning diurnal period to detections based around 12 hour time bins.
### Before 6am or after 6pm is considered night; between 6am and 6pm is considered day
detection_df_1$day_night = 'Night'
for(i in 1:length(detection_df_1$hour_bin)){
  if(hour(detection_df_1$hour_bin[i]) < 18 & hour(detection_df_1$hour_bin[i]) > 6){
    detection_df_1$day_night[i] = 'Day'
  }
}
detection_df_1$day_night = as.factor(detection_df_1$day_night)

### Assigning receiver height
detection_df_1$height_of_receiver_off_bottom = NULL
detection_df_1$height_of_receiver_off_bottom[detection_df_1$receiver == receiver_1m] = '1'
detection_df_1$height_of_receiver_off_bottom[detection_df_1$receiver == receiver_30m] = '30'
detection_df_1$height_of_receiver_off_bottom = as.factor(detection_df_1$height_of_receiver_off_bottom)

### Determining recovery rate based on the number of detections logged and an average of 60 transmissions per tag per hour
detection_df_1$recovery_rate = detection_df_1$detections / 60
# dim(detection_df_1) # 10824 26

#### Statistical Analysis - Modeling Detection Probability ----
### Setting Model Parameters
alpha = 0.05
transmissions_per_hour = 60
detection_threshold = (alpha * transmissions_per_hour) # 3

### Fitting a GAM
global.gam_1 = gam(formula = detections ~ s(distance, k = 6) + s(day_night, bs = 're') +
                     height_of_tag_off_bottom + height_of_receiver_off_bottom +
                     s(direction, bs = "re") + s(mean_wspd, bs = "re") +
                     s(mean_gst, bs = "re") + s(water_level, bs = "re"),
                   data = detection_df_1, keepData = TRUE,
                   family = poisson(link = 'log'), na.action = "na.fail")
global_gam.summary_1 = summary(global.gam_1)
## R squared value from GAM
global_gam.summary_1$r.sq # r.sq = 0.6465411
## Checking that GAM smoother terms are appropriate
gam.check(global.gam_1)

### Using the dredge function to compare this GAM to all possible combinations of variables
## Exporting the GAM and data to the cluster for using the pdredge function
clusterExport(clust, c('global.gam_1', 'detection_df_1'))
mod.dredged_1 = pdredge(global.gam_1, cluster = clust)

### Comparing predictions for the candidate models from dredge that are within delta 2 of the lowest AICc
candidate_models_1 = get.models(mod.dredged_1, subset = delta <= 2)
# length(candidate_models_1) # 8

#### Making Figures and Tables ----
## Setting up repository for results
table_1 = data.frame(NULL)
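## Note on the per-distance prediction loops below: predict.gam() accepts a
## multi-row newdata, so each candidate model could be evaluated over the whole
## distance grid in a single call (a sketch using the same new_mod_data /
## candidate_model names defined in the loops below):
# grid = data.frame(new_mod_data, distance = 0:1000)
# fits = predict(candidate_model, newdata = grid, type = 'response', se.fit = TRUE)
# predicted_rates = as.numeric(fits$fit)
# predicted_error_fits.se = as.numeric(fits$se.fit)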
## Looping through different combinations of predictor variables
for(time_of_day in unique(detection_df_1$day_night)){
  for(receiver_height in as.numeric(levels(unique(detection_df_1$height_of_receiver_off_bottom))[unique(detection_df_1$height_of_receiver_off_bottom)])){
    for(tag_height in as.numeric(levels(unique(detection_df_1$height_of_tag_off_bottom))[unique(detection_df_1$height_of_tag_off_bottom)])){
      for(tide_direction in unique(detection_df_1$direction)){
        ## A master list of all predictor variables from the global model to be used for predicting with candidate models
        ## Updated with values from loops
        var_predictors = list(day_night = time_of_day,
                              height_of_tag_off_bottom = tag_height,
                              height_of_receiver_off_bottom = receiver_height,
                              mean_wspd = median(detection_df_1$mean_wspd),
                              mean_gst = median(detection_df_1$mean_gst),
                              direction = tide_direction,
                              water_level = median(detection_df_1$water_level))
        ## Setting up output dataframes for summary statistics and predictions from candidate models
        candidate_model_summary_1 = as.data.frame(NULL)
        candidate_model_predictions_1 = list(NULL)
        ## Looping through candidate models
        for(i in 1:length(candidate_models_1)){
          candidate_model_id = paste('model', names(candidate_models_1)[i], sep = '_')
          candidate_model = candidate_models_1[[i]]
          ## Subsetting terms from the master list of predictors if they appear as terms in the candidate model
          candidate_model_terms = attr(candidate_model$terms, which = 'term.labels')
          new_mod_data = var_predictors[names(var_predictors) %in% candidate_model_terms]
          ### Predicting detection rates using the candidate model under evaluation
          predicted_rates = c()
          predicted_error_fits.fit = c()
          predicted_error_fits.se = c()
          for(dist in 0:1000){
            new_mod_data$distance = dist
            predicted_rates = c(predicted_rates, predict(candidate_model, newdata = new_mod_data, type = "response")[[1]])
            predicted_error_fits.fit = c(predicted_error_fits.fit, predict(candidate_model, newdata = new_mod_data, type = "response", se.fit = TRUE)$fit[[1]])
            predicted_error_fits.se = c(predicted_error_fits.se, predict(candidate_model, newdata = new_mod_data, type = "response", se.fit = TRUE)$se.fit[[1]])
          }
          ## Saving model predictions
          model_predictions = list()
          model_predictions$predicted_rates = predicted_rates
          model_predictions$predicted_error_fits.fit = predicted_error_fits.fit
          model_predictions$predicted_error_fits.se = predicted_error_fits.se
          candidate_model_predictions_1[[i]] = model_predictions
          ### Summary Stats
          ## Determining the average maximum detection radius
          ave_max_distance = 500 + (which(abs(predicted_rates[500:1000] - detection_threshold) == min(abs(predicted_rates[500:1000] - detection_threshold)))) - 1 # subtract 1 because predictions start at 0 but indexing starts at 1
          ## Determining CPDI extent
          # cpdi_extent = which.max(round(predicted_rates, digits = 0)) - 1 # subtract 1 because predictions start at 0, while indexing starts at 1
          ## Changing our CPDI criterion
          max_rate_index = which.max(predicted_rates)
          max_rate_minus_se = predicted_rates[max_rate_index] - predicted_error_fits.se[max_rate_index]
          all_rates_plus_se = predicted_rates[1:max_rate_index] + predicted_error_fits.se[1:max_rate_index]
          cpdi_extent = which.min(abs(max_rate_minus_se - all_rates_plus_se)) - 1 # subtract 1 because predictions start at 0, while indexing starts at 1
          ## Determining the model's R^2 value
          r.sq = summary(candidate_model)$r.sq
          ## Determining the deviance explained by the model
          dev.exp = summary(candidate_model)$dev.expl
          ## Saving summary stats to the model summary dataframe
          candidate_model_summary_1 = rbind(candidate_model_summary_1,
                                            cbind(candidate_model_id,
                                                  paste(candidate_model_terms, sep = ' ', collapse = ' '),
                                                  r.sq, dev.exp, candidate_model$aic,
                                                  ave_max_distance, cpdi_extent))
        }
        colnames(candidate_model_summary_1) = c('model_id', 'model_terms', 'r.sq', 'dev.expl', 'aic', 'ave_max_distance', 'cpdi_extent')
        names(candidate_model_predictions_1) = as.character(candidate_model_summary_1$model_id)
        ## R sq range from candidate models
        rsq = fivenum(as.numeric(as.character(candidate_model_summary_1$r.sq))) # 0.6465015 0.6465026 0.6465209 0.6465408 0.6465411
        ## Deviance explained by candidate models
        dev_explained = fivenum(as.numeric(as.character(candidate_model_summary_1$dev.expl)))
        ## Average maximum detection radius
        amdr = fivenum(as.numeric(as.character(candidate_model_summary_1$ave_max_distance)))
        ## CPDI extent from candidate models
        cpdi_extent = fivenum(as.numeric(as.character(candidate_model_summary_1$cpdi_extent)))

        ### Running mechanistic CPDI model for receiver position
        cpdi_model = predict_cpdi_interference(bottom_depth = 300,
                                               ave_max_detection_radius = amdr[3],
                                               receiver_depth = (300 - receiver_height),
                                               speed_of_sound = 1530,
                                               max_horizontal_dist = 1000,
                                               evaluation_interval = 1,
                                               blanking_interval = .260,
                                               plot = FALSE)
        ## Getting CPDI prediction from the model
        cpdi_prediction = min(which(cpdi_model[(300 - tag_height), ] == 1)) - 1 # because distances are indexed starting at 1 and not 0
        print(cpdi_prediction)

        ## Writing output of all of this to a table
        print(cbind(receiver_height, tag_height, tide_direction, time_of_day,
                    paste(amdr, collapse = ", "), paste(cpdi_extent, collapse = ", "), cpdi_prediction))
        table_1 = rbind(table_1, cbind(receiver_height, tag_height, tide_direction, time_of_day,
                                       paste(amdr, collapse = ", "), paste(cpdi_extent, collapse = ", "), cpdi_prediction))

        #### Plotting Mechanistic Model Prediction and GAM Fit Prediction
        ## Setting directory to figure directory
        setwd(figure_dir)
        ## Subsetting data
        subset_prediction_data = detection_df_1[which(detection_df_1$height_of_receiver_off_bottom == receiver_height &
                                                        detection_df_1$height_of_tag_off_bottom == tag_height &
                                                        detection_df_1$direction == tide_direction &
                                                        detection_df_1$day_night == time_of_day), ]
        ### Creating Figure
        image_name = paste('DWRT-RH', receiver_height, '-TH', tag_height, '-TD', tide_direction, '-TOD', time_of_day, '.png', sep = "")
        png(filename = image_name, width = 1000, height = 500)
        par(mfrow = c(1, 2), oma = c(0, 0, 3, 0))
        plot_heat_map(cpdi_model, save = FALSE, save_description = image_name,
                      main = "Mechanistic Model Prediction", compress_x_labels = 100, compress_y_labels = 50)
        ## Creating confidence intervals from predicted error fits
        upper_ci = predicted_rates + (2 * predicted_error_fits.se)
        lower_ci = predicted_rates - (2 * predicted_error_fits.se)
        ## Converting detections to recovery rate
        recovery_rates = predicted_rates / 60
        ## Plotting empty model
        plot(x = subset_prediction_data$distance, y = subset_prediction_data$recovery_rate,
             ylim = c(0, 1), main = 'GAM Criterion Estimation',
             xlab = 'Distance Between Tag and Receiver (m)', ylab = '% Transmissions Detected',
             col = 'black', pch = 19, cex = .25)
        ## Looping through model fits
        for(r in 1:length(candidate_model_predictions_1)){
          predicted_recovery_rate = (candidate_model_predictions_1[[r]]$predicted_rates) / 60
          lines(x = 0:1000, y = predicted_recovery_rate)
        }
        ## Plotting individual data points
        points(x = subset_prediction_data$distance, y = subset_prediction_data$recovery_rate, pch = 19, cex = .25, col = 'black')
        ## Adding average maximum distance text
        max_distance = 500 + (which(abs(round(predicted_rates[500:1000]) - detection_threshold) == min(abs(round(predicted_rates[500:1000]) - detection_threshold)))) - 1 # subtract 1 because predictions start at 0, not 1
        max_distance_lower_ci = 500 + (which(abs(lower_ci[500:1000] - detection_threshold) == min(abs(lower_ci[500:1000] - detection_threshold)))) # subtract 1 because predictions start at 0, not 1
        max_distance_upper_ci = 500 + (which(abs(upper_ci[500:1000] - detection_threshold) == min(abs(upper_ci[500:1000] - detection_threshold)))) # subtract 1 because predictions start at 0, not 1
        ## Adding AMDR to plot
        points(x = amdr[3], y = .05, pch = 8)
        text(x = max_distance + 50, y = .15, labels = paste('AMDR \n', max_distance - 1, 'm'), col = 'black', cex = .75)
        ## Adding CPDI to plot
        if(cpdi_extent[3] != 0){
          points(x = cpdi_extent[3], y = predicted_rates[cpdi_extent[3]], pch = 4)
          text(x = cpdi_extent[3], y = max(recovery_rates) + .1, labels = paste('CPDI Extent \n ', cpdi_extent[3], 'm'), cex = .75)
        }
        mtext('CPDI Predictions for Deep Water Ranging Experiment \n ', font = 2, outer = TRUE)
        mtext(paste(' \n Receiver Height =', receiver_height, 'm, Tag Height =', tag_height,
                    'm, Tide Direction =', tide_direction, 'Diurnal Period =', time_of_day), outer = TRUE)
        dev.off()
        # beep(1)
      }
    }
  }
}

## Saving table_1
colnames(table_1) = c('Receiver Height (m)', 'Transmitter Height (m)', 'Tide Direction', 'Diurnal Period',
                      'Average Maximum Detection Radius (m)', 'CPDI Extent From GAM (m)', 'CPDI Prediction from Model (m)')
setwd(results_dir)
write.csv(table_1, 'CPDI_TABLE_1.csv')

#### Analysis Cleanup ----
### Saving workspace in results folder
setwd(results_dir)
save.image(file = 'Deep Water Range Test Results.R')

###### COMPONENT 2: Shallow Water Range Test ----
#### Loading In and Cleaning Data Files ----
setwd(data_dir)

### Loading Receiver Data
receiver_data = load_receiver(filename = 'DEPLOYMENT_RECOVERY_LOG.csv')
# dim(receiver_data) # 217 28

### Loading VUE data
vue_data = load_vemco('sand_island_range_test_nov_dec_2014.csv')
# dim(vue_data) # 426181 10

### Importing windspeed data from NOAA Honolulu station, Station ID 1612340
# http://www.ndbc.noaa.gov/view_text_file.php?filename=oouh1h2014.txt.gz&dir=data/historical/stdmet/
# parent: http://www.ndbc.noaa.gov/station_history.php?station=oouh1
# Data dates range between 2014-01-01 00:00:00 and 2014-12-31 23:54:00
# with the following format:
#   YY MM DD hh mm WDIR WSPD GST WVHT DPD APD MWD PRES ATMP WTMP DEWP VIS TIDE
# with the following units:
#   yr mo dy hr mn degT m/s m/s m sec sec degT hPa degC degC degC mi ft
wind_data = as.data.frame(read.table("oouh1h2014.txt"), header = TRUE, sep = " ")
# dim(wind_data) # 85864 18
colnames(wind_data) = c("YY", "MM", "DD", "hh", "mm", "WDIR", "WSPD", "GST", "WVHT", "DPD",
                        "APD", "MWD", "PRES", "ATMP", "WTMP", "DEWP", "VIS", "TIDE")
### Convert dates and times to POSIXct format (station data times are reported in UTC)
wind_data$datetime = as.POSIXct(paste(wind_data$YY, "-", wind_data$MM, "-", wind_data$DD, " ",
                                      wind_data$hh, ":", wind_data$mm, sep = ""),
                                format = "%Y-%m-%d %H:%M", tz = "GMT")

### Convert datetime to HST
wind_data$datetime = strftime(wind_data$datetime, format = "%Y-%m-%d %H:%M", tz = "HST")
wind_data$hourly = strftime(wind_data$datetime, format = "%Y-%m-%d %H")

### Grouping windspeed data into hourly means (mean_wspd)
wind_data_grouped = group_by(wind_data, hourly)
wspd_by_hour = summarize(wind_data_grouped, mean_wspd = mean(WSPD))
wspd_by_hour$hourly = as.POSIXct(wspd_by_hour$hourly, format = "%Y-%m-%d %H")

### Grouping windgust data into hourly means (mean_gst)
gst_by_hour = summarize(wind_data_grouped, mean_gst = mean(GST))
gst_by_hour$hourly = as.POSIXct(gst_by_hour$hourly, format = "%Y-%m-%d %H")

### Importing tide data from NOAA Honolulu station, accessed 27 Jan 2017
# http://tidesandcurrents.noaa.gov/waterlevels.html?id=1612340&units=standard&bdate=20110930&edate=20100930&timezone=LST&datum=MLLW&interval=h&action=
# Data accessed for dates ranging between 2014-09-30 00:00:00 HST and 2015-09-30 13:00:00 HST
# Data is already in HST
tide_data = read.csv('noaa tide data 214-2015.csv')
# dim(tide_data) # 8784 5
colnames(tide_data) = c("hour_bin", "water_level", 'sigma', 'I', "L")
tide_data$hour_bin = as.POSIXct(tide_data$hour_bin, format = "%Y-%m-%d %H:%M")

### Determining if tide was going in or out
## First assume all data is headed in
tide_data$direction = "in"
## Then loop through data. If the current water level is less than the previous water level, the tide is going out.
for(i in 2:length(tide_data$direction)){
  if(tide_data$water_level[i-1] > tide_data$water_level[i]){
    tide_data$direction[i] = "out"
  }
}
tide_data$direction = as.factor(tide_data$direction)

### Importing Tag Deployment Meta Data File
tag_meta_data = read.csv('Range Test Nov 2014 - Tag Meta Data Locations and Depth.csv')
# dim(tag_meta_data) # 18 9

### Converting lat lons from degree minutes to decimal degrees
tag_meta_data$lat = convert_lat_lon(tag_meta_data$lat_deg, tag_meta_data$lat_min)
tag_meta_data$lon = convert_lat_lon(tag_meta_data$lon_deg, tag_meta_data$lon_min)

### Determining distance of each tag from the receiver string using lon and lat waypoints from GPS
for (i in 1:length(tag_meta_data$distance)){
  tag_meta_data$distance[i] = round(1000 * (lldist(point1 = c(tag_meta_data$lon[i], tag_meta_data$lat[i]),
                                                   point2 = c(receiver_data$lon[receiver_data$station_name == 'Range Test - Nov 2014 - Sand Island 1m'][1],
                                                              receiver_data$lat[receiver_data$station_name == 'Range Test - Nov 2014 - Sand Island 1m'][1]))))
}
unique(tag_meta_data$distance) # 0 75 150 300 600 1200
start_date = as.POSIXct("2014-11-21 10:15:00 HST") end_date = as.POSIXct("2014-12-02 07:22:00 HST") vue_data = vue_data[which(vue_data$datetime >= start_date & vue_data$datetime < end_date), ] ### Rounding off of final partial hour to account for transmissions received while gear was recovered vue_data = vue_data[which(vue_data$datetime >= ceiling_date(min(vue_data$datetime), unit = 'hour') & vue_data$datetime < floor_date(max(vue_data$datetime), unit = 'hour')), ] range(vue_data$datetime) # "2015-03-17 19:00:23 HST" "2015-03-25 10:58:53 HST" # dim(vue_data) # 423384 10 ### Grouping receivers by height condition (m of sea floor) rec_1m = c(103911, 110298, 110308) rec_7.5m = c(102200, 123736, 123732) rec_15m = c(123733, 104543, 123734) ## How many total detections occurred durring the course of the experiment? detections_all = dim(vue_data)[1] # 423384 ### Removing tags not in experiment. This could be a tagged fish that swam by or a false detection tags_0m = c(18255, 18257, 18256) tags_75m = c(18260, 18273, 18270) tags_150m = c(18265, 18259, 18269) tags_300m = c(18262, 18258, 18271) tags_600m = c(18261, 18275, 18274) tags_1200m = c(18272, 18268, 18263) tag_ids = c(tags_0m, tags_75m, tags_150m, tags_300m, tags_600m, tags_1200m) vue_data = vue_data[as.numeric(levels(vue_data$tag_id))[vue_data$tag_id] %in% tag_ids, ] ## How many detections were from tags that were part of the experiment? detections_experiment = dim(vue_data)[1] # Total detections of experimental tags = 421342 detections_experiment.percent = detections_experiment / detections_all * 100 # 99.5177 % ## How many detections were from tags that were not part of the experiment? detections_other = detections_all - detections_experiment # Total detections from other tags = 2042 detections_other.percent = detections_other / detections_all * 100 # 0.4823045 % ## Assigning Lon Lat positions to detections vue_data = clean_vue_lat_lon(vue_data_df = vue_data, receiver_data_df = receiver_data) ## Getting estimated distances between receivers and tags based on deployment coordinates for (i in 1:length(tag_meta_data$lon)){ tag_meta_data$distance[i] = lldist(point1 = c(unique(vue_data$lon)[1], unique(vue_data$lat)[1]), point2 = c((tag_meta_data$lon)[i], (tag_meta_data$lat)[i])) * 1000 # m/km } round(unique(tag_meta_data$distance)) # Distances are: 0 75 150 300 600 1200 #### Hourly Detection Data Analysis ---- ### Binning data by tag, receiver, and hour bin ## First assign each datum to an date and hour bin vue_data$hour_bin = floor_date(vue_data$datetime, unit = "hour") ## Then loop through combinations of date/hour, tag id, and receiver to get the number of detections from a tag at a receiver each hour detection_df_2 = data.frame() registerDoParallel(cores = n_cores) detection_df_2 = foreach(b = c(1:length(unique(vue_data$hour_bin))), .combine = rbind) %:% foreach(i = c(1:length(unique(vue_data$tag_id))), .combine = rbind) %:% foreach(r = c(1:length(unique(vue_data$receiver))), .combine = rbind) %dopar%{ filtered = filter(vue_data, hour_bin == unique(vue_data$hour_bin)[b], tag_id == unique(vue_data$tag_id)[i], receiver == unique(vue_data$receiver)[r]) write_line = cbind(as.character(unique(vue_data$hour_bin))[b], as.character(unique(vue_data$tag_id))[i], as.character(unique(vue_data$receiver))[r], dim(filtered)[1]) return(write_line) } ## Cleaning detection_df_2 detection_df_2 = as.data.frame(detection_df_2) colnames(detection_df_2) = c('hour_bin', 'tag_id', 'receiver', 'detections') # dim(detection_df_2) # 32760 4 detection_df_2$detections = 
detection_df_2$detections = as.numeric(as.character(detection_df_2$detections))
detection_df_2$day = as.factor(strftime(detection_df_2$hour_bin, format = "%Y-%m-%d"))
detection_df_2$time = as.factor(strftime(detection_df_2$hour_bin, format = "%H:%M:%S"))
detection_df_2$height_of_tag_off_bottom = 7.5
detection_df_2$height_of_tag_off_bottom = as.factor(detection_df_2$height_of_tag_off_bottom)
detection_df_2$hour_bin = as.POSIXct(detection_df_2$hour_bin, format = "%Y-%m-%d %H:%M:%S")

### Combining detection dataframe with distance, bottom depth, height condition, lat, and lon from tag_meta_data
detection_df_2 = merge(detection_df_2, tag_meta_data, by.x = colnames(detection_df_2) == 'tag_id',
                       by.y = colnames(tag_meta_data) == 'tag_id')

## Combining detection data with wind speed and wind gust data
detection_df_2 = merge(detection_df_2, wspd_by_hour, by.x = colnames(detection_df_2) == 'hour_bin',
                       by.y = colnames(wspd_by_hour) == 'hourly')
detection_df_2 = merge(detection_df_2, gst_by_hour, by.x = colnames(detection_df_2) == 'hour_bin',
                       by.y = colnames(gst_by_hour) == 'hourly')
detection_df_2 = merge(detection_df_2, tide_data, by.x = colnames(detection_df_2) == 'hour_bin',
                       by.y = colnames(tide_data) == 'hour_bin')
detection_df_2$hour_bin = as.factor(detection_df_2$hour_bin)

### Assigning receivers to a height condition (height off the sea floor)
detection_df_2$height_of_receiver_off_bottom = NA
detection_df_2$height_of_receiver_off_bottom[which(detection_df_2$receiver %in% rec_1m)] = 1
detection_df_2$height_of_receiver_off_bottom[which(detection_df_2$receiver %in% rec_7.5m)] = 7.5
detection_df_2$height_of_receiver_off_bottom[which(detection_df_2$receiver %in% rec_15m)] = 15
detection_df_2$height_of_receiver_off_bottom = as.factor(detection_df_2$height_of_receiver_off_bottom)
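## Note: the diurnal-period loop below could be vectorized (a sketch; assumes
## hour() can parse hour_bin in its current representation):
# h = hour(as.POSIXct(as.character(detection_df_2$hour_bin)))
# detection_df_2$day_night = factor(ifelse(h > 6 & h < 18, 'day', 'night'))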
### Assigning diurnal period to detections based around 12 hour time bins.
### Before 6am or after 6pm is considered night; between 6am and 6pm is considered day
detection_df_2$day_night = 'night'
for(i in 1:length(detection_df_2$hour_bin)){
  if(hour(detection_df_2$hour_bin[i]) < 18 & hour(detection_df_2$hour_bin[i]) > 6){
    detection_df_2$day_night[i] = 'day'
  }
}
detection_df_2$day_night = as.factor(detection_df_2$day_night)

### Determining recovery rate based on the number of detections logged and an average of 60 transmissions per tag per hour
detection_df_2$recovery_rate = detection_df_2$detections / 60
# dim(detection_df_2) # 32760 27

#### Statistical Analysis - Modeling Detection Probability ----
### Setting Model Parameters
alpha = 0.05
transmissions_per_hour = 60
detection_threshold = (alpha * transmissions_per_hour) # 3

### Fitting a GAM
global.gam_2 = gam(formula = detections ~ s(distance, k = 5) + height_of_receiver_off_bottom +
                     s(direction, bs = "re") + s(mean_wspd, bs = "re") + s(mean_gst, bs = "re") +
                     s(water_level, bs = "re") + s(day_night, bs = "re"), # + s(receiver, bs = "re"),
                   data = detection_df_2, family = poisson(link = 'log'), na.action = "na.fail")
gam.summary_2 = summary(global.gam_2)
## R squared value from GAM
gam.summary_2$r.sq # r.sq = 0.6870176
## Checking that GAM smoother terms are appropriate
gam.check(global.gam_2)

### Using the dredge function to compare this GAM to all possible combinations of variables
## Exporting the GAM and data to the cluster for using the pdredge function
clusterExport(clust, c('global.gam_2', 'detection_df_2'))
mod.dredged_2 = pdredge(global.gam_2, cluster = clust)

### Comparing predictions for the candidate models from dredge that are within delta 2 of the lowest AICc
candidate_models_2 = get.models(mod.dredged_2, subset = delta <= 2)

## Setting up repository for results
table_2 = data.frame(NULL)

## Looping through different combinations of predictor variables
for(time_of_day in unique(detection_df_2$day_night)){
  for(receiver_height in as.numeric(levels(unique(detection_df_2$height_of_receiver_off_bottom))[unique(detection_df_2$height_of_receiver_off_bottom)])){
    for(tide_direction in unique(detection_df_2$direction)){
      ## A master list of all predictor variables from the global model to be used for predicting with candidate models
      var_predictors = list(day_night = time_of_day,
                            height_of_receiver_off_bottom = receiver_height,
                            mean_wspd = median(detection_df_2$mean_wspd),
                            mean_gst = median(detection_df_2$mean_gst),
                            direction = tide_direction,
                            water_level = median(detection_df_2$water_level))
      ## Setting up output dataframes for summary statistics and predictions from candidate models
      candidate_model_summary_2 = as.data.frame(NULL)
      candidate_model_predictions_2 = list()
      ## Looping through candidate models
      for(i in 1:length(candidate_models_2)){
        candidate_model_id = paste('model', names(candidate_models_2)[i], sep = '_')
        candidate_model = candidate_models_2[[i]]
        ## Subsetting terms from the master list of predictors if they appear as terms in the candidate model
        candidate_model_terms = attr(candidate_model$terms, which = 'term.labels')
        new_mod_data = var_predictors[names(var_predictors) %in% candidate_model_terms]
        ### Predicting detection rates using the candidate model under evaluation
        predicted_rates = c()
        predicted_error_fits.fit = c()
        predicted_error_fits.se = c()
        for(dist in 0:1200){
          new_mod_data$distance = dist
          predicted_rates = c(predicted_rates, predict(candidate_model, newdata = new_mod_data, type = "response")[[1]])
          predicted_error_fits.fit = c(predicted_error_fits.fit, predict(candidate_model, newdata = new_mod_data, type = "response", se.fit = TRUE)$fit[[1]])
          predicted_error_fits.se = c(predicted_error_fits.se, predict(candidate_model, newdata = new_mod_data, type = "response", se.fit = TRUE)$se.fit[[1]])
        }
        ## Saving model predictions
        model_predictions = list()
        model_predictions$predicted_rates = predicted_rates
        model_predictions$predicted_error_fits.fit = predicted_error_fits.fit
        model_predictions$predicted_error_fits.se = predicted_error_fits.se
        candidate_model_predictions_2[[i]] = model_predictions
        ### Summary Stats
        ## Determining the average maximum detection radius
        ave_max_distance = which(abs(predicted_rates - detection_threshold) == min(abs(predicted_rates - detection_threshold))) - 1 # subtract 1 because predictions start at 0, not 1
        ## Determining CPDI extent
        # cpdi_extent = which.max(round(predicted_rates, digits = 0)) - 1 # subtract 1 because predictions start at 0, while indexing starts at 1
        ## Changing our CPDI criterion
        max_rate_index = which.max(predicted_rates)
        max_rate_minus_se = predicted_rates[max_rate_index] - predicted_error_fits.se[max_rate_index]
        all_rates_plus_se = predicted_rates[1:max_rate_index] + predicted_error_fits.se[1:max_rate_index]
        cpdi_extent = which.min(abs(max_rate_minus_se - all_rates_plus_se)) - 1 # subtract 1 because predictions start at 0, while indexing starts at 1
        ## Determining the model's R^2 value
        r.sq = summary(candidate_model)$r.sq
        ## Determining the deviance explained by the model
        dev.explained = summary(candidate_model)$dev.expl
        ## Saving summary stats to the model summary dataframe
        candidate_model_summary_2 = rbind(candidate_model_summary_2,
                                          cbind(as.character(candidate_model_id),
                                                paste(candidate_model_terms, sep = ' ', collapse = ' '),
                                                r.sq, dev.explained, candidate_model$aic,
                                                ave_max_distance, cpdi_extent))
      }
      colnames(candidate_model_summary_2) = c('model_id', 'model_terms', 'r.sq', 'dev.expl', 'aic', 'ave_max_distance', 'cpdi_extent')
      names(candidate_model_predictions_2) = as.character(candidate_model_summary_2$model_id)
      ### R sq range from candidate models
      rsq = fivenum(as.numeric(as.character(candidate_model_summary_2$r.sq))) # 0.6883606 0.6883628 0.6883662 0.6883737 0.6883817
      ### Deviance explained from candidate models
      dev_explained = fivenum(as.numeric(as.character(candidate_model_summary_2$dev.expl))) # 0.7266961 0.7266961 0.7266962 0.7266962 0.7266963
      ### Average maximum detection radius
      amdr = fivenum(as.numeric(as.character(candidate_model_summary_2$ave_max_distance)))
      ### CPDI extent from candidate models
      cpdi_extent = fivenum(as.numeric(as.character(candidate_model_summary_2$cpdi_extent)))

      ### Running mechanistic CPDI model for receiver position
      cpdi_model = predict_cpdi_interference(bottom_depth = 25,
                                             ave_max_detection_radius = amdr[3],
                                             receiver_depth = (25 - receiver_height),
                                             speed_of_sound = 1530,
                                             max_horizontal_dist = 1200,
                                             blanking_interval = .260,
                                             evaluation_interval = 1)
      ## Getting CPDI prediction from the model
      cpdi_prediction = min(which(cpdi_model[(25 - receiver_height), ] == 1)) - 1

      #### Plotting Mechanistic Model Prediction and GAM Fit Prediction Figures
      ## Setting directory to output a figure of the model
      setwd(figure_dir)
      ## Subsetting data to only the data used in the fit
      subset_prediction_data = detection_df_2[which(detection_df_2$height_of_receiver_off_bottom == receiver_height &
                                                      detection_df_2$direction == tide_direction &
                                                      detection_df_2$day_night == time_of_day), ]
      ## Creating Figure
      image_name = paste('SWRT-RH', receiver_height, tide_direction, '-TOD', time_of_day, '.png', sep = "")
      # print(image_name)
      png(filename = image_name, width = 1000, height = 500)
      par(mfrow = c(1, 2), oma = c(0, 0, 3, 0))
      plot_heat_map(cpdi_model, save = FALSE, save_description = image_name,
                    main = "Mechanistic Model Prediction", compress_x_labels = 100, compress_y_labels = 5)
      ### Creating confidence intervals from predicted error fits
      upper_ci = predicted_rates + (2 * predicted_error_fits.se)
      lower_ci = predicted_rates - (2 * predicted_error_fits.se)
      ### Converting detections to recovery rate
      recovery_rates = predicted_rates / 60
      ## Plotting Model Fit
      plot(x = subset_prediction_data$distance, y = subset_prediction_data$recovery_rate,
           ylim = c(0, 1), xlim = c(0, 1200), main = 'GAM Criterion Estimation',
           xlab = 'Distance Between Tag and Receiver (m)', ylab = '% Transmissions Detected',
           col = 'black', pch = 19, cex = .25)
      ## Looping through model fits
      for(r in 1:length(candidate_model_predictions_2)){
        predicted_recovery_rate = (candidate_model_predictions_2[[r]]$predicted_rates) / 60
        lines(x = 0:1200, y = predicted_recovery_rate)
      }
      ## Plotting individual data points
      points(x = subset_prediction_data$distance, y = subset_prediction_data$recovery_rate, pch = 19, cex = .25, col = 'black')
      ## Adding average maximum distance text
      max_distance = (which(abs(predicted_rates - detection_threshold) == min(abs(predicted_rates - detection_threshold)))) - 1 # subtract 1 because predictions start at 0, not 1
      max_distance_lower_ci = (which(abs(lower_ci - detection_threshold) == min(abs(lower_ci - detection_threshold)))) - 1 # subtract 1 because predictions start at 0, not 1
      max_distance_upper_ci = (which(abs(upper_ci - detection_threshold) == min(abs(upper_ci - detection_threshold)))) - 1 # subtract 1 because predictions start at 0, not 1
      ## Adding AMDR to plot
      points(x = amdr[3], y = .05, pch = 8)
      text(x = max_distance + 50, y = .15, labels = paste('AMDR \n', max_distance - 1, 'm'), col = 'black', cex = .75)
      ## Adding CPDI to plot
      if(cpdi_extent[3] != 0){
        points(x = cpdi_extent[3], y = predicted_rates[cpdi_extent[3]], pch = 4)
        text(x = cpdi_extent[3], y = max(recovery_rates) + .1, labels = paste('CPDI Extent \n ', cpdi_extent[3], 'm'), cex = .75)
      }
      mtext('CPDI Predictions for Shallow Water Ranging Experiment \n ', font = 2, outer = TRUE)
      mtext(paste(' \n Receiver Height =', receiver_height, 'm, Tag Height = 7.5 m, Tide Direction =',
                  tide_direction, 'Diurnal Period =', time_of_day), outer = TRUE) # all tags in this experiment were 7.5 m off the bottom
      dev.off()
      ## Writing output of all of this to a table
      table_2 = rbind(table_2, cbind(receiver_height, tide_direction, time_of_day,
                                     paste(amdr, collapse = ", "), paste(cpdi_extent, collapse = ", "), cpdi_prediction))
      # beep(1)
    }
  }
}

## Saving table_2
colnames(table_2) = c('Receiver Height (m)', 'Tide Direction', 'Diurnal Period',
                      'Average Maximum Detection Radius (m)', 'CPDI Extent From GAM (m)', 'CPDI Prediction from Model (m)')
setwd(results_dir)
write.csv(table_2, 'CPDI_TABLE_2.csv')

#### Analysis Cleanup ----
### Saving workspace in results folder
setwd(results_dir)
save.image(file = 'Shallow Water Range Test Results.R')

###### COMPONENT 3: Depth Dependent Validation Experiment ----
#### Predicting CPDI / No CPDI for two different depth scenarios
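## Back-of-the-envelope check on the mechanistic model inputs used in this
## script (an interpretation, not from the source): with speed_of_sound =
## 1530 m/s and blanking_interval = 0.260 s (the values passed to
## predict_cpdi_interference() above), a surface echo only arrives after the
## blanking window when its travel path exceeds the direct path by more than
## 1530 * 0.260 = 397.8 m. The model itself is defined in 'Mechanistic CPDI Model.R'.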
rec_50m = predict_cpdi_interference(bottom_depth = 50, ave_max_detection_radius = 846,
                                    receiver_depth = 49, speed_of_sound = 1530,
                                    max_horizontal_dist = 50, evaluation_interval = 1)
plot_heat_map(rec_50m, save = TRUE, save_description = 'Component3_depth_dependent_50m.png')

rec_215m = predict_cpdi_interference(bottom_depth = 215, ave_max_detection_radius = 846,
                                     receiver_depth = 214, speed_of_sound = 1530,
                                     max_horizontal_dist = 50, evaluation_interval = 1)
plot_heat_map(rec_215m, save = TRUE, save_description = 'Component3_depth_dependent_215m.png')

## Because the receiver intended for 215 m missed its mark, making sure predictions are the same for 212 m
rec_212m = predict_cpdi_interference(bottom_depth = 212, ave_max_detection_radius = 846,
                                     receiver_depth = 211, speed_of_sound = 1530,
                                     max_horizontal_dist = 50, evaluation_interval = 1)
plot_heat_map(rec_212m, save = TRUE, save_description = 'Component3_depth_dependent_212m.png')

##### Evaluating experiment results
#### Loading In and Cleaning Data Files ----
setwd(data_dir)

### Loading Receiver Data
receiver_data = load_receiver(filename = 'DEPLOYMENT_RECOVERY_LOG.csv')

### Loading VUE data
vue_data = load_vemco(filename = 'VUE_Export_CPDI_Depth_Validation_Experiment_March_2015.csv')

### Loading in Receiver Meta Data Files
metadata_212m = read.csv("VUE_Export_Rec_102202_metadata.csv")
metadata_50m = read.csv("VUE_Export_Rec_110317_metadata.csv")

### Assigning receiver SNs and tag IDs to respective conditions
receiver_50m = 110317  # serial number of receiver in 50 m depth
receiver_212m = 102202 # serial number of receiver in 212 m depth
tag_50m = 18275
tag_212m = 18274
tag_ids = c(tag_50m, tag_212m)

### Removing detections prior to deployment (occurred during set up on the boat)
start_date = as.POSIXct('2015-03-17 17:40:00', tz = "HST")
vue_data = vue_data[vue_data$datetime >= start_date, ]

### Removing detections after the end of the experiment
## Determining an end to the experiment:
## the receiver that was found floating off the airport could have broken free at any time.
## Because the tag attached to that receiver (18275) was detected at the 212 m receiver,
## the first time this tag was detected there is used as a stand-in for when the receiver broke free.
end_date = min(vue_data$datetime[which(vue_data$receiver == receiver_212m & vue_data$tag_id == tag_50m)])
vue_data = vue_data[vue_data$datetime < end_date, ]

## Removing detections before the start of the first full hour of the test and after the last full hour of the test
vue_data = vue_data[vue_data$datetime >= ceiling_date(min(vue_data$datetime), unit = "hour"), ]
vue_data = vue_data[vue_data$datetime <= floor_date(max(vue_data$datetime), unit = "hour"), ]

## How many total detections of any tag occurred during the course of the experiment?
detections_all = dim(vue_data)[1] # 12382

### Removing detections of tags not associated with the current experiment
vue_data = vue_data[vue_data$tag_id %in% tag_ids, ]

## How many detections of only experiment tags?
detections_experiment = dim(vue_data)[1] # 12381
detections_experiment.percent = detections_experiment / detections_all * 100 # 99.99192 %
## How many detections were from non-experiment tags?
detections_other = detections_all - detections_experiment # 1
detections_other.percent = detections_other / detections_all * 100 # 0.00807624 %

#### Hourly Detection Data Analysis ----
### Binning Data by Hour
## Separating data by receiver and hour bin
## First assign each datum to a date and hour bin
vue_data$hour_bin = floor_date(vue_data$datetime, unit = "hour")
## Then loop through combinations of date/hour and receiver to get the number of detections at a receiver each hour
detection_df_3 = data.frame()
registerDoParallel(cores = n_cores)
detection_df_3 = foreach(b = c(1:length(unique(vue_data$hour_bin))), .combine = rbind) %:%
  foreach(r = c(1:length(unique(vue_data$receiver))), .combine = rbind) %dopar% {
    filtered = filter(vue_data,
                      hour_bin == unique(vue_data$hour_bin)[b],
                      receiver == unique(vue_data$receiver)[r])
    write_line = cbind(as.character(unique(vue_data$hour_bin))[b],
                       as.character(unique(vue_data$receiver))[r],
                       dim(filtered)[1])
    return(write_line)
  }
detection_df_3 = as.data.frame(detection_df_3)
colnames(detection_df_3) = c('hour_bin', 'receiver', 'detections')
detection_df_3$detections = as.numeric(as.character(detection_df_3$detections))

## Assigning depth condition based on receiver serial number
detection_df_3$depth = NA
detection_df_3$depth[detection_df_3$receiver == receiver_50m] = '50 m'
detection_df_3$depth[detection_df_3$receiver == receiver_212m] = '212 m'

#### Statistical Analysis - Comparing Hourly Detections ----
#### Hourly Receiver Comparison
### Comparing detection rates between receivers over the period before the floating receiver broke free
### (Wilcoxon used in place of ANOVA given the non-normal distributions found below)

### Comparing Number of Detections
## Shapiro-Wilk tests for normality of each group
shapiro.test(detection_df_3$detections[detection_df_3$depth == '50 m'])
# p = 0.02473, p < 0.05, reject null hypothesis that distribution is normal
# Conclusion: Distribution is not normal
shapiro.test(detection_df_3$detections[detection_df_3$depth == '212 m'])
# p = 2.482e-13, p < 0.05, reject null hypothesis that distribution is normal
# Conclusion: Distribution is not normal

## Wilcoxon test - selected to compare the two groups (50 m and 212 m) non-parametrically
wilcox.test(x = detection_df_3$detections[detection_df_3$depth == '50 m'],
            y = detection_df_3$detections[detection_df_3$depth == '212 m'],
            paired = TRUE)
# p < 2.2e-16, p < 0.05, reject null hypothesis that the difference between groups is zero
# Conclusion: Wilcoxon signed-rank test shows a significant difference between the two receiver conditions

## Ratio of mean hourly detection rates for the 50 m receiver and the 212 m receiver
mean_detections_50m = mean(detection_df_3$detections[detection_df_3$depth == '50 m'])   # 56.56989
mean_detections_212m = mean(detection_df_3$detections[detection_df_3$depth == '212 m']) # 9.994624
mean_detections_50m / mean_detections_212m # Receiver at 50 m has 5.660032 x as many detections as receiver at 212 m

## Number of hours where the deeper receiver's detection rate exceeds the shallow receiver's
total_hour_bins = length(unique(detection_df_3$hour_bin))
times_212m_exceeds_50m = 0
times_212m_equal_to_50m = 0
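## Note: the per-hour comparison loop below could also be done with a
## cross-tabulation (a sketch; xtabs() gives one row per hour and one column
## per depth condition):
# wide = xtabs(detections ~ hour_bin + depth, data = detection_df_3)
# times_212m_exceeds_50m = sum(wide[, '212 m'] > wide[, '50 m'])
# times_212m_equal_to_50m = sum(wide[, '212 m'] == wide[, '50 m'])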
for(hour in unique(detection_df_3$hour_bin)){
  subset_detection_df_3 = filter(detection_df_3, hour_bin == hour)
  if(subset_detection_df_3$detections[subset_detection_df_3$depth == '212 m'] >
     subset_detection_df_3$detections[subset_detection_df_3$depth == '50 m']){
    times_212m_exceeds_50m = times_212m_exceeds_50m + 1
  } else if(subset_detection_df_3$detections[subset_detection_df_3$depth == '212 m'] ==
            subset_detection_df_3$detections[subset_detection_df_3$depth == '50 m']){
    times_212m_equal_to_50m = times_212m_equal_to_50m + 1
  }
}
times_212m_exceeds_50m  # 0
times_212m_equal_to_50m # 0

#### Daily Meta Data Analysis ----
start_date = as.POSIXct('2015-3-19')

#### Cleaning receiver meta data
## Receiver SN 102202 (receiver_212m)
colnames(metadata_212m) = c('datetime', 'receiver', 'description', 'data', 'units')
metadata_212m$datetime = as.POSIXct(metadata_212m$datetime)
metadata_212m = metadata_212m[which(metadata_212m$datetime >= start_date &
                                      metadata_212m$datetime < floor_date(end_date, unit = "day")), ]
metadata_212m$data = as.numeric(as.character(metadata_212m$data))

## Receiver SN 110317 (receiver_50m)
colnames(metadata_50m) = c('datetime', 'receiver', 'description', 'data', 'units')
metadata_50m$datetime = as.POSIXct(metadata_50m$datetime)
metadata_50m = metadata_50m[which(metadata_50m$datetime >= start_date &
                                    metadata_50m$datetime < floor_date(end_date, unit = "day")), ]
metadata_50m$data = as.numeric(as.character(metadata_50m$data))

### Creating daily dataframe with criteria: Pings, Syncs, Rejects, Detections
metadata_df = data.frame()
for(i in 1:length(unique(metadata_212m$datetime))){
  ## Subset metadata log for receiver_212m and write to dataframe
  subset_metadata = metadata_212m[metadata_212m$datetime == unique(metadata_212m$datetime)[i], ]
  metadata_df = rbind(metadata_df,
                      cbind(as.character(subset_metadata$datetime[1]), 'receiver_212m',
                            subset_metadata$data[subset_metadata$description == 'Daily Pings on 69 kHz'],
                            subset_metadata$data[subset_metadata$description == 'Daily Syncs on 69 kHz'],
                            subset_metadata$data[subset_metadata$description == 'Daily Detections on A69-1601'],
                            subset_metadata$data[subset_metadata$description == 'Daily Rejects on 69 kHz']))
  ## Subset metadata log for receiver_50m and write to dataframe
  subset_metadata = metadata_50m[metadata_50m$datetime == unique(metadata_50m$datetime)[i], ]
  metadata_df = rbind(metadata_df,
                      cbind(as.character(subset_metadata$datetime[1]), 'receiver_50m',
                            subset_metadata$data[subset_metadata$description == 'Daily Pings on 69 kHz'],
                            subset_metadata$data[subset_metadata$description == 'Daily Syncs on 69 kHz'],
                            subset_metadata$data[subset_metadata$description == 'Daily Detections on A69-1601'],
                            subset_metadata$data[subset_metadata$description == 'Daily Rejects on 69 kHz']))
}
colnames(metadata_df) = c('date', 'receiver', 'pings', 'syncs', 'detections', 'rejections')

## Adding the following metrics based on Simpfendorfer et al. 2008:
## code detection efficiency (cde), rejection coefficient (rc), noise quotient (nq)
metadata_df$cde = as.numeric(as.character(metadata_df$detections)) / as.numeric(as.character(metadata_df$syncs))
metadata_df$rc = as.numeric(as.character(metadata_df$rejections)) / as.numeric(as.character(metadata_df$syncs))
metadata_df$nq = as.numeric(as.character(metadata_df$pings)) - (as.numeric(as.character(metadata_df$syncs)) * 7)
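## Worked example of the three metrics above (hypothetical numbers, not from
## the data): a day with 10000 pings, 1200 syncs, 900 detections, and 12
## rejects gives cde = 900 / 1200 = 0.75, rc = 12 / 1200 = 0.01, and
## nq = 10000 - (1200 * 7) = 1600 (7 being the pings-per-transmission factor
## used in this script).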
#### Statistical Analysis - Comparing Daily Detection Metrics ----
### Comparing Code Detection Efficiency
## Shapiro-Wilk test of code detection efficiency
shapiro.test(metadata_df$cde[metadata_df$receiver == 'receiver_212m']) # p-value = 0.7799
shapiro.test(metadata_df$cde[metadata_df$receiver == 'receiver_50m']) # p-value = 0.002853
## Paired Wilcoxon test for the code detection efficiency metric for each receiver
wilcox.test(x = metadata_df$cde[metadata_df$receiver == 'receiver_212m'], y = metadata_df$cde[metadata_df$receiver == 'receiver_50m'], paired = TRUE) # p-value = 0.03125
median(x = metadata_df$cde[metadata_df$receiver == 'receiver_212m']) # 0.0865077
median(x = metadata_df$cde[metadata_df$receiver == 'receiver_50m']) # 1
### Comparing Rejection Coefficient
## Shapiro-Wilk test of rejection coefficient
### NOTE: Commented out because shapiro.test() errors when all values are identical; the rejection coefficient is 0 for every day at the 50 m receiver, so this line would fail when the script is sourced
# shapiro.test(metadata_df$rc[metadata_df$receiver == 'receiver_50m']) # Rejection coefficient = 0 throughout; cannot run a Shapiro-Wilk test
shapiro.test(metadata_df$rc[metadata_df$receiver == 'receiver_212m']) # p-value = 0.9427
## Paired Wilcoxon test for the rejection coefficient metric for each receiver
wilcox.test(x = metadata_df$rc[metadata_df$receiver == 'receiver_212m'], y = metadata_df$rc[metadata_df$receiver == 'receiver_50m'], paired = TRUE) # p-value = 0.03125
median(metadata_df$rc[metadata_df$receiver == 'receiver_212m']) # 0.01375137
median(metadata_df$rc[metadata_df$receiver == 'receiver_50m']) # 0.00
### Comparing Noise Quotient
## Shapiro-Wilk test of noise quotient
shapiro.test(metadata_df$nq[metadata_df$receiver == 'receiver_50m']) # p-value = 0.002315
shapiro.test(metadata_df$nq[metadata_df$receiver == 'receiver_212m']) # p-value = 0.25
## Paired Wilcoxon test for the noise quotient metric for each receiver
wilcox.test(x = metadata_df$nq[metadata_df$receiver == 'receiver_212m'], y = metadata_df$nq[metadata_df$receiver == 'receiver_50m'], paired = TRUE) # p-value = 0.03125
median(metadata_df$nq[metadata_df$receiver == 'receiver_212m']) # -13793.5
median(metadata_df$nq[metadata_df$receiver == 'receiver_50m']) # 1356

#### Analysis Cleanup ----
### Saving workspace in results folder
setwd(results_dir)
save.image(file = 'Depth Validation Test Results.R')

###### COMPONENT 3: Depth and Distance Validation Experiment ----
#### Loading In and Cleaning Data Files ----
setwd(data_dir)
### Loading Receiver Data
receiver_data = load_receiver(filename = 'DEPLOYMENT_RECOVERY_LOG.csv')
### Loading VUE data
vue_data = load_vemco(filename = 'VUE_Export_Depth_and_Distance_Validation_Experiment_June_2015.csv')
### Loading receiver metadata logs
metadata_receiver_60m = read.csv('VUE_Export_Rec_123739_metadata.csv')
metadata_receiver_508m = read.csv('VUE_Export_Rec_102199_metadata.csv')
### Cleaning Vue Data
## Assigning receiver SNs and tag IDs to respective conditions
receiver_60m = 123739 # serial number of receiver at the 60 m distance condition
receiver_508m = 102199 # serial number of receiver at the 508 m distance condition
tag_ids = c(46915, 46912, 46913)
## Removing detections prior to deployment (occurred during set up on the boat) and after equipment was recovered
start_date = as.POSIXct('2015-05-24 19:12:25', tz = "HST")
end_date = as.POSIXct('2015-05-30 09:46:00', tz = 'HST')
vue_data = vue_data[which(vue_data$datetime >= start_date & vue_data$datetime < end_date), ]
## Removing detections from partial hour bins, i.e. the hour the equipment was deployed and the hour it was recovered, keeping only full hours of the test
vue_data = vue_data[which(vue_data$datetime >= ceiling_date(min(vue_data$datetime), unit = "hour") & vue_data$datetime <= floor_date(max(vue_data$datetime), unit = "hour")), ]
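## Quick illustration of the trim above, using the deployment bounds as example timestamps:
## ceiling_date(as.POSIXct('2015-05-24 19:12:25'), unit = "hour") -> 2015-05-24 20:00:00 (first full hour)
## floor_date(as.POSIXct('2015-05-30 09:46:00'), unit = "hour")   -> 2015-05-30 09:00:00 (last full hour)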
## How many total tag detections were there?
detections_all = dim(vue_data)[1] # Total detections of all tags = 1699
## Removing detections of tags not associated with the current experiment
vue_data = vue_data[vue_data$tag_id %in% tag_ids, ]
detections_experiment = dim(vue_data)[1] # Total detections of experimental tags = 1669
## What was the percentage of tag detections from experiment tags?
detections_experiment.percent = detections_experiment / detections_all * 100 # 98.23426 %
## What was the number of logged detections from other tags?
detections_other = detections_all - detections_experiment # Total detections from other tags = 30
detections_other.percent = detections_other / detections_all * 100 # 1.765745 %

#### Hourly Detection Data Analysis ----
### Creating a dataframe (detection_df_4) to store results binned by hour
## Separating data by tag, receiver, and hour bin
## First assign each datum to a date and hour bin
vue_data$hour_bin = floor_date(vue_data$datetime, unit = "hour")
## Then loop through combinations of date/hour, tag id, and receiver to get the number of detections from a tag at a receiver each hour
detection_df_4 = data.frame()
registerDoParallel(cores = n_cores)
detection_df_4 = foreach(b = c(1:length(unique(vue_data$hour_bin))), .combine = rbind) %:%
  foreach(r = c(1:length(unique(vue_data$receiver))), .combine = rbind) %dopar% {
    filtered = filter(vue_data, hour_bin == unique(vue_data$hour_bin)[b], receiver == unique(vue_data$receiver)[r])
    write_line = cbind(as.character(unique(vue_data$hour_bin))[b], as.character(unique(vue_data$receiver))[r], dim(filtered)[1])
    return(write_line)
  }
detection_df_4 = as.data.frame(detection_df_4)
colnames(detection_df_4) = c('hour_bin', 'receiver', 'detections')
detection_df_4$detections = as.numeric(as.character(detection_df_4$detections))
## Assigning distance condition based on receiver sn
detection_df_4$distance = NA
detection_df_4$distance[detection_df_4$receiver == receiver_60m] = '60 m'
detection_df_4$distance[detection_df_4$receiver == receiver_508m] = '512 m'

#### Statistical Analysis - Comparing Hourly Detections ----
### Hourly Receiver Comparison
## Shapiro-Wilk tests of whether each group's hourly detection counts are normally distributed
shapiro.test(detection_df_4$detections[detection_df_4$distance == '60 m'])
# p = 0.00151, p < 0.05, reject null hypothesis that distribution is normal
# Conclusion: Distribution is not normal
shapiro.test(detection_df_4$detections[detection_df_4$distance == '512 m'])
# p = 4.738e-09, p < 0.05, reject null hypothesis that distribution is normal
# Conclusion: Distribution is not normal
## Wilcoxon test - Selected to compare the two groups (60 m and 512 m) non-parametrically
wilcox.test(x = detection_df_4$detections[detection_df_4$distance == '60 m'], y = detection_df_4$detections[detection_df_4$distance == '512 m'], paired = TRUE)
# p-value < 2.2e-16, reject null hypothesis that the difference between groups is zero
# Conclusion: The two receivers logged significantly different numbers of detections
## Ratio of mean hourly detection rates for the 60 m and 512 m receivers
mean_detections_60m = mean(detection_df_4$detections[detection_df_4$distance == '60 m']) # 4.879699
mean_detections_508m = mean(detection_df_4$detections[detection_df_4$distance == '512 m']) # 7.669173
mean_detections_508m / mean_detections_60m # Receiver at 508 m has 1.571649 x as many detections as receiver at 60 m
## Number of hours where the distant receiver's detection rate exceeds the near receiver's
total_hour_bins = length(unique(detection_df_4$hour_bin))
times_508m_exceeds_60m = 0
times_508m_equal_to_60m = 0
for(hour in unique(detection_df_4$hour_bin)){
  subset_detection_df_4 = filter(detection_df_4, hour_bin == hour)
  if(subset_detection_df_4$detections[subset_detection_df_4$distance == '512 m'] > subset_detection_df_4$detections[subset_detection_df_4$distance == '60 m']){
    times_508m_exceeds_60m = times_508m_exceeds_60m + 1
  } else if(subset_detection_df_4$detections[subset_detection_df_4$distance == '512 m'] == subset_detection_df_4$detections[subset_detection_df_4$distance == '60 m']){
    times_508m_equal_to_60m = times_508m_equal_to_60m + 1
  }
}
times_508m_exceeds_60m # 107
times_508m_equal_to_60m # 9

#### Analysis Cleanup ----
### Saving workspace in results folder
setwd(results_dir)
save.image(file = 'Depth and Distance Validation Test Results.R')

###### COMPONENT 4: Multipath Confirmation - Tank Experiment ----
## Analysis Ideas
# Break up analysis into three groups:
# 1. Control
# 2. Multipath (CPDI not expected)
# 3. Multipath (CPDI expected)
## Anna's helpful analysis ideas:
# 1. Create table Event (a single event would be do, 2a, 150m, 15m)
# 2. Categorize each event (control, multipath - no CPDI, multipath - CPDI)
# 3. Code by type (CPDI predicted vs. not predicted)
# 4. Detected - number of actual detections at receiver
# 5. Flip to wide table with columns:
#    1. Event
#    2. Type
#    3. Detected
# 6. Fit GLM (Detected ~ type + event, family = binomial)

#### Loading in and Cleaning Data Files ----
setwd(data_dir)
### Importing VR100 data
vr100_raw = read.csv(file.path(data_dir, 'VR100_10320_D2016.07.15T20.49.23.csv'))
### Importing VR2 data from VUE
vr2 = load_vemco(file.path(data_dir, 'VUE_Export_CPDI_Tank_Test.csv'))
### Importing metadata on when transmissions were sent, recorded from Brendan's laptop during the experiment
condStoreDM = read.csv(file.path(data_dir, 'condStoreDM.csv'), header = FALSE) # file.path() keeps the directory separator intact
condStoreDO = read.csv(file.path(data_dir, 'condStoreDO.csv'), header = FALSE)
### Creating vr100 dataframe for manipulation
vr100 = vr100_raw
vr100$datetime = as.POSIXct(paste(vr100$Date, vr100$Time, sep = " "), format = "%Y-%m-%d %H:%M:%S", tz = "GMT")
attr(vr100$datetime, "tzone") <- "HST"
colnames(vr100) = c("date", "time", "chan", "freq", "type", "code_space", "tag_id", "serial_number", "data1", "units1", "slope1", "init1", "data2", "units2", "slope2", "int2", "signal_db", "gain_mode", "gain_db", "lat", "lon", "comment", "datetime")
### Creating vr2 dataframe for manipulation
vr2$datetime = as.POSIXct(vr2$datetime, tz = "GMT")
attr(vr2$datetime, "tzone") <- "GMT"
vr2 = vr2[vr2$datetime >= as.POSIXct("2016-07-12 16:30:00"), ]
### Creating dataframes for direct and multipath conditions
colnames(condStoreDO) = c('pluse_train_time', 'tag_range', 'tag_depth', 'relative_arrival_time_0', 'case_number', 'sub_case_number')
colnames(condStoreDM)[1:3] = c('pluse_train_time', 'tag_range', 'tag_depth')
colnames(condStoreDM)[4:23] = paste('relative_arrival_time_', 0:19, sep = "")
colnames(condStoreDM)[24:25] = c('case_number', 'sub_case_number')
condStoreDO$dm = 'direct only' # designates whether multipath or not
condStoreDM$dm = 'multipath' # designates whether multipath or not
### Predicting whether multipaths produced CPDI effects
## DO conditions only have a direct signal; all should be detected by receivers, so they are marked as controls
condStoreDO$cpdi_type = 'Control'
## To determine if multipath signals cause CPDI, we compare the relative arrival times of the multipath signals to the blanking interval (0.260 seconds)
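## Illustrative check of the rule above on a hypothetical pulse train (not data):
## multipath copies arriving within 0.260 s of the direct signal fall inside the
## blanking interval; any echo arriving later than 0.260 s is predicted to
## interfere with decoding (CPDI).
# example_arrivals = c(0.05, 0.31, 0, 0)                # seconds after the direct signal
# any(example_arrivals != 0 & example_arrivals > 0.260) # TRUE -> CPDI predicted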
condStoreDM$cpdi_type = 'multipath_no_cpdi_predicted'
for(i in 1:dim(condStoreDM)[1]){
  # Flag the pulse train as CPDI-predicted if any multipath copy arrives after the blanking interval
  if(any(condStoreDM[i, 4:23] != 0 & condStoreDM[i, 4:23] > 0.260)){
    condStoreDM$cpdi_type[i] = 'multipath_cpdi_predicted'
  }
}
### Combining direct and multipath condition dataframes into a single dataframe
## To do this, condStoreDO needs to be padded out to the same width as condStoreDM, which means adding 19 additional (empty) relative-arrival-time columns
condStoreDO = cbind(condStoreDO[ ,1:4], matrix(data = NA, nrow = dim(condStoreDO)[1], ncol = 19), condStoreDO[ ,5:8])
colnames(condStoreDO)[5:23] = paste('relative_arrival_time_', 1:19, sep = "")
## Combining dataframes
condition = data.frame(rbind(condStoreDO, condStoreDM)) # Combining into condition dataframe
## Converting MATLAB date numbers to POSIX datetime objects
condition$datetime = datenum2posix(condition$pluse_train_time)
### Ordering dataframes by increasing time of detection
condition = condition[order(condition$datetime), ] # sorting by time
vr2 = vr2[sort.list(x = vr2$datetime, decreasing = FALSE), ]
vr100 = vr100[sort.list(x = vr100$datetime, decreasing = FALSE), ]
rownames(vr100) = NULL
rownames(vr2) = NULL
### Converting vr2$receiver and vr2$tag_id from factor to numeric
vr2$receiver = as.numeric(levels(vr2$receiver)[vr2$receiver])
vr2$tag_id = as.numeric(levels(vr2$tag_id)[vr2$tag_id])
### Pulling in all syncs to see if there is major drift between the receiver and VR100 clocks
vr100_syncs = vr100[which(vr100$tag_id == 36817), ]
vr2_syncs = vr2[which(vr2$tag_id == 36817), ]
sync_data = data.frame(rbind(cbind(as.character(vr100_syncs$datetime), vr100_syncs$tag_id, 'vr100', vr100_syncs$signal_db),
                             cbind(as.character(vr2_syncs$datetime), vr2_syncs$tag_id, vr2_syncs$receiver, NA)))
colnames(sync_data) = c("datetime", "tag_id", "receiver", "signal_db")
sync_data = sync_data[order(as.POSIXct(sync_data$datetime)), ]
### Removing tag detections prior to experiment day
remove_prior_dates = as.POSIXct("2016-07-12 10:31:0 HST")
vr2 = vr2[vr2$datetime >= remove_prior_dates, ]
vr100 = vr100[vr100$datetime >= remove_prior_dates, ]
### Checking the sync tag was detected the same number of times
length(which(vr100$tag_id == 36817)) # 1
length(which(vr2$tag_id == 36817)) # 1
### Checking which receivers were part of the experiment
unique(vr2$receiver) # 129577
### Checking which tags were logged on each receiver
unique(vr2$tag_id) # 36814 36817 18266 47515
unique(vr100$tag_id) # 18266 47515 36814 36817
### Separating the two receivers
## For the VR100, keep the record from its last sync detection onward
vr100 = vr100[max(which(vr100$tag_id == 36817)):dim(vr100)[1], ]
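## Note on the clock-offset correction below: difftime() chooses its display
## units automatically (secs, mins, hours, ...). Arithmetic with POSIXct handles
## those units correctly, but pinning units = 'secs' avoids surprises if the
## value is ever coerced with as.numeric(); it is equivalent here, e.g.:
# vr2_vr100_time_diff = difftime(time1 = max(vr100$datetime[vr100$tag_id == 36817]),
#                                time2 = min(vr2$datetime[vr2$tag_id == 36817]), units = 'secs')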
### Adjusting vr100 dates to sync with vr2 dates
## A test tag was detected once by both the VR100 and VR2 in the lab:
# VR2 detected tag at "2016-03-16 00:14:16 GMT"
# VR100 detected tag at "2016-03-16 00:13:40 GMT"
## Time offset between VR2 and VR100
vr2_vr100_time_diff = difftime(time1 = max(vr100$datetime[vr100$tag_id == 36817]), time2 = min(vr2$datetime[vr2$tag_id == 36817])) # Time difference of -1 secs
vr100$datetime = vr100$datetime - vr2_vr100_time_diff
### Combining VR2 and VR100 records into a single dataframe
all_data = data.frame(matrix(data = 0, nrow = dim(vr2)[1] + dim(vr100)[1], ncol = 4))
colnames(all_data) = c("datetime", "tag_id", "receiver", "signal_db")
all_data$datetime = c(vr2$datetime, vr100$datetime)
all_data$tag_id = c(vr2$tag_id, vr100$tag_id)
all_data$receiver = c(vr2$receiver, rep('vr100', dim(vr100)[1]))
all_data$signal_db = c(rep(NA, length(vr2$receiver)), vr100$signal_db)
## Ordering by time
all_data = all_data[order(all_data$datetime), ]
rownames(all_data) = NULL
## Time elapsed (seconds) between successive detections
all_data$diff_time = 0
for(i in 2:length(all_data$diff_time)){
  all_data$diff_time[i] = difftime(all_data$datetime[i], all_data$datetime[i - 1], units = 'secs')
}
all_data$exp = NA
## Determining where experiments begin and end
all_data$exp[all_data$tag_id == 36817] = 'Sync'
all_data = all_data[all_data$datetime >= min(all_data$datetime[all_data$tag_id == 18266]), ]
## How many detections did each receiver log?
print(paste('Receiver 129577 detected', dim(all_data[which(all_data$receiver == 129577 & all_data$tag_id != 18266), ])[1], 'transmissions', sep = " ")) # Receiver 129577 detected 594 transmissions
print(paste('The VR100 detected', dim(all_data[all_data$receiver == 'vr100', ])[1], 'transmissions', sep = " ")) # The VR100 detected 929 transmissions
### Matching conditions to recorded detection data (all_data)
rownames(all_data) = NULL
all_data$tag_range = NA
all_data$tag_depth = NA
all_data$case_number = NA
all_data$sub_case_number = NA
all_data$dm = NA
all_data$cpdi_type = NA
all_data$datetime = all_data$datetime - 6 # Offset chosen by comparing when the first test started (~18:58 in all_data) to the first transmission sent in condition
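## (Optional) The two windowed-matching loops below scale as O(n * m). With both
## series already sorted, findInterval() can shortlist each detection's nearest
## transmission so only one or two candidates need the tolerance test. A minimal
## sketch (idx and nearest_dt are illustrative names, not used in the analysis):
# idx = findInterval(as.numeric(all_data$datetime), as.numeric(condition$datetime))
# nearest_dt = condition$datetime[pmax(idx, 1)]                    # candidate at or before each detection
# abs(as.numeric(all_data$datetime) - as.numeric(nearest_dt)) <= 4 # +/- 4 s test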
for(i in 1:length(all_data$datetime)){
  for(r in 1:length(condition$datetime)){
    # If a detection occurred within an 8 second window (+/- 4 s) of a known transmission, copy that transmission's condition over
    if(all_data$datetime[i] - 4 <= condition$datetime[r] & all_data$datetime[i] + 4 >= condition$datetime[r]){
      all_data$tag_range[i] = condition$tag_range[r]
      all_data$tag_depth[i] = condition$tag_depth[r]
      all_data$case_number[i] = condition$case_number[r]
      all_data$sub_case_number[i] = condition$sub_case_number[r]
      all_data$dm[i] = condition$dm[r]
      all_data$cpdi_type[i] = condition$cpdi_type[r]
    }
  }
}
## Changing all_data$sub_case_number from an integer to a character to match other formats
all_data$sub_case_number = as.character(all_data$sub_case_number)
all_data$sub_case_number[all_data$sub_case_number == '1'] = 'a'
all_data$sub_case_number[all_data$sub_case_number == '2'] = 'b'
all_data$sub_case_number[all_data$sub_case_number == '3'] = 'c'
all_data$sub_case_number[all_data$sub_case_number == '0'] = ''
all_data$exp = paste(all_data$dm, all_data$case_number, all_data$sub_case_number, sep = "")
### Creating a new dataframe noting whether each transmission was detected, and by which receiver (VR2 or VR100)
exp_df = as.data.frame(matrix(0, nrow = length(condition$datetime), ncol = 3))
colnames(exp_df) = c('datetime', 'v129577', 'vr100')
exp_df$datetime = condition$datetime
for(i in 1:length(all_data$datetime)){
  for(r in 1:length(exp_df$datetime)){
    # Same windowed matching as above, but with a +/- 2 s tolerance
    if(all_data$datetime[i] - 2 <= exp_df$datetime[r] & all_data$datetime[i] + 2 >= exp_df$datetime[r]){
      if(all_data$receiver[i] == 129577){
        exp_df$v129577[r] = 1
      } else if(all_data$receiver[i] == 'vr100'){
        exp_df$vr100[r] = 1
      }
    }
  }
}
detection_df_5 = melt(exp_df, id = c('datetime'))
model_data = merge(detection_df_5, condition[ ,c('tag_range', 'tag_depth', 'case_number', 'sub_case_number', 'dm', 'cpdi_type', 'datetime')],
                   by = intersect(names(exp_df), names(condition[ ,c('tag_range', 'tag_depth', 'case_number', 'sub_case_number', 'dm', 'cpdi_type', 'datetime')])))
colnames(model_data)[c(2, 3)] = c('receiver', 'detected')
model_data$sub_case_number[model_data$sub_case_number == 0] = 1
model_data$sub_case_number = letters[model_data$sub_case_number]
model_data = model_data[model_data$receiver != 'vr100', ] # removing VR100 detections since we are not evaluating the detection efficiency of the VR100
## Need to adjust experiment 4 as it's currently one case for both receiver conditions
model_data$sub_case_number[model_data$case_number == 4 & model_data$tag_range == 508] = 'b'

#### Constructing a GLM Model for Detection ----
### Data setup
# 1. Create table Event (a single event would be do, 2a, 150m, 15m)
event_table = model_data
# 2. Determining the predicted outcome for each transmission (1 if a detection is predicted, 0 if not)
event_table$event = paste(event_table$case_number, event_table$sub_case_number, sep = "")
event_table$predicted_outcome = NA
event_table$predicted_outcome[event_table$cpdi_type == 'multipath_cpdi_predicted'] = 0
event_table$predicted_outcome[event_table$cpdi_type == 'Control'] = 1
event_table$predicted_outcome[event_table$cpdi_type == 'multipath_no_cpdi_predicted'] = 1
# 3. Renaming variables for model fit
colnames(event_table)[which(colnames(event_table) == 'receiver')] = 'predicted_observed'
colnames(event_table)[which(colnames(event_table) == 'cpdi_type')] = 'cpdi_prediction'
event_table$cpdi_prediction = as.factor(event_table$cpdi_prediction)
colnames(event_table)[which(colnames(event_table) == 'event')] = 'experiment_analgoue'
event_table$experiment_analgoue = as.factor(event_table$experiment_analgoue)
### Fitting GLMs to data
## The GLM models whether a transmission was detected as a function of the mechanistic model's CPDI prediction and the experimental analogue
cpdi_glm.interaction = glm(detected ~ cpdi_prediction + experiment_analgoue + cpdi_prediction*experiment_analgoue, data = event_table, family = binomial)
summary(cpdi_glm.interaction)
table_3a = summary(cpdi_glm.interaction)$coefficients
write.csv(table_3a, file = file.path(results_dir, 'table_3a.csv'))
## Removing the non-significant interaction term
cpdi_glm.no_interaction = glm(detected ~ cpdi_prediction + experiment_analgoue, data = event_table, family = binomial)
summary(cpdi_glm.no_interaction)
table_3b = summary(cpdi_glm.no_interaction)$coefficients
write.csv(table_3b, file = file.path(results_dir, 'table_3b.csv'))
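## Optional interpretive check: binomial GLM coefficients are on the log-odds
## scale, so exponentiating gives odds ratios (not part of the published tables):
# exp(coef(cpdi_glm.no_interaction))            # odds ratios
# exp(confint.default(cpdi_glm.no_interaction)) # Wald 95% CIs on the odds ratio scale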
### How many times did observations deviate from predictions?
## With control transmissions
sum(abs(event_table$predicted_outcome - event_table$detected))
length(event_table$predicted_outcome)
## Without control transmissions
sum(abs(event_table$predicted_outcome[event_table$cpdi_prediction != "Control"] - event_table$detected[event_table$cpdi_prediction != "Control"]))
length(event_table$predicted_outcome[event_table$cpdi_prediction != "Control"])

#### Model Diagnostics performed with Anna on 26 May 2017 ----
library(MuMIn)
## Dredging model to determine the most parsimonious model
options(na.action = na.fail)
dredge(cpdi_glm.no_interaction, extra = "R^2")
resp <- event_table[, "detected"]
pred <- event_table[, c("cpdi_prediction", "experiment_analgoue")]
## Looking at model residuals
plot(cpdi_glm.no_interaction)
## Comparing model to data
library(car)
marginalModelPlot(cpdi_glm.no_interaction)
resp <- event_table[event_table$experiment_analgoue != '3b', "detected"]
pred <- event_table[event_table$experiment_analgoue != '3b', c("cpdi_prediction", "experiment_analgoue")]
## Which model term explains how much variation?
library(hier.part)
hier.part(resp, pred, family = binomial)
## Tukey contrasts between experimental analogue groups
library('multcomp')
tukey_comparison = glht(cpdi_glm.no_interaction, linfct = mcp(experiment_analgoue = "Tukey"))
plot(tukey_comparison)
class(event_table$experiment_analgoue)
event_table$experiment_analgoue = as.factor(event_table$experiment_analgoue)
event_table$cpdi_prediction = as.factor(event_table$cpdi_prediction)
## Visualizing differences in glm factors
library(visreg)
visreg(cpdi_glm.no_interaction)
dredge(cpdi_glm.interaction)
anova(cpdi_glm.no_interaction)
## Comparing the glm with the interaction to the glm without the interaction term
anova(cpdi_glm.no_interaction, cpdi_glm.interaction, test = "Chisq")
## Comparing the best glm by stepwise addition of terms
anova(cpdi_glm.no_interaction, test = "Chisq")
## What happens if we pull out control cases that we know failed?
cpdi_glm.no_interaction = glm(detected ~ cpdi_prediction + experiment_analgoue, data = subset(event_table, experiment_analgoue != "3a"), family = binomial)
dredge(cpdi_glm.no_interaction) # Best model still includes both terms
plot(cpdi_glm.no_interaction) # Checking out residuals
# Is there equal variance across experimental analogues?
plot(event_table$experiment_analgoue[event_table$experiment_analgoue != "3a"], residuals(cpdi_glm.no_interaction))
## What happens if we only remove 2b?
cpdi_glm.no_interaction = glm(detected ~ cpdi_prediction + experiment_analgoue, data = subset(event_table, experiment_analgoue != "2b"), family = binomial)
dredge(cpdi_glm.no_interaction)
summary(cpdi_glm.no_interaction) # Best model still includes both terms
visreg(cpdi_glm.no_interaction, scale = "response") # Visualizing the differences in GLM factors

#### Analysis Cleanup ----
setwd(results_dir)
save.image(file = 'HIMB Multipath Tank Test Results.R')

###### Miscellaneous Plots and Figures ----
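## Minimal sketch of the image-source geometry presumably underlying get_surf_paths(),
## included as a sanity check for the Figure 2 curves below. The direct path runs
## straight from tag to receiver; the first surface multipath is the straight line
## to the receiver's mirror image above the surface. Illustrative only - the sourced
## 'Mechanistic CPDI Model.R' is the authoritative implementation.
direct_time_check = function(horizontal_dist, tag_depth, receiver_depth, sound_speed = 1530){
  sqrt(horizontal_dist^2 + (tag_depth - receiver_depth)^2) / sound_speed
}
surface_multipath_time_check = function(horizontal_dist, tag_depth, receiver_depth, sound_speed = 1530){
  sqrt(horizontal_dist^2 + (tag_depth + receiver_depth)^2) / sound_speed
}
direct_time_check(500, 250, 250)            # ~0.327 s
surface_multipath_time_check(500, 250, 250) # ~0.462 s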
#### Figure 2. Direct and First Multipath Arrival Comparison ----
### Setting up data repositories and determining the distance range to calculate over
direct_arrival_times = c()
first_multipath_arrival_times = c()
calc_distance = 2000
simulated_sound_speed = 1530
### Running arrival simulation
for(dist in 1:calc_distance){
  arrival_times = get_surf_paths(surface_to_tag_distance = 250, surface_to_receiver_distance = 250, tag_to_receiver_horizontal_distance = dist, bottom_depth = 500, speed_of_sound = simulated_sound_speed, ave_max_detection_radius = 100000)
  direct_arrival_times = c(direct_arrival_times, arrival_times[1])
  first_multipath_arrival_times = c(first_multipath_arrival_times, arrival_times[2])
}
### Plot Set-up
setwd(figure_dir)
png('Figure 2 - Direct and multipath arrival times.png', width = 2000, height = 1000)
mar.default <- c(5, 4, 4, 2) + 0.1
par(mar = mar.default + c(0, 4, 0, 0))
### Plotting
plot(y = direct_arrival_times, x = 1:calc_distance, type = 'l', cex = 2, cex.lab = 2, cex.axis = 2,
     # main = 'Arrival Time of Direct and First Surface Reflected Multipath \n Receiver Depth 250 m, Transmitter Depth = 250 m',
     ylab = 'Arrival Time (s)', xlab = 'Distance Between Tag and Receiver (m)', font = 1)
lines(y = first_multipath_arrival_times, x = 1:calc_distance, type = 'l', lty = 4)
dev.off()

#### Figure 5. Comparing Deep and Shallow Water Detection Profiles ----
### Plot Set-up
png('Figure 5 - Comparing deep and shallow detection profiles.png', width = 2400, height = 1200)
mar.default <- c(5, 4, 4, 2) + 0.1
par(mfrow = c(1, 2), mar = mar.default + c(0, 4, 0, 0))
### Plotting Deep Water Ranging Experiment Results
exp_1_data_to_plot = detection_df_1[which(detection_df_1$height_of_tag_off_bottom == 1 & detection_df_1$height_of_receiver_off_bottom == 1 & detection_df_1$direction == "In" & detection_df_1$day_night == "Day"), ]
plot((candidate_model_predictions_1$model_119$predicted_rates/60) ~ c(0:1000), type = 'l', xlim = c(0, 1000), xlab = "Distance Between Tag and Receiver (m)", ylim = c(0, 1), ylab = "% of Transmissions Detected", cex = 2, cex.lab = 2, cex.axis = 2, main = "Deep Water Ranging Experiment")
points((exp_1_data_to_plot$detections/60) ~ exp_1_data_to_plot$distance, pch = 19, cex = 2)
## Adding AMDR to plot
points(x = levels(candidate_model_summary_1$ave_max_distance[1])[candidate_model_summary_1$ave_max_distance[1]], y = .05, pch = 8, cex = 2)
text(x = as.numeric(levels(candidate_model_summary_1$ave_max_distance[1])[candidate_model_summary_1$ave_max_distance[1]]) + 50, y = .075, labels = paste('AMDR \n', max_distance - 1, 'm'), col = 'black', cex = 1.5)
## Adding CPDI extent to plot
if(levels(candidate_model_summary_1$cpdi_extent)[candidate_model_summary_1$cpdi_extent][3] != 0){
  points(x = as.numeric(levels(candidate_model_summary_1$cpdi_extent)[candidate_model_summary_1$cpdi_extent])[3],
         y = (candidate_model_predictions_1$model_119$predicted_rates[as.numeric(levels(candidate_model_summary_1$cpdi_extent)[candidate_model_summary_1$cpdi_extent])[3]] / 60),
         pch = 8, cex = 2)
  text(x = levels(candidate_model_summary_1$cpdi_extent)[candidate_model_summary_1$cpdi_extent][3], y = .5,
       labels = paste('CPDI Extent \n ', levels(candidate_model_summary_1$cpdi_extent)[candidate_model_summary_1$cpdi_extent][3], 'm'), cex = 1.5)
}
### Plotting Shallow Water Ranging Experiment Results
exp_2_data_to_plot = detection_df_2[which(detection_df_2$height_of_receiver_off_bottom == 1 & detection_df_2$direction == "in" & detection_df_2$day_night == "day"), ]
plot((candidate_model_predictions_2$model_123$predicted_rates/60) ~ c(0:1200), type = 'l', xlim = c(0, 1200), xlab = "Distance Between Tag and Receiver (m)", ylim = c(0, 1), ylab = "% of Transmissions Detected", cex = 2, cex.lab = 2, cex.axis = 2, main = "Shallow Water Ranging Experiment")
points((exp_2_data_to_plot$detections/60) ~ exp_2_data_to_plot$distance, pch = 19, cex = 2)
## Adding AMDR to plot
points(x = levels(candidate_model_summary_2$ave_max_distance[1])[candidate_model_summary_2$ave_max_distance[1]], y = .05, pch = 8, cex = 2)
text(x = as.numeric(levels(candidate_model_summary_2$ave_max_distance[1])[candidate_model_summary_2$ave_max_distance[1]]) + 75, y = .075, labels = paste('AMDR \n', max_distance - 1, 'm'), col = 'black', cex = 2)
dev.off()

#### Figure 6. Comparing Detections and Pings ----
### Converting data classes to numeric for ggplot (metadata_df columns were built as characters)
metadata_df$pings = as.numeric(as.character(metadata_df$pings))
metadata_df$detections = as.numeric(as.character(metadata_df$detections))
### Creating a dataframe just for plotting
meta_for_plot = as.data.frame(rbind(cbind("rec_50m", 'pings / 8', mean(metadata_df$pings[metadata_df$receiver == "receiver_50m"]) / 8),
                                    cbind("rec_50m", 'detections', mean(metadata_df$detections[metadata_df$receiver == "receiver_50m"])),
                                    cbind("rec_212m", 'pings / 8', mean(metadata_df$pings[metadata_df$receiver == "receiver_212m"]) / 8),
                                    cbind("rec_212m", 'detections', mean(metadata_df$detections[metadata_df$receiver == "receiver_212m"]))))
colnames(meta_for_plot) = c('receiver', 'Metric', 'Detected')
meta_for_plot$Detected = as.numeric(as.character(meta_for_plot$Detected))
### Plotting Figure
setwd(figure_dir)
png('exp_3_pings_vs_detections.png', height = 1800, width = 1800)
(ggplot(meta_for_plot, aes(factor(receiver), Detected, fill = Metric)) +
    geom_bar(stat = "identity", position = "dodge") +
    scale_fill_grey(start = 0, end = .7) +
    theme_bw())
dev.off()
setwd(results_dir)

###### Script Cleanup ----
save.image('completed_script.R')
# load(file.path(results_dir, 'completed_script.R'))
run_time = proc.time() - script_timer
send_push(user = 'uGEHvA4hr37tsrCCtpSv4sUUxVuTqN', message = paste("CPDI Analysis Completed in", round(run_time[3]), 'seconds!'))