clear all variables, load packages
rm(list=ls())
library(MASS)
library(data.table)
library(ggplot2)
library(cowplot)
##
## ********************************************************
## Note: As of version 1.0.0, cowplot does not change the
## default ggplot2 theme anymore. To recover the previous
## behavior, execute:
## theme_set(theme_cowplot())
## ********************************************************
library(monitoR)
## Loading required package: tuneR
##
## Attaching package: 'monitoR'
## The following object is masked from 'package:tuneR':
##
## readMP3
library(DHARMa)
## This is DHARMa 0.3.1. For overview type '?DHARMa'. For recent changes, type news(package = 'DHARMa') Note: Syntax of plotResiduals has changed in 0.3.0, see ?plotResiduals for details
library(MuMIn)
library(emmeans)
set working directory
setwd("/home/kdarras/DATA/Docs/Boulot/2012_Jambi/Microphone SNR")
import data from CSVs
types0=fread("Microphones - Types.csv")
mics0=fread("Microphones - Units.csv")
extinction0<-fread("Microphones - Extinction distances.csv")
tags0=fread("Bird and bat tags.csv")
derive tag duration and microphone IDs
tags0[,duration_s:=(max_time-min_time)]
tags0[,Microphone_ID:=as.numeric(substr(recording.name,1,2))]
check unique species (empty strings correspond to recordings with no tags)
tags0[,unique(binomial)]
## [1] "" "Chalcophaps indica" "Dicaeum trigonostigma"
## [4] "Geopelia striata" "Halcyon smyrnensis" "Orthotomus atrogularis"
## [7] "Orthotomus ruficeps" "Orthotomus sericeus" "Pycnonotus aurigaster"
## [10] "Pycnonotus goiavier" "A1" "B1"
## [13] "C1" "D1"
Calculate electrical noise floor of microphones to compare to recorder pre-amplifier noise contribution. Do not calculate for SMX-US1 because of built-in amplification. Pre-amp noise floor is -105 dBV at 192 kHz sampling rate according to Wildlife Acoustics specifications
types0[Code!="K",electrical_noise_floor_dBV:=(Sensitivity_specified_dBV-SNR_specified_1kHz_dB)]
merge microphones with types info
mics1=merge(mics0,types0,by="Code")
standardise self-noise and sound levels at closest distance by subtracting amplification of recorder
mics1[,c("selfnoise_1kHz_dB_noamp","selfnoise_40kHz_dB_noamp","soundlevel_1kHz_4m_level1_dB_noamp","soundlevel_40kHz_8m_dB_noamp"):=
.(selfnoise_1kHz_dB-Amplification_recorder_dB
,selfnoise_40kHz_dB-Amplification_recorder_dB
,soundlevel_1kHz_4m_level1_dB-Amplification_recorder_dB
,soundlevel_40kHz_8m_dB-Amplification_recorder_dB)]
further subtraction for FG Knowles microphone (Type F) that has built-in amplification of 48 dB
mics1[Code=="F",c("selfnoise_40kHz_dB_noamp","soundlevel_40kHz_8m_dB_noamp"):=
.(selfnoise_40kHz_dB_noamp-48,soundlevel_40kHz_8m_dB_noamp-48)]
get calibration value at 1 kHz for reference microphone (ID 9; amplification was 36dB for calibration) TODO: use calibration value for each set?
calibration_value_1kHz_dB=mics1[Microphone_ID==9,soundlevel_94dBSPL_1kHz]-36
The US calibrator’s “CAL” mode emits a 48 dB SPL (+/- 3dB) 40 kHz signal at a distance of 30 cm according to Wildlife acoustics documentation
calibration_value_40kHz_dB=mics1[Microphone_ID==15,soundlevel_48dBSPL_40kHz]-36
calculate sensitivities of all microphones relative to reference microphone
mics1[,relative_to_reference_1kHz_dB:=soundlevel_1kHz_4m_level1_dB_noamp-mics1[Microphone_ID==9,soundlevel_1kHz_4m_level1_dB_noamp]]
mics1[,relative_to_reference_40kHz_dB:=soundlevel_40kHz_8m_dB_noamp-mics1[Microphone_ID==15,soundlevel_40kHz_8m_dB_noamp]]
calculate SNR using calibration value, relative sensitivities, and self-noise
mics1[,SNR_1kHz_dB_cal:=calibration_value_1kHz_dB+relative_to_reference_1kHz_dB-selfnoise_1kHz_dB_noamp]
mics1[,SNR_40kHz_dB_cal:=calibration_value_40kHz_dB+relative_to_reference_40kHz_dB-selfnoise_40kHz_dB_noamp]
adjust SNR with loss of dB caused by acoustic vent (only used later in graphical comparison to manufacturer-provided values)
mics1[,SNR_1kHz_dB_cal_adjusted:=SNR_1kHz_dB_cal+Acoustic_vent_loss_dB]
We expect a linear relationship between specified and measured SNR and test different models.
Tested (likely) model: simple (constant) offsets in SNR values between manufacturers
lm.SNR=lm(SNR_1kHz_dB_cal_adjusted~SNR_specified_1kHz_dB+Manufacturer-1,mics1)
Complex model: offsets in SNR values between manufacturers and also differing slopes
lm.SNR.full=lm(SNR_1kHz_dB_cal_adjusted~SNR_specified_1kHz_dB*Manufacturer,mics1)
Simple model: no differences between manufacturers
lm.SNR.simple=lm(SNR_1kHz_dB_cal_adjusted~SNR_specified_1kHz_dB,mics1)
Null (unlikely) model: no linear relationship, only effect of manufacturer
lm.SNR.manu=lm(SNR_1kHz_dB_cal_adjusted~Manufacturer,mics1)
since models differ in number of predictors, we cannot use R-squared measures for comparison. So we determine best model with information criterion to counteract automatic positive effect of number of predictors on fit.
AICc(lm.SNR,lm.SNR.full,lm.SNR.simple,lm.SNR.manu)
## df AICc
## lm.SNR 8 157.9336
## lm.SNR.full 9 163.8536
## lm.SNR.simple 3 175.8383
## lm.SNR.manu 7 190.4122
We find that the tested model with constant offsets for each manufacturer is the most parsimonious. We can check model diagnostics even though we have a fairly robust understanding of the relationships so that it should be correct
plot(simulateResiduals(lm.SNR))
no striking problem.
We check its summary (one coult test for significant differences between manufacturers, but this is not the aim).
summary(lm.SNR)
##
## Call:
## lm(formula = SNR_1kHz_dB_cal_adjusted ~ SNR_specified_1kHz_dB +
## Manufacturer - 1, data = mics1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.1835 -1.8225 -0.0864 1.3231 13.2987
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## SNR_specified_1kHz_dB 1.6296 0.1981 8.228 6.09e-07 ***
## ManufacturerInvensense -48.2839 14.4452 -3.343 0.004453 **
## ManufacturerKnowles -30.3554 12.4174 -2.445 0.027330 *
## ManufacturerPanasonic -32.1375 12.9321 -2.485 0.025237 *
## ManufacturerPrimo -45.3422 15.2072 -2.982 0.009315 **
## ManufacturerPUI Audio -56.5021 13.4720 -4.194 0.000782 ***
## ManufacturerVesper -38.8375 12.9321 -3.003 0.008915 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.735 on 15 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.9944, Adjusted R-squared: 0.9917
## F-statistic: 377.7 on 7 and 15 DF, p-value: 1.05e-15
The low P-value for the SNR_specified_1kHz_dB predictor tells us that when assuming the null hypothesis (that this coefficient is 0) is true, it is highly unlikely to get values at least as extreme as the one we estimated.
Now we extract model predictions for plotting them subsequently.
predict.SNR=predict(lm.SNR)
We append these predictions to the microphones table.
mics2=cbind(mics1[as.numeric(names(predict.SNR))],SNR_predicted=predict.SNR)
We graph specified vs. measured SNR values along with the best model’s fit, we use the calibrated SNR values (corrected for acoustic vent presence).
ggplot(mics2,aes(SNR_specified_1kHz_dB,SNR_1kHz_dB_cal_adjusted,color=Manufacturer,label=Code,shape=Format))+
geom_point()+
geom_text(data=mics1[label_placement==1],nudge_y=2,show.legend=F)+
geom_line(aes(SNR_specified_1kHz_dB,SNR_predicted),size=1)+
scale_color_brewer(type="qual",palette=6)+
xlim(c(20,80))+ylim(c(20,80))+
geom_abline(slope = 1,intercept = 0,lty=3)+
labs(x="Signal-to-noise ratio at 1 kHz\n(dB, manufacturer-given)",y="Signal-to-noise ratio at 1 kHz\n(dB, measured)")+
theme_cowplot()+
theme(legend.position = c(0.1,0.7))
ggsave("Figures/Fig S4.eps",width=5,height=5)
We explore graphically whether measured SNR at 1 kHz correlates with SNR ar 40 kHz (we exclude the Knowles FG microphone that does not record audible sound and thus has no SNR at 1 kHz).
ggplot(mics1[Code!="K"],aes(SNR_1kHz_dB_cal_adjusted,SNR_40kHz_dB_cal,color=Manufacturer,label=Code,shape=Format,group=1))+
labs(x="Signal-to-noise ratio at 1 kHz\n(dB, measured)",y="Signal-to-noise ratio at 40 kHz\n(dB, measured)")+
# stat_smooth(method="lm",se=T,color="black")+
geom_point()+
scale_color_brewer(type="qual",palette=6)+
geom_text(nudge_y = 2)+
# xlim(c(10,80))+ylim(c(10,80))+
# geom_abline(slope=1,intercept = 0,lty=3)+
annotate(geom="text",x=50,y=100,label=paste("Correlation:\n",
round(cor(mics1[!is.na(SNR_1kHz_dB_cal),SNR_1kHz_dB_cal],mics1[!is.na(SNR_1kHz_dB_cal),SNR_40kHz_dB_cal]),2)))+
theme_cowplot()+
theme(legend.position = c(0.1,0.7))
ggsave("Figures/Fig S4.eps",width=5,height=5)
We select microphones with extreme SNR values for generating Fig S5.
mics1[set==1,SNR_40kHz_dB_cal,.(Model,Microphone_ID)]
## Model Microphone_ID SNR_40kHz_dB_cal
## 1: POM-1345P-C3310-R 1 67.29
## 2: POM-2735P-R 4 65.42
## 3: ROM-2235P-HD-R 6 82.15
## 4: POM-2730L-HD-R 12 83.25
## 5: AOM-5024L-HD-R 14 73.75
## 6: ICS-40720 16 106.59
## 7: WM-61A 7 87.53
## 8: PMM-3738-VM1000-R 21 92.22
## 9: SPM0404UD5 9 103.42
## 10: SPU0410LR5H-QB 20 102.52
## 11: FG-23629-C36 17 96.75
## 12: EM258 23 93.33
mics1[set==1,SNR_1kHz_dB_cal,.(Model,Microphone_ID)]
## Model Microphone_ID SNR_1kHz_dB_cal
## 1: POM-1345P-C3310-R 1 20.94
## 2: POM-2735P-R 4 49.50
## 3: ROM-2235P-HD-R 6 55.39
## 4: POM-2730L-HD-R 12 60.73
## 5: AOM-5024L-HD-R 14 76.42
## 6: ICS-40720 16 63.93
## 7: WM-61A 7 70.42
## 8: PMM-3738-VM1000-R 21 62.88
## 9: SPM0404UD5 9 65.40
## 10: SPU0410LR5H-QB 20 71.16
## 11: FG-23629-C36 17 NA
## 12: EM258 23 73.19
prepare data with direction as angle
extinction.melt=melt(mics2,id=c("Code","Microphone_ID")
,measure=c("extinction_distance_front_level5_1kHz_m","extinction_distance_front_40kHz_m"
,"extinction_distance_left_level5_1kHz_m","extinction_distance_left_40kHz_m"
,"extinction_distance_back_level5_1kHz_m","extinction_distance_back_40kHz_m"
,"extinction_distance_right_level5_1kHz_m","extinction_distance_right_40kHz_m")
,value.name = "Distance (m)")
assign angles in degrees to directions
extinction.melt[grepl("right",variable),c("direction","angle"):=.("right",90)]
extinction.melt[grepl("left",variable),c("direction","angle"):=.("left",270)]
extinction.melt[grepl("front",variable),c("direction","angle"):=.("front",0)]
extinction.melt[grepl("back",variable),c("direction","angle"):=.("back",180)]
extract frequency and estimation method
extinction.melt[grepl("1kHz",variable),Frequency:="1 kHz"]
extinction.melt[grepl("40kHz",variable),Frequency:="40 kHz"]
extinction.melt[grepl("extrapolated",variable),estimation:=paste("extrapolated")]
extinction.melt[is.na(estimation),estimation:="audio-visual"]
take average of two microphone units of one model for better graph readability
polar.points0=extinction.melt[,.(`Distance (m)`=mean(`Distance (m)`)),.(variable,Code,estimation,Frequency,angle,direction)]
add more lines to close the polar graph
polar.addon=polar.points0[direction=="front"]
polar.addon[,angle:=360]
polar.points1=rbind(polar.points0,polar.addon)
creating 2 polar plots
pp1=ggplot(polar.points1[Frequency=="1 kHz" & Code!="K" & estimation=="audio-visual"],aes(angle,`Distance (m)`,color=Code))+
geom_point()+
geom_line()+
coord_polar()+
ylim(c(0,max(polar.points1[,`Distance (m)`])))+
scale_x_continuous(breaks=c(0,90,180,270))+
scale_color_discrete(guide=F)+
facet_wrap(~Frequency)+
theme_cowplot()+
background_grid(major = "xy")
second one with legend
pp2=ggplot(polar.points1[Frequency=="40 kHz"],aes(angle,`Distance (m)`,color=Code))+
geom_point()+
geom_line()+
coord_polar()+
ylim(c(0,max(polar.points1[,`Distance (m)`])))+
scale_x_continuous(breaks=c(0,90,180,270))+
facet_wrap(~Frequency)+
theme_cowplot()+
background_grid(major = "xy")
combining them and saving
plot_grid(pp1,pp2,rel_widths = c(1,1.17),ncol = 2)
ggsave("Figures/Fig S3.eps",width=8,height=5)
calculate detection area as sum of four quarter-ellipses
extinction.melt1=extinction.melt[,.(detection_area_m2=round(
0.25*pi*`Distance (m)`[direction=="left"]*`Distance (m)`[direction=="front"]+
0.25*pi*`Distance (m)`[direction=="front"]*`Distance (m)`[direction=="right"]+
0.25*pi*`Distance (m)`[direction=="right"]*`Distance (m)`[direction=="back"]+
0.25*pi*`Distance (m)`[direction=="back"]*`Distance (m)`[direction=="left"]))
,.(Frequency,Microphone_ID,Code,estimation)]
melt area and SNR data into one data table for analysis and plots
mics.melt0=melt(mics2,id=c("Microphone_ID","set","Code","Manufacturer")
,measure=c("SNR_1kHz_dB_cal","SNR_40kHz_dB_cal")
,value.name="SNR_dB")
construct labels column
mics.melt0[variable=="SNR_1kHz_dB_cal",Frequency:="1 kHz"]
mics.melt0[variable=="SNR_40kHz_dB_cal",Frequency:="40 kHz"]
mics.melt1=merge(extinction.melt1,mics.melt0,by=c("Microphone_ID","Frequency","Code"))
Construct and test different linear models: with native SNR, log-transformed SNR, and a curved line with polynomial relationship. We run separate models for each frequency and estimation method because of very different ranges and scales.
lm.area.1kHz=lm(detection_area_m2~SNR_dB,mics.melt1[Frequency=="1 kHz" & estimation=="audio-visual"])
lm.area.log.1kHz=lm(detection_area_m2~log(SNR_dB),mics.melt1[Frequency=="1 kHz" & estimation=="audio-visual"])
lm.area.poly.1kHz=lm(detection_area_m2~poly(SNR_dB,2),mics.melt1[Frequency=="1 kHz" & Code!="K" & estimation=="audio-visual"])
lm.area.40kHz=lm(detection_area_m2~SNR_dB,mics.melt1[Frequency=="40 kHz"])
lm.area.log.40kHz=lm(detection_area_m2~log(SNR_dB),mics.melt1[Frequency=="40 kHz"])
lm.area.poly.40kHz=lm(detection_area_m2~poly(SNR_dB,2),mics.melt1[Frequency=="40 kHz"])
Compare their AICcs but not R-squared values, since the polynomial model fits two coefficients.
AICc(lm.area.1kHz,lm.area.log.1kHz,lm.area.poly.1kHz
,lm.area.40kHz,lm.area.log.40kHz,lm.area.poly.40kHz)
## df AICc
## lm.area.1kHz 3 367.6964
## lm.area.log.1kHz 3 362.6818
## lm.area.poly.1kHz 4 364.4879
## lm.area.40kHz 3 348.1721
## lm.area.log.40kHz 3 350.5534
## lm.area.poly.40kHz 4 349.9045
Linear model with log-transformation is best best for human-estimated ranges of audible sound. Polynomial model is best for audible extrapolated extinction distances, linear model with native SNR is best for ultrasound. check residuals of best models
plot(simulateResiduals(lm.area.log.1kHz))
plot(simulateResiduals(lm.area.40kHz))
check results and R-squared values
summary(lm.area.log.1kHz)
##
## Call:
## lm(formula = detection_area_m2 ~ log(SNR_dB), data = mics.melt1[Frequency ==
## "1 kHz" & estimation == "audio-visual"])
##
## Residuals:
## Min 1Q Median 3Q Max
## -1536.37 -490.68 23.11 475.75 2011.46
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -26042 2255 -11.55 2.67e-10 ***
## log(SNR_dB) 8365 552 15.15 1.99e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 816.3 on 20 degrees of freedom
## Multiple R-squared: 0.9199, Adjusted R-squared: 0.9159
## F-statistic: 229.7 on 1 and 20 DF, p-value: 1.993e-12
summary(lm.area.40kHz)
##
## Call:
## lm(formula = detection_area_m2 ~ SNR_dB, data = mics.melt1[Frequency ==
## "40 kHz"])
##
## Residuals:
## Min 1Q Median 3Q Max
## -1047.9 -272.9 -34.5 335.9 1590.0
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4770.757 781.670 -6.103 5.78e-06 ***
## SNR_dB 78.267 9.154 8.550 4.11e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 587 on 20 degrees of freedom
## Multiple R-squared: 0.7852, Adjusted R-squared: 0.7745
## F-statistic: 73.11 on 1 and 20 DF, p-value: 4.109e-08
summary(lm.area.log.1kHz)$adj.r.squared
## [1] 0.9158823
summary(lm.area.40kHz)$adj.r.squared
## [1] 0.7744529
SNR has a strong, significant effect in all cases, but a poor fit with extinction distances
Construct new data table for model predictions, spanning all SNR values between extrema.
new.predict=data.table(SNR_dB=c(20:80,59:107)
,Frequency=c(rep("1 kHz",length(20:80))
,rep("40 kHz",length(59:107))))
predict areas with new values
predict.area.1kHz0=predict(lm.area.log.1kHz,newdata = new.predict[Frequency=="1 kHz"])
predict.area.40kHz0=predict(lm.area.40kHz,newdata=new.predict[Frequency=="40 kHz"])
generate auxiliary data for graph
predict.area.1kHz1=cbind(new.predict[Frequency=="1 kHz"],area_predicted=predict.area.1kHz0)
predict.area.1kHz1[,Microphone_ID:=NA]
predict.area.40kHz1=cbind(new.predict[Frequency=="40 kHz"],area_predicted=predict.area.40kHz0)
predict.area.40kHz1[,Microphone_ID:=NA]
plot detection spaces against SNR
ggplot(mics.melt1,aes(SNR_dB,detection_area_m2,group=1))+
geom_point(alpha=0.5)+
# geom_text(aes(label=Microphone_ID))+
geom_line(data=predict.area.1kHz1[area_predicted>0],aes(SNR_dB,area_predicted),size=1)+
geom_line(data=predict.area.40kHz1[area_predicted>0],aes(SNR_dB,area_predicted),size=1)+
labs(x= "Signal-to-noise ratio (dB, measured)",y=expression(Sound~detection~space~ground~area~(m^{2})))+
geom_hline(yintercept = 0,lty=2)+
facet_wrap(.~Frequency,scales = "free_x")+
theme(legend.position = c(0.8,0.8))+
theme_cowplot()
ggsave("Figures/Fig 2.pdf",width=8,height=5)
detection space increase from first usable mic to highest SNR one
mics.melt1[Frequency=="1 kHz" & Microphone_ID==14,.(detection_area_m2)]/mics.melt1[Frequency=="1 kHz" & Microphone_ID==4,.(detection_area_m2)]
## detection_area_m2
## 1: 1.723474
mics.melt1[Frequency=="1 kHz" & Microphone_ID==1,.(detection_area_m2)]
## detection_area_m2
## 1: 207
mics.melt1[Frequency=="40 kHz" & Microphone_ID==16,.(detection_area_m2)]/mics.melt1[Frequency=="40 kHz" & Microphone_ID==3,.(detection_area_m2)]
## detection_area_m2
## 1: 10.28895
merge animal detection tags with microphones info
tags1=merge(mics1,tags0,by="Microphone_ID",all.x=T)
find most common species for choosing calls to automatically detect in next section
tags1[,.(activity=sum(duration_s)),.(binomial,class)]
## binomial class activity
## 1: NA
## 2: A1 MAMMALIA 114.80
## 3: C1 MAMMALIA 1654.71
## 4: Dicaeum trigonostigma AVES 997.40
## 5: Geopelia striata AVES 790.40
## 6: Halcyon smyrnensis AVES 4587.70
## 7: Orthotomus ruficeps AVES 5491.00
## 8: Orthotomus sericeus AVES 127.70
## 9: Pycnonotus goiavier AVES 9285.60
## 10: Chalcophaps indica AVES 733.30
## 11: Pycnonotus aurigaster AVES 1372.90
## 12: B1 MAMMALIA 464.80
## 13: D1 MAMMALIA 289.10
## 14: Orthotomus atrogularis AVES 235.70
compute total activity in seconds and minutes per recording
tags.plot0=tags1[class!="",.(total_activity_s=round(sum(duration_s))
,total_activity_min=sum(duration_s)/60)
,.(Microphone_ID,recording.date,SNR_1kHz_dB_cal,SNR_40kHz_dB_cal,Code,class)]
manually add data for two microphones (ID 1 and 2) with no tags
tags.plot1=rbind(tags.plot0,cbind(mics1[Microphone_ID %in% c(1,2)
,.(Microphone_ID
,Code
,SNR_1kHz_dB_cal
,SNR_40kHz_dB_cal)]
,data.table(recording.date=c("2018-11-04","2018-11-05")
,class=c("AVES","AVES")
,total_activity_min=c(0,0)
,total_activity_s=c(0,0)))
)
assign SNR corresponding to frequency of vocalising animals
tags.plot1[class=="AVES",SNR_dB:=SNR_1kHz_dB_cal]
tags.plot1[class=="MAMMALIA",SNR_dB:=SNR_40kHz_dB_cal]
construct labels column
tags.plot1[class=="AVES",label:="Birds"]
tags.plot1[class=="MAMMALIA",label:="Bats"]
check distributions of bird and bat activities
Birds
hist(tags.plot1[label=="Birds",total_activity_s])
excluding outliers reveals an otherwise approximately normal distribution
hist(tags.plot1[label=="Birds" & Code!="A",total_activity_s])
Bats
hist(tags.plot1[label=="Bats",total_activity_s])
Bat activity data look right-skewed, typical for count variables. We also have much smaller values. Bird and bat activity response variables are on different scales (birds: minutes, bats: seconds) and distributed differently: we need separate models again. We follow the outcome of the previous detection area for specifying the models.
we need to account for random effect of the date but cannot use mixed models because there are too few levels for estimating their variance. Activity is not a count variable per se, because it is continuous. It is possible that it is distributed normally as it is only derived from counts (activities are essentially weighted counts), so we need to test different models typical for different response types. simplest case: linear model
plot(simulateResiduals(lm(total_activity_min~log(SNR_dB)+recording.date,tags.plot1[label=="Birds"])))
the linear model behaves very well, but let’s see if a poisson glm does better
plot(simulateResiduals(glm(round(total_activity_min)~log(SNR_dB)+recording.date,family="poisson",tags.plot1[label=="Birds"])))
quantile deviations very strong. Problem may arise from overdispersion, let’s try negative binomial
plot(simulateResiduals(glm.nb(round(total_activity_min)~log(SNR_dB)+recording.date,tags.plot1[label=="Birds"])))
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
This did not solve the problem either.
We choose the linear model for birds because the fit is good and deviation not significant.
activity.model.birds=lm(total_activity_min~log(SNR_dB)+recording.date,tags.plot1[label=="Birds"])
check significance of model parameters as well as fits with R2 values
summary(activity.model.birds)
##
## Call:
## lm(formula = total_activity_min ~ log(SNR_dB) + recording.date,
## data = tags.plot1[label == "Birds"])
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.5175 -0.8761 -0.5082 1.3630 3.0537
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -58.3273 5.4531 -10.696 3.14e-09 ***
## log(SNR_dB) 19.3492 1.3342 14.503 2.26e-11 ***
## recording.date2018-11-05 -3.5119 0.8613 -4.077 0.000707 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.971 on 18 degrees of freedom
## Multiple R-squared: 0.9258, Adjusted R-squared: 0.9175
## F-statistic: 112.2 on 2 and 18 DF, p-value: 6.853e-11
r.squaredLR(activity.model.birds)
## [1] 0.9257573
## attr(,"adj.r.squared")
## [1] 0.9269677
SNR explains a lot of variation for bird activity
Generate new data tables for predicting values and plotting.
new.data.birds=data.table(tags.plot1[label=="Birds",.(SNR_dB=seq(min(SNR_dB),max(SNR_dB),0.5))]
,tags.plot1[label=="Birds",.(recording.date=unique(recording.date))])
## Warning in as.data.table.list(x, keep.rownames = keep.rownames, check.names
## = check.names, : Item 2 has 2 rows but longest item has 111; recycled with
## remainder.
predict values with chosen model
predict.activity.birds=data.table(total_activity_min_predicted=predict(activity.model.birds,newdata=new.data.birds),new.data.birds,label="Birds")
Bat activities, because of the right skew, behave more like a count variable. We use the interaction between day and SNR this time, because we observed strong differences between nights that might cause slightly different relationships.
generalised linear model for count variable (poisson)
plot(simulateResiduals(glm(total_activity_s~SNR_dB*recording.date,family="poisson",tags.plot1[label=="Bats"])))
Too many outliers, problem may arise from overdispersion, let’s try negative binomial
plot(simulateResiduals(glm.nb(total_activity_s~SNR_dB*recording.date,tags.plot1[label=="Bats"])))
fixes some problems. Gamma might be better because we have a continuous response variable.
plot(simulateResiduals(glm(total_activity_s~SNR_dB*recording.date,family=Gamma(link="log"),tags.plot1[label=="Bats"])))
We choose this as the best model fit so far.
activity.model.bats=glm(total_activity_s~SNR_dB*recording.date,family=Gamma(link="log"),tags.plot1[label=="Bats"])
check significance of model parameters as well as fits with R2 values
summary(activity.model.bats)
##
## Call:
## glm(formula = total_activity_s ~ SNR_dB * recording.date, family = Gamma(link = "log"),
## data = tags.plot1[label == "Bats"])
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.97667 -0.32768 -0.01821 0.26049 0.68977
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.166242 0.881932 -6.992 8.75e-07 ***
## SNR_dB 0.108950 0.009926 10.976 6.46e-10 ***
## recording.date2018-11-04 6.079401 1.229888 4.943 7.84e-05 ***
## SNR_dB:recording.date2018-11-04 -0.049330 0.014317 -3.446 0.00256 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Gamma family taken to be 0.2084408)
##
## Null deviance: 30.9526 on 23 degrees of freedom
## Residual deviance: 4.6062 on 20 degrees of freedom
## AIC: 231.77
##
## Number of Fisher Scoring iterations: 9
r.squaredLR(activity.model.bats)
## [1] 0.8749641
## attr(,"adj.r.squared")
## [1] 0.8749747
SNR explains a lot of variation for bat activity
Generate new data tables for predicting values and plotting.
new.data.bats=data.table(tags.plot1[label=="Bats",.(SNR_dB=seq(min(SNR_dB),max(SNR_dB),0.5))]
,tags.plot1[label=="Bats",.(recording.date=unique(recording.date))])
## Warning in as.data.table.list(x, keep.rownames = keep.rownames, check.names
## = check.names, : Item 2 has 2 rows but longest item has 95; recycled with
## remainder.
predict values with chosen model
predict.activity.bats=data.table(total_activity_min_predicted=exp(predict(activity.model.bats,newdata=new.data.bats))/60,new.data.bats,label="Bats")
plot activity data and model predictions
ggplot(tags.plot1,aes(SNR_dB,total_activity_min,color=recording.date,group=recording.date))+
geom_hline(yintercept=0,lty=2)+
geom_point(alpha=0.5)+
# geom_text(aes(label=Microphone_ID))+
geom_line(data=predict.activity.birds[total_activity_min_predicted>0],aes(SNR_dB,total_activity_min_predicted),size=1)+
geom_line(data=predict.activity.bats,aes(SNR_dB,total_activity_min_predicted),size=1)+
facet_wrap(.~label,scales="free")+
scale_color_discrete(name="Date")+
labs(x="Signal-to-noise ratio (dB, measured)",y="Total vocalisation activity (min)")+
theme_cowplot()+
theme(legend.position = c(0.05,0.8))
ggsave("Figures/Fig 3.pdf",width=8,height=5)
Bird activity increase from first usable mic to highest SNR one, for most active day
tags.plot1[label=="Birds" & Microphone_ID==14,.(total_activity_s)]/tags.plot1[label=="Birds" & Microphone_ID==4,.(total_activity_s)]
## total_activity_s
## 1: 1.62
likewise, we compute the bat activity increase
tags.plot1[label=="Bats" & Microphone_ID==19,.(total_activity_s)]/tags.plot1[label=="Bats" & Microphone_ID==3,.(total_activity_s)]
## total_activity_s
## 1: 9.69697
number of time steps over which to calculate species accumulation curves
n_timesteps=40
duration of recordings in seconds
total_duration_birds=900
total_duration_bats=900
add duration of first ultrasound recording to tag time of second recording to simulate one recording
tags1[class=="MAMMALIA" & grepl("_181500",recording.name),c("min_time_adjusted","max_time_adjusted"):=.(min_time+(30*60),max_time+(30*60))]
initiate empty data table and run loop for counting species at each time step for each microphone and taxon
richness.cumulative0=data.table()
for (c in unique(tags1[class!="",class])){
if (c=="AVES") {total_duration=total_duration_birds}
if (c=="MAMMALIA") {total_duration=total_duration_bats}
for (t in 1:n_timesteps){
end=total_duration*(t)/n_timesteps
#'count species
richness.cumulative.temp=tags1[class==c,.(richness=length(unique(binomial[min_time<end]))
,class=unique(class))
,.(Microphone_ID,SNR_1kHz_dB_cal,SNR_40kHz_dB_cal,recording.date)]
#'append results into the data table
richness.cumulative0=rbind(richness.cumulative0
,cbind(richness.cumulative.temp,time=end,time_bin=t))
}
}
append missing data from 2 microphones that did not record birds manually
richness.cumulative1=rbind(richness.cumulative0
,data.table(Microphone_ID=c(1,2)
,class=c("AVES","AVES")
,mics1[Microphone_ID %in% c(1,2),.(SNR_1kHz_dB_cal)]
,recording.date=c("2018-11-04","2018-11-05")
,richness=0
,time=rep(richness.cumulative0[,unique(time)],each=2))
,fill=T)
fill in time bins for missing data
richness.cumulative1[is.na(time_bin) & class=="MAMMALIA",time_bin:=(time*n_timesteps)/total_duration_bats]
richness.cumulative1[is.na(time_bin) & class=="AVES",time_bin:=(time*n_timesteps)/total_duration_birds]
put SNR into one column
richness.cumulative1[class=="AVES",SNR:=SNR_1kHz_dB_cal]
richness.cumulative1[class=="MAMMALIA",SNR:=SNR_40kHz_dB_cal]
create labels column
richness.cumulative1[class=="AVES",label:="Birds"]
richness.cumulative1[class=="MAMMALIA",label:="Bats"]
remove duplicates to shorten
richness.cumulative_notime=copy(richness.cumulative1)
richness.cumulative_notime[,c("time_bin","time"):=.(NULL,NULL)]
richness.cumulative2=richness.cumulative1[!duplicated(richness.cumulative_notime) | time_bin==1 | time_bin==n_timesteps]
generate two graphs separately (for maximal separation between color gradient extremes)
pcum1=ggplot(richness.cumulative2[class=="MAMMALIA"],aes(time+1,richness,group=Microphone_ID,color=SNR))+
geom_line(size=1.5,color="black")+
geom_line(size=1)+
scale_color_gradient(low="white",high="black",name="Signal-to-noise ratio \n@40 kHz (dB)")+
labs(x=NULL,y="Bat species richness")+
facet_wrap(.~recording.date,scales="free")+
theme_cowplot()+
theme(panel.grid.major.y=element_line(linetype=3,colour="darkgrey"))
pcum2=ggplot(richness.cumulative2[class=="AVES"],aes(time+1,richness,group=Microphone_ID,color=SNR))+
geom_line(size=1.5,color="black")+
geom_line(size=1)+
scale_color_gradient(low="white",high="black",name="Signal-to-noise ratio \n@1 kHz (dB)")+
labs(x="Sampling time (s)",y="Bird species richness")+
facet_wrap(.~recording.date,scales="free")+
theme_cowplot()+
theme(panel.grid.major.y=element_line(linetype=3,colour="darkgrey"))
plot_grid(pcum1,pcum2,ncol=1)
ggsave("Figures/Fig 4.eps",width=9,height=8)
list files in subdirectory “call detection”
files=data.table(filepath=list.files(path="call detection",pattern=".wav",full.names = T)
,filename=list.files(path="call detection",pattern=".wav"))
distingish templates from soundscapes
files[grepl("call",filename),template:=T]
files[is.na(template),template:=F]
extract microphone ID from file name
files[,Microphone_ID:=as.numeric(substr(filename,1,2))]
extract time of day info
files[grepl("180000",filename),daytime:="night"]
files[is.na(daytime),daytime:="day"]
files[,mic_daytime:=paste(Microphone_ID,daytime)]
files[template==F,date:=tstrsplit(filename,"_")[3]]
Only run lengthy automated detection if the file has not been generated yet by a previous script run.
if (!file.exists("Detected calls.csv")){
# create recipient data table for all detections
detections.all=data.table()
# run loop over all microphone-daytime combinations
# for (md in files[,unique(mic_daytime)]){
for (md in "24 day"){
# run loop over all microphone templates for each microphone
r=files[mic_daytime==md,unique(Microphone_ID)]
#extract date
date_temp=files[template==F & mic_daytime==md,date]
#inform loop progress
print(paste(md,date_temp))
# set the frequency range of the signal to detect
if (grepl("night",md)) {freq_temp=c(35,80)
name_temp="bat A";d="night";window_temp=256;f_range=c(30,90)
} else if (grepl("day",md)) {freq_temp=c(1,3.5)
name_temp="bulbul";d="day";window_temp=128;f_range=c(0,4)}
# extract call count from microphones data table
if (d=="night") {call_count=mics1[Microphone_ID==r,BatA_counted_calls]
} else if (d=="day") {call_count=mics1[Microphone_ID==r,Bulbul_counted_calls]}
# build first and second call template
temp1=makeCorTemplate(files[template==T & Microphone_ID==r & daytime==d,filepath][1],frq.lim=freq_temp,wl=window_temp,name=paste(name_temp,1))
temp2=makeCorTemplate(files[template==T & Microphone_ID==r & daytime==d,filepath][2],frq.lim=freq_temp,wl=window_temp,name=paste(name_temp,2))
# scan recording with template
soundscape.temp=files[template==F & mic_daytime==md,filepath]
matches.temp1=corMatch(soundscape.temp,temp1,time.source = "fileinfo")
matches.temp2=corMatch(soundscape.temp,temp2,time.source = "fileinfo")
# take only the peaks from the detection timeline with a minimal cutoff
templateCutoff(matches.temp1)=c(default=0.05)
templateCutoff(matches.temp2)=c(default=0.05)
peaks1=findPeaks(matches.temp1)
peaks2=findPeaks(matches.temp2)
detections0=data.table()
for (t in c(1,2)){
#take one of the two peak objects corresponding to the template
if (t==1) {peaks=peaks1} else if (t==2) {peaks=peaks2}
detections.temp0=data.table(peaks@detections[[1]])
#create id column
detections.temp0[,id:=1:.N]
# order detections by score to work our way down
detections.temp1=detections.temp0[order(-score)]
for (i in 1:nrow(detections.temp1)) {
# verify all peaks manually
print(paste("detection:",detections.temp1[i,id],"template:",t,". Enter y if detection is correct, n if not"))
verify_temp=readline(showPeaks(peaks,flim=f_range,id=detections.temp1[i,id]))
detections.temp1[i,true:=verify_temp]
# stop if ten consecutive detections are false positives
if (verify_temp=="n"){consecutive_false_positives=consecutive_false_positives+1
} else if (verify_temp=="y") {consecutive_false_positives=0}
if (nrow(detections.temp1[true=="y"])==call_count | consecutive_false_positives==20){detections.temp1[is.na(true),true:="n"];break}
}
#assign microphone, daytime, and template info
detections.temp1[,template:=t]
detections0=rbind(detections0,detections.temp1)
}
detections0[,mic_daytime:=md]
# append output (in case of crash)
fwrite(detections0,"Detected calls.csv",append=T)
# and save in object too
detections.all=rbind(detections.all,detections0)
}
} else {
# this imports the file consisting of detection results
detections.all=fread("Detected calls.csv")}
Compute the number of true and false positives at different score cutoffs.
auto.detect0=data.table()
for (s in seq(1,0.05,-0.01)){
for (md in detections.all[,unique(mic_daytime)]) {
#take average number of positives per template
true_positives=nrow(detections.all[score>=s & true=="y" & mic_daytime==md])/2
false_positives=nrow(detections.all[score>=s & true=="n" & mic_daytime==md])/2
# store in data table
auto.detect0=rbind(auto.detect0,data.table(true_positives,false_positives,score_cutoff=s,mic_daytime=md))
}
}
extract day time, taxon, and microphone ID
auto.detect0[,Soundscape_microphone_ID:=as.numeric(unlist(tstrsplit(mic_daytime," ")[1]))]
auto.detect0[,daytime:=tstrsplit(mic_daytime," ")[2]]
auto.detect0[daytime=="day",Label_calls:="Bird calls"]
auto.detect0[daytime=="night",Label_calls:="Bat calls"]
merge automated detections data with microphones data
auto.detect1=merge(auto.detect0,mics1[,.(Microphone_ID,BatA_counted_calls,Bulbul_counted_calls,SNR_40kHz_dB_cal,SNR_1kHz_dB_cal,set)]
,by.x="Soundscape_microphone_ID",by.y="Microphone_ID")
put manually counted calls total in one column
auto.detect1[daytime=="night",counted_calls:=BatA_counted_calls]
auto.detect1[daytime=="day",counted_calls:=Bulbul_counted_calls]
define set labels
auto.detect1[set==1,set_label:="Set 1"]
auto.detect1[set==2,set_label:="Set 2"]
compute recall and precision, as well as total detections
auto.detect1[,recall:=true_positives/counted_calls]
auto.detect1[,total_detections:=true_positives+false_positives]
auto.detect1[,precision:=true_positives/(true_positives+false_positives)]
assign SNR values depending on time of day
auto.detect1[daytime=="night",SNR_dB:=SNR_40kHz_dB_cal]
auto.detect1[daytime=="day",SNR_dB:=SNR_1kHz_dB_cal]
calculate the maximal score for an acceptable recall of 0.5, and the minimum score for an acceptable precision of 0.5
acceptable=auto.detect1[,.(recall_max_score=max(score_cutoff[recall>=0.5])
,precision_min_score=min(score_cutoff[precision>=0.5],na.rm=T))
,.(mic_daytime,Soundscape_microphone_ID,daytime,SNR_dB,set_label,Label_calls)]
## Warning in max(score_cutoff[recall >= 0.5]): no non-missing arguments to max;
## returning -Inf
## Warning in max(score_cutoff[recall >= 0.5]): no non-missing arguments to max;
## returning -Inf
## Warning in max(score_cutoff[recall >= 0.5]): no non-missing arguments to max;
## returning -Inf
## Warning in max(score_cutoff[recall >= 0.5]): no non-missing arguments to max;
## returning -Inf
## Warning in max(score_cutoff[recall >= 0.5]): no non-missing arguments to max;
## returning -Inf
some values have to be replaced with 0
acceptable[recall_max_score<0,recall_max_score:=0]
acceptable[recall_max_score>precision_min_score,range_acceptable:=recall_max_score-precision_min_score]
acceptable[recall_max_score<=precision_min_score,range_acceptable:=0]
melt data for graphing
acceptable.melt=melt(acceptable,id=c("mic_daytime","Soundscape_microphone_ID","daytime","SNR_dB","Label_calls","set_label"),measure=c("recall_max_score","precision_min_score"))
auto.detect.melt=melt(auto.detect1,id=c("SNR_dB","score_cutoff","set_label","daytime","Label_calls"),measure=c("total_detections","false_positives","counted_calls"),value.name="Calls/detections")
## Warning in melt.data.table(auto.detect1, id = c("SNR_dB", "score_cutoff", :
## 'measure.vars' [total_detections, false_positives, counted_calls] are not all
## of the same type. By order of hierarchy, the molten data value column will be of
## type 'double'. All measure variables not of type 'double' will be coerced too.
## Check DETAILS in ?melt.data.table for more on coercion.
define labels
auto.detect.melt[variable=="counted_calls",variable:="Total calls"]
auto.detect.melt[variable=="total_detections",variable:="True positives"]
auto.detect.melt[variable=="false_positives",variable:="False positives"]
auto.detect.melt[`Calls/detections`==0,`Calls/detections`:=NA]
auto.detect.melt.ordered=auto.detect.melt[order(variable)]
plot total calls, true positives, false positives, and levels of acceptable recall and precision
ggplot(auto.detect.melt.ordered,aes(SNR_dB,score_cutoff))+
# geom_ribbon(data=acceptable,aes(SNR_dB,ymax=recall_max_score,ymin=precision_min_score,y=1),color="green",alpha=0.3)+
geom_point(aes(size=`Calls/detections`,color=variable),shape=15)+
scale_size_continuous(range=c(0.5,10))+
# geom_line(data=acceptable.melt,aes(SNR_dB,value,lty=variable),color="green",size=0.8)+
geom_errorbar(data=acceptable[range_acceptable>0],aes(SNR_dB,ymax=recall_max_score,ymin=precision_min_score,y=1),color="green",size=1,width=1)+
scale_color_manual(values = c("black","blue","red"),name="")+
facet_grid(set_label~Label_calls,scales="free")+
theme_cowplot()+
background_grid(major="y")+
labs(y="Score cutoff",x="Signal-to-noise (dB)")
## Warning: Removed 4008 rows containing missing values (geom_point).
ggsave("Figures/Fig 5.eps",width=10,height=8)
## Warning: Removed 4008 rows containing missing values (geom_point).
test effect of SNR on range of acceptable scores
acceptable.model=lm(range_acceptable~SNR_dB*daytime+set_label,acceptable)
plot(simulateResiduals(acceptable.model))
summary(acceptable.model)
##
## Call:
## lm(formula = range_acceptable ~ SNR_dB * daytime + set_label,
## data = acceptable)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.094678 -0.046344 -0.001832 0.044198 0.167510
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.333954 0.121779 -2.742 0.009549 **
## SNR_dB 0.009306 0.001859 5.006 1.58e-05 ***
## daytimenight 0.044555 0.160917 0.277 0.783497
## set_labelSet 2 -0.071129 0.019591 -3.631 0.000895 ***
## SNR_dB:daytimenight -0.005017 0.002206 -2.274 0.029197 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06131 on 35 degrees of freedom
## Multiple R-squared: 0.789, Adjusted R-squared: 0.7649
## F-statistic: 32.73 on 4 and 35 DF, p-value: 2.207e-11
summary(acceptable.model)$adj.r.squared
## [1] 0.7649346
explore interaction of SNR and daytime
test(emtrends(acceptable.model,"daytime",var="SNR_dB"))
## daytime SNR_dB.trend SE df t.ratio p.value
## day 0.00931 0.00186 35 5.006 <.0001
## night 0.00429 0.00119 35 3.613 0.0009
##
## Results are averaged over the levels of: set_label