#' --- #' title: "Microphone Signal-to-noise Ratio Analysis" #' author: "Kevin FA Darras" #' date: "August 21, 2020" #' --- #' #' # Preparation #' clear all variables, load packages rm(list=ls()) library(MASS) library(data.table) library(ggplot2) library(cowplot) library(monitoR) library(DHARMa) library(MuMIn) library(emmeans) #'set working directory setwd("/home/kdarras/DATA/Docs/Boulot/2012_Jambi/Microphone SNR") #'import data from CSVs types0=fread("Microphones - Types.csv") mics0=fread("Microphones - Units.csv") extinction0<-fread("Microphones - Extinction distances.csv") tags0=fread("Bird and bat tags.csv") #'derive tag duration and microphone IDs tags0[,duration_s:=(max_time-min_time)] tags0[,Microphone_ID:=as.numeric(substr(recording.name,1,2))] #'check unique species (empty strings correspond to recordings with no tags) tags0[,unique(binomial)] #'Calculate electrical noise floor of microphones to compare to recorder pre-amplifier noise contribution. Do not calculate for SMX-US1 because of built-in amplification. Pre-amp noise floor is -105 dBV at 192 kHz sampling rate according to Wildlife Acoustics specifications types0[Code!="K",electrical_noise_floor_dBV:=(Sensitivity_specified_dBV-SNR_specified_1kHz_dB)] #'merge microphones with types info mics1=merge(mics0,types0,by="Code") #' # Microphone SNR #'standardise self-noise and sound levels at closest distance by subtracting amplification of recorder mics1[,c("selfnoise_1kHz_dB_noamp","selfnoise_40kHz_dB_noamp","soundlevel_1kHz_4m_level1_dB_noamp","soundlevel_40kHz_8m_dB_noamp"):= .(selfnoise_1kHz_dB-Amplification_recorder_dB ,selfnoise_40kHz_dB-Amplification_recorder_dB ,soundlevel_1kHz_4m_level1_dB-Amplification_recorder_dB ,soundlevel_40kHz_8m_dB-Amplification_recorder_dB)] #'further subtraction for FG Knowles microphone (Type F) that has built-in amplification of 48 dB mics1[Code=="F",c("selfnoise_40kHz_dB_noamp","soundlevel_40kHz_8m_dB_noamp"):= .(selfnoise_40kHz_dB_noamp-48,soundlevel_40kHz_8m_dB_noamp-48)] #'get calibration value at 1 kHz for reference microphones (amplification was 36dB for calibration) mics1[set==1,calibration_value_1kHz_dB:=mics1[Microphone_ID==9,soundlevel_94dBSPL_1kHz]-36] mics1[set==2,calibration_value_1kHz_dB:=mics1[Microphone_ID==10,soundlevel_94dBSPL_1kHz]-36] #' The US calibrator's "CAL" mode emits a 48 dB SPL (+/- 3dB) 40 kHz signal at a distance of 30 cm according to Wildlife acoustics documentation mics1[set==1,calibration_value_40kHz_dB:=mics1[Microphone_ID==16,soundlevel_48dBSPL_40kHz]-36] mics1[set==2,calibration_value_40kHz_dB:=mics1[Microphone_ID==15,soundlevel_48dBSPL_40kHz]-36] #'calculate sensitivities of all microphones relative to reference microphone mics1[set==1,relative_to_reference_1kHz_dB:=soundlevel_1kHz_4m_level1_dB_noamp-mics1[Microphone_ID==9,soundlevel_1kHz_4m_level1_dB_noamp]] mics1[set==2,relative_to_reference_1kHz_dB:=soundlevel_1kHz_4m_level1_dB_noamp-mics1[Microphone_ID==10,soundlevel_1kHz_4m_level1_dB_noamp]] mics1[set==1,relative_to_reference_40kHz_dB:=soundlevel_40kHz_8m_dB_noamp-mics1[Microphone_ID==16,soundlevel_40kHz_8m_dB_noamp]] mics1[set==2,relative_to_reference_40kHz_dB:=soundlevel_40kHz_8m_dB_noamp-mics1[Microphone_ID==15,soundlevel_40kHz_8m_dB_noamp]] #'calculate SNR using calibration value, relative sensitivities, and self-noise mics1[,SNR_1kHz_dB_cal:=calibration_value_1kHz_dB+relative_to_reference_1kHz_dB-selfnoise_1kHz_dB_noamp] mics1[,SNR_40kHz_dB_cal:=calibration_value_40kHz_dB+relative_to_reference_40kHz_dB-selfnoise_40kHz_dB_noamp] #'adjust SNR with loss of dB caused by acoustic vent (only used later in graphical comparison to manufacturer-provided values) mics1[,SNR_1kHz_dB_cal_adjusted:=SNR_1kHz_dB_cal+Acoustic_vent_loss_dB] #'We expect a linear relationship between specified and measured SNR and test different models. #' #'Null (unlikely) model: no linear relationship lm.SNR.null=lm(SNR_1kHz_dB_cal_adjusted~1,mics1) #'Simple model: no differences between manufacturers but effect of specified SNR lm.SNR.simple=lm(SNR_1kHz_dB_cal_adjusted~SNR_specified_1kHz_dB,mics1) #'Tested (likely) model: simple (constant) offsets in SNR values between manufacturers lm.SNR.manu=lm(SNR_1kHz_dB_cal_adjusted~SNR_specified_1kHz_dB+Manufacturer,mics1) #'Complex model: offsets in SNR values between manufacturers and also differing slopes, which could be due to differing measurement protocols within manufacturers lm.SNR.full=lm(SNR_1kHz_dB_cal_adjusted~SNR_specified_1kHz_dB*Manufacturer,mics1) #'since models differ in number of predictors, we cannot use R-squared measures for comparison. So we determine best model with information criterion to counteract automatic positive effect of number of predictors on fit. T2a=round(AICc(lm.SNR.null,lm.SNR.simple,lm.SNR.manu,lm.SNR.full),2) T2b=data.table(Model=row.names(T2a),T2a,Formula=as.character(c(formula(lm.SNR.null),formula(lm.SNR.simple),formula(lm.SNR.manu),formula(lm.SNR.full)))) T2b #'We find that the tested model with constant offsets for each manufacturer is the most parsimonious. We can check model diagnostics even though we have a fairly robust understanding of the relationships so that it should be correct plot(simulateResiduals(lm.SNR.manu)) #'one outlier but otherwise fine diagnostics #' #'We check its summary (one coult test for significant differences between manufacturers, but this is not the aim). summary(lm.SNR.manu) #'The low P-value for the SNR_specified_1kHz_dB predictor tells us that when assuming the null hypothesis (that this coefficient is 0) is true, it is highly unlikely to get values at least as extreme as the one we estimated. #' #'Now we extract model predictions for plotting them subsequently. predict.SNR=predict(lm.SNR.manu) #'We append these predictions to the microphones table. mics2=cbind(mics1[as.numeric(names(predict.SNR))],SNR_predicted=predict.SNR) #'We graph specified vs. measured SNR values along with the best model's fit, we use the calibrated SNR values (corrected for acoustic vent presence). ggplot(mics2,aes(SNR_specified_1kHz_dB,SNR_1kHz_dB_cal_adjusted,color=Manufacturer,label=Code,shape=Format))+ geom_point()+ geom_text(data=mics1[label_placement==1],nudge_y=2,show.legend=F)+ geom_line(aes(SNR_specified_1kHz_dB,SNR_predicted),size=1)+ scale_color_brewer(type="qual",palette=6)+ xlim(c(20,80))+ylim(c(20,80))+ geom_abline(slope = 1,intercept = 0,lty=3)+ labs(x="Signal-to-noise ratio at 1 kHz\n(dB, manufacturer-given)",y="Signal-to-noise ratio at 1 kHz\n(dB, measured)")+ theme_cowplot()+ theme(legend.position = c(0.1,0.7)) ggsave("Figures/Fig 1.eps",width=5,height=5) #'We explore graphically whether measured SNR at 1 kHz correlates with SNR ar 40 kHz (we exclude the Knowles FG microphone that does not record audible sound and thus has no SNR at 1 kHz). ggplot(mics1[Code!="K"],aes(SNR_1kHz_dB_cal_adjusted,SNR_40kHz_dB_cal,color=Manufacturer,label=Code,shape=Format,group=1))+ labs(x="Signal-to-noise ratio at 1 kHz\n(dB, measured)",y="Signal-to-noise ratio at 40 kHz\n(dB, measured)")+ # stat_smooth(method="lm",se=T,color="black")+ geom_point()+ scale_color_brewer(type="qual",palette=6)+ geom_text(nudge_y = 2)+ # xlim(c(10,80))+ylim(c(10,80))+ # geom_abline(slope=1,intercept = 0,lty=3)+ annotate(geom="text",x=50,y=100,label=paste("Correlation:\n", round(cor(mics1[!is.na(SNR_1kHz_dB_cal),SNR_1kHz_dB_cal],mics1[!is.na(SNR_1kHz_dB_cal),SNR_40kHz_dB_cal]),2)))+ theme_cowplot()+ theme(legend.position = c(0.1,0.7)) ggsave("Figures/Fig S4.eps",width=5,height=5) #'We select microphones with extreme SNR values for generating Fig S5. mics1[set==1,SNR_40kHz_dB_cal,.(Model,Microphone_ID)] mics1[set==1,SNR_1kHz_dB_cal,.(Model,Microphone_ID)] #' # Detection areas #' ## Draw detection spaces #' #'prepare data with direction as angle extinction.melt=melt(mics2,id=c("Code","Microphone_ID") ,measure=c("extinction_distance_front_level5_1kHz_m","extinction_distance_front_40kHz_m" ,"extinction_distance_left_level5_1kHz_m","extinction_distance_left_40kHz_m" ,"extinction_distance_back_level5_1kHz_m","extinction_distance_back_40kHz_m" ,"extinction_distance_right_level5_1kHz_m","extinction_distance_right_40kHz_m") ,value.name = "Extinction distance (m)") #'assign angles in degrees to directions extinction.melt[grepl("right",variable),c("direction","angle"):=.("right",90)] extinction.melt[grepl("left",variable),c("direction","angle"):=.("left",270)] extinction.melt[grepl("front",variable),c("direction","angle"):=.("front",0)] extinction.melt[grepl("back",variable),c("direction","angle"):=.("back",180)] #'extract frequency and estimation method extinction.melt[grepl("1kHz",variable),Frequency:="1 kHz"] extinction.melt[grepl("40kHz",variable),Frequency:="40 kHz"] extinction.melt[grepl("extrapolated",variable),estimation:=paste("extrapolated")] extinction.melt[is.na(estimation),estimation:="audio-visual"] #'take average of two microphone units of one model for better graph readability polar.points0=extinction.melt[,.(`Extinction distance (m)`=mean(`Extinction distance (m)`)),.(variable,Code,estimation,Frequency,angle,direction)] #'add more lines to close the polar graph polar.addon=polar.points0[direction=="front"] polar.addon[,angle:=360] polar.points1=rbind(polar.points0,polar.addon) #'creating 2 polar plots pp1=ggplot(polar.points1[Frequency=="1 kHz" & Code!="K" & estimation=="audio-visual"],aes(angle,`Extinction distance (m)`,color=Code))+ geom_point()+ geom_line()+ coord_polar()+ ylim(c(0,max(polar.points1[,`Extinction distance (m)`])))+ scale_x_continuous(breaks=c(0,90,180,270))+ scale_color_discrete(guide=F)+ facet_wrap(~Frequency)+ theme_cowplot()+ background_grid(major = "xy") #'second one with legend pp2=ggplot(polar.points1[Frequency=="40 kHz"],aes(angle,`Extinction distance (m)`,color=Code))+ geom_point()+ geom_line()+ coord_polar()+ ylim(c(0,max(polar.points1[,`Extinction distance (m)`])))+ scale_x_continuous(breaks=c(0,90,180,270))+ facet_wrap(~Frequency)+ theme_cowplot()+ background_grid(major = "xy") #'combining them and saving plot_grid(pp1,pp2,rel_widths = c(1,1.17),ncol = 2) ggsave("Figures/Fig S3.eps",width=8,height=5) #'calculate detection area as sum of four quarter-ellipses extinction.melt1=extinction.melt[,.(detection_area_m2=round( 0.25*pi*`Extinction distance (m)`[direction=="left"]*`Extinction distance (m)`[direction=="front"]+ 0.25*pi*`Extinction distance (m)`[direction=="front"]*`Extinction distance (m)`[direction=="right"]+ 0.25*pi*`Extinction distance (m)`[direction=="right"]*`Extinction distance (m)`[direction=="back"]+ 0.25*pi*`Extinction distance (m)`[direction=="back"]*`Extinction distance (m)`[direction=="left"])) ,.(Frequency,Microphone_ID,Code,estimation)] #'melt area and SNR data into one data table for analysis and plots mics.melt0=melt(mics2,id=c("Microphone_ID","set","Code","Manufacturer") ,measure=c("SNR_1kHz_dB_cal","SNR_40kHz_dB_cal") ,value.name="SNR_dB") #'construct labels column mics.melt0[variable=="SNR_1kHz_dB_cal",Frequency:="1 kHz"] mics.melt0[variable=="SNR_40kHz_dB_cal",Frequency:="40 kHz"] mics.melt1=merge(extinction.melt1,mics.melt0,by=c("Microphone_ID","Frequency","Code")) #' ## Analyse relationship between SNR and detection area #' #'Construct and test different linear models: with native SNR, log-transformed SNR, and a curved line with polynomial relationship. We run separate models for each frequency and estimation method because of very different ranges and scales. lm.area.1kHz=lm(detection_area_m2~SNR_dB,mics.melt1[Frequency=="1 kHz" & estimation=="audio-visual"]) lm.area.log.1kHz=lm(detection_area_m2~log(SNR_dB),mics.melt1[Frequency=="1 kHz" & estimation=="audio-visual"]) lm.area.poly.1kHz=lm(detection_area_m2~poly(SNR_dB,2),mics.melt1[Frequency=="1 kHz" & Code!="K" & estimation=="audio-visual"]) lm.area.40kHz=lm(detection_area_m2~SNR_dB,mics.melt1[Frequency=="40 kHz"]) lm.area.log.40kHz=lm(detection_area_m2~log(SNR_dB),mics.melt1[Frequency=="40 kHz"]) lm.area.poly.40kHz=lm(detection_area_m2~poly(SNR_dB,2),mics.melt1[Frequency=="40 kHz"]) #'Compare their AICcs but not R-squared values, since the polynomial model fits two coefficients. T3a=round(AICc(lm.area.1kHz,lm.area.log.1kHz,lm.area.poly.1kHz ,lm.area.40kHz,lm.area.log.40kHz,lm.area.poly.40kHz),2) T3b=data.table(Model=row.names(T3a),T3a,Formula=as.character(c(formula(lm.area.1kHz),formula(lm.area.log.1kHz),formula(lm.area.poly.1kHz),formula(lm.area.40kHz),formula(lm.area.log.40kHz),formula(lm.area.poly.40kHz)))) T3b #'Linear model with log-transformation is best for human-estimated ranges of audible sound. Polynomial model is best for audible extrapolated extinction distances, linear model with native SNR is best for ultrasound. #'check residuals of best models plot(simulateResiduals(lm.area.log.1kHz)) plot(simulateResiduals(lm.area.40kHz)) #'check results and R-squared values summary(lm.area.log.1kHz) summary(lm.area.40kHz) summary(lm.area.log.1kHz)$adj.r.squared summary(lm.area.40kHz)$adj.r.squared #' SNR has a strong, significant effect in all cases, but a poorer fit with ultrasonic extinction distances #' #'Construct new data table for model predictions, spanning all SNR values between extrema. audible_range=min(floor(mics1[,SNR_1kHz_dB_cal]),na.rm=T):max(ceiling(mics1[,SNR_1kHz_dB_cal]),na.rm=T) ultrasonic_range=min(floor(mics1[,SNR_40kHz_dB_cal]),na.rm=T):max(ceiling(mics1[,SNR_40kHz_dB_cal]),na.rm=T) new.predict=data.table(SNR_dB=c(audible_range,ultrasonic_range) ,Frequency=c(rep("1 kHz",length(audible_range)) ,rep("40 kHz",length(ultrasonic_range)))) #' predict areas with new values predict.area.1kHz0=predict(lm.area.log.1kHz,newdata = new.predict[Frequency=="1 kHz"]) predict.area.40kHz0=predict(lm.area.40kHz,newdata=new.predict[Frequency=="40 kHz"]) #' generate auxiliary data for graph predict.area.1kHz1=cbind(new.predict[Frequency=="1 kHz"],area_predicted=predict.area.1kHz0) predict.area.1kHz1[,Microphone_ID:=NA] predict.area.40kHz1=cbind(new.predict[Frequency=="40 kHz"],area_predicted=predict.area.40kHz0) predict.area.40kHz1[,Microphone_ID:=NA] #'plot detection spaces against SNR ggplot(mics.melt1,aes(SNR_dB,detection_area_m2,group=1))+ geom_point(alpha=0.5)+ # geom_text(aes(label=Microphone_ID))+ geom_line(data=predict.area.1kHz1[area_predicted>0],aes(SNR_dB,area_predicted),size=1)+ geom_line(data=predict.area.40kHz1[area_predicted>0],aes(SNR_dB,area_predicted),size=1)+ labs(x= "Signal-to-noise ratio (dB, measured)",y=expression(Sound~detection~space~ground~area~(m^{2})))+ geom_hline(yintercept = 0,lty=2)+ facet_wrap(.~Frequency,scales = "free_x")+ theme(legend.position = c(0.8,0.8))+ theme_cowplot() ggsave("Figures/Fig 2.pdf",width=8,height=5) #'detection space increase from first usable mic to highest SNR one mics.melt1[Frequency=="1 kHz" & Microphone_ID==14,.(detection_area_m2)]/mics.melt1[Frequency=="1 kHz" & Microphone_ID==4,.(detection_area_m2)] #'audible detection space of microphone thatdid not detectpick up any birds mics.melt1[Frequency=="1 kHz" & Microphone_ID==1,.(detection_area_m2)] mics.melt1[Frequency=="40 kHz" & Microphone_ID==16,.(detection_area_m2)]/mics.melt1[Frequency=="40 kHz" & Microphone_ID==3,.(detection_area_m2)] #' # Bird and bat activity #'merge animal detection tags with microphones info tags1=merge(mics1,tags0,by="Microphone_ID",all.x=T) #'find most common species for choosing calls to automatically detect in next section tags1[,.(activity=sum(duration_s)),.(binomial,class)] #'compute total activity in seconds and minutes per recording tags.plot0=tags1[class!="",.(total_activity_s=round(sum(duration_s)) ,total_activity_min=sum(duration_s)/60) ,.(Microphone_ID,recording.date,SNR_1kHz_dB_cal,SNR_40kHz_dB_cal,Code,class)] #'manually add data for two microphones (ID 1 and 2) with no tags tags.plot1=rbind(tags.plot0,cbind(mics1[Microphone_ID %in% c(1,2) ,.(Microphone_ID ,Code ,SNR_1kHz_dB_cal ,SNR_40kHz_dB_cal)] ,data.table(recording.date=c("2018-11-04","2018-11-05") ,class=c("AVES","AVES") ,total_activity_min=c(0,0) ,total_activity_s=c(0,0))) ) #'assign SNR corresponding to frequency of vocalising animals tags.plot1[class=="AVES",SNR_dB:=SNR_1kHz_dB_cal] tags.plot1[class=="MAMMALIA",SNR_dB:=SNR_40kHz_dB_cal] #'construct labels column tags.plot1[class=="AVES",label:="Birds"] tags.plot1[class=="MAMMALIA",label:="Bats"] #'check distributions of bird and bat activities #' #'Birds hist(tags.plot1[label=="Birds",total_activity_s]) #'excluding outliers reveals an otherwise approximately normal distribution hist(tags.plot1[label=="Birds" & Code!="A",total_activity_s]) #'Bats hist(tags.plot1[label=="Bats",total_activity_s]) #'Bat activity data look right-skewed, typical for count variables. We also have much smaller values. Bird and bat activity response variables are on different scales (birds: minutes, bats: seconds) and distributed differently: we need separate models again. We follow the outcome of the previous detection area for specifying the models. #' #' ## Birds #'we need to account for random effect of the date but cannot use mixed models because there are too few levels for estimating their variance. Activity is not a count variable per se, because it is continuous. It is possible that it is distributed normally as it is only derived from counts (activities are essentially weighted counts), so we need to test different models typical for different response types. #' #'simplest case: linear model activity.model.birds.lm=lm(total_activity_min~log(SNR_dB)+recording.date,tags.plot1[label=="Birds"]) #'poisson glm (rounding response to get integers results in little loss of accuracy) activity.model.birds.glm=glm(round(total_activity_min)~log(SNR_dB)+recording.date,family="poisson",tags.plot1[label=="Birds"]) #'to tackle possible overdispersion, let's try negative binomial activity.model.birds.glm.nb=glm.nb(round(total_activity_min)~log(SNR_dB)+recording.date,tags.plot1[label=="Birds"]) #'to acount for continuous variable, let's try gamma but add a very small value to avoid forbidden zeroes activity.model.birds.gamma=glm(total_activity_min+0.01~log(SNR_dB)+recording.date,family=Gamma(link="log"),tags.plot1[label=="Birds"]) #'Check AICcs T4a=round(AICc(activity.model.birds.lm,activity.model.birds.glm,activity.model.birds.glm.nb,activity.model.birds.gamma),2) T4b=data.table(Model=row.names(T4a),T4a,Formula=as.character(c(formula(activity.model.birds.lm) ,formula(activity.model.birds.glm) ,formula(activity.model.birds.glm.nb) ,formula(activity.model.birds.gamma)))) T4b #' check simulated residuals of linear model as it has lowest AICc plot(simulateResiduals(activity.model.birds.lm)) #' #'We choose the linear model for birds because the fit is good and deviation not significant. #' #'check significance of model parameters as well as fits with R2 values summary(activity.model.birds.lm) r.squaredLR(activity.model.birds.lm) #'SNR explains a lot of variation for bird activity #' #'Generate new data tables for predicting values and plotting. new.data.birds=data.table(tags.plot1[label=="Birds",.(SNR_dB=seq(min(SNR_dB),max(SNR_dB),0.5))] ,tags.plot1[label=="Birds",.(recording.date=unique(recording.date))]) #'predict values with chosen model predict.activity.birds=data.table(total_activity_min_predicted=predict(activity.model.birds.lm,newdata=new.data.birds),new.data.birds,label="Birds") #' ## Bats #'Bat activities, because of the right skew, behave more like a count variable. We use the interaction between day and SNR this time, because we observed strong differences between nights that might cause slightly different relationships. #'we do not try a linear model for bats because the response is clearly not normal #'generalised linear model for count variable (poisson) activity.model.bats.glm=glm(total_activity_s~SNR_dB*recording.date,family="poisson",tags.plot1[label=="Bats"]) activity.model.bats.glm.nb=glm.nb(total_activity_s~SNR_dB*recording.date,tags.plot1[label=="Bats"]) activity.model.bats.gamma=glm(total_activity_s~SNR_dB*recording.date,family=Gamma(link="log"),tags.plot1[label=="Bats"]) #' We choose the the best model with the lowest AICc T4c=round(AICc(activity.model.bats.glm,activity.model.bats.glm.nb,activity.model.bats.gamma),2) T4d=data.table(Model=row.names(T4c),T4c,Formula=as.character(c(formula(activity.model.bats.glm) ,formula(activity.model.bats.glm.nb) ,formula(activity.model.bats.gamma)))) T4d #'check simulated residuals, some problems with negative binomial but AICc is much lower that other models plot(simulateResiduals(activity.model.bats.glm.nb)) #'check significance of model parameters as well as fits with R2 values summary(activity.model.bats.glm.nb) r.squaredLR(activity.model.bats.glm.nb) #'SNR explains a lot of variation for bat activity #' #'Generate new data tables for predicting values and plotting. new.data.bats=data.table(tags.plot1[label=="Bats",.(SNR_dB=seq(min(SNR_dB),max(SNR_dB),0.5))] ,tags.plot1[label=="Bats",.(recording.date=unique(recording.date))]) #'predict values with chosen model predict.activity.bats=data.table(total_activity_min_predicted=exp(predict(activity.model.bats.glm.nb,newdata=new.data.bats))/60,new.data.bats,label="Bats") #'annotation text for figure annotation_text=data.table(SNR_dB=c(60,22) ,label=c("Bats","Birds"),lab=c("A","B") ,recording.date=NA,total_activity_min=c(8.1,25)) #'plot activity data and model predictions ggplot(tags.plot1,aes(SNR_dB,total_activity_min,color=recording.date,group=recording.date))+ geom_hline(yintercept=0,lty=2)+ geom_point(alpha=0.5)+ # geom_text(aes(label=Microphone_ID))+ geom_line(data=predict.activity.birds[total_activity_min_predicted>0],aes(SNR_dB,total_activity_min_predicted),size=1)+ geom_line(data=predict.activity.bats,aes(SNR_dB,total_activity_min_predicted),size=1)+ facet_wrap(.~label,scales="free")+ scale_color_discrete(name="Date")+ geom_text(data=annotation_text,aes(label=lab),color="black",size=8)+ labs(x="Signal-to-noise ratio (dB, measured)",y="Total vocalisation activity (min)")+ theme_cowplot()+ theme(legend.position = c(0.05,0.7)) ggsave("Figures/Fig 3.pdf",width=8,height=5) #'Bird activity increase from first usable mic to highest SNR one, for most active day tags.plot1[label=="Birds" & Microphone_ID==14,.(total_activity_s)]/tags.plot1[label=="Birds" & Microphone_ID==4,.(total_activity_s)] #'likewise, we compute the bat activity increase tags.plot1[label=="Bats" & Microphone_ID==19,.(total_activity_s)]/tags.plot1[label=="Bats" & Microphone_ID==3,.(total_activity_s)] #' # Bird and bat species accumulation curves #'number of time steps over which to calculate species accumulation curves n_timesteps=40 #'duration of recordings in seconds total_duration_birds=900 total_duration_bats=900 #'add duration of first ultrasound recording to tag time of second recording to simulate one recording tags1[class=="MAMMALIA" & grepl("_181500",recording.name),c("min_time_adjusted","max_time_adjusted"):=.(min_time+(30*60),max_time+(30*60))] #'initiate empty data table and run loop for counting species at each time step for each microphone and taxon richness.cumulative0=data.table() for (c in unique(tags1[class!="",class])){ if (c=="AVES") {total_duration=total_duration_birds} if (c=="MAMMALIA") {total_duration=total_duration_bats} for (t in 1:n_timesteps){ end=total_duration*(t)/n_timesteps #'count species richness.cumulative.temp=tags1[class==c,.(richness=length(unique(binomial[min_time=s & true=="y" & mic_daytime==md])/2 false_positives=nrow(detections.all[score>=s & true=="n" & mic_daytime==md])/2 # store in data table auto.detect0=rbind(auto.detect0,data.table(true_positives,false_positives,score_cutoff=s,mic_daytime=md)) } } #'extract day time, taxon, and microphone ID auto.detect0[,Soundscape_microphone_ID:=as.numeric(unlist(tstrsplit(mic_daytime," ")[1]))] auto.detect0[,daytime:=tstrsplit(mic_daytime," ")[2]] auto.detect0[daytime=="day",Label_calls:="Bird calls"] auto.detect0[daytime=="night",Label_calls:="Bat calls"] #'merge automated detections data with microphones data auto.detect1=merge(auto.detect0,mics1[,.(Microphone_ID,BatA_counted_calls,Bulbul_counted_calls,SNR_40kHz_dB_cal,SNR_1kHz_dB_cal,set)] ,by.x="Soundscape_microphone_ID",by.y="Microphone_ID") #'put manually counted calls total in one column auto.detect1[daytime=="night",counted_calls:=BatA_counted_calls] auto.detect1[daytime=="day",counted_calls:=Bulbul_counted_calls] #' define set labels auto.detect1[set==1,set_label:="Set 1"] auto.detect1[set==2,set_label:="Set 2"] #'compute recall and precision, as well as total detections auto.detect1[,recall:=true_positives/counted_calls] auto.detect1[,total_detections:=true_positives+false_positives] auto.detect1[,precision:=true_positives/(true_positives+false_positives)] #'assign SNR values depending on time of day auto.detect1[daytime=="night",SNR_dB:=SNR_40kHz_dB_cal] auto.detect1[daytime=="day",SNR_dB:=SNR_1kHz_dB_cal] #'calculate the maximal score for an acceptable recall of 0.5, and the minimum score for an acceptable precision of 0.5 acceptable=auto.detect1[,.(recall_max_score=max(score_cutoff[recall>=0.5]) ,precision_min_score=min(score_cutoff[precision>=0.5],na.rm=T)) ,.(mic_daytime,Soundscape_microphone_ID,daytime,SNR_dB,set_label,Label_calls)] #'some values have to be replaced with 0 acceptable[recall_max_score<0,recall_max_score:=0] acceptable[recall_max_score>precision_min_score,range_acceptable:=recall_max_score-precision_min_score] acceptable[recall_max_score<=precision_min_score,range_acceptable:=0] #'melt data for graphing acceptable.melt=melt(acceptable,id=c("mic_daytime","Soundscape_microphone_ID","daytime","SNR_dB","Label_calls","set_label"),measure=c("recall_max_score","precision_min_score")) auto.detect.melt=melt(auto.detect1,id=c("SNR_dB","score_cutoff","set_label","daytime","Label_calls"),measure=c("total_detections","false_positives","counted_calls"),value.name="Calls/detections") #'define labels auto.detect.melt[variable=="counted_calls",variable:="Total calls"] auto.detect.melt[variable=="total_detections",variable:="True positives"] auto.detect.melt[variable=="false_positives",variable:="False positives"] auto.detect.melt[`Calls/detections`==0,`Calls/detections`:=NA] auto.detect.melt.ordered=auto.detect.melt[order(variable)] #'annotation text for figure annotation_text2=data.table(SNR_dB=c(60,22) ,label=c("Bats","Birds"),lab=c("A","B") ,recording.date=NA,total_activity_min=c(8.1,25)) #'plot total calls, true positives, false positives, and levels of acceptable recall and precision ggplot(auto.detect.melt.ordered,aes(SNR_dB,score_cutoff))+ # geom_ribbon(data=acceptable,aes(SNR_dB,ymax=recall_max_score,ymin=precision_min_score,y=1),color="green",alpha=0.3)+ geom_point(aes(size=`Calls/detections`,color=variable),shape=15)+ scale_size_continuous(range=c(0.5,10))+ # geom_line(data=acceptable.melt,aes(SNR_dB,value,lty=variable),color="green",size=0.8)+ geom_errorbar(data=acceptable[range_acceptable>0],aes(SNR_dB,ymax=recall_max_score,ymin=precision_min_score,y=1),color="green",size=0.6,width=1)+ scale_color_manual(values = c("black","#4d4dff","red"),name="")+ facet_grid(set_label~Label_calls,scales="free")+ theme_cowplot()+ background_grid(major="y")+ labs(y="Recognizer score cutoff",x="Signal-to-noise ratio (dB)") ggsave("Figures/Fig 5.eps",width=10,height=8) #'test effect of SNR on range of acceptable scores acceptable.model=lm(range_acceptable~SNR_dB*daytime+set_label,acceptable) plot(simulateResiduals(acceptable.model)) summary(acceptable.model) summary(acceptable.model)$adj.r.squared #'explore interaction of SNR and daytime test(emtrends(acceptable.model,"daytime",var="SNR_dB"))