############ Length prelim analyses ############ library(ggplot2) library(glmmTMB) library(lme4) library(lmerTest) library(DHARMa) library(MuMIn) library(optimx) library(mefa) library(nlme) library(ape) library(AICcmodavg) library(dplyr) library(lubridate) library(rgdal) library(mapproj) #is descriptive ID the right one? ### read in data ############## setwd("C:\\Users\\Madeleine\\Dropbox (CCIRA)\\Ecological Research - CCIRA\\Analyses\\Groundfish\\RCA-Pairing analyses\\Data") spInfo = read.csv("species table simplified 20191202.csv") diveDat = read.csv("Dive Fish Lengths 16.10.2019.csv") # average complexity diveDat$Complexity = apply(diveDat[,c("Complex1","Complex2","Complex3")],1,function(x)mean(x,na.rm=T)) # edit species names diveDat$Species=as.character(diveDat$Species) diveDat$Species[which(diveDat$Species=="Yellowtail")]="yellowtail" diveDat$Species[which(diveDat$Species=="Tiger")]="tiger" diveDat$Species[which(diveDat$Species=="boccacio")]="bocaccio" diveDat$Species[which(diveDat$Species=="silvergrey")]="silvergray" diveDat$Species=as.factor(diveDat$Species) #remove some species diveDat=diveDat[-which(diveDat$Species%in%c("deacon","redstripe","lingcod","dusky")),] # I do not combine by life history. I stick to single species analyses # binary RCA variable diveDat$InRCA = "in";diveDat$InRCA[which(diveDat$RCA_DistKM!=0)]="out" # diveDat$InRCA = factor(diveDat$InRCA, levels = c("out","in")) # only those within 5 km of an RCA, but 10 km for goose. remember to remove goose later diveDat = subset(diveDat,diveDat$RCA_DistKM<=10) # Ignore small fish diveDat = subset(diveDat,diveDat$TL>=10) #Date since establishment of RCA RCAdate = read.csv("RCA Establishment.csv") diveDat$Year = as.numeric(format(as.Date(diveDat$Date,format="%d-%b-%y"),"%Y")) #delete this eventually # diveDat$RCAYearDiff = diveDat$Year - RCAdate[match(diveDat$RCA_Name,RCAdate$RCAName),2] #combine goose mcmullin #wait no keep them separate # diveDat$RCA_Name[which(diveDat$RCA_Name=="Goose Island")] = "McMullin Group" # diveDat$RCA_Name=as.factor(as.character(diveDat$RCA_Name)) #remove irrelevant columns diveDat = diveDat[,c("Descriptive.ID","Date","Lat","Long","Transect.Number","Depth","Complex1","Complex2","Complex3","Species","TL","Multiplier", "MinTargetSiteDistKm","RCA_DistKM","RCA_Name","Complexity","InRCA","Year")] ####### sample sizes ####### diveDatSub = na.omit(diveDat) SampleSize=data.frame(tapply(diveDatSub$Multiplier,list(diveDatSub$Species,diveDatSub$RCA_Name,diveDatSub$InRCA),function(x)sum(x))) setwd("C:\\Users\\Madeleine\\Dropbox (CCIRA)\\Ecological Research - CCIRA\\Analyses\\Groundfish\\RCA-Pairing analyses\\Output\\20200330_DiveLength") # write.csv(SampleSize,"Dive Length Sample Sizes 20191029_no skew correction.csv") #fix skew diveDatSub_fixSkew=na.omit(diveDat) # tapply(diveDatSub_fixSkew$Multiplier,diveDatSub_fixSkew$Species,function(x)(quantile(x,c(0.99,0.95,0.90)))) MultiplierPercentiles = data.frame(species = names(tapply(diveDatSub_fixSkew$Multiplier,diveDatSub_fixSkew$Species,function(x)(quantile(x,c(0.90))))), p90 = (tapply(diveDatSub_fixSkew$Multiplier,diveDatSub_fixSkew$Species,function(x)(quantile(x,c(0.90))))), p95 = (tapply(diveDatSub_fixSkew$Multiplier,diveDatSub_fixSkew$Species,function(x)(quantile(x,c(0.95))))), p99 = (tapply(diveDatSub_fixSkew$Multiplier,diveDatSub_fixSkew$Species,function(x)(quantile(x,c(0.99)))))) ### only apply this to schooling fish sps = c("black","boccacio","canary","deacon","dusky","puget sound","redstripe","silvergray","vermillion","widow","yellowtail") MultiplierPercentiles2 = subset(MultiplierPercentiles,MultiplierPercentiles$species%in%sps) # write.csv(MultiplierPercentiles2,"multiplierPercentiles.csv") #then I have to fix all the individual species. I have been reduced to a for loop. for(i in sps){ p95 = MultiplierPercentiles[which(MultiplierPercentiles$species==i),"p95"] diveDatSub_fixSkew[which(diveDatSub_fixSkew$Species==i&diveDatSub_fixSkew$Multiplier>=p95),"Multiplier"]=p95 } # tapply(diveDatSub_fixSkew$Multiplier,diveDatSub_fixSkew$Species,function(x)(quantile(x,c(0.99,0.90)))) #this one is for helping with running the single-species model later SampleSize_fixSkew=tapply(diveDatSub_fixSkew$Multiplier,list(diveDatSub_fixSkew$InRCA,diveDatSub_fixSkew$Species),function(x)sum(x)) #this one gives a complete picture SampleSize_fixSkew2=data.frame(tapply(diveDatSub_fixSkew$Multiplier,list(diveDatSub_fixSkew$Species,diveDatSub_fixSkew$RCA_Name,diveDatSub_fixSkew$InRCA),function(x)sum(x))) setwd("C:\\Users\\Madeleine\\Dropbox (CCIRA)\\Ecological Research - CCIRA\\Analyses\\Groundfish\\RCA-Pairing analyses\\Output\\20200330_DiveLength") # write.csv(SampleSize_fixSkew2,"Dive Length Sample Sizes 20191029_skewness fixed.csv") ####### AIC tables - species combined ########## #first, make sure you're using the fixed multipliers diveDat = diveDatSub_fixSkew #remove sps missing movement data spInfo = subset(spInfo,spInfo$Relative.mobility%in%c("low","high")) #match dive dat and species index=match(diveDat$Species,tolower(gsub(" Rockfish","",spInfo$Common.Name))) #TL normalized (this is the length anomaly) x = tapply(diveDat$TL,diveDat$Species,function(x)mean(x,na.rm=T)) y = tapply(diveDat$TL,diveDat$Species,function(x)sd(x,na.rm=T)) index2 = match(diveDat$Species,names(x)) diveDat$TLnormalized = (diveDat$TL-x[index2])/y[index2] # don't forget to include a relative mobility*InRCA interaction, so you need a relative mobility variable diveDat$RM = spInfo$Relative.mobility[index] #kAvg diveDat$kAvg = spInfo$kAvg[index] #also a schooling by RCA interaction needs to be tested, so you need a max age variable diveDat$Pelagic = spInfo$Analysis.grouping[index] #combine rm and water column location diveDat$behaviour = paste(diveDat$RM,diveDat$Pelagic) diveDat$behaviour[which(diveDat$behaviour=="low no")] = "Demersal-Low RM" diveDat$behaviour[which(diveDat$behaviour=="high yes")] = "Pelagic-High RM" #make sure that dive dat sub from elsewhere is not getting used rm(diveDatSub) diveDatSub=na.omit(diveDat) diveDatSub$Species=as.factor(as.character(diveDatSub$Species)) #filter by sample size SampleSizeAllSpModel=data.frame(tapply(diveDatSub$Multiplier,list(diveDatSub$InRCA,diveDatSub$RCA_Name),function(x)sum(x))) #output a sample size table like this SampleSizeAllSpModel2=data.frame(tapply(diveDatSub$Multiplier,list(diveDatSub$Species,diveDatSub$RCA_Name,diveDatSub$InRCA),function(x)sum(x))) # write.csv(SampleSizeAllSpModel2,"DiveLength_All sp_sample size_10km_fixedSkew.csv") # 5 fish in and 5 fish out (any species) rmvRCANamei = c(which(SampleSizeAllSpModel[1,]<5),which(SampleSizeAllSpModel[2,]<5),which(is.na(SampleSizeAllSpModel[1,])),which(is.na(SampleSizeAllSpModel[2,]))) rmvRCAName=gsub("\\."," ",names(SampleSizeAllSpModel)[rmvRCANamei]) rmvRCAName rmv = which(diveDatSub$RCA_Name%in%rmvRCAName) diveDatSub = diveDatSub[-rmv,] # spatial autocorrelation test ######## TL.dists <- as.matrix(dist(cbind(diveDatSub$Long, diveDatSub$Lat))) TL.dists.inv <- 1/TL.dists diag(TL.dists.inv) <- 0 TL.dists.inv[is.infinite(TL.dists.inv)] <- 0 Moran.I(diveDatSub$TLnormalized, TL.dists.inv,na.rm=T) #yes there is spatial autocorrelation ##### model aic selection k ########################## setwd("C:\\Users\\Madeleine\\Dropbox (CCIRA)\\Ecological Research - CCIRA\\Analyses\\Groundfish\\RCA-Pairing analyses\\Output\\20200118_DiveLength_k\\tigerv2") print("All Species: Global Model") #write just the sites #change lat-long to X-Y and add a tiny bit of spatial jitter set.seed(1) AlbersCoords = mapproject(diveDatSub$Lat, diveDatSub$Long, "albers", param=c(51, 53)) diveDatSub$X = AlbersCoords$x diveDatSub$Y = AlbersCoords$y diveDatSub$X = diveDatSub$X+runif(length(diveDatSub$X),min = -0.00001,max = 0.00001) diveDatSub$Y = diveDatSub$Y+runif(length(diveDatSub$Y),min = -0.00001,max = 0.00001) #test autocorrelation a second way mk = lm(data= diveDatSub, TLnormalized~InRCA*RCA_Name+InRCA*kAvg+InRCA*behaviour+Complexity*behaviour+Depth*behaviour, weights=Multiplier,na.action=na.fail) All = dredge(mk,fixed = c("Complexity","Depth"),trace=T,rank="AICc") mk0=get.models(All,subset=delta==0)[[1]] Moran.I(residuals(mk0), TL.dists.inv,na.rm=T) #run the global model with spatially autocorrelated residuals start_time <- Sys.time() mk = gls(data= diveDatSub, TLnormalized~InRCA*RCA_Name+InRCA*kAvg+InRCA*behaviour+Complexity*behaviour+Depth*behaviour, correlation=corGaus(form=~Y+X,nugget=TRUE),weights=varFixed(~Multiplier),method="ML") end_time <- Sys.time() end_time - start_time #start the AIC selection print("dredge") options(warn=1) Allk=dredge(mk,fixed = c("Complexity","Depth"),trace=T,rank="AICc") ### best models setwd("C:\\Users\\Madeleine\\Dropbox (CCIRA)\\Ecological Research - CCIRA\\Analyses\\Groundfish\\RCA-Pairing analyses\\Output\\20200330_DiveLength") Allk2 = get.models(Allk,subset=delta==0)[[1]] Allk2_REML = update(Allk2,method = "REML") sink("20200330_OldCorForm_ModelSummary_Tiger1.txt") print(summary(Allk2_REML)) sink() sink("20200330_OldCorForm_AICtable_Tiger1.txt") print(Allk) sink() ### diagnostics tiff("20200330_diagnostics_Tiger1.tiff", width = 8, height = 4, units = 'in', res = 300) par(mfrow=c(1,2)) plot(resid(Allk2_REML,type="pearson")~fitted(Allk2_REML)) qqnorm(resid(Allk2_REML)) dev.off() ### prediction tables newDat = data.frame(Species = diveDatSub$Species[1], kAvg = rep(c(0.050, 0.075, 0.100, 0.125, 0.150, 0.175, 0.12),each=160), Depth = rep(rep(c(5,10,15,20,30),each=32),7), RCA_Name = diveDatSub$RCA_Name[1], InRCA = rep(rep(c("in","out"),each=16),35), Complexity = rep(rep(c(1,2,3,4),each=4),70), Pelagic = rep(rep(c("yes","no"),each=2),280), RM = rep(c("low","high"),560)) #I got tired of figuring out all the correct numbers to replicate everything by (as above) #I am just going to replicate the RCA names here like this RCAlen=length(unique(diveDatSub$RCA_Name)) RCA_Name = rep(unique(diveDatSub$RCA_Name),each=length(newDat[,1])) newDat = do.call("rbind",replicate(RCAlen,newDat,simplify=F)) newDat$RCA_Name = RCA_Name #adding another variable after the fact is always a good idea newDat$behaviour = paste(newDat$RM,newDat$Pelagic) newDat$behaviour[which(newDat$behaviour=="low no")] = "Demersal-Low RM" newDat$behaviour[which(newDat$behaviour=="high yes")] = "Pelagic-High RM" newDat=newDat[which(newDat$behaviour%in%c("Demersal-Low RM", "Pelagic-High RM")),] newDat$RCA_Name = as.factor(as.character(newDat$RCA_Name)) test = predictSE(Allk2_REML,newDat,se.fit=T) newDat$predictions = test[[1]] newDat$SE = test[[2]] write.csv(newDat,"predictionTableTiger.csv")