############ Length prelim analyses ############
library(ggplot2)
library(glmmTMB)
library(lme4)
library(lmerTest)
library(DHARMa)
library(MuMIn)
library(optimx)
library(mefa)
library(nlme)
library(ape)
library(AICcmodavg)
library(dplyr)
library(lubridate)
library(rgdal)
library(mapproj)

#is descriptive ID the right one?

### read in data ##############
setwd("C:\\Users\\Madeleine\\Dropbox (CCIRA)\\Ecological Research - CCIRA\\Analyses\\Groundfish\\RCA-Pairing analyses\\Data")
spInfo = read.csv("species table simplified 20191202.csv")

diveDat = read.csv("Dive Fish Lengths 16.10.2019.csv")
# average complexity
diveDat$Complexity = apply(diveDat[,c("Complex1","Complex2","Complex3")],1,function(x)mean(x,na.rm=T))
# edit species names
diveDat$Species=as.character(diveDat$Species)
diveDat$Species[which(diveDat$Species=="Yellowtail")]="yellowtail"
diveDat$Species[which(diveDat$Species=="Tiger")]="tiger"
diveDat$Species[which(diveDat$Species=="boccacio")]="bocaccio"
diveDat$Species[which(diveDat$Species=="silvergrey")]="silvergray"
diveDat$Species=as.factor(diveDat$Species)
#remove some species
diveDat=diveDat[-which(diveDat$Species%in%c("deacon","redstripe","lingcod","dusky")),]

# I do not combine by life history. I stick to single species analyses

# binary RCA variable
diveDat$InRCA = "in";diveDat$InRCA[which(diveDat$RCA_DistKM!=0)]="out"
# diveDat$InRCA = factor(diveDat$InRCA, levels = c("out","in"))

# only those within 5 km of an RCA, but 10 km for goose. remember to remove goose later
diveDat = subset(diveDat,diveDat$RCA_DistKM<=10)

# Ignore small fish
diveDat = subset(diveDat,diveDat$TL>=10)

#Date since establishment of RCA
RCAdate = read.csv("RCA Establishment.csv")
diveDat$Year = as.numeric(format(as.Date(diveDat$Date,format="%d-%b-%y"),"%Y"))
#delete this eventually
# diveDat$RCAYearDiff = diveDat$Year - RCAdate[match(diveDat$RCA_Name,RCAdate$RCAName),2]

#combine goose mcmullin
#wait no keep them separate
# diveDat$RCA_Name[which(diveDat$RCA_Name=="Goose Island")] = "McMullin Group"
# diveDat$RCA_Name=as.factor(as.character(diveDat$RCA_Name))

#remove irrelevant columns
diveDat = diveDat[,c("Descriptive.ID","Date","Lat","Long","Transect.Number","Depth","Complex1","Complex2","Complex3","Species","TL","Multiplier",
                     "MinTargetSiteDistKm","RCA_DistKM","RCA_Name","Complexity","InRCA","Year")]

####### sample sizes #######
diveDatSub = na.omit(diveDat)

SampleSize=data.frame(tapply(diveDatSub$Multiplier,list(diveDatSub$Species,diveDatSub$RCA_Name,diveDatSub$InRCA),function(x)sum(x)))

setwd("C:\\Users\\Madeleine\\Dropbox (CCIRA)\\Ecological Research - CCIRA\\Analyses\\Groundfish\\RCA-Pairing analyses\\Output\\20200330_DiveLength")
# write.csv(SampleSize,"Dive Length Sample Sizes 20191029_no skew correction.csv")

#fix skew
diveDatSub_fixSkew=na.omit(diveDat)

# tapply(diveDatSub_fixSkew$Multiplier,diveDatSub_fixSkew$Species,function(x)(quantile(x,c(0.99,0.95,0.90))))
MultiplierPercentiles = data.frame(species = names(tapply(diveDatSub_fixSkew$Multiplier,diveDatSub_fixSkew$Species,function(x)(quantile(x,c(0.90))))),
                                   p90 = (tapply(diveDatSub_fixSkew$Multiplier,diveDatSub_fixSkew$Species,function(x)(quantile(x,c(0.90))))),
                                   p95 = (tapply(diveDatSub_fixSkew$Multiplier,diveDatSub_fixSkew$Species,function(x)(quantile(x,c(0.95))))),
                                   p99 = (tapply(diveDatSub_fixSkew$Multiplier,diveDatSub_fixSkew$Species,function(x)(quantile(x,c(0.99))))))

### only apply this to schooling fish
sps = c("black","boccacio","canary","deacon","dusky","puget sound","redstripe","silvergray","vermillion","widow","yellowtail")
MultiplierPercentiles2 = subset(MultiplierPercentiles,MultiplierPercentiles$species%in%sps)
# write.csv(MultiplierPercentiles2,"multiplierPercentiles.csv")

#then I have to fix all the individual species. I have been reduced to a for loop.
for(i in sps){
  p95 = MultiplierPercentiles[which(MultiplierPercentiles$species==i),"p95"]
  diveDatSub_fixSkew[which(diveDatSub_fixSkew$Species==i&diveDatSub_fixSkew$Multiplier>=p95),"Multiplier"]=p95
}
# tapply(diveDatSub_fixSkew$Multiplier,diveDatSub_fixSkew$Species,function(x)(quantile(x,c(0.99,0.90))))

#this one is for helping with running the single-species model later
SampleSize_fixSkew=tapply(diveDatSub_fixSkew$Multiplier,list(diveDatSub_fixSkew$InRCA,diveDatSub_fixSkew$Species),function(x)sum(x))

#this one gives a complete picture
SampleSize_fixSkew2=data.frame(tapply(diveDatSub_fixSkew$Multiplier,list(diveDatSub_fixSkew$Species,diveDatSub_fixSkew$RCA_Name,diveDatSub_fixSkew$InRCA),function(x)sum(x)))

setwd("C:\\Users\\Madeleine\\Dropbox (CCIRA)\\Ecological Research - CCIRA\\Analyses\\Groundfish\\RCA-Pairing analyses\\Output\\20200330_DiveLength")
# write.csv(SampleSize_fixSkew2,"Dive Length Sample Sizes 20191029_skewness fixed.csv")

#######  AIC tables - species combined ##########
#first, make sure you're using the fixed multipliers
diveDat = diveDatSub_fixSkew

#remove sps missing movement data
spInfo = subset(spInfo,spInfo$Relative.mobility%in%c("low","high"))

#match dive dat and species
index=match(diveDat$Species,tolower(gsub(" Rockfish","",spInfo$Common.Name)))

#TL normalized (this is the length anomaly)
x = tapply(diveDat$TL,diveDat$Species,function(x)mean(x,na.rm=T))
y = tapply(diveDat$TL,diveDat$Species,function(x)sd(x,na.rm=T))
index2 = match(diveDat$Species,names(x))
diveDat$TLnormalized = (diveDat$TL-x[index2])/y[index2]

# don't forget to include a relative mobility*InRCA interaction, so you need a relative mobility variable
diveDat$RM = spInfo$Relative.mobility[index]

#kAvg
diveDat$kAvg = spInfo$kAvg[index]

#also a schooling by RCA interaction needs to be tested, so you need a max age variable
diveDat$Pelagic = spInfo$Analysis.grouping[index]

#combine rm and water column location
diveDat$behaviour = paste(diveDat$RM,diveDat$Pelagic)
diveDat$behaviour[which(diveDat$behaviour=="low no")] = "Demersal-Low RM"
diveDat$behaviour[which(diveDat$behaviour=="high yes")] = "Pelagic-High RM"

#make sure that dive dat sub from elsewhere is not getting used
rm(diveDatSub)
diveDatSub=na.omit(diveDat)
diveDatSub$Species=as.factor(as.character(diveDatSub$Species))

#filter by sample size
SampleSizeAllSpModel=data.frame(tapply(diveDatSub$Multiplier,list(diveDatSub$InRCA,diveDatSub$RCA_Name),function(x)sum(x)))

#output a sample size table like this
SampleSizeAllSpModel2=data.frame(tapply(diveDatSub$Multiplier,list(diveDatSub$Species,diveDatSub$RCA_Name,diveDatSub$InRCA),function(x)sum(x)))
# write.csv(SampleSizeAllSpModel2,"DiveLength_All sp_sample size_10km_fixedSkew.csv")


# 5 fish in and 5 fish out (any species)
rmvRCANamei = c(which(SampleSizeAllSpModel[1,]<5),which(SampleSizeAllSpModel[2,]<5),which(is.na(SampleSizeAllSpModel[1,])),which(is.na(SampleSizeAllSpModel[2,])))
rmvRCAName=gsub("\\."," ",names(SampleSizeAllSpModel)[rmvRCANamei])
rmvRCAName

rmv = which(diveDatSub$RCA_Name%in%rmvRCAName)
diveDatSub = diveDatSub[-rmv,]

# spatial autocorrelation test ########
TL.dists <- as.matrix(dist(cbind(diveDatSub$Long, diveDatSub$Lat)))

TL.dists.inv <- 1/TL.dists
diag(TL.dists.inv) <- 0

TL.dists.inv[is.infinite(TL.dists.inv)] <- 0

Moran.I(diveDatSub$TLnormalized, TL.dists.inv,na.rm=T)
#yes there is spatial autocorrelation

##### model aic selection k ##########################
setwd("C:\\Users\\Madeleine\\Dropbox (CCIRA)\\Ecological Research - CCIRA\\Analyses\\Groundfish\\RCA-Pairing analyses\\Output\\20200118_DiveLength_k\\tigerv2")

print("All Species: Global Model")

#write just the sites

#change lat-long to X-Y and add a tiny bit of spatial jitter
set.seed(1)
AlbersCoords = mapproject(diveDatSub$Lat, diveDatSub$Long, "albers", param=c(51, 53))
diveDatSub$X = AlbersCoords$x
diveDatSub$Y = AlbersCoords$y

diveDatSub$X = diveDatSub$X+runif(length(diveDatSub$X),min = -0.00001,max = 0.00001)
diveDatSub$Y = diveDatSub$Y+runif(length(diveDatSub$Y),min = -0.00001,max = 0.00001)

#test autocorrelation a second way
mk = lm(data= diveDatSub,
        TLnormalized~InRCA*RCA_Name+InRCA*kAvg+InRCA*behaviour+Complexity*behaviour+Depth*behaviour,
        weights=Multiplier,na.action=na.fail)

All = dredge(mk,fixed = c("Complexity","Depth"),trace=T,rank="AICc")

mk0=get.models(All,subset=delta==0)[[1]]
Moran.I(residuals(mk0), TL.dists.inv,na.rm=T)

#run the global model with spatially autocorrelated residuals
start_time <- Sys.time()
mk = gls(data= diveDatSub,
        TLnormalized~InRCA*RCA_Name+InRCA*kAvg+InRCA*behaviour+Complexity*behaviour+Depth*behaviour,
        correlation=corGaus(form=~Y+X,nugget=TRUE),weights=varFixed(~Multiplier),method="ML")
end_time <- Sys.time()
end_time - start_time

#start the AIC selection
print("dredge")

options(warn=1)
Allk=dredge(mk,fixed = c("Complexity","Depth"),trace=T,rank="AICc")

### best models
setwd("C:\\Users\\Madeleine\\Dropbox (CCIRA)\\Ecological Research - CCIRA\\Analyses\\Groundfish\\RCA-Pairing analyses\\Output\\20200330_DiveLength")
Allk2 = get.models(Allk,subset=delta==0)[[1]]
Allk2_REML = update(Allk2,method = "REML")


sink("20200330_OldCorForm_ModelSummary_Tiger1.txt")
print(summary(Allk2_REML))
sink()

sink("20200330_OldCorForm_AICtable_Tiger1.txt")
print(Allk)
sink()

### diagnostics
tiff("20200330_diagnostics_Tiger1.tiff", width = 8, height = 4, units = 'in', res = 300)
par(mfrow=c(1,2))
plot(resid(Allk2_REML,type="pearson")~fitted(Allk2_REML))
qqnorm(resid(Allk2_REML))
dev.off()

### prediction tables
newDat = data.frame(Species = diveDatSub$Species[1],
                    kAvg = rep(c(0.050, 0.075, 0.100, 0.125, 0.150, 0.175, 0.12),each=160),
                    Depth = rep(rep(c(5,10,15,20,30),each=32),7),
                    RCA_Name = diveDatSub$RCA_Name[1],
                    InRCA = rep(rep(c("in","out"),each=16),35),
                    Complexity = rep(rep(c(1,2,3,4),each=4),70),
                    Pelagic = rep(rep(c("yes","no"),each=2),280),
                    RM = rep(c("low","high"),560))

#I got tired of figuring out all the correct numbers to replicate everything by (as above)
#I am just going to replicate the RCA names here like this
RCAlen=length(unique(diveDatSub$RCA_Name))
RCA_Name = rep(unique(diveDatSub$RCA_Name),each=length(newDat[,1]))
newDat = do.call("rbind",replicate(RCAlen,newDat,simplify=F))
newDat$RCA_Name = RCA_Name

#adding another variable after the fact is always a good idea
newDat$behaviour = paste(newDat$RM,newDat$Pelagic)
newDat$behaviour[which(newDat$behaviour=="low no")] = "Demersal-Low RM"
newDat$behaviour[which(newDat$behaviour=="high yes")] = "Pelagic-High RM"

newDat=newDat[which(newDat$behaviour%in%c("Demersal-Low RM", "Pelagic-High RM")),]

newDat$RCA_Name = as.factor(as.character(newDat$RCA_Name))
test = predictSE(Allk2_REML,newDat,se.fit=T)

newDat$predictions = test[[1]]
newDat$SE = test[[2]]
write.csv(newDat,"predictionTableTiger.csv")