# Data cleaning dataclean_KR <- function(dat) { # hist(dat$hbf,nclass=400) if(sum(is.na(dat$alb))>0) dat[is.na(dat$alb),]$alb <- 0 if(sum(is.na(dat$bet))>0) dat[is.na(dat$bet),]$bet <- 0 if(sum(is.na(dat$blm))>0) dat[is.na(dat$blm),]$blm <- 0 if(sum(is.na(dat$bum))>0) dat[is.na(dat$bum),]$bum <- 0 if(sum(is.na(dat$mls))>0) dat[is.na(dat$mls),]$mls <- 0 if(sum(is.na(dat$oth))>0) dat[is.na(dat$oth),]$oth <- 0 if(sum(is.na(dat$pbf))>0) dat[is.na(dat$pbf),]$pbf <- 0 if(sum(is.na(dat$sbt))>0) dat[is.na(dat$sbt),]$sbt <- 0 if(sum(is.na(dat$sfa))>0) dat[is.na(dat$sfa),]$sfa <- 0 if(sum(is.na(dat$sha))>0) dat[is.na(dat$sha),]$sha <- 0 if(sum(is.na(dat$skj))>0) dat[is.na(dat$skj),]$skj <- 0 if(sum(is.na(dat$swo))>0) dat[is.na(dat$swo),]$swo <- 0 if(sum(is.na(dat$yft))>0) dat[is.na(dat$yft),]$yft <- 0 dat <- dat[!is.na(dat$hooks),] # Clean up 294 NAs dat <- dat[dat$hooks<5000,] # clean up outliers dat <- dat[dat$hooks>200,] # dat <- dat[dat$yft<750,] # dat <- dat[dat$bet<250,] # dat <- dat[dat$alb<250,] dat <- dat[is.na(dat$hbf)==F,] dat <- dat[dat$hbf < 40,] dat <- dat[dat$op_yr > 1976,] dat <- dat[dat$yrqtr < 2017,] dat <- dat[dat$EW==1,] dat <- dat[dat$hooks >= 1000,] dat <- dat[dat$hbf >= 5,] return(dat) } dataprep_KRSBT <- function(dat,alldat=F) { dat$dmy <- ymd(dat$DATE) loc <- which(is.na(dat$dmy)) dat$dmy[loc] <- dmy(dat$DATE[loc]) #dat$dmy <- dmy(dat$DATE) dat$hbf <- round(dat$hooks / dat$floats,0) dat$moon <- lunar.illumination(dat$dmy) dat$lat <- dat$Lat01 dat$lon <- dat$lonx <- dat$Long01 dat$lat[dat$NS %in% c(2, "S")] <- (dat$Lat01[dat$NS %in% c(2, "S")]+1) * -1 dat$lon[dat$EW %in% c(2, "W")] <- 360 - (dat$Long01[dat$EW %in% c(2, "W")] + 1) dat$lonx[dat$EW %in% c(2, "W")] <- -(dat$Long01[dat$EW %in% c(2, "W")] + 1) dat <- dat[dat$lon >= 0,] dat <- dat[dat$lat < 29,] dat$lat5 <- 5 * floor(dat$lat/5) + 2.5 dat$lon5 <- 5 * floor(dat$lon/5) + 2.5 dat$lonx5 <- 5 * floor(dat$lonx/5) + 2.5 #regSBT preliminary boudnaries - update later dat$regSBT <- 0 dat[dat$lat < -30 & dat$lon < 60,]$regSBT <- 1 dat[dat$lat < -30 & dat$lon >= 60,]$regSBT <- 2 dat$lonfac <- factor(dat$lonx,levels=seq(-25,190,1)) dat$latfac <- factor(dat$lat,levels=seq(-60,10,1)) dat$lon5fac <- factor(dat$lonx,levels=seq(-25,190,1)) dat$lat5fac <- factor(dat$lat,levels=seq(-60,10,1)) dat$qtr <- floor((dat$op_mon-1)/3)+1 dat$yrqtr <- dat$op_yr + floor((dat$op_mon-1)/3)/4 + 0.125 dat$latlong <- paste(dat$lat5,dat$lon5,sep="_") dat$vessid <- as.factor(dat$VESSEL_NAME_rev) dat$tripidmon <- paste(dat$vessid,dat$op_yr,dat$op_mon) return(dat) } get.coefs <- function(model,nyrs) { coefs <- exp(c(0,summary(model)$coefficients[2:nyrs,1])) coefs <- coefs/mean(coefs) return(coefs) } get.bin.coefs2 <- function(model,nyrs,dat) { mn <- logit(mean(dat[,3]!=0)) a <- c(0,summary(model)$coefficients[2:nyrs,1]) a <- a - mean(a) + mn coefs <- inv.logit(a) # coefs <- coefs/mean(coefs) return(coefs) } get.bin.coefs <- function(model,nyrs,dat) { mn <- logit(mean(model$y)) a <- c(0,summary(model)$coefficients[2:nyrs,1]) a <- a - mean(a) + mn coefs <- inv.logit(a) return(coefs) } plot.slope.ratio <- function(coefs1,coefs2,yr,titl) { # base goes first, boat goes second windows(height=14,width=12) par(mfrow=c(2,1),mar=c(4,4,3,1)) plot(yr,coefs1/coefs2,xlim=c(1976,2010),ylim=c(0,2),xlab="Year",ylab="Ratio of coefficients") title(main=titl,cex.main=1.2) lin <-lm(coefs1/coefs2 ~ yr) logl <-lm(log(coefs1/coefs2) ~ yr) lines(yr,exp(logl$coefficients[1] + logl$coefficients[2]*yr),lty=3) tt <- paste(prettyNum(100*logl$coefficients[2],digits=2,format="f"), "% \261 ",prettyNum(100*summary(logl)$coefficients[2,2],digits=2,format="f"),", p = ", prettyNum(summary(logl)$coefficients[2,4], digits=2,format="f"),sep="") text(min(yr)+5, 1.9, tt,font=2,col="red",cex=1.1) par(mar=c(5,4,1,1)) plot(yr,coefs1,type="l",ylab="Relative abundance estimate",xlab="Year",ylim=c(0,2.5)) lines(yr,coefs2,col="red") } plot.slope.ratio2 <- function(coefs1,coefs2,yr1,yr2,titl) { # base goes first, boat goes second windows(height=14,width=12) par(mfrow=c(2,1),mar=c(4,4,3,1)) yy <- c(yr1,yr2) yr <- seq(min(yy),max(yy),0.25) coefs1 <- coefs1[match(yr,yr1)] coefs2 <- coefs2[match(yr,yr2)] plot(yr,coefs1/coefs2,ylim=c(0,2),xlab="Year",ylab="Ratio of coefficients") title(main=titl,cex.main=1.2) lin <-lm(coefs1/coefs2 ~ yr) logl <-lm(log(coefs1/coefs2) ~ yr) lines(yr,exp(logl$coefficients[1] + logl$coefficients[2]*yr),lty=3) tt <- paste(prettyNum(100*logl$coefficients[2],digits=2,format="f"), "% \261 ",prettyNum(100*summary(logl)$coefficients[2,2],digits=2,format="f"),", p = ", prettyNum(summary(logl)$coefficients[2,4],digits=2,format="f"),sep="") text(min(yr)+10, 1.9, tt,font=2,col="red",cex=1.1) par(mar=c(5,4,1,1)) plot(yr,coefs1,type="l",ylab="Relative abundance estimate",xlab="Year",ylim=c(0,3.5)) lines(yr,coefs2,col="red") } plot.agg.slope.ratio <- function(coefs1,coefs2,aggyr,opyr,titl,lab1="coefs1",lab2="coefs2",fname=NULL) { # agg goes first, op goes second windows(height=14,width=12) par(mfrow=c(2,1),mar=c(4,4,3,1)) myr <- aggyr[match(opyr,aggyr)] coefs1 <- coefs1[match(opyr,aggyr)] plot(myr,coefs1/coefs2,ylim=c(0,2),xlab="Year",ylab="Ratio of coefficients") title(main=titl,cex.main=1.2) legend("bottomright",legend=paste(lab1,"/",lab2),col=1,lty=3) lin <-lm(coefs1/coefs2 ~ myr) logl <-lm(log(coefs1/coefs2) ~ myr) lines(myr,exp(logl$coefficients[1] + logl$coefficients[2]*myr),lty=3) tt <- paste(prettyNum(100*logl$coefficients[2],digits=2,format="f"), "% \261 ",prettyNum(100*summary(logl)$coefficients[2,2],digits=2,format="f"),", p = ", prettyNum(summary(logl)$coefficients[2,4],digits=2,format="f"),sep="") text(min(as.numeric(as.character(myr)))+10, 1.9, tt,font=2,col="red",cex=1.1) par(mar=c(5,4,1,1)) plot(myr,coefs1,type="l",ylab="Relative abundance estimate",xlab="Year",ylim=c(0,3.5)) lines(myr,coefs2,col="red") legend("topright",legend=c(lab1,lab2),col=1:2,lty=c(1,1)) if(is.null(fname)==F) savePlot(paste(fname,lab1,lab2),type=plotfile) } plot.res <- function(summ1,summ2,name1,name2) { nyrs <- length(grep("yrqtr",rownames(summ1$coefficients)))+1 coefs1 <- get.summ.coefs(summ1,nyrs) coefs2 <- get.summ.coefs(summ2,nyrs) yrs <- rownames(summ1$coefficients)[grep("yrqtr",rownames(summ1$coefficients))] yrs <- c(1980.25,as.numeric(substring(yrs,17))) windows(height=13,width=11) par(mfrow=c(2,1),mar=c(5,4,1,2)) plot.slope.ratio(coefs1,coefs2,yrs,"") plot(yrs,coefs1,type="l",ylab="Relative abundance estimate",xlab="Year",ylim=c(0,2.5)) lines(yrs,coefs2,col="red") legend("topleft",legend=c(name1, name2),lty=c(1,1),col=c("black","red")) } make_formula_SBT <- function(runsp,modtype,addboat,dohbf=T,splitboat=F,addcl=NA,addpca=NA,addhooks=T,addmoon=F,addmonth=F) { fmla <- "~ Year + latlong" if(dohbf) fmla <- paste(fmla,"+ ns(hbf,3)") modhead <- switch(modtype,logn=paste0("log(",runsp,"/hooks+mn)"),deltabin=paste(runsp,"!= 0"),negbin=runsp,deltapos=paste("log((",runsp,")/hooks)"),qp=runsp,propn=runsp) fmla <- paste(modhead,fmla) if(addhooks) fmla <- paste(fmla,"+ ns(hooks,5)") if(addboat & !splitboat) fmla <- paste(fmla,"+ vessid") if(addboat & splitboat) fmla <- paste(fmla,"+ splitvess") if(addmonth) fmla <- paste(fmla,"+ ns(month,df=3)") if(addmoon) fmla <- paste(fmla,"+ ns(moon,df=4)") if(!is.na(addcl)) fmla <- paste(fmla,"+ clust") if(!is.na(addpca)) fmla <- paste(fmla,"+",addpca) return(fmla) } mk_wts_Year <- function(dat,wttype,catch=NULL,sp=NULL) { if(wttype=="equal") wts <- NULL if(wttype=="propn") wts <- catch if(wttype=="area") { a <- tapply(dat$latlong,list(dat$latlong,dat$Year),length) i <- match(dat$latlong,rownames(a)) j <- match(dat$Year,colnames(a)) n <- mapply("[", list(a), i, j) wts <- 1/n } if(wttype=="catch") { if(is.null(catch)) catch <- tapply(dat[,sp],list(dat$latlong),sum) a <- tapply(dat$latlong,list(dat$latlong,dat$Year),length) i <- match(dat$latlong,rownames(a)) j <- match(dat$Year,colnames(a)) n <- mapply("[", list(a), i, j) cwts <- mapply("[", list(catch), i)/sum(catch) wts <- cwts/n } return(wts) } make_strat <- function(dat) { a <- tapply(dat$latlong,list(dat$latlong,dat$yrqtr),length) i <- match(dat$latlong,rownames(a)) j <- match(dat$yrqtr,colnames(a)) n <- mapply("[", list(a), i, j) return(n) } samp_data <- function(dat,n,nsamp) { p <- nsamp / n r <- runif(length(n)) d2 <- dat[r 0) gdat[is.na(gdat$hbf),]$hbf <- 5 #gdat <- gdat[gdat$hbf >= 5,] if(llstrat!=5) gdat$latlong <- paste(llstrat*floor(gdat$lat/llstrat),llstrat*floor(gdat$lon/llstrat),sep="_") if(mt=="deltapos") gdat <- gdat[gdat[,runsp] > 0,] a <- table(gdat$vessid,gdat$yrqtr) a <- apply(a>0,1,sum) table(a) a <- a[a >= minqtrs & a <= maxqtrs] gdat <- gdat[gdat$vessid %in% names(a),] a <- table(gdat$Year);a a <- a[a>=minperyr] gdat <- gdat[gdat$Year %in% names(a),] a <- table(gdat$vessid);a a <- a[a>=minpervess] gdat <- gdat[gdat$vessid %in% names(a),] a <- table(gdat$latlong);a a <- a[a>=minperll] gdat <- gdat[gdat$latlong %in% names(a),] a <- table(gdat$vessid);a a <- a[a>=minpervess] gdat <- gdat[gdat$vessid %in% names(a),] vars <- c("vessid","hooks","SBT", "hbf", "yrqtr", "Year","latlong","moon","month","BET","YFT") if(!is.na(addcl)) vars <- c(vars,addcl) if(!is.na(addpca)) vars <- c(vars,addpca) gdat <- gdat[,vars] if(!is.na(samp)) gdat <- samp_data2(gdat,samp) gdat$vessid <- factor(gdat$vessid) gdat$latlong <- factor(gdat$latlong) gdat$yrqtr <- factor(gdat$yrqtr,levels=seq(min(gdat$yrqtr),max(gdat$yrqtr),0.25)) gdat$Year <- factor(gdat$Year,levels=seq(min(gdat$Year),max(gdat$Year),1)) if(!is.na(addcl)) { gdat[,addcl] <- as.factor(gdat[,addcl]) gdat <- rename(gdat, clust = eval(addcl)) } return(gdat) } qqDist <- function (x, standardise = F, add.median = F, ...) { n <- length(x) seq.length <- min(1000, n) if (standardise) { SEQ <- seq(1, 2 * n + 1, length = seq.length)/2 U <- qnorm(qbeta(0.975, SEQ, rev(SEQ))) L <- qnorm(qbeta(0.025, SEQ, rev(SEQ))) if (add.median) M <- qnorm(qbeta(0.5, SEQ, rev(SEQ))) } else { SD <- sqrt(var(x) * (n + 1)/n) SEQ <- seq(1, 2 * n + 1, length = seq.length)/2 U <- mean(x) + SD * qt(qbeta(0.975, SEQ, rev(SEQ)), n - 1) L <- mean(x) + SD * qt(qbeta(0.025, SEQ, rev(SEQ)), n - 1) if (add.median) M <- mean(x) + SD * qt(qbeta(0.5, SEQ, rev(SEQ)), n - 1) } X <- qnorm((SEQ - 0.25)/(n + 0.5)) qqnorm(x, main = "", ...) lines(X, U, type = "l",col=2) lines(X, L, type = "l",col=2) if (add.median) lines(X, M, type = "l",col=2) invisible() } plotdiags <- function(res,ti="") { hist(res,nclass=200,freq=F,xlab="Residuals",main="",cex.axis=1.2,cex.lab=1.3,las=1) lines((-30:30)/10,dnorm((-30:30)/10,sd=sd(res)),col=2) sdres <- res/sd(res) qqDist(sdres,add.median=T,cex.axis=1.2,cex.lab=1.3,las=1) } plot_effects_SBT_yr <- function(model,indat,dovess=F,dohbf=T,domonth=F,domoon=F,addcl=F,addpca=F,mt,ti="") { # plot_effects_SBT_yr(mod,indat=dat,dovess=dovess,dohbf=T,domonth=T,domoon=T,mt=modtype, addcl=addcl, addpca=F); cf <- model$coefficients pred <- predict(model,data=indat,type="terms",se.fit=T) fishlab <- "SBT"; methlab <- switch(mt,deltabin="Delta-binomial",deltapos="Delta-positive",logn="Lognormal(+0.01)",negb="Negative binomial",propn="Proportion Bigeye") nfigs <- 3 + 2*dovess + dohbf + addcl + addpca + dohbf + domonth + domoon mf <- switch(nfigs-2,c(2,2),c(2,2),c(2,3),c(2,3),c(3,3),c(3,3),c(3,3),c(3,4)) hw <- c(14,19) windows(height=hw[1],width=hw[2]) par(mfrow=mf,mar=c(5,4,2,1),oma=c(0,0,3,0)) pr <- pred$fit ; prse <- pred$se.fit termlist <- dimnames(pred$fit)[[2]] llpos <- grep("latlong",termlist) hbfpos <- grep("hbf",termlist) vesspos <- grep("vessid",termlist) branchpos <- grep("branchline",termlist) monthpos <- grep("month",termlist) moonpos <- grep("moon",termlist) if(length(grep("hooks",termlist))>0) db <- T else db <- F index <- sort(unique(indat$Year)) b <- match(index,indat$Year) out <- exp(pr[b,1]) se1 <- exp(pr[b,1] - 1.96 * prse[b,1]) se2 <- exp(pr[b,1] + 1.96 * prse[b,1]) plot(as.numeric(as.character(index)),out,ylim=c(0,4),xlab="Year",ylab="Effect size",main="Year effects") segments(as.numeric(as.character(index)), se1, as.numeric(as.character(index)), se2, lty=1, col="slate grey") points(as.numeric(as.character(index)), out, pch=16) index <- sort(unique(indat$latlong)) b <- match(index,indat$latlong) out <- exp(pr[b,llpos]) se1 <- exp(pr[b,llpos] - 1.96 * prse[b,llpos]) se2 <- exp(pr[b,llpos] + 1.96 * prse[b,llpos]) plot(as.numeric(index),out,ylim=c(0,4),xlab="Latlong",ylab="Effect size",main="Spatial effects") segments(as.numeric(index), se1, as.numeric(index), se2, lty=1, col="slate grey") points(as.numeric(index), out, pch=16) ##plot coefficients library(maps) ll <- as.character(indat$latlong) index <- sort(unique(ll)) index2 <- unlist(strsplit(index,"\\_")) pos1 <- 2*(1:length(index)) -1 pos2 <- pos1 + 1 lats <- as.numeric(index2[pos1]) lons <- as.numeric(index2[pos2]) lons[lons > 190] <- lons[lons > 190] - 360 alat <- round(lats[match(ll,index)]*2)/2 + 2.5 alon <- round(lons[match(ll,index)]*2)/2 + 2.5 alat <- factoralong(alat,5) alon <- factoralong(alon,5) coefs <- exp(pr[match(ll,index),llpos]) coefs2 <- tapply(coefs, list(alon, alat), mean) edge1 <- as.numeric(dimnames(coefs2)[[1]]) edge2 <- as.numeric(dimnames(coefs2)[[2]]) image(edge1,edge2,coefs2, zlim=c(0.5, 2.5), ylab="Lat", xlab="Long",col=rev(heat.colors(12))) contour(edge1,edge2,coefs2, levels= c(0,1,2,2.5), add=TRUE,col=4) maps::map(add=TRUE,fill=T) if(dovess) { index <- sort(unique(indat$vessid)) b <- match(index,indat$vessid) out <- exp(pr[b,vesspos]) se1 <- exp(pr[b,vesspos] - 1.96 * prse[b,vesspos]) se2 <- exp(pr[b,vesspos] + 1.96 * prse[b,vesspos]) ind <- as.factor(index) plot(match(ind,ind),out,type="n",ylim=c(0,4),xlab="Vessel ID",ylab="Effect size",main="") segments(match(ind,ind), se1, match(ind,ind), se2, lty=1, col="slate grey") points(match(ind,ind), out, pch=16) vts <- tapply(pr[,vesspos],list(indat$yrqtr,indat$vessid),mean) y<-exp(as.vector(vts)) x<-rep(as.numeric(dimnames(vts)[[1]]),dim(vts)[[2]]) plot(x,y,type="p",ylim=c(0,3),xlab="Vessel ID",ylab="Effect size",main="Vessel effects",pch=16,cex=0.9) mn <- tapply(exp(pr[,vesspos]),indat$yrqtr,mean) lines(as.numeric(dimnames(vts)[[1]]), mn, col=2,lwd=2) } if(dohbf) { rsp <- pr[,hbfpos[1]] rsp.se <- prse[,hbfpos[1]] b <- match(sort(unique(indat$hbf)),indat$hbf) plot(indat$hbf[b],exp(pr[,hbfpos][b]),ylim=c(0,4),xlab="HBF",ylab="Effect size",main="HBF effects") lines(indat$hbf[b],exp(rsp[b]+2*rsp.se[b]),lty=2) lines(indat$hbf[b],exp(rsp[b]-2*rsp.se[b]),lty=2) } if(addcl) { index <- sort(unique(indat$clust)) b <- match(index,indat$clust) out <- exp(pr[b,1]) se1 <- exp(pr[b,1] - 1.96 * prse[b,1]) se2 <- exp(pr[b,1] + 1.96 * prse[b,1]) plot(as.numeric(as.character(index)),out,ylim=c(0,4),xlab="Cluster category",ylab="Effect size",main="Cluster effects") segments(as.numeric(as.character(index)), se1, as.numeric(as.character(index)), se2, lty=1, col="slate grey") points(as.numeric(as.character(index)), out, pch=16) } if(addpca) { ind <- indat[order(indat$pca),] out <- exp(pr[b,1]) se1 <- exp(pr[b,1] - 1.96 * prse[b,1]) se2 <- exp(pr[b,1] + 1.96 * prse[b,1]) plot(as.numeric(as.character(index)),out,ylim=c(0,4),xlab="Cluster category",ylab="Effect size",main="Cluster effects") segments(as.numeric(as.character(index)), se1, as.numeric(as.character(index)), se2, lty=1, col="slate grey") points(as.numeric(as.character(index)), out, pch=16) } if(domonth) { index <- sort(unique(indat$month)) b <- match(index,indat$month) out <- exp(pr[b,monthpos]) se1 <- exp(pr[b,monthpos] - 1.96 * prse[b,monthpos]) se2 <- exp(pr[b,monthpos] + 1.96 * prse[b,monthpos]) ind <- index plot(ind,out,type="n",ylim=c(0,3),xlab="Month",ylab="Effect size",main="") segments(ind, se1, ind, se2, lty=1, col="slate grey") points(ind, out, pch=16) } if(domoon) { index <- sort(unique(indat$moon)) b <- match(index,indat$moon) out <- exp(pr[b,moonpos]) se1 <- exp(pr[b,moonpos] - 1.96 * prse[b,moonpos]) se2 <- exp(pr[b,moonpos] + 1.96 * prse[b,moonpos]) ind <- index plot(ind,out,type="n",ylim=c(0,3),xlab="Lunar illumination",ylab="Effect size",main="") lines(ind, se1, lty=2, col="slate grey") lines(ind, se2, lty=2, col="slate grey") points(ind, out, pch=16) } title(main=ti,cex.main=1.5,outer=T) } storesumm <- function(mod,fname) { summry <- summary(mod) save(summry,file=paste(fname,".RData")) a <- capture.output(summry); cat(a,file=paste(fname,".txt"),sep="\n",append=F) } factoralong <- function(x,diffs) { stt <- min(x,na.rm=T) stp <- max(x,na.rm=T) x <- factor(x,levels=seq(stt,stp,diffs)) return(x) } summarize_and_store <- function(mod,dat,fname,modtype, addcl = F, plotfile = "png") { windows(); par(mfrow=c(2,1),mar=c(4,4,3,1)) # plot diagnostics plotdiags(mod$residuals,ti=paste(fname,modtype)); savePlot(paste(fname,modtype,"diags",sep="_"),type=plotfile) if(is.null(mod$model$vessid)) dovess <- FALSE else dovess <- TRUE plot_effects_SBT_yr(mod,indat=dat,dovess=dovess,dohbf=T,domonth=T,domoon=T,mt=modtype, addcl=addcl, addpca=F); savePlot(paste0(fname,"_",modtype,"_effects.png"),type="png") # nd <- make_newdat3(model=mod,datx=dat, addcl=addcl); save(nd,file=paste0(fname,"_",modtype,"_predictions.RData")) nd <- make_newdat_x(mod=mod,dat=dat); save(nd,file=paste0(fname,"_",modtype,"_predictions.RData")) summ <- summary(mod); save(summ,file=paste0(fname,"_",modtype,"_summary.RData")) save(mod,file=paste0(fname,"_",modtype,"_model.RData")) } #' Summarize and store GLM results for delta lognormal. #' #' The function summarizes and stores the reuslts of both components of a delta lognormal standardization #' @param modb The model result file from the binomial glm. #' @param modp The model result file from the lognormal positive glm. #' @param dat The dataset that was input to the binomial glm. #' @param datpos The dataset that was input to the lognormal positive glm. #' @param fname File name used for saving outputs. #' @param modlab Model label used in plots and for output filenames. #' @param dohbf Include hbf if TRUE. #' @param addcl Use clusters if TRUE. #' @param keepd Remove data from model object unless TRUE. #' summarize_and_store_dl <- function(modb, modp, dat, datpos, fname, modlab, dohbf = dohbf, addcl = addcl, addmon = domon, keepd = TRUE) { abin <- length(unique(dat$Year)) apos <- length(unique(datpos$Year)) summodb <- summary(modb) summodp <- summary(modp) save(summodb, summodp, file = paste(fname, "_", modlab, " summary_delta.RData", sep = "")) vessidx <- Mode(dat$vessid) latlongx <- Mode(dat$latlong) if ("clust" %in% names(dat)) newdat <- expand.grid(Year = sort(unique(dat$Year)), latlong = latlongx, hooks = median(dat$hooks), vessid = vessidx, clust = Mode(dat$clust), month = median(dat$month), moon=0) if (!"clust" %in% names(dat)) newdat <- expand.grid(Year = sort(unique(dat$Year)), latlong = latlongx, hooks = median(dat$hooks), vessid = vessidx, month = median(dat$month), moon=0) if (dohbf & "clust" %in% names(dat)) newdat <- expand.grid(Year = sort(unique(dat$Year)), latlong = latlongx, hooks = median(dat$hooks), vessid = vessidx, clust = Mode(dat$clust), hbf = median(dat$hbf), month = median(dat$month), moon=0) if (dohbf & !"clust" %in% names(dat)) newdat <- expand.grid(Year = sort(unique(dat$Year)), latlong = latlongx, hooks = median(dat$hooks), vessid = vessidx, hbf = median(dat$hbf), month = median(dat$month), moon=0) # assign('fmla.bin', as.formula(fmla.bin), envir = .GlobalEnv) predresp <- predict.glm(modb, newdata = newdat, type = "response", se.fit = T) predterms <- predict.glm(modb, newdata = newdat, type = "terms", se.fit = T) mn <- logit(mean(modb$y)) a <- predresp$fit - mean(predresp$fit) + mn predresp$fit2 <- inv.logit(a) ndbin = list(newdat = newdat, predresp = predresp, predterms = predterms) save(ndbin, file = paste0(fname, "_bin_", modlab, "_predictions.RData")) vessidx <- Mode(datpos$vessid) latlongx <- Mode(datpos$latlong) if (!dohbf & !"clust" %in% names(dat)) newdat <- expand.grid(Year = sort(unique(datpos$Year)), latlong = latlongx, hooks = median(datpos$hooks), vessid = vessidx, month = median(dat$month), moon=0) if (!dohbf & "clust" %in% names(dat)) newdat <- expand.grid(Year = sort(unique(datpos$Year)), latlong = latlongx, hooks = median(datpos$hooks), vessid = vessidx, clust = Mode(datpos$clust), month = median(dat$month), moon=0) if (dohbf & "clust" %in% names(dat)) newdat <- expand.grid(Year = sort(unique(datpos$Year)), latlong = latlongx, hooks = median(datpos$hooks), vessid = vessidx, clust = Mode(datpos$clust), hbf = median(datpos$hbf), month = median(dat$month), moon=0) if (dohbf & !"clust" %in% names(dat)) newdat <- expand.grid(Year = sort(unique(datpos$Year)), latlong = latlongx, hooks = median(datpos$hooks), vessid = vessidx, hbf = median(datpos$hbf), month = median(dat$month), moon=0) predresp <- predict(modp, newdata = newdat, type = "response", se.fit = T) predterms <- predict(modp, newdata = newdat, type = "terms", se.fit = T) ndpos = list(newdat = newdat, predresp = predresp, predterms = predterms) save(ndpos, file = paste0(fname, "_pos_", modlab, "_predictions.RData")) pbin <- ndbin$predresp$fit2 ppos <- ndpos$predresp$fit a <- na.omit(match(ndbin$newdat$Year, ndpos$newdat$Year)) # make indices pcoefs <- pbin[a] * exp(ppos) pcoefs <- pcoefs/mean(pcoefs) coefs.bin <- get.bin.coefs(modb, abin, dat) coefs.pos <- get.coefs(modp, apos) windows() par(mfrow = c(2, 1), mar = c(4, 4, 3, 1)) # plot diagnostics plotdiags(modp$residuals, ti = paste(fname, modlab)) savePlot(paste(fname, modlab, "diags.png", sep = "_"), type = "png") if (is.null(modb$xlevels$vessid)) dovess <- FALSE else dovess <- TRUE plot_effects_IO(modb, indat = dat, addcl = addcl, dohbf = dohbf) savePlot(paste0(fname, "_", modlab, "_bin_effects.png"), type = "png") plot_effects_IO(modp, indat = datpos, addcl = addcl, dohbf = dohbf) savePlot(paste0(fname, "_", modlab, "_pos_effects.png"), type = "png") mn <- logit(mean(modb$y)) opyr <- sort(unique(datpos$Year)) a <- match(names(coefs.pos), names(coefs.bin)) # make indices coefs <- coefs.bin[a] * coefs.pos save(coefs.bin, coefs.pos, coefs, pcoefs, file = paste(fname, "_", modlab, "_indices.RData", sep = "")) if (!keepd) { modb$data <- NULL modp$data <- NULL } save(modb, modp, file = paste(fname, "_", modlab, " moddelta.RData", sep = "")) gc() graphics.off() } Mode <- function(x) { # from http://stackoverflow.com/questions/2547402/standard-library-function-in-r-for-finding-the-mode ux <- unique(x) ux[which.max(tabulate(match(x, ux)))] } make_newdat3 <- function(model,datx, addcl=NA) { vessidx <- Mode(datx$vessid) latlongx <- Mode(datx$latlong) newdat <- expand.grid(Year=sort(unique(datx$Year)),vessid=vessidx,latlong=latlongx,hooks=median(datx$hooks),month=Mode(datx$month),moon=0.5) if(addcl) newdat <- expand.grid(Year=sort(unique(datx$Year)),vessid=vessidx,latlong=latlongx,hooks=median(datx$hooks),month=Mode(datx$month),moon=0.5, clust = levels(datx$clust)[1]) predresp <- predict(model,newdata=newdat,type="response",se.fit=T) predterms <- predict(model,newdata=newdat,type="terms",se.fit=T) return(list(newdat=newdat,predresp=predresp,predterms=predterms)) } #' Make newdat object. #' #' Function to make newdat object and predict responses from the glm with time area interactions. Originally developed for joint analyses in the IO. #' @param mod GLM object to predict for. #' @param dat Original dataset input to the glm. #' @return newdat. #' make_newdat_x <- function(mod, dat) { vessidx <- Mode(dat$vessid) latlongx <- Mode(dat$latlong) if (is.null(mod$formula)) form <- mod$terms else form <- mod$formula ndtxt <- "expand.grid(" if (isTRUE(grep("clust", form) >= 1)) ndtxt <- paste0(ndtxt, "clust=Mode(dat$clust),") if (isTRUE(grep("hbf", form) >= 1)) ndtxt <- paste0(ndtxt, "hbf=median(dat$hbf),") if (isTRUE(grep("month", form) >= 1)) ndtxt <- paste0(ndtxt, "month=median(dat$month),") if (isTRUE(grep("latlong", form) >= 1)) ndtxt <- paste0(ndtxt, "latlong=latlongx,") if (isTRUE(grep("moon", form) >= 1)) ndtxt <- paste0(ndtxt, "moon = median(dat$moon),") ndtxt <- paste0(ndtxt, "Year=sort(unique(dat$Year)),hooks=median(dat$hooks),vessid=vessidx)") newdat <- eval(parse(text = ndtxt)) yqloc <- match(newdat$Year, dat$Year) newdat$Year <- dat[yqloc, ]$Year newdat$qtr <- dat[yqloc, ]$qtr newdat$lat5 <- dat[match(newdat$latlong, dat$latlong), ]$lat5 predresp <- predict(mod,newdata=newdat,type="response",se.fit=T) predterms <- predict(mod,newdata=newdat,type="terms",se.fit=T) return(list(newdat=newdat,predresp=predresp,predterms=predterms)) } plot_catchmapx <- function(indat=a,vbl,dcd,latlim=c(-40,20),lonlim=c(20,140),brk=seq(0,1,.05),brk2=seq(0,1,.1),ti="",ti2=dcd,x.lab=c("20W","20E","60E","100E","140E"),y.lab=c("50S","40S","30S")) { plot(1:5,1:5,ylim=latlim,xlim=lonlim,type="n",xlab="Longitude",ylab="Latitude",cex.lab=1.4,axes = F) image(lonlim,latlim,matrix(1,nrow=2,ncol=2),add=T,col="light blue") indat <- cbind(indat,vbl) indat <- indat[indat$lonx <= 140,] div <- min(diff(sort(unique(indat$lonx)))) test <- indat[indat$decade==dcd,] if(dim(test)[1] > 0) { a1 <- with(indat[indat$decade==dcd,],tapply(vbl,list(factor(lonx,levels=seq(min(lonx),max(lonx),div)),factor(lat,levels=seq(min(lat),max(lat),div))),sum)) image(as.numeric(rownames(a1)),as.numeric(colnames(a1)),a1,add=T,col=rev(heat.colors(length(brk)-1)),breaks=brk) contour(as.numeric(rownames(a1)),as.numeric(colnames(a1)),a1,add=T,levels=brk2) maps::map(database="world",add=T, interior=F,fill=T) } title(paste(ti2,ti), cex.main=1.4) axis(1, at=seq(-20,140,40), labels = x.lab, cex.axis=1.1) axis(2, at=seq(-50,-30,10), labels = y.lab, cex.axis=1.1) box() } plot_cpuemapx <- function(indat=a,vb1,vb2,dcd,latlim=c(-40,40),lonlim=c(120,210),brk=seq(0,1,.05),brk2=seq(0,1,.1),ti="") { plot(1:5,1:5,ylim=latlim,xlim=lonlim,type="n",xlab="Longitude",ylab="Latitude") indat <- cbind(indat,vb1,vb2) indat <- indat[indat$lonx <= 210,] a1 <- with(indat[indat$decade==dcd,],tapply(vb1,list(lonx,lat),sum)/tapply(vb2,list(lonx,lat),sum)) image(as.numeric(rownames(a1)),as.numeric(colnames(a1)),a1,add=T,col=rev(heat.colors(length(brk)-1)),breaks=brk) contour(as.numeric(rownames(a1)),as.numeric(colnames(a1)),a1,add=T,levels=brk2) maps::map(database="worldHires",add=T, interior=F,fill=T) title(paste(dcd,ti)) } clust_PCA_run <- function(r,ddd=dat,allsp,allabs=allabs,regtype="regY",ncl,plotPCA=T,clustid="tripidmon",allclust=F,flag="JP",cllist=NA,fnhead="", covarnames=c("yrqtr","latlong","hooks","hbf","vessid","callsign","Total","lat","lon","lat5","lon5","moon","Year","month"), plotfile = "png"){ datr <- ddd[with(ddd,get(regtype))==r,allabs] datr$reg <- datr[,regtype] spec_dat <- datr[,allsp] #extract catch composition spec_dat$sum <- apply(spec_dat, 1,sum) datr <- datr[spec_dat$sum > 0,] if(sum(datr$OTH)==0) datr$OTH[1] <- 1 if(sum(datr$SFA)==0) datr$SFA[1] <- 1 if(sum(datr$BLM)==0) datr$BLM[1] <- 1 pcaset <- PCA_by_set(datr,Screeplotname=paste0(fnhead," ","screeplot set"),allsp) pcatrp <- PCA_by_trip(datr,Screeplotname=paste0(fnhead," ","screeplot trip"),allsp) #################### Clustering datr <- data.frame(datr) cldat <- make_clusters(setdat=data.frame(datr),spp=allsp,ncl=ncl,titx=paste0("Statistical area ",r),setclust=F,tripid=clustid,fname=fnhead,regtype=regtype, plotfile = plotfile) plot_km_deviance_trip(ddd=datr,allsp,r=r,ti=fnhead,regtype=regtype) # outputs datprop <- cldat$setdat datprop[,allsp] <- cldat$setdat[,allsp] / apply(cldat$setdat[,allsp],1,sum) a1 <- aggregate(formula=as.formula(paste0("cbind(",paste(allsp,collapse=","),") ~ kcltrp")),data=datprop,FUN=mean); names(a1)[1] <- "cluster";a1$ctype<- "kcltrp" a2 <- aggregate(formula=as.formula(paste0("cbind(",paste(allsp,collapse=","),") ~ clrtrp")),data=datprop,FUN=mean); names(a2)[1] <- "cluster";a2$ctype<- "clrtrp" a3 <- aggregate(formula=as.formula(paste0("cbind(",paste(allsp,collapse=","),") ~ hcltrp")),data=datprop,FUN=mean); names(a3)[1] <- "cluster";a3$ctype<- "hcltrp" a4 <- aggregate(formula=as.formula(paste0("cbind(",paste(allsp,collapse=","),") ~ kclset")),data=datprop,FUN=mean); names(a4)[1] <- "cluster";a4$ctype<- "kclset" a5 <- aggregate(formula=as.formula(paste0("cbind(",paste(allsp,collapse=","),") ~ FT")),data=datprop,FUN=mean); names(a5)[1] <- "cluster";a5$ctype<- "FT" a6 <- aggregate(formula=as.formula(paste0("cbind(",paste(allsp,collapse=","),") ~ clrset")),data=datprop,FUN=mean); names(a6)[1] <- "cluster";a6$ctype<- "clrset" a <- rbind(a1,a2,a3,a4,a5,a6) write.csv(a,file=paste(fnhead,"cluster proportions",clustid,".csv",sep="_")) ll5 <- F; dbh <- T ###### Create datasets ###################################################### # covarnames <- c("yrqtr","latlong","hooks","hbf","vessid","callsign","Total","lat","lon","lat5","lon5","reg","moon","Year","month") covarnames <- c(covarnames,"reg") dataset <- cbind(datr[,covarnames],datr[,allsp],pcaset$pcs[,1:6],pcaset$pcsBin[,1:6],pcatrp$pcs[,1:6],pcatrp$pcsBin[,1:6],cldat$setdat[,c("FT","kcltrp","clrtrp","hcltrp","kclset","clrset","hclset")]) if (!regtype %in% names(dataset)) { dataset <- cbind(dataset, regtype = dataset$reg) names(dataset) <- gsub("regtype",regtype, names(dataset)) } if(!is.na(cllist[1])) dataset <- cbind(datr[,covarnames],datr[,allsp],cldat$setdat[,cllist]) ti=paste(fnhead,clustid,sep="_") # Examine factors associated with each cluster or PCA boxplots_spCL_comp(dat=dataset,cl="hcltrp",ti=ti,outL=F,nsp=length(allsp),regtype=regtype,r=r, plotfile = plotfile) beanplots_spCL_comp(dat=dataset,cl="hcltrp",ti=ti,outL=F,nsp=length(allsp),regtype=regtype,r=r, plotfile = plotfile) boxplots_CL(dat=dataset,cl="hcltrp",ti=ti,outL=F,dohbf=dbh,lat5=ll5,regtype=regtype,r=r, plotfile = plotfile) boxplots_CL_bean(dat=dataset,cl="hcltrp",ti=ti,outL=F,dohbf=dbh,lat5=ll5,regtype=regtype,r=r, plotfile = plotfile) map_clusters(ddd=dataset,cl="hcltrp",ti=ti,lat5=ll5,regtype=regtype,r=r,ncl=ncl, plotfile = plotfile) if(allclust) { boxplots_spCL_comp(dat=dataset,cl="kcltrp",ti=ti,outL=F,nsp=length(allsp),regtype=regtype,r=r, plotfile = plotfile) boxplots_spCL_comp(dat=dataset,cl="clrtrp",ti=ti,outL=F,nsp=length(allsp),regtype=regtype,r=r, plotfile = plotfile) boxplots_spCL_comp(dat=dataset,cl="kclset",ti=ti,outL=F,nsp=length(allsp),regtype=regtype,r=r, plotfile = plotfile) boxplots_spCL_comp(dat=dataset,cl="clrset",ti=ti,outL=F,nsp=length(allsp),regtype=regtype,r=r, plotfile = plotfile) boxplots_CL(dat=dataset,cl="kcltrp",ti=ti,outL=F,dohbf=dbh,lat5=ll5,regtype=regtype,r=r, plotfile = plotfile) boxplots_CL(dat=dataset,cl="clrtrp",ti=ti,outL=F,dohbf=dbh,lat5=ll5,regtype=regtype,r=r, plotfile = plotfile) boxplots_CL(dat=dataset,cl="FT",ti=ti,outL=F,dohbf=dbh,lat5=ll5,regtype=regtype,r=r, plotfile = plotfile) boxplots_CL(dat=dataset,cl="kclset",ti=ti,outL=F,dohbf=dbh,lat5=ll5,regtype=regtype,r=r, plotfile = plotfile) boxplots_CL(dat=dataset,cl="clrset",ti=ti,outL=F,dohbf=dbh,lat5=ll5,regtype=regtype,r=r, plotfile = plotfile) } graphics.off() if(plotPCA) { boxplots_PCA(dat=dataset,nPCA=4,ti=ti,dohbf=dbh,lat5=ll5,regtype=regtype,r=r) boxplots_spPCA(dat=dataset,nPCA=4,ti=ti,nsp=8,regtype=regtype,r=r) mapPCA(ddd=dataset,nPCA=4,ti=ti,lat5=ll5,regtype=regtype,r=r) boxplots_TPCA(dat=dataset,nPCA=4,ti=ti,dohbf=dbh,lat5=ll5) boxplots_spTPCA(dat=dataset,nPCA=4,ti=ti,nsp=8) mapTPCA(ddd=dataset,nPCA=4,ti=ti,lat5=ll5) graphics.off() } return(invisible(dataset)) } PCA_by_set <- function(datr,Screeplotname="screeplot set",allsp) { spec_dat <- datr[,allsp] #extract catch composition spec_dat$sum <- apply(spec_dat, 1,sum) nspec <- length(allsp) dat2 <- datr[spec_dat$sum > 0,] mmult_dat <- spec_dat[spec_dat$sum>0,] pca_dat <- (sqrt(sqrt(mmult_dat[,1:nspec]/mmult_dat$sum))) ## prepare species composition for PCA apply(spec_dat,2,sum) pca<-prcomp(pca_dat, scale=T) #run PCA pr_pca <- predict(pca,pca_dat) # Predict PCA Loadings pcs <- data.frame(pr_pca[,1:nspec]) eig = summary(pca)$sdev^2 # get Eigenvalue OCtest = nScree(eig) # run Optimal Coordinates test windows() plotnScree(OCtest);par(col="black") savePlot(Screeplotname,type="png") nPC = min(OCtest$Components$nkaiser,OCtest$Components$noc) # retain number of PCs in combination with Eigenvalue > 1 pcsBin = ifelse(pcs>0.5,1,0) ### GLM-DPC variable as binary PCs dimnames(pcsBin)[[2]] <- paste0("B",names(pcs)) return(PCAset=list(pcs=pcs,pcsBin=pcsBin)) } # PCA by trip PCA_by_trip <- function(datr,Screeplotname="screeplot set",allsp) { spec_dat <- datr[,allsp] #extract catch composition spec_dat$sum <- apply(spec_dat, 1,sum) nspec <- length(allsp) dat2 <- datr[spec_dat$sum > 0,] mmult_dat <- spec_dat[spec_dat$sum>0,] dat2$TRIP_NUM <- dat2$tripidmon tpagg <- aggregate_by_trip(dat2,allsp) pca_tpdat <- (sqrt(sqrt(tpagg[,allsp]))) ## prepare species composition for PCA pca <- prcomp(pca_tpdat, scale=T) #run PCA pr_pca <- predict(pca,pca_tpdat) # Predict PCA Loadings pcs <- data.frame(pr_pca[,1:nspec]) names(pcs) <- paste0("T",names(pcs)) setpcs <- pcs[match(dat2$TRIP_NUM,tpagg$TRIP_NUM),] eigtp = summary(pca)$sdev^2 # get Eigenvalue OCtesttp = nScree(eigtp) # run Optimal Coordinates test windows() plotnScree(OCtesttp);par(col="black") savePlot(Screeplotname,type="png") nPC = min(OCtesttp$Components$nkaiser,OCtesttp$Components$noc) # retain number of PCs in combination with Eigenvalue > 1 pcsBin = ifelse(pcs>0.5,1,0) ### GLM-DPC variable as binary PCs dimnames(pcsBin)[[2]] <- paste0("B",names(pcs)) setpcsBin <- pcsBin[match(dat2$TRIP_NUM,tpagg$TRIP_NUM),] return(PCAtrip=list(pcs=setpcs,pcsBin=setpcsBin)) } make_clusters <- function(setdat=dat1,spp=allsp,ncl=5,titx="",setclust=T,tripid="tripid",fname="",regtype="regY",plotfile="png") { setdat$TRIP_NUM <- as.vector(setdat[,tripid]) spec_dat <- setdat[,allsp] #extract catch composition spec_dat$sum <- apply(spec_dat, 1,sum) nspec <- length(allsp) dat2 <- setdat[spec_dat$sum > 0,] mmult_dat <- spec_dat[spec_dat$sum>0,] clus_dat <- mmult_dat[,1:nspec]/mmult_dat$sum # raw proportions FT = kmeans(clus_dat,ncl)$cluster aset <- na.omit(setdat[,spp]) aset <- scale(aset[,spp]) # rescaled data indat <- aggregate_by_trip(dat2,spp) atrp <- na.omit(indat); #NbClust(atrp[,spp],method="ward.D");flush.console() atrp <- scale(atrp[,spp]) dtrp <- dist(atrp, method = "euclidean"); fittrp <- hclust(dtrp, method="ward.D") plot(fittrp, labels = FALSE, hang=-1, main = titx, xlab="", sub="", cex.lab=1.2, axes=F) # display dendogram #looks like 3 (or 4) axis(2,cex.axis=1.2,lwd = 2) grptrp <- cutree(fittrp, k=ncl) # cut tree into ncl clusters print(table(grptrp)) rect.hclust(fittrp, k=ncl, border="red") savePlot(paste0(fname,"_hclusters_trip"),type=plotfile) if(setclust) { dset <- dist(aset, method = "euclidean") # distance matrix fitset <- hclust(dset, method="ward.D") windows() plot(fitset, labels = FALSE, hang=-1, main = paste(titx,"set")) # display dendogram #looks like 3 (or 4) grpset <- cutree(fitset, k=ncl) # cut tree into ncl clusters print(table(grpset)) rect.hclust(fitset, k=ncl, border="red") savePlot(paste0(fname,"_hclusters_trip"),type=plotfile) } claratrp <- clara(atrp,ncl) #clustering based upon the percent of spp in total catch of tuna claraset <- clara(aset,ncl) #clustering based upon the percent of spp in total catch of tuna kmtrp <- kmeans(atrp,centers=ncl,iter.max = 60) kmset <- kmeans(aset,centers=ncl,iter.max = 60) setdat$kcltrp <- kmtrp$cluster[match(setdat$TRIP_NUM,indat$TRIP_NUM)] setdat$clrtrp <- claratrp$clustering[match(setdat$TRIP_NUM,indat$TRIP_NUM)] setdat$hcltrp <- grptrp[match(setdat$TRIP_NUM,indat$TRIP_NUM)] setdat$kclset <- kmset$cluster setdat$FT <- FT setdat$clrset <- claraset$clustering if(setclust) setdat$hclset <- grpset else setdat$hclset <- NA return(list(d=dtrp,fit=fittrp,clarax=claratrp,setdat=setdat)) } plot_km_deviance <- function(dat,allsp,r,ti,regtype="sarea",plotfile="png") { a <- scale(dat[,allsp]) wss <- (nrow(a)-1)*sum(apply(a[,allsp],2,var)) for (i in 2:15) wss[i] <- sum(kmeans(a[,allsp],centers=i,iter.max = 40)$withinss) plot(1:15, wss, type="b", xlab="Number of Clusters",ylab="Within groups sum of squares",main=paste("Region",r)) savePlot(paste0(ti,"_plot_km_deviance",".png"),type=plotfile) } plot_km_deviance_trip <- function(ddd,allsp,r,ti,regtype="regY",plotfile="png") { ddd$TRIP_NUM <- ddd$tripidmon indat <- aggregate_by_trip(ddd,allsp) a <- indat[,allsp] # a <- scale(indat[,allsp]) wss <- (nrow(a)-1)*sum(apply(a[,allsp],2,var)) for (i in 2:15) wss[i] <- sum(kmeans(a[,allsp],centers=i,iter.max = 40)$withinss) plot(1:15, wss, type="b", xlab="Number of Clusters",ylab="Within groups sum of squares",main=paste("Region",r)) savePlot(paste0(ti,"_plot_km_deviance_trip.",plotfile),type=plotfile) } boxplots_spCL_comp <- function(dat=dataset,cl="kmeans",ti="",outL=T,nsp=13,regtype=regtype,r=r,plotfile="png") { if(nsp==8) { windows(30,20);par(mfrow=c(2,4),mar=c(3,3,3,3)) } if(nsp %in% 9:12){ windows(30,20);par(mfrow=c(3,4),mar=c(3,3,3,3)) } if(nsp==13){ windows(30,20);par(mfrow=c(3,5),mar=c(3,3,3,3)) } wd=table(dat[,cl]) for(sp in allsp) { boxplot(dat[,sp]/dat$Total ~ dat[,cl],width=wd,main=sp,outline=outL,pars = list(boxwex = 1),ylim=c(0,1)) } title(paste(gsub("_"," ",ti),cl),outer=T,line=-1,cex.main=1.5) savePlot(paste0(ti,"_boxplots_spCLcomp",cl,".",plotfile),type=plotfile) } beanplots_spCL_comp <- function(dat=dataset,cl="kmeans",ti="",outL=T,nsp=13,regtype=regtype,r=r,plotfile="png") { if(nsp==8) { windows(30,20);par(mfrow=c(2,4),mar=c(3,3,3,3),oma=c(0,0,2,0)) } if(nsp %in% 9:12){ windows(30,20);par(mfrow=c(3,4),mar=c(3,3,3,3),oma=c(0,0,2,0)) } if(nsp==13){ windows(30,20);par(mfrow=c(3,5),mar=c(3,3,3,3),oma=c(0,0,2,0)) } wd=table(dat[,cl]) for(sp in allsp) { beanplot(dat[,sp]/dat$Total ~ dat[,cl],bw=.01,what=c(1,1,1,0),ylim=c(0,1),log="",main=sp,cex.main=1.5,cex.axis=1.4) #boxplot(dat[,sp]/dat$Total ~ dat[,cl],width=wd,main=sp,outline=outL,pars = list(boxwex = 1),ylim=c(0,1)) } title(paste("Statistical area ",r),outer=T,line=0.5,cex.main=2) nm <- paste0(ti,"_beanplots_spCLcomp",cl) savePlot(paste0(nm,".",plotfile),type=plotfile) } boxplots_CL <- function(dat=dataset,cl="kmeans",ti="",outL=T,dohbf=T,lat5=F,regtype=regtype,r=r,plotfile="png") { windows(30,20);par(mfrow=c(2,3),mar=c(3,3,3,3),oma=c(0,0,2,0)) wd=table(dat[,cl]) boxplot(dat$Year ~ dat[,cl],width=wd,main="Year",outline=outL,pars = list(boxwex = 1)) if(lat5) { boxplot(dat$lat5 ~ dat[,cl],width=wd,main="Lat",outline=outL,pars = list(boxwex = 1)) boxplot(dat$lon5x ~ dat[,cl],width=wd,main="Lon",outline=outL,pars = list(boxwex = 1)) } else { boxplot(dat$lat ~ dat[,cl],width=wd,main="Lat",outline=outL,pars = list(boxwex = 1)) boxplot(dat$lonx ~ dat[,cl],width=wd,main="Lon",outline=outL,pars = list(boxwex = 1)) } boxplot(dat$hooks ~ dat[,cl],width=wd,main="Hooks",outline=outL,pars = list(boxwex = 1)) if(dohbf) boxplot(dat$hbf ~ dat[,cl],width=wd,main="HBF",outline=outL,pars = list(boxwex = 1)) boxplot(dat$month ~ dat[,cl],width=wd,main="Months",outline=outL,pars = list(boxwex = 1)) title(paste(gsub("_"," ",ti),cl),outer=T,line=1,cex.main=1.5) savePlot(paste0(ti,"_boxplots_CL",cl,".",plotfile),type=plotfile) } boxplots_CL_bean <- function(dat=dataset,cl="kmeans",ti="",outL=T,dohbf=T,lat5=F,regtype=regtype,r=r,plotfile="png") { windows(30,20);par(mfrow=c(2,3),mar=c(3,3,3,3),oma=c(0,0,2,0)) wd=table(dat[,cl]) beanplot(dat$Year ~ dat[,cl],bw=.5,what=c(1,1,1,0),ylim=c(min(dat$Year,na.rm=T)-.5,max(dat$Year,na.rm=T)+.5),log="",main="Year", cex.main=1.5, cex.axis = 1.4) if(lat5) { beanplot(dat$lat5 ~ dat[,cl],bw=1.25,what=c(1,1,1,0),ylim=c(min(dat$lat5,na.rm=T)-2.5,max(dat$lat5,na.rm=T)+2.5),log="",main="Latitude", cex.main=1.5, cex.axis = 1.4) beanplot(dat$lon5x ~ dat[,cl],bw=1.25,what=c(1,1,1,0),ylim=c(min(dat$lon5,na.rm=T)-2.5,max(dat$lon5,na.rm=T)+2.5),log="",main="Longitude", cex.main=1.5, cex.axis = 1.4) } else { beanplot(dat$lat ~ dat[,cl],bw=.5,what=c(1,1,1,0),ylim=c(min(dat$lat,na.rm=T)-.5,max(dat$lat,na.rm=T)+.5),log="",main="Latitude", cex.main=1.5, cex.axis = 1.4) beanplot(dat$lonx ~ dat[,cl],bw=.5,what=c(1,1,1,0),ylim=c(min(dat$lonx,na.rm=T)-.5,max(dat$lonx,na.rm=T)+.5),log="",main="Longitude", cex.main=1.5, cex.axis = 1.4) } beanplot(dat$hooks ~ dat[,cl],bw=50,what=c(1,1,1,0),ylim=c(min(dat$hooks,na.rm=T)-.5,max(dat$hooks,na.rm=T)+.5),log="",main="Hooks", cex.main=1.5, cex.axis = 1.4) if(dohbf) beanplot(dat$hbf ~ dat[,cl],bw=.5,what=c(1,1,1,0),ylim=c(min(dat$hbf,na.rm=T)-.5,max(dat$hbf,na.rm=T)+.5),log="",main="HBF", cex.main=1.5, cex.axis = 1.4) beanplot(dat$month ~ dat[,cl],bw=.5,what=c(1,1,1,0),log="",main="Months",ylim=c(.5,12.5), cex.main=1.5, cex.axis = 1.4) title(paste("Statistical area ",r),outer=T,line=0.5,cex.main=2) savePlot(paste0(ti,"_boxplots_CL",cl,"_bean.",plotfile),type=plotfile) } boxplots_PCA <- function(dat,nPCA=3,ti="",dohbf=T,lat5=F,regtype="regY",r=r,plotfile="png") { for (ppc in paste0("PC",1:nPCA)) { pp <- with(dat,get(ppc)) windows(width=30,height=20);par(mfrow=c(2,3)) boxplot(pp ~ yrqtr,data=dat,main="YQ") if(lat5) { boxplot(pp ~ (floor(lat5)),data=dat,main="LAT") boxplot(pp ~ (floor(lon5)),data=dat,main="LON") } else { boxplot(pp ~ (floor(lat)),data=dat,main="LAT") boxplot(pp ~ (floor(lon)),data=dat,main="LON") } if(dohbf) boxplot(pp ~ (floor(hbf)),data=dat,main="HBF") boxplot(pp ~ eval(100*floor(hooks/100)),data=dat,main="Hooks") boxplot(pp ~ vessid,data=dat,main="Vessel") title(paste(gsub("_"," ",ti),ppc),outer=T,line=-1,cex.main=1.5) savePlot(paste0(ti,"_boxplots_",ppc,".",plotfile),type=plotfile) } } boxplots_TPCA <- function(dat,nPCA=6,ti="",dohbf=T,lat5=F,regtype="regY",r=r,plotfile="png") { for (ppc in paste0("TPC",1:nPCA)) { pp <- with(dat,get(ppc)) windows(20,14);par(mfrow=c(2,4),oma=c(0,0,2,0)) boxplot(pp ~ yrqtr,data=dat,main="YQ") if(lat5) { boxplot(pp ~ (floor(lat5)),data=dat,main="LAT") boxplot(pp ~ (floor(lon5)),data=dat,main="LON") } else { boxplot(pp ~ (floor(lat)),data=dat,main="LAT") boxplot(pp ~ (floor(lon)),data=dat,main="LON") } if(dohbf) boxplot(pp ~ (floor(hbf)),data=dat,main="HBF") boxplot(pp ~ eval(100*floor(hooks/100)),data=dat,main="Hooks") boxplot(pp ~ vessid,data=dat,main="Vessel") title(paste(gsub("_"," ",ti),ppc),outer=T,line=1,cex.main=1.5) savePlot(paste0(ti,"_boxplots_",ppc,".",plotfile),type=plotfile) } } boxplots_spPCA <- function(dat=dataset,nPCA=6,ti="",outL=T,nsp=13,regtype="regY",r=r,plotfile="png") { for (ppc in paste0("PC",1:nPCA)) { pp <- with(dat,get(ppc)) notdone<- T bk <- quantile(pp, seq(0,1,length.out=11),include.lowest = TRUE) if(length(bk)==length(unique(bk))) ppq <- cut(pp,breaks = bk) else ppq <- cut(pp,breaks = 11) if(nsp==8){ windows(30,20);par(mfrow=c(2,4),mar=c(3,3,3,3),oma=c(0,0,2,0))} if(nsp==12){ windows(30,20);par(mfrow=c(3,4),mar=c(3,3,3,3),oma=c(0,0,2,0))} if(nsp==13){ windows(30,20);par(mfrow=c(3,5),mar=c(3,3,3,3),oma=c(0,0,2,0))} for(sp in allsp) { boxplot(dat[,sp]/dat$Total ~ ppq,main=sp,outline=outL,ylim=c(0,1)) } title(paste(gsub("_"," ",ti),ppc),outer=T,line=1,cex.main=1.5) savePlot(paste0(ti,"_boxplots_spPCA",ppc,".", plotfile),type=plotfile) } } boxplots_spTPCA <- function(dat=dataset,nPCA=6,ti="",outL=T,nsp=13,regtype="regY",r=r,plotfile="png") { for (ppc in paste0("TPC",1:nPCA)) { pp <- with(dat,get(ppc)) ppq <- cut(pp,breaks = quantile(pp, seq(0,1,length.out=11),include.lowest = TRUE)) if(nsp==8){ windows(30,20);par(mfrow=c(2,4),mar=c(3,3,3,3),oma=c(0,0,2,0))} if(nsp==12){ windows(30,20);par(mfrow=c(3,4),mar=c(3,3,3,3),oma=c(0,0,2,0))} if(nsp==13){ windows(30,20);par(mfrow=c(3,5),mar=c(3,3,3,3),oma=c(0,0,2,0))} for(sp in allsp) { boxplot(dat[,sp]/dat$Total ~ ppq,main=sp,outline=outL,ylim=c(0,1)) } title(paste(gsub("_"," ",ti),ppc),outer=T,line=1,cex.main=1.5) savePlot(paste0(ti,"_boxplots_spTPCA",ppc,".",plotfile),type=plotfile) } } mapPCA <- function(ddd,nPCA=6,ti="",lat5=F,regtype="regY",r=r,plotfile="png") { windows(20,14);par(mfrow=c(2,2)) for (ppc in paste0("PC",1:nPCA)) { pp <- with(ddd,get(ppc)) if(lat5) { pcm <- with(ddd,tapply(pp,list(floor(lon5),floor(lat5)),mean)) } else { pcm <- with(ddd,tapply(pp,list(floor(lon),floor(lat)),mean)) } lonn <- as.numeric(dimnames(pcm)[[1]])+0.5 latn <- as.numeric(dimnames(pcm)[[2]])+0.5 image(lonn,latn,pcm) contour(lonn,latn,pcm,add=T) maps::map(database="world2Hires",add=T) title(ppc,line=-2,cex.main=1.5) } title(paste(gsub("_"," ",ti)),outer=T,line=-1,cex.main=1.5) savePlot(paste0(ti,"_map_PCs",".", plotfile),type=plotfile) } mapTPCA <- function(ddd,nPCA=6,ti="",lat5=F,regtype="regY",r=r,plotfile="png") { windows(20,14);par(mfrow=c(2,2)) for (ppc in paste0("TPC",1:nPCA)) { pp <- with(ddd,get(ppc)) if(lat5) { pcm <- with(ddd,tapply(pp,list(floor(lon5),floor(lat5)),mean)) } else { pcm <- with(ddd,tapply(pp,list(floor(lon),floor(lat)),mean)) } lonn <- as.numeric(dimnames(pcm)[[1]])+0.5 latn <- as.numeric(dimnames(pcm)[[2]])+0.5 image(lonn,latn,pcm) contour(lonn,latn,pcm,add=T) maps::map(database="world2Hires",add=T) title(ppc,line=-2,cex.main=1.5) } title(paste(regtype,r,ti),outer=T,line=-1,cex.main=1.5) savePlot(paste0(ti,"_map_TPCs",".", plotfile),type=plotfile) } map_clusters <- function(ddd,cl="hclustcl",ti="",lat5=F,regtype="regY",ncl,r=r,plotfile="png") { if(ncl <= 4) { windows(20,14);par(mfrow=c(2,2),mar=c(3,3,3,3),oma=c(0,0,2,0)) } if(ncl %in% c(5,6)) { windows(20,20);par(mfrow=c(3,2),mar=c(3,3,3,3),oma=c(0,0,2,0))} for (clx in 1:ncl) { if(lat5) { add5=2.5 lo <- floor(ddd$lonx5) la <- floor(ddd$lat5) } else { add5=.5 lo <- floor(ddd$lonx) la <- floor(ddd$lat) } lo <- factor(lo,levels=seq(min(lo,na.rm=T),max(lo,na.rm=T),2*add5)) la <- factor(la,levels=seq(min(la,na.rm=T),max(la,na.rm=T),2*add5)) clm <- tapply(ddd[,cl]==clx,list(lo,la),mean) lonn <- as.numeric(levels(lo)) latn <- as.numeric(levels(la)) ylm <- range(latn) ylm[1] <- max(ylm[1],-50) x.lab <- c("10W", "0", "10E", "20E", "30E", "40E", "50E", "60E", "70E", "80E", "90E", "100E", "110E") y.lab <- c("48S", "44S", "40S", "36S") image(lonn,latn,clm,ylim=ylm,axes=F) axis(1, at=seq(-10,110,10), labels = x.lab, cex.axis=1.3) axis(2, at=seq(-48,-36,4), labels = y.lab, cex.axis=1.3) box() contour(lonn,latn,clm,add=T) maps::map(database="world2",add=T,fill=T) title(paste("Cluster ",clx),line=1,cex.main=1.5) } title(paste("Statistical area ",r),outer=T,line=0.5,cex.main=2) savePlot(paste0(ti,"_mapclust_",cl,".", plotfile),type=plotfile) } unfactor <- function(x) as.numeric(as.character(x))