# package CATSregression will be used 
# it can be istalled for GitHub by
# devtools::install_github("BottaDZ/CATSregression"
library(CATSregression)


# Example 1
set.seed(894531)

trait<-rnorm(20,10,3) # traits of 20 species are random numbers from normal distribution with mean=10, sd=2

# species abundaces in 50 plots are random numbers from negative binomial distribution, 
# whose mean is a function of trait
abund<-vector()
for (i in 1:50) abund<-rbind(abund,rnbinom(n=20,mu=exp(0.5*trait),size=1))

# fitting model with Poisson distribution, which is the default family
# est.prior=F because we don't want to include meta-community level relative abundances into the model
m.pois<-CATSregression(abund,trait,est.prior=F)

# fitting model with negative binomial distribution
m.negbin<-CATSregression(abund,trait,est.prior=F,family="nb")

# drawing Figure 1
par(mfrow=c(2,2))
plot(predict(m.pois),resid(m.pois),log="x",xlab="Fitted values",ylab="Dunn-Smyth residuals",main="Poisson model",cex.axis=1.2,cex.lab=1.4,cex.main=1.4,pch=19)
plot(predict(m.negbin),resid(m.negbin),log="x",xlab="Fitted values",ylab="Dunn-Smyth residuals",main="Negative binomial model",cex.axis=1.2,cex.lab=1.4,cex.main=1.4,pch=19)
qqnorm(resid(m.pois),ylab="Dunn-Smyth residuals",main="",cex.axis=1.2,cex.lab=1.4,cex.main=1.4,pch=19)
qqline(resid(m.pois),lwd=2)
qqnorm(resid(m.negbin),ylab="Dunn-Smyth residuals",main="",cex.axis=1.2,cex.lab=1.4,cex.main=1.4,pch=19)
qqline(resid(m.negbin),lwd=2)


# models' coefficients can easily get by the coef function
# the second column will be used (the first one is the intercepts)
coef.pois<-coef(m.pois)[,2]
coef.negbin<-coef(m.negbin)[,2]

# getting confidence intervals is more tricky (function confint for CATS models has not been implemented yet)
# packages dplyr and mgcv.helper will be needed
# the later could be installed from github:
# devtools::install_github("samclifford/mgcv.helper")
library(dplyr)
library(mgcv.helper)


# then we can get the confidence intervals
conf.pois<-sapply(m.pois$model, function(x) as.matrix(as.data.frame(confint(x))[2,5:6]))
conf.negbin<-sapply(m.negbin$model, function(x) as.matrix(as.data.frame(confint(x))[2,5:6]))

# drawing Figure 2
par(mfrow=c(1,2))
plot(1:50,coef.pois,type="n",ylim=c(0,1.2),ylab="estimated slope",xlab="",main="Possion model",axes=F,cex.axis=1.4,cex.lab=1.6,cex.main=1.4)
points(1:50,coef.pois)
arrows(1:50, conf.pois[1,], 1:50, conf.pois[2,], 
                angle = 90, lty = 1, code = 3, length = 0.05)
axis(2,cex.axis=1.4)                
abline(0.5,0,lwd=2,col="red")

plot(1:50,coef.negbin,type="n",ylim=c(0,1.2),ylab="estimated slope",xlab="",main="negative binomial model",axes=F,cex.lab=1.6,cex.main=1.4)
points(1:50,coef.negbin)
arrows(1:50, conf.negbin[1,], 1:50, conf.negbin[2,], 
                angle = 90, lty = 1, code = 3, length = 0.05)
axis(2,cex.axis=1.4)
abline(0.5,0,lwd=2,col="red")


##################################################################

# Example 2
rm(list=ls(all=TRUE))
data(Raevel)  # this dataset available in the CATSregression package

# community matrix is transformed into presence/absence form
bin.data<-Raevel$comm
bin.data[bin.data>0]<-1

# meta-community level relative abundances are calculated
rel.abund<-colSums(bin.data)/sum(bin.data)

# Inverz logit transformation can be applied, when product of relative abundance and 
# species richness is lower than 1 for each species-plot combinations
# Let us count the combinations where this condition is not satisfied 
sum(rowSums(bin.data)%*%t(rel.abund)>1)
# In 39 plot-species combinations the inverz logit transformation cannot be applied

# Let us count the plots where this condition is not satisfied
sum(max(rel.abund)*rowSums(bin.data)>1)
# In 22 of 52 plots the inverz logit transformation cannot be applied
# thus the problem cannot be solved by excluding few plots

# fitting model with the cannonical logit link
# However trait data are not used, a vector should be supplied
# family is binomial, because binary data are analysed
# formul="1": model contains only intercept (and possibly offset)
# log.link=F: the cannonical link is applied
# est.prior=T: estimted relative abundances are used as prior
m.logit<-CATSregression(bin.data,trait=rep(NA,ncol(bin.data)),family="binomial",formul="1",log.link=F,est.prior=T)

# fitting model with log link
# the only difference is that log.link=T (the default)
m.log<-CATSregression(bin.data,trait=rep(NA,ncol(bin.data)),family="binomial",formul="1",est.prior=T)
# warnings can be neglected

# calculating relative abundances from predicted abundances
rel.abund.logit<-apply(predict(m.logit),1,function(x) x/sum(x))
rel.abund.log<-apply(predict(m.log),1,function(x) x/sum(x))



# drawing Figure 3
n.site = nrow(bin.data)  # number of sites
par(mfrow=c(1,1))
plot(rep(rel.abund,n.site),as.vector(rel.abund.logit),xlab="relative abundance in the meta-community",
     ylab="predicted relative abundance in plots", 
     cex.axis=1.2,cex.lab=1.4,cex.main=1.4,ylim=c(0,0.04))
abline(0,1,col="red")

# An additional plot (not shown in the paper)
plot(rep(rel.abund,n.site),as.vector(rel.abund.log),xlab="relative abundance in the meta-community",
     ylab="predicted relative abundance in plots", main="log-link",
     cex.axis=1.2,cex.lab=1.4,cex.main=1.4,,ylim=c(0,0.04))
abline(0,1,col="red")



##################################################################

# Example 3
rm(list=ls(all=TRUE))

set.seed(894531) # setting random seeds allows getting the same results for each run
library(vegan)
metacomm<-bstick(50)  # meta-community level relative abundances for 50 species following brocken-stick ditribution 
trait<-rnorm(50,0,1)  # random trait values for species from standard normal distribution

slope<-seq(0,3,0.05)  # vector of possible selection strength
comm<-vector()        # empty space for community data

# simulation of communities
for (s in slope)  # cycle through the possible selection strengths
for (k in 1:5)    # cycle of five replicates for each selection strength
  {
  M<-50/mean(exp(log(metacomm)+s*trait))    # calculating mean abundances (eq. 31 & 32 in the paper)
  comm<-rbind(comm,rpois(50,lambda=M*exp(log(metacomm)+s*trait)))   # drawing random abundances from Poisson distribution
  }

# Variation partitioning by the method proposed in the paper

# For variation partitioning three models has to be fitted
# with metacommunity information only
m1<-CATSregression(comm,trait, formul="1",prior=metacomm,est.prior=F)

# with trait only
m2<-CATSregression(comm,trait,est.prior=F)

# with trait and metacommunity abundances
m3<-CATSregression(comm,trait,prior=metacomm,est.prior=F)

# variation components can be easily calculated from adjusted R-squared values
pure.trait<-m3$R.sq.adj-m1$R.sq.adj
pure.metacomm<-m3$R.sq.adj-m2$R.sq.adj
joint<-m1$R.sq.adj+m2$R.sq.adj-m3$R.sq.adj

# Variation partitioning by Shipley's method

# matrix of mean abundances in plots (used as uniform prior)
q0<-matrix(rep(rowMeans(comm),50),nrow=nrow(comm),ncol=ncol(comm))

# denominator in equation 4 is the same for all R-squared
denom<-rowSums(comm*log(comm/q0),na.rm=T)

# calculation R-squared with observed traits with uniform and neautral prior
nomin_uobs<-rowSums(comm*log(comm/predict(m2)),na.rm=T)
nomin_nobs<-rowSums(comm*log(comm/predict(m3)),na.rm=T)
R_ut<-1-nomin_uobs/denom
R_nt<-1-nomin_nobs/denom



# calculation of R-squared with randomized traits
# 50 randomizations are done and resulted R-squareds are strored in matrices
# it may take several minutes
R_u.matrix<-vector()
R_n.matrix<-vector()

for (j in 1:50)
    {
    # print(j)
    rtrait<-sample(trait)  # randomizotion by re-shuffling trait values  
    rm2<-CATSregression(comm,rtrait,est.prior=F)
    rm3<-CATSregression(comm,rtrait,prior=metacomm,est.prior=F)
    
    nomin_urand<-rowSums(comm*log(comm/predict(rm2)),na.rm=T)
    nomin_nrand<-rowSums(comm*log(comm/predict(rm3)),na.rm=T)

    
    R_u.matrix<-cbind(R_u.matrix,1-nomin_urand/denom)
    R_n.matrix<-cbind(R_n.matrix,1-nomin_nrand/denom)
    }

# results of randomizations are averaged    
R_u<-rowMeans(R_u.matrix)    
R_n<-rowMeans(R_n.matrix)

# correction to be any effect zero or positive
R_ut<-pmax(R_ut,R_u)
R_n<-pmax(R_n,R_u)
R_nt<-pmax(R_nt,R_ut)
R_nt<-pmax(R_nt,R_ut)

# calculation variation components
pure.trait.shipley<-(R_nt-R_n)/(1-R_u)
pure.metacomm.shipley<-(R_nt-R_ut)/(1-R_u)
joint.shipley<-(R_ut+R_n-R_u-R_nt)/(1-R_u)
unexplained.shipley<-(1-R_nt)/(1-R_u)

# drawing Figure 4
par(mfrow=c(2,2))
plot(pure.metacomm,pure.metacomm.shipley,main="Pure metacommunity effect",ylab="Shipley's formula",
                         xlab="proposed new formula", cex.axis=1.2,cex.lab=1.4,cex.main=1.4)
abline(0,1,col="red",lwd=1.5)
plot(pure.trait,pure.trait.shipley,main="Pure selection effect",ylab="Shipley's formula",
                         xlab="proposed new formula", cex.axis=1.2,cex.lab=1.4,cex.main=1.4)
abline(0,1,col="red",lwd=1.5)
plot(joint,joint.shipley,main="Joint effect",ylab="Shipley's formula",
                         xlab="proposed new formula", cex.axis=1.2,cex.lab=1.4,cex.main=1.4)
abline(0,1,col="red",lwd=1.5)
plot(1-m3$R.sq.adj,unexplained.shipley,main="Unexplained",ylab="Shipley's formula",
                         xlab="proposed new formula", cex.axis=1.2,cex.lab=1.4,cex.main=1.4)
abline(0,1,col="red",lwd=1.5)


# drawing Figure 5
par(mfrow=c(2,2))
plot(rep(slope,each=5),pure.metacomm,ylab="Pure metacommunity effect",
                         xlab="strength of selection (slope)", cex.axis=1.2,cex.lab=1.4,cex.main=1.4,ylim=c(0,1))
plot(rep(slope,each=5),pure.trait,ylab="Pure selection effect",
                         xlab="strength of selection (slope)", cex.axis=1.2,cex.lab=1.4,cex.main=1.4,ylim=c(0,1))
plot(rep(slope,each=5),joint,ylab="Joint effect",
                         xlab="strength of selection (slope)", cex.axis=1.2,cex.lab=1.4,cex.main=1.4,ylim=c(min(joint),1))
plot(rep(slope,each=5),1-m3$R.sq.adj,ylab="Unexplained",
                         xlab="strength of selection (slope)", cex.axis=1.2,cex.lab=1.4,cex.main=1.4,ylim=c(0,1))