# package CATSregression will be used # it can be istalled for GitHub by # devtools::install_github("BottaDZ/CATSregression" library(CATSregression) # Example 1 set.seed(894531) trait<-rnorm(20,10,3) # traits of 20 species are random numbers from normal distribution with mean=10, sd=2 # species abundaces in 50 plots are random numbers from negative binomial distribution, # whose mean is a function of trait abund<-vector() for (i in 1:50) abund<-rbind(abund,rnbinom(n=20,mu=exp(0.5*trait),size=1)) # fitting model with Poisson distribution, which is the default family # est.prior=F because we don't want to include meta-community level relative abundances into the model m.pois<-CATSregression(abund,trait,est.prior=F) # fitting model with negative binomial distribution m.negbin<-CATSregression(abund,trait,est.prior=F,family="nb") # drawing Figure 1 par(mfrow=c(2,2)) plot(predict(m.pois),resid(m.pois),log="x",xlab="Fitted values",ylab="Dunn-Smyth residuals",main="Poisson model",cex.axis=1.2,cex.lab=1.4,cex.main=1.4,pch=19) plot(predict(m.negbin),resid(m.negbin),log="x",xlab="Fitted values",ylab="Dunn-Smyth residuals",main="Negative binomial model",cex.axis=1.2,cex.lab=1.4,cex.main=1.4,pch=19) qqnorm(resid(m.pois),ylab="Dunn-Smyth residuals",main="",cex.axis=1.2,cex.lab=1.4,cex.main=1.4,pch=19) qqline(resid(m.pois),lwd=2) qqnorm(resid(m.negbin),ylab="Dunn-Smyth residuals",main="",cex.axis=1.2,cex.lab=1.4,cex.main=1.4,pch=19) qqline(resid(m.negbin),lwd=2) # models' coefficients can easily get by the coef function # the second column will be used (the first one is the intercepts) coef.pois<-coef(m.pois)[,2] coef.negbin<-coef(m.negbin)[,2] # getting confidence intervals is more tricky (function confint for CATS models has not been implemented yet) # packages dplyr and mgcv.helper will be needed # the later could be installed from github: # devtools::install_github("samclifford/mgcv.helper") library(dplyr) library(mgcv.helper) # then we can get the confidence intervals conf.pois<-sapply(m.pois$model, function(x) as.matrix(as.data.frame(confint(x))[2,5:6])) conf.negbin<-sapply(m.negbin$model, function(x) as.matrix(as.data.frame(confint(x))[2,5:6])) # drawing Figure 2 par(mfrow=c(1,2)) plot(1:50,coef.pois,type="n",ylim=c(0,1.2),ylab="estimated slope",xlab="",main="Possion model",axes=F,cex.axis=1.4,cex.lab=1.6,cex.main=1.4) points(1:50,coef.pois) arrows(1:50, conf.pois[1,], 1:50, conf.pois[2,], angle = 90, lty = 1, code = 3, length = 0.05) axis(2,cex.axis=1.4) abline(0.5,0,lwd=2,col="red") plot(1:50,coef.negbin,type="n",ylim=c(0,1.2),ylab="estimated slope",xlab="",main="negative binomial model",axes=F,cex.lab=1.6,cex.main=1.4) points(1:50,coef.negbin) arrows(1:50, conf.negbin[1,], 1:50, conf.negbin[2,], angle = 90, lty = 1, code = 3, length = 0.05) axis(2,cex.axis=1.4) abline(0.5,0,lwd=2,col="red") ################################################################## # Example 2 rm(list=ls(all=TRUE)) data(Raevel) # this dataset available in the CATSregression package # community matrix is transformed into presence/absence form bin.data<-Raevel$comm bin.data[bin.data>0]<-1 # meta-community level relative abundances are calculated rel.abund<-colSums(bin.data)/sum(bin.data) # Inverz logit transformation can be applied, when product of relative abundance and # species richness is lower than 1 for each species-plot combinations # Let us count the combinations where this condition is not satisfied sum(rowSums(bin.data)%*%t(rel.abund)>1) # In 39 plot-species combinations the inverz logit transformation cannot be applied # Let us count the plots where this condition is not satisfied sum(max(rel.abund)*rowSums(bin.data)>1) # In 22 of 52 plots the inverz logit transformation cannot be applied # thus the problem cannot be solved by excluding few plots # fitting model with the cannonical logit link # However trait data are not used, a vector should be supplied # family is binomial, because binary data are analysed # formul="1": model contains only intercept (and possibly offset) # log.link=F: the cannonical link is applied # est.prior=T: estimted relative abundances are used as prior m.logit<-CATSregression(bin.data,trait=rep(NA,ncol(bin.data)),family="binomial",formul="1",log.link=F,est.prior=T) # fitting model with log link # the only difference is that log.link=T (the default) m.log<-CATSregression(bin.data,trait=rep(NA,ncol(bin.data)),family="binomial",formul="1",est.prior=T) # warnings can be neglected # calculating relative abundances from predicted abundances rel.abund.logit<-apply(predict(m.logit),1,function(x) x/sum(x)) rel.abund.log<-apply(predict(m.log),1,function(x) x/sum(x)) # drawing Figure 3 n.site = nrow(bin.data) # number of sites par(mfrow=c(1,1)) plot(rep(rel.abund,n.site),as.vector(rel.abund.logit),xlab="relative abundance in the meta-community", ylab="predicted relative abundance in plots", cex.axis=1.2,cex.lab=1.4,cex.main=1.4,ylim=c(0,0.04)) abline(0,1,col="red") # An additional plot (not shown in the paper) plot(rep(rel.abund,n.site),as.vector(rel.abund.log),xlab="relative abundance in the meta-community", ylab="predicted relative abundance in plots", main="log-link", cex.axis=1.2,cex.lab=1.4,cex.main=1.4,,ylim=c(0,0.04)) abline(0,1,col="red") ################################################################## # Example 3 rm(list=ls(all=TRUE)) set.seed(894531) # setting random seeds allows getting the same results for each run library(vegan) metacomm<-bstick(50) # meta-community level relative abundances for 50 species following brocken-stick ditribution trait<-rnorm(50,0,1) # random trait values for species from standard normal distribution slope<-seq(0,3,0.05) # vector of possible selection strength comm<-vector() # empty space for community data # simulation of communities for (s in slope) # cycle through the possible selection strengths for (k in 1:5) # cycle of five replicates for each selection strength { M<-50/mean(exp(log(metacomm)+s*trait)) # calculating mean abundances (eq. 31 & 32 in the paper) comm<-rbind(comm,rpois(50,lambda=M*exp(log(metacomm)+s*trait))) # drawing random abundances from Poisson distribution } # Variation partitioning by the method proposed in the paper # For variation partitioning three models has to be fitted # with metacommunity information only m1<-CATSregression(comm,trait, formul="1",prior=metacomm,est.prior=F) # with trait only m2<-CATSregression(comm,trait,est.prior=F) # with trait and metacommunity abundances m3<-CATSregression(comm,trait,prior=metacomm,est.prior=F) # variation components can be easily calculated from adjusted R-squared values pure.trait<-m3$R.sq.adj-m1$R.sq.adj pure.metacomm<-m3$R.sq.adj-m2$R.sq.adj joint<-m1$R.sq.adj+m2$R.sq.adj-m3$R.sq.adj # Variation partitioning by Shipley's method # matrix of mean abundances in plots (used as uniform prior) q0<-matrix(rep(rowMeans(comm),50),nrow=nrow(comm),ncol=ncol(comm)) # denominator in equation 4 is the same for all R-squared denom<-rowSums(comm*log(comm/q0),na.rm=T) # calculation R-squared with observed traits with uniform and neautral prior nomin_uobs<-rowSums(comm*log(comm/predict(m2)),na.rm=T) nomin_nobs<-rowSums(comm*log(comm/predict(m3)),na.rm=T) R_ut<-1-nomin_uobs/denom R_nt<-1-nomin_nobs/denom # calculation of R-squared with randomized traits # 50 randomizations are done and resulted R-squareds are strored in matrices # it may take several minutes R_u.matrix<-vector() R_n.matrix<-vector() for (j in 1:50) { # print(j) rtrait<-sample(trait) # randomizotion by re-shuffling trait values rm2<-CATSregression(comm,rtrait,est.prior=F) rm3<-CATSregression(comm,rtrait,prior=metacomm,est.prior=F) nomin_urand<-rowSums(comm*log(comm/predict(rm2)),na.rm=T) nomin_nrand<-rowSums(comm*log(comm/predict(rm3)),na.rm=T) R_u.matrix<-cbind(R_u.matrix,1-nomin_urand/denom) R_n.matrix<-cbind(R_n.matrix,1-nomin_nrand/denom) } # results of randomizations are averaged R_u<-rowMeans(R_u.matrix) R_n<-rowMeans(R_n.matrix) # correction to be any effect zero or positive R_ut<-pmax(R_ut,R_u) R_n<-pmax(R_n,R_u) R_nt<-pmax(R_nt,R_ut) R_nt<-pmax(R_nt,R_ut) # calculation variation components pure.trait.shipley<-(R_nt-R_n)/(1-R_u) pure.metacomm.shipley<-(R_nt-R_ut)/(1-R_u) joint.shipley<-(R_ut+R_n-R_u-R_nt)/(1-R_u) unexplained.shipley<-(1-R_nt)/(1-R_u) # drawing Figure 4 par(mfrow=c(2,2)) plot(pure.metacomm,pure.metacomm.shipley,main="Pure metacommunity effect",ylab="Shipley's formula", xlab="proposed new formula", cex.axis=1.2,cex.lab=1.4,cex.main=1.4) abline(0,1,col="red",lwd=1.5) plot(pure.trait,pure.trait.shipley,main="Pure selection effect",ylab="Shipley's formula", xlab="proposed new formula", cex.axis=1.2,cex.lab=1.4,cex.main=1.4) abline(0,1,col="red",lwd=1.5) plot(joint,joint.shipley,main="Joint effect",ylab="Shipley's formula", xlab="proposed new formula", cex.axis=1.2,cex.lab=1.4,cex.main=1.4) abline(0,1,col="red",lwd=1.5) plot(1-m3$R.sq.adj,unexplained.shipley,main="Unexplained",ylab="Shipley's formula", xlab="proposed new formula", cex.axis=1.2,cex.lab=1.4,cex.main=1.4) abline(0,1,col="red",lwd=1.5) # drawing Figure 5 par(mfrow=c(2,2)) plot(rep(slope,each=5),pure.metacomm,ylab="Pure metacommunity effect", xlab="strength of selection (slope)", cex.axis=1.2,cex.lab=1.4,cex.main=1.4,ylim=c(0,1)) plot(rep(slope,each=5),pure.trait,ylab="Pure selection effect", xlab="strength of selection (slope)", cex.axis=1.2,cex.lab=1.4,cex.main=1.4,ylim=c(0,1)) plot(rep(slope,each=5),joint,ylab="Joint effect", xlab="strength of selection (slope)", cex.axis=1.2,cex.lab=1.4,cex.main=1.4,ylim=c(min(joint),1)) plot(rep(slope,each=5),1-m3$R.sq.adj,ylab="Unexplained", xlab="strength of selection (slope)", cex.axis=1.2,cex.lab=1.4,cex.main=1.4,ylim=c(0,1))