rm(list = ls()) ## Code to reproduce figures and table presented in the article ## Moreno, R.A., Labra, F.A, Cotoras, D.D., Camus, P.A., Gutiérrez, D., Aguirre, L., Rozbaczylo, ## N., Poulin, E., Lagos, N.A., Zamorano, D. & M.M. Rivadeneira. Evolutionary drivers ## of the hump-shaped latitudinal gradient of benthic polychaete species richness along ## the Southeastern Pacific coast. PeerJ. ## ## Code written by Marcelo Rivadeneira on July 2021, queries to:marcelo.rivadeneira@ceaza.cl or mrivaden@gmail.com ## Needed libraries library(tcltk) library(intervals) library(maptools) library(mapdata) library(plyr) library(vegan) library(usdm) library(HH) library(ranger) library(ncf) library(pdp) library(ape) library(paleotree) library(picante) library(PhyloMeasures) ## Loading working files setwd(tk_choose.dir(getwd())) poly=read.table("Dataset_S1.txt",T,sep="\t") local=read.table("Dataset_S2.txt",T,sep="\t") env=read.table("Dataset_S3.txt",T,sep="\t") env=env[order(env$Lat,decreasing = F),] ## Building a presence/absence matrix of species distribution using 0.5º latitude bins lat.poly=Intervals(c(poly$LatS,poly$LatN)) int.min=seq(-56,-3.5,0.5) int.max=seq(-55.5,-3,0.5) lat.bins=Intervals(c(int.min,int.max)) int.min.ep=seq(-79,63,0.5) int.max.ep=seq(-78.5,62.5,0.5) lat.bins.ep=Intervals(c(int.min.ep,int.max.ep)) ## Presence-absence matrix for the study area juntos=interval_overlap(lat.poly,lat.bins) lmat=matrix(0,ncol=length(int.min),nrow=nrow(poly)) rownames(lmat)=poly$ScientificName_accepted colnames(lmat)=int.min for(u in 1:nrow(lmat)) { lmat[u,juntos[[u]]]=1 } ## Species richness per latitudinal bin spp=apply(lmat,2,sum) ## Median latitudinal range range.mat=lmat*poly$LatRan range.mat[range.mat==0]=NA median.range=apply(range.mat,2,median,na.rm=T) ## Species richness at local scale m2=tapply(local$Ind,list(local$Site,local$ScientificName_accepted),sum) m2[is.na(m2)]=0 local$latbin=round_any(local$Lat,0.5) Es=20 loca.rare=data.frame(rar=rarefy(m2,Es)) loca.rare$ind=apply(m2,1,sum) loca.rare$lat=local$Lat[match(rownames(loca.rare),local$Site)] loca.rare=loca.rare[loca.rare$ind>=Es,] loca.rare$latbin=local$latbin[match(rownames(loca.rare),local$Site)] loca.rare$regional.spp=spp[match(loca.rare$latbin,as.numeric(names(spp)))] richness.averages=aggregate(cbind(loca.rare$rar,loca.rare$regional.spp),list(loca.rare$latbin),mean) # Correlation between species richness estimated from a range-through approach vs mean rarefied local estimations (in 29 latitudinal bins) cor.test(richness.averages$V1,richness.averages$V2) # local vs regional species richness relationship. No evidence of local species saturation. satura=lm(richness.averages$V1~poly(richness.averages$V2,2)) summary(satura) ## Reproducing Figure 1 data(wrld_simpl) pdf("Fig1.pdf",width=8.5,height=5) par(mfrow=c(1,2),mar=c(5,1,1,0),new=F) map(wrld_simpl,xlim=c(-100,-35),ylim=c(-56,0),fill=T,col="gray40",bg="white") mtext("A",side=3,adj=1,line=0,cex=1.2) points(env$Lon-0.5,env$Lat,pch=21,bg="cadetblue3",col="cadetblue3") plot(spp, env$Lat,type="l",ylim=c(-56,0),xlim=c(150,300),lwd=1,col="red",xlab="Regional species richness",ylab="Latitude (º)",bty="n",yaxt="n",xaxt="n",cex.axis=1.2,cex.lab=1.2) axis(2,las=2,at=c(-60,-50,-40,-30,-20,-10,0),cex.axis=1.5) axis(1,las=1,cex.axis=1.2) par(new = T) plot(cex.axis=1.2,cex.lab=1.2,loca.rare$rar,loca.rare$lat,ylim=c(-56,0),col = "black", axes = F,xlab = NA, ylab = "Local species richness",pch=21,bg="black") axis(side = 3,las=1,cex.axis=1.2) mtext("Local species richness (Es=20)",side=3,cex=1.2,line=2) legend("topright",lty=c(1,NA),pch=c(NA,21),pt.bg =c(NA,"black"),col=c("red","black"),c("regional","local(rarefaction)"),bty="n",cex=0.8) mtext("B",side=3,adj=1,line=0,cex=1.2) dev.off() ## Evaluating the environmental predictors of the latitudinal diversity gradient env.all=na.exclude(cbind(env,median.range)) tvif=vifstep(cbind(env.all[,-c(1,2)]),th=10) tvif ## 'best variables' goodvars=tvif@results$Variables env.target=cbind(env.all[names(env.all)%in%goodvars]) set1=cbind(env.target,spp=spp) ## Ranger Random forest model with all predictors included set.seed(7) # to get our same results rg.general=ranger(spp~., data=set1,importance = "permutation") rg.general rg.general.imp=importance_pvalues(rg.general,method="altmann",formula = spp~., data=set1,num.permutations=10000) # ...this will take a while, go get a cup of coffee ## Diagnostic analyses of the model residuals.rg.model=(set1$spp-predict(rg.general,set1)$predictions)/sd(spp) spline.model.general=spline.correlog(x=env$Lon,y=env$Lat,z=residuals.rg.model,latlon=T,resamp=1000,quiet=T) ## Reproducing Figure 2 pdf("Fig2.pdf",width=8,height=4) par(mfrow=c(1,3),mar=c(5,5,2,1)) plot(spp,predict(rg.general,set1)$predictions,pch=21,col="dodgerblue4",bg="skyblue",yaxt="n",cex=1.2,xlab="Observed species richness",ylab="Predicted species richness",xlim=c(150,300),ylim=c(150,300),cex.lab=1.2,cex.axis=1.2) axis(2,las=2,at=c(150,200,250,300),labels=c(150,200,250,300),cex.axis=1.2) abline(a=0,b=1,col="red",lwd=2) mtext(bquote(paste(" ",pseudo-r^2==.(round(rg.general$r.squared,2)))),3,adj=0,line=-2,cex=0.8) mtext("A",side=3,adj=1,line=0) plot(env$Lat,residuals.rg.model,ylim=c(-0.6, 0.5),pch=21,cex=1.2,col="dodgerblue4",bg="skyblue",xlab="Latitude (º)",ylab="Standardized residuals",yaxt="n",cex.lab=1.2,cex.axis=1.2) axis(2,las=2,at=c(-0.4,-0.2,0,0.2,0.4),cex.axis=1.2) mtext("B",side=3,adj=1,line=0) plot(spline.model.general,xlab="Distance (km)",ylab="Moran's I",xlim=c(0,5000),cex.axis=1.2,cex.lab=1.2,yaxt="n") axis(2,las=2,at=c(-1,-0.5,0,0.5,1),cex.axis=1.2) mtext("C",side=3,adj=1,line=0) dev.off() ## Getting variable importance and significance of the random forest model table1=data.frame(pmip=rg.general.imp[,1],pmip.pval=rg.general.imp[,2]) table1$vif=tvif@results$VIF[match(rownames(table1),tvif@results$Variables)] table1$significance=ifelse(table1$pmip.pval< 0.05,"significant","ns") table1=table1[order(table1$pmip,decreasing=T),] table1 ## Reproducing Figure 3 pdf("Fig3.pdf",width=5,height=7) par(mfrow=c(3,1),mar=c(0,5,5,5),new=F) plot(env$Lat,spp,cex=1.4,xlab=NA,ylab="Species richness",pch=21,col="dodgerblue4",bg="skyblue",bty="n",xaxt="n",yaxt="n") axis(2,las=2,at=c(100,150,200,250,300),labels=c(100,150,200,250,300)) axis(1,at=seq(-60,0,10),labels=NA) lines(env$Lat,predict(rg.general,set1)$predictions,col="red",lwd=2) legend("bottom",lty=c(NA,1),pt.cex=c(1.4,NA),pt.bg=c("skyblue",NA), pch=c(21,NA),col=c("dodgerblue4","red"),c("Observed","Model"),bty="y") mtext(" A",side=3,adj=1,line=-1,cex=1.4) par(mar=c(3,5,2,5)) plot(env$Lat, env$sst,type="l",axes=F,bty="n",col="dodgerblue4",ylab="SST (ºC)",xlab="") axis(2,las=2,at=c(5,10,15,20,25),labels=c(5,10,15,20,25)) par(new = TRUE) plot(env$Lat, env$sst.range,type="l",lty=2,bty="n",col="deepskyblue2",axes=F,ylab="",xlab="") axis(4,las=2,at=c(2,4,6,8,10),labels=c(2,4,6,8,10)) mtext("SST range (ºC)",4,line=2.5,cex=0.7) mtext(" B",side=3,adj=1,line=-1,cex=1.4) legend("topleft",c("SST","SST range"),horiz = F,bty="y",lty=c(1,2),col=c("dodgerblue4","deepskyblue2")) par(mar=c(5,5,0,5)) plot(env$Lat, env$salinity,type="l",axes=F,bty="n",col="forestgreen",ylab="Salinity (PSS)",xlab="") axis(2,las=2,at=c(31,32,33,34,35),labels=c(31,32,33,34,35)) par(new = TRUE) plot(env$Lat, env.all$median.range,type="l",lty=2,bty="n",col="goldenrod2",axes=F,ylab="",xlab="Latitude (º)") axis(4,las=2,at=c(30,45,60,75,90),labels=c(30,45,60,75,90)) mtext(" C",side=3,adj=1,line=-1,cex=1.4) mtext("Latitudinal range (º)",4,line=2.5,cex=0.7) axis(1,at=seq(-60,0,10),labels=seq(-60,0,10)) legend("topleft",c("Salinity","Median latitudinal range"),horiz = F,bty="y",lty=c(1,2),col=c("forestgreen","goldenrod2")) dev.off() ## Partial dependence plots p1=partial(rg.general, pred.var = "sst", plot = F, rug = TRUE) p2=partial(rg.general, pred.var = "sst.range", plot = F, rug = TRUE) p3=partial(rg.general, pred.var = "salinity", plot = F, rug = TRUE) p4=partial(rg.general, pred.var = "median.range", plot = F, rug = TRUE) ## Reproducing Figure 4 pdf("Fig4.pdf",width=7,height=5) par(mfrow=c(2,2),mar=c(5,5,1.5,1)) plot(p1,type="l",yaxt="n",lwd=2,ylab="Predicted species richness",col="dodgerblue4",xlab="SST (ºC)") axis(2,las=2) mtext("A",side=3,adj=1,line=0,cex=1.5) rug(set1$sst) plot(p2,type="l",ylab="",lwd=2,yaxt="n",col="deepskyblue2",xlab="SST range (ºC)") axis(2,las=2) mtext("B",side=3,adj=1,line=0,cex=1.5) rug(set1$sst.range) plot(p3,type="l",lwd=2,yaxt="n",col="forestgreen",xlab="Salinity (PSS)",ylab="Predicted species richness") axis(2,las=2) mtext("C",side=3,adj=1,line=0,cex=1.5) rug(set1$salinity) plot(p4,type="l",ylab="",lwd=2,yaxt="n",col="goldenrod2",xlab="Median latitudinal range (º)") axis(2,las=2) mtext("D",side=3,adj=1,line=0,cex=1.5) rug(set1$median.range) dev.off() ## Phylogenetic inertia of predictors # For phylotable, use species in the last column, sorted alphabetically phylotable=data.frame(Class=poly$Class, Order=poly$Order,Family=poly$Family,Genus=poly$Genus,Species=gsub("\\s", "_", poly$ScientificName_accepted)) phylotable[phylotable==""]=NA politree=taxonTable2taxonTree(as.matrix(phylotable)) politree.dicho=multi2di(politree) traits.matrix=poly[,c(9,11:15)] names(traits.matrix)=c("median.range","sst","sst.range","salinity","salinity.range", "productivity") rownames(traits.matrix)=gsub("\\s", "_", poly$ScientificName_accepted) match.traits=match.phylo.data(politree.dicho, traits.matrix) trait.signal=multiPhylosignal(match.traits$data,match.traits$phy,reps=1000) table1$k=trait.signal$K[match(rownames(table1),rownames(trait.signal))] table1$k.pvalue=trait.signal$PIC.variance.P[match(rownames(table1),rownames(trait.signal))] ## Table 1 (raw unedited version) table1 ## Phylogenetic diversity indices ## PD (Faith's phylogenetic diversity or phylogenetic richness), MPD (mean pairwise distance), ## and phylogenetic regularity (VPD) lmat2=lmat rownames(lmat2)=gsub("\\s", "_", rownames(lmat)) match.bins=match.phylo.comm(politree, t(lmat2)) PD=ses.pd(match.bins$comm,match.bins$phy,null.model="taxa.labels", runs=1000, include.root = F) # time for an even longer coffee break! MPD=ses.mpd(match.bins$comm,cophenetic(match.bins$phy),null.model="taxa.labels",runs=1000) VPD=taxondive(match.bins$comm, cophenetic(match.bins$phy))$Lambda # Correlations between PD indices and species richness cor.test(PD$pd.obs.z,spp) cor.test(MPD$mpd.obs.z,spp) cor.test(VPD,spp) ## Reproducing Figure 5 pdf("Fig5.pdf",width=5,height=7) par(mfrow=c(3,1),mar=c(4,4.5,1,4),oma=c(1,1,1,1),new=F) plot(env$Lat, PD$pd.obs.z,yaxt="n",xaxt="n",ylab=expression('PD'["SES"]),cex=1.2,xlab="",cex.lab=1.2) axis(2,las=2,at=c(-4,-3,-2,-1,0,1)) axis(1,las=1,at=c(-50,-40,-30,-20,-10)) points(env$Lat,PD$pd.obs.z,pch=21,bg=ifelse(PD$pd.obs.p<0.05,"skyblue","gray80"),col=ifelse(PD$pd.obs.p<0.05,"dodgerblue4","gray40"),cex=1.2) mtext("A",side=3,line=0,adj=1,cex=1.5) par(new = TRUE) plot(env$Lat,spp,type="l",col="red",axes=F,bty="n",ylab=NA,xlab=NA) axis(4,las=2,at=c(100,150,200,250,300)) mtext("Species richness",side=4,line=3) plot(env$Lat, MPD$mpd.obs.z,yaxt="n",xaxt="n",ylab=expression('MPD'["SES"]),cex=1.2,xlab="",cex.lab=1.2) legend("topright",pch=c(21,21,NA),pt.cex = 1,pt.bg = c("skyblue","gray80",NA),col=c("dodgerblue4","gray40","red"),lty=c(NA,NA,1),c("p<0.05","p>=0.05","species richness"),cex=0.7) axis(2,las=2,at=c(-4,-3,-2,-1,0,1)) axis(1,las=1,at=c(-50,-40,-30,-20,-10)) points(env$Lat,MPD$mpd.obs.z,pch=21,bg=ifelse(MPD$mpd.obs.p>0.95 | MPD$mpd.obs.p<0.05,"skyblue","gray80"),col=ifelse(MPD$mpd.obs.p>0.95 | MPD$mpd.obs.p<0.05,"dodgerblue4","gray40"),cex=1.2) par(new = TRUE) plot(env$Lat,spp,type="l",col="red",axes=F,bty="n",ylab=NA,xlab=NA) axis(4,las=2,at=c(100,150,200,250,300)) mtext("Species richness",side=4,line=3) mtext("B",side=3,line=0,adj=1,cex=1.5) plot(env$Lat, VPD,yaxt="n",xaxt="n",ylab="VPD",pch=21,bg="white",cex=1.2,cex.lab=1.5,xlab="Latitude (º)") axis(2,las=2) axis(1,las=1,at=c(-50,-40,-30,-20,-10)) par(new = TRUE) plot(env$Lat,spp,type="l",col="red",axes=F,bty="n",ylab=NA,xlab=NA) axis(4,las=2,at=c(100,150,200,250,300)) mtext("Species richness",side=4,line=3) mtext("C",side=3,line=0,adj=1,cex=1.5) dev.off()