############################################################################### ##################CRINOIDS INTRA- AND INTER-SPECIFIC VARIATION################# ############################################################################### library(dplyr) library(plyr) library(reshape2) library(paleoTS) library(multcomp) library(nortest) library(MASS) library(moments) library(tscount) library(picante) library(stats) library(nortest) library(scales) library(Hmisc) # set working directory setwd("~/Dropbox/Crinoideos/Datasets") # read file crinoids.all<-read.csv("Pimiento et al.Dataset.csv") # delete specimens from private collections crinoids.all<-crinoids.all[crinoids.all$collection_number!="Ref (see remarks)", ] # combine Cenozoic samples in one crinoids.all$period<-as.vector(crinoids.all$period) crinoids.all$period[crinoids.all$period=="Paleogene"] <- "Cenozoic" crinoids.all$period[crinoids.all$period=="Neogene"] <- "Cenozoic" crinoids.all$period[crinoids.all$period=="Recent"] <- "Cenozoic" # assess for differences in samples size sample.size.ALL<-ddply(crinoids.all,~period,summarise,S=length(unique(species)),N=length(species)) species.data<-ddply(crinoids.all,~species+subclass+clade, summarise, N=length(species)) ############################################################################### # INTRASPECIFIC VARIATION # ############################################################################### # table with all species all.species<-table(crinoids.all$species) # select only species with more than 5 specimens trimed<-subset(crinoids.all, species %in% names(all.species[all.species >= 5])) # assess sampling differences sample.size<-ddply(trimed,~period,summarise, s=length(unique(species)),n=length(species)) pie(sample.size$n, labels = sample.size$period) # specimens plot pie(sample.size$s, labels = sample.size$period) # species plot # create table for paper using trimmed + all data table1<-merge(sample.size.ALL, sample.size, by="period") write.csv(table1,file="~/Dropbox/Crinoideos/Manuscript/table1.csv") #writes table in wd # create dataframes per arm using primibrachials arm.i=data.frame(subclass=trimed$subclass, clade=trimed$clade,species=trimed$species, period= trimed$period,Ibr= trimed$IBr1) arm.ii=data.frame(subclass=trimed$subclass, clade=trimed$clade,species=trimed$species, period= trimed$period,Ibr= trimed$IBr2) arm.iii=data.frame(subclass=trimed$subclass, clade=trimed$clade,species=trimed$species, period= trimed$period,Ibr= trimed$IBr3) arm.iv=data.frame(subclass=trimed$subclass, clade=trimed$clade,species=trimed$species, period= trimed$period, Ibr= trimed$IBr4) arm.v=data.frame(subclass=trimed$subclass, clade=trimed$clade,species=trimed$species, period= trimed$period, Ibr= trimed$IBr5) # combine drataframes/all arms Crin <- rbind(arm.i, arm.ii, arm.iii, arm.iv, arm.v) # get rid of NAs Crin<- na.omit(Crin) # add numerical values for time (mid_age) Crin$Ma<-as.vector(Crin$period) Crin$Ma[Crin$Ma=="Ordovician"] <- 467; Crin$Ma[Crin$Ma=="Silurian"] <- 433; Crin$Ma[Crin$Ma=="Devonian"] <- 388; Crin$Ma[Crin$Ma=="Carboniferous"] <- 323; Crin$Ma[Crin$Ma=="Permian"] <- 272; Crin$Ma[Crin$Ma=="Triassic"] <- 237; Crin$Ma[Crin$Ma=="Jurassic"] <- 168;Crin$Ma[Crin$Ma=="Cretaceous"] <- 100; Crin$Ma[Crin$Ma=="Cenozoic"] <- 33 Crin$Ma<-as.numeric(Crin$Ma) # order by time bin Crin <- Crin[order(Crin[,"Ma"]),] # summarize data per species summary<-ddply(Crin,~species+period+Ma, summarise, range=(max(Ibr)-min(Ibr)), average=mean(Ibr), sd=sd(Ibr),N=length(species)) # calculate intraspecific variation (coeficient of variation, herin=CV) summary$CV<-summary$sd/summary$average summary # see summarized data # ADDRESS SAMPLING SIZE ISSUES # summarize number of specimens per species per period species.samples<-ddply(trimed, ~species, summarise, N=length(species)) # combine with CV data species.CV<-summary species.CV$range=NULL;species.CV$average=NULL;species.CV$sd=NULL;species.CV$N=NULL;species.CV$Ma=NULL species.CV.time<-merge(species.CV, species.samples, by="species", ALL=T) # correlation modelSN<-lm(CV~N, data=species.CV.time) summary(modelSN) # ASSESS FOR CALANLIZATION USING A LINEAR CORRELATION AND LOESS # all data lm.data<-as.data.frame(cbind(as.numeric(summary$CV), as.numeric(summary$Ma))) #create dataframe lm.data<-na.omit(lm.data) colnames(lm.data)<-c("CV", "Ma") # test for nromality shapiro.test(lm.data$CV) lm<-lm(CV~Ma, data=lm.data) # linear model summary(lm) # excluding high extremes max(lm.data$CV) lm.data.new<-subset(lm.data, Ma!="323") lm2<-lm(CV~Ma, data=lm.data.new) summary(lm2) # excluding low extremes (zeros) lm.data.new2<-subset(lm.data, CV!="0") lm3<-lm(CV~Ma, data=lm.data.new2) summary(lm3) # ASSESS FOR CALANLIZATION TESTING FOR EVOLUTIONARY MODELS # summary data per species mean values to use in evo. models evo<-ddply(summary,~Ma,summarise, mm=mean(CV),vv=mean(sd^2), nn=length(species)) evo$tt<-as.numeric(evo$Ma); evo$Ma=NULL; evo$nn<-as.numeric(evo$nn) evo<-evo[order(evo[,"tt"]),] # convert to PaleoTS object evo.mode<-as.paleoTS(evo$mm, evo$vv, evo$nn, evo$tt, MM = NULL, genpars = NULL, label = "Intraspecific variation", start.age = NULL, oldest = c("first","last"),reset.time = T) # assess different hypotheses ou<-opt.GRW(evo.mode, pool=T ,cl=list(fnscale=-1),meth="L-BFGS-B",hess=F) bm<-opt.URW(evo.mode, pool=T ,cl=list(fnscale=-1),meth="L-BFGS-B",hess=F) st<-opt.Stasis(evo.mode, pool=T ,cl=list(fnscale=-1),meth="L-BFGS-B",hess=F) # select best fit comp<-compareModels(st,bm,ou,silent = F) #write.csv(comp,file="~/Dropbox/Crinoideos/Manuscript/table2.csv") # correlation modelSN2<-lm(mm~nn, data=evo) summary(modelSN) # ASSESS FOR CANALIZATION IN INDIVIUDAL CLADES # subset clades Articulata<-subset(Crin, clade=="Articulata"); unique(Articulata$period); length(Articulata[,1]) Cyathoformes<-subset(Crin, clade=="Cyathoformes"); unique(Cyathoformes$period); length(Cyathoformes[,1]) Diplobathrida<-subset(Crin, clade=="Diplobathrida"); unique(Diplobathrida$period); length(Diplobathrida[,1]) Disparida<-subset(Crin, clade=="Disparida"); unique(Disparida$period); length(Disparida[,1]) Flexibilia<-subset(Crin, clade=="Flexibilia"); unique(Flexibilia$period); length(Flexibilia[,1]) Monobathrida<-subset(Crin, clade=="Monobathrida"); unique(Monobathrida$period); length(Monobathrida[,1]) # subset subclasses Camerata<-subset(Crin, subclass=="Camerata"); unique(Camerata$period); length(Camerata[,1]) Pentacrinoidea<-subset(Crin, subclass=="Pentacrinoidea"); unique(Pentacrinoidea$period); length(Pentacrinoidea[,1]) # assess intraspecific variation for each clade # Articulata summaryA<-ddply(Articulata,~species+Ma,summarise,range=(max(Ibr)-min(Ibr)), average=mean(Ibr), sd=sd(Ibr)) summaryA$CV<-summaryA$sd/summaryA$average # linear model lmA=lm(CV~Ma, data=summaryA) summary(lmA) # assess evo. models evoA<-ddply(summaryA,~Ma,summarise, mm=mean(CV),vv=mean(sd^2), nn=length(species)) evoA$tt<-evoA$Ma; evoA$Ma=NULL; evoA$nn<-as.numeric(evoA$nn) evoA<-evoA[order(evoA[,"tt"]),] evo.modeA<-as.paleoTS(evoA$mm, evoA$vv, evoA$nn, evoA$tt, MM = NULL, genpars = NULL, label = "Articulata", start.age = NULL, oldest = c("first","last"),reset.time = T) ouA<-opt.GRW(evo.modeA, pool=T ,cl=list(fnscale=-1),meth="L-BFGS-B",hess=F) bmA<-opt.URW(evo.modeA, pool=T ,cl=list(fnscale=-1),meth="L-BFGS-B",hess=F) stA<-opt.Stasis(evo.modeA, pool=T ,cl=list(fnscale=-1),meth="L-BFGS-B",hess=F) comp<-compareModels(stA,bmA,ouA,silent = F) # Cyathoformes summaryCy<-ddply(Cyathoformes,~species+Ma,summarise,range=(max(Ibr)-min(Ibr)), average=mean(Ibr), sd=sd(Ibr)) summaryCy$CV<-summaryCy$sd/summaryCy$average # linear model lmCy=lm(CV~Ma, data=summaryCy) summary(lmCy) # assess evo. models evoCy<-ddply(summaryCy,~Ma,summarise, mm=mean(CV),vv=mean(sd^2), nn=length(species)) evoCy$tt<-evoCy$Ma; evoCy$Ma=NULL; evoCy$nn<-as.numeric(evoCy$nn) evoCy<-evoCy[order(evoCy[,"tt"]),] evo.modeCy<-as.paleoTS(evoCy$mm, evoCy$vv, evoCy$nn, evoCy$tt, MM = NULL, genpars = NULL, label = "Cyathoformes", start.age = NULL, oldest = c("first","last"),reset.time = T) ouCy<-opt.GRW(evo.modeCy, pool=T ,cl=list(fnscale=-1),meth="L-BFGS-B",hess=F) bmCy<-opt.URW(evo.modeCy, pool=T ,cl=list(fnscale=-1),meth="L-BFGS-B",hess=F) stCy<-opt.Stasis(evo.modeCy, pool=T ,cl=list(fnscale=-1),meth="L-BFGS-B",hess=F) comp<-compareModels(stCy,bmCy,ouCy,silent = F) # Diplobathrida summaryDip<-ddply(Diplobathrida,~species+Ma,summarise,range=(max(Ibr)-min(Ibr)), average=mean(Ibr), sd=sd(Ibr)) summaryDip$CV<-summaryDip$sd/summaryDip$average # assess evo. models evoDip<-ddply(summaryDip,~Ma,summarise, mm=mean(CV),vv=mean(sd^2), nn=length(species))#too small sample size; no evo analyses performed # Disparida summaryDisp<-ddply(Disparida,~species+Ma,summarise,range=(max(Ibr)-min(Ibr)), average=mean(Ibr), sd=sd(Ibr)) summaryDisp$CV<-summaryDisp$sd/summaryDisp$average # assess evo. models evoDis<-ddply(summaryDisp,~Ma,summarise, mm=mean(CV),vv=mean(sd^2), nn=length(species))#too small sample size; no evo analyses performed # Flexibilia summaryF<-ddply(Flexibilia,~species+Ma,summarise,range=(max(Ibr)-min(Ibr)), average=mean(Ibr), sd=sd(Ibr)) summaryF$CV<-summaryF$sd/summaryF$average # linear model lmF=lm(CV~Ma, data=summaryF) summary(lmF) # assess evo. models evoF<-ddply(summaryF,~Ma,summarise, mm=mean(CV),vv=mean(sd^2), nn=length(species))#too small sample size; no evo analyses performed # Monobathrida summaryM<-ddply(Monobathrida,~species+Ma,summarise,range=(max(Ibr)-min(Ibr)), average=mean(Ibr), sd=sd(Ibr)) summaryM$CV<-summaryM$sd/summaryM$average # linear model lmM=lm(CV~Ma, data=summaryM) summary(lmM) # assess evo. models evoM<-ddply(summaryM,~Ma,summarise, mm=mean(CV),vv=mean(sd^2), nn=length(species)) #too small sample size; no evo analyses performed # assess intraspecific variation for each subclass # Camerata summaryC<-ddply(Camerata,~species+Ma,summarise,range=(max(Ibr)-min(Ibr)), average=mean(Ibr), sd=sd(Ibr)) summaryC$CV<-summaryC$sd/summaryC$average # linear model lmC=lm(CV~Ma, data=summaryC) summary(lmC) # assess evo. models evoC<-ddply(summaryC,~Ma,summarise, mm=mean(CV),vv=mean(sd^2), nn=length(species)) #too small sample size; no evo analyses performed # Pentacrinoidea summaryP<-ddply(Pentacrinoidea,~species+Ma,summarise,range=(max(Ibr)-min(Ibr)), average=mean(Ibr), sd=sd(Ibr)) summaryP$CV<-summaryP$sd/summaryP$average # linear model lmP=lm(CV~Ma, data=summaryP) summary(lmP) # assess evo. models evoP<-ddply(summaryP,~Ma,summarise, mm=mean(CV),vv=mean(sd^2), nn=length(species)) evoP$tt<-evoP$Ma; evoP$Ma=NULL; evoP$nn<-as.numeric(evoP$nn) evoP<-evoP[order(evoP[,"tt"]),] evo.modeP<-as.paleoTS(evoP$mm, evoP$vv, evoP$nn, evoP$tt, MM = NULL, genpars = NULL, label = "Pentacrinoidea", start.age = NULL, oldest = c("first","last"),reset.time = T) ouP<-opt.GRW(evo.modeP, pool=T ,cl=list(fnscale=-1),meth="L-BFGS-B",hess=F) bmP<-opt.URW(evo.modeP, pool=T ,cl=list(fnscale=-1),meth="L-BFGS-B",hess=F) stP<-opt.Stasis(evo.modeP, pool=T ,cl=list(fnscale=-1),meth="L-BFGS-B",hess=F) comp<-compareModels(stP,bmP,ouP,silent = F) ############################################################################### # INTERSPECIFIC VARIATION # ############################################################################### # create drataframe per arm D.arm.i<-data.frame(species=crinoids.all$species, period= crinoids.all$period, Ibr= crinoids.all$IBr1, epoch= crinoids.all$epoch) D.arm.ii<-data.frame(species=crinoids.all$species, period= crinoids.all$period, Ibr= crinoids.all$IBr2, epoch= crinoids.all$epoch) D.arm.iii<-data.frame(species=crinoids.all$species, period= crinoids.all$period, Ibr= crinoids.all$IBr3, epoch= crinoids.all$epoch) D.arm.iv<-data.frame(species=crinoids.all$species, period= crinoids.all$period, Ibr= crinoids.all$IBr4, epoch= crinoids.all$epoch) D.arm.v<-data.frame(species=crinoids.all$species, period= crinoids.all$period, Ibr= crinoids.all$IBr5, epoch= crinoids.all$epoch) # combine all arms D.Crin <- rbind(D.arm.i, D.arm.ii, D.arm.iii, D.arm.iv, D.arm.v) # get rid of NAs D.Crin<- na.omit(D.Crin) # order by species D.Crin <- D.Crin[order(D.Crin[,"species"]),] head(D.Crin) # summarize per species and time bin summaryDis<-ddply(D.Crin,~species+period,summarise,range=(max(Ibr)-min(Ibr)),average=mean(Ibr),sd=sd(Ibr)) summaryDis$CV<-summaryDis$sd/summaryDis$average head(summaryDis) # add numerical values for time (mid_age) summaryDis$Ma<-as.vector(summaryDis$period) summaryDis$Ma[summaryDis$Ma=="Ordovician"] <- 467; summaryDis$Ma[summaryDis$Ma=="Silurian"] <- 433; summaryDis$Ma[summaryDis$Ma=="Devonian"] <- 388; summaryDis$Ma[summaryDis$Ma=="Carboniferous"] <- 323; summaryDis$Ma[summaryDis$Ma=="Permian"] <- 272; summaryDis$Ma[summaryDis$Ma=="Triassic"] <- 237; summaryDis$Ma[summaryDis$Ma=="Jurassic"] <- 168;summaryDis$Ma[summaryDis$Ma=="Cretaceous"] <- 100; summaryDis$Ma[summaryDis$Ma=="Cenozoic"] <- 33 summaryDis$Ma<-as.numeric(summaryDis$Ma) summaryDis <- summaryDis[order(summaryDis[,"Ma"]),] head(summaryDis) # merge with data from previous analyses (<5 sps for presentation) tableS1<-merge(summaryDis, species.data, by="species") tableS1<-tableS1[order(tableS1[,"Ma"]),] head(tableS1) write.csv(tableS1,file="~/Dropbox/Crinoideos/Manuscript/tableS1.csv") # SIMILARITY ANALYSIS # create community data community<-dcast(summaryDis, Ma~species) # give 0 and 1 for presence and absence community[is.na(community)]<-0 community$Ma<-as.numeric(community$Ma) community<- community[order(community[,"Ma"]),] head(community) rownames(community)<-community$Ma # delete taxa column community$Ma<-NULL # covert to matrix community<-data.matrix(community) # distance matrix between species species<-summaryDis species$period<-NULL; species$Ma<-NULL; species$meanIbr<-NULL;species$sd=NULL;species$CV=NULL species$average=NULL #deletes unnecesary rows duplicated(species$species) #duplicates not allowed rownames(species)<-species$species species$species<-NULL head(species) speciesD<-dist(species) speciesD<-as.matrix(speciesD) # match taxa names in distance and abundance matrices #community<-community[,match(colnames(community), colnames(community))] MPD<-mpd(community, speciesD, abundance.weighted=FALSE) tt<-rownames(community) rownames(community) dist.data<-as.data.frame(cbind(MPD,tt)) dist.data<- na.omit(dist.data) dist.data$tt<-as.numeric(as.character(dist.data$tt)) dist.data$MPD<-as.numeric(as.character(dist.data$MPD)) # standardized effect size (draw standard deviation) #MPD.rand=ses.mpd(community, speciesD, abundance.weighted = FALSE, runs = 999, #iterations = 1000) # CORRELATE INTRA- INTER-SPECIFIC VARIATION # use function source("http://www.graemetlloyd.com/pubdata/functions_2.r") intra<-evo; intra$vv<-NULL;intra$nn<-NULL inter<-dist.data # generalized differences gd.intra<-gen.diff(intra$mm,intra$tt) #intraspecific variation vs. gd.inter<-gen.diff(inter$MPD,inter$tt) #interspecific variation #correlatoon r=cor.test(gd.inter,gd.intra,method="spearman") ############################################################################### # PLOTS # ############################################################################### # PLOT SAMPLING SIZE EFFECTS # CV per species per time pdf("~/Dropbox/Crinoideos/Manuscript/Figures/Pimientoetal_Fig2.pdf", width=10, height=5) # create panel of figures par(mfrow=c(1,2)) plot(CV~N, data=species.CV.time,pch=1,ylab="Species' CV", xlab="Specimens") plot(evo$mm~evo$nn, pch=19, ylab="Mean CV per time bin", xlab="Species") dev.off() # PLOT INTRASPECIFIC AND INTERSPECIFIC VARIATION (ALL DATA) pdf("~/Dropbox/Crinoideos/Manuscript/Figures/Pimientoetal_Fig.3.pdf", width=5, height=10) par(mfrow=c(3,1)) # intra- # CV per time period plot(CV~Ma, data=summary, ylim=c(-0.2,0.7), pch=16, col=alpha("grey", 0.5), main="Intraspecific variation") # loess source("~/Dropbox/Crinoideos/R scripts/funciones LOESS.R") # available at https://gist.github.com/kylebgorman/6444612 y=summary$CV x=as.numeric(summary$Ma) fit <- loess(y ~ x) best.span <- autoloess(fit, span=c(0.1,0.9))$pars$span fit <- loess(y ~ x,span= best.span) x_fine <- seq(min(x),max(x),0.1) pred<-predict(fit,x_fine, se=T, interval="confidence") x_fine <- x_fine[!is.na(pred$fit)] y.polygon <- c((pred$fit+1.96*pred$se.fit), rev((pred$fit-1.96*pred$se.fit))) x.polygon <- c(x_fine, rev(x_fine)) polygon(x.polygon, y.polygon[!is.na(y.polygon)], col= alpha("brown1",0.3), border=F) lines(x_fine,pred$fit[!is.na(pred$fit)], col="brown1",lwd=2) # linear correlation # CI polygon newx <- seq(min(lm.data$Ma), max(lm.data$Ma), length.out=100) preds <- predict(lm, newdata = data.frame(Ma=newx),interval = 'confidence') polygon(c(rev(newx), newx), c(rev(preds[ ,3]), preds[ ,2]), col = alpha('grey80', 0.5), border = NA) # regression lines abline(lm, lwd=3) abline(lm2, lty=2) abline(lm3, lty=2) # define erass abline(v=252, lty=2); abline(v=66, lty=2) # mode of evolution plot(evo.mode,nse=1, pool=T, add= F, modelFit = NULL,pch = 16, lwd = 1.5, col=alpha("brown1",0.3)) # inter- plot(MPD~tt, data=dist.data, col="white",main="Interspecific variation") lines(dist.data$tt,dist.data$MPD, col="darkslategray4", lwd=5) # define erass abline(v=252, lty=2); abline(v=66, lty=2) dev.off()