library(ape) library(phytools) library(caper) library(geiger) library(lmtest) library(ggplot2) tree <- read.nexus("consensusTree_10kTrees_Primates_Version3(1).nex") dataset <- read.csv("formatted_used_database.csv",header=T,row.names=1) name.check(tree, dataset) # making sure the species in both files match up - they do! #checking for phylogenetic signal phylosig(tree,dataset$blinkrate) phylosig(tree,dataset$duration) #exists for both measures, so worth pursuing further blinkerSUBforduration <- comparative.data(tree, dataset, Species, vcv=TRUE, vcv.dim=3, scope=duration~diurnal) blinkerFULLforblinkrate <- comparative.data(tree, dataset, Species, vcv=TRUE, vcv.dim=3, na.omit=FALSE) #full model for blinkrate, estimating kappa using maximum likelihood br1 <- pgls(blinkrate ~ locomotion + diurnal + mass + groupsize + trophicguild, blinkerFULLforblinkrate, lambda='ML') summary(br1) # maximum likelihood method doesn't work for estimating lambda, giving estimated value of 0 with 95% bound close round this: means no attention paid to prior history, despite there being signal in the data. Instead try estimating kappa br2 <- pgls(blinkrate ~ locomotion + diurnal + mass + groupsize + trophicguild, blinkerFULLforblinkrate, kappa='ML') summary(br2) #kappa isn't a great estimate in this case, but prior experimentation on reduced models shows that it is possible to estimate kappa for reduced models anova(br2) plot(br2) #qqplot looks unusual - trying a transformation br3 <- pgls(sqrt(blinkrate+1) ~ locomotion + diurnal + mass + groupsize + trophicguild, blinkerFULLforblinkrate, kappa='ML') summary(br3) anova(br3) plot(br3) #qqplot looks better - check with a test shapiro.test(resid(br3)) #trophic guild explains least, so remove from model br4 <- pgls(sqrt(blinkrate+1) ~ locomotion + diurnal + mass + groupsize, blinkerFULLforblinkrate, kappa='ML') summary(br4) anova(br4) plot(br4) shapiro.test(resid(br4)) #compare br3 and br4 with likelihood ratio test lrtest(br3, br4) #no sig diff - br4 preferred as fewer terms. #now remove groupsize, which explains the least of br4 br5 <- pgls(sqrt(blinkrate+1) ~ locomotion + diurnal + mass, blinkerFULLforblinkrate, kappa='ML') summary(br5) anova(br5) plot(br5) shapiro.test(resid(br5)) #compare br4 and br5 with likelihood ratio test lrtest(br4, br5) #no sig diff - br5 preferred as fewer terms. #now remove diurnal, which explains the least of br5 br6 <- pgls(sqrt(blinkrate+1) ~ locomotion + mass, blinkerFULLforblinkrate, kappa='ML') summary(br6) anova(br6) plot(br6) shapiro.test(resid(br6)) #compare br5 and br6 with likelihood ratio test lrtest(br5, br6) #no sig diff - br6 preferred as fewer terms. #now remove mass, which explains the least of br6 br7 <- pgls(sqrt(blinkrate+1) ~ locomotion , blinkerFULLforblinkrate, kappa='ML') summary(br7) anova(br7) plot(br7) shapiro.test(resid(br7)) #compare br6 and br7 with likelihood ratio test lrtest(br6, br7) #no sig diff - br7 preferred as fewer terms. #to avoid missing any subtle effects from earlier removals, models also tried with locomotion and one other term (diurnal, groupsize and trophicguild, whilst mass was already examined in model6) br8 <- pgls(sqrt(blinkrate+1) ~ locomotion + diurnal, blinkerFULLforblinkrate, kappa='ML') anova(br8) #diurnal not significant lrtest(br7, br8) #br7 still best br9 <- pgls(sqrt(blinkrate+1) ~ locomotion + groupsize, blinkerFULLforblinkrate, kappa='ML') anova(br9) #diurnal not significant lrtest(br7, br9) #br7 still best br10 <- pgls(sqrt(blinkrate+1) ~ locomotion + trophicguild, blinkerFULLforblinkrate, kappa='ML') anova(br10) #diurnal not significant lrtest(br7, br10) #br7 still best ###so br7 preferred model: sqrt(blinkrate+1) ~ locomotion #plotting this... ggplot(data=dataset, mapping=aes(x=locomotion, y=blinkrate), outlier.shape=NA)+ geom_violin(draw_quantiles = c(0.25, 0.5, 0.75))+geom_jitter(width=.1)+ scale_x_discrete(labels=c("arboreal","both","terrestrial"))+ labs(y="blink rate (blinks per minute)") + theme_classic(base_size = 20) # duration, starting off by estimating kappa as in previous dur1 <- pgls(duration ~ locomotion + diurnal + mass + groupsize + trophicguild, blinkerSUBforduration, kappa='ML') summary(dur1) #estimated kappa close to zero. Instead, estimating delta using ML dur2 <- pgls(duration ~ locomotion + diurnal + mass + groupsize + trophicguild, blinkerSUBforduration, delta='ML') summary(dur2) anova(dur2) plot(dur2) #possibly okay, but checking whether there is a little skew and qqplot not lined up nicely shapiro.test(resid(dur2)) #okay, nothing bad #reducing model, taking out trophicguild dur3 <- pgls(duration ~ locomotion + diurnal + mass + groupsize , blinkerSUBforduration, delta='ML') summary(dur3) anova(dur3) plot(dur3) shapiro.test(resid(dur3)) #compare dur2 and dur3 with likelihood ratio test lrtest(dur2, dur3) #no sig diff - dur3 preferred as fewer terms. #now remove diurnal, which explains the least of br4 dur4 <- pgls(duration ~ locomotion + mass + groupsize , blinkerSUBforduration, delta='ML') summary(dur4) anova(dur4) plot(dur4) shapiro.test(resid(dur4)) #compare br4 and br5 with likelihood ratio test lrtest(dur3, dur4) #sig diff - dur4 has lower log likelihood, so explains data better. #no nonsig terms left. To check presence of diurnal wasn't masking effects of trophicguild, this is added back in dur5 <- pgls(duration ~ locomotion + mass + groupsize +trophicguild, blinkerSUBforduration, delta='ML') summary(dur5) anova(dur5) lrtest(dur4, dur5) #trophicguild not sig, and dur4 explains data better ### so dur4 preferred model: duration ~ locomotion + mass + groupsize #not shown: slight concerns about delta being set to 3 by ML throughout (which means the trees are quite drawn out). Also tried simultaneously estimating delta and kappa together (needing log transofmration), and then reducing model dur_temp1 <- pgls(log(duration+1) ~ locomotion + diurnal + mass + groupsize + trophicguild, blinkerSUBforduration, delta='ML', kappa='ML') summary(dur_temp1) anova(dur_temp1) dur_temp2 <- pgls(log(duration+1) ~ locomotion + diurnal + mass + groupsize, blinkerSUBforduration, delta='ML', kappa='ML') summary(dur_temp2) anova(dur_temp2) dur_temp3 <- pgls(log(duration+1) ~ locomotion + diurnal + mass, blinkerSUBforduration, delta='ML', kappa='ML') summary(dur_temp3) anova(dur_temp3) plot(dur_temp3) shapiro.test(resid(dur_temp3)) # these estimate kappa and delta together at less extreme values, and give qualitatively similar results to when just delta is estimated