#======================================================================== # Robust principal component beta regression # Testing the influence of growth rate and shear strain on laminarity # of recent appositional bone in the caudal octant of the femur and # tibiotarsus # # Please cite: Kuehn, Lee, Main, Simons (2019) # #======================================================================== install.packages('DescTools') # install.packages('gamlss') # install install.packages('rospca', dependencies=TRUE) # these install.packages('devtools') # packages library(devtools) # first install_github("vqv/ggbiplot", force=TRUE) # library(DescTools) # For robust centering and scaling library(rospca) # Robust PCA library(ggbiplot) # pretty PCA plotting library(car) # pretty Q-Q plots to assess data normality library(gamlss) # zero-inflated beta regression set.seed(1001) # enable reproducibility #============================================ # Modified ggbiplot() to plot robpca() object #============================================ ggbiplot.robpca <- function (pcobj, choices = 1:2, scale = 1, pc.biplot = TRUE, obs.scale = 1 - scale, var.scale = scale, groups = NULL, ellipse = FALSE, ellipse.prob = 0.68, labels = NULL, labels.size = 3, alpha = 1, var.axes = TRUE, circle = FALSE, circle.prob = 0.69, varname.size = 3, varname.adjust = 1.5, ...) { library(ggplot2) library(plyr) library(scales) library(grid) stopifnot(length(choices) == 2) nobs.factor <- sqrt(nrow(pcobj$scores) - 1) d <- sqrt(pcobj$eigenvalues) u <- sweep(pcobj$scores, 2, 1/(d * nobs.factor), FUN = "*") v <- pcobj$loadings choices <- pmin(choices, ncol(u)) df.u <- as.data.frame(sweep(u[, choices], 2, d[choices]^obs.scale, FUN = "*")) v <- sweep(v, 2, d^var.scale, FUN = "*") df.v <- as.data.frame(v[, choices]) names(df.u) <- c("xvar", "yvar") names(df.v) <- names(df.u) if (pc.biplot) { df.u <- df.u * nobs.factor } r <- sqrt(qchisq(circle.prob, df = 2)) * prod(colMeans(df.u^2))^(1/4) v.scale <- rowSums(v^2) df.v <- r * df.v/sqrt(max(v.scale)) if (obs.scale == 0) { u.axis.labs <- paste("standardized PC", choices, sep = "") } else { u.axis.labs <- paste("PC", choices, sep = "") } u.axis.labs <- paste(u.axis.labs, sprintf("(%0.1f%% explained var.)", 100 * pcobj$eigenvalues[choices]/sum(pcobj$eigenvalues))) if (!is.null(labels)) { df.u$labels <- labels } if (!is.null(groups)) { df.u$groups <- groups } df.v$varname <- rownames(v) df.v$angle <- with(df.v, (180/pi) * atan(yvar/xvar)) df.v$hjust = with(df.v, (1 - varname.adjust * sign(xvar))/2) g <- ggplot(data = df.u, aes(x = xvar, y = yvar)) + xlab(u.axis.labs[1]) + ylab(u.axis.labs[2]) + coord_equal() if (var.axes) { if (circle) { theta <- c(seq(-pi, pi, length = 50), seq(pi, -pi, length = 50)) circle <- data.frame(xvar = r * cos(theta), yvar = r * sin(theta)) g <- g + geom_path(data = circle, color = muted("white"), size = 1/2, alpha = 1/3) } g <- g + geom_segment(data = df.v, aes(x = 0, y = 0, xend = xvar, yend = yvar), arrow = arrow(length = unit(1/2, "picas")), color = muted("red")) } if (!is.null(df.u$labels)) { if (!is.null(df.u$groups)) { g <- g + geom_text(aes(label = labels, color = groups), size = labels.size) } else { g <- g + geom_text(aes(label = labels), size = labels.size) } } else { if (!is.null(df.u$groups)) { g <- g + geom_point(aes(color = groups), alpha = alpha) } else { g <- g + geom_point(alpha = alpha) } } if (!is.null(df.u$groups) && ellipse) { theta <- c(seq(-pi, pi, length = 50), seq(pi, -pi, length = 50)) circle <- cbind(cos(theta), sin(theta)) ell <- ddply(df.u, "groups", function(x) { if (nrow(x) <= 2) { return(NULL) } sigma <- var(cbind(x$xvar, x$yvar)) mu <- c(mean(x$xvar), mean(x$yvar)) ed <- sqrt(qchisq(ellipse.prob, df = 2)) data.frame(sweep(circle %*% chol(sigma) * ed, 2, mu, FUN = "+"), groups = x$groups[1]) }) names(ell)[1:2] <- c("xvar", "yvar") g <- g + geom_path(data = ell, aes(color = groups, group = groups)) } if (var.axes) { g <- g + geom_text(data = df.v, aes(label = varname, x = xvar, y = yvar, angle = angle, hjust = hjust), color = "darkred", size = varname.size) } return(g) } #============================================================= # PCA # Testing the influence of ontogeny on appositional laminarity # in hindlimb elements #============================================================= GR_hind.df = data.frame( element=factor(rep(c("Femur","TBT"), c(8,8))), specimen=factor(rep(c("15","1c","17","14b","16","2a","21","23"),2)), mass=rep(c(0.74,0.94,1.53,4.73,6.85,11.13,28.9,29.4), 2), age=rep(c(2.3,2.4,4.6,8.1,12,15.9,48,60.1), 2), GR=c(130.2,73.3,162.6,101.1,38.3,29.4,6.4,5.9, 62.3,53.8,99.1,68.5,41.5,29.2,4.9,3.8), LI=c(0.034,0.139,0.016,0.139,0.476,0.559,0.289,0.506, 0.016,0.120,0.013,0.140,0.187,0.350,0.387,0.585)) #--------------------------------- # Testing for normality (skewness) #--------------------------------- hist(GR_hind.df$mass) qqPlot(GR_hind.df$mass) shapiro.test(GR_hind.df$mass) # p = 0.0006 suggests data are not normal hist(GR_hind.df$age) qqPlot(GR_hind.df$age) shapiro.test(GR_hind.df$age) # p = 0.0004 suggests data are not normal hist(GR_hind.df$GR) qqPlot(GR_hind.df$GR) shapiro.test(GR_hind.df$GR) # p = 0.1624 suggests data are normal hist(GR_hind.df$LI) qqPlot(GR_hind.df$LI) shapiro.test(GR_hind.df$LI) # p = 0.0618 suggests data are normal #----------- # Robust PCA #----------- GR.hind.pca <- robpca(RobScale(GR_hind.df[,3:5], center=TRUE, scale= TRUE), k=3, skew=TRUE) GR.hind.pca$eigenvalues # eigenvalues sqrt(GR.hind.pca$eigenvalues) # Standard dev c(GR.hind.pca$eigenvalues/sum(GR.hind.pca$eigenvalues)) # Propor of Var cumsum(GR.hind.pca$eigenvalues/sum(GR.hind.pca$eigenvalues)) # Cum Proportion GR.hind.pca$loadings[,2] <- -1*GR.hind.pca$loadings[,2] # eigenvectors GR.hind.pca$loadings*sqrt(GR.hind.pca$eigenvalues) # loadings GR.hind.pca$scores[,2] <- -1*GR.hind.pca$scores[,2] # scores (Z) #========= # Plot PCA #========= fem.color <- rgb(77/255,175/255,74/255, 1) tbt.color <- rgb(55/255,126/255,184/255,1) hum.color <- rgb(152/255,78/255,163/255,1) ulna.color <- rgb(228/255,26/255,28/255,1) rad.color <- rgb(255/255,127/255,0/255,1) dev.new(width=4, height=3) par(mar=c(3,3.9,1,0.5)+0.1) ggbiplot.robpca(GR.hind.pca, obs.scale=1, var.scale=1, groups=GR_hind.df$element, scale=1, circle=FALSE, varname.size=3.5) + xlim(-3,4.5) + ylim(-2.2,2.2) + theme_bw() + geom_point(aes(colour=GR_hind.df$element, shape=GR_hind.df$element, fill=GR_hind.df$element), size = 3) + scale_shape_manual(values=c(24,22), name="", labels=c("Femur", "TBT")) + scale_color_manual(values=c("black","black"), name="", labels=c("Femur", "TBT")) + scale_fill_manual(values=c(fem.color,tbt.color), name="", labels=c("Femur", "TBT")) + labs( x=paste("PC1 (",round(GR.hind.pca$eigenvalues[1]/ sum(GR.hind.pca$eigenvalues)*100,0),"%) - Ontogenetic Effects", sep = ""), y=paste("PC2 (",round(GR.hind.pca$eigenvalues[2]/ sum(GR.hind.pca$eigenvalues)*100,0),"%) - Growth Rate", sep = "")) + theme(legend.position=c(0.88,0.17), legend.title=element_blank(), legend.background=element_rect(color = "black", fill = "white", size=0.5, linetype = "solid")) #---------------------------------------- # Beta regression of principal components #---------------------------------------- GR_hind.df$PC1 <- GR.hind.pca$scores[,1] GR_hind.df$PC2 <- GR.hind.pca$scores[,2] PCR.GR.hind.betareg1 <- gamlss(LI ~ PC1 + re(random=~1|specimen), family = BE, data=GR_hind.df, trace = F) summary(PCR.GR.hind.betareg1) AIC(PCR.GR.hind.betareg1, c=TRUE) # AICc Rsq(PCR.GR.hind.betareg1) # pseudo R2 PCR.GR.hind.betareg2 <- gamlss(LI ~ PC1 + element + re(random=~1|specimen), family = BE, data=GR_hind.df, trace = F) summary(PCR.GR.hind.betareg2) AIC(PCR.GR.hind.betareg2, c=TRUE) # AICc Rsq(PCR.GR.hind.betareg2) # pseudo R2 # Calculate model coefficients in terms of the original variables hind.beta1 <- as.matrix(GR.hind.pca$loadings[,1]) %*% as.vector(coef(PCR.GR.hind.betareg1)[2]) #--------------------------------- # Beta regression plot #--------------------------------- single.logit.model <- function(x,B0,B1){ y=exp(B0+B1*x)/(1+exp(B0+B1*x)) } dev.new(width=4, height=4) Fig5 <- ggplot(GR_hind.df, aes(y=LI, x=PC1, shape=element, fill=element)) + xlim(min(GR_hind.df$PC1), max(GR_hind.df$PC1)) + ylim(0, max(GR_hind.df$LI)) + theme_bw() + stat_function(fun = single.logit.model, args=list(B0=coef(PCR.GR.hind.betareg1)[1], B1=coef(PCR.GR.hind.betareg1)[2]), color="black", size=1) + geom_point(color="black", size=3) + scale_shape_manual(values=c(24,22), name="") + scale_fill_manual(values=c(fem.color, tbt.color), name="", labels=c("Femur", "TBT")) + labs(x="Ontogenetic Effects (PC 1)", y="LI") + theme(legend.position=c(0.875,0.10), legend.title=element_blank(), legend.background=element_rect(color = "black", fill = "white", size=0.5, linetype = "solid")) #====================================================================== # Caudal hindlimb analysis # Load data table and rescale LI to deal with zeros #====================================================================== lam.df = data.frame( element=factor(rep(c("Femur", "TBT"), c(7,5))), specimen=c("1c","17","14b","16","2a","21","23", "17","14b","16","2a","21"), mass=c(0.94,1.53,4.73,6.85,11.13,28.9,29.4, 1.53,4.73,6.85,11.13,28.9), age=c(2.4,4.6,8.1,12,15.9,48,60.1, 4.6,8.1,12,15.9,48), strain=c(308,1503,997,1491,1620,1657,2283, 1397,261,947,293,1318), GR=c(58.7,197.0,132.9,60.4,41.8,9.5,9.7, 104.2,109.0,55.5,28.0,5.0), LI=c(0.211,0,0.046,0.264,0.452,0.146,0.527, 0,0.060,0.219,0.590,0.545)) # Rescale LI to lie in the (0,1) interval, shrinking the effective interval # to (0.005, 0.995) to avoid zeros and ones # y' = [y(N-1) + 0.5]/N # (Smithson and Verkuilen 2006: Psychological Methods) lam.df$LI <- round((lam.df$LI*(12-1)+0.5)/12, 3) #--------------------------------- # Testing for normality (skewness) #--------------------------------- hist(lam.df$mass) qqPlot(lam.df$mass) shapiro.test(lam.df$mass) # p = 0.005 suggests data are not normal hist(lam.df$age) qqPlot(lam.df$age) shapiro.test(lam.df$age) # p = 0.004 suggests data are not normal hist(lam.df$strain) qqPlot(lam.df$strain) shapiro.test(lam.df$strain) # p = 0.2597 suggests data are normal hist(lam.df$GR) qqPlot(lam.df$GR) shapiro.test(lam.df$GR) # p = 0.1749 suggests data are normal hist(lam.df$LI) qqPlot(lam.df$LI) shapiro.test(lam.df$LI) # p = 0.1159 suggests data are normal #------------------------------ # Robust PCA of caudal hindlimb #------------------------------ LI.pca <- robpca(RobScale(lam.df[,3:6], center=TRUE, scale= TRUE), k=4, skew=TRUE) LI.pca$eigenvalues # eigenvalues sqrt(LI.pca$eigenvalues) # Standard dev c(LI.pca$eigenvalues/sum(LI.pca$eigenvalues)) # Proportion of Var cumsum(LI.pca$eigenvalues/sum(LI.pca$eigenvalues)) # Cumulative Proportion LI.pca$loadings[,2] <- -1*LI.pca$loadings[,2] # eigenvectors (T) LI.pca$loadings[,4] <- -1*LI.pca$loadings[,4] LI.pca$loadings*LI.pca$sdev # loadings cor(RobScale(lam.df[,3:6],center=TRUE,scale=TRUE)) # correlations LI.pca$scores[,2] <- -1*LI.pca$scores[,2] # scores (Z) LI.pca$scores[,4] <- -1*LI.pca$scores[,4] #--------- # Plot PCA #--------- dev.new(width=4, height=3) ggbiplot.robpca(LI.pca, obs.scale=1, var.scale=1, groups=lam.df$element, scale=1, circle=FALSE, varname.size=3.5) + ylim(-2.7, 2.7) + theme_bw() + geom_point(aes(colour=lam.df$element, shape=lam.df$element, fill=lam.df$element), size = 3) + scale_shape_manual(values=c(24,22), name="", labels=c("Femur", "TBT")) + scale_color_manual(values=c("black","black"), name="", labels=c("Femur", "TBT")) + scale_fill_manual(values=c(fem.color,tbt.color), name="", labels=c("Femur", "TBT")) + labs( x=paste("PC1 (",round(LI.pca$eigenvalues[1]/ sum(LI.pca$eigenvalues)*100,0),"%) - Ontogenetic Effects", sep = ""), y=paste("PC2 (",round(LI.pca$eigenvalues[2]/ sum(LI.pca$eigenvalues)*100,0),"%) - Loading Effects", sep = "")) + theme(legend.position=c(0.88,0.16), legend.title=element_blank(), legend.background=element_rect(color = "black", fill = "white", size=0.5, linetype = "solid")) #---------------------------------------- # Beta regression of principal components #---------------------------------------- lam.df$PC1 <- LI.pca$scores[,1] lam.df$PC2 <- LI.pca$scores[,2] PCR.betareg1 <- gamlss(LI ~ PC1 + re(random=~1|specimen), family = BE, data=lam.df, trace = F) summary(PCR.betareg1) AIC(PCR.betareg1, c=TRUE) # AICc Rsq(PCR.betareg1) # pseudo R2 PCR.betareg2 <- gamlss(LI ~ PC2 + re(random=~1|specimen), family = BE, data=lam.df, trace = F) summary(PCR.betareg2) AIC(PCR.betareg2, c=TRUE) # AICc Rsq(PCR.betareg2) # pseudo R2 PCR.betareg3 <- gamlss(LI ~ PC1 + PC2 + re(random=~1|specimen), family = BE, data=lam.df, trace = F) summary(PCR.betareg3) AIC(PCR.betareg3, c=TRUE) # AICc Rsq(PCR.betareg3) # pseudo R2 PCR.betareg4 <- gamlss(LI ~ PC1 + element + re(random=~1|specimen), family = BE, data=lam.df, trace = F) summary(PCR.betareg4) AIC(PCR.betareg4, c=TRUE) # AICc Rsq(PCR.betareg4) # pseudo R2 PCR.betareg5 <- gamlss(LI ~ PC2 + element + re(random=~1|specimen), family = BE, data=lam.df, trace = F) summary(PCR.betareg5) AIC(PCR.betareg5, c=TRUE) # AICc Rsq(PCR.betareg5) # pseudo R2 PCR.betareg6 <- gamlss(LI ~ PC1 + PC2 + element + re(random=~1|specimen), family = BE, data=lam.df, trace = F) summary(PCR.betareg6) AIC(PCR.betareg6, c=TRUE) # AICc Rsq(PCR.betareg6) # pseudo R2 # Calculate model coefficients in terms of the original variables caud.beta <- as.matrix(LI.pca$loadings[,1]) %*% as.vector(coef(PCR.betareg1)[2]) #----------------------------------------------------------------- # Scatterplot of beta regression: caudal hindlimb #----------------------------------------------------------------- dev.new(width=4, height=4) ggplot(lam.df, aes(y=LI, x=PC1, shape=element, fill=element)) + xlim(min(lam.df$PC1), max(lam.df$PC1)) + ylim(0, max(lam.df$LI)) + theme_bw() + stat_function(fun = single.logit.model, args=list(B0=coef(PCR.betareg1)[1], B1=coef(PCR.betareg1)[2]), color="black", size=1) + geom_point(color="black", size=3) + scale_shape_manual(values=c(24,22), name="") + scale_fill_manual(values=c(fem.color, tbt.color), name="", labels=c("Femur", "TBT")) + labs(x="Ontogenetic Effects (PC 1)", y="Rescaled LI") + theme(legend.position=c(0.875,0.10), legend.title=element_blank(), legend.background=element_rect(color = "black", fill = "white", size=0.5, linetype = "solid")) #============ # PCA on wing #============ GR_wing.df = data.frame( element=factor(rep(c("Humerus","Ulna","Radius"), c(8,8,8))), specimen=rep(c("15","1c","17","14b","16","2a","21","23"),3), mass=rep(c(0.74,0.94,1.53,4.73,6.85,11.13,28.9,29.4), 3), age=rep(c(2.3,2.4,4.6,8.1,12,15.9,48,60.1), 3), GR=c(25.4,16.2,25.2,23.8,12.2,11.1,14.6,1.7, 6.8,11.6,11.5,14.4,11.8,3.4,2.2,1.6, 15.2,9.5,9.4,14.1,8.7,2.4,1.3,1.7), LI=c(0.207,0.133,0.289,0.333,0.577,0.455,0.217,0.000, 0.143,0.000,0.083,0.190,0.324,0.250,0.400,0.000, 0.000,0.111,0.111,0.150,0.273,0.000,0.000,0.000)) # Rescale LI to lie in the (0,1) interval, shrinking the effective interval # to (0.005, 0.995) to avoid zeros and ones # y' = [y(N-1) + 0.5]/N # (Smithson and Verkuilen 2006: Psychological Methods) GR_wing.df$LI <- round((GR_wing.df$LI*(24-1)+0.5)/24, 3) #--------------------------------- # Testing for normality (skewness) #--------------------------------- hist(GR_wing.df$mass) qqPlot(GR_wing.df$mass) shapiro.test(GR_wing.df$mass) # p = 4.02e-5 suggests data are not normal hist(GR_wing.df$age) qqPlot(GR_wing.df$age) shapiro.test(GR_wing.df$age) # p = 2.887e-05 suggests data are not normal hist(GR_wing.df$GR) qqPlot(GR_wing.df$GR) shapiro.test(GR_wing.df$GR) # p = 0.03815 suggests data are not normal hist(GR_wing.df$LI) qqPlot(GR_wing.df$LI) shapiro.test(GR_wing.df$LI) # p = 0.04023 suggests data are not normal #----------- # Robust PCA #----------- GR.wing.pca <- robpca(RobScale(GR_wing.df[,3:5], center=TRUE, scale= TRUE), k=3, skew=TRUE) GR.wing.pca$eigenvalues # eigenvalues sqrt(GR.wing.pca$eigenvalues) # Standard dev c(GR.wing.pca$eigenvalues/sum(GR.wing.pca$eigenvalues)) # Propor of Var cumsum(GR.wing.pca$eigenvalues/sum(GR.wing.pca$eigenvalues)) # Cum Proportion GR.wing.pca$loadings[,2] <- -1*GR.wing.pca$loadings[,2] # eigenvectors GR.wing.pca$loadings*sqrt(GR.wing.pca$eigenvalues) # loadings GR.wing.pca$scores[,2] <- -1*GR.wing.pca$scores[,2] # scores (Z) dev.new(width=4, height=3.5) ggbiplot.robpca(GR.wing.pca, obs.scale=1, var.scale=1, groups=GR_wing.df$element, scale=1, circle=FALSE, varname.size=3.5) + xlim(-3,5) + ylim(-3,3) + theme_bw() + geom_point(aes(colour=GR_wing.df$element, shape=GR_wing.df$element, fill=GR_wing.df$element), size = 3) + scale_shape_manual(values=c(24,22,21), name="", labels=c("Humerus", "Ulna", "Radius")) + scale_color_manual(values=c("black","black","black"), name="", labels=c("Humerus", "Ulna", "Radius")) + scale_fill_manual(values=c(hum.color, ulna.color, rad.color), name="", labels=c("Humerus", "Ulna", "Radius")) + labs( x=paste("PC1 (",round(GR.wing.pca$eigenvalues[1]/ sum(GR.wing.pca$eigenvalues)*100,0),"%) - Ontogenetic Effects", sep = ""), y=paste("PC2 (",round(GR.wing.pca$eigenvalues[2]/ sum(GR.wing.pca$eigenvalues)*100,0),"%) - Growth Rate", sep = "")) + theme(legend.position=c(0.85,0.19), legend.title=element_blank(), legend.background=element_rect(color = "black", fill = "white", size=0.5, linetype = "solid")) #---------------------------------------- # Beta regression of principal components #---------------------------------------- GR_wing.df$PC1 <- GR.wing.pca$scores[,1] GR_wing.df$PC2 <- GR.wing.pca$scores[,2] PCR.GR.wing.betareg1 <- gamlss(LI ~ PC1 + re(random=~1|specimen), family = BE, data=GR_wing.df, trace = F) summary(PCR.GR.wing.betareg1) AIC(PCR.GR.wing.betareg1, c=TRUE) # AICc Rsq(PCR.GR.wing.betareg1) # pseudo R2 PCR.GR.wing.betareg2 <- gamlss(LI ~ PC1 + element + re(random=~1|specimen), family = BE, data=GR_wing.df, trace = F) summary(PCR.GR.wing.betareg2) AIC(PCR.GR.wing.betareg2, c=TRUE) # AICc Rsq(PCR.GR.wing.betareg2) # pseudo R2 # Calculate model coefficients in terms of the original variables wing.beta <- as.matrix(GR.wing.pca$loadings[,1]) %*% as.vector(coef(PCR.GR.wing.betareg1)[2]) #--------------------------------- # Beta regression plot #--------------------------------- dev.new(width=4, height=4) ggplot(GR_wing.df, aes(y=LI, x=PC1, shape=element, fill=element)) + xlim(min(GR_wing.df$PC1), max(GR_wing.df$PC1)) + ylim(0, max(GR_wing.df$LI)) + theme_bw() + stat_function(fun = single.logit.model, args=list(B0=coef(PCR.GR.wing.betareg1)[1], B1=coef(PCR.GR.wing.betareg1)[2]), color="black", size=1) + geom_point(color="black", size=3) + scale_shape_manual(values=c(24,21,22), name="") + scale_fill_manual(values=c(hum.color, rad.color, ulna.color), name="", labels=c("Humerus", "Radius", "Ulna")) + labs(x="Ontogenetic Effects (PC 1)", y="Rescaled LI") + theme(legend.position=c(0.85,0.86), legend.title=element_blank(), legend.background=element_rect(color = "black", fill = "white", size=0.5, linetype = "solid"))