#======================================================================= # Using PCA and beta regression to assess effect of mass, age, Zp on # laminarity in bones of growing pigeons # Citation: McGuire, Ourfalian, Ezell, Lee (in prep) #======================================================================= install.packages('DescTools') # install.packages('gamlss') # install install.packages('rospca', dependencies=TRUE) # these install.packages('devtools') # packages library(devtools) # first install_github("vqv/ggbiplot", force=TRUE) # library(DescTools) # For robust centering and scaling library(rospca) # Robust PCA library(ggbiplot) # pretty PCA plotting library(car) # pretty Q-Q plots to assess data normality library(gamlss) # zero-inflated beta regression set.seed(1001) # enable reproducibility # Multiple plot function # # ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects) # - cols: Number of columns in layout # - layout: A matrix specifying the layout. If present, 'cols' is ignored. # # If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE), # then plot 1 will go in the upper left, 2 will go in the upper right, and # 3 will go all the way across the bottom. # multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) { library(grid) # Make a list from the ... arguments and plotlist plots <- c(list(...), plotlist) numPlots = length(plots) # If layout is NULL, then use 'cols' to determine layout if (is.null(layout)) { # Make the panel # ncol: Number of columns of plots # nrow: Number of rows needed, calculated from # of cols layout <- matrix(seq(1, cols * ceiling(numPlots/cols)), ncol = cols, nrow = ceiling(numPlots/cols)) } if (numPlots==1) { print(plots[[1]]) } else { # Set up the page grid.newpage() pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout)))) # Make each plot, in the correct location for (i in 1:numPlots) { # Get the i,j matrix positions of the regions that contain this subplot matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE)) print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row, layout.pos.col = matchidx$col)) } } } #============================================================= # PCA # Testing the influence of ontogeny on appositional laminarity # in INDIVIDUAL elements #============================================================= hum.histo.df = data.frame( spec=factor(c("258","270","271","272","269","273","276","275","274","256","257","254","255")), mass=c(242,209,209,341,372,314,360,364,374,455,498,482,563), length=c(38.3,39.9,41.1,47.4,47.8,47.5,46.7,47.6,49.9,49.2,50.9,50.0,51.0), Zp=c(4.058,4.576,5.341,10.893,12.124,11.293,12.818,13.649,13.123,11.510,14.294,16.442,17.822), LI=c(0.348,0.378,0.486,0.202,0.082,0.189,0.126,0.159,0.182,0.124,0.140,0.158,0.122), LI.A=c(0.0459,0.0229,0.0292,0.0148,0.0089,0.0110,0.0037,0.0072,0.0087,0.0039,0.0049,0.0048,0.0033)) rad.histo.df = data.frame( spec=factor(c("258","270","271","272","269","273","276","275","274","256","257","254","255")), mass=c(242,209,209,341,372,314,360,364,374,455,498,482,563), length=c(40.3,41.1,42.4,49.0,50.3,49.5,48.3,53.4,50.6,55.3,52.4,54.0,52.3), Zp=c(0.546,0.734,0.878,1.281,1.501,1.380,1.709,2.158,1.481,1.845,2.205,2.266,2.516), LI=c(0.108,0.088,0.129,0.067,0.039,0.087,0.029,0.034,0.079,0.024,0.063,0.029,0.096), LI.A=c(0.0250,0.0077,0.0046,0.0033,0.0038,0.0042,0.0014,0.0030,0.0039,0.0008,0.0022,0.0009,0.0030)) ulna.histo.df = data.frame( spec=factor(c("258","270","271","272","269","273","276","275","274","256","257","254","255")), mass=c(242,209,209,341,372,314,360,364,374,455,498,482,563), length=c(42.8,45.4,46.2,54.1,54.5,54.5,54.1,54.9,56.9,56.4,56.3,58.0,56.2), Zp=c(2.348,2.933,3.518,5.340,6.496,5.758,6.090,7.029,5.649,7.016,7.058,7.797,8.053), LI=c(0.311,0.281,0.332,0.142,0.075,0.143,0.088,0.130,0.094,0.086,0.106,0.110,0.103), LI.A=c(0.0599,0.0151,0.0225,0.0094,0.0106,0.0101,0.0042,0.0104,0.0047,0.0028,0.0039,0.0044,0.0030)) fem.histo.df = data.frame( spec=factor(c("258","270","271","272","269","273","276","275","274","256","257","254","255")), mass=c(242,209,209,341,372,314,360,364,374,455,498,482,563), length=c(35.2,35.7,35.4,42.5,42.4,44.1,41.7,41.5,44.3,44.4,45.0,45.0,45.8), Zp=c(2.072,2.139,2.294,4.904,4.783,4.334,5.265,5.162,5.282,5.544,6.196,7.558,7.668), LI=c(0.240,0.321,0.299,0.120,0.124,0.132,0.197,0.160,0.199,0.121,0.111,0.064,0.063), LI.A=c(0.0551,0.0152,0.0110,0.0058,0.0105,0.0084,0.0085,0.0086,0.0055,0.0023,0.0035,0.0013,0.0020)) tbt.histo.df = data.frame( spec=factor(c("258","270","271","272","269","273","276","275","274","256","257","254","255")), mass=c(242,209,209,341,372,314,360,364,374,455,498,482,563), length=c(46.6,48.4,47.6,59.5,59.9,59.3,59.7,64.3,61.2,60.5,62.7,61.8,63.6), Zp=c(1.804,1.713,1.992,3.222,4.023,3.261,4.225,5.311,3.545,4.095,4.759,5.681,6.444), CSA=c(2.059,1.635,1.904,3.300,3.302,3.135,3.64,4.179,3.123,3.359,3.881,4.851,4.873), LI=c(0.106,0.135,0.084,0.034,0.037,0.077,0.033,0.061,0.071,0.033,0.062,0.052,0.042), LI.A=c(0.0232,0.0061,0.0025,0.0015,0.0019,0.0048,0.0013,0.0021,0.0019,0.0008,0.0016,0.0009,0.0007)) #----------- # Robust PCA #----------- hum.histo.pca <- robpca(RobScale(hum.histo.df[,2:4], center=TRUE, scale= TRUE), k=3, skew=TRUE) hum.histo.pca$eigenvalues # eigenvalues sqrt(hum.histo.pca$eigenvalues) # Standard dev c(hum.histo.pca$eigenvalues/sum(hum.histo.pca$eigenvalues)) # Propor of Var cumsum(hum.histo.pca$eigenvalues/sum(hum.histo.pca$eigenvalues)) # Cum Proportion hum.histo.pca$loadings[,2] <- -1*hum.histo.pca$loadings[,2] # eigenvectors hum.histo.pca$scores[,2] <- -1*hum.histo.pca$scores[,2] # scores (Z) rad.histo.pca <- robpca(RobScale(rad.histo.df[,2:4], center=TRUE, scale= TRUE), k=3, skew=TRUE) rad.histo.pca$eigenvalues # eigenvalues sqrt(rad.histo.pca$eigenvalues) # Standard dev c(rad.histo.pca$eigenvalues/sum(rad.histo.pca$eigenvalues)) # Propor of Var cumsum(rad.histo.pca$eigenvalues/sum(rad.histo.pca$eigenvalues)) # Cum Proportion rad.histo.pca$loadings[,2] <- -1*rad.histo.pca$loadings[,2] # eigenvectors rad.histo.pca$scores[,2] <- -1*rad.histo.pca$scores[,2] # scores (Z) ulna.histo.pca <- robpca(RobScale(ulna.histo.df[,2:4], center=TRUE, scale= TRUE), k=3, skew=TRUE) ulna.histo.pca$eigenvalues # eigenvalues sqrt(ulna.histo.pca$eigenvalues) # Standard dev c(ulna.histo.pca$eigenvalues/sum(ulna.histo.pca$eigenvalues)) # Propor of Var cumsum(ulna.histo.pca$eigenvalues/sum(ulna.histo.pca$eigenvalues)) # Cum Proportion ulna.histo.pca$loadings[,2] <- -1*ulna.histo.pca$loadings[,2] # eigenvectors ulna.histo.pca$scores[,2] <- -1*ulna.histo.pca$scores[,2] # scores (Z) fem.histo.pca <- robpca(RobScale(fem.histo.df[,2:4], center=TRUE, scale= TRUE), k=3, skew=TRUE) fem.histo.pca$eigenvalues # eigenvalues sqrt(fem.histo.pca$eigenvalues) # Standard dev c(fem.histo.pca$eigenvalues/sum(fem.histo.pca$eigenvalues)) # Propor of Var cumsum(fem.histo.pca$eigenvalues/sum(fem.histo.pca$eigenvalues)) # Cum Proportion fem.histo.pca$loadings[,2] <- -1*fem.histo.pca$loadings[,2] # eigenvectors fem.histo.pca$scores[,2] <- -1*fem.histo.pca$scores[,2] # scores (Z) tbt.histo.pca <- robpca(RobScale(tbt.histo.df[,2:4], center=TRUE, scale= TRUE), k=3, skew=TRUE) tbt.histo.pca$eigenvalues # eigenvalues sqrt(tbt.histo.pca$eigenvalues) # Standard dev c(tbt.histo.pca$eigenvalues/sum(tbt.histo.pca$eigenvalues)) # Propor of Var cumsum(tbt.histo.pca$eigenvalues/sum(tbt.histo.pca$eigenvalues)) # Cum Proportion tbt.histo.pca$loadings[,2] <- -1*tbt.histo.pca$loadings[,2] # eigenvectors tbt.histo.pca$scores[,2] <- -1*tbt.histo.pca$scores[,2] # scores (Z) #---------------------------------------- # Beta regression of principal components #---------------------------------------- hum.histo.df$PC1 <- hum.histo.pca$scores[,1] rad.histo.df$PC1 <- rad.histo.pca$scores[,1] ulna.histo.df$PC1 <- ulna.histo.pca$scores[,1] fem.histo.df$PC1 <- fem.histo.pca$scores[,1] tbt.histo.df$PC1 <- tbt.histo.pca$scores[,1] PCR.hum.histo.1 <- gamlss(LI ~ PC1, family = BE, data=hum.histo.df, trace = F) summary(PCR.hum.histo.1) shapiro.test(PCR.hum.histo.1$resid) # residuals are normal AIC(PCR.hum.histo.1, c=TRUE) # AICc Rsq(PCR.hum.histo.1) # pseudo R2 PCR.rad.histo.1 <- gamlss(LI ~ PC1, family = BE, data=rad.histo.df, trace = F) summary(PCR.rad.histo.1) shapiro.test(PCR.rad.histo.1$resid) # residuals are normal AIC(PCR.rad.histo.1, c=TRUE) # AICc Rsq(PCR.rad.histo.1) # pseudo R2 PCR.ulna.histo.1 <- gamlss(LI ~ PC1, family = BE, data=ulna.histo.df, trace = F) summary(PCR.ulna.histo.1) shapiro.test(PCR.ulna.histo.1$resid) # residuals are normal AIC(PCR.ulna.histo.1, c=TRUE) # AICc Rsq(PCR.ulna.histo.1) # pseudo R2 PCR.fem.histo.1 <- gamlss(LI ~ PC1, family = BE, data=fem.histo.df, trace = F) summary(PCR.fem.histo.1) shapiro.test(PCR.fem.histo.1$resid) # residuals are normal AIC(PCR.fem.histo.1, c=TRUE) # AICc Rsq(PCR.fem.histo.1) # pseudo R2 PCR.tbt.histo.1 <- gamlss(LI ~ PC1, family = BE, data=tbt.histo.df, trace = F) summary(PCR.tbt.histo.1) shapiro.test(PCR.tbt.histo.1$resid) # residuals are normal AIC(PCR.tbt.histo.1, c=TRUE) # AICc Rsq(PCR.tbt.histo.1) # pseudo R2 #----------------------------------------------------------------- # Calculate model coefficients in terms of the original variables # Model 1 #----------------------------------------------------------------- orig.beta <- as.matrix(hum.histo.pca$loadings[,1]) %*% as.vector(coef(PCR.hum.histo.1)[2]) orig.rad <- as.matrix(rad.histo.pca$loadings[,1]) %*% as.vector(coef(PCR.rad.histo.1)[2]) orig.ulna <- as.matrix(ulna.histo.pca$loadings[,1]) %*% as.vector(coef(PCR.ulna.histo.1)[2]) orig.fem <- as.matrix(fem.histo.pca$loadings[,1]) %*% as.vector(coef(PCR.fem.histo.1)[2]) orig.tbt <- as.matrix(tbt.histo.pca$loadings[,1]) %*% as.vector(coef(PCR.tbt.histo.1)[2]) #=========================================================================== # Calculate 95% confidence bands of curve using non-parametric bootstrapping #=========================================================================== # Bootstrap confidence bands function np.conf.bands <- function(new.x, B, pred, alpha=0.05){ pred.combine <- do.call(rbind, pred) conf.bands <- cbind(new.x, t(apply(pred.combine, 2, quantile, probs=c(alpha/2, 1-alpha/2), type=6))) colnames(conf.bands)[2] <- "lower.band" colnames(conf.bands)[3] <- "upper.band" return(conf.bands) } #--------------------- # Initialize bootstrap #--------------------- B <- 5000 LI.model <- LI ~ PC1 logit.model <- function(x,B0,B1){ y=exp(B0+B1*x)/ (1+exp(B0+B1*x)) } #---------------------------------------------------- # Resample, bootstrap, and calculate confidence bands #---------------------------------------------------- # Humerus hum.interpolate.PC1 <- data.frame(PC1=round(seq( from=min(hum.histo.df$PC1), to=max(hum.histo.df$PC1), length.out=100),7)) hum.histo.res <- replicate(B, hum.histo.df[sample(1:nrow(hum.histo.df),replace=T),],simplify=F) hum.histo.boot <- list() hum.histo.pred.res <- list() for (i in 1:B){ hum.histo.boot[[i]] <- gamlss(LI.model, data=hum.histo.res[[i]], family = BE, trace = F) hum.histo.pred.res[[i]] <- logit.model(hum.interpolate.PC1$PC1, coef(hum.histo.boot[[i]])[1], coef(hum.histo.boot[[i]])[2]) } hum.histo.conf.bands <- np.conf.bands(new.x=hum.interpolate.PC1, B, pred=hum.histo.pred.res) hum.band.poly <- data.frame( bandx = c(hum.histo.conf.bands[,1], rev(hum.histo.conf.bands[,1])), bandy = c(hum.histo.conf.bands[,2], rev(hum.histo.conf.bands[,3]))) # Ulna ulna.interpolate.PC1 <- data.frame(PC1=round(seq( from=min(ulna.histo.df$PC1), to=max(ulna.histo.df$PC1), length.out=100),7)) ulna.histo.res <- replicate(B, ulna.histo.df[sample(1:nrow(ulna.histo.df),replace=T),],simplify=F) ulna.histo.boot <- list() ulna.histo.pred.res <- list() for (i in 1:B){ ulna.histo.boot[[i]] <- gamlss(LI.model, data=ulna.histo.res[[i]], family = BE, trace = F) ulna.histo.pred.res[[i]] <- logit.model(ulna.interpolate.PC1$PC1, coef(ulna.histo.boot[[i]])[1], coef(ulna.histo.boot[[i]])[2]) } ulna.histo.conf.bands <- np.conf.bands(new.x=ulna.interpolate.PC1, B, pred=ulna.histo.pred.res) ulna.band.poly <- data.frame( bandx = c(ulna.histo.conf.bands[,1], rev(ulna.histo.conf.bands[,1])), bandy = c(ulna.histo.conf.bands[,2], rev(ulna.histo.conf.bands[,3]))) # Radius rad.interpolate.PC1 <- data.frame(PC1=round(seq( from=min(rad.histo.df$PC1), to=max(rad.histo.df$PC1), length.out=100),7)) rad.histo.res <- replicate(B, rad.histo.df[sample(1:nrow(rad.histo.df),replace=T),],simplify=F) rad.histo.boot <- list() rad.histo.pred.res <- list() for (i in 1:B){ rad.histo.boot[[i]] <- gamlss(LI.model, data=rad.histo.res[[i]], family = BE, trace = F) rad.histo.pred.res[[i]] <- logit.model(rad.interpolate.PC1$PC1, coef(rad.histo.boot[[i]])[1], coef(rad.histo.boot[[i]])[2]) } rad.histo.conf.bands <- np.conf.bands(new.x=rad.interpolate.PC1, B, pred=rad.histo.pred.res) rad.band.poly <- data.frame( bandx = c(rad.histo.conf.bands[,1], rev(rad.histo.conf.bands[,1])), bandy = c(rad.histo.conf.bands[,2], rev(rad.histo.conf.bands[,3]))) # Femur fem.interpolate.PC1 <- data.frame(PC1=round(seq( from=min(fem.histo.df$PC1), to=max(fem.histo.df$PC1), length.out=100),7)) fem.histo.res <- replicate(B, fem.histo.df[sample(1:nrow(fem.histo.df),replace=T),],simplify=F) fem.histo.boot <- list() fem.histo.pred.res <- list() for (i in 1:B){ fem.histo.boot[[i]] <- gamlss(LI.model, data=fem.histo.res[[i]], family = BE, trace = F) fem.histo.pred.res[[i]] <- logit.model(fem.interpolate.PC1$PC1, coef(fem.histo.boot[[i]])[1], coef(fem.histo.boot[[i]])[2]) } fem.histo.conf.bands <- np.conf.bands(new.x=fem.interpolate.PC1, B, pred=fem.histo.pred.res) fem.band.poly <- data.frame( bandx = c(fem.histo.conf.bands[,1], rev(fem.histo.conf.bands[,1])), bandy = c(fem.histo.conf.bands[,2], rev(fem.histo.conf.bands[,3]))) # TBT tbt.interpolate.PC1 <- data.frame(PC1=round(seq( from=min(tbt.histo.df$PC1), to=max(tbt.histo.df$PC1), length.out=100),7)) tbt.histo.res <- replicate(B, tbt.histo.df[sample(1:nrow(tbt.histo.df),replace=T),],simplify=F) tbt.histo.boot <- list() tbt.histo.pred.res <- list() for (i in 1:B){ tbt.histo.boot[[i]] <- gamlss(LI.model, data=tbt.histo.res[[i]], family = BE, trace = F) tbt.histo.pred.res[[i]] <- logit.model(tbt.interpolate.PC1$PC1, coef(tbt.histo.boot[[i]])[1], coef(tbt.histo.boot[[i]])[2]) } tbt.histo.conf.bands <- np.conf.bands(new.x=tbt.interpolate.PC1, B, pred=tbt.histo.pred.res) tbt.band.poly <- data.frame( bandx = c(tbt.histo.conf.bands[,1], rev(tbt.histo.conf.bands[,1])), bandy = c(tbt.histo.conf.bands[,2], rev(tbt.histo.conf.bands[,3]))) #----------- # Plot PCR #----------- fem.color <- c("purple") tbt.color <- c("orange") hum.color <- c("green") ulna.color <- c("deepskyblue") rad.color <- c("red") hum.plot <- ggplot(hum.histo.df, aes(y=LI, x=PC1)) + xlim(min(hum.interpolate.PC1), max(hum.histo.df$PC1)) + ylim(0,0.52) + theme_bw() + theme(legend.position="none") + geom_polygon(data=hum.band.poly, fill="green4", alpha=0.5, aes(x=bandx, y=bandy)) + geom_point(shape=24,color="black",fill=hum.color, size = 3) + stat_function(fun = logit.model, args=list(B0=coef(PCR.hum.histo.1)[1],B1=coef(PCR.hum.histo.1)[2]), color="green4", size=1) + labs(x="",y="LI") ulna.plot <- ggplot(ulna.histo.df, aes(y=LI, x=PC1)) + xlim(min(ulna.histo.df$PC1), max(ulna.histo.df$PC1)) + ylim(0,0.52) + theme_bw() + theme(legend.position="none") + geom_polygon(data=ulna.band.poly, fill="deepskyblue", alpha=0.5, aes(x=bandx, y=bandy)) + geom_point(shape=22,color="black",fill=ulna.color, size = 3) + stat_function(fun = logit.model, args=list(B0=coef(PCR.ulna.histo.1)[1],B1=coef(PCR.ulna.histo.1)[2]), color="deepskyblue", size=1) + labs(x="",y="") rad.plot <- ggplot(rad.histo.df, aes(y=LI, x=PC1)) + xlim(min(rad.interpolate.PC1), max(rad.interpolate.PC1)) + ylim(0,0.52) + theme_bw() + theme(legend.position="none") + geom_polygon(data=rad.band.poly, fill="red3", alpha=0.5, aes(x=bandx, y=bandy)) + geom_point(shape=21,color="black",fill=rad.color, size = 3) + stat_function(fun = logit.model, args=list(B0=coef(PCR.rad.histo.1)[1],B1=coef(PCR.rad.histo.1)[2]), color="red3", size=1) + labs(x="Ontogenetic Axis (PC 1)",y="") fem.plot <- ggplot(fem.histo.df, aes(y=LI, x=PC1)) + xlim(min(fem.interpolate.PC1), max(fem.interpolate.PC1)) + ylim(0,0.52) + theme_bw() + theme(legend.position="none") + geom_polygon(data=fem.band.poly, fill="purple3", alpha=0.5, aes(x=bandx, y=bandy)) + geom_point(shape=23,color="black",fill=fem.color, size = 3) + stat_function(fun = logit.model, args=list(B0=coef(PCR.fem.histo.1)[1],B1=coef(PCR.fem.histo.1)[2]), color="purple3", size=1)+ labs(x="",y="") tbt.plot <- ggplot(tbt.histo.df, aes(y=LI, x=PC1)) + xlim(min(tbt.interpolate.PC1), max(tbt.interpolate.PC1)) + ylim(0,0.52) + theme_bw() + theme(legend.position="none") + geom_polygon(data=tbt.band.poly, fill="orange4", alpha=0.5, aes(x=bandx, y=bandy)) + geom_point(shape=25,color="black",fill=tbt.color, size = 3) + stat_function(fun = logit.model, args=list(B0=coef(PCR.tbt.histo.1)[1],B1=coef(PCR.tbt.histo.1)[2]), color="orange4", size=1) + labs(x="",y="") dev.new(width=10,height=3) multiplot(hum.plot,ulna.plot,rad.plot,fem.plot,tbt.plot,cols=5)