# Maniraptoran pelvic myology



### Introduction
# Code to analyze the area of pelvic muscle origins in maniraptoran theropod dinosaurs.
# Published data avilabled in Rhodes, Henderson & Currie (2021). Maniraptoran pelvic 
# musculature highlights evolutionary patterns in theropod locomotion on the line to 
# birds. PeerJ.
# This code was modified from RMarkdown (.Rmd) to R script (.R) by M. M. Rhodes.

### IMPORTANT
# To make this code work, you must do the following:
# 1. Download Supplemental Tables 1-5 (.csv) and Supplemental Tree S1 (.nex).
# 2. Rename Supplemental Tables 1-5 to "TableS1", etc., and Supplemental Tree 
#    S1 to "TreeS1" so that the filenames match those in "Load data" below.
# 3. Set the working directory to the folder where the files are located 
#    (in "Load data", below). Ensure the filepath follows the proper syntax 
#    for your operating system.

### Libraries
install.packages("dplyr")
install.packages("pander")
install.packages("MatrixCorrelation")
install.packages("ape")
install.packages("caper")
install.packages("classInt")
install.packages("AICcmodavg")
library(dplyr)  # for data formatting
library(pander)  # for table formatting
library(MatrixCorrelation)  # for RV coefficient analysis
library(ape)  # for handling trees
library(caper)  # for Phylogenetic Generalized Least Sqaures (PGLS)
library(classInt)  # for Jenks Natural Breaks analysis
library(AICcmodavg)  # for Akaike information criterion (AIC) model comparison

### Load data
# set working directory and import data
setwd("ENTER LOCATION HERE")
TableS1 <- read.csv("TableS1.csv")
TableS2 <- read.csv("TableS2.csv")
TableS3 <- read.csv("TableS3.csv")
TableS4 <- read.csv("TableS4.csv")
TableS5 <- read.csv("TableS5.csv")
TreeS1 <- read.nexus("TreeS1.nex")



## Data analysis

### Program comparison
# Compare areas measured by Corel DRAW! (CD) and ImageJ (IJ).

# Two-sample t-tests:
# sort by taxon, maintaining order in spreadsheet
TableS1$Taxon <- factor(TableS1$Taxon,
                        levels = c("Varanus", "Alligator", "Caiman", 
                                   "Allosaurus", "Albertosaurus", 
                                   "Ornithomimidae indet.", "Falcarius", 
                                   "Caenagnathidae indet.", "Saurornitholestes", 
                                   "Sinovenator", "Troodontidae indet.", 
                                   "Struthio", "Dromaius", "Gallus")
                        )
S1_grouped <- TableS1 %>% 
              group_by(Taxon)

# t-tests
S1_taxon <- summarise(S1_grouped, t.test(CD, IJ)$estimate[1],
                      t.test(CD, IJ)$estimate[2],
                      t.test(CD, IJ)$statistic,
                      t.test(CD, IJ)$parameter,
                      t.test(CD, IJ)$p.value
                      )
S1_all <- summarise(TableS1, t.test(CD, IJ)$estimate[1],
                    t.test(CD, IJ)$estimate[2],
                    t.test(CD, IJ)$statistic,
                    t.test(CD, IJ)$parameter,
                    t.test(CD, IJ)$p.value
                    )

# arrange columns
S1_all <- cbind(Group = "All taxa", S1_all)
colnames(S1_taxon) = colnames(S1_all)
Table6 <- data.frame(rbind(S1_taxon, S1_all))
names(Table6) <- c("Group", "Mean~CD~", "Mean~IJ~", "t-value", "*df*", "p-value")

# create table
Table6[,2:3] <- format(Table6[,2:3], digits = 4, nsmall = 4)
Table6[,4] <- format(Table6[,4], digits = 2, nsmall = 4)
Table6[,5] <- format(Table6[,5], digits = 2)
Table6[,6] <- format(Table6[,6], digits = 4, nsmall = 4)
pander(Table6, caption = "Table 6 of main text.")



# RV coefficient analysis:
# read tables as a matrix
CD <- as.matrix(TableS2)
IJ <- as.matrix(TableS3)

# RV coefficient analysis
Y <- format(RV(CD,IJ), digits = 4)

# create output
RVtest <- data.frame(Y)
RVtest <- rename(RVtest, "RV" = Y)
pander(RVtest, caption = "RV coefficient.")



### Sensitivity analysis
# Compare areas of pelvic muscle origins in *Albertosaurus* from Corel DRAW! (CD) 
# and ImageJ (IJ) under varying tolerance levels (0 [default], 16, 32, 48, 64).

# One-way ANOVA:
# filter data and perform ANOVA
IJonly_ANOVA <- anova(aov(Area~Threshold, filter(TableS4, Threshold != "CD")))
All_ANOVA <- anova(aov(Area~Threshold, TableS4))

# arrange columns
ANOVA_table <- rbind(IJonly_ANOVA[,c(2,1,3:5)], All_ANOVA[,c(2,1,3:5)])
names(ANOVA_table) <- c("SS", "*df*", "MS", "F", "p-value")
row.names(ANOVA_table) <- c("Threshold~IJ_only~", "Residuals~IJ_only~", 
                            "Threshold~All~", "Residuals~All~"
                            )

# create table
ANOVA_table[,1:3] <- format(ANOVA_table[,1:3], digits = 2, nsmall = 2)
ANOVA_table[,4:5] <- format(ANOVA_table[,4:5], digits = 4, nsmall = 6)
pander(ANOVA_table, caption = "Sensitivity analysis.")



### Allometry and body mass
# Bivariate comparisons of the area of all hip muscles (HM), area of major 
# extensors (ME), and length of the ilium (IL) to body mass (BM).

# Phylogenetic Generalized Least Squares (PGLS) regressions:
# sort data by group or form
S5_Sauria <- filter(TableS5, Group1 == "Sauria")
S5_Theropoda <- filter(TableS5, Group1 == "Theropoda")
S5_Maniraptora <- filter(TableS5, Group1 == "Maniraptora")
S5_Aves <- filter(TableS5, Group1 == "Aves")
S5_na_Theropoda <- filter(TableS5, Group2 == "Nonavian_Theropoda")
S5_bipeds <- filter(TableS5, Form == "Biped")

# combine data and tree (all branches weighted equally)
Sauria_equal <- comparative.data(TreeS1, S5_Sauria, Taxon, vcv = T)
Theropoda_equal <- comparative.data(TreeS1, S5_Theropoda, Taxon, vcv = T)
Maniraptora_equal <- comparative.data(TreeS1, S5_Maniraptora, Taxon, vcv = T)
Aves_equal <- comparative.data(TreeS1, S5_Aves, Taxon, vcv = T)
na_Theropoda_equal <- comparative.data(TreeS1, S5_na_Theropoda, Taxon, vcv = T)
bipeds_equal <- comparative.data(TreeS1, S5_bipeds, Taxon, vcv = T)
all_equal <- comparative.data(TreeS1, TableS5, Taxon, vcv = T)

# PGLS regressions
pgls_HM_Sauria <- pgls(logHM ~ logBM, Sauria_equal)
pgls_HM_Theropoda <- pgls(logHM ~ logBM, Theropoda_equal)
pgls_HM_Maniraptora <- pgls(logHM ~ logBM, Maniraptora_equal)
pgls_HM_Aves <- pgls(logHM ~ logBM, Aves_equal)
pgls_HM_na_Theropoda <- pgls(logHM ~ logBM, na_Theropoda_equal)
pgls_HM_bipeds <- pgls(logHM ~ logBM, bipeds_equal)
pgls_HM_all <- pgls(logHM ~ logBM, all_equal)

pgls_ME_Sauria <- pgls(logME ~ logBM, Sauria_equal)
pgls_ME_Theropoda <- pgls(logME ~ logBM, Theropoda_equal)
pgls_ME_Maniraptora <- pgls(logME ~ logBM, Maniraptora_equal)
pgls_ME_Aves <- pgls(logME ~ logBM, Aves_equal)
pgls_ME_na_Theropoda <- pgls(logME ~ logBM, na_Theropoda_equal)
pgls_ME_bipeds <- pgls(logME ~ logBM, bipeds_equal)
pgls_ME_all <- pgls(logME ~ logBM, all_equal)

pgls_IL_Sauria <- pgls(logIL ~ logBM, Sauria_equal)
pgls_IL_Theropoda <- pgls(logIL ~ logBM, Theropoda_equal)
pgls_IL_Maniraptora <- pgls(logIL ~ logBM, Maniraptora_equal)
pgls_IL_Aves <- pgls(logIL ~ logBM, Aves_equal)
pgls_IL_na_Theropoda <- pgls(logIL ~ logBM, na_Theropoda_equal)
pgls_IL_bipeds <- pgls(logIL ~ logBM, bipeds_equal)
pgls_IL_all <- pgls(logIL ~ logBM, all_equal)

# arrange columns
summ.fn <- function(data, test){
  summarise(data, test$n, summary(test)$r.squared, summary(test)$adj.r.squared, 
            summary(test)$coef[2,1], summary(test)$coef[1,1], summary(test)$coef[2,4])
}

HM_Sauria <- summ.fn(S5_Sauria, pgls_HM_Sauria)
HM_Theropoda <- summ.fn(S5_Theropoda, pgls_HM_Theropoda)
HM_Maniraptora <- summ.fn(S5_Maniraptora, pgls_HM_Maniraptora)
HM_Aves <- summ.fn(S5_Aves, pgls_HM_Aves)
HM_na_Theropoda <- summ.fn(S5_na_Theropoda, pgls_HM_na_Theropoda)
HM_bipeds <- summ.fn(S5_bipeds, pgls_HM_bipeds)
HM_all <- summ.fn(TableS5, pgls_HM_all)

ME_Sauria <- summ.fn(S5_Sauria, pgls_ME_Sauria)
ME_Theropoda <- summ.fn(S5_Theropoda, pgls_ME_Theropoda)
ME_Maniraptora <- summ.fn(S5_Maniraptora, pgls_ME_Maniraptora)
ME_Aves <- summ.fn(S5_Aves, pgls_ME_Aves)
ME_na_Theropoda <- summ.fn(S5_na_Theropoda, pgls_ME_na_Theropoda)
ME_bipeds <- summ.fn(S5_bipeds, pgls_ME_bipeds)
ME_all <- summ.fn(TableS5, pgls_ME_all)

IL_Sauria <- summ.fn(S5_Sauria, pgls_IL_Sauria)
IL_Theropoda <- summ.fn(S5_Theropoda, pgls_IL_Theropoda)
IL_Maniraptora <- summ.fn(S5_Maniraptora, pgls_IL_Maniraptora)
IL_Aves <- summ.fn(S5_Aves, pgls_IL_Aves)
IL_na_Theropoda <- summ.fn(S5_na_Theropoda, pgls_IL_na_Theropoda)
IL_bipeds <- summ.fn(S5_bipeds, pgls_IL_bipeds)
IL_all <- summ.fn(TableS5, pgls_IL_all)

Table8 <- cbind(c("HM~Sauria~", "HM~Theropoda~", "HM~Maniraptora~", "HM~Aves~", 
                  "HM~na_Theropoda~", "HM~bipeds~", "HM~all~", 
                  "ME~Sauria~", "ME~Theropoda~", "ME~Maniraptora~", "ME~Aves~", 
                  "ME~na_Theropoda~", "ME~bipeds~", "ME~all~", 
                  "IL~Sauria~", "IL~Theropoda~", "IL~Maniraptora~", "IL~Aves~", 
                  "IL~na_Theropoda~", "IL~bipeds~", "IL~all~"), 
                data.frame(rbind(HM_Sauria, HM_Theropoda, HM_Maniraptora, 
                                 HM_Aves, HM_na_Theropoda, HM_bipeds, HM_all, 
                                 ME_Sauria, ME_Theropoda, ME_Maniraptora, 
                                 ME_Aves, ME_na_Theropoda, ME_bipeds, ME_all, 
                                 IL_Sauria, IL_Theropoda, IL_Maniraptora, 
                                 IL_Aves, IL_na_Theropoda, IL_bipeds, IL_all))
                )
names(Table8) <- c("Dependent variable", "n", "R^2^", "R^2^~adj~", "k", "b", 
                   "p-value")

# create table
Table8[,3:7] <- format(Table8[,3:7], digits = 4, nsmall = 4)
pander(Table8, caption = "Table 8 of main text.")



# Residuals (non-avian Theropoda and bipeds equations):
# retrieve values
fit_na_Theropoda <- c(pgls_HM_na_Theropoda$fitted, 0, 0, 0, 
                      pgls_ME_na_Theropoda$fitted, 0, 0, 0, 
                      pgls_IL_na_Theropoda$fitted, 0, 0, 0
                      )
res_na_Theropoda <- c(pgls_HM_na_Theropoda$phyres, 0, 0, 0, 
                      pgls_ME_na_Theropoda$phyres, 0, 0, 0, 
                      pgls_IL_na_Theropoda$phyres, 0, 0, 0
                      )
per_na_Theropoda <- (res_na_Theropoda/fit_na_Theropoda)*100

fit_bipeds <- c(pgls_HM_bipeds$fitted, pgls_ME_bipeds$fitted, 
                pgls_IL_bipeds$fitted)
res_bipeds <- c(pgls_HM_bipeds$phyres, pgls_ME_bipeds$phyres, 
                pgls_IL_bipeds$phyres)
per_bipeds <- (res_bipeds/fit_bipeds)*100

# arrange columns
dep <- c("HM~Allosaurus~", "HM~Albertosaurus~", "HM~Ornithomimidae~", "HM~Falcarius~", 
         "HM~Caenagnathidae~", "HM~Saurornitholestes~", "HM~Sinovenator~", 
         "HM~Troodontidae~", "HM~Struthio~", "HM~Dromaius~", "HM~Gallus~", 
         "ME~Allosaurus~", "ME~Albertosaurus~", "ME~Ornithomimidae~", "ME~Falcarius~", 
         "ME~Caenagnathidae~", "ME~Saurornitholestes~", "ME~Sinovenator~", 
         "ME~Troodontidae~", "ME~Struthio~", "ME~Dromaius~", "ME~Gallus~", 
         "IL~Allosaurus~", "IL~Albertosaurus~", "IL~Ornithomimidae~", "IL~Falcarius~", 
         "IL~Caenagnathidae~", "IL~Saurornitholestes~", "IL~Sinovenator~", 
         "IL~Troodontidae~", "IL~Struthio~", "IL~Dromaius~", "IL~Gallus~"
         )
Table9 <- data.frame(dep, fit_na_Theropoda, res_na_Theropoda, per_na_Theropoda, 
                     fit_bipeds, res_bipeds, per_bipeds)
Table9[Table9 == 0] <- NA
Table9[Table9 == "NaN"] <- NA
names(Table9) <- c("Dependent variable", "Fitted~na_Theropoda~", "Residual", 
                   "% fitted", "Fitted~bipeds~", "Residual", "% fitted"
                   )

# create table
Table9[,3] <- format(Table9[,3], digits = 2, nsmall = 4)
Table9[,6] <- format(Table9[,6], digits = 2, nsmall = 4)
Table9[,2:7] <- format(Table9[,2:7], digits = 4, nsmall = 4)
pander(Table9, caption = "Table 9 of main text.")



### Cursoriality categories
# Jenks Natural Breaks optimization on the proportion of the area of major 
# extensors (ME) to the area of all hip muscles (HM), and on "average" cursoriality.

# Jenks Natural Breaks (%ME):
# calculate proportions
Prop <- c((10^TableS5$logME)/(10^TableS5$logHM))

# run Jenks Natural Breaks
JenksME4 <- classIntervals(Prop, 4, style = "jenks")
JenksME5 <- classIntervals(Prop, 5, style = "jenks")
JenksME4$brks[1] <- 0
JenksME5$brks[1] <- 0

# get counts
count.fn <- function(data, k, class){
  sum(
    data > k$brks[class] & data <= k$brks[class+1],
    na.rm = T
  )
}

# arrange columns
Class <- c("1", "2", "3", "4", "5", "GVF")
ME4 <- c(JenksME4$brks[2:5], 0, 0)
ME5 <- c(JenksME5$brks[2:6], 0)
nME4 <- c(count.fn(Prop, JenksME4, 1), count.fn(Prop, JenksME4, 2), 
          count.fn(Prop, JenksME4, 3), count.fn(Prop, JenksME4, 4), 
          0, jenks.tests(JenksME4)[2]
          )
nME5 <- c(count.fn(Prop, JenksME5, 1), count.fn(Prop, JenksME5, 2), 
          count.fn(Prop, JenksME5, 3), count.fn(Prop, JenksME5, 4), 
          count.fn(Prop, JenksME5, 5), jenks.tests(JenksME5)[2]
          )
Jenks_table <- data.frame(Class, ME4, nME4, ME5, nME5)
Jenks_table[Jenks_table == 0] <- NA
Jenks_table <- rename(Jenks_table, "Upper limit~4~" = ME4, "n~4~" = nME4, 
                      "Upper limit~5~" = ME5, "n~5~" = nME5)

# create output
pander(Jenks_table, caption = "Cursoriality categories (%ME, metric~classes~).")

# By taxon (%ME):
# arrange data
ME <- data.frame(TableS5$Taxon, findCols(JenksME4), findCols(JenksME5))
colnames(ME) <- c("Taxon", "ME~4~", "ME~5~")

# create output
pander(ME, caption = "Cursoriality categories (%ME~class~).")

# Jenks Natural Breaks ("average" cursoriality):
# calculate "average" score (shown in Fig. 14B-C)
avg <- data.frame(c(1, 1, 1, 1, 1), c(1, 1, 1, 1, 1), c(1, 1, 1, 1, 1), 
                  c(5, 3, 2, 3, 4), c(5, 5, 4, 3, 5), c(5, 5, 4, 3, 5), 
                  c(2, 3, 2, 3, 4), c(3, 5, 4, 3, 4), c(3, 3, 2, 4, 2), 
                  c(2, 3, 3, 4, 3), c(4, 5, 4, 4, 3), c(5, 5, 5, 4, 3), 
                  c(5, 5, 5, 3, 5), c(2, 3, 5, 3, 5), 
                  row.names = c("Major extensors", "Leg proportions", 
                                "Ankle joint/fusion", "Functional digits", 
                                "Foot symmetry")
                  )
colnames(avg) <- TableS5$Taxon
avg2 <- summarize_all(avg, mean)
avg2 <- as.numeric(avg2)

# run Jenks Natural Breaks
JenksAVG4 <- classIntervals(avg2, 4, style = "jenks")
JenksAVG5 <- classIntervals(avg2, 5, style = "jenks")
JenksAVG4$brks[1] <- 0
JenksAVG5$brks[1] <- 0

# get counts
count.fn <- function(data, k, class){
  sum(
    data > k$brks[class] & data <= k$brks[class+1],
    na.rm = T
  )
}

# arrange columns
Class <- c("1", "2", "3", "4", "5", "GVF")
AVG4 <- c(JenksAVG4$brks[2:5], 0, 0)
AVG5 <- c(JenksAVG5$brks[2:6], 0)
nAVG4 <- c(count.fn(avg2, JenksAVG4, 1), count.fn(avg2, JenksAVG4, 2), 
           count.fn(avg2, JenksAVG4, 3), count.fn(avg2, JenksAVG4, 4), 
           0, jenks.tests(JenksAVG4)[2]
           )
nAVG5 <- c(count.fn(avg2, JenksAVG5, 1), count.fn(avg2, JenksAVG5, 2), 
           count.fn(avg2, JenksAVG5, 3), count.fn(avg2, JenksAVG5, 4), 
           count.fn(avg2, JenksAVG5, 5), jenks.tests(JenksAVG5)[2]
           )
Jenks_table2 <- data.frame(Class, AVG4, nAVG4, AVG5, nAVG5)
Jenks_table2[Jenks_table2 == 0] <- NA
Jenks_table2 <- rename(Jenks_table2, "Upper limit~4~" = AVG4, "n~4~" = nAVG4, 
                      "Upper limit~5~" = AVG5, "n~5~" = nAVG5)

# create output
pander(Jenks_table2, caption = "Cursoriality categories (average, metric~classes~).")

# By taxon ("average" cursoriality):
# arrange data
AVG <- data.frame(TableS5$Taxon, findCols(JenksAVG4), findCols(JenksAVG5))
colnames(AVG) <- c("Taxon", "AVG~4~", "AVG~5~")

# create output
pander(AVG, caption = "Cursoriality categories (average~class~).")

# Akaike information criterion (AIC) and small-sample corrected (AICc) tests for 
# model comparison of Jenks Natural Breaks using 4 or 5 classes.

# AIC and AICc (%ME):
# perform AIC test
AIC_ME <- AIC(JenksME4, JenksME5)

# perform AICc test
AICc_ME4 <- (AIC_ME$AIC[1] + ((2*AIC_ME$df[1]*(AIC_ME$df[1] + 1)) / 
                                (length(JenksME4$var) - AIC_ME$df[1] - 1)))
AICc_ME5 <- (AIC_ME$AIC[2] + ((2*AIC_ME$df[2]*(AIC_ME$df[2] + 1)) / 
                                (length(JenksME5$var) - AIC_ME$df[2] - 1)))
AICc_ME <- c(AICc_ME4, AICc_ME5)

# calculate delta values
dAIC_ME <- c(AIC_ME$AIC[1] - min(AIC_ME$AIC), AIC_ME$AIC[2] - min(AIC_ME$AIC))
dAICc_ME <- c(AICc_ME[1] - min(AICc_ME), AICc_ME[2] - min(AICc_ME))

# arrange columns
noME <- c(length(JenksME4$var), length(JenksME5$var))
AIC_ME_table <- cbind(noME, AIC_ME, deltaAIC_ME, AICc_ME, deltaAICc_ME)
names(AIC_ME_table) <- c("n", "df", "AIC", "dAIC", "AIC~C~", "dAIC~C~")

# create table
pander(AIC_ME_table, caption = "AIC and AICc tests (%ME).")

# AIC and AICc (average):
# perform AIC test
AIC_AVG <- AIC(JenksAVG4, JenksAVG5)

# perform AICc test
AICc_AVG4 <- (AIC_AVG$AIC[1] + ((2*AIC_AVG$df[1]*(AIC_AVG$df[1] + 1)) / 
                                (length(JenksAVG4$var) - AIC_AVG$df[1] - 1)))
AICc_AVG5 <- (AIC_AVG$AIC[2] + ((2*AIC_AVG$df[2]*(AIC_AVG$df[2] + 1)) / 
                                (length(JenksAVG5$var) - AIC_AVG$df[2] - 1)))
AICc_AVG <- c(AICc_AVG4, AICc_AVG5)

# calculate delta values
dAIC_AVG <- c(AIC_AVG$AIC[1] - min(AIC_AVG$AIC), AIC_AVG$AIC[2] - min(AIC_AVG$AIC))
dAICc_AVG <- c(AICc_AVG[1] - min(AICc_AVG), AICc_AVG[2] - min(AICc_AVG))

# arrange columns
noAVG <- c(length(JenksAVG4$var), length(JenksAVG5$var))
AIC_AVG_table <- cbind(noAVG, AIC_AVG, dAIC_AVG, AICc_AVG, dAICc_AVG)
names(AIC_AVG_table) <- c("n", "df", "AIC", "deltaAIC", "AIC~C~", "deltaAIC~C~")

# create table
pander(AIC_AVG_table, caption = "AIC and AICc tests (average).")