# 'data' is already a function in R, so you're overwriting it if you assign your data to it. # This assumes your current directory is the folder in which this script lives (ProjectFolder/Analysis/) # make file paths so R can read in the data from the DATA folder and write figures to the FIGURES folder setwd("~/Documents/Dropbox/periclimenes/Analysis") data_dir <- file.path("..", "Data") fig_dir <- file.path("..", "Figures") # define a function to make italic names in plots make.italic <- function(x) as.expression(lapply(x, function(y) bquote(italic(.(y))))) ################################################################################ # READ IN DATA # construct the filepath datafile <- file.path(data_dir, "panamashrimp.csv") # read in the file DAT <- read.csv(datafile) # attach it so that you can reference columns without dataframe$colname syntax attach(DAT) ################################################################################ # Summarize all columns summary(DAT) # For reference # "Bartholomea annulata", "Stichodactyla helianthus" anem_names <- c("Bartholomea annulata", "Stichodactyla helianthus") anem_names_short <- c("B. annulata", "S. helianthus") # I don't think this is necessary (after revising spreadsheet) # levels(ANEMSPECIES)[levels(ANEMSPECIES)=="cork"] <- "Bartholomea" # levels(ANEMSPECIES)[levels(ANEMSPECIES)=="sun"] <- "Stichodactyla" # Does the egg number response variable need to be transformed? boxplot(EGGNUMBER) # that is "normal enough" by my standards EGGNUMBER_t <- sqrt(EGGNUMBER) # square root transformed boxplot(EGGNUMBER_t) # this is slightly more normal pdf(file.path(fig_dir, "transformation.pdf")) boxplot( list(EGGNUMBER, EGGNUMBER_t*10), names = c("egg number", "10 * sqrt(egg number)") ) dev.off() ################################################################################ # How many shrimp were collected from each host anemone species? table(ANEMSPECIES) ################################################################################ # What was the mean group size of shrimp on each anemone species? sapply(split(GROUPSIZE, ANEMSPECIES), mean) sapply(split(GROUPSIZE, ANEMSPECIES), sd) # Is there a difference? t.test(split(GROUPSIZE, ANEMSPECIES)[[1]],split(GROUPSIZE, ANEMSPECIES)[[2]]) ################################################################################ # What was the mean number of eggs carried by shrimp on each anemone species? sapply(split(EGGNUMBER, ANEMSPECIES), mean) # What was the standard deviation of the number of eggs carried by shrimp on each anemone species? sapply(split(EGGNUMBER, ANEMSPECIES), sd) # Was there a difference in the number of eggs carried by shrimp on each host species? t.test(EGGNUMBER ~ ANEMSPECIES) t.test(EGGNUMBER_t ~ ANEMSPECIES) # transform doesn't change anything # alternate phrasing of above: The mean number of eggs per gravid shrimp was greater on Stichodactyla than Bartholomea; was that difference significant? t.test( x = EGGNUMBER[ANEMSPECIES == "Stichodactyla"], y = EGGNUMBER[ANEMSPECIES == "Bartholomea"], data = DAT, alternative = "greater" ) # plot above pdf(file.path(fig_dir, "eggs_per_host.pdf")) par(mar = c(5,4,1,1)) boxplot(EGGNUMBER ~ ANEMSPECIES, names = make.italic(anem_names), ylab = "Eggs per gravid shrimp", xlab = "Host species", text.labels = 3) box() dev.off() # legend: Number of eggs per gravid individual of Periclimenes yucatanicus found on each of two host sea anemone species. Central lines represents the median value, with boxes encompassing the interquartile range. Whiskers extend to the more central value of either the furthest data point or 1.5 times the interquartile range, and outliers are represented as dots. ################################################################################ # Was there a linear effect of carapace length on the number of eggs carried by the shrimp? lm_eggs_shrimpsize <- lm(EGGNUMBER ~ SHRIMPSIZE) summary(lm_eggs_shrimpsize) lm_eggs_t_shrimpsize <- lm(EGGNUMBER_t ~ SHRIMPSIZE) summary(lm_eggs_t_shrimpsize) # plot above pdf(file.path(fig_dir, "eggs_by_shrimpsize.pdf")) par(mar = c(5,4,1,1)) # plot(SHRIMPSIZE, EGGNUMBER, pch = c(1, 2), cex = 2, xlab = "Shrimp carapace size (mm)", ylab = "Eggs per gravid shrimp") #col = c("coral", "turquoise"), plot(SHRIMPSIZE, EGGNUMBER, pch = c(1, 2)[as.numeric(ANEMSPECIES)], cex = 2, xlab = "Shrimp carapace size (mm)", ylab = "Eggs per gravid shrimp" ) #col = c("coral", "turquoise"), legend( x = "topleft", legend = anem_names, cex = 2, text.font = 3, pch = c(1, 2), title = "Host species", bty = "n") #col = c("coral", "turquoise"), dev.off() # Caption: Eggs per gravid female Periclimenes yucatanicus, plotted against # plot residuals of linear regression pdf(file.path(fig_dir, "eggs_by_shrimpsize_resid.pdf")) par(mar = c(5,4,1,1)) qqnorm(resid(lm_eggs_shrimpsize)); qqline(resid(lm_eggs_shrimpsize)) dev.off() ################################################################################ # Was there a combined linear effect of carapace length and host anemone size on the number of eggs carried by the shrimp? lm_eggs_shrimpsize_anemsize <- lm(EGGNUMBER ~ ANEMSIZE*SHRIMPSIZE) summary(lm_eggs_shrimpsize_anemsize) lm_eggs_t_shrimpsize_anemsize <- lm(EGGNUMBER_t ~ ANEMSIZE*SHRIMPSIZE) summary(lm_eggs_t_shrimpsize_anemsize) # Conclusion: # There was a significant combined effect of shrimp size and anemone size on egg number, irrespective of host sea anemone species F(3,62) = 39.89; p < 0.0001; R_adj^2 = 0.6422. # Not sure the best way to plot the above... # pdf(file.path(fig_dir, "eggs_by_shrimpsize-anemsize.pdf")) # par(mar = c(5,4,1,1)) # plot(EGGNUMBER ~ SHRIMPSIZE) # pch = c(1, 2)[as.numeric(ANEMSPECIES)], # cex = 2, # xlab = "Shrimp carapace size (mm)", # ylab = "Eggs per gravid shrimp" # ) #col = c("coral", "turquoise"), # # plot(EGGNUMBER ~ SHRIMPSIZE*ANEMSIZE) # legend( # x = "topleft", # legend = anem_names, # cex = 2, # text.font = 3, # pch = c(1, 2), # title = "Host species", # bty = "n") #col = c("coral", "turquoise"), # dev.off() # Caption: Eggs per gravid female Periclimenes yucatanicus, plotted against ################################################################################ # What was the mean body size of each anemone species? sapply(split(ANEMSIZE, ANEMSPECIES), mean) # What was the standard deviation of the body size of each anemone species? sapply(split(ANEMSIZE, ANEMSPECIES), sd) # Was there a difference in the body size of the two anemone species? t.test(ANEMSIZE ~ ANEMSPECIES, data = DAT) # plot above boxplot(split(ANEMSIZE, ANEMSPECIES), names = anem_names, ylab = "Anemone body size (cm)") ################################################################################ # What was the mean body size of shrimp on the two anemone species? sapply(split(SHRIMPSIZE, ANEMSPECIES), mean) # What was the standard deviation of the size of shrimp on each anemone species? sapply(split(SHRIMPSIZE, ANEMSPECIES), sd) # Was there a difference in the body size of the two anemone species? t.test(SHRIMPSIZE ~ ANEMSPECIES, data = DAT) # plot above boxplot(split(SHRIMPSIZE, ANEMSPECIES), names = anem_names, ylab = "Shrimp carapace length (mm)") ################################################################################ # Is there a linear effect of anemone size on shrimp size? lm_shrimpsize_anemsize <- lm(SHRIMPSIZE ~ ANEMSIZE) summary(lm_shrimpsize_anemsize) pdf(file = file.path(fig_dir, "shrimpsize_anemsize.pdf")) par(mar = c(5,4,1,1)) plot(SHRIMPSIZE ~ ANEMSIZE, xlab = "Anemone diameter (cm)", ylab = "Shrimp carapace size (mm)", pch = c(1, 2)[as.numeric(ANEMSPECIES)], cex = 2, ylim = c(5, max(SHRIMPSIZE)), xlim = c(5, max(ANEMSIZE)) ) abline( lm(SHRIMPSIZE ~ ANEMSIZE), lty = 2 ) legend( x = "bottomright", legend = anem_names, cex = 1, text.font = 3, pch = c(1, 2), title = "Host species", bty = 1) #col = c("coral", "turquoise"), dev.off() ################################################################################ # Is there an effect of anemone species on shrimp size, when controlling for anemone size? lm_shrimpsize_anemsize_anemspecies <- lm(SHRIMPSIZE ~ ANEMSPECIES*ANEMSIZE) summary(lm_shrimpsize_anemsize_anemspecies) anova(lm_shrimpsize_anemsize_anemspecies) # NO! # Alternate; this is the same as the p value for the interaction term in the above model mod1 <- lm(SHRIMPSIZE ~ ANEMSPECIES + ANEMSIZE) mod2 <- lm(SHRIMPSIZE ~ ANEMSPECIES * ANEMSIZE) anova(mod1, mod2) ################################################################################ # Is there an effect of anemone species on shrimp size when controlling for anemone size? # This is the ancova ("full model") aov_shrimpsize_anemsp_anemsize_full <- aov(SHRIMPSIZE ~ ANEMSPECIES*ANEMSIZE) summary(aov_shrimpsize_anemsp_anemsize_full) # There is a significant effect of anemone species and anemone size, but no interaction between them. # Thus, there is no significant difference in the slope of the relationship between anemone size and shrimp size for each of the two anemone species. # We can conclude that shrimp body size can be explained by anemone size, and differs among anemones only insofar as the two host species differ in size. # This is the anova ("reduced model") aov_shrimpsize_anemsp_anemsize_red <- aov(SHRIMPSIZE ~ ANEMSPECIES+ANEMSIZE) summary(aov_shrimpsize_anemsp_anemsize_red) ################################################################################ # Is there an effect of anemone species on egg number when controlling for anemone size? # (NOTE -- dropped the 'AND shrimp size?') lm_egg_anemspp_shrimpsize <- lm(EGGNUMBER ~ ANEMSPECIES * SHRIMPSIZE) summary(lm_egg_anemspp_shrimpsize) anova(lm_egg_anemspp_shrimpsize) lm_egg_t_anemspp_shrimpsize <- lm(EGGNUMBER_t ~ ANEMSPECIES * SHRIMPSIZE) summary(lm_egg_t_anemspp_shrimpsize) anova(lm_egg_t_anemspp_shrimpsize) # The (Intercept) Estimate is the estimate for the first factor ANEMSPECIEScork # The intercept for ANEMSPECIEScork is: # The p value of ANEMSPECIESsun is 0.992; this indicates there is no significant difference between the intercepts for the two species # Because the interaction (ANEMSPECIES:SHRIMPSIZE) is not significant, we use a simpler (additive) model lm_egg_anemspp_shrimpsize_add <- lm(EGGNUMBER ~ ANEMSPECIES + SHRIMPSIZE) summary(lm_egg_anemspp_shrimpsize_add) anova(lm_egg_anemspp_shrimpsize_add) # change order lm_egg_shrimpsize_anemspp_add <- lm(EGGNUMBER ~ SHRIMPSIZE + ANEMSPECIES) summary(lm_egg_shrimpsize_anemspp_add) anova(lm_egg_shrimpsize_anemspp_add) ################################################################################ ################################################################################ ################################################################################ ################################################################################ ################################################################################ # Original code below: #qplot(ANEMSPECIES,EGGNUMBER,data=DAT, geom="boxplot") # qplot(SHRIMPSIZE,EGGNUMBER,data=DAT, colour=ANEMSPECIES, shape=ANEMSPECIES) # variables were reversed from what I'd expect. summary(lm(EGGNUMBER~SHRIMPSIZE)) qplot(ANEMSIZE,SHRIMPSIZE,data=DAT, colour=ANEMSPECIES, shape=ANEMSPECIES) mod1= aov(EGGNUMBER~ANEMSIZE*ANEMSPECIES) summary(mod1) mod2= aov(EGGNUMBER~ANEMSIZE+ANEMSPECIES) summary(mod2) anova(mod1,mod2) mod3=aov(SHRIMPSIZE~ANEMSIZE*ANEMSPECIES) summary(mod3) qplot(ANEMSPECIES,ANEMSIZE,data=DAT,geom="boxplot") ancova=qplot(ANEMSIZE,SHRIMPSIZE,data=DAT, colour=ANEMSPECIES, shape=ANEMSPECIES) ancova=ancova+stat_smooth(method=lm,se=FALSE,fullrange=TRUE) ancova sun <- DAT[DAT$ANEMSPECIES=='sun',] reg1 <- lm(SHRIMPSIZE~ANEMSIZE, data=sun) reg1 summary(reg1) cork <- DAT[DAT$ANEMSPECIES=='cork',] reg2 <- lm(SHRIMPSIZE~ANEMSIZE, data=cork) reg2 summary(reg2) reg3 <- lm(EGGNUMBER~ANEMSIZE, data=sun) reg3 summary(reg3) reg4 <- lm(EGGNUMBER~ANEMSIZE, data=cork) reg4 summary(reg4)