# This script was written for data analysis reported in the manuscript: # Freshwater trematodes differ from marine trematodes in patterns connected with # Division of Labor # By Allison T Neal, Moira Stettner, Renytzabelle Ortega-Cotto, Daniel Dieringer, # and Lydia Reed # Submitted to PeerJ in October 2023 # The script was written by Allison Neal and is provided to allow easy replication # and critical evaluation of the analysis performed. The script author does not # claim to be an efficient or expert R coder, so please excuse any inefficiencies # in the code. # This script tests whether relative pharynx size differs between small and large rediae. # Rediae are first split into two groups (big, small) based on one of two criteria: # 1) Breaks in a histogram of volumes (when bimodal) # 2) Quantiles (top 25% vs bottom 25%; middle 50% excluded; used when not bimodal) # Pharynx size was then compared among big vs. small rediae ### Import data (see referenced script file for more information about data) source('/Users/aneal1/OneDrive - Norwich University/2_Research/1_Trematode Research/Div of Labor/Analysis/Redial Sizes/DOLVTsizesImportCleanUp.R') ### Designate which redia are 'large' vs. 'small' using volume # Will use breaks in distribution of redia volume; would first need to calculate # the volume: # Approximate volume of redia using pi*r^2*h data$vol<-pi*(data$width/2)^2*data$length data$log.vol<-log(data$vol,10) data$volR<-pi*(data$widthR/2)^2*data$lengthR data$log.volR<-log(data$volR,10) # Based on analysis in DOLVTvolume.R, I visually inspected histograms and identified the # low points/'breaks' between two peaks for any infections showing significant # bimodality. These 'breaks' are listed below. # Create an empty variable for 'size' (big vs. small) based on volume breaks data$size.vol<-NA # Designate rediae as small vs. large based on the breaks in the distribution of volume # (described above) data$size.vol[data$snail=='H107' & data$log.volR<6.2]<-'small' data$size.vol[data$snail=='H107' & data$log.volR>6.4]<-'big' data$size.vol[data$snail=='H127' & data$log.volR<6]<-'small' data$size.vol[data$snail=='H127' & data$log.volR>6.5]<-'big' data$size.vol[data$snail=='H148' & data$log.volR<6.5]<-'small' data$size.vol[data$snail=='H148' & data$log.volR>7]<-'big' data$size.vol[data$snail=='P112' & data$log.volR<6]<-'small' data$size.vol[data$snail=='P112' & data$log.volR>6.5]<-'big' data$size.vol[data$snail=='P113' & data$log.volR<6]<-'small' data$size.vol[data$snail=='P113' & data$log.volR>6.5]<-'big' data$size.vol[data$snail=='P116' & data$log.volR<6.2]<-'small' data$size.vol[data$snail=='P116' & data$log.volR>6.6]<-'big' data$size.vol[data$snail=='P119' & data$log.volR<7]<-'small' data$size.vol[data$snail=='P119' & data$log.volR>7.2]<-'big' data$size.vol[data$snail=='P120' & data$log.volR<5.5]<-'small' data$size.vol[data$snail=='P120' & data$log.volR>6]<-'big' data$size.vol[data$snail=='P126' & data$log.volR<6]<-'small' data$size.vol[data$snail=='P126' & data$log.volR>6.4]<-'big' data$size.vol[data$snail=='P128' & data$log.volR<6]<-'small' data$size.vol[data$snail=='P128' & data$log.volR>6.5]<-'big' data$size.vol[data$snail=='P130' & data$log.volR<6]<-'small' data$size.vol[data$snail=='P130' & data$log.volR>6]<-'big' data$size.vol[data$snail=='P131' & data$log.volR<7.1]<-'small' data$size.vol[data$snail=='P131' & data$log.volR>7.2]<-'big' data$size.vol[data$snail=='P136' & data$log.volR<5.5]<-'small' data$size.vol[data$snail=='P136' & data$log.volR>6]<-'big' data$size.vol[data$snail=='P137' & data$log.volR<6.2]<-'small' data$size.vol[data$snail=='P137' & data$log.volR>6.8]<-'big' data$size.vol[data$snail=='P150' & data$log.volR<6.5]<-'small' data$size.vol[data$snail=='P150' & data$log.volR>7.5]<-'big' data$size.vol[data$snail=='P152' & data$log.volR<6.8]<-'small' data$size.vol[data$snail=='P152' & data$log.volR>7.2]<-'big' data$size.vol[data$snail=='P153' & data$log.volR<6.8]<-'small' data$size.vol[data$snail=='P153' & data$log.volR>7]<-'big' data$size.vol[data$snail=='P155' & data$log.volR<6.5]<-'small' data$size.vol[data$snail=='P155' & data$log.volR>7]<-'big' data$size.vol[data$snail=='P166' & data$log.volR<5.5]<-'small' data$size.vol[data$snail=='P166' & data$log.volR>6]<-'big' data$size.vol[data$snail=='P177' & data$log.volR<6.5]<-'small' data$size.vol[data$snail=='P177' & data$log.volR>7]<-'big' data$size.vol[data$snail=='P178' & data$log.volR<6.4]<-'small' data$size.vol[data$snail=='P178' & data$log.volR>6.6]<-'big' data$size.vol[data$snail=='Ph116' & data$log.volR<6]<-'small' data$size.vol[data$snail=='Ph116' & data$log.volR>6.5]<-'big' data$size.vol[data$snail=='Ph126' & data$log.volR<6.4]<-'small' data$size.vol[data$snail=='Ph126' & data$log.volR>6.6]<-'big' data$size.vol[data$snail=='Ph95' & data$log.volR<6.8]<-'small' data$size.vol[data$snail=='Ph95' & data$log.volR>7]<-'big' ### Make a table counting how many small vs. large there are for each snail # List snails snails<-levels(data$snail) # Create empty vectors to store counts n.big<-rep(NA,length(snails)) n.small<-rep(NA,length(snails)) # Perform counts for the rediae from each snail for(i in 1:length(snails)){ n.big[i]<-length(which(data$snail==snails[i] & data$size.vol=='big')) n.small[i]<-length(which(data$snail==snails[i] & data$size.vol=='small')) } # Combine into one data frame cbind(snails,n.small,n.big) ### Designate which redia are 'large' vs. 'small' using volume quantile ### # First, calculate quantiles for each infection # Create empty vectors to list quantile cutoffs for each infection q25<-rep(NA,length(snails)) q75<-rep(NA,length(snails)) # Populate vectors with calculated quantile cutoffs for(i in 1:length(snails)){ q25[i]<-quantile(data$volR[data$snail==snails[i]],na.rm=T)[2] q75[i]<-quantile(data$volR[data$snail==snails[i]],na.rm=T)[4] } # Next, make a new column in the data table listing the quantiles # Create empty columns data$volr.q25<-NA data$volr.q75<-NA # Populate columns using the quantile cutoffs from above for(i in 1:length(data$snail)){ # for every redia entry ind<-which(snails==data$snail[i]) data$volr.q25[i]<-q25[ind] data$volr.q75[i]<-q75[ind] } # Classify any redia smaller than 25th quantile as small; any larger than 75th as big # Create empty column to store size designation data$size.q<-NA # Populate size designations data$size.q[data$volRdata$volr.q75]<-'big' ### Assign rediae a single size classification based on one of the two methods above # Redia will be classified as "large" vs. "small" based on break in volume distribution # if available. Otherwise, based on quantile. # Populate size column with size designation based on histogram breaks data$size <- data$size.vol # If there are no designations based on histogram breaks (i.e. b/c the volume distribution # is not bimodal), record designations based on quantile for(i in 1:length(data$sample)){ if(is.na(data$size.vol[i])){ data$size[i] <- data$size.q[i] } } ################################################################################## ### Compare pharynx sizes for large vs. small rediae # Body volume has already been calculated. # Calculate approximate volume of pharynx assuming a spherical pharynx: 4/3*pi*r^3 data$pvol<-4/3*pi*(data$pharynx/2)^3 data$log.pvol<-log(data$pvol,10) data$pvolR<-4/3*pi*(data$pharR/2)^3 data$log.pvolR<-log(data$pvolR,10) # Calculate relative volume data$rel.pvolR<-data$pvolR/data$volR # Create empty data frame to record results from statistical analyses comparing # pharynx volume of large vs. small rediae results<-data.frame(matrix(data=NA, nrow = length(snails), ncol = 11)) colnames(results)<-c('snail', # Columns names for relative pharynx size (p value, number of big rediae, number of small rediae # median relative pharynx size for big vs. small rediae) 'rel.p', 'rel.big.n', 'rel.sm.n', 'rel.median.big', 'rel.median.sm', # Column names for absolute pharynx size (p value, number of big rediae, number of small rediae # median absolute pharynx size for big vs. small rediae) 'pvol.p', 'pvol.big.n', 'pvol.sm.n', 'pvol.median.big', 'pvol.median.sm') # Add snail numbers to results dataframe results$snail<-snails # Convert size to factor variable for analysis data$size<-as.factor(data$size) # Perform analysis comparing pharynx size of small vs. big rediae and calculate summary statistics for(i in 1:length(snails)){ temp<-data[data$snail==snails[i],] # calculate sample size for small and big rediae results$pvol.big.n[i]<-length(which(!is.na(temp$pvolR[temp$size=='big']))) results$pvol.sm.n[i]<-length(which(!is.na(temp$pvolR[temp$size=='small']))) results$rel.big.n[i]<-length(which(!is.na(temp$rel.pvolR[temp$size=='big']))) results$rel.sm.n[i]<-length(which(!is.na(temp$rel.pvolR[temp$size=='small']))) # run statistical test and calculate summary statistics for relative sizes if(results$rel.big.n[i]>0 & results$rel.sm.n[i]>0){ results$rel.p[i]<-wilcox.test(rel.pvolR ~ size, data=temp,na.rm=T,alternate = "less")[3] results$rel.median.big[i]<-median(temp$rel.pvolR[temp$size=='big'],na.rm=T) results$rel.median.sm[i]<-median(temp$rel.pvolR[temp$size=='small'],na.rm=T) } # do the same for the absolute pharynx sizes if(results$pvol.big.n[i]>0 & results$pvol.sm.n[i]>0){ results$pvol.p[i]<-wilcox.test(pvolR ~ size, data=temp,na.rm=T,alternate = "greater")[3] results$pvol.median.big[i]<-median(temp$pvolR[temp$size=='big'],na.rm=T) results$pvol.median.sm[i]<-median(temp$pvolR[temp$size=='small'],na.rm=T) } } results ################################################################################ ### Get size ranges for large vs small redia for use in activity analysis # (width was not recorded for rediae in the activity analysis, so # it is not possible to use volume cutoffs; instead, we can look at the # range of lengths for each size of rediae) # Create empty data frame size.ranges<-data.frame(matrix(data=NA, nrow = length(snails), ncol = 5)) # Name columns colnames(size.ranges)<-c('snail', 'sm.min', 'sm.max', 'lg.min', 'lg.max') # Add snail numbers size.ranges$snail<-snails # Determine redia length ranges for rediae classified as small vs. large for(i in 1:length(snails)){ temp<-data[data$snail==snails[i],] if(temp$snail[1]!="H109"){ # avoids an infection that did not have its redia classified as small vs. large # calculate minimum and maximum length for each size class size.ranges$sm.min[i]<-min(temp$lengthR[temp$size == 'small'],na.rm = T) size.ranges$sm.max[i]<-max(temp$lengthR[temp$size == 'small'],na.rm = T) size.ranges$lg.min[i]<-min(temp$lengthR[temp$size == 'big'],na.rm = T) size.ranges$lg.max[i]<-max(temp$lengthR[temp$size == 'big'],na.rm = T) } }