# This script was written for data analysis reported in the manuscript:
# Freshwater trematodes differ from marine trematodes in patterns connected with
# Division of Labor
# By Allison T Neal, Moira Stettner, Renytzabelle Ortega-Cotto, Daniel Dieringer,
# and Lydia Reed
# Submitted to PeerJ in October 2023

# The script was written by Allison Neal and is provided to allow easy replication
# and critical evaluation of the analysis performed. The script author does not
# claim to be an efficient or expert R coder, so please excuse any inefficiencies
# in the code.

# This script imports data and cleans it up. It does not perform any analysis, but
# all other scripts that perform analysis on this data use this as a starting point.

### MetaData ###
# Trematodes in the accompanying data file were collected from Vermont snails
# in summers 2019 and 2021 by Allison Neal and her students (2019: Mo Stettner, Mary
# Nsubuga, Kathryn Farnum, Dan Dieringer [October], Caleb Scully [October];
# 2021: Lydia Reed, Reine Gibb and Macall Meslin [one trip]). Details on snail
# collections may be found in databooks kept by Allison Neal

# In the data set, columns are:
# snail- unique identifier assigned to infected snails; letter indicates snail family
#    H- Hydrobiidae
#    P- Planorbidae
#    Ph- Physidae
#    V- Viviparidae
# redia- identifier for redia within a snail and body section (location); corresponds to
#    photos.
#    Numbers with letters only appended (e.g. 'a','b','c') indicate that there were more rediae
#       present in the photo than indicated in the photo's label; these are separate rediae
#    Numbers with 'm10x' appended indicate that there was a duplicate photo taken of the redia
#       at a higher magnification (100x [10x objective] vs. 40x); these are duplicate photos.
#       Measurements taken at the higher magnification are likely more accurate, so only those
#       will be retained in the final analysis (see Clean Up code below)
#    Numbers beginning with 'C2r' were taken on a different microscope (and computer- computer 2)
#       These are unique redia numbers.
#    Snail V172 has three rediae named A, B, C. These are unique. I believe they were collected
#       during a different stage of the dissection than rediae 1-10. They could be included in analysis, but
#       none of the rediae in this infection are really a random or thorough sample (see Neal lab notebook,
#       from July 15, 2021)
#    Similarly, snail V182 has rediae with names starting with a, b, c, d. I believe they were also collected
#       during a different stage of the dissection. The numbers indicate the multiple rediae in photos a, b, c
#       and d. Not random, or thoroughly sampled, but unique.
#    Snail P153 has two extra measurements (rediae are labeled 'extra1' and 'extra2') that were taken
#       from photos of attack trials. These should not be included in the general analysis for this infection
#       and will be excluded when this infection is analyzed.
# measurer- researcher who performed the measurements recorded.
#    MS - Moira Stettner
#    ATN- Allison T Neal
#    Note: there was a slight difference in the measurements of these two researchers, likely
#       attributed to slight deviations in the calibration of their software.
#       For a sample of 50 rediae measured by both researchers, Neal's measurements were consistently
#       around 2-6% higher. No correction was deemed necessary as no individual infection was measured
#       partially by each researcher.
# length- length (micrometers) measured from photos using a segmented line in SPOT (MS) or ImageJ
#    software (ATN).
# width- width (micrometers) at widest point on main body (i.e. not an appendage/collar)
#    measured from photos in SPOT (MS) or ImageJ software (ATN).
# pharynx- width (micrometers) at widest point of pharynx measured from photos in SPOT (MS) or
#    ImageJ (ATN)
# location- where in the snail the sample of redia were taken from. Entries are:
#    foot- snail head/foot and mantle
#    mid- section between foot and gonad/digestive
#    gonad- posterior end of snail containing gonad/digestive organs
#    general or gonad/Mid or mid/Foot- body sections (or specified body sections) were not separated
#    Slide1 or Slide 2- body sections were not separated, but two samples were taken and numbered independently
#       (i.e. same as general, but separate designations were maintained for easy reference to photos)
# notes- notes recorded by measurer; some describe redia that may be damaged or show interesting features;
#    relevant notes to consider for analysis may include:
#    flat/flattened- slide may have been prepared with too little water; measurements may be inconsistent
#       with other rediae from this snail.
# unreliable- measurements that were judged unreliable by the measurer
#    L- length
#    W- width
#    P- pharynx
#    U- unknown (some samples were measured before we started specifying which measurements
#       were unreliable; all are now recorded as unknown, which should probably mean all measurements
#       are excluded for these samples)
# magnification- some samples may have the magnification recorded (esp.
#    if they're not 40x, which is most typical)

#########################################################################################################

### Import Data and Clean Up ###

# Read the raw measurement table.
# - `header = TRUE` is spelled out in full (no partial argument matching, no
#   reassignable `T`).
# - `stringsAsFactors = FALSE` keeps text columns as character on every R
#   version. The in-place recoding of `location` and `unreliable` below assigns
#   values that may not already exist in the column, which would produce NA
#   (with a warning) if those columns were factors.
data <- read.csv(
  '/Users/aneal1/OneDrive - Norwich University/2_Research/1_Trematode Research/Div of Labor/Analysis/Redial Sizes/DOLVTsizes.csv',
  header = TRUE,
  sep = ',',
  stringsAsFactors = FALSE
)

# Snail should be a factor variable
data$snail <- as.factor(data$snail)

# Remove duplicate rediae (a few rediae were measured from multiple photos with
# different magnification)

# Generates what should be unique sample numbers for every redia (there should
# be no rediae with the same snail, redia and location values combined).
# Note: this ID is built before `location` spellings are normalised below, so it
# carries the location exactly as entered in the data file.
data$sample <- paste(data$snail, data$location, data$redia, sep = "_")
length(data$sample) == length(unique(data$sample)) # Checks if all samples are unique; they are

# Identify photos with different magnification; for all samples, the duplicate
# (lower-magnification) measurement is one row up (I checked)
grep("m", data$redia)
dup <- grep("m", data$redia) - 1 # indices for the values I want removed
data <- data[!(seq_len(nrow(data)) %in% dup), , drop = FALSE]

# Look at ranges of length, width and pharynx to make sure ranges make sense and
# no values were entered incorrectly (enough to be outside reasonable bounds)
summary(data$length)
summary(data$width)
summary(data$pharynx)
data[order(data$pharynx), ] # to see smallest/largest

# Note about pharynx measurements:
# It appears that Mo may have not always measured the longer of the two
# dimensions on the pharynges. Correcting this would require me to repeat
# all of Mo's measurements, which might itself introduce issues because there
# seems to be a slight discrepancy in the calibration of my software and hers
# (so I might actually have to repeat ALL of her measurements). I think it's ok.
# Mo was consistent with how she took measurements at least within
# a given infection (that seems to be reflected in a subset of the measurements
# I did go back and try to repeat- the relative sizes are mostly fine), so
# comparisons within a given infection (which is most of the analysis) should
# be ok but we should proceed with caution when comparing among infections.

# Look at how body regions were entered and fix inconsistencies
table(data$location)
# Map each inconsistent spelling (name) to its canonical form (value). None of
# the canonical forms is itself a key, so one lookup pass gives the same result
# as applying the replacements one after another.
location_fixes <- c(
  "Foot" = "foot",
  "General" = "general",
  "Gonad" = "gonad",
  "Mid" = "mid",
  "Mid+Foot" = "midFoot",
  "MidFoot" = "midFoot"
)
needs_fix <- data$location %in% names(location_fixes) # %in% never returns NA
data$location[needs_fix] <-
  unname(location_fixes[as.character(data$location[needs_fix])])

# Consider what to do with issues in notes

# One issue: flattened rediae (too little water used when preparing slide)
# I reviewed the notes and found that two words described these: flat and flattened
# It did not appear that there were any comments indicating something was "not flat"
# There were two redia that were "squished", but I'm not sure they're worth removing
# For now, create variable so the flat/flattened samples can easily be removed from analysis
# ("Y" for every sample with "flat" (or "flattened") in the notes, "N" otherwise)
data$flat <- ifelse(grepl("flat", data$notes), "Y", "N")

# Second issue: damaged; this is already recorded more succinctly in 'unreliable' column
table(data$unreliable) # summary of what should be removed

# Third issue: things that are marked as "not a redia" or some variation on this
data$notes[grep("redia", data$notes)] # start by getting a list of all the different ways this is phrased

# I will set these to having unreliable measurements (in case any measurements
# were recorded). The phrases searched for, and the note variants each one is
# meant to catch, are:
#   "unclear whether this is actually a redia"
#   "may not be a redia" (with or without additions of "may or" or "...")
#   "these are a redia"
#       "I'm not sure if any of these are a redia"
#   "this is a redia" cluster (oddly, this doesn't get anything that says it is
#   a redia with confidence!)
#       "not clear if this is a redia"
#       "not sure this is a redia"
#       "not sure if this is a redia"
#       "I'm not confident this is a redia"
#       "I'm not positive this is a redia"
#       "I don't think this is a redia"
#       "I think this is a redia" -- sounds uncertain/unreliable
#       "I think this is a redia; pharynx out of focus"
#       "I think this is a redia, but hard to see (out of focus, light colored)"
#   "not a redia" cluster
#       "I think this is a cercaria, not a redia"
#       "maybe not a redia"
#       "not a redia"
#       "damaged? not a redia?"
not_redia_patterns <- c(
  "unclear whether this is actually a redia",
  "may not be a redia",
  "these are a redia",
  "this is a redia",
  "not a redia"
)

# Check, phrase by phrase, to make sure this doesn't get anything unexpected
lapply(
  setNames(not_redia_patterns, not_redia_patterns),
  function(pattern) data$notes[grepl(pattern, data$notes)]
)

# The phrases contain no regex metacharacters, so joining them with "|" matches
# exactly the union of the rows the individual phrases match.
doubtful_redia <- grepl(paste(not_redia_patterns, collapse = "|"), data$notes)
data$unreliable[doubtful_redia] <- "LWP"

# Once all unreliable measurements have been flagged, can remove them with this:
# Add columns with only reliable measurements for each measurement

# Row indices of `df` whose measurement identified by `code` ("L", "W" or "P")
# should not be trusted. A row is unreliable when:
#   - `unreliable` contains `code` (that measurement was specified as unreliable),
#   - `unreliable` contains "U" (not specified which measurements are unreliable), or
#   - `flat` is "Y" (the redia is squished/flattened).
unreliable_rows <- function(df, code) {
  which(
    grepl(code, df$unreliable) |
      grepl("U", df$unreliable) |
      grepl("Y", df$flat)
  )
}

all.rows <- seq_len(nrow(data))

# The reliable rows are computed with setdiff() rather than negative indexing:
# all.rows[-x] returns an EMPTY vector when x is empty, which would silently
# leave every "reliable" value as NA if no row happened to be flagged.

# Length
data$lengthR <- NA
unrel.length <- unreliable_rows(data, "L")
rel.length <- setdiff(all.rows, unrel.length)
data$lengthR[rel.length] <- data$length[rel.length]

# Width
data$widthR <- NA
unrel.width <- unreliable_rows(data, "W")
rel.width <- setdiff(all.rows, unrel.width)
data$widthR[rel.width] <- data$width[rel.width]

# Pharynx
data$pharR <- NA
unrel.phar <- unreliable_rows(data, "P")
rel.phar <- setdiff(all.rows, unrel.phar)
data$pharR[rel.phar] <- data$pharynx[rel.phar]