# This script was written for data analysis reported in the manuscript:
# Freshwater trematodes differ from marine trematodes in patterns connected with 
#   Division of Labor
# By Allison T Neal, Moira Stettner, Renytzabelle Ortega-Cotto, Daniel Dieringer,
#   and Lydia Reed
# Submitted to PeerJ in October 2023
# The script was written by Allison Neal and is provided to allow easy replication
#   and critical evaluation of the analysis performed.  The script author does not
#   claim to be an efficient or expert R coder, so please excuse any inefficiencies 
#   in the code.

# The goal of this script is to determine whether the distribution of small vs. 
#   large rediae differs among different parts of the snail

### First, import data (see referenced script file for more information about data)
# This script creates a data frame that includes average sizes (lengthR, widthR, volR) and 
#  size designations for the redia measured for this study.

# There are three different size designations:
#   - size.vol classifies rediae as small vs. large based on breaks in the distribution of volumes (histogram)
#   - size.q classifies rediae based on quartiles (small in first quartile, large in 4th)
#   - size matches size.vol for any rediae with  values; for infections without bimodal volume, uses size.q

# There is also information in the data dataframe about the 'location' from which the sample was collected
#   (gonad, mid, foot, general i.e. not separated)

# Note: the script generates warnings because there are some ties in median for a Wilcox test 
#   that is run in th main pharynx analysis (not needed for this analysis) 
#   and because there are some infections without enough data to calculate the minimum
#   or maximum redia sizes (also not needed for this analysis)

source('/Users/aneal1/OneDrive - Norwich University/2_Research/1_Trematode Research/Div of Labor/Analysis/Redial Sizes/DOLVTpharynx.R')


### Summarize of where rediae were found across all infections including
#   only rediae with assigned size
data.size<-data[!is.na(data$size),]
data.size<-data.size[data.size$snail!="H156",] #exclude due to issues with image labeling
data.size<-data.size[data.size$snail!="Ph114",] # exclude due to apparent mixed infection
data.size<-data.size[data.size$snail!="P112",] # exclude due to issues with image labeling
data.size<-data.size[data.size$snail!="H101",] # exclude due to issues with image labeling
summary(as.factor(data.size$location)) # summarizes location of all rediae
# Foot data
foot<-data.size[data.size$location=="foot",]
sum(foot$size=="small")/length(foot$size) # proportion small in foot
# Mid data
mid<-data.size[data.size$location=="mid",]
sum(mid$size=="small")/length(mid$size)# proportion small in mid
# Gonad data
gonad<-data.size[data.size$location=="gonad",]
sum(gonad$size=="small")/length(gonad$size) # proportion small in gonad


### Determine the proportion of redia that are 'small' vs. 'large' in each
#   body region for each infection
#   Note that problematic infections (H156, Ph114, P112, H101) were not exluded 
#   from this analysis, but that their results were excluded from the final manuscript.

# Get a list of all snails in the data
snails<-levels(as.factor(data$snail))

# Create a new data frame to store results
# The column headings are roughly explained at the end of each line
ratios<-data.frame(matrix(data = NA, nrow = length(snails), ncol = 31))
colnames(ratios)<-c("snail","gonad.sm","GonadMid.sm","mid.sm","midFoot.sm","foot.sm","general.sm", # number of small rediae
                    "gonad.n","GonadMid.n","mid.n", "midFoot.n", "foot.n", "general.n", # number of rediae total
                    "gonad", "GonadMid", "mid", "midFoot", "foot","general", # proportion of rediae that are small
                    "gonad.sm.lcl","GonadMid.sm.lcl","mid.sm.lcl","midFoot.sm.lcl","foot.sm.lcl","general.sm.lcl", # lower confidence limit
                    "gonad.sm.ucl","GonadMid.sm.ucl","mid.sm.ucl","midFoot.sm.ucl","foot.sm.ucl","general.sm.ucl") # upper confidence limit
# Populate snail numbers
ratios$snail<-snails

# For the data from each snail, calculate the desired values (see column headings above)
for(i in 1:length(snails)){
  temp <- data[data$snail==snails[i],] # generates a temporary data set with the data from a single snail
  # subset size data for each body section
  foot <- temp$size[temp$location=="foot"]
  midFoot <- temp$size[temp$location=="midFoot"] 
  gonad <- temp$size[temp$location=="gonad"]
  GonadMid <- temp$size[temp$location=="GonadMid"]
  mid<- temp$size[temp$location=="mid"]
  general<- temp$size[temp$location=="general"]
  # get counts to use in the binomial test
  foot.count <- c(sum(foot=="small",na.rm=T),sum(foot=="big",na.rm=T))
  midFoot.count <- c(sum(midFoot=="small",na.rm=T),sum(midFoot=="big",na.rm=T))
  gonad.count <- c(sum(gonad=="small",na.rm=T),sum(gonad=="big",na.rm=T))
  GonadMid.count <- c(sum(GonadMid=="small",na.rm=T),sum(GonadMid=="big",na.rm=T))
  mid.count <- c(sum(mid=="small",na.rm=T),sum(mid=="big",na.rm=T))
  general.count <- c(sum(general=="small",na.rm=T),sum(general=="big",na.rm=T))
  # perform binomial test (to get proportion small with confidence interval)
  if(sum(foot.count)>0){t.foot<-binom.test(foot.count)}
  if(sum(gonad.count)>0){t.gonad<-binom.test(gonad.count)}
  if(sum(midFoot.count)>0){t.midFoot<-binom.test(midFoot.count)}
  if(sum(GonadMid.count)>0){t.GonadMid<-binom.test(GonadMid.count)}
  if(sum(mid.count)>0){t.mid<-binom.test(mid.count)}
  if(sum(general.count)>0){t.general<-binom.test(general.count)}
  # record proportion small ("ratios") for each body section
  if(sum(foot.count)>0){ratios$foot[i] <- t.foot$estimate}
  if(sum(gonad.count)>0){ratios$gonad[i] <- t.gonad$estimate}
  if(sum(midFoot.count)>0){ratios$midFoot[i] <- t.midFoot$estimate}
  if(sum(GonadMid.count)>0){ratios$GonadMid[i] <- t.GonadMid$estimate}
  if(sum(mid.count)>0){ratios$mid[i] <- t.mid$estimate}
  if(sum(general.count)>0){ratios$general[i] <- t.general$estimate}
  # record confidence intervals for each body section
  #   lower confidence limit
  if(sum(foot.count)>0){ratios$foot.sm.lcl[i] <- t.foot$conf.int[1]} 
  if(sum(mid.count)>0){ratios$mid.sm.lcl[i] <- t.mid$conf.int[1]} 
  if(sum(gonad.count)>0){ratios$gonad.sm.lcl[i] <- t.gonad$conf.int[1]} 
  if(sum(midFoot.count)>0){ratios$midFoot.sm.lcl[i] <- t.midFoot$conf.int[1]} 
  if(sum(GonadMid.count)>0){ratios$GonadMid.sm.lcl[i] <- t.GonadMid$conf.int[1]} 
  if(sum(general.count)>0){ratios$general.sm.lcl[i] <- t.general$conf.int[1]} 
  #   upper confidence interval limit
  if(sum(gonad.count)>0){ratios$gonad.sm.ucl[i] <- t.gonad$conf.int[2]} 
  if(sum(mid.count)>0){ratios$mid.sm.ucl[i] <- t.mid$conf.int[2]}
  if(sum(foot.count)>0){ratios$foot.sm.ucl[i] <- t.foot$conf.int[2]}
  if(sum(midFoot.count)>0){ratios$midFoot.sm.ucl[i] <- t.midFoot$conf.int[2]}
  if(sum(GonadMid.count)>0){ratios$GonadMid.sm.ucl[i] <- t.GonadMid$conf.int[2]}
  if(sum(general.count)>0){ratios$general.sm.ucl[i] <- t.general$conf.int[2]}
  # record sample sizes for each body section
  ratios$gonad.n[i] <- sum(gonad.count)
  ratios$GonadMid.n[i] <- sum(GonadMid.count)
  ratios$mid.n[i] <- sum(mid.count)
  ratios$midFoot.n[i] <- sum(midFoot.count)
  ratios$foot.n[i] <- sum(foot.count)
  ratios$general.n[i] <- sum(general.count)
  # calculate number small for each body section
  ratios$gonad.sm[i] <- gonad.count[1]
  ratios$GonadMid.sm[i] <- GonadMid.count[1]
  ratios$mid.sm[i] <- mid.count[1]
  ratios$midFoot.sm[i] <- midFoot.count[1]
  ratios$foot.sm[i] <- foot.count[1]
  ratios$general.sm[i] <- general.count[1]
}

ratios