# This is my Master's thesis project- exploring the effects of oyster trophic interactions on the benthic and nekton community-----
#This script will explore statistical tests for community data for summer 2022 with method comparison#This is my Master's thesis project- exploring the effects of oyster trophic interactions on the benthic and nekton community
#This script will begin to explore the community data for summer 2022 with method comparison by site
source("scripts2/01_installpackagesM.R")
# Bring in data----

#confirmed these are standardized to per m^2
comdata<-read_rds("working_data/Summer_2022_com_data.rds")
summerproc <- read_rds("working_data/Summer_2022_reef_data.rds")
#Taxa ID coding and scientific names
taxaid <- read.csv("working_data/taxa_identification_coding_list.csv")

#taxa collected by gear
sumgeartaxa <- read.csv("working_data/summer_gear_taxa.csv")

# Reorganize data to look at biomass and abundance for site 2,3, and 4------
#remove site 5  and because it does not have a suction sample
comdata2 <- comdata%>%
  filter(Site!=5)%>%
  filter(Site!="M")
summerproc2 <- summerproc%>%
  filter(Site!=5)%>%
  filter(Site!="M")
#create a community data set and a biomass data set. remove things that shouldnt be considered for species richness and diversity
spdata <- summerproc2%>%
  left_join(comdata2)%>%
  filter(!TaxaID %in% c("egg-2", "egg-3", "egg-5", "wtf-8", "wtf-9", "gob-uni","amp-uni", "shmp-uni", "blen-juv", "egg-1","par-2","poly-uni"))%>%
  select(-true.dw)%>%
  pivot_wider(names_from = TaxaID,values_from=Abundance,values_fill=0)

#just looking at the spdata by sampling method
s.spdata <- spdata%>%
  filter(method=="s")
colnames(s.spdata)
t.spdata <- spdata%>%
  filter(method=="t")
colnames(t.spdata)
# create a community data set that removes the small organisms caught due to smaller mesh size (1mm) to see if those numbers are driving differences to comparable studies...
bigspdata <- summerproc2%>%
  left_join(comdata2)%>%
  filter(!TaxaID %in% c("amp-4","amp-1","amp-7","iso-1","iso-8","bug-2","bug-2","iso-4","poly-1","egg-2", "egg-3", "egg-5", "wtf-8", "wtf-9", "gob-uni","amp-uni", "shmp-uni", "blen-juv", "egg-1","par-2","poly-uni", "nem-1"))%>%
  select(-true.dw)%>%
  pivot_wider(names_from = TaxaID,values_from=Abundance,values_fill=0)

bigsptray <- bigspdata%>%
  filter(method=="t")

#biomass data sheet filtering out id's that should not be considered
bmass <- summerproc2%>%
  left_join(comdata2)%>%
  filter(!TaxaID %in% c("egg-2", "egg-3", "egg-5", "wtf-8", "wtf-9", "gob-uni","amp-uni", "shmp-uni", "blen-juv", "egg-1","par-2","poly-uni"))%>%
  select(-Abundance)%>%
  pivot_wider(names_from = TaxaID,values_from=true.dw,values_fill=0)

#just looking at the spdata by sampling method
s.bmass <- bmass%>%
  filter(method=="s")
colnames(s.bmass)
t.bmass <- bmass%>%
  filter(method=="t")
colnames(t.bmass)


#modify and clean up taxa id
taxaid2 <- taxaid%>%
  select(upID,scientific)%>%
  unique()%>%
  rename(Taxa=upID)


#using decostand to standardize my species biomass columns for the nmds .dont use range if you have true 0's in dataset. standardize method takes the (value-mean)/sd which give you a z score. but this can create negative values. 
bm.rem <- bmass%>%
  select(-Site, -Sample,-method)
bm.site <- bmass%>%
  select(Site, Sample,method)
#max give you relative proporiton of species. i am basically saying that i want amphipods to contribute to the community as much as blue crabs for example.
bmass.stand <- decostand(bm.rem, method = "max")
bm.stand <- cbind(bm.site,bmass.stand)
# Data set for community variables----
#using summerproc to have all 48 sites and then adiding com variables. evenness is diversity/log of spr. need to round to hundreth place
zcom<-summerproc2%>%
  mutate(spr=specnumber(spdata[,-1:-3]),
         div=round(diversity(spdata[,-1:-3]), digits = 2),
         even=round(div/log(spr), digits = 2),
         t.abund=round(apply(spdata[,-1:-3],1,sum),digits = 0),
         t.biomass=round(apply(bmass[,-1:-3],1,sum), digits = 2))

#renaming method column to meth and Site to site 
colnames(zcom)[3] = "meth"
colnames(zcom)[1] = "site"

#doing the same as above but with the data set that only includes larger organisms
bigzcom<-summerproc2%>%
  mutate(spr=specnumber(bigspdata[,-1:-3]),
         div=round(diversity(bigspdata[,-1:-3]), digits = 2),
         even=round(div/log(spr), digits = 2),
         t.abund=round(apply(bigspdata[,-1:-3],1,sum),digits = 0), t.biomass=round(apply(bmass[,-1:-3],1,sum), digits = 2))

#renaming method column to meth and Site to site 
colnames(bigzcom)[3] = "meth"
colnames(bigzcom)[1] = "site"

bigsuc <- bigzcom%>%
  filter(meth=='s')
summary(bigsuc)
bigtray <- bigzcom%>%
  filter(meth=='t')
summary(bigtray)


#Species evenness is a description of the distribution of abundance across the species in a community. Species evenness is highest when all species in a sample have the same abundance. Evenness approaches zero as relative abundances vary.

######### Spearman correlation test between abundance and biomass-----
corr <- cor.test(x=zcom$t.abund, y=zcom$t.biomass, method = 'spearman')
corr
####### Tests for normality for all biodiversity metrics- p>0.05 is normally distributed------

shapiro.test(zcom$spr)
shapiro.test(zcom$t.abund)
shapiro.test(zcom$t.biomass)
shapiro.test(zcom$even)
shapiro.test(zcom$div)

#### Site specific Kruskal wallis tests------
###### species richness -----
kruskal.test(spr~meth, data=zcom%>%
               filter(site==2))
#0.003<0.01
kruskal.test(spr~meth, data=zcom%>%
               filter(site==3))
#0.003<0.01
kruskal.test(spr~meth, data=zcom%>%
               filter(site==4))
#0.009<0.01

site <- c(1,2,3)
csq <- c(8.4857,8.4857,6.7048)
df <- c(1,1,1)
p.value <- c(0.0036,0.0036,0.0096)

results <- as.data.frame(cbind(site,csq,df,p.value))
resultsx <- results%>%
  rename(`Chi-squared`=csq, `p-value`=p.value, Reef=site)

kable(resultsx)%>%
  kable_styling(full_width = F)

###### biodiversity -----
kruskal.test(div~meth, data=zcom%>%
               filter(site==2))
#0.6>0,01
kruskal.test(div~meth, data=zcom%>%
               filter(site==3))
#0.1>0.01
kruskal.test(div~meth, data=zcom%>%
               filter(site==4))
#0.003<0.01
sited <- c(1,2,3)
csqd <- c(0.2316,2.0769,8.4255)
dfd <- c(1,1,1)
p.valued <- c(0.6304,0.1495,0.0037)

resultsd <- as.data.frame(cbind(sited,csqd,dfd,p.valued))
resultsxd <- resultsd%>%
  rename(`Chi-squared`=csqd, `p-value`=p.valued, Reef=sited, df=dfd)

kable(resultsxd)%>%
  kable_styling(full_width = F)
###### evenness -----
kruskal.test(even~meth, data=zcom%>%
               filter(site==2))
#0.006<0.01
kruskal.test(even~meth, data=zcom%>%
               filter(site==3))
#0.003<0,01
kruskal.test(even~meth, data=zcom%>%
               filter(site==4))
#0.003<0,01
sitee <- c(1,2,3)
csqe <- c(7.5154,8.3662,8.3368)
efe <- c(1,1,1)
p.valuee <- c(0.0061,0.0039,0.0039)

resultse <- as.data.frame(cbind(sitee,csqe,efe,p.valuee))
resultsxe <- resultse%>%
  rename(`Chi-squared`=csqe, `p-value`=p.valuee, Reef=sitee, df=efe)

kable(resultsxe)%>%
  kable_styling(full_width = F)
###### biomass -----
kruskal.test(t.biomass~meth, data=zcom%>%
               filter(site==2))
#0.003<0,01
kruskal.test(t.biomass~meth, data=zcom%>%
               filter(site==3))
#0.003<0,01
kruskal.test(t.biomass~meth, data=zcom%>%
               filter(site==4))
###### abundance -----
kruskal.test(t.abund~meth, data=zcom%>%
               filter(site==2))
#0.003<0,01
kruskal.test(t.abund~meth, data=zcom%>%
               filter(site==3))
#0.003<0,01
kruskal.test(t.abund~meth, data=zcom%>%
               filter(site==4))
#0.003<0,01
sitea <- c(1,2,3)
csqa <- c(8.3368,8.3077,8.3368)
efa <- c(1,1,1)
p.valuea <- c(0.0039,0.0039,0.0039)

resultsa <- as.data.frame(cbind(sitea,csqa,efa,p.valuea))
resultsxa <- resultsa%>%
  rename(`Chi-squared`=csqa, `p-value`=p.valuea, Reef=sitea, df=efa)

kable(resultsxa)%>%
  kable_styling(full_width = F)

#### Results Table -----
kwresults <- resultsx%>%
  rbind(resultsxa,resultsxe,resultsxd)

#write csv file
write.csv(kwresults, file = "figures2/tables/3.3.method_kw_summary_results_table.csv",row.names = F)
#make data usable for kable extra
base <- kwresults%>%
  kbl(align = c(rep("c",6)))
base

#now make it pretty
base%>%
  kable_styling(bootstrap_options = c("striped","condensed"))%>%
  pack_rows("taxa richness", 1,3)%>%
  pack_rows("abundance", 4,6)%>%
  pack_rows("Pielou's evenness", 7,9)%>%
  pack_rows("Shannon-Wiener diversity", 10,12)



#### Averages -----
zcom4 <- zcom%>%
  group_by(site, meth)%>%
  summarize(mspr=(mean(spr)),
            mdiv=(mean(div)),
            mev=(mean(even)),
            mbm=(mean(t.biomass)),
            mab=mean(t.abund))

zcom$site <- as.factor(zcom$site)
zcom$meth <- as.factor(zcom$meth)

suc <- zcom%>%
  filter(meth!="t")
tray <- zcom%>%
  filter(meth=="t")
summary(suc)
summary(tray)


tray2 <- tray%>%
  summarise(mean=mean(t.abund),se=std.error(t.abund))
#### Gear specific Kruskal wallis tests by site------
###### species richness -----
kruskal.test(spr~site, data=zcom%>%
               filter(meth=='s'))
#0.07>0.05 - no differences
kruskal.test(spr~site, data=zcom%>%
               filter(meth=='t'))
#0.02075<0.05 - difference
dunnTest(spr~site, data=zcom%>%
           filter(meth=='t'),
         method="bonferroni")
#these results are telling me that there were no significant differences in species richness across sites with suction sampling, but there were differences across sites with tray samples. 

gear <- c('suction','tray')
csq <- c(5.296,7.75)
df <- c(2,2)
p.value <- c(0.0708,0.0208)

results2 <- as.data.frame(cbind(gear,csq,df,p.value))
resultsx2 <- results2%>%
  rename(`Chi-squared`=csq, `p-value`=p.value, `gear type`=gear)

kable(resultsx2)%>%
  kable_styling(full_width = F)

###### biodiversity -----
kruskal.test(div~site, data=zcom%>%
               filter(meth=='s'))
# 0.2995 >0.05
kruskal.test(div~site, data=zcom%>%
               filter(meth=='t'))
#0.09>0.05
# there were no differences in diversity across sites for either gear types

geard <- c('suction','tray')
csqd <- c(2.4114,4.8035)
dfd <- c(2,2)
p.valued <- c(0.2995,0.0906)

resultsd2 <- as.data.frame(cbind(geard,csqd,dfd,p.valued))
resultsxd2 <- resultsd2%>%
  rename(`Chi-squared`=csqd, `p-value`=p.valued, `gear type`=geard, df=dfd)

kable(resultsxd2)%>%
  kable_styling(full_width = F)
###### evenness -----
kruskal.test(even~site, data=zcom%>%
               filter(meth=='s'))
#0.03<0.05
dunnTest(even~site, data=zcom%>%
           filter(meth=='s'),
         method="bonferroni")

kruskal.test(even~site, data=zcom%>%
               filter(meth=='t'))
#0.068>0.05

# there were no differences in evenness across sites for trays but there were for suction. with suction site 2 and 3 were sig different

geare <- c('suction','tray')
csqe <- c(6.8313,5.3535)
dfe <- c(2,2)
p.valuee <- c(0.0329,0.0688)

resultse2 <- as.data.frame(cbind(geare,csqe,dfe,p.valuee))
resultsxe2 <- resultse2%>%
  rename(`Chi-squared`=csqe, `p-value`=p.valuee, `gear type`=geare, df=dfe)

kable(resultsxe2)%>%
  kable_styling(full_width = F)
###### abundance -----
kruskal.test(t.abund~site, data=zcom%>%
               filter(meth=='s'))
#0.068>0.05
kruskal.test(t.abund~site, data=zcom%>%
               filter(meth=='t'))
#0.119>0.05

# there were no differences in abundance across sites for either gear types

geara <- c('suction','tray')
csqa <- c(5.3728,4.2573)
dfa <- c(2,2)
p.valuea <- c(0.0681, 0.119)

resultsa2 <- as.data.frame(cbind(geara,csqa,dfa,p.valuea))
resultsxa2 <- resultsa2%>%
  rename(`Chi-squared`=csqa, `p-value`=p.valuea, `gear type`=geara, df=dfa)

kable(resultsxa2)%>%
  kable_styling(full_width = F)

#### Results Table -----
kwresults <- resultsx2%>%
  rbind(resultsxa2,resultsxe2,resultsxd2)
#write csv file
write.csv(kwresults, file = "figures2/tables/3.4.method_comparison_sitespecific_kw_summary_results_table.csv",row.names = F)

#make data usable for kable extra
base <- kwresults%>%
  kbl(align = c(rep("c",6)))
base

#now make it pretty
base%>%
  kable_styling(bootstrap_options = c("striped","condensed"))%>%
  pack_rows("taxa richness", 1,2)%>%
  pack_rows("abundance", 3,4)%>%
  pack_rows("Pielou's evenness", 5,6)%>%
  pack_rows("Shannon-Wiener diversity", 7,8)



#### Averages  -----
zcom4 <- zcom%>%
  group_by(site, meth)%>%
  summarize(mspr=(mean(spr)),
            mdiv=(mean(div)),
            mev=(mean(even)),
            mbm=(mean(t.biomass)),
            mab=mean(t.abund))

zcom$site <- as.factor(zcom$site)
zcom$meth <- as.factor(zcom$meth)

suc <- zcom%>%
  filter(meth!="t")
tray <- zcom%>%
  filter(meth=="t")
summary(suc)
summary(tray)

####### Figures -----
#Remember site 2=1, site3=2, and site 4= 3
#species richness
SR <- ggplot(zcom, aes(y=spr,x=site,fill=meth))+
  geom_boxplot(outlier.shape = NA)+
  geom_point(position=position_jitterdodge(),size=1)+
  ggtitle("B") +
  ylab("taxa richness")+ scale_fill_grey(start = 0.8, end = 0.4)+
  theme(axis.line = element_line(colour = "black"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        plot.title = element_text(hjust=0,size=20,face="bold"),
        axis.title=element_text(size=20),legend.position = "none",
        axis.text = element_text(size=15),
        axis.title.x = element_blank())+ 
  scale_x_discrete(labels = c('1','2','3'))


#diversity
dv <- ggplot(zcom, aes(y=div,x=site,fill=meth))+
  geom_boxplot(outlier.shape = NA)+
  geom_point(position=position_jitterdodge(),size=1)+
  ggtitle("D") +
  ylab("Shannon-Wiener diversity")+xlab("Reef")+ scale_fill_grey(start = 0.8, end = 0.4)+
  theme(axis.line = element_line(colour = "black"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        plot.title = element_text(hjust=0,size=20,face="bold"),
        axis.title=element_text(size=20),
        axis.text = element_text(size=15),
        legend.position = "none")+ 
  scale_x_discrete(labels = c('1','2','3'))


#evennness
ev <- ggplot(zcom, aes(y=even,x=site,fill=meth))+
  geom_boxplot(outlier.shape = NA)+
  geom_point(position=position_jitterdodge(),size=1)+
  ggtitle("C") +
  ylab("Pielou's evenness")+xlab("Reef")+ scale_fill_grey(start = 0.8, end = 0.4)+
  theme(axis.line = element_line(colour = "black"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        plot.title = element_text(hjust=0,size=20,face="bold"),
        axis.title=element_text(size=20),
        axis.text = element_text(size=15),
        legend.position ="none")+ 
  scale_x_discrete(labels = c('1','2','3'))


#biomass
bm <- ggplot(zcom, aes(y=t.biomass,x=site,fill=meth))+
  geom_boxplot(outlier.shape = NA)+
  geom_point(position=position_jitterdodge(),size=1)+
  ggtitle("D") +
  ylab("taxa biomass")+xlab("site")+ scale_fill_grey(start = 0.8, end = 0.4)+
  theme(axis.line = element_line(colour = "black"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        plot.title = element_text(hjust=0,size=20,face="bold"),
        axis.title=element_text(size=20),
        axis.text = element_text(size=15),
        legend.position ="none")+ 
  scale_x_discrete(labels = c('1','2','3'))

#abundance

ab <- ggplot(zcom, aes(y=t.abund,x=site,fill=meth))+
  geom_boxplot(outlier.shape = NA)+
  geom_point(position=position_jitterdodge(),size=1)+
  ggtitle("A") +
  ylab("taxa abundance")+
  scale_fill_grey(start = 0.8, end = 0.4, labels=c('suction', 'tray'), name= "sampling method")+
  theme(axis.line = element_line(colour = "black"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        plot.title = element_text(hjust=0,size=20,face="bold"),
        axis.title=element_text(size=20),
        axis.title.x = element_blank(),
        legend.position =c(0.3,0.7),
        axis.text = element_text(size=15),
        legend.text=element_text(size=20),
        legend.title = element_text(size=20))+
  scale_x_discrete(labels = c('1','2','3'))
(ab+SR)/(ev+dv)
ggsave("figures2/method_uni_plot_abundance_2.png", width = 10, height = 7)
#figure 2
#Remember site 2=1, site3=2, and site 4= 3
#species richness
SR2 <- ggplot(zcom, aes(y=spr,x=meth,fill=site))+
  geom_boxplot(outlier.shape = NA)+
  geom_point(position=position_jitterdodge(),size=1)+
  ggtitle("B") +
  ylab("taxa richness")+xlab("gear type")+ scale_fill_grey(start = 0.8, end = 0.4)+
  theme(axis.line = element_line(colour = "black"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        plot.title = element_text(hjust=0,size=12,face="bold"),
        axis.title=element_text(size=20),legend.position = "none",
        axis.title.x = element_blank())
SR2
#diversity
dv2 <- ggplot(zcom, aes(y=div,x=meth,fill=site))+
  geom_boxplot(outlier.shape = NA)+
  geom_point(position=position_jitterdodge(),size=1)+
  ggtitle("D") +
  ylab("Shannon-Wiener diversity")+xlab("gear type")+ scale_fill_grey(start = 0.8, end = 0.4, labels=c(1,2,3), name= "site")+
  theme(axis.line = element_line(colour = "black"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        plot.title = element_text(hjust=0,size=12,face="bold"),
        axis.title=element_text(size=20),
        legend.position = c(0.2,0.2))
#evennness
ev2 <- ggplot(zcom, aes(y=even,x=meth,fill=site))+
  geom_boxplot(outlier.shape = NA)+
  geom_point(position=position_jitterdodge(),size=1)+
  ggtitle("C") +
  ylab("Plielou's evenness")+xlab("gear type")+ scale_fill_grey(start = 0.8, end = 0.4)+
  theme(axis.line = element_line(colour = "black"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        plot.title = element_text(hjust=0,size=12,face="bold"),
        axis.title=element_text(size=20),
        legend.position ="none",
        axis.title.x = element_blank())
#abundance
ab2 <- ggplot(zcom, aes(y=t.abund,x=meth,fill=site))+
  geom_boxplot(outlier.shape = NA)+
  geom_point(position=position_jitterdodge(),size=1)+
  ggtitle("A") +
  ylab("abundance")+xlab("gear type")+ scale_fill_grey(start = 0.8, end = 0.4)+
  theme(axis.line = element_line(colour = "black"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        plot.title = element_text(hjust=0,size=12,face="bold"),
        axis.title=element_text(size=20),
        legend.position ="none")+ 
  scale_x_discrete(labels = c('1','2','3'))



(SR2+ev2)/(dv2+ab2)

(ab2+SR2)/(ev2+dv2)

##### Unique taxa by gear table -----
sumgeartaxa2 <- sumgeartaxa%>%
  rename(`scientific name`=scientific.name,`common name`=common.name,`scientific name `=scientific.name.1,`common name `=common.name.1)

#write csv file
write.csv(sumgeartaxa2, file = "figures2/tables/3.5.method_comparison_unique_captured_taxa_by_gear.csv",row.names = F)
#make data usable for kable extra
base <- sumgeartaxa2%>%
  kbl(align = c(rep("c",6)))
base

#now make it pretty
base%>%
  kable_styling(bootstrap_options = c("striped","condensed"))%>%
  add_header_above(c("Substrate tray samples"=2, "Suction samples" = 2))

##### Table of Unique taxa by gear and average abundance-----
#lets start with getting average abundance for each taxa per sample and add sci and common names
avab <- spdata%>%
  select(-Site,-Sample)%>%
  group_by(method)%>%
  pivot_longer(2:30, names_to = "Taxa",values_to = "abundance")%>%
  group_by(method,Taxa)%>%
  summarise(avg_ab=mean(abundance))%>%
  left_join(taxaid2)

# get a data set with scientific and common names
taxaid3 <- taxaid%>%
  select(upID,scientific,common)%>%
  unique()%>%
  rename(Taxa=upID)
# add common names, select one gear only taxa and remove 0 values
avab2 <- avab%>%
  left_join(taxaid3)%>%
  filter(Taxa %in% c("fsh-3","fsh-6","fsh-10","crb-7","shmp-2","iso-8","bug-2","nem-1","fsh-9","fsh-13","amp-7","biv-4","biv-3","biv-5"))%>%
  filter(avg_ab!=0)

#reorder rows
avab3 <- avab2[,c(4,5,3)]

write.xlsx(avab3, file = "figures2/tables/3.6.method_comparison_unique_captured_taxa_av_abund_by_gear.xlsx")

#### calculations for mean se of how many unique taxa were only caught by one gear--------
#tray first
#get taxa names into columns and filter the taxa I need
too <- spdata%>%
  group_by(Site,Sample)%>%
  filter(method=="t")%>%
  pivot_longer(4:32, names_to = "Taxa",values_to = "abundance")%>%
  filter(Taxa %in% c("fsh-3","fsh-6","fsh-10","crb-7","shmp-2","iso-8","bug-2","nem-1"))

#now remove taxa and get a mean abundance number of unique taxa by site and sample. need to do this once per sampling method
too2 <- too%>%
  group_by(method)%>%
  mutate(avg_ab=mean(abundance))%>%
  mutate(se=std.error(abundance))

#suction samples
#get taxa names into columns and filter the taxa I need
soo <- spdata%>%
  group_by(Site,Sample)%>%
  filter(method!="t")%>%
  pivot_longer(4:32, names_to = "Taxa",values_to = "abundance")%>%
  filter(Taxa %in% c("fsh-9","fsh-13","amp-7","biv-4","biv-3","biv-5"))

#now remove taxa and get a mean abundance number of unique taxa by site and sample. need to do this once per sampling method
soo2 <- soo%>%
  group_by(method)%>%
  mutate(avg_ab=mean(abundance))%>%
  mutate(se=std.error(abundance))

##### proportion of taxa represented by a single individual per sample -----
#change format so taxa are listed as rows instead of columns, remove 0 abundnace data, multiply by .2304 to get data back to whole numbers and not standardized m^2
issing <- spdata%>%
  pivot_longer(4:32, names_to = "taxa",values_to = "abundance")%>%
  filter(abundance!=0)%>%
  mutate(abund=abundance*0.2304)%>%
  select(-abundance)
#make all taxa represented by a single ind 1 and taxa not 0. then group by and summarize to get the total number of taxa represented by a single individual per sample
issing2 <- issing%>%
  mutate(sing.ind=case_when(
    abund==1~1,
    abund!=1~0
  ))%>%
  group_by(Site, Sample, method)%>%
  mutate(n.taxa=sum(sing.ind))
#incorporate total number of taxa per sample by getting taxa richness values from zcom data set rename columns to match issing2 to join the data sets
tnumtaxa <- zcom%>%
  select(site, Sample, meth, spr)%>%
  rename(Site=site,method=meth)
#now join the data sets and calculate proportion of taxa represented by a single ind per sampel,select only columns we need. then summarizie to get 1 row per sample
issing3 <- issing2%>%
  left_join(tnumtaxa)%>%
  mutate(propsing=n.taxa/spr)%>%
  select(Site, Sample, method, propsing)%>%
  summarize(propsing2=mean(propsing))
#round
issing3$propsing2 <- format(round(issing3$propsing2,1),nsmall=1)
#now get an average by sampling gear of what percent of each sample was represented by a single taxa
#first ungroup variables in issing3
ungroup(issing3)
issing3$propsing2 <- as.numeric(issing3$propsing2)
avsing.t <- issing3%>%
  filter(method!="s")%>%
  group_by(method)%>%
  summarize(avpropsing=mean(propsing2))
avsing.s <- issing3%>%
  filter(method=="s")%>%
  group_by(method)%>%
  summarize(avpropsing=mean(propsing2))

##### proportion of taxa represented by 2 times the sample specific mean -----
#create a dataset that has the average abundance per sample
domcom <- spdata%>%
  pivot_longer(4:32, names_to = "taxa",values_to = "abundance")%>%
  filter(abundance!=0)%>%
  mutate(abund=abundance*0.2304)%>%
  select(-abundance)%>%
  group_by(Site, Sample, method)%>%
  mutate(m.abund=mean(abund))%>%
  mutate(twoxabun=m.abund*2)%>%
  mutate(dubm=case_when(
    abund>=twoxabun~1,
    abund<=twoxabun~0
  ))%>%
  mutate(twoxdomsp=sum(dubm))

domcom.s <-domcom%>%
  filter(method=='s')%>%
  ungroup()%>%
  summarise(m.domcom=mean(twoxdomsp))
domcom.s2 <- domcom%>%
  ungroup()%>%
  filter(method=='s')%>%
  select(Site,Sample,twoxdomsp)%>%
  group_by(Site,Sample)%>%
  summarise(twoxdsp=mean(twoxdomsp))

domcom.t <-domcom%>%
  filter(method!='s')%>%
  ungroup()%>%
  summarise(m.domcom=mean(twoxdomsp))
domcom.t2 <- domcom%>%
  ungroup()%>%
  filter(method!='s')%>%
  select(Site,Sample,twoxdomsp)%>%
  group_by(Site,Sample)%>%
  summarise(twoxdsp=mean(twoxdomsp))


#### Table showing all taxa biomass collect by sampling method-----
#tray samples only 
bmass3 <- bmass%>%
  select(-Site,-Sample)%>%
  group_by(method)%>%
  aggregate(.~method, sum)%>%
  pivot_longer(2:30, names_to = "Taxa",values_to = "biomass")%>%
  pivot_wider(names_from = method,values_from=biomass,values_fill=0)%>%
  mutate_if(is.numeric, round, digits=3)

bmass3[bmass3==0] <- NA

bm4table <- arrange(bmass3, Taxa)%>%
  left_join(taxaid2)%>%
  rename(`Tray samples`=t,`Suction samples`=s)%>%
  ungroup()%>%
  select(-Taxa)%>%
  rename(Taxa=scientific)

bm5table <- bm4table[, c(3, 2,1)]


#write csv file
write.csv(bm5table, file = "figures2/tables/B.1.method_comparison_taxa_biomass_by_gear.csv",row.names = F)

##### Table of biodiv metrics mean se min and max by reef and site -----
bma <- zcom%>%
  select(-Sample,-t.biomass)%>%
  group_by(site,meth)%>%
  pivot_longer(3:6, names_to = "metric",values_to = "value")


bma2 <- bma%>%
  group_by(site, meth, metric)%>%
  summarize(mean = mean(value), SE = std.error(value), min=min(value),max=max(value))%>%
  mutate_if(is.numeric, ~round(., 2))

#combine min and max for a range abnd combine mean and se
bma2$range <- paste(bma2$min, "-", bma2$max)
bma2$mese <- paste(bma2$mean, "(", bma2$SE)
#reorder rows
bma23 <- bma2[,c(1,2,3,4,5,6,7,9,8)]
#flip x and y axis 

bma24 <- bma23%>%
  select(-min,-max, -mean,-SE)%>%
  pivot_longer(4:5, names_to = "stat",values_to="value")
#flip x and y again?
bma25 <- bma24%>%
  group_by(site, meth,metric)%>%
  pivot_wider(names_from = site,values_from=value,values_fill=NA)
#export to a xlsx file

write.xlsx(bma25, file = "figures2/tables/3.1.method_comparison_reef_characteristics_p2.xlsx")