require('readr') require('tidyverse') require('sjmisc') require('aod') require('reshape') require('scales') require("car") require("ggplot2") rm(list=ls()) graphics.off() #set your working directory setwd("C:/Users/Miles Nesbit/OneDrive - Imperial College London/Documents/PhD/Bb_fungi/code") #load in data data1<- read.csv("../data/datalog.csv", stringsAsFactors = T) #format data to build #subset the relevant columns for growth Data2 <- subset(data1, select = c('hive.id','tissue.type', 'd1.actual.daily.growth','d2.actual.daily.growth','d3.actual.daily.growth' ,'d4.actual.daily.growth','d5.actual.daily.growth','d6.actual.daily.growth')) #clean out na's Data2 <- Data2 %>% drop_na() #sum the rows Data2$row_sum <- rowSums(Data2[ , c(3,4,5,6,7,8)], na.rm=TRUE) #presence absence data Data2$presence <- ifelse(Data2$row_sum >0, 1, 0) #insert data summary function data_summary <- function(data, varname, groupnames){ require(plyr) summary_func <- function(x, col){ c(mean = mean(x[[col]], na.rm=TRUE), sd = sd(x[[col]], na.rm=TRUE)) } data_sum<-ddply(data, groupnames, .fun=summary_func, varname) data_sum <- rename(data_sum, c("mean" = varname)) return(data_sum) } df2 <- data_summary(Data2, varname="presence", groupnames="tissue.type") #plot the barplot plus error bars errorplot <- ggplot(df2, aes(x= tissue.type, y=presence, fill =tissue.type)) + geom_bar(stat="identity", color="black", position=position_dodge()) + geom_errorbar(aes(ymin=presence-sd, ymax=presence+sd), width=.2, position=position_dodge(.9)) errorplot+labs(title="Growth in Substrates", x="Tissue Type", y = "Average Presence of Growth")+ theme_classic() #to test the hypothesis that there is a difference in growth across substrates a probit model is required to be performed myprobit <- glm(presence ~ tissue.type + hive.id, family = binomial(link = "probit"), data = Data2) summary(myprobit) confint(myprobit) #waldorf test on effect of tissue on growth wald.test(b = coef(myprobit), Sigma = vcov(myprobit), Terms = 2:4) #chi squared p value is technically significant #effect of tissue type on presence absence is significant #waldorf test on effect of hive on data wald.test(b = coef(myprobit), Sigma = vcov(myprobit), Terms = 5:17) #dead random across colonies #no specificity for growth in specific colonies #rename tissue types Data3 <- Data2 %>% mutate(tissue.type = case_when( tissue.type == 'F' ~ 'Frass', tissue.type == 'H' ~ 'Honey', tissue.type == 'HCW' ~ 'Honey Cup Wall', tissue.type == 'LCW' ~ 'Egg Cup Wall' )) #plot without bars plot1 <- ggplot(Data3, aes(x= tissue.type, y=presence, fill =tissue.type)) + geom_bar(stat = 'identity') plot1+labs(title="Growth in Substrates", x="Tissue Type", y = "Presence of Growth Across Colonies")+ theme_classic() + theme(plot.title = element_text(hjust = 0.5)) + scale_y_continuous(expand = expansion(mult = 0))+ theme(legend.position="none") #plot with yes and no to build a graph showing proportion of growing fungi in substrates #presence absence data Data3$absence <- ifelse(Data3$row_sum >0, 'Present', 'Absent') mdf = Data3 %>% mutate(absence = factor(absence, levels = c("Absent", "Present"))) #build probability data tbl <- xtabs(~ absence + tissue.type, mdf) proptbl <- proportions(tbl, margin = "tissue.type") proptbl <- as.data.frame(proptbl) proptbl <- proptbl[proptbl$Freq != 0, ] proptbl <- na.omit(proptbl) #graph the whole thing together ggplot(proptbl, aes(tissue.type, Freq, fill = absence)) + geom_col(position = position_fill()) + geom_text(aes(label = scales::percent(Freq)), colour = "Black", position = position_fill(vjust = 0.5))+ labs(title="Growth in Substrates", x="Substrate Type", y = "Proportion of Growing Fungi in Substrates", )+ theme_classic() + theme(plot.title = element_text(hjust = 0.5))+labs(fill = "Fungal Growth: Present or Absent")+ scale_y_continuous(limits = c(0,1), expand = c(0, 0)) gg <- ggplot(proptbl, aes(tissue.type, Freq, fill = absence)) + geom_col(position = position_fill()) + geom_text(aes(label = scales::percent(Freq)), colour = "Black", position = position_fill(vjust = 0.5)) + labs(x = "Substrate Type", y = "Percentage of Growing Fungi in Substrates") + theme_classic() + theme(plot.title = element_text(hjust = 0.5)) + labs(fill = "Fungal Growth") + scale_y_continuous(labels = scales::percent, limits = c(0, 1), expand = c(0, 0)) gg gg <- ggplot(proptbl, aes(tissue.type, Freq, fill = absence)) + geom_col(position = position_fill(), color = "black") + geom_text(aes(label = scales::percent(Freq)), colour = "Black", position = position_fill(vjust = 0.5), size = 4) + labs(x = "Substrate Type", y = "Percentage of Growing Fungi in Substrates", fill = "Fungal Growth") + # Set legend title here theme_classic() + theme(plot.title = element_text(hjust = 0.5), axis.title = element_text(size = 14), axis.text = element_text(size = 12), legend.text = element_text(size = 12), legend.title = element_text(size = 14), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(color = "black")) + scale_y_continuous(labels = scales::percent, limits = c(0, 1), expand = c(0, 0)) print(gg) # Save as a high-resolution PNG file ggsave("testfigure5.png", gg, width = 10, height = 8, units = "in", dpi = 600) #to test the hypothesis that there is a difference in growth across substrates a probit model is required to be performed mybinom <- glm(presence ~ tissue.type + hive.id, family = binomial, data = Data2) summary(mybinom) confint(mybinom) # Use Anova from the car package to get p-values Anova(mybinom, type="II")