+--- title: "Life history stage and behavior in a social network" author: "A Tringali" date: "May 8, 2019" output: html_document --- ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE) ``` ## R Markdown This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see . When you click the **Knit** button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. #Load libraries ```{r} library(readxl) library(xlsx) library(sna) library(asnipe) library(vegan) library(car) library(Rmisc) library(RColorBrewer) library(wesanderson) library(ggplot2) library(gridExtra) library(plyr) library(tidyr) library(reshape2) library(dplyr) library(mosaic) library(magicfor) library(onewaytests) library(lawstat) library(beepr) library(cowplot) library(igraph) library(MASS) library(bbmle) library(binom) library(SRCS) library(abind) library(gdata) ``` Set working directory and read in attribute file ```{r} #setwd("R:/Manuscripts/In Review/Tringali et al Life history stage social networks/Second Submission/For PeerJ/For Resubmission") #Read in attribute data attrib <- read_xlsx("LifeHistStageSNA_data_forPeerJ_20191120.xlsx", sheet = "Attribute Data") #Check headers and make sure dates and factors coded correctly #head(attrib) attrib$LastCensusObs <- as.Date(attrib$LastCensusObs, format = "%d-%b-%y") #Make sure factors are coded correctly attrib$Sex <- factor(attrib$Sex, levels = c("F", "M")) attrib$SC_2016 <- factor(attrib$SC_2016, levels = c("Breeder", "Dominant", "Helper")) attrib$SC_2017 <- factor(attrib$SC_2017, levels = c("Breeder", "Dominant", "Helper")) attrib$SC_2018 <- factor(attrib$SC_2018, levels = c("Breeder", "Dominant", "Helper")) attrib$Reclass_Contents2018 <- factor(attrib$Reclass_Contents2018, levels = c("Breeder", "Dominant", "Helper")) attrib$Terr2016 <- as.factor(attrib$Terr2016) attrib$Terr2017 <- as.factor(attrib$Terr2017) attrib$Terr2018 <- as.factor(attrib$Terr2018) attrib$NearestPt2016 <- as.numeric(attrib$NearestPt2016) attrib$NearestPt2017 <- as.numeric(attrib$NearestPt2017) attrib$NearestPt2018 <- as.numeric(attrib$NearestPt2018) attrib$NumAdjTerrs2016 <- as.numeric(attrib$NumAdjTerrs2016) attrib$NumAdjTerrs2017 <- as.numeric(attrib$NumAdjTerrs2017) attrib$NumAdjTerrs2018 <- as.numeric(attrib$NumAdjTerrs2018) attrib$UniquePtsDetected2016 <- as.numeric(attrib$UniquePtsDetected2016) attrib$UniquePtsDetected2017 <- as.numeric(attrib$UniquePtsDetected2017) attrib$UniquePtsDetected2018 <- as.numeric(attrib$UniquePtsDetected2018) attrib$Exclude2016 <- as.factor(attrib$Exclude2016) attrib$Exclude2017 <- as.factor(attrib$Exclude2017) attrib$Exclude2018 <- as.factor(attrib$Exclude2018) ``` Read in aggregation sampling metadata ```{r} meta16 <- read_xlsx("LifeHistStageSNA_data_forPeerJ_20191120.xlsx", sheet = "2016AggMeta") meta17 <- read_xlsx("LifeHistStageSNA_data_forPeerJ_20191120.xlsx", sheet = "2017AggMeta") meta18 <- read_xlsx("LifeHistStageSNA_data_forPeerJ_20191120.xlsx", sheet = "2018AggMeta") meta16$SurveyPoint <- as.numeric(meta16$SurveyPoint) meta17$SurveyPoint <- as.numeric(meta17$SurveyPoint) meta18$SurveyPoint <- as.numeric(meta18$SurveyPoint) meta16$AggID <- as.factor(meta16$AggID) meta17$AggID <- as.factor(meta17$AggID) meta18$AggID <- as.factor(meta18$AggID) meta16$TerrLoc <- as.factor(meta16$TerrLoc) meta17$TerrLoc <- as.factor(meta17$TerrLoc) meta18$TerrLoc <- as.factor(meta18$TerrLoc) ``` #Use lapply to format gbi data as matrices, get graphs, and calculate variables ```{r} gbi_2016 <- read_xlsx("LifeHistStageSNA_data_forPeerJ_20191120.xlsx", sheet = "2016_GBI_all") gbi_2017 <- read_xlsx("LifeHistStageSNA_data_forPeerJ_20191120.xlsx", sheet = "2017_GBI_all") gbi_2018 <- read_xlsx("LifeHistStageSNA_data_forPeerJ_20191120.xlsx", sheet = "2018_GBI_all") dim(gbi_2016) dim(gbi_2017) dim(gbi_2018) gbi.list <- list(gbi_2016, gbi_2017, gbi_2018) m.list <- lapply(gbi.list, function(x){ x <- as.matrix(x) #save as matrix rownames(x) <- x[,1]; x x <- x[,-1];x x[x > 0.01] <- 1; x # If an individual was observed, 1 x[is.na(x)] <- 0; x # else 0 x <- x[,which(colSums(x)>2)] #Only include birds seen more than twice }) gbi_2016 <- m.list[[1]] gbi_2017 <- m.list[[2]] gbi_2018 <- m.list[[3]] #Now we have 3 individual by group matrices, saved as a list #get a network for each matrix in the list (exclude incidental obs) #adj2016 <- get_network(gbi_2016, data_format = "GBI", association_index = "SRI", locations = meta16$SurveyPoint[!is.na(meta16$SurveyPoint)]) #adj2017 <- get_network(gbi_2017, data_format = "GBI", association_index = "SRI", locations = meta17$SurveyPoint[!is.na(meta17$SurveyPoint)]) #get a network for each matrix in the list (include incidental obs) adj2016 <- get_network(gbi_2016, data_format = "GBI", association_index = "SRI", locations = meta16$TerrLoc) adj2017 <- get_network(gbi_2017, data_format = "GBI", association_index = "SRI", locations = meta17$TerrLoc) adj2018 <- get_network(gbi_2018, data_format = "GBI", association_index = "SRI", locations = meta18$TerrLoc) adjs=list(adj2016, adj2017, adj2018) gs = lapply(adjs, function(x) graph_from_adjacency_matrix(x, "undirected", weighted = TRUE)) #the networks ##Plot par(mfrow = c(1, 3), mar = c(1, 1, 1, 1)) #create a plotting aea with 1 row and two columns for(i in 1:length(gs)){ plot(gs[[i]], vertex.color = "gold1", vertex.size = 8, vertex.label = "") } #Calculate degree, betweenness and clustering coefficient de <- lapply(gs, function(x) igraph::degree(x)) # degree centrality, number of adjacent edges, also called binary degree, number of associates btwn_norm <- lapply(gs, function(x) igraph::betweenness(x, weights = 1/E(x)$weight, normalized = TRUE)) #Vertex betweenness centrality, how often you are on the shortest path btwn 2 others, normalized for number of vertices in network, make weight inverse of edge weight (https://twitter.com/mattjsilk/status/997029135548669957) cc <- lapply(gs, function(x) igraph::transitivity(x, "local")) cc[cc = 0] <- NA cc[cc == "NaN"] <- NA #Apply node names names <- lapply(gs, function(x) V(x)$name) ##Create data frames for each year's networks net_met_2016 <- data.frame(node.names = names[[1]], degree = de[[1]], be_norm = btwn_norm[[1]], ClCoef = cc[[1]]) net_met_2017 <- data.frame(node.names = names[[2]], degree = de[[2]], be_norm = btwn_norm[[2]], ClCoef = cc[[2]]) net_met_2018 <- data.frame(node.names = names[[3]], degree = de[[3]], be_norm = btwn_norm[[3]], ClCoef = cc[[3]]) #Add metrics to attribute file attrib$de_16 <- net_met_2016$degree[match(attrib$JayID, net_met_2016$node.names)] attrib$benorm_16 <- net_met_2016$be_norm[match(attrib$JayID, net_met_2016$node.names)] attrib$ClCoef_16 <- net_met_2016$ClCoef[match(attrib$JayID, net_met_2016$node.names)] attrib$de_17 <- net_met_2017$degree[match(attrib$JayID, net_met_2017$node.names)] attrib$benorm_17 <- net_met_2017$be_norm[match(attrib$JayID, net_met_2017$node.names)] attrib$ClCoef_17 <- net_met_2017$ClCoef[match(attrib$JayID, net_met_2017$node.names)] attrib$de_18 <- net_met_2018$degree[match(attrib$JayID, net_met_2018$node.names)] attrib$benorm_18 <- net_met_2018$be_norm[match(attrib$JayID, net_met_2018$node.names)] attrib$ClCoef_18 <- net_met_2018$ClCoef[match(attrib$JayID, net_met_2018$node.names)] ``` #What is the denominator for the simple ratio for each dyad for each year, rule of thumb is 20 ```{r} #Print max # obs, denominator, mean denominator, and % below 20, and draw histogram for (i in 1:length(m.list)){ gbi <- m.list[[i]] N <- ncol(gbi) both.seen <- matrix(apply(gbi,2,function(x) { colSums((x+gbi)>1) }),nrow=N,ncol=N) diag(both.seen) <- 0 either.observed <- matrix(apply(gbi,2,function(x) { colSums((x+gbi)>0) }),nrow=N,ncol=N) SR_denoms <- upperTriangle(either.observed, diag = FALSE) #Find upper triangle of matrix when either member of diagonal was observed print(c("max colsum", max(colSums(gbi)))) print(c("max denominator", max(SR_denoms))) print(c("mean denominator", mean(SR_denoms))) print(c("SE", sd(SR_denoms, na.rm=TRUE)/sqrt(length(SR_denoms[!is.na(SR_denoms)])))) print(c("prop < 20", length(SR_denoms[SR_denoms < 20])/length(SR_denoms))) hist(SR_denoms) abline(v=20, col = "red") } ``` 2016 ave denominator is 18 and 58% of dyads below cut off. 2017 20% dyads below cut off and 2018, 6% #Create dataframes based on exclude criteria ```{r} attrib2016 <- as.data.frame(attrib[attrib$Exclude2016 == "No",]) attrib2016 <- as.data.frame(attrib2016[!is.na(attrib2016$JayID) & !is.na(attrib2016$de_16) & !is.na(attrib2016$SC_2016),]) #write.xlsx(as.data.frame(attrib2016), "attrib_w_vars_2016inclusion.xlsx", sheetName = "Sheet1", col.names = TRUE, row.names = FALSE) attrib2017 <- as.data.frame(attrib[attrib$Exclude2017 == "No",]) attrib2017 <- as.data.frame(attrib2017[!is.na(attrib2017$JayID) & !is.na(attrib2017$de_17) & !is.na(attrib2017$SC_2017),]) #write.xlsx(as.data.frame(attrib2017), "attrib_w_vars_2017inclusion.xlsx", sheetName = "Sheet1", col.names = TRUE, row.names = FALSE) attrib2018 <- as.data.frame(attrib[attrib$Exclude2018 == "No",]) attrib2018 <- as.data.frame(attrib2018[!is.na(attrib2018$JayID) & !is.na(attrib2018$de_18) & !is.na(attrib2018$SC_2018),]) #write.xlsx(as.data.frame(attrib2018), "attrib_w_vars_2018inclusion.xlsx", sheetName = "Sheet1", col.names = TRUE, row.names = FALSE) ``` ##Let's graph some data ```{r} #First, let's manipulate the dataframe to make caluclating summary stats easier #Make a copy of the data and save it as a dataframe head(attrib2016) str(attrib2016) names(attrib2016) attrib2016_2 <- as.data.frame(attrib2016) names(attrib2016_2) attrib2017_2 <- as.data.frame(attrib2017) names(attrib2017_2) attrib2018_2 <- as.data.frame(attrib2018) names(attrib2018_2) #Reorder so ID vars are first, and years are separate attrib2016_2 <- attrib2016_2[, c("JayID", "Sex", "LastCensusObs", "Terr2016", "NearestPt2016", "NumAdjTerrs2016", "SC_2016", "UniquePtsDetected2016", "de_16", "benorm_16" , "ClCoef_16")] attrib2017_2 <- attrib2017_2[, c("JayID", "Sex", "LastCensusObs", "Terr2017", "NearestPt2017", "NumAdjTerrs2017", "SC_2017", "UniquePtsDetected2017", "de_17", "benorm_17" , "ClCoef_17")] attrib2018_2 <- attrib2018_2[, c("JayID", "Sex", "LastCensusObs", "Terr2018", "NearestPt2018", "NumAdjTerrs2018", "SC_2018", "UniquePtsDetected2018", "de_18", "benorm_18" , "ClCoef_18")] #Get each metric on its own row names(attrib2016_2) melted16 <- melt(attrib2016_2, id.vars = c(names(attrib2016_2)[1:7])) head(melted16) names(melted16)[8] <- "Metric" names(melted16)[9] <- "Val" melted17 <- melt(attrib2017_2, id.vars = c(names(attrib2017_2)[1:7])) head(melted17) names(melted17)[8] <- "Metric" names(melted17)[9] <- "Val" melted18 <- melt(attrib2018_2, id.vars = c(names(attrib2018_2)[1:7])) head(melted18) names(melted18)[8] <- "Metric" names(melted18)[9] <- "Val" #Put variable year in its own column melted16$Year <- rep(2016, length(melted16$JayID)) melted17$Year <- rep(2017, length(melted17$JayID)) melted18$Year <- rep(2018, length(melted18$JayID)) #Rename column headers names(melted16) <- c("JayID", "Sex", "LastCensusObs", "Terr", "NearestPt", "NumAdjTerrs", "SC", "Metric", "Val", "Year" ) names(melted17) <- c("JayID", "Sex", "LastCensusObs", "Terr", "NearestPt", "NumAdjTerrs", "SC", "Metric", "Val", "Year" ) names(melted18) <- c("JayID", "Sex", "LastCensusObs", "Terr", "NearestPt", "NumAdjTerrs", "SC", "Metric", "Val", "Year" ) #Reorder them melted16 <- melted16[c(1:3, 10, 4:9)] melted17 <- melted17[c(1:3, 10, 4:9)] melted18 <- melted18[c(1:3, 10, 4:9)] #Take the year out of the variable names #melt.list <- list(melted16, melted17, melted18) melt.list <- list(melted17, melted18) melt.list <- lapply(melt.list, function(x) { x$Metric <- as.character(x$Metric); x x$Metric[x$Metric == "de_16" | x$Metric == "de_17"| x$Metric == "de_18"] <- "degree"; x x$Metric[x$Metric == "benorm_16" | x$Metric == "benorm_17" | x$Metric == "benorm_18"] <- "benorm"; x x$Metric[x$Metric == "ClCoef_16" | x$Metric == "ClCoef_17" | x$Metric == "ClCoef_18"] <- "ClCoef"; x x$Metric[x$Metric == "UniquePtsDetected2016" | x$Metric == "UniquePtsDetected2017" | x$Metric == "UniquePtsDetected2018"] <- "UniquePtsDetected"; x }) #melted16 <- melt.list[[1]] #melted17 <- melt.list[[2]] #melted18 <- melt.list[[3]] melted17 <- melt.list[[1]] melted18 <- melt.list[[2]] #Rbind these to one melted table #melted <- rbind(melted16, melted17, melted18) melted <- rbind(melted17, melted18) #Make sure social class, metric, and year are factors melted$SC <- factor(melted$SC, levels = c("Breeder", "Dominant", "Helper")) melted$Metric <- factor(melted$Metric, levels = c("degree", "benorm", "ClCoef", "UniquePtsDetected")) #melted$Year <- factor(melted$Year, levels = c("2016", "2017", "2018")) melted$Year <- factor(melted$Year, levels = c("2017", "2018")) melted$Val <-as.numeric(melted$Val) ``` #Make a boxplot ```{r, fig.width = 8, fig.height = 6} #Make nice labels fac.labs <- c("degree" = "Degree", "benorm" = "Betweenness, normalized", "ClCoef" = "Clustering Coefficient", "UniquePtsDetected" = "Unique Points Detected") #Group by variable, males and females plotted together ggplot(melted[!is.na(melted$SC) & !is.na(melted$Metric),], aes(x = SC, y = Val, fill = Year)) + geom_boxplot() + scale_x_discrete(labels = c("Breeder", "Dominant", "Helper")) + labs(x = "Social Class", y = "") + facet_wrap(~Metric, scales = "free", nrow = 2, labeller = labeller(Metric = fac.labs)) + scale_fill_grey(start = 0.5, end = 1, name = "Year", labels = c("2016", "2017", "2018")) #Add sex melted$ClassSexInt <- interaction(melted$Sex, melted$SC) melted$Metric2 <- plyr::revalue(melted$Metric, fac.labs) #Make a new column with the nice labels to pass thru to the labeller #png(file = "Fig2.Boxplots_stage.png") Fig2 <- ggplot(melted[!is.na(melted$SC) & !is.na(melted$Metric) & !is.na(melted$ClassSexInt),], aes(x = SC, y = Val, fill = Sex)) + geom_boxplot() + scale_x_discrete(labels = c("Breeder", "Dominant", "Helper")) + labs(x = "", y = "") + facet_grid(Metric2 ~ Year, scales = "free", labeller = labeller(Metric2 = label_wrap_gen(10))) + scale_fill_manual(values = c("gray100","gray50"), name = "Sex", labels = c("Female", "Male")) + theme(aspect.ratio = .25) save_plot("Fig2.Boxplots_stage.png", Fig2, base_aspect_ratio = 3:4) #dev.off("Fig2.Boxplots_stage.png", Fig2, base_aspect_ratio = 3:4) ``` #Take the truncated tables we melted, and use them to loop through everything more efficiently ```{r} names(attrib2016_2) <- c("JayID", "Sex", "LastCensusObs", "Terr", "NearestPt", "NumAdjTerrs", "SC", "up", "de", "benorm", "ClCoef") names(attrib2017_2) <- c("JayID", "Sex", "LastCensusObs", "Terr", "NearestPt", "NumAdjTerrs", "SC", "up", "de", "benorm", "ClCoef") names(attrib2018_2) <- c("JayID", "Sex", "LastCensusObs", "Terr", "NearestPt", "NumAdjTerrs", "SC", "up", "de", "benorm", "ClCoef") data_byyear <- list(attrib2016_2, attrib2017_2, attrib2018_2) meta_byyear <- list(meta16, meta17, meta17) ``` #Sample sizes #included in network ```{r} dim(gbi_2016) dim(gbi_2017) dim(gbi_2018) colnames(gbi_2016) includedSC2016 <- attrib$JayID[match(colnames(gbi_2016), attrib$JayID)] attrib[which(attrib$JayID %in% includedSC2016),] %>% group_by(SC_2016,Sex) %>% summarize(n = length(JayID)) attrib[which(!(includedSC2016 %in% attrib$JayID) & !is.na(attrib$SC_2016)),] %>% group_by(SC_2016,Sex, Exclude2016) %>% summarize(n = length(JayID)) "%notin%" <- Negate("%in%") colnames(gbi_2016)[which(colnames(gbi_2016) %notin% attrib$JayID)] colnames(gbi_2017) includedSC2017 <- attrib$JayID[match(colnames(gbi_2017), attrib$JayID)] attrib[which(attrib$JayID %in% includedSC2017),] %>% group_by(SC_2017,Sex) %>% summarize(n = length(JayID)) attrib[which(!(attrib$JayID %in% includedSC2017)),] %>% group_by(SC_2017,Sex, Exclude2017) %>% summarize(n = length(JayID)) colnames(gbi_2017)[which(colnames(gbi_2017) %notin% attrib$JayID)] colnames(gbi_2018) includedSC2018 <- attrib$JayID[match(colnames(gbi_2018), attrib$JayID)] attrib[which(attrib$JayID %in% includedSC2018),] %>% group_by(SC_2018,Sex) %>% summarize(n = length(JayID)) includedSC2018 <- attrib$JayID[match(colnames(gbi_2018), attrib$JayID)] attrib[which(attrib$JayID %notin% includedSC2018),] %>% group_by(SC_2018,Sex) %>% summarize(n = length(JayID)) colnames(gbi_2018)[which(colnames(gbi_2018) %notin% attrib$JayID)] ``` #analyzed ```{r} for (d in 1:length(data_byyear)) { print(c("Female breeders:", length(data_byyear[[d]]$JayID[data_byyear[[d]]$Sex == "F" & data_byyear[[d]]$SC == "Breeder" & !is.na(data_byyear[[d]]$SC) & !is.na(data_byyear[[d]]$de)]))) print(c("Male breeders:", length(data_byyear[[d]]$JayID[data_byyear[[d]]$Sex == "M" & data_byyear[[d]]$SC == "Breeder" & !is.na(data_byyear[[d]]$SC) & !is.na(data_byyear[[d]]$de)]))) print(c("Female Dom:", length(data_byyear[[d]]$JayID[data_byyear[[d]]$Sex == "F" & data_byyear[[d]]$SC == "Dominant" & !is.na(data_byyear[[d]]$SC) & !is.na(data_byyear[[d]]$de)]))) print(c("Male Dom:", length(data_byyear[[d]]$JayID[data_byyear[[d]]$Sex == "M" & data_byyear[[d]]$SC == "Dominant" & !is.na(data_byyear[[d]]$SC) & !is.na(data_byyear[[d]]$de)]))) print(c("Female Helpers:", length(data_byyear[[d]]$JayID[!is.na(data_byyear[[d]]$SC) & data_byyear[[d]]$Sex == "F" & data_byyear[[d]]$SC =="Helper" & !is.na(data_byyear[[d]]$de)]))) print(c("Male Helpers:", length(data_byyear[[d]]$JayID[data_byyear[[d]]$Sex == "M" & data_byyear[[d]]$SC == "Helper" & !is.na(data_byyear[[d]]$SC) & !is.na(data_byyear[[d]]$de)]))) } ``` #Check for pre-existing diff in number of adj terrs and distance to sampling points ```{r} for (d in 1:length(data_byyear)){ model1b <- lm(NearestPt ~ SC, data = data_byyear[[d]]) print(Anova(model1b)) boxplot(NearestPt ~ SC, data = data_byyear[[d]][data_byyear[[d]]$NearestPt < 100 & data_byyear[[d]]$NumAdjTerrs > 1,]) model1 <- lm(NumAdjTerrs ~ SC, data = data_byyear[[d]]) print(Anova(model1)) boxplot(NumAdjTerrs ~ SC, data = data_byyear[[d]][data_byyear[[d]]$NearestPt < 100 & data_byyear[[d]]$NumAdjTerrs > 1,]) } TukeyHSD(NearestPt ~ SC, data = data_byyear[[3]][data_byyear[[3]]$NearestPt < 100 & data_byyear[[3]]$NumAdjTerrs > 1,]) #which comparisons are driving the significance TukeyHSD(NumAdjTerrs ~ SC, data = data_byyear[[3]][data_byyear[[3]]$NearestPt < 100 & data_byyear[[3]]$NumAdjTerrs > 1,]) #which comparisons are driving the significance ``` Dist to nearest point is diff in 2018, dominants are farther than helpers or breeders, breeders and helpers not signif diff. #Check distributions of residuals, run levene's tests ```{r} for(d in 1:length(data_byyear)) { #Unique Points hist(data_byyear[[d]]$up) up_mod <- aov(up ~ NearestPt + NumAdjTerrs + Sex*SC, data = data_byyear[[d]]) print(shapiro.test(residuals(up_mod))) hist(residuals(up_mod)) print(leveneTest(up ~ SC, data = data_byyear[[d]])) print(up_mod) print(TukeyHSD(up_mod, which = c("Sex", "SC", "Sex:SC"))) hist(sqrt(data_byyear[[d]]$up)) up_mod <- lm(sqrt(up) ~ NearestPt + NumAdjTerrs + SC*Sex, data = data_byyear[[d]]) print(shapiro.test(residuals(up_mod))) hist(residuals(up_mod)) print(leveneTest(sqrt(up) ~ SC, data = data_byyear[[d]])) #Degree hist(data_byyear[[d]]$de) de_mod <- lm(de ~ NearestPt + NumAdjTerrs + SC*Sex, data = data_byyear[[d]]) print(shapiro.test(residuals(de_mod))) hist(residuals(de_mod)) print(leveneTest(de ~ SC, data = data_byyear[[d]])) hist(sqrt(data_byyear[[d]]$de)) de_mod <- lm(sqrt(de) ~ NearestPt + NumAdjTerrs + SC*Sex, data = data_byyear[[d]]) print(shapiro.test(residuals(de_mod))) hist(residuals(de_mod)) print(leveneTest(sqrt(de) ~ SC, data = data_byyear[[d]])) #Btwns hist(data_byyear[[d]]$benorm) be_mod <- lm(benorm ~ NearestPt + NumAdjTerrs + SC*Sex, data = data_byyear[[d]]) print(shapiro.test(residuals(be_mod))) hist(residuals(be_mod)) print(leveneTest(benorm ~ SC, data = data_byyear[[d]])) #Clustering Coeff hist(data_byyear[[d]]$ClCoef) cc_mod <- lm(ClCoef ~ NearestPt + NumAdjTerrs + SC*Sex, data = data_byyear[[d]]) print(shapiro.test(residuals(cc_mod))) hist(residuals(cc_mod)) print(leveneTest(ClCoef ~ SC, data = data_byyear[[d]])) } ``` #Sqrt transformation helps degree and up #Save observed values for comparisons we care about ```{r} magic_for(silent = TRUE) for (d in 1:(length(data_byyear))) { #Degree model_de <- aov(de ~ NearestPt + NumAdjTerrs + Sex + SC + Sex*SC, data = data_byyear[[d]]) De_LeveneF <- leveneTest(de ~ SC, data = data_byyear[[d]])$'F value'[1] D_B_De_diff_obs <- TukeyHSD(model_de, which = c("Sex", "SC", "Sex:SC"))$SC[1,1] H_D_De_diff_obs <- TukeyHSD(model_de, which = c("Sex", "SC", "Sex:SC"))$SC[3,1] H_B_De_diff_obs <- TukeyHSD(model_de, which = c("Sex", "SC", "Sex:SC"))$SC[2,1] MH_FH_De_diff_obs <- TukeyHSD(model_de, which = c("Sex", "SC", "Sex:SC"))$`Sex:SC`[15,1] D_B_De_CI_lower <- TukeyHSD(model_de, which = c("Sex", "SC", "Sex:SC"))$SC[1,2] H_D_De_CI_lower <- TukeyHSD(model_de, which = c("Sex", "SC", "Sex:SC"))$SC[3,2] H_B_De_CI_lower <- TukeyHSD(model_de, which = c("Sex", "SC", "Sex:SC"))$SC[2,2] MH_FH_De_CI_lower <- TukeyHSD(model_de, which = c("Sex", "SC", "Sex:SC"))$`Sex:SC`[15,2] D_B_De_CI_upper <- TukeyHSD(model_de, which = c("Sex", "SC", "Sex:SC"))$SC[1,3] H_D_De_CI_upper <- TukeyHSD(model_de, which = c("Sex", "SC", "Sex:SC"))$SC[3,3] H_B_De_CI_upper <- TukeyHSD(model_de, which = c("Sex", "SC", "Sex:SC"))$SC[2,3] MH_FH_De_CI_upper <- TukeyHSD(model_de, which = c("Sex", "SC", "Sex:SC"))$`Sex:SC`[15,3] D_B_De_plusmin <- D_B_De_diff_obs - D_B_De_CI_lower H_D_De_plusmin <- H_D_De_diff_obs - H_D_De_CI_lower H_B_De_plusmin <- H_B_De_diff_obs - H_B_De_CI_lower MH_FH_De_plusmin <- MH_FH_De_diff_obs - MH_FH_De_CI_lower #Betweenness model_be <- aov(benorm ~ NearestPt + NumAdjTerrs + Sex + SC + Sex*SC, data = data_byyear[[d]]) Be_LeveneF <- leveneTest(benorm ~ SC, data = data_byyear[[d]])$'F value'[1] D_B_Be_diff_obs <- TukeyHSD(model_be, which = c("Sex", "SC", "Sex:SC"))$SC[1,1] H_D_Be_diff_obs <- TukeyHSD(model_be, which = c("Sex", "SC", "Sex:SC"))$SC[3,1] H_B_Be_diff_obs <- TukeyHSD(model_be, which = c("Sex", "SC", "Sex:SC"))$SC[2,1] DomM_DomF_Be_diff_obs <- TukeyHSD(model_be, which = c("Sex", "SC", "Sex:SC"))$`Sex:SC`[10,1] HlprM_HlprF_Be_diff_obs <- TukeyHSD(model_be, which = c("Sex", "SC", "Sex:SC"))$`Sex:SC`[15,1] D_B_Be_CI_lower <- TukeyHSD(model_be, which = c("Sex", "SC", "Sex:SC"))$SC[1,2] H_D_Be_CI_lower <- TukeyHSD(model_be, which = c("Sex", "SC", "Sex:SC"))$SC[3,2] H_B_Be_CI_lower <- TukeyHSD(model_be, which = c("Sex", "SC", "Sex:SC"))$SC[2,2] #DomM_DomF_Be_CI_lower <- TukeyHSD(model_be, which = c("Sex", "SC", "Sex:SC"))$`Sex:SC`[10,2] HlprM_HlprF_Be_CI_lower <- TukeyHSD(model_be, which = c("Sex", "SC", "Sex:SC"))$`Sex:SC`[15,2] D_B_Be_CI_upper <- TukeyHSD(model_be, which = c("Sex", "SC", "Sex:SC"))$SC[1,3] H_D_Be_CI_upper <- TukeyHSD(model_be, which = c("Sex", "SC", "Sex:SC"))$SC[3,3] H_B_Be_CI_upper <- TukeyHSD(model_be, which = c("Sex", "SC", "Sex:SC"))$SC[2,3] #DomM_DomF_Be_CI_upper <- TukeyHSD(model_be, which = c("Sex", "SC", "Sex:SC"))$`Sex:SC`[10,3] HlprM_HlprF_Be_CI_upper <- TukeyHSD(model_be, which = c("Sex", "SC", "Sex:SC"))$`Sex:SC`[15,3] D_B_Be_plusmin <- D_B_Be_diff_obs - D_B_Be_CI_lower H_D_Be_plusmin <- H_D_Be_diff_obs - H_D_Be_CI_lower H_B_Be_plusmin <- H_B_Be_diff_obs - H_B_Be_CI_lower HlprM_HlprF_Be_plusmin <- HlprM_HlprF_Be_diff_obs - HlprM_HlprF_Be_CI_lower #Clustering Coefficient modelcc <- aov(ClCoef ~ NearestPt + NumAdjTerrs + Sex + SC + Sex*SC, data = data_byyear[[d]]) CC_LeveneF <- leveneTest(ClCoef ~ SC, data = data_byyear[[d]])$'F value'[1] D_B_CC_diff_obs <- TukeyHSD(modelcc, which = c("Sex", "SC", "Sex:SC"))$SC[1,1] H_D_CC_diff_obs <- TukeyHSD(modelcc, which = c("Sex", "SC", "Sex:SC"))$SC[3,1] H_B_CC_diff_obs <- TukeyHSD(modelcc, which = c("Sex", "SC", "Sex:SC"))$SC[2,1] #BrdrM_BrdrF_CC_diff_obs <- TukeyHSD(modelcc, which = c("Sex", "SC", "Sex:SC"))$`Sex:SC`[1,1] HlprM_HlprF_CC_diff_obs <- TukeyHSD(modelcc, which = c("Sex", "SC", "Sex:SC"))$`Sex:SC`[15,1] D_B_CC_CI_lower <- TukeyHSD(modelcc, which = c("Sex", "SC", "Sex:SC"))$SC[1,2] H_D_CC_CI_lower <- TukeyHSD(modelcc, which = c("Sex", "SC", "Sex:SC"))$SC[3,2] H_B_CC_CI_lower <- TukeyHSD(modelcc, which = c("Sex", "SC", "Sex:SC"))$SC[2,2] #BrdrM_BrdrF_CC_CI_lower <- TukeyHSD(modelcc, which = c("Sex", "SC", "Sex:SC"))$`Sex:SC`[1,2] HlprM_HlprF_CC_CI_lower <- TukeyHSD(modelcc, which = c("Sex", "SC", "Sex:SC"))$`Sex:SC`[15,2] D_B_CC_CI_upper <- TukeyHSD(modelcc, which = c("Sex", "SC", "Sex:SC"))$SC[1,3] H_D_CC_CI_upper <- TukeyHSD(modelcc, which = c("Sex", "SC", "Sex:SC"))$SC[3,3] H_B_CC_CI_upper <- TukeyHSD(modelcc, which = c("Sex", "SC", "Sex:SC"))$SC[2,3] #BrdrM_BrdrF_CC_CI_upper <- TukeyHSD(modelcc, which = c("Sex", "SC", "Sex:SC"))$`Sex:SC`[1,3] HlprM_HlprF_CC_CI_upper <- TukeyHSD(modelcc, which = c("Sex", "SC", "Sex:SC"))$`Sex:SC`[15,3] D_B_CC_plusmin <- D_B_CC_diff_obs - D_B_CC_CI_lower H_D_CC_plusmin <- H_D_CC_diff_obs - H_D_CC_CI_lower H_B_CC_plusmin <- H_B_CC_diff_obs - H_B_CC_CI_lower #BrdrM_BrdrF_CC_diff_obs - BrdrM_BrdrF_CC_CI_lower HlprM_HlprF_CC_plusmin <- HlprM_HlprF_CC_diff_obs - HlprM_HlprF_CC_CI_lower #Levene's Tests De_LeveneF <- leveneTest(de ~ SC, data = data_byyear[[d]])$'F value'[1] Be_LeveneF <- leveneTest(benorm ~ SC, data = data_byyear[[d]])$'F value'[1] CC_LeveneF <- leveneTest(ClCoef ~ SC, data = data_byyear[[d]])$'F value'[1] UP_LeveneF <- leveneTest(up ~ SC, data = data_byyear[[d]])$'F value'[1] #Save the observed values put(D_B_De_diff_obs, D_B_De_plusmin, H_D_De_diff_obs, H_D_De_plusmin, H_B_De_diff_obs, H_B_De_plusmin, MH_FH_De_diff_obs, MH_FH_De_plusmin, D_B_Be_diff_obs, D_B_Be_plusmin, H_D_Be_diff_obs,H_D_Be_plusmin, H_B_Be_diff_obs, H_B_Be_plusmin, HlprM_HlprF_Be_diff_obs, HlprM_HlprF_Be_plusmin, D_B_CC_diff_obs, D_B_CC_plusmin, H_D_CC_diff_obs, H_D_CC_plusmin, H_B_CC_diff_obs, H_B_CC_plusmin, HlprM_HlprF_CC_diff_obs, HlprM_HlprF_CC_plusmin, De_LeveneF, Be_LeveneF, CC_LeveneF, UP_LeveneF) } obsVals <- magic_result_as_dataframe() write.xlsx(obsVals, "20191120_ObsValues.xlsx", row.names = TRUE) obsdiffs <- obsVals[,c(seq(2, 24, 2), 26, 27, 28, 29)] ``` #2018 results very different than other 2 years, check and exclude dominants since so few ```{r} #Levene's Tests leveneTest(de ~ SC, data = attrib2018_2[attrib2018_2$SC == "Breeder" | attrib2018_2$SC == "Helper",]) leveneTest(benorm ~ SC, data = attrib2018_2[attrib2018_2$SC == "Breeder" | attrib2018_2$SC == "Helper",]) leveneTest(ClCoef ~ SC, data = attrib2018_2[attrib2018_2$SC == "Breeder" | attrib2018_2$SC == "Helper",]) leveneTest(up ~ SC, data = attrib2018_2[attrib2018_2$SC == "Breeder" | attrib2018_2$SC == "Helper",]) ``` #Nope, F values pretty similar #2016 ~Skip, network not robust Test for significance using 1000 data stream permutations. Takes ~ 10 minutes to run, great time to take a break! ```{r} #Get p networks from data stream permutation p <- 1000 #Define number of permutations rand_nets <- network_permutation(gbi_2016, data_format = "GBI", permutations = p, returns = 1, association_index = "SRI", locations = meta16$TerrLoc, within_location = TRUE) #control for location by specifiying survey point for each aggragation and randomizing within #Create an empty array to hold social network metrics from each of p permutations rand_netmets <- as.data.frame(net_met_2016) rand_netmets$SC <- attrib$SC_2016[match(rand_netmets$node.names, attrib$JayID)] rand_netmets$Sex <- attrib$Sex[match(rand_netmets$node.names, attrib$JayID)] rand_netmets$NearestPt <- attrib$NearestPt2016[match(rand_netmets$node.names, attrib$JayID)] rand_netmets$NumAdjTerrs <- attrib$NumAdjTerrs2016[match(rand_netmets$node.names, attrib$JayID)] rand_netmets$degree <- rep(NA, length(rand_netmets$node.names)) rand_netmets$be_norm <- rep(NA, length(rand_netmets$node.names)) rand_netmets$ClCoef <- rep(NA, length(rand_netmets$node.names)) rand_data <- replicate(n = p, rand_netmets, simplify = FALSE) #Call magic_for to save the outputs as a dataframe magic_for(silent = TRUE) for (i in c(1:p)) { #Graph the random network g = graph_from_adjacency_matrix(rand_nets[i,,], "undirected", weighted = TRUE) #Calculate degree, betweenness and clustering coefficient de <- igraph::degree(g) # degree centrality, number of adjacent edges, also called binary degree, number of associates btwn_norm <- igraph::betweenness(g, normalized = TRUE) #Vertex betweenness centrality, how often you are on the shortest path btwn 2 others, normalized for number of vertices in network cc <- igraph::transitivity(g, "local") cc[cc = 0] <- NA rand_data[[i]]$degree <- de rand_data[[i]]$be_norm <- btwn_norm rand_data[[i]]$ClCoef <- cc ##Define the models randmodelde <- aov(degree ~ NearestPt + NumAdjTerrs + Sex + SC + Sex*SC, data = rand_data[[i]]) randmodelbe <- aov(be_norm ~ NearestPt + NumAdjTerrs + Sex + SC + Sex*SC, data = rand_data[[i]]) randmodelcc <- aov(ClCoef ~ NearestPt + NumAdjTerrs + Sex + SC + Sex*SC, data = rand_data[[i]]) ##Homogeneity of Variances De_LeveneF_rand <- leveneTest(degree ~ SC, data = rand_data[[i]])$'F value'[1] Be_LeveneF_rand <- leveneTest(be_norm ~ SC, data = rand_data[[i]])$'F value'[1] CC_LeveneF_rand <- leveneTest(ClCoef ~ SC, data = rand_data[[i]])$'F value'[1] TukeyHSD(randmodelcc, which = c("Sex", "SC", "Sex:SC")) ##Degree D_B_De_diff_rand <- TukeyHSD(randmodelde, which = c("Sex", "SC", "Sex:SC"))$SC[1,1] H_D_De_diff_rand <- TukeyHSD(randmodelde, which = c("Sex", "SC", "Sex:SC"))$SC[3,1] H_B_De_diff_rand <- TukeyHSD(randmodelde, which = c("Sex", "SC", "Sex:SC"))$SC[2,1] MH_FH_De_diff_rand <- TukeyHSD(randmodelde, which = c("Sex", "SC", "Sex:SC"))$`Sex:SC`[15,1] ##Betweenness D_B_Be_diff_rand <- TukeyHSD(randmodelbe, which = c("Sex", "SC", "Sex:SC"))$SC[1,1] H_D_Be_diff_rand <- TukeyHSD(randmodelbe, which = c("Sex", "SC", "Sex:SC"))$SC[3,1] H_B_Be_diff_rand <- TukeyHSD(randmodelbe, which = c("Sex", "SC", "Sex:SC"))$SC[2,1] DomM_DomF_Be_diff_rand <- TukeyHSD(randmodelbe, which = c("Sex", "SC", "Sex:SC"))$`Sex:SC`[10,1] HlprM_HlprF_Be_diff_rand <- TukeyHSD(randmodelbe, which = c("Sex", "SC", "Sex:SC"))$`Sex:SC`[15,1] ##Clustering Coefficient D_B_CC_diff_rand <- TukeyHSD(randmodelcc, which = c("Sex", "SC", "Sex:SC"))$SC[1,1] H_D_CC_diff_rand <- TukeyHSD(randmodelcc, which = c("Sex", "SC", "Sex:SC"))$SC[3,1] H_B_CC_diff_rand <- TukeyHSD(randmodelcc, which = c("Sex", "SC", "Sex:SC"))$SC[2,1] HlprM_HlprF_CC_diff_rand <- TukeyHSD(randmodelcc, which = c("Sex", "SC", "Sex:SC"))$`Sex:SC`[15,1] put(D_B_De_diff_rand, H_D_De_diff_rand, H_B_De_diff_rand, MH_FH_De_diff_rand, D_B_Be_diff_rand, H_D_Be_diff_rand, H_B_Be_diff_rand, HlprM_HlprF_Be_diff_rand, D_B_CC_diff_rand, H_D_CC_diff_rand, H_B_CC_diff_rand, HlprM_HlprF_CC_diff_rand, De_LeveneF_rand, Be_LeveneF_rand, CC_LeveneF_rand) } randresults16 <- magic_result_as_dataframe() beep(sound = 8) ``` ##Calculate the p-values ```{r} #Combine the observed values into a vector, making sure is order is same as randomized data outputs obsdiffs <- obsVals[,c(1, seq(2, 24, 2), 26, 27, 28)] obsvals <- obsdiffs[1,] #the first row contains data for 2016 colnames(obsvals) <- colnames(randresults16[1:16]) #check before you run this! randresults16 <- rbind.data.frame(randresults16[,1:ncol(randresults16)], obsvals) #paste the observed values at the end of the df holding the random values pvals<- sapply(randresults16[1:1001,], function(x) length(which(x[1:1000] > x[1001]))) / p #count # times rand > = obs, divide by number of permutations, 1 tailed test, for CC, subtract from one (do in excel) because predicted diff is in opposite direction (except for male and female helper comparison) pvals2<- sapply(randresults16[1:1001,], function(x) length(which(x[1:1000] < x[1001]))) / p randresults16<- rbind.data.frame(randresults16, pvals, pvals2) means <- sapply(randresults16[1:1000,], mean) CI95 <- sapply(randresults16[1:1000,], function(x) {1.96*sd(x)/sqrt(1000)}) randresults16<- rbind.data.frame(randresults16, means, CI95) rownames(randresults16) <- c(1:p, "obs", "pval", "pval2", "rand_means", "rand_CI95") #write these to excel write.xlsx(randresults16, "20190920_RandResults16_All_withinlocs.xlsx", row.names = TRUE) beep(sound = "coin") ``` Graph the distribution of the random values with the obs value, then save to pdf ```{r} for (j in 1:ncol(randresults16)) { print( ggplot(NULL, aes(x = randresults16[c(1:p),colnames(randresults16)[j]])) + #restrict to rows with random values geom_histogram(fill = "white", colour = "black") + geom_vline(xintercept = randresults16[c(p+1),colnames(randresults16)[j]], col = "red") + #draw a red line at the obs value annotate("text", x = randresults16[c(1001),colnames(randresults16)[j]], y = 100, label = paste(" p = ", randresults16[c(p + 2),colnames(randresults16)[j]]), hjust = 0) + #print the p-value next to the red line xlim(min(randresults16[c(1:p + 1),colnames(randresults16)[j]]), max(randresults16[c(1:1001),colnames(randresults16)[j]])) + #set xlims xlab(colnames(randresults16)[j]) + ylab("count") ) } #Save graphs to pdf pdf(file = "20190920_distributions16_All_withinlocs.pdf", paper = "USr") for (j in 1:ncol(randresults16)) { print(ggplot(NULL, aes(x = randresults16[c(1:p),colnames(randresults16)[j]])) + #restrict to rows with random values geom_histogram(fill = "white", colour = "black") + geom_vline(xintercept = randresults16[c(p+1),colnames(randresults16)[j]], col = "red") + #draw a red line at the obs value annotate("text", x = randresults16[c(1001),colnames(randresults16)[j]], y = 100, label = paste(" p = ", randresults16[c(p + 2),colnames(randresults16)[j]]), hjust = 0) + #print the p-value next to the red line xlim(min(randresults16[c(1:p + 1),colnames(randresults16)[j]]), max(randresults16[c(1:1001),colnames(randresults16)[j]])) + #set xlims xlab(colnames(randresults16)[j]) + ylab("count")) } dev.off() ``` #2017 Test for significance using 1000 data stream permutations. Takes ~ 10 minutes to run, great time to take a break! ```{r} #Get p networks from data stream permutation p <- 1000 #Define number of permutations rand_nets <- network_permutation(gbi_2017, data_format = "GBI", permutations = p, returns = 1, association_index = "SRI", locations = meta17$TerrLoc, within_location = TRUE) #control for location by specifiying survey point for each aggragation and randomizing within #Create an empty array to hold social network metrics from each of p permutations rand_netmets <- as.data.frame(net_met_2017) rand_netmets$SC <- attrib$SC_2017[match(rand_netmets$node.names, attrib$JayID)] rand_netmets$Sex <- attrib$Sex[match(rand_netmets$node.names, attrib$JayID)] rand_netmets$NearestPt <- attrib$NearestPt2017[match(rand_netmets$node.names, attrib$JayID)] rand_netmets$NumAdjTerrs <- attrib$NumAdjTerrs2017[match(rand_netmets$node.names, attrib$JayID)] rand_netmets$degree <- rep(NA, length(rand_netmets$node.names)) rand_netmets$be_norm <- rep(NA, length(rand_netmets$node.names)) rand_netmets$ClCoef <- rep(NA, length(rand_netmets$node.names)) rand_data <- replicate(n = p, rand_netmets, simplify = FALSE) #Call magic_for to save the outputs as a dataframe magic_for(silent = TRUE) for (i in c(1:p)) { #Graph the random network g = graph_from_adjacency_matrix(rand_nets[i,,], "undirected", weighted = TRUE) #Calculate degree, betweenness and clustering coefficient de <- igraph::degree(g) # degree centrality, number of adjacent edges, also called binary degree, number of associates btwn_norm <- igraph::betweenness(g, weights = 1/E(g)$weight, normalized = TRUE) #Vertex betweenness centrality, how often you are on the shortest path btwn 2 others, normalized for number of vertices in network cc <- igraph::transitivity(g, "local") cc[cc = 0] <- NA rand_data[[i]]$degree <- de rand_data[[i]]$be_norm <- btwn_norm rand_data[[i]]$ClCoef <- cc ##Define the models randmodelde <- aov(degree ~ NearestPt + NumAdjTerrs + Sex + SC + Sex*SC, data = rand_data[[i]]) randmodelbe <- aov(be_norm ~ NearestPt + NumAdjTerrs + Sex + SC + Sex*SC, data = rand_data[[i]]) randmodelcc <- aov(ClCoef ~ NearestPt + NumAdjTerrs + Sex + SC + Sex*SC, data = rand_data[[i]]) ##Homogeneity of Variances De_LeveneF_rand <- leveneTest(degree ~ SC, data = rand_data[[i]])$'F value'[1] Be_LeveneF_rand <- leveneTest(be_norm ~ SC, data = rand_data[[i]])$'F value'[1] CC_LeveneF_rand <- leveneTest(ClCoef ~ SC, data = rand_data[[i]])$'F value'[1] TukeyHSD(randmodelcc, which = c("Sex", "SC", "Sex:SC")) ##Degree D_B_De_diff_rand <- TukeyHSD(randmodelde, which = c("Sex", "SC", "Sex:SC"))$SC[1,1] H_D_De_diff_rand <- TukeyHSD(randmodelde, which = c("Sex", "SC", "Sex:SC"))$SC[3,1] H_B_De_diff_rand <- TukeyHSD(randmodelde, which = c("Sex", "SC", "Sex:SC"))$SC[2,1] MH_FH_De_diff_rand <- TukeyHSD(randmodelde, which = c("Sex", "SC", "Sex:SC"))$`Sex:SC`[15,1] ##Betweenness D_B_Be_diff_rand <- TukeyHSD(randmodelbe, which = c("Sex", "SC", "Sex:SC"))$SC[1,1] H_D_Be_diff_rand <- TukeyHSD(randmodelbe, which = c("Sex", "SC", "Sex:SC"))$SC[3,1] H_B_Be_diff_rand <- TukeyHSD(randmodelbe, which = c("Sex", "SC", "Sex:SC"))$SC[2,1] DomM_DomF_Be_diff_rand <- TukeyHSD(randmodelbe, which = c("Sex", "SC", "Sex:SC"))$`Sex:SC`[10,1] HlprM_HlprF_Be_diff_rand <- TukeyHSD(randmodelbe, which = c("Sex", "SC", "Sex:SC"))$`Sex:SC`[15,1] ##Clustering Coefficient D_B_CC_diff_rand <- TukeyHSD(randmodelcc, which = c("Sex", "SC", "Sex:SC"))$SC[1,1] H_D_CC_diff_rand <- TukeyHSD(randmodelcc, which = c("Sex", "SC", "Sex:SC"))$SC[3,1] H_B_CC_diff_rand <- TukeyHSD(randmodelcc, which = c("Sex", "SC", "Sex:SC"))$SC[2,1] HlprM_HlprF_CC_diff_rand <- TukeyHSD(randmodelcc, which = c("Sex", "SC", "Sex:SC"))$`Sex:SC`[15,1] put(D_B_De_diff_rand, H_D_De_diff_rand, H_B_De_diff_rand, MH_FH_De_diff_rand, D_B_Be_diff_rand, H_D_Be_diff_rand, H_B_Be_diff_rand, HlprM_HlprF_Be_diff_rand, D_B_CC_diff_rand, H_D_CC_diff_rand, H_B_CC_diff_rand, HlprM_HlprF_CC_diff_rand, De_LeveneF_rand, Be_LeveneF_rand, CC_LeveneF_rand) } randresults17 <- magic_result_as_dataframe() beep(sound = 8) ``` ##Calculate the p-values ```{r} #Combine the observed values into a vector, making sure is order is same as randomized data outputs obsdiffs <- obsVals[,c(1, seq(2, 24, 2), 26, 27, 28)] obsvals <- obsdiffs[2,] #the second row contains data for 2017 colnames(obsvals) <- colnames(randresults17[1:16]) #check before you run this! randresults17 <- rbind.data.frame(randresults17[,1:ncol(randresults17)], obsvals) #paste the observed values at the end of the df holding the random values pvals<- sapply(randresults17[1:1001,], function(x) length(which(x[1:1000] > x[1001]))) / p #count # times rand > = obs, divide by number of permutations, 1 tailed test, for CC, subtract from one (do in excel) because predicted diff is in opposite direction (except for male and female helper comparison) pvals2 <- sapply(randresults17[1:1001,], function(x) length(which(x[1:1000] < x[1001]))) / p randresults17<- rbind.data.frame(randresults17, pvals, pvals2) means <- sapply(randresults17[1:1000,], mean) CI95 <- sapply(randresults17[1:1000,], function(x) {1.96*sd(x)/sqrt(1000)}) randresults17 <- rbind.data.frame(randresults17, means, CI95) rownames(randresults17) <- c(1:p, "obs", "pval", "pval2", "rand_means", "rand_CI95") #write these to excel write.xlsx(randresults17, "20191120_RandResults17_All_withinlocs.xlsx", row.names = TRUE) beep(sound = "coin") ``` Graph the distribution of the random values with the obs value, then save to pdf ```{r} for (j in 1:ncol(randresults17)) { print( ggplot(NULL, aes(x = randresults17[c(1:p),colnames(randresults17)[j]])) + #restrict to rows with random values geom_histogram(fill = "white", colour = "black") + geom_vline(xintercept = randresults17[c(p+1),colnames(randresults17)[j]], col = "red") + #draw a red line at the obs value annotate("text", x = randresults17[c(1001),colnames(randresults17)[j]], y = 100, label = paste(" p = ", randresults17[c(p + 2),colnames(randresults17)[j]]), hjust = 0) + #print the p-value next to the red line xlim(min(randresults17[c(1:p + 1),colnames(randresults17)[j]]), max(randresults17[c(1:1001),colnames(randresults17)[j]])) + #set xlims xlab(colnames(randresults17)[j]) + ylab("count") ) } #Save graphs to pdf pdf(file = "20191120_distributions17_All_withinlocs.pdf", paper = "USr") for (j in 1:ncol(randresults17)) { print( ggplot(NULL, aes(x = randresults17[c(1:p),colnames(randresults17)[j]])) + #restrict to rows with random values geom_histogram(fill = "white", colour = "black") + geom_vline(xintercept = randresults17[c(p+1),colnames(randresults17)[j]], col = "red") + #draw a red line at the obs value annotate("text", x = randresults17[c(1001),colnames(randresults17)[j]], y = 100, label = paste(" p = ", randresults17[c(p + 2),colnames(randresults17)[j]]), hjust = 0) + #print the p-value next to the red line xlim(min(randresults17[c(1:p + 1),colnames(randresults17)[j]]), max(randresults17[c(1:1001),colnames(randresults17)[j]])) + #set xlims xlab(colnames(randresults17)[j]) + ylab("count") ) } dev.off() ``` #2018 Test for significance using 1000 data stream permutations. Takes ~ 10 minutes to run, great time to take a break! ```{r} #Get p networks from data stream permutation p <- 1000 #Define number of permutations rand_nets <- network_permutation(gbi_2018, data_format = "GBI", permutations = p, returns = 1, association_index = "SRI", locations = meta18$TerrLoc, within_location = TRUE) #control for location by specifiying survey point for each aggragation and randomizing within #Create an empty array to hold social network metrics from each of p permutations rand_netmets <- as.data.frame(net_met_2018) rand_netmets$SC <- attrib$SC_2018[match(rand_netmets$node.names, attrib$JayID)] rand_netmets$Sex <- attrib$Sex[match(rand_netmets$node.names, attrib$JayID)] rand_netmets$NearestPt <- attrib$NearestPt2018[match(rand_netmets$node.names, attrib$JayID)] rand_netmets$NumAdjTerrs <- attrib$NumAdjTerrs2018[match(rand_netmets$node.names, attrib$JayID)] rand_netmets$degree <- rep(NA, length(rand_netmets$node.names)) rand_netmets$be_norm <- rep(NA, length(rand_netmets$node.names)) rand_netmets$ClCoef <- rep(NA, length(rand_netmets$node.names)) rand_data <- replicate(n = p, rand_netmets, simplify = FALSE) #Call magic_for to save the outputs as a dataframe magic_for(silent = TRUE) for (i in c(1:p)) { #Graph the random network g = graph_from_adjacency_matrix(rand_nets[i,,], "undirected", weighted = TRUE) #Calculate degree, betweenness and clustering coefficient de <- igraph::degree(g) # degree centrality, number of adjacent edges, also called binary degree, number of associates btwn_norm <- igraph::betweenness(g, weights = 1/E(g)$weight, normalized = TRUE) #Vertex betweenness centrality, how often you are on the shortest path btwn 2 others, normalized for number of vertices in network cc <- igraph::transitivity(g, "local") cc[cc = 0] <- NA rand_data[[i]]$degree <- de rand_data[[i]]$be_norm <- btwn_norm rand_data[[i]]$ClCoef <- cc ##Define the models randmodelde <- aov(degree ~ NearestPt + NumAdjTerrs + Sex + SC + Sex*SC, data = rand_data[[i]]) randmodelbe <- aov(be_norm ~ NearestPt + NumAdjTerrs + Sex + SC + Sex*SC, data = rand_data[[i]]) randmodelcc <- aov(ClCoef ~ NearestPt + NumAdjTerrs + Sex + SC + Sex*SC, data = rand_data[[i]]) ##Homogeneity of Variances De_LeveneF_rand <- leveneTest(degree ~ SC, data = rand_data[[i]])$'F value'[1] Be_LeveneF_rand <- leveneTest(be_norm ~ SC, data = rand_data[[i]])$'F value'[1] CC_LeveneF_rand <- leveneTest(ClCoef ~ SC, data = rand_data[[i]])$'F value'[1] TukeyHSD(randmodelcc, which = c("Sex", "SC", "Sex:SC")) ##Degree D_B_De_diff_rand <- TukeyHSD(randmodelde, which = c("Sex", "SC", "Sex:SC"))$SC[1,1] H_D_De_diff_rand <- TukeyHSD(randmodelde, which = c("Sex", "SC", "Sex:SC"))$SC[3,1] H_B_De_diff_rand <- TukeyHSD(randmodelde, which = c("Sex", "SC", "Sex:SC"))$SC[2,1] MH_FH_De_diff_rand <- TukeyHSD(randmodelde, which = c("Sex", "SC", "Sex:SC"))$`Sex:SC`[15,1] ##Betweenness D_B_Be_diff_rand <- TukeyHSD(randmodelbe, which = c("Sex", "SC", "Sex:SC"))$SC[1,1] H_D_Be_diff_rand <- TukeyHSD(randmodelbe, which = c("Sex", "SC", "Sex:SC"))$SC[3,1] H_B_Be_diff_rand <- TukeyHSD(randmodelbe, which = c("Sex", "SC", "Sex:SC"))$SC[2,1] DomM_DomF_Be_diff_rand <- TukeyHSD(randmodelbe, which = c("Sex", "SC", "Sex:SC"))$`Sex:SC`[10,1] HlprM_HlprF_Be_diff_rand <- TukeyHSD(randmodelbe, which = c("Sex", "SC", "Sex:SC"))$`Sex:SC`[15,1] ##Clustering Coefficient D_B_CC_diff_rand <- TukeyHSD(randmodelcc, which = c("Sex", "SC", "Sex:SC"))$SC[1,1] H_D_CC_diff_rand <- TukeyHSD(randmodelcc, which = c("Sex", "SC", "Sex:SC"))$SC[3,1] H_B_CC_diff_rand <- TukeyHSD(randmodelcc, which = c("Sex", "SC", "Sex:SC"))$SC[2,1] HlprM_HlprF_CC_diff_rand <- TukeyHSD(randmodelcc, which = c("Sex", "SC", "Sex:SC"))$`Sex:SC`[15,1] put(D_B_De_diff_rand, H_D_De_diff_rand, H_B_De_diff_rand, MH_FH_De_diff_rand, D_B_Be_diff_rand, H_D_Be_diff_rand, H_B_Be_diff_rand, HlprM_HlprF_Be_diff_rand, D_B_CC_diff_rand, H_D_CC_diff_rand, H_B_CC_diff_rand, HlprM_HlprF_CC_diff_rand, De_LeveneF_rand, Be_LeveneF_rand, CC_LeveneF_rand) } randresults18 <- magic_result_as_dataframe() beep(sound = 8) ``` ##Calculate the p-values ```{r} #Combine the observed values into a vector, making sure is order is same as randomized data outputs head(randresults18) obsdiffs <- obsVals[,c(1, seq(2, 24, 2), 26, 27, 28)] obsvals <- obsdiffs[3,] #the third row contains data for 2018 colnames(obsvals) <- colnames(randresults18[1:16]) #check before you run this! randresults18 <- rbind.data.frame(randresults18[,1:ncol(randresults18)], obsvals) #paste the observed values at the end of the df holding the random values pvals <- sapply(randresults18[1:1001,], function(x) length(which(x[1:1000] > x[1001]))) / p #count # times rand > = obs, divide by number of permutations, 1 tailed test, for CC, subtract from one (do in excel) because predicted diff is in opposite direction (except for male and female helper comparison) pvals2 <- sapply(randresults18[1:1001,], function(x) length(which(x[1:1000] < x[1001]))) / p randresults18 <- rbind.data.frame(randresults18, pvals, pvals2) means <- sapply(randresults18[1:1000,], mean) CI95 <- sapply(randresults18[1:1000,], function(x) {1.96*sd(x)/sqrt(1000)}) randresults18 <- rbind.data.frame(randresults18, means, CI95) rownames(randresults18) <- c(1:p, "obs", "pval", "pval2", "rand_means", "rand_CI95") #write these to excel write.xlsx(randresults18, "20191120_RandResults18_All_withinlocs.xlsx", row.names = TRUE) beep(sound = "coin") ``` Graph the distribution of the random values with the obs value, then save to pdf ```{r} for (j in 1:ncol(randresults18)) { print( ggplot(NULL, aes(x = randresults18[c(1:p),colnames(randresults18)[j]])) + #restrict to rows with random values geom_histogram(fill = "white", colour = "black") + geom_vline(xintercept = randresults18[c(p+1),colnames(randresults18)[j]], col = "red") + #draw a red line at the obs value annotate("text", x = randresults18[c(1001),colnames(randresults18)[j]], y = 100, label = paste(" p = ", randresults18[c(p + 2),colnames(randresults18)[j]]), hjust = 0) + #print the p-value next to the red line xlim(min(randresults18[c(1:p + 1),colnames(randresults18)[j]]), max(randresults18[c(1:1001),colnames(randresults18)[j]])) + #set xlims xlab(colnames(randresults18)[j]) + ylab("count") ) } #Save graphs to pdf pdf(file = "20191120_distributions18_All_withinlocs.pdf", paper = "USr") for (j in 1:ncol(randresults18)) { print( ggplot(NULL, aes(x = randresults18[c(1:p),colnames(randresults18)[j]])) + #restrict to rows with random values geom_histogram(fill = "white", colour = "black") + geom_vline(xintercept = randresults18[c(p+1),colnames(randresults18)[j]], col = "red") + #draw a red line at the obs value annotate("text", x = randresults18[c(1001),colnames(randresults18)[j]], y = 100, label = paste(" p = ", randresults18[c(p + 2),colnames(randresults18)[j]]), hjust = 0) + #print the p-value next to the red line xlim(min(randresults18[c(1:p + 1),colnames(randresults18)[j]]), max(randresults18[c(1:1001),colnames(randresults18)[j]])) + #set xlims xlab(colnames(randresults18)[j]) + ylab("count") ) } dev.off() ``` #2018 ~ Breeders who didn't breed reclassified as dominants Test for significance using 1000 data stream permutations. Takes ~ 10 minutes to run, great time to take a break! ```{r} #Save observed values for comparisons #Degree model_de <- aov(de_18 ~ NearestPt2018 + NumAdjTerrs2018 + Sex + Reclass_Contents2018 + Sex*Reclass_Contents2018, data = attrib2018[attrib2018$Exclude2018 == "No",]) De_LeveneF <- leveneTest(de_18 ~ Reclass_Contents2018, data = attrib2018[attrib2018$Exclude2018 == "No",])$'F value'[1] D_B_De_diff_obs <- TukeyHSD(model_de, which = c("Sex", "Reclass_Contents2018", "Sex:Reclass_Contents2018"))$Reclass_Contents2018[1,1] H_D_De_diff_obs <- TukeyHSD(model_de, which = c("Sex", "Reclass_Contents2018", "Sex:Reclass_Contents2018"))$Reclass_Contents2018[3,1] H_B_De_diff_obs <- TukeyHSD(model_de, which = c("Sex", "Reclass_Contents2018", "Sex:Reclass_Contents2018"))$Reclass_Contents2018[2,1] MH_FH_De_diff_obs <- TukeyHSD(model_de, which = c("Sex", "Reclass_Contents2018", "Sex:Reclass_Contents2018"))$`Sex:Reclass_Contents2018`[15,1] D_B_De_CI_lower <- TukeyHSD(model_de, which = c("Sex", "Reclass_Contents2018", "Sex:Reclass_Contents2018"))$Reclass_Contents2018[1,2] H_D_De_CI_lower <- TukeyHSD(model_de, which = c("Sex", "Reclass_Contents2018", "Sex:Reclass_Contents2018"))$Reclass_Contents2018[3,2] H_B_De_CI_lower <- TukeyHSD(model_de, which = c("Sex", "Reclass_Contents2018", "Sex:Reclass_Contents2018"))$Reclass_Contents2018[2,2] MH_FH_De_CI_lower <- TukeyHSD(model_de, which = c("Sex", "Reclass_Contents2018", "Sex:Reclass_Contents2018"))$`Sex:Reclass_Contents2018`[15,2] D_B_De_CI_upper <- TukeyHSD(model_de, which = c("Sex", "Reclass_Contents2018", "Sex:Reclass_Contents2018"))$Reclass_Contents2018[1,3] H_D_De_CI_upper <- TukeyHSD(model_de, which = c("Sex", "Reclass_Contents2018", "Sex:Reclass_Contents2018"))$Reclass_Contents2018[3,3] H_B_De_CI_upper <- TukeyHSD(model_de, which = c("Sex", "Reclass_Contents2018", "Sex:Reclass_Contents2018"))$Reclass_Contents2018[2,3] MH_FH_De_CI_upper <- TukeyHSD(model_de, which = c("Sex", "Reclass_Contents2018", "Sex:Reclass_Contents2018"))$`Sex:Reclass_Contents2018`[15,3] D_B_De_plusmin <- D_B_De_diff_obs - D_B_De_CI_lower H_D_De_plusmin <- H_D_De_diff_obs - H_D_De_CI_lower H_B_De_plusmin <- H_B_De_diff_obs - H_B_De_CI_lower MH_FH_De_plusmin <- MH_FH_De_diff_obs - MH_FH_De_CI_lower #Betweenness model_be <- aov(benorm_18 ~ NearestPt2018 + NumAdjTerrs2018 + Sex + Reclass_Contents2018 + Sex*Reclass_Contents2018, data = attrib2018[attrib2018$Exclude2018 == "No",]) Be_LeveneF <- leveneTest(benorm_18 ~ Reclass_Contents2018, data = attrib2018[attrib2018$Exclude2018 == "No",])$'F value'[1] D_B_Be_diff_obs <- TukeyHSD(model_be, which = c("Sex", "Reclass_Contents2018", "Sex:Reclass_Contents2018"))$Reclass_Contents2018[1,1] H_D_Be_diff_obs <- TukeyHSD(model_be, which = c("Sex", "Reclass_Contents2018", "Sex:Reclass_Contents2018"))$Reclass_Contents2018[3,1] H_B_Be_diff_obs <- TukeyHSD(model_be, which = c("Sex", "Reclass_Contents2018", "Sex:Reclass_Contents2018"))$Reclass_Contents2018[2,1] DomM_DomF_Be_diff_obs <- TukeyHSD(model_be, which = c("Sex", "Reclass_Contents2018", "Sex:Reclass_Contents2018"))$`Sex:Reclass_Contents2018`[10,1] HlprM_HlprF_Be_diff_obs <- TukeyHSD(model_be, which = c("Sex", "Reclass_Contents2018", "Sex:Reclass_Contents2018"))$`Sex:Reclass_Contents2018`[15,1] D_B_Be_CI_lower <- TukeyHSD(model_be, which = c("Sex", "Reclass_Contents2018", "Sex:Reclass_Contents2018"))$Reclass_Contents2018[1,2] H_D_Be_CI_lower <- TukeyHSD(model_be, which = c("Sex", "Reclass_Contents2018", "Sex:Reclass_Contents2018"))$Reclass_Contents2018[3,2] H_B_Be_CI_lower <- TukeyHSD(model_be, which = c("Sex", "Reclass_Contents2018", "Sex:Reclass_Contents2018"))$Reclass_Contents2018[2,2] #DomM_DomF_Be_CI_lower <- TukeyHSD(model_be, which = c("Sex", "Reclass_Contents2018", "Sex:Reclass_Contents2018"))$`Sex:Reclass_Contents2018`[10,2] HlprM_HlprF_Be_CI_lower <- TukeyHSD(model_be, which = c("Sex", "Reclass_Contents2018", "Sex:Reclass_Contents2018"))$`Sex:Reclass_Contents2018`[15,2] D_B_Be_CI_upper <- TukeyHSD(model_be, which = c("Sex", "Reclass_Contents2018", "Sex:Reclass_Contents2018"))$Reclass_Contents2018[1,3] H_D_Be_CI_upper <- TukeyHSD(model_be, which = c("Sex", "Reclass_Contents2018", "Sex:Reclass_Contents2018"))$Reclass_Contents2018[3,3] H_B_Be_CI_upper <- TukeyHSD(model_be, which = c("Sex", "Reclass_Contents2018", "Sex:Reclass_Contents2018"))$Reclass_Contents2018[2,3] #DomM_DomF_Be_CI_upper <- TukeyHSD(model_be, which = c("Sex", "Reclass_Contents2018", "Sex:Reclass_Contents2018"))$`Sex:Reclass_Contents2018`[10,3] HlprM_HlprF_Be_CI_upper <- TukeyHSD(model_be, which = c("Sex", "Reclass_Contents2018", "Sex:Reclass_Contents2018"))$`Sex:Reclass_Contents2018`[15,3] D_B_Be_plusmin <- D_B_Be_diff_obs - D_B_Be_CI_lower H_D_Be_plusmin <- H_D_Be_diff_obs - H_D_Be_CI_lower H_B_Be_plusmin <- H_B_Be_diff_obs - H_B_Be_CI_lower HlprM_HlprF_Be_plusmin <- HlprM_HlprF_Be_diff_obs - HlprM_HlprF_Be_CI_lower #Clustering Coefficient modelcc <- aov(ClCoef_18 ~ NearestPt2018 + NumAdjTerrs2018 + Sex + Reclass_Contents2018 + Sex*Reclass_Contents2018, data = attrib2018[attrib2018$Exclude2018 == "No",]) CC_LeveneF <- leveneTest(ClCoef_18 ~ Reclass_Contents2018, data = attrib2018[attrib2018$Exclude2018 == "No",])$'F value'[1] D_B_CC_diff_obs <- TukeyHSD(modelcc, which = c("Sex", "Reclass_Contents2018", "Sex:Reclass_Contents2018"))$Reclass_Contents2018[1,1] H_D_CC_diff_obs <- TukeyHSD(modelcc, which = c("Sex", "Reclass_Contents2018", "Sex:Reclass_Contents2018"))$Reclass_Contents2018[3,1] H_B_CC_diff_obs <- TukeyHSD(modelcc, which = c("Sex", "Reclass_Contents2018", "Sex:Reclass_Contents2018"))$Reclass_Contents2018[2,1] #BrdrM_BrdrF_CC_diff_obs <- TukeyHSD(modelcc, which = c("Sex", "Reclass_Contents2018", "Sex:Reclass_Contents2018"))$`Sex:Reclass_Contents2018`[1,1] HlprM_HlprF_CC_diff_obs <- TukeyHSD(modelcc, which = c("Sex", "Reclass_Contents2018", "Sex:Reclass_Contents2018"))$`Sex:Reclass_Contents2018`[15,1] D_B_CC_CI_lower <- TukeyHSD(modelcc, which = c("Sex", "Reclass_Contents2018", "Sex:Reclass_Contents2018"))$Reclass_Contents2018[1,2] H_D_CC_CI_lower <- TukeyHSD(modelcc, which = c("Sex", "Reclass_Contents2018", "Sex:Reclass_Contents2018"))$Reclass_Contents2018[3,2] H_B_CC_CI_lower <- TukeyHSD(modelcc, which = c("Sex", "Reclass_Contents2018", "Sex:Reclass_Contents2018"))$Reclass_Contents2018[2,2] #BrdrM_BrdrF_CC_CI_lower <- TukeyHSD(modelcc, which = c("Sex", "Reclass_Contents2018", "Sex:Reclass_Contents2018"))$`Sex:Reclass_Contents2018`[1,2] HlprM_HlprF_CC_CI_lower <- TukeyHSD(modelcc, which = c("Sex", "Reclass_Contents2018", "Sex:Reclass_Contents2018"))$`Sex:Reclass_Contents2018`[15,2] D_B_CC_CI_upper <- TukeyHSD(modelcc, which = c("Sex", "Reclass_Contents2018", "Sex:Reclass_Contents2018"))$Reclass_Contents2018[1,3] H_D_CC_CI_upper <- TukeyHSD(modelcc, which = c("Sex", "Reclass_Contents2018", "Sex:Reclass_Contents2018"))$Reclass_Contents2018[3,3] H_B_CC_CI_upper <- TukeyHSD(modelcc, which = c("Sex", "Reclass_Contents2018", "Sex:Reclass_Contents2018"))$Reclass_Contents2018[2,3] #BrdrM_BrdrF_CC_CI_upper <- TukeyHSD(modelcc, which = c("Sex", "Reclass_Contents2018", "Sex:Reclass_Contents2018"))$`Sex:Reclass_Contents2018`[1,3] HlprM_HlprF_CC_CI_upper <- TukeyHSD(modelcc, which = c("Sex", "Reclass_Contents2018", "Sex:Reclass_Contents2018"))$`Sex:Reclass_Contents2018`[15,3] D_B_CC_plusmin <- D_B_CC_diff_obs - D_B_CC_CI_lower H_D_CC_plusmin <- H_D_CC_diff_obs - H_D_CC_CI_lower H_B_CC_plusmin <- H_B_CC_diff_obs - H_B_CC_CI_lower #BrdrM_BrdrF_CC_diff_obs - BrdrM_BrdrF_CC_CI_lower HlprM_HlprF_CC_plusmin <- HlprM_HlprF_CC_diff_obs - HlprM_HlprF_CC_CI_lower #Levene's Tests De_LeveneF <- leveneTest(de_18 ~ Reclass_Contents2018, data = attrib2018[attrib2018$Exclude2018 == "No",])$'F value'[1] Be_LeveneF <- leveneTest(benorm_18 ~ Reclass_Contents2018, data = attrib2018[attrib2018$Exclude2018 == "No",])$'F value'[1] CC_LeveneF <- leveneTest(ClCoef_18 ~ Reclass_Contents2018, data = attrib2018[attrib2018$Exclude2018 == "No",])$'F value'[1] UP_LeveneF <- leveneTest(UniquePtsDetected2018 ~ Reclass_Contents2018, data = attrib2018[attrib2018$Exclude2018 == "No",])$'F value'[1] #Save the observed values obsVals <- c(D_B_De_diff_obs, D_B_De_plusmin, H_D_De_diff_obs, H_D_De_plusmin, H_B_De_diff_obs, H_B_De_plusmin, MH_FH_De_diff_obs, MH_FH_De_plusmin, D_B_Be_diff_obs, D_B_Be_plusmin, H_D_Be_diff_obs,H_D_Be_plusmin, H_B_Be_diff_obs, H_B_Be_plusmin, HlprM_HlprF_Be_diff_obs, HlprM_HlprF_Be_plusmin, D_B_CC_diff_obs, D_B_CC_plusmin, H_D_CC_diff_obs, H_D_CC_plusmin, H_B_CC_diff_obs, H_B_CC_plusmin, HlprM_HlprF_CC_diff_obs, HlprM_HlprF_CC_plusmin, De_LeveneF, Be_LeveneF, CC_LeveneF, UP_LeveneF) headernames <- c("D_B_De_diff_obs", "D_B_De_plusmin", "H_D_De_diff_obs", "H_D_De_plusmin", "H_B_De_diff_obs", "H_B_De_plusmin", "MH_FH_De_diff_obs", "MH_FH_De_plusmin", "D_B_Be_diff_obs", "D_B_Be_plusmin", "H_D_Be_diff_obs", "H_D_Be_plusmin", "H_B_Be_diff_obs", "H_B_Be_plusmin", "HlprM_HlprF_Be_diff_obs", "HlprM_HlprF_Be_plusmin", "D_B_CC_diff_obs", "D_B_CC_plusmin", "H_D_CC_diff_obs", "H_D_CC_plusmin", "H_B_CC_diff_obs", "H_B_CC_plusmin", "HlprM_HlprF_CC_diff_obs", "HlprM_HlprF_CC_plusmin", "De_LeveneF", "Be_LeveneF", "CC_LeveneF", "UP_LeveneF") obsVals_df <- rbind(obsVals) as.data.frame(obsVals_df) colnames(obsVals_df) <- headernames obsVals_df write.xlsx(obsVals_df, "20190904_ObsValues_SCbyContents.xlsx", row.names = FALSE) obsdiffs <- obsVals_df[c(seq(1, 24, 2), 25, 26, 27)] #Get p networks from data stream permutation p <- 1000 #Define number of permutations rand_nets <- network_permutation(gbi_2018, data_format = "GBI", permutations = p, returns = 1, association_index = "SRI", locations = meta18$TerrLoc, within_location = TRUE) #control for location by specifiying survey point for each aggragation and randomizing within #Create an empty array to hold social network metrics from each of p permutations rand_netmets <- as.data.frame(net_met_2018) rand_netmets$SC <- attrib$Reclass_Contents2018[match(rand_netmets$node.names, attrib$JayID)] rand_netmets$Sex <- attrib$Sex[match(rand_netmets$node.names, attrib$JayID)] rand_netmets$NearestPt <- attrib$NearestPt2018[match(rand_netmets$node.names, attrib$JayID)] rand_netmets$NumAdjTerrs <- attrib$NumAdjTerrs2018[match(rand_netmets$node.names, attrib$JayID)] rand_netmets$degree <- rep(NA, length(rand_netmets$node.names)) rand_netmets$be_norm <- rep(NA, length(rand_netmets$node.names)) rand_netmets$ClCoef <- rep(NA, length(rand_netmets$node.names)) rand_data <- replicate(n = p, rand_netmets, simplify = FALSE) #Call magic_for to save the outputs as a dataframe magic_for(silent = TRUE) for (i in c(1:p)) { #Graph the random network g = graph_from_adjacency_matrix(rand_nets[i,,], "undirected", weighted = TRUE) #Calculate degree, betweenness and clustering coefficient de <- igraph::degree(g) # degree centrality, number of adjacent edges, also called binary degree, number of associates btwn_norm <- igraph::betweenness(g, weights = 1/E(g)$weight, normalized = TRUE) #Vertex betweenness centrality, how often you are on the shortest path btwn 2 others, normalized for number of vertices in network cc <- igraph::transitivity(g, "local") cc[cc = 0] <- NA rand_data[[i]]$degree <- de rand_data[[i]]$be_norm <- btwn_norm rand_data[[i]]$ClCoef <- cc ##Define the models randmodelde <- aov(degree ~ NearestPt + NumAdjTerrs + Sex + SC + Sex*SC, data = rand_data[[i]]) randmodelbe <- aov(be_norm ~ NearestPt + NumAdjTerrs + Sex + SC + Sex*SC, data = rand_data[[i]]) randmodelcc <- aov(ClCoef ~ NearestPt + NumAdjTerrs + Sex + SC + Sex*SC, data = rand_data[[i]]) ##Homogeneity of Variances De_LeveneF_rand <- leveneTest(degree ~ SC, data = rand_data[[i]])$'F value'[1] Be_LeveneF_rand <- leveneTest(be_norm ~ SC, data = rand_data[[i]])$'F value'[1] CC_LeveneF_rand <- leveneTest(ClCoef ~ SC, data = rand_data[[i]])$'F value'[1] TukeyHSD(randmodelcc, which = c("Sex", "SC", "Sex:SC")) ##Degree D_B_De_diff_rand <- TukeyHSD(randmodelde, which = c("Sex", "SC", "Sex:SC"))$SC[1,1] H_D_De_diff_rand <- TukeyHSD(randmodelde, which = c("Sex", "SC", "Sex:SC"))$SC[3,1] H_B_De_diff_rand <- TukeyHSD(randmodelde, which = c("Sex", "SC", "Sex:SC"))$SC[2,1] MH_FH_De_diff_rand <- TukeyHSD(randmodelde, which = c("Sex", "SC", "Sex:SC"))$`Sex:SC`[15,1] ##Betweenness D_B_Be_diff_rand <- TukeyHSD(randmodelbe, which = c("Sex", "SC", "Sex:SC"))$SC[1,1] H_D_Be_diff_rand <- TukeyHSD(randmodelbe, which = c("Sex", "SC", "Sex:SC"))$SC[3,1] H_B_Be_diff_rand <- TukeyHSD(randmodelbe, which = c("Sex", "SC", "Sex:SC"))$SC[2,1] DomM_DomF_Be_diff_rand <- TukeyHSD(randmodelbe, which = c("Sex", "SC", "Sex:SC"))$`Sex:SC`[10,1] HlprM_HlprF_Be_diff_rand <- TukeyHSD(randmodelbe, which = c("Sex", "SC", "Sex:SC"))$`Sex:SC`[15,1] ##Clustering Coefficient D_B_CC_diff_rand <- TukeyHSD(randmodelcc, which = c("Sex", "SC", "Sex:SC"))$SC[1,1] H_D_CC_diff_rand <- TukeyHSD(randmodelcc, which = c("Sex", "SC", "Sex:SC"))$SC[3,1] H_B_CC_diff_rand <- TukeyHSD(randmodelcc, which = c("Sex", "SC", "Sex:SC"))$SC[2,1] HlprM_HlprF_CC_diff_rand <- TukeyHSD(randmodelcc, which = c("Sex", "SC", "Sex:SC"))$`Sex:SC`[15,1] put(D_B_De_diff_rand, H_D_De_diff_rand, H_B_De_diff_rand, MH_FH_De_diff_rand, D_B_Be_diff_rand, H_D_Be_diff_rand, H_B_Be_diff_rand, HlprM_HlprF_Be_diff_rand, D_B_CC_diff_rand, H_D_CC_diff_rand, H_B_CC_diff_rand, HlprM_HlprF_CC_diff_rand, De_LeveneF_rand, Be_LeveneF_rand, CC_LeveneF_rand) } randresults18_SCbycontents <- magic_result_as_dataframe() beep(sound = 8) ``` ##Calculate the p-values ```{r} #Combine the observed values into a vector, making sure is order is same as randomized data outputs head(randresults18_SCbycontents) obsdiffs colnames(obsvals) <- colnames(randresults18_SCbycontents[2:16]) #check before you run this! randresults18_SCbycontents <- rbind.data.frame(randresults18_SCbycontents[,2:ncol(randresults18_SCbycontents)], obsdiffs[1:15]) #paste the observed values at the end of the df holding the random values pvals <- sapply(randresults18_SCbycontents[1:1001,], function(x) length(which(x[1:1000] > x[1001]))) / p #count # times rand > = obs, divide by number of permutations, 1 tailed test, for CC, subtract from one (do in excel) because predicted diff is in opposite direction (except for male and female helper comparison) pvals2 <- sapply(randresults18_SCbycontents[1:1001,], function(x) length(which(x[1:1000] < x[1001]))) / p randresults18_SCbycontents <- rbind.data.frame(randresults18_SCbycontents, pvals, pvals2) means <- sapply(randresults18_SCbycontents[1:1000,], mean) CI95 <- sapply(randresults18_SCbycontents[1:1000,], function(x) {1.96*sd(x)/sqrt(1000)}) randresults18_SCbycontents <- rbind.data.frame(randresults18_SCbycontents, means, CI95) rownames(randresults18_SCbycontents) <- c(1:p, "obs", "pval", "pval2", "rand_means", "rand_CI95") #write these to excel write.xlsx(randresults18_SCbycontents, "20190920_RandResults18_All_withinlocs_SCbycontents.xlsx", row.names = TRUE) beep(sound = "coin") ``` Graph the distribution of the random values with the obs value, then save to pdf ```{r} for (j in 1:ncol(randresults18_SCbycontents)) { print( ggplot(NULL, aes(x = randresults18_SCbycontents[c(1:p),colnames(randresults18_SCbycontents)[j]])) + #restrict to rows with random values geom_histogram(fill = "white", colour = "black") + geom_vline(xintercept = randresults18_SCbycontents[c(p+1),colnames(randresults18_SCbycontents)[j]], col = "red") + #draw a red line at the obs value annotate("text", x = randresults18_SCbycontents[c(1001),colnames(randresults18_SCbycontents)[j]], y = 100, label = paste(" p = ", randresults18_SCbycontents[c(p + 2),colnames(randresults18_SCbycontents)[j]]), hjust = 0) + #print the p-value next to the red line xlim(min(randresults18_SCbycontents[c(1:p + 1),colnames(randresults18_SCbycontents)[j]]), max(randresults18_SCbycontents[c(1:1001),colnames(randresults18_SCbycontents)[j]])) + #set xlims xlab(colnames(randresults18_SCbycontents)[j]) + ylab("count") ) } ``` #Unique Points ```{r} magic_for(silent = TRUE) for (d in 1:(length(data_byyear))) { #Unique Points model_up <- aov(up ~ NearestPt + NumAdjTerrs + Sex + SC + Sex*SC, data = data_byyear[[d]]) print(Anova(model_up)) print(TukeyHSD(model_up, which = c("Sex", "SC", "Sex:SC"))) UP_LeveneF <- leveneTest(up ~ SC, data = data_byyear[[d]])$'F value'[1] UP_LeveneFp <- leveneTest(up ~ SC, data = data_byyear[[d]])$'Pr(>F)'[1] D_B_UP_diff_obs <- TukeyHSD(model_up, which = c("Sex", "SC", "Sex:SC"))$SC[1,1] H_D_UP_diff_obs <- TukeyHSD(model_up, which = c("Sex", "SC", "Sex:SC"))$SC[3,1] H_B_UP_diff_obs <- TukeyHSD(model_up, which = c("Sex", "SC", "Sex:SC"))$SC[2,1] MH_FH_UP_diff_obs <- TukeyHSD(model_up, which = c("Sex", "SC", "Sex:SC"))$`Sex:SC`[15,1] D_B_UP_CI_lower <- TukeyHSD(model_up, which = c("Sex", "SC", "Sex:SC"))$SC[1,2] H_D_UP_CI_lower <- TukeyHSD(model_up, which = c("Sex", "SC", "Sex:SC"))$SC[3,2] H_B_UP_CI_lower <- TukeyHSD(model_up, which = c("Sex", "SC", "Sex:SC"))$SC[2,2] MH_FH_UP_CI_lower <- TukeyHSD(model_up, which = c("Sex", "SC", "Sex:SC"))$`Sex:SC`[15,2] D_B_UP_plusmin <- D_B_UP_diff_obs - D_B_UP_CI_lower H_D_UP_plusmin <- H_D_UP_diff_obs - H_D_UP_CI_lower H_B_UP_plusmin <- H_B_UP_diff_obs - H_B_UP_CI_lower MH_FH_UP_plusmin <- MH_FH_UP_diff_obs - MH_FH_UP_CI_lower #Save the observed values put(D_B_UP_diff_obs, D_B_UP_plusmin, H_D_UP_diff_obs, H_D_UP_plusmin, H_B_UP_diff_obs, H_B_UP_plusmin, MH_FH_UP_diff_obs, MH_FH_UP_plusmin, UP_LeveneF, UP_LeveneFp) } UPobsVals <- magic_result_as_dataframe() ``` Plot Figure 2 ```{r} ggplot(melted,aes(x = SC, y = Val, fill = Sex)) + geom_boxplot() + scale_x_discrete(labels = c("Breeder", "Dominant", "Helper")) + labs(x = "", y = "") + theme(axis.text = element_text(size = 14), axis.title = element_text(size = 16), strip.text = element_text(size = 12)) + facet_grid(Metric2 ~ Year, scales = "free", labeller = labeller(Metric2 = label_wrap_gen(10))) + scale_fill_manual(values = c("gray100","gray50"), name = "Sex", labels = c("Female", "Male")) #Split by metric p.list <- lapply(sort(unique(melted$Metric2)), function(l) { ggplot(melted[melted$Metric2 == l,], aes(x = SC, y = Val, fill = Sex)) + geom_boxplot() + scale_x_discrete(labels = c("Breeder", "Dominant", "Helper")) + labs(x = "", y = "") + theme(axis.text = element_text(size = 14), axis.title = element_text(size = 16), strip.text = element_text(size = 12)) + facet_grid(Metric2~ Year, scales = "free", labeller = labeller(Metric2 = label_wrap_gen(10))) + scale_fill_manual(values = c("gray100","gray50"), name = "Sex", labels = c("Female", "Male")) }) plotme <- plot_grid(p.list[[1]], p.list[[2]], p.list[[3]], p.list[[4]], align = "v", nrow = 4) plotme save_plot("Fig2_20190920.png", plotme, ncol = 1, nrow = 2, base_aspect_ratio = 3:4) beep(sound = "coin") ``` #How much variance does each parameter explain? ```{r} for (d in 1:(length(data_byyear))) { up_mod <- lm(up ~ NearestPt + NumAdjTerrs + SC + Sex + SC*Sex, data = data_byyear[[d]]) print(Anova(up_mod)) de_mod <- lm(de ~ NearestPt + NumAdjTerrs + SC + Sex + SC*Sex, data = data_byyear[[d]]) print(Anova(de_mod)) be_mod <- lm(benorm ~ NearestPt + NumAdjTerrs + SC + Sex + SC*Sex, data = data_byyear[[d]]) print(Anova(be_mod)) cc_mod <- lm(ClCoef ~ NearestPt + NumAdjTerrs + SC + Sex + SC*Sex, data = data_byyear[[d]]) print(Anova(cc_mod)) } ``` #Estimate robustness using Farine Strandburg-Pushkin method, code below from the supp mat (4) from their 2018 publication ~ I can't get this to work, ```{r} # Parameters n.samples <- 1000 # Number of networks to generate from Bayesian intervals n.bootstraps <- 1000 # Number of bootstrap replicates # Generate observed network gbi_2017[gbi_2017 > 0.01] <- 1 gbi_2017[is.na(gbi_2017)] <- 0 gbi <- get_network(gbi_2017[,-1], data_format="GBI", identities=colnames(gbi_2017[,-1])) observed.network <- gbi diag(observed.network) <- NA #### PART A Generate complete observation data ##### # Collate information on observations of dyads N <- ncol(gbi) both.seen <- matrix(apply(gbi,2,function(x) { colSums((x+gbi)>1) }),nrow=N,ncol=N) diag(both.seen) <- 0 either.observed <- matrix(apply(gbi,2,function(x) { colSums((x+gbi)>0) }),nrow=N,ncol=N) # Clopper-Pearson confidence intervals on edge weights observed.network.cp.lower <- apply(abind(both.seen,either.observed,along=3),c(1,2),function(x){if(x[2]==0){return(0)} else{return(binom.test(x[1],x[2])$conf.int[1])}}) observed.network.cp.upper <- apply(abind(both.seen,either.observed,along=3),c(1,2),function(x){if(x[2]==0){return(1)} else{return(binom.test(x[1],x[2])$conf.int[2])}}) # Bayesian estimate # Get prior ml2.likeli <- function(a,b,si,di){ s.choose.d <- apply(cbind(si,di),1,function(x){return(choose(x[1],x[2]))}) beta1 <- apply(cbind(si,di),1,function(x){return(beta(a+x[2],b+x[1]-x[2]))}) likeli <- sum(log(s.choose.d) + log(beta1) - log(rep(beta(a,b),length(si)))) return(likeli) } si <- either.observed[upper.tri(either.observed)] di <- both.seen[upper.tri(both.seen)] #dim(either.observed) #dim(both.seen) #class(si) #length(si) #cells in one trianlge of matrix - cells on diagonal #length(di) #di[di>0] prior <- optim(par=c(1,1),fn=function(x){return(-ml2.likeli(x[1],x[2],si,di))}) ##produced NANs, probably because out of 24310 cells in matrix half, only 324 interact # Storage parameters observed.network.bayes.lower <- matrix(NA,nrow=N,ncol=N) observed.network.bayes.upper <- matrix(NA,nrow=N,ncol=N) observed.network.bayes <- matrix(NA,nrow=N,ncol=N) observed.network.bayes.d <- array(NA,dim=c(N,N,n.samples)) # Loop through each dyad ij.pairs <- cbind(rep(1:N,each=N),rep(1:N)) ij.pairs <- ij.pairs[which(ij.pairs[,1]1) }),nrow=N,ncol=N) diag(both.seen) <- 0 either.observed <- matrix(apply(gbi.s,2,function(x) { colSums((x+gbi.s)>0) }),nrow=N,ncol=N) # Clopper-Pearson confidence intervals on edge weights observed.network.cp.lower.s[,,i] <- apply(abind(both.seen, either.observed, along=3), c(1,2), function(x){if(x[2]==0){return(0)} else{return(binom.test(x[1],x[2])$conf.int[1])}}) observed.network.cp.upper.s[,,i] <- apply(abind(both.seen,either.observed,along=3),c(1,2),function(x){if(x[2]==0){return(1)} else{return(binom.test(x[1],x[2])$conf.int[2])}}) # Bayesian estimate # Get prior si <- either.observed[upper.tri(either.observed)] di <- both.seen[upper.tri(both.seen)] prior <- optim(par=c(1,1),fn=function(x){return(-ml2.likeli(x[1],x[2],si,di))}) # Storage parameters observed.network.bayes.d <- array(NA,dim=c(N,N,n.samples)) # Loop through each dyad ij.pairs <- cbind(rep(1:N,each=N),rep(1:N)) ij.pairs <- ij.pairs[which(ij.pairs[,1]