--- title: "final_statistics" author: "Corrina Thomsen" date: "February 17, 2018" output: html_document: fig_caption: yes toc: yes toc_float: yes theme: "cerulean" highlight: "pygments" pdf_document: toc: yes --- ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE) ``` ```{r} library(tigerstats) library(lme4) library(gvlma) library(nlme) library(car) library(VGAM) library(ape) library(plyr) ``` ```{r} Survivorship <- read.csv("~/Dropbox/MSc/Data/Survivorship.csv") Survivorship$status <- as.factor(Survivorship$status) Survivorship$inoc <- as.factor(Survivorship$inoc) levels(Survivorship$status) <- c("Dead", "Alive") levels(Survivorship$inoc) <- c("No", "Yes") PlantMeasurements <- read.csv("~/Dropbox/MSc/Data/PlantMeasurements.csv") PlantMeasurements$year <- as.factor(PlantMeasurements$year) PlantMeasurements$inoc <- as.factor(PlantMeasurements$inoc) levels(PlantMeasurements$inoc) <- c("No", "Yes") PlantMeasurements$Inoculation <- PlantMeasurements$inoc AlivePM <- subset(PlantMeasurements, subset = (shoot.length != 0 & is.na(shoot.length) == FALSE) & is.na(inoc) == FALSE & n.shoots != 0 & n.shoots != "DEAD" & n.shoots != "dead") shootAlive1415 <- subset(AlivePM, subset = (year != 2013 & year != 2016 & year != 2017)) shootAlive1617 <- subset(AlivePM, subset = (year == "2016" | year == "2017")) SanDiego <- read.csv("~/Dropbox/MSc/Data/sandiego.csv") trackingData <- read.csv("~/Dropbox/MSc/Data/trackingRethreshold.csv") longTrack <- read.csv("~/Dropbox/MSc/Data/trackingLong.csv") ``` *** ##Soil Characteristics ```{r} favstats(SanDiego$hyphal.length.mpg) favstats(SanDiego$spores.npg) favstats(SanDiego$perc.arbusc) favstats(SanDiego$perc.hyphal) favstats(SanDiego$perc.vesc) favstats(trackingdataR$P) favstats(trackingdataR$N) favstats(trackingdataR$pH) ``` *** ##Survival Fisher's Test **Assumptions:** 1. Independence 2. Fixed values ```{r} CleanSurvivorship <- subset(Survivorship, subset = (is.na(inoc) == FALSE & treatment != "O")) CleanSurvG <- subset(CleanSurvivorship, subset = (treatment == "G")) CleanSurvJ <- subset(CleanSurvivorship, subset = (treatment == "J")) CleanSurvN <- subset(CleanSurvivorship, subset = (treatment == "N")) #Do 1X and 2X J differ? CleanSurvJY <- subset(CleanSurvivorship, subset = (treatment == "J" & inoc == "Yes")) ContSurvJY <- xtabs(~ status + inoc.freq, data = CleanSurvJY) fisher.test(ContSurvJY) ContSurvivalPooled <- xtabs(~ status + inoc, data = CleanSurvivorship) ContSurvG <- xtabs(~ status + inoc, data = CleanSurvG) ContSurvJ <- xtabs(~ status + inoc, data = CleanSurvJ) ContSurvN <- xtabs(~ status + inoc, data = CleanSurvN) fisher.test(ContSurvivalPooled) fisher.test(ContSurvG) fisher.test(ContSurvJ) fisher.test(ContSurvN) ``` ```{r} #Percentages percDeadNoAll <- round(100 * prop(~status, data=subset(CleanSurvivorship, subset = (inoc == "No")), success = "Dead"), 1) percDeadNoAll percDeadYesAll <- round(100 * prop(~status, data=subset(CleanSurvivorship, subset = (inoc == "Yes")), success = "Dead"), 1) percDeadYesAll ``` *** ##Shoot length GLMM with treatment as fixed factor, year as random factor. ```{r} shootAlive1315 <- subset(AlivePM, subset = (year != 2016 & year != 2017)) ``` *Non-normal, so I'm log-transforming which greatly improves normality. ```{r} hist(shootAlive1315$shoot.length) shootAlive1315$log.shootlength <- log(shootAlive1315$shoot.length) hist(shootAlive1315$log.shootlength) ``` ```{r} shootlength.model <- lmer(log.shootlength ~ treatment * inoc +(1|year),data = shootAlive1315) hist((resid(shootlength.model) - mean(resid(shootlength.model))) / sd(resid(shootlength.model)), freq = FALSE, main = ""); curve(dnorm, add = TRUE) Anova(shootlength.model) summary(shootlength.model) ``` ##Shoot number GLMM with treatment as fixed factor, year as random factor. ```{r} shootAlive1415 <- subset(AlivePM, subset = (year != 2013 & year != 2016 & year != 2017)) shootAlive1415$n.shoots <- as.numeric(shootAlive1415$n.shoots) ``` Pretty normal as-is. ```{r} hist(shootAlive1415$n.shoots) ``` ```{r} shootn.model <- lmer(n.shoots ~ treatment * inoc +(1|year),data = shootAlive1415) hist((resid(shootn.model) - mean(resid(shootn.model))) / sd(resid(shootn.model)), freq = FALSE, main = ""); curve(dnorm, add = TRUE) Anova(shootn.model) summary(shootn.model) favstats(shootAlive1415$n.shoots ~ shootAlive1415$inoc) ``` ##Shoot diameter GLMM with treatment as fixed factor, year as random factor. Super normal. ```{r} hist(shootAlive1415$base.diameter.mm) ``` ```{r} shootdiam.model <- lmer(base.diameter.mm ~ treatment * inoc +(1|year),data = shootAlive1415) hist((resid(shootdiam.model) - mean(resid(shootdiam.model))) / sd(resid(shootdiam.model)), freq = FALSE, main = ""); curve(dnorm, add = TRUE) Anova(shootdiam.model) summary(shootdiam.model) favstats(shootAlive1415$base.diameter.mm ~ shootAlive1415$inoc) ``` ##Internode length GLMM with treatment as fixed factor, year as random factor. Super normal. ```{r} shootAlive1617 <- subset(AlivePM, subset = (year == "2016" | year == "2017")) hist(shootAlive1617$internode.length.mm) ``` ```{r} internode.model <- lmer(internode.length.mm ~ treatment * inoc +(1|year),data = shootAlive1617) hist((resid(internode.model) - mean(resid(internode.model))) / sd(resid(internode.model)), freq = FALSE, main = ""); curve(dnorm, add = TRUE) Anova(internode.model) summary(internode.model) favstats(shootAlive1617$internode.length.mm ~ shootAlive1617$inoc) ``` ##Cluster Number GLMM with treatment as fixed factor, year as random factor. Super normal. ```{r} hist(shootAlive1415$n.cluster) #non-normal, high zeroes nozeroSA1415 <- shootAlive1415[shootAlive1415$n.cluster > 0, ] nozeroSA1415$log.ncluster <- log(nozeroSA1415$n.cluster) hist(nozeroSA1415$log.ncluster) ``` ```{r} ncluster.model <- lmer(log.ncluster ~ treatment * inoc + (1|year), data = nozeroSA1415) hist((resid(ncluster.model) - mean(resid(ncluster.model))) / sd(resid(ncluster.model)), freq = FALSE, main = ""); curve(dnorm, add = TRUE) Anova(ncluster.model) summary(ncluster.model) favstats(nozeroSA1415$n.cluster ~ nozeroSA1415$inoc) ``` *** ##Production **Binary Cluster Production** ```{r} ContBinCluster <- xtabs(~ inoc + n.cluster, data = BinaryCluster) ContBinClusterJ <- xtabs(~ inoc + n.cluster, data = BinaryClusterJ) ContBinClusterG <- xtabs(~ inoc + n.cluster, data = BinaryClusterG) ContBinClusterN <- xtabs(~ inoc + n.cluster, data = BinaryClusterN) BinClusterFisher <- fisher.test(ContBinCluster) BinClusterFisher BinClusterFisherJ <- fisher.test(ContBinClusterJ) BinClusterFisherJ BinClusterFisherG <- fisher.test(ContBinClusterG) BinClusterFisherG BinClusterFisherN <- fisher.test(ContBinClusterN) BinClusterFisherN ``` Fisher's Test **Assumptions:** 1. Independence 2. Fixed values ```{r} BinaryCluster <- subset(PlantMeasurements, subset = (is.na(n.cluster) == FALSE) & n.shoots != 0 & n.shoots != "DEAD" & n.shoots != "dead" & n.shoots != "" & year == "2015") BinaryCluster$n.cluster <- ifelse(BinaryCluster[,11] != 0, 1, 0) BinaryCluster$n.cluster <- as.factor(BinaryCluster$n.cluster) levels(BinaryCluster$n.cluster) <- c("Without Grapes", "With Grapes") BinaryClusterJ <- BinaryCluster[BinaryCluster$treatment == "J",] BinaryClusterJYes <- BinaryCluster[BinaryCluster$inoc == "Yes",] BinaryClusterG <- BinaryCluster[BinaryCluster$treatment == "G",] BinaryClusterN <- BinaryCluster[BinaryCluster$treatment == "N",] ContBinCluster <- xtabs(~ n.cluster + inoc, data = BinaryCluster) ContBinCluster ContBinClusterJ <- xtabs(~ n.cluster + inoc, data = BinaryClusterJ) ContBinClusterJ ContBinClusterJY <- xtabs(~ n.cluster + inoc.freq, data = BinaryClusterJYes) ContBinClusterJY ContBinClusterG <- xtabs(~ n.cluster + inoc, data = BinaryClusterG) ContBinClusterG ContBinClusterN <- xtabs(~ n.cluster + inoc, data = BinaryClusterN) ContBinClusterN fisher.test(ContBinCluster) fisher.test(ContBinClusterJ) fisher.test(ContBinClusterJY) fisher.test(ContBinClusterG) fisher.test(ContBinClusterN) ``` ```{r} #Percentages percGrapeNo <- round(100 * ContBinCluster[2,1]/(ContBinCluster[2,1]+ContBinCluster[1,1]), 1) percGrapeNo percGrapeYes <- round(100 * ContBinCluster[2,2]/(ContBinCluster[2,2]+ContBinCluster[1,2]), 1) percGrapeYes ``` *** #ready ##Establishment GLMM with treatment as fixed factor, year as random factor. Convert first to binary and then do a Fisher's for inoculation effect on establishment. ```{r} BinaryEstablishment <- trackingdataR BinaryEstablishment[,4:9] <- ifelse(BinaryEstablishment[,4:9] != 0, 1, 0) ContBinEstM13 <- xtabs(~ ddM13S + inoc, data = BinaryEstablishment) ContBinEstM13 ContBinEstO13 <- xtabs(~ ddO13S + inoc, data = BinaryEstablishment) ContBinEstO13 ContBinEstO14 <- xtabs(~ ddO14S + inoc, data = BinaryEstablishment) ContBinEstO14 ContBinEstO15 <- xtabs(~ ddO15S + inoc, data = BinaryEstablishment) ContBinEstO15 ContBinEstO16 <- xtabs(~ ddO16S + inoc, data = BinaryEstablishment) ContBinEstO16 ContBinEstO17 <- xtabs(~ ddO17S + inoc, data = BinaryEstablishment) ContBinEstO17 fisher.test(ContBinEstM13) fisher.test(ContBinEstO13) fisher.test(ContBinEstO14) fisher.test(ContBinEstO15) fisher.test(ContBinEstO16) fisher.test(ContBinEstO17) #Fligner-Killeen test for homogeneity of variance excluding N58 O2014S longTrackxN58 <- longTrack[-70,] fligner.test(copy ~ period, data = longTrackxN58) ``` ##Rank Correlations ```{r} plot(ddM13S ~ inoc, trackingData) plot(ddO13S ~ inoc, trackingData) plot(ddO14S ~ inoc, trackingData) plot(ddO15S ~ inoc, trackingData) plot(ddO16S ~ inoc, trackingData) plot(ddO17S ~ inoc, trackingData) #using Kendall's tau because data display some non-monotonic behaviour cor.test(longTrackxN58$P, longTrackxN58$copy, method = "kendall", na.action = na.omit) cor.test(longTrackxN58$N, longTrackxN58$copy, method = "kendall", na.action = na.omit) cor.test(longTrackxN58$pH, longTrackxN58$copy, method = "kendall", na.action = na.omit) trackingDataxN58 <- trackingData[-36,] cor.test(trackingData$P, trackingData$ddO13S, method = "kendall", na.action = na.omit) cor.test(trackingData$N, trackingData$ddO13S, method = "kendall", na.action = na.omit) cor.test(trackingData$pH, trackingData$ddO13S, method = "kendall", na.action = na.omit) plot(trackingDataxN58$P, trackingDataxN58$ddO13S) plot(trackingDataxN58$N, trackingDataxN58$ddO13S) plot(trackingDataxN58$pH, trackingDataxN58$ddO13S) ``` #Alternatives ```{r} modelvc<-glm(vc ~ treatment*plantorigin +(1| block) + (1|plantid), family = poisson, data = Final_data_col) hist((resid(modelvc) - mean(resid(modelvc))) / sd(resid(modelvc)), freq = FALSE); curve(dnorm, add = TRUE) anova(modelvc) summary(modelvc) ``` *** #Moran's I ```{r} #Read in a file containing vine ID, treatment, inoculation, and row and vine position coordinates <- read.csv("~/Dropbox/MSc/Data/spatial_coordinates.csv") coordinatesLite <- coordinates[,6:7] distances <- dist(coordinatesLite) #Calculate distances between vines in undefined units matrix.dist <- round(as.matrix(distances), 2) #matrix.dist$sample <- coordinates$ID #Create a weighed matrix of inverse distance (ie, further away = less influence) influence <- round(1 / distances, 4) influence[!is.finite(influence)] <- NA influence <- as.matrix(influence) influence <- as.data.frame(influence) colnames(influence) <- coordinates$ID rownames(influence) <- coordinates$ID ``` ```{r} #In this chunk I will row-normalize the half-matrix such that the values become an index of the relative influence of one plant on another out of a total of 1, based on distance. Rows will add to one but columns will not. This reflects the fact that influence between plants may be asymmetrical - the RELATIVE influence of vine A on vine B does not need to be the same between the two because vine B may be strongly influenced by a third vine C that may not have as strong an impact on vine A. This would happen if A and C are very close, but B is far away. A and C should have similar influences, but if B is relatively isolated, A may have a much higher relative impact on B than B has on A. #influence <- as.numeric(influence) rowTotals <- rowSums(influence) rnormInfluence <- influence / rowTotals #rowSums(rnormInfluence) Row sums check out to 1 ``` ```{r} #Now I need to bring in the observed ddPCR values and left-join to the row-normalized matrix trackingData <- read.csv("~/Dropbox/MSc/Data/trackingRethreshold.csv") influence$sample <- trackingData$sample mergedInfluence <- join(trackingData, influence, by = "sample", match = "first") mergedInfluence <- mergedInfluence[,-c(2,3,10,11,12)] ``` ```{r} #ape method Moran.I(mergedInfluence$ddM13S, mergedInfluence[,8:61], na.rm = TRUE) Moran.I(mergedInfluence$ddO13S, mergedInfluence[,8:61], na.rm = TRUE) Moran.I(mergedInfluence$ddO14S, mergedInfluence[,8:61], na.rm = TRUE) Moran.I(mergedInfluence$ddO15S, mergedInfluence[,8:61], na.rm = TRUE) Moran.I(mergedInfluence$ddO16S, mergedInfluence[,8:61], na.rm = TRUE) Moran.I(mergedInfluence$ddO17S, mergedInfluence[,8:61], na.rm = TRUE) ```