---
title: "final_statistics"
author: "Corrina Thomsen"
date: "February 17, 2018"
output:
  html_document:
    fig_caption: yes
    toc: yes
    toc_float: yes
    theme: "cerulean"
    highlight: "pygments"
  pdf_document:
    toc: yes
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

```{r}
library(tigerstats)
library(lme4)
library(gvlma)
library(nlme)
library(car)
library(VGAM)
library(ape)
library(plyr)
```

```{r}
Survivorship <- read.csv("~/Dropbox/MSc/Data/Survivorship.csv")
Survivorship$status <- as.factor(Survivorship$status)
Survivorship$inoc <- as.factor(Survivorship$inoc)
levels(Survivorship$status) <- c("Dead", "Alive")
levels(Survivorship$inoc) <- c("No", "Yes")

PlantMeasurements <- read.csv("~/Dropbox/MSc/Data/PlantMeasurements.csv")
PlantMeasurements$year <- as.factor(PlantMeasurements$year)
PlantMeasurements$inoc <- as.factor(PlantMeasurements$inoc)
levels(PlantMeasurements$inoc) <- c("No", "Yes")
PlantMeasurements$Inoculation <- PlantMeasurements$inoc
AlivePM <- subset(PlantMeasurements, 
                      subset = (shoot.length != 0 & is.na(shoot.length) == FALSE)
                      & is.na(inoc) == FALSE
                      & n.shoots != 0
                      & n.shoots != "DEAD"
                      & n.shoots != "dead")

shootAlive1415 <- subset(AlivePM, subset = (year != 2013 & year != 2016 & year != 2017))
shootAlive1617 <- subset(AlivePM, subset = (year == "2016" | year == "2017"))

SanDiego <- read.csv("~/Dropbox/MSc/Data/sandiego.csv")

trackingData <- read.csv("~/Dropbox/MSc/Data/trackingRethreshold.csv")
longTrack <- read.csv("~/Dropbox/MSc/Data/trackingLong.csv")
```

***

##Soil Characteristics
```{r}
favstats(SanDiego$hyphal.length.mpg)
favstats(SanDiego$spores.npg)
favstats(SanDiego$perc.arbusc)
favstats(SanDiego$perc.hyphal)
favstats(SanDiego$perc.vesc)

favstats(trackingdataR$P)
favstats(trackingdataR$N)
favstats(trackingdataR$pH)
```

***

##Survival
Fisher's Test
**Assumptions:**
1. Independence
2. Fixed values

```{r}
CleanSurvivorship <- subset(Survivorship, subset = (is.na(inoc) == FALSE & treatment != "O"))
CleanSurvG <- subset(CleanSurvivorship, subset = (treatment == "G"))
CleanSurvJ <- subset(CleanSurvivorship, subset = (treatment == "J"))
CleanSurvN <- subset(CleanSurvivorship, subset = (treatment == "N"))

#Do 1X and 2X J differ?
CleanSurvJY <- subset(CleanSurvivorship, subset = (treatment == "J" & inoc == "Yes"))
ContSurvJY <- xtabs(~ status + inoc.freq, data = CleanSurvJY)
fisher.test(ContSurvJY)

ContSurvivalPooled <- xtabs(~ status + inoc, data = CleanSurvivorship)
ContSurvG <- xtabs(~ status + inoc, data = CleanSurvG)
ContSurvJ <- xtabs(~ status + inoc, data = CleanSurvJ)
ContSurvN <- xtabs(~ status + inoc, data = CleanSurvN)

fisher.test(ContSurvivalPooled)
fisher.test(ContSurvG)
fisher.test(ContSurvJ)
fisher.test(ContSurvN)
```

```{r}
#Percentages
percDeadNoAll <- round(100 * prop(~status, data=subset(CleanSurvivorship, 
                            subset = (inoc == "No")), success = "Dead"), 1)
percDeadNoAll

percDeadYesAll <- round(100 * prop(~status, data=subset(CleanSurvivorship, 
                              subset = (inoc == "Yes")), success = "Dead"), 1)
percDeadYesAll
```

***

##Shoot length
GLMM with treatment as fixed factor, year as random factor.

```{r}
shootAlive1315 <- subset(AlivePM, subset = (year != 2016 & year != 2017))
```

*Non-normal, so I'm log-transforming which greatly improves normality.
```{r}
hist(shootAlive1315$shoot.length)
shootAlive1315$log.shootlength <- log(shootAlive1315$shoot.length)
hist(shootAlive1315$log.shootlength)
```

```{r}
shootlength.model <- lmer(log.shootlength ~ treatment * inoc +(1|year),data = shootAlive1315)
hist((resid(shootlength.model) - mean(resid(shootlength.model))) / sd(resid(shootlength.model)), freq = FALSE, main = ""); curve(dnorm, add = TRUE)
Anova(shootlength.model)
summary(shootlength.model)
```

##Shoot number
GLMM with treatment as fixed factor, year as random factor.

```{r}
shootAlive1415 <- subset(AlivePM, subset = (year != 2013 & year != 2016 & year != 2017))
shootAlive1415$n.shoots <- as.numeric(shootAlive1415$n.shoots)
```

Pretty normal as-is.
```{r}
hist(shootAlive1415$n.shoots)
```

```{r}
shootn.model <- lmer(n.shoots ~ treatment * inoc +(1|year),data = shootAlive1415)
hist((resid(shootn.model) - mean(resid(shootn.model))) / sd(resid(shootn.model)), freq = FALSE, main = ""); curve(dnorm, add = TRUE)
Anova(shootn.model)
summary(shootn.model)

favstats(shootAlive1415$n.shoots ~ shootAlive1415$inoc)
```

##Shoot diameter
GLMM with treatment as fixed factor, year as random factor.

Super normal.
```{r}
hist(shootAlive1415$base.diameter.mm)
```

```{r}
shootdiam.model <- lmer(base.diameter.mm ~ treatment * inoc +(1|year),data = shootAlive1415)
hist((resid(shootdiam.model) - mean(resid(shootdiam.model))) / sd(resid(shootdiam.model)), freq = FALSE, main = ""); curve(dnorm, add = TRUE)
Anova(shootdiam.model)
summary(shootdiam.model)

favstats(shootAlive1415$base.diameter.mm ~ shootAlive1415$inoc)
```

##Internode length
GLMM with treatment as fixed factor, year as random factor.

Super normal.
```{r}
shootAlive1617 <- subset(AlivePM, subset = (year == "2016" | year == "2017"))
hist(shootAlive1617$internode.length.mm)
```

```{r}
internode.model <- lmer(internode.length.mm ~ treatment * inoc +(1|year),data = shootAlive1617)
hist((resid(internode.model) - mean(resid(internode.model))) / sd(resid(internode.model)), freq = FALSE, main = ""); curve(dnorm, add = TRUE)
Anova(internode.model)
summary(internode.model)

favstats(shootAlive1617$internode.length.mm ~ shootAlive1617$inoc)
```

##Cluster Number
GLMM with treatment as fixed factor, year as random factor.

Super normal.
```{r}
hist(shootAlive1415$n.cluster) #non-normal, high zeroes
nozeroSA1415 <- shootAlive1415[shootAlive1415$n.cluster > 0, ]
nozeroSA1415$log.ncluster <- log(nozeroSA1415$n.cluster)
hist(nozeroSA1415$log.ncluster)
```

```{r}
ncluster.model <- lmer(log.ncluster ~ treatment * inoc + (1|year), data = nozeroSA1415)
hist((resid(ncluster.model) - mean(resid(ncluster.model))) / sd(resid(ncluster.model)), freq = FALSE, main = ""); curve(dnorm, add = TRUE)
Anova(ncluster.model)
summary(ncluster.model)

favstats(nozeroSA1415$n.cluster ~ nozeroSA1415$inoc)
```

***


##Production

**Binary Cluster Production**
```{r}
ContBinCluster <- xtabs(~ inoc + n.cluster, data = BinaryCluster)
ContBinClusterJ <- xtabs(~ inoc + n.cluster, data = BinaryClusterJ)
ContBinClusterG <- xtabs(~ inoc + n.cluster, data = BinaryClusterG)
ContBinClusterN <- xtabs(~ inoc + n.cluster, data = BinaryClusterN)

BinClusterFisher <- fisher.test(ContBinCluster)
BinClusterFisher
BinClusterFisherJ <- fisher.test(ContBinClusterJ)
BinClusterFisherJ
BinClusterFisherG <- fisher.test(ContBinClusterG)
BinClusterFisherG
BinClusterFisherN <- fisher.test(ContBinClusterN)
BinClusterFisherN
```

Fisher's Test
**Assumptions:**
1. Independence
2. Fixed values
```{r}
BinaryCluster <- subset(PlantMeasurements, subset = (is.na(n.cluster) == FALSE)
                                              & n.shoots != 0
                                              & n.shoots != "DEAD"
                                              & n.shoots != "dead"
                                              & n.shoots != ""
                                              & year == "2015")
BinaryCluster$n.cluster <- ifelse(BinaryCluster[,11] != 0, 1, 0)
BinaryCluster$n.cluster <- as.factor(BinaryCluster$n.cluster)
levels(BinaryCluster$n.cluster) <- c("Without Grapes", "With Grapes")

BinaryClusterJ <- BinaryCluster[BinaryCluster$treatment == "J",]
BinaryClusterJYes <- BinaryCluster[BinaryCluster$inoc == "Yes",]
BinaryClusterG <- BinaryCluster[BinaryCluster$treatment == "G",]
BinaryClusterN <- BinaryCluster[BinaryCluster$treatment == "N",]

ContBinCluster <- xtabs(~ n.cluster + inoc, data = BinaryCluster)
ContBinCluster
ContBinClusterJ <- xtabs(~ n.cluster + inoc, data = BinaryClusterJ)
ContBinClusterJ
ContBinClusterJY <- xtabs(~ n.cluster + inoc.freq, data = BinaryClusterJYes)
ContBinClusterJY
ContBinClusterG <- xtabs(~ n.cluster + inoc, data = BinaryClusterG)
ContBinClusterG
ContBinClusterN <- xtabs(~ n.cluster + inoc, data = BinaryClusterN)
ContBinClusterN

fisher.test(ContBinCluster)
fisher.test(ContBinClusterJ)
fisher.test(ContBinClusterJY)
fisher.test(ContBinClusterG)
fisher.test(ContBinClusterN)
```


```{r}
#Percentages
percGrapeNo <- round(100 * ContBinCluster[2,1]/(ContBinCluster[2,1]+ContBinCluster[1,1]), 1)
percGrapeNo
percGrapeYes <- round(100 * ContBinCluster[2,2]/(ContBinCluster[2,2]+ContBinCluster[1,2]), 1)
percGrapeYes
```


***
#ready
##Establishment
GLMM with treatment as fixed factor, year as random factor.

Convert first to binary and then do a Fisher's for inoculation effect on establishment.
```{r}
BinaryEstablishment <- trackingdataR
BinaryEstablishment[,4:9] <- ifelse(BinaryEstablishment[,4:9] != 0, 1, 0)

ContBinEstM13 <- xtabs(~ ddM13S + inoc, data = BinaryEstablishment)
ContBinEstM13
ContBinEstO13 <- xtabs(~ ddO13S + inoc, data = BinaryEstablishment)
ContBinEstO13
ContBinEstO14 <- xtabs(~ ddO14S + inoc, data = BinaryEstablishment)
ContBinEstO14
ContBinEstO15 <- xtabs(~ ddO15S + inoc, data = BinaryEstablishment)
ContBinEstO15
ContBinEstO16 <- xtabs(~ ddO16S + inoc, data = BinaryEstablishment)
ContBinEstO16
ContBinEstO17 <- xtabs(~ ddO17S + inoc, data = BinaryEstablishment)
ContBinEstO17

fisher.test(ContBinEstM13)
fisher.test(ContBinEstO13)
fisher.test(ContBinEstO14)
fisher.test(ContBinEstO15)
fisher.test(ContBinEstO16)
fisher.test(ContBinEstO17)

#Fligner-Killeen test for homogeneity of variance excluding N58 O2014S
longTrackxN58 <- longTrack[-70,]
fligner.test(copy ~ period, data = longTrackxN58)

```

##Rank Correlations
```{r}
plot(ddM13S ~ inoc, trackingData)
plot(ddO13S ~ inoc, trackingData)
plot(ddO14S ~ inoc, trackingData)
plot(ddO15S ~ inoc, trackingData)
plot(ddO16S ~ inoc, trackingData)
plot(ddO17S ~ inoc, trackingData)

#using Kendall's tau because data display some non-monotonic behaviour
cor.test(longTrackxN58$P, longTrackxN58$copy, method = "kendall", na.action = na.omit)
cor.test(longTrackxN58$N, longTrackxN58$copy, method = "kendall", na.action = na.omit)
cor.test(longTrackxN58$pH, longTrackxN58$copy, method = "kendall", na.action = na.omit)

trackingDataxN58 <- trackingData[-36,]

cor.test(trackingData$P, trackingData$ddO13S, method = "kendall", na.action = na.omit)
cor.test(trackingData$N, trackingData$ddO13S, method = "kendall", na.action = na.omit)
cor.test(trackingData$pH, trackingData$ddO13S, method = "kendall", na.action = na.omit)

plot(trackingDataxN58$P, trackingDataxN58$ddO13S)
plot(trackingDataxN58$N, trackingDataxN58$ddO13S)
plot(trackingDataxN58$pH, trackingDataxN58$ddO13S)

```


#Alternatives
```{r}
modelvc<-glm(vc ~ treatment*plantorigin +(1| block) + (1|plantid), family = poisson, data =  Final_data_col)
hist((resid(modelvc) - mean(resid(modelvc))) / sd(resid(modelvc)), freq = FALSE); curve(dnorm, add = TRUE)
anova(modelvc)
summary(modelvc)

```


***

#Moran's I

```{r}
#Read in a file containing vine ID, treatment, inoculation, and row and vine position
coordinates <- read.csv("~/Dropbox/MSc/Data/spatial_coordinates.csv")
coordinatesLite <- coordinates[,6:7]

distances <- dist(coordinatesLite) #Calculate distances between vines in undefined units
matrix.dist <- round(as.matrix(distances), 2) 
#matrix.dist$sample <- coordinates$ID

#Create a weighed matrix of inverse distance (ie, further away = less influence)
influence <- round(1 / distances, 4)
influence[!is.finite(influence)] <- NA
influence <- as.matrix(influence)
influence <- as.data.frame(influence)
colnames(influence) <- coordinates$ID
rownames(influence) <- coordinates$ID
```

```{r}
#In this chunk I will row-normalize the half-matrix such that the values become an index of the relative influence of one plant on another out of a total of 1, based on distance. Rows will add to one but columns will not. This reflects the fact that influence between plants may be asymmetrical - the RELATIVE influence of vine A on vine B does not need to be the same between the two because vine B may be strongly influenced by a third vine C that may not have as strong an impact on vine A. This would happen if A and C are very close, but B is far away. A and C should have similar influences, but if B is relatively isolated, A may have a much higher relative impact on B than B has on A.
#influence <- as.numeric(influence)
rowTotals <- rowSums(influence)
rnormInfluence <- influence / rowTotals
#rowSums(rnormInfluence) Row sums check out to 1
```

```{r}
#Now I need to bring in the observed ddPCR values and left-join to the row-normalized matrix
trackingData <- read.csv("~/Dropbox/MSc/Data/trackingRethreshold.csv")

influence$sample <- trackingData$sample
mergedInfluence <- join(trackingData, influence, by = "sample", match = "first")
mergedInfluence <- mergedInfluence[,-c(2,3,10,11,12)]
```

```{r}
#ape method
Moran.I(mergedInfluence$ddM13S, mergedInfluence[,8:61], na.rm = TRUE)
Moran.I(mergedInfluence$ddO13S, mergedInfluence[,8:61], na.rm = TRUE)
Moran.I(mergedInfluence$ddO14S, mergedInfluence[,8:61], na.rm = TRUE)
Moran.I(mergedInfluence$ddO15S, mergedInfluence[,8:61], na.rm = TRUE)
Moran.I(mergedInfluence$ddO16S, mergedInfluence[,8:61], na.rm = TRUE)
Moran.I(mergedInfluence$ddO17S, mergedInfluence[,8:61], na.rm = TRUE)
```