---
title: "Ants and Faults Analyses"
author: "AME & IDT"
date: "January 2017"
output:
  word_document: default
  pdf_document: default
---

This is the reproducible code for analyses and figures: 
Make sure you have a reliable internet connection to download some of the spatial data needed for mapping. 

Load required libraries and setwd 

```{r}
library(raster)
library(rgdal)
library(ggplot2)
library (ggmap)
library(reshape2)
library(plyr)
library (gridExtra)
library(spatstat)
setwd("/Users/israel/Desktop/OneDrive - Lawrence University/DK Ants")
```

Read in data

```{r}

dk.ants <- read.csv("DK_RWAnts.csv") #this reads in the data and assigns it to "dk.ants"
dk.faults <- read.csv("Faults_10_26.csv", header=T) #this is the file for geological faults
summary(dk.faults)
```

So now we have two data files, dk.ants with locations and species of ants, and dk.faults with start and endpoints of each fault

Let's start with some simple plotting: 
We can plot the detailed distribution of RWA nests and absence points for each of the two study regions 
```{r, echo=FALSE}
#subset maps 
#study region boud.box= xmin=8.115, xmax=9.129, ymin=56.3, ymax=57.25
#Thy region 
Thy_base = get_map(location=c(8.3,56.89,9.08,57.17), zoom=10, maptype="toner", source="stamen")
map1 = ggmap(Thy_base)
map1

#Klosterhede region 
Klo_base = get_map(location=c(8.1,56.38,8.78,56.58), zoom=10, maptype="toner", source="stamen")
map2 = ggmap(Klo_base)
map2

#now add points to sample locations 
#subset the sites file: 
abs_pts<-dk.ants [which(dk.ants$SP_ID=="none"),]
ant_pts<-dk.ants[which(dk.ants$SP_ID!="none"),]

map1<- map1 + geom_point(data=abs_pts, aes(x=x, y=y), cex=1, col="#377eb8") + 
  geom_point(data=ant_pts, aes(x=x, y=y), cex=1.25, col="#e41a1c"  )  +
  scale_shape_identity()
map1

map2<- map2 + geom_point(data=abs_pts, aes(x=x, y=y), cex=1, col="#377eb8"  ) + 
  geom_point(data=ant_pts, aes(x=x, y=y), cex=1.25, col="#e41a1c")  +
  scale_shape_identity()
map2
```


We can also map the distribution of the faults in the entire Jutland Penninsula 
```{r}
#study region boud.box= xmin=8.115, xmax=9.129, ymin=56.3, ymax=57.25
region_base = get_map(location=c( 8.05, 55, 11,  58), maptype="toner-background")
map3<-ggmap(region_base)
map3<- map3 +  geom_point(data=abs_pts, aes(x=x, y=y), cex=1, col="#377eb8", alpha=0.5 ) + 
  geom_point(data=ant_pts, aes(x=x, y=y), cex=1, col="#e41a1c", alpha=0.5 )  + 
  geom_segment(aes(x = x_start, y = y_start, xend = x_end, yend = y_end), data = dk.faults, 
               color= "#756bb1", cex=.75)
map3
```

Now we can test for spatial clustering of the RWA nest points in both of the study region: 
The extent of the fault dataset is much larger than the extent of the ant data. 
And the ant data come in two widely separated clusters.
So let's make some subsets to work with.

We could read off the graph, or subset by hand, or... but all of those are tedious. Let's use some of the facilities within spatstat instead.
```{r}

#Start by identifying the extent ("window") of the ant data
dk.owin.ants <- owin(c(min(dk.ants$x)-.05, max(dk.ants$x)+.05), c(min(dk.ants$y)-.05, max(dk.ants$y)+.05) )

#create a "point pattern object" out of dk.ants
dk.ppp.ants <- ppp(dk.ants$x, dk.ants$y, window=dk.owin.ants, marks=dk.ants$SP_ID)

#now creat a "point segment object" out of dk.faults
dk.owin.faults <- owin(c(min(dk.faults$x_start, dk.faults$x_end),	max(dk.faults$x_start, dk.faults$x_end)), c(min(dk.faults$y_start, dk.faults$y_end),	max(dk.faults$y_start, dk.faults$y_end)))

dk.psp.faults <- psp(dk.faults$x_start, dk.faults$y_start, dk.faults$x_end, dk.faults$y_end, window=dk.owin.faults)

dk.psp.faults.1 <- dk.psp.faults[dk.owin.ants]

```


So now we have two spatial data files that we can work with: dk.ppp.ants and dk.psp.faults.1

```{r}
str(dk.ppp.ants)
str(dk.psp.faults.1)

#Are ants randomly distributed?

Kest(dk.ppp.ants[dk.ppp.ants$marks != "none"])
plot(Kest(dk.ppp.ants[dk.ppp.ants$marks != "none"])) #also illustrates nested functions
plot(envelope(dk.ppp.ants[dk.ppp.ants$marks != "none"], fun=Kest, nsim=99))

```

Aside: Ants are clumped, but is it because we have two clumps? Maybe we should just look at one clump?

```{r}

#reminder What would a random pattern look like?

nX <-npoints(dk.ppp.ants[dk.ppp.ants$marks != "none"]) #same number of points as ants
nX

PatList <- runifpoint(nX) #generates using runifpoint function, maintains owin
plot(PatList)
plot(envelope(PatList, fun=Kest, nsim=99))

par(mfrow=c(1,2))
plot(envelope(dk.ppp.ants[dk.ppp.ants$marks != "none"], fun=Kest, nsim=99), main="ants")
plot(envelope(rpoispp(273), fun=Kest, nsim=99), main="Poisson")


#smaller ant subset

#one way to identify groups
plot(dk.ants$x, dk.ants$y)
hist(dk.ants$y)

#clip it
str(dk.owin.ants)
dk.owin.ants.2 <- dk.owin.ants

#Thy clump
dk.owin.ants.2$yrange <- c(56.8, 57.2)
dk.owin.ants.2$xrange <-c(8.4, 9.08)

#Klosterhede clump
dk.owin.ants.2$yrange <- c(56.34, 56.6)
dk.owin.ants.2$xrange <-c(8.16, 8.8)

dk.ppp.ants.2 <- dk.ppp.ants[dk.owin.ants.2]
dk.psp.faults.2 <- dk.psp.faults.1[dk.owin.ants.2]

#plot it
par(mfrow=c(1,1))
plot(dk.psp.faults.2)
plot(dk.ppp.ants.2[dk.ppp.ants.2$marks =="none"], col="black", pch=1, add=TRUE)
plot(unmark(dk.ppp.ants.2[dk.ppp.ants.2$marks !="none"]), col="red", pch=1, add=TRUE)

#test it
par(mfrow=c(1,2))
plot(envelope(dk.ppp.ants.2[dk.ppp.ants.2$marks !="none"], fun=Kest, nsim=99), main="ants")
plot(envelope(dk.ppp.ants.2[dk.ppp.ants.2$marks =="none"], fun=Kest, nsim=99), main="no ants")
```

last but not least, let's look at relationships between ants and faults
```{r}

dk.ppp.ants.2a <-unmark(dk.ppp.ants.2[dk.ppp.ants.2$marks != "none"])

dk.ppp.ants.2na <-unmark(dk.ppp.ants.2[dk.ppp.ants.2$marks == "none"])

par(mfrow=c(1,1))
plot(dk.ppp.ants.2a, pch=16, main="Danish ants")
plot(dk.psp.faults.2, add=TRUE)

#let's get turn faults into a covariate that is a function of spatial location. A reasonable first step is the distance from any point to a fault.

cov.faults <- distmap(dk.psp.faults.2)
plot(dk.psp.faults.2, lwd=2, man="")
contour(cov.faults, add=TRUE)

#relative distribution of ants to faults
#We examine how intensity of the point process (ants, no ants) varies as a function rho of the covariate
#Because the pattern is inhomogeneous, we use a kernel smoother
tiff("Fig2AB.tiff", height = 4, width = 8, units="in", res=600)
par(mfrow=c(1,2))
plot(rhohat(dk.ppp.ants.2a, cov.faults), xlab="Distance to nearest fault", ylim=c(0,5000), xlim=c(0,.12), main="ant mounds", col= "#e41a1c", lwd=2, legend=F)

plot(rhohat(dk.ppp.ants.2na, cov.faults), xlab="Distance to nearest fault", ylim=c(0,5000), xlim=c(0,.12), main="no ants", col="#377eb8", lwd=2, legend=F )

dev.off()
#test it....
rho.ants <- rhohat(dk.ppp.ants.2a, cov.faults)
rho.no.ants <- rhohat(dk.ppp.ants.2na, cov.faults)
ks.test(rho.ants$rho, rho.no.ants$rho)

```


make a supplementary plot to show differences between fault directionality and ant associations 
```{r}
#no ants to compare against 
par(mfrow=c(2,3))
plot(rhohat(dk.ppp.ants.2na, cov.faults), xlab="Distance to nearest fault", ylim=c(0,9500), xlim=c(0,.12), main="no ants", col="#377eb8", lwd=2, legend=F )
#### Now lets consider the directionality of the faults 
#Lets consider major faults 
dk.faults <- read.csv("Faults_10_26.csv", header=T) #this is the file for geological faults

#subset this dataset to include only normal faults 
dk.faults<-dk.faults[which(dk.faults$Fault_Tren =="NW-SE"),]
summary(dk.faults)
#Start by identifying the extent ("window") of the ant data
dk.owin.ants <- owin(c(min(dk.ants$x)-.05, max(dk.ants$x)+.05), c(min(dk.ants$y)-.05, max(dk.ants$y)+.05) )

#create a "point pattern object" out of dk.ants
dk.ppp.ants <- ppp(dk.ants$x, dk.ants$y, window=dk.owin.ants, marks=dk.ants$SP_ID)

#now creat a "point segment object" out of dk.faults
dk.owin.faults <- owin(c(min(dk.faults$x_start, dk.faults$x_end),	max(dk.faults$x_start, dk.faults$x_end)), c(min(dk.faults$y_start, dk.faults$y_end),	max(dk.faults$y_start, dk.faults$y_end)))

dk.psp.faults <- psp(dk.faults$x_start, dk.faults$y_start, dk.faults$x_end, dk.faults$y_end, window=dk.owin.faults)

dk.psp.faults.1 <- dk.psp.faults[dk.owin.ants]

str(dk.ppp.ants)
str(dk.psp.faults.1)


#clip it
str(dk.owin.ants)
dk.owin.ants.2 <- dk.owin.ants

#Thy clump
dk.owin.ants$yrange <- c(56.8, 57.2)
dk.owin.ants$xrange <-c(8.4, 9.08)

#Klosterhede clump
dk.owin.ants.2$yrange <- c(56.34, 56.6)
dk.owin.ants.2$xrange <-c(8.16, 8.8)

dk.ppp.ants.2 <- dk.ppp.ants[dk.owin.ants.2]
dk.psp.faults.2 <- dk.psp.faults.1[dk.owin.ants.2]

dk.ppp.ants.2a <-unmark(dk.ppp.ants.2[dk.ppp.ants.2$marks != "none"])

dk.ppp.ants.2na <-unmark(dk.ppp.ants.2[dk.ppp.ants.2$marks == "none"])

#let's get turn faults into a covariate that is a function of spatial location. A reasonable first step is the distance from any point to a fault.

cov.faults <- distmap(dk.psp.faults.2)

#relative distribution of ants to faults
#We examine how intensity of the point process (ants, no ants) varies as a function rho of the covariate
#Because the pattern is inhomogeneous, we use a kernel smoother

plot(rhohat(dk.ppp.ants.2a, cov.faults), xlab="Distance to nearest fault", ylim=c(0,11500), xlim=c(0,.12), main="NW-SE", col= "#e41a1c", lwd=2, legend=F)


#test it....
rho.ants <- rhohat(dk.ppp.ants.2a, cov.faults)
rho.no.ants <- rhohat(dk.ppp.ants.2na, cov.faults)
ks.test(rho.ants$rho, rho.no.ants$rho)

#test it....
rho.ants <- rhohat(dk.ppp.ants.2a, cov.faults)
rho.no.ants <- rhohat(dk.ppp.ants.2na, cov.faults)
ks.test(rho.ants$rho, rho.no.ants$rho)
#strong association between normal faults and ant occurances. Ranging from 4 to 10 m from a normal fault 
#strong association between normal faults and ant occurances. Ranging from 4 to 10 m from a normal fault 
dk.faults <- read.csv("Faults_10_26.csv", header=T) #this is the file for geological faults
summary (dk.faults)
#subset this dataset to include only normal faults 
dk.faults<-dk.faults[which(dk.faults$Fault_Tren =="NNW-SSE"),]
summary(dk.faults)
#Start by identifying the extent ("window") of the ant data
dk.owin.ants <- owin(c(min(dk.ants$x)-.05, max(dk.ants$x)+.05), c(min(dk.ants$y)-.05, max(dk.ants$y)+.05) )

#create a "point pattern object" out of dk.ants
dk.ppp.ants <- ppp(dk.ants$x, dk.ants$y, window=dk.owin.ants, marks=dk.ants$SP_ID)

#now creat a "point segment object" out of dk.faults
dk.owin.faults <- owin(c(min(dk.faults$x_start, dk.faults$x_end),	max(dk.faults$x_start, dk.faults$x_end)), c(min(dk.faults$y_start, dk.faults$y_end),	max(dk.faults$y_start, dk.faults$y_end)))

dk.psp.faults <- psp(dk.faults$x_start, dk.faults$y_start, dk.faults$x_end, dk.faults$y_end, window=dk.owin.faults)

dk.psp.faults.1 <- dk.psp.faults[dk.owin.ants]

str(dk.ppp.ants)
str(dk.psp.faults.1)


#clip it
str(dk.owin.ants)
dk.owin.ants.2 <- dk.owin.ants

#Thy clump
dk.owin.ants$yrange <- c(56.8, 57.2)
dk.owin.ants$xrange <-c(8.4, 9.08)

#Klosterhede clump
dk.owin.ants.2$yrange <- c(56.34, 56.6)
dk.owin.ants.2$xrange <-c(8.16, 8.8)

dk.ppp.ants.2 <- dk.ppp.ants[dk.owin.ants.2]
dk.psp.faults.2 <- dk.psp.faults.1[dk.owin.ants.2]

dk.ppp.ants.2a <-unmark(dk.ppp.ants.2[dk.ppp.ants.2$marks != "none"])

dk.ppp.ants.2na <-unmark(dk.ppp.ants.2[dk.ppp.ants.2$marks == "none"])

#let's get turn faults into a covariate that is a function of spatial location. A reasonable first step is the distance from any point to a fault.

cov.faults <- distmap(dk.psp.faults.2)

#relative distribution of ants to faults
#We examine how intensity of the point process (ants, no ants) varies as a function rho of the covariate
#Because the pattern is inhomogeneous, we use a kernel smoother
plot(rhohat(dk.ppp.ants.2a, cov.faults), xlab="Distance to nearest fault", ylim=c(0,11500), xlim=c(0,.12), main="NNW-SSE", col= "#e41a1c", lwd=2, legend=F)


#test it....
rho.ants <- rhohat(dk.ppp.ants.2a, cov.faults)
rho.no.ants <- rhohat(dk.ppp.ants.2na, cov.faults)
ks.test(rho.ants$rho, rho.no.ants$rho)

#strong association between normal faults and ant occurances. Ranging from 4 to 10 m from a normal fault 
dk.faults <- read.csv("Faults_10_26.csv", header=T) #this is the file for geological faults
summary (dk.faults)
#subset this dataset to include only normal faults 
dk.faults<-dk.faults[which(dk.faults$Fault_Tren =="NE-SW"),]
summary(dk.faults)
#Start by identifying the extent ("window") of the ant data
dk.owin.ants <- owin(c(min(dk.ants$x)-.05, max(dk.ants$x)+.05), c(min(dk.ants$y)-.05, max(dk.ants$y)+.05) )

#create a "point pattern object" out of dk.ants
dk.ppp.ants <- ppp(dk.ants$x, dk.ants$y, window=dk.owin.ants, marks=dk.ants$SP_ID)

#now creat a "point segment object" out of dk.faults
dk.owin.faults <- owin(c(min(dk.faults$x_start, dk.faults$x_end),	max(dk.faults$x_start, dk.faults$x_end)), c(min(dk.faults$y_start, dk.faults$y_end),	max(dk.faults$y_start, dk.faults$y_end)))

dk.psp.faults <- psp(dk.faults$x_start, dk.faults$y_start, dk.faults$x_end, dk.faults$y_end, window=dk.owin.faults)

dk.psp.faults.1 <- dk.psp.faults[dk.owin.ants]

str(dk.ppp.ants)
str(dk.psp.faults.1)


#clip it
str(dk.owin.ants)
dk.owin.ants.2 <- dk.owin.ants

#Thy clump
dk.owin.ants$yrange <- c(56.8, 57.2)
dk.owin.ants$xrange <-c(8.4, 9.08)

#Klosterhede clump
dk.owin.ants.2$yrange <- c(56.34, 56.6)
dk.owin.ants.2$xrange <-c(8.16, 8.8)

dk.ppp.ants.2 <- dk.ppp.ants[dk.owin.ants.2]
dk.psp.faults.2 <- dk.psp.faults.1[dk.owin.ants.2]

dk.ppp.ants.2a <-unmark(dk.ppp.ants.2[dk.ppp.ants.2$marks != "none"])

dk.ppp.ants.2na <-unmark(dk.ppp.ants.2[dk.ppp.ants.2$marks == "none"])

#let's get turn faults into a covariate that is a function of spatial location. A reasonable first step is the distance from any point to a fault.

cov.faults <- distmap(dk.psp.faults.2)

#relative distribution of ants to faults
#We examine how intensity of the point process (ants, no ants) varies as a function rho of the covariate
#Because the pattern is inhomogeneous, we use a kernel smoother
plot(rhohat(dk.ppp.ants.2a, cov.faults), xlab="Distance to nearest fault", ylim=c(0,11500), xlim=c(0,.12), main="NE-SW", col= "#e41a1c", lwd=2, legend=F)


#test it....
rho.ants <- rhohat(dk.ppp.ants.2a, cov.faults)
rho.no.ants <- rhohat(dk.ppp.ants.2na, cov.faults)
ks.test(rho.ants$rho, rho.no.ants$rho)


#strong association between normal faults and ant occurances. Ranging from 4 to 10 m from a normal fault 
dk.faults <- read.csv("Faults_10_26.csv", header=T) #this is the file for geological faults
summary (dk.faults)
#subset this dataset to include only normal faults 
dk.faults<-dk.faults[which(dk.faults$Fault_Tren =="NNE-SSW"),]
summary(dk.faults)
#Start by identifying the extent ("window") of the ant data
dk.owin.ants <- owin(c(min(dk.ants$x)-.05, max(dk.ants$x)+.05), c(min(dk.ants$y)-.05, max(dk.ants$y)+.05) )

#create a "point pattern object" out of dk.ants
dk.ppp.ants <- ppp(dk.ants$x, dk.ants$y, window=dk.owin.ants, marks=dk.ants$SP_ID)

#now creat a "point segment object" out of dk.faults
dk.owin.faults <- owin(c(min(dk.faults$x_start, dk.faults$x_end),	max(dk.faults$x_start, dk.faults$x_end)), c(min(dk.faults$y_start, dk.faults$y_end),	max(dk.faults$y_start, dk.faults$y_end)))

dk.psp.faults <- psp(dk.faults$x_start, dk.faults$y_start, dk.faults$x_end, dk.faults$y_end, window=dk.owin.faults)

dk.psp.faults.1 <- dk.psp.faults[dk.owin.ants]

str(dk.ppp.ants)
str(dk.psp.faults.1)


#clip it
str(dk.owin.ants)
dk.owin.ants.2 <- dk.owin.ants

#Thy clump
dk.owin.ants$yrange <- c(56.8, 57.2)
dk.owin.ants$xrange <-c(8.4, 9.08)

#Klosterhede clump
dk.owin.ants.2$yrange <- c(56.34, 56.6)
dk.owin.ants.2$xrange <-c(8.16, 8.8)

dk.ppp.ants.2 <- dk.ppp.ants[dk.owin.ants.2]
dk.psp.faults.2 <- dk.psp.faults.1[dk.owin.ants.2]

dk.ppp.ants.2a <-unmark(dk.ppp.ants.2[dk.ppp.ants.2$marks != "none"])

dk.ppp.ants.2na <-unmark(dk.ppp.ants.2[dk.ppp.ants.2$marks == "none"])

#let's get turn faults into a covariate that is a function of spatial location. A reasonable first step is the distance from any point to a fault.

cov.faults <- distmap(dk.psp.faults.2)

#relative distribution of ants to faults
#We examine how intensity of the point process (ants, no ants) varies as a function rho of the covariate
#Because the pattern is inhomogeneous, we use a kernel smoother
plot(rhohat(dk.ppp.ants.2a, cov.faults), xlab="Distance to nearest fault", ylim=c(0,11500), xlim=c(0,.12), main="NNE-SSW", col= "#e41a1c", lwd=2, legend=F)


#test it....
rho.ants <- rhohat(dk.ppp.ants.2a, cov.faults)
rho.no.ants <- rhohat(dk.ppp.ants.2na, cov.faults)
ks.test(rho.ants$rho, rho.no.ants$rho)

#strong association between normal faults and ant occurances. Ranging from 4 to 10 m from a normal fault 
dk.faults <- read.csv("Faults_10_26.csv", header=T) #this is the file for geological faults
summary (dk.faults)
#subset this dataset to include only normal faults 
dk.faults<-dk.faults[which(dk.faults$Fault_Tren =="WNW-ESE"),]
summary(dk.faults)
#Start by identifying the extent ("window") of the ant data
dk.owin.ants <- owin(c(min(dk.ants$x)-.05, max(dk.ants$x)+.05), c(min(dk.ants$y)-.05, max(dk.ants$y)+.05) )

#create a "point pattern object" out of dk.ants
dk.ppp.ants <- ppp(dk.ants$x, dk.ants$y, window=dk.owin.ants, marks=dk.ants$SP_ID)

#now creat a "point segment object" out of dk.faults
dk.owin.faults <- owin(c(min(dk.faults$x_start, dk.faults$x_end),	max(dk.faults$x_start, dk.faults$x_end)), c(min(dk.faults$y_start, dk.faults$y_end),	max(dk.faults$y_start, dk.faults$y_end)))

dk.psp.faults <- psp(dk.faults$x_start, dk.faults$y_start, dk.faults$x_end, dk.faults$y_end, window=dk.owin.faults)

dk.psp.faults.1 <- dk.psp.faults[dk.owin.ants]

str(dk.ppp.ants)
str(dk.psp.faults.1)


#clip it
str(dk.owin.ants)
dk.owin.ants.2 <- dk.owin.ants

#Thy clump
dk.owin.ants$yrange <- c(56.8, 57.2)
dk.owin.ants$xrange <-c(8.4, 9.08)

#Klosterhede clump
dk.owin.ants.2$yrange <- c(56.34, 56.6)
dk.owin.ants.2$xrange <-c(8.16, 8.8)

dk.ppp.ants.2 <- dk.ppp.ants[dk.owin.ants.2]
dk.psp.faults.2 <- dk.psp.faults.1[dk.owin.ants.2]

dk.ppp.ants.2a <-unmark(dk.ppp.ants.2[dk.ppp.ants.2$marks != "none"])

dk.ppp.ants.2na <-unmark(dk.ppp.ants.2[dk.ppp.ants.2$marks == "none"])

#let's get turn faults into a covariate that is a function of spatial location. A reasonable first step is the distance from any point to a fault.

cov.faults <- distmap(dk.psp.faults.2)

#relative distribution of ants to faults
#We examine how intensity of the point process (ants, no ants) varies as a function rho of the covariate
#Because the pattern is inhomogeneous, we use a kernel smoother
plot(rhohat(dk.ppp.ants.2a, cov.faults), xlab="Distance to nearest fault", ylim=c(0,11500), xlim=c(0,.12), main="WNW-ESE", col= "#e41a1c", lwd=2, legend=F)

#test it....
rho.ants <- rhohat(dk.ppp.ants.2a, cov.faults)
rho.no.ants <- rhohat(dk.ppp.ants.2na, cov.faults)
ks.test(rho.ants$rho, rho.no.ants$rho)


```