############### setup ####################################################
rm(list = ls(all = TRUE)) #blanks out your values 
#file.choose() #find the dang filepath 
#setwd("J:\\Data\\ASE_Conservation\\Core Projects\\Butterflies\\Papers\\Backyard habitat\\CSV and R files")

#######################################################################
# Plot  graphs 
#######################################################################
d= read.csv("Graphs combining by closest day 2.csv") #load the data 

#remove rows that have a greater difference than 14 days between observations
vec = d$Day.difference<14 & d$Day.difference>-14
d2 = d[vec,]

# making a plot of dependent variable x bloom abundance for our graphs 

par(mfrow=c(2,2))

plot(sqrt(d2$Abundance)~d2$Bloom.abundance.30000max, xlab = "Bloom Abundance", ylab = "Insect Abundance")
abline(lm(sqrt(d2$Abundance)~d2$Bloom.abundance.30000max))

plot((d2$Richness)~d2$Bloom.abundance.30000max, xlab = "Bloom Abundance", ylab = "Insect Richness")
abline(lm((d2$Richness)~d2$Bloom.abundance.30000max))

plot(sqrt(d2$Abundance.flower.visitors)~d2$Bloom.abundance.30000max, xlab = "Bloom Abundance", ylab = "Sqrt(Flower Visitor Abundance)")
abline(lm(sqrt(d2$Abundance.flower.visitors)~d2$Bloom.abundance.30000max))

plot((d2$Richness.flower.visitors)~d2$Bloom.abundance.30000max, xlab = "Bloom Abundance", ylab = "Flower Visitor Richness")
abline(lm((d2$Richness.flower.visitors)~d2$Bloom.abundance.30000max))


################### Making the boxplot of orders 

rm(list = ls(all = TRUE)) #blanks out your values
bowl_df = read.csv("PFW insect data 2013+2014.csv") #Results from bowl surveys
#exploring the data
bowl_df = bowl_df[,1:12]
#bowl_df = na.omit(bowl_df) #remove empty cells
summary(bowl_df)
names(bowl_df) #for dependent and independent variables

#Barplot of orders
z1 = table(bowl_df[,'Order'])
d = as.data.frame(z1) #convert table to dataframe
d = d[-c(2,7,8),] #remove bad rows 
d2 = d[order(d[,2],decreasing = TRUE),] #creating a new df ordered by descending
barplot(d2[,2],names.arg = d2[,1], ylab = "Number of Insects") #creating a barplot 

#######################################################################
# Running the models 
####################################################################### 

# Install (if necessary) and load nlme and lme4 for the mixed models
library(nlme)
library(lme4)
#install.packages("AICcmodavg")
library("AICcmodavg")
#install.packages("ggplot2")
library("ggplot2")

rm(list = ls(all = TRUE)) #blanks out your values 
d= read.csv("Graphs combining by closest day 2.csv") #load the data 

#remove rows that have a greater difference than 14 days between observations
vec = d$Day.difference<14 & d$Day.difference>-14 
d2 = d[vec,]

#####   y = Abundance flower visitors, new linear mixed model selection##

#first, give things new names because it's picky 
#note: ALL MUST HAVE EXACT SAME RANDOM EFFECT 
Location.1 = d2$Location.1
Abundance.flower.visitors = sqrt(d2$Abundance.flower.visitors)
Site.acreage = d2$Site.acreage
Distance.to.park = d2$Distance.to.park
log.Park.Ac. = d2$log.Park.Ac.
Bloom.abundance = d2$Bloom.abundance
Site.name = d2$Site.name
Fl.friendly.bloom.abundance = d2$Fl.friendly.bloom.abundance
Native.bloom.abundance = d2$Native.bloom.abundance
Nonnative.bloomabundance = d2$Nonnative.bloomabundance
bloom.richness = d2$bloom.richness
Ff.richness = d2$Ff.richness
Native.richness = d2$Native.richness
Non.native.richness = d2$Non.native.richness
bloom.evenness = d2$bloom.evenness
plant.richness = d2$plant.richness
Date.veg.survey = d2$Date.veg.survey
Date.bowl.survey = d2$Date.bowl.survey
bloom.date = d2$bloom.date

Cand.models <- list( )

Cand.models[[1]] <- lme(Abundance.flower.visitors~ Site.acreage + Distance.to.park + log.Park.Ac. + Bloom.abundance, random = ~1|Site.name/Date.veg.survey/Date.bowl.survey/bloom.date/Date.veg.survey/Date.bowl.survey/bloom.date, method = "ML")
Cand.models[[2]] <- lme(Abundance.flower.visitors~ Bloom.abundance, random = ~1|Site.name/Date.veg.survey/Date.bowl.survey/bloom.date/Date.veg.survey/Date.bowl.survey/bloom.date, method = "ML")
Cand.models[[3]] <- lme(Abundance.flower.visitors~ Fl.friendly.bloom.abundance + Native.bloom.abundance + Nonnative.bloomabundance, random = ~1|Site.name/Date.veg.survey/Date.bowl.survey/bloom.date/Date.veg.survey/Date.bowl.survey/bloom.date, method = "ML")
Cand.models[[4]] <- lme(Abundance.flower.visitors~ bloom.richness, random = ~1|Site.name/Date.veg.survey/Date.bowl.survey/bloom.date/Date.veg.survey/Date.bowl.survey/bloom.date, method = "ML")
Cand.models[[5]] <- lme(Abundance.flower.visitors~ bloom.richness + Bloom.abundance, random = ~1|Site.name/Date.veg.survey/Date.bowl.survey/bloom.date/Date.veg.survey/Date.bowl.survey/bloom.date, method = "ML")
Cand.models[[6]] <- lme(Abundance.flower.visitors~ bloom.richness + Bloom.abundance + bloom.evenness, random = ~1|Site.name/Date.veg.survey/Date.bowl.survey/bloom.date, method = "ML")
Cand.models[[7]] <- lme(Abundance.flower.visitors~  Bloom.abundance + bloom.evenness*bloom.richness, random = ~1|Site.name/Date.veg.survey/Date.bowl.survey/bloom.date, method = "ML")
Cand.models[[8]] <- lme(Abundance.flower.visitors~  Bloom.abundance*bloom.evenness + bloom.richness, random = ~1|Site.name/Date.veg.survey/Date.bowl.survey/bloom.date, method = "ML")
Cand.models[[9]] <- lme(Abundance.flower.visitors~  Bloom.abundance + plant.richness, random = ~1|Site.name/Date.veg.survey/Date.bowl.survey/bloom.date, method = "ML")
Cand.models[[10]] <- lme(Abundance.flower.visitors~  Bloom.abundance + Ff.richness + Native.richness + Non.native.richness, random = ~1|Site.name/Date.veg.survey/Date.bowl.survey/bloom.date, method = "ML")

Modnames <- paste("model", 1:length(Cand.models), sep = " ")
aictab(cand.set = Cand.models, modnames = Modnames, sort = TRUE)

summary(Cand.models[[10]]) #top model # wecan say relatively confidently 10 is best 
summary(Cand.models[[9]]) #top model
summary(Cand.models[[8]]) #top model

x =  Bloom.abundance  #pearson's effect size 
y = Abundance.flower.visitors
cor(x,y,method = "pearson")
x = Ff.richness  #pearson's effect size 
x = Native.richness  #pearson's effect size 
x = Non.native.richness  #pearson's effect size 


#################   y = Abundance insects, new linear mixed model selection ####################
#originally removed dolichopodidae but put back into model and found made no difference 

#setting variables
Abundance = sqrt(d2$Abundance)

Cand.models <- list( )
Cand.models[[1]] <- lme(sqrt(Abundance)~ Site.acreage + Distance.to.park + log.Park.Ac. + plant.richness, random = ~1|Site.name/Date.veg.survey/Date.bowl.survey/bloom.date, method = "ML")
Cand.models[[2]] <- lme(sqrt(Abundance)~  plant.richness, random = ~1|Site.name/Date.veg.survey/Date.bowl.survey/bloom.date, method = "ML")
Cand.models[[3]] <- lme(sqrt(Abundance)~  Site.acreage + Distance.to.park + log.Park.Ac. + Ff.richness + Native.richness + Non.native.richness, random = ~1|Site.name/Date.veg.survey/Date.bowl.survey/bloom.date, method = "ML") 
Cand.models[[4]] <- lme(sqrt(Abundance)~ Ff.richness + Native.richness + Non.native.richness, random = ~1|Site.name/Date.veg.survey/Date.bowl.survey/bloom.date, method = "ML") 
Cand.models[[5]] <- lme(sqrt(Abundance)~  plant.richness + Bloom.abundance, random = ~1|Site.name/Date.veg.survey/Date.bowl.survey/bloom.date, method = "ML")
Cand.models[[6]] <- lme(sqrt(Abundance)~  plant.richness + Bloom.abundance*bloom.evenness, random = ~1|Site.name/Date.veg.survey/Date.bowl.survey/bloom.date, method = "ML")
Cand.models[[7]] <- lme(sqrt(Abundance)~  plant.richness*Bloom.abundance, random = ~1|Site.name/Date.veg.survey/Date.bowl.survey/bloom.date, method = "ML")
Cand.models[[8]] <- lme(sqrt(Abundance)~ Ff.richness*Bloom.abundance + Native.richness*Bloom.abundance + Non.native.richness*Bloom.abundance, random = ~1|Site.name/Date.veg.survey/Date.bowl.survey/bloom.date, method = "ML") 

Modnames <- paste("model", 1:length(Cand.models), sep = " ")
aictab(cand.set = Cand.models, modnames = Modnames, sort = TRUE)

summary(Cand.models[[6]]) #top model.
summary(Cand.models[[8]]) 
summary(Cand.models[[5]]) 

y = sqrt(Abundance)
x = plant.richness
cor(x,y,method = "pearson")
x = Bloom.abundance
x = bloom.evenness
x = d2$bloom.evenness*d2$Bloom.abundance

#y = insect abundance 3-D plot 
# 3d plot with interaction sqrt(Abundance of insects)  = plant richness + bloom abundance*bloom evenness 
# load packages
#install.packages("rms")
library("rms")
#install.packages("lattice")
library("lattice")
#install.packages("Hmisc")
library("Hmisc")

y = sqrt(d2$Abundance)
x1 = d2$Bloom.abundance
x2 = d2$bloom.evenness

ddI <- datadist(d2)
options(datadist="ddI")
lininterp <- ols(y ~ x1*x2, data=d2)
bplot(Predict(lininterp, x1= 0:30000, x2 = 0:1), 
      lfun=wireframe,  # bplot passes extra arguments to wireframe
      screen = list(z = -15, x = -60), #this changes angle viewed
      drape=FALSE, 
      xlabrot = -8, #rotating the labels 
      ylabrot = 75,
      zlabrot = 95,
      xlab = "Bloom Abundance", #naming hte labels 
      ylab = "Bloom Evenness",
      zlab = "Sqrt(Insect Abundance)",
      col="light gray", #making the surface light gray instead of black
      scales = list(arrows = FALSE, distance = 1.2) #arrows ensures we keep our tick marks, distance = 1.2 is the distance between the tick marks and our labels 
)

#####################################  y = insect richness  

#Giving things new names
Richness = d2$Richness
Distance.to.park = d2$Distance.to.park

#Creating the candidate models 
Cand.models <- list( )

Cand.models[[1]] <- lme(Richness~ Site.acreage + Distance.to.park + log.Park.Ac.+ plant.richness, random = ~1|Site.name/Date.veg.survey/Date.bowl.survey/bloom.date, method = "ML")
Cand.models[[2]] <- lme(Richness~  plant.richness, random = ~1|Site.name/Date.veg.survey/Date.bowl.survey/bloom.date, method = "ML")
Cand.models[[3]] <- lme(Richness~ Site.acreage + Distance.to.park + log.Park.Ac.+ Ff.richness + Native.richness + Non.native.richness, random = ~1|Site.name/Date.veg.survey/Date.bowl.survey/bloom.date, method = "ML")
Cand.models[[4]] <- lme(Richness~ Ff.richness + Native.richness + Non.native.richness, random = ~1|Site.name/Date.veg.survey/Date.bowl.survey/bloom.date, method = "ML")
Cand.models[[5]] <- lme(Richness~ plant.richness + Bloom.abundance, random = ~1|Site.name/Date.veg.survey/Date.bowl.survey/bloom.date, method = "ML")
Cand.models[[6]] <- lme(Richness~ plant.richness*Bloom.abundance, random = ~1|Site.name/Date.veg.survey/Date.bowl.survey/bloom.date, method = "ML")
Cand.models[[7]] <- lme(Richness~ plant.richness + Bloom.abundance*bloom.evenness, random = ~1|Site.name/Date.veg.survey/Date.bowl.survey/bloom.date, method = "ML")
Cand.models[[8]] <- lme(Richness~ plant.richness + Bloom.abundance + bloom.evenness, random = ~1|Site.name/Date.veg.survey/Date.bowl.survey/bloom.date, method = "ML")

Modnames <- paste("model", 1:length(Cand.models), sep = " ")
aictab(cand.set = Cand.models, modnames = Modnames, sort = TRUE)

summary(Cand.models[[7]]) #slightly different, but the top model remains the same -- the top 
summary(Cand.models[[4]]) # this one is pretty close to to the next top model -- present both 
summary(Cand.models[[5]]) 

x = d2$plant.richness #pearson's effect size 
y = d2$Richness
cor(x,y,method = "pearson")
x = d2$Bloom.abundance
x = d2$bloom.evenness
x = d2$bloom.evenness*d2$Bloom.abundance

# 3d plot with interaction Richness of insects  = plant richness + bloom abundance*bloom evenness 
# load packages
#install.packages("rms")
library("rms")
#install.packages("lattice")
library("lattice")
#install.packages("Hmisc")
library("Hmisc")

y = d2$Richness
x1 = d2$Bloom.abundance
x2 = d2$bloom.evenness

ddI <- datadist(d2)
options(datadist="ddI")
lininterp <- ols(y ~ x1*x2, data=d2)
bplot(Predict(lininterp, x1= 0:30000, x2 = 0:1), 
      lfun=wireframe,  # bplot passes extra arguments to wireframe
      screen = list(z = -15, x = -60), #this changes angle viewed
      drape=FALSE, 
      xlabrot = -8, #rotating the labels 
      ylabrot = 75,
      zlabrot = 95,
      xlab = "Bloom Abundance", #naming hte labels 
      ylab = "Bloom Evenness",
      zlab = "Total Insect Richness",
      col="light gray", #making the surface light gray instead of black
      scales = list(arrows = FALSE, distance = 1.2) #arrows ensures we keep our tick marks, distance = 1.2 is the distance between the tick marks and our labels 
)


#################   y = Richness flower visitors

Richness.flower.visitors = d2$Richness.flower.visitors
Ff.bloom.richness = d2$Ff.bloom.richness
native.bloom.richness = d2$native.bloom.richness
nonnative.bloom.richness = d2$nonnative.bloom.richness

# New candidate models 
Cand.models <- list( )

Cand.models[[1]] <- lme(Richness.flower.visitors ~ Site.acreage + Distance.to.park + log.Park.Ac. + Bloom.abundance, random = ~1|Site.name/Date.veg.survey/Date.bowl.survey/bloom.date, method = "ML")
Cand.models[[2]] <- lme(Richness.flower.visitors ~  Bloom.abundance, random = ~1|Site.name/Date.veg.survey/Date.bowl.survey/bloom.date, method = "ML")
Cand.models[[3]] <- lme(Richness.flower.visitors ~ Fl.friendly.bloom.abundance + Native.bloom.abundance + Nonnative.bloomabundance, random = ~1|Site.name/Date.veg.survey/Date.bowl.survey/bloom.date, method = "ML")
Cand.models[[4]] <- lme(Richness.flower.visitors ~  bloom.richness, random = ~1|Site.name/Date.veg.survey/Date.bowl.survey/bloom.date, method = "ML")
Cand.models[[5]] <- lme(Richness.flower.visitors ~  Ff.bloom.richness + native.bloom.richness + nonnative.bloom.richness, random = ~1|Site.name/Date.veg.survey/Date.bowl.survey/bloom.date, method = "ML")
Cand.models[[6]] <- lme(Richness.flower.visitors ~  Bloom.abundance + bloom.richness, random = ~1|Site.name/Date.veg.survey/Date.bowl.survey/bloom.date, method = "ML")
Cand.models[[7]] <- lme(Richness.flower.visitors ~  Bloom.abundance + Ff.bloom.richness + native.bloom.richness + nonnative.bloom.richness, random = ~1|Site.name/Date.veg.survey/Date.bowl.survey/bloom.date, method = "ML")
Cand.models[[8]] <- lme(Richness.flower.visitors ~  Bloom.abundance+ bloom.evenness*bloom.richness, random = ~1|Site.name/Date.veg.survey/Date.bowl.survey/bloom.date, method = "ML")
Cand.models[[9]] <- lme(Richness.flower.visitors ~  Bloom.abundance*bloom.evenness+ bloom.richness, random = ~1|Site.name/Date.veg.survey/Date.bowl.survey/bloom.date, method = "ML")
Cand.models[[10]] <- lme(Richness.flower.visitors ~  Bloom.abundance + plant.richness, random = ~1|Site.name/Date.veg.survey/Date.bowl.survey/bloom.date, method = "ML")
Cand.models[[11]] <- lme(Richness.flower.visitors ~  Bloom.abundance+ Ff.richness + Native.richness + Non.native.richness, random = ~1|Site.name/Date.veg.survey/Date.bowl.survey/bloom.date, method = "ML")
Cand.models[[12]] <- lme(Richness.flower.visitors ~  Bloom.abundance*bloom.evenness+ plant.richness, random = ~1|Site.name/Date.veg.survey/Date.bowl.survey/bloom.date, method = "ML")

Modnames <- paste("model", 1:length(Cand.models), sep = " ")
aictab(cand.set = Cand.models, modnames = Modnames, sort = TRUE)

#11 wins 

summary(Cand.models[[11]]) #same top three, but order changed a lot! 
summary(Cand.models[[10]])
summary(Cand.models[[12]])

x = Bloom.abundance  #pearson's effect size 
y = Richness.flower.visitors
cor(x,y,method = "pearson")
x = Ff.richness  #pearson's effect size 
x = Native.richness  #pearson's effect size 
x = Non.native.richness  #pearson's effect size 

#worked better with lme model, selected this over Poisson 

#####################################################################
# Checking for normality in top models 
#####################################################################

# Richness of flower visitors  -------------------------------------------------------- 
model <- lm(Richness.flower.visitors ~  Bloom.abundance + Ff.richness + Native.richness + Non.native.richness)

#homogeneity of variances  
predicted = predict.lm(model)
residuals = sqrt(d2$Abundance.flower.visitors) - predicted
plot(predicted,residuals,cex=2,cex.lab=1.5,cex.axis=1.15, ylab=" Residuals", xlab= "Predicted Y")
abline(a=0,b=0, col="red", lwd=3,lty="dashed")

#Normality
stdRes = rstandard(model)
qqnorm(stdRes,ylab="Standardized Residuals", xlab="Theoretical Quantiles")
qqline(stdRes, col=2,lwd=2) 
#pretty good 

# Abundance of flower visitors model sqrt transfomred --------------------------------------------------------

model <- lm(sqrt(d2$Abundance.flower.visitors)~  d2$Bloom.abundance + d2$plant.richness)

#homogeneity of variances  
predicted = predict.lm(model)
residuals = sqrt(d2$Abundance.flower.visitors) - predicted
plot(predicted,residuals,cex=2,cex.lab=1.5,cex.axis=1.15, ylab=" Residuals", xlab= "Predicted Y")
abline(a=0,b=0, col="red", lwd=3,lty="dashed")

#Normality
stdRes = rstandard(model)
qqnorm(stdRes,ylab="Standardized Residuals", xlab="Theoretical Quantiles")
qqline(stdRes, col=2,lwd=2)

#pretty good 

# Richness of insects   -------------------------------------------------------- 

model <- lm(d2$Richness~ d2$plant.richness + d2$Bloom.abundance*d2$bloom.evenness)

#homogeneity of variances  
predicted = predict.lm(model)
residuals = d2$Richness - predicted
plot(predicted,residuals,cex=2,cex.lab=1.5,cex.axis=1.15, ylab=" Residuals", xlab= "Predicted Y")
abline(a=0,b=0, col="red", lwd=3,lty="dashed")

#Normality
stdRes = rstandard(model)
qqnorm(stdRes,ylab="Standardized Residuals", xlab="Theoretical Quantiles")
qqline(stdRes, col=2,lwd=2)

#wow almost perfect 

# Abundance of insects   -------------------------------------------------------- 

model <- lm(sqrt(d2$Abundance)~  d2$plant.richness + d2$Bloom.abundance*d2$bloom.evenness)

#homogeneity of variances  
predicted = predict.lm(model)
residuals = sqrt(d2$Abundance) - predicted
plot(predicted,residuals,cex=2,cex.lab=1.5,cex.axis=1.15, ylab=" Residuals", xlab= "Predicted Y")
abline(a=0,b=0, col="red", lwd=3,lty="dashed")

#Normality
stdRes = rstandard(model)
qqnorm(stdRes,ylab="Standardized Residuals", xlab="Theoretical Quantiles")
qqline(stdRes, col=2,lwd=2)

#quite good