Peril 7: Random effect categories used as a proxy for covariance structure



First we load the required R packages and set seed for reproducibility

N.B. This should be reproducible when run in R but the output in the knitted Rmarkdown file is different

set.seed(6)

library(nlme)
library(geoR)
library(lme4)
library(sp)
library(gstat)
library(RColorBrewer)
library(viridis)

Now we define some of the key parameters used in the simulations

#Number of regions
regions<-8
#Number of distinct sites per region
sites_per_region<-4
#Number of samples per site
groupsize<-6

#Number of groups (i.e. total number of sites as site is nested within region)
groups<-regions*sites_per_region

#Overall sample size
n<-regions*sites_per_region*groupsize

#Give each observation a group ID
group_ids<-rep(seq(1,groups,1),each=groupsize)

#Identify the regions that each site belongs in
site_regions<-rep(seq(1,regions,1),each=sites_per_region)
#Identify the regions that each individual observation belongs in
ind_regions<-rep(site_regions,each=groupsize)

#Define the different effect sizes we will simulate
slopes<-seq(0,0.4,0.05)

Now we assign the spatial coordinates of all of our sites and regions (used subsequently as random effects)

#Assign region coordinates
regions_centroid_x<-sample(seq(10,90,10),regions)
regions_centroid_y<-sample(seq(10,90,10),regions)

#Plot these coordinates
plot(regions_centroid_x,regions_centroid_y,pch=18,col="blue",cex=3,xlim=c(0,100),ylim=c(0,100),xaxt="n",yaxt="n")

#Assign site coordinates
site_centroids_x<-sample(seq(-10,10,0.5),groups)
site_centroids_y<-sample(seq(-10,10,0.5),groups)
site_x<-rep(NA,groups)
site_y<-rep(NA,groups)

for(i in 1:groups){
  site_x[i]<-regions_centroid_x[site_regions[i]]+site_centroids_x[i]
  site_y[i]<-regions_centroid_y[site_regions[i]]+site_centroids_y[i]
}

#Add site coordinates to the plot
points(site_x,site_y,pch=16,col="green")

#Add coordinates for individual observations
ind_centroids_x<-sample(seq(-1,1,0.0025),n)
ind_centroids_y<-sample(seq(-1,1,0.0025),n)
ind_sites<-rep(seq(1,groups,1),each=groupsize)
ind_x<-rep(NA,n)
ind_y<-rep(NA,n)

for(i in 1:n){
  ind_x[i]<-site_x[ind_sites[i]]+ind_centroids_x[i]
  ind_y[i]<-site_y[ind_sites[i]]+ind_centroids_y[i]
}

#Add these coordinates to the plot
points(ind_x,ind_y,pch=16,col="red",cex=0.1)


Now we create the spatial field used to generate spatially autocorrelated values of the response variable

Here we simulate a fine-scale spatial autocorrelation

To simulate the broader scale auto-correlation that is also in the paper then change range=10 to range=80 when defining g.dummy as demonstrated here:

g.dummy <- gstat(formula=z~1, locations=~field_sim, dummy=T, beta=1, model=vgm(psill=4,model="Exp",range=10,nugget=0.5), nmax=20)
g.dummy <- gstat(formula=z~1, locations=~field_sim, dummy=T, beta=1, model=vgm(psill=4,model="Exp",range=80,nugget=0.5), nmax=20)

Now to simulate the spatial field for real

#create coordinate sets with which to simulate spatial field
field_sim_x<-rep(seq(0,100,0.1),each=length(seq(0,100,0.1)))
field_sim_y<-rep(seq(0,100,0.1),length(seq(0,100,0.1)))
field_sim<-cbind(field_sim_x,field_sim_y)
d<-rep("d",length(field_sim_x))

#Simulate spatial field
field_sim<-SpatialPoints(field_sim)
  
# define the gstat object (spatial model)
g.dummy <- gstat(formula=z~1, locations=~field_sim, dummy=T, beta=1, model=vgm(psill=4,model="Exp",range=10,nugget=0.5), nmax=20)
yy <- predict(g.dummy, newdata=field_sim, nsim=1)
## [using unconditional Gaussian simulation]
yy$sim1<-scale(yy$sim1)

#Make colour set to plot the spatial field
col<- adjustcolor(inferno(21),alpha.f=0.6)
colz <- col[(yy$sim1 - min(yy$sim1))/diff(range(yy$sim1))*20 + 1] 

#plot map
plot(NULL,xlim=c(0,100),ylim=c(0,100),xaxt="n",yaxt="n",xlab="",ylab="",bty="n")

# show one realization
points(yy,pch=16,col=colz,cex=0.1)

#add the observations
points(regions_centroid_x,regions_centroid_y,pch=22,col="light grey",cex=10,lwd=3)
points(site_x,site_y,pch=18,col="black",cex=2)
points(ind_x,ind_y,pch=16,col="white",cex=0.6)