##################################################################### # In this program we generate bioclim environmental variables to # simulate the species distributions. ##################################################################### # Load required Packages library(rgdal) library(raster) library(gstat) setwd("...") # set working directory ######################################################################################## # Function to simulate a spatially correlated variable using a variogram. # x.range = the range of long values to simulate the varible # y.range = the range of lat values to simulate the varible # res = the size of the cell, can be a single value or a vector defining the # width and height of the cell # ext = extent of the raster. This can be used instead of x.range and y.range # psill = Limit of the variogram # model = model to use to simulate the variable. see vgm() for models. # range = Distance in which the difference of the variogram from the sill becomes negligible. # (definition from wikipedia) # nugget = the height of the jump of the semivariogram at the discontinuity at the origin. # (definition from wikipedia) # nsim = number of varibles to simulate using the same parameters # beta = the expected value of the variable # nmax = the number of nearest observations that should be used for a simulation. Nearest is defined in terms # of the spatial locations. See function gstat(). rspatvar <- function(formula,locations,x.range,y.range,ext=NULL,res,psill=NA,model="Sph",range=NA,nugget=NULL,nsim=1,beta,nmax ,proj4string=CRS(as.character(NA))){ if(!is.null(ext)){x.range<-ext[1:2];y.range<-ext[3:4]} res.len <- length(res) if(res.len==1){res <- c(res,res)} x.seq <- seq(from=x.range[1],to=x.range[2],by=res[1]) y.seq <- seq(from=y.range[1],to=y.range[2],by=res[2]) xy <- expand.grid(x=x.seq,y=y.seq) if(!is.null(nugget)){vgm.mod <- vgm(psill=psill,model=model,range=range,nugget=nugget)}else{ vgm.mod <- vgm(psill=psill,model=model,range=range) } g.dummy <- gstat(formula=formula,locations=locations,dummy=TRUE,beta=beta,model=vgm.mod,nmax=nmax) pred.vals <- predict(g.dummy,newdata=xy,nsim=nsim) if(nsim==1){ mat <- matrix(pred.vals[,3],ncol=length(x.seq),nrow=length(y.seq),byrow=TRUE) if(is.null(ext)){ext <- extent(c(x.range,y.range))} ras <- raster(mat,crs=proj4string,xmn=x.range[1],ymn=y.range[1],xmx=x.range[2],ymx=y.range[2]) }else{ pos <- 3:(nsim+2) ras.list <- list() for(i in 1:nsim){ ith.mat <- matrix(pred.vals[,pos[i]],ncol=length(x.seq),nrow=length(y.seq),byrow=TRUE) ras.list[[i]]<- raster(ith.mat,crs=proj4string ,xmn=x.range[1],ymn=y.range[1] ,xmx=x.range[2],ymx=y.range[2]) } ras <- stack(ras.list) } return(ras) } ######################################################################################## # Function to simulate the distribution of species from a probability matrix # p = probability matrix giving the presence probability of the species in a location # p can be a raster file or a matrix representing the extent of the species output. # n = the number of realized distributions of the species. spdist.rnd <- function(p,n){ if(class(p)=="RasterLayer"){p.orig <- p; p <- as.matrix(p);} ras.dim <- dim(p) if(n==1){ sp.dist <- matrix(0,nrow=ras.dim[1],ncol=ras.dim[2]) for(i in 1:ras.dim[1]){ for(j in 1:ras.dim[2]){ sp.dist[i,j] <- rbinom(1,1,p[i,j]) } } }else{ sp.dist <-list() for(i in 1:n){ ith.sp.dist <- matrix(0,nrow=ras.dim[1],ncol=ras.dim[2]) for(j in 1:ras.dim[1]){ for(l in 1:ras.dim[2]){ sp.dist[j,l] <- rbinom(1,1,p[j,l]) } } sp.dist[[i]] <- ith.sp.dist } } if(is.null(p.orig)){out <- raster(sp.dist)}else{ if(n==1){ out <- raster(sp.dist,xmn=xmin(p.orig),xmx=xmax(p.orig),ymn=ymin(p.orig),ymx=ymax(p.orig) ,crs=CRS(proj4string(p.orig))) }else{ out <- list() for(i in 1:n){ out[[i]] <- raster(sp.dist[[i]],xmn=xmin(p.orig),xmx=xmax(p.orig),ymn=ymin(p.orig),ymx=ymax(p.orig) ,crs=CRS(proj4string(p.orig))) } } } return(out) } # Create random spatial variables. Two different sets, one wiht an Exponential model and # the second one with a Spherical model. r.extent <- extent(c(1,11.5,1,11.5)) var.set1 <- rspatvar(formula=z~1,locations=~x+y,ext=r.extent,res=1/100,psill=1,range=10 ,model="Exp",nsim=5,beta=0,nmax=10) var.set2 <- rspatvar(formula=z~1,locations=~x+y,ext=r.extent,res=1/100,psill=1,range=6 ,model="Sph",nsim=5,beta=0,nmax=10) var.set <- stack(var.set1,var.set2) #save the environmental layers as tiff for (i in 1:10){ writeRaster(var.set,paste0('rbio',i),format='GTiff') } # we set the number of variables as 3 nvars <- 3 ########################################################################################## # Simulation procedure: # 1. Generate a coefficient for each independent variables using rnorm(),change the mean for weak-coefficient phenomenon # 2. Use Exp((X*betas)^2) transform the linear combination of variables and coefficients to give # a raster of presence probability. # 3. Simulate the species distribution using the spdist.rnd() function # source. The function generates a distribution based on a bernoulli process for each # quadrant in the environmental space and the probability of presence of that quadrant. # 4. Extract all of the quadrants in which the ith species is present. # 5. Sample at random 50 quadrants in which the species is present. # REPEAT the steps for strong-coefficient phenomenon ########################################################################################## file.nams <- list.files("...",full.names=TRUE) nspp <- 100 # Number of Species to simulate n.sample <- 50 var.set <- stack(file.nams) sp.dists <- stack() sp.probs <- stack() betas <- matrix(0,nrow=nspp,ncol=4,dimnames=list(paste("Sp",1:nspp,sep="_"),c("Intercept","beta1","beta2","beta3"))) var.id <- matrix(NA,nrow=nspp,ncol=3,dimnames=list(paste("Sp",1:nspp,sep="_"),c("Var1","Var2","Var3"))) spp.samp <- data.frame(SPP=rep(paste("Spp",1:nspp,sep="_"),each=n.sample),x=rep(0,nspp*n.sample),y=rep(0,nspp*n.sample)) start.ind <- seq(1,(nspp*n.sample),by=n.sample) end.ind <- seq(n.sample,nspp*n.sample,by=n.sample) for(i in 1:nspp){ r.sample <- sample(1:10,3) ith.betas <- rnorm(3,mean=1,sd=0.5) intercept <- 0 tmp.stack <- var.set[[r.sample]] tmp.list <- list() for(j in 1:length(r.sample)){ tmp.list[[j]] <- tmp.stack[[j]]*ith.betas[j] } betas[i,] <- c(intercept,ith.betas) var.id[i,] <- names(var.set)[r.sample] new.stack <- stack(tmp.list) lin.comb <- calc(new.stack,fun=sum) lin.comb <- intercept + lin.comb ith.p <- calc(lin.comb,function(x)exp(-(x)^2)) ith.spp <- spdist.rnd(ith.p,1) sp.dists <- stack(sp.dists,ith.spp) sp.probs <- stack(sp.probs,ith.p) pts.val <- extract(ith.spp,coordinates(ith.spp)) pres.coords <- coordinates(ith.spp)[pts.val==1,] pres.sampl <- sample(1:nrow(pres.coords),n.sample) spp.samp[start.ind[i]:end.ind[i],2:3] <- pres.coords[pres.sampl,] } names(sp.dists) <- names(sp.probs) <- paste("Sp",1:nspp,sep="_") ########################################################################################## # Save the simulation in Rdata, export the coordinates of the random points and export # the rasters of the simulated environmental data save(list=c("sp.dists","sp.probs","betas","var.id") ,file="species_simulations.RData") write.table(spp.samp ,"Presence_Points.txt" ,sep="\t",row.names=FALSE,quote=FALSE)