I################################################### ### chunk number 1: data ################################################### #this code was originally integrated as part of the LaTeX file that created the paper 'A non-Euclidean clustering method using relational data' by Matthew J. Vavrek. #The following is released under a Creative Commons Attribution-Noncommercial-Share Alike 3.0 Unported license #full terms of the license can be found at: http://creativecommons.org/licenses/by-nc-sa/3.0/ #This code was tested on an OS X 10.10 distribution running R version 3.2.2 #To set up the environment, be sure that you have the following packages installed: #-fossil #-ecodist #-vegan #-doMC #-multicore #-foreach #please note that some of this code (i.e. the simulated data section) may not run on a Windows computer, as the doMC package does not support Windows machines at this time. To run this section in parallel on a Windows computer, you can use the doSMP package instead. Please consult the doSMP package page at http://cran.r-project.org/web/packages/doSMP/ for full instructions on setting up a machine with doSMP. #relational clustering paper library(fossil) library(ecodist) library(vegan) #Enable parallel processing library(doMC) registerDoMC() options(cores=4) ################################################### ### chunk number 2: multiclust ################################################### #note that the 'iters' object below refers to the number of simulations per level of endemism; for the data in the paper, 'iters' was set to 1000, but for this supplementary data it is set to 3, as computation time for 1000 replicates on a quad core CPU workstation is approximately 10 hours iters <- 1000 clust.comp.end<-foreach(j=seq(0.02, 0.6, 0.02), .combine='rbind') %dopar% { print(j) clust.mat<-matrix(0,iters,6) for (i in 1:iters) { #create a simulated matrix to cluster sm<-sim.occ(endemics=j) #the original groupings orig<-c(rep(1,30), rep(2,30), rep(3,30)) #make a distance matrix sm.dist<-ecol.dist(sm, method = sorenson) #UPGMA custering - needs to be put in explicit groups using treecut() sm.up<-hclust(sm.dist, 'average') sm.up3<-cutree(sm.up, 3) #kmeans clustering smt<-t(sm) sm.cap.euc<-capscale(smt~1, dist="euclidean", metaMDS = TRUE) sm.cap.ca.euc<-sm.cap.euc$CA sm.k.euc <- kmeans(sm.cap.ca.euc$Xbar, 3, nstart=100) sm.cap<-capscale(smt~1, dist="bray", metaMDS = TRUE) sm.cap.ca<-sm.cap$CA sm.k <- kmeans(sm.cap.ca$Xbar, 3, nstart=100) #complete linkage sm.com<-hclust(sm.dist) sm.com3<-cutree(sm.com, 3) #single linkage sm.sin<-hclust(sm.dist, 'single') sm.sin3<-cutree(sm.sin, 3) #relational clustering sm.rc<-rclust(sm.dist, 3, 20) #sm.nm<-nmds(sm.dist, mindim=2, nits=1) clust.mat[i,1]<-adj.rand.index(orig, sm.up3) clust.mat[i,2]<-adj.rand.index(orig, sm.k.euc$cluster) clust.mat[i,3]<-adj.rand.index(orig, sm.k$cluster) clust.mat[i,4]<-adj.rand.index(orig, sm.com3) clust.mat[i,5]<-adj.rand.index(orig, sm.sin3) clust.mat[i,6]<-adj.rand.index(orig, sm.rc) } clust.res<-matrix(0,2,6) clust.res[1,]<-apply(clust.mat, 2, function(x) mean(x)) clust.res[2,]<-apply(clust.mat, 2, function(x) sd(x)) clust.res<-round(clust.res, 3) clust.int<-c(j,clust.res) } ################################################### ### chunk number 3: fig1 ################################################### #to create a multi tile figure that shows how the level of endemicity in the simulation affects the clstering of the different sites, at least in non-Euclidean space with an NMDS pdf('figure1.pdf') oldmar<-par()$mar #the affects the layout of the figure layout(matrix(25:1, 5, 5, byrow=TRUE)) #map with mst par(mar=c(0,0,0,0)) orig<-c(rep(1,30), rep(2,30), rep(3,30)) for (k in seq(0.02, 0.5, 0.02)) { a<-nmds(ecol.dist(sim.occ(endemics=k)), mindim=2, nits=1) plot(a$conf[[1]], pch=orig, xaxt='n', yaxt='n', ylim=c(-0.4, 0.4), xlim=c(-0.4, 0.4), col=orig+1) text(-0.25, 0.37, labels=paste('e=', k, sep=''), cex=1.5) } dev.off() ################################################### ### chunk number 4: fig2 ################################################### pdf('figure2.pdf') par(mar=oldmar) plot(clust.comp.end[,1:2], type='n', xlim = c(0.6, 0), ylim = c(-0.1, 1.1), ylab = 'Adjusted Rand Index', xlab = 'Relative Endemicity') lines(clust.comp.end[,1], clust.comp.end[,2], col=1) points(clust.comp.end[,1],clust.comp.end[,2], pch=1, col=1) lines(clust.comp.end[,1], clust.comp.end[,4], col=2) points(clust.comp.end[,1],clust.comp.end[,4], pch=2, col=2) lines(clust.comp.end[,1], clust.comp.end[,6], col=3) points(clust.comp.end[,1],clust.comp.end[,6], pch=3, col=3) lines(clust.comp.end[,1], clust.comp.end[,8], col=4) points(clust.comp.end[,1],clust.comp.end[,8], pch=4, col=4) lines(clust.comp.end[,1], clust.comp.end[,10], col='steelblue') points(clust.comp.end[,1],clust.comp.end[,10], pch=5, col='steelblue') lines(clust.comp.end[,1], clust.comp.end[,12], col='mediumvioletred') points(clust.comp.end[,1],clust.comp.end[,12], pch=6, col='mediumvioletred') legend(x='bottomleft', c('UPGMA', 'k-means/E', 'k-means/NE', 'Complete Linkage', 'Single Linkage', 'NERC'), pch=1:6, col=c(1:4,'steelblue','mediumvioletred')) dev.off() ################################################### ### chunk number 4: sparse-data ################################################### iters <- 1000 clust.comp.samp<-foreach(j=seq(0.2, 3, 0.1), .combine='rbind') %dopar% { print(j) clust.mat<-matrix(0,iters,6) for (i in 1:iters) { #create a simulated matrix to cluster sm<-sim.occ(endemics=0.2, avg.abund=j) #the original groupings orig<-c(rep(1,30), rep(2,30), rep(3,30)) #make a distance matrix sm.dist<-ecol.dist(sm, method = sorenson) #UPGMA custering - needs to be put in explicit groups using treecut() sm.up<-hclust(sm.dist, 'average') sm.up3<-cutree(sm.up, 3) #kmeans clustering smt<-t(sm) sm.cap.euc<-capscale(smt~1, dist="euclidean", metaMDS = TRUE) sm.cap.ca.euc<-sm.cap.euc$CA sm.k.euc <- kmeans(sm.cap.ca.euc$Xbar, 3, nstart=100) sm.cap<-capscale(smt~1, dist="bray", metaMDS = TRUE) sm.cap.ca<-sm.cap$CA sm.k <- kmeans(sm.cap.ca$Xbar, 3, nstart=100) #complete linkage sm.com<-hclust(sm.dist) sm.com3<-cutree(sm.com, 3) #single linkage sm.sin<-hclust(sm.dist, 'single') sm.sin3<-cutree(sm.sin, 3) #relational clustering sm.rc<-rclust(sm.dist, 3, 20) #sm.nm<-nmds(sm.dist, mindim=2, nits=1) clust.mat[i,1]<-adj.rand.index(orig, sm.up3) clust.mat[i,2]<-adj.rand.index(orig, sm.k.euc$cluster) clust.mat[i,3]<-adj.rand.index(orig, sm.k$cluster) clust.mat[i,4]<-adj.rand.index(orig, sm.com3) clust.mat[i,5]<-adj.rand.index(orig, sm.sin3) clust.mat[i,6]<-adj.rand.index(orig, sm.rc) } clust.res<-matrix(0,2,6) clust.res[1,]<-apply(clust.mat, 2, function(x) mean(x)) clust.res[2,]<-apply(clust.mat, 2, function(x) sd(x)) clust.res<-round(clust.res, 3) clust.int<-c(j,clust.res) } ################################################### ### chunk number 4: fig2 ################################################### pdf('figure3.pdf') plot(clust.comp.samp[,1:2], type='n', xlim = c(3.2,0), ylim = c(-0.1, 1.1), ylab = 'Adjusted Rand Index', xlab = 'Sampling Intensity', xaxt='n') lines(clust.comp.samp[,1], clust.comp.samp[,2], col=1) points(clust.comp.samp[,1],clust.comp.samp[,2], pch=1, col=1) lines(clust.comp.samp[,1], clust.comp.samp[,4], col=2) points(clust.comp.samp[,1],clust.comp.samp[,4], pch=2, col=2) lines(clust.comp.samp[,1], clust.comp.samp[,6], col=3) points(clust.comp.samp[,1],clust.comp.samp[,6], pch=3, col=3) lines(clust.comp.samp[,1], clust.comp.samp[,8], col=4) points(clust.comp.samp[,1],clust.comp.samp[,8], pch=4, col=4) lines(clust.comp.samp[,1], clust.comp.samp[,10], col='steelblue') points(clust.comp.samp[,1],clust.comp.samp[,10], pch=5, col='steelblue') lines(clust.comp.samp[,1], clust.comp.samp[,12], col='mediumvioletred') points(clust.comp.samp[,1],clust.comp.samp[,12], pch=6, col='mediumvioletred') legend(x='bottomleft', c('UPGMA', 'k-means/E', 'k-means/NE', 'Complete Linkage', 'Single Linkage', 'NERC'), pch=1:6, col=c(1:4,'steelblue','mediumvioletred')) axis(1, at=seq(3, 0, -0.5)) dev.off() save.image('rc.RData')