#### **** Hierarchical Clusters **** #### setwd("C:/Users/167148/OCC_work/Reference Conditions and Criteria/HKG assessment") ## 1. Climate and Geology SM<-read.csv("E:/OCC work/Reference Conditions and Criteria/Land scale work/SiteMaster.csv") names(SM) df<-SM[,c(1,15,16,47:50,110,116)] names(df) # Geology vars chosen based on their relationship to measure conductivity and alkalinity summary(df) length(unique(df$SID)) # length(unique(df$PCID)) no PCID #df<-df[,-c(1)] df <- na.omit(df) head(df) length(df[,1]) #kmeans(x, centers, iter.max = 10, nstart = 1) # standard code library(tidyverse) library(cluster) library(factoextra) library(NbClust) library(rfUtilities) #### Mode function #### Mode = function(X){ vals = unique(X) cnts = rep(NA) for(i in 1:length(vals)){ cnts[i] = length(X[X==vals[i]]) } dat = data.frame(cbind(vals,cnts)) mode = dat[dat[,2]==max(cnts),1] if(length(mode) == 1){ return(mode) }else{ return("multiple") } } #best # of clusters summary(df) dft<-as.matrix(df[,-c(1)]) dft<-scale(dft) dim(dft) summary(dft) dfc<-NbClust(dft,method="kmeans") par(mfrow=c(1,1)) # 9 (7 votes) or 2 (6 votes) clusters # Compute k-means with k = 2 set.seed(123) k2 <- kmeans(dft, 2, nstart = 25) #plot par(mfrow=c(1,2)) fviz_cluster(k2, data = dft) # Compute k-means with k = 9 set.seed(123) k9 <- kmeans(dft, 9, nstart = 25) #plot fviz_cluster(k9, data = dft) df$k2<-k2$cluster df$k9<-k9$cluster head(df) df$WBID<-rep(NA) df$Longitude<-rep(NA) df$Latitude<-rep(NA) for(i in 1:length(df[,1])){ df$WBID[i]<-SM$WBID[SM$SID == df$SID[i]] df$Longitude[i]<-SM$Longitude[SM$SID == df$SID[i]] df$Latitude[i]<-SM$Latitude[SM$SID == df$SID[i]] } head(df) write.csv(df,"H_clust_level1.csv", row.names=FALSE) K.results<-rbind(k2$centers) #K.results$K<-c(rep(2,2),rep(3,3),rep(5,5)) K.results write.csv(K.results,"HK_results_Level1.csv") ##### Prep for eastern and western assessments #### H1<-read.csv("H_clust_level1.csv") names(H1) table(H1$k2) dim(H1) length(unique(H1$WBID)) 1876/1069 sites = unique(H1$WBID) samps = rep(0) for(s in 1:length(sites)){ dat = subset(H1, WBID == sites[s]) samps[s] = nrow(dat) } samp_cnt<-cbind(sites,samps) rbind( cbind(1,2,3,4,5), cbind(length(samps[samps == 1])/1069, length(samps[samps == 2])/1069, length(samps[samps == 3])/1069, length(samps[samps == 4])/1069, length(samps[samps == 5])/1069 )) ### Explore differences library(rpart) EW<-H1[,c(2:10)] names(EW) EW$EW_Group<-rep(NA) EW$EW_Group[EW$k2 == 1] = "West" EW$EW_Group[EW$k2 == 2] = "East" head(EW) EW = EW[,-9] hct<-rpart(EW_Group ~ ., data=EW, method = "class") par(mfrow=c(1,1)) plot(hct);text(hct) plotcp(hct) hctp<-prune(hct, cp=0.1) #### East vs. West Class Tree #### par(mfrow=c(1,1)) EW_plot = plot(hctp, main = "East vs West");text(hctp) print(hctp) library(rattle) fancyRpartPlot(hctp,yesno=2,split.col="black",nn.col="black", caption="",palette="Greys",branch.col="black") # clean up groups H1$EW_Group<-rep(NA) H1$EW_Group[H1$AnRainC < 975.5] = "West" H1$EW_Group[H1$AnRainC >= 975.5] = "East" write.csv(H1, "East_West_Groups.csv", row.names = FALSE) table(H1$EW_Group) length(unique(H1$WBID[H1$EW_Group == "East"])) length(unique(H1$WBID[H1$EW_Group == "West"])) ## 2. Instream names(SM) EWsg = read.csv("East_West_Groups.csv") head(EWsg) names(EWsg) EW = EWsg[,c(1,15)] SM2 = SM %>% left_join(EW, by="SID") head(SM2) dim(SM2) SM3<-na.omit(SM2) dim(SM3) write.csv(SM2, "EW_Group_SiteMaster.csv", row.names = F) SMEast = subset(SM2, EW_Group == "East") summary(SMEast) SMWest = subset(SM2, EW_Group == "West") summary(SMWest) #### Eastern cluster #### #SMEast<-subset(H1, EW_Group == "East") names(SMEast) SMEast$Veg.Covl = log(SMEast$Veg.Cover+0.125) hist(SMEast$Veg.Covl) #df = SMEast[,c(1,26:34,37:42,46)] df = SMEast[,c(1,3,37:39,46,59,67,70,71,73:81,83,89,119)] # Original (except 119, Veg.Covl) #df = SMEast[,c(1,37:39,46,67,70:81)] names(df) summary(df) df = na.omit(df) ####### Eastern data stats ############## nrow(df) length(unique(df$WBID)) sites = unique(df$WBID) samps = rep(0) for(s in 1:length(sites)){ dat = subset(df, WBID == sites[s]) samps[s] = nrow(dat) } samp_cnt<-cbind(sites,samps) Estats<-rbind( cbind(1,2,3,4,5), cbind(length(samps[samps == 1])/258, length(samps[samps == 2])/258, length(samps[samps == 3])/258, length(samps[samps == 4])/258, length(samps[samps == 5])/258 )) Estats ########################################### dft<-as.matrix(df[,-c(1,2)]) dft<-scale(dft) dim(dft) summary(dft) dfc<-NbClust(dft,method="kmeans") par(mfrow=c(1,1)) # 4 (7 votes) or 2 (5 votes) or 3 (5 votes) clusters # Compute k-means with k = 4 (Original) set.seed(123) k4 <- kmeans(dft, 4, nstart = 25) # Compute k-means with k = 3 set.seed(123) k3 <- kmeans(dft, 3, nstart = 25) #With intermediate factors #plot par(mfrow=c(1,2)) fviz_cluster(k4, data = dft) fviz_cluster(k3, data = dft) # Compute k-means with k = 2 set.seed(123) k2 <- kmeans(dft, 2, nstart = 25) #plot fviz_cluster(k2, data = dft) df$k4<-k4$cluster df$k3<-k3$cluster df$k2<-k2$cluster head(df) df$Longitude<-rep(NA) df$Latitude<-rep(NA) for(i in 1:length(df[,1])){ df$Longitude[i]<-SM$Longitude[SM$SID == df$SID[i]] df$Latitude[i]<-SM$Latitude[SM$SID == df$SID[i]] } head(df) df<-cbind(df,dft) write.csv(df,"East_clusters2.csv", row.names=FALSE) K.results<-rbind(k4$centers) #K.results$K<-c(rep(2,2),rep(3,3),rep(5,5)) K.results write.csv(K.results,"HK_results_East2.csv") #### Apply mode designation to sites #### E4<-read.csv("East_clusters2.csv") head(E4) names(E4) E4 = E4[,-c(24:47)] # drop extra groups and scaled vars names(SM)#[c(1,2)] # Get SiteName library(dplyr) E4 = E4 %>% left_join(SM[,c(1:2)],by = "SID") nrow(E4) length(unique(E4$WBID)) Sites = unique(E4$WBID) K4 = rep(NA) EHKG = data.frame(cbind(Sites,K4)) colnames(EHKG) = c("WBID","K_Group") nrow(EHKG) for(i in 1:nrow(EHKG)){ dat = subset(E4, WBID == EHKG$WBID[i]) EHKG$K_Group[i] = Mode(dat$k4) } EHKG #E4 = E4 %>% left_join(EHKG, by = "WBID") write.csv(E4, "New_EHKG2.csv", row.names = FALSE) library(dplyr) head(E4) dim(E4) names(EHKG) dim(EHKG) E4 = E4 %>% left_join(EHKG, by = 'WBID') E4$K_Group = as.character(E4$K_Group) table(E4$K_Group) length(unique(E4$WBID[E4$K_Group == "multiple"])) #### Classification tree to assign "multiple" K_Group nrow(E4[E4$K_Group == "multiple",]) E4comp = subset(E4, K_Group != "multiple") E4multi = subset(E4,K_Group == "multiple") nrow(E4comp) names(E4comp) E4test <-E4comp[,-c(1,2,23:24)] names(E4test) library(rpart) #### Change HKG lable for plot E4test$K_Group[E4test$K_Group == 1] = "Hills" E4test$K_Group[E4test$K_Group == 2] = "East Valleys" E4test$K_Group[E4test$K_Group == 3] = "East Plains" E4test$K_Group[E4test$K_Group == 4] = "Highlands" head(E4test) write.csv(E4test,"C:/Users/167148/OCC_work/Manuscripts/Hierarchical clusters/Fig2bdata.csv",row.names=FALSE) set.seed(243) ect<-rpart(K_Group ~ ., data = E4test, method = "class") par(mfrow=c(1,1)) plot(ect);text(ect) plotcp(ect) #### East Class Tree #### ectp<-prune(ect, cp=0.05) East_ct = plot(ectp);text(ectp) print(ectp) # assign sites with multiple modes Egrp<-function(data){ cbl<-mean(data$Cobblel) bld<-mean(data$Boulder) WD<-mean(data$W.Dl) Run<-mean(data$Runs) Snd<-mean(data$Sandl) Sin<-mean(data$Sin) if(cbl >= 0.973){ if(bld >= 1.966){ Grp = 1 }else if(WD >= 3.725){ Grp = 4 }else{ Grp = 2 } }else if(Run >= 0.675){ Grp = 3 }else{ Grp = 2 } return(Grp) } for(i in 1:nrow(EHKG)){ if(EHKG$K_Group[i] == "multiple"){ dat = subset(E4,WBID == EHKG$WBID[i]) EHKG$K_Group[i] = Egrp(dat) print(EHKG$K_Group[i]) }else{ next } } table(EHKG$K_Group) # Add lat long to processed EHKG names(SM) EHKG<-EHKG %>% left_join(SM[,c(2:5)],by = "WBID", multiple = "any") nrow(EHKG) length(unique(EHKG$WBID)) write.csv(EHKG,"East_KGroups2.csv",row.names = FALSE) dim(EHKG) #### Western Cluster #### SMWest = subset(SM2, EW_Group == "West") summary(SMWest) names(SMWest) min(SMWest$Veg.Cover[SMWest$Veg.Cover > 0]) SMWest$Veg.Covl = log(SMWest$Veg.Cover + 0.05) df = SMWest[,c(1,3,37:39,46,59,67,70,71,73:81,89,119)] names(df) summary(df) length(unique(df$WBID)) df = na.omit(df) ####### Western data stats ############## nrow(df) length(unique(df$WBID)) sites = unique(df$WBID) samps = rep(0) for(s in 1:length(sites)){ dat = subset(df, WBID == sites[s]) samps[s] = nrow(dat) } samp_cnt<-cbind(sites,samps) Wstats<-rbind( cbind(1,2,3,4,5), cbind(length(samps[samps == 1])/140, length(samps[samps == 2])/140, length(samps[samps == 3])/140, length(samps[samps == 4])/140, length(samps[samps == 5])/140 )) Wstats ################### Determine West Clusters ######################## nrow(df) length(unique(df$WBID)) dft<-as.matrix(df[,-c(1,2)]) dft<-scale(dft) dim(dft) summary(dft) dfc<-NbClust(dft,method="kmeans") par(mfrow=c(1,1)) # 2 (6 votes), 4 (5 votes) or 14 (5 votes) clusters # Compute k-means with k = 2 set.seed(123) k2 <- kmeans(dft, 2, nstart = 25) #plot par(mfrow=c(1,2)) fviz_cluster(k2, data = dft) df$k2<-k2$cluster head(df) df$Longitude<-rep(NA) df$Latitude<-rep(NA) df$WBID<-rep(NA) SM$SID = paste0(SM$WBID," _ ",SM$Year) for(i in 1:length(df[,1])){ df$WBID[i]<-SM$WBID[SM$SID == df$SID[i]] df$Longitude[i]<-SM$Longitude[SM$SID == df$SID[i]] df$Latitude[i]<-SM$Latitude[SM$SID == df$SID[i]] } head(df) df<-cbind(df,dft) write.csv(df,"West_clusters2.csv", row.names=FALSE) K.results<-rbind(k2$centers) K.results write.csv(K.results,"HK_results_West2.csv") #### Prep K2 groups #### #Assign group modes to each site Mode = function(X){ vals = unique(X) cnts = rep(NA) for(i in 1:length(vals)){ cnts[i] = length(X[X==vals[i]]) } dat = data.frame(cbind(vals,cnts)) mode = dat[dat[,2]==max(cnts),1] if(length(mode) == 1){ return(mode) }else{ return("multiple") } } # Assign group based on the mode of samples #### W2<-read.csv('West_clusters2.csv') head(W2) names(W2) W2 = W2[,-c(23:43)] # drop extra groups and scaled vars names(W2) names(SM)#[c(1,2)] # Get SiteName library(dplyr) W2 = W2 %>% left_join(SM[,c(1,2)],by = "SID") nrow(W2) length(unique(W2$WBID)) Sites = unique(W2$WBID) K2 = rep(NA) WHKG = data.frame(cbind(Sites,K2)) colnames(WHKG) = c("WBID","K_Group") nrow(WHKG) for(i in 1:nrow(WHKG)){ dat = subset(W2, WBID == WHKG$WBID[i]) WHKG$K_Group[i] = Mode(dat$k2) } WHKG write.csv(W2, "New_WHKG2.csv", row.names = FALSE) library(dplyr) head(W2) dim(W2) names(WHKG) dim(WHKG) W2 = W2 %>% left_join(WHKG, by = 'WBID') W2$K_Group = as.character(W2$K_Group) table(W2$K_Group) length(unique(W2$WBID[W2$K_Group == "multiple"])) #### Classification tree to assign "multiple" K_Group nrow(W2[W2$K_Group == "multiple",]) W2comp = subset(W2, K_Group != "multiple") W2multi = subset(W2,K_Group == "multiple") nrow(W2comp) names(W2comp) W2test <-W2comp[,-c(1,2,23:24)] names(W2test) library(rpart) #### re-lable K_Groups for plot W2test$k2[W2test$k2 == 1] = rep("West Plains") W2test$k2[W2test$k2 == 2] = "West Valleys" write.csv(W2test,"C:/Users/167148/OCC_work/Manuscripts/Hierarchical clusters/Fig2cdata.csv", row.names = FALSE) set.seed(243) wct<-rpart(k2 ~ ., data = W2test, method = "class") par(mfrow=c(1,1)) plot(wct);text(wct) plotcp(wct) #### West Class Tree #### wctp<-prune(wct, cp=0.15) West_ct = plot(wctp);text(wctp) print(wctp) #### Need to assign the modal Kgroup to SID sites #### Run classification tree #### Use tree to classify non-modal sites. dim(W2) head(W2) names(W2) Wgrp = function(data){ Pools = mean(data$Pools, na.rm = T) if(Pools < 0.4125){ return(1) }else{ return(2) } } for(i in 1:nrow(WHKG)){ if(WHKG$K_Group[i] == "multiple"){ dat = subset(W2,WBID == WHKG$WBID[i]) WHKG$K_Group[i] = Wgrp(dat) print(WHKG$K_Group[i]) }else{ next } } table(WHKG$K_Group) # Add lat long to processed WHKG names(SM) WHKG<-WHKG %>% left_join(SM[,c(2:5)],by = "WBID", multiple = "any") nrow(WHKG) length(unique(WHKG$WBID)) write.csv(WHKG,"West_KGroups2.csv",row.names = FALSE) dim(WHKG) #### Compile East and West Sites with final designations #### East = read.csv("East_KGroups2.csv") head(East) West = read.csv("West_KGroups2.csv") head(West) ### Label East Stream Groups East$K_Group[East$K_Group == 1] = rep("Hills") East$K_Group[East$K_Group == 2] = rep("East Valleys") East$K_Group[East$K_Group == 3] = rep("East Plains") East$K_Group[East$K_Group == 4] = rep("Highlands") ### Lable West Stream Groups West$K_Group[West$K_Group == 1] = rep("West Plains") West$K_Group[West$K_Group == 2] = rep("West Valleys") HKG = rbind(East,West) summary(HKG) write.csv(HKG, "Final_Stream_Groups.csv", row.names = FALSE) #### Figure of Class Trees #### tiff('Classification_Trees.tiff', units="in", width=5, height=7, res=300, compression = 'lzw') par(mfrow=c(3,1)) plot(hctp,branch=1,uniform = TRUE,compress = TRUE,margin=0.3,main = "East vs. West");text(hctp) plot(ectp,branch=1,uniform = TRUE,compress = TRUE,margin=0.25,main = "East Group");text(ectp) plot(wctp,branch=1,uniform = TRUE,compress = TRUE,margin=0.25,main = "West Group");text(wctp) dev.off() par(mfrow=c(1,1)) #### BCA to compare Stream Groups against Mod Ecos by WQ and Hab #### setwd("C:/Users/167148/OCC_work/Reference Conditions and Criteria/HKG assessment") library(dplyr) library(rpart) library(caret) library(MuMIn) WQ = read.csv('Physicochemical Raw Data.csv') head(WQ) summary(WQ) WQ$WBID[WQ$SiteName == "Medicine Creek"] Sites = read.csv("Site Data.csv") head(Sites) Sites$WBID[Sites$SiteName == "Medicine Creek"] HKG = read.csv("Final_Stream_Groups.csv") head(HKG) HKG$WBID[HKG$SiteName == "Medicine Creek"] hkg = HKG[,c(1,2)] names(hkg) names(WQ) Ecos = WQ[,c(5,3)] names(Ecos) Sites = Sites %>% left_join(Ecos, by = "WBID", multiple = "any") Sites = Sites %>% left_join(hkg, by = 'WBID', multiple = "any") head(Sites) names(Sites) WQ_Sites = Sites[,c(1,2,4,5,6,10,31,32)] names(WQ_Sites) dim(WQ_Sites) WQ_Sites = na.omit(WQ_Sites) dim(WQ_Sites) names(WQ) WQ_param = data.frame(matrix(NA,927,14)) colnames(WQ_param) = c("TN","Available.N","TP","OP","Conductivity","Chloride", "Sulfate","DO","DO.perc","Alkalinity","pH","TSS","Turbidity", "Temperature.Water") WQ_Sites = cbind(WQ_Sites,WQ_param) head(WQ_Sites) for(i in 1:nrow(WQ_Sites)){ dat = subset(WQ, PCID == WQ_Sites$PCID[i]) WQ_Sites$TN[i] = median(dat$TN,na.rm=T) WQ_Sites$Available.N[i] = median(dat$Available.N,na.rm=T) WQ_Sites$TP[i] = median(dat$TP,na.rm=T) WQ_Sites$OP[i] = median(dat$OP,na.rm=T) WQ_Sites$Conductivity[i] = median(dat$Conductivity,na.rm=T) WQ_Sites$Chloride[i] = median(dat$Chloride,na.rm=T) WQ_Sites$Sulfate[i] = median(dat$Sulfate,na.rm=T) WQ_Sites$DO[i] = median(dat$DO,na.rm=T) WQ_Sites$DO.perc[i] = median(dat$DO.perc,na.rm=T) WQ_Sites$Alkalinity[i] = median(dat$Alkalinity,na.rm=T) WQ_Sites$pH[i] = median(dat$pH,na.rm=T) WQ_Sites$TSS[i] = median(dat$TSS,na.rm=T) WQ_Sites$Turbidity[i] = median(dat$Turbidity,na.rm=T) WQ_Sites$Temperature.Water[i] = median(dat$Temperature.Water,na.rm=T) } head(WQ_Sites) WQ_Sites = distinct(WQ_Sites) dim(WQ_Sites) write.csv(WQ_Sites, "HKG_WQ_final2.csv", row.names = FALSE) #### Get Habitat Data #### Hab <- read.csv("EW_Group_SiteMaster.csv") summary(Hab) unique(Hab$EW_Group) unique(Hab$Ecoregion) Hab$WBID[Hab$Ecoregion == "Wichita Mountains"] #Hab = na.omit(Hab) dim(Hab) #### Get Mod_Ecoregion and K_Group in exchange for Ecoregion and Group names(Hab) Hab = Hab[,c(57,3,7,11,13,26,27,29:34,37:42,46,88)] HKG <- read.csv("Final_Stream_Groups.csv") head(HKG) Hab = Hab %>% left_join(HKG[,c(1,2)],by = "WBID", multiple = "any") unique(Hab$K_Group) dim(Hab) Hab = na.omit(Hab) write.csv(Hab,"HKG_Hab_Final2.csv",row.names=FALSE) #### Join WQ and Habitat Data for BCA #### WQf = read.csv("HKG_WQ_final2.csv") Habf = read.csv("HKG_Hab_Final2.csv") names(WQf) names(Habf) ### Join on WQ for most complete data set HKG_data = WQf %>% left_join(Habf[,c(1,3,6:21)], by = 'PCID', multiple = 'any') head(HKG_data) summary(HKG_data) length(unique(HKG_data$PCID)) HKG_data=na.omit(HKG_data) # retain Wichita Mountains sites as separate K_Group names(HKG_data) unique(HKG_data$Modified.Ecoregion) HKG_data$K_Group[HKG_data$Modified.Ecoregion == 'Wichita'] = rep("Wichita") write.csv(HKG_data,"HKG_combined_data2.csv", row.names = FALSE) write.csv(HKG_data,"C:/Users/167148/OCC_work/Manuscripts/Hierarchical clusters/HKG_combined_data2.csv", row.names = FALSE) #### BCA for combined data types #### HKG_data = read.csv("HKG_combined_data2.csv") dim(HKG_data) ## Set up for ADE4 assessment head(HKG_data) HKG_data$Sample = paste(HKG_data$WBID,'_',HKG_data$Year) names(HKG_data) env = data.frame(HKG_data[,c(9:22,24:39)]) rownames(env) = HKG_data$Sample names(HKG_data) design = data.frame(HKG_data[,c(1:2,5,7,8)]) rownames(design) = HKG_data$Sample HKGdata = list(env,design) Env = HKGdata[[1]] row.names(Env) groups = as.factor(HKGdata[[2]]$K_Group) ecos = as.factor(HKGdata[[2]]$Modified.Ecoregion) library(ade4) library(adegraphics) methods(bca) methods(wca) # PCA # habitat (Envpca <- dudi.pca(Env, scannf = FALSE, nf = 3)) g1 <- s.corcircle(Envpca$co) g2 <- s.label(Envpca$li) ADEgS(list(g1, g2)) str(Envpca) s.class(Envpca$li, groups, col = T)#, xlim = c(-9, 7), ylim = c(-6, 8)) s.class(Envpca$li, ecos)#, xlim = c(-9, 7), ylim = c(-6, 8), col = TRUE) s.arrow(Envpca$co) s.arrow(Envpca$li) s.label(Envpca$li) s.label(Envpca$co) s1d.barchart(Envpca$eig,p1d.horizontal = F, ppolygons.col = "white") # between class anlysis # HKG Benv_g <- bca(Envpca, groups, scannf = FALSE) dim(Benv_g$tab) str(Benv_g) summary(Benv_g) # 27% plot(Benv_g, row.pellipses.col = adegpar()$ppalette$quali(6), row.ppoints.alpha = 0, row.plines.lty = 0) str(Benv_g) s.arrow(Benv_g$li) s.arrow(Benv_g$co) s.arrow(Benv_g$cw) s.corcircle(Benv_g$co) #### Plot for habitat BCA #### g1 = s.class(Benv_g$ls, groups, plines.lty = 0, ppoints.alpha = 0, pellipses = list(col = c("red","blue","lightgreen","violet","darkgreen","orange","grey"),alpha = 0.5)) g2 = s.arrow(Benv_g$c1) tiff('SG_BCA.tiff', units="in", width=7, height=5, res=300, compression = 'lzw') cbindADEg(g2,zoom(g1, 1.4, center = c(0.2,0)),plot=TRUE) dev.off() ### Ecos Benv_e <- bca(Envpca, ecos, scannf = FALSE) dim(Benv_e$tab) summary(Benv_e) # 25% plot(Benv_e, row.pellipses.col = adegpar()$ppalette$quali(13), row.ppoints.alpha = 0, row.plines.lty = 0) e1 = s.class(Benv_e$ls, ecos, plines.lty = 0, ppoints.alpha = 0, pellipses = list(col = adegpar()$ppalette$quali(13),alpha = 0.5)) e2 = s.arrow(Benv_e$c1) tiff('Eco_BCA.tiff', units="in", width=7, height=5, res=300, compression = 'lzw') cbindADEg(e2,zoom(e1, 1.3, center = c(0.3,0)),plot=TRUE) dev.off() #### Run BCA on Anthro features #### setwd("C:/Users/167148/OCC_work/Reference Conditions and Criteria/HKG assessment") library(dplyr) library(MuMIn) HKG_anthro = read.csv("HKG_by_AnthroEffects.csv") head(HKG_anthro) # Update K_Groups HKG_anthro = HKG_anthro[,-3] HKG = read.csv("Final_Stream_Groups.csv") names(HKG) HKG_anthro = HKG_anthro %>% left_join(HKG[,c(1,2)], by = 'WBID') unique(HKG_anthro$K_Group) ## Rename Wichitas K_Group to Highlands HKG_anthro$K_Group[HKG_anthro$SiteName == "Medicine Creek"] = "Highlands" dim(HKG_anthro) length(unique(HKG_anthro$COMID)) HKG_anthro$COMID = as.factor(HKG_anthro$COMID) HKG_anthro = HKG_anthro %>% distinct(COMID, .keep_all = TRUE) dim(HKG_anthro) # drop one extremely urban site: Rock Creek Oklahoma St. #HKG_anthro = subset(HKG_anthro, COMID != 19958996) ## Set up for ADE4 assessment names(HKG_anthro) # Give meaningful names write.csv(HKG_anthro, "C:/Users/167148/OCC_work/Manuscripts/Hierarchical clusters/HKG_combined_data2.csv", row.names = FALSE) anthro = data.frame(HKG_anthro[,c(7:21)]) colnames(anthro) = c("OpenUrb","LowUrb","MedUrb","HiUrb","Hay","Crop","CBNF","Fert", "Manure","ICI","CCHEM","RoadDens","Road_Xing","Houses","Population") dim(anthro) rownames(anthro) = HKG_anthro$COMID names(HKG_anthro) design = data.frame(HKG_anthro[,c(1,22)]) rownames(design) = HKG_anthro$COMID HKGdata = list(anthro,design) Anthro = HKGdata[[1]] row.names(Anthro) groups = as.factor(HKGdata[[2]]$K_Group) library(ade4) library(adegraphics) methods(bca) methods(wca) # PCA # anthro (Anthro_pca <- dudi.pca(Anthro, scannf = FALSE, nf = 3)) g1 <- s.corcircle(Anthro_pca$co) g2 <- s.label(Anthro_pca$li) ADEgS(list(g1, g2)) s.class(Anthro_pca$li, groups, col = T)#, xlim = c(-9, 7), ylim = c(-6, 8)) # between class analysis # HKG Btw_g <- bca(Anthro_pca, groups, scannf = FALSE) dim(Btw_g$tab) summary(Btw_g) # 12% plot(Btw_g, row.pellipses.col = adegpar()$ppalette$quali(6)) #### Plot for anthro BCA #### a1 = s.class(Btw_g$ls, groups, plines.lty = 0, ppoints.alpha = 0, pellipses = list(col = c("red","blue","lightgreen","violet","darkgreen","orange"),alpha = 0.5)) a2 = s.arrow(Btw_g$c1) tiff('Anthro_BCA.tiff', units="in", width=7, height=5, res=300, compression = 'lzw') cbindADEg(a2,zoom(a1, 1.4, center = c(0.5,0)),plot=TRUE) dev.off() ##### Summary stats on streams per group ##### HKG = read.csv("Final_Stream_Groups.csv") names(HKG) table(HKG$K_Group) DO <- read.csv("C:/Users/167148/OCC_work/DO Analysis/Data/All_Sites_Predicted.csv") head(DO) library(dplyr) DO = DO %>% left_join(HKG[,c('WBID','K_Group')], by='WBID') head(DO) unique(DO$Assessment) DO<-subset(DO, Assessment %in% c("Not Attaining","Undetermined")) DO<-subset(DO, Assessment %in% c("Not Attaining")) table(DO$Pred,DO$K_Group) table(DO$Pred_w_error,DO$K_Group)