# calculations
library(geomorph) # GPA
library(Morpho) # CVA
library(mgcv) # GAM
library(mgcViz) # GAM
library(stats) # MANOVA
library(phangorn) # UPGMA
# plotting and visualization
library(ggplot2) # plots
ggplot2::theme_set(theme_light())
library(rnaturalearth) # maps
library(raster) # raster
library(ggspatial) # annotation_scale
library(viridis) # colour gradientInfluence of honey bee (Apis mellifera) breeding on wing venation in Serbia and neighbouring countries
Supplementary document with statistical analysis
Libraries
Variable names
p <- 19 # number of landmarks
k <- 2 # number of dimensions, in this case 2 for coordinates (x, y)
# create coordinates names used by IdentiFly
xyNames <- c("x1", "y1")
for (i in 2:p) {
xyNames <- c(xyNames, paste0("x", i))
xyNames <- c(xyNames, paste0("y", i))
}
xyNames [1] "x1" "y1" "x2" "y2" "x3" "y3" "x4" "y4" "x5" "y5" "x6" "y6"
[13] "x7" "y7" "x8" "y8" "x9" "y9" "x10" "y10" "x11" "y11" "x12" "y12"
[25] "x13" "y13" "x14" "y14" "x15" "y15" "x16" "y16" "x17" "y17" "x18" "y18"
[37] "x19" "y19"
# The number of principal components used is 2*p-4 = 34, which is equal to the
# degrees of freedom create principal components names used by prcomp
pcNames <- paste0("PC", 1:(2 * p - 4))
pcNames [1] "PC1" "PC2" "PC3" "PC4" "PC5" "PC6" "PC7" "PC8" "PC9" "PC10"
[11] "PC11" "PC12" "PC13" "PC14" "PC15" "PC16" "PC17" "PC18" "PC19" "PC20"
[21] "PC21" "PC22" "PC23" "PC24" "PC25" "PC26" "PC27" "PC28" "PC29" "PC30"
[31] "PC31" "PC32" "PC33" "PC34"
# define which landmarks are connected by lines in wireframe graph
link.x <- c(1, 1, 2, 2, 3, 3, 4, 4, 5, 6, 7, 7, 7, 8, 9, 9, 10, 11, 11, 12, 13, 14,
15, 16, 17)
link.y <- c(2, 3, 4, 5, 6, 19, 6, 10, 12, 8, 8, 14, 19, 9, 10, 15, 11, 12, 16, 13,
18, 15, 16, 17, 18)
links.apis <- cbind(link.x, link.y)Read Serbia data
Some data about honey bees in Serbia originate from an earlier study Oleksa et al., 2022, Oleksa et al., 2023. Data newly obtained for this study is available at Zenodo Kaur et al., 2023
wings <- read.csv("https://zenodo.org/record/7244070/files/RS-raw-coordinates.csv")
wings <- rbind(wings, read.csv("https://zenodo.org/record/10389960/files/RS_21_80-raw-coordinates.csv"))
# extract sample classifier from file name
wings$sample <- substr(wings$file, 1, 7)
# Read geographic coordinates
geo.data <- read.csv("https://zenodo.org/record/7244070/files/RS-data.csv")
geo.data <- rbind(geo.data, read.csv("https://zenodo.org/record/10389960/files/RS_21_80-data.csv"))
sample.geo.data <- aggregate(geo.data[c("latitude", "longitude", "date")], by = list(geo.data$sample),
FUN = mean)
sample.geo.data <- data.frame(sample.geo.data, row.names = 1) # move column 1 to row namesRead C lineage data
As reference for SE Europe we used the data obtained from the earlier study Oleksa et al., 2022, Oleksa et al., 2023.
wings.C <- read.csv("https://zenodo.org/record/7244070/files/EU-raw-coordinates.csv")
geo.data.C <- read.csv("https://zenodo.org/record/7244070/files/EU-geo-data.csv")
wings.C$latitude <- geo.data.C$latitude
wings.C$longitude <- geo.data.C$longitude
wings.C <- subset(wings.C, country %in% c("AT", "GR", "HR", "HU", "MD", "ME", "RO",
"SI", "TR"))
# GPA Convert 2D array into a 3D array
wings.C.3D <- arrayspecs(wings.C[xyNames], p, k)
dimnames(wings.C.3D)[[3]] <- wings.C$file
# Align the coordinates using Generalized Procrustes Analysis
GPA.C <- gpagen(wings.C.3D, print.progress = FALSE)
# Convert 3D array into a 2D array - opposite to arrayspecs
wings.C.aligned <- two.d.array(GPA.C$coords)
# rename column names from 1.X, 1.Y to x1, y1 ...
colnames(wings.C.aligned) <- gsub("([0-9]+)\\.([XY])", "\\L\\2\\1", colnames(wings.C.aligned),
perl = TRUE)
# average data within samples
C.samples <- aggregate(wings.C.aligned, by = list(wings.C$sample), FUN = mean)
C.samples <- data.frame(C.samples, row.names = 1) # move column 1 to row names
# average geo data
geo.sample <- aggregate(wings.C[c("latitude", "longitude")], by = list(wings.C$sample),
FUN = mean)
geo.sample <- data.frame(geo.sample, row.names = 1) # move column 1 to row names
C.samples <- cbind(C.samples, geo.sample)Map
# Read elevation data In order to download the TIF file uncomment the two lines
# below
# download.file('https://geodata.ucdavis.edu/climate/worldclim/2_1/base/wc2.1_2.5m_elev.zip',
# 'wc2.1_2.5m_elev.zip') unzip('wc2.1_2.5m_elev.zip') or use your local file
elev <- raster("C:/data/WorldClim/wc2.1_2.5m_elev.tif")
elev.color <- colorRampPalette(c("#f7f7f7", "#f0f0f0", "#d9d9d9", "#bdbdbd", "#636363"),
bias = 2)
x.min <- 18
x.max <- 24
y.min <- 41.8
y.max <- 46.5
e <- extent(x.min - 1, x.max + 1, y.min - 1, y.max + 1)
elev <- crop(elev, e)
elev.df <- data.frame(rasterToPoints(elev))
colnames(elev.df) <- c("longitude", "latitude", "altitude")
world <- ne_countries(scale = "medium", returnclass = "sf")
# Names of neighboring countries
neighboring_countries <- data.frame(name = c("Croatia", "Hungary", "Romania", "Bulgaria",
"North Macedonia", "Albania", "Montenegro", "Bosnia and \nHerzegovina", "Kosovo*",
"Serbia"), latitude = c(45.4, 46.4, 45.44, 42.68, 41.91, 42.1, 42.8, 44, 42.5,
44.7), longitude = c(18.2, 19.82, 22.5, 23.39, 21.7, 19.82, 19.37, 18.3, 20.8,
20.4))
# * The designation of Kosovo is without prejudice to positions on status, and
# is in line with UNSCR 1244/1999 and ICJ Opinion on the Kosovo declaration of
# independence.
ggplot(data = world) + geom_raster(data = elev.df, aes(longitude, latitude, fill = altitude)) +
scale_fill_gradientn(colours = elev.color(100)) + geom_sf(fill = NA) + geom_point(data = sample.geo.data,
aes(x = longitude, y = latitude, color = as.factor(date))) + scale_color_manual(name = "year",
values = c("#E69F00", "#009E73", "#56B4E9")) + coord_fixed() + coord_sf(xlim = c(x.min,
x.max), ylim = c(y.min, y.max)) + geom_text(data = neighboring_countries, aes(x = longitude,
y = latitude, label = name), color = "black", size = 3, nudge_y = 0.02) + annotation_scale(location = "tr",
width_hint = 0.2)GPA alignment
# Convert from 2D array to 3D array
wings.coords <- arrayspecs(wings[xyNames], p, k)
dimnames(wings.coords)[[3]] <- wings$file
# Align the coordinates using Generalized Procrustes Analysis
GPA.RS <- gpagen(wings.coords, print.progress = FALSE)
# plot landmarks after alignment
plotAllSpecimens(GPA.RS$coords, links = links.apis, label = TRUE, plot.param = list(pt.bg = "black",
pt.cex = 0.5, mean.bg = "red", mean.cex = 1, link.col = "red", txt.pos = 3, txt.cex = 1))# Convert from 3D array to 2D array
wings.aligned.RS <- two.d.array(GPA.RS$coords)
# rename column names from 1.X, 1.Y to x1, y1 ...
colnames(wings.aligned.RS) <- gsub("([0-9]+)\\.([XY])", "\\L\\2\\1", colnames(wings.aligned.RS),
perl = TRUE)
# Average coorinates within samples
RS.samples <- aggregate(wings.aligned.RS, by = list(wings$sample), FUN = mean)
RS.samples <- data.frame(RS.samples, row.names = 1) # move column 1 to row names
RS.samples <- cbind(RS.samples, sample.geo.data)PCA Serbia - with breeders
RS.pca <- prcomp(RS.samples[, xyNames])
RS.pca.scores <- as.data.frame(RS.pca$x[, pcNames])
RS.pca.scores <- cbind(RS.pca.scores, sample.geo.data)
# create plot labels
variance.tab <- summary(RS.pca)$importance
variance <- variance.tab["Proportion of Variance", "PC1"]
variance <- round(100 * variance, 1)
label.x <- paste0("PC1 (", variance, "%)")
variance <- variance.tab["Proportion of Variance", "PC2"]
variance <- round(100 * variance, 1)
label.y <- paste0("PC2 (", variance, "%)")
PCA.A <- ggplot(RS.pca.scores, aes(x = PC1, y = PC2, color = as.factor(date))) +
geom_point() + scale_color_manual(name = "year", values = c("#E69F00", "#009E73",
"#56B4E9")) + stat_ellipse() + xlab(label.x) + ylab(label.y) + theme(legend.position = c(0.9,
0.15), legend.background = element_rect(fill = "white", color = "black"), legend.title = element_blank())
plot(PCA.A)# Serbian bees collected in different years differ significantly in wing shape
year.MANOVA <- manova(as.matrix(RS.pca.scores[, pcNames]) ~ date, data = RS.pca.scores)
summary(year.MANOVA) Df Pillai approx F num Df den Df Pr(>F)
date 1 0.79193 5.0374 34 45 3.848e-07 ***
Residuals 78
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
GAM Serbia - with breeders
# PC1
ggplot(data = world) + geom_sf() + geom_point(data = RS.pca.scores, aes(x = longitude,
y = latitude, colour = PC1), size = 1) + coord_sf(xlim = c(17.4, 24.2), ylim = c(41.8,
46.5), expand = FALSE) + scale_color_viridis(name = "PC1") + xlab("Longitude") +
ylab("Latitude")GAM <- mgcv::gam(PC1 ~ s(longitude, latitude), data = RS.pca.scores)
anova(GAM)
Family: gaussian
Link function: identity
Formula:
PC1 ~ s(longitude, latitude)
Approximate significance of smooth terms:
edf Ref.df F p-value
s(longitude,latitude) 16.99 21.73 5.286 <2e-16
viz.GAM <- getViz(GAM)
GAM.A <- plot(viz.GAM, 1) + l_fitRaster() + l_fitContour(color = "grey30") + l_points() +
labs(title = NULL) + scale_fill_continuous(type = "viridis", name = "PC1", na.value = "transparent") +
scale_x_continuous(limits = c(17.4, 24.2), expand = c(0, 0)) + scale_y_continuous(limits = c(41.8,
46.5), expand = c(0, 0)) + geom_path(data = map_data("world"), aes(x = long,
y = lat, group = group), color = "black", inherit.aes = FALSE)
GAM.ACVA SE Europe - with breeders
C.samples$date <- NA
wings.C.all <- rbind(C.samples, RS.samples)
# GPA Convert 2D array into a 3D array
wings.C.3D <- arrayspecs(wings.C.all[xyNames], p, k)
dimnames(wings.C.3D)[[3]] <- rownames(wings.C.all)
# Align the coordinates using Generalized Procrustes Analysis
GPA.C <- gpagen(wings.C.3D, print.progress = FALSE)
# Convert 3D array into a 2D array - opposite to arrayspecs
wings.C.aligned <- two.d.array(GPA.C$coords)
# rename column names from 1.X, 1.Y to x1, y1 ...
colnames(wings.C.aligned) <- gsub("([0-9]+)\\.([XY])", "\\L\\2\\1", colnames(wings.C.aligned),
perl = TRUE)
PCA.C <- prcomp(wings.C.aligned[, xyNames])
PCA.C.scores <- as.data.frame(PCA.C$x[, pcNames])
PCA.C.scores$country <- substr(rownames(wings.C.all), 1, 2)
PCA.C.scores$country <- as.factor(PCA.C.scores$country)
levels(PCA.C.scores$country) <- list(Austria = "AT", Croatia = "HR", Greece = "GR",
Hungary = "HU", Moldova = "MD", Montenegro = "ME", Romania = "RO", Serbia = "RS",
Slovenia = "SI", Turkey = "TR")
PCA.C.scores <- cbind(PCA.C.scores, wings.C.all[c("latitude", "longitude", "date")])
# create plot labels
variance.tab <- summary(PCA.C)$importance
variance <- variance.tab["Proportion of Variance", "PC1"]
variance <- round(100 * variance, 1)
label.x <- paste0("PC1 (", variance, "%)")
variance <- variance.tab["Proportion of Variance", "PC2"]
variance <- round(100 * variance, 1)
label.y <- paste0("PC2 (", variance, "%)")
ggplot(PCA.C.scores, aes(x = PC1, y = PC2, color = country)) + geom_point() + stat_ellipse()# CVA
PCA.C.scores$group.year <- ifelse(PCA.C.scores$country == "Serbia", paste(as.character(PCA.C.scores$country),
PCA.C.scores$date, sep = "-"), as.character(PCA.C.scores$country))
# use equal prior probability for all groups
n.gr <- length(unique(PCA.C.scores$group.year)) # number of groups
sample.cva <- CVA(PCA.C.scores[pcNames], PCA.C.scores$group.year, rounds = 10000,
cv = TRUE, prior = rep(1/n.gr, n.gr))
sample.cva.scores <- as.data.frame(sample.cva$CVscores)
# remove unwanted spaces in variable names otherwise use `CV 1`
colnames(sample.cva.scores) <- gsub(" ", "", colnames(sample.cva.scores))
# sample.cva.scores <- cbind(sample.geo.data, sample.cva.scores)
sample.cva.scores$group.year <- PCA.C.scores$group.year
ggplot(sample.cva.scores, aes(x = CV1, y = CV2, shape = group.year, color = group.year)) +
geom_point() + scale_shape_manual(name = "group", values = c(1:12)) + scale_color_manual(name = "group",
values = rainbow(12)) + stat_ellipse()# Confusion matrix Classification of the samples to groups
CVA.class <- typprobClass(sample.cva$CVscores, groups = as.factor(sample.cva.scores$group.year),
outlier = 0)
print(CVA.class) cross-validated classification results in frequencies
Austria Croatia Greece Hungary Moldova Montenegro Romania
Austria 10 0 0 0 0 0 0
Croatia 0 120 0 7 0 0 0
Greece 0 0 231 0 0 0 0
Hungary 0 0 0 17 0 0 5
Moldova 0 0 0 0 9 0 1
Montenegro 0 0 0 0 0 17 0
Romania 0 2 1 2 2 0 183
Serbia-2005 0 0 0 0 0 0 0
Serbia-2010 0 0 0 0 0 5 0
Serbia-2022 0 0 0 0 0 0 0
Slovenia 0 1 0 2 1 0 0
Turkey 0 0 0 0 0 0 0
Serbia-2005 Serbia-2010 Serbia-2022 Slovenia Turkey
Austria 0 0 0 0 0
Croatia 3 0 6 24 0
Greece 9 0 1 0 3
Hungary 0 0 0 0 0
Moldova 0 0 0 0 0
Montenegro 0 3 0 0 0
Romania 0 0 0 7 0
Serbia-2005 3 0 1 0 0
Serbia-2010 0 15 0 0 0
Serbia-2022 1 0 55 0 0
Slovenia 0 0 0 17 0
Turkey 0 0 1 0 59
cross-validated classification result in %
Austria Croatia Greece Hungary Moldova Montenegro
Austria 100.00000 0.00000 0.00000 0.00000 0.00000 0.00000
Croatia 0.00000 75.00000 0.00000 4.37500 0.00000 0.00000
Greece 0.00000 0.00000 94.67213 0.00000 0.00000 0.00000
Hungary 0.00000 0.00000 0.00000 77.27273 0.00000 0.00000
Moldova 0.00000 0.00000 0.00000 0.00000 90.00000 0.00000
Montenegro 0.00000 0.00000 0.00000 0.00000 0.00000 85.00000
Romania 0.00000 1.01523 0.50761 1.01523 1.01523 0.00000
Serbia-2005 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000
Serbia-2010 0.00000 0.00000 0.00000 0.00000 0.00000 25.00000
Serbia-2022 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000
Slovenia 0.00000 4.76190 0.00000 9.52381 4.76190 0.00000
Turkey 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000
Romania Serbia-2005 Serbia-2010 Serbia-2022 Slovenia Turkey
Austria 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000
Croatia 0.00000 1.87500 0.00000 3.75000 15.00000 0.00000
Greece 0.00000 3.68852 0.00000 0.40984 0.00000 1.22951
Hungary 22.72727 0.00000 0.00000 0.00000 0.00000 0.00000
Moldova 10.00000 0.00000 0.00000 0.00000 0.00000 0.00000
Montenegro 0.00000 0.00000 15.00000 0.00000 0.00000 0.00000
Romania 92.89340 0.00000 0.00000 0.00000 3.55330 0.00000
Serbia-2005 0.00000 75.00000 0.00000 25.00000 0.00000 0.00000
Serbia-2010 0.00000 0.00000 75.00000 0.00000 0.00000 0.00000
Serbia-2022 0.00000 1.78571 0.00000 98.21429 0.00000 0.00000
Slovenia 0.00000 0.00000 0.00000 0.00000 80.95238 0.00000
Turkey 0.00000 0.00000 0.00000 1.66667 0.00000 98.33333
overall classification accuracy: 89.32039 %
Kappa statistic: 0.86945
# Mahalanobis distances between groups
knitr::kable(as.data.frame(as.matrix(sample.cva$Dist$GroupdistMaha)), digits = 3)| Austria | Croatia | Greece | Hungary | Moldova | Montenegro | Romania | Serbia-2005 | Serbia-2010 | Serbia-2022 | Slovenia | Turkey | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Austria | 0.000 | 7.997 | 9.919 | 6.901 | 8.242 | 8.055 | 6.947 | 8.225 | 8.036 | 7.695 | 8.843 | 8.324 |
| Croatia | 7.997 | 0.000 | 5.703 | 3.604 | 5.694 | 5.992 | 4.264 | 3.591 | 5.257 | 3.766 | 2.732 | 5.224 |
| Greece | 9.919 | 5.703 | 0.000 | 7.300 | 7.803 | 6.868 | 6.345 | 4.735 | 6.682 | 5.490 | 6.468 | 5.648 |
| Hungary | 6.901 | 3.604 | 7.300 | 0.000 | 5.990 | 6.944 | 4.198 | 5.637 | 6.342 | 5.270 | 4.306 | 6.263 |
| Moldova | 8.242 | 5.694 | 7.803 | 5.990 | 0.000 | 7.288 | 4.253 | 6.173 | 6.597 | 6.151 | 5.428 | 7.535 |
| Montenegro | 8.055 | 5.992 | 6.868 | 6.944 | 7.288 | 0.000 | 5.732 | 6.009 | 2.085 | 5.059 | 7.265 | 6.945 |
| Romania | 6.947 | 4.264 | 6.345 | 4.198 | 4.253 | 5.732 | 0.000 | 5.201 | 5.269 | 5.146 | 4.703 | 5.402 |
| Serbia-2005 | 8.225 | 3.591 | 4.735 | 5.637 | 6.173 | 6.009 | 5.201 | 0.000 | 5.746 | 3.161 | 4.697 | 5.101 |
| Serbia-2010 | 8.036 | 5.257 | 6.682 | 6.342 | 6.597 | 2.085 | 5.269 | 5.746 | 0.000 | 5.039 | 6.469 | 7.021 |
| Serbia-2022 | 7.695 | 3.766 | 5.490 | 5.270 | 6.151 | 5.059 | 5.146 | 3.161 | 5.039 | 0.000 | 4.916 | 5.742 |
| Slovenia | 8.843 | 2.732 | 6.468 | 4.306 | 5.428 | 7.265 | 4.703 | 4.697 | 6.469 | 4.916 | 0.000 | 6.320 |
| Turkey | 8.324 | 5.224 | 5.648 | 6.263 | 7.535 | 6.945 | 5.402 | 5.101 | 7.021 | 5.742 | 6.320 | 0.000 |
# pairwise probabilities of differences between groups
knitr::kable(as.data.frame(as.matrix(sample.cva$Dist$probsMaha)), digits = 5)| Austria | Croatia | Greece | Hungary | Moldova | Montenegro | Romania | Serbia-2005 | Serbia-2010 | Serbia-2022 | Slovenia | Turkey | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Austria | 0e+00 | 0.00010 | 0.0001 | 0.0001 | 0.0001 | 0.00010 | 0.0001 | 0.00010 | 0.00010 | 0.00010 | 0.00010 | 0.0001 |
| Croatia | 1e-04 | 0.00000 | 0.0001 | 0.0001 | 0.0001 | 0.00010 | 0.0001 | 0.33477 | 0.00010 | 0.00010 | 0.00080 | 0.0001 |
| Greece | 1e-04 | 0.00010 | 0.0000 | 0.0001 | 0.0001 | 0.00010 | 0.0001 | 0.02040 | 0.00010 | 0.00010 | 0.00010 | 0.0001 |
| Hungary | 1e-04 | 0.00010 | 0.0001 | 0.0000 | 0.0001 | 0.00010 | 0.0001 | 0.00440 | 0.00010 | 0.00010 | 0.00010 | 0.0001 |
| Moldova | 1e-04 | 0.00010 | 0.0001 | 0.0001 | 0.0000 | 0.00010 | 0.0001 | 0.00360 | 0.00010 | 0.00010 | 0.00010 | 0.0001 |
| Montenegro | 1e-04 | 0.00010 | 0.0001 | 0.0001 | 0.0001 | 0.00000 | 0.0001 | 0.00180 | 0.52105 | 0.00010 | 0.00010 | 0.0001 |
| Romania | 1e-04 | 0.00010 | 0.0001 | 0.0001 | 0.0001 | 0.00010 | 0.0000 | 0.00510 | 0.00010 | 0.00010 | 0.00010 | 0.0001 |
| Serbia-2005 | 1e-04 | 0.33477 | 0.0204 | 0.0044 | 0.0036 | 0.00180 | 0.0051 | 0.00000 | 0.00340 | 0.69503 | 0.05249 | 0.0091 |
| Serbia-2010 | 1e-04 | 0.00010 | 0.0001 | 0.0001 | 0.0001 | 0.52105 | 0.0001 | 0.00340 | 0.00000 | 0.00010 | 0.00010 | 0.0001 |
| Serbia-2022 | 1e-04 | 0.00010 | 0.0001 | 0.0001 | 0.0001 | 0.00010 | 0.0001 | 0.69503 | 0.00010 | 0.00000 | 0.00010 | 0.0001 |
| Slovenia | 1e-04 | 0.00080 | 0.0001 | 0.0001 | 0.0001 | 0.00010 | 0.0001 | 0.05249 | 0.00010 | 0.00010 | 0.00000 | 0.0001 |
| Turkey | 1e-04 | 0.00010 | 0.0001 | 0.0001 | 0.0001 | 0.00010 | 0.0001 | 0.00910 | 0.00010 | 0.00010 | 0.00010 | 0.0000 |
UPGMA - with breeders
UPBMA.A <- upgma(sample.cva$Dist$GroupdistMaha)
plot(UPBMA.A, label.offset = 0.1, cex = 0.5)Remove breeders
RS.samples.short <- subset(RS.samples, date %in% c("2005", "2022"))PCA Serbia - without breeders
RS.pca <- prcomp(RS.samples.short[, xyNames])
RS.pca.scores <- as.data.frame(RS.pca$x[, pcNames])
RS.pca.scores$latitude <- RS.samples.short$latitude
RS.pca.scores$longitude <- RS.samples.short$longitude
RS.pca.scores$date <- RS.samples.short$date
# create plot labels
variance.tab <- summary(RS.pca)$importance
variance <- variance.tab["Proportion of Variance", "PC1"]
variance <- round(100 * variance, 1)
label.x <- paste0("PC1 (", variance, "%)")
variance <- variance.tab["Proportion of Variance", "PC2"]
variance <- round(100 * variance, 1)
label.y <- paste0("PC2 (", variance, "%)")
PCA.B <- ggplot(RS.pca.scores, aes(x = PC1, y = PC2, color = as.factor(date))) +
geom_point() + scale_color_manual(name = "year", values = c("#E69F00", "#56B4E9")) +
stat_ellipse() + xlab(label.x) + ylab(label.y) + scale_x_continuous(expand = c(0.06,
0)) + theme(legend.position = c(0.9, 0.15), legend.background = element_rect(fill = "white",
color = "black"), legend.title = element_blank())
plot(PCA.B)GAM Serbia - without breeders
# PC1
ggplot(data = world) + geom_sf() + geom_point(data = RS.pca.scores, aes(x = longitude,
y = latitude, colour = PC1), size = 1) + coord_sf(xlim = c(17.4, 24.2), ylim = c(41.8,
46.5), expand = FALSE) + scale_color_viridis(name = "PC1") + xlab("Longitude") +
ylab("Latitude")GAM <- mgcv::gam(PC1 ~ s(longitude, latitude), data = RS.pca.scores)
anova(GAM)
Family: gaussian
Link function: identity
Formula:
PC1 ~ s(longitude, latitude)
Approximate significance of smooth terms:
edf Ref.df F p-value
s(longitude,latitude) 3.844 5.170 2.068 0.0812
viz.GAM <- getViz(GAM)
GAM.B <- plot(viz.GAM, 1) + l_fitRaster() + l_fitContour(color = "grey30") + l_points() +
labs(title = NULL) + scale_fill_continuous(type = "viridis", name = "PC1", na.value = "transparent") +
scale_x_continuous(limits = c(17.4, 24.2), expand = c(0, 0)) + scale_y_continuous(limits = c(41.8,
46.5), expand = c(0, 0)) + geom_path(data = map_data("world"), aes(x = long,
y = lat, group = group), color = "black", inherit.aes = FALSE)
GAM.BPCA SE Europe - without breeders
countries <- substr(rownames(C.samples), 1, 2)
C.samples.short <- subset(C.samples, countries %in% c("GR", "HR", "HU", "MD", "RO",
"SI", "TR"))
samples <- rbind(C.samples.short, RS.samples.short)
# GPA Convert 2D array into a 3D array
samples.3D <- arrayspecs(samples[xyNames], p, k)
dimnames(samples.3D)[[3]] <- rownames(samples)
# Align the coordinates using Generalized Procrustes Analysis
GPA.C <- gpagen(samples.3D, print.progress = FALSE)
# Convert 3D array into a 2D array - opposite to arrayspecs
samples.aligned <- two.d.array(GPA.C$coords)
# rename column names from 1.X, 1.Y to x1, y1 ...
colnames(samples.aligned) <- gsub("([0-9]+)\\.([XY])", "\\L\\2\\1", colnames(samples.aligned),
perl = TRUE)
# PCA
PCA.C <- prcomp(samples.aligned[, xyNames])
PCA.C.scores <- as.data.frame(PCA.C$x[, pcNames])
# change sign of the PC1 for easier comparisons with earlier graphs
PCA.C.scores[, "PC1"] <- -PCA.C.scores[, "PC1"]
PCA.C.scores$latitude <- samples$latitude[]
PCA.C.scores$longitude <- samples$longitude[]
PCA.C.scores$country <- substr(rownames(samples), 1, 2)
PCA.C.scores$country <- as.factor(PCA.C.scores$country)
levels(PCA.C.scores$country) <- list(Croatia = "HR", Greece = "GR", Hungary = "HU",
Moldova = "MD", Romania = "RO", Serbia = "RS", Slovenia = "SI", Turkey = "TR")
# create plot labels
variance.tab <- summary(PCA.C)$importance
variance <- variance.tab["Proportion of Variance", "PC1"]
variance <- round(100 * variance, 1)
label.x <- paste0("PC1 (", variance, "%)")
variance <- variance.tab["Proportion of Variance", "PC2"]
variance <- round(100 * variance, 1)
label.y <- paste0("PC2 (", variance, "%)")
ggplot(PCA.C.scores, aes(x = PC1, y = PC2, color = country)) + geom_point() + stat_ellipse()# PC1
ggplot(data = world) + geom_sf() + geom_point(data = PCA.C.scores, aes(x = longitude,
y = latitude, colour = PC1), size = 1) + coord_sf(xlim = c(10, 30), ylim = c(33,
50), expand = FALSE) + scale_color_viridis(name = "PC1") + xlab("Longitude") +
ylab("Latitude")GAM <- mgcv::gam(PC1 ~ s(longitude, latitude), data = PCA.C.scores)
anova(GAM)
Family: gaussian
Link function: identity
Formula:
PC1 ~ s(longitude, latitude)
Approximate significance of smooth terms:
edf Ref.df F p-value
s(longitude,latitude) 23.09 26.94 83.45 <2e-16
viz.GAM <- getViz(GAM)
plot(viz.GAM, 1) + l_fitRaster() + l_fitContour(color = "grey30") + l_points() +
labs(title = NULL) + scale_fill_continuous(type = "viridis", name = "PC1", na.value = "transparent") +
scale_x_continuous(limits = c(10, 30), expand = c(0, 0)) + scale_y_continuous(limits = c(33,
50), expand = c(0, 0)) + geom_path(data = map_data("world"), aes(x = long, y = lat,
group = group), color = "black", inherit.aes = FALSE)# PC2
ggplot(data = world) + geom_sf() + geom_point(data = PCA.C.scores, aes(x = longitude,
y = latitude, colour = PC2), size = 1) + coord_sf(xlim = c(10, 30), ylim = c(33,
50), expand = FALSE) + scale_color_viridis(name = "PC1") + xlab("Longitude") +
ylab("Latitude")GAM <- mgcv::gam(PC2 ~ s(longitude, latitude), data = PCA.C.scores)
anova(GAM)
Family: gaussian
Link function: identity
Formula:
PC2 ~ s(longitude, latitude)
Approximate significance of smooth terms:
edf Ref.df F p-value
s(longitude,latitude) 25.27 28.13 29.35 <2e-16
viz.GAM <- getViz(GAM)
plot(viz.GAM, 1) + l_fitRaster() + l_fitContour(color = "grey30") + l_points() +
labs(title = NULL) + scale_fill_continuous(type = "viridis", name = "PC2", na.value = "transparent") +
scale_x_continuous(limits = c(10, 30), expand = c(0, 0)) + scale_y_continuous(limits = c(33,
50), expand = c(0, 0)) + geom_path(data = map_data("world"), aes(x = long, y = lat,
group = group), color = "black", inherit.aes = FALSE)CVA SE Europe - without breeders
# use equal prior probability for all groups
n.gr <- length(unique(PCA.C.scores$country)) # number of groups
sample.cva <- CVA(PCA.C.scores[pcNames], PCA.C.scores$country, rounds = 10000, cv = TRUE,
prior = rep(1/n.gr, n.gr))
sample.cva.scores <- as.data.frame(sample.cva$CVscores)
# remove unwanted spaces in variable names otherwise use `CV 1`
colnames(sample.cva.scores) <- gsub(" ", "", colnames(sample.cva.scores))
sample.cva.scores$country <- PCA.C.scores$country
ggplot(sample.cva.scores, aes(x = CV1, y = CV2, shape = country, color = country)) +
geom_point() + scale_shape_manual(name = "country", values = c(1:8)) + scale_color_manual(name = "country",
values = rainbow(8)) + stat_ellipse()# Confusion matrix Classification of the samples to groups
CVA.class <- typprobClass(sample.cva$CVscores, groups = as.factor(sample.cva.scores$country),
outlier = 0)
print(CVA.class) cross-validated classification results in frequencies
Croatia Greece Hungary Moldova Romania Serbia Slovenia Turkey
Croatia 123 1 7 0 0 5 24 0
Greece 1 238 0 0 0 3 0 2
Hungary 0 0 17 0 5 0 0 0
Moldova 0 0 0 9 1 0 0 0
Romania 2 1 2 2 183 0 7 0
Serbia 1 0 0 0 0 59 0 0
Slovenia 1 0 0 1 0 0 19 0
Turkey 0 0 0 0 0 1 0 59
cross-validated classification result in %
Croatia Greece Hungary Moldova Romania Serbia Slovenia
Croatia 76.87500 0.62500 4.37500 0.00000 0.00000 3.12500 15.00000
Greece 0.40984 97.54098 0.00000 0.00000 0.00000 1.22951 0.00000
Hungary 0.00000 0.00000 77.27273 0.00000 22.72727 0.00000 0.00000
Moldova 0.00000 0.00000 0.00000 90.00000 10.00000 0.00000 0.00000
Romania 1.01523 0.50761 1.01523 1.01523 92.89340 0.00000 3.55330
Serbia 1.66667 0.00000 0.00000 0.00000 0.00000 98.33333 0.00000
Slovenia 4.76190 0.00000 0.00000 4.76190 0.00000 0.00000 90.47619
Turkey 0.00000 0.00000 0.00000 0.00000 0.00000 1.66667 0.00000
Turkey
Croatia 0.00000
Greece 0.81967
Hungary 0.00000
Moldova 0.00000
Romania 0.00000
Serbia 0.00000
Slovenia 0.00000
Turkey 98.33333
overall classification accuracy: 91.34367 %
Kappa statistic: 0.89044
# Mahalanobis distances between groups
knitr::kable(as.data.frame(as.matrix(sample.cva$Dist$GroupdistMaha)), digits = 3)| Croatia | Greece | Hungary | Moldova | Romania | Serbia | Slovenia | Turkey | |
|---|---|---|---|---|---|---|---|---|
| Croatia | 0.000 | 5.615 | 3.554 | 5.662 | 4.225 | 3.675 | 2.720 | 5.197 |
| Greece | 5.615 | 0.000 | 7.175 | 7.689 | 6.234 | 5.312 | 6.390 | 5.610 |
| Hungary | 3.554 | 7.175 | 0.000 | 5.958 | 4.157 | 5.186 | 4.275 | 6.196 |
| Moldova | 5.662 | 7.689 | 5.958 | 0.000 | 4.249 | 6.033 | 5.427 | 7.463 |
| Romania | 4.225 | 6.234 | 4.157 | 4.249 | 0.000 | 5.029 | 4.704 | 5.323 |
| Serbia | 3.675 | 5.312 | 5.186 | 6.033 | 5.029 | 0.000 | 4.876 | 5.555 |
| Slovenia | 2.720 | 6.390 | 4.275 | 5.427 | 4.704 | 4.876 | 0.000 | 6.322 |
| Turkey | 5.197 | 5.610 | 6.196 | 7.463 | 5.323 | 5.555 | 6.322 | 0.000 |
# pairwise probabilities of differences between groups
knitr::kable(as.data.frame(as.matrix(sample.cva$Dist$probsMaha)), digits = 5)| Croatia | Greece | Hungary | Moldova | Romania | Serbia | Slovenia | Turkey | |
|---|---|---|---|---|---|---|---|---|
| Croatia | 0e+00 | 1e-04 | 1e-04 | 1e-04 | 1e-04 | 1e-04 | 2e-04 | 1e-04 |
| Greece | 1e-04 | 0e+00 | 1e-04 | 1e-04 | 1e-04 | 1e-04 | 1e-04 | 1e-04 |
| Hungary | 1e-04 | 1e-04 | 0e+00 | 1e-04 | 1e-04 | 1e-04 | 1e-04 | 1e-04 |
| Moldova | 1e-04 | 1e-04 | 1e-04 | 0e+00 | 1e-04 | 1e-04 | 1e-04 | 1e-04 |
| Romania | 1e-04 | 1e-04 | 1e-04 | 1e-04 | 0e+00 | 1e-04 | 1e-04 | 1e-04 |
| Serbia | 1e-04 | 1e-04 | 1e-04 | 1e-04 | 1e-04 | 0e+00 | 1e-04 | 1e-04 |
| Slovenia | 2e-04 | 1e-04 | 1e-04 | 1e-04 | 1e-04 | 1e-04 | 0e+00 | 1e-04 |
| Turkey | 1e-04 | 1e-04 | 1e-04 | 1e-04 | 1e-04 | 1e-04 | 1e-04 | 0e+00 |
UPGMA - without breeders
UPBMA.B <- upgma(sample.cva$Dist$GroupdistMaha)
plot(UPBMA.B, label.offset = 0.1, cex = 0.5)Figures
library(phytools) # cophylo
cophylo.upgma <- cophylo(UPBMA.A, UPBMA.B, rotate = T)Rotating nodes to optimize matching...
Done.
plot(cophylo.upgma)library(ggpubr) # ggarange
ggarrange(PCA.A, PCA.B, labels = c("A", "B"), font.label = list(size = 12, face = "plain"),
ncol = 2, nrow = 1)References
Kaur, H., Nedić, N., & Tofilski, A. (2023). Fore wing images of honey bees (Apis mellifera) from Serbia [Data set]. Zenodo. https://doi.org/10.5281/zenodo.10389960
Oleksa, A., Căuia, E., Siceanu, A., Puškadija, Z., Kovačić, M., Pinto, M.A., Rodrigues, P.J., Hatjina, F., Charistos, L., Bouga, M., Prešern, J., Kandemir, I., Rašić, S., Kusza, S., & Tofilski, A. (2022). Collection of wing images for conservation of honey bees (Apis mellifera) biodiversity in Europe [Data set]. Zenodo. https://doi.org/10.5281/zenodo.7244070
Oleksa, A., Căuia, E., Siceanu, A., Puškadija, Z., Kovačić, M., Pinto, M. A., Rodrigues, P. J., Hatjina, F., Charistos, L., Bouga, M., Prešern, J., Kandemir, İ., Rašić, S., Kusza, S., Tofilski, A. (2023). Honey bee (Apis mellifera) wing images: a tool for identification and conservation. GigaScience, 12, giad019. https://doi.org/10.1093/gigascience/giad019
Information about session
sessionInfo()R version 4.3.2 (2023-10-31 ucrt)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 11 x64 (build 22000)
Matrix products: default
locale:
[1] LC_COLLATE=Polish_Poland.utf8 LC_CTYPE=Polish_Poland.utf8
[3] LC_MONETARY=Polish_Poland.utf8 LC_NUMERIC=C
[5] LC_TIME=Polish_Poland.utf8
time zone: Europe/Warsaw
tzcode source: internal
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] ggpubr_0.6.0 phytools_2.0-3 maps_3.4.1.1
[4] viridis_0.6.4 viridisLite_0.4.2 ggspatial_1.1.9
[7] raster_3.6-26 sp_2.1-1 rnaturalearth_0.3.4
[10] phangorn_2.11.1 ape_5.7-1 mgcViz_0.1.11
[13] ggplot2_3.4.4 qgam_1.3.4 mgcv_1.9-0
[16] nlme_3.1-163 Morpho_2.11 geomorph_4.0.6
[19] Matrix_1.6-1.1 rgl_1.2.1 RRPP_1.4.0
loaded via a namespace (and not attached):
[1] RColorBrewer_1.1-3 rstudioapi_0.15.0 jsonlite_1.8.7
[4] magrittr_2.0.3 farver_2.1.1 nloptr_2.0.3
[7] rmarkdown_2.25 vctrs_0.6.4 minqa_1.2.6
[10] base64enc_0.1-3 terra_1.7-55 rstatix_0.7.2
[13] htmltools_0.5.7 broom_1.0.5 KernSmooth_2.23-22
[16] htmlwidgets_1.6.3 plyr_1.8.9 igraph_1.5.1
[19] mime_0.12 lifecycle_1.0.4 iterators_1.0.14
[22] pkgconfig_2.0.3 R6_2.5.1 fastmap_1.1.1
[25] shiny_1.8.0 digest_0.6.33 numDeriv_2016.8-1.1
[28] colorspace_2.1-0 GGally_2.2.0 bezier_1.1.2
[31] labeling_0.4.3 clusterGeneration_1.3.8 fansi_1.0.5
[34] colorRamps_2.3.1 httr_1.4.7 abind_1.4-5
[37] compiler_4.3.2 proxy_0.4-27 withr_2.5.2
[40] doParallel_1.0.17 backports_1.4.1 optimParallel_1.0-2
[43] carData_3.0-5 DBI_1.1.3 gamm4_0.2-6
[46] ggstats_0.5.1 ggsignif_0.6.4 MASS_7.3-60
[49] classInt_0.4-10 scatterplot3d_0.3-44 tools_4.3.2
[52] units_0.8-4 httpuv_1.6.12 glue_1.6.2
[55] rnaturalearthdata_0.1.0 quadprog_1.5-8 promises_1.2.1
[58] grid_4.3.2 sf_1.0-14 generics_0.1.3
[61] isoband_0.2.7 gtable_0.3.4 Rvcg_0.22.1
[64] class_7.3-22 tidyr_1.3.0 car_3.1-2
[67] utf8_1.2.4 foreach_1.5.2 pillar_1.9.0
[70] later_1.3.1 splines_4.3.2 dplyr_1.1.4
[73] lattice_0.21-9 tidyselect_1.2.0 miniUI_0.1.1.1
[76] knitr_1.45 gridExtra_2.3 xfun_0.41
[79] expm_0.999-8 matrixStats_1.1.0 yaml_2.3.7
[82] boot_1.3-28.1 evaluate_0.23 codetools_0.2-19
[85] tibble_3.2.1 cli_3.6.1 xtable_1.8-4
[88] munsell_0.5.0 Rcpp_1.0.11 coda_0.19-4
[91] parallel_4.3.2 ellipsis_0.3.2 jpeg_0.1-10
[94] lme4_1.1-35.1 scales_1.2.1 e1071_1.7-13
[97] purrr_1.0.2 combinat_0.0-8 rlang_1.1.2
[100] fastmatch_1.1-4 cowplot_1.1.1 formatR_1.14
[103] mnormt_2.1.1