# calculations
library(geomorph) # GPA
library(Morpho) # CVA
library(mgcv) # GAM
library(mgcViz) # GAM
library(stats) # MANOVA
library(phangorn) # UPGMA
# plotting and visualization
library(ggplot2) # plots
::theme_set(theme_light())
ggplot2library(rnaturalearth) # maps
library(raster) # raster
library(ggspatial) # annotation_scale
library(viridis) # colour gradient
Influence of honey bee (Apis mellifera) breeding on wing venation in Serbia and neighbouring countries
Supplementary document with statistical analysis
Libraries
Variable names
<- 19 # number of landmarks
p <- 2 # number of dimensions, in this case 2 for coordinates (x, y)
k
# create coordinates names used by IdentiFly
<- c("x1", "y1")
xyNames for (i in 2:p) {
<- c(xyNames, paste0("x", i))
xyNames <- c(xyNames, paste0("y", i))
xyNames
} xyNames
[1] "x1" "y1" "x2" "y2" "x3" "y3" "x4" "y4" "x5" "y5" "x6" "y6"
[13] "x7" "y7" "x8" "y8" "x9" "y9" "x10" "y10" "x11" "y11" "x12" "y12"
[25] "x13" "y13" "x14" "y14" "x15" "y15" "x16" "y16" "x17" "y17" "x18" "y18"
[37] "x19" "y19"
# The number of principal components used is 2*p-4 = 34, which is equal to the
# degrees of freedom create principal components names used by prcomp
<- paste0("PC", 1:(2 * p - 4))
pcNames pcNames
[1] "PC1" "PC2" "PC3" "PC4" "PC5" "PC6" "PC7" "PC8" "PC9" "PC10"
[11] "PC11" "PC12" "PC13" "PC14" "PC15" "PC16" "PC17" "PC18" "PC19" "PC20"
[21] "PC21" "PC22" "PC23" "PC24" "PC25" "PC26" "PC27" "PC28" "PC29" "PC30"
[31] "PC31" "PC32" "PC33" "PC34"
# define which landmarks are connected by lines in wireframe graph
<- c(1, 1, 2, 2, 3, 3, 4, 4, 5, 6, 7, 7, 7, 8, 9, 9, 10, 11, 11, 12, 13, 14,
link.x 15, 16, 17)
<- c(2, 3, 4, 5, 6, 19, 6, 10, 12, 8, 8, 14, 19, 9, 10, 15, 11, 12, 16, 13,
link.y 18, 15, 16, 17, 18)
<- cbind(link.x, link.y) links.apis
Read Serbia data
Some data about honey bees in Serbia originate from an earlier study Oleksa et al., 2022, Oleksa et al., 2023. Data newly obtained for this study is available at Zenodo Kaur et al., 2023
<- read.csv("https://zenodo.org/record/7244070/files/RS-raw-coordinates.csv")
wings <- rbind(wings, read.csv("https://zenodo.org/record/10389960/files/RS_21_80-raw-coordinates.csv"))
wings # extract sample classifier from file name
$sample <- substr(wings$file, 1, 7)
wings
# Read geographic coordinates
<- read.csv("https://zenodo.org/record/7244070/files/RS-data.csv")
geo.data <- rbind(geo.data, read.csv("https://zenodo.org/record/10389960/files/RS_21_80-data.csv"))
geo.data
<- aggregate(geo.data[c("latitude", "longitude", "date")], by = list(geo.data$sample),
sample.geo.data FUN = mean)
<- data.frame(sample.geo.data, row.names = 1) # move column 1 to row names sample.geo.data
Read C lineage data
As reference for SE Europe we used the data obtained from the earlier study Oleksa et al., 2022, Oleksa et al., 2023.
<- read.csv("https://zenodo.org/record/7244070/files/EU-raw-coordinates.csv")
wings.C <- read.csv("https://zenodo.org/record/7244070/files/EU-geo-data.csv")
geo.data.C
$latitude <- geo.data.C$latitude
wings.C$longitude <- geo.data.C$longitude
wings.C
<- subset(wings.C, country %in% c("AT", "GR", "HR", "HU", "MD", "ME", "RO",
wings.C "SI", "TR"))
# GPA Convert 2D array into a 3D array
.3D <- arrayspecs(wings.C[xyNames], p, k)
wings.Cdimnames(wings.C.3D)[[3]] <- wings.C$file
# Align the coordinates using Generalized Procrustes Analysis
<- gpagen(wings.C.3D, print.progress = FALSE)
GPA.C # Convert 3D array into a 2D array - opposite to arrayspecs
<- two.d.array(GPA.C$coords)
wings.C.aligned
# rename column names from 1.X, 1.Y to x1, y1 ...
colnames(wings.C.aligned) <- gsub("([0-9]+)\\.([XY])", "\\L\\2\\1", colnames(wings.C.aligned),
perl = TRUE)
# average data within samples
<- aggregate(wings.C.aligned, by = list(wings.C$sample), FUN = mean)
C.samples <- data.frame(C.samples, row.names = 1) # move column 1 to row names
C.samples
# average geo data
<- aggregate(wings.C[c("latitude", "longitude")], by = list(wings.C$sample),
geo.sample FUN = mean)
<- data.frame(geo.sample, row.names = 1) # move column 1 to row names
geo.sample <- cbind(C.samples, geo.sample) C.samples
Map
# Read elevation data In order to download the TIF file uncomment the two lines
# below
# download.file('https://geodata.ucdavis.edu/climate/worldclim/2_1/base/wc2.1_2.5m_elev.zip',
# 'wc2.1_2.5m_elev.zip') unzip('wc2.1_2.5m_elev.zip') or use your local file
<- raster("C:/data/WorldClim/wc2.1_2.5m_elev.tif")
elev <- colorRampPalette(c("#f7f7f7", "#f0f0f0", "#d9d9d9", "#bdbdbd", "#636363"),
elev.color bias = 2)
<- 18
x.min <- 24
x.max <- 41.8
y.min <- 46.5
y.max <- extent(x.min - 1, x.max + 1, y.min - 1, y.max + 1)
e <- crop(elev, e)
elev <- data.frame(rasterToPoints(elev))
elev.df colnames(elev.df) <- c("longitude", "latitude", "altitude")
<- ne_countries(scale = "medium", returnclass = "sf")
world
# Names of neighboring countries
<- data.frame(name = c("Croatia", "Hungary", "Romania", "Bulgaria",
neighboring_countries "North Macedonia", "Albania", "Montenegro", "Bosnia and \nHerzegovina", "Kosovo*",
"Serbia"), latitude = c(45.4, 46.4, 45.44, 42.68, 41.91, 42.1, 42.8, 44, 42.5,
44.7), longitude = c(18.2, 19.82, 22.5, 23.39, 21.7, 19.82, 19.37, 18.3, 20.8,
20.4))
# * The designation of Kosovo is without prejudice to positions on status, and
# is in line with UNSCR 1244/1999 and ICJ Opinion on the Kosovo declaration of
# independence.
ggplot(data = world) + geom_raster(data = elev.df, aes(longitude, latitude, fill = altitude)) +
scale_fill_gradientn(colours = elev.color(100)) + geom_sf(fill = NA) + geom_point(data = sample.geo.data,
aes(x = longitude, y = latitude, color = as.factor(date))) + scale_color_manual(name = "year",
values = c("#E69F00", "#009E73", "#56B4E9")) + coord_fixed() + coord_sf(xlim = c(x.min,
ylim = c(y.min, y.max)) + geom_text(data = neighboring_countries, aes(x = longitude,
x.max), y = latitude, label = name), color = "black", size = 3, nudge_y = 0.02) + annotation_scale(location = "tr",
width_hint = 0.2)
GPA alignment
# Convert from 2D array to 3D array
<- arrayspecs(wings[xyNames], p, k)
wings.coords dimnames(wings.coords)[[3]] <- wings$file
# Align the coordinates using Generalized Procrustes Analysis
<- gpagen(wings.coords, print.progress = FALSE)
GPA.RS
# plot landmarks after alignment
plotAllSpecimens(GPA.RS$coords, links = links.apis, label = TRUE, plot.param = list(pt.bg = "black",
pt.cex = 0.5, mean.bg = "red", mean.cex = 1, link.col = "red", txt.pos = 3, txt.cex = 1))
# Convert from 3D array to 2D array
<- two.d.array(GPA.RS$coords)
wings.aligned.RS # rename column names from 1.X, 1.Y to x1, y1 ...
colnames(wings.aligned.RS) <- gsub("([0-9]+)\\.([XY])", "\\L\\2\\1", colnames(wings.aligned.RS),
perl = TRUE)
# Average coorinates within samples
<- aggregate(wings.aligned.RS, by = list(wings$sample), FUN = mean)
RS.samples <- data.frame(RS.samples, row.names = 1) # move column 1 to row names
RS.samples <- cbind(RS.samples, sample.geo.data) RS.samples
PCA Serbia - with breeders
<- prcomp(RS.samples[, xyNames])
RS.pca <- as.data.frame(RS.pca$x[, pcNames])
RS.pca.scores <- cbind(RS.pca.scores, sample.geo.data)
RS.pca.scores
# create plot labels
<- summary(RS.pca)$importance
variance.tab <- variance.tab["Proportion of Variance", "PC1"]
variance <- round(100 * variance, 1)
variance <- paste0("PC1 (", variance, "%)")
label.x <- variance.tab["Proportion of Variance", "PC2"]
variance <- round(100 * variance, 1)
variance <- paste0("PC2 (", variance, "%)")
label.y
<- ggplot(RS.pca.scores, aes(x = PC1, y = PC2, color = as.factor(date))) +
PCA.A geom_point() + scale_color_manual(name = "year", values = c("#E69F00", "#009E73",
"#56B4E9")) + stat_ellipse() + xlab(label.x) + ylab(label.y) + theme(legend.position = c(0.9,
0.15), legend.background = element_rect(fill = "white", color = "black"), legend.title = element_blank())
plot(PCA.A)
# Serbian bees collected in different years differ significantly in wing shape
<- manova(as.matrix(RS.pca.scores[, pcNames]) ~ date, data = RS.pca.scores)
year.MANOVA summary(year.MANOVA)
Df Pillai approx F num Df den Df Pr(>F)
date 1 0.79193 5.0374 34 45 3.848e-07 ***
Residuals 78
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
GAM Serbia - with breeders
# PC1
ggplot(data = world) + geom_sf() + geom_point(data = RS.pca.scores, aes(x = longitude,
y = latitude, colour = PC1), size = 1) + coord_sf(xlim = c(17.4, 24.2), ylim = c(41.8,
46.5), expand = FALSE) + scale_color_viridis(name = "PC1") + xlab("Longitude") +
ylab("Latitude")
<- mgcv::gam(PC1 ~ s(longitude, latitude), data = RS.pca.scores)
GAM anova(GAM)
Family: gaussian
Link function: identity
Formula:
PC1 ~ s(longitude, latitude)
Approximate significance of smooth terms:
edf Ref.df F p-value
s(longitude,latitude) 16.99 21.73 5.286 <2e-16
<- getViz(GAM)
viz.GAM <- plot(viz.GAM, 1) + l_fitRaster() + l_fitContour(color = "grey30") + l_points() +
GAM.A labs(title = NULL) + scale_fill_continuous(type = "viridis", name = "PC1", na.value = "transparent") +
scale_x_continuous(limits = c(17.4, 24.2), expand = c(0, 0)) + scale_y_continuous(limits = c(41.8,
46.5), expand = c(0, 0)) + geom_path(data = map_data("world"), aes(x = long,
y = lat, group = group), color = "black", inherit.aes = FALSE)
GAM.A
CVA SE Europe - with breeders
$date <- NA
C.samples<- rbind(C.samples, RS.samples)
wings.C.all
# GPA Convert 2D array into a 3D array
.3D <- arrayspecs(wings.C.all[xyNames], p, k)
wings.Cdimnames(wings.C.3D)[[3]] <- rownames(wings.C.all)
# Align the coordinates using Generalized Procrustes Analysis
<- gpagen(wings.C.3D, print.progress = FALSE)
GPA.C # Convert 3D array into a 2D array - opposite to arrayspecs
<- two.d.array(GPA.C$coords)
wings.C.aligned # rename column names from 1.X, 1.Y to x1, y1 ...
colnames(wings.C.aligned) <- gsub("([0-9]+)\\.([XY])", "\\L\\2\\1", colnames(wings.C.aligned),
perl = TRUE)
<- prcomp(wings.C.aligned[, xyNames])
PCA.C <- as.data.frame(PCA.C$x[, pcNames])
PCA.C.scores $country <- substr(rownames(wings.C.all), 1, 2)
PCA.C.scores
$country <- as.factor(PCA.C.scores$country)
PCA.C.scoreslevels(PCA.C.scores$country) <- list(Austria = "AT", Croatia = "HR", Greece = "GR",
Hungary = "HU", Moldova = "MD", Montenegro = "ME", Romania = "RO", Serbia = "RS",
Slovenia = "SI", Turkey = "TR")
<- cbind(PCA.C.scores, wings.C.all[c("latitude", "longitude", "date")])
PCA.C.scores
# create plot labels
<- summary(PCA.C)$importance
variance.tab <- variance.tab["Proportion of Variance", "PC1"]
variance <- round(100 * variance, 1)
variance <- paste0("PC1 (", variance, "%)")
label.x <- variance.tab["Proportion of Variance", "PC2"]
variance <- round(100 * variance, 1)
variance <- paste0("PC2 (", variance, "%)")
label.y
ggplot(PCA.C.scores, aes(x = PC1, y = PC2, color = country)) + geom_point() + stat_ellipse()
# CVA
$group.year <- ifelse(PCA.C.scores$country == "Serbia", paste(as.character(PCA.C.scores$country),
PCA.C.scores$date, sep = "-"), as.character(PCA.C.scores$country))
PCA.C.scores
# use equal prior probability for all groups
<- length(unique(PCA.C.scores$group.year)) # number of groups
n.gr <- CVA(PCA.C.scores[pcNames], PCA.C.scores$group.year, rounds = 10000,
sample.cva cv = TRUE, prior = rep(1/n.gr, n.gr))
<- as.data.frame(sample.cva$CVscores)
sample.cva.scores # remove unwanted spaces in variable names otherwise use `CV 1`
colnames(sample.cva.scores) <- gsub(" ", "", colnames(sample.cva.scores))
# sample.cva.scores <- cbind(sample.geo.data, sample.cva.scores)
$group.year <- PCA.C.scores$group.year
sample.cva.scores
ggplot(sample.cva.scores, aes(x = CV1, y = CV2, shape = group.year, color = group.year)) +
geom_point() + scale_shape_manual(name = "group", values = c(1:12)) + scale_color_manual(name = "group",
values = rainbow(12)) + stat_ellipse()
# Confusion matrix Classification of the samples to groups
<- typprobClass(sample.cva$CVscores, groups = as.factor(sample.cva.scores$group.year),
CVA.class outlier = 0)
print(CVA.class)
cross-validated classification results in frequencies
Austria Croatia Greece Hungary Moldova Montenegro Romania
Austria 10 0 0 0 0 0 0
Croatia 0 120 0 7 0 0 0
Greece 0 0 231 0 0 0 0
Hungary 0 0 0 17 0 0 5
Moldova 0 0 0 0 9 0 1
Montenegro 0 0 0 0 0 17 0
Romania 0 2 1 2 2 0 183
Serbia-2005 0 0 0 0 0 0 0
Serbia-2010 0 0 0 0 0 5 0
Serbia-2022 0 0 0 0 0 0 0
Slovenia 0 1 0 2 1 0 0
Turkey 0 0 0 0 0 0 0
Serbia-2005 Serbia-2010 Serbia-2022 Slovenia Turkey
Austria 0 0 0 0 0
Croatia 3 0 6 24 0
Greece 9 0 1 0 3
Hungary 0 0 0 0 0
Moldova 0 0 0 0 0
Montenegro 0 3 0 0 0
Romania 0 0 0 7 0
Serbia-2005 3 0 1 0 0
Serbia-2010 0 15 0 0 0
Serbia-2022 1 0 55 0 0
Slovenia 0 0 0 17 0
Turkey 0 0 1 0 59
cross-validated classification result in %
Austria Croatia Greece Hungary Moldova Montenegro
Austria 100.00000 0.00000 0.00000 0.00000 0.00000 0.00000
Croatia 0.00000 75.00000 0.00000 4.37500 0.00000 0.00000
Greece 0.00000 0.00000 94.67213 0.00000 0.00000 0.00000
Hungary 0.00000 0.00000 0.00000 77.27273 0.00000 0.00000
Moldova 0.00000 0.00000 0.00000 0.00000 90.00000 0.00000
Montenegro 0.00000 0.00000 0.00000 0.00000 0.00000 85.00000
Romania 0.00000 1.01523 0.50761 1.01523 1.01523 0.00000
Serbia-2005 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000
Serbia-2010 0.00000 0.00000 0.00000 0.00000 0.00000 25.00000
Serbia-2022 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000
Slovenia 0.00000 4.76190 0.00000 9.52381 4.76190 0.00000
Turkey 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000
Romania Serbia-2005 Serbia-2010 Serbia-2022 Slovenia Turkey
Austria 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000
Croatia 0.00000 1.87500 0.00000 3.75000 15.00000 0.00000
Greece 0.00000 3.68852 0.00000 0.40984 0.00000 1.22951
Hungary 22.72727 0.00000 0.00000 0.00000 0.00000 0.00000
Moldova 10.00000 0.00000 0.00000 0.00000 0.00000 0.00000
Montenegro 0.00000 0.00000 15.00000 0.00000 0.00000 0.00000
Romania 92.89340 0.00000 0.00000 0.00000 3.55330 0.00000
Serbia-2005 0.00000 75.00000 0.00000 25.00000 0.00000 0.00000
Serbia-2010 0.00000 0.00000 75.00000 0.00000 0.00000 0.00000
Serbia-2022 0.00000 1.78571 0.00000 98.21429 0.00000 0.00000
Slovenia 0.00000 0.00000 0.00000 0.00000 80.95238 0.00000
Turkey 0.00000 0.00000 0.00000 1.66667 0.00000 98.33333
overall classification accuracy: 89.32039 %
Kappa statistic: 0.86945
# Mahalanobis distances between groups
::kable(as.data.frame(as.matrix(sample.cva$Dist$GroupdistMaha)), digits = 3) knitr
Austria | Croatia | Greece | Hungary | Moldova | Montenegro | Romania | Serbia-2005 | Serbia-2010 | Serbia-2022 | Slovenia | Turkey | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Austria | 0.000 | 7.997 | 9.919 | 6.901 | 8.242 | 8.055 | 6.947 | 8.225 | 8.036 | 7.695 | 8.843 | 8.324 |
Croatia | 7.997 | 0.000 | 5.703 | 3.604 | 5.694 | 5.992 | 4.264 | 3.591 | 5.257 | 3.766 | 2.732 | 5.224 |
Greece | 9.919 | 5.703 | 0.000 | 7.300 | 7.803 | 6.868 | 6.345 | 4.735 | 6.682 | 5.490 | 6.468 | 5.648 |
Hungary | 6.901 | 3.604 | 7.300 | 0.000 | 5.990 | 6.944 | 4.198 | 5.637 | 6.342 | 5.270 | 4.306 | 6.263 |
Moldova | 8.242 | 5.694 | 7.803 | 5.990 | 0.000 | 7.288 | 4.253 | 6.173 | 6.597 | 6.151 | 5.428 | 7.535 |
Montenegro | 8.055 | 5.992 | 6.868 | 6.944 | 7.288 | 0.000 | 5.732 | 6.009 | 2.085 | 5.059 | 7.265 | 6.945 |
Romania | 6.947 | 4.264 | 6.345 | 4.198 | 4.253 | 5.732 | 0.000 | 5.201 | 5.269 | 5.146 | 4.703 | 5.402 |
Serbia-2005 | 8.225 | 3.591 | 4.735 | 5.637 | 6.173 | 6.009 | 5.201 | 0.000 | 5.746 | 3.161 | 4.697 | 5.101 |
Serbia-2010 | 8.036 | 5.257 | 6.682 | 6.342 | 6.597 | 2.085 | 5.269 | 5.746 | 0.000 | 5.039 | 6.469 | 7.021 |
Serbia-2022 | 7.695 | 3.766 | 5.490 | 5.270 | 6.151 | 5.059 | 5.146 | 3.161 | 5.039 | 0.000 | 4.916 | 5.742 |
Slovenia | 8.843 | 2.732 | 6.468 | 4.306 | 5.428 | 7.265 | 4.703 | 4.697 | 6.469 | 4.916 | 0.000 | 6.320 |
Turkey | 8.324 | 5.224 | 5.648 | 6.263 | 7.535 | 6.945 | 5.402 | 5.101 | 7.021 | 5.742 | 6.320 | 0.000 |
# pairwise probabilities of differences between groups
::kable(as.data.frame(as.matrix(sample.cva$Dist$probsMaha)), digits = 5) knitr
Austria | Croatia | Greece | Hungary | Moldova | Montenegro | Romania | Serbia-2005 | Serbia-2010 | Serbia-2022 | Slovenia | Turkey | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Austria | 0e+00 | 0.00010 | 0.0001 | 0.0001 | 0.0001 | 0.00010 | 0.0001 | 0.00010 | 0.00010 | 0.00010 | 0.00010 | 0.0001 |
Croatia | 1e-04 | 0.00000 | 0.0001 | 0.0001 | 0.0001 | 0.00010 | 0.0001 | 0.33477 | 0.00010 | 0.00010 | 0.00080 | 0.0001 |
Greece | 1e-04 | 0.00010 | 0.0000 | 0.0001 | 0.0001 | 0.00010 | 0.0001 | 0.02040 | 0.00010 | 0.00010 | 0.00010 | 0.0001 |
Hungary | 1e-04 | 0.00010 | 0.0001 | 0.0000 | 0.0001 | 0.00010 | 0.0001 | 0.00440 | 0.00010 | 0.00010 | 0.00010 | 0.0001 |
Moldova | 1e-04 | 0.00010 | 0.0001 | 0.0001 | 0.0000 | 0.00010 | 0.0001 | 0.00360 | 0.00010 | 0.00010 | 0.00010 | 0.0001 |
Montenegro | 1e-04 | 0.00010 | 0.0001 | 0.0001 | 0.0001 | 0.00000 | 0.0001 | 0.00180 | 0.52105 | 0.00010 | 0.00010 | 0.0001 |
Romania | 1e-04 | 0.00010 | 0.0001 | 0.0001 | 0.0001 | 0.00010 | 0.0000 | 0.00510 | 0.00010 | 0.00010 | 0.00010 | 0.0001 |
Serbia-2005 | 1e-04 | 0.33477 | 0.0204 | 0.0044 | 0.0036 | 0.00180 | 0.0051 | 0.00000 | 0.00340 | 0.69503 | 0.05249 | 0.0091 |
Serbia-2010 | 1e-04 | 0.00010 | 0.0001 | 0.0001 | 0.0001 | 0.52105 | 0.0001 | 0.00340 | 0.00000 | 0.00010 | 0.00010 | 0.0001 |
Serbia-2022 | 1e-04 | 0.00010 | 0.0001 | 0.0001 | 0.0001 | 0.00010 | 0.0001 | 0.69503 | 0.00010 | 0.00000 | 0.00010 | 0.0001 |
Slovenia | 1e-04 | 0.00080 | 0.0001 | 0.0001 | 0.0001 | 0.00010 | 0.0001 | 0.05249 | 0.00010 | 0.00010 | 0.00000 | 0.0001 |
Turkey | 1e-04 | 0.00010 | 0.0001 | 0.0001 | 0.0001 | 0.00010 | 0.0001 | 0.00910 | 0.00010 | 0.00010 | 0.00010 | 0.0000 |
UPGMA - with breeders
<- upgma(sample.cva$Dist$GroupdistMaha)
UPBMA.A plot(UPBMA.A, label.offset = 0.1, cex = 0.5)
Remove breeders
<- subset(RS.samples, date %in% c("2005", "2022")) RS.samples.short
PCA Serbia - without breeders
<- prcomp(RS.samples.short[, xyNames])
RS.pca <- as.data.frame(RS.pca$x[, pcNames])
RS.pca.scores $latitude <- RS.samples.short$latitude
RS.pca.scores$longitude <- RS.samples.short$longitude
RS.pca.scores$date <- RS.samples.short$date
RS.pca.scores
# create plot labels
<- summary(RS.pca)$importance
variance.tab <- variance.tab["Proportion of Variance", "PC1"]
variance <- round(100 * variance, 1)
variance <- paste0("PC1 (", variance, "%)")
label.x <- variance.tab["Proportion of Variance", "PC2"]
variance <- round(100 * variance, 1)
variance <- paste0("PC2 (", variance, "%)")
label.y
<- ggplot(RS.pca.scores, aes(x = PC1, y = PC2, color = as.factor(date))) +
PCA.B geom_point() + scale_color_manual(name = "year", values = c("#E69F00", "#56B4E9")) +
stat_ellipse() + xlab(label.x) + ylab(label.y) + scale_x_continuous(expand = c(0.06,
0)) + theme(legend.position = c(0.9, 0.15), legend.background = element_rect(fill = "white",
color = "black"), legend.title = element_blank())
plot(PCA.B)
GAM Serbia - without breeders
# PC1
ggplot(data = world) + geom_sf() + geom_point(data = RS.pca.scores, aes(x = longitude,
y = latitude, colour = PC1), size = 1) + coord_sf(xlim = c(17.4, 24.2), ylim = c(41.8,
46.5), expand = FALSE) + scale_color_viridis(name = "PC1") + xlab("Longitude") +
ylab("Latitude")
<- mgcv::gam(PC1 ~ s(longitude, latitude), data = RS.pca.scores)
GAM anova(GAM)
Family: gaussian
Link function: identity
Formula:
PC1 ~ s(longitude, latitude)
Approximate significance of smooth terms:
edf Ref.df F p-value
s(longitude,latitude) 3.844 5.170 2.068 0.0812
<- getViz(GAM)
viz.GAM <- plot(viz.GAM, 1) + l_fitRaster() + l_fitContour(color = "grey30") + l_points() +
GAM.B labs(title = NULL) + scale_fill_continuous(type = "viridis", name = "PC1", na.value = "transparent") +
scale_x_continuous(limits = c(17.4, 24.2), expand = c(0, 0)) + scale_y_continuous(limits = c(41.8,
46.5), expand = c(0, 0)) + geom_path(data = map_data("world"), aes(x = long,
y = lat, group = group), color = "black", inherit.aes = FALSE)
GAM.B
PCA SE Europe - without breeders
<- substr(rownames(C.samples), 1, 2)
countries <- subset(C.samples, countries %in% c("GR", "HR", "HU", "MD", "RO",
C.samples.short "SI", "TR"))
<- rbind(C.samples.short, RS.samples.short)
samples
# GPA Convert 2D array into a 3D array
.3D <- arrayspecs(samples[xyNames], p, k)
samplesdimnames(samples.3D)[[3]] <- rownames(samples)
# Align the coordinates using Generalized Procrustes Analysis
<- gpagen(samples.3D, print.progress = FALSE)
GPA.C # Convert 3D array into a 2D array - opposite to arrayspecs
<- two.d.array(GPA.C$coords)
samples.aligned # rename column names from 1.X, 1.Y to x1, y1 ...
colnames(samples.aligned) <- gsub("([0-9]+)\\.([XY])", "\\L\\2\\1", colnames(samples.aligned),
perl = TRUE)
# PCA
<- prcomp(samples.aligned[, xyNames])
PCA.C <- as.data.frame(PCA.C$x[, pcNames])
PCA.C.scores # change sign of the PC1 for easier comparisons with earlier graphs
"PC1"] <- -PCA.C.scores[, "PC1"]
PCA.C.scores[, $latitude <- samples$latitude[]
PCA.C.scores$longitude <- samples$longitude[]
PCA.C.scores
$country <- substr(rownames(samples), 1, 2)
PCA.C.scores$country <- as.factor(PCA.C.scores$country)
PCA.C.scoreslevels(PCA.C.scores$country) <- list(Croatia = "HR", Greece = "GR", Hungary = "HU",
Moldova = "MD", Romania = "RO", Serbia = "RS", Slovenia = "SI", Turkey = "TR")
# create plot labels
<- summary(PCA.C)$importance
variance.tab <- variance.tab["Proportion of Variance", "PC1"]
variance <- round(100 * variance, 1)
variance <- paste0("PC1 (", variance, "%)")
label.x <- variance.tab["Proportion of Variance", "PC2"]
variance <- round(100 * variance, 1)
variance <- paste0("PC2 (", variance, "%)")
label.y
ggplot(PCA.C.scores, aes(x = PC1, y = PC2, color = country)) + geom_point() + stat_ellipse()
# PC1
ggplot(data = world) + geom_sf() + geom_point(data = PCA.C.scores, aes(x = longitude,
y = latitude, colour = PC1), size = 1) + coord_sf(xlim = c(10, 30), ylim = c(33,
50), expand = FALSE) + scale_color_viridis(name = "PC1") + xlab("Longitude") +
ylab("Latitude")
<- mgcv::gam(PC1 ~ s(longitude, latitude), data = PCA.C.scores)
GAM anova(GAM)
Family: gaussian
Link function: identity
Formula:
PC1 ~ s(longitude, latitude)
Approximate significance of smooth terms:
edf Ref.df F p-value
s(longitude,latitude) 23.09 26.94 83.45 <2e-16
<- getViz(GAM)
viz.GAM plot(viz.GAM, 1) + l_fitRaster() + l_fitContour(color = "grey30") + l_points() +
labs(title = NULL) + scale_fill_continuous(type = "viridis", name = "PC1", na.value = "transparent") +
scale_x_continuous(limits = c(10, 30), expand = c(0, 0)) + scale_y_continuous(limits = c(33,
50), expand = c(0, 0)) + geom_path(data = map_data("world"), aes(x = long, y = lat,
group = group), color = "black", inherit.aes = FALSE)
# PC2
ggplot(data = world) + geom_sf() + geom_point(data = PCA.C.scores, aes(x = longitude,
y = latitude, colour = PC2), size = 1) + coord_sf(xlim = c(10, 30), ylim = c(33,
50), expand = FALSE) + scale_color_viridis(name = "PC1") + xlab("Longitude") +
ylab("Latitude")
<- mgcv::gam(PC2 ~ s(longitude, latitude), data = PCA.C.scores)
GAM anova(GAM)
Family: gaussian
Link function: identity
Formula:
PC2 ~ s(longitude, latitude)
Approximate significance of smooth terms:
edf Ref.df F p-value
s(longitude,latitude) 25.27 28.13 29.35 <2e-16
<- getViz(GAM)
viz.GAM plot(viz.GAM, 1) + l_fitRaster() + l_fitContour(color = "grey30") + l_points() +
labs(title = NULL) + scale_fill_continuous(type = "viridis", name = "PC2", na.value = "transparent") +
scale_x_continuous(limits = c(10, 30), expand = c(0, 0)) + scale_y_continuous(limits = c(33,
50), expand = c(0, 0)) + geom_path(data = map_data("world"), aes(x = long, y = lat,
group = group), color = "black", inherit.aes = FALSE)
CVA SE Europe - without breeders
# use equal prior probability for all groups
<- length(unique(PCA.C.scores$country)) # number of groups
n.gr <- CVA(PCA.C.scores[pcNames], PCA.C.scores$country, rounds = 10000, cv = TRUE,
sample.cva prior = rep(1/n.gr, n.gr))
<- as.data.frame(sample.cva$CVscores)
sample.cva.scores # remove unwanted spaces in variable names otherwise use `CV 1`
colnames(sample.cva.scores) <- gsub(" ", "", colnames(sample.cva.scores))
$country <- PCA.C.scores$country
sample.cva.scores
ggplot(sample.cva.scores, aes(x = CV1, y = CV2, shape = country, color = country)) +
geom_point() + scale_shape_manual(name = "country", values = c(1:8)) + scale_color_manual(name = "country",
values = rainbow(8)) + stat_ellipse()
# Confusion matrix Classification of the samples to groups
<- typprobClass(sample.cva$CVscores, groups = as.factor(sample.cva.scores$country),
CVA.class outlier = 0)
print(CVA.class)
cross-validated classification results in frequencies
Croatia Greece Hungary Moldova Romania Serbia Slovenia Turkey
Croatia 123 1 7 0 0 5 24 0
Greece 1 238 0 0 0 3 0 2
Hungary 0 0 17 0 5 0 0 0
Moldova 0 0 0 9 1 0 0 0
Romania 2 1 2 2 183 0 7 0
Serbia 1 0 0 0 0 59 0 0
Slovenia 1 0 0 1 0 0 19 0
Turkey 0 0 0 0 0 1 0 59
cross-validated classification result in %
Croatia Greece Hungary Moldova Romania Serbia Slovenia
Croatia 76.87500 0.62500 4.37500 0.00000 0.00000 3.12500 15.00000
Greece 0.40984 97.54098 0.00000 0.00000 0.00000 1.22951 0.00000
Hungary 0.00000 0.00000 77.27273 0.00000 22.72727 0.00000 0.00000
Moldova 0.00000 0.00000 0.00000 90.00000 10.00000 0.00000 0.00000
Romania 1.01523 0.50761 1.01523 1.01523 92.89340 0.00000 3.55330
Serbia 1.66667 0.00000 0.00000 0.00000 0.00000 98.33333 0.00000
Slovenia 4.76190 0.00000 0.00000 4.76190 0.00000 0.00000 90.47619
Turkey 0.00000 0.00000 0.00000 0.00000 0.00000 1.66667 0.00000
Turkey
Croatia 0.00000
Greece 0.81967
Hungary 0.00000
Moldova 0.00000
Romania 0.00000
Serbia 0.00000
Slovenia 0.00000
Turkey 98.33333
overall classification accuracy: 91.34367 %
Kappa statistic: 0.89044
# Mahalanobis distances between groups
::kable(as.data.frame(as.matrix(sample.cva$Dist$GroupdistMaha)), digits = 3) knitr
Croatia | Greece | Hungary | Moldova | Romania | Serbia | Slovenia | Turkey | |
---|---|---|---|---|---|---|---|---|
Croatia | 0.000 | 5.615 | 3.554 | 5.662 | 4.225 | 3.675 | 2.720 | 5.197 |
Greece | 5.615 | 0.000 | 7.175 | 7.689 | 6.234 | 5.312 | 6.390 | 5.610 |
Hungary | 3.554 | 7.175 | 0.000 | 5.958 | 4.157 | 5.186 | 4.275 | 6.196 |
Moldova | 5.662 | 7.689 | 5.958 | 0.000 | 4.249 | 6.033 | 5.427 | 7.463 |
Romania | 4.225 | 6.234 | 4.157 | 4.249 | 0.000 | 5.029 | 4.704 | 5.323 |
Serbia | 3.675 | 5.312 | 5.186 | 6.033 | 5.029 | 0.000 | 4.876 | 5.555 |
Slovenia | 2.720 | 6.390 | 4.275 | 5.427 | 4.704 | 4.876 | 0.000 | 6.322 |
Turkey | 5.197 | 5.610 | 6.196 | 7.463 | 5.323 | 5.555 | 6.322 | 0.000 |
# pairwise probabilities of differences between groups
::kable(as.data.frame(as.matrix(sample.cva$Dist$probsMaha)), digits = 5) knitr
Croatia | Greece | Hungary | Moldova | Romania | Serbia | Slovenia | Turkey | |
---|---|---|---|---|---|---|---|---|
Croatia | 0e+00 | 1e-04 | 1e-04 | 1e-04 | 1e-04 | 1e-04 | 2e-04 | 1e-04 |
Greece | 1e-04 | 0e+00 | 1e-04 | 1e-04 | 1e-04 | 1e-04 | 1e-04 | 1e-04 |
Hungary | 1e-04 | 1e-04 | 0e+00 | 1e-04 | 1e-04 | 1e-04 | 1e-04 | 1e-04 |
Moldova | 1e-04 | 1e-04 | 1e-04 | 0e+00 | 1e-04 | 1e-04 | 1e-04 | 1e-04 |
Romania | 1e-04 | 1e-04 | 1e-04 | 1e-04 | 0e+00 | 1e-04 | 1e-04 | 1e-04 |
Serbia | 1e-04 | 1e-04 | 1e-04 | 1e-04 | 1e-04 | 0e+00 | 1e-04 | 1e-04 |
Slovenia | 2e-04 | 1e-04 | 1e-04 | 1e-04 | 1e-04 | 1e-04 | 0e+00 | 1e-04 |
Turkey | 1e-04 | 1e-04 | 1e-04 | 1e-04 | 1e-04 | 1e-04 | 1e-04 | 0e+00 |
UPGMA - without breeders
<- upgma(sample.cva$Dist$GroupdistMaha)
UPBMA.B plot(UPBMA.B, label.offset = 0.1, cex = 0.5)
Figures
library(phytools) # cophylo
<- cophylo(UPBMA.A, UPBMA.B, rotate = T) cophylo.upgma
Rotating nodes to optimize matching...
Done.
plot(cophylo.upgma)
library(ggpubr) # ggarange
ggarrange(PCA.A, PCA.B, labels = c("A", "B"), font.label = list(size = 12, face = "plain"),
ncol = 2, nrow = 1)
References
Kaur, H., Nedić, N., & Tofilski, A. (2023). Fore wing images of honey bees (Apis mellifera) from Serbia [Data set]. Zenodo. https://doi.org/10.5281/zenodo.10389960
Oleksa, A., Căuia, E., Siceanu, A., Puškadija, Z., Kovačić, M., Pinto, M.A., Rodrigues, P.J., Hatjina, F., Charistos, L., Bouga, M., Prešern, J., Kandemir, I., Rašić, S., Kusza, S., & Tofilski, A. (2022). Collection of wing images for conservation of honey bees (Apis mellifera) biodiversity in Europe [Data set]. Zenodo. https://doi.org/10.5281/zenodo.7244070
Oleksa, A., Căuia, E., Siceanu, A., Puškadija, Z., Kovačić, M., Pinto, M. A., Rodrigues, P. J., Hatjina, F., Charistos, L., Bouga, M., Prešern, J., Kandemir, İ., Rašić, S., Kusza, S., Tofilski, A. (2023). Honey bee (Apis mellifera) wing images: a tool for identification and conservation. GigaScience, 12, giad019. https://doi.org/10.1093/gigascience/giad019
Information about session
sessionInfo()
R version 4.3.2 (2023-10-31 ucrt)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 11 x64 (build 22000)
Matrix products: default
locale:
[1] LC_COLLATE=Polish_Poland.utf8 LC_CTYPE=Polish_Poland.utf8
[3] LC_MONETARY=Polish_Poland.utf8 LC_NUMERIC=C
[5] LC_TIME=Polish_Poland.utf8
time zone: Europe/Warsaw
tzcode source: internal
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] ggpubr_0.6.0 phytools_2.0-3 maps_3.4.1.1
[4] viridis_0.6.4 viridisLite_0.4.2 ggspatial_1.1.9
[7] raster_3.6-26 sp_2.1-1 rnaturalearth_0.3.4
[10] phangorn_2.11.1 ape_5.7-1 mgcViz_0.1.11
[13] ggplot2_3.4.4 qgam_1.3.4 mgcv_1.9-0
[16] nlme_3.1-163 Morpho_2.11 geomorph_4.0.6
[19] Matrix_1.6-1.1 rgl_1.2.1 RRPP_1.4.0
loaded via a namespace (and not attached):
[1] RColorBrewer_1.1-3 rstudioapi_0.15.0 jsonlite_1.8.7
[4] magrittr_2.0.3 farver_2.1.1 nloptr_2.0.3
[7] rmarkdown_2.25 vctrs_0.6.4 minqa_1.2.6
[10] base64enc_0.1-3 terra_1.7-55 rstatix_0.7.2
[13] htmltools_0.5.7 broom_1.0.5 KernSmooth_2.23-22
[16] htmlwidgets_1.6.3 plyr_1.8.9 igraph_1.5.1
[19] mime_0.12 lifecycle_1.0.4 iterators_1.0.14
[22] pkgconfig_2.0.3 R6_2.5.1 fastmap_1.1.1
[25] shiny_1.8.0 digest_0.6.33 numDeriv_2016.8-1.1
[28] colorspace_2.1-0 GGally_2.2.0 bezier_1.1.2
[31] labeling_0.4.3 clusterGeneration_1.3.8 fansi_1.0.5
[34] colorRamps_2.3.1 httr_1.4.7 abind_1.4-5
[37] compiler_4.3.2 proxy_0.4-27 withr_2.5.2
[40] doParallel_1.0.17 backports_1.4.1 optimParallel_1.0-2
[43] carData_3.0-5 DBI_1.1.3 gamm4_0.2-6
[46] ggstats_0.5.1 ggsignif_0.6.4 MASS_7.3-60
[49] classInt_0.4-10 scatterplot3d_0.3-44 tools_4.3.2
[52] units_0.8-4 httpuv_1.6.12 glue_1.6.2
[55] rnaturalearthdata_0.1.0 quadprog_1.5-8 promises_1.2.1
[58] grid_4.3.2 sf_1.0-14 generics_0.1.3
[61] isoband_0.2.7 gtable_0.3.4 Rvcg_0.22.1
[64] class_7.3-22 tidyr_1.3.0 car_3.1-2
[67] utf8_1.2.4 foreach_1.5.2 pillar_1.9.0
[70] later_1.3.1 splines_4.3.2 dplyr_1.1.4
[73] lattice_0.21-9 tidyselect_1.2.0 miniUI_0.1.1.1
[76] knitr_1.45 gridExtra_2.3 xfun_0.41
[79] expm_0.999-8 matrixStats_1.1.0 yaml_2.3.7
[82] boot_1.3-28.1 evaluate_0.23 codetools_0.2-19
[85] tibble_3.2.1 cli_3.6.1 xtable_1.8-4
[88] munsell_0.5.0 Rcpp_1.0.11 coda_0.19-4
[91] parallel_4.3.2 ellipsis_0.3.2 jpeg_0.1-10
[94] lme4_1.1-35.1 scales_1.2.1 e1071_1.7-13
[97] purrr_1.0.2 combinat_0.0-8 rlang_1.1.2
[100] fastmatch_1.1-4 cowplot_1.1.1 formatR_1.14
[103] mnormt_2.1.1