---
title: "COVIDKID Vorpommern - Analysis script"
author: "Marcus Vollmer"
institute: "Institute of Bioinformatics, University Medicine Greifswald"
footer:
- content: '[Institute of Bioinformatics](http://www.medizin.uni-greifswald.de/bioinformatik/) • [University Medicine Greifswald](https://www.medizin.uni-greifswald.de)
'
date: "2024-08-23"
output:
pdf_document:
fig_caption: Yes
fig_width: 9
fig_height: 4.5
highlight: tango
includes:
in_header: header.tex
latex_engine: xelatex
toc: yes
keep_tex: true
---
```{r global_options, include=FALSE}
#knitr::opts_chunk$set(dev=c('png','postscript'), fig.width=12, fig.height=8, fig.path='Figs/', echo=FALSE, warning=FALSE, message=FALSE)
library(knitr)
opts_chunk$set(tidy.opts=list(width.cutoff=90), tidy=TRUE)
hook_output = knit_hooks$get('source') #this is the output for code
knit_hooks$set(source = function(x, options) {
if (!is.null(n <- options$linewidth) & knitr::is_latex_output()) {
x <- strwrap(x, width = n, exdent = 4)
}
hook_output(x, options)
})
```
# Data base
Title: CovidKid Vorpommern
Project lead: Prof. Almut Meyer-Bahlburg
DRKS Trial ID: https://www.drks.de/drks_web/navigate.do?navigationId=trial.HTML&TRIAL_ID=DRKS00024635
Methods:
Data sources:
LaGuS (for age dependent incidence)
https://www.lagus.mv-regierung.de/Gesundheit/InfektionsschutzPraevention/Daten-Corona-Pandemie/
CoMV-Gen (for virus variants composition)
https://www.comv-gen.de/fileadmin/user_upload/Berichte/2022_02_25_Bericht_KW7.pdf
https://www.comv-gen.de/aktueller-bericht/
RKI (COVID-19 vaccinations in Germany)
https://zenodo.org/records/12697471
https://github.com/robert-koch-institut/COVID-19-Impfungen_in_Deutschland
LaiV: Landesamt für innere Verwaltung, Statistisches Amt (age distribution of the communities)
https://www.laiv-mv.de/Statistik/Zahlen-und-Fakten/Gesellschaft-&-Staat/Bevoelkerung
Number of inhabitants of districts (Z011 2022 1 Bevölkerung.xlsx):
257,525 + 225,900 + 235,451
Zauberware (ZIP codes and Coordinates)
https://github.com/zauberware/postal-codes-json-xml-csv
Original creator: Data comes from http://www.geonames.org/
Information sources:
https://www.deutschlandfunk.de/rueckblick-2020-chronologie-eines-schuljahrs-in-der-100.html
DOI 10.17886/RKI-GBE-2017-063
```{r, eval=FALSE}
# Download bulletin reports RKI
library(stringr)
for (i in 1:55) {
download.file(paste0('https://www.rki.de/DE/Content/Infekt/EpidBull/Archiv/2022/Ausgaben/', str_pad(i, 2, pad="0"), '_22.pdf?__blob=publicationFile'),
destfile = paste0(str_pad(i, 2, pad="0"), '_22.pdf'))
}
```
## Load Data and Summarize
\footnotesize
```{r tidy=TRUE, warning=FALSE, message=FALSE}
# Read data sets
library(tidyverse)
library(readxl)
library(readr)
D <- readRDS("../Data/D0_ger.rds")
D1 <- readRDS("../Data/D1_ger.rds") # Only essential variables
D2 <- readRDS("../Data/D2_ger.rds") # Only self-declared uninfected and unvaccinated children
D3 <- readRDS("../Data/D3_ger.rds") # Only unvaccinated children
D4 <- readRDS("../Data/D4_ger.rds") # Only patients after vaccination start
# Import time series of SARS-CoV-2 incidence from DIVI
dailycases_mv <- readRDS('../Data/Tagesdaten_MV.rds')
# LAGuS development of infections in children by age and district
LAGuS <- readRDS('../Data/LAGuS.rds') # each single infection
LAGuS_sum <- readRDS('../Data/LAGuS_sum.rds') # sum per day and age group
LAGuS_sum2 <- readRDS('../Data/LAGuS_sum2.rds') # sum per day
# label <- c("Rostock","Schwerin","Mecklenburgische Seenplatte","Landkreis Rostock","Vorpommern-Rügen","Nordwestmecklenburg","Vorpommern-Greifswald","Ludwigslust-Parchim")
# level <- c(13003, 13004, 13071, 13072, 13073, 13074, 13075, 13076)
# RKI vaccination monitoring by age and district
A133K.long <- readRDS('../Data/LaiV_A133K.rds')
# LaiV demographic statistics
I_5_12 <- readRDS('../Data/RKI_Impfungen.rds')
```
```{r tablesettings, results='asis'}
# Set table style
library(kableExtra)
library(gtsummary)
library(flextable)
theme_gtsummary_journal(journal="jama")
theme_gtsummary_compact()
my_kbl <- function (x){
x |> as_kable_extra(
format="latex",
booktabs = TRUE,
longtable = TRUE,
linesep = ""
) |>
kableExtra::kable_styling(
position = "left",
latex_options = c("striped", "repeat_header"),
stripe_color = "gray!15"
)
}
list("tbl_summary-fn:percent_fun" = function(x) sprintf(x * 100, fmt='%#.1f')) |>
set_gtsummary_theme()
nonnormal <- c('IgG','IgG_NCP','IgA','cPass', 'MonthsAtRiskAlpha','MonthsAtRiskDelta','MonthsAtRiskOmicron')
```
```{r}
D <- D |>
mutate(
Wave_descr = fct_recode(
Wave_descr,
"Alpha wave\n(10/Dec/20-19/Jun/21)"="Alphawelle\n(10.12.20-19.06.21)",
"Delta wave\n(20/Jun/21-05/Jan/22)"="Deltawelle\n(20.06.21-05.01.22)",
"Omicron wave\n(06/Jan/22-31/Aug/22)"="Omikronwelle\n(06.01.22-31.08.22)"
)
)
D2 <- D2 |>
mutate(
Wave_descr = fct_recode(
Wave_descr,
"Alpha wave (10/Dec/20-19/Jun/21)"="Alphawelle\n(10.12.20-19.06.21)",
"Delta waves (20/Jun/21-05/Jan/22)"="Deltawelle\n(20.06.21-05.01.22)",
"Omicron waves (06/Jan/22-31/Aug/22)"="Omikronwelle\n(06.01.22-31.08.22)"
)
)
```
\newpage
# Figures
## Figure 1 Infection rates in children and adolescents in the study region of Pomerania, Germany
Daily incidence per 1000 children
```{r figure1, fig.width=8, fig.height=2}
# From table A133K 2021 00 (Germans+Foreigners)
#25230+1863 = 27093
#40691+2604 = 43295
#34940+1958 = 36898
# Subplot 1: Daily incidence per 1000 children (7-day average)
library(scales)
library(geomtextpath)
p1 <- ggplot(
data = LAGuS_sum |>
filter(type=="Cases") |>
mutate(n_7avg_pct = case_when(
Altersgruppe2=='<5' ~ n_7avg/27093,
Altersgruppe2=='5-11' ~ n_7avg/43295,
Altersgruppe2=='12-17' ~ n_7avg/36898),
n_7avg_pct = if_else(n_7avg_pct < 1/500000, 1/500000, n_7avg_pct)),
mapping = aes(as.Date(Date), n_7avg_pct, color=Altersgruppe2)
) +
annotate("rect", xmin=as.Date('2020-12-16','%Y-%m-%d'), xmax=as.Date('2021-01-10','%Y-%m-%d'), ymin=1/100000, ymax=1/50, alpha=0.2, fill="red", color=NA) +
annotate("rect", xmin=as.Date('2020-12-16','%Y-%m-%d'), xmax=as.Date('2021-02-22','%Y-%m-%d'), ymin=1/100000, ymax=1/50, alpha=0.2, fill="red", color=NA) +
annotate("rect", xmin=as.Date('2020-12-16','%Y-%m-%d'), xmax=as.Date('2021-05-26','%Y-%m-%d'), ymin=1/100000, ymax=1/50, alpha=0.2, fill="red", color=NA) +
annotate("rect", xmin=as.Date('2021-06-20','%Y-%m-%d'), xmax=as.Date('2022-01-06','%Y-%m-%d'), ymin=1/100000, ymax=1/50, alpha=0.5, fill="gray", color=NA) +
geom_vline(aes(xintercept=as.Date('2021-12-13','%Y-%m-%d')), linetype='dashed', color='darkgreen', alpha=0.5) +
geom_vline(aes(xintercept=as.Date('2021-06-07','%Y-%m-%d')), linetype='dashed', color='darkgreen', alpha=0.5) +
geom_line() +
geom_textvline(mapping=aes(xintercept=as.POSIXct(as.Date('2021-12-13','%Y-%m-%d'))), label="vaccination >5y", linetype='dashed', color='darkgreen', alpha=0.5, size=2, vjust=1.3, hjust=.9, show.legend=FALSE) +
geom_textvline(mapping=aes(xintercept=as.POSIXct(as.Date('2021-06-07','%Y-%m-%d'))), label="vaccination >12y", linetype='dashed', color='darkgreen', alpha=0.5, size=2, vjust=1.3, hjust=.9, show.legend=FALSE) +
geom_texthline(mapping=aes(yintercept=1/100), label="School lockdown", color='red', linetype=NA, size=2, vjust=-0.3, hjust=.025, show.legend=FALSE) +
scale_x_date(breaks="3 months", minor_breaks="1 months",
labels=scales::label_date_short(format=c("%Y","%b"), sep="\n"),
expand=c(0,0), limits=as.Date(c('2020-03-01','2022-09-01'))) +
scale_y_continuous(trans='log10', guide="axis_logticks", expand=c(0,0), limits=c(1/100000,1/50), breaks=1/10^c(2:5),
minor_breaks=c(seq(1e-2,1e-3,length.out=10), seq(1e-3,1e-4,length.out=10), seq(1e-4,1e-5,length.out=10))) +
labs(x='', y='Regional daily\ninfections in\nchildren') +
scale_color_manual(values=c('#1b9e77','#d95f02','#7570b3')) +
theme_bw()
Sys.setlocale("LC_TIME", "en_US.UTF-8")
p1
ggsave('figures/Figure_1_A.pdf', width=8, height=2)
# Subplot 2: Plot individual cases
LAGuS_IndividualCases <- rbind(
LAGuS |> filter(count>0, Date filter(count>0, Date>as.Date('2021-06-05'), Date
filter(type=="Cases") |>
mutate(n_7avg_pct = case_when(
Altersgruppe2=='<5' ~ n_7avg/27093,
Altersgruppe2=='5-11' ~ n_7avg/43295,
Altersgruppe2=='12-17' ~ n_7avg/36898)
)
LAGuS_7avg |> filter(Date==as.Date('2020-12-16')) |> mutate(n_7avg_pct*1000)
LAGuS_7avg |> filter(Dateas.Date('2022-03-05')) |> mutate(n_7avg_pct*1000)
LAGuS_7avg |> filter(Date==as.Date('2022-03-10')) |> mutate(n_7avg_pct*1000)
LAGuS_7avg |> filter(Date==as.Date('2022-08-31')) |> mutate(n_7avg_pct*1000)
# Print total sum of cases
LAGuS_sum |> filter(type=="Cases") |> summarise(n=sum(n))
#Cases <5 8622/27093
#Cases 5-11 25865/43295
#Cases 12-17 21834/36898
```
## Figure S1 Representativity of age
```{r figureS1, fig.width=7, fig.height=3}
# Prepare data set for plotting
ref <- A133K.long |> group_by(Landkreis, Geschlecht) |> summarise(n=sum(Anzahl))
tab <- D |>
group_by(Landkreis) |>
count(Geschlecht)
tab$ref <- ref$n
tab <- tab |> mutate(prop=n/ref, repr=ref/n)
A133K.long <- A133K.long |> filter(Alter>=0) |> mutate(Geschlecht = factor(Geschlecht, levels=c('Männlich','Weiblich'), labels=c('Male','Female')))
# Plot age distribution of study sample and add representitive distribution of covered districts
p <- ggplot(
D |> filter(Alter>=0) |>
mutate(Geschlecht = factor(Geschlecht, levels=c('männlich','weiblich'), labels=c('Male','Female')),
Landkreis = factor(Landkreis,
levels=c('Vorpommern-Greifswald','Vorpommern-Rügen','Mecklenburgische Seenplatte'),
labels=c('District\nVorpommern-Greifswald','District\nVorpommern-Rügen','District\nMecklenburgische Seenplatte'))),
aes(x=Alter)) +
geom_bar() +
geom_line( data=A133K.long, mapping=aes(x=Alter, y=Anzahl_rel), color='blue') +
geom_point(data=A133K.long, mapping=aes(x=Alter, y=Anzahl_rel), color='blue') +
geom_textsmooth(
data = A133K.long |> filter(Geschlecht=='Female', Landkreis=='District\nVorpommern-Greifswald'),
mapping = aes(x=Alter, y=Anzahl_rel, color="C"),
formula='y ~ x', method='loess', se=FALSE, label='Ideal representative sample', size=2.5, hjust=0.3, vjust=-.5, fill=NA, linewidth=NA, textcolor='#0000FF', show.legend=NA) +
facet_grid(Geschlecht~Landkreis) +
labs(x='Age of included children [years]', y='Frequency [counts]') +
theme(legend.position='none')
p
ggsave('figures/Figure_S1.pdf', width=7, height=3)
# Number of children in the three districts
sum(colSums(A133K.long |> filter(Alter<5) |> select(Anzahl)))
sum(colSums(A133K.long |> filter(Alter>=5, Alter<12) |> select(Anzahl)))
sum(colSums(A133K.long |> filter(Alter>=12, Alter<18) |> select(Anzahl)))
```
## Figure S2 Representativity of vaccination rate
```{r figureS2, fig.width=7, fig.height=3.5, warning=FALSE}
# Vaccinated and recovered vs. unvaccinated by age group
library(geomtextpath)
n_D <- D1 |> filter(!is.na(Gruppe), !is.na(Altersgruppe2)) |> droplevels() |> count(Altersgruppe2)
n_D$Groupname <- c('Children under 5\n','School children\n','Adolescents\n')
group_labeller_N_both_eng <- function(variable,value) {
return(paste0(n_D$Groupname, 'Age ', value, " years (n=", n_D$n, ")")[value])
}
mygl_eng <- guide_legend(title="", nrow=1, byrow=TRUE)
cbbPalette <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")
p <- ggplot() +
geom_textvline(data = data.frame(Altersgruppe2=factor('5-11', levels=c('<5','5-11','12-17'))),
aes(xintercept=as.POSIXct(as.Date('2021-12-13','%Y-%m-%d'))),
label="approval of vaccination", linetype='dashed', color='darkgreen', size=2, vjust=1.3, hjust=.5, show.legend=FALSE) +
geom_textvline(data = data.frame(Altersgruppe2=factor('12-17', levels=c('<5','5-11','12-17'))),
aes(xintercept=as.POSIXct(as.Date('2021-06-07','%Y-%m-%d'))),
label="approval of vaccination", linetype='dashed', color='darkgreen', size=2, vjust=1.3, hjust=.5, show.legend=FALSE) +
geom_step(data=I_5_12, mapping=aes(Impfdatum, rate, color="RKI"), linewidth=1) +
geom_smooth(
data = D1 |> filter(!is.na(Gruppe), !is.na(Altersgruppe2), Altersgruppe2 %in% c('5-11','12-17')),
mapping = aes(x=Date, y=as.numeric(Gruppe %in% c('Nur geimpft','Diagnostiziert und geimpft')), color="A"),
formula='y ~ x', method='loess', se=FALSE, linewidth=1) +
facet_wrap(~Altersgruppe2, labeller=group_labeller_N_both_eng) +
labs(title="", x="Date", y="Vaccination rate") +
scale_colour_manual(name="legend",
values=c("A"="#56B4E9", "RKI"="#E69F00"),
labels=c("Vaccination rate estimated from COVIDKID","Vaccination rate tracked by RKI DIM")) +
theme(legend.position = 'top') +
scale_x_datetime(date_breaks="3 months", date_minor_breaks="1 months", labels=scales::label_date_short(format=c("%Y","%b"), sep="\n")) +
scale_y_continuous(labels = scales::percent, breaks=seq(0,1,by=.2), limits=c(0,1)) +
coord_cartesian(ylim=c(0,.7)) +
guides(color=mygl_eng)
Sys.setlocale("LC_TIME", "en_US.UTF-8")
png(filename='figures/Figure_S2.png', width=7, height=3.5, units='in', res=300)
p
dev.off()
ggsave('figures/Figure_S2.pdf', width=7, height=3.5)
```
## Figure S3 Origin of patients by zip code
```{r figureS3, results='asis', fig.width=6, fig.height=5}
# Get the world map
library(rnaturalearth)
library(rnaturalearthdata)
world <- ne_countries(scale="large", returnclass="sf", continent="Europe")
# germany <- ne_countries(scale="large", returnclass="sf", country="Germany")
# https://dominicroye.github.io/en/2018/accessing-openstreetmap-data-with-r/
# library(osmdata)
# Get shape of states
# https://ggplot2.tidyverse.org/reference/ggsf.html
library(sf)
gadm_DEU0 <- st_read("../Data/GADM/gadm41_DEU.gpkg", layer='ADM_ADM_0', quiet=TRUE)
gadm_DEU1 <- st_read("../Data/GADM/gadm41_DEU.gpkg", layer='ADM_ADM_1', quiet=TRUE)
gadm_DEU2 <- st_read("../Data/GADM/gadm41_DEU.gpkg", layer='ADM_ADM_2', quiet=TRUE)
# Number of participants by ZIP code
D_plz <- D |>
select(PLZ,place,lat,lng) |>
group_by(place) |>
summarise(sizesum=n(), lat=mean(lat), lng=mean(lng), PLZ=paste(unique(PLZ), collapse=', '))
D_plz$place[D_plz$PLZ=='17498'] = 'Greifswalder Vororte' #'Greifswald suburbs'
D_clinic <- D |> count(Herkunft)
# Generate map
library(ggrepel)
library(ggspatial)
map_pomerania <- ggplot(data = world) +
geom_sf(colour=NA, fill='antiquewhite') +
geom_sf(data=subset(gadm_DEU2, NAME_2 %in% c('Mecklenburgische Seenplatte','Vorpommern-Greifswald','Vorpommern-Rügen')), colour='grey70', linewidth=0.1, fill="white") +
geom_sf(data=gadm_DEU1, colour='grey70', size=0.1, fill=NA) +
geom_sf(color='gray70', fill=NA) +
geom_sf(data=gadm_DEU0, color='gray30', fill=NA) +
geom_point(data=D_plz, aes(x=lng, y=lat, size=sizesum), shape=21, color="black", fill='orange', stroke=1) +
scale_size_binned_area(name='Patients', breaks=c(5,10,20,40,80,160)) +
annotation_scale(location='bl', width_hint=0.3) +
geom_text_repel(data=D_plz |> filter(sizesum>=80), aes(x=lng, y=lat, label=place), color="black", bg.color="white") +
coord_sf(xlim=c(11.5, 14.5), ylim=c(53, 54.8), expand=TRUE) +
theme(panel.grid.major = element_line(color=gray(.5), linetype='dashed', linewidth=0.5),
panel.background = element_rect(fill='aliceblue'),
legend.justification = "top")
D.tmp <- D |> count(Landkreis)
D.tmp$abbr <- c("VG","VR","MSE")
D.tmp <- D.tmp |>
left_join(data.frame(abbr=c("VG","VR","MSE"), x=c(14.20,12.75,12.80), y=c(54.11,54.52,53.44))) |>
mutate(label = paste0(abbr, " (n=", n, ")"))
map_pomerania +
geom_label(data=D.tmp, aes(x=x, y=y, label=label), fill="black", color='white') +
annotate(geom='text', x=14.15, y=54.7, label='Baltic sea', fontface='italic', color='grey22', size=6) +
labs(x='Longitude', y='Latitude', title='Origin of patients by zip code') +
theme(legend.position = c(0.09,.9875))
ggsave('figures/Figure_S3.pdf', width=6, height=5)
```
## Figure 2 Anti-SARS-CoV-2 IgG response and seropositivity
```{r figure2, fig.width=8, fig.height=3.25}
# Functions for plotting count numbers and percentages at threshold in grouped boxplots
stat_box_n <- function(x) {
return(
data.frame(
y = 0.95*min(x, na.rm=TRUE),
label = format(length(x), big.mark=",", scientific=FALSE)
)
)
}
stat_box_cutoff <- function(x, cutoff=1.1, label='counts') {
if (label=='percent') {
v1 <- paste0(sprintf('%0.1f', 100*sum(x<=cutoff)/length(x)), '%')
v2 <- paste0(sprintf('%0.1f', 100*sum(x>cutoff)/length(x)), '%')
strlength <- max(c(nchar(v1),nchar(v2)))
}
if (label=='counts') {
v1 <- format(sum(x<=cutoff), big.mark=",",scientific=FALSE)
v2 <- format(sum(x>cutoff), big.mark=",",scientific=FALSE)
strlength <- max(c(nchar(v1),nchar(v2)))
}
return(
data.frame(
y = cutoff,
label = paste0(
str_pad(v1, strlength, side="left", pad=" "),
' ',
str_pad(v2, strlength, side="right", pad=" ")
)
)
)
}
library(ggh4x)
library(shadowtext)
# Subplot 1: IgG-S1 response by status of self-reported infection and vaccination
labels_eng <- str_replace_all(levels(reorder(D1$Gruppe, D1$IgG)),
c("Undiagnostiziert und ungeimpft"="Undiagnosed and unvaccinated",
"Möglw. ungeimpft"="Possible unvaccinated",
"Nur diagnostiziert"="Only diagnosed",
"Nur geimpft"="Only vaccinated",
"Diagnostiziert und geimpft"="Diagnosed and vaccinated"))
p1 <- ggplot(D1 |> filter(!is.na(IgG)), aes(x=reorder(Gruppe, IgG), y=IgG)) +
geom_hline(yintercept=1.1, color='blue') +
geom_boxplot(alpha=0.5) +
stat_summary(fun.data=stat_box_cutoff, fun.args=list(cutoff=log2(1.1), label='percent'), geom="shadowtext", color='blue',
bg.colour='white', size=3, hjust=0.5, vjust=-.5) +
stat_summary(fun.data=stat_box_cutoff, fun.args=list(cutoff=log2(1.1), label='counts'), geom="shadowtext", color='blue',
bg.colour='white', size=2.5, hjust=0.5, vjust=1.5, fontface='italic') +
coord_flip() +
scale_y_continuous(trans='log2', guide="axis_minor", breaks=2^(-3:3), minor_breaks=c(2^c(-5,2*c(-2:2)),1:10), labels=c(0.125, 0.25, 0.50, 1, 2, 4, 8)) +
theme(axis.ticks.length.y = unit(0.4, "cm"),
ggh4x.axis.ticks.length.minor = rel(0.4),
ggh4x.axis.ticks.length.mini = rel(0.15)) +
labs(title="IgG antibody response", x="", y="IgG-S1 [ratio]") +
scale_x_discrete(labels=c(labels_eng,"Unknown"))
# Subplot 2: Seropositivity by status of self-reported infection and vaccination
Sero <- D |> filter(!is.na(IgG)) |> group_by(Gruppe) |> summarise(label=paste0(sum(Seropositive)), Seropositive=TRUE)
p2 <- ggplot(D |> filter(!is.na(IgG)), aes(x=reorder(Gruppe, IgG), fill=factor(Seropositive))) +
geom_bar(position='fill') +
scale_fill_brewer(palette="YlGn") +
coord_flip(clip = "off") +
geom_shadowtext(aes(label=scales::percent( ..count.. / tapply(..count.., ..x.., sum)[as.character(..x..)], accuracy=0.1),
color=ifelse( ..count.. / tapply(..count.., ..x.., sum)[as.character(..x..)]>0.05, 'show', 'hide')), bg.colour='white', stat='count', size=2.75, position=position_fill(vjust=.5), vjust=-.5) +
geom_shadowtext(data=Sero, aes(label=label, x=Gruppe, y=0, color='show'), bg.colour='white', size=2.25, fontface='italic', vjust=1.5) +
scale_color_manual(values=c('show'='black', 'hide'='transparent'), guide='none') +
labs(title="Seropositivity", x="", y="") +
theme(legend.position ='none',
panel.grid.major = element_line(colour = "grey"),
panel.grid.minor = element_line(colour = "grey"),
panel.background = element_blank()
) +
scale_x_discrete(labels=c('','','','','')) +
scale_y_reverse(limits=c(1,-0.05), labels=c('','','','',''))
library(patchwork)
png(filename='figures/Figure_2.png', width=8, height=3.25, units="in", res=300)
p1 + p2 + plot_layout(widths=c(3, 1)) + plot_annotation(tag_levels='a')
dev.off()
ggsave('figures/Figure_2.pdf', width=8, height=3.25)
```
## Figure 3 Estimated rates of recovered or SARS-CoV-2-vaccinated children and seroprevalence
```{r figure3, fig.width=7, fig.height=3.5}
# Plot LOESS estimated of vaccination rate and seropositivity
#library(rms)
n_D <- D1 |> filter(!is.na(Gruppe), !is.na(Altersgruppe2)) |> droplevels() |> count(Altersgruppe2)
n_D$Groupname <- c('Children under 5\n','School children\n','Adolescents\n')
group_labeller_N_both <- function(variable,value) {
return(paste0(n_D$Groupname, 'Age ', value, " years (n=", n_D$n, ")")[value])
}
mygl <- guide_legend(title="Estimated rate", title.position='left', nrow=1, byrow=TRUE)
cbbPalette <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")
p <- ggplot(D1 |> filter(!is.na(Gruppe), !is.na(Altersgruppe2)),
aes(x=Date, y=as.numeric(Gruppe %in% c('Undiagnostiziert und ungeimpft','Möglw. ungeimpft')), color="S")) +
geom_textvline(data=data.frame(Altersgruppe2=factor('5-11', levels=c('<5','5-11','12-17'))),
aes(xintercept=as.POSIXct(as.Date('2021-12-13','%Y-%m-%d'))), label="approval of vaccination", linetype='dashed', color='darkgreen', size=2, vjust=-.3, hjust=.5, show.legend=FALSE) +
geom_textvline(data=data.frame(Altersgruppe2=factor('12-17', levels=c('<5','5-11','12-17'))),
aes(xintercept=as.POSIXct(as.Date('2021-06-07','%Y-%m-%d'))), label="approval of vaccination", linetype='dashed', color='darkgreen', size=2, vjust=-.3, hjust=.5, show.legend=FALSE) +
geom_smooth(aes(x=Date, y=as.numeric(Gruppe %in% c('Nur geimpft','Nur diagnostiziert','Diagnostiziert und geimpft')), color="B"),
formula='y ~ x', method='loess', se=FALSE, linewidth=1) +
geom_smooth(aes(x=Date, y=as.numeric(Gruppe=='Undiagnostiziert und ungeimpft'), color="A"),
formula='y ~ x', method='loess', se=FALSE, linewidth=1) +
geom_smooth(aes(x=Date, y=as.numeric(Seropositive), color="C"),
formula='y ~ x', method='loess', se=FALSE, linewidth=1) +
facet_wrap(~Altersgruppe2, ncol=3, labeller=group_labeller_N_both) +
ylim(0,1) +
labs(title="", x="Sampling date", y="Estimated proportion") +
scale_colour_manual(name="legend",
values=c("A"="#D55E00", "B"="#009E73", "C"="#000000"),
labels=c("Neither vaccinated nor recovered","Vaccinated or recovered","Seroprevalence")) +
theme(legend.position = 'top') +
scale_x_datetime(limits=c(as.POSIXct("2020-11-01"), NA), date_breaks="3 months", date_minor_breaks="1 months", labels=scales::label_date_short(format=c("%Y","%b"), sep="\n"), expand=c(0,0)) +
scale_y_continuous(labels = scales::percent, breaks=seq(0,1,by=.2), limits=c(0,1)) +
guides(color=mygl, fill=mygl)
Sys.setlocale("LC_TIME", "en_US.UTF-8")
png(filename='figures/Figure_3.png', width=7, height=3.5, units='in', res=300)
p
dev.off()
p
ggsave('figures/Figure_3.pdf', width=7, height=3.5)
```
## Figure 4 Anti-SARS-CoV-2 IgG response in time course with silent infections
```{r figure4, fig.width=7.5, fig.height=5}
# Colors and shapes for plotting
library(RColorBrewer)
c1a <- c(brewer.pal(3, 'Dark2'), '#000000', '#1B9E77')
c2a <- c(NA,'#000000',c1[c(3)],'#FFFFFF', NA)
c3a <- c(1, 24, 25, 23, 4)
# Prepare data for plotting
D.tmp <- D1 |>
mutate(Gruppe = fct_explicit_na(Gruppe, na_level="Unbekannt"),
Wave_descr2 = fct_collapse(Wave_descr, "Alpha- und Deltawellen\n(10.12.20-05.01.22)" = c("Alphawelle\n(10.12.20-19.06.21)", "Deltawelle\n(20.06.21-05.01.22)")),
Gruppe = factor(Gruppe, levels=c('Undiagnostiziert und ungeimpft','Nur diagnostiziert','Nur geimpft','Diagnostiziert und geimpft','Unbekannt'))) |>
filter(!is.na(IgG), !is.na(Altersgruppe2), !is.na(Gruppe)) |>
droplevels()
labels_eng <- str_replace_all(levels(D.tmp$Gruppe),
c('Undiagnostiziert und ungeimpft'='Undiagnosed and unvaccinated',
'Nur diagnostiziert'='Diagnosed only',
'Nur geimpft'='Vaccinated only',
'Diagnostiziert und geimpft'='Diagnosed and vaccinated',
'Unbekannt'='Unknown'))
mygl <- guide_legend(title="Self-disclosure about\ninfection and vaccination:")
data_lines <- D1 |> filter(!is.na(Altersgruppe2)) |> count(Altersgruppe2) |> select(-n)
data_lineslabels <- data_lines[1, ]
data_lines <- data_lines[-1, ]
library(ggshadow)
n_D <- D.tmp |> count(Altersgruppe2)
n_D$Groupname <- c('Children under 5\n','School children\n','Adolescents\n')
group_labeller_N_both <- paste0(n_D$Groupname, 'Age ', n_D$Altersgruppe2, " years (n=", n_D$n, ")")
names(group_labeller_N_both) <- n_D$Altersgruppe2
p <- ggplot(D.tmp, aes(x=Date, y=IgG, color=Gruppe, fill=Gruppe, shape=Gruppe)) +
annotate("rect", xmin=as.POSIXct(as.Date('2020-12-16','%Y-%m-%d')), xmax=as.POSIXct(as.Date('2021-01-10','%Y-%m-%d')), ymin=0, ymax=Inf, alpha=0.2, fill="red", color=NA) +
annotate("rect", xmin=as.POSIXct(as.Date('2020-12-16','%Y-%m-%d')), xmax=as.POSIXct(as.Date('2021-02-22','%Y-%m-%d')), ymin=0, ymax=Inf, alpha=0.2, fill="red", color=NA) +
annotate("rect", xmin=as.POSIXct(as.Date('2020-12-16','%Y-%m-%d')), xmax=as.POSIXct(as.Date('2021-05-26','%Y-%m-%d')), ymin=0, ymax=Inf, alpha=0.2, fill="red", color=NA) +
geom_textvline(data=data.frame(Altersgruppe2=factor('5-11', levels=c('<5','5-11','12-17'))),
aes(xintercept=as.POSIXct(as.Date('2021-12-13','%Y-%m-%d'))), label="approval of vaccination", linetype='dashed', color='darkgreen', size=2, vjust=1.3, hjust=.5, show.legend=FALSE) +
geom_textvline(data=data.frame(Altersgruppe2=factor('12-17', levels=c('<5','5-11','12-17'))),
aes(xintercept=as.POSIXct(as.Date('2021-06-07','%Y-%m-%d'))), label="approval of vaccination", linetype='dashed', color='darkgreen', size=2, vjust=1.3, hjust=.5, show.legend=FALSE) +
geom_hline(data=data_lines, aes(yintercept=1.1), color='blue', alpha=0.5) +
geom_texthline(data=data_lineslabels, aes(yintercept=1.1), color='blue', alpha=0.5, label='threshold', size=2, hjust=0.1) +
geom_point(alpha=.85) +
geom_glowpoint(data=D.tmp |> filter(Gruppe=='Undiagnostiziert und ungeimpft', Seropositive==TRUE), color='yellow', show.legend=FALSE) +
scale_x_datetime(limits=c(as.POSIXct("2020-11-01"), NA), date_breaks="3 months", date_minor_breaks="1 months", labels=scales::label_date_short(format=c("%Y","%b"), sep="\n"), expand=expansion(mult=c(0,.03))) +
scale_y_continuous(trans='log2', breaks=2^c(-3,-1,1,3), labels=c('0.125','0.5','2','8'), guide="axis_minor", minor_breaks=2^c(-5,2*c(-2:2))) +
scale_shape_manual(values=c3a, labels=labels_eng) +
scale_color_manual(values=c2a, labels=labels_eng) +
scale_fill_manual(values=c1a, labels=labels_eng) +
facet_wrap(~Altersgruppe2, ncol=2, labeller=as_labeller(group_labeller_N_both)) +
theme(axis.ticks.length.y = unit(0.4, "cm"),
ggh4x.axis.ticks.length.minor = rel(0.4),
ggh4x.axis.ticks.length.mini = rel(0.15),
legend.position = c(.8, 0),
legend.justification = c(.8, 0)) +
guides(color=mygl, fill=mygl, shape=mygl) +
labs(title='', x='', y="IgG-S1 [ratio]")
Sys.setlocale("LC_TIME", "en_US.UTF-8")
p
ggsave('figures/Figure_4.pdf', width=7.5, height=5)
```
## Figure 5 Sensitivity of serological tests before and after emerging Omicron variants
```{r figure5, fig.width=9, fig.height=9}
# Definition of colors
library(RColorBrewer)
c1 <- c(brewer.pal(3, 'Dark2'), '#000000')
c2 <- c(NA,'#000000',c1[c(3)],'#FFFFFF')
# Prepare data frame for plotting
D.tmp <- D1 |>
filter(Gruppe=="Nur diagnostiziert") |>
mutate(IgG1 = IgG) |>
pivot_longer(cols=c(IgA,IgG_NCP,cPass,IgG1), names_to='Test', values_to='Result') |>
filter(!is.na(Result)) |>
mutate(
Test = factor(Test, levels=c('cPass','IgA','IgG_NCP','IgG1'), labels=c('NAbs [%]','IgA-S1 [ratio]','IgG-NCP [ratio]','IgG-S1 [ratio]')),
#labels=c('cPass sVNT [%]','IgA-ELISA [ratio]','NCP-ELISA (IgG) [ratio]','IgG-ELISA [ratio]')),
IgA_Deficiency = if_else(Pseudonym %in% c('U4TN7','A4EW5') & Test=='IgA-S1 [ratio]', 'SIgAD',''), # Selective immunoglobulin A (IgA) deficiency
IgG_pos = IgG>=1.1
) |>
droplevels() |>
mutate(TimeDiff.cat = factor(fct_explicit_na(TimeDiff.cat, na_level="Unbekannt"), ordered=FALSE))
n_fun <- function(data){
tmp = data |>
count(Test) |>
mutate(x=2^-3.25, y=if_else(Test=='NAbs [%]', 98, 2^3.85), label=paste0(n), color='black', fill=NA, shape=NA, TimeDiff.cat=NA)
return(tmp)
}
labels_eng <- str_replace_all(
levels(D.tmp$TimeDiff.cat),
c("weniger als 4 Wochen"="less than 4 weeks",
"4 bis 8 Wochen"="4 to 8 weeks",
"8 bis 16 Wochen"="8 to 16 weeks",
"16 Wochen oder mehr"="16 weeks or more",
"Unbekannt"="unknown"))
mygl <- guide_legend(title="Weeks since last infection", title.position='top', ncol=2, byrow=TRUE)
data_lineslabels <- data.frame(Test=factor('NAbs [%]'), Gruppe=factor('Nur diagnostiziert'))
data_lines <- D.tmp |> count(Test, Gruppe) |> select(-n) |> setdiff(data_lineslabels)
library(grImport2)
sym.grob <- symbolsGrob(readPicture("icon_kid.svg"), x=.21, y=.935, default.units="npc", size=0.07)
# Subplot 1: IgG-S1 response vs other serological tests marked by the time elapsed since the last infection
library(geomtextpath)
library(ggh4x)
p1 <- ggplot(D.tmp, aes(x=IgG, y=Result, color=TimeDiff.cat, fill=TimeDiff.cat, shape=TimeDiff.cat)) +
geom_hline(data=data_lines |> filter(Test!='NAbs [%]'), aes(yintercept=1.1), color='darkgreen', alpha=0.5) +
geom_vline(data=data_lines, aes(xintercept=1.1), color='blue', alpha=0.5) +
geom_texthline(data=data_lineslabels, aes(yintercept=30), color='darkgreen', alpha=0.5, label='threshold', size=2, hjust=0.1) +
geom_textvline(data=data_lineslabels, aes(xintercept=1.1), color='blue', alpha=0.5, label='threshold', size=2, hjust=0.9) +
geom_point(alpha=.7) +
geom_text(data=n_fun(D.tmp),
mapping=aes(x=x, y=y, label=label), hjust='right', vjust='center', angle=0, size=3, color='black', inherit.aes=FALSE) +
annotation_custom(sym.grob) +
theme(axis.ticks.length.x = unit(0.4, "cm"),
axis.ticks.length.y = unit(0.4, "cm"),
ggh4x.axis.ticks.length.minor = rel(0.4),
ggh4x.axis.ticks.length.mini = rel(0.15),
legend.position = 'bottom') +
labs(title="", x="IgG-S1 [ratio]", y="") +
facet_nested(Test~ "Children and adolescent with known infections" + "IgG-S1 [ratio]", scales='free_y', nest_line=element_line(colour="red")) +
facet_grid(Test~Gruppe, scales='free_y',
labeller=labeller(Gruppe = as_labeller(c("Nur diagnostiziert"="Children and adolescent\nwith known infections\nIgG-S1 [ratio]")))) +
scale_shape_manual(values=c(1, 24, 25, 23, 4), labels=labels_eng) +
scale_color_manual(values=c(c2, NA), labels=labels_eng) +
scale_fill_manual(values=c(c1, '#1B9E77'), labels=labels_eng) +
guides(color=mygl, fill=mygl, shape=mygl) +
facetted_pos_scales(
y = list(
Test == "NAbs [%]" ~ scale_y_continuous(limits=c(0,100), breaks=seq(0,100,30), guide="axis_minor", minor_breaks=seq(0,100,10)),
Test != "NAbs [%]" ~ scale_y_continuous(limits=2^c(-5,4), trans='log2', breaks=2^c(-3,-1,1,3), guide="axis_minor", minor_breaks=2^c(-5,2*c(-2:2)))
),
x = scale_x_continuous(trans='log2', breaks=2^c(-3,-1,1,3), guide="axis_minor", minor_breaks=2^c(-5,2*c(-2:2)))
)
# Prepare data frame for subplot 2
D.tmp2 <- D1 |>
filter(Gruppe=="Nur diagnostiziert") |>
mutate(IgG1 = IgG) |>
pivot_longer(cols=c(IgA,IgG_NCP,cPass,IgG1), names_to='Test', values_to='Result') |>
filter(!is.na(Result), !is.na(TimeDiff.cat)) |>
mutate(Result = if_else(Test=='cPass', Result, log2(Result)),
Test = factor(Test, levels=c('cPass','IgA','IgG_NCP','IgG1'), labels=c('NAbs [%]','IgA-S1 [ratio]','IgG-NCP [ratio]','IgG-S1 [ratio]')),
#labels=c('cPass sVNT [%]','IgA-ELISA [ratio]','NCP-ELISA (IgG) [ratio]','IgG-ELISA [ratio]')),
IgA_Deficiency = if_else(Pseudonym %in% c('U4TN7','A4EW5') & Test=='IgA-S1 [ratio]', 'SIgAD',''),
IgG_pos = IgG>=1.1,
Omicron = if_else((Date-days(floor(TimeDiff_mean)))>as.POSIXct("2022-01-05"), TRUE, FALSE)) |>
droplevels()
n_fun <- function(data){
tmp = data |>
count(IgG_pos, Test, Omicron) |>
mutate(x=1, y=if_else(Test=='NAbs [%]', 98, 3.85), label=paste0(n), color='black', TimeDiff.cat=NA)
return(tmp)
}
labels <- str_replace_all(levels(D.tmp$TimeDiff.cat),
c("weniger als 4 Wochen"="<4",
"4 bis 8 Wochen"="4-8",
"8 bis 16 Wochen"="8-16",
"16 Wochen oder mehr"="≥16"))
labels[4] <- expression("">=16)
data_lines <- D.tmp2 |> count(Test, IgG_pos, Omicron) |> select(-n)
data_lineslabels <- data_lines[1, ]
data_lines <- data_lines[-1, ]
library(grImport2)
sym.grob <- symbolsGrob(readPicture("icon_kid.svg"), x=.21, y=.935, default.units="npc", size=0.07)
p2 <- ggplot(D.tmp2,
aes(x=TimeDiff.cat, y=Result, fill=TimeDiff.cat)) +
geom_hline(data=data_lines |> filter(Test=='NAbs [%]'), aes(yintercept=30), color='darkgreen', alpha=0.5) +
geom_hline(data=data_lines |> filter(!(Test %in% c('nAb [%]','IgG-S1 [ratio]'))), aes(yintercept=log2(1.1)), color='darkgreen', alpha=0.5) +
geom_hline(data=data_lines |> filter(Test=='IgG-S1 [ratio]'), aes(yintercept=log2(1.1)), color='blue', alpha=0.5) +
geom_texthline(data=data_lineslabels, aes(yintercept=30), color='darkgreen', alpha=0.5, label='threshold', size=2, hjust=0.1) +
geom_boxplot(alpha=.5) +
geom_dotplot(binaxis="y", stackdir="center", dotsize=1, alpha=.75) +
geom_text(data=n_fun(D.tmp2),
mapping=aes(x=x, y=y, group=IgG_pos, label=label), hjust='right', vjust='center', angle=0, size=3, color='black', inherit.aes=FALSE) +
annotation_custom(sym.grob) +
facet_nested(Test~ Omicron + IgG_pos, scales='free_y', nest_line=element_line(colour="red"),
labeller=labeller(IgG_pos = as_labeller(c("FALSE"="'Negative\nIgG-S1 [ratio<1.1]'","TRUE"="'Positive\nIgG-S1 [ratio'>='1.1]'"), label_parsed),
Omicron = as_labeller(c("FALSE"="Infected before 06 Jan 2022\n(Delta domination)","TRUE"="Infected from 06 Jan 2022\n(Omicron domination)")))) +
scale_x_discrete(labels=labels[1:4]) +
scale_fill_manual(values=c1, labels=labels[1:4]) +
theme(axis.ticks.length.y = unit(0.4, "cm"),
ggh4x.axis.ticks.length.minor = rel(0.4),
ggh4x.axis.ticks.length.mini = rel(0.15),
ggh4x.facet.nestline = element_line(linetype = 3),
legend.position = 'none') +
labs(title="", x="Weeks since last infection", y="") +
facetted_pos_scales(
y = list(
Test == "NAbs [%]" ~ scale_y_continuous(limits=c(0, 100), breaks=seq(0,100,30), guide="axis_minor", minor_breaks=seq(0,100,10)),
Test != "NAbs [%]" ~ scale_y_continuous(limits=c(-5,4), breaks=c(-3,-1,1,3), guide="axis_minor", minor_breaks=c(-5,2*c(-2:2)), labels=paste(2^c(-3,-1,1,3)))
)
)
library(ggrepel)
library(patchwork)
(p1 + geom_label_repel(aes(label=IgA_Deficiency), color='black', fill=NA, size=2, show.legend=FALSE)) +
(p2 + geom_label_repel(aes(label=IgA_Deficiency), color='black', fill=NA, size=2, show.legend=FALSE)) +
guide_area() +
plot_layout(widths=c(2,6), heights=c(9,1), guides='collect') +
plot_annotation(title='Antibody levels compared in diagnosed children depending on time of infection', tag_levels='a')
ggsave('figures/Figure_5.pdf', width=9, height=9)
```
## Figure S4 Seropositivity by age and time of blood sampling with attribution of known infections and vaccination
```{r figureS4, fig.width=7, fig.height=6}
library(geomtextpath)
library(shadowtext)
# Subplot 1: Reported cases to the health department LAGuS
p1 <- ggplot(LAGuS_sum2 |> filter(type=="Cases"), aes(as.POSIXct(Date), n_7avg)) +
annotate("rect", xmin=as.POSIXct('2020-12-16','%Y-%m-%d'), xmax=as.POSIXct('2021-01-10','%Y-%m-%d'), ymin=0, ymax=500, alpha=0.2, fill="red", color=NA) +
annotate("rect", xmin=as.POSIXct('2020-12-16','%Y-%m-%d'), xmax=as.POSIXct('2021-02-22','%Y-%m-%d'), ymin=0, ymax=500, alpha=0.2, fill="red", color=NA) +
annotate("rect", xmin=as.POSIXct('2020-12-16','%Y-%m-%d'), xmax=as.POSIXct('2021-05-26','%Y-%m-%d'), ymin=0, ymax=500, alpha=0.2, fill="red", color=NA) +
annotate("rect", xmin=as.POSIXct('2021-06-20','%Y-%m-%d'), xmax=as.POSIXct('2022-01-06','%Y-%m-%d'), ymin=0, ymax=Inf, alpha=0.5, fill="gray", color=NA) +
geom_vline(aes(xintercept=as.POSIXct('2021-12-13','%Y-%m-%d')), linetype='dashed', color='darkgreen', alpha=0.5) +
geom_vline(aes(xintercept=as.POSIXct('2021-06-07','%Y-%m-%d')), linetype='dashed', color='darkgreen', alpha=0.5) +
geom_line() +
geom_textsmooth(data=LAGuS_sum2 |> filter(type=="Cases", Date>=as.Date('2020-12-10')), mapping=aes(as.POSIXct(Date), n_7avg),
formula='y~x', method='loess', span=.05, se=FALSE, label='Reported cases to the health department', size=2.5, hjust=0.15, vjust=-.2, fill=NA, linewidth=NA, textcolor='#333333') +
geom_textvline(mapping=aes(xintercept=as.POSIXct(as.Date('2021-12-13','%Y-%m-%d'))), label="vaccination >5y", linetype='dashed', color='darkgreen', alpha=0.5, size=2, vjust=1.3, hjust=.9, show.legend=FALSE) +
geom_textvline(mapping=aes(xintercept=as.POSIXct(as.Date('2021-06-07','%Y-%m-%d'))), label="vaccination >12y", linetype='dashed', color='darkgreen', alpha=0.5, size=2, vjust=1.3, hjust=.9, show.legend=FALSE) +
geom_texthline(mapping=aes(yintercept=500), label="School lockdown", color='red', linetype=NA, size=2, vjust=-0.3, hjust=.025, show.legend=FALSE) +
scale_x_datetime(date_breaks="2 months", date_minor_breaks="1 months",
labels=scales::label_date_short(format=c("%Y","%b"), sep="\n"),
expand=c(0,0), limits=as.POSIXct(c('2020-12-10','2022-09-01'))) +
scale_y_continuous(expand=c(0,0), limits=c(0,640)) +
labs(x='', y='Regional daily\ninfections in\nchildren')
# Facet counts
n <- D |>
filter(!is.na(Altersgruppe2), !is.na(Seropositive), !is.na(Wave), !is.na(Gruppe)) |>
count(Wave_descr)
# Count serological results and calculate percentages within each facet
n2 <- D |>
filter(!is.na(Altersgruppe2), !is.na(Seropositive), !is.na(Wave), !is.na(Gruppe)) |>
mutate(Seropositive = factor(Seropositive)) |>
group_by(Altersgruppe2, Wave_descr) |>
count(Seropositive) |>
mutate(pct=n/sum(n),
ypos = 1-(cumsum(pct)-pct/2))
# Subplot 2: Seropositivity by age group
group_labeller <- function(variable,value) {
return(paste0(n$Wave_descr, "\nn=", n$n)[value])
}
p2 <- ggplot(n2, aes(x=Altersgruppe2, y=n, fill=factor(Seropositive))) +
geom_bar(stat="identity", position='fill') +
scale_fill_brewer(palette="YlGn") +
scale_y_continuous(labels=scales::percent_format()) +
facet_grid(.~Wave_descr, labeller=group_labeller) +
geom_shadowtext(aes(label=as.character(n), y=ypos, group=factor(Seropositive), color=ifelse(n>=1, 'show', 'hide')), bg.colour='white', size=2.5, vjust=-0.3) +
geom_shadowtext(aes(label=paste0(sprintf("%1.1f", pct*100),"%"), y=ypos, group=factor(Seropositive), color=ifelse(n>=1, 'show', 'hide')), bg.colour='white', size=2.5, vjust=1.2) +
coord_flip(clip = "off") +
scale_color_manual(values=c('show'='black', 'hide'='transparent'), guide='none') +
scale_x_discrete(limits=rev) +
labs(title="", x="Age group", y="") +
theme(legend.position ='none', axis.text.x = element_text(size=6))
# Subplot 3: Seropositivity by age group in relation to known infection (diagnosed) and vaccination
group_labeller <- function(variable,value) {
return(paste0('')[value])
}
p3 <- ggplot(D |> filter(!is.na(Gruppe)), aes(x=Altersgruppe2, fill=fct_cross(Gruppe,factor(Seropositive)))) +
geom_bar(position='fill') +
scale_fill_brewer(palette="YlGn") +
scale_y_continuous(labels=scales::percent_format()) +
facet_grid(.~Wave_descr, labeller=group_labeller) +
coord_flip(clip = "off") +
geom_shadowtext(aes(label=..count.., group=fct_cross(Gruppe,factor(Seropositive)), color=ifelse(..count..>=1, 'show', 'hide')), stat='count', bg.colour='white', size=2.5, position=position_fill(vjust=.5)) +
scale_color_manual(values=c('show'='black', 'hide'='transparent'), guide='none') +
scale_x_discrete(limits=rev) +
labs(title="", x="", y="") +
scale_fill_manual(values=c('#ccebc5','#b3cde3','#fbb4ae','#984ea3','#4daf4a','#377eb8','#e41a1c')) +
theme(legend.position='none', axis.text.x = element_text(size=6))
library(patchwork)
Sys.setlocale("LC_TIME", "en_US.UTF-8")
p2 + p3 + p1 + plot_layout(heights=c(1,1,1.5), guides="collect")
ggsave('figures/Figure_S4.pdf', width=7, height=6)
```
## Figure S5 Seropositivity by age and main factors
```{r figureS5, fig.width=7, fig.height=4}
# Seroprevalence of unvaccinted
# Prepare data sets for faceting
n2a <- D2 |>
filter(!is.na(Altersgruppe2), !is.na(Seropositive), !is.na(Wave)) |>
mutate(Seropositive = factor(Seropositive)) |>
group_by(Altersgruppe2, Wave_descr) |>
count(Seropositive) |>
mutate(
pct = n/sum(n),
ypos = 1-(cumsum(pct)-pct/2),
label = as.character(Wave_descr),
Facet = 'Study period'
)
n2b <- D2 |>
filter(!is.na(Altersgruppe2), !is.na(Seropositive), !is.na(Anam_SARSCoV2_Familie)) |>
mutate(Seropositive = factor(Seropositive),
Anam_SARSCoV2_Familie = factor(Anam_SARSCoV2_Familie, levels=c('Nur Eltern','Weder Eltern noch Kind'))) |>
group_by(Altersgruppe2, Anam_SARSCoV2_Familie) |>
count(Seropositive) |>
mutate(
pct = n/sum(n),
ypos = 1-(cumsum(pct)-pct/2),
label = as.character(fct_recode(Anam_SARSCoV2_Familie,
"No infection known"="Weder Eltern noch Kind",
"Infected household member"="Nur Eltern")),
Facet = 'Household\ninfections'
)
n2c <- D2 |>
filter(!is.na(Altersgruppe2), !is.na(Seropositive), Haus_Gesundheitssystem!='unbekannt') |>
mutate(Seropositive = factor(Seropositive)) |>
group_by(Altersgruppe2, Haus_Gesundheitssystem) |>
count(Seropositive) |>
mutate(
pct = n/sum(n),
ypos = 1-(cumsum(pct)-pct/2),
label = as.character(fct_recode(Haus_Gesundheitssystem,
"Household working in health care"="ja",
"Household not working in health care"="nein")),
Facet = 'Patient\ncontact'
)
n2d <- D2 |>
mutate(
Anam_Chronische_Erkrankung.Group = factor(fct_cross(factor(Anam_Chronische_Erkrankung), factor(Anam_Chronische_Erkrankung.AtemwegeLunge)),
levels=c('FALSE:FALSE','TRUE:FALSE','TRUE:TRUE'),
labels=c('No chronic diseases', 'Chronic diseases except respiratory', 'Chronic diseases including respiratory'))) |>
filter(!is.na(Altersgruppe2), !is.na(Seropositive), !is.na(Anam_Chronische_Erkrankung.Group)) |>
mutate(
Seropositive = factor(Seropositive),
Anam_Chronische_Erkrankung.Group = factor(Anam_Chronische_Erkrankung.Group, levels=c('No chronic diseases', 'Chronic diseases except respiratory', 'Chronic diseases including respiratory'))) |>
group_by(Altersgruppe2, Anam_Chronische_Erkrankung.Group) |>
count(Seropositive) |>
mutate(
pct = n/sum(n),
ypos = 1-(cumsum(pct)-pct/2),
label = as.character(Anam_Chronische_Erkrankung.Group),
Facet = 'Chronic\ndisease'
)
# Bar plot of seropositivity by age group and main characteristics
p <- ggplot(
data = n2a |>
rbind(n2b,n2c,n2d) |>
mutate(
label=factor(label),
Facet=factor(Facet),
Altersgruppe2 = factor(Altersgruppe2, levels=c('<5','5-11','12-17'), labels=paste0(c('Children under 5\n','School children\n','Adolescents\n'), paste('Age:',c('<5','5-11','12-17'),'years')))),
mapping = aes(x=label, y=n, fill=factor(Seropositive))
) +
geom_bar(stat="identity", position='fill') +
scale_fill_brewer(palette="YlGn") +
scale_y_continuous(labels=scales::percent_format()) +
facet_grid(Facet~Altersgruppe2, scales="free_y") +
force_panelsizes(rows = c(3,3,2,2)) +
geom_shadowtext(aes(label=as.character(n), y=ypos, group=factor(Seropositive), color=ifelse(n>=1, 'show', 'hide')), bg.colour='white', size=2, fontface='italic', vjust=1.5) +
geom_shadowtext(aes(label=paste0(sprintf("%1.1f", pct*100),"%"), y=ypos, group=factor(Seropositive), color=ifelse(n>=1, 'show', 'hide')), bg.colour='white', size=2.5, vjust=0) +
coord_flip(clip = "off") +
scale_color_manual(values=c('show'='black', 'hide'='transparent'), guide='none') +
scale_x_discrete(limits=rev) +
labs(title="", x="", y="") +
theme(legend.position='none', axis.text.x=element_text(size=6), strip.text.x=element_text(size=9), strip.text.y=element_text(size=7.5))
p
ggsave('figures/Figure_S5.pdf', width=7, height=4)
```
\newpage
# Tables
## Table 1 Characteristics of the study population stratified by age
```{r table1, results='asis'}
# Prepare data set for Table 1
Dtmp <- D1 |>
mutate(
Haus_Gesundheitssystem = fct_recode(Haus_Gesundheitssystem, NULL="unbekannt"),
Haus_Abschluss = factor(Haus_Abschluss, ordered=FALSE),
Wave_descr2 = factor(fct_collapse(Wave_descr, "Alpha- und Deltawellen\n(10.12.20-05.01.22)" = c("Alphawelle\n(10.12.20-19.06.21)", "Deltawelle\n(20.06.21-05.01.22)")), ordered=FALSE),
Haus_Tiere.Group = factor(fct_cross(factor(Haus_Tiere), factor(Haus_Tiere_Art.Hund)),
levels=c('FALSE:FALSE','TRUE:FALSE','TRUE:TRUE'),
labels=c('No animals', 'Animals excluding dog', 'Animals including dog')),
Haus_Tiere.Group2 = factor(fct_cross(factor(Haus_Tiere), factor(Haus_Tiere_Art.Katze)),
levels=c('FALSE:FALSE','TRUE:FALSE','TRUE:TRUE'),
labels=c('No animals', 'Animals excluding cat', 'Animals including cat')),
Anam_Chronische_Erkrankung.Group = factor(fct_cross(factor(Anam_Chronische_Erkrankung), factor(Anam_Chronische_Erkrankung.AtemwegeLunge)),
levels=c('FALSE:FALSE','TRUE:FALSE','TRUE:TRUE'),
labels=c('No chronic diseases', 'Chronic diseases except respiratory', 'Chronic diseases including respiratory')),
Anam_Raucher_Kind = fct_collapse(Anam_Raucher_Kind, "ja/vielleicht"=c("ja","vielleicht"))
)
colselection1 <- c("Geschlecht","Wave_descr","Altersgruppe2",
'MonthsAtRiskAlpha','MonthsAtRiskDelta','MonthsAtRiskOmicron',
"ImpfungMoeglich","Gruppe","Anam_SARSCoV2_Familie",
"IgG","IgG_NCP","IgA","cPass","Seropositive","Gruppe2",
"Haus_Gruppe",
"Anam_Medikamention","Anam_Chronische_Erkrankung.Group",
"Anam_Raucher_Eltern","Anam_Raucher_Kind",
"Haus_Gesundheitssystem","Haus_Abschluss","Haus_Mitglieder",
"Haus_Kontaktpersonen_Kleinkind","Haus_Kontaktpersonen_Senior",
"Haus_Tiere",#"Haus_Tiere.Group","Haus_Tiere.Group2",
"Reise_Reiseverhalten",
"Belastung_Eltern_berufliche_Unsicherheit","Belastung_Eltern_eigeneErkrankung","Belastung_Eltern_AngehörigeErkrankung")
# Table 1: Table of participants stratified by age
tab1 <- tbl_summary(
data = Dtmp |> select(all_of(colselection1)), by=Altersgruppe2, missing="no",
#type = all_dichotomous() ~ "categorical",
statistic = list(all_dichotomous() ~ "{n}/{N} ({p}%)",
all_continuous() ~ "{mean} ({sd})",
all_of(nonnormal) ~ "{median} ({p25}, {p75})")
) |>
add_n() |>
modify_header(label = "**Variable**") |>
modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Age group**") |>
modify_caption("**Table 1: Table of participants stratified by age**") |>
bold_labels()
tab1 |> as_flex_table() |> save_as_html(path="tables/Table_1.html")
# Table 1A: Table of participants stratified by age, selected variables with statistical testing
tbl_summary(
data = Dtmp |> select(Altersgruppe2,Geschlecht,Anam_Impfung_Proband,Anam_SARSCoV2_Familie), by=Altersgruppe2, missing="no",
statistic = list(
all_continuous() ~ "{mean} ({sd})",
all_categorical() ~ "{n} ({p}%)")
) |>
add_n() |>
add_p(
test = list(all_categorical() ~ "fisher.test"),
test.args = list(all_tests("t.test") ~ list(var.equal=TRUE),
all_tests("fisher.test") ~ list(workspace=2e7)),
pvalue_fun = ~style_pvalue(.x, digits=3)
) |>
as_flex_table() |> save_as_html(path="tables/Table_1_A.html")
```
## Table S1 Seroprevalences of the study population stratified by vaccination and infection status
```{r tableS1, results='asis'}
# Select variables
colselection1 <- c("Altersgruppe2","Geschlecht","Wave",
"Haus_Gruppe",
"Anam_Medikamention",
"Anam_Chronische_Erkrankung","Anam_Chronische_Erkrankung.AtemwegeLunge",
"Anam_Raucher_Eltern","Anam_Raucher_Kind",
"Haus_Gesundheitssystem","Haus_Abschluss","Haus_Mitglieder",
"Haus_Kontaktpersonen_Kleinkind","Haus_Kontaktpersonen_Senior",
"Haus_Tiere",
"Reise_Reiseverhalten",
"Belastung_Eltern_berufliche_Unsicherheit","Belastung_Eltern_eigeneErkrankung","Belastung_Eltern_AngehörigeErkrankung")
# Table of seroprevalence stratified by vaccination and infection status
tab <- tbl_strata(
data = D1 |>
select(any_of(colselection1), Gruppe, Seropositive) |>
mutate(Anam_Chronische_Erkrankung.Group = factor(fct_cross(factor(Anam_Chronische_Erkrankung), factor(Anam_Chronische_Erkrankung.AtemwegeLunge)),
levels=c('FALSE:FALSE','TRUE:FALSE','TRUE:TRUE'),
labels=c('No chronic diseases', 'Chronic diseases except respiratory', 'Chronic diseases including respiratory')),
Anam_Raucher_Kind = fct_collapse(Anam_Raucher_Kind, "ja/vielleicht"=c("ja","vielleicht")),
Gruppe = fct_collapse(Gruppe, Geimpft = c('Diagnostiziert und geimpft', 'Nur geimpft')),
Haus_Gruppe = fct_collapse(Haus_Gruppe, 'Schule ohne Hort' = c("Schule ohne Hort (Alter<12)", "Schule ohne Hort (Alter>=12)"))
) |>
relocate(Anam_Chronische_Erkrankung.Group, .before=Anam_Chronische_Erkrankung) |>
select(-Anam_Chronische_Erkrankung, -Anam_Chronische_Erkrankung.AtemwegeLunge) |>
filter(!is.na(Gruppe)) |>
droplevels(),
strata = Gruppe,
~.x |>
tbl_summary(
by = Seropositive, missing="no", percent="row",
type = all_dichotomous() ~ "categorical",
statistic = list(all_categorical() ~ "{n}/{N} ({p}%)",
all_continuous() ~ "{mean} ({sd})",
any_of(nonnormal) ~ "{median} ({p25}, {p75})")
) |>
add_n() |>
add_p(
test = list(all_continuous() ~ "t.test",
any_of(nonnormal) ~ "wilcox.test",
all_categorical() ~ "fisher.test"),
test.args = list(all_tests("t.test") ~ list(var.equal=TRUE),
all_tests("fisher.test") ~ list(workspace=2e7)),
pvalue_fun = ~style_pvalue(.x, digits = 2)
) |>
modify_header(label ~ "**Variable**") |>
bold_labels()
)
tab |> as_flex_table() |> save_as_html(path="tables/Table_S1.html")
```
## Table 2 Silent infection rates among undiagnosed and unvaccinated children and adolescents in Pomerania, stratifed based on the time of study enrollment
```{r table2}
# Table 2: Silent infection rates among undiagnosed and unvaccinated children and adolescents in Pomerania, stratifed based on the time of study enrollment
tab <- D |>
mutate(label = as.character(Wave_descr)) |>
filter(Gruppe=='Undiagnostiziert und ungeimpft') |>
select(Altersgruppe2, label, Seropositive) |>
group_nest(label) |>
mutate(
tbl = map2(
label, data,
~tbl_summary(.y, by=Altersgruppe2,
label = list(Seropositive = .x),
statistic = list(all_categorical() ~ "{n} / {N} ({p}%)")
) |>
add_p() |>
modify_header(label = "**Study period**",
all_stat_cols() ~ "**Age: {level} years**") |>
modify_column_alignment(columns=label, align="right")
)
) |>
pull(tbl) |>
tbl_stack()
library(gt)
tab |> as_gt() |> gtsave(filename="tables/Table_2.html")
```
## Additional tables
```{r}
# Calculation of the confidence interval for seropositivity in unvaccinated and uninfected children according to self-report
D2 |> count(Seropositive)
library(PropCIs)
exactci(41, 806, .95)
```
\newpage
# Regression models
```{r}
# Prepare data sets for regression models
# I) Data set for logistic regression for silent SARS-CoV-2 infection
D_LR.SI <- D2 |>
mutate(
Haus_Gesundheitssystem = fct_recode(Haus_Gesundheitssystem, NULL="unbekannt"),
Altersgruppe2 = factor(Altersgruppe2, levels=c('<5','5-11','12-17'), ordered=FALSE),
Altersgruppe2 = fct_relevel(Altersgruppe2, '<5'),
Haus_Abschluss = factor(Haus_Abschluss, ordered=FALSE),
Wave_descr = factor(Wave_descr, ordered=FALSE),
Wave_descr2 = factor(fct_collapse(Wave_descr, "Alpha- and Delta waves\n(10/Dec/20-05/Jan/22)" = c("Alpha wave (10/Dec/20-19/Jun/21)", "Delta waves (20/Jun/21-05/Jan/22)")), ordered=FALSE),
Anam_Chronische_Erkrankung.Group = factor(fct_cross(factor(Anam_Chronische_Erkrankung), factor(Anam_Chronische_Erkrankung.AtemwegeLunge)),
levels=c('FALSE:FALSE','TRUE:FALSE','TRUE:TRUE'),
labels=c('No chronic diseases', 'Chronic diseases except respiratory', 'Chronic diseases including respiratory')),
Anam_Raucher_Kind = fct_collapse(Anam_Raucher_Kind, "ja/vielleicht"=c("ja","vielleicht"))
) |>
rowwise() |>
mutate(
KumulierteInfektionen = case_when(
Altersgruppe2=='<5' ~ 100*sum(LAGuS_sum$n[LAGuS_sum$type=="Cases" & LAGuS_sum$Altersgruppe2=='<5' & LAGuS_sum$Date<=Date])/27093,
Altersgruppe2=='5-11' ~ 100*sum(LAGuS_sum$n[LAGuS_sum$type=="Cases" & LAGuS_sum$Altersgruppe2=='5-11' & LAGuS_sum$Date<=Date])/43295,
Altersgruppe2=='12-17' ~ 100*sum(LAGuS_sum$n[LAGuS_sum$type=="Cases" & LAGuS_sum$Altersgruppe2=='12-17' & LAGuS_sum$Date<=Date])/36898)
)
# II) Data set for logistic regression for known and silent (overall) SARS-CoV-2 infection
D_LR.OI <- D3 |>
mutate(
Haus_Gesundheitssystem = fct_recode(Haus_Gesundheitssystem, NULL="unbekannt"),
Altersgruppe2 = factor(Altersgruppe2, levels=c('<5','5-11','12-17'), ordered=FALSE),
Altersgruppe2 = fct_relevel(Altersgruppe2, '<5'),
Haus_Abschluss = factor(Haus_Abschluss, ordered=FALSE),
Wave_descr = factor(Wave_descr, ordered=FALSE),
Wave_descr2 = factor(fct_collapse(Wave_descr, "Alpha- und Deltawellen\n(10.12.20-05.01.22)" = c("Alphawelle\n(10.12.20-19.06.21)", "Deltawelle\n(20.06.21-05.01.22)")), ordered=FALSE),
Anam_Chronische_Erkrankung.Group = factor(fct_cross(factor(Anam_Chronische_Erkrankung), factor(Anam_Chronische_Erkrankung.AtemwegeLunge)),
levels=c('FALSE:FALSE','TRUE:FALSE','TRUE:TRUE'),
labels=c('No chronic diseases', 'Chronic diseases except respiratory', 'Chronic diseases including respiratory')),
Anam_Raucher_Kind = fct_collapse(Anam_Raucher_Kind, "ja/vielleicht"=c("ja","vielleicht")),
Haus_Mitglieder = fct_collapse(Haus_Mitglieder, '1-3'=c('1 oder 2','3')),
Anam_SARSCoV2_Familie = fct_collapse(Anam_SARSCoV2_Familie, 'Eltern nicht infiziert'=c('Weder Eltern noch Kind','Nur Kind'), 'Eltern infiziert'=c('Nur Eltern','Eltern und Kind'))
) |>
rowwise() |>
mutate(
KumulierteInfektionen = case_when(
Altersgruppe2=='<5' ~ 100*sum(LAGuS_sum$n[LAGuS_sum$type=="Cases" & LAGuS_sum$Altersgruppe2=='<5' & LAGuS_sum$Date<=Date])/27093,
Altersgruppe2=='5-11' ~ 100*sum(LAGuS_sum$n[LAGuS_sum$type=="Cases" & LAGuS_sum$Altersgruppe2=='5-11' & LAGuS_sum$Date<=Date])/43295,
Altersgruppe2=='12-17' ~ 100*sum(LAGuS_sum$n[LAGuS_sum$type=="Cases" & LAGuS_sum$Altersgruppe2=='12-17' & LAGuS_sum$Date<=Date])/36898)
)
# Factors for infection
colselection1 <- c("Gruppe2","Anam_SARSCoV2_Familie",
"Geschlecht","Altersgruppe2",
#"MonthsAtRiskAlpha","MonthsAtRiskDelta","MonthsAtRiskOmicron",
"Wave_descr","KumulierteInfektionen",
"Anam_Fieber","Anam_Medikamention","Anam_Raucher_Eltern","Anam_Raucher_Kind",
"Anam_Chronische_Erkrankung","Anam_Chronische_Erkrankung.AtemwegeLunge","Anam_Chronische_Erkrankung.Herz","Anam_Chronische_Erkrankung.Group",
"Reise_Reiseverhalten",
"Haus_Gesundheitssystem","Haus_Abschluss","Haus_Mitglieder",
"Haus_Kontaktpersonen_Kleinkind","Haus_Kontaktpersonen_Senior",
"Haus_Tiere",
"Belastung_Eltern_berufliche_Unsicherheit","Belastung_Eltern_eigeneErkrankung","Belastung_Eltern_AngehörigeErkrankung")
# Data frames with selected predictor variables
D_LR.SI <- D_LR.SI |> select(all_of(colselection1))
D_LR.SI |> count(Gruppe2)
# Nicht_Infiziert 765
# Infiziert 41
D_LR.OI <- D_LR.OI |> select(all_of(colselection1))
D_LR.OI |> count(Gruppe2)
# Nicht_Infiziert 765
# Infiziert 203
```
## Imputation of missing data
```{r, fig.width=8, fig.height=6}
# Missing data pattern
library(VIM)
aggr_plot.SI <- aggr(D_LR.SI, numbers=TRUE, sortVars=TRUE, prop = c(TRUE, FALSE), cex.axis=.525, gap=3, ylab=c("Histogram of missing data","Pattern")) #labels=varnames,
aggr_plot.OI <- aggr(D_LR.OI, numbers=TRUE, sortVars=TRUE, prop = c(TRUE, FALSE), cex.axis=.525, gap=3, ylab=c("Histogram of missing data","Pattern"))
# Random Forest Imputation
library(mice)
set.seed(XXXXXXXX)
D_LR.SI.im <- mice(D_LR.SI |> select(-Gruppe2) |> mutate(across(where(is_logical), as_factor)), m=10, maxit=50, method='rf', printFlag=FALSE)
save(D_LR.SI.im, file="D_LR.SI.im.Rda")
D_LR.OI.im <- mice(D_LR.OI |> select(-Gruppe2) |> mutate(across(where(is_logical), as_factor)), m=10, maxit=50, method='rf', printFlag=FALSE)
save(D_LR.OI.im, file="D_LR.OI.im.Rda")
load("D_LR.SI.im.Rda")
load("D_LR.OI.im.Rda")
```
## Cross-validated elastic net for silent infection
```{r SI_model}
# Use all imputed data frames
SI.imp.complete <- mice::complete(D_LR.SI.im, action="all")
# Find the optimal value of lambda that minimizes the cross-validation error and pool results from multiple imputed data frames
library(glmnet)
my.cvglmnet <- function(data, y, alpha=.5, family="binomial", nfolds=4) {
x = model.matrix(~ KumulierteInfektionen:Altersgruppe2 + ., data=data)
cv.lasso = cv.glmnet(x, y, alpha=alpha, family=family, nfolds=nfolds) # Perform cross-validated elastic net (alpha=1: for lasso regression, 0 for ridge regression)
print(log(c(cv.lasso$lambda.min, cv.lasso$lambda.1se)))
lasso.model = glmnet(x, y, alpha=alpha, family=family, lambda=cv.lasso$lambda.1se) # Final model with lambda.1se
coef(lasso.model) |> as.matrix() |> as.data.frame() |> rownames_to_column()#|> rename(estimate='s0')
#plot(cv.lasso)
#coef(cv.lasso, cv.lasso$lambda.min)
#coef(cv.lasso, cv.lasso$lambda.1se)
}
set.seed(XXXXXXXX)
fit <- SI.imp.complete |>
lapply(my.cvglmnet, y=as.numeric(D_LR.SI$Gruppe2=='Infiziert'))
fits <- reshape::melt.list(fit) |>
group_by(rowname) |> summarise(M = sum(abs(value)>1e-20)) |>
ungroup() |>
filter(M>1)
# Generate final model
# No missing variables in Altersgruppe2 and KumulierteInfektionen
final.model.SI <-
glm(data=D_LR.SI, formula = D_LR.SI$Gruppe2=='Infiziert' ~ Altersgruppe2:KumulierteInfektionen + KumulierteInfektionen + Anam_SARSCoV2_Familie + 1, family=binomial)
summary(final.model.SI)
# final.model.SI <- SI.imp.complete |>
# lapply(glm, formula = D_LR.SI$Gruppe2=='Infiziert' ~ Altersgruppe2:KumulierteInfektionen + KumulierteInfektionen + Anam_SARSCoV2_Familie + 1, family=binomial)
# summary(final.model.SI |> pool())
library(car)
model.SI.lrtest <- Anova(final.model.SI, type='III') # likelihood-ratio tests
model.SI.lrtest
tbl_regression(final.model.SI, exponentiate=TRUE) # odds ratio calculation
# Check for multicollinearity
library(easystats)
x <- check_collinearity(final.model.SI)
x
plot(x) + coord_flip()
x <- check_autocorrelation(final.model.SI)
x
```
## Cross-validated elastic net for overall infection
```{r OI_model}
# Use all imputed data frames
OI.imp.complete <- mice::complete(D_LR.OI.im, action="all")
# Find the optimal value of lambda that minimizes the cross-validation error and pool results from multiple imputed data frames
set.seed(XXXXXXXX)
fit <- OI.imp.complete |>
lapply(my.cvglmnet, y=as.numeric(D_LR.OI$Gruppe2=='Infiziert'))
fits <- reshape::melt.list(fit) |>
group_by(rowname) |> summarise(M = sum(abs(value)>1e-20)) |>
ungroup() |>
filter(M>1)
# Generate final model
#final.model.OI <- OI.imp.complete |>
# lapply(glm, formula = D_LR.OI$Gruppe2=='Infiziert' ~ Anam_SARSCoV2_Familie + KumulierteInfektionen:Altersgruppe2 + KumulierteInfektionen + Wave_descr + 1, family=binomial)
#summary(final.model.OI |> pool())
final.model.OI <- with(D_LR.OI.im, glm(D_LR.OI$Gruppe2=='Infiziert' ~ Anam_SARSCoV2_Familie + KumulierteInfektionen:Altersgruppe2 + KumulierteInfektionen + Wave_descr + 1, family=binomial))
# final.model.OI |> pool() |> mice::tidy(exponentiate=TRUE, conf.int=TRUE, conf.level=0.95)
library(car)
final.model.OI_complete <-
glm(data=D_LR.OI, formula = D_LR.OI$Gruppe2=='Infiziert' ~ Anam_SARSCoV2_Familie + KumulierteInfektionen:Altersgruppe2 + KumulierteInfektionen + Wave_descr + 1, family=binomial)
model.OI.lrtest <- Anova(final.model.OI_complete, type='III') # likelihood-ratio tests
model.OI.lrtest
tbl_regression(final.model.OI_complete, exponentiate=TRUE) # odds ratio calculation
# Check for multicollinearity
library(easystats)
x <- check_collinearity(final.model.OI_complete)
x
plot(x) + coord_flip()
x <- check_autocorrelation(final.model.OI_complete)
x
```
## Table 3 Results of logistic regressions for associations with overall and silent infections
```{r table3}
gt_r1 <- tbl_regression(final.model.OI, exponentiate=TRUE)
gt_r2 <- tbl_regression(final.model.SI, exponentiate=TRUE)
theme_gtsummary_compact()
reg_all <- tbl_merge(
list(gt_r1, gt_r2),
tab_spanner = c("**Known and silent infections**", "**Silent infections**")
)
reg_all |> as_flex_table() |> save_as_html(path="tables/Table_3.html")
```
```{r bib, include=FALSE}
# KEEP THIS AT THE END OF THE DOCUMENT TO GENERATE A LOCAL bib FILE FOR PKGS USED
knitr::write_bib(sub("^package:", "", grep("package", search(), value=TRUE)), file='skeleton.bib')
```