library(outbreaks) library(tidyverse) library(plyr) library(mice) library(caret) library(purrr) H7N9 <- read.csv("C:/eksperimen/eksperimen/ave_inves_1819_4.csv", header=T, dec=",", sep=";") H7N9_backup <- H7N9 H7N9_backup <- fluH7N9_china_2013 head (H7N9) H7N9$luas[which(H7N9$luas == "?")] <- NA #H7N9_gather <- H7N9 %>% mutate(case_id = paste("luas", luas, sep = "_"),luas = as.numeric(luas)) %>% H7N9$case.ID <- paste("code", H7N9$kode_stat, sep = "_") head(H7N9) library(tidyr) H7N9_gather <- H7N9 %>% gather(Group, Date, bai_1819:ndvi_1819) H7N9_gather$Group <- factor(H7N9_gather$Group, levels = c("bai_1819", "ndvi_1819", "csi_1819")) library(plyr) H7N9_gather$Group <- mapvalues(H7N9_gather$Group, from = c("bai_1819", "ndvi_1819", "csi_1819"), to = c("bai_1819", "ndvi_1819", "csi_1819")) #H7N9_gather$nama_kab <- mapvalues(H7N9_gather$nama_kab,from = c("BANYUMAS", "PURBALINGGA", "BANJARNEGARA", "KEBUMEN", #"PURWOREJO", "WONOSOBO", "MAGELANG", "BOYOLALI", "KLATEN", "SUKOHARJO","GROBOGAN","PATI","KUDUS","JEPARA","DEMAK","SEMARANG", #"TEMANGGUNG","KENDAL","BATANG", "PEKALONGAN","PEMALANG","KOTA MAGELANG", #"KOTA SURAKARTA","KOTA SALATIGA","KOTA SEMARANG","KOTA PEKALONGAN"), to = rep("Other", 26)) H7N9_gather$nama_kab <- mapvalues(H7N9_gather$nama_kab,from = c("BANYUMAS", "PURBALINGGA", "BANJARNEGARA", "KEBUMEN", "PURWOREJO", "WONOSOBO", "MAGELANG", "BOYOLALI", "KLATEN", "SUKOHARJO"), to = rep("Other", 10)) levels(H7N9_gather$ddr) <- c(levels(H7N9_gather$ddr), "unknown") H7N9_gather$ddr[is.na(H7N9_gather$ddr)] <- "unknown" head(H7N9_gather) my_theme <- function(base_size = 12, base_family = "sans"){ theme_minimal(base_size = base_size, base_family = base_family) + theme( axis.text = element_text(size = 12), axis.text.x = element_text(angle = 45, vjust = 0.5, hjust = 0.5), axis.title = element_text(size = 14), panel.grid.major = element_line(color = "grey"), panel.grid.minor = element_blank(), panel.background = element_rect(fill = "aliceblue"), strip.background = element_rect(fill = "lightgrey", color = "grey", size = 1), strip.text = element_text(face = "bold", size = 12, color = "black"), legend.position = "bottom", legend.justification = "top", legend.box = "horizontal", legend.box.background = element_rect(colour = "grey50"), legend.background = element_blank(), panel.border = element_rect(color = "grey", fill = NA, size = 0.5) ) } ggplot(data = H7N9_gather, aes(x = Date, y = luas, fill = ddr)) + stat_density2d(aes(alpha = ..level..), geom = "polygon") + geom_jitter(aes(color = ddr, shape = ddr), size = 1.5) + geom_rug(aes(color = ddr)) + scale_y_continuous(limits = c(0, 90)) + labs( fill = "Outcome", color = "Outcome", alpha = "Level", shape = "Gender", x = "Date in 2013", y = "Age", title = "2013 Influenza A H7N9 cases in China", subtitle = "Dataset from 'outbreaks' package (Kucharski et al. 2014)", caption = "" ) + facet_grid(Group ~ nama_kab) + my_theme() + scale_shape_manual(values = c(15, 16, 17)) + scale_color_brewer(palette="Set1", na.value = "grey50") + scale_fill_brewer(palette="Set1") ggplot(data = fluH7N9_china_2013_gather, aes(x = Date, y = age, color = outcome)) + geom_point(aes(color = outcome, shape = gender), size = 1.5, alpha = 0.6) + geom_path(aes(group = case_id)) + facet_wrap( ~ province, ncol = 2) + my_theme() + scale_shape_manual(values = c(15, 16, 17)) + scale_color_brewer(palette="Set1", na.value = "grey50") + scale_fill_brewer(palette="Set1") + labs( color = "Outcome", shape = "Gender", x = "Date in 2013", y = "Age", title = "2013 Influenza A H7N9 cases in China", subtitle = "Dataset from 'outbreaks' package (Kucharski et al. 2014)", caption = "\nTime from onset of flu to outcome." ) dataset <- fluH7N9_china_2013 %>% mutate(hospital = as.factor(ifelse(is.na(date_of_hospitalisation), 0, 1)), gender_f = as.factor(ifelse(gender == "f", 1, 0)), province_Jiangsu = as.factor(ifelse(province == "Jiangsu", 1, 0)), province_Shanghai = as.factor(ifelse(province == "Shanghai", 1, 0)), province_Zhejiang = as.factor(ifelse(province == "Zhejiang", 1, 0)), province_other = as.factor(ifelse(province == "Zhejiang" | province == "Jiangsu" | province == "Shanghai", 0, 1)), days_onset_to_outcome = as.numeric(as.character(gsub(" days", "", as.Date(as.character(date_of_outcome), format = "%Y-%m-%d") - as.Date(as.character(date_of_onset), format = "%Y-%m-%d")))), days_onset_to_hospital = as.numeric(as.character(gsub(" days", "", as.Date(as.character(date_of_hospitalisation), format = "%Y-%m-%d") - as.Date(as.character(date_of_onset), format = "%Y-%m-%d")))), age = age, early_onset = as.factor(ifelse(date_of_onset < summary(fluH7N9_china_2013$date_of_onset)[[3]], 1, 0)), early_outcome = as.factor(ifelse(date_of_outcome < summary(fluH7N9_china_2013$date_of_outcome)[[3]], 1, 0))) %>% subset(select = -c(2:4, 6, 8)) rownames(dataset) <- dataset$case_id dataset[, -2] <- as.numeric(as.matrix(dataset[, -2])) head(dataset) md.pattern(dataset) dataset_impute <- mice(data = dataset[, -2], print = FALSE) datasets_complete <- right_join(dataset[, c(1, 2)], complete(dataset_impute, "long"), by = "case_id") %>% select(-.id) head(datasets_complete) datasets_complete %>% gather(x, y, age:early_outcome) %>% ggplot(aes(x = y, fill = .imp, color = .imp)) + facet_wrap(~ x, ncol = 3, scales = "free") + geom_density(alpha = 0.4) + scale_fill_brewer(palette="Set1", na.value = "grey50") + scale_color_brewer(palette="Set1", na.value = "grey50") + my_theme() train_index <- which(is.na(datasets_complete$outcome)) train_data <- datasets_complete[-train_index, ] test_data <- datasets_complete[train_index, -2] set.seed(42) val_data <- train_data %>% group_by(.imp) %>% nest() %>% mutate(val_index = map(data, ~ createDataPartition(.$outcome, p = 0.7, list = FALSE)), val_train_data = map2(data, val_index, ~ .x[.y, ]), val_test_data = map2(data, val_index, ~ .x[-.y, ])) model_function <- function(df) { caret::train(outcome ~ ., data = df, method = "rf", preProcess = c("scale", "center"), trControl = trainControl(method = "repeatedcv", number = 5, repeats = 3, verboseIter = FALSE))} set.seed(42) val_data_model <- val_data %>% mutate(model = map(val_train_data, ~ model_function(.x)), predict = map2(model, val_test_data, ~ data.frame(prediction = predict(.x, .y[, -2]))), predict_prob = map2(model, val_test_data, ~ data.frame(outcome = .y[, 2], prediction = predict(.x, .y[, -2], type = "prob"))), confusion_matrix = map2(val_test_data, predict, ~ confusionMatrix(.x$outcome, .y$prediction)), confusion_matrix_tbl = map(confusion_matrix, ~ as.tibble(.x$table))) val_data_model %>% unnest(confusion_matrix_tbl) %>% ggplot(aes(x = Prediction, y = Reference, fill = n)) + facet_wrap(~ .imp, ncol = 5, scales = "free") + geom_tile() + my_theme() val_data_model %>% unnest(predict_prob) %>% gather(x, y, prediction.Death:prediction.Recover) %>% ggplot(aes(x = x, y = y, fill = outcome)) + facet_wrap(~ .imp, ncol = 5, scales = "free") + geom_boxplot() + scale_fill_brewer(palette="Set1", na.value = "grey50") + my_theme()