# 01_Multieffects_Multimodel_v4.R
# PeerJ-CS, "Personality-based pair programming"
#
# This script:
#   1) Loads the combined dataset (Stats_WS2021+SS2022_Ready.xlsx).
#   2) Prepares the data for mixed-effects modeling (e.g., pivot to long).
#   3) Fits mixed-effects models with two different baseline roles (Pilot, Solo).
#   4) Produces a boxplot of IntrinsicMotivation by Role.
#   5) Saves model summaries and a PDF with diagnostic plots.

# ------------------------------------------------
# Install and load required packages
# ------------------------------------------------
required_packages <- c("openxlsx", "nlme", "dplyr", "tidyr", "ggplot2", "emmeans", "MuMIn")
new_packages <- required_packages[!(required_packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) {
  install.packages(new_packages, repos = "https://cloud.r-project.org")
}
lapply(required_packages, require, character.only = TRUE)

# ------------------------------------------------
# Define the dataset path and output files
# ------------------------------------------------
dataset <- "Stats_WS2021+SS2022_Ready.xlsx"

results_file_interaction_pilot  <- "Model_Summaries_InteractionBigFive_Pilot.txt"
results_file_interaction_solo   <- "Model_Summaries_InteractionBigFive_Solo.txt"
results_file_fixed_pilot        <- "Model_Summaries_BigFiveFixedEffects_Pilot.txt"
results_file_fixed_solo         <- "Model_Summaries_BigFiveFixedEffects_Solo.txt"
results_file_cluster_pilot      <- "Model_Summaries_PersonalityCluster_Pilot.txt"
results_file_cluster_solo       <- "Model_Summaries_PersonalityCluster_Solo.txt"
graphical_file                  <- "S1_Graphical_Diagnostics.pdf"

# Clear previous outputs
cat("", file = results_file_interaction_pilot,  append = FALSE)
cat("", file = results_file_interaction_solo,   append = FALSE)
cat("", file = results_file_fixed_pilot,        append = FALSE)
cat("", file = results_file_fixed_solo,         append = FALSE)
cat("", file = results_file_cluster_pilot,      append = FALSE)
cat("", file = results_file_cluster_solo,       append = FALSE)

# ------------------------------------------------
# Open graphical output (PDF) for plots
# ------------------------------------------------
pdf(graphical_file)

# ------------------------------------------------
# Step 1: Load and prepare the dataset
# ------------------------------------------------
Stats <- openxlsx::read.xlsx(dataset)
cat("Dataset:", dataset, "has", nrow(Stats), "rows.\n")

# Convert role columns to factors
role_columns <- c("Role_01", "Role_02", "Role_03", "Role_04", "Role_05", "Role_06")
Stats[role_columns] <- lapply(Stats[role_columns], as.factor)

# Create a PersonalityCluster column (dominant trait)
Stats <- Stats %>%
  rowwise() %>%
  mutate(
    PersonalityCluster = case_when(
      B5_O == max(c(B5_O, B5_C, B5_E, B5_A, B5_N)) ~ "Openness",
      B5_C == max(c(B5_O, B5_C, B5_E, B5_A, B5_N)) ~ "Conscientiousness",
      B5_E == max(c(B5_O, B5_C, B5_E, B5_A, B5_N)) ~ "Extraversion",
      B5_A == max(c(B5_O, B5_C, B5_E, B5_A, B5_N)) ~ "Agreeableness",
      B5_N == max(c(B5_O, B5_C, B5_E, B5_A, B5_N)) ~ "Neuroticism"
    )
  ) %>%
  ungroup()

Stats$PersonalityCluster <- factor(Stats$PersonalityCluster, 
                                   levels = c("Openness", "Conscientiousness", 
                                              "Extraversion", "Agreeableness", 
                                              "Neuroticism"))

# ------------------------------------------------
# Step 2: Pivot data to long format for mixed-effects models
# ------------------------------------------------
long_data <- Stats %>%
  pivot_longer(
    cols = starts_with("INNER_R"), 
    names_to = "Round", 
    values_to = "IntrinsicMotivation"
  ) %>%
  mutate(
    Role = case_when(
      Round == "INNER_R1" ~ as.character(Role_01),
      Round == "INNER_R2" ~ as.character(Role_02),
      Round == "INNER_R3" ~ as.character(Role_03),
      Round == "INNER_R4" ~ as.character(Role_04),
      Round == "INNER_R5" ~ as.character(Role_05),
      Round == "INNER_R6" ~ as.character(Role_06)
    ),
    # Convert "1","2","3" to factor labels Pilot, Solo, Navigator
    Role = factor(Role, levels = c("1", "2", "3"),
                  labels = c("Pilot", "Solo", "Navigator"))
  )

# ------------------------------------------------
# Step 3: Function to relevel baseline and fit models
# ------------------------------------------------
fit_models <- function(baseline, results_file_interaction, 
                       results_file_fixed, results_file_cluster) {
  
  # Relevel Role
  long_data$Role <- relevel(long_data$Role, ref = baseline)
  
  # Model 1: Big Five interaction
  model_interaction <- lme(
    fixed = IntrinsicMotivation ~ Role * (B5_O + B5_C + B5_E + B5_A + B5_N),
    random = ~ 1 | Student_ID,
    data = long_data,
    method = "REML"
  )
  capture.output(summary(model_interaction), file = results_file_interaction)

  # ------------------------------------------------
  # Step 4: Obtain Marginal and Conditional R^2 for the LME Models
  # ------------------------------------------------
  # Marginal R^2: Proportion of variance explained by the fixed effects alone.
  # Conditional R^2: Proportion of variance explained by both fixed effects and random effects (i.e., the entire model).
  r2_values <- r.squaredGLMM(model_interaction)
  capture.output(
    paste("Marginal R^2 =", round(r2_values[1], 3),
          "| Conditional R^2 =", round(r2_values[2], 3)),
    file = results_file_interaction,
    append = TRUE
  )
  
  # Model 2: Big Five fixed effects
  model_fixed <- lme(
    fixed = IntrinsicMotivation ~ Role + B5_O + B5_C + B5_E + B5_A + B5_N,
    random = ~ 1 | Student_ID,
    data = long_data,
    method = "REML"
  )
  capture.output(summary(model_fixed), file = results_file_fixed)
  
  r2_values <- r.squaredGLMM(model_fixed)
  capture.output(
    paste("Marginal R^2 =", round(r2_values[1], 3),
          "| Conditional R^2 =", round(r2_values[2], 3)),
    file = results_file_fixed,
    append = TRUE
  )
  
  # Model 3: PersonalityCluster effects
  model_cluster <- lme(
    fixed = IntrinsicMotivation ~ Role * PersonalityCluster,
    random = ~ 1 | Student_ID,
    data = long_data,
    method = "REML"
  )
  capture.output(summary(model_cluster), file = results_file_cluster)
  
  r2_values <- r.squaredGLMM(model_interaction)
  capture.output(
    paste("Marginal R^2 =", round(r2_values[1], 3),
          "| Conditional R^2 =", round(r2_values[2], 3)),
    file = results_file_cluster,
    append = TRUE
  )
  
  # Return the fitted model so we can use it outside this function
  return(model_interaction)
}

# ------------------------------------------------
# Step 5: Run models with different baselines
# ------------------------------------------------
# Baseline = Pilot
model_int_pilot <- fit_models("Pilot", results_file_interaction_pilot, 
           results_file_fixed_pilot, results_file_cluster_pilot)

# Baseline = Solo
fit_models("Solo", results_file_interaction_solo, 
           results_file_fixed_solo, results_file_cluster_solo)

# ------------------------------------------------
# Step 6: Visualization
# ------------------------------------------------
# 6.1 Plot boxplot of raw data
p <- ggplot(long_data, aes(x = Role, y = IntrinsicMotivation, fill = Role)) +
  geom_boxplot(outlier.shape = NA) +
  stat_summary(fun=mean, geom="point", shape=23, size=3, fill="white") + 
  theme_minimal() +
  labs(
    title = paste("Intrinsic Motivation by Programming Role -", dataset),
    x = "Programming Role",
    y = "Intrinsic Motivation (7-35)"
  )
print(p)

# 6.2 Linear prediction of effects at ±1 SD of particular B5, e.g., Openness
# => The plot confirms that +1 SD openness individuals will especially favor Pilot
#   (or show a larger difference between Pilot and Solo/Navigator).
#   It also shows that the Solo role is not as demotivating for those who are low
#   in Openness—everyone is fairly close together in the top facet. 
#   But for those with high Openness, Solo is distinctly lower than Pilot.
mean_B5O <- mean(long_data$B5_O, na.rm = TRUE)
sd_B5O   <- sd(long_data$B5_O, na.rm = TRUE)

openness_plot <- emmip(model_int_pilot, B5_O ~ Role,
                       at = list(B5_O = c(mean_B5O - sd_B5O, mean_B5O + sd_B5O)),
                       CIs = TRUE)
emm_openness <- emmeans(model_int_pilot, specs = ~ Role | B5_O,
                        at = list(B5_O = c(mean_B5O - sd_B5O, mean_B5O + sd_B5O)))

print(openness_plot)
print(plot(emm_openness, comparison=TRUE))

# 6.3 Q–Q Plot of Residuals and Residuals vs. Fitted:
qqnorm(model_int_pilot, ~ resid(., type = "p"), main = "Q–Q Plot of Residuals")
plot(model_int_pilot, which = 1, main = "Residuals vs. Fitted")  # sometimes available in 'nlme' or you can do:

dev.off()  # Close the PDF device

cat("Analysis complete. Results saved in:\n",
    results_file_interaction_pilot, "\n",
    results_file_interaction_solo,  "\n",
    results_file_fixed_pilot,       "\n",
    results_file_fixed_solo,        "\n",
    results_file_cluster_pilot,     "\n",
    results_file_cluster_solo,      "\n",
    graphical_file, "\n")