# 03_Descriptives_BigFive.R
# PeerJ-CS, "Personality-based pair programming"
#
# This script:
#   1) Reads the Big Five averages dataset (Stats_B5Avg).
#   2) Computes descriptive statistics (descriptives + Shapiro–Wilk).
#   3) Loads the main "Ready" dataset for demographic info.
#   4) Summarizes demographics.

# ------------------------------------------------
# Install and load required libraries
# ------------------------------------------------
required_packages <- c("openxlsx", "psych", "dplyr")

# requireNamespace() checks each package individually; this avoids scanning
# every installed package via installed.packages().
is_installed <- vapply(
  required_packages, requireNamespace, logical(1), quietly = TRUE
)
new_packages <- required_packages[!is_installed]
if (length(new_packages) > 0) {
  install.packages(new_packages)
}

# library() stops with an error if a package cannot be attached, whereas
# require() would only return FALSE and let the script fail later.
for (pkg in required_packages) {
  library(pkg, character.only = TRUE)
}

# ------------------------------------------------
# Read the Big Five averages per participant
# (Created in "00_Stats_Computation_v4.R", step 3)
# ------------------------------------------------
b5avg_file <- "Stats_WS2021+SS2022_B5Avg.xlsx"
Stats_B5Avg <- read.xlsx(b5avg_file)
cat("Loaded Stats_B5Avg with", nrow(Stats_B5Avg), "rows.\n")

# ------------------------------------------------
# Big Five Descriptives & Normality Tests
# ------------------------------------------------
b5_vars <- c("B5_O", "B5_C", "B5_E", "B5_A", "B5_N")

# Validate the input before touching the output file, so that a bad input
# does not wipe results from a previous run.
missing_b5 <- setdiff(b5_vars, names(Stats_B5Avg))
if (length(missing_b5) > 0) {
  stop(
    "Missing Big Five column(s) in ", b5avg_file, ": ",
    toString(missing_b5),
    call. = FALSE
  )
}

descriptives_file <- "Descriptives_BigFive.txt"
cat("", file = descriptives_file, append = FALSE) # Clear previous content

for (v in b5_vars) {
  # Extract the vector of trait scores
  trait_scores <- Stats_B5Avg[[v]]

  # Descriptive statistics
  # (psych::describe gives n, mean, sd, median, min, max, skew, kurtosis)
  desc_results <- psych::describe(trait_scores)

  # Shapiro–Wilk normality test
  sw_results <- shapiro.test(trait_scores)

  # Print results to console
  cat("\n=== ", v, " ===\n", sep = "")
  cat("Descriptive statistics:\n")
  print(desc_results)
  cat("Shapiro–Wilk normality test:\n")
  print(sw_results)

  # Append the same results to the output file
  capture.output(
    list(
      Trait = v,
      DescriptiveStatistics = desc_results,
      ShapiroWilkTest = sw_results
    ),
    file = descriptives_file,
    append = TRUE
  )
}

cat("\nDescriptive statistics and Shapiro–Wilk tests completed.\n")
cat("Results saved in:", descriptives_file, "\n")

# ------------------------------------------------
# DEMOGRAPHICS
# ------------------------------------------------
# Here we read the "Ready" dataset created in "00_Stats_Computation_v4.R"
# step 4, which contains Student_ID, Experience_yrs, Gender, etc.
df_file <- "Stats_WS2021+SS2022_Ready.xlsx"
df <- read.xlsx(df_file)
cat("\nLoaded df with", nrow(df), "rows for demographics.\n")

# `$` on an absent column returns NULL, which would yield an NA mean with only
# a warning; fail with an explicit message instead.
demographic_vars <- c("Student_ID", "Experience_yrs", "Gender")
missing_demographics <- setdiff(demographic_vars, names(df))
if (length(missing_demographics) > 0) {
  stop(
    "Missing demographic column(s) in ", df_file, ": ",
    toString(missing_demographics),
    call. = FALSE
  )
}

# 1) Keep only unique participants, ignoring duplicates
#    (the first row per Student_ID is retained with all its columns)
df_unique <- df %>%
  distinct(Student_ID, .keep_all = TRUE)

# 2) Report the total number of unique participants
n_participants <- nrow(df_unique)
cat("Number of unique participants:", n_participants, "\n")

# 3) Compute mean (and SD) of experience in years, ignoring missing values
mean_experience <- mean(df_unique$Experience_yrs, na.rm = TRUE)
sd_experience <- sd(df_unique$Experience_yrs, na.rm = TRUE)
cat("Avg. years of experience:", mean_experience, "SD:", sd_experience, "\n")

# 4) Count gender distribution
#    useNA = "ifany" reports missing gender values as their own category, so
#    the counts always add up to the number of unique participants.
gender_counts <- table(df_unique$Gender, useNA = "ifany")
cat("Gender distribution:\n")
print(gender_counts)

cat("\nDemographic summary completed.\n")