# Load necessary packages -----------------------------------------------
install.packages("readbulk")
install.packages("foreign")
install.packages("tidyr")
install.packages("dplyr")
install.packages("ez")
install.packages("apaTables")
install.packages("tidyverse")
install.packages("readxl")

library(rstudioapi) # used for the working-directory setup at the beginning of every script you use
library(readbulk)   # used for importing and combining many data files into one big data frame
library(ggplot2)    # used for creating plots and graphs
library(foreign)    # used for importing non-typical data into R, including SPSS data files
library(tidyr)      # used for wrangling and tidying data into the formats and shapes you want
library(dplyr)      # used for manipulating and summarising data; works hand in hand with tidyr
library(tidyverse)
library(readxl)

# Set working directory to source script location -------------------------
dir.name <- dirname(rstudioapi::getActiveDocumentContext()$path)
setwd(dir.name)
getwd()

# Import data --------------------------------------------------------------
# Know that you can always read about how to use any package or function in the
# RStudio 'Help' tab. Simply enter the name of the package or function in the search bar.

# readbulk will append multiple data files into a single data frame to be used in R.
# When we use a loaded package in a script, it is good practice to prepend the
# function with the name of its package.
# Below we load a whole study's worth of participant attentional bias data.
data <- readbulk::read_bulk(extension = "MalleabilityUSYD2_raw.dat",
                            fun = utils::read.delim,
                            verbose = TRUE)
write.csv(data, file = "C:/Users/justine.macgoris/OneDrive/Bureau/data_raw.csv")
data_raw <- data # keep a copy of the raw import

# Generate summary stats of data ------------------------------------------
# Once we have our large data frame, we may wish to get some summary stats to
# make sure we imported things correctly.
# First we may simply wish to see a summary of the data we have
str(data)

# We can also ensure we have the right number of participants by counting them
count(distinct(data_raw, subject))

# Next we may wish to ensure that the participants we imported have all their data,
# or determine whether any participants are missing trial data.
# To do this, we will use a series of dplyr functions chained together. We don't
# have to do this, but it makes things quicker to type and easier to read :)
# We chain them with the %>% (pipe) operator, which tells R to feed the result of
# one command into the next.
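# A minimal sketch of the pipe on a toy data frame (hypothetical numbers, not the
# study data): each step's output becomes the next step's first argument.
toy <- data.frame(subject = c(1, 1, 2, 2), latency = c(300, 450, 500, 350))
toy %>%
  dplyr::group_by(subject) %>%
  dplyr::summarise(mean_latency = mean(latency))
# equivalent to: dplyr::summarise(dplyr::group_by(toy, subject), mean_latency = mean(latency))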
# See if you can tell what each line of code here is doing
trial.summary <- data %>%
  dplyr::group_by(subject, blockcode) %>%
  dplyr::summarise(num_trials = n())
trial.summary

#--------------------------------------------------------------------------------------
# ATTENTIONAL BIAS
#--------------------------------------------------------------------------------------
ABdata <- data_raw %>%
  filter(blocknum %in% 13:17) %>%
  filter(stimulusitem1 == "+" & subject != "45") %>%
  .[c(1:4, 6:9, 14:16, 18)] # keep only the columns needed downstream (by position)

trial.summary <- ABdata %>%
  dplyr::group_by(subject, blockcode) %>%
  dplyr::summarise(num_trials = n())
trial.summary

ABdata$trialcode <- as.character(ABdata$trialcode)
#ABdata$category <- substr(ABdata$trialcode, start=1, stop=2)

# Code trial congruency from the trial codes. Note that grepl() matches substrings,
# so e.g. grepl("NeutPainB", ...) is also TRUE for "zNeutPainB" and "xNeutPainB";
# the z/x branches below are therefore redundant but harmless.
ABdata$congruency <- ifelse(grepl("NeutPainB", ABdata$trialcode), 'congruent',
                     ifelse(grepl("zNeutPainB", ABdata$trialcode), 'congruent',
                     ifelse(grepl("xNeutPainB", ABdata$trialcode), 'congruent',
                     ifelse(grepl("PainNeutT", ABdata$trialcode), 'congruent',
                     ifelse(grepl("zPainNeutT", ABdata$trialcode), 'congruent',
                     ifelse(grepl("xPainNeutT", ABdata$trialcode), 'congruent',
                     ifelse(grepl("NeutPainT", ABdata$trialcode), 'incongruent',
                     ifelse(grepl("zNeutPainT", ABdata$trialcode), 'incongruent',
                     ifelse(grepl("xNeutPainT", ABdata$trialcode), 'incongruent',
                     ifelse(grepl("PainNeutB", ABdata$trialcode), 'incongruent',
                     ifelse(grepl("zPainNeutB", ABdata$trialcode), 'incongruent',
                     ifelse(grepl("xPainNeutB", ABdata$trialcode), 'incongruent', 'error'))))))))))))
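# Because of the substring matching noted above, the twelve nested ifelse() branches
# collapse to two pattern checks; dplyr::case_when() gives a flatter, equivalent
# version. This is only a sketch of an alternative — it recomputes the same column
# and could replace the block above:
ABdata <- ABdata %>%
  dplyr::mutate(congruency = dplyr::case_when(
    grepl("NeutPainB|PainNeutT", trialcode) ~ "congruent",
    grepl("NeutPainT|PainNeutB", trialcode) ~ "incongruent",
    TRUE ~ "error"
  ))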
# Work out which order participants did the training in
# AT = away, then towards pain
# TA = towards, then away from pain
trainingorder <- ABdata %>%
  filter(blocknum == 14) %>%
  .[c(4, 7)] %>% # keep the subject and blockcode columns (by position)
  distinct(subject, blockcode)
trainingorder$blockcode <- as.character(trainingorder$blockcode)
trainingorder$blockcode <- substr(trainingorder$blockcode, 1, nchar(trainingorder$blockcode) - 1)
trainingorder$ABMorder <- ifelse(grepl("BiasTrainTowards", trainingorder$blockcode), 'TA',
                          ifelse(grepl("BiasTrainAway", trainingorder$blockcode), 'AT', 'error'))
trainingorder <- trainingorder[c(1, 3)]

# Split the data by the time order of the attentional bias assessment blocks
# pre = first assessment block completed, pre training
# mid = assessment block between the two training blocks
# end = final assessment block
AB.pre <- ABdata %>% filter(blocknum == 13)
AB.mid <- ABdata %>% filter(blocknum == 15)
AB.end <- ABdata %>% filter(blocknum == 17)

#################
# pre block
#################
# filtering out incorrect trials
# filtering out responses < 200 ms or > 2000 ms; can delete if not wanted
AB.correct.pre <- AB.pre %>%
  filter(correct == 1) %>%
  filter(latency < 2000) %>%
  filter(latency > 200)

# counting the number of rows
ntrials <- nrow(AB.pre)
# counting the number of CORRECT rows
ntrialscorrect <- nrow(AB.correct.pre)

# number and percentage of trials removed from the original data frame, for the results section
# note: this is after removing both incorrect trials and responses outside 200-2000 ms
nrow(AB.pre) - nrow(AB.correct.pre)
100 - ((nrow(AB.correct.pre) / nrow(AB.pre)) * 100)

# counting number of rows per person
ntrialsperson.pre <- AB.pre %>%
  group_by(subject) %>%
  summarise(num_trials = n())
# counting number of correct rows per person
ntrialscorrectperson.pre <- AB.correct.pre %>%
  group_by(subject) %>%
  summarise(num_trials = n())

# merge the two (after renaming num_trials, which appears in both sets)
names(ntrialscorrectperson.pre)[names(ntrialscorrectperson.pre) == "num_trials"] <- "num_trials_corr"
percent.correct.person.pre <- merge(ntrialsperson.pre, ntrialscorrectperson.pre)

# calculate percent correct as a variable in "percent.correct.person"
percent.correct.person.pre$per.corr <- ((percent.correct.person.pre$num_trials_corr / percent.correct.person.pre$num_trials) * 100)

# Exclude participants with < 75% accuracy
perc.corr.pre <- percent.correct.person.pre[c(1, 4)]
AB.correct.pre <- merge(AB.correct.pre, perc.corr.pre, by = c("subject"))
AB.correct.pre <- filter(AB.correct.pre, per.corr >= 75)

# report how many participants were excluded by this step
# (corr.personout keeps the participants who pass the 75% criterion)
corr.personout.pre <- perc.corr.pre %>% filter(per.corr >= 75)
# counting the number of people
npeople <- nrow(perc.corr.pre)
# counting the number of people retained
npeoplecorrect.pre <- nrow(corr.personout.pre)
# number and percentage of participants removed, for the results section
nrow(perc.corr.pre) - nrow(corr.personout.pre)
100 - ((nrow(corr.personout.pre) / nrow(perc.corr.pre)) * 100)

### OUTLIER ANALYSIS ###
# can choose to use the mean or the median, and 2, 2.5, or 3 deviations; the summary
# below computes mad, median, mean, and sd, but the bounds currently use median +/- 2.5 MADs
descriptives.pre <- group_by(AB.correct.pre, subject, trialcode) %>%
  summarise(mad = mad(latency),
            median = median(latency),
            mean = mean(latency),
            sd = sd(latency)) %>%
  mutate(lower_MAD25 = median - 2.5 * mad,
         upper_MAD25 = median + 2.5 * mad)

# only keep data above the lower bound and below the upper bound:
joined_dat_MAD25.pre <- left_join(AB.correct.pre, descriptives.pre, by = c("subject", "trialcode")) %>%
  filter(latency > lower_MAD25, latency < upper_MAD25)

# raw scores with outliers removed (e.g. for calculating reliability indices)
write.csv(joined_dat_MAD25.pre,
          file = "C:/Users/justine.macgoris/OneDrive/Bureau/ABpre_mad_outliersremoved_last23.csv",
          quote = FALSE, row.names = FALSE)
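# A minimal sketch of what the MAD bounds do, on toy latencies (hypothetical numbers):
# mad() returns the median absolute deviation scaled by 1.4826 by default (so it is
# comparable to an SD under normality), and trials outside median +/- 2.5 * mad are dropped.
lat <- c(320, 340, 350, 360, 1900)
median(lat) - 2.5 * mad(lat) # lower bound, ~313: the 320-360 ms trials survive
median(lat) + 2.5 * mad(lat) # upper bound, ~387: the 1900 ms trial would be removed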
# GROUP DATA
# creating matrices with means per cell (trialcode) for each participant:
# this uses the mean of the latency scores for each participant, but the data
# cleaning can vary depending on MAD- or SD-based bounds and the number of
# deviations from the mean/median (2, 2.5, 3)
matrix_madout25.pre <- joined_dat_MAD25.pre %>%
  group_by(subject, congruency, trialcode) %>%
  summarise(mean_MadExcl25 = mean(latency), N_MadExcl25 = n())

# this is for calculating an overall attentional bias if we go for a joint
# analysis; otherwise redundant
matrix_madout25_overall.pre <- joined_dat_MAD25.pre %>%
  group_by(subject, congruency) %>%
  summarise(mean = mean(latency), n = n())

# Percentage of excluded trials (total, across incorrect trials and MAD outliers)
ntrials <- nrow(AB.pre)
# counting the number of retained rows
ntrialsmadout.pre <- nrow(joined_dat_MAD25.pre)
# number and percentage of trials removed from the original data frame, for the
# results section; note this counts everything removed so far (incorrect trials,
# responses outside 200-2000 ms, excluded participants, and MAD outliers)
nrow(AB.pre) - nrow(joined_dat_MAD25.pre)
100 - ((nrow(joined_dat_MAD25.pre) / nrow(AB.pre)) * 100)

#______________________________________________________________________________
# calculate Attentional Bias Index
AB.mean.congruency.pre <- matrix_madout25.pre %>%
  group_by(subject, congruency) %>%
  summarise(mean = mean(mean_MadExcl25), n = sum(N_MadExcl25)) %>%
  .[c("subject", "congruency", "mean")]
# this removes n; if I need it later I can put it back in, but then the code
# below needs changing to keep the data sensible

AB.pre.congruency <- matrix_madout25_overall.pre %>% .[c("subject", "congruency", "mean")]
AB.pre.final <- spread(AB.pre.congruency, congruency, mean)
AB.pre.final$ABpre <- AB.pre.final$incongruent - AB.pre.final$congruent
# this line cuts out the congruent/incongruent columns for easy merging
AB.pre.final <- AB.pre.final[c(1, 4)]

write.csv(AB.pre.final, file = "C:/Users/justine.macgoris/OneDrive/Bureau/ABpre_last23.csv")
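# A minimal sketch of the bias-index step on toy per-condition means (hypothetical
# numbers): spread() turns the congruency column into congruent/incongruent columns,
# and the index is incongruent minus congruent RT, so positive values mean slower
# responding on incongruent trials (commonly read as a bias toward the pain stimuli).
toy.cong <- data.frame(subject = c(1, 1, 2, 2),
                       congruency = c("congruent", "incongruent", "congruent", "incongruent"),
                       mean = c(400, 430, 500, 490))
toy.wide <- spread(toy.cong, congruency, mean)
toy.wide$AB <- toy.wide$incongruent - toy.wide$congruent # subject 1: 30; subject 2: -10
# in current tidyr, pivot_wider(toy.cong, names_from = congruency, values_from = mean)
# does the same job as spread()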
##########################################################
# AB mid block
##########################################################
# filtering out incorrect trials
# filtering out responses < 200 ms or > 2000 ms; can delete if not wanted
AB.correct.mid <- AB.mid %>%
  filter(correct == 1) %>%
  filter(latency < 2000) %>%
  filter(latency > 200)

# counting the number of rows
ntrials <- nrow(AB.mid)
# counting the number of CORRECT rows
ntrialscorrect <- nrow(AB.correct.mid)

# number and percentage of trials removed from the original data frame, for the results section
# note: this is after removing both incorrect trials and responses outside 200-2000 ms
nrow(AB.mid) - nrow(AB.correct.mid)
100 - ((nrow(AB.correct.mid) / nrow(AB.mid)) * 100)

# counting number of rows per person
ntrialsperson.mid <- AB.mid %>%
  group_by(subject) %>%
  summarise(num_trials = n())
# counting number of correct rows per person
ntrialscorrectperson.mid <- AB.correct.mid %>%
  group_by(subject) %>%
  summarise(num_trials = n())

# merge the two (after renaming num_trials, which appears in both sets)
names(ntrialscorrectperson.mid)[names(ntrialscorrectperson.mid) == "num_trials"] <- "num_trials_corr"
percent.correct.person.mid <- merge(ntrialsperson.mid, ntrialscorrectperson.mid)

# calculate percent correct as a variable in "percent.correct.person"
percent.correct.person.mid$per.corr <- ((percent.correct.person.mid$num_trials_corr / percent.correct.person.mid$num_trials) * 100)

# Exclude participants with < 75% accuracy
perc.corr.mid <- percent.correct.person.mid[c(1, 4)]
AB.correct.mid <- merge(AB.correct.mid, perc.corr.mid, by = c("subject"))
AB.correct.mid <- filter(AB.correct.mid, per.corr >= 75)

# report how many participants were excluded by this step
# (corr.personout keeps the participants who pass the 75% criterion)
corr.personout.mid <- perc.corr.mid %>% filter(per.corr >= 75)
# counting the number of people
npeople <- nrow(perc.corr.mid)
# counting the number of people retained
npeoplecorrect.mid <- nrow(corr.personout.mid)
# number and percentage of participants removed, for the results section
nrow(perc.corr.mid) - nrow(corr.personout.mid)
100 - ((nrow(corr.personout.mid) / nrow(perc.corr.mid)) * 100)

### OUTLIER ANALYSIS ###
# can choose to use the mean or the median, and 2, 2.5, or 3 deviations; the summary
# below computes mad, median, mean, and sd, but the bounds currently use median +/- 2.5 MADs
descriptives.mid <- group_by(AB.correct.mid, subject, trialcode) %>%
  summarise(mad = mad(latency),
            median = median(latency),
            mean = mean(latency),
            sd = sd(latency)) %>%
  mutate(lower_MAD25 = median - 2.5 * mad,
         upper_MAD25 = median + 2.5 * mad)

# only keep data above the lower bound and below the upper bound:
joined_dat_MAD25.mid <- left_join(AB.correct.mid, descriptives.mid, by = c("subject", "trialcode")) %>%
  filter(latency > lower_MAD25, latency < upper_MAD25)

# raw scores with outliers removed (e.g. for calculating reliability indices)
write.csv(joined_dat_MAD25.mid, file = "C:/Users/justine.macgoris/OneDrive/Bureau/ABmid_mad_outliersremoved_last23.csv")

# GROUP DATA
# creating matrices with means per cell (trialcode) for each participant:
# this uses the mean of the latency scores for each participant, but the data
# cleaning can vary depending on MAD- or SD-based bounds and the number of
# deviations from the mean/median (2, 2.5, 3)
matrix_madout25.mid <- joined_dat_MAD25.mid %>%
  group_by(subject, congruency, trialcode) %>%
  summarise(mean_MadExcl25 = mean(latency), N_MadExcl25 = n())

# this is for calculating an overall attentional bias if we go for a joint
# analysis; otherwise redundant
matrix_madout25_overall.mid <- joined_dat_MAD25.mid %>%
  group_by(subject, congruency) %>%
  summarise(mean = mean(latency), n = n())

# Percentage of excluded trials (total, across incorrect trials and MAD outliers)
ntrials <- nrow(AB.mid)
# counting the number of retained rows
ntrialsmadout.mid <- nrow(joined_dat_MAD25.mid)
# number and percentage of trials removed from the original data frame, for the
# results section; note this counts everything removed so far (incorrect trials,
# responses outside 200-2000 ms, excluded participants, and MAD outliers)
nrow(AB.mid) - nrow(joined_dat_MAD25.mid)
100 - ((nrow(joined_dat_MAD25.mid) / nrow(AB.mid)) * 100)

#______________________________________________________________________________
# calculate Attentional Bias Index
AB.mean.congruency.mid <- matrix_madout25.mid %>%
  group_by(subject, congruency) %>%
  summarise(mean = mean(mean_MadExcl25), n = sum(N_MadExcl25)) %>%
  .[c("subject", "congruency", "mean")]
# this removes n; if I need it later I can put it back in, but then the code
# below needs changing to keep the data sensible

AB.mid.congruency <- matrix_madout25_overall.mid %>% .[c("subject", "congruency", "mean")]
AB.mid.final <- spread(AB.mid.congruency, congruency, mean)
AB.mid.final$ABmid <- AB.mid.final$incongruent - AB.mid.final$congruent
# this line cuts out the congruent/incongruent columns for easy merging
AB.mid.final <- AB.mid.final[c(1, 4)]

write.csv(AB.mid.final, file = "C:/Users/justine.macgoris/OneDrive/Bureau/ABmid_last23.csv")

##########################################################
# AB end block
##########################################################
# filtering out incorrect trials
# filtering out responses < 200 ms or > 2000 ms; can delete if not wanted
AB.correct.end <- AB.end %>%
  filter(correct == 1) %>%
  filter(latency < 2000) %>%
  filter(latency > 200)

# counting the number of rows
ntrials <- nrow(AB.end)
# counting the number of CORRECT rows
ntrialscorrect <- nrow(AB.correct.end)

# number and percentage of trials removed from the original data frame, for the results section
# note: this is after removing both incorrect trials and responses outside 200-2000 ms
nrow(AB.end) - nrow(AB.correct.end)
100 - ((nrow(AB.correct.end) / nrow(AB.end)) * 100)

# counting number of rows per person
ntrialsperson.end <- AB.end %>%
  group_by(subject) %>%
  summarise(num_trials = n())
# counting number of correct rows per person
ntrialscorrectperson.end <- AB.correct.end %>%
  group_by(subject) %>%
  summarise(num_trials = n())

# merge the two (after renaming num_trials, which appears in both sets)
names(ntrialscorrectperson.end)[names(ntrialscorrectperson.end) == "num_trials"] <- "num_trials_corr"
percent.correct.person.end <- merge(ntrialsperson.end, ntrialscorrectperson.end)

# calculate percent correct as a variable in "percent.correct.person"
percent.correct.person.end$per.corr <- ((percent.correct.person.end$num_trials_corr / percent.correct.person.end$num_trials) * 100)

# Exclude participants with < 75% accuracy
perc.corr.end <- percent.correct.person.end[c(1, 4)]
AB.correct.end <- merge(AB.correct.end, perc.corr.end, by = c("subject"))
AB.correct.end <- filter(AB.correct.end, per.corr >= 75)

# report how many participants were excluded by this step
# (corr.personout keeps the participants who pass the 75% criterion)
corr.personout.end <- perc.corr.end %>% filter(per.corr >= 75)
# counting the number of people
npeople <- nrow(perc.corr.end)
# counting the number of people retained
npeoplecorrect.end <- nrow(corr.personout.end)
# number and percentage of participants removed, for the results section
nrow(perc.corr.end) - nrow(corr.personout.end)
100 - ((nrow(corr.personout.end) / nrow(perc.corr.end)) * 100)

### OUTLIER ANALYSIS ###
# can choose to use the mean or the median, and 2, 2.5, or 3 deviations; the summary
# below computes mad, median, mean, and sd, but the bounds currently use median +/- 2.5 MADs
descriptives.end <- group_by(AB.correct.end, subject, trialcode) %>%
  summarise(mad = mad(latency),
            median = median(latency),
            mean = mean(latency),
            sd = sd(latency)) %>%
  mutate(lower_MAD25 = median - 2.5 * mad,
         upper_MAD25 = median + 2.5 * mad)

# only keep data above the lower bound and below the upper bound:
joined_dat_MAD25.end <- left_join(AB.correct.end, descriptives.end, by = c("subject", "trialcode")) %>%
  filter(latency > lower_MAD25, latency < upper_MAD25)

# raw scores with outliers removed (e.g. for calculating reliability indices)
write.csv(joined_dat_MAD25.end, file = "C:/Users/justine.macgoris/OneDrive/Bureau/ABend_mad_outliersremoved_last23.csv")
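# The absolute OneDrive paths in the write.csv() calls only work on one machine.
# Since the working directory is already set to the script's folder at the top, a
# portable alternative (a sketch; "output" is a hypothetical subfolder) would be:
# dir.create("output", showWarnings = FALSE)
# write.csv(joined_dat_MAD25.end, file = file.path("output", "ABend_mad_outliersremoved_last23.csv"))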
# GROUP DATA
# creating matrices with means per cell (trialcode) for each participant:
# this uses the mean of the latency scores for each participant, but the data
# cleaning can vary depending on MAD- or SD-based bounds and the number of
# deviations from the mean/median (2, 2.5, 3)
matrix_madout25.end <- joined_dat_MAD25.end %>%
  group_by(subject, congruency, trialcode) %>%
  summarise(mean_MadExcl25 = mean(latency), N_MadExcl25 = n())

# this is for calculating an overall attentional bias if we go for a joint
# analysis; otherwise redundant
matrix_madout25_overall.end <- joined_dat_MAD25.end %>%
  group_by(subject, congruency) %>%
  summarise(mean = mean(latency), n = n())

# Percentage of excluded trials (total, across incorrect trials and MAD outliers)
ntrials <- nrow(AB.end)
# counting the number of retained rows
ntrialsmadout.end <- nrow(joined_dat_MAD25.end)
# number and percentage of trials removed from the original data frame, for the
# results section; note this counts everything removed so far (incorrect trials,
# responses outside 200-2000 ms, excluded participants, and MAD outliers)
nrow(AB.end) - nrow(joined_dat_MAD25.end)
100 - ((nrow(joined_dat_MAD25.end) / nrow(AB.end)) * 100)

#______________________________________________________________________________
# calculate Attentional Bias Index
AB.mean.congruency.end <- matrix_madout25.end %>%
  group_by(subject, congruency) %>%
  summarise(mean = mean(mean_MadExcl25), n = sum(N_MadExcl25)) %>%
  .[c("subject", "congruency", "mean")]
# this removes n; if I need it later I can put it back in, but then the code
# below needs changing to keep the data sensible

AB.end.congruency <- matrix_madout25_overall.end %>% .[c("subject", "congruency", "mean")]
AB.end.final <- spread(AB.end.congruency, congruency, mean)
AB.end.final$ABend <- AB.end.final$incongruent - AB.end.final$congruent
# this line cuts out the congruent/incongruent columns for easy merging
AB.end.final <- AB.end.final[c(1, 4)]

write.csv(AB.end.final, file = "C:/Users/justine.macgoris/OneDrive/Bureau/ABend_final23.csv")

###############################################################################
###############################################################################
###############################################################################
# merge together
ABdata <- full_join(AB.pre.final, AB.mid.final, by = c("subject"))
ABdata <- full_join(ABdata, AB.end.final, by = c("subject"))
ABdata <- full_join(ABdata, trainingorder, by = c("subject"))

ABdata$ABdiff1 <- ABdata$ABmid - ABdata$ABpre
ABdata$ABdiff2 <- ABdata$ABend - ABdata$ABmid
ABdata$ABabsflex <- abs(ABdata$ABdiff1) + abs(ABdata$ABdiff2)

# NA_real_ (rather than a string like "error") keeps these columns numeric;
# any unmatched ABMorder value becomes NA
ABdata$ABtowarddiff <- ifelse(ABdata$ABMorder == "TA", ABdata$ABdiff1,
                       ifelse(ABdata$ABMorder == "AT", ABdata$ABdiff2, NA_real_))
ABdata$ABtoward <- ifelse(ABdata$ABMorder == "TA", ABdata$ABmid,
                   ifelse(ABdata$ABMorder == "AT", ABdata$ABend, NA_real_))
ABdata$ABawaydiff <- ifelse(ABdata$ABMorder == "AT", ABdata$ABdiff1,
                     ifelse(ABdata$ABMorder == "TA", ABdata$ABdiff2, NA_real_))
ABdata$ABaway <- ifelse(ABdata$ABMorder == "AT", ABdata$ABmid,
                 ifelse(ABdata$ABMorder == "TA", ABdata$ABend, NA_real_))

# save final dataset
write.csv(ABdata, file = "C:/Users/justine.macgoris/OneDrive/Bureau/ABdata_LASTVERSION23.csv")
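# The pre/mid/end blocks above are three near-identical copies; wrapping the shared
# steps in a function would avoid the triplication. A minimal sketch under the same
# cleaning rules (it omits the per-person 75% accuracy exclusion and the diagnostic
# counts for brevity; the function and argument names are hypothetical):
process_AB_block <- function(block_data, index_name) {
  correct <- block_data %>%
    filter(correct == 1, latency > 200, latency < 2000)
  desc <- correct %>%
    group_by(subject, trialcode) %>%
    summarise(mad = mad(latency), median = median(latency), .groups = "drop")
  cleaned <- left_join(correct, desc, by = c("subject", "trialcode")) %>%
    filter(latency > median - 2.5 * mad, latency < median + 2.5 * mad)
  out <- cleaned %>%
    group_by(subject, congruency) %>%
    summarise(mean = mean(latency), .groups = "drop") %>%
    spread(congruency, mean)
  out[[index_name]] <- out$incongruent - out$congruent
  out[, c("subject", index_name)]
}
# e.g. AB.pre.final <- process_AB_block(AB.pre, "ABpre")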