# Load necessary packages -----------------------------------------------
install.packages("readbulk")
install.packages("foreign")
install.packages("tidyr")
install.packages("dplyr")
install.packages("ez")
install.packages("apaTables")
install.packages("tidyverse")
install.packages("readxl")

library(rstudioapi) # used for the working-directory setup at the beginning of every script you use
library(readbulk)   # used for importing and combining many data files into one big data frame
library(ggplot2)    # used for creating plots and graphs
library(foreign)    # used for importing non-typical data into R, including SPSS data files
library(tidyr)      # used for wrangling and tidying data into the formats and shapes you want
library(dplyr)      # used for manipulating and summarising data; works hand in hand with tidyr
library(tidyverse)
library(readxl)

# Set working directory to source script location -------------------------
dir.name <- dirname(rstudioapi::getActiveDocumentContext()$path)
setwd(dir.name)
getwd()

# Import data --------------------------------------------------------------
# Know that you can always read about how to use any package or function in the
# RStudio 'Help' tab. Simply enter the name of the package or function in the search bar.

# readbulk will append multiple data files into a single data frame to be used in R.
# When we use a loaded package in a script, it is good practice to prepend the
# function with the name of its package.
# Below we load a whole study's worth of participant attentional bias data.
data <- readbulk::read_bulk(extension = "MalleabilityUSYD2_raw.dat",
                            fun = utils::read.delim,
                            verbose = TRUE)
write.csv(data, file = "C:/Users/justine.macgoris/OneDrive/Bureau/data_raw.csv")
data_raw <- data # keep a copy of the raw import

# Generate summary stats of data ------------------------------------------
# Once we have our large data frame, we may wish to get some summary stats to
# make sure we imported things correctly.
# First we may simply wish to see a summary of the data we have
str(data)

# We can also ensure we have the right number of participants by counting them
count(distinct(data_raw, subject))

# Next we may wish to ensure that the participants we imported have all their data,
# or determine whether any participants are missing trial data.
# To do this, we will use a series of dplyr functions chained together. We don't
# have to do this, but it makes things quicker to type and easier to read :)
# We chain them with the %>% (pipe) operator, which tells R to feed the result of
# one command into the next.
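# A minimal sketch of the pipe on a toy data frame (hypothetical numbers, not the
# study data): each step's output becomes the next step's first argument.
toy <- data.frame(subject = c(1, 1, 2, 2), latency = c(300, 450, 500, 350))
toy %>%
  dplyr::group_by(subject) %>%
  dplyr::summarise(mean_latency = mean(latency))
# equivalent to: dplyr::summarise(dplyr::group_by(toy, subject), mean_latency = mean(latency))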
# See if you can tell what each line of code here is doing
trial.summary <- data %>%
  dplyr::group_by(subject, blockcode) %>%
  dplyr::summarise(num_trials = n())
trial.summary

#--------------------------------------------------------------------------------------
# ATTENTIONAL BIAS
#--------------------------------------------------------------------------------------
ABdata <- data_raw %>%
  filter(blocknum %in% 13:17) %>%
  filter(stimulusitem1 == "+" & subject != "45") %>%
  .[c(1:4, 6:9, 14:16, 18)] # keep only the columns needed downstream (by position)

trial.summary <- ABdata %>%
  dplyr::group_by(subject, blockcode) %>%
  dplyr::summarise(num_trials = n())
trial.summary

ABdata$trialcode <- as.character(ABdata$trialcode)
#ABdata$category <- substr(ABdata$trialcode, start=1, stop=2)

# Code trial congruency from the trial codes. Note that grepl() matches substrings,
# so e.g. grepl("NeutPainB", ...) is also TRUE for "zNeutPainB" and "xNeutPainB";
# the z/x branches below are therefore redundant but harmless.
ABdata$congruency <- ifelse(grepl("NeutPainB", ABdata$trialcode), 'congruent',
                     ifelse(grepl("zNeutPainB", ABdata$trialcode), 'congruent',
                     ifelse(grepl("xNeutPainB", ABdata$trialcode), 'congruent',
                     ifelse(grepl("PainNeutT", ABdata$trialcode), 'congruent',
                     ifelse(grepl("zPainNeutT", ABdata$trialcode), 'congruent',
                     ifelse(grepl("xPainNeutT", ABdata$trialcode), 'congruent',
                     ifelse(grepl("NeutPainT", ABdata$trialcode), 'incongruent',
                     ifelse(grepl("zNeutPainT", ABdata$trialcode), 'incongruent',
                     ifelse(grepl("xNeutPainT", ABdata$trialcode), 'incongruent',
                     ifelse(grepl("PainNeutB", ABdata$trialcode), 'incongruent',
                     ifelse(grepl("zPainNeutB", ABdata$trialcode), 'incongruent',
                     ifelse(grepl("xPainNeutB", ABdata$trialcode), 'incongruent', 'error'))))))))))))
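# Because of the substring matching noted above, the twelve nested ifelse() branches
# collapse to two pattern checks; dplyr::case_when() gives a flatter, equivalent
# version. This is only a sketch of an alternative — it recomputes the same column
# and could replace the block above:
ABdata <- ABdata %>%
  dplyr::mutate(congruency = dplyr::case_when(
    grepl("NeutPainB|PainNeutT", trialcode) ~ "congruent",
    grepl("NeutPainT|PainNeutB", trialcode) ~ "incongruent",
    TRUE ~ "error"
  ))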
# Work out which order participants did the training in
# AT = away, then towards pain
# TA = towards, then away from pain
trainingorder <- ABdata %>%
  filter(blocknum == 14) %>%
  .[c(4, 7)] %>% # keep the subject and blockcode columns (by position)
  distinct(subject, blockcode)
trainingorder$blockcode <- as.character(trainingorder$blockcode)
trainingorder$blockcode <- substr(trainingorder$blockcode, 1, nchar(trainingorder$blockcode) - 1)
trainingorder$ABMorder <- ifelse(grepl("BiasTrainTowards", trainingorder$blockcode), 'TA',
                          ifelse(grepl("BiasTrainAway", trainingorder$blockcode), 'AT', 'error'))
trainingorder <- trainingorder[c(1, 3)]

# Split the data by the time order of the attentional bias assessment blocks
# pre = first assessment block completed, pre training
# mid = assessment block between the two training blocks
# end = final assessment block
AB.pre <- ABdata %>% filter(blocknum == 13)
AB.mid <- ABdata %>% filter(blocknum == 15)
AB.end <- ABdata %>% filter(blocknum == 17)

#################
# pre block
#################
# filtering out incorrect trials
# filtering out responses < 200 ms or > 2000 ms; can delete if not wanted
AB.correct.pre <- AB.pre %>%
  filter(correct == 1) %>%
  filter(latency < 2000) %>%
  filter(latency > 200)

# counting the number of rows
ntrials <- nrow(AB.pre)
# counting the number of CORRECT rows
ntrialscorrect <- nrow(AB.correct.pre)

# number and percentage of trials removed from the original data frame, for the results section
# note: this is after removing both incorrect trials and responses outside 200-2000 ms
nrow(AB.pre) - nrow(AB.correct.pre)
100 - ((nrow(AB.correct.pre) / nrow(AB.pre)) * 100)

# counting number of rows per person
ntrialsperson.pre <- AB.pre %>%
  group_by(subject) %>%
  summarise(num_trials = n())
# counting number of correct rows per person
ntrialscorrectperson.pre <- AB.correct.pre %>%
  group_by(subject) %>%
  summarise(num_trials = n())

# merge the two (after renaming num_trials, which appears in both sets)
names(ntrialscorrectperson.pre)[names(ntrialscorrectperson.pre) == "num_trials"] <- "num_trials_corr"
percent.correct.person.pre <- merge(ntrialsperson.pre, ntrialscorrectperson.pre)

# calculate percent correct as a variable in "percent.correct.person"
percent.correct.person.pre$per.corr <- ((percent.correct.person.pre$num_trials_corr / percent.correct.person.pre$num_trials) * 100)

# Exclude participants with < 75% accuracy
perc.corr.pre <- percent.correct.person.pre[c(1, 4)]
AB.correct.pre <- merge(AB.correct.pre, perc.corr.pre, by = c("subject"))
AB.correct.pre <- filter(AB.correct.pre, per.corr >= 75)

# report how many participants were excluded by this step
# (corr.personout keeps the participants who pass the 75% criterion)
corr.personout.pre <- perc.corr.pre %>% filter(per.corr >= 75)
# counting the number of people
npeople <- nrow(perc.corr.pre)
# counting the number of people retained
npeoplecorrect.pre <- nrow(corr.personout.pre)
# number and percentage of participants removed, for the results section
nrow(perc.corr.pre) - nrow(corr.personout.pre)
100 - ((nrow(corr.personout.pre) / nrow(perc.corr.pre)) * 100)

### OUTLIER ANALYSIS ###
# can choose to use the mean or the median, and 2, 2.5, or 3 deviations; the summary
# below computes mad, median, mean, and sd, but the bounds currently use median +/- 2.5 MADs
descriptives.pre <- group_by(AB.correct.pre, subject, trialcode) %>%
  summarise(mad = mad(latency),
            median = median(latency),
            mean = mean(latency),
            sd = sd(latency)) %>%
  mutate(lower_MAD25 = median - 2.5 * mad,
         upper_MAD25 = median + 2.5 * mad)

# only keep data above the lower bound and below the upper bound:
joined_dat_MAD25.pre <- left_join(AB.correct.pre, descriptives.pre, by = c("subject", "trialcode")) %>%
  filter(latency > lower_MAD25, latency < upper_MAD25)

# raw scores with outliers removed (e.g. for calculating reliability indices)
write.csv(joined_dat_MAD25.pre,
          file = "C:/Users/justine.macgoris/OneDrive/Bureau/ABpre_mad_outliersremoved_last23.csv",
          quote = FALSE, row.names = FALSE)
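# A minimal sketch of what the MAD bounds do, on toy latencies (hypothetical numbers):
# mad() returns the median absolute deviation scaled by 1.4826 by default (so it is
# comparable to an SD under normality), and trials outside median +/- 2.5 * mad are dropped.
lat <- c(320, 340, 350, 360, 1900)
median(lat) - 2.5 * mad(lat) # lower bound, ~313: the 320-360 ms trials survive
median(lat) + 2.5 * mad(lat) # upper bound, ~387: the 1900 ms trial would be removed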
# GROUP DATA
# creating matrices with means per cell (trialcode) for each participant:
# this uses the mean of the latency scores for each participant, but the data
# cleaning can vary depending on MAD- or SD-based bounds and the number of
# deviations from the mean/median (2, 2.5, 3)
matrix_madout25.pre <- joined_dat_MAD25.pre %>%
  group_by(subject, congruency, trialcode) %>%
  summarise(mean_MadExcl25 = mean(latency), N_MadExcl25 = n())

# this is for calculating an overall attentional bias if we go for a joint
# analysis; otherwise redundant
matrix_madout25_overall.pre <- joined_dat_MAD25.pre %>%
  group_by(subject, congruency) %>%
  summarise(mean = mean(latency), n = n())

# Percentage of excluded trials (total, across incorrect trials and MAD outliers)
ntrials <- nrow(AB.pre)
# counting the number of retained rows
ntrialsmadout.pre <- nrow(joined_dat_MAD25.pre)
# number and percentage of trials removed from the original data frame, for the
# results section; note this counts everything removed so far (incorrect trials,
# responses outside 200-2000 ms, excluded participants, and MAD outliers)
nrow(AB.pre) - nrow(joined_dat_MAD25.pre)
100 - ((nrow(joined_dat_MAD25.pre) / nrow(AB.pre)) * 100)

#______________________________________________________________________________
# calculate Attentional Bias Index
AB.mean.congruency.pre <- matrix_madout25.pre %>%
  group_by(subject, congruency) %>%
  summarise(mean = mean(mean_MadExcl25), n = sum(N_MadExcl25)) %>%
  .[c("subject", "congruency", "mean")]
# this removes n; if I need it later I can put it back in, but then the code
# below needs changing to keep the data sensible

AB.pre.congruency <- matrix_madout25_overall.pre %>% .[c("subject", "congruency", "mean")]
AB.pre.final <- spread(AB.pre.congruency, congruency, mean)
AB.pre.final$ABpre <- AB.pre.final$incongruent - AB.pre.final$congruent
# this line cuts out the congruent/incongruent columns for easy merging
AB.pre.final <- AB.pre.final[c(1, 4)]

write.csv(AB.pre.final, file = "C:/Users/justine.macgoris/OneDrive/Bureau/ABpre_last23.csv")
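# A minimal sketch of the bias-index step on toy per-condition means (hypothetical
# numbers): spread() turns the congruency column into congruent/incongruent columns,
# and the index is incongruent minus congruent RT, so positive values mean slower
# responding on incongruent trials (commonly read as a bias toward the pain stimuli).
toy.cong <- data.frame(subject = c(1, 1, 2, 2),
                       congruency = c("congruent", "incongruent", "congruent", "incongruent"),
                       mean = c(400, 430, 500, 490))
toy.wide <- spread(toy.cong, congruency, mean)
toy.wide$AB <- toy.wide$incongruent - toy.wide$congruent # subject 1: 30; subject 2: -10
# in current tidyr, pivot_wider(toy.cong, names_from = congruency, values_from = mean)
# does the same job as spread()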
##########################################################
# AB mid block
##########################################################
# filtering out incorrect trials
# filtering out responses < 200 ms or > 2000 ms; can delete if not wanted
AB.correct.mid <- AB.mid %>%
  filter(correct == 1) %>%
  filter(latency < 2000) %>%
  filter(latency > 200)

# counting the number of rows
ntrials <- nrow(AB.mid)
# counting the number of CORRECT rows
ntrialscorrect <- nrow(AB.correct.mid)

# number and percentage of trials removed from the original data frame, for the results section
# note: this is after removing both incorrect trials and responses outside 200-2000 ms
nrow(AB.mid) - nrow(AB.correct.mid)
100 - ((nrow(AB.correct.mid) / nrow(AB.mid)) * 100)

# counting number of rows per person
ntrialsperson.mid <- AB.mid %>%
  group_by(subject) %>%
  summarise(num_trials = n())
# counting number of correct rows per person
ntrialscorrectperson.mid <- AB.correct.mid %>%
  group_by(subject) %>%
  summarise(num_trials = n())

# merge the two (after renaming num_trials, which appears in both sets)
names(ntrialscorrectperson.mid)[names(ntrialscorrectperson.mid) == "num_trials"] <- "num_trials_corr"
percent.correct.person.mid <- merge(ntrialsperson.mid, ntrialscorrectperson.mid)

# calculate percent correct as a variable in "percent.correct.person"
percent.correct.person.mid$per.corr <- ((percent.correct.person.mid$num_trials_corr / percent.correct.person.mid$num_trials) * 100)

# Exclude participants with < 75% accuracy
perc.corr.mid <- percent.correct.person.mid[c(1, 4)]
AB.correct.mid <- merge(AB.correct.mid, perc.corr.mid, by = c("subject"))
AB.correct.mid <- filter(AB.correct.mid, per.corr >= 75)

# report how many participants were excluded by this step
# (corr.personout keeps the participants who pass the 75% criterion)
corr.personout.mid <- perc.corr.mid %>% filter(per.corr >= 75)
# counting the number of people
npeople <- nrow(perc.corr.mid)
# counting the number of people retained
npeoplecorrect.mid <- nrow(corr.personout.mid)
# number and percentage of participants removed, for the results section
nrow(perc.corr.mid) - nrow(corr.personout.mid)
100 - ((nrow(corr.personout.mid) / nrow(perc.corr.mid)) * 100)

### OUTLIER ANALYSIS ###
# can choose to use the mean or the median, and 2, 2.5, or 3 deviations; the summary
# below computes mad, median, mean, and sd, but the bounds currently use median +/- 2.5 MADs
descriptives.mid <- group_by(AB.correct.mid, subject, trialcode) %>%
  summarise(mad = mad(latency),
            median = median(latency),
            mean = mean(latency),
            sd = sd(latency)) %>%
  mutate(lower_MAD25 = median - 2.5 * mad,
         upper_MAD25 = median + 2.5 * mad)

# only keep data above the lower bound and below the upper bound:
joined_dat_MAD25.mid <- left_join(AB.correct.mid, descriptives.mid, by = c("subject", "trialcode")) %>%
  filter(latency > lower_MAD25, latency < upper_MAD25)

# raw scores with outliers removed (e.g. for calculating reliability indices)
write.csv(joined_dat_MAD25.mid, file = "C:/Users/justine.macgoris/OneDrive/Bureau/ABmid_mad_outliersremoved_last23.csv")

# GROUP DATA
# creating matrices with means per cell (trialcode) for each participant:
# this uses the mean of the latency scores for each participant, but the data
# cleaning can vary depending on MAD- or SD-based bounds and the number of
# deviations from the mean/median (2, 2.5, 3)
matrix_madout25.mid <- joined_dat_MAD25.mid %>%
  group_by(subject, congruency, trialcode) %>%
  summarise(mean_MadExcl25 = mean(latency), N_MadExcl25 = n())

# this is for calculating an overall attentional bias if we go for a joint
# analysis; otherwise redundant
matrix_madout25_overall.mid <- joined_dat_MAD25.mid %>%
  group_by(subject, congruency) %>%
  summarise(mean = mean(latency), n = n())

# Percentage of excluded trials (total, across incorrect trials and MAD outliers)
ntrials <- nrow(AB.mid)
# counting the number of retained rows
ntrialsmadout.mid <- nrow(joined_dat_MAD25.mid)
# number and percentage of trials removed from the original data frame, for the
# results section; note this counts everything removed so far (incorrect trials,
# responses outside 200-2000 ms, excluded participants, and MAD outliers)
nrow(AB.mid) - nrow(joined_dat_MAD25.mid)
100 - ((nrow(joined_dat_MAD25.mid) / nrow(AB.mid)) * 100)

#______________________________________________________________________________
# calculate Attentional Bias Index
AB.mean.congruency.mid <- matrix_madout25.mid %>%
  group_by(subject, congruency) %>%
  summarise(mean = mean(mean_MadExcl25), n = sum(N_MadExcl25)) %>%
  .[c("subject", "congruency", "mean")]
# this removes n; if I need it later I can put it back in, but then the code
# below needs changing to keep the data sensible

AB.mid.congruency <- matrix_madout25_overall.mid %>% .[c("subject", "congruency", "mean")]
AB.mid.final <- spread(AB.mid.congruency, congruency, mean)
AB.mid.final$ABmid <- AB.mid.final$incongruent - AB.mid.final$congruent
# this line cuts out the congruent/incongruent columns for easy merging
AB.mid.final <- AB.mid.final[c(1, 4)]

write.csv(AB.mid.final, file = "C:/Users/justine.macgoris/OneDrive/Bureau/ABmid_last23.csv")

##########################################################
# AB end block
##########################################################
# filtering out incorrect trials
# filtering out responses < 200 ms or > 2000 ms; can delete if not wanted
AB.correct.end <- AB.end %>%
  filter(correct == 1) %>%
  filter(latency < 2000) %>%
  filter(latency > 200)

# counting the number of rows
ntrials <- nrow(AB.end)
# counting the number of CORRECT rows
ntrialscorrect <- nrow(AB.correct.end)

# number and percentage of trials removed from the original data frame, for the results section
# note: this is after removing both incorrect trials and responses outside 200-2000 ms
nrow(AB.end) - nrow(AB.correct.end)
100 - ((nrow(AB.correct.end) / nrow(AB.end)) * 100)

# counting number of rows per person
ntrialsperson.end <- AB.end %>%
  group_by(subject) %>%
  summarise(num_trials = n())
# counting number of correct rows per person
ntrialscorrectperson.end <- AB.correct.end %>%
  group_by(subject) %>%
  summarise(num_trials = n())

# merge the two (after renaming num_trials, which appears in both sets)
names(ntrialscorrectperson.end)[names(ntrialscorrectperson.end) == "num_trials"] <- "num_trials_corr"
percent.correct.person.end <- merge(ntrialsperson.end, ntrialscorrectperson.end)

# calculate percent correct as a variable in "percent.correct.person"
percent.correct.person.end$per.corr <- ((percent.correct.person.end$num_trials_corr / percent.correct.person.end$num_trials) * 100)

# Exclude participants with < 75% accuracy
perc.corr.end <- percent.correct.person.end[c(1, 4)]
AB.correct.end <- merge(AB.correct.end, perc.corr.end, by = c("subject"))
AB.correct.end <- filter(AB.correct.end, per.corr >= 75)

# report how many participants were excluded by this step
# (corr.personout keeps the participants who pass the 75% criterion)
corr.personout.end <- perc.corr.end %>% filter(per.corr >= 75)
# counting the number of people
npeople <- nrow(perc.corr.end)
# counting the number of people retained
npeoplecorrect.end <- nrow(corr.personout.end)
# number and percentage of participants removed, for the results section
nrow(perc.corr.end) - nrow(corr.personout.end)
100 - ((nrow(corr.personout.end) / nrow(perc.corr.end)) * 100)

### OUTLIER ANALYSIS ###
# can choose to use the mean or the median, and 2, 2.5, or 3 deviations; the summary
# below computes mad, median, mean, and sd, but the bounds currently use median +/- 2.5 MADs
descriptives.end <- group_by(AB.correct.end, subject, trialcode) %>%
  summarise(mad = mad(latency),
            median = median(latency),
            mean = mean(latency),
            sd = sd(latency)) %>%
  mutate(lower_MAD25 = median - 2.5 * mad,
         upper_MAD25 = median + 2.5 * mad)

# only keep data above the lower bound and below the upper bound:
joined_dat_MAD25.end <- left_join(AB.correct.end, descriptives.end, by = c("subject", "trialcode")) %>%
  filter(latency > lower_MAD25, latency < upper_MAD25)

# raw scores with outliers removed (e.g. for calculating reliability indices)
write.csv(joined_dat_MAD25.end, file = "C:/Users/justine.macgoris/OneDrive/Bureau/ABend_mad_outliersremoved_last23.csv")
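# The absolute OneDrive paths in the write.csv() calls only work on one machine.
# Since the working directory is already set to the script's folder at the top, a
# portable alternative (a sketch; "output" is a hypothetical subfolder) would be:
# dir.create("output", showWarnings = FALSE)
# write.csv(joined_dat_MAD25.end, file = file.path("output", "ABend_mad_outliersremoved_last23.csv"))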
# GROUP DATA
# creating matrices with means per cell (trialcode) for each participant:
# this uses the mean of the latency scores for each participant, but the data
# cleaning can vary depending on MAD- or SD-based bounds and the number of
# deviations from the mean/median (2, 2.5, 3)
matrix_madout25.end <- joined_dat_MAD25.end %>%
  group_by(subject, congruency, trialcode) %>%
  summarise(mean_MadExcl25 = mean(latency), N_MadExcl25 = n())

# this is for calculating an overall attentional bias if we go for a joint
# analysis; otherwise redundant
matrix_madout25_overall.end <- joined_dat_MAD25.end %>%
  group_by(subject, congruency) %>%
  summarise(mean = mean(latency), n = n())

# Percentage of excluded trials (total, across incorrect trials and MAD outliers)
ntrials <- nrow(AB.end)
# counting the number of retained rows
ntrialsmadout.end <- nrow(joined_dat_MAD25.end)
# number and percentage of trials removed from the original data frame, for the
# results section; note this counts everything removed so far (incorrect trials,
# responses outside 200-2000 ms, excluded participants, and MAD outliers)
nrow(AB.end) - nrow(joined_dat_MAD25.end)
100 - ((nrow(joined_dat_MAD25.end) / nrow(AB.end)) * 100)

#______________________________________________________________________________
# calculate Attentional Bias Index
AB.mean.congruency.end <- matrix_madout25.end %>%
  group_by(subject, congruency) %>%
  summarise(mean = mean(mean_MadExcl25), n = sum(N_MadExcl25)) %>%
  .[c("subject", "congruency", "mean")]
# this removes n; if I need it later I can put it back in, but then the code
# below needs changing to keep the data sensible

AB.end.congruency <- matrix_madout25_overall.end %>% .[c("subject", "congruency", "mean")]
AB.end.final <- spread(AB.end.congruency, congruency, mean)
AB.end.final$ABend <- AB.end.final$incongruent - AB.end.final$congruent
# this line cuts out the congruent/incongruent columns for easy merging
AB.end.final <- AB.end.final[c(1, 4)]

write.csv(AB.end.final, file = "C:/Users/justine.macgoris/OneDrive/Bureau/ABend_final23.csv")

###############################################################################
###############################################################################
###############################################################################
# merge together
ABdata <- full_join(AB.pre.final, AB.mid.final, by = c("subject"))
ABdata <- full_join(ABdata, AB.end.final, by = c("subject"))
ABdata <- full_join(ABdata, trainingorder, by = c("subject"))

ABdata$ABdiff1 <- ABdata$ABmid - ABdata$ABpre
ABdata$ABdiff2 <- ABdata$ABend - ABdata$ABmid
ABdata$ABabsflex <- abs(ABdata$ABdiff1) + abs(ABdata$ABdiff2)

# NA_real_ (rather than a string like "error") keeps these columns numeric;
# any unmatched ABMorder value becomes NA
ABdata$ABtowarddiff <- ifelse(ABdata$ABMorder == "TA", ABdata$ABdiff1,
                       ifelse(ABdata$ABMorder == "AT", ABdata$ABdiff2, NA_real_))
ABdata$ABtoward <- ifelse(ABdata$ABMorder == "TA", ABdata$ABmid,
                   ifelse(ABdata$ABMorder == "AT", ABdata$ABend, NA_real_))
ABdata$ABawaydiff <- ifelse(ABdata$ABMorder == "AT", ABdata$ABdiff1,
                     ifelse(ABdata$ABMorder == "TA", ABdata$ABdiff2, NA_real_))
ABdata$ABaway <- ifelse(ABdata$ABMorder == "AT", ABdata$ABmid,
                 ifelse(ABdata$ABMorder == "TA", ABdata$ABend, NA_real_))

# save final dataset
write.csv(ABdata, file = "C:/Users/justine.macgoris/OneDrive/Bureau/ABdata_LASTVERSION23.csv")
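# The pre/mid/end blocks above are three near-identical copies; wrapping the shared
# steps in a function would avoid the triplication. A minimal sketch under the same
# cleaning rules (it omits the per-person 75% accuracy exclusion and the diagnostic
# counts for brevity; the function and argument names are hypothetical):
process_AB_block <- function(block_data, index_name) {
  correct <- block_data %>%
    filter(correct == 1, latency > 200, latency < 2000)
  desc <- correct %>%
    group_by(subject, trialcode) %>%
    summarise(mad = mad(latency), median = median(latency), .groups = "drop")
  cleaned <- left_join(correct, desc, by = c("subject", "trialcode")) %>%
    filter(latency > median - 2.5 * mad, latency < median + 2.5 * mad)
  out <- cleaned %>%
    group_by(subject, congruency) %>%
    summarise(mean = mean(latency), .groups = "drop") %>%
    spread(congruency, mean)
  out[[index_name]] <- out$incongruent - out$congruent
  out[, c("subject", index_name)]
}
# e.g. AB.pre.final <- process_AB_block(AB.pre, "ABpre")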