#Content: Data analysis for Spider study #Author: Guillaume Dezecache (guillaume.dezecache@gmail.com) #Last update: Fri Oct 23 19:48:11 2020 #Checks: [OK] next to the corresponding code bit ###USE THE CHECK_OUTLIERS FUNCTION!! #Set working directory [OK] setwd('/Users/dtaylor/Desktop/pan_spider/') #Libraries + script needed (clean it for some are not used) library(tseries) library(e1071) library(ez) library(car) library(readxl) library(effects) library(lme4) library(MASS) library(DescTools) library(irr) library(lsr) library(emmeans) library(dplyr) library(lattice) library(ggplot2) source("RogerMundrysdiagnostic_fcns.R") library(performance) #Load dataset data = read_excel("data.xlsx", sheet = "data") #Load dataset with wide structure datawide = read_excel("data.xlsx", sheet = "datawide") #Check structure and transform some variables as factors str(datawide) datawide$ID=as.factor(datawide$ID) datawide$GROUP=as.factor(datawide$GROUP) datawide$P.CALL_1=as.factor(datawide$P.CALL_1) datawide$P.CALL_2=as.factor(datawide$P.CALL_2) #Data structure ##Check balance wrt predictors table(datawide$ID, datawide$GROUP) ##Inspection of predictors hist(datawide$SPIDER.MOVE_2) skewness(datawide$SPIDER.MOVE_2) ##not so bad datawide$SPIDER.MOVE_2z=as.vector(scale(datawide$SPIDER.MOVE_2)) hist(datawide$DELAY) ##seriously right skewed skewness(datawide$DELAY) hist(log(datawide$DELAY)) ##try log: looks better skewness(log(datawide$DELAY)) ##try log: looks better indeed datawide$DELAYlog=log(datawide$DELAY) ##apply log transformation datawide$DELAYlogz=as.vector(scale(datawide$DELAYlog)) #Hypothesis 1: More likely to call in Social group ##Call model specification Hyp1FullModel=glm(P.CALL_2~GROUP+SPIDER.MOVE_2z+DELAYlogz+P.CALL_1,data=datawide, family=binomial) ##Model diagnostics using package 'performance' check_model(Hyp1FullModel) model_performance(Hyp1FullModel) ##Model diagnostics using Roger Mundry's tools & guidelines vif(Hyp1FullModel) ##VIF, relatively OK max(influence(Hyp1FullModel)$hat)-lev.thresh(Hyp1FullModel) ##max influence does not go beyond the leverage's threshold beta=cbind(coefficients(Hyp1FullModel), coefficients(Hyp1FullModel)+t(apply(X=dfbeta(Hyp1FullModel), MARGIN=2, FUN=range))) ##actual in column 1, min in column 2 & max in column 3 m.stab.plot(beta) ##not very good ##Inference r2(Hyp1FullModel) Hyp1NullModel=glm(P.CALL_2~1,data=datawide, family=binomial) anova(Hyp1FullModel,Hyp1NullModel, test='Chisq') #Hypothesis 2: More gazing in Social Group ##Calculation of dependent variable datawide$proportionoflookingtrial1=datawide$D.LOOK.30.R_1/datawide$CODINGTIMEMAX30_1 ##for a linear model datawide$proportionoflookingtrial2=datawide$D.LOOK.30.R_2/datawide$CODINGTIMEMAX30_2 ##for a linear model datawide$look=datawide$D.LOOK.30.R_2 ##for a binomial model datawide$nolook=datawide$CODINGTIMEMAX30_2-datawide$D.LOOK.30.R_2 ##for a binomial model datawide$lookvsnolook=round(cbind(datawide$look,datawide$nolook),0) ##for a binomial model ##Transformation of predictors? hist(datawide$D.LOOK.PROP_1) ##quite left skewed... skewness(datawide$D.LOOK.PROP_1) ##indeed... hist(datawide$D.LOOK.PROP_1^2) ##try ^2 transformation hist(log(datawide$D.LOOK.PROP_1)) ##try log; it makes things worse... hist(datawide$D.LOOK.PROP_1^(1/3)) ##try cuberoot; makes thing worse... datawide$D.LOOK.PROP_1_t=datawide$D.LOOK.PROP_1^2 ##apply ^2 skewness(datawide$D.LOOK.PROP_1_t) ##less skewing... datawide$D.LOOK.PROP_1_tz=as.vector(scale(datawide$D.LOOK.PROP_1_t)) ##zscore it ##Model specification with binomial family Hyp2FullModel=glm(lookvsnolook~GROUP+SPIDER.MOVE_2+DELAYlogz+D.LOOK.PROP_1_tz,data=datawide, family=binomial) ###Model diagnostics using package 'performance' check_model(Hyp2FullModel) model_performance(Hyp2FullModel) ###Model diagnostics using Roger Mundry's tools & guidelines vif(Hyp2FullModel) ##VIF, relatively OK max(influence(Hyp2FullModel)$hat)-lev.thresh(Hyp2FullModel) ##max influence does not go beyond the leverage's threshold beta=cbind(coefficients(Hyp2FullModel), coefficients(Hyp2FullModel)+t(apply(X=dfbeta(Hyp2FullModel), MARGIN=2, FUN=range))) ##actual in column 1, min in column 2 & max in column 3 m.stab.plot(beta) ##looks OK? ###Inference Hyp2NullModel=glm(lookvsnolook~1,data=datawide, family=binomial) r2(Hyp2FullModel) anova(Hyp2NullModel, Hyp2FullModel, test="Chisq") summary(Hyp2FullModel)$coefficients Hyp2table=cbind(coefficients(Hyp2FullModel), confint(object=Hyp2FullModel)) plot(allEffects(Hyp2FullModel)) ##Model specification with gaussian family Hyp2FullModellinear=lm(D.LOOK.PROP_2~GROUP+SPIDER.MOVE_2+DELAYlogz+D.LOOK.PROP_1_tz,data=datawide) ###Model diagnostics using package 'performance' check_model(Hyp2FullModellinear) ###Model diagnostics following R. Mundry's diagnostics.plot(Hyp2FullModellinear) ###all look relatively ok besides residuals against fitted values, where there seems to be sort of a pattern maybe? cor.test(fitted(Hyp2FullModellinear), abs(residuals(Hyp2FullModellinear))) dffits(Hyp2FullModellinear) max(abs(dffits(Hyp2FullModellinear))) ###Two problematic cases here, 5th and 9th dfbeta(Hyp2FullModellinear) round(cbind(coefficients(Hyp2FullModellinear), coefficients(Hyp2FullModellinear)+ t(apply(X=dfbeta(Hyp2FullModellinear), MARGIN=2, FUN=range))), 3) ##does not look so problematic? dfbetas(Hyp2FullModellinear) ##5th and 9th values are problematic max(influence(Hyp2FullModellinear)$hat)-lev.thresh(Hyp2FullModellinear) ##looks OK plot(as.vector(influence(Hyp2FullModellinear)$hat)) max(cooks.distance(Hyp2FullModellinear)) plot(cooks.distance(Hyp2FullModellinear)) ##5th and 9th look quite problematic ###Run model without likely influential cases Hyp2FullModellinearNOINFL=lm(D.LOOK.PROP_2~GROUP+SPIDER.MOVE_2+DELAYlogz+D.LOOK.PROP_1_tz,data=datawide[-c(5, 9),]) plot(cooks.distance(Hyp2FullModellinear)) ##but it reveals other influential cases... ###Inference summary(Hyp2FullModellinear) r2(Hyp2FullModellinear) Hyp2NullModellinear=lm(D.LOOK.PROP_2~1,data=datawide) anova(Hyp2NullModellinear,Hyp2FullModellinear, test="F") Hyp2RedModellinear=lm(D.LOOK.PROP_2~SPIDER.MOVE_2+DELAY+D.LOOK.PROP_1,data=datawide) anova(Hyp2RedModellinear, Hyp2FullModellinear, test="F") plot(allEffects(Hyp2FullModellinear)) Anova(Hyp2FullModellinear, type=3) summary(Hyp1FullModel)$coefficients Hyp2table=cbind(coefficients(Hyp2FullModel), confint(object=Hyp2FullModel)) ###Explore models without primary influential cases? summary(Hyp2FullModellinearNOINFL) r2(Hyp2FullModellinearNOINFL) Hyp2NullModellinearNOINFL=lm(D.LOOK.PROP_2~1,data=datawide[-c(5, 9),]) anova(Hyp2NullModellinearNOINFL,Hyp2FullModellinearNOINFL, test="F")