import matplotlib.pyplot as plt import pandas as pd import numpy as np %matplotlib notebook demo = pd.read_csv("data/demographic.csv") # demographic.csv columns have space as a first character.. remove it! demo.columns = [col.replace(' ', '') for col in demo.columns] erp_data = pd.read_csv("data/ERPdata.csv") ERP = erp_data.merge(demo, on='subject') ERP.group.replace(to_replace={0: 'Control', 1: 'Schizo'}, inplace=True) ERP = ERP.drop(ERP.columns[5:11], axis=1) Baseline correction ERP = ERP[(ERP.time_ms >= -100) & (ERP.time_ms <= 300)].groupby(['group', 'condition', 'time_ms']).mean() display(ERP[:3]) ERP.loc[(slice(None),1), ['Fz', 'FCz', 'Cz']] = ERP.loc[(slice(None),1), ['Fz', 'FCz', 'Cz']] - ERP.loc[(slice(None),3), ['Fz', 'FCz', 'Cz']].values ERP = ERP.drop(index=3, level=1) ERP.reset_index() display(ERP[:3]) ERP=ERP.reset_index() ERP = pd.melt(ERP, id_vars=['group', 'condition', 'time_ms'], value_vars=['Fz', 'FCz', 'Cz']) ERP.condition.replace(to_replace={1: 'Button tone', 2: 'Playback'}, inplace=True) ERP.head() # It was mentioned that on average Schizophrenia patients have lower level of # education. Here we can see that gender and ages are quite similar between two groups # but education is really a bit lower when it comes to Schizophrenia patients. dummy_demo = demo.copy() dummy_demo.group.replace(to_replace={0: 'Control', 1: 'Schizo'}, inplace=True) gender_frame = pd.DataFrame(data={'gender': dummy_demo.gender, 'group': dummy_demo.group}) display(gender_frame.groupby(['gender','group']).size()) print('\n _____________________________ \n') print('Control subjects') age_edu_frame = pd.DataFrame(data={'education': dummy_demo.education, 'group': dummy_demo.group, 'age': dummy_demo.age}) display(age_edu_frame[dummy_demo.group=='Control'].describe()) print('Schizophrenia patients') age_edu_frame = pd.DataFrame(data={'education': dummy_demo.education, 'group': dummy_demo.group, 'age': dummy_demo.age}) display(age_edu_frame[dummy_demo.group=='Schizo'].describe()) plt.style.use('fivethirtyeight') ERP['variable'].unique() PB=ERP[ERP['condition']=='Playback'] BT=ERP[ERP['condition']=='Button tone'] for v in ERP['variable'].unique(): for g in ERP['group'].unique(): cond = {'variable': v, 'group': g} PBc = PB.loc[(PB['variable'] == cond['variable']) & (PB['group'] == cond['group']), ['time_ms','value']] BTc = BT.loc[(BT['variable'] == cond['variable']) & (BT['group'] == cond['group']), ['time_ms','value']] plt.figure() plt.plot(PBc['time_ms'], PBc['value']) plt.plot(BTc['time_ms'], BTc['value']) plt.title('variable: {variable}, group: {group}'.format(**cond)) plt.legend(['Playback','Button tone']) plt.style.use('fivethirtyeight') ERP['variable'].unique() ERP = ERP[(ERP.time_ms >= 30) & (ERP.time_ms <= 70)] PB=ERP[ERP['condition']=='Playback'] BT=ERP[ERP['condition']=='Button tone'] for v in ERP['variable'].unique(): for g in ERP['group'].unique(): cond = {'variable': v, 'group': g} PBc = PB.loc[(PB['variable'] == cond['variable']) & (PB['group'] == cond['group']), ['time_ms','value']] BTc = BT.loc[(BT['variable'] == cond['variable']) & (BT['group'] == cond['group']), ['time_ms','value']] plt.figure() plt.plot(PBc['time_ms'], PBc['value']) plt.plot(BTc['time_ms'], BTc['value']) plt.title('variable: {variable}, group: {group}'.format(**cond)) plt.legend(['Playback','Button tone']) elec_right = ['FC4', 'C4', 'CP4'] elec_left = ['FC3', 'C3', 'CP3'] electrodes_lrp = elec_right + elec_left # Substract baseline BL = erp_data[(erp_data.time_ms >= -600) & (erp_data.time_ms <= -500)].groupby(['subject', 'condition']).mean() RPamps = erp_data[(erp_data.time_ms >= -100) & (erp_data.time_ms <= 0)].groupby(['subject', 'condition']).mean() RPamps = RPamps[electrodes_lrp] - BL[electrodes_lrp] # Drop playback (won't need it here) RPamps = RPamps.drop(index=2, level=1) RPamps = RPamps.reset_index() # Give proper names to group and condition values RPamps_demo = RPamps.merge(demo, on='subject') RPamps_demo.group.replace(to_replace={0: 'Control', 1: 'Schizo'}, inplace=True) RPamps_demo.condition.replace(to_replace={1: 'Button tone', 3: 'Control press'}, inplace=True) # Display for testing display(RPamps_demo[:4]) # Boxplot for el in electrodes_lrp: RPamps_demo.boxplot(column=el, by=['group', 'condition'], figsize=(8, 8), fontsize=8) # For a single plot of values from every electrode amongst ['FC4', 'C4', 'CP4', 'FC3', 'C3', 'CP3'] RPamps_demo_hem = RPamps_demo.copy() RPamps_demo_hem = pd.melt(RPamps_demo_hem, id_vars=['group', 'condition'], value_vars=electrodes_lrp) display(RPamps_demo_hem[:10]) # Boxplot RPamps_demo_hem.boxplot(column='value', by=['group', 'condition'], figsize=(8, 8), fontsize=8) df = RPamps_demo.copy() #convert electrode columns to rows with labels: df = pd.melt(df, id_vars=['subject', 'group', 'condition'], value_vars=electrodes_lrp, var_name='electrode', value_name='RP') #Add the hemisphere column df['hemisphere'] = df['electrode'] df['hemisphere'] = df['hemisphere'].apply(lambda x: 'left' if '3' in x else 'right') #Remove the numbers from the electrodes df['electrode'] = df['electrode'].str[:-1] print('Right, left together: ') result = stats.f_oneway( df['RP'][df['condition'] == 'Button tone'][df['group'] == 'Control'], df['RP'][df['condition'] == 'Button tone'][df['group'] == 'Schizo']) print(result) result = stats.f_oneway( df['RP'][df['condition'] == 'Control press'][df['group'] == 'Control'], df['RP'][df['condition'] == 'Control press'][df['group'] == 'Schizo']) print(result) def anova_table(aov): aov['mean_sq'] = aov[:]['sum_sq']/aov[:]['df'] aov['eta_sq'] = aov[:-1]['sum_sq']/sum(aov['sum_sq']) aov['omega_sq'] = (aov[:-1]['sum_sq']-(aov[:-1]['df']*aov['mean_sq'][-1]))/(sum(aov['sum_sq'])+aov['mean_sq'][-1]) cols = ['sum_sq', 'df', 'mean_sq', 'F', 'PR(>F)', 'eta_sq', 'omega_sq'] aov = aov[cols] return aov df_tone = df[df['condition'] == 'Button tone'] results = ols('RP ~ C(group)', data=df_tone).fit() #results.summary() table = sm.stats.anova_lm(results, typ=2) anova_table(table) df_control = df[df['condition'] == 'Control press'] results = ols('RP ~ C(group)', data=df_control).fit() #results.summary() table = sm.stats.anova_lm(results, typ=2) anova_table(table) df_tone = df[df['group'] == 'Control'] results = ols('RP ~ C(condition)', data=df_tone).fit() #results.summary() table = sm.stats.anova_lm(results, typ=2) anova_table(table) df_tone = df[df['group'] == 'Schizo'] results = ols('RP ~ C(condition)', data=df_tone).fit() #results.summary() table = sm.stats.anova_lm(results, typ=2) anova_table(table)