#R code for antibody validation analysis #Load required libraries ---- library(readxl) library(foreign) library(tidyverse) library(tidyr) library(dplyr) library(broom) library(irr) library(psych) #load data sets ---- antibodies <-read_excel(".../Supplemental Data S1.xlsx") #load here the antibody data set (supplemental data S1) reliability <-read_excel(".../Supplemental Data S2.xlsx") #load here the interrater reliability data set (supplemental data S2) #In the data sets the value 1 means yes, 0 means no, 9 means not applicable, 999 means not clear # Data transformation ---- #interrater reliability data reliability$J_how_many_validated[reliability$J_how_many_validated=="na"]<-"0" #code answer not applicable as 0 for this variable reliability$W_how_many_validated[reliability$W_how_many_validated=="na"]<-"0" #code answer not applicable as 0 for this variable reliability[sapply(reliability, is.character)]<-lapply(reliability[sapply(reliability, is.character)],as.factor) reliability[sapply(reliability, function(x) as.character(x) %in% c("na","?"))] <- NA #the answers 'not applicable' and 'not clear' are coded as missing values for all other variables reliability$J_how_many_used <-as.numeric(as.character(reliability$J_how_many_used)) reliability$W_how_many_used <-as.numeric(as.character(reliability$W_how_many_used)) reliability$J_how_many_validated <-as.numeric(as.character(reliability$J_how_many_validated)) reliability$W_how_many_validated <-as.numeric(as.character(reliability$W_how_many_validated)) #antibody data antibodies$How_many_abs_validated<-as.numeric(as.character(antibodies$How_many_abs_validated)) antibodies$How_many_abs_validated[antibodies$How_many_abs_validated==999]<-NA #answer 'not clear = 999' is classiefied as missing antibodies$How_many_different_abs_used<-as.numeric(as.character(antibodies$How_many_different_abs_used)) antibodies$How_many_different_abs_used[antibodies$How_many_different_abs_used==999]<-NA #answer 'not clear = 999' is classiefied as missing antibodies$percentage_abs_validated<-as.numeric(as.character(antibodies$percentage_abs_validated)) antibodies$percentage_abs_validated[antibodies$percentage_abs_validated==999]<-NA #answer 'not clear = 999' is classiefied as missing #The following comands are used to create separate datasets for variables that need to be analysed in same way later on results<-gather(antibodies, key = "type", value = "validated",Basic_info_all_primary_abs_complete_na_no, Validation_info_present, All_primary_ab_validated_na_no) results2<-gather(antibodies, key = "type", value = "validated",Secondary_without_primary, Spatial_localization, Preadsorption, MW_WB, Reference_validation_supplier, Reference_validation_literature, Basic_info_all_primary_abs_complete_na_no, Validation_info_present, All_primary_ab_validated_na_no, Validation_by_researcher) results3<-gather(antibodies, key = "type", value = "validated",Secondary_without_primary, Spatial_localization, Preadsorption, Reference_validation_literature, Basic_info_all_primary_abs_complete_na_no, Validation_info_present, All_primary_ab_validated_na_no) results4<-gather(antibodies, key = "type", value = "validated",Secondary_without_primary, Spatial_localization, Preadsorption, MW_WB, Reference_validation_literature, Basic_info_all_primary_abs_complete_na_no, Validation_info_present, All_primary_ab_validated_na_no) results5<-gather(antibodies, key = "type", value = "validated",Secondary_without_primary, Spatial_localization, MW_WB, Reference_validation_supplier, Reference_validation_literature, Basic_info_all_primary_abs_complete_na_no, Validation_info_present, All_primary_ab_validated_na_no) results6<-gather(antibodies, key = "type", value = "validated",Secondary_without_primary, Spatial_localization, Preadsorption, MW_WB, Reference_validation_supplier, Reference_validation_literature, Basic_info_all_primary_abs_complete_na_no, Validation_info_present, All_primary_ab_validated_na_no) #Missing values: 9=na, 999=unclear, both will be defined as missing values here results$validated[results$validated==9 |results$validated==999]<-NA results2$validated[results2$validated==9 |results2$validated==999]<-NA results3$validated[results3$validated==9 |results3$validated==999]<-NA results4$validated[results4$validated==9 |results4$validated==999]<-NA results5$validated[results5$validated==9 |results5$validated==999]<-NA results6$validated[results6$validated==9 |results6$validated==999]<-NA #interrater reliability ---- #calculate cohen's kappa and the percentage agreement between assessments of two raters: cohen.kappa(x=cbind(reliability$J_val_info_present,reliability$W_val_info_present),alpha=.05); agree(cbind(reliability$J_val_info_present,reliability$W_val_info_present)) #for any validation information present of at least one antibody cohen.kappa(x=cbind(reliability$J_basic_info_all_primary_complete,reliability$W_basic_info_all_primary_complete), alpha =.05); agree(cbind(reliability$J_basic_info_all_primary_complete,reliability$W_basic_info_all_primary_complete)) #for basic information all primary antibodies complete cohen.kappa(x=cbind(reliability$J_all_primary_validated_na_no,reliability$W_all_primary_validated_na_no), alpha =.05); agree(cbind(reliability$J_all_primary_validated_na_no,reliability$W_all_primary_validated_na_no)) #for all primary antibodies validated cohen.kappa(x=cbind(reliability$J_ref_supplier,reliability$W_ref_supplier), alpha =.05); agree(cbind(reliability$J_ref_supplier,reliability$W_ref_supplier)) #for validation by reference to antibody supplier cohen.kappa(x=cbind(reliability$J_ref_lit,reliability$W_ref_lit), alpha =.05);agree(cbind(reliability$J_ref_lit,reliability$W_ref_lit)) #for validation by reference to the literature cohen.kappa(x=cbind(reliability$J_ref_database,reliability$W_ref_database), alpha =.05);agree(cbind(reliability$J_ref_database,reliability$W_ref_database)) #for validation by reference to database cohen.kappa(x=cbind(reliability$J_Val_by_researcher,reliability$W_Val_by_researcher), alpha =.05);agree(cbind(reliability$J_Val_by_researcher,reliability$W_Val_by_researcher)) #for validation carried out by authors of the article cohen.kappa(x=cbind(reliability$J_MW_WB,reliability$W_MW_WB), alpha =.05);agree(cbind(reliability$J_MW_WB,reliability$W_MW_WB)) #for validation method: molecular weight similar to target in WB cohen.kappa(x=cbind(reliability$J_spatial_loc,reliability$W_spatial_loc), alpha =.05);agree(cbind(reliability$J_spatial_loc,reliability$W_spatial_loc)) #for validation method: spatial localization cohen.kappa(x=cbind(reliability$J_preadsorption,reliability$W_preadsorption), alpha =.05);agree(cbind(reliability$J_preadsorption,reliability$W_preadsorption)) #for validation method: preadsorption cohen.kappa(x=cbind(reliability$J_sec_without_prim,reliability$W_sec_without_prim), alpha =.05);agree(cbind(reliability$J_sec_without_prim,reliability$W_sec_without_prim)) #for validation method: secondary antibody without primary cohen.kappa(x=cbind(reliability$J_five_pillar,reliability$W_five_pillar), alpha =.05);agree(cbind(reliability$J_five_pillar,reliability$W_five_pillar)) #for validation method: five pillars cohen.kappa(x=cbind(reliability$J_positive_control,reliability$W_positive_control), alpha =.05);agree(cbind(reliability$J_positive_control,reliability$W_positive_control)) #for validation method: positive control cohen.kappa(x=cbind(reliability$J_negative_control,reliability$W_negative_control), alpha =.05);agree(cbind(reliability$J_negative_control,reliability$W_negative_control)) #for validation method: negative control cohen.kappa(x=cbind(reliability$J_other_total,reliability$W_other_total), alpha =.05);agree(cbind(reliability$J_other_total,reliability$W_other_total)) #for all other validation methods #intra-class correlation icc(cbind(reliability$J_how_many_used,reliability$W_how_many_used),model="twoway", unit="single") icc(cbind(reliability$J_how_many_validated,reliability$W_how_many_validated),model="twoway", unit="single") (J_percentage_validated<-reliability$J_how_many_validated/reliability$J_how_many_used *100) #calculate the percentage of validated antibodies for each evaluated article (W_percentage_validated<-reliability$W_how_many_validated/reliability$W_how_many_used *100) #calculate the percentage of validated antibodies for each evaluated article icc(cbind(J_percentage_validated,W_percentage_validated),model="twoway", unit="single") #analysis antibody data ---- #tables ---- #percentage of validated antibodies per article (data of figure 2): antibodies %>% group_by(Journal, after_guidelines)%>% #compare samples of journals before and after the introduction of guidelines summarise_at(vars(percentage_abs_validated), list(~mean(., na.rm=TRUE))) percentage_validated_table<-data.frame(antibodies$Journal, antibodies$after_guidelines, antibodies$percentage_abs_validated, antibodies$How_many_abs_validated, antibodies$How_many_different_abs_used) #(data for supplemental table 1) #proportion and count tables for each category: #JCN JCNB <-filter(results2, Journal == "JCN" & after_guidelines == 0) #filter results for journal JCN before introduction of guidelines JCNA <-filter(results2, Journal == "JCN" & after_guidelines == 1) #filter results for journal JCN after introduction of guidelines prop.table(table(JCNB$type,JCNB$validated),1)#proportion table of JCN before introduction of guidelines prop.table(table(JCNA$type,JCNA$validated),1)#proportion table of JCN after introduction of guidelines table(JCNB$type,JCNB$validated) # count table JCN before table(JCNA$type,JCNA$validated) # count table JCN after #Nature NatureB <- filter(results2, Journal == "Nature" & after_guidelines==0) #filter results for journal Nature before introduction of guidelines NatureA <-filter(results2, Journal == "Nature" & after_guidelines==1) #filter results for journal Nature after introduction of guidelines prop.table(table(NatureB$type,NatureB$validated),1)#proportion table of Nature before introduction of guidelines prop.table(table(NatureA$type,NatureA$validated),1)#proportion table of Nature after introduction of guidelines table(NatureB$type,NatureB$validated) # count table Nature before table(NatureA$type,NatureA$validated) # count table Nature after #Total with guidelines withB<-filter(results2, with_guidelines == 1 & after_guidelines==0) #filter results for journals with guidelines before introduction of guidelines withA<-filter(results2, with_guidelines == 1 & after_guidelines==1) #filter results for journals with guidelines after introduction of guidelines prop.table(table(withB$type,withB$validated),1)#proportion table of journals with guidelines before introduction of guidelines prop.table(table(withA$type,withA$validated),1)#proportion table of journals with guidelines after introduction of guidelines table(withB$type,withB$validated) # count table journals with guidelines before table(withA$type,withA$validated) # count table journals with guidelines after #Neuroscience NeuroB <-filter(results2, Journal == "Neuroscience" & after_guidelines==0) #filter results for journal Neuroscience before introduction of guidelines NeuroA <-filter(results2, Journal == "Neuroscience" & after_guidelines==1) #filter results for journal Neuroscience after introduction of guidelines prop.table(table(NeuroB$type,NeuroB$validated),1)#proportion table of Neuroscience before introduction of guidelines prop.table(table(NeuroA$type,NeuroA$validated),1)#proportion table of Neuroscience after introduction of guidelines table(NeuroB$type,NeuroB$validated) # count table Neuroscience before table(NeuroA$type,NeuroA$validated) # count table Neuroscience after #Science ScienceB <-filter(results2, Journal == "Science" & after_guidelines==0) #filter results for journal Science before introduction of guidelines ScienceA <-filter(results2, Journal == "Science" & after_guidelines==1) #filter results for journal Science after introduction of guidelines prop.table(table(ScienceB$type,ScienceB$validated),1)#proportion table of Science before introduction of guidelines prop.table(table(ScienceA$type,ScienceA$validated),1)#proportion table of Science after introduction of guidelines table(ScienceB$type,ScienceB$validated) # count table Science before table(ScienceA$type,ScienceA$validated) # count table Science after #Total without guidelines withoutB<-filter(results2, with_guidelines == 0 & after_guidelines==0) #filter results for journals without guidelines before introduction of guidelines withoutA<-filter(results2, with_guidelines == 0 & after_guidelines==1) #filter results for journals without guidelines after introduction of guidelines prop.table(table(withoutB$type,withoutB$validated),1)#proportion table of journals without guidelines before introduction of guidelines prop.table(table(withoutA$type,withoutA$validated),1)#proportion table of journals without guidelines after introduction of guidelines table(withoutB$type,withoutB$validated) # count table journals without guidelines before table(withoutA$type,withoutA$validated) # count table journals without guidelines after #total, all articles before <- filter(results2, after_guidelines ==0) #filter results before guidelines after <- filter(results2, after_guidelines ==1) #filter results after guidelines prop.table(table(before$type,before$validated),1)#proportion table of all journals before introduction of guidelines prop.table(table(after$type,after$validated),1)#proportion table of all journals after introduction of guidelines table(before$type,before$validated) # count table all journals before table(after$type,after$validated) # count table all journals after #fisher exact test for difference in validation before and after guideline introduction ---- (JWithG_BA<- filter(results5, with_guidelines ==1)%>% #journals with guidelines group_by(with_guidelines,type) %>% do(tidy(fisher.test(.$after_guidelines, .$validated, alternative ="greater", conf.int=TRUE, conf.level=0.95)))) (JCNNat_BA<- filter(results, with_guidelines ==1) %>% #JCN and Nature group_by(Journal,type) %>% do(tidy(fisher.test(.$after_guidelines, .$validated, alternative ="greater", conf.int=TRUE, conf.level=0.95)))) (JWHG_BA<- filter(results3, with_guidelines ==0) %>% #journals without guidelines -> two sided test group_by(with_guidelines,type) %>% do(tidy(fisher.test(.$after_guidelines, .$validated, alternative ="two.sided", conf.int=TRUE, conf.level=0.95)))) (SciNeu_BA<- filter(results, with_guidelines ==0) %>% #Science and Neuroscience -> two sided test group_by(Journal,type) %>% do(tidy(fisher.test(.$after_guidelines, .$validated, alternative ="two.sided", conf.int=TRUE, conf.level=0.95)))) (all_BA<- results %>% #all journals -> two sided test group_by(type) %>% do(tidy(fisher.test(.$after_guidelines, .$validated, alternative ="two.sided", conf.int=TRUE, conf.level=0.95)))) #fisher exact test for difference in validation between similar journals with and without guidelines ---- (WWB<- filter(results4, after_guidelines ==0) %>% #journals with guidelines versus journals without guidelines, before group_by(type) %>% do(tidy(fisher.test(.$with_guidelines, .$validated, alternative ="two.sided", conf.int=TRUE, conf.level=0.95)))) (WWA<- filter(results6, after_guidelines ==1) %>% #journals with guidelines versus journals without guidelines, after group_by(type) %>% do(tidy(fisher.test(.$with_guidelines, .$validated, alternative ="greater", conf.int=TRUE, conf.level=0.95)))) (JCNNeu_WWB<- filter(results, (Journal == "JCN" | Journal=="Neuroscience") & after_guidelines == 0) %>% #JCN versus Neuroscience, before group_by(type) %>% do(tidy(fisher.test(.$with_guidelines, .$validated, alternative ="two.sided", conf.int=TRUE, conf.level=0.95)))) (JCNNeu_WWA<- filter(results, (Journal == "JCN" | Journal=="Neuroscience") & after_guidelines == 1) %>% #JCN versus Neuroscience, after group_by(type) %>% do(tidy(fisher.test(.$with_guidelines, .$validated, alternative ="greater", conf.int=TRUE, conf.level=0.95)))) (NatSci_WWB<- filter(results, (Journal == "Nature"|Journal=="Science")&after_guidelines==0) %>% #Nature versus Science, before group_by(type) %>% do(tidy(fisher.test(.$with_guidelines, .$validated, alternative ="two.sided", conf.int=TRUE, conf.level=0.95)))) (NatSci_WWA<- filter(results, (Journal == "Nature"|Journal=="Science")&after_guidelines==1) %>% #Nature versus Science, after group_by(type) %>% do(tidy(fisher.test(.$with_guidelines, .$validated, alternative ="greater", conf.int=TRUE, conf.level=0.95)))) #holm-bonferroni correction---- (total<- rbind(JCNNat_BA,JWithG_BA,SciNeu_BA,JWHG_BA,all_BA, WWB, WWA, JCNNeu_WWB, JCNNeu_WWA, NatSci_WWB, NatSci_WWA )) #collect all p values padjusted<-p.adjust(total$p.value, method="holm", n=length(total$p.value)) (pvalues<-data.frame(total$with_guidelines, total$Journal, total$type, total$alternative, total$p.value,padjusted, total$estimate, total$conf.low, total$conf.high)) #dataframe with all statistical tests, odds ratio's, p-values and adjusted p-values