--- title: "Consulting" date: "12 de abril de 2018" output: pdf_document: default html_document: default --- ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE) ``` ```{r} load("clean_data/Figure2/Figure2B.RData") L1b<-length(Figure2_Distances_cohorts1vs2_unweighted_cohort1) L2b<-length(Figure2_Distances_cohorts1vs2_unweighted_cohort2) UM1b<-as.data.frame(t(Figure2_Distances_cohorts1vs2_unweighted_cohort1)) UM1b$V2<-1 UM1b<-UM1b[2:L1b,] UM2b<-as.data.frame(t(Figure2_Distances_cohorts1vs2_unweighted_cohort2)) UM2b$V2<-2 UM2b<-UM2b[2:L2b,] UMb<-rbind(UM1b,UM2b) #UM is the unweighted matrix for cohort 1 and 2 combined colnames(UMb)<-c("Unweighted_dist","Cohort") L3b<-length(Figure2_Distances_cohorts1vs2_weighted_cohort1) L4b<-length(Figure2_Distances_cohorts1vs2_weighted_cohort2) WM3b<-as.data.frame(t(Figure2_Distances_cohorts1vs2_weighted_cohort1)) WM3b$V2<-1 WM3b<-WM3b[2:L3b,] WM4b<-as.data.frame(t(Figure2_Distances_cohorts1vs2_weighted_cohort2)) WM4b$V2<-2 WM4b<-WM4b[2:L4b,] WMb<-rbind(WM3b,WM4b) #WM is the weighted matrix for cohort 1 and 2 combined colnames(WMb)<-c("Weighted_dist","Cohort") load("clean_data/Figure2/Figure2D.RData") L1d<-length(Figure2_Distances_cohorts1vs3_unweighted_cohort1) L2d<-length(Figure2_Distances_cohorts1vs3_unweighted_cohort3) UM1d<-as.data.frame(t(Figure2_Distances_cohorts1vs3_unweighted_cohort1)) UM1d$V2<-1 UM1d<-UM1d[2:L1d,] UM2d<-as.data.frame(t(Figure2_Distances_cohorts1vs3_unweighted_cohort3)) UM2d$V2<-3 UM2d<-UM2d[2:L2d,] UMd<-rbind(UM1d,UM2d) #UM is the unweighted matrix for cohort 1 and 3 combined colnames(UMd)<-c("Unweighted_dist","Cohort") L3d<-length(Figure2_Distances_cohorts1vs3_weighted_cohort1) L4d<-length(Figure2_Distances_cohorts1vs3_weighted_cohort3) WM3d<-as.data.frame(t(Figure2_Distances_cohorts1vs3_weighted_cohort1)) WM3d$V2<-1 WM3d<-WM3d[2:L3d,] WM4d<-as.data.frame(t(Figure2_Distances_cohorts1vs3_weighted_cohort3)) WM4d$V2<-3 WM4d<-WM4d[2:L4d,] WMd<-rbind(WM3d,WM4d) #WM is the weighted matrix for cohort 1 and 2 combined colnames(WMd)<-c("Weighted_dist","Cohort") ``` ```{r fns} unpairedpermttest <- function(x, m1, m2, nperm, typecol, datacol, h0=0, alt="two.sided", vareq=TRUE, r=FALSE, p=NULL, utest=TRUE){ # x = Database, with columns as variables and rows as observations. # m1, m2 = Values to compare for the difference in means (i.e. cohort numbers). # nperm = Number of permutations to do. # Typecol = Column with the subsamples to compare. # Datacol = Column with the data. # h0 = H0 that is being tested. # alt = Type of alternative. vareq = Assume equal variances? Default is true. # r = With or without replacement?, # p = Resampling weights. # utest = If a t or u test should be done, default is TRUE, else a t test is done instead x <- as.data.frame(x) stats <- c() difmeans <- c() var1 <- c() var2 <- c() if (isTRUE(utest)) { for (i in 1:nperm) { perm <- sample(x=x[,typecol],size=nrow(x),replace=r,prob=p) x <- cbind.data.frame(x,perm) vec1 <- (as.numeric(x[which(x[,ncol(x)]==m1),datacol])) vec2 <- (as.numeric(x[which(x[,ncol(x)]==m2),datacol])) stats <- c(stats, wilcox.test(vec1,vec2,paired=FALSE,mu=h0,alternative=alt)$statistic) difmeans <- c(difmeans,mean(vec1)-mean(vec2)) var1 <- c(var1,var(vec1)) var2 <- c(var2,var(vec2)) } } else { for (i in 1:nperm) { perm <- sample(x=x[,typecol], size=nrow(x), replace=r, prob=p) #shuffling the labels x <- cbind.data.frame(x,perm) vec1 <- (as.numeric(x[which(x[,ncol(x)]==m1),datacol])) vec2 <- (as.numeric(x[which(x[,ncol(x)]==m2),datacol])) stats <- c(stats, t.test(vec1,vec2,paired=FALSE,mu=h0,alternative=alt,var.equal=vareq)$statistic) difmeans <- c(difmeans,mean(vec1)-mean(vec2)) var1 <- c(var1,var(vec1)) var2 <- c(var2,var(vec2)) } } return(list(stats = stats, difmeans = difmeans, var1 = var1, var2 = var2, x = x)) #Returns a list with the t or u stats, the mean differences, # group variances and a data frame with the specific permutations for each iteration. } ``` ```{r permtests_2b} auwb <- mean(as.numeric(as.vector(UMb[which(UMb[,2]==1),1]))) #average of cohort 1 buwb <- mean(as.numeric(as.vector(UMb[which(UMb[,2]==2),1]))) #average of cohort 2 obsmeanuwb<-auwb-buwb #Observed mean for the unweighted distances awb<-mean(as.numeric(as.vector(WMb[which(WMb[,2]==1),1]))) #average of cohort 1 bwb<-mean(as.numeric(as.vector(WMb[which(WMb[,2]==2),1]))) #average of cohort 2 obsmeanwb<-awb-bwb #Observed mean for the weighted distances #Permutation tests for unweighted distances UWperm1b<-unpairedpermttest(UMb,m1=1,m2=2,nperm=10000,typecol=2,datacol=1,r=FALSE) #Permutation test obsuuwb <- wilcox.test(as.numeric(as.vector(UMb[which(UMb[,2]==1),1])),as.numeric(as.vector(UMb[which(UMb[,2]==2),1])),alternative = "two.sided",mu=0,paired=FALSE)$statistic #u-test from the observed sample obsuuwb #Same for weighted distances Wperm1b<-unpairedpermttest(WMb,m1=1,m2=2,nperm=10000,typecol=2,datacol=1,r=FALSE) obsuwb<-wilcox.test(as.numeric(as.vector(WMb[which(WMb[,2]==1),1])),as.numeric(as.vector(WMb[which(WMb[,2]==2),1])),alternative = "two.sided",mu=0,paired=FALSE)$statistic obsuwb #Size - 2 sided test pvalUWperm1b <- length(UWperm1b[[1]][which(quantile(UWperm1b[[1]],probs=.975)<(obsuuwb) | quantile(UWperm1b[[1]],probs=.025)>(obsuuwb))])/length(UWperm1b[[1]]) pvalWperm1b<-length(Wperm1b[[1]][which(quantile(Wperm1b[[1]],probs=.975)<(obsuwb) | quantile(Wperm1b[[1]],probs=.025)>(obsuwb))])/length(Wperm1b[[1]]) pvalsb<-rbind.data.frame(pvalUWperm1b,pvalWperm1b) rownames(pvalsb)<-c("Unweighted","Weighted") colnames(pvalsb)<-c("p-values") pvalsb plot(density(UWperm1b[[1]]),main="Density, unweighted",sub=paste("Statistic: ",obsuuwb," - Green: 2.5% and 97.5% quantiles"), xlim = c(30000, 170000)) abline(v=obsuuwb,col="red") abline(v=quantile(UWperm1b[[1]],probs=.975),col="green") abline(v=quantile(UWperm1b[[1]],probs=.025),col="green") plot(density(Wperm1b[[1]]),main="Density, weighted",sub=paste("Statistic: ",obsuwb," - Green: 2.5% and 97.5% quantiles"), xlim=c(60000, 160000)) abline(v=obsuwb,col="red") abline(v=quantile(Wperm1b[[1]],probs=.975),col="green") abline(v=quantile(Wperm1b[[1]],probs=.025),col="green") ``` ```{r permtests_2d} auwd<-mean(as.numeric(as.vector(UMd[which(UMd[,2]==1),1]))) #average of cohort 1 buwd<-mean(as.numeric(as.vector(UMd[which(UMd[,2]==3),1]))) #average of cohort 3 obsmeanuwd<-auwd-buwd #Observed mean for the unweighted distances awd<-mean(as.numeric(as.vector(WMd[which(WMd[,2]==1),1]))) #average of cohort 1 bwd<-mean(as.numeric(as.vector(WMd[which(WMd[,2]==3),1]))) #average of cohort 3 obsmeanwd<-awd-bwd #Observed mean for the weighted distances #Permutation tests for unweighted distances UWperm1d<-unpairedpermttest(UMd,m1=1,m2=3,nperm=10000,typecol=2,datacol=1,r=FALSE) #Permutation test without replacement obsuuwd<-wilcox.test(as.numeric(as.vector(UMd[which(UMd[,2]==1),1])),as.numeric(as.vector(UMd[which(UMd[,2]==3),1])),alternative = "two.sided",mu=0,paired=FALSE)$statistic #u-test from the observed sample obsuuwd #Same for weighted distances Wperm1d<-unpairedpermttest(WMd,m1=1,m2=3,nperm=10000,typecol=2,datacol=1,r=FALSE) obsuwd<-wilcox.test(as.numeric(as.vector(WMd[which(WMd[,2]==1),1])),as.numeric(as.vector(WMd[which(WMd[,2]==3),1])),alternative = "two.sided",mu=0,paired=FALSE)$statistic obsuwd #Size pvalUWperm1d<-length(UWperm1d[[1]][which(quantile(UWperm1d[[1]],probs=.975)<(obsuuwd) | quantile(UWperm1d[[1]],probs=.025)>(obsuuwd))])/length(UWperm1d[[1]]) pvalWperm1d<-length(Wperm1d[[1]][which(quantile(Wperm1d[[1]],probs=.975)<(obsuwd) | quantile(Wperm1d[[1]],probs=.025)>(obsuwd))])/length(Wperm1d[[1]]) pvalsd<-rbind.data.frame(pvalUWperm1d,pvalWperm1d) rownames(pvalsd)<-c("Unweighted","Weighted") colnames(pvalsd)<-c("p-values") pvalsd plot(density(UWperm1d[[1]]),main="Density, unweighted",sub=paste("Statistic: ",obsuuwd," - Green: 2.5% and 97.5% quantiles"), xlim=c(80000, 550000)) abline(v=obsuuwd,col="red") abline(v=quantile(UWperm1d[[1]],probs=.975),col="green") abline(v=quantile(UWperm1d[[1]],probs=.025),col="green") plot(density(Wperm1d[[1]]),main="Density, weighted",sub=paste("Statistic: ",obsuwd," - Green: 2.5% and 97.5% quantiles"), xlim=c(250000, 550000)) abline(v=obsuwd,col="red") abline(v=quantile(Wperm1d[[1]],probs=.975),col="green") abline(v=quantile(Wperm1d[[1]],probs=.025),col="green") ```