---
title: "Consulting"
date: "12 de abril de 2018"
output:
  pdf_document: default
  html_document: default
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

```{r}
load("clean_data/Figure2/Figure2B.RData")
L1b<-length(Figure2_Distances_cohorts1vs2_unweighted_cohort1)
L2b<-length(Figure2_Distances_cohorts1vs2_unweighted_cohort2)
UM1b<-as.data.frame(t(Figure2_Distances_cohorts1vs2_unweighted_cohort1))
UM1b$V2<-1
UM1b<-UM1b[2:L1b,]
UM2b<-as.data.frame(t(Figure2_Distances_cohorts1vs2_unweighted_cohort2))
UM2b$V2<-2
UM2b<-UM2b[2:L2b,]
UMb<-rbind(UM1b,UM2b) #UM is the unweighted matrix for cohort 1 and 2 combined
colnames(UMb)<-c("Unweighted_dist","Cohort")
L3b<-length(Figure2_Distances_cohorts1vs2_weighted_cohort1)
L4b<-length(Figure2_Distances_cohorts1vs2_weighted_cohort2)
WM3b<-as.data.frame(t(Figure2_Distances_cohorts1vs2_weighted_cohort1))
WM3b$V2<-1
WM3b<-WM3b[2:L3b,]
WM4b<-as.data.frame(t(Figure2_Distances_cohorts1vs2_weighted_cohort2))
WM4b$V2<-2
WM4b<-WM4b[2:L4b,]
WMb<-rbind(WM3b,WM4b) #WM is the weighted matrix for cohort 1 and 2 combined
colnames(WMb)<-c("Weighted_dist","Cohort")

load("clean_data/Figure2/Figure2D.RData")
L1d<-length(Figure2_Distances_cohorts1vs3_unweighted_cohort1)
L2d<-length(Figure2_Distances_cohorts1vs3_unweighted_cohort3)
UM1d<-as.data.frame(t(Figure2_Distances_cohorts1vs3_unweighted_cohort1))
UM1d$V2<-1
UM1d<-UM1d[2:L1d,]
UM2d<-as.data.frame(t(Figure2_Distances_cohorts1vs3_unweighted_cohort3))
UM2d$V2<-3
UM2d<-UM2d[2:L2d,]
UMd<-rbind(UM1d,UM2d) #UM is the unweighted matrix for cohort 1 and 3 combined
colnames(UMd)<-c("Unweighted_dist","Cohort")
L3d<-length(Figure2_Distances_cohorts1vs3_weighted_cohort1)
L4d<-length(Figure2_Distances_cohorts1vs3_weighted_cohort3)
WM3d<-as.data.frame(t(Figure2_Distances_cohorts1vs3_weighted_cohort1))
WM3d$V2<-1
WM3d<-WM3d[2:L3d,]
WM4d<-as.data.frame(t(Figure2_Distances_cohorts1vs3_weighted_cohort3))
WM4d$V2<-3
WM4d<-WM4d[2:L4d,]
WMd<-rbind(WM3d,WM4d) #WM is the weighted matrix for cohort 1 and 2 combined
colnames(WMd)<-c("Weighted_dist","Cohort")
```

```{r fns}
unpairedpermttest <- function(x, m1, m2, nperm, typecol, datacol, h0=0, alt="two.sided", vareq=TRUE, r=FALSE, p=NULL, utest=TRUE){ 
  # x = Database, with columns as variables and rows as observations. 
  # m1, m2 = Values to compare for the difference in means (i.e. cohort numbers). 
  # nperm = Number of permutations to do. 
  # Typecol = Column with the subsamples to compare. 
  # Datacol = Column with the data. 
  # h0 = H0 that is being tested. 
  # alt = Type of alternative. vareq = Assume equal variances? Default is true. 
  # r = With or without replacement?, 
  # p = Resampling weights. 
  # utest = If a t or u test should be done, default is TRUE, else a t test is done instead
  x <- as.data.frame(x)
  stats <- c()
  difmeans <- c()
  var1 <- c()
  var2 <- c()
  if (isTRUE(utest)) {
    for (i in 1:nperm) {
      perm <- sample(x=x[,typecol],size=nrow(x),replace=r,prob=p)
      x <- cbind.data.frame(x,perm)
      vec1 <- (as.numeric(x[which(x[,ncol(x)]==m1),datacol]))
      vec2 <- (as.numeric(x[which(x[,ncol(x)]==m2),datacol]))
      stats <- c(stats, wilcox.test(vec1,vec2,paired=FALSE,mu=h0,alternative=alt)$statistic)
      difmeans <- c(difmeans,mean(vec1)-mean(vec2))
      var1 <- c(var1,var(vec1))
      var2 <- c(var2,var(vec2))
    }
  } else {
    for (i in 1:nperm) {
      perm <- sample(x=x[,typecol], size=nrow(x), replace=r, prob=p) #shuffling the labels
      x <- cbind.data.frame(x,perm)
      vec1 <- (as.numeric(x[which(x[,ncol(x)]==m1),datacol]))
      vec2 <- (as.numeric(x[which(x[,ncol(x)]==m2),datacol]))
      stats <- c(stats, t.test(vec1,vec2,paired=FALSE,mu=h0,alternative=alt,var.equal=vareq)$statistic)
      difmeans <- c(difmeans,mean(vec1)-mean(vec2))
      var1 <- c(var1,var(vec1))
      var2 <- c(var2,var(vec2))
    }
  }
  return(list(stats = stats, difmeans = difmeans, var1 = var1, var2 = var2, x = x)) 
  #Returns a list with the t or u stats, the mean differences, 
  # group variances and a data frame with the specific permutations for each iteration.
}
```

```{r permtests_2b}
auwb <- mean(as.numeric(as.vector(UMb[which(UMb[,2]==1),1]))) #average of cohort 1
buwb <- mean(as.numeric(as.vector(UMb[which(UMb[,2]==2),1]))) #average of cohort 2
obsmeanuwb<-auwb-buwb #Observed mean for the unweighted distances
awb<-mean(as.numeric(as.vector(WMb[which(WMb[,2]==1),1]))) #average of cohort 1
bwb<-mean(as.numeric(as.vector(WMb[which(WMb[,2]==2),1]))) #average of cohort 2
obsmeanwb<-awb-bwb #Observed mean for the weighted distances

#Permutation tests for unweighted distances
UWperm1b<-unpairedpermttest(UMb,m1=1,m2=2,nperm=10000,typecol=2,datacol=1,r=FALSE) #Permutation test
obsuuwb <- wilcox.test(as.numeric(as.vector(UMb[which(UMb[,2]==1),1])),as.numeric(as.vector(UMb[which(UMb[,2]==2),1])),alternative = "two.sided",mu=0,paired=FALSE)$statistic #u-test from the observed sample 
obsuuwb
#Same for weighted distances
Wperm1b<-unpairedpermttest(WMb,m1=1,m2=2,nperm=10000,typecol=2,datacol=1,r=FALSE)
obsuwb<-wilcox.test(as.numeric(as.vector(WMb[which(WMb[,2]==1),1])),as.numeric(as.vector(WMb[which(WMb[,2]==2),1])),alternative = "two.sided",mu=0,paired=FALSE)$statistic
obsuwb
#Size - 2 sided test
pvalUWperm1b <- length(UWperm1b[[1]][which(quantile(UWperm1b[[1]],probs=.975)<(obsuuwb) | quantile(UWperm1b[[1]],probs=.025)>(obsuuwb))])/length(UWperm1b[[1]])
pvalWperm1b<-length(Wperm1b[[1]][which(quantile(Wperm1b[[1]],probs=.975)<(obsuwb) | quantile(Wperm1b[[1]],probs=.025)>(obsuwb))])/length(Wperm1b[[1]])
pvalsb<-rbind.data.frame(pvalUWperm1b,pvalWperm1b)
rownames(pvalsb)<-c("Unweighted","Weighted")
colnames(pvalsb)<-c("p-values")
pvalsb
plot(density(UWperm1b[[1]]),main="Density, unweighted",sub=paste("Statistic: ",obsuuwb," - Green: 2.5% and 97.5% quantiles"), xlim = c(30000, 170000))
abline(v=obsuuwb,col="red")
abline(v=quantile(UWperm1b[[1]],probs=.975),col="green")
abline(v=quantile(UWperm1b[[1]],probs=.025),col="green")
plot(density(Wperm1b[[1]]),main="Density, weighted",sub=paste("Statistic: ",obsuwb," - Green: 2.5% and 97.5% quantiles"), xlim=c(60000, 160000))
abline(v=obsuwb,col="red")
abline(v=quantile(Wperm1b[[1]],probs=.975),col="green")
abline(v=quantile(Wperm1b[[1]],probs=.025),col="green")
```

```{r permtests_2d}
auwd<-mean(as.numeric(as.vector(UMd[which(UMd[,2]==1),1]))) #average of cohort 1
buwd<-mean(as.numeric(as.vector(UMd[which(UMd[,2]==3),1]))) #average of cohort 3
obsmeanuwd<-auwd-buwd #Observed mean for the unweighted distances
awd<-mean(as.numeric(as.vector(WMd[which(WMd[,2]==1),1]))) #average of cohort 1
bwd<-mean(as.numeric(as.vector(WMd[which(WMd[,2]==3),1]))) #average of cohort 3
obsmeanwd<-awd-bwd #Observed mean for the weighted distances
#Permutation tests for unweighted distances
UWperm1d<-unpairedpermttest(UMd,m1=1,m2=3,nperm=10000,typecol=2,datacol=1,r=FALSE) #Permutation test without replacement
obsuuwd<-wilcox.test(as.numeric(as.vector(UMd[which(UMd[,2]==1),1])),as.numeric(as.vector(UMd[which(UMd[,2]==3),1])),alternative = "two.sided",mu=0,paired=FALSE)$statistic #u-test from the observed sample 
obsuuwd
#Same for weighted distances
Wperm1d<-unpairedpermttest(WMd,m1=1,m2=3,nperm=10000,typecol=2,datacol=1,r=FALSE)
obsuwd<-wilcox.test(as.numeric(as.vector(WMd[which(WMd[,2]==1),1])),as.numeric(as.vector(WMd[which(WMd[,2]==3),1])),alternative = "two.sided",mu=0,paired=FALSE)$statistic
obsuwd
#Size
pvalUWperm1d<-length(UWperm1d[[1]][which(quantile(UWperm1d[[1]],probs=.975)<(obsuuwd) | quantile(UWperm1d[[1]],probs=.025)>(obsuuwd))])/length(UWperm1d[[1]])
pvalWperm1d<-length(Wperm1d[[1]][which(quantile(Wperm1d[[1]],probs=.975)<(obsuwd) | quantile(Wperm1d[[1]],probs=.025)>(obsuwd))])/length(Wperm1d[[1]])
pvalsd<-rbind.data.frame(pvalUWperm1d,pvalWperm1d)
rownames(pvalsd)<-c("Unweighted","Weighted")
colnames(pvalsd)<-c("p-values")
pvalsd
plot(density(UWperm1d[[1]]),main="Density, unweighted",sub=paste("Statistic: ",obsuuwd," - Green: 2.5% and 97.5% quantiles"), xlim=c(80000, 550000))
abline(v=obsuuwd,col="red")
abline(v=quantile(UWperm1d[[1]],probs=.975),col="green")
abline(v=quantile(UWperm1d[[1]],probs=.025),col="green")
plot(density(Wperm1d[[1]]),main="Density, weighted",sub=paste("Statistic: ",obsuwd," - Green: 2.5% and 97.5% quantiles"), xlim=c(250000, 550000))
abline(v=obsuwd,col="red")
abline(v=quantile(Wperm1d[[1]],probs=.975),col="green")
abline(v=quantile(Wperm1d[[1]],probs=.025),col="green")
```