# Script for 'Detecting telomere elongation in longitudinal datasets: Analysis of a proposal by Simons, Stulp and Nakagawa' # Needs packages 'tidyr', and 'ggplot2'if you want to plot figure 1 ###Function definitions##### generate.true.attrition <- function( # This function tells you the proportion of true lengtheners for a specified set of parameter values max.years = 15, # the follow up interval baselineTL.mean = 7000, # the mean of the TL distribution at baseline baselineTL.sd = 700, # the sd of the TL distribution at baseline attrition.mean = 30, # mean annual attrition in bp attrition.sd = 50, # sd of annual attrition in bp error="sd", # whether the measurement error should be applied as a CV (proportional to TL) or a constant SD error.rate = 140, # Either the SDe, or a coefficient of variation expressed as a proportion not percentage, e.g. 0.02 n = 10000, # the number of individuals in the simulation r = 0 # correlation between attrition in successive years ) { require(tidyr) # generate the starting population of individuals: an array of n rows x max.years columns+1 (for the baseline) telomere.lengths = array(, dim = c(n, max.years+1)) # make an empty array for the TL data telomere.lengths[,1] = rnorm(n,baselineTL.mean,baselineTL.sd) # fill the first column with baseline values of TL # generate the TL values at yearly follow up points # calculate annual attrition # in the first year this is just drawn from the specified distribution attrition = rnorm(n,attrition.mean,attrition.sd) annual.attrition = attrition for(i in 2:(max.years+1)) { telomere.lengths[,i] = telomere.lengths[,i-1] - annual.attrition # Now calculate the annual attrition for the next year (this is equation 1 in the paper) if (r<1) {annual.attrition = r*attrition + sqrt(1-r^2)*rnorm(n,mean=attrition.mean*(1-r)/sqrt(1-r^2),sd=attrition.sd) } } # calculate true annual attrition from baseline: positive values = attrition; negative values = growth true.attrition = array(,dim = c(n,max.years)) # make an empty array for the attrition measures true.attrition = telomere.lengths[,1] - telomere.lengths[,2:(max.years+1)] true.elongation.annual=sum(true.attrition[ ,1] < 0)/length(true.attrition[ , 1]) true.elongation.overall=sum(true.attrition[ , max.years]<0)/length(true.attrition[ , max.years]) # measure TL with error measured.TL = array(, dim = c(n, max.years+1)) # make an empty array for the TL measurement data if (error=="cv"){ for (i in 1:n){ for (j in 1:(max.years+1)){ measured.TL[i,j] = rnorm(1,mean=telomere.lengths[i,j],sd=abs(telomere.lengths[i,j]*error.rate)) # abs stops negative sds } } } if (error=="sd"){ for (i in 1:n){ for (j in 1:(max.years+1)){ measured.TL[i,j] = rnorm(1,mean=telomere.lengths[i,j],error.rate) } } } return(true.elongation.overall) } generate.TL.data = function( # This function generates the data for each simulated dataset max.years = 15, # the maximum follow up interval baselineTL.mean = 7000, # the mean of the TL distribution at baseline baselineTL.sd = 700, # the sd of the TL distribution at baseline attrition.mean = 30, # mean annual attrition in bp attrition.sd = 50, # sd of annual attrition in bp error="sd", # Whether the measurement error is to be implemented as a CV (proportional to TL), or a constant SD error.rate = 140, # Either the SDe, or a coefficient of variation expressed as a proportion not percentage, e.g. 0.02 n = 10000, # the number of individuals in the simulation r = 0 # correlation between attrition in successive years ) { require(tidyr) # generate the starting population of individuals: an array of n rows x max.years columns+1 (for the baseline) telomere.lengths = array(, dim = c(n, max.years+1)) # make an empty array for the TL data telomere.lengths[,1] = rnorm(n,baselineTL.mean,baselineTL.sd) # fill the first column with baseline values of TL # generate the TL values at yearly follow up points # calculate annual attrition # in the first year this is just drawn from the specified distribution attrition = rnorm(n,attrition.mean,attrition.sd) annual.attrition = attrition for(i in 2:(max.years+1)) { telomere.lengths[,i] = telomere.lengths[,i-1] - annual.attrition # Now calculate the annual attrition for the next year (this is equation 1 in the Bateson and Nettle paper) if (r<1) {annual.attrition = r*attrition + sqrt(1-r^2)*rnorm(n,mean=attrition.mean*(1-r)/sqrt(1-r^2),sd=attrition.sd) } } # calculate true annual attrition from baseline: positive values = attrition; negative values = growth true.attrition = array(,dim = c(n,max.years)) # make an empty array for the attrition measures true.attrition = telomere.lengths[,1] - telomere.lengths[,2:(max.years+1)] true.elongation.annual=sum(true.attrition[ ,1] < 0)/length(true.attrition[ , 1]) true.elongation.overall=sum(true.attrition[ , max.years]<0)/length(true.attrition[ , max.years]) # measure TL with a defined CV or a constant SD equal to 7000 * CV # note that CV = sd/mean therefore sd = CV*mean measured.TL = array(, dim = c(n, max.years+1)) # make an empty array for the TL measurement data if (error=="cv"){ for (i in 1:n){ for (j in 1:(max.years+1)){ measured.TL[i,j] = rnorm(1,mean=telomere.lengths[i,j],sd=abs(telomere.lengths[i,j]*error.rate)) # abs stops negative sds } } } if (error=="sd"){ for (i in 1:n){ for (j in 1:(max.years+1)){ measured.TL[i,j] = rnorm(1,mean=telomere.lengths[i,j],sd=error.rate) } } } return(as.data.frame(measured.TL)) } simons = function(data=data) { # This function applies the Simons et al statistic to a data frame, returning the F and p-value # NB This is based on Mirre Simons' code published with his paper # Estimating error using individual regressions, Equation 4 in main manuscript matrixresi<-matrix(0,dim(data)[1],1) # a matrix to put the residuals of the individual regressions in in x=1:dim(data)[2] #number of columns is the number of timepoints j=1 while(j<(dim(data)[1]+1)) #loop the individual regressions for the amount of individuals in the dataset { fit<-lm(t(data[j,])~x) #individual linear regression matrixresi[j,]<-sum((residuals(fit))^2)/(length(x)-2) #residual sum of squares j<-j+1 } sigma1<-mean(c(matrixresi)) #average residual sum of squares across the individuals #Estimating error under the assumption that TL cannot increase over time, Equation 5 in main manuscript #first we calculate the increases TL increases over time (between first(1) and last(3) timepoint) deltaTL=data[,3]-data[,1] #Next we determine which individuals increase in TL between the first and last timepoint indexTLincreases=which(deltaTL>0) #We create a new variable including only the data of individual increases TLincreases=deltaTL[indexTLincreases] #sigma2 sigma2=0.5*sum(TLincreases^2)/(length(TLincreases)) #compare both estimates of error variance (sigma1 and sigma2), Equation 6 in main manuscript vratio=sigma2/sigma1 pvalue<-pf(vratio,length(TLincreases)-1,dim(data)[1]-1,lower.tail=F) return(c(vratio, pvalue)) } simons.replicate<-function(reps=100, attrition.sd = 50, r = 0, error.rate = 140, max.years=2) { # This function runs simons function repeatedly for different parameter values and returns the vector of p values ps=NA for (i in 1:reps) { print(paste("replicate ", i)) data = generate.TL.data(max.years = max.years, long.thin = FALSE, stress.attrition.sd = stress.attrition.sd, CV = CV, r = r) ps[i]=simons(data)[2] } print(sum(ps<0.05)/length(ps)) return(ps) } run.sims<-function(reps=100, attrition.sd=50, error="sd", error.rate=140, n=10000){ # This function runs the whole set of simulations for one column of figure 1 rs=NA true.elongation=NA years=NA simons.sig=NA ps=NA current.true.elongations=NA for (current.r in c(1, 0.5, 0)){ print(paste("r = ", current.r)) for (current.years in 2:11) { print(paste("Years = ", current.years)) rs=append(rs, current.r) years=append(years,current.years) # Calculate the proportion of true elongation at that parameter combination for (i in 1:reps) {current.true.elongations[i]=generate.true.attrition(max.years=current.years, attrition.sd=attrition.sd, error.rate=error.rate, r=current.r, n=n, error=error)} true.elongation=append(true.elongation, mean(current.true.elongations)) print(paste("Proportion of true lengtheners: ", mean(current.true.elongations))) for (i in 1:reps) { data = generate.TL.data(max.years = current.years, attrition.sd = attrition.sd, error.rate=error.rate, r=current.r, n=n, error=error) ps[i]=simons(data)[2]} sig.prop=sum(ps<0.05)/length(ps) print(paste("Simons' test significant proportion: ", sig.prop)) simons.sig=append(simons.sig, sig.prop) }} d=data.frame(rs, years, true.elongation, simons.sig) d=d[-1, ] write.csv(d, file=paste("simons.simulations.data.",error, error.rate,".csv")) return(d) } ##### Replication code###### # Caution: these lines take a long time to run! (about 8 hours for each run.sims statement on my ordinary desktop PC) # They save their output so you can leave them running run.sims(reps=100, n=10000, error="sd", error.rate=0) run.sims(reps=100, n=10000, error="sd", error.rate=140) run.sims(reps=100, n=10000, error="sd", error.rate=560) # Making figure 1 library(ggplot2) d0=read.csv("simons.simulations.data. sd 0 .csv") d2=read.csv("simons.simulations.data. sd 140 .csv") d8=read.csv("simons.simulations.data. sd 560 .csv") d0$rtext[d2$rs==1]="r = 1" d0$rtext[d2$rs==0.5] = "r = 0.5" d0$rtext[d2$rs==0] = "r = 0" d2$rtext[d2$rs==1]="r = 1" d2$rtext[d2$rs==0.5] = "r = 0.5" d2$rtext[d2$rs==0] = "r = 0" d8$rtext="r = 1" d8$rtext[d8$rs==0.5] = "r = 0.5" d8$rtext[d8$rs==0] = "r = 0" d0$cv=rep("SDe = 0", times=length(d0$rtext)) d2$cv=rep("SDe = 140", times=length(d2$rtext)) d8$cv=rep("SDe = 560", times=length(d8$rtext)) d=rbind(d0, d2, d8) d$fill<-"no" d$fill[d$true.elongation>=0.05]<-"yes" figsd=ggplot(d, aes(x=years, y=true.elongation)) + coord_cartesian(ylim=c(0,1)) + geom_ribbon(aes(ymax=1, ymin=0, fill=fill)) + scale_fill_manual(values=c("white", "grey80")) + geom_line() + geom_point(aes(y=simons.sig), colour="red") + geom_line(aes(y=simons.sig), colour="red", linetype=2) + guides(fill=FALSE) + theme_bw() + theme(panel.grid.minor=element_blank(), panel.grid.major=element_blank()) + ylab("Proportion") + xlab("Years of follow-up") + coord_cartesian(ylim=c(0, 1.05)) + facet_grid(rtext ~ cv) figsd png("figure1.png", res=300, units="in", width=8, height=8) figsd dev.off()