# Supplemental Information: R code for gapfilling algorithm, with an example applied to runoff in the Hubbard Brook record. data1<-read.csv("HBEF_flow.csv",header = TRUE) ##Read in datasheet with 3 coloumns: Date, Measured flow in L/s, and Predicted flow in L/s #1L=0.035ft^3 watershed6=1423976.4ft^2 300sec/5min 12inches/ft 25.4mm/inch data1$Actual_mm_5min<-data1$Actual_Q_L_s*0.0353147*(1/1423976.4)*300*12*25.4 #convert flow rate to watershed mm per 5 minutes data1$Predicted_mm_5min<-data1$Predicted_Q_L_s*0.0353147*(1/1423976.4)*300*12*25.4#convert flow rate to watershed mm per 5 minutes date1 = as.POSIXct(data1[,1],origin = "1970-01-01",tz="EST") # convert the numeric date into a date object date1 = strptime(date1,format="%Y-%m-%d %H:%M:%S",tz="EST") date1. = as.numeric(date1) ######################################### # CREATE A FUNCTION FOR MAKING FAKE GAPS# ######################################### #"gaplength" = the duration of the gap to simulate (i.e. how many 5 minute blocks to include) #"iterations"= how many fake gaps to simulate #"data"= the name of the dataframe to use (dataframe must have 3 columns: timestamp, observed Q (l/s), and modeled Q (l/s), observed q (mm), modeled q (mm)) mdprdct <- function(gaplength, iterations, data){ output<-matrix(ncol=6,nrow=iterations) for (i in 1:iterations) { sample((gaplength+3527):(nrow(data)-(gaplength+3527)), 1)->a # identify the random beginning of the simulated gap diffy1 <- data[a-1,4]-data[a,5] diffy2 <- data[a+gaplength+1,4]-data[a+gaplength,5] diffy <- c(diffy1, diffy2) diffx <- c(data[a,1], data[a+gaplength,1]) mm <- approx(diffx, diffy, data[a:(a+gaplength),1]) gapobserved <- data[a:(a+gaplength),4] #grab real flux observations for W6 during the fake gap gappredicted <- data[a:(a+gaplength),5]+mm$y #grab modeled (predicted) flux observations for W6 during the fake gap gapflowrate <- data[a:(a+gaplength),2] #grab flow rate (for averaging in figure 3) output[i,1] <- sum(gapobserved, na.rm=T) # sum the actual total mm in W6 during the gap output[i,2] <- sum(gappredicted, na.rm=T) # sum the predicted total mm in W6 during the gap output[i,3] <- max(gapobserved, na.rm=T) # calculate the max runoff in mm/5 min in W6 during the gap output[i,4] <- mean(gapobserved, na.rm=T) # calculate the mean runoff in mm/5 min in W6 during the gap output[i,5] <- output[i,2]-output[i,1] #calculate error in total flux during gap (predicted -observed, so that overestimations are positive?) output[i,6] <- mean(gapflowrate, na.rm=T) # calculate the mean flow rate l/s in W6 during the gap } colnames(output) <- c("W6 Real"," W6 Predicted", "Max W6", "Mean W6", "Error", "avgFlow_l_s") return(output) } ####################################################################################### #APPLY THE NEW FUNCTION TO QUANTIFY UNCERTAINTY IN RUNOFF DUE TO GAPS AT HUBBARD BROOK# ####################################################################################### w6gaps = read.csv("realHBgapsW6_JLC.csv") # read in spreadsheet of real gaps in the watershed 6 record w6gaps$begin = strptime(paste(w6gaps[,1], sprintf("%04d",w6gaps[,2])), format="%m/%d/%Y %H %M", tz="EST") # make the beginning of the gap a date object w6gaps$end = strptime(paste(w6gaps[,3], sprintf("%04d",w6gaps[,4])), format="%m/%d/%Y %H %M",tz="EST") # make the end of the gap a date object nogaps=data1 for(i in 1:length(w6gaps[,1])){ nogaps=subset(nogaps,Dateas.POSIXlt(w6gaps[i,8])) } #remove the gaps nogaps=subset(nogaps,Date>820479600) nogaps=subset(nogaps,Date<1262329200) withgaps=subset(data1,Date>820479600) withgaps=subset(withgaps,Date<1262329200) #Subset and quantify uncertiany in the gaps of interest w6gaps.partial = subset(w6gaps, w6gaps$Cause=="TechErr") # subset by gap cause (currently technician error) begin = strptime(paste(w6gaps.partial[,1], sprintf("%04d",w6gaps.partial[,2])), format="%m/%d/%Y %H %M",tz="EST") # make the beginning of the gap a date object end = strptime(paste(w6gaps.partial[,3], sprintf("%04d",w6gaps.partial[,4])), format="%m/%d/%Y %H %M",tz="EST") # make the end of the gap a date object duration = as.numeric(end) - as.numeric(begin) # calculate the gap duration, in seconds nsteps = as.numeric(duration/300-1) # number of 5-min intervals in the gap #calculate Runoff During the Gaps...(currently using predictions from ensemble regression model at HBEF) ROs = matrix(NA, nrow(w6gaps.partial), 1) for(i in 1:nrow(w6gaps.partial)){ ROs[i] = sum(subset(data1$Predicted_mm_5min, date1.>=as.numeric(begin[i]) & date1.<=as.numeric(end[i]))) } q = matrix(NA, length(w6gaps.partial[,1]), 1) # make a matrix to hold the runoff info during the W6 gap gapAvs <- matrix(NA, length(w6gaps.partial[,1]), 1) # make a matrix to hold the average flowrate info during the W6 gap for(i in 1:length(q)){ # loop through the gaps dd = subset(data1, date1. >= as.numeric(begin[i]) & date1. <= as.numeric(end[i])) # get the W6 runoff data from the period of the gap q[i] = sum(dd[,5], na.rm=TRUE) # calculate the predicted runoff in W6 during the gap gapAvs[i] <- mean(dd[,5], na.rm=TRUE) #mean PREDICTED flow rate q (mm/5min) for each of the W6 gaps } # now estimate the uncertainty due to gaps for the annual runoff dt = strptime(paste(w6gaps.partial[,3], sprintf("%04d",w6gaps.partial[,4])), format="%m/%d/%Y %H %M",tz="EST") # remake the date object for the gaps list n = 10000 # set the number of iterations (x10) best.errors = matrix(NA,ncol=as.integer(n/10),nrow = length(w6gaps.partial[,1])) for (i in 1:length(q)){ iterations = as.matrix(mdprdct(nsteps[i],n,data1)) iterations <- iterations[order(abs(ROs[i]-iterations[,1])),] iterations <- head(iterations,(n/10)) best<-(iterations[,5]) best.errors[i,]= best print(i) } # this loop determines the water year for each gap w.yr = matrix(0, 1,length(q)) for(i in 1:length(q)){ w.yr[i] = dt$year[i]+1900 if(dt$mon[i]+1<6){w.yr[i]=dt$year[i]+1899} } # this loop sums the random samples for each gap within a water year, so that we have a population of total errors for that water year gap.uncert = matrix(NA,nrow=length(c(1996:2009)), ncol=(as.integer(n/10))) yr = c(1996:2009) for(i in 1:length(yr)){ ww = subset(best.errors, w.yr==yr[i]) for(j in 1:(as.integer(n/10))){ gap.uncert[i,j] = sum(ww[,j]) } } #Calculate 95% confidence interval based on gap.uncert annquantiles=matrix(NA,nrow = 14,ncol=2) for (i in 1:14){ annquantiles[i,1]=quantile(gap.uncert[i,],0.025) annquantiles[i,2]=quantile(gap.uncert[i,],0.975) }