################################################################################## # R code to construct SCIs for all possible ratios of variances of several ZILN # based on Bayesian GPQ and PB approaches with # Daily precipitation Datasets ########################################S########################################## ################################################################################## ################################################################################## # Three sample groups # k = 3 ################################################################################## ################################################################################## #Simulation runs M = 5000 m = 2500 # Function for generating random sample drawn from delta-lognormal distributions with k-th populations library(EnvStats) xGen = function(k,nTotal,Ex_LN,cv_LN,deltaTotal){ sum_nonzero_value2 = rep(0,k) mu_hat = rep(0,k) sigmaSq_hat= rep(0,k) delta_hatnon= rep(0,k) size_non = rep(0,k) size_zero = rep(0,k) xlist = list() for (l in 1:k){ #print(l) while(TRUE){ x = rzmlnormAlt(nTotal[l], Ex_LN[l], cv_LN[l], deltaTotal[l]) xlist[l] = list(x) Temp.nonzero_value = log(x[which(x!=0)]) Temp.size_nonzero =length(Temp.nonzero_value) if(Temp.size_nonzero < (nTotal[l]-nTotal[l]*deltaTotal[l])){ next } nonzero_value = Temp.nonzero_value size_nonzero = Temp.size_nonzero sum_nonzero_value2[l] = sum(nonzero_value^2) mu_hat[l] = mean(nonzero_value) sigmaSq_hat[l] = var(nonzero_value) delta_hatnon[l] = size_nonzero/nTotal[l] size_non[l] = size_nonzero size_zero[l] = nTotal[l]-size_nonzero break } } return(list("data" = cbind(size_non, size_zero, mu_hat, sigmaSq_hat, delta_hatnon, sum_nonzero_value2),"xlist"=xlist)) } # Parameter estimations writeDataToFile=function(x,k,rep,currentFolderPath){ datatable = t(data.frame(x)) filename = paste(currentFolderPath,'/D',k,'_xlist.csv',sep="") write.table( datatable, filename , append= T, sep=',',col.names=FALSE,row.names = FALSE ) } createDatasource = function(nCase,sCase,dCase){ ###############################Delta Varied######################################## folderSourceName = paste("n",nCase,"s",sCase,"d",dCase, sep = "") print(folderSourceName) ################################################################################### #Create folder dir.create(file.path("./DataSource/"), showWarnings = FALSE) ## set current folder to keep all data ./Datasource/Data_m_2500_n1_100n2_100....... currentFolderPath = paste("./DataSource/",folderSourceName,sep="") dir.create(file.path(currentFolderPath),showWarnings = FALSE) return (currentFolderPath) } ntotal_allcase = matrix(c( 30, 30, 30, 50, 50, 50, 100, 100, 100, 30, 50, 100, 50, 100, 200, 100, 100, 200), ncol = 3,byrow = TRUE) sigmaSq_allcase = matrix(c(3, 5, 7), ncol = 3,byrow = TRUE) delta_allcase = matrix(c( 0.1, 0.2, 0.3, 0.1, 0.3, 0.5, 0.3, 0.5, 0.5), ncol = 3,byrow = TRUE) for (id_ntotal_alcase in seq(1:nrow(ntotal_allcase))){ config_ntotal =ntotal_allcase[id_ntotal_alcase,] for (id_sigmaSq_allcase in seq(1:nrow(sigmaSq_allcase))){ config_sigmaSq =sigmaSq_allcase[id_sigmaSq_allcase,] for (id_delta_allcase in seq(1:nrow(delta_allcase))){ config_delta =delta_allcase[id_delta_allcase,] currentFolderPath = createDatasource(id_ntotal_alcase,id_sigmaSq_allcase,id_delta_allcase) allData = data.frame(data.frame(config_ntotal),data.frame(config_sigmaSq),data.frame(config_delta)) write.csv(allData,file=paste(currentFolderPath,"/config.csv",sep=""),row.names = FALSE) } } } listFolder = list.dirs("./DataSource/", recursive=FALSE) eachFolder = listFolder[1] print(listFolder) for (eachFolder in listFolder){ currentFolder = eachFolder configfile = read.csv(file=paste(currentFolder,"/config.csv",sep = "")) nTotal = configfile$config_ntotal sigmaSqTotal = configfile$config_sigmaSq deltaTotal = configfile$config_delta k = length(nTotal) muTotal = -sigmaSqTotal/2 mu1 = muTotal[1] mu2 = muTotal[2] mu3 = muTotal[3] deltanonTotal = 1 - deltaTotal size_nonzeroTotal = nTotal*(1-deltaTotal) Ex_LN = exp(muTotal+(sigmaSqTotal/2)) cv_LN = sqrt(exp(sigmaSqTotal)-1) theta = log(deltanonTotal)+(2*muTotal+sigmaSqTotal)+log(exp(sigmaSqTotal)-deltanonTotal) theta12 = theta[1]-theta[2] theta13 = theta[1]-theta[3] theta23 = theta[2]-theta[3] AllDataset = data.frame() ## Check file .xlxs is exist it will be deleted delfiles = dir(path=currentFolder ,pattern="D.*xlist.csv") # Check its existence file.remove(file.path(currentFolder, delfiles)) idx_rep =1 for ( idx_rep in seq(1:M)){ #Start generate data packData = xGen(k, nTotal, Ex_LN, cv_LN, deltaTotal) dataset= data.frame(loop = rep(idx_rep,k),num_k= 1:k) data = packData$data xlist = packData$xlist size_nonzero = data size_zero = data[,2] mu_hat = data[,3] sigmaSq_hat = data[,4] delta_hatnon = data[,5] delta_hat = 1-delta_hatnon sum_nonzero_value2 = data[,6] dataset = cbind(dataset, data) ## Store Dataset each loop AllDataset = rbind(AllDataset,dataset) # Write raw xlist data for (idx_xlist in seq(1:length(xlist))){ eachlist = xlist[idx_xlist] writeDataToFile(eachlist,idx_xlist,idx_rep,currentFolder) } } write.table( AllDataset, paste(currentFolder,"/basicSummary_K",k,".csv",sep="") , sep=',',row.names = FALSE ) } ############################################################################## # Algorithm 1: B-PMB # ############################################################################## library(rstan) #Stan file ### This function for loading the raw dataset loadXlist = function(folderselect,Kselect){ D1 = read.csv(file=paste(folderselect,"/D",Kselect,"_xlist.csv",sep = ""),header = FALSE) return (D1) } resultoutput = "./BPM_result/" #Delete all result delfiles = dir(path=resultoutput ,pattern="*.txt") file.remove(file.path(resultoutput, delfiles)) M = 5000 m = 2500 listFolder = list.dirs("./DataSource/", recursive=FALSE) eachFolder = listFolder[1] print(listFolder) for (eachFolder in listFolder){ print(paste("Start Folder",eachFolder,sep="")) ## read data from config file currentFolder = eachFolder configfile = read.csv(file=paste(currentFolder,"/config.csv",sep = "")) nTotal = configfile$config_ntotal sigmaSqTotal = configfile$config_sigmaSq deltaTotal = configfile$config_delta muTotal = -sigmaSqTotal/2 k = length(nTotal) # Calculate parameter from data in config file deltanonTotal = 1 - deltaTotal size_nonzeroTotal = nTotal*(1-deltaTotal) Ex_LN = exp(muTotal+(sigmaSqTotal/2)) cv_LN = sqrt(exp(sigmaSqTotal)-1) theta = log(deltanonTotal)+(2*muTotal+sigmaSqTotal)+log(exp(sigmaSqTotal)-deltanonTotal) theta12 = theta[1]-theta[2] theta13 = theta[1]-theta[3] theta23 = theta[2]-theta[3] # Read summary data summaryData = read.csv(file= paste(currentFolder,"/basicSummary_K",k,".csv",sep = "")) # B-PMB: Bayesian-based probability-matching-beta prior CI.BPM = data.frame() D.BPM = rep(0,m) rep = 1 dataframe.x1 = loadXlist(currentFolder,1) dataframe.x2 = loadXlist(currentFolder,2) dataframe.x3 = loadXlist(currentFolder,3) for(rep in seq(1:M)){ #Select data from repeat loop number currentData = summaryData[which(summaryData$loop==rep),] #Extract each parameter delta_hatnon = currentData$delta_hatnon delta_hat = 1-delta_hatnon size_nonzero = currentData$size_non size_zero = currentData$size_zero mu_hat = currentData$mu_hat sigmaSq_hat = currentData$sigmaSq_hat n = size_nonzero + size_zero thetahat = log(delta_hatnon) + (2*(mu_hat+sigmaSq_hat)) thetahat.12 = thetahat[1]-thetahat[2] thetahat.13 = thetahat[1]-thetahat[3] thetahat.23 = thetahat[2]-thetahat[3] Var_varDLN1 = delta_hat/(n*delta_hatnon) Var_varDLN2 = (sigmaSq_hat/size_nonzero) + (2*sigmaSq_hat^2/(size_nonzero-1)) Var_varDLN = Var_varDLN1 + (4*Var_varDLN2) Var_varDLN12 = Var_varDLN[1]+Var_varDLN[2] Var_varDLN13 = Var_varDLN[1]+Var_varDLN[3] Var_varDLN23 = Var_varDLN[2]+Var_varDLN[3] ############################### BPM ######################################## ## chain = 4, iter = 1250 to obtain m = 4*(1250/2) = 2500 ## refresh = 0 denoted as Stan running without output details x1 = as.numeric(dataframe.x1[rep,]) x2 = as.numeric(dataframe.x2[rep,]) x3 = as.numeric(dataframe.x3[rep,]) fit.pm = stan('Stan_PMBprior_K3.stan', data=list(n1=n[1], sizenonzero1=size_nonzero[1], y1=x1, sizezero1=size_zero[1], mu1=mu_hat[1], sigmaSq1=sigmaSq_hat[1], delta1=delta_hat[1], n2=n[2], sizenonzero2=size_nonzero[2], y2=x2, sizezero2=size_zero[2], mu2=mu_hat[2], sigmaSq2=sigmaSq_hat[2], delta2=delta_hat[2], n3=n[3], sizenonzero3=size_nonzero[3], y3=x3, sizezero3=size_zero[3], mu3=mu_hat[3], sigmaSq3=sigmaSq_hat[3], delta3=delta_hat[3]), iter = 1250, refresh = 0) # Extracting the posterior draws post.mu.pm1 = extract(fit.pm)$mu1 post.sigmaSq.pm1 = extract(fit.pm)$sigmaSq1 post.delta.pm1 = extract(fit.pm)$delta1 post_varDLN.pm1 = log(1-post.delta.pm1)+2*(post.mu.pm1+post.sigmaSq.pm1) post.mu.pm2 = extract(fit.pm)$mu2 post.sigmaSq.pm2 = extract(fit.pm)$sigmaSq2 post.delta.pm2 = extract(fit.pm)$delta2 post_varDLN.pm2 = log(1-post.delta.pm2)+2*(post.mu.pm2+post.sigmaSq.pm2) post.mu.pm3 = extract(fit.pm)$mu3 post.sigmaSq.pm3 = extract(fit.pm)$sigmaSq3 post.delta.pm3 = extract(fit.pm)$delta3 post_varDLN.pm3 = log(1-post.delta.pm3)+2*(post.mu.pm3+post.sigmaSq.pm3) post_varDLN.pm12 = post_varDLN.pm1-post_varDLN.pm2 post_varDLN.pm13 = post_varDLN.pm1-post_varDLN.pm3 post_varDLN.pm23 = post_varDLN.pm2-post_varDLN.pm3 for(idx.BPM in 1:m){ D12.BPM = abs(thetahat.12 - post_varDLN.pm12[idx.BPM]) D13.BPM = abs(thetahat.13 - post_varDLN.pm13[idx.BPM]) D23.BPM = abs(thetahat.23 - post_varDLN.pm23[idx.BPM]) A.BPM = max(D12.BPM, D13.BPM, D23.BPM) B.BPM = min(D12.BPM, D13.BPM, D23.BPM) D.BPM[idx.BPM] = A.BPM - B.BPM } q.BPM = quantile(D.BPM,0.95,type=8) L.BPM12 = thetahat.12 - q.BPM U.BPM12 = thetahat.12 + q.BPM CI.BPM12 = data.frame(rep,"CI.BPM12",L.BPM12,U.BPM12,theta12) names(CI.BPM12) = c("rep","CI","lower","upper","theta") CI.BPM = rbind(CI.BPM,CI.BPM12) L.BPM13 = thetahat.13 - q.BPM U.BPM13 = thetahat.13 + q.BPM CI.BPM13 = data.frame(rep,"CI.BPM13",L.BPM13,U.BPM13,theta13) names(CI.BPM13) = c("rep","CI","lower","upper","theta") CI.BPM = rbind(CI.BPM,CI.BPM13) L.BPM23 = thetahat.23 - q.BPM U.BPM23 = thetahat.23 + q.BPM CI.BPM23 = data.frame(rep,"CI.BPM23",L.BPM23,U.BPM23,theta23) names(CI.BPM23) = c("rep","CI","lower","upper","theta") CI.BPM = rbind(CI.BPM,CI.BPM23) if(rep%%100 ==0){ print(rep) } } CI.BPM['CP.LE'] = 0 CI.BPM['CP'] = 0 CI.BPM['CP.UE'] = 0 CI.BPM[which(CI.BPM["lower"]CI.BPM["theta"]),"CP"]=1 CI.BPM[which(CI.BPM["lower"]>CI.BPM["theta"]),"CP.LE"]=1 CI.BPM[which(CI.BPM["upper"]CI.BR["theta"]),"CP"]=1 CI.BR[which(CI.BR["lower"]>CI.BR["theta"]),"CP.LE"]=1 CI.BR[which(CI.BR["upper"]CI.GPQ["theta"]),"CP"]=1 CI.GPQ[which(CI.GPQ["lower"]>CI.GPQ["theta"]),"CP.LE"]=1 CI.GPQ[which(CI.GPQ["upper"]CI.PB["theta"]),"CP"]=1 CI.PB[which(CI.PB["lower"]>CI.PB["theta"]),"CP.LE"]=1 CI.PB[which(CI.PB["upper"]CI.BPM["theta"]),"CP"]=1 CI.BPM[which(CI.BPM["lower"]>CI.BPM["theta"]),"CP.LE"]=1 CI.BPM[which(CI.BPM["upper"]CI.BR["theta"]),"CP"]=1 CI.BR[which(CI.BR["lower"]>CI.BR["theta"]),"CP.LE"]=1 CI.BR[which(CI.BR["upper"]CI.GPQ["theta"]),"CP"]=1 CI.GPQ[which(CI.GPQ["lower"]>CI.GPQ["theta"]),"CP.LE"]=1 CI.GPQ[which(CI.GPQ["upper"]CI.PB["theta"]),"CP"]=1 CI.PB[which(CI.PB["lower"]>CI.PB["theta"]),"CP.LE"]=1 CI.PB[which(CI.PB["upper"]