# load the functionality into Julia
using Plots, StatPlots
using ExcelReaders, FileIO, DataFrames, Query, Distributions, Optim
# Use the PyPlot backend for Plots.
pyplot()
# Read sheet 1 of Data.xlsx and keep the onset date plus the Onset_Dec27
# column (case counts by onset date — presumably; confirm against spreadsheet).
df = readxlsheet(DataFrame,"Data.xlsx",1) |>
@query(i, begin
@select {date=i.Date, i=i.Onset_Dec27}
end) |> DataFrame
# Coerce column types and add a day index t = 1..n and a short date label.
df[:i] = Int64.(df[:i]); df[:date] = DateTime.(df[:date]); df[:t] = 1:nrow(df);
df[:datestring] = Dates.format(df[:date], "u dd");
# Display the first rows as a sanity check.
df |> head
# We omit the last five datapoints due to the reporting delay
# (the most recent onsets are not yet fully reported).
df = df[maximum(df[:t])-df[:t].>=5,:]
# Daily incidence bar chart labelled by onset date.
@df df bar(:datestring,:i,legend=false, xrotation=60)
# (We assume that no infection occurs before the onset of symptoms, so the generation time equals the serial interval.)
# Generation-time distribution: Gamma with mean 8 days and CV 50%.
mean_gt = 8.0; # mean = 8 days
CV_gt = 0.5; #CV is 50%
# Gamma shape α = 1/CV², scale θ = mean/shape.
αgt=1/CV_gt^2; θgt = mean_gt/αgt
gt = Gamma(αgt,θgt)
show(gt)
plot(gt, fill=(0, .5, :orange), size=(350,220), legend=false, xlim=(0,32), xlab="day")
# Discretize to daily probabilities: P(day x) = F(x) - F(x-1).
gen_time = (x -> cdf(gt,x)-cdf(gt,x-1)).(1:nrow(df))
plot(gen_time,size=(400,300),legend=false)
# Median and 95% interval of the continuous generation time.
quantile.(gt,[.025,.5,.975])
## The function is adapted from:
## https://github.com/robertfeldt/BlackBoxOptim.jl/blob/master/src/utilities/latin_hypercube_sampling.jl
"""
    latin_hypercube_sampling(mins, maxs, numSamples)

Randomly sample `numSamples` points from the axis-aligned box defined by
`mins` and `maxs` using Latin hypercube sampling: each dimension's range is
split into `numSamples` equal-width strata and exactly one value is drawn
uniformly from each stratum, in random order.

Returns a `length(mins) × numSamples` matrix (one column per sample point).

Throws `DimensionMismatch` if `mins` and `maxs` differ in length.
"""
function latin_hypercube_sampling(mins::AbstractVector{T}, maxs::AbstractVector{T},
                                  numSamples::Integer) where {T}
    dims = length(mins)
    length(maxs) == dims ||
        throw(DimensionMismatch("mins and maxs must have the same length"))
    result = zeros(T, numSamples, dims)
    @inbounds for i in 1:dims
        interval_len = (maxs[i] - mins[i]) / numSamples
        # Left edge of each stratum, plus a uniform offset within the stratum.
        strata = collect(range(mins[i], stop = maxs[i] - interval_len,
                               length = numSamples))
        samples = strata .+ interval_len .* rand(numSamples)
        # Random permutation via random sort keys (avoids needing `using Random`
        # for `shuffle!` on Julia ≥ 1.0).
        result[:, i] = samples[sortperm(rand(numSamples))]
    end
    return permutedims(result)
end
nsamples = 1000
# for reproducibility we fix the seed of the random number generator
srand(7)
# One-dimensional Latin hypercube draw of `nsamples` uniforms on [0, 1].
smpls = latin_hypercube_sampling([0.0],[1.0],nsamples);
# Triangular prior for v on [0, 0.7] with mode at the midpoint 0.35;
# v enters the model as a reduction of the susceptible fraction.
vmin = 0.0
vmax = 0.70
vpeak = (vmax-vmin)/2
# Map the uniform LHS draws through the triangular quantile function.
vs = quantile.(TriangularDist(vmin,vmax,vpeak),smpls[1,:])
plot(TriangularDist(vmin,vmax,vpeak),legend=false,fill=(0,.5,:orange),size=(300,220),
xlim=(0,1),xlab="v",ylab="probability density")
plot(vs,legend=false)
hline!([vmin,vmax,vpeak],line=(:dash, :green),size=(450,300),xlab="# sample",ylab="v")
# Effective population size (denominator of the depletion term below).
N = 579384
println("Effective population size: $(Int64(N))")
# Point-estimate run: fix v at the prior midpoint.
v = (vmax+vmin)/2
kmax = nrow(df)
# Negative Poisson log-likelihood (up to a constant) of the observed daily
# incidence under a renewal-equation model with a susceptible-depletion term.
# `params` holds log-transformed (R0, α1, α2); the exponential transform keeps
# all three positive for the unconstrained optimizer.
# Relies on the script-level globals `df`, `N`, `v`, `gen_time` and `kmax`.
function getNegativeLoglk(params)
    # Log-exponential transformation of input variables to be sure that they
    # all stay positive for the optimization routine
    R0 = exp(params[1])
    α1 = exp(params[2])
    α2 = exp(params[3])
    # Ascertainment parameter: α1 for the first 34 days, α2 thereafter.
    α = [[α1 for x in 1:34]; [α2 for x in 35:kmax]]
    loglk = 0.0  # Float64 accumulator (avoids Int→Float type change in the loop)
    for k in 2:kmax
        # Scaled past incidence, computed once per day (was evaluated twice).
        scaled = df[1:(k-1), :i] ./ α[1:(k-1)]
        λ = R0 * (1 - v - sum(scaled) / N) * α[k] *
            sum(scaled .* gen_time[(k-1):-1:1])
        # Poisson log-density up to a constant; guard against non-positive mean.
        loglk += λ <= 0 ? 0 : df[k, :i] * log(λ) - λ
    end
    return -loglk
end
# Evaluate the likelihood at an arbitrary point (sanity check that it is finite).
getNegativeLoglk(log.([7,.1,.01]))
# Maximize the likelihood (Nelder-Mead is Optim's default for this call form).
res0 = optimize(getNegativeLoglk, log.([6,0.1,0.1]))
res = Optim.minimizer(res0)
# Back-transform to the natural scale: (R0, α1, α2).
exp.(res)
# (We can derive $R_0$ directly as a function of $\alpha_1$, $\alpha_2$ and $v$.)
# Re-fix v at the prior midpoint and refresh the day count for the profiled fit.
v = (vmax+vmin)/2
kmax = nrow(df)
# Profiled variant: R0 is computed in closed form as
# (total observed cases) / (total expected cases at R0 = 1),
# so only (α1, α2) remain as free parameters.
# `params` holds log-transformed (α1, α2).
# Relies on the script-level globals `df`, `N`, `v`, `gen_time` and `kmax`.
function getNegativeLoglk(params)
    α1 = exp(params[1])
    α2 = exp(params[2])
    # Ascertainment parameter: α1 for the first 34 days, α2 thereafter.
    α = [[α1 for x in 1:34]; [α2 for x in 35:kmax]]
    # Per-day expected incidence at R0 = 1, computed once and reused in both
    # the R0 ratio and the likelihood loop (previously recomputed twice).
    force = zeros(kmax)
    R0numerator = 0.0
    for k in 2:kmax
        scaled = df[1:(k-1), :i] ./ α[1:(k-1)]
        force[k] = (1 - v - sum(scaled) / N) * α[k] *
                   sum(scaled .* gen_time[(k-1):-1:1])
        R0numerator += df[k, :i]
    end
    R0 = R0numerator / sum(force)  # force[1] is 0, so this sums days 2..kmax
    loglk = 0.0  # Float64 accumulator (avoids Int→Float type change)
    for k in 2:kmax
        λ = R0 * force[k]
        # Poisson log-density up to a constant; guard against non-positive mean.
        loglk += λ <= 0 ? 0 : df[k, :i] * log(λ) - λ
    end
    return -loglk
end
"""
    getR0(params)

Closed-form R0 for log-transformed ascertainment parameters
`params = [log(α1), log(α2)]`: total observed incidence divided by the total
expected incidence at R0 = 1. Relies on the script-level globals `df`, `N`,
`v`, `gen_time` and `kmax`.
"""
function getR0(params)
    a1, a2 = exp(params[1]), exp(params[2])
    # Ascertainment: a1 for the first 34 days, a2 thereafter.
    ascertain = [[a1 for _ in 1:34]; [a2 for _ in 35:kmax]]
    observed = 0.0
    expected = 0.0
    for day in 2:kmax
        past = df[1:(day-1), :i] ./ ascertain[1:(day-1)]
        observed += df[day, :i]
        expected += (1 - v - sum(past) / N) * ascertain[day] *
                    sum(past .* gen_time[(day-1):-1:1])
    end
    return observed / expected
end
# Sanity-check the profiled likelihood, then fit (α1, α2).
getNegativeLoglk(log.([.1,.1]))
res0 = optimize(getNegativeLoglk, log.([.01,.01]))
res = Optim.minimizer(res0)
# Back-transform to the natural scale: (α1, α2).
exp.(res)
# Point estimate of R0 at the fitted (α1, α2).
R0 = getR0(res)
# Testing that the BFGS() method gives the same result as the Nelder-Mead fit above.
# Cross-check: BFGS should converge to (approximately) the same optimum.
res0 = optimize(getNegativeLoglk, log.([.01,.01]), BFGS())
# Sensitivity run (generation-time CV = 50%): refit (α1, α2) for every sampled
# v and record the implied R0.
# Typed, preallocated result vectors: `Vector(nsamples)` allocated Vector{Any},
# which is type-unstable and an error on Julia >= 1.0.
R0s = zeros(nsamples)
α1s = zeros(nsamples)
α2s = zeros(nsamples)
initial = [.01, .01]
for k in 1:nsamples
    v = vs[k]           # the likelihood reads v from global scope
    println(k)          # progress indicator
    res0 = optimize(getNegativeLoglk,log.(initial))
    res = Optim.minimizer(res0)
    α = [[exp(res[1]) for x in 1:34]; [exp(res[2]) for x in 35:kmax]]
    # If the susceptible fraction is exhausted the fit is invalid;
    # -1 is a sentinel to be filtered downstream.
    if (1-v-sum(df[:i]./α)/N)<=0
        R0s[k] = -1; α1s[k] = -1; α2s[k] = -1
    else
        R0s[k] = getR0(res)
        α1s[k] = exp(res[1])
        α2s[k] = exp(res[2])
    end
end
# Collect the sensitivity results; -1 entries mark invalid draws.
dfResults = DataFrame(v = Float64.(vs), R0 = Float64.(R0s), α2 = Float64.(α2s), α1 = Float64.(α1s))
dfResults |> head
describe(dfResults)
# Mode and central 95% range (drop the 25 smallest and 25 largest of 1000).
mode(sort(dfResults,cols = [:R0])[:R0])
describe(sort(dfResults,cols = [:R0])[26:(end-25),:R0])
describe(sort(dfResults,cols = [:α1])[26:(end-25),:α1])
describe(sort(dfResults,cols = [:α2])[26:(end-25),:α2])
# Pairwise scatter plots of the sampled parameter cloud.
scatter(α1s,R0s,xlab="α1",ylab="R0",legend=false,xlim=(0,.025))
scatter(vs[1:nsamples],R0s,xlab="v",ylab="R0",legend=false,xlim=(0,.7))
scatter(vs[1:nsamples],α1s,xlab="v",ylab="α1",legend=false,xlim=(0,.7),ylim=(0,.025))
scatter(α2s,α1s,xlab="α2",ylab="α1",legend=false,xlim=(0,.025),ylim=(0,.025))
# Marginal histograms.
histogram(R0s,legend=:none,xlab="R0")
histogram(α1s,legend=:none,xlab="α1")
histogram(α2s,legend=:none,xlab="α2")
# Persist the CV = 50% run.
writetable("output_Julia_50.csv",DataFrame(R0=R0s,a1=α1s,a2=α2s,v=vs))
# Sensitivity to the generation-time CV: mean stays at 8 days, CV lowered to 25%.
mean_gt = 8.0; # mean = 8 days
CV_gt = 0.25; #CV is 25%
# Gamma shape α = 1/CV², scale θ = mean/shape.
αgt=1/CV_gt^2; θgt = mean_gt/αgt;
gt = Gamma(αgt,θgt)
show(gt)
plot(gt, fill=(0, .5, :orange), size=(350,220), legend=false, xlim=(0,32), xlab="day")
# Re-discretize the generation time under the new distribution.
gen_time = (x -> cdf(gt,x)-cdf(gt,x-1)).(1:nrow(df))
plot(gen_time,size=(400,300),legend=false)
quantile.(gt,[.025,.5,.975])
# Sensitivity run (generation-time CV = 25%): refit (α1, α2) for every sampled
# v and record the implied R0.
# Typed, preallocated result vectors: `Vector(nsamples)` allocated Vector{Any},
# which is type-unstable and an error on Julia >= 1.0.
R0s = zeros(nsamples)
α1s = zeros(nsamples)
α2s = zeros(nsamples)
initial = [.01, .01]
for k in 1:nsamples
    v = vs[k]           # the likelihood reads v from global scope
    println(k)          # progress indicator
    res0 = optimize(getNegativeLoglk,log.(initial))
    res = Optim.minimizer(res0)
    α = [[exp(res[1]) for x in 1:34]; [exp(res[2]) for x in 35:kmax]]
    # If the susceptible fraction is exhausted the fit is invalid;
    # -1 is a sentinel to be filtered downstream.
    if (1-v-sum(df[:i]./α)/N)<=0
        R0s[k] = -1; α1s[k] = -1; α2s[k] = -1
    else
        R0s[k] = getR0(res)
        α1s[k] = exp(res[1])
        α2s[k] = exp(res[2])
    end
end
# Summaries and output for the CV = 25% run; -1 entries mark invalid draws.
dfResults = DataFrame(v = Float64.(vs), R0 = Float64.(R0s), α2 = Float64.(α2s), α1 = Float64.(α1s))
dfResults |> head
describe(dfResults)
# Mode and central 95% range (drop the 25 smallest and 25 largest of 1000).
mode(sort(dfResults,cols = [:R0])[:R0])
describe(sort(dfResults,cols = [:R0])[26:(end-25),:R0])
histogram(R0s,legend=:none,xlab="R0")
# Persist the CV = 25% run.
writetable("output_Julia_25.csv",DataFrame(R0=R0s,a1=α1s,a2=α2s,v=vs))
# Sensitivity to the generation-time CV: mean stays at 8 days, CV raised to 75%.
mean_gt = 8.0; # mean = 8 days
CV_gt = 0.75; #CV is 75%
# Gamma shape α = 1/CV², scale θ = mean/shape.
αgt=1/CV_gt^2; θgt = mean_gt/αgt;
gt = Gamma(αgt,θgt)
show(gt)
plot(gt, fill=(0, .5, :orange), size=(350,220), legend=false, xlim=(0,32), xlab="day")
# Re-discretize the generation time under the new distribution.
gen_time = (x -> cdf(gt,x)-cdf(gt,x-1)).(1:nrow(df))
plot(gen_time,size=(400,300),legend=false)
quantile.(gt,[.025,.5,.975])
# Sensitivity run (generation-time CV = 75%): refit (α1, α2) for every sampled
# v and record the implied R0.
# Typed, preallocated result vectors: `Vector(nsamples)` allocated Vector{Any},
# which is type-unstable and an error on Julia >= 1.0.
R0s = zeros(nsamples)
α1s = zeros(nsamples)
α2s = zeros(nsamples)
initial = [.01, .01]
for k in 1:nsamples
    v = vs[k]           # the likelihood reads v from global scope
    println(k)          # progress indicator
    res0 = optimize(getNegativeLoglk,log.(initial))
    res = Optim.minimizer(res0)
    α = [[exp(res[1]) for x in 1:34]; [exp(res[2]) for x in 35:kmax]]
    # If the susceptible fraction is exhausted the fit is invalid;
    # -1 is a sentinel to be filtered downstream.
    if (1-v-sum(df[:i]./α)/N)<=0
        R0s[k] = -1; α1s[k] = -1; α2s[k] = -1
    else
        R0s[k] = getR0(res)
        α1s[k] = exp(res[1])
        α2s[k] = exp(res[2])
    end
end
# Summaries and output for the CV = 75% run; -1 entries mark invalid draws.
dfResults = DataFrame(v = Float64.(vs), R0 = Float64.(R0s), α2 = Float64.(α2s), α1 = Float64.(α1s))
dfResults |> head
describe(dfResults)
# Mode and central 95% range (drop the 25 smallest and 25 largest of 1000).
mode(sort(dfResults,cols = [:R0])[:R0])
describe(sort(dfResults,cols = [:R0])[26:(end-25),:R0])
histogram(R0s,legend=:none,xlab="R0")
# Persist the CV = 75% run.
writetable("output_Julia_75.csv",DataFrame(R0=R0s,a1=α1s,a2=α2s,v=vs))