This R Markdown document walks through the steps for calculating time-varying reproduction number, R(t), assuming a flux of infectors between various adjacent states. This was adapted in STAN from Zhou and White
Step 1. Load data
Step 2. Create the case matrix of integer values
site_names <- colnames(sample_multi_site)[c(2, 3)]
Y <- matrix(integer(1), nrow = nrow(sample_multi_site), ncol = 2)
for(i in 1:nrow(Y)) {
for(j in c(2, 3)) {
Y[i,j-1] <- as.integer(sample_multi_site[i,j])
}
}
all(is.integer(Y))
#> [1] TRUE
Step 3. Define the serial interval. The
si()
function creates a vector of length 14 with shape and
rate for a gamma distribution. Note, the serial interval for in this
example CANNOT start with a leading 0.
Step 4. Run STAN. The v2
option
indicates an experimental STAN formulation that includes a non-centered
parameterization, partial pooling, and an AR1 process. Running this
option takes more time, and requires more customization of STAN options.
Additional options to STAN can be specified in the last argument (e.g.,
chains, cores, control).
sample_m_hier <- spatialRt(report_dates = sample_multi_site$date,
case_matrix = Y,
transfer_matrix = transfer_matrix,
v2 = FALSE,
sip = sip, chains = 1)
Check output. Check divergences and diagnostics before continuing.
data("sample_m_hier")
rstan::check_divergences(sample_m_hier)
#> 0 of 1000 iterations ended with a divergence.
rstan::check_hmc_diagnostics(sample_m_hier)
#>
#> Divergences:
#> 0 of 1000 iterations ended with a divergence.
#>
#> Tree depth:
#> 0 of 1000 iterations saturated the maximum tree depth of 10.
#>
#> Energy:
#> E-BFMI indicated no pathological behavior.
out <- rstan::extract(sample_m_hier)
# QC
# launch_shinystan(m_hier)
Summarize the output data.
dim(out$M)
#> [1] 1000 80 2
dim(out$xsigma)
#> [1] 1000 2
data_l <- lapply(1:dim(out$M)[3], function(i) {
data.frame(
x = 1:dim(out$M)[2],
y_real = Y[, i],
y = apply(out$M[, , i], 2, mean),
yl = apply(out$M[, , i], 2, quantile, probs = 0.025),
yh = apply(out$M[, , i], 2, quantile, probs = 0.975),
Rt = apply(out$R[, , i], 2, mean),
Rtl = apply(out$R[, , i], 2, quantile, probs = 0.025),
Rth = apply(out$R[, , i], 2, quantile, probs = 0.975),
region = site_names[i] # must be a string
)
})
data_all <- do.call(rbind, data_l)
head(data_all)
#> x y_real y yl yh Rt Rtl Rth region
#> 1 1 10 10.0000000 10.0000000 10.0000000 2.960779 2.118388 3.741398 Tatooine
#> 2 2 1 0.5869221 0.4969556 0.6784966 2.960779 2.118388 3.741398 Tatooine
#> 3 3 4 4.1246381 3.4892261 4.7731303 2.960779 2.118388 3.741398 Tatooine
#> 4 4 11 7.1737399 6.0203087 8.3754012 2.960779 2.118388 3.741398 Tatooine
#> 5 5 8 8.6618196 6.9999981 10.5168484 2.960779 2.118388 3.741398 Tatooine
#> 6 6 10 10.6333360 8.0671222 13.6184957 2.960779 2.118388 3.741398 Tatooine
# Summarise data
data_all_summarise <- aggregate(
cbind(y, y_real, yl, yh, Rt, Rtl, Rth) ~ x + region,
data = data_all,
FUN = mean
)
head(data_all_summarise)
#> x region y y_real yl yh Rt Rtl Rth
#> 1 1 Hoth 10.0000000 10 10.0000000 10.0000000 0.9915897 0.1797246 2.052818
#> 2 2 Hoth 0.2519622 1 0.1665596 0.3678789 0.9915897 0.1797246 2.052818
#> 3 3 Hoth 1.7678656 3 1.1699140 2.5833208 0.9915897 0.1797246 2.052818
#> 4 4 Hoth 3.0401639 0 2.0274692 4.4494860 0.9915897 0.1797246 2.052818
#> 5 5 Hoth 3.5129603 4 2.3997236 5.1575001 0.9915897 0.1797246 2.052818
#> 6 6 Hoth 4.0195730 6 2.7888033 5.7955901 0.9915897 0.1797246 2.052818
Get specific colors for different regions.
# Generate a color palette
regions <- unique(data_all_summarise$region)
# Define a set of distinct colors
colors <- c("red", "blue", "green", "purple", "orange", "brown",
"pink", "yellow", "cyan", "magenta")
colors <- colors[1:length(regions)]
names(colors) <- regions
Plot expected cases. The lines and shaded confidence intervals represent the expected cases given the calculated R(t) parameters.
# Plot expected cases
plot(
x = as.integer(data_all_summarise$x),
y = as.numeric(data_all_summarise$y),
type = "n",
xlab = "Days",
ylab = "Cases",
main = "Expected Cases"
)
for (region_i in regions) {
region_data <- subset(data_all_summarise, region == region_i)
points(x = region_data$x, region_data$y_real, col = colors[region_i])
polygon(
c(region_data$x, rev(region_data$x)),
c(region_data$yl, rev(region_data$yh)),
col = adjustcolor(colors[region_i], alpha.f = 0.3), border = NA
)
lines(region_data$x, region_data$y, col = colors[region_i], lwd = 0.5)
}
legend("topright",
legend = regions,
col = colors,
lty = rep(1, length(regions)),
cex = 0.8,
pt.cex = 1.5 ) # Text size
Plot expected R(t). The lines and shaded confidence intervals represent the expected R(t) given the data.
# Plot R(t)
plot(
data_all_summarise$x, data_all_summarise$Rt,
xlab = "Days", ylab = "Reproduction Number",
type = "n",
main = "R(t)",
ylim = c(0, 5)
)
for (region_i in regions) {
region_data <- subset(data_all_summarise, region == region_i)
polygon(
c(region_data$x, rev(region_data$x)),
c(region_data$Rtl, rev(region_data$Rth)),
col = adjustcolor(colors[region_i], alpha.f = 0.3), border = NA
)
lines(region_data$x, region_data$Rt, col = colors[region_i], lwd = 0.5)
}
abline(h = 1, col = "black", lwd = 1, lty = 1)
legend("topright",
legend = regions,
col = colors,
lty = c(1, 1),
cex = 0.8,
pt.cex = 1.5 ) # Text size