Spatial R(t) estimation with transfer estimates

This R Markdown document walks through the steps for calculating time-varying reproduction number, R(t), assuming a flux of infectors between various adjacent states. This was adapted in STAN from Zhou and White

Example of 2 states:

library(WhiteLabRt)

Step 1. Load data

data("sample_multi_site")
data("transfer_matrix")

Step 2. Create the case matrix of integer values

site_names <- colnames(sample_multi_site)[c(2, 3)]
Y <- matrix(integer(1), nrow = nrow(sample_multi_site), ncol = 2)
for(i in 1:nrow(Y)) {
  for(j in c(2, 3)) {
    Y[i,j-1] <- as.integer(sample_multi_site[i,j])
  }
}
all(is.integer(Y))
#> [1] TRUE

Step 3. Define the serial interval. The si() function creates a vector of length 14 with shape and rate for a gamma distribution. Note, the serial interval for in this example CANNOT start with a leading 0.

sip <- si(14, 4.29, 1.18, leading0 = FALSE)

Step 4. Run STAN. The v2 option indicates an experimental STAN formulation that includes a non-centered parameterization, partial pooling, and an AR1 process. Running this option takes more time, and requires more customization of STAN options. Additional options to STAN can be specified in the last argument (e.g., chains, cores, control).

sample_m_hier <- spatialRt(report_dates = sample_multi_site$date,
                 case_matrix = Y,
                 transfer_matrix = transfer_matrix,
                 v2 = FALSE,
                 sip = sip, chains = 1)

Check output. Check divergences and diagnostics before continuing.

data("sample_m_hier")
rstan::check_divergences(sample_m_hier)
#> 0 of 1000 iterations ended with a divergence.
rstan::check_hmc_diagnostics(sample_m_hier)
#> 
#> Divergences:
#> 0 of 1000 iterations ended with a divergence.
#> 
#> Tree depth:
#> 0 of 1000 iterations saturated the maximum tree depth of 10.
#> 
#> Energy:
#> E-BFMI indicated no pathological behavior.

out <- rstan::extract(sample_m_hier)
# QC
# launch_shinystan(m_hier)

Summarize the output data.

dim(out$M)
#> [1] 1000   80    2
dim(out$xsigma)
#> [1] 1000    2

data_l <- lapply(1:dim(out$M)[3], function(i) {
  data.frame(
    x      = 1:dim(out$M)[2],
    y_real = Y[, i],
    y      = apply(out$M[, , i], 2, mean),
    yl     = apply(out$M[, , i], 2, quantile, probs = 0.025),
    yh     = apply(out$M[, , i], 2, quantile, probs = 0.975),
    Rt     = apply(out$R[, , i], 2, mean),
    Rtl    = apply(out$R[, , i], 2, quantile, probs = 0.025),
    Rth    = apply(out$R[, , i], 2, quantile, probs = 0.975),
    region = site_names[i] # must be a string
  )
})

data_all <- do.call(rbind, data_l)
head(data_all)
#>   x y_real          y         yl         yh       Rt      Rtl      Rth   region
#> 1 1     10 10.0000000 10.0000000 10.0000000 2.960779 2.118388 3.741398 Tatooine
#> 2 2      1  0.5869221  0.4969556  0.6784966 2.960779 2.118388 3.741398 Tatooine
#> 3 3      4  4.1246381  3.4892261  4.7731303 2.960779 2.118388 3.741398 Tatooine
#> 4 4     11  7.1737399  6.0203087  8.3754012 2.960779 2.118388 3.741398 Tatooine
#> 5 5      8  8.6618196  6.9999981 10.5168484 2.960779 2.118388 3.741398 Tatooine
#> 6 6     10 10.6333360  8.0671222 13.6184957 2.960779 2.118388 3.741398 Tatooine

# Summarise data
data_all_summarise <- aggregate(
  cbind(y, y_real, yl, yh, Rt, Rtl, Rth) ~ x + region,
  data = data_all,
  FUN = mean
)

head(data_all_summarise)
#>   x region          y y_real         yl         yh        Rt       Rtl      Rth
#> 1 1   Hoth 10.0000000     10 10.0000000 10.0000000 0.9915897 0.1797246 2.052818
#> 2 2   Hoth  0.2519622      1  0.1665596  0.3678789 0.9915897 0.1797246 2.052818
#> 3 3   Hoth  1.7678656      3  1.1699140  2.5833208 0.9915897 0.1797246 2.052818
#> 4 4   Hoth  3.0401639      0  2.0274692  4.4494860 0.9915897 0.1797246 2.052818
#> 5 5   Hoth  3.5129603      4  2.3997236  5.1575001 0.9915897 0.1797246 2.052818
#> 6 6   Hoth  4.0195730      6  2.7888033  5.7955901 0.9915897 0.1797246 2.052818

Get specific colors for different regions.

# Generate a color palette
regions <- unique(data_all_summarise$region)
# Define a set of distinct colors
colors <- c("red", "blue", "green", "purple", "orange", "brown",
            "pink", "yellow", "cyan", "magenta")
colors <- colors[1:length(regions)]
names(colors) <- regions

Plot expected cases. The lines and shaded confidence intervals represent the expected cases given the calculated R(t) parameters.

# Plot expected cases
plot(
  x = as.integer(data_all_summarise$x), 
  y = as.numeric(data_all_summarise$y), 
  type = "n",
  xlab = "Days", 
  ylab = "Cases",
  main = "Expected Cases"
)

for (region_i in regions) {
  region_data <- subset(data_all_summarise, region == region_i)
  points(x = region_data$x, region_data$y_real, col = colors[region_i])
  polygon(
    c(region_data$x, rev(region_data$x)), 
    c(region_data$yl, rev(region_data$yh)), 
    col = adjustcolor(colors[region_i], alpha.f = 0.3), border = NA
  )
  lines(region_data$x, region_data$y, col = colors[region_i], lwd = 0.5)
}

legend("topright",
       legend = regions,
       col = colors,
       lty = rep(1, length(regions)), 
       cex = 0.8,
       pt.cex = 1.5 ) # Text size

Plot expected R(t). The lines and shaded confidence intervals represent the expected R(t) given the data.

# Plot R(t)
plot(
  data_all_summarise$x, data_all_summarise$Rt, 
  xlab = "Days", ylab = "Reproduction Number",
  type = "n",
  main = "R(t)",
  ylim = c(0, 5)
)
for (region_i in regions) {
  region_data <- subset(data_all_summarise, region == region_i)
  polygon(
    c(region_data$x, rev(region_data$x)), 
    c(region_data$Rtl, rev(region_data$Rth)), 
    col = adjustcolor(colors[region_i], alpha.f = 0.3), border = NA
  )
  lines(region_data$x, region_data$Rt, col = colors[region_i], lwd = 0.5)
}
abline(h = 1, col = "black", lwd = 1, lty = 1)
legend("topright",
       legend = regions,
       col = colors,
       lty = c(1, 1), 
       cex = 0.8,
       pt.cex = 1.5 ) # Text size