This vignette produces the graphs included in the initial MBR manuscript.
Figure 1: Raw monthly birth rates (General Fertility Rates; GFRs) for Oklahoma County, 1990-1999, plotted in a linear plot; the “bombing effect” is located ten months after the Oklahoma City bombing.
Smoothed monthly birth rates (General Fertility Rates; GFRs) for Oklahoma County, 1990-1999, plotted in a linear plot. The top plot shows the connected raw data with a February smoother; the middle plot shows smoothing with a 12-month moving average, blue/green line, superimposed on a February smoother, red line); the bottom plot shows the smoothers and confidence bands, which are H-spreads defined using the distribution of GFRs for the given month and 11 previous months.
First, some R packages are loaded, and some variables and functions are defined.
<- base::as.Date("1996-02-15") #as.Date("1995-04-19") + lubridate::weeks(39) = "1996-01-17"
change_month set.seed(444) # So bootstrap won't trigger a git diff
<- function(x, y) {
vp_layout ::viewport(layout.pos.row = x, layout.pos.col = y)
grid
}<- function(scores) {
full_spread ::range(scores) # A new function isn't necessary. It's defined in order to be consistent.
base
}<- function(scores) {
h_spread ::quantile(x = scores, probs = c(.25, .75))
stats
}<- function(scores) {
se_spread ::mean(scores) + base::c(-1, 1) * stats::sd(scores) / base::sqrt(base::sum(!base::is.na(scores)))
base
}<- function(scores, conf = .68) {
boot_spread <- function(d, i) {
plugin ::mean(d[i])
base
}
<- boot::boot(data = scores, plugin, R = 99) # 999 for the publication
distribution <- boot::boot.ci(distribution, type = c("bca"), conf = conf)
ci $bca[4:5] # The fourth & fifth elements correspond to the lower & upper bound.
ci
}
<- ggplot2::theme(
dark_theme axis.title = ggplot2::element_text(color = "gray30", size = 9),
axis.text.x = ggplot2::element_text(color = "gray30", hjust = 0),
axis.text.y = ggplot2::element_text(color = "gray30"),
axis.ticks = ggplot2::element_blank(),
# panel.grid.minor.y = element_line(color = "gray95", linewidth = .1),
# panel.grid.major = element_line(color = "gray90", linewidth = .1),
panel.spacing = grid::unit(c(0, 0, 0, 0), "cm"),
plot.margin = grid::unit(c(0, 0, 0, 0), "cm")
)# qplot(mtcars$hp) + dark_theme
<-
light_theme +
dark_theme ::theme(
ggplot2axis.title = ggplot2::element_text(color = "gray80", size = 9),
axis.text.x = ggplot2::element_text(color = "gray80", hjust = 0),
axis.text.y = ggplot2::element_text(color = "gray80"),
panel.grid.minor.y = ggplot2::element_line(color = "gray99", linewidth = .1),
panel.grid.major = ggplot2::element_line(color = "gray95", linewidth = .1)
)<-
date_sequence ::seq.Date(
basefrom = base::as.Date("1990-01-01"),
to = base::as.Date("1999-01-01"),
by = "years"
)<-
x_scale ::scale_x_date(
ggplot2breaks = date_sequence,
labels = scales::date_format("%Y")
)# This keeps things proportional down the three frames.
<-
x_scale_blank ::scale_x_date(
ggplot2breaks = date_sequence,
labels = NULL
)
Here is the basic linear rolling graph. It doesn’t require much specification, and will work with a wide range of appropriate datasets. This first (unpublished) graph displays all components.
# Uncomment the next two lines to use the version built into the package. By default, it uses the
# CSV to promote reproducible research, since the CSV format is more open and accessible to more software.
<-
ds_linear_all |>
county_month_birth_rate_2005_version ::as_tibble()
tibble
<-
ds_linear_okc |>
ds_linear_all ::filter(county_name == "oklahoma") |>
dplyraugment_year_data_with_month_resolution(date_name = "date")
<-
portfolio_cartesian annotate_data(
ds_linear_okc,dv_name = "birth_rate",
center_function = stats::median,
spread_function = h_spread
)
cartesian_rolling(
ds_linear = portfolio_cartesian$ds_linear,
x_name = "date",
y_name = "birth_rate",
stage_id_name = "stage_id",
change_points = change_month,
change_point_labels = "Bombing Effect"
)
The version for the manuscript was tweaked to take advantage of certain features of the dataset. This is what it looks like when all three stylized panels are combined.
<-
top_panel ::cartesian_rolling(
Watsds_linear = portfolio_cartesian$ds_linear,
x_name = "date",
y_name = "birth_rate",
stage_id_name = "stage_id",
change_points = change_month,
y_title = "General Fertility Rate",
change_point_labels = "Bombing Effect",
draw_rolling_band = FALSE,
draw_rolling_line = FALSE
)
<-
middle_panel ::cartesian_rolling(
Watsds_linear = portfolio_cartesian$ds_linear,
x_name = "date",
y_name = "birth_rate",
stage_id_name = "stage_id",
change_points = change_month,
y_title = "General Fertility Rate",
change_point_labels = "",
draw_rolling_band = FALSE,
draw_jagged_line = FALSE
)
<-
bottom_panel ::cartesian_rolling(
Watsds_linear = portfolio_cartesian$ds_linear,
x_name = "date",
y_name = "birth_rate",
stage_id_name = "stage_id",
change_points = change_month,
y_title = "General Fertility Rate",
change_point_labels = "",
# draw_rolling_band = FALSE,
draw_jagged_line = FALSE
)
<- top_panel + x_scale + dark_theme
top_panel <- middle_panel + x_scale + dark_theme
middle_panel <- bottom_panel + x_scale_blank + dark_theme
bottom_panel
::grid.newpage()
grid::pushViewport(grid::viewport(layout = grid::grid.layout(3,1)))
gridprint(top_panel , vp = vp_layout(1, 1))
print(middle_panel, vp = vp_layout(2, 1))
print(bottom_panel, vp = vp_layout(3, 1))
::popViewport() grid
Cartesian plot of the GFR time series data in Oklahoma County, with H-spread Bands superimposed.
<-
cartesian_periodic ::cartesian_periodic(
Wats$ds_linear,
portfolio_cartesian$ds_periodic,
portfolio_cartesianx_name = "date",
y_name = "birth_rate",
stage_id_name = "stage_id",
change_points = change_month,
change_point_labels = "Bombing Effect",
y_title = "General Fertility Rate",
draw_periodic_band = TRUE #The only difference from the simple linear graph above
)print(cartesian_periodic)
<- cartesian_periodic + x_scale + dark_theme
cartesian_periodic print(cartesian_periodic)
Wrap Around Time Series (WATS Plot) of the Oklahoma City GFR data, 1990-1999.
<-
portfolio_polar polarize_cartesian(
ds_linear = portfolio_cartesian$ds_linear,
ds_stage_cycle = portfolio_cartesian$ds_stage_cycle,
y_name = "birth_rate",
stage_id_name = "stage_id",
plotted_point_count_per_cycle = 7200
)
::grid.newpage()
gridpolar_periodic(
ds_linear = portfolio_polar$ds_observed_polar,
ds_stage_cycle = portfolio_polar$ds_stage_cycle_polar,
y_name = "radius",
stage_id_name = "stage_id",
draw_periodic_band = FALSE,
draw_stage_labels = TRUE,
draw_radius_labels = TRUE,
cardinal_labels = c("Jan1", "Apr1", "July1", "Oct1")
)
Wrap Around Time Series (WATS Plot) of the Oklahoma City GFR data, 1990-1999.
<-
portfolio_polar ::polarize_cartesian(
Watsds_linear = portfolio_cartesian$ds_linear,
ds_stage_cycle = portfolio_cartesian$ds_stage_cycle,
y_name = "birth_rate",
stage_id_name = "stage_id",
plotted_point_count_per_cycle = 7200
)
::grid.newpage()
grid::pushViewport(grid::viewport(
gridlayout = grid::grid.layout(
nrow = 2,
ncol = 2,
respect = TRUE,
widths = grid::unit(c(1, 1), c("null", "null")),
heights = grid::unit(c(1, .5), c("null", "null"))
),gp = grid::gpar(cex = 1, fill = NA)
))
## Create top left panel
::pushViewport(grid::viewport(layout.pos.col = 1, layout.pos.row = 1))
grid<-
top_left_panel ::polar_periodic(
Watsds_linear = portfolio_polar$ds_observed_polar,
ds_stage_cycle_polar = portfolio_polar$ds_stage_cycle_polar,
y_name = "radius",
stage_id_name = "stage_id", #graph_ceiling = 7,
cardinal_labels = c("Jan1", "Apr1", "July1", "Oct1")
)::upViewport()
grid
## Create top right panel
::pushViewport(grid::viewport(layout.pos.col = 2, layout.pos.row = 1))
grid<-
top_right_panel ::polar_periodic(
Watsds_linear = portfolio_polar$ds_observed_polar,
ds_stage_cycle_polar = portfolio_polar$ds_stage_cycle_polar,
y_name = "radius",
stage_id_name = "stage_id", #graph_ceiling = 7,
draw_observed_line = FALSE,
cardinal_labels = c("Jan1", "Apr1", "July1", "Oct1"),
origin_label = NULL
)::upViewport()
grid
## Create bottom panel
::pushViewport(grid::viewport(layout.pos.col = 1:2, layout.pos.row = 2, gp = grid::gpar(cex = 1)))
gridprint(cartesian_periodic, vp = vp_layout(x = 1:2, y = 2)) # Print across both columns of the bottom row.
::upViewport() grid
This figure compares Oklahoma County against the (other) largest urban counties.
# ds_linear_all <- Wats::augment_year_data_with_month_resolution(ds_linear = county_month_birth_rate_2005_version, date_name="date")
# Identify the average size of the fecund population
|>
ds_linear_all ::group_by(county_name) |>
dplyr::summarize(
dplyrMean = base::mean(fecund_population)
|>
) ::ungroup() dplyr
# A tibble: 12 × 2
county_name Mean
<chr> <dbl>
1 canadian 18332.
2 cleveland 48865.
3 comanche 26268.
4 creek 13402.
5 logan 7065.
6 mcclain 5434.
7 oklahoma 146882.
8 osage 8529.
9 pottawatomie 13604.
10 rogers 13383.
11 tulsa 123783.
12 wagoner 11580.
<- function(
graph_row_comparison row_label = "",
.county_name = "oklahoma",
spread_function = h_spread,
change_month = as.Date("1996-02-15")
) {<-
ds_linear |>
ds_linear_all ::filter(county_name == .county_name) |>
dplyr::augment_year_data_with_month_resolution(date_name = "date")
Wats
<-
portfolio_cartesian |>
ds_linear ::annotate_data(
Watsdv_name = "birth_rate",
center_function = stats::median,
spread_function = spread_function
)
<-
portfolio_polar $ds_linear |>
portfolio_cartesian::polarize_cartesian(
Watsds_stage_cycle = portfolio_cartesian$ds_stage_cycle,
y_name = "birth_rate",
stage_id_name = "stage_id",
plotted_point_count_per_cycle = 7200
)
<-
cartesian_periodic $ds_linear |>
portfolio_cartesian::cartesian_periodic(
Wats$ds_periodic,
portfolio_cartesianx_name = "date",
y_name = "birth_rate",
stage_id_name = "stage_id",
change_points = change_month,
change_point_labels = ""
)
::pushViewport(grid::viewport(
gridlayout =
::grid.layout(
gridnrow = 1,
ncol = 3,
respect = FALSE,
widths = grid::unit(c(1.5, 1, 3), c("line", "null", "null"))
),gp = grid::gpar(cex = 1, fill = NA)
))
::pushViewport(grid::viewport(layout.pos.col = 1))
grid::grid.rect(gp = grid::gpar(fill = "gray90", col = NA))
grid::grid.text(row_label, rot = 90)
grid::popViewport()
grid
::pushViewport(grid::viewport(layout.pos.col = 2))
grid::polar_periodic(
Watsds_linear = portfolio_polar$ds_observed_polar,
ds_stage_cycle_polar = portfolio_polar$ds_stage_cycle_polar,
draw_observed_line = FALSE,
y_name = "radius",
stage_id_name = "stage_id",
origin_label = NULL,
plot_margins = c(0, 0, 0, 0)
)::popViewport()
grid
::pushViewport(grid::viewport(layout.pos.col = 3))
gridprint(cartesian_periodic + x_scale + light_theme, vp = vp_layout(x = 1, y = 1))
::popViewport()
grid::popViewport() #Finish the row
grid
}
<- c("Comanche", "Cleveland", "Oklahoma", "Tulsa", "Rogers")
county_names <- tolower(county_names)
counties
::grid.newpage()
grid::pushViewport(grid::viewport(
gridlayout = grid::grid.layout(nrow = length(counties), ncol = 1),
gp = grid::gpar(cex = 1, fill = NA)
))
for (i in base::seq_along(counties)) {
::pushViewport(grid::viewport(layout.pos.row = i))
gridgraph_row_comparison(.county_name = counties[i], row_label = county_names[i])
::popViewport()
grid
}::popViewport() grid
Here are all 12 counties that Ronnie collected birth records for. This extended graph is not in the manuscript.
<- base::sort(base::unique(ds_linear_all$county_name))
counties <- c("Canadian", "Cleveland", "Comanche", "Creek", "Logan", "McClain", "Oklahoma", "Osage", "Pottawatomie", "Rogers", "Tulsa", "Wagoner")
county_names
::grid.newpage()
grid::pushViewport(grid::viewport(
gridlayout = grid::grid.layout(nrow = base::length(counties), ncol = 1),
gp = grid::gpar(cex = 1, fill = NA)
))
for (i in base::seq_along(counties)) {
::pushViewport(grid::viewport(layout.pos.row = i))
gridgraph_row_comparison(.county_name = counties[i], row_label = county_names[i])
::popViewport()
grid
}::popViewport() grid
This figure demonstrates that WATS accommodates many types of error bands.
<- c("h_spread", "full_spread", "se_spread", "boot_spread")
spreads <- c("H-Spread", "Range", "+/-1 SE", "Bootstrap")
spread_names ::grid.newpage()
grid::pushViewport(grid::viewport(
gridlayout = grid::grid.layout(nrow = base::length(spreads), ncol = 1),
gp = grid::gpar(cex = 1, fill = NA)
))for (i in base::seq_along(spreads)) {
::pushViewport(grid::viewport(layout.pos.row = i))
gridgraph_row_comparison(spread_function = base::get(spreads[i]), row_label = spread_names[i])
::upViewport()
grid
}::upViewport() grid
The current vignette was build on a system using the following software.
Report created by wibeasley at Fri 10 Mar 2023 08:43:44 AM CST, -0600
R version 4.2.2 (2022-10-31)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 22.10
Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.1
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.1
locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
[3] LC_TIME=en_US.UTF-8 LC_COLLATE=C
[5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
[7] LC_PAPER=en_US.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] Wats_1.0.1
loaded via a namespace (and not attached):
[1] highr_0.10 RColorBrewer_1.1-3 pillar_1.8.1 bslib_0.4.2
[5] compiler_4.2.2 jquerylib_0.1.4 tools_4.2.2 boot_1.3-28.1
[9] digest_0.6.31 lattice_0.20-45 jsonlite_1.8.4 lubridate_1.9.2
[13] evaluate_0.20 lifecycle_1.0.3 tibble_3.2.0 gtable_0.3.1
[17] timechange_0.2.0 pkgconfig_2.0.3 rlang_1.0.6 cli_3.6.0
[21] rstudioapi_0.14 yaml_2.3.7 xfun_0.37 fastmap_1.1.1
[25] withr_2.5.0 dplyr_1.1.0 knitr_1.42 generics_0.1.3
[29] vctrs_0.5.2 sass_0.4.5 grid_4.2.2 tidyselect_1.2.0
[33] glue_1.6.2 R6_2.5.1 fansi_1.0.4 rmarkdown_2.20
[37] farver_2.1.1 ggplot2_3.4.1 magrittr_2.0.3 scales_1.2.1
[41] htmltools_0.5.4 testit_0.13 colorspace_2.1-0 labeling_0.4.2
[45] utf8_1.2.3 munsell_0.5.0 cachem_1.0.7 zoo_1.8-11