VisitorCounts

In this vignette, functions in the VisitorCounts package are demonstrated using park visitation data from Yellowstone National Park.

Sample Datasets: park_visitation and flickr_userdays

First, we load two datasets: park_visitation stores 156 monthly observations spanning 2005 through 2017 of flickr user-days (PUD) and visitor counts by the national park service (NPS) for 20 popular national parks in the United States. Second, flickr_userdays stores log US flickr user-days for the corresponding time period.

library(VisitorCounts)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
data("park_visitation")
data("flickr_userdays")

Sample data for Yellowstone National Park

For the purposes of this vignette, three time series are extracted from these datasets. First, log_yellowstone_pud is a time series of 156 monthly observations of flickr photo-user-days geolocated within Yellowstone National Park. Second, log_yellowstone_nps is a time series of 156 monthly observations of counts of park visitation by the national park service. Third, flickr_userdays is a time series of 156 monthly observations of log flickr user-days taken within the United States.

yellowstone_pud <- park_visitation[park_visitation$park == "YELL",]$pud #photo user days
yellowstone_nps <- park_visitation[park_visitation$park == "YELL",]$nps #national park service counts

yellowstone_pud <- ts(yellowstone_pud, start = 2005, freq = 12)
yellowstone_nps <- ts(yellowstone_nps, start = 2005, freq = 12)

log_yellowstone_pud <- log(yellowstone_pud)
log_yellowstone_nps <- log(yellowstone_nps)

log_flickr_userdays <- log(flickr_userdays)
plot(log_yellowstone_pud, main = "Yellowstone Photo-User-Days (PUD)", ylab = "PUD")

plot(log_yellowstone_nps, main = "Yellowstone National Park Service Visitation Counts (NPS)", ylab = "NPS")

plot(log_flickr_userdays, main = "Log US Flickr user-days", ylab = "UD")

visitation_model()

The visitation_model() function uses social media data, such as the log flickr photo-user-days in log_yellowstone_pud, coupled with a popularity measure of the social media platform, like the log US flickr userdays in log_flickr_userdays, to model percent changes in visitation counts. By default, visitation_model() assumes that no visitation counts are available.

yell_visitation_model <- visitation_model(log_yellowstone_pud,
                                          log_flickr_userdays, is_output_logged = TRUE, is_input_logged = TRUE)
## The additive constant for the model is assumed to be equal to zero.
##             If a better constant is known, change the value in the constant argument.
##             Instead, the actual series may be supplied in the ref_series argument.
## When no or linear trend is assumed, popularity_proxy will not be used.

If national park data is available, a reference series may be supplied to assist in parameter estimates:

yell_visitation_model_nps <- visitation_model(log_yellowstone_pud,
                                              log_flickr_userdays,
                                              ref_series = log_yellowstone_nps, is_output_logged = TRUE, is_input_logged = TRUE)
## When no or linear trend is assumed, popularity_proxy will not be used.

plot.visitation_model()

By default, plot.visiation_model() plots the differenced series. Typical graphical parameters may be passed to plot.visitation_model(), such as line width:

true_differences <- diff(log_yellowstone_nps)
lower_bound <- min(c(true_differences,diff(yell_visitation_model$visitation_fit)))-1
upper_bound <- max(c(true_differences,diff(yell_visitation_model$visitation_fit)))

plot(yell_visitation_model, ylim = c(lower_bound, upper_bound), lwd = 2)
lines(diff(log_yellowstone_nps), col = "red")
legend("bottom",c("Model Fit","True Differences"),col = c("black","red"),lty = c(1,1))

true_differences <- diff(log_yellowstone_nps)
lower_bound <- min(c(true_differences,diff(yell_visitation_model_nps$visitation_fit)))-1
upper_bound <- max(c(true_differences,diff(yell_visitation_model_nps$visitation_fit)))

plot(yell_visitation_model_nps, ylim = c(lower_bound, upper_bound), 
     lwd = 2,
     main = "Fitted Values for Visitation Model (NPS assisted)", difference = TRUE)
lines(true_differences, col = "red")
legend("bottom",c("Model Fit","True Differences"),col = c("black","red"),lty = c(1,1))

summary.visitation_model()

Parameters can be inspected using summary.visitation_model(). Two examples can be seen below:

summary(yell_visitation_model)
## Call: visitation_model(onsite_usage = log_yellowstone_pud, popularity_proxy = log_flickr_userdays, 
##     is_input_logged = TRUE, is_output_logged = TRUE)
## 
##  Parameter Estimates: 
## =============================== 
## Parameter:           Estimate: 
## ----------           --------- 
## Beta_0 (Constant):   0 
## Beta_1 (Seasonality): 1.308 
## Beta_2 (Trend):      0 
## Lag:                 0 
## Lag Criterion:       cross-correlation 
## ===============================
summary(yell_visitation_model_nps)
## Call: visitation_model(onsite_usage = log_yellowstone_pud, popularity_proxy = log_flickr_userdays, 
##     ref_series = log_yellowstone_nps, is_input_logged = TRUE, 
##     is_output_logged = TRUE)
## 
##  Parameter Estimates: 
## =============================== 
## Parameter:           Estimate: 
## ----------           --------- 
## Beta_0 (Constant):   11.371 
## Beta_1 (Seasonality): 1.572 
## Beta_2 (Trend):      0.002 
## Lag:                 0 
## Lag Criterion:       cross-correlation 
## ===============================

predict.visitation_model()

Forecasts can be made using predict.visitation_model(), whose output is a visitation_forecast class object which can be inspected using plot or summary functions.

yellowstone_visitation_forecasts <- predict(yell_visitation_model, n_ahead = 12)
## WARNING : the model's constant (Beta_0) parameter is 0. This will result in likely inaccurate predictions.
## The model constant is understood as the mean log adjusted monthly visitation relative to the base month.
## Please provide a ref_series to the visitation_model object or provide your own custom value for the constant to visitation_model constructor
yellowstone_visitation_forecasts_nps <- predict(yell_visitation_model_nps, n_ahead = 12)

yellowstone_visitation_forecasts_withpast <- predict(yell_visitation_model, n_ahead = 12, only_new = FALSE)
## WARNING : the model's constant (Beta_0) parameter is 0. This will result in likely inaccurate predictions.
## The model constant is understood as the mean log adjusted monthly visitation relative to the base month.
## Please provide a ref_series to the visitation_model object or provide your own custom value for the constant to visitation_model constructor

plot.visitation_forecast()

Forecasts can be plotted using plot.visitation_forecast():

plot(yellowstone_visitation_forecasts, difference = TRUE)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.

plot(yellowstone_visitation_forecasts_nps, main = "Forecasts for Visitation Model (NPS Assisted)", date_label = "%b", date_breaks = "1 month")
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.

plot(yellowstone_visitation_forecasts_withpast, difference = TRUE, date_breaks = "1 year", date_label = "%y")
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.

summary.visitation_forecast()

summary(yellowstone_visitation_forecasts)
## Visitation model forecasts: 
##  
## Parameter Estimates: 
## =============================== 
## Parameter:           Estimate: 
## ----------           --------- 
## Beta_0 (Constant):   0 
## Beta_1 (Seasonality): 1.308 
## Beta_2 (Trend):      0 
## Lag:                  
## =============================== 
## Criterion for Lag Estimate: cross-correlation 
## Number of Forecasts: 12
summary(yellowstone_visitation_forecasts_nps)
## Visitation model forecasts: 
##  
## Parameter Estimates: 
## =============================== 
## Parameter:           Estimate: 
## ----------           --------- 
## Beta_0 (Constant):   11.371 
## Beta_1 (Seasonality): 1.572 
## Beta_2 (Trend):      0.002 
## Lag:                  
## =============================== 
## Criterion for Lag Estimate: cross-correlation 
## Number of Forecasts: 12

auto_decompose()

The automatic decomposition function uses singular-spectrum analysis, as implemented by the Rssa package, in conjunction with an automated procedure for classifying components to decompose a time series into trend, seasonality and noise.

yell_pud_decomposition <- auto_decompose(yellowstone_pud)

plot.decomposition()

Several plot options are available for examining this decomposition.

plot(yell_pud_decomposition)

plot(yell_pud_decomposition, type = "period")

plot(yell_pud_decomposition, type = "classical")

summary.decomposition()

The eigenvector grouping can be examined using summary.decomposition.

summary(yell_pud_decomposition)
## Decomposition: 
##  
## Period or Component  || Eigenvector Grouping 
## ===================  || ====================
##  12                  || 2, 3 
##  6                   || 5, 6 
##  4                   || 9, 10 
##  3                   || 12, 13 
##  Trend               || 1, 4
##  
##  Window Length: 72
##  Number of Observations: 156

predict.decomposition()

Forecasts can be made using predict.decomposition():

plot(predict(yell_pud_decomposition, n_ahead = 12)$forecast, main = "Decomposition 12-ahead Forecast", ylab = "Forecast Value")