This vignette reproduce a design considered in Cucci et al. (2022) and implement a Monte Carlo simulation comparing the performance of the GMWMX-1, the GMWMX-2 and the MLE implemented in Hector. Note that this file is not intended to be run on a personal computer but should rather be executed on a high performance computing cluster.
# libraries
library(simts)
library(gmwmx)
library(dplyr)
<- function(theta_vec, ci_mat) {
check_if_theta_in_ci <- vector(mode = "logical", length = length(theta_vec))
vec_emp_coverage for (i in seq(length(theta_vec))) {
if (dplyr::between(theta_vec[i], ci_mat[i, 1], ci_mat[i, 2])) {
<- T
vec_emp_coverage[i] else {
} <- F
vec_emp_coverage[i]
}
}return(as.numeric(vec_emp_coverage))
}
# we consider example the model considered in model 3
<- 0.45
phase <- 2.5
amplitude <- 15
sigma2_wn <- 10
sigma2_powerlaw <- 0.4
d <- 0
bias <- 5 / 365.25
trend <- amplitude * cos(phase)
cosU <- amplitude * sin(phase) sinU
Let us consider 5 years of daily data
# consider n years of daily observations
<- 20
year <- year * 365 n
We consider a gaussian White noise and a Power Law process as the stochastic model of the residuals \(\varepsilon\).
Note that the functions that enable to generate stochastic models
that include Power Law process, Matèrn process or Fractional Gaussian
noise are (for now) only available from the development version of the
package simts
that can be easily installed with:
install.packages("devtools")
::install_github("SMAC-Group/simts") devtools
# define model for generating gaussian white noise + PLP
<- WN(sigma2 = sigma2_wn) + PLP(sigma2 = sigma2_powerlaw, d = d) model_gaussian_wn_plp
We consider three location shifts (jumps) at three different heights.
# generate data
# define time at which there are jumps
<- c(200, 300, 500)
jump_vec <- c(10, 15, 20) jump_height
We define a seed for reproducibility
# define seed
<- 123
myseed
# add trend, gaps and sin
<- 1 # define number of sinusoidal process nbr_sin
and create the matrix \(\boldsymbol{A}\) with function
create_A_matrix()
.
# define A matrix
<- create_A_matrix(1:n, jump_vec, n_seasonal = nbr_sin) A
We set the vector of functional parameters \(\boldsymbol{x}_0\).
# define beta
<- c(bias, trend, cosU, sinU, jump_height) x_0
We define the number of simulations to perform
# define number of Monte Carlo simulation
<- 1000
n_simu
# define number of parameter estimated (depends on the model)
# bias + trend + height * nbr of jump vec + 2* nbr of sin process + wn + powerlaw parameters
<- 4
nbr_param_check_coverage <- 2 + length(jump_vec) + 2 * nbr_sin + 3
nbr_param <- (nbr_param + nbr_param_check_coverage) * 3 + 1 * 3
dim_mat_results
# define matrix of results
<- matrix(NA, ncol = dim_mat_results, nrow = n_simu) mat_results
We perform the simulation, saving for each Monte Carlo run, the estimated parameters of the functional and stochastic model and the empirical coverage on the functional parameters for the GMWMX-1, the GMWMX-2 as well as the MLE implemented in Hector.
for (simu_b in seq(n_simu)) {
# fix seed for reproducibility
set.seed(myseed + simu_b)
# generate residuals from a Gaussian White noise + Power law process
<- simts::gen_gts(model = model_gaussian_wn_plp, n = n)
eps
# create time series
<- A %*% x_0 + eps
yy
# create gnssts
<- create.gnssts(t = 1:length(yy), y = yy, jumps = jump_vec)
gnssts_obj
# MLE
<- estimate_hector(
fit_simu_b_mle x = gnssts_obj,
model = "wn+powerlaw",
n_seasonal = 1
)
# gmwmx 1 step
<- estimate_gmwmx(
fit_simu_b_gmwm_1_step x = gnssts_obj,
model = "wn+powerlaw",
theta_0 = c(0.1, 0.1, 0.1),
n_seasonal = 1,
ci = T,
k_iter = 1
)
# gmwmx 2 steps
<- estimate_gmwmx(
fit_simu_b_gmwm_2_step x = gnssts_obj,
model = "wn+powerlaw",
theta_0 = c(0.1, 0.1, 0.1),
n_seasonal = 1,
ci = T,
k_iter = 2
)
# define vector of estimators
<- c(fit_simu_b_mle$beta_hat, fit_simu_b_mle$theta_hat)
fit_mle <- c(fit_simu_b_gmwm_1_step$beta_hat, fit_simu_b_gmwm_1_step$theta_hat)
fit_gmwm_1_step <- c(fit_simu_b_gmwm_2_step$beta_hat, fit_simu_b_gmwm_2_step$theta_hat)
fit_gmwm_2_step
# compute coverage for each method
<- .05
alpha <- qnorm(1 - alpha / 2)
z_val
<- matrix(c(
mat_ci_mle_beta $beta_hat - z_val * fit_simu_b_mle$beta_std,
fit_simu_b_mle$beta_hat + z_val * fit_simu_b_mle$beta_std
fit_simu_b_mle
),byrow = F, ncol = 2
)
<- matrix(c(
mat_ci_gmwm_1_step_beta $beta_hat - z_val * fit_simu_b_gmwm_1_step$beta_std,
fit_simu_b_gmwm_1_step$beta_hat + z_val * fit_simu_b_gmwm_1_step$beta_std
fit_simu_b_gmwm_1_step
),byrow = F, ncol = 2
)
<- matrix(c(
mat_ci_gmwm_2_step_beta $beta_hat - z_val * fit_simu_b_gmwm_2_step$beta_std,
fit_simu_b_gmwm_2_step$beta_hat + z_val * fit_simu_b_gmwm_2_step$beta_std
fit_simu_b_gmwm_2_step
),byrow = F, ncol = 2
)
# save empirical coverage
<- check_if_theta_in_ci(x_0, mat_ci_mle_beta)[1:4]
inside_ci_mle <- check_if_theta_in_ci(x_0, mat_ci_gmwm_1_step_beta)[1:4]
inside_ci_gmwm_1 <- check_if_theta_in_ci(x_0, mat_ci_gmwm_2_step_beta)[1:4]
inside_ci_gmwm_2
# define vector of estimated parameters as object
<- c(
res
fit_mle, fit_gmwm_1_step,
fit_gmwm_2_step,
inside_ci_mle,
inside_ci_gmwm_1, inside_ci_gmwm_2,$estimation_time[3], fit_simu_b_gmwm_1_step$estimation_time[3], fit_simu_b_gmwm_2_step$estimation_time[3]
fit_simu_b_mle
)
# save in matrix of results
<- res
mat_results[simu_b, ]
# print status
cat(paste("Completed simulation", simu_b, "\n",sep = " "))
}
# define names of mat results
<- c("bias", "trend", "cosU", "sinU")
name_param_functionnal colnames(mat_results) <- c(
paste("mle", names(fit_mle), sep = "_"),
paste("gmwm_1_step", names(fit_gmwm_1_step), sep = "_"),
paste("gmwm_2_step", names(fit_gmwm_2_step), sep = "_"),
paste0("mle_inside_ci_", name_param_functionnal),
paste0("gmwm_1_inside_ci_", name_param_functionnal),
paste0("gmwm_2_inside_ci_", name_param_functionnal),
c("time_mle", "time_gmwm_1", "time_gmwm_2")
)