## ----include = FALSE----------------------------------------------------------
# Chunk defaults for knitr: collapse = TRUE and "#>" as the output prefix.
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

# Cap the thread count at two for data.table (which xgboost uses), for BLAS
# and for OpenMP.
data.table::setDTthreads(2)
RhpcBLASctl::blas_set_num_threads(2)
RhpcBLASctl::omp_set_num_threads(2)

## -----------------------------------------------------------------------------
# Attach tidysdm and read the example dataset shipped in its extdata folder.
library(tidysdm)
lacerta_thin <-
  system.file("extdata/lacerta_thin_all_vars.rds", package = "tidysdm") %>%
  readRDS()
# Replace the 37th value of bio05 with a missing value.
lacerta_thin$bio05[37] <- NA

## -----------------------------------------------------------------------------
# Recipe with formula `class ~ .`, followed by a step that removes the listed
# variables.
lacerta_rec <- step_rm(
  recipe(lacerta_thin, formula = class ~ .),
  all_of(c(
    "bio01", "bio02", "bio03", "bio04",
    "bio07", "bio08", "bio09", "bio10",
    "bio11", "bio12", "bio14", "bio16",
    "bio17", "bio18", "bio19", "altitude"
  ))
)

# Workflow set combining the recipe with a GLM and a random forest; the
# control option stores information needed later to create the ensemble.
lacerta_models <- option_add(
  workflow_set(
    preproc = list(default = lacerta_rec),
    models = list(
      glm = sdm_spec_glm(), # the standard glm specs
      rf = sdm_spec_rf() # rf specs with tuning
    ),
    cross = TRUE # make all combinations of preproc and models
  ),
  control = control_ensemble_grid()
)

## ----error=TRUE---------------------------------------------------------------
# Run the tuning inside try() so that an error raised here is reported
# without stopping the rest of the script (the chunk is marked error=TRUE).
# NOTE(review): presumably the failure shown here comes from the NA written
# into bio05 when the data were loaded -- confirm against the vignette text.
try({
# Fix the RNG seed before building the spatial block cross-validation folds.
set.seed(100)
lacerta_cv <- spatial_block_cv(lacerta_thin, v = 5)
# Map "tune_grid" over the workflows in the set, using the folds above and
# the metrics returned by sdm_metric_set().
lacerta_models <-
  lacerta_models %>%
  workflow_map("tune_grid",
    resamples = lacerta_cv, grid = 3,
    metrics = sdm_metric_set(), verbose = TRUE
  )
})

## -----------------------------------------------------------------------------
# Prep the recipe on lacerta_thin and display the result.
lacerta_prep <- prep(lacerta_rec, lacerta_thin)
lacerta_prep

## -----------------------------------------------------------------------------
# Read the example dataset from the tidysdm package again, replacing the
# current lacerta_thin.
lacerta_thin <-
  system.file("extdata/lacerta_thin_all_vars.rds", package = "tidysdm") %>%
  readRDS()
# Variables handed to step_select() in the recipe below.
suggested_vars <- c("bio05", "bio06", "bio13", "bio14", "bio15")
# Recipe with formula `class ~ .`, followed by a step_select() on those names.
lacerta_rec_sel <- step_select(
  recipe(lacerta_thin, formula = class ~ .),
  all_of(suggested_vars)
)

## ----error=TRUE---------------------------------------------------------------
# Build and tune a workflow set based on the step_select() recipe. Everything
# runs inside try() so that an error raised here is reported without stopping
# the rest of the script (the chunk is marked error=TRUE).
try({
lacerta_models <-
  # create the workflow_set
  workflow_set(
    preproc = list(default = lacerta_rec_sel),
    models = list(
      # the standard glm specs
      glm = sdm_spec_glm(),
      # rf specs with tuning
      rf = sdm_spec_rf()
    ),
    # make all combinations of preproc and models,
    cross = TRUE
  ) %>%
  # tweak controls to store information needed later to create the ensemble
  option_add(control = control_ensemble_grid())

# Fix the RNG seed before building the spatial block cross-validation folds.
set.seed(100)
lacerta_cv <- spatial_block_cv(lacerta_thin, v = 5)
# Map "tune_grid" over the workflows in the set, using the folds above and
# the metrics returned by sdm_metric_set().
lacerta_models <-
  lacerta_models %>%
  workflow_map("tune_grid",
    resamples = lacerta_cv, grid = 3,
    metrics = sdm_metric_set(), verbose = TRUE
  )
})

## -----------------------------------------------------------------------------
# Prep the step_select() recipe on lacerta_thin and display the result.
lacerta_prep_sel <- prep(lacerta_rec_sel, lacerta_thin)
lacerta_prep_sel

## -----------------------------------------------------------------------------
# Read the example dataset from the tidysdm package again, replacing the
# current lacerta_thin.
lacerta_thin <-
  system.file("extdata/lacerta_thin_all_vars.rds", package = "tidysdm") %>%
  readRDS()

# Recipe with formula `class ~ .`, followed by a step that removes the listed
# variables.
lacerta_rec <- step_rm(
  recipe(lacerta_thin, formula = class ~ .),
  all_of(c(
    "bio01", "bio02", "bio03", "bio04",
    "bio07", "bio08", "bio09", "bio10",
    "bio11", "bio12", "bio14", "bio16",
    "bio17", "bio18", "bio19", "altitude"
  ))
)

# Workflow set combining the recipe with a GLM and a GAM.
lacerta_models <- workflow_set(
  preproc = list(default = lacerta_rec),
  models = list(
    glm = sdm_spec_glm(), # the standard glm specs
    gam = sdm_spec_gam() # the standard gam specs
  ),
  cross = TRUE # make all combinations of preproc and models
)
# Set the formula for the GAM workflow, built from the recipe.
lacerta_models <- update_workflow_model(
  lacerta_models, "default_gam",
  spec = sdm_spec_gam(),
  formula = gam_formula(lacerta_rec)
)
# Tweak controls to store information needed later to create the ensemble.
lacerta_models <- option_add(lacerta_models, control = control_ensemble_grid())

## -----------------------------------------------------------------------------
# Seed the RNG, then build five spatial block cross-validation folds.
set.seed(100)
lacerta_cv <- spatial_block_cv(lacerta_thin, v = 5)
# Map "tune_grid" over the workflows in the set, using those folds and the
# metrics returned by sdm_metric_set().
lacerta_models <- workflow_map(
  lacerta_models, "tune_grid",
  resamples = lacerta_cv,
  grid = 3,
  metrics = sdm_metric_set(),
  verbose = TRUE
)

## -----------------------------------------------------------------------------
# Read the example dataset from the tidysdm package again, replacing the
# current lacerta_thin.
lacerta_thin <-
  system.file("extdata/lacerta_thin_all_vars.rds", package = "tidysdm") %>%
  readRDS()
# Seed the RNG and keep a random subset of rows; the requested sample size
# is one fifth of the row count.
set.seed(123)
lacerta_thin <- lacerta_thin[
  sample(seq_len(nrow(lacerta_thin)), nrow(lacerta_thin) / 5),
]

# Recipe with formula `class ~ .`, followed by a step that removes the listed
# variables.
lacerta_rec <- step_rm(
  recipe(lacerta_thin, formula = class ~ .),
  all_of(c(
    "bio01", "bio02", "bio03", "bio04",
    "bio07", "bio08", "bio09", "bio10",
    "bio11", "bio12", "bio14", "bio16",
    "bio17", "bio18", "bio19", "altitude"
  ))
)

# Workflow set combining the recipe with a GLM, a GAM and a random forest.
lacerta_models <- workflow_set(
  preproc = list(default = lacerta_rec),
  models = list(
    glm = sdm_spec_glm(), # the standard glm specs
    gam = sdm_spec_gam(), # the standard gam specs
    rf = sdm_spec_rf() # rf specs with tuning
  ),
  cross = TRUE # make all combinations of preproc and models
)
# Set the formula for the GAM workflow, built from the recipe.
lacerta_models <- update_workflow_model(
  lacerta_models, "default_gam",
  spec = sdm_spec_gam(),
  formula = gam_formula(lacerta_rec)
)
# Tweak controls to store information needed later to create the ensemble.
lacerta_models <- option_add(lacerta_models, control = control_ensemble_grid())

## -----------------------------------------------------------------------------
# Seed the RNG, then build three spatial block cross-validation folds.
set.seed(100)
lacerta_cv <- spatial_block_cv(lacerta_thin, v = 3)
# Map "tune_grid" over the workflows in the set, using those folds and the
# metrics returned by sdm_metric_set().
lacerta_models <- workflow_map(
  lacerta_models, "tune_grid",
  resamples = lacerta_cv,
  grid = 3,
  metrics = sdm_metric_set(),
  verbose = TRUE
)

## -----------------------------------------------------------------------------
# Pull the result stored for the "default_gam" workflow out of the set and
# display it.
gam_results <- lacerta_models %>%
  extract_workflow_set_result(id = "default_gam")
gam_results

## -----------------------------------------------------------------------------
# Show the second entry of the .notes column of the GAM results.
gam_results[[".notes"]][2]

## -----------------------------------------------------------------------------
# Take the second split of the GAM results and summarise its training data.
problem_split <- gam_results$splits[2][[1]]
problem_split %>%
  training() %>%
  summary()

## -----------------------------------------------------------------------------
# Extract the "default_gam" workflow from the set and fit it on the training
# data of the split selected above.
gam_workflow <- lacerta_models %>%
  extract_workflow(id = "default_gam")
faulty_gam <- fit(gam_workflow, data = training(problem_split))