## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) # xgboost uses data.table data.table::setDTthreads(2) RhpcBLASctl::blas_set_num_threads(2) RhpcBLASctl::omp_set_num_threads(2) download_data <- FALSE # note that data were created in the overview vignette ## ----------------------------------------------------------------------------- library(tidysdm) lacerta_ensemble ## ----------------------------------------------------------------------------- explainer_lacerta_ens <- explain_tidysdm(lacerta_ensemble) ## ----vip, fig.width=6, fig.height=4------------------------------------------- library(DALEX) vip_ensemble <- model_parts(explainer = explainer_lacerta_ens) plot(vip_ensemble) ## ----pdp, fig.width=6, fig.height=4------------------------------------------- pdp_bio05 <- model_profile(explainer_lacerta_ens, N = 500, variables = "bio05") plot(pdp_bio05) ## ----------------------------------------------------------------------------- explainer_list <- explain_tidysdm(tidysdm::lacerta_ensemble, by_workflow = TRUE) ## ----profile, fig.width=6, fig.height=4--------------------------------------- profile_list <- lapply(explainer_list, model_profile, N = 500, variables = "bio05" ) plot(profile_list) ## ----------------------------------------------------------------------------- library(tidysdm) library(sf) lacerta_thin <- readRDS(system.file("extdata/lacerta_thin_all_vars.rds", package = "tidysdm" )) ## ----initial_split, fig.width=6, fig.height=4--------------------------------- set.seed(1005) lacerta_initial <- spatial_initial_split(lacerta_thin, prop = 1 / 5, spatial_block_cv ) autoplot(lacerta_initial) ## ----------------------------------------------------------------------------- check_splits_balance(lacerta_initial, class) ## ----training_cv, fig.width=6, fig.height=4----------------------------------- set.seed(1005) lacerta_training <- training(lacerta_initial) lacerta_cv <- spatial_block_cv(lacerta_training, v = 5, cellsize = grid_cellsize(lacerta_thin), offset = grid_offset(lacerta_thin) + 0.00001 ) autoplot(lacerta_cv) ## ----------------------------------------------------------------------------- check_splits_balance(lacerta_cv, class) ## ----recipe------------------------------------------------------------------- lacerta_rec_all <- recipe(lacerta_thin, formula = class ~ .) lacerta_rec_uncor <- lacerta_rec_all %>% step_rm(all_of(c( "bio01", "bio02", "bio03", "bio04", "bio07", "bio08", "bio09", "bio10", "bio11", "bio12", "bio14", "bio16", "bio17", "bio18", "bio19", "altitude" ))) lacerta_rec_uncor ## ----workflow_set------------------------------------------------------------- lacerta_models <- # create the workflow_set workflow_set( preproc = list( uncor = lacerta_rec_uncor, # recipe for the glm all = lacerta_rec_all, # recipe for the random forest all = lacerta_rec_uncor # recipe for svm ), models = list( # the standard glm specs glm = sdm_spec_glm(), # rf specs with tuning rf = sdm_spec_rf(), # svm specs with tuning svm = parsnip::svm_poly( cost = tune::tune(), degree = tune::tune() ) %>% parsnip::set_engine("kernlab") %>% parsnip::set_mode("classification") ), # make all combinations of preproc and models, cross = FALSE ) %>% # tweak controls to store information needed later to create the ensemble # note that we use the bayes version as we will use a Bayes search (see later) option_add(control = stacks::control_stack_bayes()) ## ----------------------------------------------------------------------------- rf_param <- lacerta_models %>% # extract the rf workflow extract_workflow("all_rf") %>% # extract its parameters dials (used to tune) extract_parameter_set_dials() %>% # give it the predictors to finalize mtry finalize(x = st_drop_geometry(lacerta_thin) %>% select(-class)) # now update the workflowset with the new parameter info lacerta_models <- lacerta_models %>% option_add(param_info = rf_param, id = "all_rf") ## ----tune_grid---------------------------------------------------------------- set.seed(1234567) lacerta_models <- lacerta_models %>% workflow_map("tune_bayes", resamples = lacerta_cv, initial = 8, metrics = sdm_metric_set(), verbose = TRUE ) ## ----------------------------------------------------------------------------- autoplot(lacerta_models) ## ----build_stack, fig.width=6, fig.height=4----------------------------------- library(stacks) set.seed(1005) lacerta_stack <- # initialize the stack stacks() %>% # add candidate members add_candidates(lacerta_models) %>% # determine how to combine their predictions blend_predictions() %>% # fit candidates with non-zero weights (i.e. non-zero stacking coefficients) fit_members() autoplot(lacerta_stack, type = "weights") ## ----predict_test------------------------------------------------------------- lacerta_testing <- testing(lacerta_initial) lacerta_test_pred <- lacerta_testing %>% bind_cols(predict(lacerta_stack, ., type = "prob")) ## ----assess_test-------------------------------------------------------------- sdm_metric_set()(data = lacerta_test_pred, truth = class, .pred_presence) ## ----eval=download_data------------------------------------------------------- # download_dataset("WorldClim_2.1_10m") # climate_vars <- lacerta_rec_all$var_info %>% # filter(role == "predictor") %>% # pull(variable) # # climate_present <- pastclim::region_slice( # time_ce = 1985, # bio_variables = climate_vars, # data = "WorldClim_2.1_10m", # crop = iberia_poly # ) ## ----echo=FALSE, results='hide', eval=!download_data-------------------------- climate_present <- terra::readRDS( system.file("extdata/lacerta_climate_present_10m.rds", package = "tidysdm") ) climate_vars <- lacerta_rec_all$var_info %>% filter(role == "predictor") %>% pull(variable) if (!all(climate_vars %in% names(climate_present))) { stop("mismatched variables in the raster") } ## ----plot_present, fig.width=6, fig.height=4---------------------------------- prediction_present <- predict_raster(lacerta_stack, climate_present, type = "prob" ) library(tidyterra) ggplot() + geom_spatraster(data = prediction_present, aes(fill = .pred_presence)) + scale_fill_terrain_c() + # plot presences used in the model geom_sf(data = lacerta_thin %>% filter(class == "presence")) ## ----------------------------------------------------------------------------- library(tidysdm) # load the dataset lacerta_thin <- readRDS(system.file("extdata/lacerta_thin_all_vars.rds", package = "tidysdm" )) # create a topography variable with 3 levels based on altitude lacerta_thin$topography <- cut(lacerta_thin$altitude, breaks = c(-Inf, 200, 800, Inf), labels = c("plains", "hills", "mountains") ) table(lacerta_thin$topography) ## ----------------------------------------------------------------------------- # subset to variable of interest lacerta_thin <- lacerta_thin %>% select( class, bio05, bio06, bio12, bio15, topography ) lacerta_rec <- recipe(lacerta_thin, formula = class ~ .) %>% step_dummy(topography) lacerta_rec ## ----------------------------------------------------------------------------- lacerta_prep <- prep(lacerta_rec) summary(lacerta_prep) ## ----------------------------------------------------------------------------- lacerta_bake <- bake(lacerta_prep, new_data = lacerta_thin) glimpse(lacerta_bake) ## ----------------------------------------------------------------------------- # define the models lacerta_models <- # create the workflow_set workflow_set( preproc = list(default = lacerta_rec), models = list( # the standard glm specs glm = sdm_spec_glm(), # rf specs with tuning rf = sdm_spec_rf() ), # make all combinations of preproc and models, cross = TRUE ) %>% # tweak controls to store information needed later to create the ensemble option_add(control = control_ensemble_grid()) # tune set.seed(100) lacerta_cv <- spatial_block_cv(lacerta_thin, v = 3) lacerta_models <- lacerta_models %>% workflow_map("tune_grid", resamples = lacerta_cv, grid = 3, metrics = sdm_metric_set(), verbose = TRUE ) # fit the ensemble lacerta_ensemble <- simple_ensemble() %>% add_member(lacerta_models, metric = "boyce_cont") ## ----------------------------------------------------------------------------- lacerta_ensemble$workflow[[1]] %>% extract_fit_parsnip() ## ----------------------------------------------------------------------------- climate_present <- terra::readRDS( system.file("extdata/lacerta_climate_present_10m.rds", package = "tidysdm" ) ) # first we add a topography variable to the climate data climate_present$topography <- climate_present$altitude climate_present$topography <- terra::classify(climate_present$topography, rcl = c(-Inf, 200, 800, Inf), include.lowest = TRUE, brackets = TRUE ) library(terra) levels(climate_present$topography) <- data.frame(ID = c(0, 1, 2), topography = c("plains", "hills", "mountains")) # now we can predict predict_factor <- predict_raster(lacerta_ensemble, climate_present) plot(predict_factor)