## ----message=FALSE------------------------------------------------------------
# Load core libraries; install these packages if you have not already
library(ridigbio)
library(tidyverse)

# Load library for making nice HTML output
library(kableExtra)

## ----echo = FALSE-------------------------------------------------------------

verify_records <- FALSE

#Test that examples will run
tryCatch({
    # Your code that might throw an error
    verify_records <- idig_search_records(rq = list(genus = c("manis",
                                                   "rhinolophus",
                                                   "paguma")),
        limit = 10)
}, error = function(e) {
    # Code to run if an error occurs
    cat("An error occurred during the idig_search_records call: ", e$message, "\n")
    cat("Vignettes will not be fully generated. Please try again after resolving the issue.")
    # Optionally, you can return NULL or an empty dataframe
    verify_records <- FALSE
})

## ----eval=verify_records------------------------------------------------------
# Edit the fields (e.g. `genus`) and values (e.g. "manis") in `list()` 
# to adjust your query and the fields (e.g. `uuid`) in `fields` to adjust the
# columns returned in your results
records <- idig_search_records(rq = list(genus = c("manis",
                                                   "rhinolophus",
                                                   "paguma")),
                       fields = c("uuid",
                                  "recordset",
                                  "institutioncode",
                                  "genus",
                                  "scientificname",
                                  "country",
                                  "data.dwc:year",
                                  "data.dwc:collectionCode",
                                  "catalognumber",
                                  "data.dwc:preparations"))

## ----eval=verify_records, echo = FALSE, results = 'asis'----------------------
knitr::kable(head(records)) %>% 
    kable_styling(bootstrap_options = 
                         c("striped", "hover", "condensed", "responsive")) %>% 
  scroll_box(width = "100%")

## ----eval=verify_records------------------------------------------------------
# List distinct values for the `preparation` field
prepsummary <- records %>% 
  group_by(`data.dwc:preparations`) %>% 
  tally()

# Display `prepsummary` in HTML output
knitr::kable(prepsummary) %>% 
    kable_styling(bootstrap_options = 
                         c("striped", "hover", "condensed", "responsive"),
                  full_width = FALSE) %>% 
  scroll_box(height = "400px")

## ----eval=verify_records------------------------------------------------------
# Normalize values in `data.dwc:preparations` to be all lowercase; then
# filter rows that include our search terms
recordsfiltered <- records %>% 
  mutate(`data.dwc:preparations` = str_to_lower(`data.dwc:preparations`)) %>% 
  filter(grepl('freeze|froze|tissue', `data.dwc:preparations`))

## ----eval=verify_records, echo = FALSE, results = 'asis'----------------------
knitr::kable(recordsfiltered) %>% 
    kable_styling(bootstrap_options = 
                         c("striped", "hover", "condensed", "responsive")) %>% 
  scroll_box(height = "600px")

# If you have this code open in R, you can uncomment the line below to
# save `recordsfiltered` as a csv file to your working directory
# write_csv(recordsfiltered, "recordsfiltered.csv")

## ----eval=verify_records, echo = FALSE, results = 'asis'----------------------
# List distinct values for the `preparation` field in recordsfiltered
recordsfiltered %>% 
  group_by(`data.dwc:preparations`) %>% 
  tally() %>% 
  knitr::kable() %>% 
    kable_styling(bootstrap_options = 
                         c("striped", "hover", "condensed", "responsive"),
                  full_width = FALSE) %>% 
  scroll_box(height = "400px")

## ----eval=verify_records------------------------------------------------------
# Count how many records in the data were contributed by each recordset
recordtally <- recordsfiltered %>% 
  group_by(recordset) %>% 
  tally() %>% rename()

# Get metadata from the attributes of the `records` data frame
collections <- tibble(collection = attr(recordsfiltered, "attribution")) %>% 
  # Expand information captured in nested lists
  hoist(collection, 
        recordset_uuid = "uuid",
        recordset_name = "name",
        recordset_url= "url",
        contacts = "contacts") %>% 
  # Get rid of extraneous attribution metadata
  select(-collection) %>% 
  # Expand information captured in nested lists
  unnest_longer(contacts) %>% 
  # Expand information captured in nested lists
  unnest_wider(contacts) %>% 
  # Remove any contacts without an email address listed
  filter(!is.na(email)) %>% 
  # Get rid of duplicate contacts within the same recordset
  distinct() %>% 
  # Rename some columns
  rename(contact_role = role, contact_email = email) %>% 
  # Group first and last names together in the same column
  unite(col = "contact_name", 
        first_name, last_name, 
        sep = " ", 
        na.rm = TRUE) %>% 
  # Restructure data frame so that there is one row per recordset
  group_by(recordset_uuid) %>% 
  mutate(contact_index = row_number()) %>%
  mutate(recordset_url = if_else(grepl("^http://", recordset_url),
    gsub("^http://", "https://", recordset_url),
    recordset_url
  )) %>%
  mutate(recordset_url = if_else(grepl("www.amnh.org/our-research/vertebrate-zoology/mammalogy", recordset_url),
    gsub("www.amnh.org/our-research/vertebrate-zoology/mammalogy", "www.amnh.org/research/vertebrate-zoology/mammalogy", recordset_url),
    recordset_url
  )) %>%
  mutate(recordset_url = if_else(grepl("www.burkemuseum.org/mammalogy", recordset_url),
    gsub("www.burkemuseum.org/mammalogy", "www.burkemuseum.org/collections-and-research/biology/mammalogy", recordset_url),
    recordset_url
  )) %>%
  mutate(recordset_url = if_else(grepl("www.nhm.org", recordset_url),
    gsub("www.nhm.org", "nhm.org", recordset_url),
    recordset_url
  )) %>%
  mutate(recordset_url = if_else(
    grepl("appl003.lsu.edu/natsci/lmns.nsf/\\$Content/Mammals\\?OpenDocument", recordset_url),
    gsub("appl003.lsu.edu/natsci/lmns.nsf/\\$Content/Mammals\\?OpenDocument", "appl103.lsu.edu/natsci/Collections/natscicolsearch.nsf/OpenMainPage?OpenAgent&ID=1042", recordset_url),
    recordset_url
  )) %>%
  mutate(recordset_url = if_else(grepl("www.nsrl.ttu.edu/collections/Mammals/index.htm", recordset_url),
    gsub("www.nsrl.ttu.edu/collections/Mammals/index.htm", "www.depts.ttu.edu/nsrl/collections/mammal.php", recordset_url),
    recordset_url
  )) %>%
  mutate(recordset_url = if_else(grepl("sites01.lsu.edu/wp/mns/research-collections/genetic-resources/", recordset_url),
    gsub("sites01.lsu.edu/wp/mns/research-collections/genetic-resources/", "appl103.lsu.edu/natsci/Collections/natscicolsearch.nsf/OpenMainPage?OpenAgent&ID=1050", recordset_url),
    recordset_url
  )) %>%
  mutate(recordset_url = if_else(grepl("https://www.msb.unm.edu", recordset_url),
    gsub("www.msb.unm.edu", "www.msb.unm.edu", recordset_url),
    recordset_url
  )) %>%
  pivot_wider(names_from = contact_index,
                values_from = c(contact_name, contact_role, contact_email)) %>%
  # Include how many records in the data were contributed by each recordset
  left_join(recordtally, by = c("recordset_uuid"="recordset")) %>% 
   # Filter and remove n = 0
  filter(!is.na(n)) %>% 
  # Get rid of any rows which don't actually contribute data to `records`;
  # necessary because the attribute metadata by default includes all recordsets
  # in iDigBio that match the `idig_search_records` query, even if you filter
  # or limit those results in your own code
  filter(recordset_uuid %in% records$recordset) 
  

## ----eval=verify_records, echo = FALSE, results = 'asis'----------------------
knitr::kable(collections) %>% 
    kable_styling(bootstrap_options = 
                         c("striped", "hover", "condensed", "responsive")) %>% 
  scroll_box(height = "400px")