nhs-r-community
diff --git a/‎.Rbuildignore‎
Lines changed: 1 addition & 0 deletions b/‎.Rbuildignore‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎DESCRIPTION‎
Lines changed: 1 addition & 0 deletions b/‎DESCRIPTION‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎R/get_postcode_data.R‎
Lines changed: 41 additions & 16 deletions b/‎R/get_postcode_data.R‎
Lines changed: 41 additions & 16 deletions
diff --git a/‎R/helpers.R‎
Lines changed: 19 additions & 0 deletions b/‎R/helpers.R‎
Lines changed: 19 additions & 0 deletions
diff --git a/‎R/schema_table.R‎
Lines changed: 12 additions & 0 deletions b/‎R/schema_table.R‎
Lines changed: 12 additions & 0 deletions
diff --git a/‎R/sysdata.rda‎
338 Bytes b/‎R/sysdata.rda‎
338 Bytes
diff --git a/‎data-raw/schema_fields.R‎
Lines changed: 12 additions & 0 deletions b/‎data-raw/schema_fields.R‎
Lines changed: 12 additions & 0 deletions
diff --git a/‎data/schema_table.rda‎
1.91 KB b/‎data/schema_table.rda‎
1.91 KB
diff --git a/‎man/exclude_codes.Rd‎
Lines changed: 11 additions & 0 deletions b/‎man/exclude_codes.Rd‎
Lines changed: 11 additions & 0 deletions
diff --git a/‎man/filter_fields.Rd‎
Lines changed: 14 additions & 0 deletions b/‎man/filter_fields.Rd‎
Lines changed: 14 additions & 0 deletions
@@ -10,3 +10,4 @@
 ^\.lintr
 ^[.]?air[.]toml$
 ^\.vscode$
+^data-raw$
@@ -43,3 +43,4 @@ Suggests:
 URL: https://nhs-r-community.github.io/NHSRpostcodetools/,
     https://github.com/nhs-r-community/NHSRpostcodetools
 BugReports: https://github.com/nhs-r-community/NHSRpostcodetools/issues
+LazyData: true
@@ -7,16 +7,31 @@
 #' @param as_list boolean, default `FALSE`. The default behaviour is to
 #'  extract the data from the list and return it as a tibble. If set `TRUE`,
 #'  an unprocessed list of the JSON data from the API will be returned instead
-#' @param include_codes boolean, default `TRUE`. Include columns for the ONS
-#'  administrative codes for geographic units in the returned tibble. Irrelevant
-#'  if `as_list` is `TRUE`; in this case all ONS codes are included as a
-#'  nested list in the list data for each postcode.
-#'  See \url{https://postcodes.io/docs/postcode/lookup} for details
+#' @param filter character vector or helper function. The default is
+#'  [filter_fields], which by default evaluates to `NULL`, which means no
+#'  fields will be filtered out from the returned data. You can also use
+#'  [filter_fields] to validate a vector of field names that you supply - see
+#'  Examples below.
+#'  The full list of field names is available in the [schema_table] data. Some
+#'  other helper functions are provided for common scenarios: [exclude_codes]
+#'  and [minimal_fields].
+#'  See \url{https://postcodes.io/docs/postcode/bulk} for details.
 #' @examples
-#' get_postcode_data(c("NP22 3PS", "NP22 4PS", "NP22 5PS"))
+#'  codes <- c("NP22 3PS", "NP22 4PS", "NP22 5PS")
+#'  get_postcode_data(codes)
+#'  my_fields <- c("postcode", "lsoa", "codes.lsoa", "eastings", "northings")
+#'  # get_postcode_data(codes, filter = my_fields) is fine, but using
+#'  # filter_fields() to wrap your vector gives you a validation check:
+#'  get_postcode_data(codes, filter = filter_fields(my_fields))
+#'  # The `schema_table` dataset within NHSRpostcodetools contains a "field"
+#'  # column with all available fields. You can use this as a starting point:
+#'  excl_fields <- setdiff(schema_table[["field"]], c("quality", "country"))
+#'  get_postcode_data(codes, filter = filter_fields(excl_fields))
+#'  # or use a helper function as a starting point to add to:
+#'  get_postcode_data(codes, TRUE, filter_fields(c(minimal_fields(), "region")))
 #' @returns A tibble by default, otherwise a list if `as_list` is TRUE
 #' @export
-get_postcode_data <- function(x, as_list = FALSE, include_codes = TRUE) {
+get_postcode_data <- function(x, as_list = FALSE, filter = filter_fields()) {
   x <- unique(toupper(purrr::discard(x, is.na)))
   assertthat::assert_that(length(x) > 0L, msg = "No postcodes have been found.")
 
@@ -37,19 +52,18 @@ get_postcode_data <- function(x, as_list = FALSE, include_codes = TRUE) {
   if (length(valid_codes) > 0L) {
     results_list <- valid_codes |>
       batch_it(100L) |>
-      purrr::map(bulk_lookup, .progress = "Looking up postcode data...")
+      purrr::map(
+        \(x) bulk_lookup(x, filter_fields = filter),
+        .progress = "Looking up postcode data..."
+      )
 
     if (as_list) {
       results_list
     } else {
-      results_df <- tibblise_results_list(results_list)
-      if (!include_codes) {
-        dplyr::select(results_df, !tidyselect::ends_with("_code"))
-      } else {
-        results_df
-      }
+      tibblise_results_list(results_list)
     }
   } else {
+    cli::cli_alert_warning("No valid postcodes were supplied")
     invisible(NULL)
   }
 }
@@ -69,12 +83,23 @@ get_postcode_data <- function(x, as_list = FALSE, include_codes = TRUE) {
 #' ) |>
 #'   postcode_data_join()
 #' @export
-postcode_data_join <- function(tbl, .col = "postcode", include_codes = TRUE) {
+postcode_data_join <- function(
+  tbl,
+  .col = "postcode",
+  filter = filter_fields()
+) {
   assertthat::assert_that(
     inherits(tbl, "data.frame"),
     .col %in% colnames(tbl)
   )
-  api_data <- get_postcode_data(tbl[[.col]], include_codes = include_codes)
+  # The join below matches on the API's `postcode` column, so it must be kept
+  if (!is.null(filter) && !("postcode" %in% filter)) {
+    cli::cli_abort(paste0(
+      "The {.val postcode} field must be included in {.arg filter} in order ",
+      "for {.fn postcode_data_join} to work."
+    ))
+  }
+  api_data <- get_postcode_data(tbl[[.col]], filter = filter)
   dplyr::left_join(tbl, api_data, by = dplyr::join_by({{ .col }} == "postcode"))
 }
 
 
@@ -1,3 +1,22 @@
+#' Validate a vector of field names against the known schema fields
+#' @param fields character vector of field names, or `NULL`. See [schema_table].
+#' @export
+filter_fields <- function(fields = NULL) {
+  if (is.null(fields)) {
+    return(NULL)
+  }
+  unique(rlang::arg_match(fields, schema_fields, multiple = TRUE))
+}
+
+#' Returns every schema field name except those beginning with "codes"
+#' @export
+exclude_codes <- function() schema_fields[!grepl("^codes", schema_fields)]
+
+#' Returns a short, fixed vector of field names to request
+#' @export
+minimal_fields <- function() c("postcode", "lsoa", "msoa", "admin_district")
+
+
 #' Batch a vector or list into a list of elements with a maximum size
 #'
 #' @param x A vector or list
 
@@ -0,0 +1,12 @@
+#' Copy of the postcodes.io schema summary table
+#'
+#' @format ## `schema_table`
+#' A data frame with one row per postcodes.io schema field and 4 columns:
+#' \describe{
+#'   \item{field}{field name}
+#'   \item{type}{type of data expected for this field}
+#'   \item{description}{description of the field and its format}
+#'   \item{example}{an example of the type of data expected to be returned}
+#' }
+#' @source <https://postcodes.io/docs/postcode/schema>
+"schema_table"
@@ -0,0 +1,12 @@
+schema_page <- rvest::read_html("https://postcodes.io/docs/postcode/schema")
+# Take the first table on the schema page and lower-case its column names
+schema_table <- schema_page |>
+  rvest::html_elements("table") |>
+  purrr::pluck(1L) |>
+  rvest::html_table() |>
+  dplyr::rename_with(tolower)
+# Field names only, saved as internal data (used to validate field names)
+schema_fields <- schema_table[["field"]]
+# overwrite = TRUE so this script can be re-run to refresh existing data files
+usethis::use_data(schema_fields, internal = TRUE, overwrite = TRUE)
+usethis::use_data(schema_table, overwrite = TRUE)