Skip to content

Commit d2efd11

Browse files
Merge pull request #56 from nhs-r-community/francisbarton/issue54
Add filter_fields option to get_postcode_data() etc
2 parents 763ed1c + 7552efc commit d2efd11

15 files changed

+184
-31
lines changed

.Rbuildignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,4 @@
1010
^\.lintr
1111
^[.]?air[.]toml$
1212
^\.vscode$
13+
^data-raw$

DESCRIPTION

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,3 +43,4 @@ Suggests:
4343
URL: https://nhs-r-community.github.io/NHSRpostcodetools/,
4444
https://github.com/nhs-r-community/NHSRpostcodetools
4545
BugReports: https://github.com/nhs-r-community/NHSRpostcodetools/issues
46+
LazyData: true

R/get_postcode_data.R

Lines changed: 41 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,31 @@
77
#' @param as_list boolean, default `FALSE`. The default behaviour is to
88
#' extract the data from the list and return it as a tibble. If set `TRUE`,
99
#' an unprocessed list of the JSON data from the API will be returned instead
10-
#' @param include_codes boolean, default `TRUE`. Include columns for the ONS
11-
#' administrative codes for geographic units in the returned tibble. Irrelevant
12-
#' if `as_list` is `TRUE`; in this case all ONS codes are included as a
13-
#' nested list in the list data for each postcode.
14-
#' See \url{https://postcodes.io/docs/postcode/lookup} for details
10+
#' @param filter character vector of field names to return. The default is
11+
#' [filter_fields], which by default evaluates to `NULL`, which means no
12+
#' fields will be filtered out from the returned data. You can also use
13+
#' [filter_fields] to validate a vector of field names that you supply - see
14+
#' Examples below.
15+
#' The full list of field names is available in the [schema_table] data. Some
16+
#' other helper functions are provided for common scenarios: [exclude_codes]
17+
#' and [minimal_fields].
18+
#' See \url{https://postcodes.io/docs/postcode/bulk} for details.
1519
#' @examples
16-
#' get_postcode_data(c("NP22 3PS", "NP22 4PS", "NP22 5PS"))
20+
#' codes <- c("NP22 3PS", "NP22 4PS", "NP22 5PS")
21+
#' get_postcode_data(codes)
22+
#' my_fields <- c("postcode", "lsoa", "codes.lsoa", "eastings", "northings")
23+
#' # get_postcode_data(codes, filter = my_fields) is fine, but using
24+
#' # filter_fields() to wrap your vector gives you a validation check:
25+
#' get_postcode_data(codes, filter = filter_fields(my_fields))
26+
#' # The `schema_table` dataset within NHSRpostcodetools contains a "field"
27+
#' # column with all available fields. You can use this as a starting point:
28+
#' excl_fields <- setdiff(schema_table[["field"]], c("quality", "country"))
29+
#' get_postcode_data(codes, filter = filter_fields(excl_fields))
30+
#' # or use a helper function as a starting point to add to:
31+
#' get_postcode_data(codes, TRUE, filter_fields(c(minimal_fields(), "region")))
1732
#' @returns A tibble by default, or a list if `as_list` is `TRUE`. If no valid
#'  postcodes are supplied, `NULL` is returned invisibly.
1833
#' @export
19-
get_postcode_data <- function(x, as_list = FALSE, include_codes = TRUE) {
34+
get_postcode_data <- function(x, as_list = FALSE, filter = filter_fields()) {
2035
x <- unique(toupper(purrr::discard(x, is.na)))
2136
assertthat::assert_that(length(x) > 0L, msg = "No postcodes have been found.")
2237

@@ -37,19 +52,18 @@ get_postcode_data <- function(x, as_list = FALSE, include_codes = TRUE) {
3752
if (length(valid_codes) > 0L) {
3853
results_list <- valid_codes |>
3954
batch_it(100L) |>
40-
purrr::map(bulk_lookup, .progress = "Looking up postcode data...")
55+
purrr::map(
56+
\(x) bulk_lookup(x, filter_fields = filter),
57+
.progress = "Looking up postcode data..."
58+
)
4159

4260
if (as_list) {
4361
results_list
4462
} else {
45-
results_df <- tibblise_results_list(results_list)
46-
if (!include_codes) {
47-
dplyr::select(results_df, !tidyselect::ends_with("_code"))
48-
} else {
49-
results_df
50-
}
63+
tibblise_results_list(results_list)
5164
}
5265
} else {
66+
cli::cli_alert_warning("No valid postcodes were supplied")
5367
invisible(NULL)
5468
}
5569
}
@@ -69,12 +83,23 @@ get_postcode_data <- function(x, as_list = FALSE, include_codes = TRUE) {
6983
#' ) |>
7084
#' postcode_data_join()
7185
#' @export
72-
postcode_data_join <- function(tbl, .col = "postcode", include_codes = TRUE) {
86+
postcode_data_join <- function(
87+
tbl,
88+
.col = "postcode",
89+
filter = filter_fields()
90+
) {
7391
assertthat::assert_that(
7492
inherits(tbl, "data.frame"),
7593
.col %in% colnames(tbl)
7694
)
77-
api_data <- get_postcode_data(tbl[[.col]], include_codes = include_codes)
95+
assertthat::assert_that(
96+
(is.null(filter) || "postcode" %in% filter),
97+
msg = cli::cli_abort(paste0(
98+
"The {.val postcode} field must be included in {.arg filter} in order ",
99+
"for {.fn postcode_data_join} to work."
100+
))
101+
)
102+
api_data <- get_postcode_data(tbl[[.col]], filter = filter)
78103
dplyr::left_join(tbl, api_data, by = dplyr::join_by({{ .col }} == "postcode"))
79104
}
80105

R/helpers.R

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,22 @@
1+
#' Helper function to check a vector of fields to filter
2+
#' @param fields character vector. See [schema_table].
3+
#' @export
4+
filter_fields <- function(fields = NULL) {
5+
if (!is.null(fields)) {
6+
fields <- unique(rlang::arg_match(fields, schema_fields, multiple = TRUE))
7+
}
8+
fields
9+
}
10+
11+
#' Provides a vector of fields to return, which excludes all ONS code fields
12+
#' @export
13+
exclude_codes <- \() purrr::discard(schema_fields, \(x) grepl("^codes", x))
14+
15+
#' Provides a minimal vector of fields to return
16+
#' @export
17+
minimal_fields <- \() c("postcode", "lsoa", "msoa", "admin_district")
18+
19+
120
#' Batch a vector or list into a list of elements with a maximum size
221
#'
322
#' @param x A vector or list

R/schema_table.R

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
#' Copy of the postcodes.io schema summary table
2+
#'
3+
#' @format ## `schema_table`
4+
#' A data frame with one row per postcodes.io schema field and 4 columns:
5+
#' \describe{
6+
#' \item{field}{field name}
7+
#' \item{type}{type of data expected for this field}
8+
#' \item{description}{description of the field and its format}
9+
#' \item{example}{an example of the type of data expected to be returned}
10+
#' }
11+
#' @source <https://postcodes.io/docs/postcode/schema>
12+
"schema_table"

R/sysdata.rda

338 Bytes
Binary file not shown.

data-raw/schema_fields.R

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
schema_page <- rvest::read_html("https://postcodes.io/docs/postcode/schema")
2+
3+
schema_table <- schema_page |>
4+
rvest::html_elements("table") |>
5+
purrr::pluck(1L) |>
6+
rvest::html_table() |>
7+
dplyr::rename_with(tolower)
8+
9+
schema_fields <- schema_table[["field"]]
10+
11+
usethis::use_data(schema_fields, internal = TRUE)
12+
usethis::use_data(schema_table)

data/schema_table.rda

1.91 KB
Binary file not shown.

man/exclude_codes.Rd

Lines changed: 11 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/filter_fields.Rd

Lines changed: 14 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)