Skip to content

Commit 763ed1c

Browse files
Merge pull request #55 from nhs-r-community:francisbarton/issue53
Improve logic of suggest_fixes()
2 parents 281f09b + f1f3eaa commit 763ed1c

23 files changed

+394
-267
lines changed

.lintr

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
linters: all_linters()
1+
linters: linters_with_defaults()
22
exclusions: list(
33
"vignettes"
44
)

DESCRIPTION

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,10 @@ Authors@R:
1616
)
1717
)
1818
Maintainer: Zoë Turner <zoe.turner3@nhs.net>
19-
Description: Extracts data from the postcodes.io API for a given UK postcode,
20-
such as its LSOA (or equivalent), parliamentary constituency, and spatial
21-
coordinates. It can also check a list of postcodes for currency/validity
22-
and suggest a replacement current postcode for an expired code.
19+
Description: Extracts data from the postcodes.io API for collections of UK
20+
postcodes, such as their LSOA (or equivalent), parliamentary constituency,
21+
and spatial coordinates. It can also check postcodes for currency/validity
22+
and suggest replacement current postcodes for terminated codes.
2323
License: MIT + file LICENSE
2424
Encoding: UTF-8
2525
RoxygenNote: 7.3.2
@@ -37,7 +37,6 @@ Imports:
3737
rlang,
3838
tibble,
3939
tidyr,
40-
tidyselect,
4140
utils
4241
Suggests:
4342
testthat (>= 3.0.0)

NAMESPACE

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
# Generated by roxygen2: do not edit by hand
22

33
export(batch_it)
4+
export(exclude_codes)
5+
export(filter_fields)
46
export(get_postcode_data)
7+
export(minimal_fields)
58
export(postcode_data_join)
69
export(suggest_fixes)
710
importFrom(rlang,.data)
8-
importFrom(rlang,`:=`)

R/NHSRpostcodetools-package.R

Lines changed: 3 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,7 @@
11
#' @keywords internal
22
"_PACKAGE"
33

4-
#' @importFrom rlang .data `:=`
4+
## usethis namespace: start
5+
#' @importFrom rlang .data
6+
## usethis namespace: end
57
NULL
6-
7-
8-
#' Batch a vector or list into a list of elements with a maximum size
9-
#'
10-
#' @param x A vector or list
11-
#' @param batch_size integer. The size (length) of batches to create
12-
#' @examples
13-
#' batch_it(letters, 6L)
14-
#' @returns A list
15-
#' @export
16-
batch_it <- function(x, batch_size) {
17-
assertthat::assert_that(
18-
rlang::is_vector(x),
19-
rlang::is_scalar_integerish(batch_size),
20-
batch_size >= 1L
21-
)
22-
bsize <- min(length(x), batch_size)
23-
24-
# Create a vector of factors of length length(x), then pass this as the factor
25-
# argument to [split()].
26-
f <- rep(seq_len(ceiling(length(x) / bsize)), each = bsize)[seq_along(x)]
27-
unname(split(x, f))
28-
}

R/get_postcode_data.R

Lines changed: 14 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,14 @@
1818
#' @export
1919
get_postcode_data <- function(x, as_list = FALSE, include_codes = TRUE) {
2020
x <- unique(toupper(purrr::discard(x, is.na)))
21-
assertthat::assert_that(length(x) > 0, msg = "No postcodes have been found.")
21+
assertthat::assert_that(length(x) > 0L, msg = "No postcodes have been found.")
2222

2323
valid_index <- purrr::map_lgl(x, validate_code, .progress = "Checking codes")
2424
valid_codes <- x[valid_index]
2525
invalid_codes <- x[!valid_index]
2626

2727
if (length(invalid_codes) > 0L) {
28-
inv <- cli::cli_vec(invalid_codes, list(`vec-trunc` = 5))
28+
inv <- cli::cli_vec(invalid_codes, list(`vec-trunc` = 5L)) # nolint
2929
paste0(
3030
"{.fn get_postcode_data} found {length(invalid_codes)} invalid ",
3131
"postcode{?s}. Example{?s}: {.val {inv}}. You can use ",
@@ -64,7 +64,7 @@ get_postcode_data <- function(x, as_list = FALSE, include_codes = TRUE) {
6464
#'
6565
#' @examples
6666
#' tibble::tibble(
67-
#' place = paste0("place_", 1:3),
67+
#' place = paste0("place_", seq(3L)),
6868
#' postcode = c("NP22 3PS", "NP22 4PS", "NP22 5PS")
6969
#' ) |>
7070
#' postcode_data_join()
@@ -78,17 +78,15 @@ postcode_data_join <- function(tbl, .col = "postcode", include_codes = TRUE) {
7878
dplyr::left_join(tbl, api_data, by = dplyr::join_by({{ .col }} == "postcode"))
7979
}
8080

81-
82-
#' Unnest codes (wider): from a list-col to a column each
83-
#' @param tbl A data frame with ONS (etc.) codes data in a list-col
84-
#' @keywords internal
85-
unnest_codes <- function(tbl) {
86-
tbl |>
87-
dplyr::mutate(codes_names = names(.data[["codes"]])) |>
88-
dplyr::mutate(dplyr::across("codes", unlist)) |>
89-
tidyr::pivot_wider(
90-
names_from = "codes_names",
91-
names_glue = "{codes_names}_code",
92-
values_from = "codes"
93-
)
81+
tibblise_results_list <- function(results_list) {
82+
results_list |>
83+
purrr::map("result") |>
84+
purrr::list_flatten() |>
85+
purrr::map("result") |>
86+
purrr::map(purrr::compact) |>
87+
# If result contains nested codes then flatten these out and rename.
88+
# Hopefully this doesn't break things if codes are not included!
89+
purrr::map(\(x) purrr::list_flatten(x, name_spec = "{inner}_code")) |>
90+
purrr::map(tibble::as_tibble_row) |>
91+
purrr::list_rbind()
9492
}

R/helpers.R

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
#' Batch a vector or list into a list of elements with a maximum size
2+
#'
3+
#' @param x A vector or list
4+
#' @param batch_size integer. The size (length) of batches to create
5+
#' @examples batch_it(letters, 6L)
6+
#' @returns A list
7+
#' @export
8+
batch_it <- function(x, batch_size) {
9+
assertthat::assert_that(
10+
rlang::is_vector(x),
11+
rlang::is_scalar_integerish(batch_size),
12+
batch_size >= 1L
13+
)
14+
bsize <- min(length(x), batch_size)
15+
16+
# Create a vector of factors of length length(x), then pass this as the factor
17+
# argument to [split()].
18+
f <- rep(seq_len(ceiling(length(x) / bsize)), each = bsize)[seq_along(x)]
19+
unname(split(x, f))
20+
}

R/requests.R

Lines changed: 14 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -22,73 +22,23 @@ check_terminated <- function(code) {
2222
check_terminated_possibly <- purrr::possibly(check_terminated)
2323

2424

25-
bulk_reverse_geocode <- function(dat, prev_data = NULL, curr_radius = 125L) {
26-
if (curr_radius > 2000L) {
27-
cli::cli_alert_info(paste0(
28-
"Geocoding searches have not found some replacement postcodes despite ",
29-
"searching up to 2km in radius from the original postcode location."
30-
))
31-
prev_data # return early
32-
}
33-
34-
geodata_return <- dat |>
35-
dplyr::select(c("longitude", "latitude")) |>
36-
dplyr::mutate(limit = 1L) |>
37-
dplyr::mutate(radius = curr_radius) |>
38-
dplyr::mutate(batch = ceiling(dplyr::row_number() / 100L)) |>
39-
# batch into groups of max 100 rows
40-
tidyr::nest(.by = "batch") |>
41-
dplyr::pull("data") |>
42-
purrr::map(get_geodata_return) |>
43-
purrr::list_flatten() # re-combine batches into a single list
44-
45-
geodata_queries <- geodata_return |>
46-
purrr::map("query")
47-
48-
geodata_query_results <- geodata_return |>
49-
purrr::map("result") |>
50-
purrr::list_flatten()
51-
52-
nifty_bind_cols <- function(x, y) {
53-
x2 <- x |>
54-
tibble::as_tibble_row() |>
55-
dplyr::select(c("longitude", "latitude")) |>
56-
dplyr::rename_with(\(x) paste0("orig_", x))
57-
if (!is.null(y)) {
58-
y2 <- y |>
59-
purrr::compact() |>
60-
tibble::as_tibble() |>
61-
unnest_codes()
62-
} else {
63-
y2 <- NULL
64-
}
65-
dplyr::bind_cols(x2, y2)
66-
}
67-
68-
data_out <- geodata_queries |>
69-
purrr::map2(geodata_query_results, nifty_bind_cols) |>
70-
purrr::list_rbind()
71-
72-
if (!"postcode" %in% colnames(data_out)) {
73-
data_out <- data_out |>
74-
dplyr::mutate(postcode = NA_character_)
75-
}
76-
77-
dat_done <- data_out |>
78-
dplyr::filter(!dplyr::if_any("postcode", is.na)) |>
79-
dplyr::bind_rows(prev_data)
25+
get_geodata_return <- function(x) {
26+
base_request() |>
27+
httr2::req_body_json(list(geolocations = x)) |>
28+
pluck_result()
29+
}
8030

81-
dat_missing <- data_out |>
82-
dplyr::filter(dplyr::if_any("postcode", is.na)) |>
83-
dplyr::select(tidyselect::starts_with("orig_")) |>
84-
# rename to "longitude" and "latitude" for resubmission
85-
dplyr::rename_with(\(x) sub("^orig_", "", x))
8631

87-
if (nrow(dat_missing) > 0L) {
88-
bulk_reverse_geocode(dat_missing, dat_done, curr_radius * 2)
32+
bulk_lookup <- function(x, filter_fields = filter_fields()) {
33+
if (is.null(filter_fields)) {
34+
req <- base_request()
8935
} else {
90-
dat_done
36+
req <- base_request() |>
37+
httr2::req_url_query(filter = filter_fields, .multi = "comma")
9138
}
39+
req |>
40+
httr2::req_body_json(list(postcodes = x), auto_unbox = FALSE) |>
41+
get_json_data()
9242
}
9343

9444

@@ -107,21 +57,7 @@ autocomplete <- function(code) {
10757
unlist() |>
10858
sample(1L)
10959
}
110-
autocomplete_possibly <- purrr::possibly(autocomplete)
111-
112-
113-
bulk_lookup <- function(x) {
114-
base_request() |>
115-
httr2::req_body_json(list(postcodes = x), auto_unbox = FALSE) |>
116-
get_json_data()
117-
}
118-
119-
120-
get_geodata_return <- function(x) {
121-
base_request() |>
122-
httr2::req_body_json(list(geolocations = x)) |>
123-
pluck_result()
124-
}
60+
autocomplete_possibly <- purrr::possibly(autocomplete, NA_character_)
12561

12662

12763
pluck_result <- \(req) purrr::pluck(get_json_data(req), "result")

R/reverse_geocode.R

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
#' Recursive function to use the reverse geocoding API to find current
2+
#' postcodes to replace terminated codes, by proximity
3+
#' @keywords internal
4+
reverse_geocode <- function(dat, prev_data = NULL, curr_radius = 125L) {
5+
if (curr_radius > 2000L) {
6+
paste0(
7+
"Geocoding searches have not found some replacement postcodes despite ",
8+
"searching up to 2km in radius from the original postcode location."
9+
) |>
10+
cli::cli_alert_info()
11+
prev_data # return early
12+
}
13+
14+
geodata_return <- dat |>
15+
dplyr::select(c("longitude", "latitude")) |>
16+
dplyr::mutate(
17+
limit = 1L,
18+
radius = curr_radius,
19+
batch = ceiling(dplyr::row_number() / 100L)
20+
) |>
21+
# batch into groups of max 100 rows
22+
tidyr::nest(.by = "batch") |>
23+
dplyr::pull("data") |>
24+
purrr::map(get_geodata_return) |>
25+
purrr::list_flatten() # re-combine batches into a single list
26+
27+
geodata_query_data <- geodata_return |>
28+
purrr::map("query") |>
29+
purrr::map(tibble::as_tibble_row) |>
30+
purrr::list_rbind() |>
31+
dplyr::select(c("longitude", "latitude"))
32+
33+
geodata_found_postcodes <- geodata_return |>
34+
purrr::map("result") |>
35+
purrr::list_flatten() |>
36+
purrr::map_chr("postcode", .default = NA_character_) |>
37+
tibble::as_tibble_col("new_postcode")
38+
39+
data_out <- dplyr::bind_cols(geodata_query_data, geodata_found_postcodes)
40+
41+
dat_done <- prev_data |>
42+
dplyr::bind_rows(dplyr::filter(data_out, !is.na(.data[["new_postcode"]])))
43+
dat_still_missing <- data_out |>
44+
dplyr::filter(dplyr::if_any("new_postcode", is.na))
45+
46+
if (nrow(dat_still_missing) > 0L) {
47+
reverse_geocode(dat_still_missing, dat_done, curr_radius * 2L)
48+
} else {
49+
dat_done
50+
}
51+
}

0 commit comments

Comments
 (0)