Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 15 additions & 14 deletions pipeline/L1_normalize-utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -252,9 +252,9 @@ expect_silent(unit_conversion(x$value, x$research_name, y, quiet = TRUE))

# The second thing we pass is the observation data frame

# This function returns a logical vector, of the same length as the data_df
# input, that becomes F_OOS
oos <- function(oos_df, data_df) {
# This function returns a logical vector, of the same length as the
# data_df input, that becomes F_OOS
check_oos <- function(oos_df, data_df) {
oos_df <- as.data.frame(oos_df)

# Make sure that any 'extra' condition columns (in addition to the
Expand Down Expand Up @@ -303,37 +303,38 @@ test_oos <- function() {

# No other conditions beyond time window
oos_df <- data.frame(oos_begin = 1, oos_end = 1)
stopifnot(oos(oos_df, data_df) == c(TRUE, FALSE, FALSE))
stopifnot(check_oos(oos_df, data_df) == c(TRUE, FALSE, FALSE))
oos_df <- data.frame(oos_begin = 4, oos_end = 5)
stopifnot(oos(oos_df, data_df) == c(FALSE, FALSE, FALSE))
stopifnot(check_oos(oos_df, data_df) == c(FALSE, FALSE, FALSE))
oos_df <- data.frame(oos_begin = 0, oos_end = 2)
stopifnot(oos(oos_df, data_df) == c(TRUE, TRUE, FALSE))
stopifnot(check_oos(oos_df, data_df) == c(TRUE, TRUE, FALSE))
oos_df <- data.frame(oos_begin = 0, oos_end = 3)
stopifnot(oos(oos_df, data_df) == c(TRUE, TRUE, TRUE))
stopifnot(check_oos(oos_df, data_df) == c(TRUE, TRUE, TRUE))

# x condition - doesn't match even though timestamp does
oos_df <- data.frame(oos_begin = 1, oos_end = 1, x = "b")
stopifnot(oos(oos_df, data_df) == c(FALSE, FALSE, FALSE))
stopifnot(check_oos(oos_df, data_df) == c(FALSE, FALSE, FALSE))
# x condition - matches and timestamp does
oos_df <- data.frame(oos_begin = 1, oos_end = 1, x = "a")
stopifnot(oos(oos_df, data_df) == c(TRUE, FALSE, FALSE))
stopifnot(check_oos(oos_df, data_df) == c(TRUE, FALSE, FALSE))
# x condition - some match, some don't
oos_df <- data.frame(oos_begin = 1, oos_end = 2, x = "b")
stopifnot(oos(oos_df, data_df) == c(FALSE, TRUE, FALSE))
stopifnot(check_oos(oos_df, data_df) == c(FALSE, TRUE, FALSE))
# x and y condition
oos_df <- data.frame(oos_begin = 1, oos_end = 2, x = "b", y = 5)
stopifnot(oos(oos_df, data_df) == c(FALSE, TRUE, FALSE))
stopifnot(check_oos(oos_df, data_df) == c(FALSE, TRUE, FALSE))
oos_df <- data.frame(oos_begin = 1, oos_end = 2, x = "a", y = 5)
stopifnot(oos(oos_df, data_df) == c(FALSE, FALSE, FALSE))
stopifnot(check_oos(oos_df, data_df) == c(FALSE, FALSE, FALSE))

# Error thrown if condition column(s) not present
oos_df <- data.frame(oos_begin = 1, oos_end = 2, z = 1)
out <- try(oos(oos_df, data_df), silent = TRUE)
out <- try(check_oos(oos_df, data_df), silent = TRUE)
stopifnot(class(out) == "try-error")
}
test_oos()

# Read all out-of-service files and check their formatting
# Read all out-of-service files, check their formatting,
# and return them as a list of OOS data tibbles
read_oos_data <- function(oos_dir) {
message("Dir is ", oos_dir)
oos_files <- list.files(oos_dir, pattern = "\\.csv$", full.names = TRUE)
Expand Down
15 changes: 2 additions & 13 deletions pipeline/L1_normalize.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -106,11 +106,8 @@ L1_NORMALIZE <- file.path(params$DATA_ROOT, params$L1_NORMALIZE)

source("helpers.R")

# Read out-of-service data and set up a structure to keep track
# of whether each table is used or not
# Read out-of-service data
oos_data <- read_oos_data(params$OOS)
oos_data_used <- rep(FALSE, length.out = length(oos_data))
names(oos_data_used) <- names(oos_data)

# Restore old warning setting
options(warn = oldwarn)
Expand Down Expand Up @@ -260,9 +257,8 @@ for(i in seq_along(files_to_process)) {
# Is this reasonable? If not, we need to run through
# all `oos_data` entries, OR'ing the results as we go,
# instead of breaking out
dat$F_OOS <- as.integer(oos(oos_data[[tbl]], dat))
dat$F_OOS <- as.integer(check_oos(oos_data[[tbl]], dat))
if(sum(dat$F_OOS)) message("\tAdded ", sum(dat$F_OOS), " OOS flags for ", tbl)
oos_data_used[tbl] <- TRUE # won't work when parallelized
break
}
}
Expand Down Expand Up @@ -323,13 +319,6 @@ for(i in seq_along(files_to_process)) {

out[[i]] <- smry
}

# Check whether all out-of-service tables were checked
# If not, this probably indicates a naming mistake
if(!all(oos_data_used)) {
warning("Out-of-service tables never used: ",
paste(names(oos_data_used)[!oos_data_used], collapse = ","))
}
```

## Summary
Expand Down
8 changes: 4 additions & 4 deletions pipeline/out-of-service/README.md
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
# out-of-service

This folder holds out-of-service (OOS) tables that are
read by `L1_normalize.qmd` and used to add out-of-service flags to data.
read by `L1_normalize.qmd` to add out-of-service flags to data.

Note that **the CSV filenames are important.** They are used as a
pattern that's checked against the data 'Table' during L1_normalize
processing.
**Note that the CSV filenames are important:** they are used as a
pattern that's checked against the data 'Table' column during
L1_normalize processing.

OOS tables are CSV files and _must_ have at least three columns:
* `Site` - Site name to which these OOS entries apply
Expand Down
14 changes: 0 additions & 14 deletions pipeline/out-of-service/compass-fme-oos/README.md

This file was deleted.

16 changes: 0 additions & 16 deletions pipeline/out-of-service/compass-fme-oos/exo_log.csv

This file was deleted.

93 changes: 0 additions & 93 deletions pipeline/out-of-service/compass-fme-oos/fme-oos.R

This file was deleted.

Loading
Loading