diff --git a/NEWS.md b/NEWS.md index f2b1e43..aac0778 100644 --- a/NEWS.md +++ b/NEWS.md @@ -24,6 +24,20 @@ behaviour. debian/ubuntu only; for alpine-based images you must pass `user = NULL` and create the user yourself. Closes #100. +## Security + +- `dock_from_desc()` and `dock_from_renv()` now validate every + user-supplied parameter that flows into a Dockerfile shell context + (`FROM`, `AS`, `repos` values and names, `extra_sysreqs`, + `renv_version`, `renv_paths_cache`, `lockfile` basename, `use_pak`, + `strict_install`, plus `r_version` read from the lockfile). Inputs + that contain shell metacharacters, newlines, or do not match the + documented format raise an explicit error at function entry, + rather than silently producing a malformed Dockerfile or one that + executes attacker-controlled commands at `docker build` time. + This closes the post-#106 audit follow-up for all five sites + surfaced by Copilot review. + ## New features - `dock$ARG()` and the internal `add_arg()` helper gain a `default` @@ -37,10 +51,13 @@ via `--build-arg GITHUB_PAT=$GITHUB_PAT`), or `"secret"` to use BuildKit secret mounts (the PAT is never persisted in image metadata; recommended for published images). Closes #18. -- `dock_from_renv()` gains a `renv_paths_cache` parameter (default - `/root/.cache/R/renv`) used as the build-arg default, the propagated - `ENV` value and the cache mount target. Users can override the renv - cache location at image build time with +- `dock_from_renv()` gains a `renv_paths_cache` parameter that + controls the `RENV_PATHS_CACHE` build-arg default, the propagated + `ENV` value, and the cache mount target. When `NULL` (the + default), the path is auto-derived from `user`: + `/root/.cache/R/renv` for `user = NULL`, and + `/home//.cache/R/renv` otherwise. Users can override the + renv cache location at image build time with `--build-arg RENV_PATHS_CACHE=...` without regenerating the Dockerfile. - `dock_from_desc()` gains a `strict_install` parameter (default diff --git a/R/dock_from_desc.R b/R/dock_from_desc.R index bcefe62..c0bb4ff 100644 --- a/R/dock_from_desc.R +++ b/R/dock_from_desc.R @@ -42,17 +42,29 @@ quote_not_na <- function(x){ # #' @param path path to the DESCRIPTION file to use as an input. #' @param FROM The FROM of the Dockerfile. Default is -#' FROM rocker/r-ver:`R.Version()$major`.`R.Version()$minor`. -#' @param AS The AS of the Dockerfile. Default it NULL. +#' `paste0("rocker/r-ver:", R.Version()$major, ".", R.Version()$minor)`. +#' Validated as a Docker image reference (alphanumerics, dot, slash, +#' dash, underscore, optional `:tag` and / or `@sha256:`); other +#' values raise an error to prevent shell-metacharacter injection +#' into the generated FROM directive. +#' @param AS The AS of the Dockerfile. Default it NULL. When non-NULL, +#' validated as a simple build-stage name (`^[a-zA-Z0-9][a-zA-Z0-9._-]*$`). #' @param sysreqs boolean. If TRUE, the Dockerfile will contain sysreq installation. -#' @param repos character. The URL(s) of the repositories to use for `options("repos")`. +#' @param repos character. The URL(s) of the repositories to use for +#' `options("repos")`. Each value must look like an http(s) URL (no +#' quotes, spaces or newlines); each name (when set) must be a simple +#' identifier (`^[A-Za-z][A-Za-z0-9._-]*$`). Other values raise an +#' error to prevent injection into the generated `echo "options(...)"` +#' shell command. #' @param expand boolean. If `TRUE` each system requirement will have its own `RUN` line. #' @param build_from_source boolean. If `TRUE` no tar.gz is created and #' the Dockerfile directly mount the source folder. #' @param update_tar_gz boolean. If `TRUE` and `build_from_source` is also `TRUE`, #' an updated tar.gz is created. #' @param extra_sysreqs character vector. Extra debian system requirements. -#' Will be installed with apt-get install. +#' Will be installed with apt-get install. Each entry must be a Debian +#' package name (`^[a-z0-9][a-z0-9.+-]+$`); other values raise an error +#' to prevent injection into the generated apt-get RUN. #' @param github_pat character. How to provide a GitHub PAT to #' `remotes::install_github()` for private dependency repositories. #' One of `"none"` (default; the generated Dockerfile does not @@ -103,16 +115,11 @@ dock_from_desc <- function( strict_install = TRUE ) { github_pat <- match.arg(github_pat) - if ( - !is.logical(strict_install) || - length(strict_install) != 1L || - is.na(strict_install) - ) { - stop( - "`strict_install` must be a single `TRUE` or `FALSE`, got: ", - deparse(strict_install) - ) - } + .validate_scalar_logical(strict_install, "strict_install") + .validate_FROM(FROM) + .validate_AS(AS) + .validate_repos(repos) + .validate_extra_sysreqs(extra_sysreqs) path <- fs::path_abs(path) packages <- desc_get_deps(path)$package diff --git a/R/dock_from_renv.R b/R/dock_from_renv.R index 8b9504e..67d7c81 100644 --- a/R/dock_from_renv.R +++ b/R/dock_from_renv.R @@ -8,15 +8,32 @@ pkg_sysreqs_mem <- memoise::memoise( #' Create a Dockerfile from an `renv.lock` file #' -#' @param lockfile Path to an `renv.lock` file to use as an input.. -#' @param FROM Docker image to start FROM Default is FROM rocker/r-base -#' @param AS The AS of the Dockerfile. Default it `NULL`. +#' @param lockfile Path to an `renv.lock` file to use as an input. +#' The `basename(lockfile)` must be located at the docker build +#' context root at `docker build` time, because the generated +#' Dockerfile emits `COPY renv.lock`. Validated +#' as a single string whose basename contains only alphanumerics, +#' dots, underscores or hyphens (no spaces or shell metacharacters). +#' @param FROM Docker image to start FROM. Default is `"rocker/r-base"`. +#' Validated as a Docker image reference (alphanumerics, dot, slash, +#' dash, underscore, optional `:tag` and / or `@sha256:`); other +#' values raise an error to prevent shell-metacharacter injection +#' into the generated FROM directive. +#' @param AS The AS of the Dockerfile. Default is `NULL`. When non-NULL, +#' validated as a simple build-stage name (`^[a-zA-Z0-9][a-zA-Z0-9._-]*$`). #' @param distro - deprecated - only debian/ubuntu based images are supported #' @param sysreqs boolean. If `TRUE`, the Dockerfile will contain sysreq installation. #' @param expand boolean. If `TRUE` each system requirement will have its own `RUN` line. -#' @param repos character. The URL(s) of the repositories to use for `options("repos")`. +#' @param repos character. The URL(s) of the repositories to use for +#' `options("repos")`. Each value must look like an http(s) URL +#' (no quotes, spaces or newlines); each name (when set) must be a +#' simple identifier (`^[A-Za-z][A-Za-z0-9._-]*$`). Other values raise +#' an error to prevent injection into the generated `echo "options(...)"` +#' shell command. #' @param extra_sysreqs character vector. Extra debian system requirements. -#' Will be installed with apt-get install. +#' Will be installed with apt-get install. Each entry must be a Debian +#' package name (`^[a-z0-9][a-z0-9.+-]+$`); other values raise an error +#' to prevent injection into the generated apt-get RUN. #' @param renv_version character or `NULL`. The renv version to install. #' The argument has three distinct modes, deliberately encoded with #' the missing-vs-`NULL` distinction: @@ -28,7 +45,12 @@ pkg_sysreqs_mem <- memoise::memoise( #' version. #' - a character string such as `"1.0.0"`: install that specific #' version regardless of what the lockfile says. -#' @param use_pak boolean. If `TRUE` use pak to deal with dependencies during `renv::restore()`. FALSE by default +#' +#' When supplied as a string, validated as a version-like token +#' (`^[0-9]+(\.[0-9]+){0,3}([-.][a-zA-Z0-9]+)?$`). +#' @param use_pak boolean. If `TRUE` use pak to deal with dependencies +#' during `renv::restore()`. FALSE by default. Must be a single +#' `TRUE` or `FALSE` (no `NA`, no vector). #' @param user Name of the user the runtime container drops privilege #' to before the `renv::restore()` step (and therefore at runtime). #' Default is `"rstudio"` so the generated image runs as a non-root @@ -139,6 +161,16 @@ dock_from_renv <- function( renv_paths_cache = NULL ) { github_pat <- match.arg(github_pat) + .validate_lockfile(lockfile) + .validate_FROM(FROM) + .validate_AS(AS) + .validate_repos(repos) + .validate_extra_sysreqs(extra_sysreqs) + .validate_scalar_logical(use_pak, "use_pak") + if (!missing(renv_version)) { + .validate_renv_version(renv_version) + } + .validate_renv_paths_cache(renv_paths_cache) if (!is.null(user)) { # `user` is interpolated into shell commands (id, useradd, chown, USER). # Reject anything that is not a strict POSIX username so a caller passing @@ -173,6 +205,7 @@ dock_from_renv <- function( # start the dockerfile R_major_minor <- lock$R$Version + .validate_r_version(R_major_minor) dock <- Dockerfile$new( FROM = gen_base_image( r_version = R_major_minor, diff --git a/R/gen_base_image.R b/R/gen_base_image.R index d1704fe..93c6b95 100644 --- a/R/gen_base_image.R +++ b/R/gen_base_image.R @@ -15,6 +15,13 @@ if (!is.null(distro)){ warning("the `distro` parameter is not used anymore, only debian/ubuntu based images are supported") } - glue::glue("{FROM}:{r_version}") +# If the caller already pinned a tag (`:something`) or digest +# (`@sha256:...`), do not append `:r_version` on top of it -- that +# would yield an invalid image reference like `repo:tag:r_version`. +if (grepl("(:[^/]+|@sha256:[a-fA-F0-9]+)$", FROM)) { + glue::glue("{FROM}") +} else { + glue::glue("{FROM}:{r_version}") +} } diff --git a/R/utils.R b/R/utils.R index 9addb44..8cec9da 100644 --- a/R/utils.R +++ b/R/utils.R @@ -90,6 +90,240 @@ cat_info <- function(...) { } } +#' Validate user-supplied strings that flow into a Dockerfile shell context. +#' +#' These helpers reject inputs that, if interpolated raw, would either +#' break the generated Dockerfile / shell command or allow injection of +#' arbitrary commands at `docker build` time. Each helper raises an +#' error with a clear message naming the offending parameter. `NULL` is +#' accepted where it has a documented meaning (e.g. `renv_paths_cache`, +#' `renv_version`, `extra_sysreqs`, `repos`). +#' @noRd +.validate_FROM <- function(x) { + if (!is.character(x) || length(x) != 1L || is.na(x)) { + stop( + "`FROM` must be a single string, got: ", + deparse(x) + ) + } + # Docker reference grammar permits `[:]/[:][@sha256:]`. + # The host segment may itself contain `:` (port). To keep the regex simple + # and unambiguous, accept any number of `` separated by `/` where + # each segment is alphanumerics + `._-`, optionally followed by `:digits` + # for the port; then the final segment may carry the `:tag` or `@sha256:` + # suffix. Shell metacharacters (`$`, `` ` ``, `\\`, quotes, newlines, + # spaces) remain forbidden by the alphabet alone. + if ( + !grepl( + paste0( + "^", + "[a-zA-Z0-9][a-zA-Z0-9._-]*(:[0-9]+)?", + "(/[a-zA-Z0-9][a-zA-Z0-9._-]*)*", + "(:[a-zA-Z0-9._-]+)?", + "(@sha256:[a-fA-F0-9]+)?", + "$" + ), + x + ) + ) { + stop( + "`FROM` must be a valid Docker image reference ", + "(`[:]/[:][@sha256:]`; alphanumerics ", + "plus `._-` per segment; no newlines or shell metacharacters), got: ", + deparse(x) + ) + } + invisible() +} + +#' @noRd +.validate_r_version <- function(x) { + if (!is.character(x) || length(x) != 1L || is.na(x)) { + stop( + "`r_version` (read from the lockfile) must be a single string, got: ", + deparse(x) + ) + } + if (!grepl("^[0-9]+\\.[0-9]+(\\.[0-9]+)?$", x)) { + stop( + "`r_version` (read from the lockfile) must look like a numeric ", + "R version such as \"4.5\" or \"4.5.0\", got: ", + deparse(x) + ) + } + invisible() +} + +#' @noRd +.validate_repos <- function(x) { + if (is.null(x)) { + return(invisible()) + } + if (!is.character(x)) { + stop( + "`repos` must be a character vector, got: ", + deparse(x) + ) + } + # The validated value lands inside double-quoted shell context + # (`echo "options(repos = ...)"`). Inside `"..."`, the shell still + # interprets `$`, `\``, `\\` and `!` (bash history). Reject these + # explicitly so the validator doubles as an escape primitive for the + # known wrapper. Parens are also rejected because some shells expand + # them in alias contexts; the URL grammar tolerates `%28`/`%29`. + bad <- is.na(x) | !grepl( + "^https?://[A-Za-z0-9._~:/?#@&*+,;=%-]+$", + x + ) + if (any(bad)) { + stop( + "`repos` entries must be http(s) URLs containing only ", + "URL-safe characters (no quotes, parentheses, dollar, ", + "backtick, backslash, spaces or newlines); invalid: ", + paste(vapply(x[bad], deparse, character(1)), collapse = ", ") + ) + } + # Names are emitted via `dput(repos)`, which wraps R-syntax-unsafe + # names in backticks. Backticks inside a Dockerfile RUN's outer + # double-quoted shell context trigger command substitution. Tight + # regex on names to keep them simple identifiers. + nms <- names(x) + if (!is.null(nms)) { + bad_nms <- is.na(nms) | !grepl("^[A-Za-z][A-Za-z0-9._-]*$", nms) + if (any(bad_nms)) { + stop( + "`names(repos)` must be simple identifiers ", + "(`^[A-Za-z][A-Za-z0-9._-]*$`); invalid: ", + paste(vapply(nms[bad_nms], deparse, character(1)), collapse = ", ") + ) + } + } + invisible() +} + +#' @noRd +.validate_AS <- function(x) { + if (is.null(x)) { + return(invisible()) + } + if (!is.character(x) || length(x) != 1L || is.na(x)) { + stop( + "`AS` must be a single string or NULL, got: ", + deparse(x) + ) + } + if (!grepl("^[a-zA-Z0-9][a-zA-Z0-9._-]*$", x)) { + stop( + "`AS` must be a simple build-stage name ", + "(`^[a-zA-Z0-9][a-zA-Z0-9._-]*$`), got: ", + deparse(x) + ) + } + invisible() +} + +#' @noRd +.validate_scalar_logical <- function(x, name) { + if ( + !is.logical(x) || + length(x) != 1L || + is.na(x) + ) { + stop( + sprintf("`%s` must be a single `TRUE` or `FALSE`, got: ", name), + deparse(x) + ) + } + invisible() +} + +#' @noRd +.validate_extra_sysreqs <- function(x) { + if (is.null(x)) { + return(invisible()) + } + if (!is.character(x)) { + stop( + "`extra_sysreqs` must be a character vector, got: ", + deparse(x) + ) + } + bad <- is.na(x) | !grepl("^[a-z0-9][a-z0-9.+-]+$", x) + if (any(bad)) { + stop( + "`extra_sysreqs` entries must be Debian package names ", + "matching `^[a-z0-9][a-z0-9.+-]+$`; invalid: ", + paste(vapply(x[bad], deparse, character(1)), collapse = ", ") + ) + } + invisible() +} + +#' @noRd +.validate_renv_version <- function(x) { + if (is.null(x)) { + return(invisible()) + } + if (!is.character(x) || length(x) != 1L || is.na(x)) { + stop( + "`renv_version` must be a single string or NULL, got: ", + deparse(x) + ) + } + if (!grepl("^[0-9]+(\\.[0-9]+){0,3}([-.][a-zA-Z0-9]+)?$", x)) { + stop( + "`renv_version` must look like a version string such as ", + "\"1.0.0\" or \"0.16.0-beta\", got: ", + deparse(x) + ) + } + invisible() +} + +#' @noRd +.validate_lockfile <- function(x) { + if (!is.character(x) || length(x) != 1L || is.na(x)) { + stop( + "`lockfile` must be a single path string, got: ", + deparse(x) + ) + } + bn <- basename(x) + if (!grepl("^[A-Za-z0-9._-]+$", bn)) { + stop( + "`lockfile` basename must contain only alphanumerics, dots, ", + "underscores or hyphens (no spaces or shell metacharacters); ", + "the COPY directive in the generated Dockerfile would otherwise ", + "be malformed. Got basename: ", + deparse(bn) + ) + } + invisible() +} + +#' @noRd +.validate_renv_paths_cache <- function(x) { + if (is.null(x)) { + return(invisible()) + } + if (!is.character(x) || length(x) != 1L || is.na(x)) { + stop( + "`renv_paths_cache` must be a single path string or NULL, got: ", + deparse(x) + ) + } + if (!grepl("^/[A-Za-z0-9._/-]*$", x)) { + stop( + "`renv_paths_cache` must be an absolute path containing only ", + "alphanumerics, dots, slashes, underscores or hyphens ", + "(`^/[A-Za-z0-9._/-]*$`); shell metacharacters or newlines ", + "would break the ARG directive. Got: ", + deparse(x) + ) + } + invisible() +} + #' Emit a one-shot reminder describing how the PAT must be supplied at #' `docker build` time. No-op when mode is `"none"`. #' @noRd diff --git a/man/dock_from_renv.Rd b/man/dock_from_renv.Rd index 5f86ee4..3eefac0 100644 --- a/man/dock_from_renv.Rd +++ b/man/dock_from_renv.Rd @@ -23,26 +23,45 @@ dock_from_renv( ) } \arguments{ -\item{lockfile}{Path to an \code{renv.lock} file to use as an input..} +\item{lockfile}{Path to an \code{renv.lock} file to use as an input. +The \code{basename(lockfile)} must be located at the docker build +context root at \verb{docker build} time, because the generated +Dockerfile emits \verb{COPY renv.lock}. Validated +as a single string whose basename contains only alphanumerics, +dots, underscores or hyphens (no spaces or shell metacharacters).} \item{distro}{\itemize{ \item deprecated - only debian/ubuntu based images are supported }} -\item{FROM}{Docker image to start FROM Default is FROM rocker/r-base} +\item{FROM}{Docker image to start FROM. Default is \code{"rocker/r-base"}. +Validated as a Docker image reference (alphanumerics, dot, slash, +dash, underscore, optional \verb{:tag} and / or \verb{@sha256:}); other +values raise an error to prevent shell-metacharacter injection +into the generated FROM directive.} -\item{AS}{The AS of the Dockerfile. Default it \code{NULL}.} +\item{AS}{The AS of the Dockerfile. Default is \code{NULL}. When non-NULL, +validated as a simple build-stage name (\verb{^[a-zA-Z0-9][a-zA-Z0-9._-]*$}).} \item{sysreqs}{boolean. If \code{TRUE}, the Dockerfile will contain sysreq installation.} -\item{repos}{character. The URL(s) of the repositories to use for \code{options("repos")}.} +\item{repos}{character. The URL(s) of the repositories to use for +\code{options("repos")}. Each value must look like an http(s) URL +(no quotes, spaces or newlines); each name (when set) must be a +simple identifier (\verb{^[A-Za-z][A-Za-z0-9._-]*$}). Other values raise +an error to prevent injection into the generated \verb{echo "options(...)"} +shell command.} \item{expand}{boolean. If \code{TRUE} each system requirement will have its own \code{RUN} line.} \item{extra_sysreqs}{character vector. Extra debian system requirements. -Will be installed with apt-get install.} +Will be installed with apt-get install. Each entry must be a Debian +package name (\verb{^[a-z0-9][a-z0-9.+-]+$}); other values raise an error +to prevent injection into the generated apt-get RUN.} -\item{use_pak}{boolean. If \code{TRUE} use pak to deal with dependencies during \code{renv::restore()}. FALSE by default} +\item{use_pak}{boolean. If \code{TRUE} use pak to deal with dependencies +during \code{renv::restore()}. FALSE by default. Must be a single +\code{TRUE} or \code{FALSE} (no \code{NA}, no vector).} \item{user}{Name of the user the runtime container drops privilege to before the \code{renv::restore()} step (and therefore at runtime). @@ -100,7 +119,10 @@ configured repos, even when the lockfile pins a specific version. \item a character string such as \code{"1.0.0"}: install that specific version regardless of what the lockfile says. -}} +} + +When supplied as a string, validated as a version-like token +(\verb{^[0-9]+(\\.[0-9]+)\{0,3\}([-.][a-zA-Z0-9]+)?$}).} \item{github_pat}{character. How to provide a GitHub PAT to \code{renv::restore()} for private dependency repositories. One of diff --git a/man/dockerfiles.Rd b/man/dockerfiles.Rd index 8c6276c..1627a35 100644 --- a/man/dockerfiles.Rd +++ b/man/dockerfiles.Rd @@ -22,13 +22,23 @@ dock_from_desc( \item{path}{path to the DESCRIPTION file to use as an input.} \item{FROM}{The FROM of the Dockerfile. Default is -FROM rocker/r-ver:\code{R.Version()$major}.\code{R.Version()$minor}.} +\code{paste0("rocker/r-ver:", R.Version()$major, ".", R.Version()$minor)}. +Validated as a Docker image reference (alphanumerics, dot, slash, +dash, underscore, optional \verb{:tag} and / or \verb{@sha256:}); other +values raise an error to prevent shell-metacharacter injection +into the generated FROM directive.} -\item{AS}{The AS of the Dockerfile. Default it NULL.} +\item{AS}{The AS of the Dockerfile. Default it NULL. When non-NULL, +validated as a simple build-stage name (\verb{^[a-zA-Z0-9][a-zA-Z0-9._-]*$}).} \item{sysreqs}{boolean. If TRUE, the Dockerfile will contain sysreq installation.} -\item{repos}{character. The URL(s) of the repositories to use for \code{options("repos")}.} +\item{repos}{character. The URL(s) of the repositories to use for +\code{options("repos")}. Each value must look like an http(s) URL (no +quotes, spaces or newlines); each name (when set) must be a simple +identifier (\verb{^[A-Za-z][A-Za-z0-9._-]*$}). Other values raise an +error to prevent injection into the generated \verb{echo "options(...)"} +shell command.} \item{expand}{boolean. If \code{TRUE} each system requirement will have its own \code{RUN} line.} @@ -39,7 +49,9 @@ an updated tar.gz is created.} the Dockerfile directly mount the source folder.} \item{extra_sysreqs}{character vector. Extra debian system requirements. -Will be installed with apt-get install.} +Will be installed with apt-get install. Each entry must be a Debian +package name (\verb{^[a-z0-9][a-z0-9.+-]+$}); other values raise an error +to prevent injection into the generated apt-get RUN.} \item{github_pat}{character. How to provide a GitHub PAT to \code{remotes::install_github()} for private dependency repositories. diff --git a/tests/testthat/test-dock_from_desc.R b/tests/testthat/test-dock_from_desc.R index 38ad237..b39d4a6 100644 --- a/tests/testthat/test-dock_from_desc.R +++ b/tests/testthat/test-dock_from_desc.R @@ -207,6 +207,66 @@ withr::with_dir( ) }) + test_that("dock_from_desc rejects shell-metacharacter / unsafe values across user-supplied params", { + skip_if(is_rdevel, "skip on R-devel") + + # extra_sysreqs: each element must be a Debian package name. A + # value with `;` would inject into the apt-get RUN. + expect_error( + dock_from_desc( + file.path(".", "DESCRIPTION__"), + sysreqs = FALSE, + extra_sysreqs = "libfoo; rm -rf /" + ), + "extra_sysreqs" + ) + + # repos: each URL must look like a real http(s) URL. A value with + # `'` or `"` breaks the R / shell nested quoting in the + # `echo "options(repos = ...)" > Rprofile.site` RUN. + expect_error( + dock_from_desc( + file.path(".", "DESCRIPTION__"), + sysreqs = FALSE, + repos = c(CRAN = "https://evil'); cat /etc/passwd; #") + ), + "repos" + ) + + # FROM: a Docker image reference. Newlines or shell metacharacters + # would break the FROM directive or change the image being pulled. + expect_error( + dock_from_desc( + file.path(".", "DESCRIPTION__"), + sysreqs = FALSE, + FROM = "rocker/r-ver:4.0\nRUN evil" + ), + "FROM" + ) + + # AS: docker build-stage name. Newlines or shell metacharacters + # would break the `FROM AS ` directive. + expect_error( + dock_from_desc( + file.path(".", "DESCRIPTION__"), + sysreqs = FALSE, + AS = "stage1\nRUN evil" + ), + "`AS`" + ) + + # names(repos): backticks in names would inject command + # substitution into the `echo "options(repos=...)"` RUN. + expect_error( + dock_from_desc( + file.path(".", "DESCRIPTION__"), + sysreqs = FALSE, + repos = c(`CRAN; system('evil')` = "https://cran.rstudio.com/") + ), + "names\\(repos\\)" + ) + }) + test_that("dock_from_desc emits no GITHUB_PAT plumbing by default", { skip_if(is_rdevel, "skip on R-devel") my_dock <- dock_from_desc(file.path(".", "DESCRIPTION__")) diff --git a/tests/testthat/test-dock_from_renv.R b/tests/testthat/test-dock_from_renv.R index 2ac6433..0b2032a 100644 --- a/tests/testthat/test-dock_from_renv.R +++ b/tests/testthat/test-dock_from_renv.R @@ -148,6 +148,34 @@ test_that("gen_base_image works", { expect_equal(out, "rocker/verse:4.0") }) +test_that("gen_base_image preserves an already-pinned tag instead of appending r_version", { + # Without the guard, `gen_base_image(FROM = "rocker/r-base:4.5", r_version = "4.1.2")` + # returned "rocker/r-base:4.5:4.1.2" -- an invalid image reference. + expect_equal( + dockerfiler:::gen_base_image( + r_version = "4.1.2", + FROM = "rocker/r-base:4.5" + ), + "rocker/r-base:4.5" + ) + # Same protection for sha256 digests. + expect_equal( + dockerfiler:::gen_base_image( + r_version = "4.1.2", + FROM = "rocker/r-base@sha256:abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789" + ), + "rocker/r-base@sha256:abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789" + ) + # Untagged FROM still gets the r_version appended. + expect_equal( + dockerfiler:::gen_base_image( + r_version = "4.1.2", + FROM = "rocker/r-base" + ), + "rocker/r-base:4.1.2" + ) +}) + test_that("gen_base_image warns when the deprecated `distro` is supplied", { expect_warning( out <- dockerfiler:::gen_base_image( @@ -173,7 +201,7 @@ test_that("dock_from_renv works with specific renv", { the_lockfile1.0.0 <- system.file("renv_with_1.0.0.lock",package = "dockerfiler") for (lf in list(the_lockfile,the_lockfile1.0.0)){ -for (renv_version in list(NULL,"banana","missing")){ +for (renv_version in list(NULL,"1.2.3","missing")){ if (!is.null(renv_version) && renv_version == "missing") { @@ -192,10 +220,10 @@ socle_install_version <- "remotes::install_version\\(\"renv\", version = \"" test_string <- 'install.packages\\(\"renv\"\\)' } else if (lf == the_lockfile1.0.0 & is.null(renv_version)) { test_string <- 'install.packages\\(\"renv\"\\)' - } else if (lf == the_lockfile & renv_version == "banana") { - test_string <- paste0(socle_install_version,"banana" ,"\"\\)") - } else if (lf == the_lockfile1.0.0 & renv_version == "banana") { - test_string <- paste0(socle_install_version,"banana","\"\\)") + } else if (lf == the_lockfile & renv_version == "1.2.3") { + test_string <- paste0(socle_install_version,"1.2.3" ,"\"\\)") + } else if (lf == the_lockfile1.0.0 & renv_version == "1.2.3") { + test_string <- paste0(socle_install_version,"1.2.3","\"\\)") } else if (lf == the_lockfile & renv_version == "missing") { # When the lockfile does not pin renv, we install the latest from # the configured repos (same behaviour as renv_version = NULL). @@ -802,5 +830,206 @@ test_that("dock_from_renv with user = 'kevin' parameterises the useradd, cache a expect_false(any(grepl("\\brstudio\\b(?!\\.com)", out$Dockerfile, perl = TRUE))) }) +test_that("dock_from_renv rejects shell-active metacharacters in repos URL even though the URL grammar tolerates them", { + # `.validate_repos` doubles as the escape primitive for the + # double-quoted shell `echo "options(repos = ...)"` wrapper. Inside + # `"..."`, the shell still interprets `$`, backtick, backslash and + # `!`. A repos URL with `$(...)` would expand to a command at + # `docker build` time even though the URL itself is otherwise + # well-formed. + skip_if(is_rdevel, "skip on R-devel") + expect_error( + dock_from_renv( + lockfile = the_lockfile, + FROM = "rocker/verse", + user = NULL, + repos = c(CRAN = "https://evil$(id).example.com"), + renv_version = "0.0.0" + ), + "repos" + ) +}) + +test_that("dock_from_renv accepts a private-registry FROM with host:port/image", { + # The Docker reference grammar permits `:/`. + # The validator must not reject this common private-registry form. + skip_if(is_rdevel, "skip on R-devel") + expect_silent( + dockerfiler:::.validate_FROM("localhost:5000/myimage") + ) + expect_silent( + dockerfiler:::.validate_FROM("registry.example.com:443/org/img:1.2") + ) + expect_silent( + dockerfiler:::.validate_FROM("rocker/r-base:4.5") + ) + expect_silent( + dockerfiler:::.validate_FROM( + "rocker/r-base@sha256:abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789" + ) + ) +}) + +test_that("dock_from_renv rejects shell-metacharacter / unsafe values across user-supplied params", { + skip_if(is_rdevel, "skip on R-devel") + + # use_pak: scalar logical. A non-logical value would be interpolated raw + # into `echo "options(renv.config.pak.enabled = %s, ...)"`, breaking + # the outer shell quoting and allowing command substitution at build + # time. + expect_error( + dock_from_renv( + lockfile = the_lockfile, + FROM = "rocker/verse", + user = NULL, + use_pak = 'TRUE) ; system("evil") ; #', + renv_version = "0.0.0" + ), + "use_pak" + ) + + # names(repos): must be simple identifiers. `dput()` would otherwise + # wrap shell-unsafe names in backticks, which inside the outer + # double-quoted `echo "..."` triggers command substitution. + expect_error( + dock_from_renv( + lockfile = the_lockfile, + FROM = "rocker/verse", + user = NULL, + repos = c(`CRAN; system('evil')` = "https://cran.rstudio.com/"), + renv_version = "0.0.0" + ), + "names\\(repos\\)" + ) + + # AS: docker build-stage name. Newlines or shell metacharacters + # would break the `FROM AS ` directive. + expect_error( + dock_from_renv( + lockfile = the_lockfile, + FROM = "rocker/verse", + AS = "stage1\nRUN evil", + user = NULL, + renv_version = "0.0.0" + ), + "`AS`" + ) + + # extra_sysreqs: each element must be a Debian package name. + expect_error( + dock_from_renv( + lockfile = the_lockfile, + FROM = "rocker/verse", + user = NULL, + extra_sysreqs = "libfoo; rm -rf /", + renv_version = "0.0.0" + ), + "extra_sysreqs" + ) + + # NA in vector inputs must not crash the validator with + # "missing value where TRUE/FALSE needed"; treat as invalid and + # raise the same clear message as other malformed entries. + expect_error( + dock_from_renv( + lockfile = the_lockfile, + FROM = "rocker/verse", + user = NULL, + extra_sysreqs = c("libcurl", NA_character_), + renv_version = "0.0.0" + ), + "extra_sysreqs" + ) + expect_error( + dock_from_renv( + lockfile = the_lockfile, + FROM = "rocker/verse", + user = NULL, + repos = c(CRAN = NA_character_), + renv_version = "0.0.0" + ), + "repos" + ) + expect_error( + dock_from_renv( + lockfile = the_lockfile, + FROM = "rocker/verse", + user = NULL, + repos = setNames( + "https://cran.rstudio.com/", + NA_character_ + ), + renv_version = "0.0.0" + ), + "names\\(repos\\)" + ) + + # repos: each URL must look like a real http(s) URL. + expect_error( + dock_from_renv( + lockfile = the_lockfile, + FROM = "rocker/verse", + user = NULL, + repos = c(CRAN = "https://evil'); cat /etc/passwd; #"), + renv_version = "0.0.0" + ), + "repos" + ) + + # renv_version: must look like a version string. Anything else would + # be interpolated raw into `R -e 'install_version("renv", version = + # "")'` and could break the inner quoting. + expect_error( + dock_from_renv( + lockfile = the_lockfile, + FROM = "rocker/verse", + user = NULL, + renv_version = '"); system("evil"); #' + ), + "renv_version" + ) + + # FROM: docker image reference. + expect_error( + dock_from_renv( + lockfile = the_lockfile, + FROM = "rocker/verse\nRUN evil", + user = NULL, + renv_version = "0.0.0" + ), + "FROM" + ) + + # renv_paths_cache: absolute path, no shell metacharacters or newlines. + expect_error( + dock_from_renv( + lockfile = the_lockfile, + FROM = "rocker/verse", + user = NULL, + renv_paths_cache = "/foo\nRUN evil", + renv_version = "0.0.0" + ), + "renv_paths_cache" + ) +}) + +test_that("dock_from_renv rejects a lockfile path whose basename contains shell metacharacters", { + skip_if(is_rdevel, "skip on R-devel") + # Spaces in the basename would break the COPY directive + # (`COPY foo bar.lock renv.lock` parses as src=foo, dst=bar.lock). + bad <- tempfile(pattern = "lock with space ") + file.copy(the_lockfile, bad) + on.exit(unlink(bad), add = TRUE) + expect_error( + dock_from_renv( + lockfile = bad, + FROM = "rocker/verse", + user = NULL, + renv_version = "0.0.0" + ), + "lockfile" + ) +}) + unlink(dir_build, recursive = TRUE)