From a61d0c43074b192731a162c37397bcc8d361d8dd Mon Sep 17 00:00:00 2001 From: Deyan Ginev Date: Tue, 21 Apr 2026 08:15:24 -0400 Subject: [PATCH 1/6] =?UTF-8?q?lib:=20expose=20libexslt=20=E2=80=94=20regi?= =?UTF-8?q?ster=5Fexslt=20safe=20wrapper=20+=20bindings?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit LaTeXML stylesheets rely on EXSLT extension functions (str:tokenize, math:*, set:*, date:*). Consumers currently have to reach for their own extern "C" { fn exsltRegisterAll(); } unsafe declaration; that's unpleasant and duplicates the crate's FFI policy outside the wrapper. Changes: * build.rs — also look up libexslt via pkg-config, falling back to `cargo:rustc-link-lib=dylib=exslt` so systems with libexslt on the default search path still link when pkg-config is unhelpful. * src/bindings.rs — add `pub fn exsltRegisterAll();` inside a new extern "C" block, with a block comment documenting purpose and idempotence. * src/lib.rs — new top-level `register_exslt()` Once-guarded safe wrapper. Application code now calls this exactly once on startup instead of writing its own unsafe FFI. No behaviour change for existing callers beyond the new link-time dependency on libexslt; downstream consumers can now drop their extern "C" blocks for exsltRegisterAll. --- build.rs | 8 ++++++++ src/bindings.rs | 9 +++++++++ src/lib.rs | 14 ++++++++++++++ 3 files changed, 31 insertions(+) diff --git a/build.rs b/build.rs index ecba0370..36f5ed2e 100644 --- a/build.rs +++ b/build.rs @@ -5,6 +5,14 @@ fn main() { if find_library("libxslt").is_err() { panic!("Could not find libxslt using pkg-config"); } + // libexslt provides the str:*, math:*, set:*, date:* extension functions + // used by many stylesheets. We need to link it so `exsltRegisterAll` is + // resolvable. If pkg-config can't find it (e.g. minimal installs), we + // fall back to a plain `cargo:rustc-link-lib=dylib=exslt` so systems + // with libexslt on the default search path still link. 
+ if find_library("libexslt").is_err() { + println!("cargo:rustc-link-lib=dylib=exslt"); + } // // The bindgen::Builder is the main entry point // // to bindgen, and lets you build up options for // // the resulting bindings. diff --git a/src/bindings.rs b/src/bindings.rs index 4cdeffaa..6ac858bf 100644 --- a/src/bindings.rs +++ b/src/bindings.rs @@ -5106,6 +5106,15 @@ extern "C" { extern "C" { pub fn xslDropCall(); } + +// EXSLT extensions — str:*, math:*, set:*, date:*. Needed for the +// LaTeXML stylesheets that use `str:tokenize`, `math:*`, etc. The +// function links against libexslt (see build.rs); it must be called +// before any xsltApplyStylesheet invocation. Safe to call more than +// once — libexslt guards against re-registration internally. +extern "C" { + pub fn exsltRegisterAll(); +} pub type __builtin_va_list = [__va_list_tag; 1usize]; #[repr(C)] #[derive(Debug, Copy, Clone)] diff --git a/src/lib.rs b/src/lib.rs index df1a3669..890e65c9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -11,3 +11,17 @@ pub mod bindings; pub mod parser; /// Stylesheet functionality for Document transformation. pub mod stylesheet; + +/// Register the full set of EXSLT extension functions (str:*, math:*, +/// set:*, date:*) into libxslt's global extension registry. Must be +/// called before any stylesheet-application that uses EXSLT functions +/// (LaTeXML's stylesheets, for example, rely on `str:tokenize`). Safe to +/// call more than once — internally guarded by `std::sync::Once`, and +/// libexslt itself tolerates re-registration. 
+pub fn register_exslt() { + use std::sync::Once; + static REGISTER: Once = Once::new(); + REGISTER.call_once(|| unsafe { + bindings::exsltRegisterAll(); + }); +} From 10a6fe0d561b83ae97eb1f0cf175cf65d9b42602 Mon Sep 17 00:00:00 2001 From: Deyan Ginev Date: Wed, 22 Apr 2026 12:35:43 -0400 Subject: [PATCH 2/6] exslt: auto-register on parse + release polish for v0.1.3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up on a61d0c43. Shifts EXSLT from opt-in to default-on, since the crate already links libexslt unconditionally — the previous opt-in wrapper paid the full cost without the default benefit. `xsltproc` and other mainstream XSLT tooling enable EXSLT by default for the same reason. Changes: * src/parser.rs — `parse_file` and `parse_bytes` call `register_exslt()` on entry; the `Once` guard makes the FFI fire exactly once per process. Stylesheets that reference `str:tokenize` et al. now Just Work without ceremony at the call site. * src/lib.rs — doc comment on `register_exslt` reframed: the function is now an idempotent manual hook for deterministic early init (tests, embedders), not a required call. Thread-safety credited to `std::sync::Once`. * build.rs — symmetric pkg-config handling for libxslt and libexslt. Both probe via pkg-config and fall back to `cargo:rustc-link-lib` on miss, instead of libxslt panicking while libexslt falls back. * src/bindings.rs — `MANUAL EDIT` marker on the `exsltRegisterAll` extern block so a future bindgen regeneration doesn't silently drop it. * tests/base_tests.rs — new `exslt_str_tokenize_auto_registers` regression test, self-contained (inline XSL + a trivial inline XML source via `parse_bytes` / `parse_string`, no fixture files). Deliberately avoids a manual `register_exslt()` call so it would fail if auto-registration regressed. * Cargo.toml — version bumped 0.1.2 → 0.1.3. 
* CHANGELOG.md — 0.1.3 section dated 2026-22-04 with release notes; new empty `[0.1.4] (in development)` header opened above. Verified: `cargo clippy --all-targets -- -D warnings` is clean; `cargo test` passes all 16 bindings-layout tests and 3 base tests (including the new EXSLT test). Co-Authored-By: Claude Opus 4.7 (1M context) --- CHANGELOG.md | 7 ++++++- Cargo.toml | 2 +- build.rs | 13 +++++++------ src/bindings.rs | 11 ++++++----- src/lib.rs | 18 +++++++++++++----- src/parser.rs | 3 +++ tests/base_tests.rs | 40 ++++++++++++++++++++++++++++++++++++++++ 7 files changed, 76 insertions(+), 18 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 42de6954..a66cb0ae 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,13 @@ # Change Log -## [0.1.3] (in development) +## [0.1.4] (in development) +## [0.1.3] 2026-22-04 + +* Link against `libexslt` (via pkg-config, with a `-lexslt` fallback) and auto-register the EXSLT extension functions (`str:*`, `math:*`, `set:*`, `date:*`) on the first `parser::parse_file` / `parser::parse_bytes` call, matching `xsltproc`'s default behaviour. +* New public `libxslt::register_exslt()` — an idempotent, thread-safe manual hook for callers that want deterministic early init. Internally guarded by `std::sync::Once`. + ## [0.1.2] 2021-26-11 * Added `Parser::parse_bytes` and the ability to give parameters to `stylesheet::transform`, thanks @antmelnyk! diff --git a/Cargo.toml b/Cargo.toml index f0250a3a..f2aca237 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "libxslt" -version = "0.1.2" +version = "0.1.3" edition = "2021" authors = ["Deyan Ginev "] description = "A Rust wrapper for libxslt - The XSLT C library for GNOME" diff --git a/build.rs b/build.rs index 36f5ed2e..bc716486 100644 --- a/build.rs +++ b/build.rs @@ -2,14 +2,15 @@ extern crate pkg_config; use pkg_config::find_library; fn main() { + // For both libxslt and libexslt we first ask pkg-config; if that fails + // (e.g. 
minimal installs without the .pc files) we emit a plain + // `cargo:rustc-link-lib=dylib=…` so systems with the library on the + // default linker search path still link. libexslt provides the str:*, + // math:*, set:*, date:* extension functions used by many stylesheets — + // it is required so `exsltRegisterAll` is resolvable. if find_library("libxslt").is_err() { - panic!("Could not find libxslt using pkg-config"); + println!("cargo:rustc-link-lib=dylib=xslt"); } - // libexslt provides the str:*, math:*, set:*, date:* extension functions - // used by many stylesheets. We need to link it so `exsltRegisterAll` is - // resolvable. If pkg-config can't find it (e.g. minimal installs), we - // fall back to a plain `cargo:rustc-link-lib=dylib=exslt` so systems - // with libexslt on the default search path still link. if find_library("libexslt").is_err() { println!("cargo:rustc-link-lib=dylib=exslt"); } diff --git a/src/bindings.rs b/src/bindings.rs index 6ac858bf..3b5f6820 100644 --- a/src/bindings.rs +++ b/src/bindings.rs @@ -5107,11 +5107,12 @@ extern "C" { pub fn xslDropCall(); } -// EXSLT extensions — str:*, math:*, set:*, date:*. Needed for the -// LaTeXML stylesheets that use `str:tokenize`, `math:*`, etc. The -// function links against libexslt (see build.rs); it must be called -// before any xsltApplyStylesheet invocation. Safe to call more than -// once — libexslt guards against re-registration internally. +// MANUAL EDIT — not produced by bindgen; re-add after regenerating +// this file. EXSLT extensions — str:*, math:*, set:*, date:*. Needed +// for stylesheets that use `str:tokenize`, `math:*`, etc. The function +// links against libexslt (see build.rs); it must be called before any +// xsltApplyStylesheet invocation. Safe to call more than once — +// libexslt tolerates re-registration internally. 
extern "C" { pub fn exsltRegisterAll(); } diff --git a/src/lib.rs b/src/lib.rs index 890e65c9..d529b505 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,11 +13,19 @@ pub mod parser; pub mod stylesheet; /// Register the full set of EXSLT extension functions (str:*, math:*, -/// set:*, date:*) into libxslt's global extension registry. Must be -/// called before any stylesheet-application that uses EXSLT functions -/// (LaTeXML's stylesheets, for example, rely on `str:tokenize`). Safe to -/// call more than once — internally guarded by `std::sync::Once`, and -/// libexslt itself tolerates re-registration. +/// set:*, date:*) into libxslt's global extension registry. +/// +/// You do not normally need to call this: [`parser::parse_file`] and +/// [`parser::parse_bytes`] invoke it on the first stylesheet load, so +/// EXSLT is available to every stylesheet this crate parses. The +/// function is exposed for callers who want deterministic early init +/// (tests, embedding frameworks, or code paths that construct +/// stylesheets through other bindings). +/// +/// Safe to call more than once and from multiple threads: concurrent +/// callers are serialized by `std::sync::Once`, so `exsltRegisterAll` +/// runs exactly once per process. libexslt additionally tolerates +/// re-registration as a belt-and-braces measure. pub fn register_exslt() { use std::sync::Once; static REGISTER: Once = Once::new(); diff --git a/src/parser.rs b/src/parser.rs index 2fdcf93f..4df3e746 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -3,11 +3,13 @@ use std::path::Path; use crate::bindings::{xsltParseStylesheetDoc, xsltParseStylesheetFile}; use crate::libxml::bindings::xmlReadMemory; +use crate::register_exslt; use crate::stylesheet::Stylesheet; /// Load an XSLT stylesheet from (typically `.xsl`) file. 
pub fn parse_file(path_str: &str) -> Result { + register_exslt(); let path = Path::new(path_str); if !path.is_file() { Err(format!( @@ -29,6 +31,7 @@ pub fn parse_file(path_str: &str) -> Result { /// Load an XSLT stylesheet from UTF-8 string in byte format pub fn parse_bytes(file_string_as_bytes: Vec, url: &str) -> Result { + register_exslt(); unsafe { let xsl_file_string_len = file_string_as_bytes.len() as i32; let xsl_file_c_str = CString::new(file_string_as_bytes).unwrap(); diff --git a/tests/base_tests.rs b/tests/base_tests.rs index 64d579ae..d94bea0e 100644 --- a/tests/base_tests.rs +++ b/tests/base_tests.rs @@ -69,3 +69,43 @@ fn from_string_bytes_builder() { let new_len = new_serialized.len(); assert!(new_len > 1500); } + +#[test] +/// Apply a stylesheet that uses `str:tokenize` (EXSLT) to verify that +/// parser-triggered auto-registration of libexslt works end-to-end. +/// Deliberately does *not* call `register_exslt()` manually — if it +/// did, the test would pass even if auto-registration regressed. +fn exslt_str_tokenize_auto_registers() { + const XSL: &[u8] = br#" + + + + + + + + + +"#; + + let source = XMLParser::default() + .parse_string("") + .expect("parse trivial source xml"); + let mut stylesheet = xslt_parser::parse_bytes(XSL.to_vec(), "exslt_tokenize.xsl") + .expect("parse exslt stylesheet"); + + let output = stylesheet + .transform(&source, Vec::new()) + .expect("transform with str:tokenize") + .to_string(); + + for tok in ["a", "b", "c", "d"] { + assert!( + output.contains(tok), + "expected {tok} in EXSLT output, got: {output}" + ); + } +} From fbf2d0641e875ae35c763c11e88276f9b9a02be4 Mon Sep 17 00:00:00 2001 From: Deyan Ginev Date: Wed, 22 Apr 2026 12:37:37 -0400 Subject: [PATCH 3/6] ci,docs: declare libexslt as a required system dependency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After 10a6fe0d, the crate unconditionally links libexslt. 
Reflect that in the two places downstream users actually look: * .travis.yml — add `libexslt-dev` to the apt package list so the existing Linux build matrix keeps passing. * README.md — new "Installation" section listing the development headers (libxml2, libxslt, libexslt) across Debian/Ubuntu, Fedora and macOS. Previously the README said nothing about system deps; this is also the natural spot to land the EXSLT callout. Co-Authored-By: Claude Opus 4.7 (1M context) --- .travis.yml | 1 + README.md | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/.travis.yml b/.travis.yml index a9da1fbf..d637d63d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,6 +10,7 @@ addons: packages: - libxml2-dev - libxslt1-dev + - libexslt-dev after_success: "./scripts/doc-upload.sh" env: global: diff --git a/README.md b/README.md index 77339b29..bbb5d6c2 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,14 @@ Rust wrapper for [libxslt](http://xmlsoft.org/), and a sibling crate to [libxml] The main goal of this project is to benefit from libxslt's maturity and stability while native Rust XSLT crates mature to be near-drop-in replacements. +## Installation + +The crate links against `libxslt` and `libexslt` (for the EXSLT extension functions) via `pkg-config`. Install the development headers before building: + +* Debian / Ubuntu: `apt install libxml2-dev libxslt1-dev libexslt-dev` +* Fedora / RHEL: `dnf install libxml2-devel libxslt-devel` (libexslt ships inside `libxslt-devel`) +* macOS (Homebrew): `brew install libxml2 libxslt` (bundles libexslt) + **Coverage**: This is an infant proof of concept in both coverage and feature richness, **NOT** ready for production use. **Welcome!** With these caveats, the contributors to the project are migrating production work towards Rust and find a continuing reliance on libxslt a helpful relief for initial ports. As such, contributions to this crate are welcome, if your workflow is not yet fully supported. 
From a220c694d86bef1e261a6bf6f3c1f0eb561a5f19 Mon Sep 17 00:00:00 2001 From: Deyan Ginev Date: Wed, 22 Apr 2026 12:41:52 -0400 Subject: [PATCH 4/6] ci: migrate from Travis to GitHub Actions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit travis-ci.org was decommissioned in 2021; the existing .travis.yml has been non-functional for years. Port the Linux build matrix (stable / beta / nightly) to GitHub Actions, keep clippy-on-stable, and retire the dead configuration. * .github/workflows/ci.yml — new. Runs on push to master and on all pull requests. Installs libxml2-dev / libxslt1-dev / libexslt-dev via apt, then `cargo build`, `cargo test`, and (stable only) `cargo clippy --all-targets -- -D warnings`. * .travis.yml — removed. * scripts/doc-upload.sh — removed; it relied on TRAVIS_* env vars and a TRAVIS-era GH_TOKEN secret, so it was dead code. Porting gh-pages deployment is a separate decision (branch, URL, token source) and is deliberately left out of this transition. * Cargo.toml — drop the now-stale `exclude = ["scripts/*"]`. * README.md — swap the Travis build badge for the GitHub Actions CI badge. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/ci.yml | 35 +++++++++++++++++++++++++++++++++++ .travis.yml | 18 ------------------ Cargo.toml | 3 --- README.md | 2 +- scripts/doc-upload.sh | 28 ---------------------------- 5 files changed, 36 insertions(+), 50 deletions(-) create mode 100644 .github/workflows/ci.yml delete mode 100644 .travis.yml delete mode 100755 scripts/doc-upload.sh diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 00000000..1f992151 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,35 @@ +name: CI + +on: + push: + branches: [master] + pull_request: + +jobs: + test: + name: test (${{ matrix.rust }}) + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + rust: [stable, beta, nightly] + steps: + - uses: actions/checkout@v4 + + - name: Install system libraries + run: | + sudo apt-get update + sudo apt-get install -y libxml2-dev libxslt1-dev libexslt-dev + + - name: Install Rust (${{ matrix.rust }}) + uses: dtolnay/rust-toolchain@master + with: + toolchain: ${{ matrix.rust }} + components: clippy + + - run: cargo build --all-targets --verbose + - run: cargo test --all-targets --verbose + + - name: cargo clippy + if: matrix.rust == 'stable' + run: cargo clippy --all-targets -- -D warnings diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index d637d63d..00000000 --- a/.travis.yml +++ /dev/null @@ -1,18 +0,0 @@ -sudo: false -dist: trusty -language: rust -rust: -- stable -- beta -- nightly -addons: - apt: - packages: - - libxml2-dev - - libxslt1-dev - - libexslt-dev -after_success: "./scripts/doc-upload.sh" -env: - global: - - SSL_CERT_FILE=/etc/ssl/certs/ca-certificates.crt - - secure: 
FxgazbjsmCg2KUgenH7Sv+dAsGPXPAeLRHjwI+mybF+bK7u6TMlWa72E6gt6Gz4t36af5LVJQ0O5fmLIdmkQYkGjLLzokzKrkb6GE81N0XZ2tTVV4lMRUS4LoJupyF9n9PhXiPV7e42VWPLVUfxeX76iljsQJQkLwCwH4XL6cTb4ARRciiLZ8EncITYz4kjODf2K7u6/Z7nVVYVaCIQTPaN/wfV4Mz1U4rtqpuXHRHAzurDIbXIDYN0cEcqgWfSpG0sJmbpzwwEp+VWydBETkUemBYoTmTrgi3sN9kp8Yqa/WumGSwLAN0STTtWFw6dNYeZBQ+wZJVxUdBAE+qqUiIfHzykcgvBsxPtV+gGGgvjTo+mN7oCjoYiI92K9sbqzVHBAc/9krEi91qkcz66e1unZg2DjXqpIIvf7QRBndJzKdv5TBJil+dmEPGwdXAbkEPBAOcDDnIeFdhE4cKWI2p0ciOWn1Fz8Pny7JxERVVXRyvwG2D+MdgLe4yPAqe5RL6DsfPrh42ap8NCL9Yu/BQIYI2sHJbVqU3O4FeF7wdjJlLDkEHlP2hmD9XusEixL7ksIARiYciqflE46frU4BoyPxGUulbTS3et6ro0haDJVFcO9Im/4m8gwCfs1HC6y4NmjAuuG0TCn/+SS8jFFowMulbthXpH3v29+LJwNrn4= diff --git a/Cargo.toml b/Cargo.toml index f2aca237..9296175f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,9 +10,6 @@ readme = "README.md" license = "MIT" build = "build.rs" -exclude = [ - "scripts/*" -] [dependencies] libc = "0.2" diff --git a/README.md b/README.md index bbb5d6c2..b423ea03 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -[![Build Status](https://secure.travis-ci.org/KWARC/rust-libxslt.png?branch=master)](http://travis-ci.org/KWARC/rust-libxslt) +[![CI](https://github.com/KWARC/rust-libxslt/actions/workflows/ci.yml/badge.svg)](https://github.com/KWARC/rust-libxslt/actions/workflows/ci.yml) [![API Documentation](https://img.shields.io/badge/docs-API-blue.svg)](http://KWARC.github.io/rust-libxslt/libxslt/index.html) [![License](http://img.shields.io/badge/license-MIT-blue.svg)](https://raw.githubusercontent.com/KWARC/rust-libxslt/master/LICENSE) [![crates.io](https://img.shields.io/crates/v/libxslt.svg)](https://crates.io/crates/libxslt) diff --git a/scripts/doc-upload.sh b/scripts/doc-upload.sh deleted file mode 100755 index 3f8cce22..00000000 --- a/scripts/doc-upload.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/bash -# thanks to the original at: https://github.com/shepmaster/sxd-document/blob/master/deploy_gh_pages.sh -set -eux - -if [[ "${TRAVIS_BRANCH}" != 'master' ]] 
|| [[ "${TRAVIS_PULL_REQUEST}" = 'true' ]] || [[ "${TRAVIS_RUST_VERSION}" != 'stable' ]]; then - exit 0 -fi - -cargo doc - -# Add an automatic redirect -repo_name=$(echo "${TRAVIS_REPO_SLUG}" | cut -d '/' -f 2 | sed 's/-/_/') -echo "" > target/doc/index.html - -rm -rf generated-documentation -mv target/doc generated-documentation - -cd generated-documentation - -git init -git config user.name "Travis-CI" -git config user.email "builder@travis" - -git add . -git commit -m "Deployed to Github Pages" - -set +x # Never print the token! -git push --force --quiet "https://${GH_TOKEN}@github.com/${TRAVIS_REPO_SLUG}" master:gh-pages \ No newline at end of file From c377665ebf6b9af0eca95fb8da9bb5ba044eb156 Mon Sep 17 00:00:00 2001 From: Deyan Ginev Date: Wed, 22 Apr 2026 12:48:58 -0400 Subject: [PATCH 5/6] ci: drop libexslt-dev from apt install line MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Modern Debian/Ubuntu (Bookworm, Noble, and later) bundle the libexslt headers, shared library, and pkg-config metadata inside libxslt1-dev. The separate `libexslt-dev` package was dropped — the GHA run for PR #7 failed with `E: Unable to locate package libexslt-dev` on the ubuntu-latest (noble) image. Verified locally: $ dpkg -L libxslt1-dev | grep -E 'exslt|libexslt\.' /usr/include/libexslt /usr/include/libexslt/exslt.h ... /usr/lib/x86_64-linux-gnu/pkgconfig/libexslt.pc Drop `libexslt-dev` from the CI apt line and from the README's per-distro install snippet; note in the README that libexslt now rides along with the libxslt devel package on all three platforms. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/ci.yml | 2 +- README.md | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1f992151..50a5eea5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -19,7 +19,7 @@ jobs: - name: Install system libraries run: | sudo apt-get update - sudo apt-get install -y libxml2-dev libxslt1-dev libexslt-dev + sudo apt-get install -y libxml2-dev libxslt1-dev - name: Install Rust (${{ matrix.rust }}) uses: dtolnay/rust-toolchain@master diff --git a/README.md b/README.md index b423ea03..993e8e36 100644 --- a/README.md +++ b/README.md @@ -9,11 +9,11 @@ The main goal of this project is to benefit from libxslt's maturity and stabilit ## Installation -The crate links against `libxslt` and `libexslt` (for the EXSLT extension functions) via `pkg-config`. Install the development headers before building: +The crate links against `libxslt` and `libexslt` (for the EXSLT extension functions) via `pkg-config`. On modern Debian/Ubuntu, Fedora, and macOS Homebrew, libexslt ships inside the main libxslt development package — a single install is enough: -* Debian / Ubuntu: `apt install libxml2-dev libxslt1-dev libexslt-dev` -* Fedora / RHEL: `dnf install libxml2-devel libxslt-devel` (libexslt ships inside `libxslt-devel`) -* macOS (Homebrew): `brew install libxml2 libxslt` (bundles libexslt) +* Debian / Ubuntu: `apt install libxml2-dev libxslt1-dev` +* Fedora / RHEL: `dnf install libxml2-devel libxslt-devel` +* macOS (Homebrew): `brew install libxml2 libxslt` **Coverage**: This is an infant proof of concept in both coverage and feature richness, **NOT** ready for production use. 
From ffe49e030aced124ea0f9dcd01bbd3c48f79af17 Mon Sep 17 00:00:00 2001 From: Deyan Ginev Date: Wed, 22 Apr 2026 12:52:29 -0400 Subject: [PATCH 6/6] =?UTF-8?q?stylesheet:=20consume=20Document=20in=20tra?= =?UTF-8?q?nsform=20=E2=80=94=20fixes=20#6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit libxslt mutates the input xmlDoc while applying stylesheet-directed whitespace stripping (see https://gitlab.gnome.org/GNOME/libxslt/-/issues/14). The previous signature pub fn transform(&mut self, doc: &Document, ...) handed that C-side mutation a shared Rust reference, which is undefined behaviour reachable from safe code. Switch to consuming ownership: pub fn transform(&mut self, doc: Document, ...) `doc` is dropped at the end of the transform; libxml's `Document` Drop impl frees the underlying xmlDoc. The returned `real_dom` is a separately allocated xmlDocPtr produced by libxslt, so there is no aliasing between input and output. Matches the resolution I proposed on the issue. This is a breaking API change for every caller — but the crate is already cutting a breaking 0.1.3 release (link-time libexslt dep), so the two fit into a single upgrade cycle. Downstreams should replace `transform(&doc, ...)` with `transform(doc, ...)`; make an independent deep copy of the `Document` up front if you need to run it through multiple stylesheets (a handle that still shares the same underlying xmlDoc would reintroduce the aliasing). Verified: `cargo clippy --all-targets -- -D warnings` clean; all 16 bindings-layout and 3 base tests pass. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- CHANGELOG.md | 1 + src/stylesheet.rs | 13 ++++++++++--- tests/base_tests.rs | 6 +++--- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a66cb0ae..f28fdf15 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ * Link against `libexslt` (via pkg-config, with a `-lexslt` fallback) and auto-register the EXSLT extension functions (`str:*`, `math:*`, `set:*`, `date:*`) on the first `parser::parse_file` / `parser::parse_bytes` call, matching `xsltproc`'s default behaviour. * New public `libxslt::register_exslt()` — an idempotent, thread-safe manual hook for callers that want deterministic early init. Internally guarded by `std::sync::Once`. +* **Breaking, soundness fix (#6)**: `Stylesheet::transform` now takes the input `Document` *by value* (`doc: Document`) instead of by shared reference (`doc: &Document`). libxslt can mutate the input while applying stylesheet-controlled whitespace stripping; exposing that mutation through `&Document` was undefined behaviour reachable from safe code. Call sites should pass `source` where they previously passed `&source`; make an independent deep copy of the `Document` up front if you need to transform it through multiple stylesheets. ## [0.1.2] 2021-26-11 diff --git a/src/stylesheet.rs b/src/stylesheet.rs index 958c4a8f..3961967c 100644 --- a/src/stylesheet.rs +++ b/src/stylesheet.rs @@ -30,9 +30,16 @@ impl Stylesheet { self.ptr } - /// Transforms a libxml `Document` per the current stylesheet - pub fn transform(&mut self, doc: &Document, params: Vec<(&str, &str)>) -> Result> { - let ctxt = self.build_context(doc)?; + /// Transforms a libxml `Document` per the current stylesheet. + /// + /// The input `Document` is consumed: libxslt may mutate it while applying + /// stylesheet-directed whitespace stripping, so handing out a shared + /// reference would be unsound (see issue #6). 
`doc` is dropped — and the + /// underlying `xmlDoc` freed — once the transform returns. If you need + /// to transform the same source through several stylesheets, pass each + /// call its own independent deep copy of the `Document`. + pub fn transform(&mut self, doc: Document, params: Vec<(&str, &str)>) -> Result> { + let ctxt = self.build_context(&doc)?; // ctxt.xinclude = 1; // ctxt._private = (void *) wrapper; diff --git a/tests/base_tests.rs b/tests/base_tests.rs index d94bea0e..06dc0edf 100644 --- a/tests/base_tests.rs +++ b/tests/base_tests.rs @@ -26,7 +26,7 @@ fn hello_builder() { let source_len = source.to_string().len(); assert!(source_len > 1000); - let new_doc_result = stylesheet.transform(&source, Vec::new()); + let new_doc_result = stylesheet.transform(source, Vec::new()); assert!(new_doc_result.is_ok()); let new_doc: Document = new_doc_result.unwrap(); let new_serialized = new_doc.to_string_with_options(SaveOptions { @@ -58,7 +58,7 @@ fn from_string_bytes_builder() { ("yearto", "2000") ]; - let new_doc_result = stylesheet.transform(&source, xslt_params); + let new_doc_result = stylesheet.transform(source, xslt_params); assert!(new_doc_result.is_ok()); let new_doc: Document = new_doc_result.unwrap(); let new_serialized = new_doc.to_string_with_options(SaveOptions { @@ -98,7 +98,7 @@ fn exslt_str_tokenize_auto_registers() { .expect("parse exslt stylesheet"); let output = stylesheet - .transform(&source, Vec::new()) + .transform(source, Vec::new()) .expect("transform with str:tokenize") .to_string();