diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 00000000..50a5eea5 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,35 @@ +name: CI + +on: + push: + branches: [master] + pull_request: + +jobs: + test: + name: test (${{ matrix.rust }}) + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + rust: [stable, beta, nightly] + steps: + - uses: actions/checkout@v4 + + - name: Install system libraries + run: | + sudo apt-get update + sudo apt-get install -y libxml2-dev libxslt1-dev + + - name: Install Rust (${{ matrix.rust }}) + uses: dtolnay/rust-toolchain@master + with: + toolchain: ${{ matrix.rust }} + components: clippy + + - run: cargo build --all-targets --verbose + - run: cargo test --all-targets --verbose + + - name: cargo clippy + if: matrix.rust == 'stable' + run: cargo clippy --all-targets -- -D warnings diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index a9da1fbf..00000000 --- a/.travis.yml +++ /dev/null @@ -1,17 +0,0 @@ -sudo: false -dist: trusty -language: rust -rust: -- stable -- beta -- nightly -addons: - apt: - packages: - - libxml2-dev - - libxslt1-dev -after_success: "./scripts/doc-upload.sh" -env: - global: - - SSL_CERT_FILE=/etc/ssl/certs/ca-certificates.crt - - secure: FxgazbjsmCg2KUgenH7Sv+dAsGPXPAeLRHjwI+mybF+bK7u6TMlWa72E6gt6Gz4t36af5LVJQ0O5fmLIdmkQYkGjLLzokzKrkb6GE81N0XZ2tTVV4lMRUS4LoJupyF9n9PhXiPV7e42VWPLVUfxeX76iljsQJQkLwCwH4XL6cTb4ARRciiLZ8EncITYz4kjODf2K7u6/Z7nVVYVaCIQTPaN/wfV4Mz1U4rtqpuXHRHAzurDIbXIDYN0cEcqgWfSpG0sJmbpzwwEp+VWydBETkUemBYoTmTrgi3sN9kp8Yqa/WumGSwLAN0STTtWFw6dNYeZBQ+wZJVxUdBAE+qqUiIfHzykcgvBsxPtV+gGGgvjTo+mN7oCjoYiI92K9sbqzVHBAc/9krEi91qkcz66e1unZg2DjXqpIIvf7QRBndJzKdv5TBJil+dmEPGwdXAbkEPBAOcDDnIeFdhE4cKWI2p0ciOWn1Fz8Pny7JxERVVXRyvwG2D+MdgLe4yPAqe5RL6DsfPrh42ap8NCL9Yu/BQIYI2sHJbVqU3O4FeF7wdjJlLDkEHlP2hmD9XusEixL7ksIARiYciqflE46frU4BoyPxGUulbTS3et6ro0haDJVFcO9Im/4m8gwCfs1HC6y4NmjAuuG0TCn/+SS8jFFowMulbthXpH3v29+LJwNrn4= diff --git a/CHANGELOG.md 
b/CHANGELOG.md index 42de6954..f28fdf15 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,14 @@ # Change Log -## [0.1.3] (in development) +## [0.1.4] (in development) +## [0.1.3] 2026-22-04 + +* Link against `libexslt` (via pkg-config, with a `-lexslt` fallback) and auto-register the EXSLT extension functions (`str:*`, `math:*`, `set:*`, `date:*`) on the first `parser::parse_file` / `parser::parse_bytes` call, matching `xsltproc`'s default behaviour. +* New public `libxslt::register_exslt()` — an idempotent, thread-safe manual hook for callers that want deterministic early init. Internally guarded by `std::sync::Once`. +* **Breaking, soundness fix (#6)**: `Stylesheet::transform` now takes the input `Document` *by value* (`doc: Document`) instead of by shared reference (`doc: &Document`). libxslt can mutate the input while applying stylesheet-controlled whitespace stripping; exposing that mutation through `&Document` was undefined behaviour reachable from safe code. Call sites should pass `source` where they previously passed `&source`; clone the `Document` up front if you need to transform it through multiple stylesheets. + ## [0.1.2] 2021-26-11 * Added `Parser::parse_bytes` and the ability to give parameters to `stylesheet::transform`, thanks @antmelnyk! 
diff --git a/Cargo.toml b/Cargo.toml index f0250a3a..9296175f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "libxslt" -version = "0.1.2" +version = "0.1.3" edition = "2021" authors = ["Deyan Ginev "] description = "A Rust wrapper for libxslt - The XSLT C library for GNOME" @@ -10,9 +10,6 @@ readme = "README.md" license = "MIT" build = "build.rs" -exclude = [ - "scripts/*" -] [dependencies] libc = "0.2" diff --git a/README.md b/README.md index 77339b29..993e8e36 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -[![Build Status](https://secure.travis-ci.org/KWARC/rust-libxslt.png?branch=master)](http://travis-ci.org/KWARC/rust-libxslt) +[![CI](https://github.com/KWARC/rust-libxslt/actions/workflows/ci.yml/badge.svg)](https://github.com/KWARC/rust-libxslt/actions/workflows/ci.yml) [![API Documentation](https://img.shields.io/badge/docs-API-blue.svg)](http://KWARC.github.io/rust-libxslt/libxslt/index.html) [![License](http://img.shields.io/badge/license-MIT-blue.svg)](https://raw.githubusercontent.com/KWARC/rust-libxslt/master/LICENSE) [![crates.io](https://img.shields.io/crates/v/libxslt.svg)](https://crates.io/crates/libxslt) @@ -7,6 +7,14 @@ Rust wrapper for [libxslt](http://xmlsoft.org/), and a sibling crate to [libxml] The main goal of this project is to benefit from libxslt's maturity and stability while native Rust XSLT crates mature to be near-drop-in replacements. +## Installation + +The crate links against `libxslt` and `libexslt` (for the EXSLT extension functions) via `pkg-config`. On modern Debian/Ubuntu, Fedora, and macOS Homebrew, libexslt ships inside the main libxslt development package — a single install is enough: + +* Debian / Ubuntu: `apt install libxml2-dev libxslt1-dev` +* Fedora / RHEL: `dnf install libxml2-devel libxslt-devel` +* macOS (Homebrew): `brew install libxml2 libxslt` + **Coverage**: This is an infant proof of concept in both coverage and feature richness, **NOT** ready for production use. 
**Welcome!** With these caveats, the contributors to the project are migrating production work towards Rust and find a continuing reliance on libxslt a helpful relief for initial ports. As such, contributions to this crate are welcome, if your workflow is not yet fully supported. diff --git a/build.rs b/build.rs index ecba0370..bc716486 100644 --- a/build.rs +++ b/build.rs @@ -2,8 +2,19 @@ extern crate pkg_config; use pkg_config::find_library; fn main() { + // For both libxslt and libexslt we first ask pkg-config; if that fails + // (e.g. minimal installs without the .pc files) we emit a `cargo:warning` + // and a plain `cargo:rustc-link-lib=dylib=…` so systems with the library + // on the default linker search path still link. libexslt provides the + // str:*, math:*, set:*, date:* extension functions used by many + // stylesheets — it is required so `exsltRegisterAll` is resolvable. if find_library("libxslt").is_err() { - panic!("Could not find libxslt using pkg-config"); + println!("cargo:warning=pkg-config could not find libxslt; falling back to -lxslt"); + println!("cargo:rustc-link-lib=dylib=xslt"); + } + if find_library("libexslt").is_err() { + println!("cargo:warning=pkg-config could not find libexslt; falling back to -lexslt"); + println!("cargo:rustc-link-lib=dylib=exslt"); } // // The bindgen::Builder is the main entry point // // to bindgen, and lets you build up options for diff --git a/scripts/doc-upload.sh b/scripts/doc-upload.sh deleted file mode 100755 index 3f8cce22..00000000 --- a/scripts/doc-upload.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/bash -# thanks to the original at: https://github.com/shepmaster/sxd-document/blob/master/deploy_gh_pages.sh -set -eux - -if [[ "${TRAVIS_BRANCH}" != 'master' ]] || [[ "${TRAVIS_PULL_REQUEST}" = 'true' ]] || [[ "${TRAVIS_RUST_VERSION}" != 'stable' ]]; then - exit 0 -fi - -cargo doc - -# Add an automatic redirect -repo_name=$(echo "${TRAVIS_REPO_SLUG}" | cut -d '/' -f 2 | sed 's/-/_/') -echo "" > target/doc/index.html - -rm -rf generated-documentation -mv target/doc generated-documentation - -cd generated-documentation - -git init -git config user.name "Travis-CI" -git config user.email "builder@travis" - 
-git add . -git commit -m "Deployed to Github Pages" - -set +x # Never print the token! -git push --force --quiet "https://${GH_TOKEN}@github.com/${TRAVIS_REPO_SLUG}" master:gh-pages \ No newline at end of file diff --git a/src/bindings.rs b/src/bindings.rs index 4cdeffaa..3b5f6820 100644 --- a/src/bindings.rs +++ b/src/bindings.rs @@ -5106,6 +5106,16 @@ extern "C" { extern "C" { pub fn xslDropCall(); } + +// MANUAL EDIT — not produced by bindgen; re-add after regenerating +// this file. Registers the EXSLT extensions (str:*, math:*, set:*, +// date:*) needed by stylesheets that use `str:tokenize` and friends. +// The symbol comes from libexslt (see build.rs). Prefer the safe +// `crate::register_exslt()` wrapper, which the parser entry points call +// before loading a stylesheet and which runs this once per process. +extern "C" { + pub fn exsltRegisterAll(); +} pub type __builtin_va_list = [__va_list_tag; 1usize]; #[repr(C)] #[derive(Debug, Copy, Clone)] diff --git a/src/lib.rs b/src/lib.rs index df1a3669..d529b505 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -11,3 +11,25 @@ pub mod bindings; pub mod parser; /// Stylesheet functionality for Document transformation. pub mod stylesheet; + +/// Register the full set of EXSLT extension functions (str:*, math:*, +/// set:*, date:*) into libxslt's global extension registry. +/// +/// You do not normally need to call this: [`parser::parse_file`] and +/// [`parser::parse_bytes`] invoke it on the first stylesheet load, so +/// EXSLT is available to every stylesheet this crate parses. The +/// function is exposed for callers who want deterministic early init +/// (tests, embedding frameworks, or code paths that construct +/// stylesheets through other bindings). +/// +/// Safe to call more than once and from multiple threads: concurrent +/// callers are serialized by `std::sync::Once`, so `exsltRegisterAll` +/// runs exactly once per process. libexslt additionally tolerates +/// re-registration as a belt-and-braces measure. 
+pub fn register_exslt() { + static EXSLT_INIT: std::sync::Once = std::sync::Once::new(); + + // SAFETY: `exsltRegisterAll` takes no arguments, and the `Once` guard + // ensures this process enters it at most one time. + EXSLT_INIT.call_once(|| unsafe { bindings::exsltRegisterAll() }); +} diff --git a/src/parser.rs b/src/parser.rs index 2fdcf93f..4df3e746 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -3,11 +3,13 @@ use std::path::Path; use crate::bindings::{xsltParseStylesheetDoc, xsltParseStylesheetFile}; use crate::libxml::bindings::xmlReadMemory; +use crate::register_exslt; use crate::stylesheet::Stylesheet; /// Load an XSLT stylesheet from (typically `.xsl`) file. pub fn parse_file(path_str: &str) -> Result<Stylesheet, String> { + register_exslt(); let path = Path::new(path_str); if !path.is_file() { Err(format!( @@ -29,6 +31,7 @@ pub fn parse_file(path_str: &str) -> Result<Stylesheet, String> { /// Load an XSLT stylesheet from UTF-8 string in byte format pub fn parse_bytes(file_string_as_bytes: Vec<u8>, url: &str) -> Result<Stylesheet, String> { + register_exslt(); unsafe { let xsl_file_string_len = file_string_as_bytes.len() as i32; let xsl_file_c_str = CString::new(file_string_as_bytes).unwrap(); diff --git a/src/stylesheet.rs b/src/stylesheet.rs index 958c4a8f..3961967c 100644 --- a/src/stylesheet.rs +++ b/src/stylesheet.rs @@ -30,9 +30,16 @@ impl Stylesheet { self.ptr } - /// Transforms a libxml `Document` per the current stylesheet - pub fn transform(&mut self, doc: &Document, params: Vec<(&str, &str)>) -> Result<Document, Box<dyn Error>> { - let ctxt = self.build_context(doc)?; + /// Transforms a libxml `Document` per the current stylesheet. + /// + /// The input `Document` is consumed: libxslt may mutate it while applying + /// stylesheet-directed whitespace stripping, so handing out a shared + /// reference would be unsound (see issue #6). `doc` is dropped once the + /// transform returns. To run the same source through several stylesheets, + /// hand each call its own independent copy; a handle that merely shares + /// the underlying `xmlDoc` would reintroduce the aliasing. 
+ pub fn transform(&mut self, doc: Document, params: Vec<(&str, &str)>) -> Result<Document, Box<dyn Error>> { + let ctxt = self.build_context(&doc)?; // ctxt.xinclude = 1; // ctxt._private = (void *) wrapper; diff --git a/tests/base_tests.rs b/tests/base_tests.rs index 64d579ae..06dc0edf 100644 --- a/tests/base_tests.rs +++ b/tests/base_tests.rs @@ -26,7 +26,7 @@ fn hello_builder() { let source_len = source.to_string().len(); assert!(source_len > 1000); - let new_doc_result = stylesheet.transform(&source, Vec::new()); + let new_doc_result = stylesheet.transform(source, Vec::new()); assert!(new_doc_result.is_ok()); let new_doc: Document = new_doc_result.unwrap(); let new_serialized = new_doc.to_string_with_options(SaveOptions { @@ -58,7 +58,7 @@ fn from_string_bytes_builder() { ("yearto", "2000") ]; - let new_doc_result = stylesheet.transform(&source, xslt_params); + let new_doc_result = stylesheet.transform(source, xslt_params); assert!(new_doc_result.is_ok()); let new_doc: Document = new_doc_result.unwrap(); let new_serialized = new_doc.to_string_with_options(SaveOptions { @@ -69,3 +69,43 @@ fn from_string_bytes_builder() { let new_len = new_serialized.len(); assert!(new_len > 1500); } + +#[test] +/// Apply a stylesheet that uses `str:tokenize` (EXSLT) to verify that +/// parser-triggered auto-registration of libexslt works end-to-end. +/// Deliberately does *not* call `register_exslt()` manually — if it +/// did, the test would pass even if auto-registration regressed. 
+fn exslt_str_tokenize_auto_registers() { + const XSL: &[u8] = br#"<?xml version="1.0"?> +<xsl:stylesheet version="1.0" + xmlns:xsl="http://www.w3.org/1999/XSL/Transform" + xmlns:str="http://exslt.org/strings" + extension-element-prefixes="str"> + <xsl:template match="/"> + <out> + <xsl:for-each select="str:tokenize('a,b,c,d', ',')"> + <tok><xsl:value-of select="."/></tok> + </xsl:for-each> + </out> + </xsl:template> +</xsl:stylesheet> +"#; + + let source = XMLParser::default() + .parse_string("<root/>") + .expect("parse trivial source xml"); + let mut stylesheet = xslt_parser::parse_bytes(XSL.to_vec(), "exslt_tokenize.xsl") + .expect("parse exslt stylesheet"); + + let output = stylesheet + .transform(source, Vec::new()) + .expect("transform with str:tokenize") + .to_string(); + + for tok in ["a", "b", "c", "d"] { + assert!( + output.contains(&format!("<tok>{tok}</tok>")), + "expected <tok>{tok}</tok> in EXSLT output, got: {output}" + ); + } +}