diff --git a/conformance.toml b/conformance.toml
index cc40d69..8e212d6 100644
--- a/conformance.toml
+++ b/conformance.toml
@@ -29,7 +29,7 @@
 
 [manifest]
 implementation = "openarmature-python"
-spec_pin = "v0.24.0"
+spec_pin = "v0.26.0"
 
 # Status values:
 #   implemented   — shipped behavior matches the proposal's contract
@@ -150,8 +150,8 @@ status = "textual-only"
 since = "0.9.0"
 note = "Drain snapshot semantic and timeout-input validation already implemented as part of the proposal 0010 impl PR (v0.9.0); no additional module-level work needed."
 
-# Spec v0.23.0 + v0.24.0 batch (proposals 0031, 0032). Both proposals
-# have impl work landing across the v0.10.0 release cycle; status
+# Spec v0.23.0-v0.26.0 batch (proposals 0031, 0032, 0033, 0034). All
+# four have impl work landing across the v0.10.0 release cycle; status
 # stays `not-yet` until the release PR flips them to `implemented`
 # with `since = "0.10.0"`. The pinned spec submodule advances ahead
 # of the impl status because newer fixtures need to be visible to
@@ -161,3 +161,9 @@ status = "not-yet"
 
 [proposals."0032"]
 status = "not-yet"
+
+[proposals."0033"]
+status = "not-yet"
+
+[proposals."0034"]
+status = "not-yet"
diff --git a/docs/concepts/prompts.md b/docs/concepts/prompts.md
index 41aea4f..abede54 100644
--- a/docs/concepts/prompts.md
+++ b/docs/concepts/prompts.md
@@ -100,7 +100,79 @@ a working-but-wrong prompt, often invisibly. If you need
 lenient behavior, wrap your variables in your own defaulting
 layer before passing them to `render()`.
 
-The Python implementation uses Jinja2's `StrictUndefined`.
+The Python implementation uses Jinja2's `StrictUndefined`. To opt
+out, pass a different `Undefined` subclass at `PromptManager`
+construction:
+
+```python
+import jinja2
+
+manager = PromptManager(backend, jinja_undefined=jinja2.Undefined)
+```
+
+`jinja2.Undefined` renders a missing variable as the empty string;
+`jinja2.ChainableUndefined` is the other common opt-out for
+templates that walk nested attributes. Reach for these only when the
+strict default is actively wrong for your workflow.
+
+## Per-prompt sampling parameters
+
+A `Prompt` carries an optional `sampling` field — a `SamplingConfig`
+sub-record mirroring `RuntimeConfig`'s seven declared fields
+(`temperature`, `max_tokens`, `top_p`, `seed`, `frequency_penalty`,
+`presence_penalty`, `stop_sequences`) plus the extras pass-through
+bag. Backends that source per-prompt config (Langfuse's
+`prompt.config`, a filesystem sidecar) populate it; backends that
+don't leave it `None`.
+
+```python
+prompt = await manager.fetch("classify", "production")
+if prompt.sampling is not None:
+    response = await provider.complete(messages, config=prompt.sampling)
+else:
+    response = await provider.complete(messages)
+```
+
+`SamplingConfig` is a subclass of `RuntimeConfig`, so it can be passed
+as the `config=` argument of `provider.complete()` without translation.
+`PromptResult.sampling` carries the value verbatim from the source
+`Prompt`; rendering doesn't touch it.
+
+The `FilesystemPromptBackend` reads sidecar config when constructed
+with `sampling_source="per-prompt-sidecar"` (reading
+`<root>/<label>/<name>.config.json` next to each template) or
+`sampling_source="unified"` (reading `<root>/prompt_configs.json`
+once at construction, keyed by prompt name).
+
+## Deployment-time label routing with `LabelResolver`
+
+`PromptManager.fetch(name)` without an explicit `label` asks the
+configured `LabelResolver` for one, or uses `"production"` when no
+resolver is configured. This lets one prompt be A/B-tested or canaried
+without code changes — edit the resolver's data, not the call sites.
+
+```python
+from openarmature.prompts import MappingLabelResolver, PromptManager
+
+resolver = MappingLabelResolver({
+    "default": "production",
+    "experimental_classifier": "staging",
+    "extract_claims": "variant-a",
+})
+manager = PromptManager(backend, label_resolver=resolver)
+
+# Resolver returns "staging" — staging template fetched.
+classify = await manager.fetch("experimental_classifier")
+# Resolver returns "production" (the default) — production fetched.
+greet = await manager.fetch("greet")
+# Explicit label bypasses the resolver entirely.
+audit = await manager.fetch("greet", "audit")
+```
+
+`LabelResolver` is a Protocol with one method, `resolve(name) -> str`.
+The reference implementation is `MappingLabelResolver`, but any
+class with the right shape works (a JSON-file-backed resolver, a
+remote-config-service-backed resolver).
 
 ## Composite backends and fallback
 
@@ -212,6 +284,18 @@ Nesting is innermost-wins. If you activate a result inside
 another active result, the inner one wins for the duration
 of the inner block.
 
+### Backend-keyed observability entity references
+
+A `Prompt` also carries an optional `observability_entities`
+mapping of backend-keyed references to the first-class entities
+under which the prompt is registered in observability backends. The
+spec-normative key is `langfuse_prompt`, holding the Langfuse SDK
+`Prompt` reference. The Langfuse observer (when it ships) reads
+this field to establish the native Generation → Prompt link
+rather than reaching into the implementation-defined `metadata`
+mapping. Backends that don't surface such references leave the
+field `None`.
+
 ## Determinism and content-addressed caching
 
 `render` is deterministic: same `Prompt`, same `variables` →
diff --git a/openarmature-spec b/openarmature-spec
index ca30cf1..53a91d6 160000
--- a/openarmature-spec
+++ b/openarmature-spec
@@ -1 +1 @@
-Subproject commit ca30cf15bf61cd9e83f18764ceaff869c89eef62
+Subproject commit 53a91d60c253dbdc66f443fc9b9710c6b70aa86d
diff --git a/pyproject.toml b/pyproject.toml
index 068d9d3..796bc96 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -51,7 +51,7 @@ Specification = "https://github.com/LunarCommand/openarmature-spec"
 openarmature = "openarmature.cli:main"
 
 [tool.openarmature]
-spec_version = "0.24.0"
+spec_version = "0.26.0"
 
 [dependency-groups]
 dev = [
diff --git a/src/openarmature/AGENTS.md b/src/openarmature/AGENTS.md
index baeeb91..9daadfb 100644
--- a/src/openarmature/AGENTS.md
+++ b/src/openarmature/AGENTS.md
@@ -1,6 +1,6 @@
 # OpenArmature — Agent documentation
 
-*This is the agent guide bundled with the openarmature Python package, version 0.9.0 (spec v0.24.0). For the full docs site see [openarmature.ai](https://openarmature.ai). For the canonical spec text see [openarmature.org/capabilities](https://openarmature.org/capabilities/). For project-specific conventions for the code you're editing, see the host project's `AGENTS.md` or `CLAUDE.md`.*
+*This is the agent guide bundled with the openarmature Python package, version 0.9.0 (spec v0.26.0). For the full docs site see [openarmature.ai](https://openarmature.ai). For the canonical spec text see [openarmature.org/capabilities](https://openarmature.org/capabilities/). For project-specific conventions for the code you're editing, see the host project's `AGENTS.md` or `CLAUDE.md`.*
 
 ## TL;DR
 
@@ -10,7 +10,7 @@ OpenArmature is a workflow framework for LLM pipelines and tool-calling agents 
 
 ## Capability contracts
 
-_Sourced from openarmature-spec v0.24.0. Each entry below reproduces §1 (Purpose) and §2 (Concepts) of the capability's `spec.md`. For the full spec text (execution model, error semantics, determinism, observer hooks, etc.) see the linked docs site._
+_Sourced from openarmature-spec v0.26.0. Each entry below reproduces §1 (Purpose) and §2 (Concepts) of the capability's `spec.md`. For the full spec text (execution model, error semantics, determinism, observer hooks, etc.) see the linked docs site._
 
 ### Capability: `graph-engine`
 
diff --git a/src/openarmature/__init__.py b/src/openarmature/__init__.py
index 01057e9..07b3b47 100644
--- a/src/openarmature/__init__.py
+++ b/src/openarmature/__init__.py
@@ -25,4 +25,4 @@
 """
 
 __version__ = "0.9.0"
-__spec_version__ = "0.24.0"
+__spec_version__ = "0.26.0"
diff --git a/src/openarmature/llm/providers/openai.py b/src/openarmature/llm/providers/openai.py
index 93cc1d2..3308f65 100644
--- a/src/openarmature/llm/providers/openai.py
+++ b/src/openarmature/llm/providers/openai.py
@@ -59,8 +59,13 @@
     current_namespace_prefix,
 )
 from openarmature.observability.llm_event import LlmEventPayload
-from openarmature.prompts.context import current_prompt_group, current_prompt_result
 
+# ``current_prompt_group`` / ``current_prompt_result`` are imported
+# lazily inside :meth:`OpenAIProvider.complete` to avoid a module-load
+# cycle: ``openarmature.prompts.prompt`` imports ``RuntimeConfig`` from
+# this package (for the ``SamplingConfig`` subclass), so a top-level
+# import here would re-enter prompts.prompt before its types finish
+# defining.
 from ..errors import (
     LlmProviderError,
     ProviderAuthentication,
@@ -310,6 +315,12 @@ async def complete(
         # from inside the observer in the worker task returns ``None``
         # even when a node body opened a ``with_active_prompt`` block.
         # Snapshot here; the observer reads from the event payload.
+        # Lazy import: see module-level comment for the cycle reason.
+        from openarmature.prompts.context import (
+            current_prompt_group,
+            current_prompt_result,
+        )
+
         active_prompt = current_prompt_result()
         active_prompt_group = current_prompt_group()
         # Payload data the §5.5.1 / §5.5.2 / §5.5.3 attributes are
diff --git a/src/openarmature/prompts/__init__.py b/src/openarmature/prompts/__init__.py
index b14dccb..83ea053 100644
--- a/src/openarmature/prompts/__init__.py
+++ b/src/openarmature/prompts/__init__.py
@@ -20,15 +20,19 @@
 )
 from .group import PromptGroup
 from .hashing import compute_rendered_hash, compute_template_hash
+from .label_resolver import SPEC_FALLBACK_LABEL, LabelResolver, MappingLabelResolver
 from .manager import PromptManager
-from .prompt import Prompt, PromptResult
+from .prompt import Prompt, PromptResult, SamplingConfig
 
 __all__ = [
     "PROMPT_NOT_FOUND",
     "PROMPT_RENDER_ERROR",
     "PROMPT_STORE_UNAVAILABLE",
     "PROMPT_TRANSIENT_CATEGORIES",
+    "SPEC_FALLBACK_LABEL",
     "FilesystemPromptBackend",
+    "LabelResolver",
+    "MappingLabelResolver",
     "Prompt",
     "PromptBackend",
     "PromptError",
@@ -38,6 +42,7 @@
     "PromptRenderError",
     "PromptResult",
     "PromptStoreUnavailable",
+    "SamplingConfig",
     "compute_rendered_hash",
     "compute_template_hash",
     "current_prompt_group",
diff --git a/src/openarmature/prompts/backends/filesystem.py b/src/openarmature/prompts/backends/filesystem.py
index 466f910..7bc5bb7 100644
--- a/src/openarmature/prompts/backends/filesystem.py
+++ b/src/openarmature/prompts/backends/filesystem.py
@@ -3,54 +3,155 @@
 from __future__ import annotations
 
 import asyncio
+import json
 from datetime import UTC, datetime
 from pathlib import Path
+from typing import Any, Literal, cast
 
 from ..errors import PromptNotFound, PromptStoreUnavailable
 from ..hashing import compute_template_hash
-from ..prompt import Prompt
+from ..prompt import Prompt, SamplingConfig
 
 
 class FilesystemPromptBackend:
     """Reads prompts from a directory tree.
 
-    Layout convention: ``<root>/<label>/<name>.j2``. The ``label``
-    subdirectory keeps name-collisions across labels distinct
-    (e.g., ``prompts/production/greeting.j2`` and
-    ``prompts/staging/greeting.j2``). Spec §5 permits filesystem
-    backends to interpret label as "a subdirectory or filename
-    suffix"; this backend picks subdirectory.
+    Two layouts are supported via the constructor:
+
+    - ``layout="per-label"`` (default): ``<root>/<label>/<name>.j2``.
+      The ``label`` subdirectory keeps name-collisions across labels
+      distinct (e.g., ``prompts/production/greeting.j2`` and
+      ``prompts/staging/greeting.j2``). Spec §5 permits filesystem
+      backends to interpret label as "a subdirectory or filename
+      suffix"; this is the subdirectory variant.
+    - ``layout="flat"``: ``<root>/<name>.j2``. The same template
+      is returned regardless of which label was requested; the
+      Prompt's ``label`` field is the requested label verbatim.
+      Useful when label-based A/B routing is driven by a
+      :class:`~openarmature.prompts.label_resolver.LabelResolver`
+      rather than a directory tree.
 
     The ``version`` field is derived from the template content hash
     (first 16 hex chars of the SHA-256, ~64 bits) so two file
     contents map deterministically to two distinct version strings
-    without needing a sidecar metadata file. Per spec §3, this
-    satisfies the "stable identifier" requirement. The 16-char
-    prefix puts the birthday-paradox collision boundary at ~4B
-    distinct templates; well past any realistic single-backend
-    exposure. Higher-scale backends should widen further or pick a
-    different stable identifier (semver from a sidecar metadata
-    file, git short-SHAs, etc.).
-
-    This backend reads from disk on every fetch; no caching. A
-    caching backend (e.g., openarmature-langfuse) that returns
-    cached results MUST preserve the original ``fetched_at`` on the
-    returned Prompt, not the cache-hit time, per spec §3.
+    without needing a sidecar metadata file. The 16-char prefix puts
+    the birthday-paradox collision boundary at ~4B distinct templates,
+    well past any realistic single-backend exposure.
+
+    Optional ``sampling_source`` populates ``Prompt.sampling`` from a
+    sidecar file, per the spec §5 informative filesystem convention:
+
+    - ``"none"`` (default): never populate ``sampling``.
+    - ``"per-prompt-sidecar"``: read ``<name>.config.json`` from the
+      same directory as the template (i.e., ``<root>/<label>/<name>.config.json``
+      under ``per-label`` layout, ``<root>/<name>.config.json`` under
+      ``flat``). A missing sidecar leaves ``sampling = None``.
+    - ``"unified"``: read ``<root>/prompt_configs.json`` at backend
+      construction time and key into it by prompt name. A name not in
+      the unified map leaves ``sampling = None``. Construction raises
+      :class:`PromptStoreUnavailable` if the file exists but cannot
+      be parsed.
+
+    This backend reads templates from disk on every fetch; no caching.
     """
 
-    def __init__(self, root: Path) -> None:
+    def __init__(
+        self,
+        root: Path,
+        *,
+        layout: Literal["per-label", "flat"] = "per-label",
+        sampling_source: Literal["none", "per-prompt-sidecar", "unified"] = "none",
+    ) -> None:
         self._root = root
+        self._layout = layout
+        self._sampling_source = sampling_source
+        # Unified mode: load and parse at construction so the cost is
+        # paid once. Backend instances are typically long-lived
+        # process-wide singletons, so a single read on startup is
+        # cheaper than re-reading per fetch. Per-prompt values typed
+        # ``Any`` rather than ``dict[str, Any]`` so the runtime
+        # isinstance guard in ``_resolve_sampling`` remains meaningful
+        # — JSON files can have non-dict values under top-level keys.
+        self._unified_sampling: dict[str, Any] | None = None
+        if sampling_source == "unified":
+            self._unified_sampling = self._load_unified_configs()
+
+    def _load_unified_configs(self) -> dict[str, Any]:
+        path = self._root / "prompt_configs.json"
+        if not path.exists():
+            return {}
+        try:
+            data = json.loads(path.read_text(encoding="utf-8"))
+        except (OSError, json.JSONDecodeError) as exc:
+            raise PromptStoreUnavailable(
+                f"failed to load unified prompt_configs.json at {path}: {exc}",
+                name="",
+                label="",
+            ) from exc
+        if not isinstance(data, dict):
+            raise PromptStoreUnavailable(
+                f"unified prompt_configs.json at {path} is not a JSON object",
+                name="",
+                label="",
+            )
+        return cast(dict[str, Any], data)
+
+    def _template_path(self, name: str, label: str) -> Path:
+        if self._layout == "flat":
+            return self._root / f"{name}.j2"
+        return self._root / label / f"{name}.j2"
+
+    def _sidecar_path(self, name: str, label: str) -> Path:
+        if self._layout == "flat":
+            return self._root / f"{name}.config.json"
+        return self._root / label / f"{name}.config.json"
+
+    def _resolve_sampling(self, name: str, label: str) -> SamplingConfig | None:
+        if self._sampling_source == "none":
+            return None
+        if self._sampling_source == "unified":
+            assert self._unified_sampling is not None
+            raw = self._unified_sampling.get(name)
+            if raw is None:
+                return None
+            if not isinstance(raw, dict):
+                raise PromptStoreUnavailable(
+                    f"unified prompt_configs.json entry for {name!r} is not a JSON object "
+                    f"(got {type(raw).__name__})",
+                    name=name,
+                    label="",
+                )
+            entry = cast(dict[str, Any], raw)
+            return _sampling_from_dict(entry)
+        # per-prompt-sidecar
+        path = self._sidecar_path(name, label)
+        if not path.exists():
+            return None
+        try:
+            raw = json.loads(path.read_text(encoding="utf-8"))
+        except (OSError, json.JSONDecodeError) as exc:
+            raise PromptStoreUnavailable(
+                f"failed to load sidecar {path} for ({name!r}, {label!r}): {exc}",
+                name=name,
+                label=label,
+            ) from exc
+        if not isinstance(raw, dict):
+            raise PromptStoreUnavailable(
+                f"sidecar {path} is not a JSON object",
+                name=name,
+                label=label,
+            )
+        return _sampling_from_dict(cast(dict[str, Any], raw))
 
     async def fetch(self, name: str, label: str = "production") -> Prompt:
-        """Read ``<root>/<label>/<name>.j2`` and return the prompt.
+        """Read the prompt template and (optionally) its sidecar sampling config.
 
-        Reads on every call (no caching). The returned prompt's
-        ``version`` is the leading 16 hex chars of the template's
-        SHA-256, and ``template_hash`` is the full digest. Raises
-        ``PromptNotFound`` when the file is missing and
-        ``PromptStoreUnavailable`` on any other I/O error.
+        Returns a ``Prompt`` whose ``version`` is the leading 16 hex
+        chars of the template's SHA-256 and ``template_hash`` is the
+        full digest. Raises ``PromptNotFound`` when the template is
+        missing and ``PromptStoreUnavailable`` on other I/O errors.
         """
-        path = self._root / label / f"{name}.j2"
+        path = self._template_path(name, label)
         try:
             template_source = await asyncio.to_thread(path.read_text, encoding="utf-8")
         except FileNotFoundError as exc:
@@ -67,6 +168,7 @@ async def fetch(self, name: str, label: str = "production") -> Prompt:
                 label=label,
             ) from exc
 
+        sampling = await asyncio.to_thread(self._resolve_sampling, name, label)
         template_hash = compute_template_hash(template_source)
         version = template_hash.removeprefix("sha256:")[:16]
         return Prompt(
@@ -76,4 +178,18 @@ async def fetch(self, name: str, label: str = "production") -> Prompt:
             template=template_source,
             template_hash=template_hash,
             fetched_at=datetime.now(UTC),
+            sampling=sampling,
         )
+
+
+def _sampling_from_dict(data: dict[str, Any]) -> SamplingConfig:
+    # Top-level `extras` is flattened so caller-supplied vendor knobs
+    # end up in SamplingConfig's extras-allow bag rather than as a
+    # single literal `extras` key. Matches the YAML conformance-fixture
+    # convention from llm-provider/032 + the spec §5 sidecar example.
+    flat: dict[str, Any] = {k: v for k, v in data.items() if k != "extras"}
+    extras = data.get("extras")
+    if isinstance(extras, dict):
+        for k, v in cast(dict[str, Any], extras).items():
+            flat.setdefault(k, v)
+    return SamplingConfig(**flat)
diff --git a/src/openarmature/prompts/label_resolver.py b/src/openarmature/prompts/label_resolver.py
new file mode 100644
index 0000000..b6c9427
--- /dev/null
+++ b/src/openarmature/prompts/label_resolver.py
@@ -0,0 +1,75 @@
+# Spec §7 (new in proposal 0033): LabelResolver primitive that lets a
+# PromptManager map prompt names to labels at deployment time, without
+# code changes. Three-step resolve precedence is normative; the
+# storage shape behind the resolver is impl-defined (mapping, JSON
+# file, remote service, env vars).
+
+"""LabelResolver Protocol and reference mapping-backed implementation."""
+
+from __future__ import annotations
+
+from collections.abc import Mapping
+from typing import Protocol, runtime_checkable
+
+# Spec §6 step-3 fallback label. Also returned by MappingLabelResolver
+# when its mapping has neither a per-name entry nor a `default` entry.
+SPEC_FALLBACK_LABEL = "production"
+
+# Reserved key in the MappingLabelResolver shape. A `"default"` entry
+# in the mapping is the resolver's default-override (step 2 in the
+# fallback chain); any other key is a per-name override (step 1).
+_DEFAULT_KEY = "default"
+
+
+@runtime_checkable
+class LabelResolver(Protocol):
+    """Resolves a prompt name to the label to fetch under.
+
+    Implementations MUST follow the §7 fallback chain in
+    :meth:`resolve`: per-name override > default override > spec
+    fallback ``"production"``.
+    """
+
+    def resolve(self, name: str) -> str:
+        """Return the label to fetch ``name`` under.
+
+        Synchronous; deterministic for given resolver state.
+        """
+        ...
+
+
+class MappingLabelResolver:
+    """Reference resolver backed by a static name → label mapping.
+
+    The mapping recognizes one reserved key, ``"default"``, as the
+    resolver's default-override; every other key is a per-name
+    override. Construct from a literal dict in code or from a parsed
+    JSON file at startup; the resolver is immutable after
+    construction.
+
+        >>> r = MappingLabelResolver({"default": "production", "experimental": "staging"})
+        >>> r.resolve("experimental")
+        'staging'
+        >>> r.resolve("anything-else")
+        'production'
+    """
+
+    def __init__(self, mapping: Mapping[str, str]) -> None:
+        self._mapping: dict[str, str] = dict(mapping)
+
+    def resolve(self, name: str) -> str:
+        # Step 1: per-name override (any non-`default` key).
+        if name in self._mapping and name != _DEFAULT_KEY:
+            return self._mapping[name]
+        # Step 2: default override (a `default` entry in the mapping).
+        if _DEFAULT_KEY in self._mapping:
+            return self._mapping[_DEFAULT_KEY]
+        # Step 3: spec fallback.
+        return SPEC_FALLBACK_LABEL
+
+
+__all__ = [
+    "SPEC_FALLBACK_LABEL",
+    "LabelResolver",
+    "MappingLabelResolver",
+]
diff --git a/src/openarmature/prompts/manager.py b/src/openarmature/prompts/manager.py
index 76bee30..aca3ee6 100644
--- a/src/openarmature/prompts/manager.py
+++ b/src/openarmature/prompts/manager.py
@@ -14,22 +14,11 @@
 from .backend import PromptBackend
 from .errors import PromptNotFound, PromptRenderError, PromptStoreUnavailable
 from .hashing import compute_rendered_hash
+from .label_resolver import SPEC_FALLBACK_LABEL, LabelResolver
 from .prompt import Prompt, PromptResult
 
 _log = logging.getLogger(__name__)
 
-# Module-level singleton. Stateless given the configuration (no
-# filters, globals, or per-call mutation), and jinja2.Environment is
-# documented as thread-safe for compile + render — so a single
-# shared instance avoids re-parsing the env config on every render
-# call. autoescape disabled by design: render output goes to an LLM
-# API call (plain text), not an HTML response.
-_RENDER_ENV = jinja2.Environment(
-    undefined=jinja2.StrictUndefined,
-    autoescape=False,
-    keep_trailing_newline=True,
-)
-
 
 class PromptManager:
     """Composes one or more PromptBackends and exposes fetch + render.
@@ -37,16 +26,41 @@ class PromptManager:
     Users interact with the manager; backends are an implementation
     detail of construction. The manager owns:
 
-    - ``fetch``: consults backends in order per §8 fallback semantics.
+    - ``fetch``: consults backends in order per §9 (was §8) fallback semantics.
     - ``render``: synchronous local string transform; produces a
       ``PromptResult``.
     - ``get``: convenience: ``render(await fetch(...), variables)``.
+
+    Constructor knobs:
+
+    - ``label_resolver``: optional ``LabelResolver`` consulted by
+      :meth:`fetch` / :meth:`get` when no explicit ``label`` argument
+      is supplied (§6 step-2 of the fallback chain).
+    - ``jinja_undefined``: Jinja ``Undefined`` subclass for render-time
+      variable resolution. Default ``StrictUndefined`` matches spec
+      §8 (was §7); pass ``jinja2.ChainableUndefined`` or any other
+      ``Undefined`` subclass to opt out of strict-by-default rendering.
     """
 
-    def __init__(self, *backends: PromptBackend) -> None:
+    def __init__(
+        self,
+        *backends: PromptBackend,
+        label_resolver: LabelResolver | None = None,
+        jinja_undefined: type[jinja2.Undefined] = jinja2.StrictUndefined,
+    ) -> None:
         if not backends:
             raise ValueError("PromptManager requires at least one backend")
         self._backends: tuple[PromptBackend, ...] = backends
+        self._label_resolver = label_resolver
+        # autoescape disabled by design: render output goes to an LLM
+        # API call (plain text), not an HTML response. The env is
+        # per-manager (was module-level) so jinja_undefined can be
+        # overridden per-instance.
+        self._render_env = jinja2.Environment(
+            undefined=jinja_undefined,
+            autoescape=False,
+            keep_trailing_newline=True,
+        )
         # template_hash → compiled jinja2 Template. Per-manager,
         # unbounded. Correct by construction: template_hash is
         # content-derived, so a backend returning updated content
@@ -55,8 +69,23 @@ def __init__(self, *backends: PromptBackend) -> None:
         # pressure; typical apps have O(10) prompts.
         self._template_cache: dict[str, jinja2.Template] = {}
 
-    async def fetch(self, name: str, label: str = "production") -> Prompt:
-        """Consult composed backends in order, applying §8 fallback.
+    def _resolve_label(self, label: str | None, name: str) -> str:
+        # Spec §6 fallback chain:
+        #   1. Explicit label argument wins (caller pinned it).
+        #   2. Resolver is consulted when one was configured.
+        #   3. Spec fallback "production" when neither applies.
+        if label is not None:
+            return label
+        if self._label_resolver is not None:
+            return self._label_resolver.resolve(name)
+        return SPEC_FALLBACK_LABEL
+
+    async def fetch(self, name: str, label: str | None = None) -> Prompt:
+        """Consult composed backends in order, applying §9 (was §8) fallback.
+
+        Label is resolved per §6's three-step chain: explicit
+        argument > configured ``LabelResolver`` > spec fallback
+        ``"production"``.
 
         - First successful fetch wins; further backends are not consulted.
         - ``PromptNotFound`` from any backend STOPS the chain: the
@@ -66,10 +95,11 @@ async def fetch(self, name: str, label: str = "production") -> Prompt:
           next. After ALL backends are exhausted with unavailable
           failures, the manager raises ``PromptStoreUnavailable``.
         """
+        resolved_label = self._resolve_label(label, name)
         causes: list[BaseException] = []
         for backend in self._backends:
             try:
-                return await backend.fetch(name, label)
+                return await backend.fetch(name, resolved_label)
             except PromptNotFound:
                 raise
             except PromptStoreUnavailable as exc:
@@ -78,7 +108,7 @@ async def fetch(self, name: str, label: str = "production") -> Prompt:
                     "prompt backend %r unavailable for (%r, %r); falling back",
                     backend,
                     name,
-                    label,
+                    resolved_label,
                 )
                 continue
         if not causes:
@@ -95,9 +125,9 @@ async def fetch(self, name: str, label: str = "production") -> Prompt:
                 "PromptManager.fetch internal invariant violated: no backends consulted but loop exhausted"
             )
         raise PromptStoreUnavailable(
-            f"all prompt backends unavailable for ({name!r}, {label!r})",
+            f"all prompt backends unavailable for ({name!r}, {resolved_label!r})",
             name=name,
-            label=label,
+            label=resolved_label,
             backends_tried=[type(b).__name__ for b in self._backends],
             causes=list(causes),
         ) from causes[-1]
@@ -125,7 +155,7 @@ def render(
         try:
             template = self._template_cache.get(prompt.template_hash)
             if template is None:
-                template = _RENDER_ENV.from_string(prompt.template)
+                template = self._render_env.from_string(prompt.template)
                 self._template_cache[prompt.template_hash] = template
             rendered_text = template.render(**variables)
         except jinja2.UndefinedError as exc:
@@ -177,14 +207,25 @@ def render(
             variables=variables,
             fetched_at=prompt.fetched_at,
             rendered_at=datetime.now(UTC),
+            # Shallow defensive copy of the two mutable propagated fields.
+            # Caching backends re-serve the same Prompt instance, so
+            # aliasing would let a caller's top-level mutation of the
+            # result corrupt the backend's cached state.
+            sampling=prompt.sampling.model_copy() if prompt.sampling is not None else None,
+            observability_entities=(
+                dict(prompt.observability_entities) if prompt.observability_entities is not None else None
+            ),
         )
 
     async def get(
         self,
         name: str,
-        label: str = "production",
+        label: str | None = None,
         variables: dict[str, Any] | None = None,
     ) -> PromptResult:
-        """Convenience equivalent to ``render(await fetch(name, label), variables)``."""
+        """Convenience equivalent to ``render(await fetch(name, label), variables)``.
+
+        ``label`` follows the same three-step resolution as :meth:`fetch`.
+        """
         prompt = await self.fetch(name, label)
         return self.render(prompt, variables)
diff --git a/src/openarmature/prompts/prompt.py b/src/openarmature/prompts/prompt.py
index 0ceaee0..094490a 100644
--- a/src/openarmature/prompts/prompt.py
+++ b/src/openarmature/prompts/prompt.py
@@ -8,6 +8,21 @@
 from pydantic import BaseModel, ConfigDict, Field
 
 from openarmature.llm.messages import Message
+from openarmature.llm.response import RuntimeConfig
+
+
+# SamplingConfig mirrors RuntimeConfig's declared-fields-plus-extras
+# shape so `prompt.sampling` splats directly into `provider.complete()`
+# without per-field translation (spec §12 cross-spec touchpoint;
+# proposal 0033). Subclass rather than alias so the type system
+# distinguishes the two names — a fetch returning
+# ``SamplingConfig | None`` is meaningfully different in signatures
+# from a provider call's ``RuntimeConfig`` argument. The subclass is
+# empty today; future divergence (e.g., fields meaningful for prompts-
+# at-rest but not for direct provider calls) lands on SamplingConfig
+# without touching RuntimeConfig.
+class SamplingConfig(RuntimeConfig):
+    """Per-prompt sampling configuration. Shape-compatible with ``RuntimeConfig``."""
 
 
 class Prompt(BaseModel):
@@ -32,6 +47,12 @@ class Prompt(BaseModel):
             When a caching backend serves a cached result,
             ``fetched_at`` MUST reflect the original fetch time, not
             the cache hit time.
+        sampling: Optional per-prompt sampling configuration. Splats
+            into ``provider.complete(config=...)`` without translation.
+        observability_entities: Optional backend-keyed references to
+            first-class entities the prompt has been registered as in
+            observability backends. Spec-normative key:
+            ``langfuse_prompt`` (the Langfuse SDK Prompt-entity ref).
         metadata: Optional backend-supplied metadata.
     """
 
@@ -43,6 +64,8 @@ class Prompt(BaseModel):
     template: str
     template_hash: str
     fetched_at: datetime
+    sampling: SamplingConfig | None = None
+    observability_entities: dict[str, Any] | None = None
     metadata: dict[str, Any] | None = None
 
 
@@ -87,3 +110,7 @@ class PromptResult(BaseModel):
     variables: dict[str, Any]
     fetched_at: datetime
     rendered_at: datetime
+    # Per spec §4: propagated verbatim from the source Prompt.
+    # Rendering does NOT modify or reinterpret either field.
+    sampling: SamplingConfig | None = None
+    observability_entities: dict[str, Any] | None = None
diff --git a/tests/conformance/harness/prompt_management.py b/tests/conformance/harness/prompt_management.py
index e908346..0975fcd 100644
--- a/tests/conformance/harness/prompt_management.py
+++ b/tests/conformance/harness/prompt_management.py
@@ -46,6 +46,10 @@ class FixturePromptSpec(_StrictModel):
     version: str
     template: str
     template_hash: str
+    # Proposal 0033: optional typed sub-record + observability-entities
+    # mapping the mock backend attaches to the returned Prompt.
+    sampling: dict[str, Any] | None = None
+    observability_entities: dict[str, Any] | None = None
 
 
 class FixtureBackendSpec(_StrictModel):
@@ -54,8 +58,16 @@ class FixtureBackendSpec(_StrictModel):
     simulate_unavailable: bool = False
 
 
+class FixtureLabelResolverSpec(_StrictModel):
+    # Mapping shape per spec §7 informative example: `"default"` is
+    # the resolver's default-override (step 2 of the fallback chain);
+    # any other key is a per-name override (step 1).
+    mapping: dict[str, str]
+
+
 class FixtureManagerSpec(_StrictModel):
     backends: list[str]
+    label_resolver_ref: str | None = None
 
 
 # ---------------------------------------------------------------------------
@@ -67,7 +79,15 @@ class BackendTarget(_StrictModel):
     backend: str
 
 
-CallTarget = BackendTarget | Literal["manager", "construct_prompt_group"]
+CallTarget = (
+    BackendTarget
+    | Literal[
+        "manager",
+        "secondary_manager",
+        "tertiary_manager",
+        "construct_prompt_group",
+    ]
+)
 
 
 class FixtureExpectedRaises(_PermissiveModel):
@@ -109,6 +129,10 @@ class FixtureCall(_StrictModel):
     # indicator (no separate operation field on the call).
     operation: Literal["fetch", "render", "get"] | None = None
     name: str | None = None
+    # `label` is optional per spec §6 v0.26.0: omitting it triggers
+    # the configured LabelResolver (steps 1-2) or the spec fallback
+    # `"production"` (step 3). An omitted key and an explicit
+    # ``label: null`` both parse to ``None`` and are treated alike.
     label: str | None = None
     variables: dict[str, Any] | None = None
     # Render-only inputs — either an inline ``fetched_prompt`` (which
@@ -151,7 +175,20 @@ class FixtureExpectedResultEquivalence(_PermissiveModel):
     fields_must_differ: list[str] = []
 
 
-class FixtureExpectedTopLevel(_StrictModel):
+class FixtureExpectedTopLevel(_PermissiveModel):
+    """Top-level expected block.
+
+    Most fixtures set one or more of the typed sub-blocks below
+    (``prompt_group``, ``result_equivalence``, ``rendered_hash_*``).
+    Fixture 015 (label-resolver) introduces a capture-name-keyed
+    shape where each top-level key under ``expected:`` is a capture
+    name and the value is a dict of Prompt/PromptResult attributes
+    the harness MUST verify against the corresponding capture. Those
+    keys arrive on ``model_extra`` since the typed fields below don't
+    cover them; the runner walks ``model_extra`` to apply per-capture
+    assertions.
+    """
+
     prompt_group: FixtureExpectedPromptGroup | None = None
     result_equivalence: FixtureExpectedResultEquivalence | None = None
     # Some fixtures (012) have multiple result-equivalence blocks; keep
@@ -171,6 +208,25 @@ class FixtureExpectedTopLevel(_StrictModel):
 
 class PromptManagementFixture(_StrictModel):
     backends: list[FixtureBackendSpec]
+    # Fixture 016 uses a top-level ``cases:`` list to split into
+    # independent sub-cases that share the backends declaration but
+    # each have their own manager + calls. The runner walks the list
+    # and runs each case in declaration order; the per-case shape is
+    # the same as the top-level fixture (manager, calls, expected).
+    cases: list[dict[str, Any]] | None = None
     manager: FixtureManagerSpec | None = None
-    calls: list[FixtureCall]
+    calls: list[FixtureCall] = []
+    # Named LabelResolver specs; managers reference them by key name
+    # via ``label_resolver_ref``. Fixture 015 introduces two named
+    # slots — the primary `label_resolver` plus a `tertiary_label_resolver`
+    # for the no-default branch. Future fixtures MAY add more slots
+    # here; the runner must register each new slot under its key name.
+    label_resolver: FixtureLabelResolverSpec | None = None
+    tertiary_label_resolver: FixtureLabelResolverSpec | None = None
+    # Fixture 015's multi-manager shape. Each `<prefix>_manager` /
+    # `<prefix>_calls` pair runs independently with shared backends.
+    secondary_manager: FixtureManagerSpec | None = None
+    secondary_calls: list[FixtureCall] = []
+    tertiary_manager: FixtureManagerSpec | None = None
+    tertiary_calls: list[FixtureCall] = []
     expected: FixtureExpectedTopLevel | None = None
diff --git a/tests/conformance/test_fixture_parsing.py b/tests/conformance/test_fixture_parsing.py
index d21d0d2..e87bac3 100644
--- a/tests/conformance/test_fixture_parsing.py
+++ b/tests/conformance/test_fixture_parsing.py
@@ -52,6 +52,24 @@ def _id(case: tuple[str, Path]) -> str:
     "observability/022-langfuse-basic-trace": "Langfuse harness lands in PR 3 (proposal 0031)",
     "observability/023-langfuse-generation-rendering": "Langfuse harness lands in PR 3 (proposal 0031)",
     "observability/024-langfuse-prompt-linkage": "Langfuse harness lands in PR 3 (proposal 0031)",
+    # proposal 0034 caller-supplied invocation metadata fixtures (PR 4).
+    "observability/027-langfuse-caller-supplied-metadata": "Caller-metadata harness lands in PR 4 (0034)",
+    "observability/028-caller-metadata-namespace-rejection": "Caller-metadata harness lands in PR 4 (0034)",
+    "observability/029-caller-metadata-fan-out-per-instance": "Caller-metadata harness lands in PR 4 (0034)",
+    "observability/030-caller-metadata-parallel-branches-per-branch": (
+        "Caller-metadata harness lands in PR 4 (0034)"
+    ),
+    # proposal 0033 added typed directive shapes (`secondary_manager`,
+    # `label_resolver`, `cases`) the canonical parser doesn't model.
+    # The capability-specific harness at
+    # tests/conformance/harness/prompt_management.py models the new
+    # shapes; defer these fixtures here until the canonical parser catches up.
+    "prompt-management/015-label-resolver-fallback-chain": (
+        "Label-resolver shape models live in the PM-specific capability harness"
+    ),
+    "prompt-management/016-prompt-observability-entities-propagation": (
+        "Cases shape models live in the PM-specific capability harness"
+    ),
 }
 
 
diff --git a/tests/conformance/test_prompt_management.py b/tests/conformance/test_prompt_management.py
index 51f9afc..4250bf6 100644
--- a/tests/conformance/test_prompt_management.py
+++ b/tests/conformance/test_prompt_management.py
@@ -12,12 +12,13 @@
 
 from datetime import UTC, datetime
 from pathlib import Path
-from typing import Any
+from typing import Any, cast
 
 import pytest
 import yaml
 
 from openarmature.prompts import (
+    MappingLabelResolver,
     Prompt,
     PromptError,
     PromptGroup,
@@ -25,6 +26,7 @@
     PromptNotFound,
     PromptResult,
     PromptStoreUnavailable,
+    SamplingConfig,
 )
 
 from .harness.loader import CONFORMANCE_ROOT
@@ -32,6 +34,7 @@
     FixtureBackendSpec,
     FixtureCall,
     FixtureExpectedResultEquivalence,
+    FixtureManagerSpec,
     PromptManagementFixture,
 )
 
@@ -71,6 +74,18 @@ def __init__(self, spec: FixtureBackendSpec) -> None:
         self._prompts: dict[tuple[str, str], Prompt] = {}
         now = datetime.now(UTC)
         for ps in spec.prompts:
+            # Sampling sub-record (fixture 013): flatten the fixture's
+            # `extras:` sub-block into top-level kwargs so caller-
+            # supplied vendor knobs land in SamplingConfig's extras-
+            # allow bag rather than as a literal `extras` key.
+            sampling: SamplingConfig | None = None
+            if ps.sampling is not None:
+                flat: dict[str, Any] = {k: v for k, v in ps.sampling.items() if k != "extras"}
+                extras = ps.sampling.get("extras")
+                if isinstance(extras, dict):
+                    for k, v in cast(dict[str, Any], extras).items():
+                        flat.setdefault(k, v)
+                sampling = SamplingConfig(**flat)
             self._prompts[(ps.name, ps.label)] = Prompt(
                 name=ps.name,
                 version=ps.version,
@@ -78,6 +93,10 @@ def __init__(self, spec: FixtureBackendSpec) -> None:
                 template=ps.template,
                 template_hash=ps.template_hash,
                 fetched_at=now,
+                sampling=sampling,
+                observability_entities=(
+                    dict(ps.observability_entities) if ps.observability_entities is not None else None
+                ),
             )
         self.call_count = 0
 
@@ -125,10 +144,14 @@ async def _run_call(
             members = [captures[ref] for ref in call.members_refs]
             return PromptGroup(group_name=call.group_name, members=members), None
 
-        if isinstance(target, str) and target == "manager":
+        if isinstance(target, str) and target in {"manager", "secondary_manager", "tertiary_manager"}:
+            # All three manager targets dispatch to the currently-active
+            # manager in the per-pair iteration loop. The naming exists
+            # only to keep fixture YAML self-describing under a
+            # multi-manager shape (e.g., fixture 015).
             assert manager is not None
             if operation == "fetch":
-                assert call.name is not None and call.label is not None
+                assert call.name is not None
                 return await manager.fetch(call.name, call.label), None
             if operation == "render":
                 # Either inline fetched_prompt or a ref to a capture.
@@ -140,7 +163,7 @@ async def _run_call(
                     prompt = fetched
                 return manager.render(prompt, call.variables or {}), None
             if operation == "get":
-                assert call.name is not None and call.label is not None
+                assert call.name is not None
                 return await manager.get(call.name, call.label, call.variables or {}), None
             raise AssertionError(f"unsupported manager operation: {operation!r}")
 
@@ -271,29 +294,137 @@ def _assert_result_equivalence(
 # ---------------------------------------------------------------------------
 
 
+def _build_manager(
+    spec: FixtureManagerSpec,
+    backends_map: dict[str, MockPromptBackend],
+    resolvers_map: dict[str, MappingLabelResolver],
+) -> PromptManager:
+    ordered = [backends_map[name] for name in spec.backends]
+    resolver: MappingLabelResolver | None = None
+    if spec.label_resolver_ref is not None:
+        if spec.label_resolver_ref not in resolvers_map:
+            raise AssertionError(f"unknown label_resolver_ref: {spec.label_resolver_ref!r}")
+        resolver = resolvers_map[spec.label_resolver_ref]
+    return PromptManager(*ordered, label_resolver=resolver)
+
+
+def _assert_capture_attrs(capture_name: str, actual: Any, expected: dict[str, Any]) -> None:
+    # Check each fixture-supplied expected attribute against a captured
+    # Prompt / PromptResult: `sampling` (extras flattened, then compared to
+    # the dump), `*_absent` flags, and plain `==` for every other attribute.
+    for key, expected_value in expected.items():
+        if key == "sampling_absent":
+            if expected_value:
+                actual_sampling = getattr(actual, "sampling", None)
+                assert actual_sampling is None, (
+                    f"{capture_name}.sampling: expected absent, got {actual_sampling!r}"
+                )
+            continue
+        if key == "observability_entities_absent":
+            if expected_value:
+                actual_oe = getattr(actual, "observability_entities", None)
+                assert actual_oe is None, (
+                    f"{capture_name}.observability_entities: expected absent, got {actual_oe!r}"
+                )
+            continue
+        if key == "sampling":
+            actual_sampling = getattr(actual, "sampling", None)
+            assert actual_sampling is not None, f"{capture_name}.sampling: expected present, got None"
+            # Spec sidecar convention nests vendor extras under `extras:`;
+            # SamplingConfig.model_dump() flattens them to the top level
+            # (extra="allow"). Normalize the expected shape the way
+            # MockPromptBackend builds it: top-level keys win over extras.
+            extras = expected_value.get("extras")
+            expected_flat = dict(cast(dict[str, Any], extras)) if isinstance(extras, dict) else {}
+            expected_flat.update({k: v for k, v in expected_value.items() if k != "extras"})
+            actual_flat = actual_sampling.model_dump(exclude_none=True)
+            assert actual_flat == expected_flat, (
+                f"{capture_name}.sampling: expected {expected_flat!r}, got {actual_flat!r}"
+            )
+            continue
+        actual_value = getattr(actual, key)
+        assert actual_value == expected_value, (
+            f"{capture_name}.{key}: expected {expected_value!r}, got {actual_value!r}"
+        )
+
+
 @pytest.mark.parametrize("fixture_path", _fixture_paths(), ids=_fixture_id)
 async def test_prompt_management_fixture(fixture_path: Path) -> None:
     raw: Any = yaml.safe_load(fixture_path.read_text())
     fixture = PromptManagementFixture.model_validate(raw)
 
     backends: dict[str, MockPromptBackend] = {spec.name: MockPromptBackend(spec) for spec in fixture.backends}
-    manager: PromptManager | None = None
-    if fixture.manager is not None:
-        ordered = [backends[name] for name in fixture.manager.backends]
-        manager = PromptManager(*ordered)
+
+    # Named LabelResolvers; managers reference them by their fixture-
+    # top-level key name via ``label_resolver_ref``.
+    resolvers_map: dict[str, MappingLabelResolver] = {}
+    if fixture.label_resolver is not None:
+        resolvers_map["label_resolver"] = MappingLabelResolver(fixture.label_resolver.mapping)
+    if fixture.tertiary_label_resolver is not None:
+        resolvers_map["tertiary_label_resolver"] = MappingLabelResolver(
+            fixture.tertiary_label_resolver.mapping
+        )
 
     captures: dict[str, Any] = {}
-    for call in fixture.calls:
-        result, raised = await _run_call(call, backends, manager, captures)
-        _assert_per_call(call, result, raised, backends)
-        if call.capture_as is not None and raised is None:
-            captures[call.capture_as] = result
+
+    # Fixture 015 introduces secondary/tertiary manager+calls slots
+    # that run independently with shared backends. Run each (manager,
+    # calls) pair in declaration order; the captures dict is shared
+    # so cross-manager assertions on capture names still work.
+    manager_pairs = [
+        (fixture.manager, fixture.calls),
+        (fixture.secondary_manager, fixture.secondary_calls),
+        (fixture.tertiary_manager, fixture.tertiary_calls),
+    ]
+    for manager_spec, manager_calls in manager_pairs:
+        if manager_spec is None:
+            continue
+        manager = _build_manager(manager_spec, backends, resolvers_map)
+        for call in manager_calls:
+            result, raised = await _run_call(call, backends, manager, captures)
+            _assert_per_call(call, result, raised, backends)
+            if call.capture_as is not None and raised is None:
+                captures[call.capture_as] = result
+
+    # Cases-form fixtures (016) split into independent sub-cases that
+    # share the backends but use their own per-case manager + calls.
+    cases = raw.get("cases")
+    if cases:
+        for case in cases:
+            # Strip case-level metadata (``name``, ``description``)
+            # that PromptManagementFixture doesn't model; the runner
+            # doesn't need them.
+            case_payload = {
+                **{k: v for k, v in raw.items() if k not in {"cases", "expected"}},
+                **{k: v for k, v in case.items() if k not in {"name", "description"}},
+            }
+            case_fixture = PromptManagementFixture.model_validate(case_payload)
+            case_manager_pairs = [
+                (case_fixture.manager, case_fixture.calls),
+                (case_fixture.secondary_manager, case_fixture.secondary_calls),
+                (case_fixture.tertiary_manager, case_fixture.tertiary_calls),
+            ]
+            for manager_spec, manager_calls in case_manager_pairs:
+                if manager_spec is None:
+                    continue
+                manager = _build_manager(manager_spec, backends, resolvers_map)
+                for call in manager_calls:
+                    result, raised = await _run_call(call, backends, manager, captures)
+                    _assert_per_call(call, result, raised, backends)
+                    if call.capture_as is not None and raised is None:
+                        captures[call.capture_as] = result
+            if case_fixture.expected is not None:
+                _apply_top_level_expected(case_fixture.expected, captures)
 
     if fixture.expected is None:
         return
 
-    if fixture.expected.prompt_group is not None:
-        pg_expected = fixture.expected.prompt_group
+    _apply_top_level_expected(fixture.expected, captures)
+
+
+def _apply_top_level_expected(expected: Any, captures: dict[str, Any]) -> None:
+    if expected.prompt_group is not None:
+        pg_expected = expected.prompt_group
         group = captures[pg_expected.of]
         assert isinstance(group, PromptGroup)
         assert group.group_name == pg_expected.group_name
@@ -301,18 +432,31 @@ async def test_prompt_management_fixture(fixture_path: Path) -> None:
         if pg_expected.member_names is not None:
             assert [m.name for m in group.members] == pg_expected.member_names
 
-    if fixture.expected.result_equivalence is not None:
-        _assert_result_equivalence(fixture.expected.result_equivalence, captures)
-    for eq in fixture.expected.result_equivalences:
+    if expected.result_equivalence is not None:
+        _assert_result_equivalence(expected.result_equivalence, captures)
+    for eq in expected.result_equivalences:
         _assert_result_equivalence(eq, captures)
 
-    for pair in fixture.expected.rendered_hash_equal:
+    for pair in expected.rendered_hash_equal:
         a, b = pair
         assert captures[a].rendered_hash == captures[b].rendered_hash, (
             f"rendered_hash differs between {a!r} and {b!r} but fixture expects equal"
         )
-    for pair in fixture.expected.rendered_hash_different:
+    for pair in expected.rendered_hash_different:
         a, b = pair
         assert captures[a].rendered_hash != captures[b].rendered_hash, (
             f"rendered_hash matches between {a!r} and {b!r} but fixture expects different"
         )
+
+    # Fixtures 013-016 use capture-name-keyed top-level expected
+    # entries instead of the per-call expected:{prompt|prompt_result}
+    # shape. Walk those via pydantic's model_extra (FixtureExpectedTopLevel
+    # is permissive) and assert each capture matches the supplied
+    # attribute dict.
+    model_extra: dict[str, Any] = expected.model_extra or {}
+    for capture_name, expected_attrs in model_extra.items():
+        if not isinstance(expected_attrs, dict):
+            continue
+        if capture_name not in captures:
+            raise AssertionError(f"expected capture {capture_name!r} not found in captures")
+        _assert_capture_attrs(capture_name, captures[capture_name], cast(dict[str, Any], expected_attrs))
diff --git a/tests/test_smoke.py b/tests/test_smoke.py
index 5a5b9bf..06716f6 100644
--- a/tests/test_smoke.py
+++ b/tests/test_smoke.py
@@ -9,7 +9,7 @@
 
 def test_package_versions() -> None:
     assert openarmature.__version__ == "0.9.0"
-    assert openarmature.__spec_version__ == "0.24.0"
+    assert openarmature.__spec_version__ == "0.26.0"
 
 
 def test_spec_version_matches_pyproject() -> None:
diff --git a/tests/unit/test_prompts.py b/tests/unit/test_prompts.py
index f9ccfb7..3b33517 100644
--- a/tests/unit/test_prompts.py
+++ b/tests/unit/test_prompts.py
@@ -420,3 +420,126 @@ async def fetch(self, name: str, label: str = "production") -> Prompt:
     assert isinstance(result.messages[0], UserMessage)
     msg_content: Any = result.messages[0].content
     assert msg_content == "Hello, Alice!"
+
+
+# Wish 5 (proposal 0033 python-side ergonomic): the StrictUndefined
+# default matches spec §8 (was §7), but callers MAY opt out by passing
+# a different Jinja Undefined subclass at PromptManager construction.
+
+
+def test_manager_jinja_undefined_opt_out_renders_empty_for_missing_var() -> None:
+    import jinja2
+
+    from openarmature.prompts import PromptManager
+
+    prompt = Prompt(
+        name="opt_out",
+        version="v1",
+        label="production",
+        template="Hello, {{ user }}!",
+        template_hash="sha256:opt-out",
+        fetched_at=datetime.now(UTC),
+    )
+    manager = PromptManager(_StubBackend(prompt), jinja_undefined=jinja2.Undefined)
+    result = manager.render(prompt, {})  # `user` deliberately omitted
+    msg_content: Any = result.messages[0].content
+    # Default Jinja Undefined renders to empty string; StrictUndefined
+    # would have raised PromptRenderError here.
+    assert msg_content == "Hello, !"
+
+
+# Wish 1 (proposal 0033 python-side ergonomic): FilesystemPromptBackend
+# accepts a ``layout`` constructor flag. ``per-label`` (default) keeps
+# v0.5.0 behavior; ``flat`` reads `<root>/<name>.j2` ignoring label and
+# returns the requested label on the resulting Prompt verbatim.
+
+
+async def test_filesystem_backend_flat_layout(tmp_path: Path) -> None:
+    (tmp_path / "greet.j2").write_text("Hello, {{ user }}!", encoding="utf-8")
+    backend = FilesystemPromptBackend(tmp_path, layout="flat")
+
+    # Both label requests return the same template; .label echoes the request.
+    p_prod = await backend.fetch("greet", "production")
+    p_stage = await backend.fetch("greet", "staging")
+
+    assert p_prod.template == p_stage.template == "Hello, {{ user }}!"
+    assert p_prod.label == "production"
+    assert p_stage.label == "staging"
+
+
+# Spec §5 informative filesystem-sidecar convention. The
+# FilesystemPromptBackend opts in via ``sampling_source``.
+
+
+async def test_filesystem_backend_per_prompt_sidecar(tmp_path: Path) -> None:
+    (tmp_path / "production").mkdir()
+    (tmp_path / "production" / "summarize.j2").write_text("Summarize: {{ text }}", encoding="utf-8")
+    (tmp_path / "production" / "summarize.config.json").write_text(
+        '{"temperature": 0.0, "max_tokens": 256, "extras": {"repetition_penalty": 1.05}}',
+        encoding="utf-8",
+    )
+
+    backend = FilesystemPromptBackend(tmp_path, sampling_source="per-prompt-sidecar")
+    prompt = await backend.fetch("summarize", "production")
+
+    assert prompt.sampling is not None
+    assert prompt.sampling.temperature == 0.0
+    assert prompt.sampling.max_tokens == 256
+    # Vendor extra rides through the extras-allow bag.
+    assert (prompt.sampling.model_extra or {}).get("repetition_penalty") == 1.05
+
+
+async def test_filesystem_backend_unified_sampling(tmp_path: Path) -> None:
+    (tmp_path / "production").mkdir()
+    (tmp_path / "production" / "classify.j2").write_text("Classify: {{ topic }}", encoding="utf-8")
+    (tmp_path / "production" / "extract.j2").write_text("Extract: {{ text }}", encoding="utf-8")
+    (tmp_path / "prompt_configs.json").write_text(
+        '{"classify": {"temperature": 0.0}, "extract": {"temperature": 0.7, "max_tokens": 1024}}',
+        encoding="utf-8",
+    )
+
+    backend = FilesystemPromptBackend(tmp_path, sampling_source="unified")
+
+    classify = await backend.fetch("classify", "production")
+    extract = await backend.fetch("extract", "production")
+
+    assert classify.sampling is not None
+    assert classify.sampling.temperature == 0.0
+    assert extract.sampling is not None
+    assert extract.sampling.max_tokens == 1024
+
+
+# LabelResolver fallback chain — covered by fixture 015 end-to-end,
+# but the resolver class is python-only and a focused unit test
+# documents the precedence rules in code.
+
+
+def test_mapping_label_resolver_per_name_override() -> None:
+    from openarmature.prompts import MappingLabelResolver
+
+    resolver = MappingLabelResolver({"default": "production", "experimental": "staging"})
+    assert resolver.resolve("experimental") == "staging"
+
+
+def test_mapping_label_resolver_default_override() -> None:
+    from openarmature.prompts import MappingLabelResolver
+
+    resolver = MappingLabelResolver({"default": "canary", "other": "staging"})
+    assert resolver.resolve("anything-not-listed") == "canary"
+
+
+def test_mapping_label_resolver_spec_fallback_when_no_default() -> None:
+    from openarmature.prompts import MappingLabelResolver
+
+    resolver = MappingLabelResolver({"experimental": "staging"})
+    assert resolver.resolve("anything-not-listed") == "production"
+
+
+class _StubBackend:
+    """Minimal PromptBackend that returns a single canned prompt."""
+
+    def __init__(self, prompt: Prompt) -> None:
+        self._prompt = prompt
+
+    async def fetch(self, name: str, label: str = "production") -> Prompt:
+        return self._prompt