diff --git a/.github/workflows/code-quality.yaml b/.github/workflows/code-quality.yaml index f956a4f..ce3f80a 100644 --- a/.github/workflows/code-quality.yaml +++ b/.github/workflows/code-quality.yaml @@ -5,17 +5,11 @@ on: branches: [develop, main] paths: - "src/**" - - "resources/schemas/**" - - "pyproject.toml" - - "poetry.lock" - "Makefile" - ".github/workflows/code-quality.yaml" pull_request: paths: - "src/**" - - "resources/schemas/**" - - "pyproject.toml" - - "poetry.lock" - "Makefile" - ".github/workflows/code-quality.yaml" @@ -34,7 +28,7 @@ jobs: - name: Read Python version from pyproject.toml id: python-version - run: echo "version=$(grep -m1 'python = ' pyproject.toml | grep -oP '\d+\.\d+' | head -1)" >> $GITHUB_OUTPUT + run: echo "version=$(grep -m1 'python = ' src/pyproject.toml | grep -oP '\d+\.\d+' | head -1)" >> $GITHUB_OUTPUT - name: Set up Python uses: actions/setup-python@v6 @@ -48,16 +42,16 @@ jobs: uses: actions/cache@v4 with: path: ~/.cache/pypoetry - key: poetry-${{ runner.os }}-${{ hashFiles('poetry.lock') }} + key: poetry-${{ runner.os }}-${{ hashFiles('src/poetry.lock') }} - name: Install dependencies run: make install - name: Lint LinkML schemas - run: poetry run linkml lint --ignore-warnings resources/schemas/ + run: make lint-schema - name: Run ruff linter - run: poetry run ruff check src/ + run: make lint - name: Generate models and docs run: make -B all @@ -70,7 +64,7 @@ jobs: OUT_OF_SYNC="" # src/ and JSON schemas are deterministic — use git diff directly - for f in $(git diff --name-only src/ resources/schemas/*json); do + for f in $(git diff --name-only src/erspec/ src/resources/schemas/*json); do OUT_OF_SYNC="$OUT_OF_SYNC $f" done diff --git a/CHANGELOG.md b/CHANGELOG.md index 909ccd6..fe22f6b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,20 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
## [unreleased] +## [1.0.0-rc.1] - 2026-04-21 +### Added +* data model: `context` field on `EntityMention` — carries the surrounding textual context for an entity mention + +### Changed +* Repository layout restructured: schemas, scripts, templates, and Poetry project configuration consolidated under `src/`; root-level `resources/`, `pyproject.toml` and `poetry.lock` moved into `src/`, while the `Makefile` stays at the repository root +* Schema files renamed to drop the version suffix (e.g. `core-schema-v0.1.0.yaml` → `core-schema.yaml`); internal schema version bumped to 1.0.0 +* Makefile: build logic consolidated into a single root-level `Makefile`; CI workflows updated to match revised `make` targets and paths +* Gherkin test suite overhauled: new feature files added for outcome integration, request publishing, and detailed resolution cases; outdated `ere-ers-common-cases` and `ere-ers-full-rebuilds` feature files removed; unhappy-path scenarios significantly extended +* Schema docs and worked examples updated to reflect the current schema structure and revised repository paths + +### Fixed +* CI: PR comment step is now skipped on cross-fork pull requests to prevent permission failures + ## [0.2.0-rc.2] - 2026-02-20 ### Added * CI: GitHub Actions quality-check workflow (`.github/workflows/code-quality.yaml`) — LinkML schema linting, `ruff` Python linting, model/docs generation with sync verification, and PR comment posting ([ERS1-103]) @@ -49,6 +63,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). ## [0.1.0-rc.1] - 2025-12-22 * Initial release, fulfilling Project's Delivery 1 (ERE Technical Contract and related code). -* [LinkML schema](resources/schema/ers-core_v0.1.0.yaml) to specify the interaction with the ERE service. +* [LinkML schema](src/resources/schemas/core-schema.yaml) to specify the interaction with the ERE service. 
* Includes auto-generated [navigable documents](docs/schema/README.md), a [class diagram](docs/schema/README.md) and a [sequence diagram](docs/ere-interface-seq-diag.png). * [Gherkin Tests](test/features/), based on [collected test data](test/test_data/), possible [test cases](test/test_data/analysis/README.md) diff --git a/Makefile b/Makefile index e87a99b..08f0831 100644 --- a/Makefile +++ b/Makefile @@ -18,30 +18,28 @@ define log_done endef # ─── Paths & Naming ───────────────────────────────────────────────────────────── +# Root-relative paths — used by Make for dependency tracking only. +# Recipes use src/-relative paths via `cd src &&`. -SCHEMAS_DIR = resources/schemas -SCRIPTS_DIR = resources/scripts -TEMPLATES_DIR = resources/templates +SCHEMAS_DIR = src/resources/schemas +SCRIPTS_DIR = src/resources/scripts +TEMPLATES_DIR = src/resources/templates MODELS_DIR = src/erspec/models -# Schema identifiers -ERE_SCHEMA_NAME = ere-service-schema -CORE_SCHEMA_NAME = core-schema -JSON_SCHEMA_NAME = er-schema -SCHEMA_VERSION = 0.1.0 +ERE_SCHEMA_NAME = ere-service-schema +CORE_SCHEMA_NAME = core-schema +JSON_SCHEMA_NAME = er-schema -# Source schemas (core is imported by ere, so it is a dependency) -ERE_SCHEMA_PATH = $(SCHEMAS_DIR)/$(ERE_SCHEMA_NAME)-v$(SCHEMA_VERSION).yaml -CORE_SCHEMA_PATH = $(SCHEMAS_DIR)/$(CORE_SCHEMA_NAME)-v$(SCHEMA_VERSION).yaml +ERE_SCHEMA_PATH = $(SCHEMAS_DIR)/$(ERE_SCHEMA_NAME).yaml +CORE_SCHEMA_PATH = $(SCHEMAS_DIR)/$(CORE_SCHEMA_NAME).yaml ALL_SCHEMA_SOURCES = $(ERE_SCHEMA_PATH) $(CORE_SCHEMA_PATH) -# Generated artefacts -PYTHON_ERE_MODEL = $(MODELS_DIR)/ere.py -PYTHON_CORE_MODEL = $(MODELS_DIR)/core.py -JSON_SCHEMA_PATH = $(SCHEMAS_DIR)/$(JSON_SCHEMA_NAME)-v$(SCHEMA_VERSION).json +PYTHON_ERE_MODEL = $(MODELS_DIR)/ere.py +PYTHON_CORE_MODEL = $(MODELS_DIR)/core.py +JSON_SCHEMA_PATH = $(SCHEMAS_DIR)/$(JSON_SCHEMA_NAME).json -MODEL_DOCS_DIR = docs/schema -MODEL_DOCS_README = $(MODEL_DOCS_DIR)/README.md +MODEL_DOCS_DIR = docs/schema 
+MODEL_DOCS_README = $(MODEL_DOCS_DIR)/README.md # ─── Help ──────────────────────────────────────────────────────────────────────── @@ -58,12 +56,11 @@ help: }' $(MAKEFILE_LIST) # ─── Setup ─────────────────────────────────────────────────────────────────────── -# Note: Python, Poetry and Make are pre-requisites and are not handled here. .PHONY: install install: ## Install dependencies using Poetry $(call log_progress,Installing dependencies using Poetry...) - @poetry sync + @cd src && poetry sync $(call log_done,Dependencies installed.) # ─── Quality ───────────────────────────────────────────────────────────────────── @@ -71,13 +68,14 @@ install: ## Install dependencies using Poetry .PHONY: lint lint: ## Run ruff linter on source code $(call log_progress,Running ruff checks...) - @poetry run ruff check src/ + @cd src && poetry run ruff check erspec/ $(call log_done,Ruff checks completed.) .PHONY: lint-schema lint-schema: ## Run LinkML linter on YAML schemas $(call log_progress,Linting LinkML schemas...) - @poetry run linkml lint --ignore-warnings $(SCHEMAS_DIR)/ + @cd src && poetry run linkml lint --ignore-warnings resources/schemas/$(ERE_SCHEMA_NAME).yaml + @cd src && poetry run linkml lint --ignore-warnings resources/schemas/$(CORE_SCHEMA_NAME).yaml $(call log_done,LinkML schema lint completed.) # ─── Aggregate targets ────────────────────────────────────────────────────────── @@ -94,26 +92,26 @@ generate-models: $(PYTHON_ERE_MODEL) $(JSON_SCHEMA_PATH) ## Generate Python mode generate-doc: $(MODEL_DOCS_README) ## Generate schema documentation and diagrams $(call log_done,Documentation generated.) -# ─── Python Pydantic models (split generation: ere + core) ────────────────────── +# ─── Python Pydantic models ────────────────────────────────────────────────────── $(PYTHON_ERE_MODEL) $(PYTHON_CORE_MODEL) &: $(ALL_SCHEMA_SOURCES) $(call log_progress,Generating Python models...) 
@mkdir -p $(MODELS_DIR) - @poetry run python $(SCRIPTS_DIR)/generate_models.py \ - --schema $(ERE_SCHEMA_PATH) \ - --output $(PYTHON_ERE_MODEL) \ - --template-dir $(TEMPLATES_DIR) \ - --schemas-dir $(SCHEMAS_DIR) - @poetry run ruff check --fix $(MODELS_DIR) + @cd src && poetry run python resources/scripts/generate_models.py \ + --schema resources/schemas/$(ERE_SCHEMA_NAME).yaml \ + --output erspec/models/ere.py \ + --template-dir resources/templates \ + --schemas-dir resources/schemas + @cd src && poetry run ruff check --fix erspec/models/ $(call log_done,Python models generated.) # ─── JSON Schema ───────────────────────────────────────────────────────────────── -# The ERE schema imports core, so `linkml generate json-schema` will include both. $(JSON_SCHEMA_PATH): $(ALL_SCHEMA_SOURCES) $(call log_progress,Generating JSON Schema...) @mkdir -p $(dir $(JSON_SCHEMA_PATH)) - @poetry run linkml generate json-schema --indent 2 $(ERE_SCHEMA_PATH) > $(JSON_SCHEMA_PATH) + @cd src && poetry run linkml generate json-schema --indent 2 \ + resources/schemas/$(ERE_SCHEMA_NAME).yaml > resources/schemas/$(JSON_SCHEMA_NAME).json $(call log_done,JSON Schema generated -> $(JSON_SCHEMA_PATH)) # ─── Documentation & PlantUML diagrams ────────────────────────────────────────── @@ -121,14 +119,10 @@ $(JSON_SCHEMA_PATH): $(ALL_SCHEMA_SOURCES) $(MODEL_DOCS_README): $(ALL_SCHEMA_SOURCES) $(call log_progress,Generating schema documentation...) @mkdir -p $(MODEL_DOCS_DIR) -# Index is named README.md so GitHub renders it when browsing the directory. - @poetry run linkml generate doc $(ERE_SCHEMA_PATH) \ - -d $(MODEL_DOCS_DIR) --index-name README -# TODO: Prefer PNG once upstream is fixed (https://github.com/linkml/linkml/issues/3009) -# TODO: --no-mergeimports doesn't work (https://github.com/linkml/linkml/issues/1296), so, for -# the moment, we include core imported classes in the diagram. 
- @poetry run linkml generate plantuml \ - -d $(MODEL_DOCS_DIR) --format svg $(ERE_SCHEMA_PATH) + @cd src && poetry run linkml generate doc resources/schemas/$(ERE_SCHEMA_NAME).yaml \ + -d ../docs/schema --index-name README + @cd src && poetry run linkml generate plantuml \ + -d ../docs/schema --format svg resources/schemas/$(ERE_SCHEMA_NAME).yaml $(call log_done,Documentation generated -> $(MODEL_DOCS_DIR)) # ─── Clean ─────────────────────────────────────────────────────────────────────── diff --git a/README.md b/README.md index e931c94..b84de4d 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,7 @@ # Entity Resolution Specifications -Formal software contract, shared data models, sample messages, and compliance tests required for integrating new Entity Resolution Engines (EREs) into the system. +Formal software contract, shared data models, sample messages, and compliance tests required for integrating new Entity Resolution Engines (EREs) into the Entity Resolution System. -> Note: Active development continues in the OP-TED repository: https://github.com/OP-TED/entity-resolution-spec ## Requirements @@ -39,9 +38,15 @@ make install This will install the necessary user dependencies in a Poetry-managed virtual environment. +## Repository Layout + +This repository follows the repository owner's requirements for project structure, which place the self-contained Python project (source code, dependencies, and build scripts) under `src/`. This layout is required for the repository owner's deployment tooling to locate and operate the project correctly. + +The canonical `Makefile` lives at the repo root and runs all targets from there. All `poetry` commands are run against the project in `src/` by changing into that directory first (`cd src && poetry ...`). + ## Development -This project uses principles of model-driven development (MDD) and domain-driven design (DDD). 
The core models are defined in the `resources/schemas` directory using [LinkML](https://linkml.io/), and the Python (Pydantic) models are generated from these specifications. +This project uses principles of model-driven development (MDD) and domain-driven design (DDD). The core models are defined in the `src/resources/schemas` directory using [LinkML](https://linkml.io/), and the Python (Pydantic) models are generated from these specifications. Generated Python models are in `src/erspec/models`. Regenerate them with: @@ -52,10 +57,9 @@ make all This regenerates both the LinkML-based models (Python, JSONSchema) and the navigable documentation. See the Makefile for more granular targets. -## Running and Testing +## Gherkin Specification -TODO: this will be added in future. Right now, this repository contains -specifications only and does not have runnable unit tests. +This repository contains Gherkin feature files under `test/features/` that serve as a formal specification of the expected behaviour of the ERE. They describe the observable contract between ERS and ERE at specification level — independent of any particular ERE implementation — and may serve as the basis for implementing acceptance tests for a conformant ERE. 
## Test data diff --git a/docs/schema/about_entity_mention.md b/docs/schema/about_entity_mention.md index 881bb06..6c95c1f 100644 --- a/docs/schema/about_entity_mention.md +++ b/docs/schema/about_entity_mention.md @@ -17,8 +17,8 @@ Alias: about_entity_mention | Name | Description | Modifies Slot | | --- | --- | --- | -| [UserAction](UserAction.md) | Immutable record of a curator action on an entity mention resolution | no | | [Decision](Decision.md) | Canonical placement of an entity mention to a cluster | no | +| [UserAction](UserAction.md) | Immutable record of a curator action on an entity mention resolution | no | diff --git a/docs/schema/candidates.md b/docs/schema/candidates.md index d4dce2b..4887a5e 100644 --- a/docs/schema/candidates.md +++ b/docs/schema/candidates.md @@ -17,9 +17,9 @@ Alias: candidates | Name | Description | Modifies Slot | | --- | --- | --- | -| [UserAction](UserAction.md) | Immutable record of a curator action on an entity mention resolution | no | | [EntityMentionResolutionResponse](EntityMentionResolutionResponse.md) | An entity resolution response returned by the ERE | no | | [Decision](Decision.md) | Canonical placement of an entity mention to a cluster | no | +| [UserAction](UserAction.md) | Immutable record of a curator action on an entity mention resolution | no | diff --git a/docs/schema/created_at.md b/docs/schema/created_at.md index b40cfa7..ea2eb60 100644 --- a/docs/schema/created_at.md +++ b/docs/schema/created_at.md @@ -17,8 +17,8 @@ Alias: created_at | Name | Description | Modifies Slot | | --- | --- | --- | -| [UserAction](UserAction.md) | Immutable record of a curator action on an entity mention resolution | no | | [Decision](Decision.md) | Canonical placement of an entity mention to a cluster | no | +| [UserAction](UserAction.md) | Immutable record of a curator action on an entity mention resolution | no | diff --git a/docs/schema/ere_request_id.md b/docs/schema/ere_request_id.md index ab234d0..f98e468 100644 --- 
a/docs/schema/ere_request_id.md +++ b/docs/schema/ere_request_id.md @@ -34,12 +34,12 @@ Alias: ere_request_id | Name | Description | Modifies Slot | | --- | --- | --- | +| [EREMessage](EREMessage.md) | Root abstraction to represent attributes common to both requests and results | no | | [EREErrorResponse](EREErrorResponse.md) | Response sent by the ERE when some error/exception occurs while processing a ... | no | | [EREResponse](EREResponse.md) | Root class to represent all the responses sent by the ERE | no | | [EntityMentionResolutionRequest](EntityMentionResolutionRequest.md) | An entity resolution request sent to the ERE, containing the entity to be res... | no | -| [EREMessage](EREMessage.md) | Root abstraction to represent attributes common to both requests and results | no | -| [EntityMentionResolutionResponse](EntityMentionResolutionResponse.md) | An entity resolution response returned by the ERE | no | | [ERERequest](ERERequest.md) | Root class to represent all the requests sent to the ERE | no | +| [EntityMentionResolutionResponse](EntityMentionResolutionResponse.md) | An entity resolution response returned by the ERE | no | diff --git a/docs/schema/id.md b/docs/schema/id.md index e28a08f..188feea 100644 --- a/docs/schema/id.md +++ b/docs/schema/id.md @@ -17,8 +17,8 @@ Alias: id | Name | Description | Modifies Slot | | --- | --- | --- | -| [UserAction](UserAction.md) | Immutable record of a curator action on an entity mention resolution | no | | [Decision](Decision.md) | Canonical placement of an entity mention to a cluster | no | +| [UserAction](UserAction.md) | Immutable record of a curator action on an entity mention resolution | no | diff --git a/docs/schema/source_id.md b/docs/schema/source_id.md index b500ee3..8fe0c66 100644 --- a/docs/schema/source_id.md +++ b/docs/schema/source_id.md @@ -17,8 +17,8 @@ Alias: source_id | Name | Description | Modifies Slot | | --- | --- | --- | -| [LookupState](LookupState.md) | Tracks the resolution state for 
entity mentions from a particular source | no | | [EntityMentionIdentifier](EntityMentionIdentifier.md) | A container that groups the attributes needed to identify an entity mention i... | no | +| [LookupState](LookupState.md) | Tracks the resolution state for entity mentions from a particular source | no | diff --git a/docs/schema/timestamp.md b/docs/schema/timestamp.md index fba1838..dbd9abf 100644 --- a/docs/schema/timestamp.md +++ b/docs/schema/timestamp.md @@ -24,12 +24,12 @@ Alias: timestamp | Name | Description | Modifies Slot | | --- | --- | --- | +| [EREMessage](EREMessage.md) | Root abstraction to represent attributes common to both requests and results | no | | [EREErrorResponse](EREErrorResponse.md) | Response sent by the ERE when some error/exception occurs while processing a ... | no | | [EREResponse](EREResponse.md) | Root class to represent all the responses sent by the ERE | no | | [EntityMentionResolutionRequest](EntityMentionResolutionRequest.md) | An entity resolution request sent to the ERE, containing the entity to be res... 
| no | -| [EREMessage](EREMessage.md) | Root abstraction to represent attributes common to both requests and results | no | -| [EntityMentionResolutionResponse](EntityMentionResolutionResponse.md) | An entity resolution response returned by the ERE | no | | [ERERequest](ERERequest.md) | Root class to represent all the requests sent to the ERE | no | +| [EntityMentionResolutionResponse](EntityMentionResolutionResponse.md) | An entity resolution response returned by the ERE | no | diff --git a/docs/schema/type.md b/docs/schema/type.md index 6da5dcd..9da5766 100644 --- a/docs/schema/type.md +++ b/docs/schema/type.md @@ -36,12 +36,12 @@ Alias: type | Name | Description | Modifies Slot | | --- | --- | --- | +| [EREMessage](EREMessage.md) | Root abstraction to represent attributes common to both requests and results | no | | [EREErrorResponse](EREErrorResponse.md) | Response sent by the ERE when some error/exception occurs while processing a ... | no | | [EREResponse](EREResponse.md) | Root class to represent all the responses sent by the ERE | no | | [EntityMentionResolutionRequest](EntityMentionResolutionRequest.md) | An entity resolution request sent to the ERE, containing the entity to be res... 
| no | -| [EREMessage](EREMessage.md) | Root abstraction to represent attributes common to both requests and results | no | -| [EntityMentionResolutionResponse](EntityMentionResolutionResponse.md) | An entity resolution response returned by the ERE | no | | [ERERequest](ERERequest.md) | Root class to represent all the requests sent to the ERE | no | +| [EntityMentionResolutionResponse](EntityMentionResolutionResponse.md) | An entity resolution response returned by the ERE | no | diff --git a/src/VERSION b/src/VERSION new file mode 100644 index 0000000..3eefcb9 --- /dev/null +++ b/src/VERSION @@ -0,0 +1 @@ +1.0.0 diff --git a/src/erspec/models/core.py b/src/erspec/models/core.py index 785d538..7fda2c1 100644 --- a/src/erspec/models/core.py +++ b/src/erspec/models/core.py @@ -16,7 +16,7 @@ metamodel_version = "None" -version = "0.1.0" +version = "1.0.0" class UserActionType(str, Enum): diff --git a/src/erspec/models/ere.py b/src/erspec/models/ere.py index f718aca..d76a95f 100644 --- a/src/erspec/models/ere.py +++ b/src/erspec/models/ere.py @@ -23,7 +23,7 @@ metamodel_version = "None" -version = "0.1.0" +version = "1.0.0" class UserActionType(str, Enum): diff --git a/poetry.lock b/src/poetry.lock similarity index 100% rename from poetry.lock rename to src/poetry.lock diff --git a/pyproject.toml b/src/pyproject.toml similarity index 89% rename from pyproject.toml rename to src/pyproject.toml index 82f497a..b2ddedf 100644 --- a/pyproject.toml +++ b/src/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "ers-spec" -version = "0.3.0" +version = "1.0.0" description = """ The core components for the Entity Resolution System (ERS) components. 
@@ -11,7 +11,7 @@ authors = [ {name = "Meaningfy", email = "hi@meaningfy.ws"} ] -readme = "README.md" +readme = "../README.md" requires-python = ">=3.12" [tool.poetry.dependencies] @@ -30,7 +30,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] packages = [ - { include = "erspec", from = "src" } + { include = "erspec" } ] [tool.ruff] diff --git a/resources/schemas/core-schema-v0.1.0.yaml b/src/resources/schemas/core-schema.yaml similarity index 99% rename from resources/schemas/core-schema-v0.1.0.yaml rename to src/resources/schemas/core-schema.yaml index f069975..a52fba6 100644 --- a/resources/schemas/core-schema-v0.1.0.yaml +++ b/src/resources/schemas/core-schema.yaml @@ -1,7 +1,7 @@ id: https://data.europa.eu/ers/schema/ers name: coreSchema description: ERS domain layer models for entity resolution -version: 0.1.0 +version: 1.0.0 imports: - linkml:types prefixes: diff --git a/resources/schemas/er-schema-v0.1.0.json b/src/resources/schemas/er-schema.json similarity index 99% rename from resources/schemas/er-schema-v0.1.0.json rename to src/resources/schemas/er-schema.json index 4fbbaaa..aea3498 100644 --- a/resources/schemas/er-schema-v0.1.0.json +++ b/src/resources/schemas/er-schema.json @@ -420,6 +420,6 @@ "metamodel_version": "1.7.0", "title": "ereServiceSchema", "type": "object", - "version": "0.1.0" + "version": "1.0.0" } diff --git a/resources/schemas/ere-service-schema-v0.1.0.yaml b/src/resources/schemas/ere-service-schema.yaml similarity index 99% rename from resources/schemas/ere-service-schema-v0.1.0.yaml rename to src/resources/schemas/ere-service-schema.yaml index 7c18438..7a61db5 100644 --- a/resources/schemas/ere-service-schema-v0.1.0.yaml +++ b/src/resources/schemas/ere-service-schema.yaml @@ -1,10 +1,10 @@ id: https://data.europa.eu/ers/schema/ere name: ereServiceSchema description: A LinkML schema for the ERS/ERE Service -version: 0.1.0 +version: 1.0.0 imports: - linkml:types - - ./core-schema-v0.1.0 + - ./core-schema prefixes: linkml: 
https://w3id.org/linkml/ ere: https://data.europa.eu/ers/schema/ere/ diff --git a/resources/scripts/generate_models.py b/src/resources/scripts/generate_models.py similarity index 98% rename from resources/scripts/generate_models.py rename to src/resources/scripts/generate_models.py index 9ab8df6..7478095 100644 --- a/resources/scripts/generate_models.py +++ b/src/resources/scripts/generate_models.py @@ -27,7 +27,7 @@ def parse_args(argv: list[str] | None = None) -> argparse.Namespace: "--schema", required=True, type=Path, - help="Path to the top-level LinkML YAML schema (e.g. resources/schemas/ere-service-schema-v0.1.0.yaml).", + help="Path to the top-level LinkML YAML schema (e.g. resources/schemas/ere-service-schema.yaml).", ) parser.add_argument( "--output", diff --git a/resources/scripts/linkml-classes.py b/src/resources/scripts/linkml-classes.py similarity index 100% rename from resources/scripts/linkml-classes.py rename to src/resources/scripts/linkml-classes.py diff --git a/resources/templates/base_model.py.jinja b/src/resources/templates/base_model.py.jinja similarity index 100% rename from resources/templates/base_model.py.jinja rename to src/resources/templates/base_model.py.jinja diff --git a/resources/templates/class.py.jinja b/src/resources/templates/class.py.jinja similarity index 100% rename from resources/templates/class.py.jinja rename to src/resources/templates/class.py.jinja diff --git a/resources/templates/imports.py.jinja b/src/resources/templates/imports.py.jinja similarity index 100% rename from resources/templates/imports.py.jinja rename to src/resources/templates/imports.py.jinja diff --git a/resources/templates/module.py.jinja b/src/resources/templates/module.py.jinja similarity index 100% rename from resources/templates/module.py.jinja rename to src/resources/templates/module.py.jinja diff --git a/test/features/ere-ers-common-cases.feature b/test/features/ere-ers-common-cases.feature deleted file mode 100644 index 43156f4..0000000 --- 
a/test/features/ere-ers-common-cases.feature +++ /dev/null @@ -1,59 +0,0 @@ -Feature: ERE/ERS common case interactions - - Note that in all the tests, the exact meaning of "known/unknown entity" depends on the ERE implementation, - e.g., it has already seen the entity in a previous request, or it is a test ERE, with a pre-loaded - set of canonical entities. - -Scenario: A resolution request returns existing cluster candidate references - - A resolution request is pushed to the ERE with an entity E, which is known to be equivalent to - other canonical entities and with sufficiently high confidence scores. The ERE asynchronously returns - the cluster IDs represented by such canonical entities. - - Detailed examples: see [ere-test-cases.md](../test_data/analysis/ere-test-cases.md), - examples 1, 2, 4, 5 -Given - Entity clusters C1, C2, C3 are already known to the ERE -When - The ERS pushes a resolution request for an entity mention E into the ERE requests channel -And - The entity E is estimated to be equivalent to the canonical entities of C1, C2, C3, - with sufficiently high confidence scores -Then - The ERE asynchronously pushes an `EntityMentionResolutionResponse` object that contains: - - - Common response properties: - - `request_id`: the original request ID - - `type`: "EntityMentionResolutionResponse" - - `entity_mention_id`: an instance of `EntityMentionIdentifier` with - `source_id`, `request_id`,`entity_type` equal to the mention in the original request and - corresponding to E taken from the original request - - These are common to all responses and we won't repeat them in the following - - - - `candidates`: a list of `ClusterReference` objects such like:` - - `{ "cluster_id": , "confidence_score": }` - for i = 0..3 - - All of `score(E, Ci)` are above the confidence threshold configured in the ERE - - Returning the cluster references in score order is not required, though it's recommended. - Having 3 items in the result is arbitrary. 
In general, it depends on how many clusters are found and on the - ERE configuration (e.g., top N results, confidence threshold, or both). - - -Scenario: A resolution request returns a new singleton cluster reference - - A resolution request is pushed to the ERE with an unknown entity, which has no equivalents already - resolved by the ERE -Given - The ERE does not know the entity E (ie, it has no equivalent cluster for it) -When - The ERS pushes the entity E into the requests channel -Then - The ERE asynchronously pushes an entity resolution object to the responses channel that contains - a reference to one cluster only. - The response has the same format as in the previous scenario. - The cluster ID in the response is formed based on `canonicalID(requestId, sourceId, entityType)`, ie, - on the function, described in the technical contract, which computes the canonical ID from the - composite key of an entity mention. - diff --git a/test/features/ere-ers-full-rebuilds.feature b/test/features/ere-ers-full-rebuilds.feature deleted file mode 100644 index 2da0e1f..0000000 --- a/test/features/ere-ers-full-rebuilds.feature +++ /dev/null @@ -1,54 +0,0 @@ -Feature: ERE/ERS interaction upon rebuild requests - - TODO: probably to remove, since this request has been turned into an internal operation. - - The ERE replies to a full rebuild request with an acknowledgement response. - - After that, past resolutions are possibly recomputed. Since this is optional and implementation-dependent, - we don't specifically test it here, and possibly, it has to be an implementation-level test. - - However, the ERE must remember excluded clusters across rebuilds, so this is covered in a scenario below. - - TODO: per-type rebuild requests. - - -Scenario: The ERE acknowledges a rebuild request - - Upon a full rebuild request pushed to the ERE, this asynchronously replies with a response that - indicates the request has been received and the internal state has been reset. 
-When - The ERS pushes an instance of `FullBuildRequest` into the requests channel -Then - The ERE asynchronously pushes an instance of `FullRebuildResponse` to the rebuild responses channel. - - As for all responses, this has `ereRequestId` se to the ERE request ID, and `type` set to `FullRebuildResponse`. - - -Scenario: The ERE keeps resolving entities as usually after a rebuild request - - Note that, as in other tests, the exact meaning of "known/unknown entity" depends on the ERE implementation, see - the ERE contract section on full rebuilds. -Given - a rebuild request was pushed to the ERE and the ERE has responded with a rebuild response -When - The ERS pushes a resolution request into the ERE requests channel for the entity E -Then - The ERE asynchronously pushes an entity resolution object to the responses channel, within the - configured system timeout. The response is like - [a regular resolution response](ere-ers-common-cases.feature), possibly with a new - set of cluster references. - - -Scenario: Rejected clusters are preserved across full rebuilds - -Given - The ERE has received a resolution request for an entity E, which excludes - a set `R[]` of cluster IDs -And - The ERE has replied to the initial request -And - A `FullBuildRequest` has been pushed since the previous resolution -When - A new resolution about E is sent that contains no excluded clusters -Then - The ERE response after a full rebuild doesn't contain any cluster ID in R[]. \ No newline at end of file diff --git a/test/features/ere-ers-idempotent-resolutions.feature b/test/features/ere-ers-idempotent-resolutions.feature index 8188da2..0042762 100644 --- a/test/features/ere-ers-idempotent-resolutions.feature +++ b/test/features/ere-ers-idempotent-resolutions.feature @@ -1,6 +1,7 @@ Feature: ERE/ERS idempotent requests - This feature tests idempotent interactions with the ERE. 
+ This feature defines the idempotency contract between ERS and ERE: what ERE can expect + when the same request arrives more than once, and what ERS guarantees about repeated submissions. Scenario Outline: Repeated regular resolution request returns the same result @@ -14,19 +15,32 @@ And No `EntityMentionResolutionResponse` has been emitted with `ere_request_id` having the ‘ereNotification:’ prefix and `entity_mention_id` pointing to E (ie, no internal updates happened, see the contract document) When - The ERS pushes again the same resolution request {Req} for the entity E into the ERE requests channel -Then + ERS pushes again the same resolution request {Req} for the entity E into the ERE request channel +Then The ERE asynchronously pushes an `EntityMentionResolutionResponse` object that contains the same set of `candidates` as in the previous response for E, including their confidence score. - Since returning the cluster references in score order is not required, such order doesn't apply to - this invariant, i.e., the second result can come in a different order for the cluster references. + Since ERS selects the canonical assignment positionally (`candidates[0]`), the ordering of candidates + must also be stable across repeated requests — the same best candidate must appear first each time. 
This applies to the following variants of {Req}: - | Req | - | a regular resolution request without neither `proposed_cluster_ids` nor `excluded_cluster_ids` | - | a resolution request with values for `proposed_cluster_ids` | - | a resolution request with values for `excluded_cluster_ids` | - | a resolution request with values for both `proposed_cluster_ids` and `excluded_cluster_ids` | + | Req | + | a regular resolution request without `proposed_cluster_ids` or `excluded_cluster_ids` | + | a resolution request with values for `proposed_cluster_ids` | + | a resolution request with values for `excluded_cluster_ids` | - TODO: we have to decide if to support the last case about combos. + +Scenario: ERS does not forward a second resolution request for the same triad with different content + + ERS enforces idempotency at submission time: if an entity mention triad is submitted a second + time with different content, ERS rejects the conflict before it reaches ERE. From ERE's + perspective, each triad arrives at most once with a consistent payload. + +Given + ERS has already received and forwarded a resolution request for entity mention triad T + with content C1 +When + A second submission arrives for the same triad T with different content C2 +Then + ERS rejects the second submission as a conflict + And no second resolution request for triad T is published to the ERE channel diff --git a/test/features/ere-ers-outcome-integration.feature b/test/features/ere-ers-outcome-integration.feature new file mode 100644 index 0000000..58af37f --- /dev/null +++ b/test/features/ere-ers-outcome-integration.feature @@ -0,0 +1,50 @@ +Feature: ERE/ERS outcome integration — ERE-observable guarantees + + This feature defines what guarantees ERE can rely on when it emits resolution outcomes to ERS. + It does not describe ERS-internal processing; it focuses on what ERE can observe or assume + about how ERS handles its responses. 
+ + +Scenario: Submitting the same outcome a second time produces no error + + ERE is permitted to re-send the same clustering outcome (at-least-once delivery). ERS must + accept duplicate outcomes silently. + +Given + ERE has already emitted a valid clustering outcome for an entity mention E +When + ERE emits the same clustering outcome for E a second time +Then + ERS accepts the message without raising an error +And + The system continues to process subsequent outcomes normally + + +Scenario: An outcome for a mention that was never submitted is silently discarded + + ERE may emit outcomes that ERS has no record of (e.g. stale or cross-environment messages). + ERS discards such outcomes without producing an error response, so ERE need not handle + failure notifications for these cases. + +Given + No resolution request for entity mention E has ever been submitted to ERS +When + ERE emits a clustering outcome for E +Then + ERS discards the outcome without raising an error +And + No error message is sent back to ERE + + +Scenario: An ERE-initiated reclustering outcome is accepted + + ERE may proactively emit reclustering outcomes (with `ere_request_id` using the + `ereNotification:` prefix) without a prior ERS request. ERS must accept such outcomes. 
+ +Given + ERE emits a reclustering outcome for an entity mention E with an `ere_request_id` + using the `ereNotification:` prefix +When + The outcome is delivered to the ERS response channel +Then + ERS accepts the outcome without raising an error and subsequently updates its internal state diff --git a/test/features/ere-ers-request-publishing.feature b/test/features/ere-ers-request-publishing.feature new file mode 100644 index 0000000..d6ecf1c --- /dev/null +++ b/test/features/ere-ers-request-publishing.feature @@ -0,0 +1,71 @@ +Feature: ERE/ERS request publishing — what ERE receives on its channel + + This feature defines what requests ERE can expect to receive on the ERE request channel, + covering the standard resolution flow and the re-evaluation flow triggered by curator actions. + + +Scenario: A standard resolution request appears on the ERE request channel after an entity mention is submitted + + When an originator submits a new entity mention to ERS, ERS publishes a resolution request + to the ERE request channel. ERE can rely on the request being correlated to the entity mention + triad and carrying the entity mention content. + + Contract reference: see src/resources/schemas/ere-service-schema.yaml (EntityMentionResolutionRequest) + +Given + A valid entity mention with a known triad (source_id, request_id, entity_type) is submitted to ERS +When + ERS processes the submission +Then + A resolution request of type `EntityMentionResolutionRequest` appears on the ERE request channel +And + The request is correlated to the submitted entity mention triad +And + The request carries the entity mention content + + +Scenario: When ERE does not respond in time, no follow-up request is published automatically + + When ERS does not receive a response from ERE within the execution window (default: 30s), + it issues a provisional draft identifier internally and returns it to the originator. 
+ No additional request is published to the ERE request channel at this point — ERE will not be + notified of the timeout. A `resolveConsideringRecommendation` request only arrives later + if a curator subsequently submits a placement recommendation for the provisional assignment + (see the re-evaluation Scenario Outline below). + +Given + A valid entity mention is submitted to ERS +And + ERE does not respond within the ERS execution window +When + The ERS execution window expires +Then + No follow-up request is published to the ERE request channel +And + ERE may later receive a `resolveConsideringRecommendation` request if a curator acts + on the provisional assignment — but not before + + +Scenario Outline: A re-evaluation request appears on the ERE request channel after a curator submits a recommendation + + When a curator submits a re-evaluation recommendation for a previously resolved entity mention, + ERS publishes a re-evaluation request to the ERE request channel. The curator interaction is the + triggering context; the subject under test is the request that ERE receives. 
+ + Contract reference: see src/resources/schemas/ere-service-schema.yaml (EntityMentionResolutionRequest) + +Given + An entity mention E has previously been resolved and a cluster assignment is available +And + A curator has submitted a {recommendation_type} recommendation for E +When + ERS publishes the resulting re-evaluation request +Then + A re-evaluation request appears on the ERE request channel +And + The request carries the {recommendation_type} interaction type + + Examples: + | recommendation_type | + | placement | + | exclusion | diff --git a/test/features/ere-ers-resolution-cases.feature b/test/features/ere-ers-resolution-cases.feature new file mode 100644 index 0000000..a9859d3 --- /dev/null +++ b/test/features/ere-ers-resolution-cases.feature @@ -0,0 +1,107 @@ +Feature: ERE/ERS common case interactions — resolution and clustering expectations + + Note that in all the tests, the exact meaning of "known/unknown entity" depends on the ERE implementation, + e.g., it has already seen the entity in a previous request, or it is a test ERE, with a pre-loaded + set of canonical entities. + + +Scenario: A resolution request returns existing cluster candidate references + + A resolution request is pushed to the ERE with an entity E, which is known to be equivalent to + other canonical entities and with sufficiently high confidence scores. The ERE asynchronously returns + the cluster IDs represented by such canonical entities. 
+ + Detailed examples: see [ere-test-cases.md](../test_data/analysis/ere-test-cases.md), + examples 1, 2, 4, 5 +Given + Entity clusters C1, C2, C3 are already known to the ERE +When + ERS pushes a resolution request for an entity mention E into the ERE request channel +And + The entity E is estimated to be equivalent to the canonical entities of C1, C2, C3, + with sufficiently high confidence scores +Then + The ERE asynchronously pushes an `EntityMentionResolutionResponse` object that contains: + + - Common response properties: + - `ere_request_id`: the `ere_request_id` of the original request + - `type`: "EntityMentionResolutionResponse" + - `entity_mention_id`: an instance of `EntityMentionIdentifier` with + `source_id`, `request_id`, `entity_type` equal to those of the mention in the original request, + i.e., identifying E + - These are common to all responses and are not repeated in the following scenarios + + - `candidates`: a list of `ClusterReference` objects such as: + - `{ "cluster_id": <Ci>, "confidence_score": <score(E, Ci)> }` + for i = 1..3 + - All of `score(E, Ci)` are above the confidence threshold configured in the ERE + + ERE must place the best-matching candidate first (`candidates[0]`), as ERS selects the canonical + assignment positionally. ERE is fully responsible for ordering; ERS does not re-rank. + Having 3 items in the result is arbitrary. In general, it depends on how many clusters are found and on the + ERE configuration (e.g., top N results, confidence threshold, or both). + + +Scenario: A resolution request returns a new singleton cluster reference + + A resolution request is pushed to the ERE with an unknown entity, which has no equivalents already + resolved by the ERE +Given + The ERE does not know the entity E (i.e., it has no equivalent cluster for it) +When + ERS pushes the entity E into the ERE request channel +Then + The ERE asynchronously pushes an entity resolution object to the ERS response channel that contains + a reference to one cluster only. 
+ The response has the same format as in the previous scenario. + The cluster ID in the response is formed based on `canonicalID(requestId, sourceId, entityType)`, i.e., + on the function described in the technical contract, which computes the canonical ID from the + composite key of an entity mention. + + +Scenario: Two entity mentions referring to equivalent entities are assigned to the same cluster + + When the same real-world entity is submitted as two separate entity mentions A and B, the ERE + must assign them to the same cluster — `candidates[0]` in each response must carry the same + `cluster_id`. + + Detailed examples: see test/test_data/organizations/group1/ (equivalent organisations) +Given + Entity mention A and entity mention B refer to the same real-world entity +When + ERS pushes a resolution request for A and a resolution request for B into the ERE request channel +Then + The primary candidate (`candidates[0]`) in the response for A and the primary candidate in the + response for B have the same `cluster_id` + + +Scenario: Two entity mentions referring to non-equivalent entities produce distinct clusters + + When two entity mentions A and B refer to different real-world entities, the ERE must assign + them to different clusters — `candidates[0]` in each response must carry different `cluster_id` + values. 
+ + Detailed examples: see test/test_data/organizations/group1/ vs test/test_data/organizations/group2/ +Given + Entity mention A and entity mention B refer to different real-world entities +When + ERS pushes a resolution request for A and a resolution request for B into the ERE request channel +Then + The primary candidate (`candidates[0]`) in the response for A and the primary candidate in the + response for B have different `cluster_id` values + + +Scenario: An entity mention that is similar but not equivalent surfaces as a candidate only + + When entity mention E is similar to a known entity but not equivalent to it, the ERE must not + assign E to that entity's cluster as the primary result. Instead, ERE creates a new singleton + cluster for E as `candidates[0]`, and the similar entity's cluster may appear as a secondary + candidate further down the list. +Given + The ERE knows an entity whose cluster is similar to, but not equivalent to, entity mention E +When + ERS pushes a resolution request for E into the ERE request channel +Then + `candidates[0]` in the response is a newly created singleton cluster for E + And the similar entity's cluster may appear as a secondary candidate in the list, but is not + the primary assignment diff --git a/test/features/ere-ers-unhappy-paths.feature b/test/features/ere-ers-unhappy-paths.feature index 75df568..5dc2bfe 100644 --- a/test/features/ere-ers-unhappy-paths.feature +++ b/test/features/ere-ers-unhappy-paths.feature @@ -1,14 +1,56 @@ Feature: ERE/ERS unhappy path interactions -This feature describes what happens in cases like malformed requests, system errors, or alike. + This feature describes what happens when ERE emits structurally invalid messages to ERS, + or when a resolution request involves an entity type that ERE does not support. 
-Scenario: The ERE replies with an error response to a malformed request -When - The ERS pushes the malformed request into the ERE requests channel -Then - The ERE asynchronously pushes an error response to the responses channel that looks like: +Scenario Outline: A structurally invalid ERE message is discarded and the system continues operating - `ere_request_id`: the ID of the malformed request - `error_title`/`error_message`: a human-readable description of the error - `type`: "EREErrorResponse" # JSON object property, matches the LinkML class in the service schema. + When ERE emits a malformed or incomplete message to the ERS response channel, ERS discards + the message without raising an error back to ERE and continues processing subsequent messages. + +When + ERE emits a "{fault_type}" message to the ERS response channel +Then + The message is discarded without raising an error +And + The system continues to accept and process subsequent valid outcomes + + Examples: + | fault_type | + | missing cluster identifier | + | missing mention triad fields | + | empty message body | + | non-JSON payload | + + +Scenario: ERE replies with an error response to a malformed resolution request + + When ERS publishes a resolution request that is structurally invalid, ERE must reply with + an `EREErrorResponse` on the ERS response channel. + + Contract reference: src/resources/schemas/er-schema.json (EREErrorResponse) + +When + ERS pushes a malformed resolution request into the ERE request channel +Then + ERE asynchronously pushes an error response to the ERS response channel containing: + - `ere_request_id`: the ID of the malformed request + - `error_title` / `error_detail`: a human-readable description of the error + - `type`: "EREErrorResponse" + + +Scenario: An entity mention with an unsupported entity type is rejected by ERE + + ERE only handles entity types it supports. When it receives a resolution request for an + unknown type, it must return an error response. 
ERS will not forward requests for types + not listed in the service schema. + + Contract reference: src/resources/schemas/ere-service-schema.yaml (supported entity types) + +When + ERS pushes a resolution request for an entity mention with an unsupported entity type +Then + ERE returns an `EREErrorResponse` indicating the entity type is not supported +And + No cluster reference is included in the response diff --git a/test/test_data/analysis/ere-test-cases.md b/test/test_data/analysis/ere-test-cases.md index 71c5611..309d669 100644 --- a/test/test_data/analysis/ere-test-cases.md +++ b/test/test_data/analysis/ere-test-cases.md @@ -18,27 +18,24 @@ Outcome: equivalent entities with high confidence. **Request**: -```javascript +```json { "type": "EntityMentionResolutionRequest", - "entityMention": - { - // This is an instance of the EntityMention class (see the LinkML schema) - "type": "http://www.w3.org/ns/org#Organization", - "identifier": "http://data.europa.eu/ers/id/324fs3r345vx-ted-sws-pipeline", - "payload": "", - "dataFormat": "text/turtle" + "entity_mention": { + "identifiedBy": { + "source_id": "ted-sws-pipeline", + "request_id": "324fs3r345vx", + "entity_type": "http://www.w3.org/ns/org#Organization" + }, + "content": "", + "content_type": "text/turtle" }, - "requestId": "324fs3r345vx", - "originator": "TED SWS pipeline", - "metadata": { - "originator system": "VocBench editor", - "originator timestamp": "23748737643" - } + "ere_request_id": "324fs3r345vx:01", + "timestamp": "2026-01-14T12:34:56Z" } ``` -This is the content of the `payload` for this example: +This is the content of the `content` field for this example: ```javascript PREFIX cccev: @@ -85,33 +82,38 @@ epd:id_2023-S-210-661238_ReviewerOrganisationAddress_LLhJHMi9mby8ixbkfyGoWj As you can see, The data have a triple-centric description of the entity to resolve, plus linked entities. 
The ERE is supposed to resolve the former, possibly using the linked entities (such as addresses or contact points). -*Note*: `identifier` is derived from the request data. It can be done in serveral ways. For the purpose of this example, the URI `http://data.europa.eu/ers/id/324fs3r345vx-ted-sws-pipeline` is built by simple concatenation of request id and originator. - **Resolution**: In this case, we have a canonical entity with high confidence matching score (due to key fields being identical): -```javascript +```json { "type": "EntityMentionResolutionResponse", - "requestId": "324fs3r345vx", - "alignmentLinkSet": { - "subjectEntityMentionIdentifier": "http://data.europa.eu/ers/id/324fs3r345vx-ted-sws-pipeline", - "alignmentOptions": [ - { - "canonicalIdentifier": "http://data.europa.eu/ers/id/324fs3r345vxaa32wa", - "confidenceScore": 0.91 - }, - { - "canonicalIdentifier": "http://data.europa.eu/ers/id/324fs3r345vxbb45we", - "confidenceScore": 0.65 - } - ] - } + "entity_mention_id": { + "source_id": "ted-sws-pipeline", + "request_id": "324fs3r345vx", + "entity_type": "http://www.w3.org/ns/org#Organization" + }, + "candidates": [ + { + "cluster_id": "e05a78bda0dbd2aa0d4a41e94949e2c97503db06c0cc3c111610076936eb0c0c", + "confidence_score": 0.91, + "similarity_score": 0.89 + }, + { + "cluster_id": "aec9934e70d35c8e41aa4c3afd0262820423ec159c81236e73e9890e2237597f", + "confidence_score": 0.65, + "similarity_score": 0.62 + } + ], + "ere_request_id": "324fs3r345vx:01", + "timestamp": "2026-01-14T12:34:59Z" } ``` -*Note*: `http://data.europa.eu/ers/id/324fs3r345vxaa32wa` and `http://data.europa.eu/ers/id/324fs3r345vxbb45we` correspond to canonical URIs annotating clusters of entity mentions. Payload of the entity mentions is irrelevant and therefore not presented in this example. +*Note*: `e05a78bda0dbd2aa0d4a41e94949e2c97503db06c0cc3c111610076936eb0c0c` and `aec9934e70d35c8e41aa4c3afd0262820423ec159c81236e73e9890e2237597f` are cluster identifiers (SHA-256 hex digests). 
Payload of the entity mentions in those clusters is irrelevant and therefore not presented in this example. + +The first candidate (`candidates[0]`) is the one ERS selects as the canonical assignment. ERE is responsible for placing the best-matching candidate first. --- @@ -124,22 +126,20 @@ Outcome: distinct entities with low confidence match. **Request**: -```javascript +```json { "type": "EntityMentionResolutionRequest", - "entityMention": - { - // This is an instance of the EntityMention class (see the LinkML schema) - "type": "http://www.w3.org/ns/org#Procedure", - "identifier": "http://data.europa.eu/ers/id/324fs3r345vx-ted-sws-pipeline", - "payload": "", - "dataFormat": "text/turtle" + "entity_mention": { + "identifiedBy": { + "source_id": "ted-sws-pipeline", + "request_id": "324fs3r345vx", + "entity_type": "http://www.w3.org/ns/org#Procedure" + }, + "content": "", + "content_type": "text/turtle" }, - "requestId": "324fs3r345vx", - "originator": "TED SWS pipeline", - "metadata": { - "originator timestamp": "23748737643" - } + "ere_request_id": "324fs3r345vx:01", + "timestamp": "2026-01-14T12:34:56Z" } ``` @@ -168,65 +168,78 @@ epd:id_2023-S-211-665742_ProcedurePurpose_faF7Q5dyoGpXu3Ru4RGg73 No match found above the confidence threshold, the ERE creates a new canonical URI for the incoming entity: -```javascript +```json { "type": "EntityMentionResolutionResponse", - "requestId": "324fs3r345vx", - "alignmentLinkSet": { - "subjectEntityMentionIdentifier": "http://data.europa.eu/ers/id/324fs3r345vx-ted-sws-pipeline", - "alignmentOptions": [ - { - "canonicalIdentifier": "http://data.europa.eu/ers/id/324fs3r345vxwer4rq", - "confidenceScore": 1.0 - } - ] - } + "entity_mention_id": { + "source_id": "ted-sws-pipeline", + "request_id": "324fs3r345vx", + "entity_type": "http://www.w3.org/ns/org#Procedure" + }, + "candidates": [ + { + "cluster_id": "e7cd6ade8061a4bcbf446bb6809c96451b20e5b3387306629348b34a7386d5ac", + "confidence_score": 0.0, + "similarity_score": 0.0 + 
} + ], + "ere_request_id": "324fs3r345vx:01", + "timestamp": "2026-01-14T12:34:59Z" } ``` ## Example 3: Organisations with minor detail variations -- Resolution with excluded identifiers + This example is built on top of Example 1 and presents a case when a subsequent resolution request is submitted to obtain other URIs than the provided two. +The previously returned cluster IDs are passed as `excluded_cluster_ids` to steer the ERE +away from those assignments. **Request**: -```javascript +```json { "type": "EntityMentionResolutionRequest", - "entityMention": { - "type": "http://www.w3.org/ns/org#Organization", - "identifier": "http://data.europa.eu/ers/id/324fs3r345vx-ted-sws-pipeline", - "payload": "", - "dataFormat": "text/turtle" + "entity_mention": { + "identifiedBy": { + "source_id": "ted-sws-pipeline", + "request_id": "324fs3r345a4fr", + "entity_type": "http://www.w3.org/ns/org#Organization" + }, + "content": "", + "content_type": "text/turtle" }, - "rejectedCanonicalIdentifiers": [ - "http://data.europa.eu/ers/id/324fs3r345vxaa32wa", - "http://data.europa.eu/ers/id/324fs3r345vxbb45we" + "excluded_cluster_ids": [ + "e05a78bda0dbd2aa0d4a41e94949e2c97503db06c0cc3c111610076936eb0c0c", + "aec9934e70d35c8e41aa4c3afd0262820423ec159c81236e73e9890e2237597f" ], - "requestId": "324fs3r345a4fr", - "originator": "TED SWS pipeline", - "creationTime": "2026-01-15T14:50:56Z" + "ere_request_id": "324fs3r345a4fr:01", + "timestamp": "2026-01-15T14:50:56Z" } ``` **Resolution**: -In this case, no other match was found besides the two URIs that have been excluded and therefore a new canonical URI is returned: +In this case, no other match was found besides the two clusters that have been excluded and therefore a new canonical URI is returned: -```javascript +```json { "type": "EntityMentionResolutionResponse", - "requestId": "324fs3r345a4fr", - "alignmentLinkSet": { - "subjectEntityMentionIdentifier": "http://data.europa.eu/ers/id/324fs3r345vx-ted-sws-pipeline", - 
"alignmentOptions": [ - { - "canonicalIdentifier": "http://data.europa.eu/ers/id/324fs3r345vuaa3990", - "confidenceScore": 1.0 - } - ] - } + "entity_mention_id": { + "source_id": "ted-sws-pipeline", + "request_id": "324fs3r345a4fr", + "entity_type": "http://www.w3.org/ns/org#Organization" + }, + "candidates": [ + { + "cluster_id": "ce2ff706f4a30d12c2f4ca9686abcaedf878e8e20f1ff4dd1dab2433b7f5af14", + "confidence_score": 0.0, + "similarity_score": 0.0 + } + ], + "ere_request_id": "324fs3r345a4fr:01", + "timestamp": "2026-01-15T14:51:02Z" } -``` \ No newline at end of file +```