diff --git a/.github/workflows/code-quality.yaml b/.github/workflows/code-quality.yaml new file mode 100644 index 0000000..82e2c8b --- /dev/null +++ b/.github/workflows/code-quality.yaml @@ -0,0 +1,112 @@ +name: Quality Check + +on: + push: + branches: [develop, main] + paths: + - "src/**" + - "resources/schemas/**" + - "pyproject.toml" + - "poetry.lock" + - "Makefile" + - ".github/workflows/code-quality.yaml" + pull_request: + paths: + - "src/**" + - "resources/schemas/**" + - "pyproject.toml" + - "poetry.lock" + - "Makefile" + - ".github/workflows/code-quality.yaml" + +jobs: + quality: + name: Lint, Build & Verify + runs-on: ubuntu-latest + permissions: + contents: read + pull-requests: write + + steps: + - uses: actions/checkout@v6 + with: + fetch-depth: 0 + + - name: Read Python version from pyproject.toml + id: python-version + run: echo "version=$(grep -m1 'python = ' pyproject.toml | grep -oP '\d+\.\d+' | head -1)" >> $GITHUB_OUTPUT + + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: ${{ steps.python-version.outputs.version }} + + - name: Install Poetry + run: pipx install poetry + + - name: Cache Poetry dependencies + uses: actions/cache@v4 + with: + path: ~/.cache/pypoetry + key: poetry-${{ runner.os }}-${{ hashFiles('poetry.lock') }} + + - name: Install dependencies + run: make install + + - name: Lint LinkML schemas + run: poetry run linkml lint --ignore-warnings resources/schemas/ + + - name: Run ruff linter + run: poetry run ruff check src/ + + - name: Generate models and docs + run: make -B all + + - name: Verify generated code is up to date + id: verify_generated_code + continue-on-error: true + run: | + echo "Checking if generated files match LinkML schemas..." 
+ OUT_OF_SYNC="" + + # src/ and JSON schemas are deterministic — use git diff directly + for f in $(git diff --name-only src/ resources/schemas/*json); do + OUT_OF_SYNC="$OUT_OF_SYNC $f" + done + + # docs/ uses order-insensitive comparison because linkml generate doc + # produces non-deterministic markdown table row order across runs. + for f in $(git diff --name-only docs/); do + committed=$(git show HEAD:"$f" | sort | sha256sum | awk '{print $1}') + generated=$(sort "$f" | sha256sum | awk '{print $1}') + if [ "$committed" != "$generated" ]; then + OUT_OF_SYNC="$OUT_OF_SYNC $f" + fi + done + + if [ -n "$OUT_OF_SYNC" ]; then + echo "status=failed" >> $GITHUB_OUTPUT + echo "Generated code is out of sync!" + { + echo "**Generated code is out of sync with LinkML schemas.**" + echo "" + echo "Please run \`make all\` and commit the updated files:" + echo '```' + git diff --stat $OUT_OF_SYNC + echo '```' + } > sync_report.txt + else + echo "status=passed" >> $GITHUB_OUTPUT + echo "Generated code is up to date" > sync_report.txt + fi + + - name: Publish sync check comment + if: always() && github.event_name == 'pull_request' + uses: thollander/actions-comment-pull-request@v3 + with: + file-path: sync_report.txt + comment-tag: sync-check + + - name: Fail if out of sync + if: steps.verify_generated_code.outputs.status == 'failed' + run: exit 1 diff --git a/.gitignore b/.gitignore index 304bd99..e194b92 100644 --- a/.gitignore +++ b/.gitignore @@ -218,4 +218,15 @@ __marimo__/ .idea/ .vscode/ .mypy_cache/ -.pyre/ \ No newline at end of file +.pyre/ + +# CI/CD artifacts +sync_report.txt +.gitnexus +.serenity/ +.pycharm_plugin +.venv +.trueflow +.claude +AGENTS.md +CLAUDE.md \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 1323e0d..909ccd6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,22 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
## [unreleased] -... + +## [0.2.0-rc.2] - 2026-02-20 +### Added +* CI: GitHub Actions quality-check workflow (`.github/workflows/code-quality.yaml`) — LinkML schema linting, `ruff` Python linting, model/docs generation with sync verification, and PR comment posting ([ERS1-103]) +* `lint` and `lint-schema` Make targets ([ERS1-103]) +* data model: `proposed_cluster_ids` field on `EntityMentionResolutionRequest` — allows the caller to suggest candidate clusters; the ERE has no obligation to honour the proposal ([ERS1-119]) +* data model: `similarity_score` field on `ClusterReference` — a 0–1 pairwise score between an entity mention and a cluster representative ([ERS1-119]) +* Schema docs: `EREErrorResponse.md`, `EREMessage.md`, `EntityMentionResolutionResponse.md`, `proposed_cluster_ids.md`, `similarity_score.md` ([ERS1-119]) + +### Changed +* data model: revised semantics of `excluded_cluster_ids` — ERE has no obligation to honour exclusions, and it remains the ultimate resolution authority ([ERS1-119]) +* data model: clarified `ere_request_id` — notification responses originating inside the ERE (without a prior request) use the prefix `ereNotification:` ([ERS1-119]) +* Removed `FullRebuildRequest` / `FullRebuildResponse` classes from ERE schema and docs ([ERS1-119]) +* Renamed `entityType.md` → `EntityType.md` in schema docs ([ERS1-119]) +* Gherkin tests updated to reflect the V4 ERE Contract — simplified idempotent-resolution scenarios, added full-rebuild stub, aligned unhappy-path tests ([ERS1-120]) +* README updated: added `lint` / `lint-schema` targets to the Makefile overview ([ERS1-103]) ## [0.2.0-rc.1] - 2026-02-03 ### Added diff --git a/Makefile b/Makefile index b21fa20..e87a99b 100644 --- a/Makefile +++ b/Makefile @@ -1,79 +1,150 @@ SHELL=/bin/bash -o pipefail -BUILD_PRINT = \e[1;34m -END_BUILD_PRINT = \e[0m - -ICON_DONE = [✔] -ICON_ERROR = [x] -ICON_WARNING = [!] 
-ICON_PROGRESS = [-] - -LINKML_MODEL_NAME=ere-service-schema -LINKML_MODEL_VERSION=0.1.0 -PYTHON_MODEL_PATH=src/ere/models/core.py - -SCHEMAS_DIR=resources/schemas - -LINKML_MODEL_PATH=$(SCHEMAS_DIR)/$(LINKML_MODEL_NAME)-v$(LINKML_MODEL_VERSION).yaml -JSON_SCHEMA_PATH=$(SCHEMAS_DIR)/$(LINKML_MODEL_NAME)-v$(LINKML_MODEL_VERSION).json - -MODEL_DOCS_DIR=docs/schema -MODEL_DOCS_README=$(MODEL_DOCS_DIR)/README.md - -## Setup commands -# - -# Note that Python, Poetry and Make are a pre-requisites and we don't deal with them here. -# - -install: - @ echo "Installing dependencies using Poetry..." - @ poetry sync - - -## Build commands -# - -all: $(PYTHON_MODEL_PATH) $(JSON_SCHEMA_PATH) $(MODEL_DOCS_README) - -generate-models: $(PYTHON_MODEL_PATH) $(JSON_SCHEMA_PATH) -generate-doc: $(MODEL_DOCS_README) - -.PHONY: all generate-models generate-doc clean clean-doc clean-models install install-dev check-uv - - -$(PYTHON_MODEL_PATH): $(LINKML_MODEL_PATH) - @ echo "Generating Python service model..." - @ mkdir -p $(dir $(PYTHON_MODEL_PATH)) - @ poetry run linkml generate pydantic $(LINKML_MODEL_PATH) > $(PYTHON_MODEL_PATH) - -$(JSON_SCHEMA_PATH): $(LINKML_MODEL_PATH) - @ echo "Generating JSON Schema for the ERE service..." - @ mkdir -p $(dir $(JSON_SCHEMA_PATH)) - @ poetry run linkml generate json-schema --indent 2 $(LINKML_MODEL_PATH) > $(JSON_SCHEMA_PATH) - - - -$(MODEL_DOCS_README): $(LINKML_MODEL_PATH) - @ echo "Generating documentation for the ERE service Schema..." 
-# Changing default index name from index.md to README.md, since the github browser automatically shows the latter name -# when entering the MODEL_DOCS_DIR - @ poetry run linkml generate doc $(LINKML_MODEL_PATH) -d $(MODEL_DOCS_DIR) --index-name README -# TODO: Probably we want PNG instead, but it doesn't work yet (https://github.com/linkml/linkml/issues/3009) - @ poetry run linkml generate plantuml -d $(MODEL_DOCS_DIR) --format svg $(LINKML_MODEL_PATH) - -# (Brandizi) I've played with it, but the result isn't great (single-class diagrams in each -# class file) -# @ poetry run linkml generate doc -d $(MODEL_DOCS_DIR) --diagram-type plantuml_class_diagram $(LINKML_MODEL_PATH) - - -clean-models: - @ echo "Cleaning up generated models..." - @ rm -rf $(PYTHON_MODEL_PATH) $(JSON_SCHEMA_PATH) - -clean-doc: - @ echo "Cleaning up generated documentation..." - @ rm -rf $(MODEL_DOCS_DIR)/*.md - -clean: clean-doc clean-models - @ echo "All generated files cleaned." \ No newline at end of file +# ─── Formatting ────────────────────────────────────────────────────────────────── + +BUILD_PRINT = \e[1;34m +END_PRINT = \e[0m + +ICON_DONE = $(BUILD_PRINT)$(END_PRINT) [✔] +ICON_ERROR = $(BUILD_PRINT)$(END_PRINT) [✗] +ICON_PROGRESS = $(BUILD_PRINT)$(END_PRINT) [-] + +define log_progress + @printf "$(ICON_PROGRESS) $(BUILD_PRINT)$(1)$(END_PRINT)\n" +endef + +define log_done + @printf "$(ICON_DONE) $(BUILD_PRINT)$(1)$(END_PRINT)\n" +endef + +# ─── Paths & Naming ───────────────────────────────────────────────────────────── + +SCHEMAS_DIR = resources/schemas +SCRIPTS_DIR = resources/scripts +TEMPLATES_DIR = resources/templates +MODELS_DIR = src/erspec/models + +# Schema identifiers +ERE_SCHEMA_NAME = ere-service-schema +CORE_SCHEMA_NAME = core-schema +JSON_SCHEMA_NAME = er-schema +SCHEMA_VERSION = 0.1.0 + +# Source schemas (core is imported by ere, so it is a dependency) +ERE_SCHEMA_PATH = $(SCHEMAS_DIR)/$(ERE_SCHEMA_NAME)-v$(SCHEMA_VERSION).yaml +CORE_SCHEMA_PATH = 
$(SCHEMAS_DIR)/$(CORE_SCHEMA_NAME)-v$(SCHEMA_VERSION).yaml +ALL_SCHEMA_SOURCES = $(ERE_SCHEMA_PATH) $(CORE_SCHEMA_PATH) + +# Generated artefacts +PYTHON_ERE_MODEL = $(MODELS_DIR)/ere.py +PYTHON_CORE_MODEL = $(MODELS_DIR)/core.py +JSON_SCHEMA_PATH = $(SCHEMAS_DIR)/$(JSON_SCHEMA_NAME)-v$(SCHEMA_VERSION).json + +MODEL_DOCS_DIR = docs/schema +MODEL_DOCS_README = $(MODEL_DOCS_DIR)/README.md + +# ─── Help ──────────────────────────────────────────────────────────────────────── + +.PHONY: help +help: + @echo "" + @echo "Usage:" + @echo " make " + @echo "" + @echo "Available targets:" + @awk 'BEGIN {FS = ":.*##"; printf ""} \ + /^[a-zA-Z0-9_-]+:.*##/ { \ + printf " \033[1;34m%-20s\033[0m %s\n", $$1, $$2 \ + }' $(MAKEFILE_LIST) + +# ─── Setup ─────────────────────────────────────────────────────────────────────── +# Note: Python, Poetry and Make are pre-requisites and are not handled here. + +.PHONY: install +install: ## Install dependencies using Poetry + $(call log_progress,Installing dependencies using Poetry...) + @poetry sync + $(call log_done,Dependencies installed.) + +# ─── Quality ───────────────────────────────────────────────────────────────────── + +.PHONY: lint +lint: ## Run ruff linter on source code + $(call log_progress,Running ruff checks...) + @poetry run ruff check src/ + $(call log_done,Ruff checks completed.) + +.PHONY: lint-schema +lint-schema: ## Run LinkML linter on YAML schemas + $(call log_progress,Linting LinkML schemas...) + @poetry run linkml lint --ignore-warnings $(SCHEMAS_DIR)/ + $(call log_done,LinkML schema lint completed.) + +# ─── Aggregate targets ────────────────────────────────────────────────────────── + +.PHONY: all +all: generate-models generate-doc ## Generate all artefacts (models + docs) + $(call log_done,All artefacts generated.) + +.PHONY: generate-models +generate-models: $(PYTHON_ERE_MODEL) $(JSON_SCHEMA_PATH) ## Generate Python models and JSON Schema + $(call log_done,All models generated.) 
+ +.PHONY: generate-doc +generate-doc: $(MODEL_DOCS_README) ## Generate schema documentation and diagrams + $(call log_done,Documentation generated.) + +# ─── Python Pydantic models (split generation: ere + core) ────────────────────── + +$(PYTHON_ERE_MODEL) $(PYTHON_CORE_MODEL) &: $(ALL_SCHEMA_SOURCES) + $(call log_progress,Generating Python models...) + @mkdir -p $(MODELS_DIR) + @poetry run python $(SCRIPTS_DIR)/generate_models.py \ + --schema $(ERE_SCHEMA_PATH) \ + --output $(PYTHON_ERE_MODEL) \ + --template-dir $(TEMPLATES_DIR) \ + --schemas-dir $(SCHEMAS_DIR) + @poetry run ruff check --fix $(MODELS_DIR) + $(call log_done,Python models generated.) + +# ─── JSON Schema ───────────────────────────────────────────────────────────────── +# The ERE schema imports core, so `linkml generate json-schema` will include both. + +$(JSON_SCHEMA_PATH): $(ALL_SCHEMA_SOURCES) + $(call log_progress,Generating JSON Schema...) + @mkdir -p $(dir $(JSON_SCHEMA_PATH)) + @poetry run linkml generate json-schema --indent 2 $(ERE_SCHEMA_PATH) > $(JSON_SCHEMA_PATH) + $(call log_done,JSON Schema generated -> $(JSON_SCHEMA_PATH)) + +# ─── Documentation & PlantUML diagrams ────────────────────────────────────────── + +$(MODEL_DOCS_README): $(ALL_SCHEMA_SOURCES) + $(call log_progress,Generating schema documentation...) + @mkdir -p $(MODEL_DOCS_DIR) +# Index is named README.md so GitHub renders it when browsing the directory. + @poetry run linkml generate doc $(ERE_SCHEMA_PATH) \ + -d $(MODEL_DOCS_DIR) --index-name README +# TODO: Prefer PNG once upstream is fixed (https://github.com/linkml/linkml/issues/3009) +# TODO: --no-mergeimports doesn't work (https://github.com/linkml/linkml/issues/1296), so, for +# the moment, we include core imported classes in the diagram. 
+ @poetry run linkml generate plantuml \ + -d $(MODEL_DOCS_DIR) --format svg $(ERE_SCHEMA_PATH) + $(call log_done,Documentation generated -> $(MODEL_DOCS_DIR)) + +# ─── Clean ─────────────────────────────────────────────────────────────────────── + +.PHONY: clean-models +clean-models: ## Remove all generated models + $(call log_progress,Cleaning generated models...) + @rm -f $(PYTHON_ERE_MODEL) $(PYTHON_CORE_MODEL) $(JSON_SCHEMA_PATH) + $(call log_done,Generated models cleaned.) + +.PHONY: clean-doc +clean-doc: ## Remove generated docs and diagrams + $(call log_progress,Cleaning generated documentation...) + @rm -rf $(MODEL_DOCS_DIR)/*.md $(MODEL_DOCS_DIR)/*.svg + $(call log_done,Generated documentation cleaned.) + +.PHONY: clean +clean: clean-models clean-doc ## Remove all generated files + $(call log_done,All generated files cleaned.) diff --git a/README.md b/README.md index 6e83dc4..e931c94 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,5 @@ # Entity Resolution Specifications + Formal software contract, shared data models, sample messages, and compliance tests required for integrating new Entity Resolution Engines (EREs) into the system. 
> Note: Active development continues in the OP-TED repository: https://github.com/OP-TED/entity-resolution-spec @@ -7,23 +8,25 @@ Formal software contract, shared data models, sample messages, and compliance te - UNIX-compatible environment (Linux/macOS/WSL2) - Make -- Python (managed via [uv](https://docs.astral.sh/uv/getting-started/installation/)) +- Python 3.12+ +- [Poetry](https://python-poetry.org/) (for dependency management) ## Quick Start ```bash -make # installs user dependencies via uv -make install-dev # installs development tooling (tests, lint, codegen) -make generate_models -make generate_docs +make install # install dependencies via Poetry +make all # generate all models, schemas, and documentation ``` ## Make targets overview -- install: user dependencies -- install-dev: dev dependencies (tests, lint, LinkML codegen) -- generate_models: regenerate Pydantic models from LinkML -- generate_docs: regenerate documentation +- `install`: install dependencies via Poetry +- `all`: generate all models, schemas, and documentation +- `generate-models`: regenerate Pydantic models and JSON Schema from LinkML +- `generate-doc`: regenerate documentation +- `lint`: run ruff linter on source code +- `lint-schema`: run LinkML linter on YAML schemas +- `clean`: remove all generated artifacts ## Installation @@ -38,17 +41,15 @@ This will install the necessary user dependencies in a Poetry-managed virtual en ## Development -This project uses principles of model-driven development (MDD) and domain-driven design (DDD). The core model is defined in the `resources/linkml` directory, and the Python (Pydantic) models (pluralized to refer to all the classes as is the practice in the programming community) are generated using the [LinkML](https://linkml.io/) framework. +This project uses principles of model-driven development (MDD) and domain-driven design (DDD). 
The core models are defined in the `resources/schemas` directory using [LinkML](https://linkml.io/), and the Python (Pydantic) models are generated from these specifications. -Generated Python models are in `src/models`. Regenerate them with: -The generated Python models can be found in the `src/models` directory. -You can regenerate both the LinkML-based models (Python, JSONSchema) and the navigable documentation, by running: +Generated Python models are in `src/erspec/models`. Regenerate them with: ```bash make all ``` -*the Makefile has more granular targets, see its content for details*. +This regenerates both the LinkML-based models (Python, JSONSchema) and the navigable documentation. See the Makefile for more granular targets. ## Running and Testing diff --git a/docs/architecture/diagrams/ERS messages.png b/docs/architecture/diagrams/ERS messages.png index e4229bf..c75d969 100644 Binary files a/docs/architecture/diagrams/ERS messages.png and b/docs/architecture/diagrams/ERS messages.png differ diff --git a/docs/architecture/diagrams/L0.png b/docs/architecture/diagrams/L0.png index e50b645..497d0bf 100644 Binary files a/docs/architecture/diagrams/L0.png and b/docs/architecture/diagrams/L0.png differ diff --git a/docs/architecture/diagrams/L1.png b/docs/architecture/diagrams/L1.png index 76a82ec..11ee335 100644 Binary files a/docs/architecture/diagrams/L1.png and b/docs/architecture/diagrams/L1.png differ diff --git a/docs/architecture/diagrams/L2 - Application Cooperation Overview.png b/docs/architecture/diagrams/L2 - Application Cooperation Overview.png index 9865268..34a568b 100644 Binary files a/docs/architecture/diagrams/L2 - Application Cooperation Overview.png and b/docs/architecture/diagrams/L2 - Application Cooperation Overview.png differ diff --git a/docs/architecture/diagrams/Technology choices.png b/docs/architecture/diagrams/Technology choices.png index 6b58066..1c8a0d7 100644 Binary files a/docs/architecture/diagrams/Technology choices.png and 
b/docs/architecture/diagrams/Technology choices.png differ diff --git a/docs/architecture/diagrams/Technology deployment.png b/docs/architecture/diagrams/Technology deployment.png index ae8e04f..ac451e6 100644 Binary files a/docs/architecture/diagrams/Technology deployment.png and b/docs/architecture/diagrams/Technology deployment.png differ diff --git a/docs/architecture/diagrams/canonical registry.png b/docs/architecture/diagrams/canonical registry.png deleted file mode 100644 index 1c68009..0000000 Binary files a/docs/architecture/diagrams/canonical registry.png and /dev/null differ diff --git a/docs/architecture/diagrams/decision store.png b/docs/architecture/diagrams/decision store.png index accbf8f..05443df 100644 Binary files a/docs/architecture/diagrams/decision store.png and b/docs/architecture/diagrams/decision store.png differ diff --git a/docs/architecture/diagrams/ere messages.png b/docs/architecture/diagrams/ere messages.png index 37db915..8956adc 100644 Binary files a/docs/architecture/diagrams/ere messages.png and b/docs/architecture/diagrams/ere messages.png differ diff --git a/docs/architecture/diagrams/preview messages.png b/docs/architecture/diagrams/preview messages.png index 451a04b..51fe25e 100644 Binary files a/docs/architecture/diagrams/preview messages.png and b/docs/architecture/diagrams/preview messages.png differ diff --git a/docs/architecture/diagrams/readme.md b/docs/architecture/diagrams/readme.md index 1c333fc..10e0d23 100644 --- a/docs/architecture/diagrams/readme.md +++ b/docs/architecture/diagrams/readme.md @@ -1,72 +1,77 @@ # Architecture Diagrams Index -This folder contains architecture diagrams referenced by the ERSys Architecture Document and the ERS–ERE Technical Contract. The diagrams are provided as rendered images (`.png`) and represent **authoritative architectural views** at different abstraction levels. 
+This folder contains architecture diagrams referenced by the ERSys Architecture Document, ADR set, and the ERS–ERE Technical Contract. The diagrams are provided as rendered images (`.png`) and represent **authoritative architectural views** at different abstraction levels. -Unless stated otherwise, these diagrams are **normative at their stated level** and are intended to be read together with the accompanying textual sections of the architecture document. Some diagrams provide high-level context, while others zoom into specific concerns such as messaging, governance, or deployment. +Unless stated otherwise, these diagrams are **normative at their stated level** and are intended to be read together with the corresponding textual sections of the architecture documentation. Some diagrams provide high-level context, while others focus on specific concerns such as messaging, persistence, governance semantics, or deployment. + +The views reflect the consolidated engine-authoritative baseline (13 Feb 2026), where cluster identifiers in ERE are canonical identifiers and ERS stores only the latest placement per mention. --- ## Context and layering diagrams -* `L0.png` - System context diagram showing ERSys in relation to external actors and surrounding systems. +* [`L0.png`](./L0.png) + System context diagram showing ERSys in relation to originators, downstream consumers, curators, and surrounding systems (e.g. TED-SWS pipeline). -* `L1.png` - High-level functional decomposition of the ER System, identifying major responsibilities and boundaries. +* [`L1.png`](./L1.png) + High-level functional decomposition of ERSys, identifying externally visible services and responsibility boundaries. -* `L2 - Application Cooperation Overview.png` - Application-level cooperation view showing how ERS, ERE, and related applications interact at runtime. 
+* [`L2 - Application Cooperation Overview.png`](./L2%20-%20Application%20Cooperation%20Overview.png) + Application-level cooperation view showing how ERS (orchestration façade), ERE (authoritative clustering engine), and related components interact at runtime. --- ## Contract and integration diagrams -* `L2 - Contract Realisation and Messaging Mediation (ERS-ERE).png` - Structural view of the ERS–ERE technical contract, illustrating contract realisation by both components and mediation via messaging middleware. +* [`L2 - Contract Realisation and Messaging Mediation (ERS-ERE).png`](./L2%20-%20Contract%20Realisation%20and%20Messaging%20Mediation%20%28ERS-ERE%29.png) + Structural view of the ERS–ERE asynchronous contract, illustrating contract realisation by both components and mediation via messaging middleware. -* `ERS messages.png` - Overview of message types produced and consumed by ERS. +* [`ERS messages.png`](./ERS%20messages.png) + Overview of message types produced and consumed by ERS public APIs, including resolution, preview, decision submission, statistics, and refreshBulk. -* `ere messages.png` - Overview of message types produced and consumed by ERE. +* [`ere messages.png`](./ere%20messages.png) + Overview of resolution request and response message types exchanged with the Entity Resolution Engine, including recommendation and reclustering variants. -* `preview messages.png` - Message flows related to preview or provisional resolution outcomes. +* [`preview messages.png`](./preview%20messages.png) + Message model related to entity and canonical preview operations, including pagination metadata. -* `submit decision messages.png` - Message flows related to the submission of governed resolution decisions. +* [`submit decision messages.png`](./submit%20decision%20messages.png) + Message model for submission of curator recommendations (user actions) and associated cluster references. 
-* `statistics messages.png` - Messages exchanged for reporting and statistics purposes. +* [`statistics messages.png`](./statistics%20messages.png) + Messages exchanged for reporting and statistics purposes, including registry and curation metrics. --- ## Governance and persistence views -* `canonical registry.png` - View of the canonical registry and its role in maintaining governed canonical identifiers. +* [`decision store.png`](./decision%20store.png) + Diagram illustrating the storage model of the Resolution Decision Store, containing the latest cluster placement and top N alternatives per mention. + +* [`system of records.png`](./system%20of%20records.png) + Overview of entity mentions, identifiers (sourceId, requestId, entityType), and lookup state within ERSys. -* `decision store.png` - Diagram illustrating the storage and lifecycle of resolution decisions. +* [`user actions log.png`](./user%20actions%20log.png) + Diagram of the User Action Log used for traceability and training, separated from canonical decision state. -* `system of records.png` - Overview of authoritative source systems and their relationship to ERSys. +Note: ERSys does not maintain a Canonical Entity Registry under the current baseline; canonical identity is materialised by ERE clusters. --- ## Technology and deployment diagrams -* `Technology choices.png` - Summary of selected and candidate technologies used within ERSys. +* [`Technology choices.png`](./Technology%20choices.png) + Summary of selected technologies mapped to architectural roles (messaging, storage, identity, telemetry, container runtime). -* `Technology deployment.png` - Reference deployment architecture illustrating runtime components and their relationships. +* [`Technology deployment.png`](./Technology%20deployment.png) + Reference deployment architecture illustrating runtime stacks (ERS, ERE, Link Curation), messaging middleware, datastores, and container isolation boundaries. 
--- ## Notes * Diagram filenames correspond to figures referenced in the architecture document; titles in the document may be more descriptive than filenames. -* Some diagrams are intentionally high-level and omit implementation details; others focus on specific concerns (messaging, governance, deployment). -* Behavioural details and step-by-step flows are documented separately using Mermaid sequence diagrams. -* Where both simplified and detailed views exist, the architecture document always indicates which view is normative for the given section. +* Some diagrams are intentionally high-level and omit implementation details; others focus on specific concerns (messaging, persistence, deployment). +* Behavioural details and step-by-step flows (e.g. provisional lifecycle, asynchronous resolution) are documented separately using sequence diagrams. +* Where both simplified and detailed views exist, the architecture document explicitly indicates which view is normative for the given section. +* In case of ambiguity, the consolidated engine-authoritative baseline governs interpretation. 
diff --git a/docs/architecture/diagrams/statistics messages.png b/docs/architecture/diagrams/statistics messages.png index e2df036..a812bd7 100644 Binary files a/docs/architecture/diagrams/statistics messages.png and b/docs/architecture/diagrams/statistics messages.png differ diff --git a/docs/architecture/diagrams/submit decision messages.png b/docs/architecture/diagrams/submit decision messages.png index edf89a2..0948d07 100644 Binary files a/docs/architecture/diagrams/submit decision messages.png and b/docs/architecture/diagrams/submit decision messages.png differ diff --git a/docs/architecture/diagrams/system of records.png b/docs/architecture/diagrams/system of records.png index 95993f6..473ca17 100644 Binary files a/docs/architecture/diagrams/system of records.png and b/docs/architecture/diagrams/system of records.png differ diff --git a/docs/architecture/sequence_diagrams/E2E-resolution-cycle(simplified).mmd b/docs/architecture/sequence_diagrams/E2E-resolution-cycle(simplified).mmd index 21bd789..e380d75 100644 --- a/docs/architecture/sequence_diagrams/E2E-resolution-cycle(simplified).mmd +++ b/docs/architecture/sequence_diagrams/E2E-resolution-cycle(simplified).mmd @@ -7,39 +7,43 @@ config: bottomMarginAdj: 0.1 --- sequenceDiagram -%% Maps to EA diagram: UML Sequence (conceptual overview) -%% Simplified: ERSys collapsed to single ERS lifeline -%% Semantics aligned with the locked extended diagram. +%% Maps to EA diagram: UML Sequence (conceptual E2E overview) +%% Aligned with UCB11 (intake), UC12 (result processing), UC1.3 (bulk lookup) +%% ERSys collapsed to single ERS lifeline participant Originator as "Originator" participant ERS as "Entity Resolution Service (ERS)" participant MessagingMiddleware as "Messaging Middleware" participant ERE as "Entity Resolution Engine (ERE)" -%% Phase 1 — Bounded resolve (synchronous; waits for completion) +%% Phase 1 — Resolve request (UCB11) Originator ->> ERS: Resolve Entity Mention
(request identifiers + EntityMention) -ERS ->> ERS: Validate and register request
(idempotent) +ERS ->> ERS: Validate and register request
(idempotency + Request Registry) ERS ->> MessagingMiddleware: Publish resolution request
(request identifiers + EntityMention) MessagingMiddleware ->> ERE: Deliver request (async) -alt Resolution completes successfully within bounded execution window - ERE ->> ERE: Resolve EntityMention - ERE ->> MessagingMiddleware: Publish candidate alignments
(candidate links) - MessagingMiddleware ->> ERS: Deliver results (async) - ERS ->> ERS: Evaluate proposals under governance
(record Resolution Decision) - ERS ->> ERS: Update Canonical Entity Registry
(current governed assignment) - ERS -->> Originator: Canonical identifier -else Timeout or failure within bounded execution window - ERS -->> Originator: Error (no canonical identifier) +alt ERE returns within ERS to ERE execution window + ERE ->> ERE: Resolve mention to cluster + ERE ->> MessagingMiddleware: Publish resolution result
(canonical clusterId + top alternative clusterIds) + MessagingMiddleware ->> ERS: Deliver resolution result (async) + ERS ->> ERS: Process resolution result
(persist Resolution Decision and current assignment) + ERS -->> Originator: Canonical clusterId +else ERE does not return within ERS to ERE execution window + ERS ->> ERS: Create provisional singleton and persist decision + ERS ->> MessagingMiddleware: Publish placement instruction
(assign mention to singleton clusterId) + MessagingMiddleware ->> ERE: Deliver placement instruction (async) + ERS -->> Originator: Provisional clusterId +else Validation failure or internal error + ERS -->> Originator: Error end -%% Phase 2 — Late / duplicate responses (after client request completed) -ERE ->> MessagingMiddleware: Publish candidate alignments
(late or duplicate) -MessagingMiddleware ->> ERS: Deliver results (async) -ERS ->> ERS: Evaluate applicability, integrate if applicable
(update resolution decision
and current assignment) -%% note right of ERS: No callback or push to Originator; completed request outcome is final. +%% Phase 2 — Late or duplicate resolution results (UC12) +ERE ->> MessagingMiddleware: Publish resolution result
(canonical clusterId + alternatives) +MessagingMiddleware ->> ERS: Deliver resolution result (async) +ERS ->> ERS: Process resolution result
(persist Resolution Decision and current assignment) -%% Phase 3 — Convergence (read-only observation) -Originator ->> ERS: Lookup canonical assignment
(request identifiers) -ERS -->> Originator: Current canonical identifier +%% Phase 3 — Bulk lookup by source (UC1.3) +Originator ->> ERS: Bulk lookup by sourceId
(since lastSeenTimestamp) +ERS ->> ERS: Return unseen updates and mark exposed
(by sourceId + Originator) +ERS -->> Originator: Resolution update set
(clusterId assignments + metadata) diff --git a/docs/architecture/sequence_diagrams/E2E-resolution-cycle.mmd b/docs/architecture/sequence_diagrams/E2E-resolution-cycle.mmd deleted file mode 100644 index 701ea5e..0000000 --- a/docs/architecture/sequence_diagrams/E2E-resolution-cycle.mmd +++ /dev/null @@ -1,59 +0,0 @@ ---- -config: - look: neo - theme: redux-color - mirrorActors: false - sequence: - bottomMarginAdj: 0.1 ---- -sequenceDiagram -%% Maps to EA diagram: UML Sequence (high-level overview) -%% Purpose: bounded synchronous client resolve, async ERS–ERE messaging, -%% and convergence via lookup; late/duplicate results are absorbed without client push. - -participant Originator as "Originator" - -box "Entity Resolution System (ERSys)" - participant ERSAPI as "ERS Resolve API" - participant CanonicalLookup as "Canonical Lookup Service" - participant ERS as "Entity Resolution Service (ERS)" -end - -participant MessagingMiddleware as "Messaging Middleware" - -box "Entity Resolution Engine (ERE)" - participant ERE as "Entity Resolution Engine (ERE)" -end - -%% --- Client-facing bounded resolve (waits for completion) --- -Originator ->> ERSAPI: Resolve Entity Mention
(request identifiers + EntityMention) -ERSAPI ->> ERS: Validate and register request
(idempotent) - -ERS ->> MessagingMiddleware: Publish resolution request
(request identifiers + EntityMention) -MessagingMiddleware ->> ERE: Deliver request (async) - -alt Resolution completes successfully within bounded execution window - ERE ->> ERE: Resolve EntityMention - ERE ->> MessagingMiddleware: Publish candidate alignments
(candidate links) - MessagingMiddleware ->> ERS: Deliver results (async) - ERS ->> ERS: Evaluate proposals under governance
(record Resolution Decision) - ERS ->> ERS: Update Canonical Entity Registry
(current governed assignment) - ERS -->> ERSAPI: Canonical identifier - ERSAPI -->> Originator: Canonical identifier -else Timeout or failure within bounded execution window - ERS -->> ERSAPI: Error (no canonical identifier) - ERSAPI -->> Originator: Error (no canonical identifier) -end - -%% --- Late / duplicate responses (after client request completed) --- -ERE ->> MessagingMiddleware: Publish candidate alignments
(late or duplicate) -MessagingMiddleware ->> ERS: Deliver results (async) -ERS ->> ERS: Evaluate applicability, integrate if applicable
(update resolution decision context
and current governed assignment) -%% note right of ERS: Integration of late/duplicate results does not retroactively
affect the completed client request; no callback or push to Originator. - -%% --- Convergence via lookup --- -Originator ->> CanonicalLookup: Lookup canonical assignment
(originatorId, originatorRequestId, entityType) -CanonicalLookup ->> ERS: Retrieve current governed assignment -ERS -->> CanonicalLookup: Current canonical identifier -CanonicalLookup -->> Originator: Current canonical identifier - diff --git a/docs/architecture/sequence_diagrams/_participants.mmd b/docs/architecture/sequence_diagrams/_participants.mmd index c075fb2..ce1de6f 100644 --- a/docs/architecture/sequence_diagrams/_participants.mmd +++ b/docs/architecture/sequence_diagrams/_participants.mmd @@ -41,7 +41,7 @@ box "Entity Resolution System (ERSys)" %% Authoritative state (include only if needed) participant RequestRegistry as "Request Registry" participant DecisionStore as "Resolution Decision Store" - participant CanonicalRegistry as "Canonical Entity Registry" + participant UserActionLog as "User Action Log" end %% Contract and transport (explicit) diff --git a/docs/architecture/sequence_diagrams/readme.md b/docs/architecture/sequence_diagrams/readme.md index b921507..f419a1b 100644 --- a/docs/architecture/sequence_diagrams/readme.md +++ b/docs/architecture/sequence_diagrams/readme.md @@ -1,82 +1,31 @@ # Architecture Sequence Diagrams -This folder contains Mermaid (`.mmd`) sequence diagrams used in, or referenced by, the ERSys architecture documentation and the ERS–ERE technical contract. +This folder contains Mermaid (`.mmd`) sequence diagrams referenced by the ERSys Architecture Document and the ERS–ERE Technical Contract. The diagrams express **normative behavioural spines** under the engine-authoritative clustering model. They focus on interaction order, responsibility transfer, contract boundaries, and externally observable guarantees. -The diagrams are organised to support **different levels of abstraction**: - -* **Simplified diagrams** are used directly in architecture and contract documents to communicate intent, roles, and guarantees without implementation noise. 
-* **Non-simplified (detailed) diagrams** provide additional context and should be consulted when deeper understanding is required, for example during implementation, review, or troubleshooting. They are not all reproduced verbatim in the architecture document but remain authoritative supporting artefacts. - -Only Mermaid source files are listed below. Generated images or archives are excluded. - ---- - -## End-to-end overview diagrams - -* `E2E-resolution-cycle.mmd` - Full end-to-end resolution cycle across ERS, ERE, and supporting components. - -* `E2E-resolution-cycle(simplified).mmd` - Simplified end-to-end view used in the architecture document for high-level explanation. +Only Mermaid source files are listed below. --- -## Shared participants +## Overview and contract -* `_participants.mmd` - Common participant definitions reused across multiple sequence diagrams to ensure naming consistency. +* [`E2E-resolution-cycle(simplified).mmd`](./E2E-resolution-cycle%28simplified%29.mmd) — High-level end-to-end resolution cycle across ERS and ERE. +* [`ers-ere-inreface.mmd`](./ers-ere-inreface.mmd) — Contract-level asynchronous interaction between ERS and ERE. +* [`_participants.mmd`](./_participants.mmd) — Shared participant definitions reused across diagrams. --- -## Spine A — Resolve Entity Mention - -* `spine-A-Resolve-EntityMention.mmd` - Detailed sequence for the primary entity resolution flow, including internal processing steps. - -* `spine-A-Resolve-EntityMention(simplified).mmd` - Contract- and architecture-level view of the resolve operation, used in documentation. - ---- - -## Spine B — ERS–ERE asynchronous exchange - -* `spine-B-ERS-ERE-async-exchange.mmd` - Detailed asynchronous interaction between ERS and ERE, including proposal delivery and integration. - -* `spine-B-ERS-ERE-async-exchange(simplified).mmd` - Simplified contract-level representation of the ERS–ERE exchange, focusing on obligations and semantics. 
- ---- - -## Spine C — Canonical lookup - -* `spine-C-Lookup.mmd` - Sequence describing lookup of the current governed canonical assignment. - ---- - -## Spine D — Curation loop - -* `spine-D-Curation-loop.mmd` - Detailed human-in-the-loop curation sequence, including governance effects. - -* `spine-D-Curation-loop(simplified).mmd` - Simplified curation loop used for architectural explanation. - ---- - -## Spine E — Rebuild - -* `spine-E-rebuild.mmd` - Detailed rebuild sequence illustrating possible internal behaviours during a rebuild. +## Behavioural spines -* `spine-E-rebuild(simplified).mmd` - High-level rebuild semantics as referenced in the contract and architecture documents. +* [`spine-A-Resolve-EntityMention(simplified).mmd`](./spine-A-Resolve-EntityMention%28simplified%29.mmd) — Resolve flow with dual time budgets and provisional lifecycle. +* [`spine-B-ERS-ERE-async-exchange(simplified).mmd`](./spine-B-ERS-ERE-async-exchange%28simplified%29.mmd) — Asynchronous exchange, idempotency, and latest-outcome semantics. +* [`spine-C-Lookup.mmd`](./spine-C-Lookup.mmd) — Read-only canonical lookup. +* [`spine-D-Curation-loop(simplified).mmd`](./spine-D-Curation-loop%28simplified%29.mmd) — Curator recommendation and authoritative re-evaluation. --- ## Notes -* Simplified diagrams are the **primary references** in normative architecture and contract documents. -* Detailed diagrams are **supporting artefacts** and may include illustrative or informative steps that are intentionally omitted from simplified views. -* All diagrams share a consistent vocabulary aligned with the ERSys architecture, ADRs, and glossary. +* Sequence diagrams are normative at the behavioural level. +* They describe interaction semantics, not structural decomposition. +* Vocabulary and guarantees align with the ERSys Architecture Document, ADR baseline, and Business Glossary. +* Where simplified views exist, they are the primary architectural reference. 
diff --git a/docs/architecture/sequence_diagrams/spine-A-Resolve-EntityMention(simplified).mmd b/docs/architecture/sequence_diagrams/spine-A-Resolve-EntityMention(simplified).mmd index c548440..49531c1 100644 --- a/docs/architecture/sequence_diagrams/spine-A-Resolve-EntityMention(simplified).mmd +++ b/docs/architecture/sequence_diagrams/spine-A-Resolve-EntityMention(simplified).mmd @@ -7,10 +7,10 @@ config: bottomMarginAdj: 0.1 --- sequenceDiagram -%% Maps to EA diagram: UML Sequence (Spine A – simplified) -%% Purpose: bounded synchronous resolve with idempotent request handling. -%% Shows: async publication to Messaging Middleware as part of the bounded resolve. -%% Excludes: detailed store choreography, ERE logic, candidate integration details, curation, and rebuild. +%% Maps to EA diagram: UML Sequence (Spine A) +%% Purpose: bounded resolve with idempotent request handling (UCB11) +%% Shows: async publication to Messaging Middleware and bounded response semantics +%% Excludes: store choreography details, engine internals, curation, rebuild participant Originator as "Originator" @@ -20,27 +20,33 @@ box "Entity Resolution System (ERSys)" end participant MessagingMiddleware as "Messaging Middleware" -%% participant ERE as "Entity Resolution Engine (ERE)" -Originator ->> ERIntake: Resolve Entity Mention
(sourceId, requestId, entityType, EntityMention) -ERIntake ->> ERS: Validate request
and enforce idempotency +Originator ->> ERIntake: Resolve Entity Mention
(sourceId, requestId, entityType,
EntityMention, context) +ERIntake ->> ERS: Validate and handle idempotency
(register request if new) alt Request rejected - ERS -->> ERIntake: Reject (validation, idempotency conflict,
dependency unavailable) - ERIntake -->> Originator: 4xx / 503 + ERS -->> ERIntake: Reject
(validation, idempotency conflict, dependency unavailable) + ERIntake -->> Originator: 4xx or 503 +else Request is an idempotent repeat + ERS -->> ERIntake: Previously returned identifier + ERIntake -->> Originator: 200 OK
(identifier) else Request accepted - ERS ->> MessagingMiddleware: Publish Resolution Request (async)
(sourceId, requestId, entityType, EntityMention) - %% MessagingMiddleware ->> ERE: Deliver request (async) + ERS ->> MessagingMiddleware: Publish resolution request (async)
(sourceId, requestId, entityType, EntityMention) - alt Resolution completes successfully within bounded execution window - MessagingMiddleware -->> ERS: Deliver results (async) - ERS ->> ERS: Record Resolution Decision
and update canonical projection - ERS -->> ERIntake: Canonical identifier - ERIntake -->> Originator: 200 OK
Canonical identifier - else Timeout or failure within bounded execution window - ERS -->> ERIntake: Error (no canonical identifier) - ERIntake -->> Originator: 5xx/timeout
Error (no canonical identifier) + alt ERE result received within ERS to ERE execution window + MessagingMiddleware -->> ERS: Deliver resolution result (async)
(canonical clusterId + top alternative clusterIds) + ERS ->> ERS: Persist resolution decision and update lookup projection + ERS -->> ERIntake: Canonical clusterId + ERIntake -->> Originator: 200 OK
Canonical clusterId + else ERE result not received within ERS to ERE execution window + ERS ->> ERS: Create provisional singleton and persist decision + ERS ->> MessagingMiddleware: Publish placement instruction (async)
(assign mention to singleton clusterId) + ERS -->> ERIntake: Provisional clusterId + ERIntake -->> Originator: 200 OK
Provisional clusterId + else Client timeout budget exceeded + ERS -->> ERIntake: Error + ERIntake -->> Originator: 5xx or timeout end end -note right of ERS: Publication initiates engine processing.
Late/duplicate results may be integrated later,
but do not retroactively change the completed client response. +%% note right of ERS: Late or duplicate engine results may update the stored decision and lookup projection\nCompleted client responses are not retroactively changed diff --git a/docs/architecture/sequence_diagrams/spine-A-Resolve-EntityMention.mmd b/docs/architecture/sequence_diagrams/spine-A-Resolve-EntityMention.mmd deleted file mode 100644 index ff44ea5..0000000 --- a/docs/architecture/sequence_diagrams/spine-A-Resolve-EntityMention.mmd +++ /dev/null @@ -1,62 +0,0 @@ ---- -config: - look: neo - theme: redux-color - mirrorActors: false - sequence: - bottomMarginAdj: 0.1 ---- -sequenceDiagram - %% Maps to EA diagram: UML Sequence (Spine A) - %% Purpose: bounded client resolve, idempotent request registration, synchronous completion semantics. - %% Shows: request publication to Messaging Middleware as part of the bounded resolve. - %% Excludes: detailed ERE logic, candidate ranking/pruning, curation, rebuild, and store choreography beyond Request Registry. - - participant Originator as "Originator" - - participant ERIntake - participant ERS - participant RequestRegistry - -box "Entity Resolution System (ERSys)" - participant ERIntake as "Entity Resolution Intake Service" - participant ERS as "Entity Resolution Service (ERS)" - participant RequestRegistry as "Request Registry" -end - -participant MessagingMiddleware as "Messaging Middleware" -participant ERE as "Entity Resolution Engine (ERE)" - -Originator ->> ERIntake: Resolve Entity Mention
(sourceId, requestId, entityType, EntityMention) -ERIntake ->> ERS: Validate request and enforce idempotency - -alt Invalid request (syntax, mandatory validation, unsupported type) - ERS -->> ERIntake: Reject request (validation error) - ERIntake -->> Originator: 400 Bad Request -else Valid request - ERS ->> RequestRegistry: Register request (idempotent upsert) - alt Idempotency conflict (same triad, different payload) - RequestRegistry -->> ERS: Conflict detected - ERS -->> ERIntake: Reject request (idempotency conflict) - ERIntake -->> Originator: 409 Conflict - else Idempotency preserved (new or replay) - RequestRegistry -->> ERS: Registered or confirmed - - ERS ->> MessagingMiddleware: Publish Resolution Request (async)
(sourceId, requestId, entityType, EntityMention) - MessagingMiddleware ->> ERE: Deliver request (async) - - alt Resolution completes successfully within bounded execution window - %% ERE -->> MessagingMiddleware: Publish candidate alignments
(candidate links) - MessagingMiddleware -->> ERS: Deliver results (async) - ERS ->> ERS: Evaluate proposals under governance
(record Resolution Decision) - ERS ->> ERS: Update Canonical Entity Registry
(current governed assignment) - ERS -->> ERIntake: Canonical identifier - ERIntake -->> Originator: 200 OK
Canonical identifier - else Timeout or failure within bounded execution window - ERS -->> ERIntake: Error (no canonical identifier) - ERIntake -->> Originator: 5xx/timeout
Error (no canonical identifier) - end - end -end - -note right of ERS: Publication to Messaging Middleware initiates engine processing.
Late/duplicate engine results may be integrated later,
but do not retroactively change the completed client response. \ No newline at end of file diff --git a/docs/architecture/sequence_diagrams/spine-B-ERS-ERE-async-exchange(simplified).mmd b/docs/architecture/sequence_diagrams/spine-B-ERS-ERE-async-exchange(simplified).mmd index 82659ed..2a5fb4d 100644 --- a/docs/architecture/sequence_diagrams/spine-B-ERS-ERE-async-exchange(simplified).mmd +++ b/docs/architecture/sequence_diagrams/spine-B-ERS-ERE-async-exchange(simplified).mmd @@ -7,42 +7,36 @@ config: bottomMarginAdj: 0.1 --- sequenceDiagram -%% Maps to EA diagram: UML Sequence (Spine B – simplified) -%% Purpose: ERS–ERE asynchronous proposal exchange and governance-first integration. -%% Focus: triad correlation, advisory engine outputs, and governed state update. -%% Excludes: client REST flows, detailed store choreography, scoring internals, -%% pruning mechanics, curation UI, and rebuild initiation. +%% Maps to EA diagram: UML Sequence (Spine B) +%% Purpose: async resolution outcome integration and UC12 processing +%% Covers: solicited outcomes (Spine A, Spine D) and unsolicited outcomes (engine initiated reclustering) +%% Focus: triad correlation, optional constraints, at least once delivery tolerance +%% Excludes: client REST flows, detailed store choreography, scoring internals, curation UI -box "Entity Resolution System (ERSys)" +%%box "Entity Resolution System (ERSys)" participant ERS as "Entity Resolution Service (ERS)" -end +%%end participant MessagingMiddleware as "Messaging Middleware" -box "Entity Resolution Engine (ERE)" +%%box "Entity Resolution Engine (ERE)" participant ERE as "Entity Resolution Engine (ERE)" -end +%%end -%% --- Request (triggered by Spine A or rebuild processing) --- -ERS ->> MessagingMiddleware: Publish Resolution Request (async)
(sourceId, requestId, entityType,
EntityMention, rejectionConstraints?) -MessagingMiddleware ->> ERE: Deliver Resolution Request (async) +opt ERS publishes a resolution request + ERS ->> MessagingMiddleware: Publish resolution request (async)
(sourceId, requestId, entityType, EntityMention,
optional rejectionConstraints, optional preferredPlacement) + MessagingMiddleware ->> ERE: Deliver resolution request (async) +end -%% --- Advisory result --- -ERE ->> MessagingMiddleware: Publish Resolution Result (async)
(sourceId, requestId, entityType,
candidate cluster references + engine metadata) -MessagingMiddleware ->> ERS: Deliver Resolution Result (async) +%% Resolution outcome +ERE ->> MessagingMiddleware: Publish resolution result (async)
(sourceId, requestId, entityType,
canonical clusterId, top alternative clusterIds) +MessagingMiddleware ->> ERS: Deliver resolution result (async) -%% --- Governance-first integration --- -alt Contract violation (missing triad / invalid schema) +%% UC12 processing +alt Contract violation (missing triad or invalid schema) ERS ->> ERS: Record contract violation and ignore else Acceptable delivery (including duplicates or late arrivals) - ERS ->> ERS: Correlate by triad and apply governance constraints
(preserve curator locks, enforce rejections) - ERS ->> ERS: Persist updated Resolution Decision
and update canonical projection + ERS ->> ERS: Correlate by triad and persist latest assignment
(update Resolution Decision and lookup projection) end -%% note right of ERS: -%% ERE outputs are advisory proposals only. -%% Messaging is at-least-once; ERS tolerates duplicates and lateness. -%% Integration may occur after a client resolve has completed: -%% - governance state is updated, -%% - no callbacks or notifications are triggered, -%% - completed client requests are not retroactively altered. +%% note right of ERS: Optional rejectionConstraints express negative evidence
Optional preferredPlacement expresses recommended cluster assignment
Outcomes may arrive without a preceding request due to engine initiated reclustering
Messaging is at least once and results may be late or duplicated
Completed client responses are not retroactively changed diff --git a/docs/architecture/sequence_diagrams/spine-B-ERS-ERE-async-exchange.mmd b/docs/architecture/sequence_diagrams/spine-B-ERS-ERE-async-exchange.mmd deleted file mode 100644 index c8d2d86..0000000 --- a/docs/architecture/sequence_diagrams/spine-B-ERS-ERE-async-exchange.mmd +++ /dev/null @@ -1,60 +0,0 @@ ---- -config: - look: neo - theme: redux-color - mirrorActors: false - sequence: - bottomMarginAdj: 0.1 ---- -sequenceDiagram -%% Maps to EA diagram: UML Sequence (Spine B) -%% Purpose: ERS–ERE asynchronous resolution exchange and governance-first integration. -%% Focus: triad correlation, advisory engine outputs, idempotent absorption, decision update + projection update. -%% Excludes: client REST intake/lookup, UI flows, rebuild initiation, and detailed persistence mechanics. - -box "Entity Resolution System (ERSys)" - participant ERS as "Entity Resolution Service (ERS)" - participant DecisionStore as "Resolution Decision Store" - participant CanonicalRegistry as "Canonical Entity Registry" -end - -participant MessagingMiddleware as "Messaging Middleware" - -box "Entity Resolution Engine (ERE)" - participant ERE as "Entity Resolution Engine (ERE)" -end - -%% --- Publish request (triggered by Spine A or by rebuild processing) --- -ERS ->> MessagingMiddleware: Publish Resolution Request (async)
(sourceId, requestId, entityType,
EntityMention, excludedClusterIds) -MessagingMiddleware ->> ERE: Deliver Resolution Request (async) - -%% --- Engine computes proposals (non-authoritative) --- -ERE ->> ERE: Compute candidate cluster references
+ confidence & similarity (optional) - -%% --- Publish result --- -ERE ->> MessagingMiddleware: Publish Resolution Result (async)
(sourceId, requestId, entityType,
candidateClusterIds + confidence scores) -MessagingMiddleware ->> ERS: Deliver Resolution Result (async) - -%% note right of ERS: -%% ERE outputs are proposals only. -%% Similarity metadata is non-normative. -%% Messaging is at-least-once: ERS tolerates duplicates and lateness. -%% Integration may occur after a client resolve has completed; such integration: -%% (1) updates governance state, (2) triggers no callbacks/notifications, -%% (3) does not retroactively alter the completed client request outcome. - -%% --- Integrate under decision-centric governance --- -ERS ->> ERS: Correlate by triad and validate contract envelope -ERS ->> ERS: Prune / normalise proposals (top N policy) -ERS ->> DecisionStore: Upsert Resolution Decision
(governance state & decision context) -DecisionStore -->> ERS: Decision recorded -ERS ->> CanonicalRegistry: Project the Resolution Decision as current governed assignment
(triad -> current canonical identifier) -CanonicalRegistry -->> ERS: Projection updated - -alt Duplicate delivery (same triad, equivalent result) - ERS ->> ERS: Idempotently absorb (no additional state change) -else Late delivery (triad superseded by newer governed state) - ERS ->> ERS: Absorb only if applicable under locks and invariants -else Contract violation (invalid schema or missing triad) - ERS ->> ERS: Record as contract violation and ignore -end diff --git a/docs/architecture/sequence_diagrams/spine-C-Lookup.mmd b/docs/architecture/sequence_diagrams/spine-C-Lookup.mmd index a398c41..696e167 100644 --- a/docs/architecture/sequence_diagrams/spine-C-Lookup.mmd +++ b/docs/architecture/sequence_diagrams/spine-C-Lookup.mmd @@ -8,28 +8,35 @@ config: --- sequenceDiagram %% Maps to EA diagram: UML Sequence (Spine C) -%% Purpose: read-only lookup of the current governed canonical assignment. -%% Focus: triad correlation, projection semantics, bounded response, no side effects. -%% Excludes: resolution triggering, ERE/messaging, curation, rebuild, and detailed persistence mechanics. +%% Purpose: Bulk Refresh by sourceId (UC1.3) +%% Focus: bounded delta response with continuationCursor, no resolution triggering +%% Excludes: ERE and messaging, curation, rebuild, consumer System of Records persistence participant DownstreamConsumer as "Downstream Consumer" box "Entity Resolution System (ERSys)" participant CanonicalLookup as "Canonical Lookup Service" participant ERS as "Entity Resolution Service (ERS)" - participant CanonicalRegistry as "Canonical Entity Registry" + participant DecisionStore as "Resolution Decision Store" end -DownstreamConsumer ->> CanonicalLookup: Lookup canonical assignment
(sourceId, requestId, entityType) -CanonicalLookup ->> ERS: Validate request and resolve lookup -ERS ->> CanonicalRegistry: Read current governed assignment
(sourceId, requestId, entityType) -CanonicalRegistry -->> ERS: Current canonical identifier
+ outcome status -ERS -->> CanonicalLookup: Current canonical identifier
+ outcome status (bounded) -CanonicalLookup -->> DownstreamConsumer: 200 OK
Current canonical identifier +%% Consumer obtains lastSeenTimestamp from its own System of Records (not shown) +DownstreamConsumer ->> CanonicalLookup: Bulk refresh by sourceId
(sourceId, lastSeenTimestamp, limit?) +CanonicalLookup ->> ERS: Validate request and resolve bulk refresh -alt Unknown triad (no registered request) - ERS -->> CanonicalLookup: 404 Not Found (unknown request) - CanonicalLookup -->> DownstreamConsumer: 404 Not Found +ERS ->> DecisionStore: Read changed assignments
(sourceId, lastSeenTimestamp, effectiveLimit) +DecisionStore -->> ERS: Bulk refresh slice
(updates, hasMore, continuationCursor?) + +ERS -->> CanonicalLookup: Bulk refresh slice
(updates, hasMore, continuationCursor?) +CanonicalLookup -->> DownstreamConsumer: 200 OK
Bulk refresh slice + continuation + +alt hasMore is true + DownstreamConsumer ->> CanonicalLookup: Continue bulk refresh
(continuationCursor) + CanonicalLookup ->> ERS: Resolve continuation + ERS ->> DecisionStore: Read next slice
(continuationCursor) + DecisionStore -->> ERS: Bulk refresh slice
(updates, hasMore, continuationCursor?) + ERS -->> CanonicalLookup: Bulk refresh slice
(updates, hasMore, continuationCursor?) + CanonicalLookup -->> DownstreamConsumer: 200 OK
Next bulk refresh slice end -%%note right of CanonicalLookup: Lookup is strictly read-only and does not trigger (re-)resolution.
The returned canonical identifier is stable.
The assignment may evolve over time due to asynchronous processing, curation, or rebuild. +%%note right of CanonicalLookup: Bulk refresh is read only and does not trigger resolution\nContinuationCursor is server minted to keep responses bounded\nAssignments may evolve over time and clients should tolerate duplicates diff --git a/docs/architecture/sequence_diagrams/spine-D-Curation-loop(simplified).mmd b/docs/architecture/sequence_diagrams/spine-D-Curation-loop(simplified).mmd index 85b8727..1ba6a57 100644 --- a/docs/architecture/sequence_diagrams/spine-D-Curation-loop(simplified).mmd +++ b/docs/architecture/sequence_diagrams/spine-D-Curation-loop(simplified).mmd @@ -7,30 +7,40 @@ config: bottomMarginAdj: 0.1 --- sequenceDiagram -%% Maps to EA diagram: UML Sequence (Spine D – simplified, governance-complete) -%% Purpose: curator governance action results in updated governed decision and updated canonical projection. -%% Excludes: store-level choreography, UI browsing, ERE/messaging. +%% Maps to EA diagram: UML Sequence (Spine D) +%% Purpose: curator submits a user action (acceptTop, acceptAlt, rejectAll) that is logged and forwarded to ERE +%% Focus: user action log, async re-resolve, later UI refresh when results arrive +%% Excludes: decision browsing, detailed store choreography, scoring internals -participant Curator as "Curator" +actor Curator as "Curator" box "Entity Resolution System (ERSys)" participant LinkCurationSvc as "Link Curation Service" participant ERS as "Entity Resolution Service (ERS)" + participant UserActionLog as "User Action Log" end -Curator ->> LinkCurationSvc: Submit curation action
(sourceId, requestId, entityType,
action, targetCanonicalIdentifier?) -LinkCurationSvc ->> ERS: Apply curation action
(governance transition) +participant MessagingMiddleware as "Messaging Middleware" + +box "Entity Resolution Engine (ERE)" + participant ERE as "Entity Resolution Engine (ERE)" +end + +Curator ->> LinkCurationSvc: Submit user action
(sourceId, requestId, entityType,
acceptTop or acceptAlt or rejectAll,
targetClusterId?) +LinkCurationSvc ->> ERS: Validate triad and action alt Request rejected - ERS -->> LinkCurationSvc: Reject (validation | unknown triad | conflict) + ERS -->> LinkCurationSvc: Reject
(validation, unknown triad, conflict) LinkCurationSvc -->> Curator: 4xx -else Curation applied - ERS ->> ERS: Persist updated governed decision
and update canonical projection
(single consistent update) - ERS -->> LinkCurationSvc: 200 OK (curation applied) - LinkCurationSvc -->> Curator: 200 OK (curation applied) +else Request accepted + ERS ->> UserActionLog: Append user action record + ERS ->> MessagingMiddleware: Publish re-resolve request (async)
(sourceId, requestId, entityType,
optional rejectionConstraints,
optional preferredPlacement) + LinkCurationSvc -->> Curator: 202 Accepted
(action recorded) + + MessagingMiddleware ->> ERE: Deliver re-resolve request (async) + ERE ->> MessagingMiddleware: Publish resolution result (async)
(sourceId, requestId, entityType,
canonical clusterId, top alternative clusterIds) + MessagingMiddleware ->> ERS: Deliver resolution result (async) + ERS ->> ERS: Process resolution result
(persist Resolution Decision and update lookup projection) end -%% note right of ERS -%% Curator actions are authoritative locks and override automation. -%% The canonical assignment exposed by lookup reflects the current governed decision. -%% end +%% note right of LinkCurationSvc: The UI may refresh by polling decision preview or bulk refresh
until it observes the updated cluster assignment diff --git a/docs/architecture/sequence_diagrams/spine-D-Curation-loop.mmd b/docs/architecture/sequence_diagrams/spine-D-Curation-loop.mmd deleted file mode 100644 index 458c5b8..0000000 --- a/docs/architecture/sequence_diagrams/spine-D-Curation-loop.mmd +++ /dev/null @@ -1,46 +0,0 @@ ---- -config: - look: neo - theme: redux-color - mirrorActors: false - sequence: - bottomMarginAdj: 0.1 ---- -sequenceDiagram -%% Maps to EA diagram: UML Sequence (Spine D) -%% Purpose: curator governance loop over Resolution Decisions and canonical projection. -%% Focus: decision-centric curation, curator authority/lock semantics, projection update. -%% Excludes: ERE/messaging, candidate computation, rebuild, UI browsing flows, and internal read choreography. - -participant Curator as "Curator" - -box "Entity Resolution System (ERSys)" - participant LinkCurationSvc as "Link Curation Service" - participant ERS as "Entity Resolution Service (ERS)" - participant DecisionStore as "Resolution Decision Store" - participant CanonicalRegistry as "Canonical Entity Registry" -end - -Curator ->> LinkCurationSvc: Submit curation action
(sourceId, requestId, entityType,
action: accept | reject | reassign,
targetCanonicalIdentifier?) -LinkCurationSvc ->> ERS: Apply curation action under governance rules - -alt Invalid request (missing triad, unsupported action, invalid target) - ERS -->> LinkCurationSvc: Reject (validation error) - LinkCurationSvc -->> Curator: 400 Bad Request -else Unknown decision (no current Resolution Decision) - ERS -->> LinkCurationSvc: Reject (unknown decision) - LinkCurationSvc -->> Curator: 404 Not Found -else Concurrent modification detected - ERS -->> LinkCurationSvc: Reject (conflict) - LinkCurationSvc -->> Curator: 409 Conflict -else Curation applied - ERS ->> DecisionStore: Upsert Resolution Decision
(governance state + curator lock, confidence) - DecisionStore -->> ERS: Decision recorded - ERS ->> CanonicalRegistry: Project current governed assignment
(triad -> current canonical identifier) - CanonicalRegistry -->> ERS: Projection updated - ERS -->> LinkCurationSvc: 200 OK (curation applied) - LinkCurationSvc -->> Curator: 200 OK (curation applied) -end - -%% note right of ERS: Curator actions are authoritative locks and override automation.
Canonical Registry is a projection of the current governed Resolution Decision.
Bulk curation uses the same per-decision governance transition, repeated per triad.
- diff --git a/docs/architecture/sequence_diagrams/spine-E-rebuild(simplified).mmd b/docs/architecture/sequence_diagrams/spine-E-rebuild(simplified).mmd deleted file mode 100644 index 23b3a23..0000000 --- a/docs/architecture/sequence_diagrams/spine-E-rebuild(simplified).mmd +++ /dev/null @@ -1,51 +0,0 @@ ---- -config: - look: neo - theme: redux-color - mirrorActors: false - sequence: - bottomMarginAdj: 0.1 ---- -sequenceDiagram -%% Maps to EA diagram: UML Sequence (Spine E – simplified) -%% Purpose: system-level rebuild + governed replay. - -participant SystemAdministrator as "System Administrator" - -box "Entity Resolution System (ERSys)" - participant RebuildSvc as "Rebuild Service" - participant ERS as "Entity Resolution Service (ERS)" -end - -participant MessagingMiddleware as "Messaging Middleware" - -box "Entity Resolution Engine (ERE)" - participant ERE as "Entity Resolution Engine (ERE)" -end - -SystemAdministrator ->> RebuildSvc: Initiate rebuild (entityType) -RebuildSvc ->> ERS: Start rebuild (authorised) - -alt Rebuild rejected - ERS -->> RebuildSvc: Reject - RebuildSvc -->> SystemAdministrator: 4xx -else Rebuild accepted - ERS -->> RebuildSvc: 202 Accepted - RebuildSvc -->> SystemAdministrator: 202 Accepted -end - -ERS ->> MessagingMiddleware: Full Rebuild Request (entityType) -MessagingMiddleware ->> ERE: Full Rebuild Request -ERE ->> MessagingMiddleware: Rebuild ACK (rebuild completed) -MessagingMiddleware ->> ERS: Rebuild ACK - -%% note right of ERS -%% ERS replays Entity Mentions for the entityType using -%% the normal asynchronous resolution contract. -%% Integration preserves curator locks and identity invariants. 
-%% end - -ERS ->> MessagingMiddleware: Resolution requests (replay) -MessagingMiddleware ->> ERE: Deliver requests -ERE ->> MessagingMiddleware: Resolution results -MessagingMiddleware ->> ERS: Deliver results diff --git a/docs/architecture/sequence_diagrams/spine-E-rebuild.mmd b/docs/architecture/sequence_diagrams/spine-E-rebuild.mmd deleted file mode 100644 index f1798d8..0000000 --- a/docs/architecture/sequence_diagrams/spine-E-rebuild.mmd +++ /dev/null @@ -1,83 +0,0 @@ ---- -config: - look: neo - theme: redux-color - mirrorActors: false - sequence: - bottomMarginAdj: 0.1 ---- -sequenceDiagram -%% Maps to EA diagram: UML Sequence (Spine E) -%% Purpose: administrator-triggered rebuild for one entityType, using contract-faithful ERE reset + ERS-driven replay. -%% Focus: reset acknowledgement, triad-correlated replay (one-by-one), reuse of Spine B integration, lock preservation. -%% Excludes: progress APIs, batching/chunking, engine internals, and store-level replay mechanics. - -participant SystemAdministrator as "System Administrator" - -box "Entity Resolution System (ERSys)" - participant RebuildSvc as "Rebuild Service" - participant ERS as "Entity Resolution Service (ERS)" - participant DecisionStore as "Resolution Decision Store" - participant CanonicalRegistry as "Canonical Entity Registry" -end - -participant MessagingMiddleware as "Messaging Middleware" - -box "Entity Resolution Engine (ERE)" - participant ERE as "Entity Resolution Engine (ERE)" -end - -%% --- Initiate rebuild (scoped, explicit) --- -SystemAdministrator ->> RebuildSvc: Initiate rebuild
(entityType, rebuildMode) -RebuildSvc ->> ERS: Authorise and start rebuild
(entityType, rebuildMode) - -alt Rebuild not permitted (unauthorised or invalid entityType) - ERS -->> RebuildSvc: Reject (validation / authorisation error) - RebuildSvc -->> SystemAdministrator: 400 / 403 -else Rebuild already active - ERS -->> RebuildSvc: Reject (rebuild conflict) - RebuildSvc -->> SystemAdministrator: 409 Conflict -else Rebuild started - ERS ->> DecisionStore: Record rebuild session start
(entityType, rebuildMode) - DecisionStore -->> ERS: Rebuild session recorded - ERS -->> RebuildSvc: 202 Accepted (rebuild started) - RebuildSvc -->> SystemAdministrator: 202 Accepted (rebuild started) -end - -%% --- Reset ERE (contract-level) --- -ERS ->> MessagingMiddleware: Publish Full Rebuild Request (async)
(entityType) -MessagingMiddleware ->> ERE: Deliver Full Rebuild Request (async) - -ERE ->> ERE: Reset internal state for entityType &
rebuild teh clusters
(engine-internal) -ERE ->> MessagingMiddleware: Publish Full Rebuild Response (async)
(entityType, status = ready) -MessagingMiddleware ->> ERS: Deliver Full Rebuild Response (async) - -%% note right of ERS -%% Rebuild is scoped to one entityType. -%% Canonical identifier derivation rules do not change. -%% Curator locks (confidence = +1 or −1) SHALL NOT be overridden by rebuild integration. -%% end - -%% --- Replay resolution requests (ERS-driven, one-by-one) --- -loop For each known EntityMention of the entityType (ERS drives replay) - ERS ->> MessagingMiddleware: Publish a Resolution Request (async)
(sourceId, requestId, entityType,
EntityMention, rejectionConstraints?) - MessagingMiddleware ->> ERE: Deliver Resolution Request (async) - - ERE ->> ERE: Identify candidate clusters and confidence scores
(engine-internal) - ERE ->> MessagingMiddleware: Publish Resolution Result (async)
(sourceId, requestId, entityType,
candidateClusterIds + confidence scores) - MessagingMiddleware ->> ERS: Deliver Resolution Result (async) - - %% --- Integrate under decision-centric governance (same semantics as Spine B) --- - ERS ->> ERS: Correlate by triad and validate envelope - ERS ->> ERS: Apply governance constraints
(preserve locks, enforce rejections) - ERS ->> DecisionStore: Upsert Resolution Decision
(rebuild-integrated governance state) - DecisionStore -->> ERS: Decision recorded - ERS ->> CanonicalRegistry: Project current governed assignment
(triad -> current canonical identifier) - CanonicalRegistry -->> ERS: Projection updated -end - -%% note right of ERS -%% Replay pacing and "ready" signalling are implementation decisions. -%% CompletionStatus reflects rebuild policy, including accepted partial completion. -%% Lookup (Spine C) remains the convergence mechanism during and after rebuild. -%% end diff --git a/docs/schema/AuditAction.md b/docs/schema/AuditAction.md new file mode 100644 index 0000000..29738a2 --- /dev/null +++ b/docs/schema/AuditAction.md @@ -0,0 +1,64 @@ +# Enum: AuditAction + + + + +_Actions recorded in the audit log_ + + + +URI: [ere:AuditAction](https://data.europa.eu/ers/schema/ere/AuditAction) + +## Permissible Values + +| Value | Meaning | Description | +| --- | --- | --- | +| ACCEPT | None | Accept action performed | +| REJECT | None | Reject action performed | +| ASSIGN | None | Assign to alternative cluster action performed | + + + + + + + + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + + + +## LinkML Source + +
+```yaml +name: AuditAction +description: Actions recorded in the audit log +from_schema: https://data.europa.eu/ers/schema/ere +rank: 1000 +permissible_values: + ACCEPT: + text: ACCEPT + description: Accept action performed + REJECT: + text: REJECT + description: Reject action performed + ASSIGN: + text: ASSIGN + description: Assign to alternative cluster action performed + +``` +
\ No newline at end of file diff --git a/docs/schema/AuditLog.md b/docs/schema/AuditLog.md new file mode 100644 index 0000000..4728d24 --- /dev/null +++ b/docs/schema/AuditLog.md @@ -0,0 +1,261 @@ + + +# Class: AuditLog + + +_Audit trail entry for curation actions_ + + + + + +URI: [ere:AuditLog](https://data.europa.eu/ers/schema/ere/AuditLog) + + + + + +```mermaid + classDiagram + class AuditLog + click AuditLog href "../AuditLog/" + AuditLog : action + + + + + + AuditLog --> "1" AuditAction : action + click AuditAction href "../AuditAction/" + + + + AuditLog : actor + + AuditLog : changes + + AuditLog : created_at + + AuditLog : id + + AuditLog : instance_id + + AuditLog : instance_type + + +``` + + + + + + + +## Slots + +| Name | Cardinality and Range | Description | Inheritance | +| --- | --- | --- | --- | +| [id](id.md) | 1
[String](String.md) | Unique identifier for the audit entry | direct | +| [actor](actor.md) | 1
[String](String.md) | User identifier who performed the action | direct | +| [action](action.md) | 1
[AuditAction](AuditAction.md) | The action performed | direct | +| [instance_type](instance_type.md) | 1
[String](String.md) | Type of entity being modified (e | direct | +| [instance_id](instance_id.md) | 1
[String](String.md) | Identifier of the modified entity | direct | +| [changes](changes.md) | 0..1
[String](String.md) | JSON representation of action-specific context | direct | +| [created_at](created_at.md) | 1
[Datetime](Datetime.md) | Timestamp when the action was performed | direct | + + + + + + + + + + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | ere:AuditLog | +| native | ere:AuditLog | + + + + + + +## LinkML Source + + + +### Direct + +
+```yaml +name: AuditLog +description: Audit trail entry for curation actions +from_schema: https://data.europa.eu/ers/schema/ere +attributes: + id: + name: id + description: Unique identifier for the audit entry + from_schema: https://data.europa.eu/ers/schema/ers + domain_of: + - Decision + - AuditLog + required: true + actor: + name: actor + description: User identifier who performed the action + from_schema: https://data.europa.eu/ers/schema/ers + rank: 1000 + domain_of: + - AuditLog + required: true + action: + name: action + description: The action performed + from_schema: https://data.europa.eu/ers/schema/ers + domain_of: + - Decision + - AuditLog + range: AuditAction + required: true + instance_type: + name: instance_type + description: Type of entity being modified (e.g., Decision) + from_schema: https://data.europa.eu/ers/schema/ers + rank: 1000 + domain_of: + - AuditLog + required: true + instance_id: + name: instance_id + description: Identifier of the modified entity + from_schema: https://data.europa.eu/ers/schema/ers + rank: 1000 + domain_of: + - AuditLog + required: true + changes: + name: changes + description: JSON representation of action-specific context + from_schema: https://data.europa.eu/ers/schema/ers + rank: 1000 + domain_of: + - AuditLog + created_at: + name: created_at + description: Timestamp when the action was performed + from_schema: https://data.europa.eu/ers/schema/ers + domain_of: + - Decision + - AuditLog + range: datetime + required: true + +``` +
+ +### Induced + +
+```yaml +name: AuditLog +description: Audit trail entry for curation actions +from_schema: https://data.europa.eu/ers/schema/ere +attributes: + id: + name: id + description: Unique identifier for the audit entry + from_schema: https://data.europa.eu/ers/schema/ers + alias: id + owner: AuditLog + domain_of: + - Decision + - AuditLog + range: string + required: true + actor: + name: actor + description: User identifier who performed the action + from_schema: https://data.europa.eu/ers/schema/ers + rank: 1000 + alias: actor + owner: AuditLog + domain_of: + - AuditLog + range: string + required: true + action: + name: action + description: The action performed + from_schema: https://data.europa.eu/ers/schema/ers + alias: action + owner: AuditLog + domain_of: + - Decision + - AuditLog + range: AuditAction + required: true + instance_type: + name: instance_type + description: Type of entity being modified (e.g., Decision) + from_schema: https://data.europa.eu/ers/schema/ers + rank: 1000 + alias: instance_type + owner: AuditLog + domain_of: + - AuditLog + range: string + required: true + instance_id: + name: instance_id + description: Identifier of the modified entity + from_schema: https://data.europa.eu/ers/schema/ers + rank: 1000 + alias: instance_id + owner: AuditLog + domain_of: + - AuditLog + range: string + required: true + changes: + name: changes + description: JSON representation of action-specific context + from_schema: https://data.europa.eu/ers/schema/ers + rank: 1000 + alias: changes + owner: AuditLog + domain_of: + - AuditLog + range: string + created_at: + name: created_at + description: Timestamp when the action was performed + from_schema: https://data.europa.eu/ers/schema/ers + alias: created_at + owner: AuditLog + domain_of: + - Decision + - AuditLog + range: datetime + required: true + +``` +
\ No newline at end of file diff --git a/docs/schema/Boolean.md b/docs/schema/Boolean.md new file mode 100644 index 0000000..e604a1d --- /dev/null +++ b/docs/schema/Boolean.md @@ -0,0 +1,47 @@ +# Type: Boolean + + + + +_A binary (true or false) value_ + + + +URI: [xsd:boolean](http://www.w3.org/2001/XMLSchema#boolean) + +* [base](https://w3id.org/linkml/base): Bool + +* [uri](https://w3id.org/linkml/uri): xsd:boolean + +* [repr](https://w3id.org/linkml/repr): bool + + + + + + + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | xsd:boolean | +| native | ere:boolean | +| exact | schema:Boolean | + + diff --git a/docs/schema/CanonicalEntityIdentifier.md b/docs/schema/CanonicalEntityIdentifier.md new file mode 100644 index 0000000..4425065 --- /dev/null +++ b/docs/schema/CanonicalEntityIdentifier.md @@ -0,0 +1,166 @@ + + +# Class: CanonicalEntityIdentifier + + +_A logical identity construct providing a stable identity anchor._ + +_Represents a cluster of equivalent entity mentions._ + +__ + + + + + +URI: [ere:CanonicalEntityIdentifier](https://data.europa.eu/ers/schema/ere/CanonicalEntityIdentifier) + + + + + +```mermaid + classDiagram + class CanonicalEntityIdentifier + click CanonicalEntityIdentifier href "../CanonicalEntityIdentifier/" + CanonicalEntityIdentifier : equivalent_to + + + + + + CanonicalEntityIdentifier --> "1..*" EntityMentionIdentifier : equivalent_to + click EntityMentionIdentifier href "../EntityMentionIdentifier/" + + + + CanonicalEntityIdentifier : identifier + + +``` + + + + + + + +## Slots + +| Name | Cardinality and Range | Description | Inheritance | +| --- | --- | --- | --- | +| [identifier](identifier.md) | 1
[String](String.md) | Unique identifier for the canonical entity | direct | +| [equivalent_to](equivalent_to.md) | 1..*
[EntityMentionIdentifier](EntityMentionIdentifier.md) | Entity mentions that have been resolved to this canonical entity | direct | + + + + + + + + + + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | ere:CanonicalEntityIdentifier | +| native | ere:CanonicalEntityIdentifier | + + + + + + +## LinkML Source + + + +### Direct + +
+```yaml +name: CanonicalEntityIdentifier +description: 'A logical identity construct providing a stable identity anchor. + + Represents a cluster of equivalent entity mentions. + + ' +from_schema: https://data.europa.eu/ers/schema/ere +attributes: + identifier: + name: identifier + description: Unique identifier for the canonical entity. + from_schema: https://data.europa.eu/ers/schema/ers + rank: 1000 + domain_of: + - CanonicalEntityIdentifier + required: true + equivalent_to: + name: equivalent_to + description: Entity mentions that have been resolved to this canonical entity. + from_schema: https://data.europa.eu/ers/schema/ers + rank: 1000 + domain_of: + - CanonicalEntityIdentifier + range: EntityMentionIdentifier + required: true + multivalued: true + +``` +
+ +### Induced + +
+```yaml +name: CanonicalEntityIdentifier +description: 'A logical identity construct providing a stable identity anchor. + + Represents a cluster of equivalent entity mentions. + + ' +from_schema: https://data.europa.eu/ers/schema/ere +attributes: + identifier: + name: identifier + description: Unique identifier for the canonical entity. + from_schema: https://data.europa.eu/ers/schema/ers + rank: 1000 + alias: identifier + owner: CanonicalEntityIdentifier + domain_of: + - CanonicalEntityIdentifier + range: string + required: true + equivalent_to: + name: equivalent_to + description: Entity mentions that have been resolved to this canonical entity. + from_schema: https://data.europa.eu/ers/schema/ers + rank: 1000 + alias: equivalent_to + owner: CanonicalEntityIdentifier + domain_of: + - CanonicalEntityIdentifier + range: EntityMentionIdentifier + required: true + multivalued: true + +``` +
\ No newline at end of file diff --git a/docs/schema/ClusterReference.md b/docs/schema/ClusterReference.md index c356e62..5503e83 100644 --- a/docs/schema/ClusterReference.md +++ b/docs/schema/ClusterReference.md @@ -3,7 +3,7 @@ # Class: ClusterReference -_A reference to a cluster to which an entity is deemed to belong, with an associated confidence score._ +_A reference to a cluster to which an entity is deemed to belong, with an associated confidence and similarity scores._ __ @@ -33,9 +33,11 @@ URI: [ere:ClusterReference](https://data.europa.eu/ers/schema/ere/ClusterReferen classDiagram class ClusterReference click ClusterReference href "../ClusterReference/" - ClusterReference : clusterId + ClusterReference : cluster_id - ClusterReference : confidenceScore + ClusterReference : confidence_score + + ClusterReference : similarity_score ``` @@ -50,8 +52,9 @@ URI: [ere:ClusterReference](https://data.europa.eu/ers/schema/ere/ClusterReferen | Name | Cardinality and Range | Description | Inheritance | | --- | --- | --- | --- | -| [clusterId](clusterId.md) | 1
[String](String.md) | The identifier of the cluster/canonical entity that is considered equivalent ... | direct | -| [confidenceScore](confidenceScore.md) | 1
[Float](Float.md) | A 0-1 value of how confident the ERE is about the equivalence between the sub... | direct | +| [cluster_id](cluster_id.md) | 1
[String](String.md) | The identifier of the cluster/canonical entity that is considered equivalent ... | direct | +| [confidence_score](confidence_score.md) | 1
[Float](Float.md) | A 0-1 value of how confident the ERE is about the equivalence between the sub... | direct | +| [similarity_score](similarity_score.md) | 1
[Float](Float.md) | A 0-1 score representing the pairwise comparison between a mention and a clus... | direct | @@ -62,6 +65,10 @@ URI: [ere:ClusterReference](https://data.europa.eu/ers/schema/ere/ClusterReferen | used by | used in | type | used | | --- | --- | --- | --- | | [EntityMentionResolutionResponse](EntityMentionResolutionResponse.md) | [candidates](candidates.md) | range | [ClusterReference](ClusterReference.md) | +| [Decision](Decision.md) | [current_placement](current_placement.md) | range | [ClusterReference](ClusterReference.md) | +| [Decision](Decision.md) | [candidates](candidates.md) | range | [ClusterReference](ClusterReference.md) | +| [UserAction](UserAction.md) | [candidates](candidates.md) | range | [ClusterReference](ClusterReference.md) | +| [UserAction](UserAction.md) | [selected_cluster](selected_cluster.md) | range | [ClusterReference](ClusterReference.md) | @@ -106,34 +113,50 @@ URI: [ere:ClusterReference](https://data.europa.eu/ers/schema/ere/ClusterReferen ```yaml name: ClusterReference description: "A reference to a cluster to which an entity is deemed to belong, with\ - \ an associated confidence score.\n\nA cluster is a set of entity mentions that\ - \ have been determined to refer to the same real-world entity.\nEach cluster has\ - \ a unique clusterId.\n\nA cluster reference is used to report the association between\ - \ an entity mention and a cluster \nof equivalence.\n" + \ an associated confidence and similarity scores.\n\nA cluster is a set of entity\ + \ mentions that have been determined to refer to the same real-world entity.\nEach\ + \ cluster has a unique clusterId.\n\nA cluster reference is used to report the association\ + \ between an entity mention and a cluster \nof equivalence.\n" from_schema: https://data.europa.eu/ers/schema/ere attributes: - clusterId: - name: clusterId + cluster_id: + name: cluster_id description: 'The identifier of the cluster/canonical entity that is considered equivalent to the subject 
entity mention that an `EntityMentionResolutionResponse` refers to. ' - from_schema: https://data.europa.eu/ers/schema/ere + from_schema: https://data.europa.eu/ers/schema/ers rank: 1000 domain_of: - ClusterReference required: true - confidenceScore: - name: confidenceScore + confidence_score: + name: confidence_score description: 'A 0-1 value of how confident the ERE is about the equivalence between the subject entity mention and the target canonical entity. ' - from_schema: https://data.europa.eu/ers/schema/ere + from_schema: https://data.europa.eu/ers/schema/ers + rank: 1000 + domain_of: + - ClusterReference + range: float + required: true + minimum_value: 0.0 + maximum_value: 1.0 + similarity_score: + name: similarity_score + description: 'A 0-1 score representing the pairwise comparison between a mention + and a cluster (likely + + based on a representative representation). + + ' + from_schema: https://data.europa.eu/ers/schema/ers rank: 1000 domain_of: - ClusterReference @@ -151,39 +174,57 @@ attributes: ```yaml name: ClusterReference description: "A reference to a cluster to which an entity is deemed to belong, with\ - \ an associated confidence score.\n\nA cluster is a set of entity mentions that\ - \ have been determined to refer to the same real-world entity.\nEach cluster has\ - \ a unique clusterId.\n\nA cluster reference is used to report the association between\ - \ an entity mention and a cluster \nof equivalence.\n" + \ an associated confidence and similarity scores.\n\nA cluster is a set of entity\ + \ mentions that have been determined to refer to the same real-world entity.\nEach\ + \ cluster has a unique clusterId.\n\nA cluster reference is used to report the association\ + \ between an entity mention and a cluster \nof equivalence.\n" from_schema: https://data.europa.eu/ers/schema/ere attributes: - clusterId: - name: clusterId + cluster_id: + name: cluster_id description: 'The identifier of the cluster/canonical entity that is considered 
equivalent to the subject entity mention that an `EntityMentionResolutionResponse` refers to. ' - from_schema: https://data.europa.eu/ers/schema/ere + from_schema: https://data.europa.eu/ers/schema/ers rank: 1000 - alias: clusterId + alias: cluster_id owner: ClusterReference domain_of: - ClusterReference range: string required: true - confidenceScore: - name: confidenceScore + confidence_score: + name: confidence_score description: 'A 0-1 value of how confident the ERE is about the equivalence between the subject entity mention and the target canonical entity. ' - from_schema: https://data.europa.eu/ers/schema/ere + from_schema: https://data.europa.eu/ers/schema/ers + rank: 1000 + alias: confidence_score + owner: ClusterReference + domain_of: + - ClusterReference + range: float + required: true + minimum_value: 0.0 + maximum_value: 1.0 + similarity_score: + name: similarity_score + description: 'A 0-1 score representing the pairwise comparison between a mention + and a cluster (likely + + based on a representative representation). 
+ + ' + from_schema: https://data.europa.eu/ers/schema/ers rank: 1000 - alias: confidenceScore + alias: similarity_score owner: ClusterReference domain_of: - ClusterReference diff --git a/docs/schema/Curie.md b/docs/schema/Curie.md new file mode 100644 index 0000000..30083c2 --- /dev/null +++ b/docs/schema/Curie.md @@ -0,0 +1,51 @@ +# Type: Curie + + + + +_a compact URI_ + + + +URI: [xsd:string](http://www.w3.org/2001/XMLSchema#string) + +* [base](https://w3id.org/linkml/base): Curie + +* [uri](https://w3id.org/linkml/uri): xsd:string + +* [repr](https://w3id.org/linkml/repr): str + + + + + + + +## Comments + +* in RDF serializations this MUST be expanded to a URI +* in non-RDF serializations MAY be serialized as the compact representation + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | xsd:string | +| native | ere:curie | + + diff --git a/docs/schema/Date.md b/docs/schema/Date.md new file mode 100644 index 0000000..3689fb7 --- /dev/null +++ b/docs/schema/Date.md @@ -0,0 +1,47 @@ +# Type: Date + + + + +_a date (year, month and day) in an idealized calendar_ + + + +URI: [xsd:date](http://www.w3.org/2001/XMLSchema#date) + +* [base](https://w3id.org/linkml/base): XSDDate + +* [uri](https://w3id.org/linkml/uri): xsd:date + +* [repr](https://w3id.org/linkml/repr): str + + + + + + + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | xsd:date | +| native | ere:date | +| exact | schema:Date | + + diff --git a/docs/schema/DateOrDatetime.md b/docs/schema/DateOrDatetime.md new file mode 100644 index 0000000..4141a36 --- /dev/null +++ b/docs/schema/DateOrDatetime.md @@ -0,0 +1,46 @@ +# Type: DateOrDatetime + + + + +_Either a date or a datetime_ + + + +URI: 
[linkml:DateOrDatetime](https://w3id.org/linkml/DateOrDatetime) + +* [base](https://w3id.org/linkml/base): str + +* [uri](https://w3id.org/linkml/uri): linkml:DateOrDatetime + +* [repr](https://w3id.org/linkml/repr): str + + + + + + + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | linkml:DateOrDatetime | +| native | ere:date_or_datetime | + + diff --git a/docs/schema/Datetime.md b/docs/schema/Datetime.md new file mode 100644 index 0000000..702f8a4 --- /dev/null +++ b/docs/schema/Datetime.md @@ -0,0 +1,47 @@ +# Type: Datetime + + + + +_The combination of a date and time_ + + + +URI: [xsd:dateTime](http://www.w3.org/2001/XMLSchema#dateTime) + +* [base](https://w3id.org/linkml/base): XSDDateTime + +* [uri](https://w3id.org/linkml/uri): xsd:dateTime + +* [repr](https://w3id.org/linkml/repr): str + + + + + + + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | xsd:dateTime | +| native | ere:datetime | +| exact | schema:DateTime | + + diff --git a/docs/schema/Decimal.md b/docs/schema/Decimal.md new file mode 100644 index 0000000..0a1a69f --- /dev/null +++ b/docs/schema/Decimal.md @@ -0,0 +1,46 @@ +# Type: Decimal + + + + +_A real number with arbitrary precision that conforms to the xsd:decimal specification_ + + + +URI: [xsd:decimal](http://www.w3.org/2001/XMLSchema#decimal) + +* [base](https://w3id.org/linkml/base): Decimal + +* [uri](https://w3id.org/linkml/uri): xsd:decimal + + + + + + + + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | xsd:decimal | +| native | ere:decimal | +| broad | 
schema:Number | + + diff --git a/docs/schema/Decision.md b/docs/schema/Decision.md new file mode 100644 index 0000000..58f94cb --- /dev/null +++ b/docs/schema/Decision.md @@ -0,0 +1,290 @@ + + +# Class: Decision + + +_Canonical placement of an entity mention to a cluster._ + +_Represents the latest resolution decision (from ERE or curator override)._ + +__ + + + + + +URI: [ere:Decision](https://data.europa.eu/ers/schema/ere/Decision) + + + + + +```mermaid + classDiagram + class Decision + click Decision href "../Decision/" + Decision : about_entity_mention + + + + + + Decision --> "1" EntityMentionIdentifier : about_entity_mention + click EntityMentionIdentifier href "../EntityMentionIdentifier/" + + + + Decision : candidates + + + + + + Decision --> "1..*" ClusterReference : candidates + click ClusterReference href "../ClusterReference/" + + + + Decision : created_at + + Decision : current_placement + + + + + + Decision --> "1" ClusterReference : current_placement + click ClusterReference href "../ClusterReference/" + + + + Decision : id + + Decision : updated_at + + +``` + + + + + + + +## Slots + +| Name | Cardinality and Range | Description | Inheritance | +| --- | --- | --- | --- | +| [id](id.md) | 1
[String](String.md) | Unique decision identifier | direct | +| [about_entity_mention](about_entity_mention.md) | 1
[EntityMentionIdentifier](EntityMentionIdentifier.md) | The entity mention being resolved | direct | +| [current_placement](current_placement.md) | 1
[ClusterReference](ClusterReference.md) | The accepted cluster for this mention (latest from ERE or curator) | direct | +| [candidates](candidates.md) | 1..*
[ClusterReference](ClusterReference.md) | Top-N alternative clusters proposed by ERE (for curation UI preview) | direct | +| [created_at](created_at.md) | 1
[Datetime](Datetime.md) | When the decision was first created | direct | +| [updated_at](updated_at.md) | 0..1
[Datetime](Datetime.md) | When the decision was last updated (ERE refresh or curator action) | direct | + + + + + + + + + + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | ere:Decision | +| native | ere:Decision | + + + + + + +## LinkML Source + + + +### Direct + +
+```yaml +name: Decision +description: 'Canonical placement of an entity mention to a cluster. + + Represents the latest resolution decision (from ERE or curator override). + + ' +from_schema: https://data.europa.eu/ers/schema/ere +attributes: + id: + name: id + description: Unique decision identifier + from_schema: https://data.europa.eu/ers/schema/ers + rank: 1000 + domain_of: + - Decision + - UserAction + required: true + about_entity_mention: + name: about_entity_mention + description: The entity mention being resolved + from_schema: https://data.europa.eu/ers/schema/ers + rank: 1000 + domain_of: + - Decision + - UserAction + range: EntityMentionIdentifier + required: true + current_placement: + name: current_placement + description: 'The accepted cluster for this mention (latest from ERE or curator). + + ' + from_schema: https://data.europa.eu/ers/schema/ers + rank: 1000 + domain_of: + - Decision + range: ClusterReference + required: true + candidates: + name: candidates + description: 'Top-N alternative clusters proposed by ERE (for curation UI preview). + + ' + from_schema: https://data.europa.eu/ers/schema/ers + domain_of: + - EntityMentionResolutionResponse + - Decision + - UserAction + range: ClusterReference + required: true + multivalued: true + created_at: + name: created_at + description: When the decision was first created + from_schema: https://data.europa.eu/ers/schema/ers + rank: 1000 + domain_of: + - Decision + - UserAction + range: datetime + required: true + updated_at: + name: updated_at + description: When the decision was last updated (ERE refresh or curator action) + from_schema: https://data.europa.eu/ers/schema/ers + rank: 1000 + domain_of: + - Decision + range: datetime + +``` +
+ +### Induced + +
+```yaml +name: Decision +description: 'Canonical placement of an entity mention to a cluster. + + Represents the latest resolution decision (from ERE or curator override). + + ' +from_schema: https://data.europa.eu/ers/schema/ere +attributes: + id: + name: id + description: Unique decision identifier + from_schema: https://data.europa.eu/ers/schema/ers + rank: 1000 + alias: id + owner: Decision + domain_of: + - Decision + - UserAction + range: string + required: true + about_entity_mention: + name: about_entity_mention + description: The entity mention being resolved + from_schema: https://data.europa.eu/ers/schema/ers + rank: 1000 + alias: about_entity_mention + owner: Decision + domain_of: + - Decision + - UserAction + range: EntityMentionIdentifier + required: true + current_placement: + name: current_placement + description: 'The accepted cluster for this mention (latest from ERE or curator). + + ' + from_schema: https://data.europa.eu/ers/schema/ers + rank: 1000 + alias: current_placement + owner: Decision + domain_of: + - Decision + range: ClusterReference + required: true + candidates: + name: candidates + description: 'Top-N alternative clusters proposed by ERE (for curation UI preview). + + ' + from_schema: https://data.europa.eu/ers/schema/ers + alias: candidates + owner: Decision + domain_of: + - EntityMentionResolutionResponse + - Decision + - UserAction + range: ClusterReference + required: true + multivalued: true + created_at: + name: created_at + description: When the decision was first created + from_schema: https://data.europa.eu/ers/schema/ers + rank: 1000 + alias: created_at + owner: Decision + domain_of: + - Decision + - UserAction + range: datetime + required: true + updated_at: + name: updated_at + description: When the decision was last updated (ERE refresh or curator action) + from_schema: https://data.europa.eu/ers/schema/ers + rank: 1000 + alias: updated_at + owner: Decision + domain_of: + - Decision + range: datetime + +``` +
\ No newline at end of file diff --git a/docs/schema/DecisionAction.md b/docs/schema/DecisionAction.md new file mode 100644 index 0000000..463f0b3 --- /dev/null +++ b/docs/schema/DecisionAction.md @@ -0,0 +1,70 @@ +# Enum: DecisionAction + + + + +_Action taken on a decision by the curator_ + + + +URI: [ere:DecisionAction](https://data.europa.eu/ers/schema/ere/DecisionAction) + +## Permissible Values + +| Value | Meaning | Description | +| --- | --- | --- | +| ACCEPT_TOP | None | Curator accepted the top candidate | +| ACCEPT_ALTERNATIVE | None | Curator selected an alternative candidate | +| REJECT_ALL | None | Curator rejected all candidates | + + + + +## Slots + +| Name | Description | +| --- | --- | +| [action](action.md) | Action taken by curator | + + + + + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + + + +## LinkML Source + +
+```yaml +name: DecisionAction +description: Action taken on a decision by the curator +from_schema: https://data.europa.eu/ers/schema/ere +rank: 1000 +permissible_values: + ACCEPT_TOP: + text: ACCEPT_TOP + description: Curator accepted the top candidate + ACCEPT_ALTERNATIVE: + text: ACCEPT_ALTERNATIVE + description: Curator selected an alternative candidate + REJECT_ALL: + text: REJECT_ALL + description: Curator rejected all candidates + +``` +
\ No newline at end of file diff --git a/docs/schema/DecisionStatus.md b/docs/schema/DecisionStatus.md new file mode 100644 index 0000000..4aa8d63 --- /dev/null +++ b/docs/schema/DecisionStatus.md @@ -0,0 +1,70 @@ +# Enum: DecisionStatus + + + + +_Status of a resolution decision in the curation workflow_ + + + +URI: [ere:DecisionStatus](https://data.europa.eu/ers/schema/ere/DecisionStatus) + +## Permissible Values + +| Value | Meaning | Description | +| --- | --- | --- | +| AUTOMATIC_CONFIDENT | None | Resolution confidence exceeds threshold; no manual review required | +| PENDING_MANUAL_REVIEW | None | Resolution confidence below threshold; awaiting curator action | +| MANUALLY_REVIEWED | None | Curator has taken an action | + + + + +## Slots + +| Name | Description | +| --- | --- | +| [status](status.md) | Current status in the curation workflow | + + + + + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + + + +## LinkML Source + +
+```yaml +name: DecisionStatus +description: Status of a resolution decision in the curation workflow +from_schema: https://data.europa.eu/ers/schema/ere +rank: 1000 +permissible_values: + AUTOMATIC_CONFIDENT: + text: AUTOMATIC_CONFIDENT + description: Resolution confidence exceeds threshold; no manual review required + PENDING_MANUAL_REVIEW: + text: PENDING_MANUAL_REVIEW + description: Resolution confidence below threshold; awaiting curator action + MANUALLY_REVIEWED: + text: MANUALLY_REVIEWED + description: Curator has taken an action + +``` +
\ No newline at end of file diff --git a/docs/schema/Double.md b/docs/schema/Double.md new file mode 100644 index 0000000..4d285ec --- /dev/null +++ b/docs/schema/Double.md @@ -0,0 +1,46 @@ +# Type: Double + + + + +_A real number that conforms to the xsd:double specification_ + + + +URI: [xsd:double](http://www.w3.org/2001/XMLSchema#double) + +* [base](https://w3id.org/linkml/base): float + +* [uri](https://w3id.org/linkml/uri): xsd:double + + + + + + + + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | xsd:double | +| native | ere:double | +| close | schema:Float | + + diff --git a/docs/schema/EREErrorResponse.md b/docs/schema/EREErrorResponse.md new file mode 100644 index 0000000..4d1ac0d --- /dev/null +++ b/docs/schema/EREErrorResponse.md @@ -0,0 +1,356 @@ + + +# Class: EREErrorResponse + + +_Response sent by the ERE when some error/exception occurs while processing a request._ + +_For instance, this may happen if the request is malformed or some internal error happens._ + +__ + +_The attributes of this class are based on [RFC-9457](https://datatracker.ietf.org/doc/html/rfc9457)._ + +__ + + + + + +URI: [ere:EREErrorResponse](https://data.europa.eu/ers/schema/ere/EREErrorResponse) + + + + + +```mermaid + classDiagram + class EREErrorResponse + click EREErrorResponse href "../EREErrorResponse/" + EREResponse <|-- EREErrorResponse + click EREResponse href "../EREResponse/" + + EREErrorResponse : ere_request_id + + EREErrorResponse : error_detail + + EREErrorResponse : error_title + + EREErrorResponse : error_trace + + EREErrorResponse : error_type + + EREErrorResponse : timestamp + + EREErrorResponse : type + + +``` + + + + + +## Inheritance +* [EREMessage](EREMessage.md) + * [EREResponse](EREResponse.md) + * **EREErrorResponse** + + + +## Slots + +| Name | Cardinality and Range | Description | Inheritance | 
+| --- | --- | --- | --- | +| [error_type](error_type.md) | 1
[String](String.md) | A string representing the error type, eg, the FQN of the raised exception | direct | +| [error_title](error_title.md) | 0..1
[String](String.md) | A human readable brief message about the error that occurred | direct | +| [error_detail](error_detail.md) | 0..1
[String](String.md) | A human readable detailed message about the error that occurred | direct | +| [error_trace](error_trace.md) | 0..1
[String](String.md) | A string representing a (stack) trace of the error that occurred | direct | +| [type](type.md) | 1
[String](String.md) | The type of the request or result | [EREMessage](EREMessage.md) | +| [ere_request_id](ere_request_id.md) | 1
[String](String.md) | A string representing the unique ID of an ERE request, or the ID of the reque... | [EREMessage](EREMessage.md) | +| [timestamp](timestamp.md) | 0..1
[Datetime](Datetime.md) | The time when the message was created | [EREMessage](EREMessage.md) | + + + + + + + + + + + +## Examples + +| Value | +| --- | +| { + "type": "EREErrorResponse", + "request_id": "324fs3r345vx", + "error_type": "ere.exceptions.MalformedRequestError", + "error_title": "The entity data is missing in the request", + "error_detail": "The 'entity' attribute is required in EntityMentionResolutionRequest message", + // Optional and not recommended for production use + "error_trace": "Traceback (most recent call last):\n File \"/app/ere/service.py\", line 45, in process_request\n..." +} + | + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | ere:EREErrorResponse | +| native | ere:EREErrorResponse | + + + + + + +## LinkML Source + + + +### Direct + +
+```yaml +name: EREErrorResponse +description: 'Response sent by the ERE when some error/exception occurs while processing + a request. + + For instance, this may happen if the request is malformed or some internal error + happens. + + + The attributes of this class are based on [RFC-9457](https://datatracker.ietf.org/doc/html/rfc9457). + + ' +examples: +- value: "{\n \"type\": \"EREErrorResponse\",\n \"request_id\": \"324fs3r345vx\"\ + ,\n \"error_type\": \"ere.exceptions.MalformedRequestError\",\n \"error_title\"\ + : \"The entity data is missing in the request\",\n \"error_detail\": \"The 'entity'\ + \ attribute is required in EntityMentionResolutionRequest message\",\n // Optional\ + \ and not recommended for production use\n \"error_trace\": \"Traceback (most\ + \ recent call last):\\n File \\\"/app/ere/service.py\\\", line 45, in process_request\\\ + n...\"\n}\n" +from_schema: https://data.europa.eu/ers/schema/ere +is_a: EREResponse +attributes: + error_type: + name: error_type + description: 'A string representing the error type, eg, the FQN of the raised + exception. + + + This corresponds to RFC-9457''s `type`. + + ' + from_schema: https://data.europa.eu/ers/schema/ere + rank: 1000 + domain_of: + - EREErrorResponse + required: true + error_title: + name: error_title + description: 'A human readable brief message about the error that occurred. + + + This corresponds to RFC-9457''s `title`. + + ' + from_schema: https://data.europa.eu/ers/schema/ere + rank: 1000 + domain_of: + - EREErrorResponse + error_detail: + name: error_detail + description: 'A human readable detailed message about the error that occurred. + + + This corresponds to RFC-9457''s `detail`. + + ' + from_schema: https://data.europa.eu/ers/schema/ere + rank: 1000 + domain_of: + - EREErrorResponse + error_trace: + name: error_trace + description: 'A string representing a (stack) trace of the error that occurred. 
+ + + This is optional and typically used for debugging purposes only, since + + exposing this kind of server-side information is a security risk. + + ' + from_schema: https://data.europa.eu/ers/schema/ere + rank: 1000 + domain_of: + - EREErrorResponse + +``` +
+ +### Induced + +
+```yaml +name: EREErrorResponse +description: 'Response sent by the ERE when some error/exception occurs while processing + a request. + + For instance, this may happen if the request is malformed or some internal error + happens. + + + The attributes of this class are based on [RFC-9457](https://datatracker.ietf.org/doc/html/rfc9457). + + ' +examples: +- value: "{\n \"type\": \"EREErrorResponse\",\n \"request_id\": \"324fs3r345vx\"\ + ,\n \"error_type\": \"ere.exceptions.MalformedRequestError\",\n \"error_title\"\ + : \"The entity data is missing in the request\",\n \"error_detail\": \"The 'entity'\ + \ attribute is required in EntityMentionResolutionRequest message\",\n // Optional\ + \ and not recommended for production use\n \"error_trace\": \"Traceback (most\ + \ recent call last):\\n File \\\"/app/ere/service.py\\\", line 45, in process_request\\\ + n...\"\n}\n" +from_schema: https://data.europa.eu/ers/schema/ere +is_a: EREResponse +attributes: + error_type: + name: error_type + description: 'A string representing the error type, eg, the FQN of the raised + exception. + + + This corresponds to RFC-9457''s `type`. + + ' + from_schema: https://data.europa.eu/ers/schema/ere + rank: 1000 + alias: error_type + owner: EREErrorResponse + domain_of: + - EREErrorResponse + range: string + required: true + error_title: + name: error_title + description: 'A human readable brief message about the error that occurred. + + + This corresponds to RFC-9457''s `title`. + + ' + from_schema: https://data.europa.eu/ers/schema/ere + rank: 1000 + alias: error_title + owner: EREErrorResponse + domain_of: + - EREErrorResponse + range: string + error_detail: + name: error_detail + description: 'A human readable detailed message about the error that occurred. + + + This corresponds to RFC-9457''s `detail`. 
+ + ' + from_schema: https://data.europa.eu/ers/schema/ere + rank: 1000 + alias: error_detail + owner: EREErrorResponse + domain_of: + - EREErrorResponse + range: string + error_trace: + name: error_trace + description: 'A string representing a (stack) trace of the error that occurred. + + + This is optional and typically used for debugging purposes only, since + + exposing this kind of server-side information is a security risk. + + ' + from_schema: https://data.europa.eu/ers/schema/ere + rank: 1000 + alias: error_trace + owner: EREErrorResponse + domain_of: + - EREErrorResponse + range: string + type: + name: type + description: "The type of the request or result.\n\nAs per LinkML specification,\ + \ `designates_type` is used here in order to allow for this\nslot to tell the\ + \ concrete subclass that an instance (such as a JSON object) belongs to.\n\n\ + In other words, a particular request will have `type` set with values like \n\ + `EntityMentionResolutionRequest` or `EntityResolutionResult`\n" + from_schema: https://data.europa.eu/ers/schema/ere + rank: 1000 + designates_type: true + alias: type + owner: EREErrorResponse + domain_of: + - EREMessage + range: string + required: true + ere_request_id: + name: ere_request_id + description: 'A string representing the unique ID of an ERE request, or the ID + of the request a response is about. + + This **is not** the same as `request_id` + `source_id`. + + + Note on notification responses: as per ERE contract, an `EntityMentionResolutionResponse` + message + + can originate from within the ERE, without any previous request counterpart, + as a notification of + + resolution update. In this case, `ere_request_id` has the prefix `ereNotification:`. + + ' + from_schema: https://data.europa.eu/ers/schema/ere + rank: 1000 + alias: ere_request_id + owner: EREErrorResponse + domain_of: + - EREMessage + range: string + required: true + timestamp: + name: timestamp + description: 'The time when the message was created. 
Should be in ISO-8601 format. + + ' + from_schema: https://data.europa.eu/ers/schema/ere + rank: 1000 + alias: timestamp + owner: EREErrorResponse + domain_of: + - EREMessage + range: datetime + +``` +
\ No newline at end of file diff --git a/docs/schema/EREMessage.md b/docs/schema/EREMessage.md index acf42f5..3563ddc 100644 --- a/docs/schema/EREMessage.md +++ b/docs/schema/EREMessage.md @@ -30,7 +30,7 @@ URI: [ere:EREMessage](https://data.europa.eu/ers/schema/ere/EREMessage) EREMessage <|-- EREResponse click EREResponse href "../EREResponse/" - EREMessage : ereRequestId + EREMessage : ere_request_id EREMessage : timestamp @@ -55,7 +55,7 @@ URI: [ere:EREMessage](https://data.europa.eu/ers/schema/ere/EREMessage) | Name | Cardinality and Range | Description | Inheritance | | --- | --- | --- | --- | | [type](type.md) | 1
[String](String.md) | The type of the request or result | direct | -| [ereRequestId](ereRequestId.md) | 1
[String](String.md) | A string representing the unique ID of an ERE request, or the ID of the reque... | direct | +| [ere_request_id](ere_request_id.md) | 1
[String](String.md) | A string representing the unique ID of an ERE request, or the ID of the reque... | direct | | [timestamp](timestamp.md) | 0..1
[Datetime](Datetime.md) | The time when the message was created | direct | @@ -125,12 +125,21 @@ attributes: domain_of: - EREMessage required: true - ereRequestId: - name: ereRequestId + ere_request_id: + name: ere_request_id description: 'A string representing the unique ID of an ERE request, or the ID of the request a response is about. - This **is not** the same as `requestId` + `sourceId`. + This **is not** the same as `request_id` + `source_id`. + + + Note on notification responses: as per ERE contract, an `EntityMentionResolutionResponse` + message + + can originate from within the ERE, without any previous request counterpart, + as a notification of + + resolution update. In this case, `ere_request_id` has the prefix `ereNotification:`. ' from_schema: https://data.europa.eu/ers/schema/ere @@ -182,17 +191,26 @@ attributes: - EREMessage range: string required: true - ereRequestId: - name: ereRequestId + ere_request_id: + name: ere_request_id description: 'A string representing the unique ID of an ERE request, or the ID of the request a response is about. - This **is not** the same as `requestId` + `sourceId`. + This **is not** the same as `request_id` + `source_id`. + + + Note on notification responses: as per ERE contract, an `EntityMentionResolutionResponse` + message + + can originate from within the ERE, without any previous request counterpart, + as a notification of + + resolution update. In this case, `ere_request_id` has the prefix `ereNotification:`. 
' from_schema: https://data.europa.eu/ers/schema/ere rank: 1000 - alias: ereRequestId + alias: ere_request_id owner: EREMessage domain_of: - EREMessage diff --git a/docs/schema/ERERequest.md b/docs/schema/ERERequest.md index 1efd01c..f50fb14 100644 --- a/docs/schema/ERERequest.md +++ b/docs/schema/ERERequest.md @@ -29,11 +29,9 @@ URI: [ere:ERERequest](https://data.europa.eu/ers/schema/ere/ERERequest) ERERequest <|-- EntityMentionResolutionRequest click EntityMentionResolutionRequest href "../EntityMentionResolutionRequest/" - ERERequest <|-- FullRebuildRequest - click FullRebuildRequest href "../FullRebuildRequest/" - ERERequest : ereRequestId + ERERequest : ere_request_id ERERequest : timestamp @@ -50,7 +48,6 @@ URI: [ere:ERERequest](https://data.europa.eu/ers/schema/ere/ERERequest) * [EREMessage](EREMessage.md) * **ERERequest** * [EntityMentionResolutionRequest](EntityMentionResolutionRequest.md) - * [FullRebuildRequest](FullRebuildRequest.md) @@ -59,7 +56,7 @@ URI: [ere:ERERequest](https://data.europa.eu/ers/schema/ere/ERERequest) | Name | Cardinality and Range | Description | Inheritance | | --- | --- | --- | --- | | [type](type.md) | 1
[String](String.md) | The type of the request or result | [EREMessage](EREMessage.md) | -| [ereRequestId](ereRequestId.md) | 1
[String](String.md) | A string representing the unique ID of an ERE request, or the ID of the reque... | [EREMessage](EREMessage.md) | +| [ere_request_id](ere_request_id.md) | 1
[String](String.md) | A string representing the unique ID of an ERE request, or the ID of the reque... | [EREMessage](EREMessage.md) | | [timestamp](timestamp.md) | 0..1
[Datetime](Datetime.md) | The time when the message was created | [EREMessage](EREMessage.md) | @@ -145,17 +142,26 @@ attributes: - EREMessage range: string required: true - ereRequestId: - name: ereRequestId + ere_request_id: + name: ere_request_id description: 'A string representing the unique ID of an ERE request, or the ID of the request a response is about. - This **is not** the same as `requestId` + `sourceId`. + This **is not** the same as `request_id` + `source_id`. + + + Note on notification responses: as per ERE contract, an `EntityMentionResolutionResponse` + message + + can originate from within the ERE, without any previous request counterpart, + as a notification of + + resolution update. In this case, `ere_request_id` has the prefix `ereNotification:`. ' from_schema: https://data.europa.eu/ers/schema/ere rank: 1000 - alias: ereRequestId + alias: ere_request_id owner: ERERequest domain_of: - EREMessage diff --git a/docs/schema/EREResponse.md b/docs/schema/EREResponse.md new file mode 100644 index 0000000..004a900 --- /dev/null +++ b/docs/schema/EREResponse.md @@ -0,0 +1,187 @@ + + +# Class: EREResponse + + +_Root class to represent all the responses sent by the ERE._ + +__ + + + + +* __NOTE__: this is an abstract class and should not be instantiated directly + + +URI: [ere:EREResponse](https://data.europa.eu/ers/schema/ere/EREResponse) + + + + + +```mermaid + classDiagram + class EREResponse + click EREResponse href "../EREResponse/" + EREMessage <|-- EREResponse + click EREMessage href "../EREMessage/" + + + EREResponse <|-- EntityMentionResolutionResponse + click EntityMentionResolutionResponse href "../EntityMentionResolutionResponse/" + EREResponse <|-- EREErrorResponse + click EREErrorResponse href "../EREErrorResponse/" + + + EREResponse : ere_request_id + + EREResponse : timestamp + + EREResponse : type + + +``` + + + + + +## Inheritance +* [EREMessage](EREMessage.md) + * **EREResponse** + * 
[EntityMentionResolutionResponse](EntityMentionResolutionResponse.md) + * [EREErrorResponse](EREErrorResponse.md) + + + +## Slots + +| Name | Cardinality and Range | Description | Inheritance | +| --- | --- | --- | --- | +| [type](type.md) | 1
[String](String.md) | The type of the request or result | [EREMessage](EREMessage.md) | +| [ere_request_id](ere_request_id.md) | 1
[String](String.md) | A string representing the unique ID of an ERE request, or the ID of the reque... | [EREMessage](EREMessage.md) | +| [timestamp](timestamp.md) | 0..1
[Datetime](Datetime.md) | The time when the message was created | [EREMessage](EREMessage.md) | + + + + + + + + + + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | ere:EREResponse | +| native | ere:EREResponse | + + + + + + +## LinkML Source + + + +### Direct + +
+```yaml +name: EREResponse +description: 'Root class to represent all the responses sent by the ERE. + + ' +from_schema: https://data.europa.eu/ers/schema/ere +is_a: EREMessage +abstract: true + +``` +
+ +### Induced + +
+```yaml +name: EREResponse +description: 'Root class to represent all the responses sent by the ERE. + + ' +from_schema: https://data.europa.eu/ers/schema/ere +is_a: EREMessage +abstract: true +attributes: + type: + name: type + description: "The type of the request or result.\n\nAs per LinkML specification,\ + \ `designates_type` is used here in order to allow for this\nslot to tell the\ + \ concrete subclass that an instance (such as a JSON object) belongs to.\n\n\ + In other words, a particular request will have `type` set with values like \n\ + `EntityMentionResolutionRequest` or `EntityResolutionResult`\n" + from_schema: https://data.europa.eu/ers/schema/ere + rank: 1000 + designates_type: true + alias: type + owner: EREResponse + domain_of: + - EREMessage + range: string + required: true + ere_request_id: + name: ere_request_id + description: 'A string representing the unique ID of an ERE request, or the ID + of the request a response is about. + + This **is not** the same as `request_id` + `source_id`. + + + Note on notification responses: as per ERE contract, an `EntityMentionResolutionResponse` + message + + can originate from within the ERE, without any previous request counterpart, + as a notification of + + resolution update. In this case, `ere_request_id` has the prefix `ereNotification:`. + + ' + from_schema: https://data.europa.eu/ers/schema/ere + rank: 1000 + alias: ere_request_id + owner: EREResponse + domain_of: + - EREMessage + range: string + required: true + timestamp: + name: timestamp + description: 'The time when the message was created. Should be in ISO-8601 format. + + ' + from_schema: https://data.europa.eu/ers/schema/ere + rank: 1000 + alias: timestamp + owner: EREResponse + domain_of: + - EREMessage + range: datetime + +``` +
\ No newline at end of file diff --git a/docs/schema/ERS.svg b/docs/schema/ERS.svg deleted file mode 100644 index 9f3dd98..0000000 --- a/docs/schema/ERS.svg +++ /dev/null @@ -1 +0,0 @@ -SystemOfRequestRecordsRequestRecordrequestIdentifier : urioriginatorIdentifier : uricreated : datetimedataFormat : stringpayload : stringtype : EntityTypeEntityMentionidentifier : uriparsedDataRepresentation : stringtype : EntityTypeDecissionsStoreDecissionindetifier : uricreatedAt : datetimeupdatedAt : datetimedecisionStatus : DecissionStatusdecisionAction : DecisionActionAlignmentLinkSetsubjectMentionIdentifier : uriAlignmentLinkmentionIdentifier : uricanonicalIdentifier : uriconfidenceScore : doubleCommunicationArtefactCanonicalEntityRegistryCanonicalEntityidentifier : uricreated : datetimerecord0..*entityMention0..1decision0..*decisionContext1chosenAlternativeLink0..1acceptedLink0..1acceptedAlignment0..1canonicalEntity0..*mentionLink1..*defaultAlignment1alignmentOption1..* \ No newline at end of file diff --git a/docs/schema/EntityMention.md b/docs/schema/EntityMention.md index 2b146ed..19cc443 100644 --- a/docs/schema/EntityMention.md +++ b/docs/schema/EntityMention.md @@ -5,7 +5,7 @@ _An entity mention is a representation of a real-world entity, as provided by the ERS._ -_It contains the entity data, along with metadata like type and format. 
_ +_It contains the entity data, along with metadata like type and format._ __ @@ -25,19 +25,21 @@ URI: [ere:EntityMention](https://data.europa.eu/ers/schema/ere/EntityMention) click EntityMention href "../EntityMention/" EntityMention : content - EntityMention : contentType + EntityMention : content_type - EntityMention : identifier + EntityMention : identifiedBy - EntityMention --> "1" EntityMentionIdentifier : identifier + EntityMention --> "1" EntityMentionIdentifier : identifiedBy click EntityMentionIdentifier href "../EntityMentionIdentifier/" + EntityMention : parsed_representation + ``` @@ -51,9 +53,10 @@ URI: [ere:EntityMention](https://data.europa.eu/ers/schema/ere/EntityMention) | Name | Cardinality and Range | Description | Inheritance | | --- | --- | --- | --- | -| [identifier](identifier.md) | 1
[EntityMentionIdentifier](EntityMentionIdentifier.md) | The identifier (with the ERS-derived components) of the entity mention | direct | -| [contentType](contentType.md) | 1
[String](String.md) | A string about the MIME format of `content` (e | direct | +| [identifiedBy](identifiedBy.md) | 1
[EntityMentionIdentifier](EntityMentionIdentifier.md) | The identification triad of the entity mention | direct | +| [content_type](content_type.md) | 1
[String](String.md) | A string about the MIME format of `content` (e.g. text/turtle, application/ld+json) | direct | | [content](content.md) | 1
[String](String.md) | A code string representing the entity mention details (eg, RDF or XML descrip... | direct | +| [parsed_representation](parsed_representation.md) | 0..1
[String](String.md) | JSON representation of the parsed entity data | direct | @@ -63,7 +66,7 @@ URI: [ere:EntityMention](https://data.europa.eu/ers/schema/ere/EntityMention) | used by | used in | type | used | | --- | --- | --- | --- | -| [EntityMentionResolutionRequest](EntityMentionResolutionRequest.md) | [entityMention](entityMention.md) | range | [EntityMention](EntityMention.md) | +| [EntityMentionResolutionRequest](EntityMentionResolutionRequest.md) | [entity_mention](entity_mention.md) | range | [EntityMention](EntityMention.md) | @@ -107,28 +110,31 @@ URI: [ere:EntityMention](https://data.europa.eu/ers/schema/ere/EntityMention)
```yaml name: EntityMention -description: "An entity mention is a representation of a real-world entity, as provided\ - \ by the ERS.\nIt contains the entity data, along with metadata like type and format.\ - \ \n" +description: 'An entity mention is a representation of a real-world entity, as provided + by the ERS. + + It contains the entity data, along with metadata like type and format. + + ' from_schema: https://data.europa.eu/ers/schema/ere attributes: - identifier: - name: identifier - description: 'The identifier (with the ERS-derived components) of the entity mention. + identifiedBy: + name: identifiedBy + description: 'The identification triad of the entity mention. ' - from_schema: https://data.europa.eu/ers/schema/ere + from_schema: https://data.europa.eu/ers/schema/ers rank: 1000 domain_of: - EntityMention range: EntityMentionIdentifier required: true - contentType: - name: contentType + content_type: + name: content_type description: 'A string about the MIME format of `content` (e.g. text/turtle, application/ld+json) ' - from_schema: https://data.europa.eu/ers/schema/ere + from_schema: https://data.europa.eu/ers/schema/ers rank: 1000 domain_of: - EntityMention @@ -139,11 +145,20 @@ attributes: XML description). ' - from_schema: https://data.europa.eu/ers/schema/ere + from_schema: https://data.europa.eu/ers/schema/ers rank: 1000 domain_of: - EntityMention required: true + parsed_representation: + name: parsed_representation + description: 'JSON representation of the parsed entity data. + + ' + from_schema: https://data.europa.eu/ers/schema/ers + rank: 1000 + domain_of: + - EntityMention ```
@@ -153,32 +168,35 @@ attributes:
```yaml name: EntityMention -description: "An entity mention is a representation of a real-world entity, as provided\ - \ by the ERS.\nIt contains the entity data, along with metadata like type and format.\ - \ \n" +description: 'An entity mention is a representation of a real-world entity, as provided + by the ERS. + + It contains the entity data, along with metadata like type and format. + + ' from_schema: https://data.europa.eu/ers/schema/ere attributes: - identifier: - name: identifier - description: 'The identifier (with the ERS-derived components) of the entity mention. + identifiedBy: + name: identifiedBy + description: 'The identification triad of the entity mention. ' - from_schema: https://data.europa.eu/ers/schema/ere + from_schema: https://data.europa.eu/ers/schema/ers rank: 1000 - alias: identifier + alias: identifiedBy owner: EntityMention domain_of: - EntityMention range: EntityMentionIdentifier required: true - contentType: - name: contentType + content_type: + name: content_type description: 'A string about the MIME format of `content` (e.g. text/turtle, application/ld+json) ' - from_schema: https://data.europa.eu/ers/schema/ere + from_schema: https://data.europa.eu/ers/schema/ers rank: 1000 - alias: contentType + alias: content_type owner: EntityMention domain_of: - EntityMention @@ -190,7 +208,7 @@ attributes: XML description). ' - from_schema: https://data.europa.eu/ers/schema/ere + from_schema: https://data.europa.eu/ers/schema/ers rank: 1000 alias: content owner: EntityMention @@ -198,6 +216,18 @@ attributes: - EntityMention range: string required: true + parsed_representation: + name: parsed_representation + description: 'JSON representation of the parsed entity data. + + ' + from_schema: https://data.europa.eu/ers/schema/ers + rank: 1000 + alias: parsed_representation + owner: EntityMention + domain_of: + - EntityMention + range: string ```
\ No newline at end of file diff --git a/docs/schema/EntityMentionIdentifier.md b/docs/schema/EntityMentionIdentifier.md index 2d82701..03c3bce 100644 --- a/docs/schema/EntityMentionIdentifier.md +++ b/docs/schema/EntityMentionIdentifier.md @@ -33,11 +33,11 @@ URI: [ere:EntityMentionIdentifier](https://data.europa.eu/ers/schema/ere/EntityM classDiagram class EntityMentionIdentifier click EntityMentionIdentifier href "../EntityMentionIdentifier/" - EntityMentionIdentifier : entityType + EntityMentionIdentifier : entity_type - EntityMentionIdentifier : requestId + EntityMentionIdentifier : request_id - EntityMentionIdentifier : sourceId + EntityMentionIdentifier : source_id ``` @@ -52,9 +52,9 @@ URI: [ere:EntityMentionIdentifier](https://data.europa.eu/ers/schema/ere/EntityM | Name | Cardinality and Range | Description | Inheritance | | --- | --- | --- | --- | -| [sourceId](sourceId.md) | 1
[String](String.md) | The ID or URI of the ERS client that originated the request | direct | -| [requestId](requestId.md) | 1
[String](String.md) | A string representing the unique ID of the request made to the ERS system | direct | -| [entityType](entityType.md) | 1
[String](String.md) | A string representing the entity type (based on CET) | direct | +| [source_id](source_id.md) | 1
[String](String.md) | The ID or URI of the ERS client that originated the request | direct | +| [request_id](request_id.md) | 1
[String](String.md) | A string representing the unique ID of the request made to the ERS system | direct | +| [entity_type](entity_type.md) | 1
[String](String.md) | A string representing the entity type (based on CET) | direct | @@ -64,8 +64,11 @@ URI: [ere:EntityMentionIdentifier](https://data.europa.eu/ers/schema/ere/EntityM | used by | used in | type | used | | --- | --- | --- | --- | -| [EntityMentionResolutionResponse](EntityMentionResolutionResponse.md) | [entityMentionId](entityMentionId.md) | range | [EntityMentionIdentifier](EntityMentionIdentifier.md) | -| [EntityMention](EntityMention.md) | [identifier](identifier.md) | range | [EntityMentionIdentifier](EntityMentionIdentifier.md) | +| [EntityMentionResolutionResponse](EntityMentionResolutionResponse.md) | [entity_mention_id](entity_mention_id.md) | range | [EntityMentionIdentifier](EntityMentionIdentifier.md) | +| [EntityMention](EntityMention.md) | [identifiedBy](identifiedBy.md) | range | [EntityMentionIdentifier](EntityMentionIdentifier.md) | +| [Decision](Decision.md) | [about_entity_mention](about_entity_mention.md) | range | [EntityMentionIdentifier](EntityMentionIdentifier.md) | +| [UserAction](UserAction.md) | [about_entity_mention](about_entity_mention.md) | range | [EntityMentionIdentifier](EntityMentionIdentifier.md) | +| [CanonicalEntityIdentifier](CanonicalEntityIdentifier.md) | [equivalent_to](equivalent_to.md) | range | [EntityMentionIdentifier](EntityMentionIdentifier.md) | @@ -118,36 +121,37 @@ description: "A container that groups the attributes needed to identify an entit \ entity that is initially the only cluster member.\n" from_schema: https://data.europa.eu/ers/schema/ere attributes: - sourceId: - name: sourceId + source_id: + name: source_id description: "The ID or URI of the ERS client that originated the request. 
This\ \ identifies an application or a \nperson accessing the ERS system.\n" - from_schema: https://data.europa.eu/ers/schema/ere + from_schema: https://data.europa.eu/ers/schema/ers rank: 1000 domain_of: - EntityMentionIdentifier + - LookupState required: true - requestId: - name: requestId + request_id: + name: request_id description: "A string representing the unique ID of the request made to the ERS\ \ system. In general, this is unique\nonly within the scope of the source and\ \ the entity type, ie, within `sourceId` and `entityType`. \n\nMoreover, this\ \ is **not** the same as `ereRequestId`, which instead, is internal to the ERE\ \ and is \nused to match responses to requests.\n" - from_schema: https://data.europa.eu/ers/schema/ere + from_schema: https://data.europa.eu/ers/schema/ers rank: 1000 domain_of: - EntityMentionIdentifier range: string required: true - entityType: - name: entityType + entity_type: + name: entity_type description: "A string representing the entity type (based on CET). This is typically\ \ a URI.\n\nNote that this is at this level, and not at `EntityMention`, since,\ \ as said above, \nit's needed to identify the entity, even when its content\ \ is not present. For the same\nreason, it's used both for `EREResolutionRequest`\ - \ and `EREResolutionResponse` messages., \n" - from_schema: https://data.europa.eu/ers/schema/ere + \ and `EREResolutionResponse` messages.,\n" + from_schema: https://data.europa.eu/ers/schema/ers rank: 1000 domain_of: - EntityMentionIdentifier @@ -170,43 +174,44 @@ description: "A container that groups the attributes needed to identify an entit \ entity that is initially the only cluster member.\n" from_schema: https://data.europa.eu/ers/schema/ere attributes: - sourceId: - name: sourceId + source_id: + name: source_id description: "The ID or URI of the ERS client that originated the request. 
This\ \ identifies an application or a \nperson accessing the ERS system.\n" - from_schema: https://data.europa.eu/ers/schema/ere + from_schema: https://data.europa.eu/ers/schema/ers rank: 1000 - alias: sourceId + alias: source_id owner: EntityMentionIdentifier domain_of: - EntityMentionIdentifier + - LookupState range: string required: true - requestId: - name: requestId + request_id: + name: request_id description: "A string representing the unique ID of the request made to the ERS\ \ system. In general, this is unique\nonly within the scope of the source and\ \ the entity type, ie, within `sourceId` and `entityType`. \n\nMoreover, this\ \ is **not** the same as `ereRequestId`, which instead, is internal to the ERE\ \ and is \nused to match responses to requests.\n" - from_schema: https://data.europa.eu/ers/schema/ere + from_schema: https://data.europa.eu/ers/schema/ers rank: 1000 - alias: requestId + alias: request_id owner: EntityMentionIdentifier domain_of: - EntityMentionIdentifier range: string required: true - entityType: - name: entityType + entity_type: + name: entity_type description: "A string representing the entity type (based on CET). This is typically\ \ a URI.\n\nNote that this is at this level, and not at `EntityMention`, since,\ \ as said above, \nit's needed to identify the entity, even when its content\ \ is not present. 
For the same\nreason, it's used both for `EREResolutionRequest`\ - \ and `EREResolutionResponse` messages., \n" - from_schema: https://data.europa.eu/ers/schema/ere + \ and `EREResolutionResponse` messages.,\n" + from_schema: https://data.europa.eu/ers/schema/ers rank: 1000 - alias: entityType + alias: entity_type owner: EntityMentionIdentifier domain_of: - EntityMentionIdentifier diff --git a/docs/schema/EntityMentionResolutionRequest.md b/docs/schema/EntityMentionResolutionRequest.md index 017775b..c089812 100644 --- a/docs/schema/EntityMentionResolutionRequest.md +++ b/docs/schema/EntityMentionResolutionRequest.md @@ -24,20 +24,22 @@ URI: [ere:EntityMentionResolutionRequest](https://data.europa.eu/ers/schema/ere/ ERERequest <|-- EntityMentionResolutionRequest click ERERequest href "../ERERequest/" - EntityMentionResolutionRequest : entityMention + EntityMentionResolutionRequest : entity_mention - EntityMentionResolutionRequest --> "1" EntityMention : entityMention + EntityMentionResolutionRequest --> "1" EntityMention : entity_mention click EntityMention href "../EntityMention/" - EntityMentionResolutionRequest : ereRequestId + EntityMentionResolutionRequest : ere_request_id - EntityMentionResolutionRequest : excludedClusterIds + EntityMentionResolutionRequest : excluded_cluster_ids + + EntityMentionResolutionRequest : proposed_cluster_ids EntityMentionResolutionRequest : timestamp @@ -61,10 +63,11 @@ URI: [ere:EntityMentionResolutionRequest](https://data.europa.eu/ers/schema/ere/ | Name | Cardinality and Range | Description | Inheritance | | --- | --- | --- | --- | -| [entityMention](entityMention.md) | 1
[EntityMention](EntityMention.md) | The data about the entity to be resolved | direct | -| [excludedClusterIds](excludedClusterIds.md) | *
[String](String.md) | When this is present, the resolution must not bin the entity mention into any... | direct | +| [entity_mention](entity_mention.md) | 1
[EntityMention](EntityMention.md) | The data about the entity to be resolved | direct | +| [proposed_cluster_ids](proposed_cluster_ids.md) | *
[String](String.md) | When this is present, the ERE may use this information to try to cluster the ... | direct | +| [excluded_cluster_ids](excluded_cluster_ids.md) | *
[String](String.md) | When this is present, the ERE may use this information to avoid clustering th... | direct | | [type](type.md) | 1
[String](String.md) | The type of the request or result | [EREMessage](EREMessage.md) | -| [ereRequestId](ereRequestId.md) | 1
[String](String.md) | A string representing the unique ID of an ERE request, or the ID of the reque... | [EREMessage](EREMessage.md) | +| [ere_request_id](ere_request_id.md) | 1
[String](String.md) | A string representing the unique ID of an ERE request, or the ID of the reque... | [EREMessage](EREMessage.md) | | [timestamp](timestamp.md) | 0..1
[Datetime](Datetime.md) | The time when the message was created | [EREMessage](EREMessage.md) | @@ -83,39 +86,58 @@ URI: [ere:EntityMentionResolutionRequest](https://data.europa.eu/ers/schema/ere/ | --- | | { "type": "EntityMentionResolutionRequest", - "entityMention": { + "entity_mention": { "identifier": { - "requestId": "324fs3r345vx", - "sourceId": "TEDSWS", - "entityType": "http://www.w3.org/ns/org#Organization" + "request_id": "324fs3r345vx", + "source_id": "TEDSWS", + "entity_type": "http://www.w3.org/ns/org#Organization" }, "content": "epd:ent005 a org:Organization; ... cccev:telephone \"+44 1924306780\" .", - "contentType": "text/turtle" + "content_type": "text/turtle" }, "timestamp": "2026-01-14T12:34:56Z", // As said, we need this internal ID and it can be auto-generated (eg, with UUIDs) - "ereRequestId": "324fs3r345vx:01" + "ere_request_id": "324fs3r345vx:01" } | | { "type": "EntityMentionResolutionRequest", - "entityMention": { + "entity_mention": { "identifier": { - "requestId": "324fs3r345vxab", - "sourceId": "TEDSWS", - "entityType": "http://www.w3.org/ns/org#Organization", + "request_id": "324fs3r345vxab", + "source_id": "TEDSWS", + "entity_type": "http://www.w3.org/ns/org#Organization", }, "content": "epd:ent005 a org:Organization; ... cccev:telephone \"+44 1924306780\" .", - "contentType": "text/turtle" + "content_type": "text/turtle" }, - "excludedClusterIds": [ + "excluded_cluster_ids": [ "324fs3r345vx-bb45we", "324fs3r345vx-cc67ui" ], "timestamp": "2026-01-14T12:40:56Z", - "ereRequestId": "324fs3r345vxab:01" + "ere_request_id": "324fs3r345vxab:01" } | +| { + "type": "EntityMentionResolutionRequest", + "entity_mention": { + "identifier": { + "request_id": "324fs3r345vxab", + "source_id": "TEDSWS", + "entity_type": "http://www.w3.org/ns/org#Organization", + }, + "content": "epd:ent005 a org:Organization; ... 
cccev:telephone \"+44 1924306780\" .", + "content_type": "text/turtle" + }, + "proposed_cluster_ids": [ + // which is sha256 ( source_id + request_id + entity_type ) + "e2e8eea1865aef0e2406ea326520abc252b2afa836ed71434f6a32811904bfad" + ], + "timestamp": "2026-01-14T12:40:56Z", + "ere_request_id": "324fs3r345vxab:01" +} + | ## Identifier and Mapping Information @@ -158,29 +180,39 @@ description: 'An entity resolution request sent to the ERE, containing the entit ' examples: -- value: "{\n \"type\": \"EntityMentionResolutionRequest\",\n \"entityMention\"\ - : { \n \"identifier\": {\n \"requestId\": \"324fs3r345vx\",\n \"\ - sourceId\": \"TEDSWS\",\n \"entityType\": \"http://www.w3.org/ns/org#Organization\"\ +- value: "{\n \"type\": \"EntityMentionResolutionRequest\",\n \"entity_mention\"\ + : { \n \"identifier\": {\n \"request_id\": \"324fs3r345vx\",\n \"\ + source_id\": \"TEDSWS\",\n \"entity_type\": \"http://www.w3.org/ns/org#Organization\"\ \n },\n \"content\": \"epd:ent005 a org:Organization; ... 
cccev:telephone\ - \ \\\"+44 1924306780\\\" .\",\n \"contentType\": \"text/turtle\"\n },\n \"\ - timestamp\": \"2026-01-14T12:34:56Z\",\n // As said, we need this internal ID\ - \ and it can be auto-generated (eg, with UUIDs)\n \"ereRequestId\": \"324fs3r345vx:01\"\ + \ \\\"+44 1924306780\\\" .\",\n \"content_type\": \"text/turtle\"\n },\n \ + \ \"timestamp\": \"2026-01-14T12:34:56Z\",\n // As said, we need this internal\ + \ ID and it can be auto-generated (eg, with UUIDs)\n \"ere_request_id\": \"324fs3r345vx:01\"\ \n}\n" description: a regular request -- value: "{\n \"type\": \"EntityMentionResolutionRequest\",\n \"entityMention\"\ - : { \n \"identifier\": {\n \"requestId\": \"324fs3r345vxab\",\n \"\ - sourceId\": \"TEDSWS\",\n \"entityType\": \"http://www.w3.org/ns/org#Organization\"\ +- value: "{\n \"type\": \"EntityMentionResolutionRequest\",\n \"entity_mention\"\ + : { \n \"identifier\": {\n \"request_id\": \"324fs3r345vxab\",\n \ + \ \"source_id\": \"TEDSWS\",\n \"entity_type\": \"http://www.w3.org/ns/org#Organization\"\ ,\n },\n \"content\": \"epd:ent005 a org:Organization; ... 
cccev:telephone\ - \ \\\"+44 1924306780\\\" .\",\n \"contentType\": \"text/turtle\"\n },\n \"\ - excludedClusterIds\": [\n \"324fs3r345vx-bb45we\",\n \"324fs3r345vx-cc67ui\"\ - \n ],\n \"timestamp\": \"2026-01-14T12:40:56Z\",\n \"ereRequestId\": \"324fs3r345vxab:01\"\ + \ \\\"+44 1924306780\\\" .\",\n \"content_type\": \"text/turtle\"\n },\n \ + \ \"excluded_cluster_ids\": [\n \"324fs3r345vx-bb45we\",\n \"324fs3r345vx-cc67ui\"\ + \n ],\n \"timestamp\": \"2026-01-14T12:40:56Z\",\n \"ere_request_id\": \"324fs3r345vxab:01\"\ \n}\n" description: a re-rebuild request (ie, carrying a rejection list) +- value: "{\n \"type\": \"EntityMentionResolutionRequest\",\n \"entity_mention\"\ + : { \n \"identifier\": {\n \"request_id\": \"324fs3r345vxab\",\n \ + \ \"source_id\": \"TEDSWS\",\n \"entity_type\": \"http://www.w3.org/ns/org#Organization\"\ + ,\n },\n \"content\": \"epd:ent005 a org:Organization; ... cccev:telephone\ + \ \\\"+44 1924306780\\\" .\",\n \"content_type\": \"text/turtle\"\n },\n \ + \ \"proposed_cluster_ids\": [\n // which is sha256 ( source_id + request_id\ + \ + entity_type )\n \"e2e8eea1865aef0e2406ea326520abc252b2afa836ed71434f6a32811904bfad\"\ + \n ],\n \"timestamp\": \"2026-01-14T12:40:56Z\",\n \"ere_request_id\": \"324fs3r345vxab:01\"\ + \n} \n" + description: A request with the entity as proposed cluster ID from_schema: https://data.europa.eu/ers/schema/ere is_a: ERERequest attributes: - entityMention: - name: entityMention + entity_mention: + name: entity_mention description: 'The data about the entity to be resolved. Note that, at least for the moment, we don''t support @@ -193,15 +225,36 @@ attributes: - EntityMentionResolutionRequest range: EntityMention required: true - excludedClusterIds: - name: excludedClusterIds - description: "When this is present, the resolution must not bin the entity mention\ - \ into any of the\nlisted clusters. 
This can be used to reject a previous resolution\ - \ proposed by the ERE.\n\nThe exact reaction to this is implementation dependent.\ - \ In the simplest case, the ERE\nmight just create a singleton cluster with\ - \ this entity as member. In a more advanced \ncase, it might recompute the similarity\ - \ with more advanced algorithms or use updated\ndata.\n\nTODO: Can this be revised?\ - \ What does it happen if an exclusion was made by mistake?\n" + proposed_cluster_ids: + name: proposed_cluster_ids + description: "When this is present, the ERE may use this information to try to\ + \ cluster the entity in one of \nthe listed clusters.\n\nIn particular, when\ + \ an initial request about an entity isn't answered within a timeout, \na subsequent\ + \ new request can be sent about the same entity and with the canonical ID of\ + \ it\nas a single proposed cluster ID. This suggests the ERE that it can create\ + \ a new singleton cluster\nwith the entity as its initial only member and its\ + \ canonical ID as the cluster ID. The ERE\ncan evolve such a cluster later,\ + \ when further similar entities are sent in, or when it \nhas had more time\ + \ to associate the initial entity to others. 
\n\nWhatever, the case, the ERE\ + \ **has no obligation** to fulfil the proposal, how it reacts to \nthis list\ + \ is implementation dependent, and the ERE remains the ultimate authority to\ + \ provide \nthe final resolution decision.\n" + from_schema: https://data.europa.eu/ers/schema/ere + rank: 1000 + domain_of: + - EntityMentionResolutionRequest + multivalued: true + excluded_cluster_ids: + name: excluded_cluster_ids + description: "When this is present, the ERE may use this information to avoid\ + \ clustering the entity in \nthe listed clusters.\n\nThis can be used to notify\ + \ the ERE that a curator has rejected a previous resolution \nproposed by the\ + \ ERE.\n\nAs for `proposed_cluster_ids`, the ERE **has no obligation** to fulfil\ + \ the exclusions, and \nit remains the ultimate authority to provide the final\ + \ resolution decision.\n\nSimilarly, the exact reaction to this is implementation\ + \ dependent. In the simplest case, the ERE\nmight just create a singleton cluster\ + \ with the current entity as member. In a more advanced \ncase, it might recompute\ + \ the similarity with more advanced algorithms or use updated\ndata.\n" from_schema: https://data.europa.eu/ers/schema/ere rank: 1000 domain_of: @@ -221,29 +274,39 @@ description: 'An entity resolution request sent to the ERE, containing the entit ' examples: -- value: "{\n \"type\": \"EntityMentionResolutionRequest\",\n \"entityMention\"\ - : { \n \"identifier\": {\n \"requestId\": \"324fs3r345vx\",\n \"\ - sourceId\": \"TEDSWS\",\n \"entityType\": \"http://www.w3.org/ns/org#Organization\"\ +- value: "{\n \"type\": \"EntityMentionResolutionRequest\",\n \"entity_mention\"\ + : { \n \"identifier\": {\n \"request_id\": \"324fs3r345vx\",\n \"\ + source_id\": \"TEDSWS\",\n \"entity_type\": \"http://www.w3.org/ns/org#Organization\"\ \n },\n \"content\": \"epd:ent005 a org:Organization; ... 
cccev:telephone\ - \ \\\"+44 1924306780\\\" .\",\n \"contentType\": \"text/turtle\"\n },\n \"\ - timestamp\": \"2026-01-14T12:34:56Z\",\n // As said, we need this internal ID\ - \ and it can be auto-generated (eg, with UUIDs)\n \"ereRequestId\": \"324fs3r345vx:01\"\ + \ \\\"+44 1924306780\\\" .\",\n \"content_type\": \"text/turtle\"\n },\n \ + \ \"timestamp\": \"2026-01-14T12:34:56Z\",\n // As said, we need this internal\ + \ ID and it can be auto-generated (eg, with UUIDs)\n \"ere_request_id\": \"324fs3r345vx:01\"\ \n}\n" description: a regular request -- value: "{\n \"type\": \"EntityMentionResolutionRequest\",\n \"entityMention\"\ - : { \n \"identifier\": {\n \"requestId\": \"324fs3r345vxab\",\n \"\ - sourceId\": \"TEDSWS\",\n \"entityType\": \"http://www.w3.org/ns/org#Organization\"\ +- value: "{\n \"type\": \"EntityMentionResolutionRequest\",\n \"entity_mention\"\ + : { \n \"identifier\": {\n \"request_id\": \"324fs3r345vxab\",\n \ + \ \"source_id\": \"TEDSWS\",\n \"entity_type\": \"http://www.w3.org/ns/org#Organization\"\ ,\n },\n \"content\": \"epd:ent005 a org:Organization; ... 
cccev:telephone\ - \ \\\"+44 1924306780\\\" .\",\n \"contentType\": \"text/turtle\"\n },\n \"\ - excludedClusterIds\": [\n \"324fs3r345vx-bb45we\",\n \"324fs3r345vx-cc67ui\"\ - \n ],\n \"timestamp\": \"2026-01-14T12:40:56Z\",\n \"ereRequestId\": \"324fs3r345vxab:01\"\ + \ \\\"+44 1924306780\\\" .\",\n \"content_type\": \"text/turtle\"\n },\n \ + \ \"excluded_cluster_ids\": [\n \"324fs3r345vx-bb45we\",\n \"324fs3r345vx-cc67ui\"\ + \n ],\n \"timestamp\": \"2026-01-14T12:40:56Z\",\n \"ere_request_id\": \"324fs3r345vxab:01\"\ \n}\n" description: a re-rebuild request (ie, carrying a rejection list) +- value: "{\n \"type\": \"EntityMentionResolutionRequest\",\n \"entity_mention\"\ + : { \n \"identifier\": {\n \"request_id\": \"324fs3r345vxab\",\n \ + \ \"source_id\": \"TEDSWS\",\n \"entity_type\": \"http://www.w3.org/ns/org#Organization\"\ + ,\n },\n \"content\": \"epd:ent005 a org:Organization; ... cccev:telephone\ + \ \\\"+44 1924306780\\\" .\",\n \"content_type\": \"text/turtle\"\n },\n \ + \ \"proposed_cluster_ids\": [\n // which is sha256 ( source_id + request_id\ + \ + entity_type )\n \"e2e8eea1865aef0e2406ea326520abc252b2afa836ed71434f6a32811904bfad\"\ + \n ],\n \"timestamp\": \"2026-01-14T12:40:56Z\",\n \"ere_request_id\": \"324fs3r345vxab:01\"\ + \n} \n" + description: A request with the entity as proposed cluster ID from_schema: https://data.europa.eu/ers/schema/ere is_a: ERERequest attributes: - entityMention: - name: entityMention + entity_mention: + name: entity_mention description: 'The data about the entity to be resolved. 
Note that, at least for the moment, we don''t support @@ -252,24 +315,48 @@ attributes: ' from_schema: https://data.europa.eu/ers/schema/ere rank: 1000 - alias: entityMention + alias: entity_mention owner: EntityMentionResolutionRequest domain_of: - EntityMentionResolutionRequest range: EntityMention required: true - excludedClusterIds: - name: excludedClusterIds - description: "When this is present, the resolution must not bin the entity mention\ - \ into any of the\nlisted clusters. This can be used to reject a previous resolution\ - \ proposed by the ERE.\n\nThe exact reaction to this is implementation dependent.\ - \ In the simplest case, the ERE\nmight just create a singleton cluster with\ - \ this entity as member. In a more advanced \ncase, it might recompute the similarity\ - \ with more advanced algorithms or use updated\ndata.\n\nTODO: Can this be revised?\ - \ What does it happen if an exclusion was made by mistake?\n" + proposed_cluster_ids: + name: proposed_cluster_ids + description: "When this is present, the ERE may use this information to try to\ + \ cluster the entity in one of \nthe listed clusters.\n\nIn particular, when\ + \ an initial request about an entity isn't answered within a timeout, \na subsequent\ + \ new request can be sent about the same entity and with the canonical ID of\ + \ it\nas a single proposed cluster ID. This suggests the ERE that it can create\ + \ a new singleton cluster\nwith the entity as its initial only member and its\ + \ canonical ID as the cluster ID. The ERE\ncan evolve such a cluster later,\ + \ when further similar entities are sent in, or when it \nhas had more time\ + \ to associate the initial entity to others. 
\n\nWhatever, the case, the ERE\ + \ **has no obligation** to fulfil the proposal, how it reacts to \nthis list\ + \ is implementation dependent, and the ERE remains the ultimate authority to\ + \ provide \nthe final resolution decision.\n" + from_schema: https://data.europa.eu/ers/schema/ere + rank: 1000 + alias: proposed_cluster_ids + owner: EntityMentionResolutionRequest + domain_of: + - EntityMentionResolutionRequest + range: string + multivalued: true + excluded_cluster_ids: + name: excluded_cluster_ids + description: "When this is present, the ERE may use this information to avoid\ + \ clustering the entity in \nthe listed clusters.\n\nThis can be used to notify\ + \ the ERE that a curator has rejected a previous resolution \nproposed by the\ + \ ERE.\n\nAs for `proposed_cluster_ids`, the ERE **has no obligation** to fulfil\ + \ the exclusions, and \nit remains the ultimate authority to provide the final\ + \ resolution decision.\n\nSimilarly, the exact reaction to this is implementation\ + \ dependent. In the simplest case, the ERE\nmight just create a singleton cluster\ + \ with the current entity as member. In a more advanced \ncase, it might recompute\ + \ the similarity with more advanced algorithms or use updated\ndata.\n" from_schema: https://data.europa.eu/ers/schema/ere rank: 1000 - alias: excludedClusterIds + alias: excluded_cluster_ids owner: EntityMentionResolutionRequest domain_of: - EntityMentionResolutionRequest @@ -291,17 +378,26 @@ attributes: - EREMessage range: string required: true - ereRequestId: - name: ereRequestId + ere_request_id: + name: ere_request_id description: 'A string representing the unique ID of an ERE request, or the ID of the request a response is about. - This **is not** the same as `requestId` + `sourceId`. + This **is not** the same as `request_id` + `source_id`. 
+ + + Note on notification responses: as per ERE contract, an `EntityMentionResolutionResponse` + message + + can originate from within the ERE, without any previous request counterpart, + as a notification of + + resolution update. In this case, `ere_request_id` has the prefix `ereNotification:`. ' from_schema: https://data.europa.eu/ers/schema/ere rank: 1000 - alias: ereRequestId + alias: ere_request_id owner: EntityMentionResolutionRequest domain_of: - EREMessage diff --git a/docs/schema/EntityMentionResolutionResponse.md b/docs/schema/EntityMentionResolutionResponse.md index 44dced2..39f8d2a 100644 --- a/docs/schema/EntityMentionResolutionResponse.md +++ b/docs/schema/EntityMentionResolutionResponse.md @@ -49,18 +49,18 @@ URI: [ere:EntityMentionResolutionResponse](https://data.europa.eu/ers/schema/ere - EntityMentionResolutionResponse : entityMentionId + EntityMentionResolutionResponse : entity_mention_id - EntityMentionResolutionResponse --> "1" EntityMentionIdentifier : entityMentionId + EntityMentionResolutionResponse --> "1" EntityMentionIdentifier : entity_mention_id click EntityMentionIdentifier href "../EntityMentionIdentifier/" - EntityMentionResolutionResponse : ereRequestId + EntityMentionResolutionResponse : ere_request_id EntityMentionResolutionResponse : timestamp @@ -84,10 +84,10 @@ URI: [ere:EntityMentionResolutionResponse](https://data.europa.eu/ers/schema/ere | Name | Cardinality and Range | Description | Inheritance | | --- | --- | --- | --- | -| [entityMentionId](entityMentionId.md) | 1
[EntityMentionIdentifier](EntityMentionIdentifier.md) | The identifier of the entity mention that has been resolved | direct | +| [entity_mention_id](entity_mention_id.md) | 1
[EntityMentionIdentifier](EntityMentionIdentifier.md) | The identifier of the entity mention that has been resolved | direct | | [candidates](candidates.md) | 1..*
[ClusterReference](ClusterReference.md) | The set of cluster reference/score pairs representing the candidate clusters | direct | | [type](type.md) | 1
[String](String.md) | The type of the request or result | [EREMessage](EREMessage.md) | -| [ereRequestId](ereRequestId.md) | 1
[String](String.md) | A string representing the unique ID of an ERE request, or the ID of the reque... | [EREMessage](EREMessage.md) | +| [ere_request_id](ere_request_id.md) | 1
[String](String.md) | A string representing the unique ID of an ERE request, or the ID of the reque... | [EREMessage](EREMessage.md) | | [timestamp](timestamp.md) | 0..1
[Datetime](Datetime.md) | The time when the message was created | [EREMessage](EREMessage.md) | @@ -106,23 +106,23 @@ URI: [ere:EntityMentionResolutionResponse](https://data.europa.eu/ers/schema/ere | --- | | { "type": "EntityMentionResolutionResponse", - "entityMentionId": { - "requestId": "324fs3r345vx", - "sourceId": "TEDSWS", - "entityType": "http://www.w3.org/ns/org#Organization" + "entity_mention_id": { + "request_id": "324fs3r345vx", + "source_id": "TEDSWS", + "entity_type": "http://www.w3.org/ns/org#Organization" }, "candidates": [ { - "clusterId": "324fs3r345vx-aa32wa", - "confidenceScore": 0.91 + "cluster_id": "324fs3r345vx-aa32wa", + "confidence_score": 0.91 }, { - "clusterId": "324fs3r345vx-bb45we", - "confidenceScore": 0.65 + "cluster_id": "324fs3r345vx-bb45we", + "confidence_score": 0.65 } ], "timestamp": "2026-01-14T12:34:59Z", - "ereRequestId": "324fs3r345vx:01" + "ere_request_id": "324fs3r345vx:01" } | @@ -170,20 +170,20 @@ description: "An entity resolution response returned by the ERE.\n\nThis is basi \ with multiple subjects), in which case \nwe might need to return multiple `EntityMentionResolutionResponse`\ \ messages, each with additional \nproperties such as `entityIndex` and `totalEntities`.\n" examples: -- value: "{\n \"type\": \"EntityMentionResolutionResponse\",\n \"entityMentionId\"\ - : {\n \"requestId\": \"324fs3r345vx\",\n \"sourceId\": \"TEDSWS\",\n \ - \ \"entityType\": \"http://www.w3.org/ns/org#Organization\"\n },\n \"candidates\"\ - : [\n { \n \"clusterId\": \"324fs3r345vx-aa32wa\",\n \"confidenceScore\"\ - : 0.91\n },\n { \n \"clusterId\": \"324fs3r345vx-bb45we\",\n \"\ - confidenceScore\": 0.65\n }\n ],\n \"timestamp\": \"2026-01-14T12:34:59Z\"\ - ,\n \"ereRequestId\": \"324fs3r345vx:01\"\n}\n \n" +- value: "{\n \"type\": \"EntityMentionResolutionResponse\",\n \"entity_mention_id\"\ + : {\n \"request_id\": \"324fs3r345vx\",\n \"source_id\": \"TEDSWS\",\n \ + \ \"entity_type\": \"http://www.w3.org/ns/org#Organization\"\n },\n 
\"candidates\"\ + : [\n { \n \"cluster_id\": \"324fs3r345vx-aa32wa\",\n \"confidence_score\"\ + : 0.91\n },\n { \n \"cluster_id\": \"324fs3r345vx-bb45we\",\n \ + \ \"confidence_score\": 0.65\n }\n ],\n \"timestamp\": \"2026-01-14T12:34:59Z\"\ + ,\n \"ere_request_id\": \"324fs3r345vx:01\"\n}\n \n" from_schema: https://data.europa.eu/ers/schema/ere is_a: EREResponse attributes: - entityMentionId: - name: entityMentionId + entity_mention_id: + name: entity_mention_id description: "The identifier of the entity mention that has been resolved.\n\n\ - This isn't strictly needed, since the `ereRequestId` already links the response\ + This isn't strictly needed, since the `ere_request_id` already links the response\ \ to \nthe request's entity mention. Yet, it's reported for convenience.\n" from_schema: https://data.europa.eu/ers/schema/ere rank: 1000 @@ -204,6 +204,8 @@ attributes: rank: 1000 domain_of: - EntityMentionResolutionResponse + - Decision + - UserAction range: ClusterReference required: true multivalued: true @@ -223,24 +225,24 @@ description: "An entity resolution response returned by the ERE.\n\nThis is basi \ with multiple subjects), in which case \nwe might need to return multiple `EntityMentionResolutionResponse`\ \ messages, each with additional \nproperties such as `entityIndex` and `totalEntities`.\n" examples: -- value: "{\n \"type\": \"EntityMentionResolutionResponse\",\n \"entityMentionId\"\ - : {\n \"requestId\": \"324fs3r345vx\",\n \"sourceId\": \"TEDSWS\",\n \ - \ \"entityType\": \"http://www.w3.org/ns/org#Organization\"\n },\n \"candidates\"\ - : [\n { \n \"clusterId\": \"324fs3r345vx-aa32wa\",\n \"confidenceScore\"\ - : 0.91\n },\n { \n \"clusterId\": \"324fs3r345vx-bb45we\",\n \"\ - confidenceScore\": 0.65\n }\n ],\n \"timestamp\": \"2026-01-14T12:34:59Z\"\ - ,\n \"ereRequestId\": \"324fs3r345vx:01\"\n}\n \n" +- value: "{\n \"type\": \"EntityMentionResolutionResponse\",\n \"entity_mention_id\"\ + : {\n \"request_id\": \"324fs3r345vx\",\n 
\"source_id\": \"TEDSWS\",\n \ + \ \"entity_type\": \"http://www.w3.org/ns/org#Organization\"\n },\n \"candidates\"\ + : [\n { \n \"cluster_id\": \"324fs3r345vx-aa32wa\",\n \"confidence_score\"\ + : 0.91\n },\n { \n \"cluster_id\": \"324fs3r345vx-bb45we\",\n \ + \ \"confidence_score\": 0.65\n }\n ],\n \"timestamp\": \"2026-01-14T12:34:59Z\"\ + ,\n \"ere_request_id\": \"324fs3r345vx:01\"\n}\n \n" from_schema: https://data.europa.eu/ers/schema/ere is_a: EREResponse attributes: - entityMentionId: - name: entityMentionId + entity_mention_id: + name: entity_mention_id description: "The identifier of the entity mention that has been resolved.\n\n\ - This isn't strictly needed, since the `ereRequestId` already links the response\ + This isn't strictly needed, since the `ere_request_id` already links the response\ \ to \nthe request's entity mention. Yet, it's reported for convenience.\n" from_schema: https://data.europa.eu/ers/schema/ere rank: 1000 - alias: entityMentionId + alias: entity_mention_id owner: EntityMentionResolutionResponse domain_of: - EntityMentionResolutionResponse @@ -261,6 +263,8 @@ attributes: owner: EntityMentionResolutionResponse domain_of: - EntityMentionResolutionResponse + - Decision + - UserAction range: ClusterReference required: true multivalued: true @@ -280,17 +284,26 @@ attributes: - EREMessage range: string required: true - ereRequestId: - name: ereRequestId + ere_request_id: + name: ere_request_id description: 'A string representing the unique ID of an ERE request, or the ID of the request a response is about. - This **is not** the same as `requestId` + `sourceId`. + This **is not** the same as `request_id` + `source_id`. + + + Note on notification responses: as per ERE contract, an `EntityMentionResolutionResponse` + message + + can originate from within the ERE, without any previous request counterpart, + as a notification of + + resolution update. In this case, `ere_request_id` has the prefix `ereNotification:`. 
' from_schema: https://data.europa.eu/ers/schema/ere rank: 1000 - alias: ereRequestId + alias: ere_request_id owner: EntityMentionResolutionResponse domain_of: - EREMessage diff --git a/docs/schema/EntityType.md b/docs/schema/EntityType.md new file mode 100644 index 0000000..1497b0e --- /dev/null +++ b/docs/schema/EntityType.md @@ -0,0 +1,60 @@ +# Enum: EntityType + + + + +_Types of entities that can be resolved_ + + + +URI: [ere:EntityType](https://data.europa.eu/ers/schema/ere/EntityType) + +## Permissible Values + +| Value | Meaning | Description | +| --- | --- | --- | +| ORGANISATION | None | An organization entity | +| PROCEDURE | None | A procurement procedure entity | + + + + + + + + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + + + +## LinkML Source + +
+```yaml +name: EntityType +description: Types of entities that can be resolved +from_schema: https://data.europa.eu/ers/schema/ere +rank: 1000 +permissible_values: + ORGANISATION: + text: ORGANISATION + description: An organization entity + PROCEDURE: + text: PROCEDURE + description: A procurement procedure entity + +``` +
\ No newline at end of file diff --git a/docs/schema/EreServiceSchema-edited.png b/docs/schema/EreServiceSchema-edited.png new file mode 100644 index 0000000..4e9a76c Binary files /dev/null and b/docs/schema/EreServiceSchema-edited.png differ diff --git a/docs/schema/EreServiceSchema.png b/docs/schema/EreServiceSchema.png new file mode 100644 index 0000000..5c3c607 Binary files /dev/null and b/docs/schema/EreServiceSchema.png differ diff --git a/docs/schema/EreServiceSchema.svg b/docs/schema/EreServiceSchema.svg index bc171ba..895b766 100644 --- a/docs/schema/EreServiceSchema.svg +++ b/docs/schema/EreServiceSchema.svg @@ -1 +1 @@ -EntityMentionResolutionResponsetype(i) : stringereRequestId(i) : stringtimestamp(i) : datetimeEntityMentionIdentifiersourceId : stringrequestId : stringentityType : stringEntityMentioncontentType : stringcontent : stringClusterReferenceclusterId : stringconfidenceScore : floatEREResponsetype(i) : stringereRequestId(i) : stringtimestamp(i) : datetimeEntityMentionResolutionRequestexcludedClusterIds : string [0..*]type(i) : stringereRequestId(i) : stringtimestamp(i) : datetimeERERequesttype(i) : stringereRequestId(i) : stringtimestamp(i) : datetimeFullRebuildResponsetype(i) : stringereRequestId(i) : stringtimestamp(i) : datetimeEREErrorResponseerrorType : stringerrorTitle : stringerrorDetail : stringerrorTrace : stringtype(i) : stringereRequestId(i) : stringtimestamp(i) : datetimeEREMessagetype : stringereRequestId : stringtimestamp : datetimeFullRebuildRequesttype(i) : stringereRequestId(i) : stringtimestamp(i) : datetimeentityMentionId1identifier1candidates1..*entityMention1 \ No newline at end of file +UserActionid : stringaction_type : UserActionTypeactor : stringcreated_at : datetimemetadata : stringClusterReferencecluster_id : stringconfidence_score : floatsimilarity_score : floatEntityMentionIdentifiersource_id : stringrequest_id : stringentity_type : stringLookupStatesource_id : stringlast_snapshot : 
datetimeCanonicalEntityIdentifieridentifier : stringDecisionid : stringcreated_at : datetimeupdated_at : datetimeEntityMentionResolutionResponsetype(i) : stringere_request_id(i) : stringtimestamp(i) : datetimeEntityMentioncontent_type : stringcontent : stringparsed_representation : stringEREResponsetype(i) : stringere_request_id(i) : stringtimestamp(i) : datetimeEntityMentionResolutionRequestproposed_cluster_ids : string [0..*]excluded_cluster_ids : string [0..*]type(i) : stringere_request_id(i) : stringtimestamp(i) : datetimeERERequesttype(i) : stringere_request_id(i) : stringtimestamp(i) : datetimeEREErrorResponseerror_type : stringerror_title : stringerror_detail : stringerror_trace : stringtype(i) : stringere_request_id(i) : stringtimestamp(i) : datetimeEREMessagetype : stringere_request_id : stringtimestamp : datetimeselected_cluster0..1candidates1..*about_entity_mention1equivalent_to1..*about_entity_mention1entity_mention_id1identifiedBy1candidates1..*entity_mention1candidates1..*current_placement1 \ No newline at end of file diff --git a/docs/schema/ErsServiceDataSchema.svg b/docs/schema/ErsServiceDataSchema.svg deleted file mode 100644 index bc171ba..0000000 --- a/docs/schema/ErsServiceDataSchema.svg +++ /dev/null @@ -1 +0,0 @@ -EntityMentionResolutionResponsetype(i) : stringereRequestId(i) : stringtimestamp(i) : datetimeEntityMentionIdentifiersourceId : stringrequestId : stringentityType : stringEntityMentioncontentType : stringcontent : stringClusterReferenceclusterId : stringconfidenceScore : floatEREResponsetype(i) : stringereRequestId(i) : stringtimestamp(i) : datetimeEntityMentionResolutionRequestexcludedClusterIds : string [0..*]type(i) : stringereRequestId(i) : stringtimestamp(i) : datetimeERERequesttype(i) : stringereRequestId(i) : stringtimestamp(i) : datetimeFullRebuildResponsetype(i) : stringereRequestId(i) : stringtimestamp(i) : datetimeEREErrorResponseerrorType : stringerrorTitle : stringerrorDetail : stringerrorTrace : stringtype(i) : 
stringereRequestId(i) : stringtimestamp(i) : datetimeEREMessagetype : stringereRequestId : stringtimestamp : datetimeFullRebuildRequesttype(i) : stringereRequestId(i) : stringtimestamp(i) : datetimeentityMentionId1identifier1candidates1..*entityMention1 \ No newline at end of file diff --git a/docs/schema/Float.md b/docs/schema/Float.md new file mode 100644 index 0000000..44703c0 --- /dev/null +++ b/docs/schema/Float.md @@ -0,0 +1,46 @@ +# Type: Float + + + + +_A real number that conforms to the xsd:float specification_ + + + +URI: [xsd:float](http://www.w3.org/2001/XMLSchema#float) + +* [base](https://w3id.org/linkml/base): float + +* [uri](https://w3id.org/linkml/uri): xsd:float + + + + + + + + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | xsd:float | +| native | ere:float | +| exact | schema:Float | + + diff --git a/docs/schema/FullRebuildRequest.md b/docs/schema/FullRebuildRequest.md deleted file mode 100644 index b11b5be..0000000 --- a/docs/schema/FullRebuildRequest.md +++ /dev/null @@ -1,206 +0,0 @@ - - -# Class: FullRebuildRequest - - -_A request to reset all the resolutions computed so far and possibly rebuild them as _ - -_requests about old entities arrive again (and build new entities from scratch as usually)._ - -__ - -_It is expected that the ERE client re-sends all the entities to be resolved again,_ - -_using `EntityMentionResolutionRequest` messages exactly as the first time the resolutions _ - -_were built. 
This implies the a client like the ERS logs/persists the entities it receives_ - -_to resolve and also saves manual overriding of ERE results._ - -__ - -_Moreover:_ - -_* The ERE must keep track of past `EntityMention` marked as canonical._ - -_* The ERE must retain requests with `excludedClusterIds` and apply them again when the _ - -_ same entity mention is re-sent after the full rebuild. TODO: see notes about these properties,_ - -_ on the possible need of withdrawing exclusions._ - -__ - - - - - -URI: [ere:FullRebuildRequest](https://data.europa.eu/ers/schema/ere/FullRebuildRequest) - - - - - -```mermaid - classDiagram - class FullRebuildRequest - click FullRebuildRequest href "../FullRebuildRequest/" - ERERequest <|-- FullRebuildRequest - click ERERequest href "../ERERequest/" - - FullRebuildRequest : ereRequestId - - FullRebuildRequest : timestamp - - FullRebuildRequest : type - - -``` - - - - - -## Inheritance -* [EREMessage](EREMessage.md) - * [ERERequest](ERERequest.md) - * **FullRebuildRequest** - - - -## Slots - -| Name | Cardinality and Range | Description | Inheritance | -| --- | --- | --- | --- | -| [type](type.md) | 1
[String](String.md) | The type of the request or result | [EREMessage](EREMessage.md) | -| [ereRequestId](ereRequestId.md) | 1
[String](String.md) | A string representing the unique ID of an ERE request, or the ID of the reque... | [EREMessage](EREMessage.md) | -| [timestamp](timestamp.md) | 0..1
[Datetime](Datetime.md) | The time when the message was created | [EREMessage](EREMessage.md) | - - - - - - - - - - -## Identifier and Mapping Information - - - - - - -### Schema Source - - -* from schema: https://data.europa.eu/ers/schema/ere - - - - -## Mappings - -| Mapping Type | Mapped Value | -| --- | --- | -| self | ere:FullRebuildRequest | -| native | ere:FullRebuildRequest | - - - - - - -## LinkML Source - - - -### Direct - -
-```yaml -name: FullRebuildRequest -description: "A request to reset all the resolutions computed so far and possibly\ - \ rebuild them as \nrequests about old entities arrive again (and build new entities\ - \ from scratch as usually).\n\nIt is expected that the ERE client re-sends all the\ - \ entities to be resolved again,\nusing `EntityMentionResolutionRequest` messages\ - \ exactly as the first time the resolutions \nwere built. This implies the a client\ - \ like the ERS logs/persists the entities it receives\nto resolve and also saves\ - \ manual overriding of ERE results.\n\nMoreover:\n* The ERE must keep track of past\ - \ `EntityMention` marked as canonical.\n* The ERE must retain requests with `excludedClusterIds`\ - \ and apply them again when the \n same entity mention is re-sent after the full\ - \ rebuild. TODO: see notes about these properties,\n on the possible need of withdrawing\ - \ exclusions.\n" -from_schema: https://data.europa.eu/ers/schema/ere -is_a: ERERequest - -``` -
- -### Induced - -
-```yaml -name: FullRebuildRequest -description: "A request to reset all the resolutions computed so far and possibly\ - \ rebuild them as \nrequests about old entities arrive again (and build new entities\ - \ from scratch as usually).\n\nIt is expected that the ERE client re-sends all the\ - \ entities to be resolved again,\nusing `EntityMentionResolutionRequest` messages\ - \ exactly as the first time the resolutions \nwere built. This implies the a client\ - \ like the ERS logs/persists the entities it receives\nto resolve and also saves\ - \ manual overriding of ERE results.\n\nMoreover:\n* The ERE must keep track of past\ - \ `EntityMention` marked as canonical.\n* The ERE must retain requests with `excludedClusterIds`\ - \ and apply them again when the \n same entity mention is re-sent after the full\ - \ rebuild. TODO: see notes about these properties,\n on the possible need of withdrawing\ - \ exclusions.\n" -from_schema: https://data.europa.eu/ers/schema/ere -is_a: ERERequest -attributes: - type: - name: type - description: "The type of the request or result.\n\nAs per LinkML specification,\ - \ `designates_type` is used here in order to allow for this\nslot to tell the\ - \ concrete subclass that an instance (such as a JSON object) belongs to.\n\n\ - In other words, a particular request will have `type` set with values like \n\ - `EntityMentionResolutionRequest` or `EntityResolutionResult`\n" - from_schema: https://data.europa.eu/ers/schema/ere - rank: 1000 - designates_type: true - alias: type - owner: FullRebuildRequest - domain_of: - - EREMessage - range: string - required: true - ereRequestId: - name: ereRequestId - description: 'A string representing the unique ID of an ERE request, or the ID - of the request a response is about. - - This **is not** the same as `requestId` + `sourceId`. 
- - ' - from_schema: https://data.europa.eu/ers/schema/ere - rank: 1000 - alias: ereRequestId - owner: FullRebuildRequest - domain_of: - - EREMessage - range: string - required: true - timestamp: - name: timestamp - description: 'The time when the message was created. Should be in ISO-8601 format. - - ' - from_schema: https://data.europa.eu/ers/schema/ere - rank: 1000 - alias: timestamp - owner: FullRebuildRequest - domain_of: - - EREMessage - range: datetime - -``` -
\ No newline at end of file diff --git a/docs/schema/Integer.md b/docs/schema/Integer.md new file mode 100644 index 0000000..a74c808 --- /dev/null +++ b/docs/schema/Integer.md @@ -0,0 +1,46 @@ +# Type: Integer + + + + +_An integer_ + + + +URI: [xsd:integer](http://www.w3.org/2001/XMLSchema#integer) + +* [base](https://w3id.org/linkml/base): int + +* [uri](https://w3id.org/linkml/uri): xsd:integer + + + + + + + + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | xsd:integer | +| native | ere:integer | +| exact | schema:Integer | + + diff --git a/docs/schema/Jsonpath.md b/docs/schema/Jsonpath.md new file mode 100644 index 0000000..95ded40 --- /dev/null +++ b/docs/schema/Jsonpath.md @@ -0,0 +1,46 @@ +# Type: Jsonpath + + + + +_A string encoding a JSON Path. The value of the string MUST conform to JSON Point syntax and SHOULD dereference to zero or more valid objects within the current instance document when encoded in tree form._ + + + +URI: [xsd:string](http://www.w3.org/2001/XMLSchema#string) + +* [base](https://w3id.org/linkml/base): str + +* [uri](https://w3id.org/linkml/uri): xsd:string + +* [repr](https://w3id.org/linkml/repr): str + + + + + + + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | xsd:string | +| native | ere:jsonpath | + + diff --git a/docs/schema/Jsonpointer.md b/docs/schema/Jsonpointer.md new file mode 100644 index 0000000..b6fee6b --- /dev/null +++ b/docs/schema/Jsonpointer.md @@ -0,0 +1,46 @@ +# Type: Jsonpointer + + + + +_A string encoding a JSON Pointer. 
The value of the string MUST conform to JSON Point syntax and SHOULD dereference to a valid object within the current instance document when encoded in tree form._ + + + +URI: [xsd:string](http://www.w3.org/2001/XMLSchema#string) + +* [base](https://w3id.org/linkml/base): str + +* [uri](https://w3id.org/linkml/uri): xsd:string + +* [repr](https://w3id.org/linkml/repr): str + + + + + + + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | xsd:string | +| native | ere:jsonpointer | + + diff --git a/docs/schema/LookupState.md b/docs/schema/LookupState.md new file mode 100644 index 0000000..37ec977 --- /dev/null +++ b/docs/schema/LookupState.md @@ -0,0 +1,169 @@ + + +# Class: LookupState + + +_Tracks the resolution state for entity mentions from a particular source._ + +_Records when the source was last resolved against the canonical clustering._ + +__ + + + + + +URI: [ere:LookupState](https://data.europa.eu/ers/schema/ere/LookupState) + + + + + +```mermaid + classDiagram + class LookupState + click LookupState href "../LookupState/" + LookupState : last_snapshot + + LookupState : source_id + + +``` + + + + + + + +## Slots + +| Name | Cardinality and Range | Description | Inheritance | +| --- | --- | --- | --- | +| [source_id](source_id.md) | 1
[String](String.md) | The ID or URI of the ERS client (originator) for which we track lookup state | direct | +| [last_snapshot](last_snapshot.md) | 1
[Datetime](Datetime.md) | Timestamp of the last resolution operation for this source | direct | + + + + + + + + + + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | ere:LookupState | +| native | ere:LookupState | + + + + + + +## LinkML Source + + + +### Direct + +
+```yaml +name: LookupState +description: 'Tracks the resolution state for entity mentions from a particular source. + + Records when the source was last resolved against the canonical clustering. + + ' +from_schema: https://data.europa.eu/ers/schema/ere +attributes: + source_id: + name: source_id + description: 'The ID or URI of the ERS client (originator) for which we track + lookup state. + + ' + from_schema: https://data.europa.eu/ers/schema/ers + domain_of: + - EntityMentionIdentifier + - LookupState + required: true + last_snapshot: + name: last_snapshot + description: 'Timestamp of the last resolution operation for this source. + + Used to determine if a refreshBulk or other update is needed. + + ' + from_schema: https://data.europa.eu/ers/schema/ers + rank: 1000 + domain_of: + - LookupState + range: datetime + required: true + +``` +
+ +### Induced + +
+```yaml +name: LookupState +description: 'Tracks the resolution state for entity mentions from a particular source. + + Records when the source was last resolved against the canonical clustering. + + ' +from_schema: https://data.europa.eu/ers/schema/ere +attributes: + source_id: + name: source_id + description: 'The ID or URI of the ERS client (originator) for which we track + lookup state. + + ' + from_schema: https://data.europa.eu/ers/schema/ers + alias: source_id + owner: LookupState + domain_of: + - EntityMentionIdentifier + - LookupState + range: string + required: true + last_snapshot: + name: last_snapshot + description: 'Timestamp of the last resolution operation for this source. + + Used to determine if a refreshBulk or other update is needed. + + ' + from_schema: https://data.europa.eu/ers/schema/ers + rank: 1000 + alias: last_snapshot + owner: LookupState + domain_of: + - LookupState + range: datetime + required: true + +``` +
\ No newline at end of file diff --git a/docs/schema/Ncname.md b/docs/schema/Ncname.md new file mode 100644 index 0000000..a59312d --- /dev/null +++ b/docs/schema/Ncname.md @@ -0,0 +1,46 @@ +# Type: Ncname + + + + +_Prefix part of CURIE_ + + + +URI: [xsd:string](http://www.w3.org/2001/XMLSchema#string) + +* [base](https://w3id.org/linkml/base): NCName + +* [uri](https://w3id.org/linkml/uri): xsd:string + +* [repr](https://w3id.org/linkml/repr): str + + + + + + + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | xsd:string | +| native | ere:ncname | + + diff --git a/docs/schema/Nodeidentifier.md b/docs/schema/Nodeidentifier.md new file mode 100644 index 0000000..052505b --- /dev/null +++ b/docs/schema/Nodeidentifier.md @@ -0,0 +1,46 @@ +# Type: Nodeidentifier + + + + +_A URI, CURIE or BNODE that represents a node in a model._ + + + +URI: [shex:nonLiteral](http://www.w3.org/ns/shex#nonLiteral) + +* [base](https://w3id.org/linkml/base): NodeIdentifier + +* [uri](https://w3id.org/linkml/uri): shex:nonLiteral + +* [repr](https://w3id.org/linkml/repr): str + + + + + + + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | shex:nonLiteral | +| native | ere:nodeidentifier | + + diff --git a/docs/schema/Objectidentifier.md b/docs/schema/Objectidentifier.md new file mode 100644 index 0000000..1238adb --- /dev/null +++ b/docs/schema/Objectidentifier.md @@ -0,0 +1,50 @@ +# Type: Objectidentifier + + + + +_A URI or CURIE that represents an object in the model._ + + + +URI: [shex:iri](http://www.w3.org/ns/shex#iri) + +* [base](https://w3id.org/linkml/base): ElementIdentifier + +* [uri](https://w3id.org/linkml/uri): shex:iri + +* [repr](https://w3id.org/linkml/repr): str 
+ + + + + + + +## Comments + +* Used for inheritance and type checking + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | shex:iri | +| native | ere:objectidentifier | + + diff --git a/docs/schema/README.md b/docs/schema/README.md index e3646ba..2818e13 100644 --- a/docs/schema/README.md +++ b/docs/schema/README.md @@ -12,17 +12,19 @@ Name: ereServiceSchema | Class | Description | | --- | --- | +| [CanonicalEntityIdentifier](CanonicalEntityIdentifier.md) | A logical identity construct providing a stable identity anchor | | [ClusterReference](ClusterReference.md) | A reference to a cluster to which an entity is deemed to belong, with an asso... | +| [Decision](Decision.md) | Canonical placement of an entity mention to a cluster | | [EntityMention](EntityMention.md) | An entity mention is a representation of a real-world entity, as provided by ... | | [EntityMentionIdentifier](EntityMentionIdentifier.md) | A container that groups the attributes needed to identify an entity mention i... | | [EREMessage](EREMessage.md) | Root abstraction to represent attributes common to both requests and results | |         [ERERequest](ERERequest.md) | Root class to represent all the requests sent to the ERE | |                 [EntityMentionResolutionRequest](EntityMentionResolutionRequest.md) | An entity resolution request sent to the ERE, containing the entity to be res... | -|                 [FullRebuildRequest](FullRebuildRequest.md) | A request to reset all the resolutions computed so far and possibly rebuild t... 
| |         [EREResponse](EREResponse.md) | Root class to represent all the responses sent by the ERE | |                 [EntityMentionResolutionResponse](EntityMentionResolutionResponse.md) | An entity resolution response returned by the ERE | |                 [EREErrorResponse](EREErrorResponse.md) | Response sent by the ERE when some error/exception occurs while processing a ... | -|                 [FullRebuildResponse](FullRebuildResponse.md) | A response to a `FullRebuildRequest`, confirming that the rebuild process has... | +| [LookupState](LookupState.md) | Tracks the resolution state for entity mentions from a particular source | +| [UserAction](UserAction.md) | Immutable record of a curator action on an entity mention resolution | @@ -30,31 +32,48 @@ Name: ereServiceSchema | Slot | Description | | --- | --- | +| [about_entity_mention](about_entity_mention.md) | The entity mention being resolved | +| [action_type](action_type.md) | The type of action the curator performed | +| [actor](actor.md) | User ID or identifier of the curator who performed the action | | [candidates](candidates.md) | The set of cluster reference/score pairs representing the candidate clusters | -| [clusterId](clusterId.md) | The identifier of the cluster/canonical entity that is considered equivalent ... | -| [confidenceScore](confidenceScore.md) | A 0-1 value of how confident the ERE is about the equivalence between the sub... | +| [cluster_id](cluster_id.md) | The identifier of the cluster/canonical entity that is considered equivalent ... | +| [confidence_score](confidence_score.md) | A 0-1 value of how confident the ERE is about the equivalence between the sub... | | [content](content.md) | A code string representing the entity mention details (eg, RDF or XML descrip... 
| -| [contentType](contentType.md) | A string about the MIME format of `content` (e | -| [entityMention](entityMention.md) | The data about the entity to be resolved | -| [entityMentionId](entityMentionId.md) | The identifier of the entity mention that has been resolved | -| [entityType](entityType.md) | A string representing the entity type (based on CET) | -| [ereRequestId](ereRequestId.md) | A string representing the unique ID of an ERE request, or the ID of the reque... | -| [errorDetail](errorDetail.md) | A human readable detailed message about the error that occurred | -| [errorTitle](errorTitle.md) | A human readable brief message about the error that occurred | -| [errorTrace](errorTrace.md) | A string representing a (stack) trace of the error that occurred | -| [errorType](errorType.md) | A string representing the error type, eg, the FQN of the raised exception | -| [excludedClusterIds](excludedClusterIds.md) | When this is present, the resolution must not bin the entity mention into any... 
| -| [identifier](identifier.md) | The identifier (with the ERS-derived components) of the entity mention | -| [requestId](requestId.md) | A string representing the unique ID of the request made to the ERS system | -| [sourceId](sourceId.md) | The ID or URI of the ERS client that originated the request | +| [content_type](content_type.md) | A string about the MIME format of `content` (e | +| [created_at](created_at.md) | When the decision was first created | +| [current_placement](current_placement.md) | The accepted cluster for this mention (latest from ERE or curator) | +| [entity_mention](entity_mention.md) | The data about the entity to be resolved | +| [entity_mention_id](entity_mention_id.md) | The identifier of the entity mention that has been resolved | +| [entity_type](entity_type.md) | A string representing the entity type (based on CET) | +| [equivalent_to](equivalent_to.md) | Entity mentions that have been resolved to this canonical entity | +| [ere_request_id](ere_request_id.md) | A string representing the unique ID of an ERE request, or the ID of the reque... | +| [error_detail](error_detail.md) | A human readable detailed message about the error that occurred | +| [error_title](error_title.md) | A human readable brief message about the error that occurred | +| [error_trace](error_trace.md) | A string representing a (stack) trace of the error that occurred | +| [error_type](error_type.md) | A string representing the error type, eg, the FQN of the raised exception | +| [excluded_cluster_ids](excluded_cluster_ids.md) | When this is present, the ERE may use this information to avoid clustering th... 
| +| [id](id.md) | Unique decision identifier | +| [identifiedBy](identifiedBy.md) | The identification triad of the entity mention | +| [identifier](identifier.md) | Unique identifier for the canonical entity | +| [last_snapshot](last_snapshot.md) | Timestamp of the last resolution operation for this source | +| [metadata](metadata.md) | JSON metadata providing context (e | +| [parsed_representation](parsed_representation.md) | JSON representation of the parsed entity data | +| [proposed_cluster_ids](proposed_cluster_ids.md) | When this is present, the ERE may use this information to try to cluster the ... | +| [request_id](request_id.md) | A string representing the unique ID of the request made to the ERS system | +| [selected_cluster](selected_cluster.md) | The cluster selected by the curator (if action was ACCEPT_TOP | +| [similarity_score](similarity_score.md) | A 0-1 score representing the pairwise comparison between a mention and a clus... | +| [source_id](source_id.md) | The ID or URI of the ERS client that originated the request | | [timestamp](timestamp.md) | The time when the message was created | | [type](type.md) | The type of the request or result | +| [updated_at](updated_at.md) | When the decision was last updated (ERE refresh or curator action) | ## Enumerations | Enumeration | Description | | --- | --- | +| [EntityType](EntityType.md) | Types of entities that can be resolved | +| [UserActionType](UserActionType.md) | Types of curator actions on entity mention resolutions | ## Types diff --git a/docs/schema/Sparqlpath.md b/docs/schema/Sparqlpath.md new file mode 100644 index 0000000..f66fe82 --- /dev/null +++ b/docs/schema/Sparqlpath.md @@ -0,0 +1,46 @@ +# Type: Sparqlpath + + + + +_A string encoding a SPARQL Property Path. 
The value of the string MUST conform to SPARQL syntax and SHOULD dereference to zero or more valid objects within the current instance document when encoded as RDF._ + + + +URI: [xsd:string](http://www.w3.org/2001/XMLSchema#string) + +* [base](https://w3id.org/linkml/base): str + +* [uri](https://w3id.org/linkml/uri): xsd:string + +* [repr](https://w3id.org/linkml/repr): str + + + + + + + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | xsd:string | +| native | ere:sparqlpath | + + diff --git a/docs/schema/String.md b/docs/schema/String.md new file mode 100644 index 0000000..61f99df --- /dev/null +++ b/docs/schema/String.md @@ -0,0 +1,46 @@ +# Type: String + + + + +_A character string_ + + + +URI: [xsd:string](http://www.w3.org/2001/XMLSchema#string) + +* [base](https://w3id.org/linkml/base): str + +* [uri](https://w3id.org/linkml/uri): xsd:string + + + + + + + + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | xsd:string | +| native | ere:string | +| exact | schema:Text | + + diff --git a/docs/schema/Time.md b/docs/schema/Time.md new file mode 100644 index 0000000..09f01ee --- /dev/null +++ b/docs/schema/Time.md @@ -0,0 +1,47 @@ +# Type: Time + + + + +_A time object represents a (local) time of day, independent of any particular day_ + + + +URI: [xsd:time](http://www.w3.org/2001/XMLSchema#time) + +* [base](https://w3id.org/linkml/base): XSDTime + +* [uri](https://w3id.org/linkml/uri): xsd:time + +* [repr](https://w3id.org/linkml/repr): str + + + + + + + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | 
+| self | xsd:time | +| native | ere:time | +| exact | schema:Time | + + diff --git a/docs/schema/Uri.md b/docs/schema/Uri.md new file mode 100644 index 0000000..cb44812 --- /dev/null +++ b/docs/schema/Uri.md @@ -0,0 +1,51 @@ +# Type: Uri + + + + +_a complete URI_ + + + +URI: [xsd:anyURI](http://www.w3.org/2001/XMLSchema#anyURI) + +* [base](https://w3id.org/linkml/base): URI + +* [uri](https://w3id.org/linkml/uri): xsd:anyURI + +* [repr](https://w3id.org/linkml/repr): str + + + + + + + +## Comments + +* in RDF serializations a slot with range of uri is treated as a literal or type xsd:anyURI unless it is an identifier or a reference to an identifier, in which case it is translated directly to a node + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | xsd:anyURI | +| native | ere:uri | +| close | schema:URL | + + diff --git a/docs/schema/Uriorcurie.md b/docs/schema/Uriorcurie.md new file mode 100644 index 0000000..e07232b --- /dev/null +++ b/docs/schema/Uriorcurie.md @@ -0,0 +1,46 @@ +# Type: Uriorcurie + + + + +_a URI or a CURIE_ + + + +URI: [xsd:anyURI](http://www.w3.org/2001/XMLSchema#anyURI) + +* [base](https://w3id.org/linkml/base): URIorCURIE + +* [uri](https://w3id.org/linkml/uri): xsd:anyURI + +* [repr](https://w3id.org/linkml/repr): str + + + + + + + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | xsd:anyURI | +| native | ere:uriorcurie | + + diff --git a/docs/schema/UserAction.md b/docs/schema/UserAction.md new file mode 100644 index 0000000..bc060dd --- /dev/null +++ b/docs/schema/UserAction.md @@ -0,0 +1,357 @@ + + +# Class: UserAction + + +_Immutable record of a curator action on an entity mention resolution._ + +_Stored in the User Action 
Log for traceability and training._ + +__ + +_NOT related to ERE messages; represents curator intent only._ + +__ + + + + + +URI: [ere:UserAction](https://data.europa.eu/ers/schema/ere/UserAction) + + + + + +```mermaid + classDiagram + class UserAction + click UserAction href "../UserAction/" + UserAction : about_entity_mention + + + + + + UserAction --> "1" EntityMentionIdentifier : about_entity_mention + click EntityMentionIdentifier href "../EntityMentionIdentifier/" + + + + UserAction : action_type + + + + + + UserAction --> "1" UserActionType : action_type + click UserActionType href "../UserActionType/" + + + + UserAction : actor + + UserAction : candidates + + + + + + UserAction --> "1..*" ClusterReference : candidates + click ClusterReference href "../ClusterReference/" + + + + UserAction : created_at + + UserAction : id + + UserAction : metadata + + UserAction : selected_cluster + + + + + + UserAction --> "0..1" ClusterReference : selected_cluster + click ClusterReference href "../ClusterReference/" + + + + +``` + + + + + + + +## Slots + +| Name | Cardinality and Range | Description | Inheritance | +| --- | --- | --- | --- | +| [id](id.md) | 1
[String](String.md) | Unique audit trail entry identifier | direct | +| [about_entity_mention](about_entity_mention.md) | 1
[EntityMentionIdentifier](EntityMentionIdentifier.md) | The entity mention the curator acted upon | direct | +| [candidates](candidates.md) | 1..*
[ClusterReference](ClusterReference.md) | The candidate clusters presented to the curator for selection | direct | +| [selected_cluster](selected_cluster.md) | 0..1
[ClusterReference](ClusterReference.md) | The cluster selected by the curator (if action was ACCEPT_TOP | direct | +| [action_type](action_type.md) | 1
[UserActionType](UserActionType.md) | The type of action the curator performed | direct | +| [actor](actor.md) | 1
[String](String.md) | User ID or identifier of the curator who performed the action | direct | +| [created_at](created_at.md) | 1
[Datetime](Datetime.md) | Timestamp when the curator action was recorded | direct | +| [metadata](metadata.md) | 0..1
[String](String.md) | JSON metadata providing context (e | direct | + + + + + + + + + + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | ere:UserAction | +| native | ere:UserAction | + + + + + + +## LinkML Source + + + +### Direct + +
+```yaml +name: UserAction +description: 'Immutable record of a curator action on an entity mention resolution. + + Stored in the User Action Log for traceability and training. + + + NOT related to ERE messages; represents curator intent only. + + ' +from_schema: https://data.europa.eu/ers/schema/ere +attributes: + id: + name: id + description: Unique audit trail entry identifier + from_schema: https://data.europa.eu/ers/schema/ers + domain_of: + - Decision + - UserAction + required: true + about_entity_mention: + name: about_entity_mention + description: The entity mention the curator acted upon + from_schema: https://data.europa.eu/ers/schema/ers + domain_of: + - Decision + - UserAction + range: EntityMentionIdentifier + required: true + candidates: + name: candidates + description: 'The candidate clusters presented to the curator for selection. + + Ordered by confidence (same as shown in curation UI). + + ' + from_schema: https://data.europa.eu/ers/schema/ers + domain_of: + - EntityMentionResolutionResponse + - Decision + - UserAction + range: ClusterReference + required: true + multivalued: true + selected_cluster: + name: selected_cluster + description: 'The cluster selected by the curator (if action was ACCEPT_TOP + + or ACCEPT_ALTERNATIVE). NULL if action was REJECT_ALL. 
+ + ' + from_schema: https://data.europa.eu/ers/schema/ers + rank: 1000 + domain_of: + - UserAction + range: ClusterReference + action_type: + name: action_type + description: The type of action the curator performed + from_schema: https://data.europa.eu/ers/schema/ers + rank: 1000 + domain_of: + - UserAction + range: UserActionType + required: true + actor: + name: actor + description: User ID or identifier of the curator who performed the action + from_schema: https://data.europa.eu/ers/schema/ers + rank: 1000 + domain_of: + - UserAction + required: true + created_at: + name: created_at + description: Timestamp when the curator action was recorded + from_schema: https://data.europa.eu/ers/schema/ers + domain_of: + - Decision + - UserAction + range: datetime + required: true + metadata: + name: metadata + description: 'JSON metadata providing context (e.g., curator notes, reasoning). + + ' + from_schema: https://data.europa.eu/ers/schema/ers + rank: 1000 + domain_of: + - UserAction + +``` +
+ +### Induced + +
+```yaml +name: UserAction +description: 'Immutable record of a curator action on an entity mention resolution. + + Stored in the User Action Log for traceability and training. + + + NOT related to ERE messages; represents curator intent only. + + ' +from_schema: https://data.europa.eu/ers/schema/ere +attributes: + id: + name: id + description: Unique audit trail entry identifier + from_schema: https://data.europa.eu/ers/schema/ers + alias: id + owner: UserAction + domain_of: + - Decision + - UserAction + range: string + required: true + about_entity_mention: + name: about_entity_mention + description: The entity mention the curator acted upon + from_schema: https://data.europa.eu/ers/schema/ers + alias: about_entity_mention + owner: UserAction + domain_of: + - Decision + - UserAction + range: EntityMentionIdentifier + required: true + candidates: + name: candidates + description: 'The candidate clusters presented to the curator for selection. + + Ordered by confidence (same as shown in curation UI). + + ' + from_schema: https://data.europa.eu/ers/schema/ers + alias: candidates + owner: UserAction + domain_of: + - EntityMentionResolutionResponse + - Decision + - UserAction + range: ClusterReference + required: true + multivalued: true + selected_cluster: + name: selected_cluster + description: 'The cluster selected by the curator (if action was ACCEPT_TOP + + or ACCEPT_ALTERNATIVE). NULL if action was REJECT_ALL. 
+ + ' + from_schema: https://data.europa.eu/ers/schema/ers + rank: 1000 + alias: selected_cluster + owner: UserAction + domain_of: + - UserAction + range: ClusterReference + action_type: + name: action_type + description: The type of action the curator performed + from_schema: https://data.europa.eu/ers/schema/ers + rank: 1000 + alias: action_type + owner: UserAction + domain_of: + - UserAction + range: UserActionType + required: true + actor: + name: actor + description: User ID or identifier of the curator who performed the action + from_schema: https://data.europa.eu/ers/schema/ers + rank: 1000 + alias: actor + owner: UserAction + domain_of: + - UserAction + range: string + required: true + created_at: + name: created_at + description: Timestamp when the curator action was recorded + from_schema: https://data.europa.eu/ers/schema/ers + alias: created_at + owner: UserAction + domain_of: + - Decision + - UserAction + range: datetime + required: true + metadata: + name: metadata + description: 'JSON metadata providing context (e.g., curator notes, reasoning). + + ' + from_schema: https://data.europa.eu/ers/schema/ers + rank: 1000 + alias: metadata + owner: UserAction + domain_of: + - UserAction + range: string + +``` +
\ No newline at end of file diff --git a/docs/schema/UserActionType.md b/docs/schema/UserActionType.md new file mode 100644 index 0000000..394bd26 --- /dev/null +++ b/docs/schema/UserActionType.md @@ -0,0 +1,70 @@ +# Enum: UserActionType + + + + +_Types of curator actions on entity mention resolutions_ + + + +URI: [ere:UserActionType](https://data.europa.eu/ers/schema/ere/UserActionType) + +## Permissible Values + +| Value | Meaning | Description | +| --- | --- | --- | +| ACCEPT_TOP | None | Curator accepted the top candidate from ERE | +| ACCEPT_ALTERNATIVE | None | Curator selected an alternative candidate | +| REJECT_ALL | None | Curator rejected all candidates | + + + + +## Slots + +| Name | Description | +| --- | --- | +| [action_type](action_type.md) | The type of action the curator performed | + + + + + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + + + +## LinkML Source + +
+```yaml +name: UserActionType +description: Types of curator actions on entity mention resolutions +from_schema: https://data.europa.eu/ers/schema/ere +rank: 1000 +permissible_values: + ACCEPT_TOP: + text: ACCEPT_TOP + description: Curator accepted the top candidate from ERE + ACCEPT_ALTERNATIVE: + text: ACCEPT_ALTERNATIVE + description: Curator selected an alternative candidate + REJECT_ALL: + text: REJECT_ALL + description: Curator rejected all candidates + +``` +
\ No newline at end of file diff --git a/docs/schema/about_entity_mention.md b/docs/schema/about_entity_mention.md new file mode 100644 index 0000000..6c95c1f --- /dev/null +++ b/docs/schema/about_entity_mention.md @@ -0,0 +1,65 @@ + + +# Slot: about_entity_mention + + + +URI: [ere:about_entity_mention](https://data.europa.eu/ers/schema/ere/about_entity_mention) +Alias: about_entity_mention + + + + + + + +## Applicable Classes + +| Name | Description | Modifies Slot | +| --- | --- | --- | +| [Decision](Decision.md) | Canonical placement of an entity mention to a cluster | no | +| [UserAction](UserAction.md) | Immutable record of a curator action on an entity mention resolution | no | + + + + + + +## Properties + +* Range: [String](String.md) + + + + +## Identifier and Mapping Information + + + + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | ere:about_entity_mention | +| native | ere:about_entity_mention | + + + + +## LinkML Source + +
+```yaml +name: about_entity_mention +alias: about_entity_mention +domain_of: +- Decision +- UserAction +range: string + +``` +
\ No newline at end of file diff --git a/docs/schema/accepted_candidate.md b/docs/schema/accepted_candidate.md new file mode 100644 index 0000000..49b8452 --- /dev/null +++ b/docs/schema/accepted_candidate.md @@ -0,0 +1,82 @@ + + +# Slot: accepted_candidate + + +_The cluster reference accepted for this entity mention_ + + + + + +URI: [ere:accepted_candidate](https://data.europa.eu/ers/schema/ere/accepted_candidate) +Alias: accepted_candidate + + + + + + + +## Applicable Classes + +| Name | Description | Modifies Slot | +| --- | --- | --- | +| [Decision](Decision.md) | Aggregate root representing a resolution decision requiring curation | no | + + + + + + +## Properties + +* Range: [ClusterReference](ClusterReference.md) + +* Required: True + + + + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | ere:accepted_candidate | +| native | ere:accepted_candidate | + + + + +## LinkML Source + +
+```yaml +name: accepted_candidate +description: The cluster reference accepted for this entity mention +from_schema: https://data.europa.eu/ers/schema/ere +rank: 1000 +alias: accepted_candidate +owner: Decision +domain_of: +- Decision +range: ClusterReference +required: true + +``` +
\ No newline at end of file diff --git a/docs/schema/action.md b/docs/schema/action.md new file mode 100644 index 0000000..ea5ab90 --- /dev/null +++ b/docs/schema/action.md @@ -0,0 +1,65 @@ + + +# Slot: action + + + +URI: [ere:action](https://data.europa.eu/ers/schema/ere/action) +Alias: action + + + + + + + +## Applicable Classes + +| Name | Description | Modifies Slot | +| --- | --- | --- | +| [AuditLog](AuditLog.md) | Audit trail entry for curation actions | no | +| [Decision](Decision.md) | Aggregate root representing a resolution decision requiring curation | no | + + + + + + +## Properties + +* Range: [String](String.md) + + + + +## Identifier and Mapping Information + + + + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | ere:action | +| native | ere:action | + + + + +## LinkML Source + +
+```yaml +name: action +alias: action +domain_of: +- Decision +- AuditLog +range: string + +``` +
\ No newline at end of file diff --git a/docs/schema/action_type.md b/docs/schema/action_type.md new file mode 100644 index 0000000..df13b03 --- /dev/null +++ b/docs/schema/action_type.md @@ -0,0 +1,82 @@ + + +# Slot: action_type + + +_The type of action the curator performed_ + + + + + +URI: [ere:action_type](https://data.europa.eu/ers/schema/ere/action_type) +Alias: action_type + + + + + + + +## Applicable Classes + +| Name | Description | Modifies Slot | +| --- | --- | --- | +| [UserAction](UserAction.md) | Immutable record of a curator action on an entity mention resolution | no | + + + + + + +## Properties + +* Range: [UserActionType](UserActionType.md) + +* Required: True + + + + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | ere:action_type | +| native | ere:action_type | + + + + +## LinkML Source + +
+```yaml +name: action_type +description: The type of action the curator performed +from_schema: https://data.europa.eu/ers/schema/ere +rank: 1000 +alias: action_type +owner: UserAction +domain_of: +- UserAction +range: UserActionType +required: true + +``` +
\ No newline at end of file diff --git a/docs/schema/actor.md b/docs/schema/actor.md new file mode 100644 index 0000000..f33e64e --- /dev/null +++ b/docs/schema/actor.md @@ -0,0 +1,82 @@ + + +# Slot: actor + + +_User ID or identifier of the curator who performed the action_ + + + + + +URI: [ere:actor](https://data.europa.eu/ers/schema/ere/actor) +Alias: actor + + + + + + + +## Applicable Classes + +| Name | Description | Modifies Slot | +| --- | --- | --- | +| [UserAction](UserAction.md) | Immutable record of a curator action on an entity mention resolution | no | + + + + + + +## Properties + +* Range: [String](String.md) + +* Required: True + + + + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | ere:actor | +| native | ere:actor | + + + + +## LinkML Source + +
+```yaml +name: actor +description: User ID or identifier of the curator who performed the action +from_schema: https://data.europa.eu/ers/schema/ere +rank: 1000 +alias: actor +owner: UserAction +domain_of: +- UserAction +range: string +required: true + +``` +
\ No newline at end of file diff --git a/docs/schema/candidates.md b/docs/schema/candidates.md index f676b96..fc7f9db 100644 --- a/docs/schema/candidates.md +++ b/docs/schema/candidates.md @@ -3,15 +3,6 @@ # Slot: candidates -_The set of cluster reference/score pairs representing the candidate clusters_ - -_that the entity mention in the original request could align to (be equivalent to)._ - -__ - - - - URI: [ere:candidates](https://data.europa.eu/ers/schema/ere/candidates) Alias: candidates @@ -26,7 +17,9 @@ Alias: candidates | Name | Description | Modifies Slot | | --- | --- | --- | +| [Decision](Decision.md) | Canonical placement of an entity mention to a cluster | no | | [EntityMentionResolutionResponse](EntityMentionResolutionResponse.md) | An entity resolution response returned by the ERE | no | +| [UserAction](UserAction.md) | Immutable record of a curator action on an entity mention resolution | no | @@ -35,11 +28,7 @@ Alias: candidates ## Properties -* Range: [ClusterReference](ClusterReference.md) - -* Multivalued: True - -* Required: True +* Range: [String](String.md) @@ -51,13 +40,6 @@ Alias: candidates -### Schema Source - - -* from schema: https://data.europa.eu/ers/schema/ere - - - ## Mappings @@ -74,21 +56,12 @@ Alias: candidates
```yaml name: candidates -description: 'The set of cluster reference/score pairs representing the candidate - clusters - - that the entity mention in the original request could align to (be equivalent to). - - ' -from_schema: https://data.europa.eu/ers/schema/ere -rank: 1000 alias: candidates -owner: EntityMentionResolutionResponse domain_of: - EntityMentionResolutionResponse -range: ClusterReference -required: true -multivalued: true +- Decision +- UserAction +range: string ```
\ No newline at end of file diff --git a/docs/schema/changes.md b/docs/schema/changes.md new file mode 100644 index 0000000..94a7fc2 --- /dev/null +++ b/docs/schema/changes.md @@ -0,0 +1,79 @@ + + +# Slot: changes + + +_JSON representation of action-specific context_ + + + + + +URI: [ere:changes](https://data.europa.eu/ers/schema/ere/changes) +Alias: changes + + + + + + + +## Applicable Classes + +| Name | Description | Modifies Slot | +| --- | --- | --- | +| [AuditLog](AuditLog.md) | Audit trail entry for curation actions | no | + + + + + + +## Properties + +* Range: [String](String.md) + + + + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | ere:changes | +| native | ere:changes | + + + + +## LinkML Source + +
+```yaml +name: changes +description: JSON representation of action-specific context +from_schema: https://data.europa.eu/ers/schema/ere +rank: 1000 +alias: changes +owner: AuditLog +domain_of: +- AuditLog +range: string + +``` +
\ No newline at end of file diff --git a/docs/schema/clusterId.md b/docs/schema/cluster_id.md similarity index 84% rename from docs/schema/clusterId.md rename to docs/schema/cluster_id.md index b25db30..9e605c7 100644 --- a/docs/schema/clusterId.md +++ b/docs/schema/cluster_id.md @@ -1,6 +1,6 @@ -# Slot: clusterId +# Slot: cluster_id _The identifier of the cluster/canonical entity that is considered equivalent to the_ @@ -13,8 +13,8 @@ __ -URI: [ere:clusterId](https://data.europa.eu/ers/schema/ere/clusterId) -Alias: clusterId +URI: [ere:cluster_id](https://data.europa.eu/ers/schema/ere/cluster_id) +Alias: cluster_id @@ -61,8 +61,8 @@ Alias: clusterId | Mapping Type | Mapped Value | | --- | --- | -| self | ere:clusterId | -| native | ere:clusterId | +| self | ere:cluster_id | +| native | ere:cluster_id | @@ -71,7 +71,7 @@ Alias: clusterId
```yaml -name: clusterId +name: cluster_id description: 'The identifier of the cluster/canonical entity that is considered equivalent to the @@ -80,7 +80,7 @@ description: 'The identifier of the cluster/canonical entity that is considered ' from_schema: https://data.europa.eu/ers/schema/ere rank: 1000 -alias: clusterId +alias: cluster_id owner: ClusterReference domain_of: - ClusterReference diff --git a/docs/schema/confidence_score.md b/docs/schema/confidence_score.md new file mode 100644 index 0000000..04aa95c --- /dev/null +++ b/docs/schema/confidence_score.md @@ -0,0 +1,97 @@ + + +# Slot: confidence_score + + +_A 0-1 value of how confident the ERE is about the equivalence between the subject entity mention_ + +_and the target canonical entity._ + +__ + + + + + +URI: [ere:confidence_score](https://data.europa.eu/ers/schema/ere/confidence_score) +Alias: confidence_score + + + + + + + +## Applicable Classes + +| Name | Description | Modifies Slot | +| --- | --- | --- | +| [ClusterReference](ClusterReference.md) | A reference to a cluster to which an entity is deemed to belong, with an asso... | no | + + + + + + +## Properties + +* Range: [Float](Float.md) + +* Required: True + +* Minimum Value: 0 + +* Maximum Value: 1 + + + + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | ere:confidence_score | +| native | ere:confidence_score | + + + + +## LinkML Source + +
+```yaml +name: confidence_score +description: 'A 0-1 value of how confident the ERE is about the equivalence between + the subject entity mention + + and the target canonical entity. + + ' +from_schema: https://data.europa.eu/ers/schema/ere +rank: 1000 +alias: confidence_score +owner: ClusterReference +domain_of: +- ClusterReference +range: float +required: true +minimum_value: 0.0 +maximum_value: 1.0 + +``` +
\ No newline at end of file diff --git a/docs/schema/contentType.md b/docs/schema/content_type.md similarity index 80% rename from docs/schema/contentType.md rename to docs/schema/content_type.md index 0bb4d7e..55112b7 100644 --- a/docs/schema/contentType.md +++ b/docs/schema/content_type.md @@ -1,6 +1,6 @@ -# Slot: contentType +# Slot: content_type _A string about the MIME format of `content` (e.g. text/turtle, application/ld+json)_ @@ -11,8 +11,8 @@ __ -URI: [ere:contentType](https://data.europa.eu/ers/schema/ere/contentType) -Alias: contentType +URI: [ere:content_type](https://data.europa.eu/ers/schema/ere/content_type) +Alias: content_type @@ -59,8 +59,8 @@ Alias: contentType | Mapping Type | Mapped Value | | --- | --- | -| self | ere:contentType | -| native | ere:contentType | +| self | ere:content_type | +| native | ere:content_type | @@ -69,13 +69,13 @@ Alias: contentType
```yaml -name: contentType +name: content_type description: 'A string about the MIME format of `content` (e.g. text/turtle, application/ld+json) ' from_schema: https://data.europa.eu/ers/schema/ere rank: 1000 -alias: contentType +alias: content_type owner: EntityMention domain_of: - EntityMention diff --git a/docs/schema/created_at.md b/docs/schema/created_at.md new file mode 100644 index 0000000..ea2eb60 --- /dev/null +++ b/docs/schema/created_at.md @@ -0,0 +1,65 @@ + + +# Slot: created_at + + + +URI: [ere:created_at](https://data.europa.eu/ers/schema/ere/created_at) +Alias: created_at + + + + + + + +## Applicable Classes + +| Name | Description | Modifies Slot | +| --- | --- | --- | +| [Decision](Decision.md) | Canonical placement of an entity mention to a cluster | no | +| [UserAction](UserAction.md) | Immutable record of a curator action on an entity mention resolution | no | + + + + + + +## Properties + +* Range: [String](String.md) + + + + +## Identifier and Mapping Information + + + + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | ere:created_at | +| native | ere:created_at | + + + + +## LinkML Source + +
+```yaml +name: created_at +alias: created_at +domain_of: +- Decision +- UserAction +range: string + +``` +
\ No newline at end of file diff --git a/docs/schema/current_placement.md b/docs/schema/current_placement.md new file mode 100644 index 0000000..304b0ec --- /dev/null +++ b/docs/schema/current_placement.md @@ -0,0 +1,86 @@ + + +# Slot: current_placement + + +_The accepted cluster for this mention (latest from ERE or curator)._ + +__ + + + + + +URI: [ere:current_placement](https://data.europa.eu/ers/schema/ere/current_placement) +Alias: current_placement + + + + + + + +## Applicable Classes + +| Name | Description | Modifies Slot | +| --- | --- | --- | +| [Decision](Decision.md) | Canonical placement of an entity mention to a cluster | no | + + + + + + +## Properties + +* Range: [ClusterReference](ClusterReference.md) + +* Required: True + + + + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | ere:current_placement | +| native | ere:current_placement | + + + + +## LinkML Source + +
+```yaml +name: current_placement +description: 'The accepted cluster for this mention (latest from ERE or curator). + + ' +from_schema: https://data.europa.eu/ers/schema/ere +rank: 1000 +alias: current_placement +owner: Decision +domain_of: +- Decision +range: ClusterReference +required: true + +``` +
\ No newline at end of file diff --git a/docs/schema/entity_mention.md b/docs/schema/entity_mention.md new file mode 100644 index 0000000..e8b63c0 --- /dev/null +++ b/docs/schema/entity_mention.md @@ -0,0 +1,91 @@ + + +# Slot: entity_mention + + +_The data about the entity to be resolved. Note that, at least for the moment, we don't support_ + +_batch requests, so this property is single-valued._ + +__ + + + + + +URI: [ere:entity_mention](https://data.europa.eu/ers/schema/ere/entity_mention) +Alias: entity_mention + + + + + + + +## Applicable Classes + +| Name | Description | Modifies Slot | +| --- | --- | --- | +| [EntityMentionResolutionRequest](EntityMentionResolutionRequest.md) | An entity resolution request sent to the ERE, containing the entity to be res... | no | + + + + + + +## Properties + +* Range: [EntityMention](EntityMention.md) + +* Required: True + + + + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | ere:entity_mention | +| native | ere:entity_mention | + + + + +## LinkML Source + +
+```yaml +name: entity_mention +description: 'The data about the entity to be resolved. Note that, at least for the + moment, we don''t support + + batch requests, so this property is single-valued. + + ' +from_schema: https://data.europa.eu/ers/schema/ere +rank: 1000 +alias: entity_mention +owner: EntityMentionResolutionRequest +domain_of: +- EntityMentionResolutionRequest +range: EntityMention +required: true + +``` +
\ No newline at end of file diff --git a/docs/schema/entityMentionId.md b/docs/schema/entity_mention_id.md similarity index 71% rename from docs/schema/entityMentionId.md rename to docs/schema/entity_mention_id.md index e99b90e..0265f99 100644 --- a/docs/schema/entityMentionId.md +++ b/docs/schema/entity_mention_id.md @@ -1,13 +1,13 @@ -# Slot: entityMentionId +# Slot: entity_mention_id _The identifier of the entity mention that has been resolved._ __ -_This isn't strictly needed, since the `ereRequestId` already links the response to _ +_This isn't strictly needed, since the `ere_request_id` already links the response to _ _the request's entity mention. Yet, it's reported for convenience._ @@ -17,8 +17,8 @@ __ -URI: [ere:entityMentionId](https://data.europa.eu/ers/schema/ere/entityMentionId) -Alias: entityMentionId +URI: [ere:entity_mention_id](https://data.europa.eu/ers/schema/ere/entity_mention_id) +Alias: entity_mention_id @@ -65,8 +65,8 @@ Alias: entityMentionId | Mapping Type | Mapped Value | | --- | --- | -| self | ere:entityMentionId | -| native | ere:entityMentionId | +| self | ere:entity_mention_id | +| native | ere:entity_mention_id | @@ -75,13 +75,13 @@ Alias: entityMentionId
```yaml -name: entityMentionId +name: entity_mention_id description: "The identifier of the entity mention that has been resolved.\n\nThis\ - \ isn't strictly needed, since the `ereRequestId` already links the response to\ + \ isn't strictly needed, since the `ere_request_id` already links the response to\ \ \nthe request's entity mention. Yet, it's reported for convenience.\n" from_schema: https://data.europa.eu/ers/schema/ere rank: 1000 -alias: entityMentionId +alias: entity_mention_id owner: EntityMentionResolutionResponse domain_of: - EntityMentionResolutionResponse diff --git a/docs/schema/entityType.md b/docs/schema/entity_type.md similarity index 83% rename from docs/schema/entityType.md rename to docs/schema/entity_type.md index ed7d152..b1ebc52 100644 --- a/docs/schema/entityType.md +++ b/docs/schema/entity_type.md @@ -1,6 +1,6 @@ -# Slot: entityType +# Slot: entity_type _A string representing the entity type (based on CET). This is typically a URI._ @@ -11,7 +11,7 @@ _Note that this is at this level, and not at `EntityMention`, since, as said abo _it's needed to identify the entity, even when its content is not present. For the same_ -_reason, it's used both for `EREResolutionRequest` and `EREResolutionResponse` messages., _ +_reason, it's used both for `EREResolutionRequest` and `EREResolutionResponse` messages.,_ __ @@ -19,8 +19,8 @@ __ -URI: [ere:entityType](https://data.europa.eu/ers/schema/ere/entityType) -Alias: entityType +URI: [ere:entity_type](https://data.europa.eu/ers/schema/ere/entity_type) +Alias: entity_type @@ -67,8 +67,8 @@ Alias: entityType | Mapping Type | Mapped Value | | --- | --- | -| self | ere:entityType | -| native | ere:entityType | +| self | ere:entity_type | +| native | ere:entity_type | @@ -77,15 +77,15 @@ Alias: entityType
```yaml -name: entityType +name: entity_type description: "A string representing the entity type (based on CET). This is typically\ \ a URI.\n\nNote that this is at this level, and not at `EntityMention`, since,\ \ as said above, \nit's needed to identify the entity, even when its content is\ \ not present. For the same\nreason, it's used both for `EREResolutionRequest` and\ - \ `EREResolutionResponse` messages., \n" + \ `EREResolutionResponse` messages.,\n" from_schema: https://data.europa.eu/ers/schema/ere rank: 1000 -alias: entityType +alias: entity_type owner: EntityMentionIdentifier domain_of: - EntityMentionIdentifier diff --git a/docs/schema/equivalent_to.md b/docs/schema/equivalent_to.md new file mode 100644 index 0000000..1f93ced --- /dev/null +++ b/docs/schema/equivalent_to.md @@ -0,0 +1,85 @@ + + +# Slot: equivalent_to + + +_Entity mentions that have been resolved to this canonical entity._ + + + + + +URI: [ere:equivalent_to](https://data.europa.eu/ers/schema/ere/equivalent_to) +Alias: equivalent_to + + + + + + + +## Applicable Classes + +| Name | Description | Modifies Slot | +| --- | --- | --- | +| [CanonicalEntityIdentifier](CanonicalEntityIdentifier.md) | A logical identity construct providing a stable identity anchor | no | + + + + + + +## Properties + +* Range: [EntityMentionIdentifier](EntityMentionIdentifier.md) + +* Multivalued: True + +* Required: True + + + + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | ere:equivalent_to | +| native | ere:equivalent_to | + + + + +## LinkML Source + +
+```yaml +name: equivalent_to +description: Entity mentions that have been resolved to this canonical entity. +from_schema: https://data.europa.eu/ers/schema/ere +rank: 1000 +alias: equivalent_to +owner: CanonicalEntityIdentifier +domain_of: +- CanonicalEntityIdentifier +range: EntityMentionIdentifier +required: true +multivalued: true + +``` +
\ No newline at end of file diff --git a/docs/schema/ereRequestId.md b/docs/schema/ere_request_id.md similarity index 62% rename from docs/schema/ereRequestId.md rename to docs/schema/ere_request_id.md index 8a3d622..f98e468 100644 --- a/docs/schema/ereRequestId.md +++ b/docs/schema/ere_request_id.md @@ -1,20 +1,28 @@ -# Slot: ereRequestId +# Slot: ere_request_id _A string representing the unique ID of an ERE request, or the ID of the request a response is about._ -_This **is not** the same as `requestId` + `sourceId`._ +_This **is not** the same as `request_id` + `source_id`._ __ +_Note on notification responses: as per ERE contract, an `EntityMentionResolutionResponse` message_ + +_can originate from within the ERE, without any previous request counterpart, as a notification of_ + +_resolution update. In this case, `ere_request_id` has the prefix `ereNotification:`._ + +__ -URI: [ere:ereRequestId](https://data.europa.eu/ers/schema/ere/ereRequestId) -Alias: ereRequestId + +URI: [ere:ere_request_id](https://data.europa.eu/ers/schema/ere/ere_request_id) +Alias: ere_request_id @@ -26,14 +34,12 @@ Alias: ereRequestId | Name | Description | Modifies Slot | | --- | --- | --- | -| [EREResponse](EREResponse.md) | Root class to represent all the responses sent by the ERE | no | | [EREMessage](EREMessage.md) | Root abstraction to represent attributes common to both requests and results | no | -| [ERERequest](ERERequest.md) | Root class to represent all the requests sent to the ERE | no | +| [EREErrorResponse](EREErrorResponse.md) | Response sent by the ERE when some error/exception occurs while processing a ... | no | +| [EREResponse](EREResponse.md) | Root class to represent all the responses sent by the ERE | no | | [EntityMentionResolutionRequest](EntityMentionResolutionRequest.md) | An entity resolution request sent to the ERE, containing the entity to be res... 
| no | +| [ERERequest](ERERequest.md) | Root class to represent all the requests sent to the ERE | no | | [EntityMentionResolutionResponse](EntityMentionResolutionResponse.md) | An entity resolution response returned by the ERE | no | -| [EREErrorResponse](EREErrorResponse.md) | Response sent by the ERE when some error/exception occurs while processing a ... | no | -| [FullRebuildRequest](FullRebuildRequest.md) | A request to reset all the resolutions computed so far and possibly rebuild t... | no | -| [FullRebuildResponse](FullRebuildResponse.md) | A response to a `FullRebuildRequest`, confirming that the rebuild process has... | no | @@ -68,8 +74,8 @@ Alias: ereRequestId | Mapping Type | Mapped Value | | --- | --- | -| self | ere:ereRequestId | -| native | ere:ereRequestId | +| self | ere:ere_request_id | +| native | ere:ere_request_id | @@ -78,16 +84,25 @@ Alias: ereRequestId
```yaml -name: ereRequestId +name: ere_request_id description: 'A string representing the unique ID of an ERE request, or the ID of the request a response is about. - This **is not** the same as `requestId` + `sourceId`. + This **is not** the same as `request_id` + `source_id`. + + + Note on notification responses: as per ERE contract, an `EntityMentionResolutionResponse` + message + + can originate from within the ERE, without any previous request counterpart, as + a notification of + + resolution update. In this case, `ere_request_id` has the prefix `ereNotification:`. ' from_schema: https://data.europa.eu/ers/schema/ere rank: 1000 -alias: ereRequestId +alias: ere_request_id owner: EREMessage domain_of: - EREMessage diff --git a/docs/schema/error_detail.md b/docs/schema/error_detail.md new file mode 100644 index 0000000..e890ee8 --- /dev/null +++ b/docs/schema/error_detail.md @@ -0,0 +1,90 @@ + + +# Slot: error_detail + + +_A human readable detailed message about the error that occurred._ + +__ + +_This corresponds to RFC-9457's `detail`._ + +__ + + + + + +URI: [ere:error_detail](https://data.europa.eu/ers/schema/ere/error_detail) +Alias: error_detail + + + + + + + +## Applicable Classes + +| Name | Description | Modifies Slot | +| --- | --- | --- | +| [EREErrorResponse](EREErrorResponse.md) | Response sent by the ERE when some error/exception occurs while processing a ... | no | + + + + + + +## Properties + +* Range: [String](String.md) + + + + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | ere:error_detail | +| native | ere:error_detail | + + + + +## LinkML Source + +
+```yaml +name: error_detail +description: 'A human readable detailed message about the error that occurred. + + + This corresponds to RFC-9457''s `detail`. + + ' +from_schema: https://data.europa.eu/ers/schema/ere +rank: 1000 +alias: error_detail +owner: EREErrorResponse +domain_of: +- EREErrorResponse +range: string + +``` +
\ No newline at end of file diff --git a/docs/schema/error_title.md b/docs/schema/error_title.md new file mode 100644 index 0000000..9ef86c4 --- /dev/null +++ b/docs/schema/error_title.md @@ -0,0 +1,90 @@ + + +# Slot: error_title + + +_A human readable brief message about the error that occurred._ + +__ + +_This corresponds to RFC-9457's `title`._ + +__ + + + + + +URI: [ere:error_title](https://data.europa.eu/ers/schema/ere/error_title) +Alias: error_title + + + + + + + +## Applicable Classes + +| Name | Description | Modifies Slot | +| --- | --- | --- | +| [EREErrorResponse](EREErrorResponse.md) | Response sent by the ERE when some error/exception occurs while processing a ... | no | + + + + + + +## Properties + +* Range: [String](String.md) + + + + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | ere:error_title | +| native | ere:error_title | + + + + +## LinkML Source + +
+```yaml +name: error_title +description: 'A human readable brief message about the error that occurred. + + + This corresponds to RFC-9457''s `title`. + + ' +from_schema: https://data.europa.eu/ers/schema/ere +rank: 1000 +alias: error_title +owner: EREErrorResponse +domain_of: +- EREErrorResponse +range: string + +``` +
\ No newline at end of file diff --git a/docs/schema/error_trace.md b/docs/schema/error_trace.md new file mode 100644 index 0000000..e39b814 --- /dev/null +++ b/docs/schema/error_trace.md @@ -0,0 +1,94 @@ + + +# Slot: error_trace + + +_A string representing a (stack) trace of the error that occurred._ + +__ + +_This is optional and typically used for debugging purposes only, since_ + +_exposing this kind of server-side information is a security risk._ + +__ + + + + + +URI: [ere:error_trace](https://data.europa.eu/ers/schema/ere/error_trace) +Alias: error_trace + + + + + + + +## Applicable Classes + +| Name | Description | Modifies Slot | +| --- | --- | --- | +| [EREErrorResponse](EREErrorResponse.md) | Response sent by the ERE when some error/exception occurs while processing a ... | no | + + + + + + +## Properties + +* Range: [String](String.md) + + + + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | ere:error_trace | +| native | ere:error_trace | + + + + +## LinkML Source + +
+```yaml +name: error_trace +description: 'A string representing a (stack) trace of the error that occurred. + + + This is optional and typically used for debugging purposes only, since + + exposing this kind of server-side information is a security risk. + + ' +from_schema: https://data.europa.eu/ers/schema/ere +rank: 1000 +alias: error_trace +owner: EREErrorResponse +domain_of: +- EREErrorResponse +range: string + +``` +
\ No newline at end of file diff --git a/docs/schema/error_type.md b/docs/schema/error_type.md new file mode 100644 index 0000000..fb0a669 --- /dev/null +++ b/docs/schema/error_type.md @@ -0,0 +1,93 @@ + + +# Slot: error_type + + +_A string representing the error type, eg, the FQN of the raised exception._ + +__ + +_This corresponds to RFC-9457's `type`._ + +__ + + + + + +URI: [ere:error_type](https://data.europa.eu/ers/schema/ere/error_type) +Alias: error_type + + + + + + + +## Applicable Classes + +| Name | Description | Modifies Slot | +| --- | --- | --- | +| [EREErrorResponse](EREErrorResponse.md) | Response sent by the ERE when some error/exception occurs while processing a ... | no | + + + + + + +## Properties + +* Range: [String](String.md) + +* Required: True + + + + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | ere:error_type | +| native | ere:error_type | + + + + +## LinkML Source + +
+```yaml +name: error_type +description: 'A string representing the error type, eg, the FQN of the raised exception. + + + This corresponds to RFC-9457''s `type`. + + ' +from_schema: https://data.europa.eu/ers/schema/ere +rank: 1000 +alias: error_type +owner: EREErrorResponse +domain_of: +- EREErrorResponse +range: string +required: true + +``` +
\ No newline at end of file diff --git a/docs/schema/excludedClusterIds.md b/docs/schema/excludedClusterIds.md deleted file mode 100644 index 4afa045..0000000 --- a/docs/schema/excludedClusterIds.md +++ /dev/null @@ -1,106 +0,0 @@ - - -# Slot: excludedClusterIds - - -_When this is present, the resolution must not bin the entity mention into any of the_ - -_listed clusters. This can be used to reject a previous resolution proposed by the ERE._ - -__ - -_The exact reaction to this is implementation dependent. In the simplest case, the ERE_ - -_might just create a singleton cluster with this entity as member. In a more advanced _ - -_case, it might recompute the similarity with more advanced algorithms or use updated_ - -_data._ - -__ - -_TODO: Can this be revised? What does it happen if an exclusion was made by mistake?_ - -__ - - - - - -URI: [ere:excludedClusterIds](https://data.europa.eu/ers/schema/ere/excludedClusterIds) -Alias: excludedClusterIds - - - - - - - -## Applicable Classes - -| Name | Description | Modifies Slot | -| --- | --- | --- | -| [EntityMentionResolutionRequest](EntityMentionResolutionRequest.md) | An entity resolution request sent to the ERE, containing the entity to be res... | no | - - - - - - -## Properties - -* Range: [String](String.md) - -* Multivalued: True - - - - -## Identifier and Mapping Information - - - - - - -### Schema Source - - -* from schema: https://data.europa.eu/ers/schema/ere - - - - -## Mappings - -| Mapping Type | Mapped Value | -| --- | --- | -| self | ere:excludedClusterIds | -| native | ere:excludedClusterIds | - - - - -## LinkML Source - -
-```yaml -name: excludedClusterIds -description: "When this is present, the resolution must not bin the entity mention\ - \ into any of the\nlisted clusters. This can be used to reject a previous resolution\ - \ proposed by the ERE.\n\nThe exact reaction to this is implementation dependent.\ - \ In the simplest case, the ERE\nmight just create a singleton cluster with this\ - \ entity as member. In a more advanced \ncase, it might recompute the similarity\ - \ with more advanced algorithms or use updated\ndata.\n\nTODO: Can this be revised?\ - \ What does it happen if an exclusion was made by mistake?\n" -from_schema: https://data.europa.eu/ers/schema/ere -rank: 1000 -alias: excludedClusterIds -owner: EntityMentionResolutionRequest -domain_of: -- EntityMentionResolutionRequest -range: string -multivalued: true - -``` -
\ No newline at end of file diff --git a/docs/schema/excluded_cluster_ids.md b/docs/schema/excluded_cluster_ids.md new file mode 100644 index 0000000..0ea0ca5 --- /dev/null +++ b/docs/schema/excluded_cluster_ids.md @@ -0,0 +1,116 @@ + + +# Slot: excluded_cluster_ids + + +_When this is present, the ERE may use this information to avoid clustering the entity in _ + +_the listed clusters._ + +__ + +_This can be used to notify the ERE that a curator has rejected a previous resolution _ + +_proposed by the ERE._ + +__ + +_As for `proposed_cluster_ids`, the ERE **has no obligation** to fulfil the exclusions, and _ + +_it remains the ultimate authority to provide the final resolution decision._ + +__ + +_Similarly, the exact reaction to this is implementation dependent. In the simplest case, the ERE_ + +_might just create a singleton cluster with the current entity as member. In a more advanced _ + +_case, it might recompute the similarity with more advanced algorithms or use updated_ + +_data._ + +__ + + + + + +URI: [ere:excluded_cluster_ids](https://data.europa.eu/ers/schema/ere/excluded_cluster_ids) +Alias: excluded_cluster_ids + + + + + + + +## Applicable Classes + +| Name | Description | Modifies Slot | +| --- | --- | --- | +| [EntityMentionResolutionRequest](EntityMentionResolutionRequest.md) | An entity resolution request sent to the ERE, containing the entity to be res... | no | + + + + + + +## Properties + +* Range: [String](String.md) + +* Multivalued: True + + + + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | ere:excluded_cluster_ids | +| native | ere:excluded_cluster_ids | + + + + +## LinkML Source + +
+```yaml +name: excluded_cluster_ids +description: "When this is present, the ERE may use this information to avoid clustering\ + \ the entity in \nthe listed clusters.\n\nThis can be used to notify the ERE that\ + \ a curator has rejected a previous resolution \nproposed by the ERE.\n\nAs for\ + \ `proposed_cluster_ids`, the ERE **has no obligation** to fulfil the exclusions,\ + \ and \nit remains the ultimate authority to provide the final resolution decision.\n\ + \nSimilarly, the exact reaction to this is implementation dependent. In the simplest\ + \ case, the ERE\nmight just create a singleton cluster with the current entity as\ + \ member. In a more advanced \ncase, it might recompute the similarity with more\ + \ advanced algorithms or use updated\ndata.\n" +from_schema: https://data.europa.eu/ers/schema/ere +rank: 1000 +alias: excluded_cluster_ids +owner: EntityMentionResolutionRequest +domain_of: +- EntityMentionResolutionRequest +range: string +multivalued: true + +``` +
\ No newline at end of file diff --git a/docs/schema/id.md b/docs/schema/id.md new file mode 100644 index 0000000..188feea --- /dev/null +++ b/docs/schema/id.md @@ -0,0 +1,65 @@ + + +# Slot: id + + + +URI: [ere:id](https://data.europa.eu/ers/schema/ere/id) +Alias: id + + + + + + + +## Applicable Classes + +| Name | Description | Modifies Slot | +| --- | --- | --- | +| [Decision](Decision.md) | Canonical placement of an entity mention to a cluster | no | +| [UserAction](UserAction.md) | Immutable record of a curator action on an entity mention resolution | no | + + + + + + +## Properties + +* Range: [String](String.md) + + + + +## Identifier and Mapping Information + + + + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | ere:id | +| native | ere:id | + + + + +## LinkML Source + +
+```yaml +name: id +alias: id +domain_of: +- Decision +- UserAction +range: string + +``` +
\ No newline at end of file diff --git a/docs/schema/identifiedBy.md b/docs/schema/identifiedBy.md new file mode 100644 index 0000000..1bef916 --- /dev/null +++ b/docs/schema/identifiedBy.md @@ -0,0 +1,86 @@ + + +# Slot: identifiedBy + + +_The identification triad of the entity mention._ + +__ + + + + + +URI: [ere:identifiedBy](https://data.europa.eu/ers/schema/ere/identifiedBy) +Alias: identifiedBy + + + + + + + +## Applicable Classes + +| Name | Description | Modifies Slot | +| --- | --- | --- | +| [EntityMention](EntityMention.md) | An entity mention is a representation of a real-world entity, as provided by ... | no | + + + + + + +## Properties + +* Range: [EntityMentionIdentifier](EntityMentionIdentifier.md) + +* Required: True + + + + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | ere:identifiedBy | +| native | ere:identifiedBy | + + + + +## LinkML Source + +
+```yaml +name: identifiedBy +description: 'The identification triad of the entity mention. + + ' +from_schema: https://data.europa.eu/ers/schema/ere +rank: 1000 +alias: identifiedBy +owner: EntityMention +domain_of: +- EntityMention +range: EntityMentionIdentifier +required: true + +``` +
\ No newline at end of file diff --git a/docs/schema/identifier.md b/docs/schema/identifier.md new file mode 100644 index 0000000..068d687 --- /dev/null +++ b/docs/schema/identifier.md @@ -0,0 +1,82 @@ + + +# Slot: identifier + + +_Unique identifier for the canonical entity._ + + + + + +URI: [ere:identifier](https://data.europa.eu/ers/schema/ere/identifier) +Alias: identifier + + + + + + + +## Applicable Classes + +| Name | Description | Modifies Slot | +| --- | --- | --- | +| [CanonicalEntityIdentifier](CanonicalEntityIdentifier.md) | A logical identity construct providing a stable identity anchor | no | + + + + + + +## Properties + +* Range: [String](String.md) + +* Required: True + + + + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | ere:identifier | +| native | ere:identifier | + + + + +## LinkML Source + +
+```yaml +name: identifier +description: Unique identifier for the canonical entity. +from_schema: https://data.europa.eu/ers/schema/ere +rank: 1000 +alias: identifier +owner: CanonicalEntityIdentifier +domain_of: +- CanonicalEntityIdentifier +range: string +required: true + +``` +
\ No newline at end of file diff --git a/docs/schema/instance_id.md b/docs/schema/instance_id.md new file mode 100644 index 0000000..5ffb28a --- /dev/null +++ b/docs/schema/instance_id.md @@ -0,0 +1,82 @@ + + +# Slot: instance_id + + +_Identifier of the modified entity_ + + + + + +URI: [ere:instance_id](https://data.europa.eu/ers/schema/ere/instance_id) +Alias: instance_id + + + + + + + +## Applicable Classes + +| Name | Description | Modifies Slot | +| --- | --- | --- | +| [AuditLog](AuditLog.md) | Audit trail entry for curation actions | no | + + + + + + +## Properties + +* Range: [String](String.md) + +* Required: True + + + + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | ere:instance_id | +| native | ere:instance_id | + + + + +## LinkML Source + +
+```yaml +name: instance_id +description: Identifier of the modified entity +from_schema: https://data.europa.eu/ers/schema/ere +rank: 1000 +alias: instance_id +owner: AuditLog +domain_of: +- AuditLog +range: string +required: true + +``` +
\ No newline at end of file diff --git a/docs/schema/instance_type.md b/docs/schema/instance_type.md new file mode 100644 index 0000000..7578d28 --- /dev/null +++ b/docs/schema/instance_type.md @@ -0,0 +1,82 @@ + + +# Slot: instance_type + + +_Type of entity being modified (e.g., Decision)_ + + + + + +URI: [ere:instance_type](https://data.europa.eu/ers/schema/ere/instance_type) +Alias: instance_type + + + + + + + +## Applicable Classes + +| Name | Description | Modifies Slot | +| --- | --- | --- | +| [AuditLog](AuditLog.md) | Audit trail entry for curation actions | no | + + + + + + +## Properties + +* Range: [String](String.md) + +* Required: True + + + + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | ere:instance_type | +| native | ere:instance_type | + + + + +## LinkML Source + +
+```yaml +name: instance_type +description: Type of entity being modified (e.g., Decision) +from_schema: https://data.europa.eu/ers/schema/ere +rank: 1000 +alias: instance_type +owner: AuditLog +domain_of: +- AuditLog +range: string +required: true + +``` +
\ No newline at end of file diff --git a/docs/schema/last_snapshot.md b/docs/schema/last_snapshot.md new file mode 100644 index 0000000..5e501be --- /dev/null +++ b/docs/schema/last_snapshot.md @@ -0,0 +1,90 @@ + + +# Slot: last_snapshot + + +_Timestamp of the last resolution operation for this source._ + +_Used to determine if a refreshBulk or other update is needed._ + +__ + + + + + +URI: [ere:last_snapshot](https://data.europa.eu/ers/schema/ere/last_snapshot) +Alias: last_snapshot + + + + + + + +## Applicable Classes + +| Name | Description | Modifies Slot | +| --- | --- | --- | +| [LookupState](LookupState.md) | Tracks the resolution state for entity mentions from a particular source | no | + + + + + + +## Properties + +* Range: [Datetime](Datetime.md) + +* Required: True + + + + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | ere:last_snapshot | +| native | ere:last_snapshot | + + + + +## LinkML Source + +
+```yaml +name: last_snapshot +description: 'Timestamp of the last resolution operation for this source. + + Used to determine if a refreshBulk or other update is needed. + + ' +from_schema: https://data.europa.eu/ers/schema/ere +rank: 1000 +alias: last_snapshot +owner: LookupState +domain_of: +- LookupState +range: datetime +required: true + +``` +
\ No newline at end of file diff --git a/docs/schema/metadata.md b/docs/schema/metadata.md new file mode 100644 index 0000000..88f1da1 --- /dev/null +++ b/docs/schema/metadata.md @@ -0,0 +1,83 @@ + + +# Slot: metadata + + +_JSON metadata providing context (e.g., curator notes, reasoning)._ + +__ + + + + + +URI: [ere:metadata](https://data.europa.eu/ers/schema/ere/metadata) +Alias: metadata + + + + + + + +## Applicable Classes + +| Name | Description | Modifies Slot | +| --- | --- | --- | +| [UserAction](UserAction.md) | Immutable record of a curator action on an entity mention resolution | no | + + + + + + +## Properties + +* Range: [String](String.md) + + + + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | ere:metadata | +| native | ere:metadata | + + + + +## LinkML Source + +
+```yaml +name: metadata +description: 'JSON metadata providing context (e.g., curator notes, reasoning). + + ' +from_schema: https://data.europa.eu/ers/schema/ere +rank: 1000 +alias: metadata +owner: UserAction +domain_of: +- UserAction +range: string + +``` +
\ No newline at end of file diff --git a/docs/schema/parsed_representation.md b/docs/schema/parsed_representation.md new file mode 100644 index 0000000..b165c0b --- /dev/null +++ b/docs/schema/parsed_representation.md @@ -0,0 +1,83 @@ + + +# Slot: parsed_representation + + +_JSON representation of the parsed entity data._ + +__ + + + + + +URI: [ere:parsed_representation](https://data.europa.eu/ers/schema/ere/parsed_representation) +Alias: parsed_representation + + + + + + + +## Applicable Classes + +| Name | Description | Modifies Slot | +| --- | --- | --- | +| [EntityMention](EntityMention.md) | An entity mention is a representation of a real-world entity, as provided by ... | no | + + + + + + +## Properties + +* Range: [String](String.md) + + + + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | ere:parsed_representation | +| native | ere:parsed_representation | + + + + +## LinkML Source + +
+```yaml +name: parsed_representation +description: 'JSON representation of the parsed entity data. + + ' +from_schema: https://data.europa.eu/ers/schema/ere +rank: 1000 +alias: parsed_representation +owner: EntityMention +domain_of: +- EntityMention +range: string + +``` +
\ No newline at end of file diff --git a/docs/schema/proposed_cluster_ids.md b/docs/schema/proposed_cluster_ids.md new file mode 100644 index 0000000..8334443 --- /dev/null +++ b/docs/schema/proposed_cluster_ids.md @@ -0,0 +1,118 @@ + + +# Slot: proposed_cluster_ids + + +_When this is present, the ERE may use this information to try to cluster the entity in one of _ + +_the listed clusters._ + +__ + +_In particular, when an initial request about an entity isn't answered within a timeout, _ + +_a subsequent new request can be sent about the same entity and with the canonical ID of it_ + +_as a single proposed cluster ID. This suggests the ERE that it can create a new singleton cluster_ + +_with the entity as its initial only member and its canonical ID as the cluster ID. The ERE_ + +_can evolve such a cluster later, when further similar entities are sent in, or when it _ + +_has had more time to associate the initial entity to others. _ + +__ + +_Whatever, the case, the ERE **has no obligation** to fulfil the proposal, how it reacts to _ + +_this list is implementation dependent, and the ERE remains the ultimate authority to provide _ + +_the final resolution decision._ + +__ + + + + + +URI: [ere:proposed_cluster_ids](https://data.europa.eu/ers/schema/ere/proposed_cluster_ids) +Alias: proposed_cluster_ids + + + + + + + +## Applicable Classes + +| Name | Description | Modifies Slot | +| --- | --- | --- | +| [EntityMentionResolutionRequest](EntityMentionResolutionRequest.md) | An entity resolution request sent to the ERE, containing the entity to be res... | no | + + + + + + +## Properties + +* Range: [String](String.md) + +* Multivalued: True + + + + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | ere:proposed_cluster_ids | +| native | ere:proposed_cluster_ids | + + + + +## LinkML Source + +
+```yaml +name: proposed_cluster_ids +description: "When this is present, the ERE may use this information to try to cluster\ + \ the entity in one of \nthe listed clusters.\n\nIn particular, when an initial\ + \ request about an entity isn't answered within a timeout, \na subsequent new request\ + \ can be sent about the same entity and with the canonical ID of it\nas a single\ + \ proposed cluster ID. This suggests the ERE that it can create a new singleton\ + \ cluster\nwith the entity as its initial only member and its canonical ID as the\ + \ cluster ID. The ERE\ncan evolve such a cluster later, when further similar entities\ + \ are sent in, or when it \nhas had more time to associate the initial entity to\ + \ others. \n\nWhatever, the case, the ERE **has no obligation** to fulfil the proposal,\ + \ how it reacts to \nthis list is implementation dependent, and the ERE remains\ + \ the ultimate authority to provide \nthe final resolution decision.\n" +from_schema: https://data.europa.eu/ers/schema/ere +rank: 1000 +alias: proposed_cluster_ids +owner: EntityMentionResolutionRequest +domain_of: +- EntityMentionResolutionRequest +range: string +multivalued: true + +``` +
\ No newline at end of file diff --git a/docs/schema/requestId.md b/docs/schema/request_id.md similarity index 88% rename from docs/schema/requestId.md rename to docs/schema/request_id.md index ba0d5ad..9a9c6ea 100644 --- a/docs/schema/requestId.md +++ b/docs/schema/request_id.md @@ -1,6 +1,6 @@ -# Slot: requestId +# Slot: request_id _A string representing the unique ID of the request made to the ERS system. In general, this is unique_ @@ -19,8 +19,8 @@ __ -URI: [ere:requestId](https://data.europa.eu/ers/schema/ere/requestId) -Alias: requestId +URI: [ere:request_id](https://data.europa.eu/ers/schema/ere/request_id) +Alias: request_id @@ -67,8 +67,8 @@ Alias: requestId | Mapping Type | Mapped Value | | --- | --- | -| self | ere:requestId | -| native | ere:requestId | +| self | ere:request_id | +| native | ere:request_id | @@ -77,7 +77,7 @@ Alias: requestId
```yaml -name: requestId +name: request_id description: "A string representing the unique ID of the request made to the ERS system.\ \ In general, this is unique\nonly within the scope of the source and the entity\ \ type, ie, within `sourceId` and `entityType`. \n\nMoreover, this is **not** the\ @@ -85,7 +85,7 @@ description: "A string representing the unique ID of the request made to the ERS \ match responses to requests.\n" from_schema: https://data.europa.eu/ers/schema/ere rank: 1000 -alias: requestId +alias: request_id owner: EntityMentionIdentifier domain_of: - EntityMentionIdentifier diff --git a/docs/schema/selected_cluster.md b/docs/schema/selected_cluster.md new file mode 100644 index 0000000..fe69d1f --- /dev/null +++ b/docs/schema/selected_cluster.md @@ -0,0 +1,87 @@ + + +# Slot: selected_cluster + + +_The cluster selected by the curator (if action was ACCEPT_TOP_ + +_or ACCEPT_ALTERNATIVE). NULL if action was REJECT_ALL._ + +__ + + + + + +URI: [ere:selected_cluster](https://data.europa.eu/ers/schema/ere/selected_cluster) +Alias: selected_cluster + + + + + + + +## Applicable Classes + +| Name | Description | Modifies Slot | +| --- | --- | --- | +| [UserAction](UserAction.md) | Immutable record of a curator action on an entity mention resolution | no | + + + + + + +## Properties + +* Range: [ClusterReference](ClusterReference.md) + + + + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | ere:selected_cluster | +| native | ere:selected_cluster | + + + + +## LinkML Source + +
+```yaml +name: selected_cluster +description: 'The cluster selected by the curator (if action was ACCEPT_TOP + + or ACCEPT_ALTERNATIVE). NULL if action was REJECT_ALL. + + ' +from_schema: https://data.europa.eu/ers/schema/ere +rank: 1000 +alias: selected_cluster +owner: UserAction +domain_of: +- UserAction +range: ClusterReference + +``` +
\ No newline at end of file diff --git a/docs/schema/similarity_score.md b/docs/schema/similarity_score.md new file mode 100644 index 0000000..25cbda9 --- /dev/null +++ b/docs/schema/similarity_score.md @@ -0,0 +1,97 @@ + + +# Slot: similarity_score + + +_A 0-1 score representing the pairwise comparison between a mention and a cluster (likely_ + +_based on a representative representation)._ + +__ + + + + + +URI: [ere:similarity_score](https://data.europa.eu/ers/schema/ere/similarity_score) +Alias: similarity_score + + + + + + + +## Applicable Classes + +| Name | Description | Modifies Slot | +| --- | --- | --- | +| [ClusterReference](ClusterReference.md) | A reference to a cluster to which an entity is deemed to belong, with an asso... | no | + + + + + + +## Properties + +* Range: [Float](Float.md) + +* Required: True + +* Minimum Value: 0 + +* Maximum Value: 1 + + + + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | ere:similarity_score | +| native | ere:similarity_score | + + + + +## LinkML Source + +
+```yaml +name: similarity_score +description: 'A 0-1 score representing the pairwise comparison between a mention and + a cluster (likely + + based on a representative representation). + + ' +from_schema: https://data.europa.eu/ers/schema/ere +rank: 1000 +alias: similarity_score +owner: ClusterReference +domain_of: +- ClusterReference +range: float +required: true +minimum_value: 0.0 +maximum_value: 1.0 + +``` +
\ No newline at end of file diff --git a/docs/schema/sourceId.md b/docs/schema/sourceId.md deleted file mode 100644 index b965eae..0000000 --- a/docs/schema/sourceId.md +++ /dev/null @@ -1,87 +0,0 @@ - - -# Slot: sourceId - - -_The ID or URI of the ERS client that originated the request. This identifies an application or a _ - -_person accessing the ERS system._ - -__ - - - - - -URI: [ere:sourceId](https://data.europa.eu/ers/schema/ere/sourceId) -Alias: sourceId - - - - - - - -## Applicable Classes - -| Name | Description | Modifies Slot | -| --- | --- | --- | -| [EntityMentionIdentifier](EntityMentionIdentifier.md) | A container that groups the attributes needed to identify an entity mention i... | no | - - - - - - -## Properties - -* Range: [String](String.md) - -* Required: True - - - - -## Identifier and Mapping Information - - - - - - -### Schema Source - - -* from schema: https://data.europa.eu/ers/schema/ere - - - - -## Mappings - -| Mapping Type | Mapped Value | -| --- | --- | -| self | ere:sourceId | -| native | ere:sourceId | - - - - -## LinkML Source - -
-```yaml -name: sourceId -description: "The ID or URI of the ERS client that originated the request. This identifies\ - \ an application or a \nperson accessing the ERS system.\n" -from_schema: https://data.europa.eu/ers/schema/ere -rank: 1000 -alias: sourceId -owner: EntityMentionIdentifier -domain_of: -- EntityMentionIdentifier -range: string -required: true - -``` -
\ No newline at end of file diff --git a/docs/schema/source_id.md b/docs/schema/source_id.md new file mode 100644 index 0000000..b500ee3 --- /dev/null +++ b/docs/schema/source_id.md @@ -0,0 +1,65 @@ + + +# Slot: source_id + + + +URI: [ere:source_id](https://data.europa.eu/ers/schema/ere/source_id) +Alias: source_id + + + + + + + +## Applicable Classes + +| Name | Description | Modifies Slot | +| --- | --- | --- | +| [LookupState](LookupState.md) | Tracks the resolution state for entity mentions from a particular source | no | +| [EntityMentionIdentifier](EntityMentionIdentifier.md) | A container that groups the attributes needed to identify an entity mention i... | no | + + + + + + +## Properties + +* Range: [String](String.md) + + + + +## Identifier and Mapping Information + + + + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | ere:source_id | +| native | ere:source_id | + + + + +## LinkML Source + +
+```yaml +name: source_id +alias: source_id +domain_of: +- EntityMentionIdentifier +- LookupState +range: string + +``` +
\ No newline at end of file diff --git a/docs/schema/status.md b/docs/schema/status.md new file mode 100644 index 0000000..f23df1b --- /dev/null +++ b/docs/schema/status.md @@ -0,0 +1,82 @@ + + +# Slot: status + + +_Current status in the curation workflow_ + + + + + +URI: [ere:status](https://data.europa.eu/ers/schema/ere/status) +Alias: status + + + + + + + +## Applicable Classes + +| Name | Description | Modifies Slot | +| --- | --- | --- | +| [Decision](Decision.md) | Aggregate root representing a resolution decision requiring curation | no | + + + + + + +## Properties + +* Range: [DecisionStatus](DecisionStatus.md) + +* Required: True + + + + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | ere:status | +| native | ere:status | + + + + +## LinkML Source + +
+```yaml +name: status +description: Current status in the curation workflow +from_schema: https://data.europa.eu/ers/schema/ere +rank: 1000 +alias: status +owner: Decision +domain_of: +- Decision +range: DecisionStatus +required: true + +``` +
\ No newline at end of file diff --git a/docs/schema/timestamp.md b/docs/schema/timestamp.md index 8f7f21a..dbd9abf 100644 --- a/docs/schema/timestamp.md +++ b/docs/schema/timestamp.md @@ -24,14 +24,12 @@ Alias: timestamp | Name | Description | Modifies Slot | | --- | --- | --- | -| [EREResponse](EREResponse.md) | Root class to represent all the responses sent by the ERE | no | | [EREMessage](EREMessage.md) | Root abstraction to represent attributes common to both requests and results | no | -| [ERERequest](ERERequest.md) | Root class to represent all the requests sent to the ERE | no | +| [EREErrorResponse](EREErrorResponse.md) | Response sent by the ERE when some error/exception occurs while processing a ... | no | +| [EREResponse](EREResponse.md) | Root class to represent all the responses sent by the ERE | no | | [EntityMentionResolutionRequest](EntityMentionResolutionRequest.md) | An entity resolution request sent to the ERE, containing the entity to be res... | no | +| [ERERequest](ERERequest.md) | Root class to represent all the requests sent to the ERE | no | | [EntityMentionResolutionResponse](EntityMentionResolutionResponse.md) | An entity resolution response returned by the ERE | no | -| [EREErrorResponse](EREErrorResponse.md) | Response sent by the ERE when some error/exception occurs while processing a ... | no | -| [FullRebuildRequest](FullRebuildRequest.md) | A request to reset all the resolutions computed so far and possibly rebuild t... | no | -| [FullRebuildResponse](FullRebuildResponse.md) | A response to a `FullRebuildRequest`, confirming that the rebuild process has... 
| no | diff --git a/docs/schema/type.md b/docs/schema/type.md index 6b9033c..9da5766 100644 --- a/docs/schema/type.md +++ b/docs/schema/type.md @@ -36,14 +36,12 @@ Alias: type | Name | Description | Modifies Slot | | --- | --- | --- | -| [EREResponse](EREResponse.md) | Root class to represent all the responses sent by the ERE | no | | [EREMessage](EREMessage.md) | Root abstraction to represent attributes common to both requests and results | no | -| [ERERequest](ERERequest.md) | Root class to represent all the requests sent to the ERE | no | +| [EREErrorResponse](EREErrorResponse.md) | Response sent by the ERE when some error/exception occurs while processing a ... | no | +| [EREResponse](EREResponse.md) | Root class to represent all the responses sent by the ERE | no | | [EntityMentionResolutionRequest](EntityMentionResolutionRequest.md) | An entity resolution request sent to the ERE, containing the entity to be res... | no | +| [ERERequest](ERERequest.md) | Root class to represent all the requests sent to the ERE | no | | [EntityMentionResolutionResponse](EntityMentionResolutionResponse.md) | An entity resolution response returned by the ERE | no | -| [EREErrorResponse](EREErrorResponse.md) | Response sent by the ERE when some error/exception occurs while processing a ... | no | -| [FullRebuildRequest](FullRebuildRequest.md) | A request to reset all the resolutions computed so far and possibly rebuild t... | no | -| [FullRebuildResponse](FullRebuildResponse.md) | A response to a `FullRebuildRequest`, confirming that the rebuild process has... 
| no | diff --git a/docs/schema/updated_at.md b/docs/schema/updated_at.md new file mode 100644 index 0000000..9bb49d2 --- /dev/null +++ b/docs/schema/updated_at.md @@ -0,0 +1,79 @@ + + +# Slot: updated_at + + +_When the decision was last updated (ERE refresh or curator action)_ + + + + + +URI: [ere:updated_at](https://data.europa.eu/ers/schema/ere/updated_at) +Alias: updated_at + + + + + + + +## Applicable Classes + +| Name | Description | Modifies Slot | +| --- | --- | --- | +| [Decision](Decision.md) | Canonical placement of an entity mention to a cluster | no | + + + + + + +## Properties + +* Range: [Datetime](Datetime.md) + + + + +## Identifier and Mapping Information + + + + + + +### Schema Source + + +* from schema: https://data.europa.eu/ers/schema/ere + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | ere:updated_at | +| native | ere:updated_at | + + + + +## LinkML Source + +
+```yaml +name: updated_at +description: When the decision was last updated (ERE refresh or curator action) +from_schema: https://data.europa.eu/ers/schema/ere +rank: 1000 +alias: updated_at +owner: Decision +domain_of: +- Decision +range: datetime + +``` +
\ No newline at end of file diff --git a/poetry.lock b/poetry.lock index 9b57940..5f3a589 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.3.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.3.2 and should not be changed by hand. [[package]] name = "alabaster" @@ -1333,6 +1333,7 @@ files = [ [package.dependencies] attrs = ">=22.2.0" rpds-py = ">=0.7.0" +typing-extensions = {version = ">=4.4.0", markers = "python_version < \"3.13\""} [[package]] name = "requests" @@ -1520,6 +1521,34 @@ files = [ {file = "rpds_py-0.30.0.tar.gz", hash = "sha256:dd8ff7cf90014af0c0f787eea34794ebf6415242ee1d6fa91eaba725cc441e84"}, ] +[[package]] +name = "ruff" +version = "0.15.0" +description = "An extremely fast Python linter and code formatter, written in Rust." +optional = false +python-versions = ">=3.7" +groups = ["dev"] +files = [ + {file = "ruff-0.15.0-py3-none-linux_armv6l.whl", hash = "sha256:aac4ebaa612a82b23d45964586f24ae9bc23ca101919f5590bdb368d74ad5455"}, + {file = "ruff-0.15.0-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:dcd4be7cc75cfbbca24a98d04d0b9b36a270d0833241f776b788d59f4142b14d"}, + {file = "ruff-0.15.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:d747e3319b2bce179c7c1eaad3d884dc0a199b5f4d5187620530adf9105268ce"}, + {file = "ruff-0.15.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:650bd9c56ae03102c51a5e4b554d74d825ff3abe4db22b90fd32d816c2e90621"}, + {file = "ruff-0.15.0-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a6664b7eac559e3048223a2da77769c2f92b43a6dfd4720cef42654299a599c9"}, + {file = "ruff-0.15.0-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6f811f97b0f092b35320d1556f3353bf238763420ade5d9e62ebd2b73f2ff179"}, + {file = "ruff-0.15.0-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:761ec0a66680fab6454236635a39abaf14198818c8cdf691e036f4bc0f406b2d"}, 
+ {file = "ruff-0.15.0-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:940f11c2604d317e797b289f4f9f3fa5555ffe4fb574b55ed006c3d9b6f0eb78"}, + {file = "ruff-0.15.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bcbca3d40558789126da91d7ef9a7c87772ee107033db7191edefa34e2c7f1b4"}, + {file = "ruff-0.15.0-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:9a121a96db1d75fa3eb39c4539e607f628920dd72ff1f7c5ee4f1b768ac62d6e"}, + {file = "ruff-0.15.0-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:5298d518e493061f2eabd4abd067c7e4fb89e2f63291c94332e35631c07c3662"}, + {file = "ruff-0.15.0-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:afb6e603d6375ff0d6b0cee563fa21ab570fd15e65c852cb24922cef25050cf1"}, + {file = "ruff-0.15.0-py3-none-musllinux_1_2_i686.whl", hash = "sha256:77e515f6b15f828b94dc17d2b4ace334c9ddb7d9468c54b2f9ed2b9c1593ef16"}, + {file = "ruff-0.15.0-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:6f6e80850a01eb13b3e42ee0ebdf6e4497151b48c35051aab51c101266d187a3"}, + {file = "ruff-0.15.0-py3-none-win32.whl", hash = "sha256:238a717ef803e501b6d51e0bdd0d2c6e8513fe9eec14002445134d3907cd46c3"}, + {file = "ruff-0.15.0-py3-none-win_amd64.whl", hash = "sha256:dd5e4d3301dc01de614da3cdffc33d4b1b96fb89e45721f1598e5532ccf78b18"}, + {file = "ruff-0.15.0-py3-none-win_arm64.whl", hash = "sha256:c480d632cc0ca3f0727acac8b7d053542d9e114a462a145d0b00e7cd658c515a"}, + {file = "ruff-0.15.0.tar.gz", hash = "sha256:6bdea47cdbea30d40f8f8d7d69c0854ba7c15420ec75a26f463290949d7f7e9a"}, +] + [[package]] name = "shexjsg" version = "0.8.2" @@ -2063,5 +2092,5 @@ dev = ["pytest", "setuptools"] [metadata] lock-version = "2.1" -python-versions = "^3.14" -content-hash = "a1aee2f1d50f0d7ff650b5082a00913c7fbab116d0a54ad407c6084933f2766d" +python-versions = ">=3.12,<4.0" +content-hash = "30a13cf616fe1abf04ca15d9ef2d08df7beae9f1bfd19f4cc9a4e3df469139de" diff --git a/pyproject.toml b/pyproject.toml index e462009..eb2a179 100644 --- a/pyproject.toml 
+++ b/pyproject.toml @@ -3,7 +3,7 @@ name = "ers-core" version = "0.0.1" description = """ The core components for the Entity Resolution System (ERS) components. - + The ERS is a pluggable entity resolution system for data transformation pipelines. """ @@ -12,15 +12,16 @@ authors = [ ] readme = "README.md" -requires-python = "^3.14" +requires-python = ">=3.12" [tool.poetry.dependencies] -python = "^3.14" +python = ">=3.12,<4.0" pydantic = ">=2.10.6,<3.0.0" [dependency-groups] dev = [ - "linkml (>=1.9.2,<2.0.0)" + "linkml (>=1.9.2,<2.0.0)", + "ruff (>=0.9.0,<1.0.0)" ] [build-system] @@ -28,7 +29,9 @@ requires = ["poetry-core>=2.0.0,<3.0.0"] build-backend = "poetry.core.masonry.api" [tool.poetry] -# Needed when the root doesn't contain $project_name packages = [ - { include = "*", from = "src" } + { include = "erspec", from = "src" } ] + +[tool.ruff] +target-version = "py312" diff --git a/resources/schemas/core-schema-v0.1.0.yaml b/resources/schemas/core-schema-v0.1.0.yaml new file mode 100644 index 0000000..2174f54 --- /dev/null +++ b/resources/schemas/core-schema-v0.1.0.yaml @@ -0,0 +1,224 @@ +id: https://data.europa.eu/ers/schema/ers +name: coreSchema +description: ERS domain layer models for entity resolution +version: 0.1.0 +imports: + - linkml:types +prefixes: + linkml: https://w3id.org/linkml/ + ers: https://data.europa.eu/ers/schema/ers/ + ere: https://data.europa.eu/ers/schema/ere/ +default_prefix: ers +default_range: string + +enums: + + EntityType: + description: Types of entities that can be resolved + permissible_values: + ORGANISATION: + description: An organization entity + PROCEDURE: + description: A procurement procedure entity + + UserActionType: + description: Types of curator actions on entity mention resolutions + permissible_values: + ACCEPT_TOP: + description: Curator accepted the top candidate from ERE + ACCEPT_ALTERNATIVE: + description: Curator selected an alternative candidate + REJECT_ALL: + description: Curator rejected all candidates + + 
+classes: + + EntityMention: + description: | + An entity mention is a representation of a real-world entity, as provided by the ERS. + It contains the entity data, along with metadata like type and format. + attributes: + identifiedBy: + description: | + The identification triad of the entity mention. + range: EntityMentionIdentifier + required: true + content_type: + description: | + A string about the MIME format of `content` (e.g. text/turtle, application/ld+json) + required: true + content: + description: | + A code string representing the entity mention details (eg, RDF or XML description). + required: true + parsed_representation: + description: | + JSON representation of the parsed entity data. + + EntityMentionIdentifier: + description: | + A container that groups the attributes needed to identify an entity mention in a resolution request + or response. + + As per ERS architectural decision, in the whole ERS and ERE systems, there is always a deterministic + method to build a canonical identifier from the combination of `sourceId`, `requestId` and `entityType` + (eg, string concatenation plus some prefix). Similarly, a cluster ID (mentioned in various places in + in this hereby ERE service schema) can be built from an entity that is initially the only cluster member. + attributes: + source_id: + description: | + The ID or URI of the ERS client that originated the request. This identifies an application or a + person accessing the ERS system. + required: true + request_id: + description: | + A string representing the unique ID of the request made to the ERS system. In general, this is unique + only within the scope of the source and the entity type, ie, within `sourceId` and `entityType`. + + Moreover, this is **not** the same as `ereRequestId`, which instead, is internal to the ERE and is + used to match responses to requests. + range: string + required: true + entity_type: + description: | + A string representing the entity type (based on CET). 
This is typically a URI. + + Note that this is at this level, and not at `EntityMention`, since, as said above, + it's needed to identify the entity, even when its content is not present. For the same + reason, it's used both for `EREResolutionRequest` and `EREResolutionResponse` messages., + required: true + + LookupState: + description: | + Tracks the resolution state for entity mentions from a particular source. + Records when the source was last resolved against the canonical clustering. + attributes: + source_id: + description: | + The ID or URI of the ERS client (originator) for which we track lookup state. + required: true + last_snapshot: + range: datetime + description: | + Timestamp of the last resolution operation for this source. + Used to determine if a refreshBulk or other update is needed. + required: true + + ClusterReference: + description: | + A reference to a cluster to which an entity is deemed to belong, with an associated confidence and similarity scores. + + A cluster is a set of entity mentions that have been determined to refer to the same real-world entity. + Each cluster has a unique clusterId. + + A cluster reference is used to report the association between an entity mention and a cluster + of equivalence. + attributes: + cluster_id: + description: | + The identifier of the cluster/canonical entity that is considered equivalent to the + subject entity mention that an `EntityMentionResolutionResponse` refers to. + required: true + confidence_score: + range: float + description: | + A 0-1 value of how confident the ERE is about the equivalence between the subject entity mention + and the target canonical entity. + required: true + minimum_value: 0.0 + maximum_value: 1.0 + similarity_score: + range: float + description: | + A 0-1 score representing the pairwise comparison between a mention and a cluster (likely + based on a representative representation). 
+ required: true + minimum_value: 0.0 + maximum_value: 1.0 + + Decision: + description: | + Canonical placement of an entity mention to a cluster. + Represents the latest resolution decision (from ERE or curator override). + attributes: + id: + description: Unique decision identifier + required: true + about_entity_mention: + range: EntityMentionIdentifier + description: The entity mention being resolved + required: true + current_placement: + range: ClusterReference + description: | + The accepted cluster for this mention (latest from ERE or curator). + required: true + candidates: + range: ClusterReference + multivalued: true + description: | + Top-N alternative clusters proposed by ERE (for curation UI preview). + required: true + created_at: + range: datetime + description: When the decision was first created + required: true + updated_at: + range: datetime + description: When the decision was last updated (ERE refresh or curator action) + + UserAction: + description: | + Immutable record of a curator action on an entity mention resolution. + Stored in the User Action Log for traceability and training. + + NOT related to ERE messages; represents curator intent only. + attributes: + id: + description: Unique audit trail entry identifier + required: true + about_entity_mention: + range: EntityMentionIdentifier + description: The entity mention the curator acted upon + required: true + candidates: + range: ClusterReference + multivalued: true + description: | + The candidate clusters presented to the curator for selection. + Ordered by confidence (same as shown in curation UI). + required: true + selected_cluster: + range: ClusterReference + description: | + The cluster selected by the curator (if action was ACCEPT_TOP + or ACCEPT_ALTERNATIVE). NULL if action was REJECT_ALL. 
+ action_type: + range: UserActionType + description: The type of action the curator performed + required: true + actor: + description: User ID or identifier of the curator who performed the action + required: true + created_at: + range: datetime + description: Timestamp when the curator action was recorded + required: true + metadata: + description: | + JSON metadata providing context (e.g., curator notes, reasoning). + + CanonicalEntityIdentifier: + description: | + A logical identity construct providing a stable identity anchor. + Represents a cluster of equivalent entity mentions. + attributes: + identifier: + required: true + description: Unique identifier for the canonical entity. + equivalent_to: + range: EntityMentionIdentifier + multivalued: true + description: Entity mentions that have been resolved to this canonical entity. + required: true \ No newline at end of file diff --git a/resources/schemas/ere-service-schema-v0.1.0.json b/resources/schemas/er-schema-v0.1.0.json similarity index 51% rename from resources/schemas/ere-service-schema-v0.1.0.json rename to resources/schemas/er-schema-v0.1.0.json index ca3175a..0fea496 100644 --- a/resources/schemas/ere-service-schema-v0.1.0.json +++ b/resources/schemas/er-schema-v0.1.0.json @@ -1,57 +1,134 @@ { "$defs": { + "CanonicalEntityIdentifier": { + "additionalProperties": false, + "description": "A logical identity construct providing a stable identity anchor.\nRepresents a cluster of equivalent entity mentions.", + "properties": { + "equivalent_to": { + "description": "Entity mentions that have been resolved to this canonical entity.", + "items": { + "$ref": "#/$defs/EntityMentionIdentifier" + }, + "type": "array" + }, + "identifier": { + "description": "Unique identifier for the canonical entity.", + "type": "string" + } + }, + "required": [ + "identifier", + "equivalent_to" + ], + "title": "CanonicalEntityIdentifier", + "type": "object" + }, "ClusterReference": { "additionalProperties": false, - 
"description": "A reference to a cluster to which an entity is deemed to belong, with an associated confidence score.\n\nA cluster is a set of entity mentions that have been determined to refer to the same real-world entity.\nEach cluster has a unique clusterId.\n\nA cluster reference is used to report the association between an entity mention and a cluster \nof equivalence.", + "description": "A reference to a cluster to which an entity is deemed to belong, with an associated confidence and similarity scores.\n\nA cluster is a set of entity mentions that have been determined to refer to the same real-world entity.\nEach cluster has a unique clusterId.\n\nA cluster reference is used to report the association between an entity mention and a cluster \nof equivalence.", "properties": { - "clusterId": { + "cluster_id": { "description": "The identifier of the cluster/canonical entity that is considered equivalent to the\nsubject entity mention that an `EntityMentionResolutionResponse` refers to.\n", "type": "string" }, - "confidenceScore": { + "confidence_score": { "description": "A 0-1 value of how confident the ERE is about the equivalence between the subject entity mention\nand the target canonical entity.\n", "maximum": 1.0, "minimum": 0.0, "type": "number" + }, + "similarity_score": { + "description": "A 0-1 score representing the pairwise comparison between a mention and a cluster (likely\nbased on a representative representation).\n", + "maximum": 1.0, + "minimum": 0.0, + "type": "number" } }, "required": [ - "clusterId", - "confidenceScore" + "cluster_id", + "confidence_score", + "similarity_score" ], "title": "ClusterReference", "type": "object" }, + "Decision": { + "additionalProperties": false, + "description": "Canonical placement of an entity mention to a cluster.\nRepresents the latest resolution decision (from ERE or curator override).", + "properties": { + "about_entity_mention": { + "$ref": "#/$defs/EntityMentionIdentifier", + "description": "The entity 
mention being resolved" + }, + "candidates": { + "description": "Top-N alternative clusters proposed by ERE (for curation UI preview).\n", + "items": { + "$ref": "#/$defs/ClusterReference" + }, + "type": "array" + }, + "created_at": { + "description": "When the decision was first created", + "format": "date-time", + "type": "string" + }, + "current_placement": { + "$ref": "#/$defs/ClusterReference", + "description": "The accepted cluster for this mention (latest from ERE or curator).\n" + }, + "id": { + "description": "Unique decision identifier", + "type": "string" + }, + "updated_at": { + "description": "When the decision was last updated (ERE refresh or curator action)", + "format": "date-time", + "type": [ + "string", + "null" + ] + } + }, + "required": [ + "id", + "about_entity_mention", + "current_placement", + "candidates", + "created_at" + ], + "title": "Decision", + "type": "object" + }, "EREErrorResponse": { "additionalProperties": false, "description": "Response sent by the ERE when some error/exception occurs while processing a request.\nFor instance, this may happen if the request is malformed or some internal error happens.\n\nThe attributes of this class are based on [RFC-9457](https://datatracker.ietf.org/doc/html/rfc9457).", "properties": { - "ereRequestId": { - "description": "A string representing the unique ID of an ERE request, or the ID of the request a response is about.\nThis **is not** the same as `requestId` + `sourceId`.\n", + "ere_request_id": { + "description": "A string representing the unique ID of an ERE request, or the ID of the request a response is about.\nThis **is not** the same as `request_id` + `source_id`.\n\nNote on notification responses: as per ERE contract, an `EntityMentionResolutionResponse` message\ncan originate from within the ERE, without any previous request counterpart, as a notification of\nresolution update. 
In this case, `ere_request_id` has the prefix `ereNotification:`.\n", "type": "string" }, - "errorDetail": { + "error_detail": { "description": "A human readable detailed message about the error that occurred.\n\nThis corresponds to RFC-9457's `detail`.\n", "type": [ "string", "null" ] }, - "errorTitle": { + "error_title": { "description": "A human readable brief message about the error that occurred.\n\nThis corresponds to RFC-9457's `title`.\n", "type": [ "string", "null" ] }, - "errorTrace": { + "error_trace": { "description": "A string representing a (stack) trace of the error that occurred.\n\nThis is optional and typically used for debugging purposes only, since\nexposing this kind of server-side information is a security risk.\n", "type": [ "string", "null" ] }, - "errorType": { + "error_type": { "description": "A string representing the error type, eg, the FQN of the raised exception.\n\nThis corresponds to RFC-9457's `type`.\n", "type": "string" }, @@ -72,9 +149,9 @@ } }, "required": [ - "errorType", + "error_type", "type", - "ereRequestId" + "ere_request_id" ], "title": "EREErrorResponse", "type": "object" @@ -87,18 +164,25 @@ "description": "A code string representing the entity mention details (eg, RDF or XML description).\n", "type": "string" }, - "contentType": { + "content_type": { "description": "A string about the MIME format of `content` (e.g. 
text/turtle, application/ld+json)\n", "type": "string" }, - "identifier": { + "identifiedBy": { "$ref": "#/$defs/EntityMentionIdentifier", - "description": "The identifier (with the ERS-derived components) of the entity mention.\n" + "description": "The identification triad of the entity mention.\n" + }, + "parsed_representation": { + "description": "JSON representation of the parsed entity data.\n", + "type": [ + "string", + "null" + ] } }, "required": [ - "identifier", - "contentType", + "identifiedBy", + "content_type", "content" ], "title": "EntityMention", @@ -108,23 +192,23 @@ "additionalProperties": false, "description": "A container that groups the attributes needed to identify an entity mention in a resolution request\nor response.\n\nAs per ERS architectural decision, in the whole ERS and ERE systems, there is always a deterministic\nmethod to build a canonical identifier from the combination of `sourceId`, `requestId` and `entityType`\n(eg, string concatenation plus some prefix). Similarly, a cluster ID (mentioned in various places in \nin this hereby ERE service schema) can be built from an entity that is initially the only cluster member.", "properties": { - "entityType": { - "description": "A string representing the entity type (based on CET). This is typically a URI.\n\nNote that this is at this level, and not at `EntityMention`, since, as said above, \nit's needed to identify the entity, even when its content is not present. For the same\nreason, it's used both for `EREResolutionRequest` and `EREResolutionResponse` messages., \n", + "entity_type": { + "description": "A string representing the entity type (based on CET). This is typically a URI.\n\nNote that this is at this level, and not at `EntityMention`, since, as said above, \nit's needed to identify the entity, even when its content is not present. 
For the same\nreason, it's used both for `EREResolutionRequest` and `EREResolutionResponse` messages.,\n", "type": "string" }, - "requestId": { + "request_id": { "description": "A string representing the unique ID of the request made to the ERS system. In general, this is unique\nonly within the scope of the source and the entity type, ie, within `sourceId` and `entityType`. \n\nMoreover, this is **not** the same as `ereRequestId`, which instead, is internal to the ERE and is \nused to match responses to requests.\n", "type": "string" }, - "sourceId": { + "source_id": { "description": "The ID or URI of the ERS client that originated the request. This identifies an application or a \nperson accessing the ERS system.\n", "type": "string" } }, "required": [ - "sourceId", - "requestId", - "entityType" + "source_id", + "request_id", + "entity_type" ], "title": "EntityMentionIdentifier", "type": "object" @@ -133,16 +217,26 @@ "additionalProperties": false, "description": "An entity resolution request sent to the ERE, containing the entity to be resolved.", "properties": { - "entityMention": { + "entity_mention": { "$ref": "#/$defs/EntityMention", "description": "The data about the entity to be resolved. Note that, at least for the moment, we don't support\nbatch requests, so this property is single-valued.\n" }, - "ereRequestId": { - "description": "A string representing the unique ID of an ERE request, or the ID of the request a response is about.\nThis **is not** the same as `requestId` + `sourceId`.\n", + "ere_request_id": { + "description": "A string representing the unique ID of an ERE request, or the ID of the request a response is about.\nThis **is not** the same as `request_id` + `source_id`.\n\nNote on notification responses: as per ERE contract, an `EntityMentionResolutionResponse` message\ncan originate from within the ERE, without any previous request counterpart, as a notification of\nresolution update. 
In this case, `ere_request_id` has the prefix `ereNotification:`.\n", "type": "string" }, - "excludedClusterIds": { - "description": "When this is present, the resolution must not bin the entity mention into any of the\nlisted clusters. This can be used to reject a previous resolution proposed by the ERE.\n\nThe exact reaction to this is implementation dependent. In the simplest case, the ERE\nmight just create a singleton cluster with this entity as member. In a more advanced \ncase, it might recompute the similarity with more advanced algorithms or use updated\ndata.\n\nTODO: Can this be revised? What does it happen if an exclusion was made by mistake?\n", + "excluded_cluster_ids": { + "description": "When this is present, the ERE may use this information to avoid clustering the entity in \nthe listed clusters.\n\nThis can be used to notify the ERE that a curator has rejected a previous resolution \nproposed by the ERE.\n\nAs for `proposed_cluster_ids`, the ERE **has no obligation** to fulfil the exclusions, and \nit remains the ultimate authority to provide the final resolution decision.\n\nSimilarly, the exact reaction to this is implementation dependent. In the simplest case, the ERE\nmight just create a singleton cluster with the current entity as member. In a more advanced \ncase, it might recompute the similarity with more advanced algorithms or use updated\ndata.\n", + "items": { + "type": "string" + }, + "type": [ + "array", + "null" + ] + }, + "proposed_cluster_ids": { + "description": "When this is present, the ERE may use this information to try to cluster the entity in one of \nthe listed clusters.\n\nIn particular, when an initial request about an entity isn't answered within a timeout, \na subsequent new request can be sent about the same entity and with the canonical ID of it\nas a single proposed cluster ID. 
This suggests the ERE that it can create a new singleton cluster\nwith the entity as its initial only member and its canonical ID as the cluster ID. The ERE\ncan evolve such a cluster later, when further similar entities are sent in, or when it \nhas had more time to associate the initial entity to others. \n\nWhatever, the case, the ERE **has no obligation** to fulfil the proposal, how it reacts to \nthis list is implementation dependent, and the ERE remains the ultimate authority to provide \nthe final resolution decision.\n", "items": { "type": "string" }, @@ -168,9 +262,9 @@ } }, "required": [ - "entityMention", + "entity_mention", "type", - "ereRequestId" + "ere_request_id" ], "title": "EntityMentionResolutionRequest", "type": "object" @@ -186,12 +280,12 @@ }, "type": "array" }, - "entityMentionId": { + "entity_mention_id": { "$ref": "#/$defs/EntityMentionIdentifier", - "description": "The identifier of the entity mention that has been resolved.\n\nThis isn't strictly needed, since the `ereRequestId` already links the response to \nthe request's entity mention. Yet, it's reported for convenience.\n" + "description": "The identifier of the entity mention that has been resolved.\n\nThis isn't strictly needed, since the `ere_request_id` already links the response to \nthe request's entity mention. Yet, it's reported for convenience.\n" }, - "ereRequestId": { - "description": "A string representing the unique ID of an ERE request, or the ID of the request a response is about.\nThis **is not** the same as `requestId` + `sourceId`.\n", + "ere_request_id": { + "description": "A string representing the unique ID of an ERE request, or the ID of the request a response is about.\nThis **is not** the same as `request_id` + `source_id`.\n\nNote on notification responses: as per ERE contract, an `EntityMentionResolutionResponse` message\ncan originate from within the ERE, without any previous request counterpart, as a notification of\nresolution update. 
In this case, `ere_request_id` has the prefix `ereNotification:`.\n", "type": "string" }, "timestamp": { @@ -211,75 +305,115 @@ } }, "required": [ - "entityMentionId", + "entity_mention_id", "candidates", "type", - "ereRequestId" + "ere_request_id" ], "title": "EntityMentionResolutionResponse", "type": "object" }, - "FullRebuildRequest": { + "EntityType": { + "description": "Types of entities that can be resolved", + "enum": [ + "ORGANISATION", + "PROCEDURE" + ], + "title": "EntityType", + "type": "string" + }, + "LookupState": { "additionalProperties": false, - "description": "A request to reset all the resolutions computed so far and possibly rebuild them as \nrequests about old entities arrive again (and build new entities from scratch as usually).\n\nIt is expected that the ERE client re-sends all the entities to be resolved again,\nusing `EntityMentionResolutionRequest` messages exactly as the first time the resolutions \nwere built. This implies the a client like the ERS logs/persists the entities it receives\nto resolve and also saves manual overriding of ERE results.\n\nMoreover:\n* The ERE must keep track of past `EntityMention` marked as canonical.\n* The ERE must retain requests with `excludedClusterIds` and apply them again when the \n same entity mention is re-sent after the full rebuild. TODO: see notes about these properties,\n on the possible need of withdrawing exclusions.", + "description": "Tracks the resolution state for entity mentions from a particular source.\nRecords when the source was last resolved against the canonical clustering.", "properties": { - "ereRequestId": { - "description": "A string representing the unique ID of an ERE request, or the ID of the request a response is about.\nThis **is not** the same as `requestId` + `sourceId`.\n", - "type": "string" - }, - "timestamp": { - "description": "The time when the message was created. 
Should be in ISO-8601 format.\n", + "last_snapshot": { + "description": "Timestamp of the last resolution operation for this source.\nUsed to determine if a refreshBulk or other update is needed.\n", "format": "date-time", - "type": [ - "string", - "null" - ] + "type": "string" }, - "type": { - "description": "The type of the request or result.\n\nAs per LinkML specification, `designates_type` is used here in order to allow for this\nslot to tell the concrete subclass that an instance (such as a JSON object) belongs to.\n\nIn other words, a particular request will have `type` set with values like \n`EntityMentionResolutionRequest` or `EntityResolutionResult`\n", - "enum": [ - "FullRebuildRequest" - ], + "source_id": { + "description": "The ID or URI of the ERS client (originator) for which we track lookup state.\n", "type": "string" } }, "required": [ - "type", - "ereRequestId" + "source_id", + "last_snapshot" ], - "title": "FullRebuildRequest", + "title": "LookupState", "type": "object" }, - "FullRebuildResponse": { + "UserAction": { "additionalProperties": false, - "description": "A response to a `FullRebuildRequest`, confirming that the rebuild process has started.\n\nAs for all the requests, this carries the `ereRequestId`, which matches the full rebuild \nrequest being acknowledged.", + "description": "Immutable record of a curator action on an entity mention resolution.\nStored in the User Action Log for traceability and training.\n\nNOT related to ERE messages; represents curator intent only.", "properties": { - "ereRequestId": { - "description": "A string representing the unique ID of an ERE request, or the ID of the request a response is about.\nThis **is not** the same as `requestId` + `sourceId`.\n", + "about_entity_mention": { + "$ref": "#/$defs/EntityMentionIdentifier", + "description": "The entity mention the curator acted upon" + }, + "action_type": { + "$ref": "#/$defs/UserActionType", + "description": "The type of action the curator performed" + }, 
+ "actor": { + "description": "User ID or identifier of the curator who performed the action", "type": "string" }, - "timestamp": { - "description": "The time when the message was created. Should be in ISO-8601 format.\n", + "candidates": { + "description": "The candidate clusters presented to the curator for selection.\nOrdered by confidence (same as shown in curation UI).\n", + "items": { + "$ref": "#/$defs/ClusterReference" + }, + "type": "array" + }, + "created_at": { + "description": "Timestamp when the curator action was recorded", "format": "date-time", + "type": "string" + }, + "id": { + "description": "Unique audit trail entry identifier", + "type": "string" + }, + "metadata": { + "description": "JSON metadata providing context (e.g., curator notes, reasoning).\n", "type": [ "string", "null" ] }, - "type": { - "description": "The type of the request or result.\n\nAs per LinkML specification, `designates_type` is used here in order to allow for this\nslot to tell the concrete subclass that an instance (such as a JSON object) belongs to.\n\nIn other words, a particular request will have `type` set with values like \n`EntityMentionResolutionRequest` or `EntityResolutionResult`\n", - "enum": [ - "FullRebuildResponse" + "selected_cluster": { + "anyOf": [ + { + "$ref": "#/$defs/ClusterReference" + }, + { + "type": "null" + } ], - "type": "string" + "description": "The cluster selected by the curator (if action was ACCEPT_TOP\nor ACCEPT_ALTERNATIVE). 
NULL if action was REJECT_ALL.\n" } }, "required": [ - "type", - "ereRequestId" + "id", + "about_entity_mention", + "candidates", + "action_type", + "actor", + "created_at" ], - "title": "FullRebuildResponse", + "title": "UserAction", "type": "object" + }, + "UserActionType": { + "description": "Types of curator actions on entity mention resolutions", + "enum": [ + "ACCEPT_TOP", + "ACCEPT_ALTERNATIVE", + "REJECT_ALL" + ], + "title": "UserActionType", + "type": "string" } }, "$id": "https://data.europa.eu/ers/schema/ere", diff --git a/resources/schemas/ere-service-schema-v0.1.0.yaml b/resources/schemas/ere-service-schema-v0.1.0.yaml index d79e7b1..7c18438 100644 --- a/resources/schemas/ere-service-schema-v0.1.0.yaml +++ b/resources/schemas/ere-service-schema-v0.1.0.yaml @@ -4,6 +4,7 @@ description: A LinkML schema for the ERS/ERE Service version: 0.1.0 imports: - linkml:types + - ./core-schema-v0.1.0 prefixes: linkml: https://w3id.org/linkml/ ere: https://data.europa.eu/ers/schema/ere/ @@ -29,11 +30,15 @@ classes: `EntityMentionResolutionRequest` or `EntityResolutionResult` designates_type: true required: true - ereRequestId: + ere_request_id: # We decided we don't need a responseId for the moment description: | A string representing the unique ID of an ERE request, or the ID of the request a response is about. - This **is not** the same as `requestId` + `sourceId`. + This **is not** the same as `request_id` + `source_id`. + + Note on notification responses: as per ERE contract, an `EntityMentionResolutionResponse` message + can originate from within the ERE, without any previous request counterpart, as a notification of + resolution update. In this case, `ere_request_id` has the prefix `ereNotification:`. required: true timestamp: range: datetime @@ -60,62 +65,102 @@ classes: description: | An entity resolution request sent to the ERE, containing the entity to be resolved. 
attributes: - entityMention: + entity_mention: range: EntityMention description: | The data about the entity to be resolved. Note that, at least for the moment, we don't support batch requests, so this property is single-valued. required: true - excludedClusterIds: + proposed_cluster_ids: + description: | + When this is present, the ERE may use this information to try to cluster the entity in one of + the listed clusters. + + In particular, when an initial request about an entity isn't answered within a timeout, + a subsequent new request can be sent about the same entity and with the canonical ID of it + as a single proposed cluster ID. This suggests the ERE that it can create a new singleton cluster + with the entity as its initial only member and its canonical ID as the cluster ID. The ERE + can evolve such a cluster later, when further similar entities are sent in, or when it + has had more time to associate the initial entity to others. + + Whatever, the case, the ERE **has no obligation** to fulfil the proposal, how it reacts to + this list is implementation dependent, and the ERE remains the ultimate authority to provide + the final resolution decision. + multivalued: true + excluded_cluster_ids: description: | - When this is present, the resolution must not bin the entity mention into any of the - listed clusters. This can be used to reject a previous resolution proposed by the ERE. + When this is present, the ERE may use this information to avoid clustering the entity in + the listed clusters. + + This can be used to notify the ERE that a curator has rejected a previous resolution + proposed by the ERE. - The exact reaction to this is implementation dependent. In the simplest case, the ERE - might just create a singleton cluster with this entity as member. In a more advanced + As for `proposed_cluster_ids`, the ERE **has no obligation** to fulfil the exclusions, and + it remains the ultimate authority to provide the final resolution decision. 
+ + Similarly, the exact reaction to this is implementation dependent. In the simplest case, the ERE + might just create a singleton cluster with the current entity as member. In a more advanced case, it might recompute the similarity with more advanced algorithms or use updated data. - - TODO: Can this be revised? What does it happen if an exclusion was made by mistake? multivalued: true examples: - description: a regular request value: | { "type": "EntityMentionResolutionRequest", - "entityMention": { + "entity_mention": { "identifier": { - "requestId": "324fs3r345vx", - "sourceId": "TEDSWS", - "entityType": "http://www.w3.org/ns/org#Organization" + "request_id": "324fs3r345vx", + "source_id": "TEDSWS", + "entity_type": "http://www.w3.org/ns/org#Organization" }, "content": "epd:ent005 a org:Organization; ... cccev:telephone \"+44 1924306780\" .", - "contentType": "text/turtle" + "content_type": "text/turtle" }, "timestamp": "2026-01-14T12:34:56Z", // As said, we need this internal ID and it can be auto-generated (eg, with UUIDs) - "ereRequestId": "324fs3r345vx:01" + "ere_request_id": "324fs3r345vx:01" } - description: a re-rebuild request (ie, carrying a rejection list) value: | { "type": "EntityMentionResolutionRequest", - "entityMention": { + "entity_mention": { "identifier": { - "requestId": "324fs3r345vxab", - "sourceId": "TEDSWS", - "entityType": "http://www.w3.org/ns/org#Organization", + "request_id": "324fs3r345vxab", + "source_id": "TEDSWS", + "entity_type": "http://www.w3.org/ns/org#Organization", }, "content": "epd:ent005 a org:Organization; ... 
cccev:telephone \"+44 1924306780\" .", - "contentType": "text/turtle" + "content_type": "text/turtle" }, - "excludedClusterIds": [ + "excluded_cluster_ids": [ "324fs3r345vx-bb45we", "324fs3r345vx-cc67ui" ], "timestamp": "2026-01-14T12:40:56Z", - "ereRequestId": "324fs3r345vxab:01" + "ere_request_id": "324fs3r345vxab:01" } + - description: A request with the entity as proposed cluster ID + value: | + { + "type": "EntityMentionResolutionRequest", + "entity_mention": { + "identifier": { + "request_id": "324fs3r345vxab", + "source_id": "TEDSWS", + "entity_type": "http://www.w3.org/ns/org#Organization", + }, + "content": "epd:ent005 a org:Organization; ... cccev:telephone \"+44 1924306780\" .", + "content_type": "text/turtle" + }, + "proposed_cluster_ids": [ + // which is sha256 ( source_id + request_id + entity_type ) + "e2e8eea1865aef0e2406ea326520abc252b2afa836ed71434f6a32811904bfad" + ], + "timestamp": "2026-01-14T12:40:56Z", + "ere_request_id": "324fs3r345vxab:01" + } @@ -131,12 +176,12 @@ classes: we might need to return multiple `EntityMentionResolutionResponse` messages, each with additional properties such as `entityIndex` and `totalEntities`. attributes: - entityMentionId: + entity_mention_id: range: EntityMentionIdentifier description: | The identifier of the entity mention that has been resolved. - This isn't strictly needed, since the `ereRequestId` already links the response to + This isn't strictly needed, since the `ere_request_id` already links the response to the request's entity mention. Yet, it's reported for convenience. 
required: true candidates: @@ -150,23 +195,23 @@ classes: - value: | { "type": "EntityMentionResolutionResponse", - "entityMentionId": { - "requestId": "324fs3r345vx", - "sourceId": "TEDSWS", - "entityType": "http://www.w3.org/ns/org#Organization" + "entity_mention_id": { + "request_id": "324fs3r345vx", + "source_id": "TEDSWS", + "entity_type": "http://www.w3.org/ns/org#Organization" }, "candidates": [ { - "clusterId": "324fs3r345vx-aa32wa", - "confidenceScore": 0.91 + "cluster_id": "324fs3r345vx-aa32wa", + "confidence_score": 0.91 }, { - "clusterId": "324fs3r345vx-bb45we", - "confidenceScore": 0.65 + "cluster_id": "324fs3r345vx-bb45we", + "confidence_score": 0.65 } ], "timestamp": "2026-01-14T12:34:59Z", - "ereRequestId": "324fs3r345vx:01" + "ere_request_id": "324fs3r345vx:01" } @@ -179,24 +224,24 @@ classes: The attributes of this class are based on [RFC-9457](https://datatracker.ietf.org/doc/html/rfc9457). attributes: - errorType: + error_type: description: | A string representing the error type, eg, the FQN of the raised exception. This corresponds to RFC-9457's `type`. required: true - errorTitle: + error_title: description: | A human readable brief message about the error that occurred. This corresponds to RFC-9457's `title`. - errorDetail: + error_detail: description: | A human readable detailed message about the error that occurred. This corresponds to RFC-9457's `detail`. - errorTrace: + error_trace: description: | A string representing a (stack) trace of the error that occurred. 
@@ -206,117 +251,10 @@ classes: - value: | { "type": "EREErrorResponse", - "requestId": "324fs3r345vx", - "errorType": "ere.exceptions.MalformedRequestError", - "errorTitle": "The entity data is missing in the request", - "errorDetail": "The 'entity' attribute is required in EntityMentionResolutionRequest message", + "request_id": "324fs3r345vx", + "error_type": "ere.exceptions.MalformedRequestError", + "error_title": "The entity data is missing in the request", + "error_detail": "The 'entity' attribute is required in EntityMentionResolutionRequest message", // Optional and not recommended for production use - "errorTrace": "Traceback (most recent call last):\n File \"/app/ere/service.py\", line 45, in process_request\n..." + "error_trace": "Traceback (most recent call last):\n File \"/app/ere/service.py\", line 45, in process_request\n..." } - - - EntityMention: - description: | - An entity mention is a representation of a real-world entity, as provided by the ERS. - It contains the entity data, along with metadata like type and format. - attributes: - identifier: - description: | - The identifier (with the ERS-derived components) of the entity mention. - range: EntityMentionIdentifier - required: true - contentType: - description: | - A string about the MIME format of `content` (e.g. text/turtle, application/ld+json) - required: true - content: - description: | - A code string representing the entity mention details (eg, RDF or XML description). - required: true - - - EntityMentionIdentifier: - description: | - A container that groups the attributes needed to identify an entity mention in a resolution request - or response. - - As per ERS architectural decision, in the whole ERS and ERE systems, there is always a deterministic - method to build a canonical identifier from the combination of `sourceId`, `requestId` and `entityType` - (eg, string concatenation plus some prefix). 
Similarly, a cluster ID (mentioned in various places in - in this hereby ERE service schema) can be built from an entity that is initially the only cluster member. - attributes: - sourceId: - description: | - The ID or URI of the ERS client that originated the request. This identifies an application or a - person accessing the ERS system. - required: true - requestId: - description: | - A string representing the unique ID of the request made to the ERS system. In general, this is unique - only within the scope of the source and the entity type, ie, within `sourceId` and `entityType`. - - Moreover, this is **not** the same as `ereRequestId`, which instead, is internal to the ERE and is - used to match responses to requests. - range: string - required: true - entityType: - description: | - A string representing the entity type (based on CET). This is typically a URI. - - Note that this is at this level, and not at `EntityMention`, since, as said above, - it's needed to identify the entity, even when its content is not present. For the same - reason, it's used both for `EREResolutionRequest` and `EREResolutionResponse` messages., - required: true - - - ClusterReference: - description: | - A reference to a cluster to which an entity is deemed to belong, with an associated confidence score. - - A cluster is a set of entity mentions that have been determined to refer to the same real-world entity. - Each cluster has a unique clusterId. - - A cluster reference is used to report the association between an entity mention and a cluster - of equivalence. - attributes: - clusterId: - description: | - The identifier of the cluster/canonical entity that is considered equivalent to the - subject entity mention that an `EntityMentionResolutionResponse` refers to. - required: true - confidenceScore: - range: float - description: | - A 0-1 value of how confident the ERE is about the equivalence between the subject entity mention - and the target canonical entity. 
- required: true - minimum_value: 0.0 - maximum_value: 1.0 - - - FullRebuildRequest: - is_a: ERERequest - description: | - A request to reset all the resolutions computed so far and possibly rebuild them as - requests about old entities arrive again (and build new entities from scratch as usually). - - It is expected that the ERE client re-sends all the entities to be resolved again, - using `EntityMentionResolutionRequest` messages exactly as the first time the resolutions - were built. This implies the a client like the ERS logs/persists the entities it receives - to resolve and also saves manual overriding of ERE results. - - Moreover: - * The ERE must keep track of past `EntityMention` marked as canonical. - * The ERE must retain requests with `excludedClusterIds` and apply them again when the - same entity mention is re-sent after the full rebuild. TODO: see notes about these properties, - on the possible need of withdrawing exclusions. - - - FullRebuildResponse: - is_a: EREResponse - description: | - A response to a `FullRebuildRequest`, confirming that the rebuild process has started. - - As for all the requests, this carries the `ereRequestId`, which matches the full rebuild - request being acknowledged. - diff --git a/resources/scripts/generate_models.py b/resources/scripts/generate_models.py new file mode 100644 index 0000000..9ab8df6 --- /dev/null +++ b/resources/scripts/generate_models.py @@ -0,0 +1,104 @@ +"""Generate Pydantic models from LinkML schemas using split generation. + +This script is invoked by the Makefile and receives all paths as CLI arguments +so that the Makefile remains the single source of truth for project layout. +""" + +from __future__ import annotations + +import argparse +import os +import sys +from pathlib import Path + +from linkml.generators.pydanticgen import PydanticGenerator +from linkml.generators.pydanticgen.pydanticgen import SplitMode + +# Pattern applied to imported schema names to derive Python module names. +# e.g. 
"coreSchema" -> ".core" +SPLIT_PATTERN = ".{{ schema.name | replace('Schema', '') | replace('-', '_') | lower }}" + + +def parse_args(argv: list[str] | None = None) -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="Generate Pydantic models from a LinkML schema.", + ) + parser.add_argument( + "--schema", + required=True, + type=Path, + help="Path to the top-level LinkML YAML schema (e.g. resources/schemas/ere-service-schema-v0.1.0.yaml).", + ) + parser.add_argument( + "--output", + required=True, + type=Path, + help="Destination path for the main generated Python module (e.g. src/erspec/models/ere.py).", + ) + parser.add_argument( + "--template-dir", + required=True, + type=Path, + help="Directory containing Jinja2 template overrides for the Pydantic generator.", + ) + parser.add_argument( + "--schemas-dir", + required=True, + type=Path, + help="Directory that contains all schema YAML files (used as working directory for relative imports).", + ) + return parser.parse_args(argv) + + +def generate_models( + schema: Path, + output: Path, + template_dir: Path, + schemas_dir: Path, +) -> None: + """Generate all models from the given schema using split generation.""" + output.parent.mkdir(parents=True, exist_ok=True) + + # LinkML resolves relative schema imports from the CWD, so we need to + # chdir into the schemas directory while generating. + original_dir = Path.cwd() + os.chdir(schemas_dir) + + try: + results = PydanticGenerator.generate_split( + schema=str(schema.name), + output_path=str(output), + split_pattern=SPLIT_PATTERN, + template_dir=str(template_dir), + split_mode=SplitMode.FULL, + ) + print(f"Generated {len(results)} module(s).") + finally: + os.chdir(original_dir) + + +def main() -> None: + args = parse_args() + + # Resolve all paths relative to the project root (CWD when Make invokes us). 
+ project_root = Path.cwd() + schema = (project_root / args.schema).resolve() + output = (project_root / args.output).resolve() + template_dir = (project_root / args.template_dir).resolve() + schemas_dir = (project_root / args.schemas_dir).resolve() + + for label, path in [("schema", schema), ("template-dir", template_dir), ("schemas-dir", schemas_dir)]: + if not path.exists(): + print(f"Error: --{label} path does not exist: {path}", file=sys.stderr) + sys.exit(1) + + generate_models( + schema=schema, + output=output, + template_dir=template_dir, + schemas_dir=schemas_dir, + ) + + +if __name__ == "__main__": + main() diff --git a/resources/scripts/linkml-classes.py b/resources/scripts/linkml-classes.py new file mode 100644 index 0000000..ff5ed59 --- /dev/null +++ b/resources/scripts/linkml-classes.py @@ -0,0 +1,36 @@ +""" +An attempt to fix the fact that `linkml generate --no-mergeimports` [doesn't work](https://github.com/linkml/linkml/issues/1296). + +This script lists all the classes defined in a LinkML file, possibly in a format that can be passed to the +generator command, ie, as a sequence of `--classes` restrictions. + +The problem is this doesn't work either, since the resulting file drags linked imported classes anyway +into output like a UML diagram. + +TODO: delete? +""" +import sys +import yaml +import argparse + +def main(): + + parser = argparse.ArgumentParser( + description="List class names from the 'classes' section of a LinkML YAML file." 
+ ) + parser.add_argument("yaml_file", help="Path to the LinkML YAML file") + parser.add_argument("--prefix", default="", help="Prefix to prepend to each class name") + args = parser.parse_args() + + with open(args.yaml_file, "r") as f: + data = yaml.safe_load(f) + + classes = data.get("classes", {}) + for class_name in classes.keys(): + if args.prefix: + print(f"{args.prefix} {class_name}") + else: + print(class_name) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/resources/templates/base_model.py.jinja b/resources/templates/base_model.py.jinja new file mode 100644 index 0000000..edefbe8 --- /dev/null +++ b/resources/templates/base_model.py.jinja @@ -0,0 +1,6 @@ +{# Filters out LinkML boilerplate: ConfiguredBaseModel and LinkMLMeta #} +{% if name != "LinkMLMeta" and name != "ConfiguredBaseModel" %} +from erspec.models.pydantic_model import PydanticModel + +ConfiguredBaseModel = PydanticModel +{% endif %} diff --git a/resources/templates/class.py.jinja b/resources/templates/class.py.jinja new file mode 100644 index 0000000..3030bc6 --- /dev/null +++ b/resources/templates/class.py.jinja @@ -0,0 +1,29 @@ +{# Replaces ConfiguredBaseModel with custom PydanticModel #} +{% if bases is string %} + {% if bases == "ConfiguredBaseModel" %} +class {{ name }}(PydanticModel): + {% else %} +class {{ name }}({{ bases }}): + {% endif %} +{% else %} + {% set resolved_bases = bases | map('replace', 'ConfiguredBaseModel', 'PydanticModel') | list %} +class {{ name }}({{ resolved_bases | join(', ') }}): +{% endif %} + {% if description %} + """{{ description | trim }}""" + {% endif -%} + {% if attributes or validators %} + {% if attributes %} + {% for attr in attributes.values() %} + {{ attr }} + {% endfor -%} + {% endif %} + {% if validators %} + {% for validator in validators.values() %} + + {{ validator }} + {% endfor -%} + {% endif %} + {% else %} + pass + {% endif %} diff --git a/resources/templates/imports.py.jinja 
b/resources/templates/imports.py.jinja new file mode 100644 index 0000000..796ac59 --- /dev/null +++ b/resources/templates/imports.py.jinja @@ -0,0 +1,38 @@ +{# Standard LinkML imports template #} +{% macro import_(module, alias=None, objects = None) %} +{%- if objects is none and alias is none %} +import {{ module }} +{%- elif objects is none and alias is string %} +import {{ module }} as {{ alias }} +{%- else %} + {% if objects | length == 1 %} +from {{ module }} import {{ objects[0]['name'] }} {% if objects[0]['alias'] is not none %} as {{ objects[0]['alias'] }} {% endif %} + {%- else %} +from {{ module }} import ( + {% for object in objects %} + {% if object['alias'] is string %} + {{ object['name'] }} as {{ object['alias'] }} + {%- else %} + {{ object['name'] }} + {%- endif %} + {% if not loop.last %},{{ '\n' }}{% else %}{{ '\n' }}{% endif %} + {% endfor %} +) + {%- endif %} +{%- endif %} +{% endmacro %} +{%- if module %} +{{ import_(module, alias, objects) }} +{% endif -%} +{%- if imports -%} + {%- if render_sorted -%} + {% for i in range(imports | length) %} +{{ imports[i] }} +{%- if not loop.last and import_groups[i] != import_groups[i+1] %}{{ '\n' }}{% endif -%} + {% endfor %} + {%- else -%} + {%- for import in imports -%} +{{ import }} + {%- endfor -%} + {%- endif -%} +{% endif -%} diff --git a/resources/templates/module.py.jinja b/resources/templates/module.py.jinja new file mode 100644 index 0000000..be5a003 --- /dev/null +++ b/resources/templates/module.py.jinja @@ -0,0 +1,27 @@ +{# Module template: removes LinkML boilerplate, uses custom PydanticModel #} +{{ python_imports }} +from erspec.models.pydantic_model import PydanticModel + + +metamodel_version = "{{ metamodel_version }}" +version = "{{ version if version else None }}" + +{% if injected_classes %} + {% for c in injected_classes %} + {% if "LinkMLMeta" not in c and "ConfiguredBaseModel" not in c %} + +{{ c }} + {% endif %} + {% endfor %} +{% endif %} +{% if enums %} + {% for e in 
enums.values() %} + +{{ e }} + {% endfor %} +{% endif %} + +{% for c in classes.values() %} + +{{ c }} +{% endfor %} diff --git a/src/ere/models/core.py b/src/ere/models/core.py deleted file mode 100644 index a3ada46..0000000 --- a/src/ere/models/core.py +++ /dev/null @@ -1,490 +0,0 @@ -from __future__ import annotations - -import re -import sys -from datetime import ( - date, - datetime, - time -) -from decimal import Decimal -from enum import Enum -from typing import ( - Any, - ClassVar, - Literal, - Optional, - Union -) - -from pydantic import ( - BaseModel, - ConfigDict, - Field, - RootModel, - SerializationInfo, - SerializerFunctionWrapHandler, - field_validator, - model_serializer -) - - -metamodel_version = "None" -version = "0.1.0" - - -class ConfiguredBaseModel(BaseModel): - model_config = ConfigDict( - serialize_by_alias = True, - validate_by_name = True, - validate_assignment = True, - validate_default = True, - extra = "forbid", - arbitrary_types_allowed = True, - use_enum_values = True, - strict = False, - ) - - @model_serializer(mode='wrap', when_used='unless-none') - def treat_empty_lists_as_none( - self, handler: SerializerFunctionWrapHandler, - info: SerializationInfo) -> dict[str, Any]: - if info.exclude_none: - _instance = self.model_copy() - for field, field_info in type(_instance).model_fields.items(): - if getattr(_instance, field) == [] and not( - field_info.is_required()): - setattr(_instance, field, None) - else: - _instance = self - return handler(_instance, info) - - - -class LinkMLMeta(RootModel): - root: dict[str, Any] = {} - model_config = ConfigDict(frozen=True) - - def __getattr__(self, key:str): - return getattr(self.root, key) - - def __getitem__(self, key:str): - return self.root[key] - - def __setitem__(self, key:str, value): - self.root[key] = value - - def __contains__(self, key:str) -> bool: - return key in self.root - - -linkml_meta = LinkMLMeta({'default_prefix': 'ere', - 'default_range': 'string', - 'description': 'A LinkML 
schema for the ERS/ERE Service', - 'id': 'https://data.europa.eu/ers/schema/ere', - 'imports': ['linkml:types'], - 'name': 'ereServiceSchema', - 'prefixes': {'ere': {'prefix_prefix': 'ere', - 'prefix_reference': 'https://data.europa.eu/ers/schema/ere/'}, - 'linkml': {'prefix_prefix': 'linkml', - 'prefix_reference': 'https://w3id.org/linkml/'}}, - 'source_file': 'resources/schemas/ere-service-schema-v0.1.0.yaml'} ) - - -class EREMessage(ConfiguredBaseModel): - """ - Root abstraction to represent attributes common to both requests and results. - This is modelled as a mixin in LinkML (so that it can't be instantiated directly). - - """ - linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'abstract': True, 'from_schema': 'https://data.europa.eu/ers/schema/ere'}) - - type: Literal["EREMessage"] = Field(default="EREMessage", description="""The type of the request or result. - -As per LinkML specification, `designates_type` is used here in order to allow for this -slot to tell the concrete subclass that an instance (such as a JSON object) belongs to. - -In other words, a particular request will have `type` set with values like -`EntityMentionResolutionRequest` or `EntityResolutionResult` -""", json_schema_extra = { "linkml_meta": {'designates_type': True, 'domain_of': ['EREMessage']} }) - ereRequestId: str = Field(default=..., description="""A string representing the unique ID of an ERE request, or the ID of the request a response is about. -This **is not** the same as `requestId` + `sourceId`. -""", json_schema_extra = { "linkml_meta": {'domain_of': ['EREMessage']} }) - timestamp: Optional[datetime ] = Field(default=None, description="""The time when the message was created. Should be in ISO-8601 format. -""", json_schema_extra = { "linkml_meta": {'domain_of': ['EREMessage']} }) - - -class ERERequest(EREMessage): - """ - Root class to represent all the requests sent to the ERE. 
- - """ - linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'abstract': True, 'from_schema': 'https://data.europa.eu/ers/schema/ere'}) - - type: Literal["ERERequest"] = Field(default="ERERequest", description="""The type of the request or result. - -As per LinkML specification, `designates_type` is used here in order to allow for this -slot to tell the concrete subclass that an instance (such as a JSON object) belongs to. - -In other words, a particular request will have `type` set with values like -`EntityMentionResolutionRequest` or `EntityResolutionResult` -""", json_schema_extra = { "linkml_meta": {'designates_type': True, 'domain_of': ['EREMessage']} }) - ereRequestId: str = Field(default=..., description="""A string representing the unique ID of an ERE request, or the ID of the request a response is about. -This **is not** the same as `requestId` + `sourceId`. -""", json_schema_extra = { "linkml_meta": {'domain_of': ['EREMessage']} }) - timestamp: Optional[datetime ] = Field(default=None, description="""The time when the message was created. Should be in ISO-8601 format. -""", json_schema_extra = { "linkml_meta": {'domain_of': ['EREMessage']} }) - - -class EREResponse(EREMessage): - """ - Root class to represent all the responses sent by the ERE. - - """ - linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'abstract': True, 'from_schema': 'https://data.europa.eu/ers/schema/ere'}) - - type: Literal["EREResponse"] = Field(default="EREResponse", description="""The type of the request or result. - -As per LinkML specification, `designates_type` is used here in order to allow for this -slot to tell the concrete subclass that an instance (such as a JSON object) belongs to. 
- -In other words, a particular request will have `type` set with values like -`EntityMentionResolutionRequest` or `EntityResolutionResult` -""", json_schema_extra = { "linkml_meta": {'designates_type': True, 'domain_of': ['EREMessage']} }) - ereRequestId: str = Field(default=..., description="""A string representing the unique ID of an ERE request, or the ID of the request a response is about. -This **is not** the same as `requestId` + `sourceId`. -""", json_schema_extra = { "linkml_meta": {'domain_of': ['EREMessage']} }) - timestamp: Optional[datetime ] = Field(default=None, description="""The time when the message was created. Should be in ISO-8601 format. -""", json_schema_extra = { "linkml_meta": {'domain_of': ['EREMessage']} }) - - -class EntityMentionResolutionRequest(ERERequest): - """ - An entity resolution request sent to the ERE, containing the entity to be resolved. - - """ - linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'examples': [{'description': 'a regular request', - 'value': '{\n' - ' "type": "EntityMentionResolutionRequest",\n' - ' "entityMention": { \n' - ' "identifier": {\n' - ' "requestId": "324fs3r345vx",\n' - ' "sourceId": "TEDSWS",\n' - ' "entityType": ' - '"http://www.w3.org/ns/org#Organization"\n' - ' },\n' - ' "content": "epd:ent005 a org:Organization; ... ' - 'cccev:telephone \\"+44 1924306780\\" .",\n' - ' "contentType": "text/turtle"\n' - ' },\n' - ' "timestamp": "2026-01-14T12:34:56Z",\n' - ' // As said, we need this internal ID and it can be ' - 'auto-generated (eg, with UUIDs)\n' - ' "ereRequestId": "324fs3r345vx:01"\n' - '}\n'}, - {'description': 'a re-rebuild request (ie, carrying a rejection ' - 'list)', - 'value': '{\n' - ' "type": "EntityMentionResolutionRequest",\n' - ' "entityMention": { \n' - ' "identifier": {\n' - ' "requestId": "324fs3r345vxab",\n' - ' "sourceId": "TEDSWS",\n' - ' "entityType": ' - '"http://www.w3.org/ns/org#Organization",\n' - ' },\n' - ' "content": "epd:ent005 a org:Organization; ... 
' - 'cccev:telephone \\"+44 1924306780\\" .",\n' - ' "contentType": "text/turtle"\n' - ' },\n' - ' "excludedClusterIds": [\n' - ' "324fs3r345vx-bb45we",\n' - ' "324fs3r345vx-cc67ui"\n' - ' ],\n' - ' "timestamp": "2026-01-14T12:40:56Z",\n' - ' "ereRequestId": "324fs3r345vxab:01"\n' - '}\n'}], - 'from_schema': 'https://data.europa.eu/ers/schema/ere'}) - - entityMention: EntityMention = Field(default=..., description="""The data about the entity to be resolved. Note that, at least for the moment, we don't support -batch requests, so this property is single-valued. -""", json_schema_extra = { "linkml_meta": {'domain_of': ['EntityMentionResolutionRequest']} }) - excludedClusterIds: Optional[list[str]] = Field(default=[], description="""When this is present, the resolution must not bin the entity mention into any of the -listed clusters. This can be used to reject a previous resolution proposed by the ERE. - -The exact reaction to this is implementation dependent. In the simplest case, the ERE -might just create a singleton cluster with this entity as member. In a more advanced -case, it might recompute the similarity with more advanced algorithms or use updated -data. - -TODO: Can this be revised? What does it happen if an exclusion was made by mistake? -""", json_schema_extra = { "linkml_meta": {'domain_of': ['EntityMentionResolutionRequest']} }) - type: Literal["EntityMentionResolutionRequest"] = Field(default="EntityMentionResolutionRequest", description="""The type of the request or result. - -As per LinkML specification, `designates_type` is used here in order to allow for this -slot to tell the concrete subclass that an instance (such as a JSON object) belongs to. 
- -In other words, a particular request will have `type` set with values like -`EntityMentionResolutionRequest` or `EntityResolutionResult` -""", json_schema_extra = { "linkml_meta": {'designates_type': True, 'domain_of': ['EREMessage']} }) - ereRequestId: str = Field(default=..., description="""A string representing the unique ID of an ERE request, or the ID of the request a response is about. -This **is not** the same as `requestId` + `sourceId`. -""", json_schema_extra = { "linkml_meta": {'domain_of': ['EREMessage']} }) - timestamp: Optional[datetime ] = Field(default=None, description="""The time when the message was created. Should be in ISO-8601 format. -""", json_schema_extra = { "linkml_meta": {'domain_of': ['EREMessage']} }) - - -class EntityMentionResolutionResponse(EREResponse): - """ - An entity resolution response returned by the ERE. - - This is basically a list of candidate clusters to which the entity is deemed to be equivalent. - - Note that, for the moment, we don't support batch requests. In future, we might support requests - with multiple subjects in the `EntityMention` content (eg, RDF with multiple subjects), in which case - we might need to return multiple `EntityMentionResolutionResponse` messages, each with additional - properties such as `entityIndex` and `totalEntities`. 
- - """ - linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'examples': [{'value': '{\n' - ' "type": "EntityMentionResolutionResponse",\n' - ' "entityMentionId": {\n' - ' "requestId": "324fs3r345vx",\n' - ' "sourceId": "TEDSWS",\n' - ' "entityType": ' - '"http://www.w3.org/ns/org#Organization"\n' - ' },\n' - ' "candidates": [\n' - ' { \n' - ' "clusterId": "324fs3r345vx-aa32wa",\n' - ' "confidenceScore": 0.91\n' - ' },\n' - ' { \n' - ' "clusterId": "324fs3r345vx-bb45we",\n' - ' "confidenceScore": 0.65\n' - ' }\n' - ' ],\n' - ' "timestamp": "2026-01-14T12:34:59Z",\n' - ' "ereRequestId": "324fs3r345vx:01"\n' - '}\n' - ' \n'}], - 'from_schema': 'https://data.europa.eu/ers/schema/ere'}) - - entityMentionId: EntityMentionIdentifier = Field(default=..., description="""The identifier of the entity mention that has been resolved. - -This isn't strictly needed, since the `ereRequestId` already links the response to -the request's entity mention. Yet, it's reported for convenience. -""", json_schema_extra = { "linkml_meta": {'domain_of': ['EntityMentionResolutionResponse']} }) - candidates: list[ClusterReference] = Field(default=..., description="""The set of cluster reference/score pairs representing the candidate clusters -that the entity mention in the original request could align to (be equivalent to). -""", json_schema_extra = { "linkml_meta": {'domain_of': ['EntityMentionResolutionResponse']} }) - type: Literal["EntityMentionResolutionResponse"] = Field(default="EntityMentionResolutionResponse", description="""The type of the request or result. - -As per LinkML specification, `designates_type` is used here in order to allow for this -slot to tell the concrete subclass that an instance (such as a JSON object) belongs to. 
- -In other words, a particular request will have `type` set with values like -`EntityMentionResolutionRequest` or `EntityResolutionResult` -""", json_schema_extra = { "linkml_meta": {'designates_type': True, 'domain_of': ['EREMessage']} }) - ereRequestId: str = Field(default=..., description="""A string representing the unique ID of an ERE request, or the ID of the request a response is about. -This **is not** the same as `requestId` + `sourceId`. -""", json_schema_extra = { "linkml_meta": {'domain_of': ['EREMessage']} }) - timestamp: Optional[datetime ] = Field(default=None, description="""The time when the message was created. Should be in ISO-8601 format. -""", json_schema_extra = { "linkml_meta": {'domain_of': ['EREMessage']} }) - - -class EREErrorResponse(EREResponse): - """ - Response sent by the ERE when some error/exception occurs while processing a request. - For instance, this may happen if the request is malformed or some internal error happens. - - The attributes of this class are based on [RFC-9457](https://datatracker.ietf.org/doc/html/rfc9457). - - """ - linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'examples': [{'value': '{\n' - ' "type": "EREErrorResponse",\n' - ' "requestId": "324fs3r345vx",\n' - ' "errorType": ' - '"ere.exceptions.MalformedRequestError",\n' - ' "errorTitle": "The entity data is missing in the ' - 'request",\n' - ' "errorDetail": "The \'entity\' attribute is ' - 'required in EntityMentionResolutionRequest message",\n' - ' // Optional and not recommended for production use\n' - ' "errorTrace": "Traceback (most recent call ' - 'last):\\n File \\"/app/ere/service.py\\", line 45, ' - 'in process_request\\n..."\n' - '}\n'}], - 'from_schema': 'https://data.europa.eu/ers/schema/ere'}) - - errorType: str = Field(default=..., description="""A string representing the error type, eg, the FQN of the raised exception. - -This corresponds to RFC-9457's `type`. 
-""", json_schema_extra = { "linkml_meta": {'domain_of': ['EREErrorResponse']} }) - errorTitle: Optional[str] = Field(default=None, description="""A human readable brief message about the error that occurred. - -This corresponds to RFC-9457's `title`. -""", json_schema_extra = { "linkml_meta": {'domain_of': ['EREErrorResponse']} }) - errorDetail: Optional[str] = Field(default=None, description="""A human readable detailed message about the error that occurred. - -This corresponds to RFC-9457's `detail`. -""", json_schema_extra = { "linkml_meta": {'domain_of': ['EREErrorResponse']} }) - errorTrace: Optional[str] = Field(default=None, description="""A string representing a (stack) trace of the error that occurred. - -This is optional and typically used for debugging purposes only, since -exposing this kind of server-side information is a security risk. -""", json_schema_extra = { "linkml_meta": {'domain_of': ['EREErrorResponse']} }) - type: Literal["EREErrorResponse"] = Field(default="EREErrorResponse", description="""The type of the request or result. - -As per LinkML specification, `designates_type` is used here in order to allow for this -slot to tell the concrete subclass that an instance (such as a JSON object) belongs to. - -In other words, a particular request will have `type` set with values like -`EntityMentionResolutionRequest` or `EntityResolutionResult` -""", json_schema_extra = { "linkml_meta": {'designates_type': True, 'domain_of': ['EREMessage']} }) - ereRequestId: str = Field(default=..., description="""A string representing the unique ID of an ERE request, or the ID of the request a response is about. -This **is not** the same as `requestId` + `sourceId`. -""", json_schema_extra = { "linkml_meta": {'domain_of': ['EREMessage']} }) - timestamp: Optional[datetime ] = Field(default=None, description="""The time when the message was created. Should be in ISO-8601 format. 
-""", json_schema_extra = { "linkml_meta": {'domain_of': ['EREMessage']} }) - - -class EntityMention(ConfiguredBaseModel): - """ - An entity mention is a representation of a real-world entity, as provided by the ERS. - It contains the entity data, along with metadata like type and format. - - """ - linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'from_schema': 'https://data.europa.eu/ers/schema/ere'}) - - identifier: EntityMentionIdentifier = Field(default=..., description="""The identifier (with the ERS-derived components) of the entity mention. -""", json_schema_extra = { "linkml_meta": {'domain_of': ['EntityMention']} }) - contentType: str = Field(default=..., description="""A string about the MIME format of `content` (e.g. text/turtle, application/ld+json) -""", json_schema_extra = { "linkml_meta": {'domain_of': ['EntityMention']} }) - content: str = Field(default=..., description="""A code string representing the entity mention details (eg, RDF or XML description). -""", json_schema_extra = { "linkml_meta": {'domain_of': ['EntityMention']} }) - - -class EntityMentionIdentifier(ConfiguredBaseModel): - """ - A container that groups the attributes needed to identify an entity mention in a resolution request - or response. - - As per ERS architectural decision, in the whole ERS and ERE systems, there is always a deterministic - method to build a canonical identifier from the combination of `sourceId`, `requestId` and `entityType` - (eg, string concatenation plus some prefix). Similarly, a cluster ID (mentioned in various places in - in this hereby ERE service schema) can be built from an entity that is initially the only cluster member. - - """ - linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'from_schema': 'https://data.europa.eu/ers/schema/ere'}) - - sourceId: str = Field(default=..., description="""The ID or URI of the ERS client that originated the request. This identifies an application or a -person accessing the ERS system. 
-""", json_schema_extra = { "linkml_meta": {'domain_of': ['EntityMentionIdentifier']} }) - requestId: str = Field(default=..., description="""A string representing the unique ID of the request made to the ERS system. In general, this is unique -only within the scope of the source and the entity type, ie, within `sourceId` and `entityType`. - -Moreover, this is **not** the same as `ereRequestId`, which instead, is internal to the ERE and is -used to match responses to requests. -""", json_schema_extra = { "linkml_meta": {'domain_of': ['EntityMentionIdentifier']} }) - entityType: str = Field(default=..., description="""A string representing the entity type (based on CET). This is typically a URI. - -Note that this is at this level, and not at `EntityMention`, since, as said above, -it's needed to identify the entity, even when its content is not present. For the same -reason, it's used both for `EREResolutionRequest` and `EREResolutionResponse` messages., -""", json_schema_extra = { "linkml_meta": {'domain_of': ['EntityMentionIdentifier']} }) - - -class ClusterReference(ConfiguredBaseModel): - """ - A reference to a cluster to which an entity is deemed to belong, with an associated confidence score. - - A cluster is a set of entity mentions that have been determined to refer to the same real-world entity. - Each cluster has a unique clusterId. - - A cluster reference is used to report the association between an entity mention and a cluster - of equivalence. - - """ - linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'from_schema': 'https://data.europa.eu/ers/schema/ere'}) - - clusterId: str = Field(default=..., description="""The identifier of the cluster/canonical entity that is considered equivalent to the -subject entity mention that an `EntityMentionResolutionResponse` refers to. 
-""", json_schema_extra = { "linkml_meta": {'domain_of': ['ClusterReference']} }) - confidenceScore: float = Field(default=..., description="""A 0-1 value of how confident the ERE is about the equivalence between the subject entity mention -and the target canonical entity. -""", ge=0.0, le=1.0, json_schema_extra = { "linkml_meta": {'domain_of': ['ClusterReference']} }) - - -class FullRebuildRequest(ERERequest): - """ - A request to reset all the resolutions computed so far and possibly rebuild them as - requests about old entities arrive again (and build new entities from scratch as usually). - - It is expected that the ERE client re-sends all the entities to be resolved again, - using `EntityMentionResolutionRequest` messages exactly as the first time the resolutions - were built. This implies the a client like the ERS logs/persists the entities it receives - to resolve and also saves manual overriding of ERE results. - - Moreover: - * The ERE must keep track of past `EntityMention` marked as canonical. - * The ERE must retain requests with `excludedClusterIds` and apply them again when the - same entity mention is re-sent after the full rebuild. TODO: see notes about these properties, - on the possible need of withdrawing exclusions. - - """ - linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'from_schema': 'https://data.europa.eu/ers/schema/ere'}) - - type: Literal["FullRebuildRequest"] = Field(default="FullRebuildRequest", description="""The type of the request or result. - -As per LinkML specification, `designates_type` is used here in order to allow for this -slot to tell the concrete subclass that an instance (such as a JSON object) belongs to. 
- -In other words, a particular request will have `type` set with values like -`EntityMentionResolutionRequest` or `EntityResolutionResult` -""", json_schema_extra = { "linkml_meta": {'designates_type': True, 'domain_of': ['EREMessage']} }) - ereRequestId: str = Field(default=..., description="""A string representing the unique ID of an ERE request, or the ID of the request a response is about. -This **is not** the same as `requestId` + `sourceId`. -""", json_schema_extra = { "linkml_meta": {'domain_of': ['EREMessage']} }) - timestamp: Optional[datetime ] = Field(default=None, description="""The time when the message was created. Should be in ISO-8601 format. -""", json_schema_extra = { "linkml_meta": {'domain_of': ['EREMessage']} }) - - -class FullRebuildResponse(EREResponse): - """ - A response to a `FullRebuildRequest`, confirming that the rebuild process has started. - - As for all the requests, this carries the `ereRequestId`, which matches the full rebuild - request being acknowledged. - - """ - linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'from_schema': 'https://data.europa.eu/ers/schema/ere'}) - - type: Literal["FullRebuildResponse"] = Field(default="FullRebuildResponse", description="""The type of the request or result. - -As per LinkML specification, `designates_type` is used here in order to allow for this -slot to tell the concrete subclass that an instance (such as a JSON object) belongs to. - -In other words, a particular request will have `type` set with values like -`EntityMentionResolutionRequest` or `EntityResolutionResult` -""", json_schema_extra = { "linkml_meta": {'designates_type': True, 'domain_of': ['EREMessage']} }) - ereRequestId: str = Field(default=..., description="""A string representing the unique ID of an ERE request, or the ID of the request a response is about. -This **is not** the same as `requestId` + `sourceId`. 
-""", json_schema_extra = { "linkml_meta": {'domain_of': ['EREMessage']} }) - timestamp: Optional[datetime ] = Field(default=None, description="""The time when the message was created. Should be in ISO-8601 format. -""", json_schema_extra = { "linkml_meta": {'domain_of': ['EREMessage']} }) - - -# Model rebuild -# see https://pydantic-docs.helpmanual.io/usage/models/#rebuilding-a-model -EREMessage.model_rebuild() -ERERequest.model_rebuild() -EREResponse.model_rebuild() -EntityMentionResolutionRequest.model_rebuild() -EntityMentionResolutionResponse.model_rebuild() -EREErrorResponse.model_rebuild() -EntityMention.model_rebuild() -EntityMentionIdentifier.model_rebuild() -ClusterReference.model_rebuild() -FullRebuildRequest.model_rebuild() -FullRebuildResponse.model_rebuild() diff --git a/src/erspec/__init__.py b/src/erspec/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/erspec/models/__init__.py b/src/erspec/models/__init__.py new file mode 100644 index 0000000..70c4069 --- /dev/null +++ b/src/erspec/models/__init__.py @@ -0,0 +1,6 @@ +from erspec.models.pydantic_model import PydanticModel, fields + +__all__ = [ + "PydanticModel", + "fields", +] diff --git a/src/erspec/models/core.py b/src/erspec/models/core.py new file mode 100644 index 0000000..135dce7 --- /dev/null +++ b/src/erspec/models/core.py @@ -0,0 +1,160 @@ +from __future__ import annotations + +from datetime import ( + datetime +) +from enum import Enum +from typing import ( + Optional +) + +from pydantic import ( + Field +) + +from erspec.models.pydantic_model import PydanticModel + + +metamodel_version = "None" +version = "0.1.0" + + +class EntityType(str, Enum): + """ + Types of entities that can be resolved + """ + ORGANISATION = "ORGANISATION" + """ + An organization entity + """ + PROCEDURE = "PROCEDURE" + """ + A procurement procedure entity + """ + + +class UserActionType(str, Enum): + """ + Types of curator actions on entity mention resolutions + """ + ACCEPT_TOP = 
"ACCEPT_TOP" + """ + Curator accepted the top candidate from ERE + """ + ACCEPT_ALTERNATIVE = "ACCEPT_ALTERNATIVE" + """ + Curator selected an alternative candidate + """ + REJECT_ALL = "REJECT_ALL" + """ + Curator rejected all candidates + """ + + + +class EntityMention(PydanticModel): + """An entity mention is a representation of a real-world entity, as provided by the ERS. +It contains the entity data, along with metadata like type and format.""" + identifiedBy: EntityMentionIdentifier = Field(default=..., description="""The identification triad of the entity mention. +""", json_schema_extra = { "linkml_meta": {'domain_of': ['EntityMention']} }) + content_type: str = Field(default=..., description="""A string about the MIME format of `content` (e.g. text/turtle, application/ld+json) +""", json_schema_extra = { "linkml_meta": {'domain_of': ['EntityMention']} }) + content: str = Field(default=..., description="""A code string representing the entity mention details (eg, RDF or XML description). +""", json_schema_extra = { "linkml_meta": {'domain_of': ['EntityMention']} }) + parsed_representation: Optional[str] = Field(default=None, description="""JSON representation of the parsed entity data. +""", json_schema_extra = { "linkml_meta": {'domain_of': ['EntityMention']} }) + + +class EntityMentionIdentifier(PydanticModel): + """A container that groups the attributes needed to identify an entity mention in a resolution request +or response. + +As per ERS architectural decision, in the whole ERS and ERE systems, there is always a deterministic +method to build a canonical identifier from the combination of `sourceId`, `requestId` and `entityType` +(eg, string concatenation plus some prefix). Similarly, a cluster ID (mentioned in various places in +in this hereby ERE service schema) can be built from an entity that is initially the only cluster member.""" + source_id: str = Field(default=..., description="""The ID or URI of the ERS client that originated the request. 
This identifies an application or a +person accessing the ERS system. +""", json_schema_extra = { "linkml_meta": {'domain_of': ['EntityMentionIdentifier', 'LookupState']} }) + request_id: str = Field(default=..., description="""A string representing the unique ID of the request made to the ERS system. In general, this is unique +only within the scope of the source and the entity type, ie, within `sourceId` and `entityType`. + +Moreover, this is **not** the same as `ereRequestId`, which instead, is internal to the ERE and is +used to match responses to requests. +""", json_schema_extra = { "linkml_meta": {'domain_of': ['EntityMentionIdentifier']} }) + entity_type: str = Field(default=..., description="""A string representing the entity type (based on CET). This is typically a URI. + +Note that this is at this level, and not at `EntityMention`, since, as said above, +it's needed to identify the entity, even when its content is not present. For the same +reason, it's used both for `EREResolutionRequest` and `EREResolutionResponse` messages., +""", json_schema_extra = { "linkml_meta": {'domain_of': ['EntityMentionIdentifier']} }) + + +class LookupState(PydanticModel): + """Tracks the resolution state for entity mentions from a particular source. +Records when the source was last resolved against the canonical clustering.""" + source_id: str = Field(default=..., description="""The ID or URI of the ERS client (originator) for which we track lookup state. +""", json_schema_extra = { "linkml_meta": {'domain_of': ['EntityMentionIdentifier', 'LookupState']} }) + last_snapshot: datetime = Field(default=..., description="""Timestamp of the last resolution operation for this source. +Used to determine if a refreshBulk or other update is needed. 
+""", json_schema_extra = { "linkml_meta": {'domain_of': ['LookupState']} }) + + +class ClusterReference(PydanticModel): + """A reference to a cluster to which an entity is deemed to belong, with an associated confidence and similarity scores. + +A cluster is a set of entity mentions that have been determined to refer to the same real-world entity. +Each cluster has a unique clusterId. + +A cluster reference is used to report the association between an entity mention and a cluster +of equivalence.""" + cluster_id: str = Field(default=..., description="""The identifier of the cluster/canonical entity that is considered equivalent to the +subject entity mention that an `EntityMentionResolutionResponse` refers to. +""", json_schema_extra = { "linkml_meta": {'domain_of': ['ClusterReference']} }) + confidence_score: float = Field(default=..., description="""A 0-1 value of how confident the ERE is about the equivalence between the subject entity mention +and the target canonical entity. +""", ge=0.0, le=1.0, json_schema_extra = { "linkml_meta": {'domain_of': ['ClusterReference']} }) + similarity_score: float = Field(default=..., description="""A 0-1 score representing the pairwise comparison between a mention and a cluster (likely +based on a representative representation). +""", ge=0.0, le=1.0, json_schema_extra = { "linkml_meta": {'domain_of': ['ClusterReference']} }) + + +class Decision(PydanticModel): + """Canonical placement of an entity mention to a cluster. 
+Represents the latest resolution decision (from ERE or curator override).""" + id: str = Field(default=..., description="""Unique decision identifier""", json_schema_extra = { "linkml_meta": {'domain_of': ['Decision', 'UserAction']} }) + about_entity_mention: EntityMentionIdentifier = Field(default=..., description="""The entity mention being resolved""", json_schema_extra = { "linkml_meta": {'domain_of': ['Decision', 'UserAction']} }) + current_placement: ClusterReference = Field(default=..., description="""The accepted cluster for this mention (latest from ERE or curator). +""", json_schema_extra = { "linkml_meta": {'domain_of': ['Decision']} }) + candidates: list[ClusterReference] = Field(default=..., description="""Top-N alternative clusters proposed by ERE (for curation UI preview). +""", json_schema_extra = { "linkml_meta": {'domain_of': ['Decision', 'UserAction', 'EntityMentionResolutionResponse']} }) + created_at: datetime = Field(default=..., description="""When the decision was first created""", json_schema_extra = { "linkml_meta": {'domain_of': ['Decision', 'UserAction']} }) + updated_at: Optional[datetime ] = Field(default=None, description="""When the decision was last updated (ERE refresh or curator action)""", json_schema_extra = { "linkml_meta": {'domain_of': ['Decision']} }) + + +class UserAction(PydanticModel): + """Immutable record of a curator action on an entity mention resolution. +Stored in the User Action Log for traceability and training. 
+ +NOT related to ERE messages; represents curator intent only.""" + id: str = Field(default=..., description="""Unique audit trail entry identifier""", json_schema_extra = { "linkml_meta": {'domain_of': ['Decision', 'UserAction']} }) + about_entity_mention: EntityMentionIdentifier = Field(default=..., description="""The entity mention the curator acted upon""", json_schema_extra = { "linkml_meta": {'domain_of': ['Decision', 'UserAction']} }) + candidates: list[ClusterReference] = Field(default=..., description="""The candidate clusters presented to the curator for selection. +Ordered by confidence (same as shown in curation UI). +""", json_schema_extra = { "linkml_meta": {'domain_of': ['Decision', 'UserAction', 'EntityMentionResolutionResponse']} }) + selected_cluster: Optional[ClusterReference] = Field(default=None, description="""The cluster selected by the curator (if action was ACCEPT_TOP +or ACCEPT_ALTERNATIVE). NULL if action was REJECT_ALL. +""", json_schema_extra = { "linkml_meta": {'domain_of': ['UserAction']} }) + action_type: UserActionType = Field(default=..., description="""The type of action the curator performed""", json_schema_extra = { "linkml_meta": {'domain_of': ['UserAction']} }) + actor: str = Field(default=..., description="""User ID or identifier of the curator who performed the action""", json_schema_extra = { "linkml_meta": {'domain_of': ['UserAction']} }) + created_at: datetime = Field(default=..., description="""Timestamp when the curator action was recorded""", json_schema_extra = { "linkml_meta": {'domain_of': ['Decision', 'UserAction']} }) + metadata: Optional[str] = Field(default=None, description="""JSON metadata providing context (e.g., curator notes, reasoning). +""", json_schema_extra = { "linkml_meta": {'domain_of': ['UserAction']} }) + + +class CanonicalEntityIdentifier(PydanticModel): + """A logical identity construct providing a stable identity anchor. 
+Represents a cluster of equivalent entity mentions.""" + identifier: str = Field(default=..., description="""Unique identifier for the canonical entity.""", json_schema_extra = { "linkml_meta": {'domain_of': ['CanonicalEntityIdentifier']} }) + equivalent_to: list[EntityMentionIdentifier] = Field(default=..., description="""Entity mentions that have been resolved to this canonical entity.""", json_schema_extra = { "linkml_meta": {'domain_of': ['CanonicalEntityIdentifier']} }) + diff --git a/src/erspec/models/ere.py b/src/erspec/models/ere.py new file mode 100644 index 0000000..c786885 --- /dev/null +++ b/src/erspec/models/ere.py @@ -0,0 +1,253 @@ +from __future__ import annotations + +from datetime import ( + datetime +) +from enum import Enum +from typing import ( + Literal, + Optional +) + +from pydantic import ( + Field +) + +from .core import ( + ClusterReference, + EntityMention, + EntityMentionIdentifier +) + +from erspec.models.pydantic_model import PydanticModel + + +metamodel_version = "None" +version = "0.1.0" + + +class EntityType(str, Enum): + """ + Types of entities that can be resolved + """ + ORGANISATION = "ORGANISATION" + """ + An organization entity + """ + PROCEDURE = "PROCEDURE" + """ + A procurement procedure entity + """ + + +class UserActionType(str, Enum): + """ + Types of curator actions on entity mention resolutions + """ + ACCEPT_TOP = "ACCEPT_TOP" + """ + Curator accepted the top candidate from ERE + """ + ACCEPT_ALTERNATIVE = "ACCEPT_ALTERNATIVE" + """ + Curator selected an alternative candidate + """ + REJECT_ALL = "REJECT_ALL" + """ + Curator rejected all candidates + """ + + + +class EREMessage(PydanticModel): + """Root abstraction to represent attributes common to both requests and results. +This is modelled as a mixin in LinkML (so that it can't be instantiated directly).""" + type: Literal["EREMessage"] = Field(default="EREMessage", description="""The type of the request or result. 
+ +As per LinkML specification, `designates_type` is used here in order to allow for this +slot to tell the concrete subclass that an instance (such as a JSON object) belongs to. + +In other words, a particular request will have `type` set with values like +`EntityMentionResolutionRequest` or `EntityResolutionResult` +""", json_schema_extra = { "linkml_meta": {'designates_type': True, 'domain_of': ['EREMessage']} }) + ere_request_id: str = Field(default=..., description="""A string representing the unique ID of an ERE request, or the ID of the request a response is about. +This **is not** the same as `request_id` + `source_id`. + +Note on notification responses: as per ERE contract, an `EntityMentionResolutionResponse` message +can originate from within the ERE, without any previous request counterpart, as a notification of +resolution update. In this case, `ere_request_id` has the prefix `ereNotification:`. +""", json_schema_extra = { "linkml_meta": {'domain_of': ['EREMessage']} }) + timestamp: Optional[datetime ] = Field(default=None, description="""The time when the message was created. Should be in ISO-8601 format. +""", json_schema_extra = { "linkml_meta": {'domain_of': ['EREMessage']} }) + + +class ERERequest(EREMessage): + """Root class to represent all the requests sent to the ERE.""" + type: Literal["ERERequest"] = Field(default="ERERequest", description="""The type of the request or result. + +As per LinkML specification, `designates_type` is used here in order to allow for this +slot to tell the concrete subclass that an instance (such as a JSON object) belongs to. 
+ +In other words, a particular request will have `type` set with values like +`EntityMentionResolutionRequest` or `EntityResolutionResult` +""", json_schema_extra = { "linkml_meta": {'designates_type': True, 'domain_of': ['EREMessage']} }) + ere_request_id: str = Field(default=..., description="""A string representing the unique ID of an ERE request, or the ID of the request a response is about. +This **is not** the same as `request_id` + `source_id`. + +Note on notification responses: as per ERE contract, an `EntityMentionResolutionResponse` message +can originate from within the ERE, without any previous request counterpart, as a notification of +resolution update. In this case, `ere_request_id` has the prefix `ereNotification:`. +""", json_schema_extra = { "linkml_meta": {'domain_of': ['EREMessage']} }) + timestamp: Optional[datetime ] = Field(default=None, description="""The time when the message was created. Should be in ISO-8601 format. +""", json_schema_extra = { "linkml_meta": {'domain_of': ['EREMessage']} }) + + +class EREResponse(EREMessage): + """Root class to represent all the responses sent by the ERE.""" + type: Literal["EREResponse"] = Field(default="EREResponse", description="""The type of the request or result. + +As per LinkML specification, `designates_type` is used here in order to allow for this +slot to tell the concrete subclass that an instance (such as a JSON object) belongs to. + +In other words, a particular request will have `type` set with values like +`EntityMentionResolutionRequest` or `EntityResolutionResult` +""", json_schema_extra = { "linkml_meta": {'designates_type': True, 'domain_of': ['EREMessage']} }) + ere_request_id: str = Field(default=..., description="""A string representing the unique ID of an ERE request, or the ID of the request a response is about. +This **is not** the same as `request_id` + `source_id`. 
+ +Note on notification responses: as per ERE contract, an `EntityMentionResolutionResponse` message +can originate from within the ERE, without any previous request counterpart, as a notification of +resolution update. In this case, `ere_request_id` has the prefix `ereNotification:`. +""", json_schema_extra = { "linkml_meta": {'domain_of': ['EREMessage']} }) + timestamp: Optional[datetime ] = Field(default=None, description="""The time when the message was created. Should be in ISO-8601 format. +""", json_schema_extra = { "linkml_meta": {'domain_of': ['EREMessage']} }) + + +class EntityMentionResolutionRequest(ERERequest): + """An entity resolution request sent to the ERE, containing the entity to be resolved.""" + entity_mention: EntityMention = Field(default=..., description="""The data about the entity to be resolved. Note that, at least for the moment, we don't support +batch requests, so this property is single-valued. +""", json_schema_extra = { "linkml_meta": {'domain_of': ['EntityMentionResolutionRequest']} }) + proposed_cluster_ids: Optional[list[str]] = Field(default=[], description="""When this is present, the ERE may use this information to try to cluster the entity in one of +the listed clusters. + +In particular, when an initial request about an entity isn't answered within a timeout, +a subsequent new request can be sent about the same entity and with the canonical ID of it +as a single proposed cluster ID. This suggests the ERE that it can create a new singleton cluster +with the entity as its initial only member and its canonical ID as the cluster ID. The ERE +can evolve such a cluster later, when further similar entities are sent in, or when it +has had more time to associate the initial entity to others. + +Whatever, the case, the ERE **has no obligation** to fulfil the proposal, how it reacts to +this list is implementation dependent, and the ERE remains the ultimate authority to provide +the final resolution decision. 
+""", json_schema_extra = { "linkml_meta": {'domain_of': ['EntityMentionResolutionRequest']} }) + excluded_cluster_ids: Optional[list[str]] = Field(default=[], description="""When this is present, the ERE may use this information to avoid clustering the entity in +the listed clusters. + +This can be used to notify the ERE that a curator has rejected a previous resolution +proposed by the ERE. + +As for `proposed_cluster_ids`, the ERE **has no obligation** to fulfil the exclusions, and +it remains the ultimate authority to provide the final resolution decision. + +Similarly, the exact reaction to this is implementation dependent. In the simplest case, the ERE +might just create a singleton cluster with the current entity as member. In a more advanced +case, it might recompute the similarity with more advanced algorithms or use updated +data. +""", json_schema_extra = { "linkml_meta": {'domain_of': ['EntityMentionResolutionRequest']} }) + type: Literal["EntityMentionResolutionRequest"] = Field(default="EntityMentionResolutionRequest", description="""The type of the request or result. + +As per LinkML specification, `designates_type` is used here in order to allow for this +slot to tell the concrete subclass that an instance (such as a JSON object) belongs to. + +In other words, a particular request will have `type` set with values like +`EntityMentionResolutionRequest` or `EntityResolutionResult` +""", json_schema_extra = { "linkml_meta": {'designates_type': True, 'domain_of': ['EREMessage']} }) + ere_request_id: str = Field(default=..., description="""A string representing the unique ID of an ERE request, or the ID of the request a response is about. +This **is not** the same as `request_id` + `source_id`. + +Note on notification responses: as per ERE contract, an `EntityMentionResolutionResponse` message +can originate from within the ERE, without any previous request counterpart, as a notification of +resolution update. 
In this case, `ere_request_id` has the prefix `ereNotification:`. +""", json_schema_extra = { "linkml_meta": {'domain_of': ['EREMessage']} }) + timestamp: Optional[datetime ] = Field(default=None, description="""The time when the message was created. Should be in ISO-8601 format. +""", json_schema_extra = { "linkml_meta": {'domain_of': ['EREMessage']} }) + + +class EntityMentionResolutionResponse(EREResponse): + """An entity resolution response returned by the ERE. + +This is basically a list of candidate clusters to which the entity is deemed to be equivalent. + +Note that, for the moment, we don't support batch requests. In future, we might support requests +with multiple subjects in the `EntityMention` content (eg, RDF with multiple subjects), in which case +we might need to return multiple `EntityMentionResolutionResponse` messages, each with additional +properties such as `entityIndex` and `totalEntities`.""" + entity_mention_id: EntityMentionIdentifier = Field(default=..., description="""The identifier of the entity mention that has been resolved. + +This isn't strictly needed, since the `ere_request_id` already links the response to +the request's entity mention. Yet, it's reported for convenience. +""", json_schema_extra = { "linkml_meta": {'domain_of': ['EntityMentionResolutionResponse']} }) + candidates: list[ClusterReference] = Field(default=..., description="""The set of cluster reference/score pairs representing the candidate clusters +that the entity mention in the original request could align to (be equivalent to). +""", json_schema_extra = { "linkml_meta": {'domain_of': ['Decision', 'UserAction', 'EntityMentionResolutionResponse']} }) + type: Literal["EntityMentionResolutionResponse"] = Field(default="EntityMentionResolutionResponse", description="""The type of the request or result. 
+ +As per LinkML specification, `designates_type` is used here in order to allow for this +slot to tell the concrete subclass that an instance (such as a JSON object) belongs to. + +In other words, a particular request will have `type` set with values like +`EntityMentionResolutionRequest` or `EntityResolutionResult` +""", json_schema_extra = { "linkml_meta": {'designates_type': True, 'domain_of': ['EREMessage']} }) + ere_request_id: str = Field(default=..., description="""A string representing the unique ID of an ERE request, or the ID of the request a response is about. +This **is not** the same as `request_id` + `source_id`. + +Note on notification responses: as per ERE contract, an `EntityMentionResolutionResponse` message +can originate from within the ERE, without any previous request counterpart, as a notification of +resolution update. In this case, `ere_request_id` has the prefix `ereNotification:`. +""", json_schema_extra = { "linkml_meta": {'domain_of': ['EREMessage']} }) + timestamp: Optional[datetime ] = Field(default=None, description="""The time when the message was created. Should be in ISO-8601 format. +""", json_schema_extra = { "linkml_meta": {'domain_of': ['EREMessage']} }) + + +class EREErrorResponse(EREResponse): + """Response sent by the ERE when some error/exception occurs while processing a request. +For instance, this may happen if the request is malformed or some internal error happens. + +The attributes of this class are based on [RFC-9457](https://datatracker.ietf.org/doc/html/rfc9457).""" + error_type: str = Field(default=..., description="""A string representing the error type, eg, the FQN of the raised exception. + +This corresponds to RFC-9457's `type`. +""", json_schema_extra = { "linkml_meta": {'domain_of': ['EREErrorResponse']} }) + error_title: Optional[str] = Field(default=None, description="""A human readable brief message about the error that occurred. + +This corresponds to RFC-9457's `title`. 
+""", json_schema_extra = { "linkml_meta": {'domain_of': ['EREErrorResponse']} }) + error_detail: Optional[str] = Field(default=None, description="""A human readable detailed message about the error that occurred. + +This corresponds to RFC-9457's `detail`. +""", json_schema_extra = { "linkml_meta": {'domain_of': ['EREErrorResponse']} }) + error_trace: Optional[str] = Field(default=None, description="""A string representing a (stack) trace of the error that occurred. + +This is optional and typically used for debugging purposes only, since +exposing this kind of server-side information is a security risk. +""", json_schema_extra = { "linkml_meta": {'domain_of': ['EREErrorResponse']} }) + type: Literal["EREErrorResponse"] = Field(default="EREErrorResponse", description="""The type of the request or result. + +As per LinkML specification, `designates_type` is used here in order to allow for this +slot to tell the concrete subclass that an instance (such as a JSON object) belongs to. + +In other words, a particular request will have `type` set with values like +`EntityMentionResolutionRequest` or `EntityResolutionResult` +""", json_schema_extra = { "linkml_meta": {'designates_type': True, 'domain_of': ['EREMessage']} }) + ere_request_id: str = Field(default=..., description="""A string representing the unique ID of an ERE request, or the ID of the request a response is about. +This **is not** the same as `request_id` + `source_id`. + +Note on notification responses: as per ERE contract, an `EntityMentionResolutionResponse` message +can originate from within the ERE, without any previous request counterpart, as a notification of +resolution update. In this case, `ere_request_id` has the prefix `ereNotification:`. +""", json_schema_extra = { "linkml_meta": {'domain_of': ['EREMessage']} }) + timestamp: Optional[datetime ] = Field(default=None, description="""The time when the message was created. Should be in ISO-8601 format. 
+""", json_schema_extra = { "linkml_meta": {'domain_of': ['EREMessage']} }) + diff --git a/src/erspec/models/pydantic_model.py b/src/erspec/models/pydantic_model.py new file mode 100644 index 0000000..6d1b154 --- /dev/null +++ b/src/erspec/models/pydantic_model.py @@ -0,0 +1,44 @@ +from dataclasses import dataclass +from typing import Any, Optional, TypeVar, cast + +from pydantic import BaseModel, ConfigDict, Field + + +class PydanticModel(BaseModel): + """Base model class with core configurations for all domain models.""" + + object_description: Optional[str] = Field( + default=None, + exclude=True, + description="Optional descriptive text for the model instance.", + ) + + model_config = ConfigDict( + validate_assignment=True, + extra="forbid", + frozen=False, + arbitrary_types_allowed=False, + use_enum_values=True, + str_strip_whitespace=False, + validate_default=True, + populate_by_name=True, + ser_json_bytes="base64", + ) + + +@dataclass(frozen=True) +class _GetFields: + _model: type[BaseModel] + + def __getattr__(self, item: str) -> Any: + if item in self._model.model_fields: + return item + return getattr(self._model, item) + + +TModel = TypeVar("TModel", bound=PydanticModel) + + +def fields(model: type[TModel], /) -> TModel: + """Type-safe field name accessor for Pydantic models.""" + return cast(TModel, _GetFields(model)) diff --git a/test/features/ere-ers-common-cases.feature b/test/features/ere-ers-common-cases.feature index f8e6c2e..43156f4 100644 --- a/test/features/ere-ers-common-cases.feature +++ b/test/features/ere-ers-common-cases.feature @@ -23,16 +23,16 @@ Then The ERE asynchronously pushes an `EntityMentionResolutionResponse` object that contains: - Common response properties: - - `requestId`: the original request ID + - `request_id`: the original request ID - `type`: "EntityMentionResolutionResponse" - - `entityMentionId`: an instance of `EntityMentionIdentifier` with - `sourceId`, `requestId`,`entityType` equal to the mention in the original 
request and + - `entity_mention_id`: an instance of `EntityMentionIdentifier` with + `source_id`, `request_id`, `entity_type` equal to the mention in the original request and corresponding to E taken from the original request - These are common to all responses and we won't repeat them in the following - - `candidateClusters`: a list of `ClusterReference` objects such like:` - - `{ "clusterId": , "confidenceScore": }` + - `candidates`: a list of `ClusterReference` objects such as: + - `{ "cluster_id": , "confidence_score": }` for i = 0..3 - All of `score(E, Ci)` are above the confidence threshold configured in the ERE @@ -57,23 +57,3 @@ Then on the function, described in the technical contract, which computes the canonical ID from the composite key of an entity mention. - -Scenario: A resolution request with excluded cluster IDs returns different cluster references - - ERE reacts to rejections of previously suggested cluster IDs by returning - cluster references in the response. - - Typically, we expect that the ERE creates a new singleton cluster for the entity mention, but it may also - return an alternative known cluster, eg, after a rebuild-all request or upon internal re-evaluation. - - As per the technical contract, the ERE might or might not store these rejection lists, the bottom line - is that, after a request of thi type, no further response about the same entity must contain the excluded - clusters any more (see the [idempotent resolutions feature](ere-ers-idempotent-resolutions.feature)). - -When - The ERS pushes a resolution request for an entity into the requests channel -And - The request has a set `R[]` as `excludedClusterIds` -Then - The ERE returns a resolution response such that none of `candidateClusters.clusterId` - is in `R[]`.
diff --git a/test/features/ere-ers-full-rebuilds.feature b/test/features/ere-ers-full-rebuilds.feature index a35686f..2da0e1f 100644 --- a/test/features/ere-ers-full-rebuilds.feature +++ b/test/features/ere-ers-full-rebuilds.feature @@ -1,5 +1,7 @@ Feature: ERE/ERS interaction upon rebuild requests + TODO: probably to remove, since this request has been turned into an internal operation. + The ERE replies to a full rebuild request with an acknowledgement response. After that, past resolutions are possibly recomputed. Since this is optional and implementation-dependent, diff --git a/test/features/ere-ers-idempotent-resolutions.feature b/test/features/ere-ers-idempotent-resolutions.feature index 76fdb9a..8188da2 100644 --- a/test/features/ere-ers-idempotent-resolutions.feature +++ b/test/features/ere-ers-idempotent-resolutions.feature @@ -2,89 +2,31 @@ Feature: ERE/ERS idempotent requests This feature tests idempotent interactions with the ERE. -Scenario: Repeated regular resolution request returns the same result - This is true if no full rebuilds or requests with rejected clusters have happened in between. +Scenario Outline: Repeated regular resolution request returns the same result + + This is true if the ERE doesn't do any re-clustering or rebuild operation between the requests. 
Given - That the ERE has previously replied to a resolution request for an entity E + That the ERE has previously replied to a resolution request {Req} for an entity E (with or without excluded clusters) And - No `FullBuildRequest` has been pushed since the previous resolution -And - No resolution request about the same entity mention that contains excluded clusters - has been pushed since the previous resolution + No `EntityMentionResolutionResponse` has been emitted with `ere_request_id` having the ‘ereNotification:’ prefix + and `entity_mention_id` pointing to E (ie, no internal updates happened, see the contract document) When - The ERS pushes again the same resolution request for the entity E into the ERE requests channel + The ERS pushes again the same resolution request {Req} for the entity E into the ERE requests channel Then The ERE asynchronously pushes an `EntityMentionResolutionResponse` object that contains the - same set of `candidateClusters` as in the previous response for E, including their confidence score. + same set of `candidates` as in the previous response for E, including their confidence score. Since returning the cluster references in score order is not required, such order doesn't apply to this invariant, i.e., the second result can come in a different order for the cluster references. + This applies to the following variants of {Req}: + | Req | + | a regular resolution request with neither `proposed_cluster_ids` nor `excluded_cluster_ids` | + | a resolution request with values for `proposed_cluster_ids` | + | a resolution request with values for `excluded_cluster_ids` | + | a resolution request with values for both `proposed_cluster_ids` and `excluded_cluster_ids` | -Scenario: Repeated resolution request with rejected clusters returns the same result - - This is very similar to the previous scenario about a regular request, but considers that requests - with filtered clusters are also idempotent.
- -Given - That the ERE has previously replied to a resolution request for an entity E, with - a set `R[]` of excluded cluster IDs in the request -And - No `FullBuildRequest` has been pushed since the previous resolution -When - The ERS sends again the same resolution request for the entity E and the same set `R[]` as `excludedClusterIds` -Then - The ERE asynchronously pushes an `EntityMentionResolutionResponse` object that contains the - same set of `candidateClusters` as in the previous response for E, including the same confidence scores. - - Thus, the response excludes the same clusters. - - -Scenario: Rejected clusters affect later resolution requests - - If a request excludes some clusters, then all the requests that follow will have the same exclusions. - -Given - That the ERE has previously replied to a resolution request for an entity E, with - a set `R[]` of excluded cluster IDs in the request -And - A resolution request for the same entity E is pushed again, with no excluded clusters -Then - The response to the new request must not contain any cluster in R[]. - - We assume that the reply to the first request doesn't contain R[] either, as per the - corresponding scenario in the [common cases feature](ere-ers-common-cases.feature). - - - -Scenario: Multiple resolution requests containing excluded clusters cause the respective exclusions to be merged - - If multiple requests exclude multiple clusters for the same entity, then the ERE must exclude them - all in subsequent responses. - -Given - The ERE has received a resolution request for an entity E, which excludes - a set `R1[]` of cluster IDs -And - The ERE has replied to the initial request with a set of candidate clusters R2[] -When - A new resolution about E is sent that contains R2[] as excluded clusters -Then - The ERE response doesn't contain any cluster ID in R1[] or R2[]. 
- - -Scenario: Multiple resolution requests with multiple rejections affect later resolution requests - - If multiple requests exclude multiple clusters for the same entity, then the ERE must exclude them - all in subsequent responses. This is a combination of the two previous scenarios. -Given - The same pre-conditions as "Multiple resolution requests containing excluded clusters cause the respective exclusions to be merged" -And - The request with R2[] has been sent and replied -When - A new resolution about E is sent that contains no excluded clusters -Then - The ERE response doesn't contain any cluster ID in R1[] or R2[]. + TODO: we have to decide whether to support the last case about combos. diff --git a/test/features/ere-ers-unhappy-paths.feature b/test/features/ere-ers-unhappy-paths.feature index 33d5977..75df568 100644 --- a/test/features/ere-ers-unhappy-paths.feature +++ b/test/features/ere-ers-unhappy-paths.feature @@ -9,6 +9,6 @@ When Then The ERE asynchronously pushes an error response to the responses channel that looks like: - `ereRequestId`: the ID of the malformed request - `errorTitle`/`errorMessage`: a human-readable description of the error + `ere_request_id`: the ID of the malformed request + `error_title`/`error_detail`: a human-readable description of the error `type`: "EREErrorResponse" # JSON object property, matches the LinkML class in the service schema.