From 86277588d87c66db88e824fb8b7d062942b1c3ab Mon Sep 17 00:00:00 2001
From: "tho.nguyen" <91511523+haki203@users.noreply.github.com>
Date: Sat, 23 May 2026 12:30:17 +0700
Subject: [PATCH] Add biological accession crosswalk guard
---
.../README.md | 24 ++
biological-accession-crosswalk-guard/demo.js | 74 +++++
biological-accession-crosswalk-guard/index.js | 275 ++++++++++++++++++
.../package.json | 12 +
.../render-video.js | 59 ++++
.../reports/accession-crosswalk-packet.json | 144 +++++++++
.../reports/accession-crosswalk-report.md | 40 +++
.../reports/demo.mp4 | Bin 0 -> 8372 bytes
.../reports/summary.svg | 31 ++
.../sample-data.js | 59 ++++
biological-accession-crosswalk-guard/test.js | 155 ++++++++++
11 files changed, 873 insertions(+)
create mode 100644 biological-accession-crosswalk-guard/README.md
create mode 100644 biological-accession-crosswalk-guard/demo.js
create mode 100644 biological-accession-crosswalk-guard/index.js
create mode 100644 biological-accession-crosswalk-guard/package.json
create mode 100644 biological-accession-crosswalk-guard/render-video.js
create mode 100644 biological-accession-crosswalk-guard/reports/accession-crosswalk-packet.json
create mode 100644 biological-accession-crosswalk-guard/reports/accession-crosswalk-report.md
create mode 100644 biological-accession-crosswalk-guard/reports/demo.mp4
create mode 100644 biological-accession-crosswalk-guard/reports/summary.svg
create mode 100644 biological-accession-crosswalk-guard/sample-data.js
create mode 100644 biological-accession-crosswalk-guard/test.js
diff --git a/biological-accession-crosswalk-guard/README.md b/biological-accession-crosswalk-guard/README.md
new file mode 100644
index 00000000..47c32ff3
--- /dev/null
+++ b/biological-accession-crosswalk-guard/README.md
@@ -0,0 +1,24 @@
+# Biological Accession Crosswalk Guard
+
+Self-contained Scientific Knowledge Graph Integration slice for SCIBASE issue #17.
+
+The guard validates canonical biological accession nodes before entity pages or graph recommendations are shown. It checks namespace-specific formats for NCBI Gene, UniProtKB, PubChem, and MeSH identifiers; deprecated aliases; taxon mismatch; DOI-backed evidence; low crosswalk confidence; duplicate canonical targets; and unsafe recommendation paths that depend on unresolved nodes.
+
+## Files
+
+- `index.js` - dependency-free evaluator and Markdown reviewer packet builder
+- `sample-data.js` - synthetic biological knowledge graph nodes
+- `test.js` - Node test coverage for hold, review, recommendation suppression, and approved paths
+- `demo.js` - writes JSON, Markdown, and SVG reviewer artifacts under `reports/`
+- `render-video.js` - creates a short MP4 demo artifact
+
+## Validation
+
+```bash
+npm run check
+npm test
+npm run demo
+npm run video
+```
+
+Synthetic data only. No private research objects, external ontology services, registry calls, recommendation services, credentials, network calls, payment data, or payout details are used.
diff --git a/biological-accession-crosswalk-guard/demo.js b/biological-accession-crosswalk-guard/demo.js
new file mode 100644
index 00000000..8d260649
--- /dev/null
+++ b/biological-accession-crosswalk-guard/demo.js
@@ -0,0 +1,74 @@
+const fs = require('node:fs');
+const path = require('node:path');
+
+const {evaluateBiologicalAccessionCrosswalk, buildReviewerPacket} = require('./index');
+const {samplePacket} = require('./sample-data');
+
+const REPORT_DIR = path.join(__dirname, 'reports');
+
+function escapeXml(value) {
+ return String(value)
+ .replaceAll('&', '&')
+ .replaceAll('<', '<')
+ .replaceAll('>', '>')
+ .replaceAll('"', '"');
+}
+
+function buildSummarySvg(result) {
+ const scoreWidth = Math.max(20, Math.min(740, result.readinessScore * 7.4));
+ const findingWidth = Math.max(20, Math.min(740, result.findings.length * 70));
+ const actionWidth = Math.max(20, Math.min(740, result.requiredActions.length * 82));
+ const topFindings = result.findings.slice(0, 5);
+
+ return ``;
+}
+
+function main() {
+ fs.mkdirSync(REPORT_DIR, {recursive: true});
+ const result = evaluateBiologicalAccessionCrosswalk(samplePacket);
+ fs.writeFileSync(path.join(REPORT_DIR, 'accession-crosswalk-packet.json'), `${JSON.stringify(result, null, 2)}\n`);
+ fs.writeFileSync(path.join(REPORT_DIR, 'accession-crosswalk-report.md'), buildReviewerPacket(result));
+ fs.writeFileSync(path.join(REPORT_DIR, 'summary.svg'), buildSummarySvg(result));
+ console.log(JSON.stringify({
+ graphId: result.graphId,
+ decision: result.decision,
+ readinessScore: result.readinessScore,
+ findings: result.findings.length,
+ requiredActions: result.requiredActions.length,
+ auditDigest: result.auditDigest,
+ }, null, 2));
+}
+
+if (require.main === module) {
+ main();
+}
+
+module.exports = {
+ buildSummarySvg,
+};
diff --git a/biological-accession-crosswalk-guard/index.js b/biological-accession-crosswalk-guard/index.js
new file mode 100644
index 00000000..125e9d7d
--- /dev/null
+++ b/biological-accession-crosswalk-guard/index.js
@@ -0,0 +1,275 @@
+const crypto = require('node:crypto');
+
+const FORMAT_PATTERNS = {
+ NCBIGene: /^\d+$/,
+ UniProtKB: /^([OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9][A-Z][A-Z0-9]{2}[0-9])$/,
+ PubChem: /^\d+$/,
+ MeSH: /^D\d{6}$/,
+};
+
+// Finding types that make chooseDecision return 'hold-for-curation'.
+const CRITICAL_FINDINGS = new Set([
+ 'invalid-accession-format',
+ 'duplicate-canonical-target',
+]);
+
+function asArray(value) {
+ return Array.isArray(value) ? value : [];
+}
+
+function addFinding(findings, finding) {
+ findings.push({
+ severity: finding.severity || 'major',
+ ...finding,
+ });
+}
+
+function isValidFormat(node) {
+ const pattern = FORMAT_PATTERNS[node.namespace];
+ if (!pattern) return false;
+ return pattern.test(String(node.accession || ''));
+}
+
+function evaluateNode(node, findings) {
+ const hasValidFormat = isValidFormat(node);
+ if (!hasValidFormat) {
+ addFinding(findings, {
+ type: 'invalid-accession-format',
+ severity: 'critical',
+ nodeId: node.id,
+ namespace: node.namespace,
+ accession: node.accession,
+ message: `${node.namespace || 'unknown namespace'} accession ${node.accession || ''} does not match the namespace format.`,
+ action: 'fix_accession_namespace',
+ });
+ }
+
+ if (node.deprecatedAlias) {
+ addFinding(findings, {
+ type: 'deprecated-accession-alias',
+ severity: 'major',
+ nodeId: node.id,
+ namespace: node.namespace,
+ accession: node.accession,
+ canonicalTarget: node.canonicalTarget,
+ message: 'Node uses a deprecated alias that should be replaced by the canonical accession before graph publication.',
+ action: 'replace_deprecated_alias',
+ });
+ }
+
+ if (node.expectedTaxon && node.taxon && String(node.expectedTaxon) !== String(node.taxon)) {
+ addFinding(findings, {
+ type: 'taxon-mismatch',
+ severity: 'major',
+ nodeId: node.id,
+ namespace: node.namespace,
+ accession: node.accession,
+ expectedTaxon: node.expectedTaxon,
+ taxon: node.taxon,
+ message: `Taxon ${node.taxon} does not match expected taxon ${node.expectedTaxon}.`,
+ action: 'resolve_taxon_crosswalk',
+ });
+ }
+
+ if (asArray(node.evidenceDois).length === 0) {
+ addFinding(findings, {
+ type: 'missing-doi-evidence',
+ severity: 'major',
+ nodeId: node.id,
+ namespace: node.namespace,
+ accession: node.accession,
+ message: 'Crosswalk lacks DOI-backed evidence for the entity-page edge.',
+ action: 'attach_doi_evidence',
+ });
+ }
+
+ if (hasValidFormat && Number(node.crosswalkConfidence || 0) < 0.8) {
+ addFinding(findings, {
+ type: 'low-crosswalk-confidence',
+ severity: 'major',
+ nodeId: node.id,
+ namespace: node.namespace,
+ accession: node.accession,
+ confidence: Number(node.crosswalkConfidence || 0),
+ message: 'Crosswalk confidence is below the safe recommendation threshold.',
+ action: 'raise_crosswalk_confidence',
+ });
+ }
+}
+
+function evaluateDuplicates(nodes, findings) {
+ const byTarget = new Map();
+ for (const node of nodes) {
+ if (!node.canonicalTarget) continue;
+ const list = byTarget.get(node.canonicalTarget) || [];
+ list.push(node);
+ byTarget.set(node.canonicalTarget, list);
+ }
+
+ for (const [canonicalTarget, list] of byTarget.entries()) {
+ if (list.length < 2) continue;
+ addFinding(findings, {
+ type: 'duplicate-canonical-target',
+ severity: 'critical',
+ nodeId: list.map((node) => node.id).join(','),
+ canonicalTarget,
+ message: `Multiple graph nodes resolve to ${canonicalTarget}.`,
+ action: 'merge_duplicate_canonical_nodes',
+ });
+ }
+}
+
+function addRecommendationSuppressions(nodes, findings) {
+ const riskyNodeIds = new Set(findings.map((finding) => finding.nodeId).filter(Boolean));
+ for (const node of nodes) {
+ if (!node.usedInRecommendation || !riskyNodeIds.has(node.id)) continue;
+ addFinding(findings, {
+ type: 'unsafe-recommendation-crosswalk',
+ severity: 'major',
+ nodeId: node.id,
+ namespace: node.namespace,
+ accession: node.accession,
+ message: 'Recommendation path uses a node with unresolved crosswalk findings.',
+ action: 'suppress_unsafe_recommendation',
+ });
+ }
+}
+
+function summarize(nodes, findings) {
+ return {
+ nodeCount: nodes.length,
+ formatIssues: findings.filter((finding) => finding.type === 'invalid-accession-format').length,
+ duplicateTargets: findings.filter((finding) => finding.type === 'duplicate-canonical-target').length,
+ aliasIssues: findings.filter((finding) => finding.type === 'deprecated-accession-alias').length,
+ taxonIssues: findings.filter((finding) => finding.type === 'taxon-mismatch').length,
+ evidenceIssues: findings.filter((finding) => finding.type === 'missing-doi-evidence' || finding.type === 'low-crosswalk-confidence').length,
+ recommendationSuppressions: findings.filter((finding) => finding.type === 'unsafe-recommendation-crosswalk').length,
+ };
+}
+
+function chooseDecision(findings) {
+ if (findings.some((finding) => CRITICAL_FINDINGS.has(finding.type))) {
+ return 'hold-for-curation';
+ }
+ if (findings.length > 0) {
+ return 'needs-curator-review';
+ }
+ return 'approved';
+}
+
+function calculateReadinessScore(findings) {
+ const score = findings.reduce((total, finding) => {
+ if (finding.severity === 'critical') return total - 25;
+ if (finding.severity === 'major') return total - 15;
+ return total - 8;
+ }, 100);
+ return Math.max(0, score);
+}
+
+function actionMessage(finding) {
+ const messages = {
+ fix_accession_namespace: 'Correct the accession namespace or replace the node with a valid identifier.',
+ merge_duplicate_canonical_nodes: 'Merge duplicate canonical nodes and preserve edge provenance.',
+ replace_deprecated_alias: 'Replace deprecated aliases with the current canonical accession.',
+ resolve_taxon_crosswalk: 'Resolve taxon mismatch before recommendations use this node.',
+ attach_doi_evidence: 'Attach DOI-backed evidence for the crosswalk edge.',
+ raise_crosswalk_confidence: 'Add stronger evidence or curator approval to raise crosswalk confidence.',
+ suppress_unsafe_recommendation: 'Suppress recommendations using unresolved crosswalk nodes.',
+ };
+ return messages[finding.action] || finding.message;
+}
+
+function buildRequiredActions(findings) {
+ const seen = new Set();
+ return findings
+ .map((finding) => ({
+ type: finding.action,
+ nodeId: finding.nodeId,
+ findingType: finding.type,
+ message: actionMessage(finding),
+ }))
+ .filter((action) => {
+ const key = `${action.type}:${action.nodeId}:${action.findingType}`;
+ if (seen.has(key)) return false;
+ seen.add(key);
+ return true;
+ });
+}
+
+function buildDigest(input) {
+ return crypto.createHash('sha256').update(JSON.stringify(input)).digest('hex').slice(0, 16);
+}
+
+function evaluateBiologicalAccessionCrosswalk(packet) {
+ const nodes = asArray(packet.nodes);
+ const findings = [];
+ for (const node of nodes) {
+ evaluateNode(node, findings);
+ }
+ evaluateDuplicates(nodes, findings);
+ addRecommendationSuppressions(nodes, findings);
+
+ const summary = summarize(nodes, findings);
+ const decision = chooseDecision(findings);
+ const readinessScore = decision === 'approved' ? 100 : calculateReadinessScore(findings);
+
+ return {
+ graphId: packet.graphId || 'unknown-graph',
+ generatedAt: packet.generatedAt || new Date().toISOString(),
+ decision,
+ readinessScore,
+ summary,
+ findings,
+ requiredActions: buildRequiredActions(findings),
+ auditDigest: buildDigest({nodes, findings, summary, decision}),
+ };
+}
+
+function buildReviewerPacket(result) {
+ const lines = [
+ '# Biological Accession Crosswalk Guard Report',
+ '',
+ `Graph: ${result.graphId}`,
+ `Generated: ${result.generatedAt}`,
+ `Decision: ${result.decision}`,
+ `Readiness score: ${result.readinessScore}`,
+ `Findings: ${result.findings.length}`,
+ `Audit digest: ${result.auditDigest}`,
+ '',
+ '## Summary',
+ '',
+ `- Nodes reviewed: ${result.summary.nodeCount}`,
+ `- Format issues: ${result.summary.formatIssues}`,
+ `- Duplicate targets: ${result.summary.duplicateTargets}`,
+ `- Alias issues: ${result.summary.aliasIssues}`,
+ `- Taxon issues: ${result.summary.taxonIssues}`,
+ `- Evidence issues: ${result.summary.evidenceIssues}`,
+ `- Recommendation suppressions: ${result.summary.recommendationSuppressions}`,
+ '',
+ '## Findings',
+ '',
+ ];
+
+ if (result.findings.length === 0) {
+ lines.push('- No biological accession crosswalk findings.');
+ } else {
+ for (const finding of result.findings) {
+ lines.push(`- ${finding.severity.toUpperCase()} ${finding.type} for ${finding.nodeId}: ${finding.message}`);
+ }
+ }
+
+ lines.push('', '## Required Actions', '');
+ if (result.requiredActions.length === 0) {
+ lines.push('- No curator action required.');
+ } else {
+ for (const action of result.requiredActions) {
+ lines.push(`- ${action.type} (${action.nodeId}): ${action.message}`);
+ }
+ }
+
+ return `${lines.join('\n')}\n`;
+}
+
+module.exports = {
+ evaluateBiologicalAccessionCrosswalk,
+ buildReviewerPacket,
+};
diff --git a/biological-accession-crosswalk-guard/package.json b/biological-accession-crosswalk-guard/package.json
new file mode 100644
index 00000000..31a488e8
--- /dev/null
+++ b/biological-accession-crosswalk-guard/package.json
@@ -0,0 +1,12 @@
+{
+ "name": "biological-accession-crosswalk-guard",
+ "version": "1.0.0",
+ "private": true,
+ "description": "Dependency-free biological accession crosswalk guard for SCIBASE issue #17.",
+ "scripts": {
+ "check": "node --check index.js && node --check sample-data.js && node --check demo.js && node --check render-video.js && node --check test.js",
+ "test": "node --test test.js",
+ "demo": "node demo.js",
+ "video": "node render-video.js"
+ }
+}
diff --git a/biological-accession-crosswalk-guard/render-video.js b/biological-accession-crosswalk-guard/render-video.js
new file mode 100644
index 00000000..6d5ab88c
--- /dev/null
+++ b/biological-accession-crosswalk-guard/render-video.js
@@ -0,0 +1,59 @@
+const fs = require('node:fs');
+const path = require('node:path');
+const {spawnSync} = require('node:child_process');
+
+const {evaluateBiologicalAccessionCrosswalk} = require('./index');
+const {samplePacket} = require('./sample-data');
+
+const REPORT_DIR = path.join(__dirname, 'reports');
+
+function resolveFfmpeg() {
+ if (process.env.FFMPEG_PATH) return process.env.FFMPEG_PATH;
+ const candidate = path.resolve(__dirname, '..', '..', '..', 'node_modules', 'ffmpeg-static', 'ffmpeg.exe');
+ if (fs.existsSync(candidate)) return candidate;
+ return 'ffmpeg';
+}
+
+function main() {
+ fs.mkdirSync(REPORT_DIR, {recursive: true});
+ const result = evaluateBiologicalAccessionCrosswalk(samplePacket);
+ const outPath = path.join(REPORT_DIR, 'demo.mp4');
+ const ffmpeg = resolveFfmpeg();
+ const scoreWidth = Math.max(24, Math.min(820, Math.round(result.readinessScore * 8.2)));
+ const findingWidth = Math.max(24, Math.min(820, result.findings.length * 76));
+ const actionWidth = Math.max(24, Math.min(820, result.requiredActions.length * 82));
+ const filters = [
+ 'drawbox=x=52:y=52:w=1176:h=616:color=white@0.13:t=fill',
+ 'drawbox=x=76:y=76:w=1128:h=568:color=white@0.08:t=fill',
+ 'drawbox=x=110:y=168:w=820:h=44:color=white@0.28:t=fill',
+ `drawbox=x=110:y=168:w=${scoreWidth}:h=44:color=0x2f855a@1:t=fill`,
+ 'drawbox=x=110:y=286:w=820:h=44:color=white@0.28:t=fill',
+ `drawbox=x=110:y=286:w=${findingWidth}:h=44:color=0xc2410c@1:t=fill`,
+ 'drawbox=x=110:y=404:w=820:h=44:color=white@0.28:t=fill',
+ `drawbox=x=110:y=404:w=${actionWidth}:h=44:color=0x1d4ed8@1:t=fill`,
+ 'drawbox=x=984:y=168:w=140:h=44:color=0x2f855a@1:t=fill',
+ 'drawbox=x=984:y=286:w=140:h=44:color=0xc2410c@1:t=fill',
+ 'drawbox=x=984:y=404:w=140:h=44:color=0x1d4ed8@1:t=fill',
+ 'drawbox=x=110:y=548:w=560:h=38:color=white@0.22:t=fill',
+ 'drawbox=x=110:y=548:w=470:h=38:color=0xf9ab00@1:t=fill',
+ ].join(',');
+
+ const resultProcess = spawnSync(ffmpeg, [
+ '-y',
+ '-f', 'lavfi',
+ '-i', 'color=c=0x12333c:s=1280x720:d=4:r=25',
+ '-vf', filters,
+ '-c:v', 'libx264',
+ '-pix_fmt', 'yuv420p',
+ '-movflags', '+faststart',
+ outPath,
+ ], {stdio: 'inherit'});
+ if (resultProcess.status !== 0) {
+ throw new Error(`ffmpeg exited with ${resultProcess.status}`);
+ }
+ console.log(outPath);
+}
+
+if (require.main === module) {
+ main();
+}
diff --git a/biological-accession-crosswalk-guard/reports/accession-crosswalk-packet.json b/biological-accession-crosswalk-guard/reports/accession-crosswalk-packet.json
new file mode 100644
index 00000000..bb790834
--- /dev/null
+++ b/biological-accession-crosswalk-guard/reports/accession-crosswalk-packet.json
@@ -0,0 +1,144 @@
+{
+ "graphId": "kg-biological-accession-demo",
+ "generatedAt": "2026-05-23T07:00:00Z",
+ "decision": "hold-for-curation",
+ "readinessScore": 0,
+ "summary": {
+ "nodeCount": 5,
+ "formatIssues": 1,
+ "duplicateTargets": 1,
+ "aliasIssues": 2,
+ "taxonIssues": 1,
+ "evidenceIssues": 2,
+ "recommendationSuppressions": 1
+ },
+ "findings": [
+ {
+ "severity": "critical",
+ "type": "invalid-accession-format",
+ "nodeId": "node-gene-p53-alias",
+ "namespace": "NCBIGene",
+ "accession": "TP53",
+ "message": "NCBIGene accession TP53 does not match the namespace format.",
+ "action": "fix_accession_namespace"
+ },
+ {
+ "severity": "major",
+ "type": "deprecated-accession-alias",
+ "nodeId": "node-gene-p53-alias",
+ "namespace": "NCBIGene",
+ "accession": "TP53",
+ "canonicalTarget": "NCBIGene:7157",
+ "message": "Node uses a deprecated alias that should be replaced by the canonical accession before graph publication.",
+ "action": "replace_deprecated_alias"
+ },
+ {
+ "severity": "major",
+ "type": "deprecated-accession-alias",
+ "nodeId": "node-protein-old",
+ "namespace": "UniProtKB",
+ "accession": "Q9Y261",
+ "canonicalTarget": "UniProtKB:Q9Y261",
+ "message": "Node uses a deprecated alias that should be replaced by the canonical accession before graph publication.",
+ "action": "replace_deprecated_alias"
+ },
+ {
+ "severity": "major",
+ "type": "taxon-mismatch",
+ "nodeId": "node-protein-old",
+ "namespace": "UniProtKB",
+ "accession": "Q9Y261",
+ "expectedTaxon": "9606",
+ "taxon": "10090",
+ "message": "Taxon 10090 does not match expected taxon 9606.",
+ "action": "resolve_taxon_crosswalk"
+ },
+ {
+ "severity": "major",
+ "type": "missing-doi-evidence",
+ "nodeId": "node-protein-old",
+ "namespace": "UniProtKB",
+ "accession": "Q9Y261",
+ "message": "Crosswalk lacks DOI-backed evidence for the entity-page edge.",
+ "action": "attach_doi_evidence"
+ },
+ {
+ "severity": "major",
+ "type": "low-crosswalk-confidence",
+ "nodeId": "node-protein-old",
+ "namespace": "UniProtKB",
+ "accession": "Q9Y261",
+ "confidence": 0.61,
+ "message": "Crosswalk confidence is below the safe recommendation threshold.",
+ "action": "raise_crosswalk_confidence"
+ },
+ {
+ "severity": "critical",
+ "type": "duplicate-canonical-target",
+ "nodeId": "node-gene-tp53,node-gene-p53-alias",
+ "canonicalTarget": "NCBIGene:7157",
+ "message": "Multiple graph nodes resolve to NCBIGene:7157.",
+ "action": "merge_duplicate_canonical_nodes"
+ },
+ {
+ "severity": "major",
+ "type": "unsafe-recommendation-crosswalk",
+ "nodeId": "node-gene-p53-alias",
+ "namespace": "NCBIGene",
+ "accession": "TP53",
+ "message": "Recommendation path uses a node with unresolved crosswalk findings.",
+ "action": "suppress_unsafe_recommendation"
+ }
+ ],
+ "requiredActions": [
+ {
+ "type": "fix_accession_namespace",
+ "nodeId": "node-gene-p53-alias",
+ "findingType": "invalid-accession-format",
+ "message": "Correct the accession namespace or replace the node with a valid identifier."
+ },
+ {
+ "type": "replace_deprecated_alias",
+ "nodeId": "node-gene-p53-alias",
+ "findingType": "deprecated-accession-alias",
+ "message": "Replace deprecated aliases with the current canonical accession."
+ },
+ {
+ "type": "replace_deprecated_alias",
+ "nodeId": "node-protein-old",
+ "findingType": "deprecated-accession-alias",
+ "message": "Replace deprecated aliases with the current canonical accession."
+ },
+ {
+ "type": "resolve_taxon_crosswalk",
+ "nodeId": "node-protein-old",
+ "findingType": "taxon-mismatch",
+ "message": "Resolve taxon mismatch before recommendations use this node."
+ },
+ {
+ "type": "attach_doi_evidence",
+ "nodeId": "node-protein-old",
+ "findingType": "missing-doi-evidence",
+ "message": "Attach DOI-backed evidence for the crosswalk edge."
+ },
+ {
+ "type": "raise_crosswalk_confidence",
+ "nodeId": "node-protein-old",
+ "findingType": "low-crosswalk-confidence",
+ "message": "Add stronger evidence or curator approval to raise crosswalk confidence."
+ },
+ {
+ "type": "merge_duplicate_canonical_nodes",
+ "nodeId": "node-gene-tp53,node-gene-p53-alias",
+ "findingType": "duplicate-canonical-target",
+ "message": "Merge duplicate canonical nodes and preserve edge provenance."
+ },
+ {
+ "type": "suppress_unsafe_recommendation",
+ "nodeId": "node-gene-p53-alias",
+ "findingType": "unsafe-recommendation-crosswalk",
+ "message": "Suppress recommendations using unresolved crosswalk nodes."
+ }
+ ],
+ "auditDigest": "08f1b33b690fc649"
+}
diff --git a/biological-accession-crosswalk-guard/reports/accession-crosswalk-report.md b/biological-accession-crosswalk-guard/reports/accession-crosswalk-report.md
new file mode 100644
index 00000000..5b68885e
--- /dev/null
+++ b/biological-accession-crosswalk-guard/reports/accession-crosswalk-report.md
@@ -0,0 +1,40 @@
+# Biological Accession Crosswalk Guard Report
+
+Graph: kg-biological-accession-demo
+Generated: 2026-05-23T07:00:00Z
+Decision: hold-for-curation
+Readiness score: 0
+Findings: 8
+Audit digest: 08f1b33b690fc649
+
+## Summary
+
+- Nodes reviewed: 5
+- Format issues: 1
+- Duplicate targets: 1
+- Alias issues: 2
+- Taxon issues: 1
+- Evidence issues: 2
+- Recommendation suppressions: 1
+
+## Findings
+
+- CRITICAL invalid-accession-format for node-gene-p53-alias: NCBIGene accession TP53 does not match the namespace format.
+- MAJOR deprecated-accession-alias for node-gene-p53-alias: Node uses a deprecated alias that should be replaced by the canonical accession before graph publication.
+- MAJOR deprecated-accession-alias for node-protein-old: Node uses a deprecated alias that should be replaced by the canonical accession before graph publication.
+- MAJOR taxon-mismatch for node-protein-old: Taxon 10090 does not match expected taxon 9606.
+- MAJOR missing-doi-evidence for node-protein-old: Crosswalk lacks DOI-backed evidence for the entity-page edge.
+- MAJOR low-crosswalk-confidence for node-protein-old: Crosswalk confidence is below the safe recommendation threshold.
+- CRITICAL duplicate-canonical-target for node-gene-tp53,node-gene-p53-alias: Multiple graph nodes resolve to NCBIGene:7157.
+- MAJOR unsafe-recommendation-crosswalk for node-gene-p53-alias: Recommendation path uses a node with unresolved crosswalk findings.
+
+## Required Actions
+
+- fix_accession_namespace (node-gene-p53-alias): Correct the accession namespace or replace the node with a valid identifier.
+- replace_deprecated_alias (node-gene-p53-alias): Replace deprecated aliases with the current canonical accession.
+- replace_deprecated_alias (node-protein-old): Replace deprecated aliases with the current canonical accession.
+- resolve_taxon_crosswalk (node-protein-old): Resolve taxon mismatch before recommendations use this node.
+- attach_doi_evidence (node-protein-old): Attach DOI-backed evidence for the crosswalk edge.
+- raise_crosswalk_confidence (node-protein-old): Add stronger evidence or curator approval to raise crosswalk confidence.
+- merge_duplicate_canonical_nodes (node-gene-tp53,node-gene-p53-alias): Merge duplicate canonical nodes and preserve edge provenance.
+- suppress_unsafe_recommendation (node-gene-p53-alias): Suppress recommendations using unresolved crosswalk nodes.
diff --git a/biological-accession-crosswalk-guard/reports/demo.mp4 b/biological-accession-crosswalk-guard/reports/demo.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..db53158372327b875487d6bdde69bf6f02971801
GIT binary patch
literal 8372
zcmeHMd011&7QaapD2SlA6;ZB=ibx;{k*x-0Q`~SX;DUskQUz+d*XD*@-kHl0gOW`^0GExPk8dKuZbaK<8^IZRdg@nXk~I
zL5)gxoBEF4Xxc$Qq6Zb)z?mTL&T?^MyRcatTrL&FbKJQGrv5#oy&Y-~kn*RAF#GoF
zm~9l)S(hfy>3xD`jbRK7y%_j|58sZYVJ4Ego99O6{n65bw5`>IRt+@*f}Q~aO4H$G
z_GAo04>m+g=~nCk653cF;5hJIKjJ>R^e?rftb($m|5`VHUU
zk2n4?_yKK?|E2w5hUNpCUBBP$;OwCDfl9~O!QfmWnsxwq%k&(>3;NB@0Lg@B3_PPd
z9ux!||Bu@)FUdcq6*sIB-j82(=nr
z3eO8uYPi-$AEH#q;4l%PS4NuxgU~OA(9d><(9eRlnOH-@oi!vw0aCD&vQ2uow`W!b
z-u>bBrJ$T0&KL1ZqkeBd_XEzvg(?k+vw_sOu{}7rkS+A`a$^G{$OQykXUz$m%H-m{
zq5dE#B89;4SE&=^q?p1vES4LS!{WF>r-Y)^-mb23ad9r_##G4(rHe`v<%&jekx&Xb
z$f(qmRHf8<<3d6}2zhKAZbhCOE+PeTl~BfGd$YV*IH4ru30jiJig)v7#k1LNxPs(K
z$aq{EBLK<+S8EeM72bS}h{tweffBrMg*2WN@sUL~Xz(?JGK%D}-EpBrqf!ujP-Wwk
zhLp>tTA)1RJw-wa2w}8>#{vk1Xqie$@;Gc~HX9ccT8gjM%A{%}VK5M_=BvbFElKg1
z9GsG9Kts#pz)h=C$p{Iw_??|@xK=I|0+`MY7OvFj2N6mY1ciV|l@zIw6Cecr0(p#v
zNZ<=q3N=ANTL?aaj7h2l6CkJ|kc?PEC`c`MOTbqr04){qI6xC3LXD;&;0vUL77ZfR
zkot+mkA1q~j@&4qZGg}5+m?4U`^L${#70xaluv6Y-
z<6)QGX1ZcRB+6Mjf3!J@KMePLRM$%)*x_jYq?#{4*EVC$}+G|{$xx`M~S
zc0=P7?W*6EuKF2e_*uqj>h9%X^vGM+ym5>%_PXE1XZK2+n){}$;Ve5;ZJqIp<0mUD
zG8|1DDp&f4+aCG$m#Yf`=BMplqMr;lJ>9t|D8t)t|8coZOXx=DynWLiZ)QF0KXd)D
zp97`upZwT2BXpkg{jIm1#KD6Hj!3vUTp${!5YE2L>;`H`woD=7P-Zam9|xx(!VX2`on6)XTd1qRXL|
zMp?cewx6_1^OZi^yq5mn(bYZ}En|Ak?uAEXvvE##{+{3ql@Ifr0>(Hsj#`$NBU$yN
zyrzDbZ1$zb1<4zy*=jciZGDt%Jt}a2@fvUlefHgXSxaI*Q)GX-q{+TQ;dQYo{pQG>
zW;%XJ=C=-0Ey`Gx6-zAUE(*B0HtDp@W6iA@CqA&w|0J9-v@|xitkL$1=}z0@#|JK0
zd84_OzGD1@L$p1&D+j;roRGfTyWpJ&D~E7~$)p#HFGkwbv
zS?g2Yrtj82NZ;xiv+&H;fGt*+`~@=M}WN3L_#)rG?9jnexq(K_AZJweuEY
zzvG$b9m{RFabXST=J87X?WQzdNDV$Nv&!9-5~=w7=+e*;4{XY)-^>S@p1Dy!=Fz9u
zYSX`)Q?l*{(`Uanh}_mnZAXU<-CutAR>JUH)sxDNNe!Z{*WN3rE~;nA1e-YI>cr7E4Tt#YTToVH)8&@9fBh
zwO3AEyr)j0-pYEMGDCX*WEC@W*eK_sU(Mx9=`0ni=KRmi`2{tOm(`fLSMq`U5Bg~D
z*Os63PgBh=<4Haoy=2kTv~6Y)B?tKRsi*fVDf<;U{pxeR&ROVj`-4S=7rvQRS$Gm>
z^xYnP%j+K(L<1etO>}^WHt0ujGq)FgZJWpTx!*JwScC@LiogSl|2eB7Xjd=u`om>a
zzUxd#7pqp&Lzas?+X@c{7WlqZz1ix(N?D}I$Em?#+hSL>*sYTF+V&ZfUEj<5PQLS_
zn2}#CKSbPZlX2w%Q{!d5jzkVS-Q*roIjN<>bnm7r`A--YGt_GwuMWAS*Ozq6XM+VF
zG7$!58u9rYOZcX>{QPu-{kB|xoqTN4`cnsQ>79ps+VA+JgQuEP&(&B7b+LJS6?av&
zr*2HO*fL!Z4|{9;SI!fkxjo$;Sx9SY`0>t>clOx!L1t+M3)}g76A_7i{pVsBHqLjq
zgWNy2?a+?SZUgiAn;kZ6-~8QWIrbnt(l^B~J_$a0pL4;epit2nRI$>PAT@%PLQsCW
zf0F91F{aueBwX5)&~*@E`Rj1OTRz-XqFt1IjEwDK`dXy6etOiJ${0&jt=O3gz7%Q6d0An`b1-=pIoTe5unKgEHP&l-+!}<5egV^^2)H
z)+5ReU*`0P5(%+<_k3?G%5J{A{3?_y4Je=Vh_c!s)M^aMYGYA$`7&krt57ED0i^JZ
zK$$Yh2$Z`Wj78by%M`y?p)~mNZ;qEnMWSxLOg0kbn|Rsn-s`y0*nhrz-o(r9H$Bi}
zP#}Ov^d?@uiI;zIdj5Oy(#eotzA9caup#K*M*7UsxSZ{8ij;uQn9LIJp}$eS{+tr<
z8Iw~!_uuP|j!wQtWs}eS_qy{)z-LTc?DF4h&L9C_X9lTiZ%gm^o`1=Q9Shg*KizB2
z9RXiw?ikhHmSX)9WQObL1j@7Ee7xqY5k9U*b!3fU?QNcu{|X=dvH_kiBx2)V_;9_F
U?fqPhhW>31UweWc^VRBq0gu;|yZ`_I
literal 0
HcmV?d00001
diff --git a/biological-accession-crosswalk-guard/reports/summary.svg b/biological-accession-crosswalk-guard/reports/summary.svg
new file mode 100644
index 00000000..605eab39
--- /dev/null
+++ b/biological-accession-crosswalk-guard/reports/summary.svg
@@ -0,0 +1,31 @@
+
\ No newline at end of file
diff --git a/biological-accession-crosswalk-guard/sample-data.js b/biological-accession-crosswalk-guard/sample-data.js
new file mode 100644
index 00000000..7ff6c9e8
--- /dev/null
+++ b/biological-accession-crosswalk-guard/sample-data.js
@@ -0,0 +1,59 @@
+const samplePacket = {
+ graphId: 'kg-biological-accession-demo',
+ generatedAt: '2026-05-23T07:00:00Z',
+ nodes: [
+ {
+ id: 'node-gene-tp53',
+ namespace: 'NCBIGene',
+ accession: '7157',
+ canonicalTarget: 'NCBIGene:7157',
+ taxon: '9606',
+ expectedTaxon: '9606',
+ evidenceDois: ['10.1016/j.cell.2026.01.001'],
+ crosswalkConfidence: 0.98,
+ },
+ {
+ id: 'node-gene-p53-alias',
+ namespace: 'NCBIGene',
+ accession: 'TP53',
+ canonicalTarget: 'NCBIGene:7157',
+ taxon: '9606',
+ expectedTaxon: '9606',
+ deprecatedAlias: true,
+ evidenceDois: ['10.1016/j.cell.2026.01.001'],
+ crosswalkConfidence: 0.72,
+ usedInRecommendation: true,
+ },
+ {
+ id: 'node-protein-old',
+ namespace: 'UniProtKB',
+ accession: 'Q9Y261',
+ canonicalTarget: 'UniProtKB:Q9Y261',
+ taxon: '10090',
+ expectedTaxon: '9606',
+ deprecatedAlias: true,
+ evidenceDois: [],
+ crosswalkConfidence: 0.61,
+ },
+ {
+ id: 'node-compound-aspirin',
+ namespace: 'PubChem',
+ accession: '2244',
+ canonicalTarget: 'PubChem:2244',
+ evidenceDois: ['10.1038/s41586-026-0001-2'],
+ crosswalkConfidence: 0.97,
+ },
+ {
+ id: 'node-mesh-breast-neoplasms',
+ namespace: 'MeSH',
+ accession: 'D001943',
+ canonicalTarget: 'MeSH:D001943',
+ evidenceDois: ['10.1126/science.2026.0007'],
+ crosswalkConfidence: 0.95,
+ },
+ ],
+};
+
+module.exports = {
+ samplePacket,
+};
diff --git a/biological-accession-crosswalk-guard/test.js b/biological-accession-crosswalk-guard/test.js
new file mode 100644
index 00000000..aac9628f
--- /dev/null
+++ b/biological-accession-crosswalk-guard/test.js
@@ -0,0 +1,155 @@
+const test = require('node:test');
+const assert = require('node:assert/strict');
+
+const {
+ evaluateBiologicalAccessionCrosswalk,
+ buildReviewerPacket,
+} = require('./index');
+
+test('holds malformed accessions and duplicate canonical targets', () => {
+ const result = evaluateBiologicalAccessionCrosswalk({
+ graphId: 'kg-bio-crosswalk-risk',
+ generatedAt: '2026-05-23T07:00:00Z',
+ nodes: [
+ {
+ id: 'node-gene-tp53',
+ namespace: 'NCBIGene',
+ accession: '7157',
+ canonicalTarget: 'NCBIGene:7157',
+ taxon: '9606',
+ evidenceDois: ['10.1016/j.cell.2026.01.001'],
+ crosswalkConfidence: 0.98,
+ },
+ {
+ id: 'node-gene-p53-alias',
+ namespace: 'NCBIGene',
+ accession: 'TP53',
+ canonicalTarget: 'NCBIGene:7157',
+ taxon: '9606',
+ evidenceDois: ['10.1016/j.cell.2026.01.001'],
+ crosswalkConfidence: 0.72,
+ },
+ ],
+ });
+
+ assert.equal(result.decision, 'hold-for-curation');
+ assert.equal(result.summary.nodeCount, 2);
+ assert.equal(result.summary.formatIssues, 1);
+ assert.equal(result.summary.duplicateTargets, 1);
+ assert.deepEqual(
+ result.findings.map((finding) => finding.type),
+ ['invalid-accession-format', 'duplicate-canonical-target']
+ );
+ assert.equal(result.requiredActions[0].type, 'fix_accession_namespace');
+});
+
+test('requires review for deprecated aliases, taxon mismatch, and weak evidence', () => {
+ const result = evaluateBiologicalAccessionCrosswalk({
+ graphId: 'kg-bio-crosswalk-review',
+ generatedAt: '2026-05-23T07:00:00Z',
+ nodes: [
+ {
+ id: 'node-protein-old',
+ namespace: 'UniProtKB',
+ accession: 'Q9Y261',
+ canonicalTarget: 'UniProtKB:Q9Y261',
+ taxon: '10090',
+ expectedTaxon: '9606',
+ deprecatedAlias: true,
+ evidenceDois: [],
+ crosswalkConfidence: 0.61,
+ },
+ ],
+ });
+
+ assert.equal(result.decision, 'needs-curator-review');
+ assert.equal(result.summary.aliasIssues, 1);
+ assert.equal(result.summary.taxonIssues, 1);
+ assert.equal(result.summary.evidenceIssues, 2);
+ assert.deepEqual(
+ result.findings.map((finding) => finding.type),
+ ['deprecated-accession-alias', 'taxon-mismatch', 'missing-doi-evidence', 'low-crosswalk-confidence']
+ );
+ assert.equal(result.requiredActions.at(-1).type, 'raise_crosswalk_confidence');
+});
+
+test('suppresses unsafe recommendations with unresolved crosswalk findings', () => {
+ const result = evaluateBiologicalAccessionCrosswalk({
+ graphId: 'kg-recommendation-risk',
+ generatedAt: '2026-05-23T07:00:00Z',
+ nodes: [
+ {
+ id: 'node-compound-aspirin',
+ namespace: 'PubChem',
+ accession: '2244',
+ canonicalTarget: 'PubChem:2244',
+ taxon: null,
+ evidenceDois: ['10.1038/s41586-026-0001-2'],
+ crosswalkConfidence: 0.97,
+ },
+ {
+ id: 'node-mesh-bad',
+ namespace: 'MeSH',
+ accession: 'bad-mesh',
+ canonicalTarget: 'MeSH:D001241',
+ taxon: null,
+ evidenceDois: ['10.1038/s41586-026-0001-2'],
+ crosswalkConfidence: 0.93,
+ usedInRecommendation: true,
+ },
+ ],
+ });
+
+ assert.equal(result.decision, 'hold-for-curation');
+ assert.equal(result.summary.recommendationSuppressions, 1);
+ assert.equal(result.findings[0].type, 'invalid-accession-format');
+ assert.equal(result.requiredActions.at(-1).type, 'suppress_unsafe_recommendation');
+});
+
+test('approves clean crosswalks and builds deterministic reviewer packet', () => {
+ const result = evaluateBiologicalAccessionCrosswalk({
+ graphId: 'kg-ready-crosswalk',
+ generatedAt: '2026-05-23T07:00:00Z',
+ nodes: [
+ {
+ id: 'node-gene-brca1',
+ namespace: 'NCBIGene',
+ accession: '672',
+ canonicalTarget: 'NCBIGene:672',
+ taxon: '9606',
+ expectedTaxon: '9606',
+ evidenceDois: ['10.1126/science.2026.0007'],
+ crosswalkConfidence: 0.99,
+ },
+ {
+ id: 'node-protein-brca1',
+ namespace: 'UniProtKB',
+ accession: 'P38398',
+ canonicalTarget: 'UniProtKB:P38398',
+ taxon: '9606',
+ expectedTaxon: '9606',
+ evidenceDois: ['10.1126/science.2026.0007'],
+ crosswalkConfidence: 0.97,
+ },
+ {
+ id: 'node-mesh-breast-neoplasms',
+ namespace: 'MeSH',
+ accession: 'D001943',
+ canonicalTarget: 'MeSH:D001943',
+ evidenceDois: ['10.1126/science.2026.0007'],
+ crosswalkConfidence: 0.95,
+ },
+ ],
+ });
+
+ assert.equal(result.decision, 'approved');
+ assert.equal(result.readinessScore, 100);
+ assert.equal(result.findings.length, 0);
+
+ const packet = buildReviewerPacket(result);
+ assert.match(packet, /# Biological Accession Crosswalk Guard Report/);
+ assert.match(packet, /Graph: kg-ready-crosswalk/);
+ assert.match(packet, /Decision: approved/);
+ assert.match(packet, /Readiness score: 100/);
+ assert.match(packet, /Findings: 0/);
+});