From 9d26183c2cf31347874795e3001be80576539ba2 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Fri, 22 May 2026 15:30:39 +0500 Subject: [PATCH 001/318] chore(release): prepare 2.1.0 alpha metadata --- .gitignore | 2 ++ pyproject.toml | 6 ++-- uv.lock | 76 +++++++++++++++++++++++++------------------------- 3 files changed, 43 insertions(+), 41 deletions(-) diff --git a/.gitignore b/.gitignore index 71bd32fd..c85cab27 100644 --- a/.gitignore +++ b/.gitignore @@ -45,3 +45,5 @@ extensions/vscode-codeclone/node_modules /scripts/refactor_guard.sh /docs/refactoring-spec.md /smoke_cli.sh +/codeclone-v2.1-roadmap.md +/specs/ diff --git a/pyproject.toml b/pyproject.toml index 3be92df4..32acdb28 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "codeclone" -version = "2.0.2" +version = "2.1.0a1" description = "A structural review layer for Python — baseline-aware, deterministic, built for CI and AI agents" readme = { file = "docs/README-pypi.md", content-type = "text/markdown" } license = "MPL-2.0 AND MIT" @@ -40,7 +40,7 @@ keywords = [ ] classifiers = [ - "Development Status :: 5 - Production/Stable", + "Development Status :: 3 - Alpha", "Intended Audience :: Developers", "Topic :: Software Development :: Quality Assurance", "Topic :: Software Development :: Testing", @@ -73,7 +73,7 @@ dev = [ "build>=1.4.3", "twine>=6.2.0", "mypy>=1.20.1", - "ruff>=0.15.13", + "ruff>=0.15.14", "pre-commit>=4.5.1", ] diff --git a/uv.lock b/uv.lock index 7421c364..46605ece 100644 --- a/uv.lock +++ b/uv.lock @@ -105,11 +105,11 @@ wheels = [ [[package]] name = "certifi" -version = "2026.4.22" +version = "2026.5.20" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/25/ee/6caf7a40c36a1220410afe15a1cc64993a1f864871f698c0f93acb72842a/certifi-2026.4.22.tar.gz", hash = "sha256:8d455352a37b71bf76a79caa83a3d6c25afee4a385d632127b6afb3963f1c580", size = 137077, upload-time 
= "2026-04-22T11:26:11.191Z" } +sdist = { url = "https://files.pythonhosted.org/packages/f3/ce/ee2ecad540810a79593028e88299baeae54d346cc7a0d94b6199988b89b1/certifi-2026.5.20.tar.gz", hash = "sha256:69dea482ab64caa7b9f6aba1c6bf48bb6a5448d1c0f1b17ab42ad8c763a5344d", size = 135422, upload-time = "2026-05-20T11:46:50.073Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/22/30/7cd8fdcdfbc5b869528b079bfb76dcdf6056b1a2097a662e5e8c04f42965/certifi-2026.4.22-py3-none-any.whl", hash = "sha256:3cb2210c8f88ba2318d29b0388d1023c8492ff72ecdde4ebdaddbb13a31b1c4a", size = 135707, upload-time = "2026-04-22T11:26:09.372Z" }, + { url = "https://files.pythonhosted.org/packages/59/8c/57e832b7af6d7c5abe66eb3fbe3a3a32f4d11ea23a1aa7131371035be991/certifi-2026.5.20-py3-none-any.whl", hash = "sha256:3c52e209ba0a4ad7aebe60436a4ab349c39e1e602e8c134221e546902ad25897", size = 134134, upload-time = "2026-05-20T11:46:48.578Z" }, ] [[package]] @@ -310,19 +310,19 @@ wheels = [ [[package]] name = "click" -version = "8.4.0" +version = "8.4.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "colorama", marker = "sys_platform == 'win32'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/23/e4/796662cd90cf80e3a363c99db2b88e0e394b988a575f60a17e16440cd011/click-8.4.0.tar.gz", hash = "sha256:638f1338fe1235c8f4e008e4a8a254fb5c5fbdcbb40ece3c9142ebb78e792973", size = 350843, upload-time = "2026-05-17T00:47:58.425Z" } +sdist = { url = "https://files.pythonhosted.org/packages/9b/98/518d8e5081007684232226f475082b30087d0f585e8457db087298259f49/click-8.4.1.tar.gz", hash = "sha256:918b5633eddf6b41c32d4f454bf0de810065c74e3f7dbf8ee5452f8be88d3e96", size = 353007, upload-time = "2026-05-22T04:08:37.769Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ee/ae/8e92f8058baf87f6c7d86ee7e457668690195cc77efedb8d3797a06e3940/click-8.4.0-py3-none-any.whl", hash = "sha256:40c50b7c6c6adac2823d411041ec84f3f103f1b280d5e9ce0d7f998995832f81", size = 116147, 
upload-time = "2026-05-17T00:47:56.842Z" }, + { url = "https://files.pythonhosted.org/packages/c7/0d/67e5b4109ea4a837e80daa87c2c696711955e40449a97e8926672534def2/click-8.4.1-py3-none-any.whl", hash = "sha256:482be17c6991b8c19c5429a1e995d9b0efdbb63172824c41f99965dc0ade8ec2", size = 116639, upload-time = "2026-05-22T04:08:35.26Z" }, ] [[package]] name = "codeclone" -version = "2.0.2" +version = "2.1.0a1" source = { editable = "." } dependencies = [ { name = "orjson" }, @@ -360,7 +360,7 @@ requires-dist = [ { name = "pytest", marker = "extra == 'dev'", specifier = ">=9.0.3" }, { name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=7.1.0" }, { name = "rich", specifier = ">=15.0.0" }, - { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.15.13" }, + { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.15.14" }, { name = "tomli", marker = "python_full_version < '3.11'", specifier = ">=2.0.1" }, { name = "twine", marker = "extra == 'dev'", specifier = ">=6.2.0" }, ] @@ -661,11 +661,11 @@ wheels = [ [[package]] name = "idna" -version = "3.15" +version = "3.16" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/82/77/7b3966d0b9d1d31a36ddf1746926a11dface89a83409bf1483f0237aa758/idna-3.15.tar.gz", hash = "sha256:ca962446ea538f7092a95e057da437618e886f4d349216d2b1e294abfdb65fdc", size = 199245, upload-time = "2026-05-12T22:45:57.011Z" } +sdist = { url = "https://files.pythonhosted.org/packages/1a/88/bcf9709822fe69d02c2a6a77956c98ce6ea8ca8767a9aadcedc7eb6a2390/idna-3.16.tar.gz", hash = "sha256:d7a6da03db833450fca25d2358ac9ff06cd624577a4aea3a596d5c0f77b8e03d", size = 203770, upload-time = "2026-05-22T00:16:18.781Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/d2/23/408243171aa9aaba178d3e2559159c24c1171a641aa83b67bdd3394ead8e/idna-3.15-py3-none-any.whl", hash = "sha256:048adeaf8c2d788c40fee287673ccaa74c24ffd8dcf09ffa555a2fbb59f10ac8", size = 72340, upload-time = 
"2026-05-12T22:45:55.733Z" }, + { url = "https://files.pythonhosted.org/packages/94/16/70255075a9859a0e3adb789b68ceb0e210dec03934245fd98d248226572f/idna-3.16-py3-none-any.whl", hash = "sha256:cc246e3a3f89580c3a951b5ad298ca4638078b2cdd4f115654332b5c26daded5", size = 74165, upload-time = "2026-05-22T00:16:16.698Z" }, ] [[package]] @@ -1328,14 +1328,14 @@ wheels = [ [[package]] name = "pyjwt" -version = "2.12.1" +version = "2.13.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "typing-extensions", marker = "python_full_version < '3.11'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/c2/27/a3b6e5bf6ff856d2509292e95c8f57f0df7017cf5394921fc4e4ef40308a/pyjwt-2.12.1.tar.gz", hash = "sha256:c74a7a2adf861c04d002db713dd85f84beb242228e671280bf709d765b03672b", size = 102564, upload-time = "2026-03-13T19:27:37.25Z" } +sdist = { url = "https://files.pythonhosted.org/packages/3b/81/58d0ac84e1ef3a3843791d6954d94c0b33d526c75eeb1efbce9d0a4c4077/pyjwt-2.13.0.tar.gz", hash = "sha256:41571c89ca91598c79e8ef18a2d07367d4810fbbd6f637794879baf1b7703423", size = 107515, upload-time = "2026-05-21T19:54:36.618Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e5/7a/8dd906bd22e79e47397a61742927f6747fe93242ef86645ee9092e610244/pyjwt-2.12.1-py3-none-any.whl", hash = "sha256:28ca37c070cad8ba8cd9790cd940535d40274d22f80ab87f3ac6a713e6e8454c", size = 29726, upload-time = "2026-03-13T19:27:35.677Z" }, + { url = "https://files.pythonhosted.org/packages/a3/5e/ecf12fdb62546d64385c158514e9b2b671f7832108ef2ecd2020ce0af2d1/pyjwt-2.13.0-py3-none-any.whl", hash = "sha256:66adcc2aff09b3f1bbd95fc1e1577df8ac8723c978552fd43304c8a290ac5728", size = 31274, upload-time = "2026-05-21T19:54:35.362Z" }, ] [package.optional-dependencies] @@ -1711,27 +1711,27 @@ wheels = [ [[package]] name = "ruff" -version = "0.15.13" -source = { registry = "https://pypi.org/simple" } -sdist = { url = 
"https://files.pythonhosted.org/packages/24/21/a7d5c126d5b557715ef81098f3db2fe20f622a039ff2e626af28d674ab80/ruff-0.15.13.tar.gz", hash = "sha256:f9d89f17f7ba7fb2ed42921f0df75da797a9a5d71bc39049e2c687cf2baf44b7", size = 4678180, upload-time = "2026-05-14T13:44:37.869Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c6/61/11d458dc6ac22504fd8e237b29dfd40504c7fbbcc8930402cfe51a8e63ed/ruff-0.15.13-py3-none-linux_armv6l.whl", hash = "sha256:444b580fc72fd6887e650acd3e575e18cdc79dbcf42fb4030b491057921f61f8", size = 10738279, upload-time = "2026-05-14T13:44:18.7Z" }, - { url = "https://files.pythonhosted.org/packages/86/ca/caa871ee7be718c45256fada4e16a218ee3e33f0c4a46b729a60a24912e6/ruff-0.15.13-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6590d009e7cb7ebf36f83dbdd44a3fa48a0994ff6f1cdc1b08006abe58f98dc7", size = 11124798, upload-time = "2026-05-14T13:44:06.427Z" }, - { url = "https://files.pythonhosted.org/packages/d3/19/43f5f2e568dddde567fc41f8471f9432c09563e19d3e617a48cfa52f8f0a/ruff-0.15.13-py3-none-macosx_11_0_arm64.whl", hash = "sha256:1c26d2f66163deeb6e08d8b39fbbe983ce3c71cea06a6d7591cfd1421793c629", size = 10460761, upload-time = "2026-05-14T13:44:04.375Z" }, - { url = "https://files.pythonhosted.org/packages/99/df/cf938cd6de3003178f03ad7c1ea2a6c099468c03a35037985070b37e76be/ruff-0.15.13-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9dbd6f94b434f896308e4d57fb7bfde0d02b99f7a64b3bdab0fdfa6a864203a5", size = 10804451, upload-time = "2026-05-14T13:44:25.221Z" }, - { url = "https://files.pythonhosted.org/packages/c7/7d/5d0973129b154ded2225729169d7068f26b467760b146493fde138415f23/ruff-0.15.13-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bf3259f3be4d181bda591da5db2571aed6853c6a048157756448020bc6c5cd22", size = 10534285, upload-time = "2026-05-14T13:44:08.888Z" }, - { url = 
"https://files.pythonhosted.org/packages/1f/e3/6b999bbc66cd51e5f073842bc2a3995e99c5e0e72e16b15e7261f7abf57a/ruff-0.15.13-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae9c17e5eb4430c154e76abc25d79a318190f5a997f38fb6b114416c5319ffc9", size = 11312063, upload-time = "2026-05-14T13:44:11.274Z" }, - { url = "https://files.pythonhosted.org/packages/af/5a/642639e9f5db04f1e97fbd6e091c6fd20725bdf072fb114d00eefb9e6eb8/ruff-0.15.13-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2e2e39bff6c341f4b577a21b801326fab0b11847f48fcaa83f00a113c9b3cb55", size = 12183079, upload-time = "2026-05-14T13:44:01.634Z" }, - { url = "https://files.pythonhosted.org/packages/19/4c/7585735f6b53b0f12de13618b2f7d250a844f018822efc899df2e7b8295f/ruff-0.15.13-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e8d9a8e08013542e94d3220bc5b62cc3e5ef87c5f74bff367d3fac14fab013e6", size = 11440833, upload-time = "2026-05-14T13:43:59.043Z" }, - { url = "https://files.pythonhosted.org/packages/e8/31/bf1a0803d077e679cfeee5f2f67290a0fa79c7385b5d9a8c17b9db2c48f0/ruff-0.15.13-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cc411dfebe5eebe55ce041c6ae080eb7668955e866daa2fbb16692a784f1c4ca", size = 11434486, upload-time = "2026-05-14T13:44:27.761Z" }, - { url = "https://files.pythonhosted.org/packages/e1/4e/62c9b999875d4f14db80f277c030578f5e249c9852d65b7ac7ad0b43c041/ruff-0.15.13-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:768494eb08b9cee54e2fd27969966f74db5a57f6eaa7a90fcb3306af34dfc4bd", size = 11385189, upload-time = "2026-05-14T13:44:13.704Z" }, - { url = "https://files.pythonhosted.org/packages/fc/89/7e959047a104df3eb12863447c110140191fc5b6c4f379ea2e803fcdb0e4/ruff-0.15.13-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:fb75f9a3a7e42ffe117d734494e6c5e5cb3565d66e12612cb63d0e572a41a5b6", size = 10781380, upload-time = "2026-05-14T13:43:56.734Z" }, - { url = 
"https://files.pythonhosted.org/packages/ff/52/5fd18f3b88cab63e88aa11516b3b4e1e5f720e5c330f8dbe5c26210f41f8/ruff-0.15.13-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:8cb74dd33bb2f6613faf7fc03b660053b5ac4f80e706d5788c6335e2a8048d51", size = 10540605, upload-time = "2026-05-14T13:44:20.748Z" }, - { url = "https://files.pythonhosted.org/packages/e8/e0/9e35f338990d3e41a82875ff7053ffe97541dae81c9d02143177f381d572/ruff-0.15.13-py3-none-musllinux_1_2_i686.whl", hash = "sha256:7ef823f817fcd191dc934e984be9cf4094f808effa16f2542ad8e821ba02bbf2", size = 11036554, upload-time = "2026-05-14T13:44:16.256Z" }, - { url = "https://files.pythonhosted.org/packages/c2/13/070fb048c24080fba188f66371e2a92785be257ad02242066dc7255ac6e9/ruff-0.15.13-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:f345a13937bd7f09f6f5d19fa0721b0c103e00e7f62bc67089a8e5e037719e0b", size = 11528133, upload-time = "2026-05-14T13:44:22.808Z" }, - { url = "https://files.pythonhosted.org/packages/6b/8c/b1e1666aef7fc6555094d73ae6cd981701781ae85b97ceefc0eebd0b4668/ruff-0.15.13-py3-none-win32.whl", hash = "sha256:4044f94208b3b05ba0fc4a4abd0558cf4d6459bd18325eead7fd8cc66f909b41", size = 10721455, upload-time = "2026-05-14T13:44:35.697Z" }, - { url = "https://files.pythonhosted.org/packages/ab/a6/870a3e8a50590bb92be184ad928c2922f088b00d9dc5c5ec7b924ee08c22/ruff-0.15.13-py3-none-win_amd64.whl", hash = "sha256:7064884d442b7d477b4e7473d12da7f08851d2b1982763c5d3f388a19468a1a4", size = 11900409, upload-time = "2026-05-14T13:44:30.389Z" }, - { url = "https://files.pythonhosted.org/packages/9b/36/9c015cd052fca743dae8cb2aeb16b551444787467db42ceab0fc968865af/ruff-0.15.13-py3-none-win_arm64.whl", hash = "sha256:2471da9bd1068c8c064b5fd9c0c4b6dddffd6369cb1cd68b29993b1709ff1b21", size = 11179336, upload-time = "2026-05-14T13:44:33.026Z" }, +version = "0.15.14" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/dc/8a/8bce2894573e9dae6ff4d77fe34ad727d79b9e6238ad288c5638990d90f6/ruff-0.15.14.tar.gz", hash = "sha256:48e866b165be4a9bdbf310f7d3c9a07edef2fe8cd63ffeb4e00bb590506ebf9f", size = 4700910, upload-time = "2026-05-21T14:34:55.177Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b9/c8/74a92c6ff9fcfb4f1f947126d3ebee8389276e161ecc85de5bda7cda51bd/ruff-0.15.14-py3-none-linux_armv6l.whl", hash = "sha256:8dd2db9416e487c8d4b01fa7056bb02c4d05969d4f8d17a08c229c2f4ff3c108", size = 10739177, upload-time = "2026-05-21T14:34:37.332Z" }, + { url = "https://files.pythonhosted.org/packages/45/91/254a35c20acc38a7223c9d2d594af12e794432464f2cdeb52af1dc4a892d/ruff-0.15.14-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:be4ff55af755bd71a00ab3dc6bd7ffc467bd76e0df6881e286c2e3d23e8fb43b", size = 11144969, upload-time = "2026-05-21T14:34:43.978Z" }, + { url = "https://files.pythonhosted.org/packages/56/9e/d13e40f83b8d0a94430e6778ce1d94a43b38cf2efe63278bdd2b4c65abbf/ruff-0.15.14-py3-none-macosx_11_0_arm64.whl", hash = "sha256:48d5909d7d06276ce7dde6d32bfa4b0d4cb2651145cd8ee4b440722cbc77832f", size = 10478207, upload-time = "2026-05-21T14:34:48.378Z" }, + { url = "https://files.pythonhosted.org/packages/8d/f1/b15a7839fa4f332f8acec78e20564f26bb2d866e3d21710b877fd0263000/ruff-0.15.14-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ca8cbfa94c4f90984a67561978602746d4cd27103568f745fa90eee3f0d4107d", size = 10818459, upload-time = "2026-05-21T14:34:22.318Z" }, + { url = "https://files.pythonhosted.org/packages/45/33/53d651177f84f94b400a0e27f8824eeada3dddc9d5ee8aeb048f4352a520/ruff-0.15.14-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9a6bbc0333f1ab053423bcbf6226477d266ca7cec7738c4c8e3f55647803f3c4", size = 10541800, upload-time = "2026-05-21T14:34:20.209Z" }, + { url = 
"https://files.pythonhosted.org/packages/b8/a6/868f87e0bf9786ed24b5d0d0ad8676b8a94fd1912f42cddf9cfc7857818a/ruff-0.15.14-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8a24a4f7605d7003a6674d4387651effd939dead3fddd0f36561eb77a9a2e542", size = 11342149, upload-time = "2026-05-21T14:34:46.365Z" }, + { url = "https://files.pythonhosted.org/packages/a7/8b/38cd5c19faffdcc05a408d2b78edccc69492ab9720eadb49ea15ef80d768/ruff-0.15.14-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:049b5326e53ed80978f2fc041a280603f69dd6b0c95464342a2bb4572d9d9e2f", size = 12212563, upload-time = "2026-05-21T14:34:28.579Z" }, + { url = "https://files.pythonhosted.org/packages/3e/4d/a3c5b874a556d5731e3e657aaf04311bb76f0a5c3ec220ed43051be6b64b/ruff-0.15.14-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d4ed42e6696c8dfa5f06728e6441993901f548eb92d73bc472cb5a38d1395fbf", size = 11493299, upload-time = "2026-05-21T14:34:41.836Z" }, + { url = "https://files.pythonhosted.org/packages/1e/c0/56472c251d09858a53e51efbd485b09e1995d8731668b76d52e5dd6ee0f1/ruff-0.15.14-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:715c543cf450c4888251f91c52f1942a800541d9bddd7ac060aa4e6b77ae7cba", size = 11455931, upload-time = "2026-05-21T14:34:57.276Z" }, + { url = "https://files.pythonhosted.org/packages/2c/4a/e2e7b4d8dbf233d4eace59c75bc3435fa6d8bd3bae82d351d4e4300c0fd1/ruff-0.15.14-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:72ebab6013ec887d439d8b7593737a0a4ffb06d45d209d4e4bf2e92813082d3f", size = 11400794, upload-time = "2026-05-21T14:34:39.773Z" }, + { url = "https://files.pythonhosted.org/packages/97/c7/83c0539fe34c3e09136204d1e75d6052492364e0b3cb05e9465423f567d7/ruff-0.15.14-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:49072d36abdbe97a8dd7f480afe9c675699c0c495d4c84076e2c1203c4550581", size = 10804759, upload-time = "2026-05-21T14:34:31.045Z" }, + { url = 
"https://files.pythonhosted.org/packages/86/a6/18f2bfc095a2ab4a78745644e428205532ce6653a5d0fa8501572891534d/ruff-0.15.14-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:958522aee105068640c2c2ceae08f413ae44d922f52a1374ac13d6a96032fc93", size = 10539517, upload-time = "2026-05-21T14:34:53.064Z" }, + { url = "https://files.pythonhosted.org/packages/54/3a/5a8b3b69c654d4e4bf1d246ac5b49cbcdac6eaab6905925f8915f31e3b80/ruff-0.15.14-py3-none-musllinux_1_2_i686.whl", hash = "sha256:f3707da619a143a2e8830e2abab8224478d69ace2d28cb6c20543ae97c36bf61", size = 11065169, upload-time = "2026-05-21T14:34:24.484Z" }, + { url = "https://files.pythonhosted.org/packages/ed/c5/8864e4e7925b836ea354b31d57641ec03830564e281a8b6f061f8c3e0ec1/ruff-0.15.14-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:bb01d645694e3ec0102105d07ef2d53703970407d59c04e59d3ba0b7a1d53553", size = 11560214, upload-time = "2026-05-21T14:34:50.975Z" }, + { url = "https://files.pythonhosted.org/packages/36/38/012bf76752e1f89ed50b77b99532d90f3a3e287bc7918e1fc0948ac866ac/ruff-0.15.14-py3-none-win32.whl", hash = "sha256:6d0c1ad2a0ab718d39b6d8fd2217981ce4d625cd96a720095f798fb47d8b13e6", size = 10805548, upload-time = "2026-05-21T14:34:33.453Z" }, + { url = "https://files.pythonhosted.org/packages/d1/b7/4ea2c170f10ad760fff2a5250beb18897719dc8b52b53a24cddbb9dd3f19/ruff-0.15.14-py3-none-win_amd64.whl", hash = "sha256:802342981e056db3851a7836e5b070f8f15f67d4a685ae2a6160939d364b2902", size = 11939523, upload-time = "2026-05-21T14:34:18.077Z" }, + { url = "https://files.pythonhosted.org/packages/62/d5/bc97ff895ec35cf3925d4bd60f3b39d822f377a446906ec9bcc87405e59b/ruff-0.15.14-py3-none-win_arm64.whl", hash = "sha256:ff47b90a9ef6a40c9e2f3b479c1fb78531adf055b94c1eba0a7ba04b31951826", size = 11208607, upload-time = "2026-05-21T14:34:26.525Z" }, ] [[package]] @@ -1762,15 +1762,15 @@ wheels = [ [[package]] name = "starlette" -version = "1.0.0" +version = "1.0.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { 
name = "anyio" }, { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/81/69/17425771797c36cded50b7fe44e850315d039f28b15901ab44839e70b593/starlette-1.0.0.tar.gz", hash = "sha256:6a4beaf1f81bb472fd19ea9b918b50dc3a77a6f2e190a12954b25e6ed5eea149", size = 2655289, upload-time = "2026-03-22T18:29:46.779Z" } +sdist = { url = "https://files.pythonhosted.org/packages/08/a3/84e821cc54b4ab50ae6dbc6ac3800a651b65ec35f045cc73785380654057/starlette-1.0.1.tar.gz", hash = "sha256:512399c5f1de7fac99c88572212ded9ddeddef2fb32afa82d724000e88b38f4f", size = 2659596, upload-time = "2026-05-21T21:58:58.433Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/0b/c9/584bc9651441b4ba60cc4d557d8a547b5aff901af35bda3a4ee30c819b82/starlette-1.0.0-py3-none-any.whl", hash = "sha256:d3ec55e0bb321692d275455ddfd3df75fff145d009685eb40dc91fc66b03d38b", size = 72651, upload-time = "2026-03-22T18:29:45.111Z" }, + { url = "https://files.pythonhosted.org/packages/ec/e1/b2df4bc09a1e51ff664c1e17018a4274b42e5e9352e4a478ea540512dc88/starlette-1.0.1-py3-none-any.whl", hash = "sha256:7c0e69b2ee1c848bd54669d908500117a3ee13de603a21427e5c6fc1adf98dcd", size = 72802, upload-time = "2026-05-21T21:58:56.551Z" }, ] [[package]] From 9d3b9340af5a2cb7c711cc8b7bf200b838c08f53 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Fri, 22 May 2026 15:33:22 +0500 Subject: [PATCH 002/318] feat(mcp): add change intent and blast radius tools --- AGENTS.md | 7 +- CHANGELOG.md | 21 + codeclone/surfaces/mcp/_blast_radius.py | 624 +++++++++++ codeclone/surfaces/mcp/_intent.py | 193 ++++ .../mcp/_session_blast_radius_mixin.py | 118 +++ .../surfaces/mcp/_session_intent_mixin.py | 355 +++++++ .../surfaces/mcp/_session_state_mixin.py | 26 + codeclone/surfaces/mcp/server.py | 57 + codeclone/surfaces/mcp/service.py | 28 + codeclone/surfaces/mcp/session.py | 12 +- docs/book/20-mcp-interface.md | 9 +- docs/mcp.md | 12 +- 
.../contract_snapshots/mcp_tool_schemas.json | 982 ++++++++++-------- tests/test_mcp_server.py | 53 +- tests/test_mcp_service.py | 321 +++++- 15 files changed, 2391 insertions(+), 427 deletions(-) create mode 100644 codeclone/surfaces/mcp/_blast_radius.py create mode 100644 codeclone/surfaces/mcp/_intent.py create mode 100644 codeclone/surfaces/mcp/_session_blast_radius_mixin.py create mode 100644 codeclone/surfaces/mcp/_session_intent_mixin.py diff --git a/AGENTS.md b/AGENTS.md index 4b285479..4bf90480 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -66,8 +66,8 @@ Key artifacts: `codeclone-mcp` - `plugins/codeclone/` + `.agents/plugins/marketplace.json` — stable Codex plugin as a native local discovery layer over `codeclone-mcp`, with a bundled CodeClone review skill -- MCP runs are in-memory only; review markers are session-local and must never - leak into baseline/cache/report artifacts +- MCP runs are in-memory only; review markers and change intents are + session-local and must never leak into baseline/cache/report artifacts - `docs/`, `mkdocs.yml`, `.github/workflows/docs.yml` — published documentation site and docs build pipeline --- @@ -232,7 +232,8 @@ Reports come in: MCP is a separate optional interface, not a report format. It must remain a read-only agent layer over the same canonical report/baseline/cache contracts. -Session review markers are allowed only as ephemeral MCP process state. +Session review markers and change intents are allowed only as ephemeral MCP +process state. ### Report invariants diff --git a/CHANGELOG.md b/CHANGELOG.md index 7c3ee8d6..5f5f6fe4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,26 @@ # Changelog +## [2.1.0a1] - 2026-05-22 + +`2.1.0a1` opens the v2.1 alpha line for structural change control. + +### Added + +- Add MCP `get_blast_radius` as a deterministic pre-change projection over the + canonical report: direct dependents, clone cohorts, dependency-cycle + membership, coverage/risk signals, and do-not-touch paths. 
+- Add MCP `manage_change_intent` for session-local change intent lifecycle: + declare intended scope, inspect active intent, check actual changed files + against scope, and clear intent state. + +### Internal + +- Keep intent and blast-radius cache state in MCP process memory only; they do + not mutate source files, baselines, cache artifacts, reports, or canonical + report integrity. +- Mark the package as `2.1.0a1` with the PyPI alpha classifier while v2.1 + controller features are under development. + ## [2.0.2] - 2026-05-19 `2.0.2` is a focused patch release for VS Code extension packaging metadata, diff --git a/codeclone/surfaces/mcp/_blast_radius.py b/codeclone/surfaces/mcp/_blast_radius.py new file mode 100644 index 00000000..05b31b44 --- /dev/null +++ b/codeclone/surfaces/mcp/_blast_radius.py @@ -0,0 +1,624 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections import deque +from collections.abc import Mapping, Sequence +from dataclasses import dataclass +from fnmatch import fnmatchcase +from typing import Final, Literal + +BlastRadiusDepth = Literal["direct", "transitive"] +BlastRadiusInclude = Literal[ + "imports", + "clone_cohorts", + "coverage", + "risk_signals", + "do_not_touch", + "cycles", +] + +VALID_BLAST_RADIUS_DEPTHS: Final[frozenset[str]] = frozenset({"direct", "transitive"}) +VALID_BLAST_RADIUS_INCLUDE: Final[frozenset[str]] = frozenset( + { + "imports", + "clone_cohorts", + "coverage", + "risk_signals", + "do_not_touch", + "cycles", + } +) +DEFAULT_BLAST_RADIUS_INCLUDE: Final[tuple[BlastRadiusInclude, ...]] = ( + "imports", + "clone_cohorts", + "coverage", + "risk_signals", + "do_not_touch", + "cycles", +) +DEFAULT_DO_NOT_TOUCH_PATTERNS: Final[tuple[str, ...]] = ( + "codeclone.baseline.json", + ".cache/codeclone/**", +) + + +@dataclass(frozen=True, slots=True) +class BlastRadiusResult: + run_id: str + origin: tuple[str, ...] + depth: BlastRadiusDepth + radius_level: str + direct_dependents: tuple[str, ...] + transitive_dependents: tuple[str, ...] + clone_cohort_members: tuple[str, ...] + in_dependency_cycle: tuple[str, ...] + structural_risk: dict[str, list[str]] + do_not_touch: tuple[dict[str, str], ...] + guardrails: tuple[str, ...] 
+ + def to_payload( + self, + *, + include: Sequence[str] = DEFAULT_BLAST_RADIUS_INCLUDE, + ) -> dict[str, object]: + include_set = {str(item) for item in include} + imports_enabled = "imports" in include_set + risk_enabled = "risk_signals" in include_set or "coverage" in include_set + structural_risk = dict(self.structural_risk) if risk_enabled else {} + if "coverage" not in include_set: + structural_risk.pop("low_coverage_in_blast_zone", None) + if "risk_signals" not in include_set: + for key in ( + "high_complexity_in_blast_zone", + "high_coupling_in_blast_zone", + "overloaded_modules_in_blast_zone", + ): + structural_risk.pop(key, None) + return { + "run_id": self.run_id, + "origin": list(self.origin), + "depth": self.depth, + "radius_level": self.radius_level, + "direct_dependents": ( + list(self.direct_dependents) if imports_enabled else [] + ), + "transitive_dependents": ( + list(self.transitive_dependents) + if imports_enabled and self.depth == "transitive" + else [] + ), + "clone_cohort_members": ( + list(self.clone_cohort_members) + if "clone_cohorts" in include_set + else [] + ), + "in_dependency_cycle": ( + list(self.in_dependency_cycle) if "cycles" in include_set else [] + ), + "structural_risk": structural_risk, + "do_not_touch": ( + [dict(item) for item in self.do_not_touch] + if "do_not_touch" in include_set + else [] + ), + "guardrails": list(self.guardrails), + } + + +def _as_mapping(value: object) -> Mapping[str, object]: + return value if isinstance(value, Mapping) else {} + + +def _as_sequence(value: object) -> Sequence[object]: + if isinstance(value, Sequence) and not isinstance(value, (str, bytes, bytearray)): + return value + return () + + +def _as_int(value: object, default: int = 0) -> int: + if isinstance(value, bool): + return int(value) + if isinstance(value, int): + return value + if isinstance(value, float): + return int(value) + if isinstance(value, str): + try: + return int(value.strip()) + except ValueError: + return default + 
return default + + +def _normalize_relative_path(path: object) -> str: + text = str(path).replace("\\", "/").strip() + if text == ".": + return "" + if text.startswith("./"): + text = text[2:] + return text.rstrip("/") + + +def _path_to_module(path: str) -> str: + normalized = _normalize_relative_path(path) + if not normalized.endswith(".py"): + return normalized.replace("/", ".") + without_suffix = normalized[:-3] + if without_suffix.endswith("/__init__"): + without_suffix = without_suffix[: -len("/__init__")] + if without_suffix == "__init__": + without_suffix = "" + return without_suffix.replace("/", ".").strip(".") + + +def _module_to_candidate_path(module: str) -> str: + return f"{module.replace('.', '/')}.py" if module else "" + + +def _dedupe_sorted(values: Sequence[str] | set[str]) -> tuple[str, ...]: + return tuple(sorted({value for value in values if value})) + + +def _path_matches_glob(path: str, patterns: Sequence[str]) -> bool: + return any(fnmatchcase(path, pattern) for pattern in patterns) + + +def _item_path(item: Mapping[str, object]) -> str: + for key in ("relative_path", "path", "filepath", "file"): + value = _normalize_relative_path(item.get(key, "")) + if value: + return value + return "" + + +def _module_path_index(report_document: Mapping[str, object]) -> dict[str, str]: + modules: dict[str, str] = {} + inventory = _as_mapping(report_document.get("inventory")) + file_registry = _as_mapping(inventory.get("file_registry")) + for raw_path in _as_sequence(file_registry.get("items")): + path = _normalize_relative_path(raw_path) + module = _path_to_module(path) + if module and path: + modules.setdefault(module, path) + metrics = _as_mapping(report_document.get("metrics")) + families = _as_mapping(metrics.get("families")) + for family_name in ( + "complexity", + "coupling", + "cohesion", + "coverage_join", + "overloaded_modules", + "security_surfaces", + "api_surface", + "coverage_adoption", + ): + family = _as_mapping(families.get(family_name)) + 
for raw_item in _as_sequence(family.get("items")): + item = _as_mapping(raw_item) + path = _item_path(item) + module = str(item.get("module", "")).strip() or _path_to_module(path) + if module and path: + modules.setdefault(module, path) + return modules + + +def _module_to_output(module: str, module_paths: Mapping[str, str]) -> str: + return module_paths.get(module, _module_to_candidate_path(module) or module) + + +def _build_reverse_import_graph( + edges: Sequence[Mapping[str, object]], +) -> dict[str, set[str]]: + reverse: dict[str, set[str]] = {} + for edge in edges: + source = str(edge.get("source", "")).strip() + target = str(edge.get("target", "")).strip() + if source and target: + reverse.setdefault(target, set()).add(source) + return reverse + + +def _dependency_edges( + report_document: Mapping[str, object], +) -> tuple[Mapping[str, object], ...]: + metrics = _as_mapping(report_document.get("metrics")) + families = _as_mapping(metrics.get("families")) + dependencies = _as_mapping(families.get("dependencies")) + return tuple(_as_mapping(item) for item in _as_sequence(dependencies.get("items"))) + + +def _dependency_cycles( + report_document: Mapping[str, object], +) -> tuple[tuple[str, ...], ...]: + metrics = _as_mapping(report_document.get("metrics")) + families = _as_mapping(metrics.get("families")) + dependencies = _as_mapping(families.get("dependencies")) + cycles: list[tuple[str, ...]] = [] + for raw_cycle in _as_sequence(dependencies.get("cycles")): + cycle = tuple( + str(module).strip() + for module in _as_sequence(raw_cycle) + if str(module).strip() + ) + if cycle: + cycles.append(cycle) + return tuple(sorted(cycles, key=lambda item: (len(item), item))) + + +def _compute_direct_dependents( + *, + origin_modules: Sequence[str], + reverse_graph: Mapping[str, set[str]], +) -> tuple[str, ...]: + dependents: set[str] = set() + for module in origin_modules: + dependents.update(reverse_graph.get(module, set())) + return _dedupe_sorted(dependents) + + +def 
_compute_transitive_dependents( + *, + origin_modules: Sequence[str], + reverse_graph: Mapping[str, set[str]], +) -> tuple[str, ...]: + seen: set[str] = set() + queue: deque[str] = deque(origin_modules) + origin_set = set(origin_modules) + while queue: + current = queue.popleft() + for dependent in sorted(reverse_graph.get(current, set())): + if dependent in seen or dependent in origin_set: + continue + seen.add(dependent) + queue.append(dependent) + return _dedupe_sorted(seen) + + +def _clone_group_buckets( + report_document: Mapping[str, object], +) -> tuple[Mapping[str, object], ...]: + findings = _as_mapping(report_document.get("findings")) + groups = _as_mapping(findings.get("groups")) + clones = _as_mapping(groups.get("clones")) + buckets: list[Mapping[str, object]] = [] + for bucket_name in ("functions", "blocks", "segments"): + buckets.extend( + _as_mapping(item) for item in _as_sequence(clones.get(bucket_name)) + ) + return tuple(buckets) + + +def _suppressed_clone_buckets( + report_document: Mapping[str, object], +) -> tuple[Mapping[str, object], ...]: + findings = _as_mapping(report_document.get("findings")) + groups = _as_mapping(findings.get("groups")) + clones = _as_mapping(groups.get("clones")) + suppressed = _as_mapping(clones.get("suppressed")) + buckets: list[Mapping[str, object]] = [] + for bucket_name in ( + "function", + "block", + "segment", + "functions", + "blocks", + "segments", + ): + buckets.extend( + _as_mapping(item) for item in _as_sequence(suppressed.get(bucket_name)) + ) + return tuple(buckets) + + +def _compute_clone_cohort_members( + *, + report_document: Mapping[str, object], + origin_paths: Sequence[str], +) -> tuple[str, ...]: + origin_set = set(origin_paths) + cohort_paths: set[str] = set() + for group in _clone_group_buckets(report_document): + item_paths = { + _item_path(_as_mapping(item)) for item in _as_sequence(group.get("items")) + } + item_paths.discard("") + if origin_set.intersection(item_paths): + 
cohort_paths.update(item_paths - origin_set) + return _dedupe_sorted(cohort_paths) + + +def _compute_cycle_membership( + *, + origin_modules: Sequence[str], + origin_by_module: Mapping[str, str], + report_document: Mapping[str, object], +) -> tuple[str, ...]: + cycle_modules = { + module for cycle in _dependency_cycles(report_document) for module in cycle + } + return _dedupe_sorted( + { + origin_by_module[module] + for module in origin_modules + if module in cycle_modules and origin_by_module.get(module) + } + ) + + +def _compute_radius_level( + *, + direct_dependents: Sequence[str], + clone_cohort_members: Sequence[str], +) -> str: + total_affected = len(direct_dependents) + len(clone_cohort_members) + if total_affected == 0: + return "low" + if total_affected <= 5: + return "medium" + return "high" + + +def _blast_zone( + *, + origin_paths: Sequence[str], + direct_dependents: Sequence[str], + transitive_dependents: Sequence[str], + clone_cohort_members: Sequence[str], +) -> set[str]: + return { + *origin_paths, + *direct_dependents, + *transitive_dependents, + *clone_cohort_members, + } + + +def _compute_risk_signals( + *, + report_document: Mapping[str, object], + blast_zone_paths: set[str], +) -> dict[str, list[str]]: + metrics = _as_mapping(report_document.get("metrics")) + families = _as_mapping(metrics.get("families")) + complexity = _as_mapping(families.get("complexity")) + coupling = _as_mapping(families.get("coupling")) + coverage_join = _as_mapping(families.get("coverage_join")) + overloaded_modules = _as_mapping(families.get("overloaded_modules")) + + high_complexity = { + _item_path(_as_mapping(item)) + for item in _as_sequence(complexity.get("items")) + if str(_as_mapping(item).get("risk", "")).strip() == "high" + and _item_path(_as_mapping(item)) in blast_zone_paths + } + high_coupling = { + _item_path(_as_mapping(item)) + for item in _as_sequence(coupling.get("items")) + if str(_as_mapping(item).get("risk", "")).strip() == "high" + and 
_item_path(_as_mapping(item)) in blast_zone_paths + } + low_coverage = { + _item_path(_as_mapping(item)) + for item in _as_sequence(coverage_join.get("items")) + if ( + bool(_as_mapping(item).get("coverage_hotspot")) + or bool(_as_mapping(item).get("scope_gap_hotspot")) + ) + and _item_path(_as_mapping(item)) in blast_zone_paths + } + overloaded = { + _item_path(_as_mapping(item)) + for item in _as_sequence(overloaded_modules.get("items")) + if str(_as_mapping(item).get("candidate_status", "")).strip() == "candidate" + and _item_path(_as_mapping(item)) in blast_zone_paths + } + return { + "high_complexity_in_blast_zone": list(_dedupe_sorted(high_complexity)), + "high_coupling_in_blast_zone": list(_dedupe_sorted(high_coupling)), + "low_coverage_in_blast_zone": list(_dedupe_sorted(low_coverage)), + "overloaded_modules_in_blast_zone": list(_dedupe_sorted(overloaded)), + } + + +def _finding_paths(finding: Mapping[str, object]) -> tuple[str, ...]: + return _dedupe_sorted( + {_item_path(_as_mapping(item)) for item in _as_sequence(finding.get("items"))} + ) + + +def _all_finding_groups( + report_document: Mapping[str, object], +) -> tuple[Mapping[str, object], ...]: + findings = _as_mapping(report_document.get("findings")) + groups = _as_mapping(findings.get("groups")) + result: list[Mapping[str, object]] = [] + for family_payload in groups.values(): + family_map = _as_mapping(family_payload) + for value in family_map.values(): + result.extend(_as_mapping(item) for item in _as_sequence(value)) + return tuple(result) + + +def _append_do_not_touch( + entries: dict[str, str], + *, + path: str, + reason: str, +) -> None: + if not path: + return + entries.setdefault(path, reason) + + +def _compute_do_not_touch( + *, + report_document: Mapping[str, object], + origin_paths: Sequence[str], + blast_zone_paths: set[str], + forbidden_patterns: Sequence[str], + allowed_scope: Sequence[str] = (), +) -> tuple[dict[str, str], ...]: + entries: dict[str, str] = {} + origin_set = 
set(origin_paths) + allowed_set = set(allowed_scope) + for pattern in DEFAULT_DO_NOT_TOUCH_PATTERNS: + _append_do_not_touch( + entries, + path=pattern, + reason=( + "baseline, cache, and generated CodeClone state require explicit " + "separate changes" + ), + ) + for pattern in forbidden_patterns: + _append_do_not_touch(entries, path=pattern, reason="declared forbidden path") + for group in _all_finding_groups(report_document): + if str(group.get("novelty", "")).strip() != "known": + continue + for path in _finding_paths(group): + if path not in origin_set: + _append_do_not_touch( + entries, + path=path, + reason="known baseline debt outside declared origin", + ) + for group in _suppressed_clone_buckets(report_document): + for path in _finding_paths(group): + _append_do_not_touch( + entries, + path=path, + reason="golden fixture clone suppression surface", + ) + metrics = _as_mapping(report_document.get("metrics")) + families = _as_mapping(metrics.get("families")) + for family_name, reason in ( + ("security_surfaces", "report-only security boundary inventory"), + ("overloaded_modules", "report-only design signal"), + ): + family = _as_mapping(families.get(family_name)) + for raw_item in _as_sequence(family.get("items")): + path = _item_path(_as_mapping(raw_item)) + if path and path not in origin_set: + _append_do_not_touch(entries, path=path, reason=reason) + if allowed_set: + for path in blast_zone_paths: + if path not in allowed_set: + _append_do_not_touch( + entries, + path=path, + reason="affected by blast radius but outside declared edit scope", + ) + return tuple( + {"path": path, "reason": entries[path]} + for path in sorted(entries) + if path and (_path_matches_glob(path, forbidden_patterns) or path in entries) + ) + + +def _guardrails( + *, + radius_level: str, + do_not_touch: Sequence[Mapping[str, str]], +) -> tuple[str, ...]: + guardrails = [ + "review direct dependents before editing public behavior", + "treat clone cohort members as comparison context, 
not automatic edit targets", + ] + if radius_level == "high": + guardrails.append("high blast radius requires explicit human scope approval") + if do_not_touch: + guardrails.append("do-not-touch paths require separate explicit approval") + return tuple(guardrails) + + +def compute_blast_radius( + *, + run_id: str, + report_document: Mapping[str, object], + files: Sequence[str], + depth: BlastRadiusDepth = "direct", + forbidden_patterns: Sequence[str] = DEFAULT_DO_NOT_TOUCH_PATTERNS, + allowed_scope: Sequence[str] = (), +) -> BlastRadiusResult: + origin_paths = _dedupe_sorted( + tuple(_normalize_relative_path(path) for path in files) + ) + module_paths = _module_path_index(report_document) + origin_by_module = { + module: path + for path in origin_paths + for module in (_path_to_module(path),) + if module + } + origin_modules = tuple(sorted(origin_by_module)) + reverse_graph = _build_reverse_import_graph(_dependency_edges(report_document)) + direct_modules = _compute_direct_dependents( + origin_modules=origin_modules, + reverse_graph=reverse_graph, + ) + transitive_modules = ( + _compute_transitive_dependents( + origin_modules=origin_modules, + reverse_graph=reverse_graph, + ) + if depth == "transitive" + else () + ) + direct_dependents = _dedupe_sorted( + tuple(_module_to_output(module, module_paths) for module in direct_modules) + ) + transitive_dependents = _dedupe_sorted( + tuple( + _module_to_output(module, module_paths) + for module in transitive_modules + if module not in set(direct_modules) + ) + ) + clone_cohort_members = _compute_clone_cohort_members( + report_document=report_document, + origin_paths=origin_paths, + ) + dependency_cycle_members = _compute_cycle_membership( + origin_modules=origin_modules, + origin_by_module=origin_by_module, + report_document=report_document, + ) + radius_level = _compute_radius_level( + direct_dependents=direct_dependents, + clone_cohort_members=clone_cohort_members, + ) + zone = _blast_zone( + origin_paths=origin_paths, 
+ direct_dependents=direct_dependents, + transitive_dependents=transitive_dependents, + clone_cohort_members=clone_cohort_members, + ) + risk = _compute_risk_signals( + report_document=report_document, + blast_zone_paths=zone, + ) + do_not_touch = _compute_do_not_touch( + report_document=report_document, + origin_paths=origin_paths, + blast_zone_paths=zone, + forbidden_patterns=forbidden_patterns, + allowed_scope=allowed_scope, + ) + return BlastRadiusResult( + run_id=run_id, + origin=origin_paths, + depth=depth, + radius_level=radius_level, + direct_dependents=direct_dependents, + transitive_dependents=transitive_dependents, + clone_cohort_members=clone_cohort_members, + in_dependency_cycle=dependency_cycle_members, + structural_risk=risk, + do_not_touch=do_not_touch, + guardrails=_guardrails(radius_level=radius_level, do_not_touch=do_not_touch), + ) diff --git a/codeclone/surfaces/mcp/_intent.py b/codeclone/surfaces/mcp/_intent.py new file mode 100644 index 00000000..f869ee9e --- /dev/null +++ b/codeclone/surfaces/mcp/_intent.py @@ -0,0 +1,193 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Mapping, Sequence +from dataclasses import dataclass +from enum import Enum +from fnmatch import fnmatchcase +from pathlib import Path +from typing import Final + +DEFAULT_FORBIDDEN: Final[tuple[str, ...]] = ( + "codeclone.baseline.json", + ".cache/codeclone/**", +) +DEFAULT_INTENT_GUARDS: Final[tuple[str, ...]] = ( + "scope_expansion_requires_explanation", + "baseline_update_forbidden", + "cache_update_forbidden", + "generated_report_update_forbidden", + "out_of_scope_production_change_requires_human", + "new_structural_regression_forbidden", + "report_only_claims_forbidden", +) + + +class IntentStatus(str, Enum): + ACTIVE = "active" + CLEAN = "clean" + EXPANDED = "expanded" + VIOLATED = "violated" + UNVERIFIED = "unverified" + EXPIRED = "expired" + + +@dataclass(frozen=True, slots=True) +class IntentScope: + allowed_files: tuple[str, ...] + allowed_related: tuple[str, ...] = () + forbidden: tuple[str, ...] = DEFAULT_FORBIDDEN + + @property + def allowed_paths(self) -> tuple[str, ...]: + return tuple(sorted({*self.allowed_files, *self.allowed_related})) + + def to_payload(self) -> dict[str, object]: + return { + "allowed_files": list(self.allowed_files), + "allowed_related": list(self.allowed_related), + "forbidden": list(self.forbidden), + } + + +@dataclass(frozen=True, slots=True) +class IntentCheckResult: + status: IntentStatus + declared_scope: tuple[str, ...] + actual_changed_files: tuple[str, ...] + unexpected_files: tuple[str, ...] + forbidden_touched: tuple[str, ...] 
+ required_action: str | None + message: str + + def to_payload(self) -> dict[str, object]: + return { + "status": self.status.value, + "declared_scope": list(self.declared_scope), + "actual_changed_files": list(self.actual_changed_files), + "unexpected_files": list(self.unexpected_files), + "forbidden_touched": list(self.forbidden_touched), + "required_action": self.required_action, + "message": self.message, + } + + +@dataclass(frozen=True, slots=True) +class IntentRecord: + intent_id: str + run_id: str + report_digest: str + status: IntentStatus + declared_at_utc: str + scope: IntentScope + intent_description: str + expected_effects: tuple[str, ...] + guards: tuple[str, ...] + blast_radius_summary: dict[str, object] | None = None + check_result: IntentCheckResult | None = None + + def to_payload(self, *, short_run_id: str | None = None) -> dict[str, object]: + payload: dict[str, object] = { + "intent_id": self.intent_id, + "run_id": short_run_id or self.run_id, + "status": self.status.value, + "scope": self.scope.to_payload(), + "intent": self.intent_description, + "expected_effects": list(self.expected_effects), + "guards": list(self.guards), + "declared_at_utc": self.declared_at_utc, + "report_digest": self.report_digest, + "blast_radius_summary": self.blast_radius_summary or {}, + } + if self.check_result is not None: + payload["check_result"] = self.check_result.to_payload() + return payload + + +def _normalize_path(value: object) -> str: + text = str(value).replace("\\", "/").strip() + if text == ".": + return "" + if text.startswith("./"): + text = text[2:] + text = text.rstrip("/") + if Path(text).is_absolute(): + raise ValueError(f"intent paths must be relative: {value!r}") + if ".." 
in Path(text).parts: + raise ValueError(f"path traversal not allowed: {value!r}") + return text + + +def _normalize_required_paths(value: object, *, field_name: str) -> tuple[str, ...]: + if not isinstance(value, Sequence) or isinstance(value, (str, bytes, bytearray)): + raise ValueError(f"scope.{field_name} must be a list of relative paths.") + paths = tuple( + sorted({_normalize_path(item) for item in value if str(item).strip()}) + ) + if not paths: + raise ValueError(f"scope.{field_name} must contain at least one path.") + return paths + + +def _normalize_optional_paths(value: object, *, field_name: str) -> tuple[str, ...]: + if value is None: + return () + if not isinstance(value, Sequence) or isinstance(value, (str, bytes, bytearray)): + raise ValueError(f"scope.{field_name} must be a list of relative paths.") + return tuple(sorted({_normalize_path(item) for item in value if str(item).strip()})) + + +def normalize_intent_scope(scope: object) -> IntentScope: + if not isinstance(scope, Mapping): + raise ValueError("scope must be an object with allowed_files.") + allowed_files = _normalize_required_paths( + scope.get("allowed_files"), + field_name="allowed_files", + ) + allowed_related = _normalize_optional_paths( + scope.get("allowed_related"), + field_name="allowed_related", + ) + raw_forbidden = scope.get("forbidden") + forbidden = ( + ( + *DEFAULT_FORBIDDEN, + *_normalize_optional_paths(raw_forbidden, field_name="forbidden"), + ) + if raw_forbidden is not None + else DEFAULT_FORBIDDEN + ) + return IntentScope( + allowed_files=allowed_files, + allowed_related=allowed_related, + forbidden=tuple(sorted(set(forbidden))), + ) + + +def normalize_expected_effects(value: object) -> tuple[str, ...]: + if value is None: + return () + if not isinstance(value, Sequence) or isinstance(value, (str, bytes, bytearray)): + raise ValueError("expected_effects must be a list of strings.") + return tuple(sorted({str(item).strip() for item in value if str(item).strip()})) + + +def 
forbidden_touched( + *, + changed_files: Sequence[str], + forbidden_patterns: Sequence[str], +) -> tuple[str, ...]: + return tuple( + sorted( + { + path + for path in changed_files + if any(fnmatchcase(path, pattern) for pattern in forbidden_patterns) + } + ) + ) diff --git a/codeclone/surfaces/mcp/_session_blast_radius_mixin.py b/codeclone/surfaces/mcp/_session_blast_radius_mixin.py new file mode 100644 index 00000000..a3cdcf1a --- /dev/null +++ b/codeclone/surfaces/mcp/_session_blast_radius_mixin.py @@ -0,0 +1,118 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Sequence + +from . import _session_helpers as _helpers +from ._blast_radius import ( + DEFAULT_BLAST_RADIUS_INCLUDE, + DEFAULT_DO_NOT_TOUCH_PATTERNS, + VALID_BLAST_RADIUS_DEPTHS, + VALID_BLAST_RADIUS_INCLUDE, + BlastRadiusDepth, + BlastRadiusResult, + compute_blast_radius, +) +from ._session_shared import ( + CodeCloneMCPRunStore, + MCPRunRecord, + MCPServiceContractError, +) +from ._session_state_mixin import _MCPSessionStateMixin + + +class _MCPSessionBlastRadiusMixin(_MCPSessionStateMixin): + _runs: CodeCloneMCPRunStore + _blast_radius_cache: dict[tuple[str, tuple[str, ...], str], BlastRadiusResult] + + def get_blast_radius( + self, + *, + files: Sequence[str], + run_id: str | None = None, + depth: str = "direct", + include: Sequence[str] | None = None, + ) -> dict[str, object]: + record = self._runs.get(run_id) + normalized_depth = self._validated_blast_radius_depth(depth) + normalized_files = self._normalize_changed_paths( + root_path=record.root, + paths=files, + ) + if not normalized_files: + raise MCPServiceContractError( + "get_blast_radius requires at least one file." 
+ ) + normalized_include = self._validated_blast_radius_include(include) + result = self._blast_radius_result( + record=record, + files=normalized_files, + depth=normalized_depth, + ) + return result.to_payload(include=normalized_include) + + def _blast_radius_result( + self, + *, + record: MCPRunRecord, + files: Sequence[str], + depth: BlastRadiusDepth, + forbidden_patterns: Sequence[str] = DEFAULT_DO_NOT_TOUCH_PATTERNS, + allowed_scope: Sequence[str] = (), + ) -> BlastRadiusResult: + normalized_files = tuple(sorted(set(files))) + cache_key = (record.run_id, normalized_files, depth) + cacheable = ( + not allowed_scope + and tuple(forbidden_patterns) == DEFAULT_DO_NOT_TOUCH_PATTERNS + ) + if cacheable: + with self._state_lock: + cached = self._blast_radius_cache.get(cache_key) + if cached is not None: + return cached + result = compute_blast_radius( + run_id=_helpers._short_run_id(record.run_id), + report_document=record.report_document, + files=normalized_files, + depth=depth, + forbidden_patterns=forbidden_patterns, + allowed_scope=allowed_scope, + ) + if cacheable: + with self._state_lock: + self._blast_radius_cache[cache_key] = result + return result + + def _validated_blast_radius_depth(self, depth: str) -> BlastRadiusDepth: + if depth not in VALID_BLAST_RADIUS_DEPTHS: + expected = ", ".join(sorted(VALID_BLAST_RADIUS_DEPTHS)) + raise MCPServiceContractError( + f"Invalid value for depth: {depth!r}. Expected one of: {expected}." + ) + return "transitive" if depth == "transitive" else "direct" + + def _validated_blast_radius_include( + self, + include: Sequence[str] | None, + ) -> tuple[str, ...]: + if include is None: + return DEFAULT_BLAST_RADIUS_INCLUDE + invalid = sorted( + {item for item in include if item not in VALID_BLAST_RADIUS_INCLUDE} + ) + if invalid: + expected = ", ".join(sorted(VALID_BLAST_RADIUS_INCLUDE)) + raise MCPServiceContractError( + "Invalid value for include: " + f"{', '.join(invalid)}. Expected values: {expected}." 
+ ) + return tuple(sorted(set(include))) + + +__all__ = ["_MCPSessionBlastRadiusMixin"] diff --git a/codeclone/surfaces/mcp/_session_intent_mixin.py b/codeclone/surfaces/mcp/_session_intent_mixin.py new file mode 100644 index 00000000..5b477fa6 --- /dev/null +++ b/codeclone/surfaces/mcp/_session_intent_mixin.py @@ -0,0 +1,355 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Mapping, Sequence +from dataclasses import replace +from datetime import datetime, timezone +from fnmatch import fnmatchcase + +from . import _session_helpers as _helpers +from ._intent import ( + DEFAULT_INTENT_GUARDS, + IntentCheckResult, + IntentRecord, + IntentScope, + IntentStatus, + forbidden_touched, + normalize_expected_effects, + normalize_intent_scope, +) +from ._session_blast_radius_mixin import _MCPSessionBlastRadiusMixin +from ._session_shared import ( + CodeCloneMCPRunStore, + MCPRunRecord, + MCPServiceContractError, +) + + +class _MCPSessionIntentMixin(_MCPSessionBlastRadiusMixin): + _runs: CodeCloneMCPRunStore + _active_intents: dict[str, IntentRecord] + _intent_sequence: int + + def manage_change_intent( + self, + *, + action: str, + run_id: str | None = None, + intent_id: str | None = None, + scope: dict[str, object] | None = None, + intent: str | None = None, + expected_effects: Sequence[str] | None = None, + diff_ref: str | None = None, + changed_files: Sequence[str] | None = None, + ) -> dict[str, object]: + match action: + case "declare": + return self._declare_change_intent( + run_id=run_id, + scope=scope, + intent=intent, + expected_effects=expected_effects, + ) + case "get": + record, active_intent = self._resolve_intent( + run_id=run_id, + intent_id=intent_id, + ) + return 
self._intent_payload_with_expiry( + record=record, + intent=active_intent, + ) + case "check": + return self._check_change_intent( + run_id=run_id, + intent_id=intent_id, + diff_ref=diff_ref, + changed_files=changed_files, + ) + case "clear": + return self._clear_change_intent(intent_id=intent_id) + case _: + raise MCPServiceContractError( + "Invalid value for action: " + f"{action!r}. Expected one of: check, clear, declare, get." + ) + + def _declare_change_intent( + self, + *, + run_id: str | None, + scope: dict[str, object] | None, + intent: str | None, + expected_effects: Sequence[str] | None, + ) -> dict[str, object]: + record = self._runs.get(run_id) + try: + normalized_scope = normalize_intent_scope(scope) + normalized_expected_effects = normalize_expected_effects(expected_effects) + except ValueError as exc: + raise MCPServiceContractError(str(exc)) from exc + description = str(intent or "").strip() + if not description: + raise MCPServiceContractError("action='declare' requires intent text.") + blast = self._blast_radius_result( + record=record, + files=normalized_scope.allowed_paths, + depth="direct", + forbidden_patterns=normalized_scope.forbidden, + allowed_scope=normalized_scope.allowed_paths, + ) + blast_payload = blast.to_payload() + blast_summary = self._blast_radius_summary( + blast_payload=blast_payload, + scope=normalized_scope, + ) + with self._state_lock: + for existing_id, existing in tuple(self._active_intents.items()): + if existing.run_id == record.run_id: + self._active_intents.pop(existing_id, None) + self._intent_sequence += 1 + intent_id = ( + f"intent-{_helpers._short_run_id(record.run_id)}-" + f"{self._intent_sequence:03d}" + ) + record_payload = IntentRecord( + intent_id=intent_id, + run_id=record.run_id, + report_digest=self._report_digest_value(record), + status=IntentStatus.ACTIVE, + declared_at_utc=_utc_now(), + scope=normalized_scope, + intent_description=description, + expected_effects=normalized_expected_effects, + 
guards=DEFAULT_INTENT_GUARDS, + blast_radius_summary=blast_summary, + ) + self._active_intents[intent_id] = record_payload + payload = record_payload.to_payload( + short_run_id=_helpers._short_run_id(record.run_id) + ) + payload["do_not_touch"] = blast_payload["do_not_touch"] + return payload + + def _check_change_intent( + self, + *, + run_id: str | None, + intent_id: str | None, + diff_ref: str | None, + changed_files: Sequence[str] | None, + ) -> dict[str, object]: + if diff_ref is None and not changed_files: + raise MCPServiceContractError( + "action='check' requires diff_ref or changed_files." + ) + record, active_intent = self._resolve_intent( + run_id=run_id, + intent_id=intent_id, + ) + if self._is_intent_expired(record=record, intent=active_intent): + expired = replace(active_intent, status=IntentStatus.EXPIRED) + return expired.to_payload( + short_run_id=_helpers._short_run_id(record.run_id) + ) + actual = ( + self._normalize_changed_paths(root_path=record.root, paths=changed_files) + if changed_files + else self._git_diff_paths(root_path=record.root, git_diff_ref=str(diff_ref)) + ) + check_result = self._intent_check_result(intent=active_intent, actual=actual) + updated = replace( + active_intent, + status=check_result.status, + check_result=check_result, + ) + with self._state_lock: + self._active_intents[updated.intent_id] = updated + payload = check_result.to_payload() + payload["intent_id"] = updated.intent_id + return payload + + def _clear_change_intent(self, *, intent_id: str | None) -> dict[str, object]: + with self._state_lock: + removed_ids: tuple[str, ...] 
+ if intent_id is not None: + if intent_id not in self._active_intents: + raise MCPServiceContractError( + f"Unknown change intent id: {intent_id}" + ) + removed_ids = (intent_id,) + self._active_intents.pop(intent_id, None) + else: + removed_ids = tuple(self._active_intents) + self._active_intents.clear() + return { + "cleared": len(removed_ids), + "cleared_intent_ids": list(removed_ids), + } + + def _resolve_intent( + self, + *, + run_id: str | None, + intent_id: str | None, + ) -> tuple[MCPRunRecord, IntentRecord]: + if intent_id is not None: + with self._state_lock: + active_intent = self._active_intents.get(intent_id) + if active_intent is None: + raise MCPServiceContractError(f"Unknown change intent id: {intent_id}") + return self._runs.get(active_intent.run_id), active_intent + record = self._runs.get(run_id) + with self._state_lock: + matching = [ + intent + for intent in self._active_intents.values() + if intent.run_id == record.run_id + ] + if not matching: + raise MCPServiceContractError("No active change intent is available.") + return record, matching[-1] + + def _intent_payload_with_expiry( + self, + *, + record: MCPRunRecord, + intent: IntentRecord, + ) -> dict[str, object]: + if self._is_intent_expired(record=record, intent=intent): + intent = replace(intent, status=IntentStatus.EXPIRED) + return intent.to_payload(short_run_id=_helpers._short_run_id(record.run_id)) + + def _is_intent_expired( + self, + *, + record: MCPRunRecord, + intent: IntentRecord, + ) -> bool: + return intent.report_digest != self._report_digest_value(record) + + def _report_digest_value(self, record: MCPRunRecord) -> str: + integrity = _as_mapping(record.report_document.get("integrity")) + digest = _as_mapping(integrity.get("digest")) + value = str(digest.get("value", "")).strip() + if value: + return value + return record.run_id + + def _blast_radius_summary( + self, + *, + blast_payload: Mapping[str, object], + scope: IntentScope, + ) -> dict[str, object]: + affected = 
tuple( + sorted( + { + *( + str(item) + for item in _as_sequence(blast_payload.get("direct_dependents")) + ), + *( + str(item) + for item in _as_sequence( + blast_payload.get("transitive_dependents") + ) + ), + *( + str(item) + for item in _as_sequence( + blast_payload.get("clone_cohort_members") + ) + ), + } + ) + ) + return { + "radius_level": str(blast_payload.get("radius_level", "low")), + "direct_dependents_count": len( + _as_sequence(blast_payload.get("direct_dependents")) + ), + "clone_cohort_members_count": len( + _as_sequence(blast_payload.get("clone_cohort_members")) + ), + "affected_but_forbidden": list( + forbidden_touched( + changed_files=affected, + forbidden_patterns=scope.forbidden, + ) + ), + "do_not_touch_count": len(_as_sequence(blast_payload.get("do_not_touch"))), + } + + def _intent_check_result( + self, + *, + intent: IntentRecord, + actual: Sequence[str], + ) -> IntentCheckResult: + actual_files = tuple(sorted(set(actual))) + declared_scope = intent.scope.allowed_files + allowed = set(intent.scope.allowed_files) + related = set(intent.scope.allowed_related) + forbidden = forbidden_touched( + changed_files=actual_files, + forbidden_patterns=intent.scope.forbidden, + ) + unexpected = tuple( + path + for path in actual_files + if path not in allowed + and path not in related + and not any( + fnmatchcase(path, pattern) for pattern in intent.scope.forbidden + ) + ) + expanded = tuple(path for path in actual_files if path in related) + if forbidden or unexpected: + status = IntentStatus.VIOLATED + required_action = "human_approval" + message = "Patch touched forbidden or out-of-scope files." + elif expanded: + status = IntentStatus.EXPANDED + required_action = None + message = "Patch touched allowed related files outside primary scope." + else: + status = IntentStatus.CLEAN + required_action = None + message = "Patch stayed inside declared scope." 
+ return IntentCheckResult( + status=status, + declared_scope=declared_scope, + actual_changed_files=actual_files, + unexpected_files=unexpected, + forbidden_touched=forbidden, + required_action=required_action, + message=message, + ) + + +def _as_mapping(value: object) -> Mapping[str, object]: + return value if isinstance(value, Mapping) else {} + + +def _as_sequence(value: object) -> Sequence[object]: + if isinstance(value, Sequence) and not isinstance(value, (str, bytes, bytearray)): + return value + return () + + +def _utc_now() -> str: + return ( + datetime.now(timezone.utc) + .replace(microsecond=0) + .isoformat() + .replace("+00:00", "Z") + ) + + +__all__ = ["_MCPSessionIntentMixin"] diff --git a/codeclone/surfaces/mcp/_session_state_mixin.py b/codeclone/surfaces/mcp/_session_state_mixin.py index a8d58e53..d1309ecd 100644 --- a/codeclone/surfaces/mcp/_session_state_mixin.py +++ b/codeclone/surfaces/mcp/_session_state_mixin.py @@ -8,6 +8,8 @@ from ...baseline.metrics_baseline import probe_metrics_baseline_section from . 
import _session_helpers as _helpers +from ._blast_radius import BlastRadiusResult +from ._intent import IntentRecord from ._session_baseline import ( CloneBaselineState, MetricsBaselineState, @@ -782,6 +784,9 @@ class _MCPSessionStateMixin(_MCPSessionReportMixin): _review_state: dict[str, OrderedDict[str, str | None]] _last_gate_results: dict[str, dict[str, object]] _spread_max_cache: dict[str, int] + _blast_radius_cache: dict[tuple[str, tuple[str, ...], str], BlastRadiusResult] + _active_intents: dict[str, IntentRecord] + _intent_sequence: int def evaluate_gates(self, request: MCPGateRequest) -> dict[str, object]: record = self._runs.get(request.run_id) @@ -1116,9 +1121,14 @@ def clear_session_runs(self) -> dict[str, object]: ) cleared_gate_results = len(self._last_gate_results) cleared_spread_cache_entries = len(self._spread_max_cache) + cleared_blast_radius_entries = len(self._blast_radius_cache) + cleared_intents = len(self._active_intents) self._review_state.clear() self._last_gate_results.clear() self._spread_max_cache.clear() + self._blast_radius_cache.clear() + self._active_intents.clear() + self._intent_sequence = 0 return { "cleared_runs": len(removed_run_ids), "cleared_run_ids": [ @@ -1127,6 +1137,8 @@ def clear_session_runs(self) -> dict[str, object]: "cleared_review_entries": cleared_review_entries, "cleared_gate_results": cleared_gate_results, "cleared_spread_cache_entries": cleared_spread_cache_entries, + "cleared_blast_radius_entries": cleared_blast_radius_entries, + "cleared_intents": cleared_intents, } def read_resource(self, uri: str) -> str: @@ -1209,3 +1221,17 @@ def _prune_session_state(self) -> None: ] for run_id in stale_run_ids: state_map.pop(run_id, None) + stale_blast_radius_keys = [ + cache_key + for cache_key in self._blast_radius_cache + if cache_key[0] not in active_run_ids + ] + for cache_key in stale_blast_radius_keys: + self._blast_radius_cache.pop(cache_key, None) + stale_intent_ids = [ + intent_id + for intent_id, intent in 
self._active_intents.items() + if intent.run_id not in active_run_ids + ] + for intent_id in stale_intent_ids: + self._active_intents.pop(intent_id, None) diff --git a/codeclone/surfaces/mcp/server.py b/codeclone/surfaces/mcp/server.py index 3599bac0..1392437a 100644 --- a/codeclone/surfaces/mcp/server.py +++ b/codeclone/surfaces/mcp/server.py @@ -349,6 +349,30 @@ def get_production_triage( max_suggestions=max_suggestions, ) + @tool( + title="Get Blast Radius", + description=( + "Return the deterministic structural risk boundary for changing " + "the given files. Shows direct dependents, clone cohort members, " + "coverage gaps, and do-not-touch paths. Derived from the canonical " + "report; no new analysis is performed." + ), + annotations=read_only_tool, + structured_output=True, + ) + def get_blast_radius( + files: list[str], + run_id: str | None = None, + depth: str = "direct", + include: list[str] | None = None, + ) -> dict[str, object]: + return service.get_blast_radius( + files=files, + run_id=run_id, + depth=depth, + include=include, + ) + @tool( title="Help", description=( @@ -783,6 +807,39 @@ def mark_finding_reviewed( def list_reviewed_findings(run_id: str | None = None) -> dict[str, object]: return service.list_reviewed_findings(run_id=run_id) + @tool( + title="Manage Change Intent", + description=( + "Manage the agent change intent lifecycle for the current MCP " + "session. Actions: 'declare' to declare intended scope before " + "editing, 'get' to retrieve active intent, 'check' to verify " + "actual diff against declared scope, and 'clear' to remove intent. " + "Intent is session-local and in-memory." 
+ ), + annotations=session_tool, + structured_output=True, + ) + def manage_change_intent( + action: str, + run_id: str | None = None, + intent_id: str | None = None, + scope: dict[str, object] | None = None, + intent: str | None = None, + expected_effects: list[str] | None = None, + diff_ref: str | None = None, + changed_files: list[str] | None = None, + ) -> dict[str, object]: + return service.manage_change_intent( + action=action, + run_id=run_id, + intent_id=intent_id, + scope=scope, + intent=intent, + expected_effects=expected_effects, + diff_ref=diff_ref, + changed_files=changed_files, + ) + @tool( title="Clear Session Runs", description=( diff --git a/codeclone/surfaces/mcp/service.py b/codeclone/surfaces/mcp/service.py index adca1d4d..dfdd3570 100644 --- a/codeclone/surfaces/mcp/service.py +++ b/codeclone/surfaces/mcp/service.py @@ -52,6 +52,18 @@ def get_production_triage( def get_help(self: _RunDictService, **params: object) -> dict[str, object]: return self._run_dict("get_help", **params) + def get_blast_radius( + self: _RunDictService, + **params: object, + ) -> dict[str, object]: + return self._run_dict("get_blast_radius", **params) + + def manage_change_intent( + self: _RunDictService, + **params: object, + ) -> dict[str, object]: + return self._run_dict("manage_change_intent", **params) + def generate_pr_summary( self: _RunDictService, **params: object, @@ -205,6 +217,22 @@ def _apply_public_method_signatures() -> None: _kwonly("max_hotspots", "int", 3), _kwonly("max_suggestions", "int", 3), ), + "get_blast_radius": ( + _kwonly("files", "Sequence[str]"), + _kwonly("run_id", "str | None", None), + _kwonly("depth", "str", "direct"), + _kwonly("include", "Sequence[str] | None", None), + ), + "manage_change_intent": ( + _kwonly("action", "str"), + _kwonly("run_id", "str | None", None), + _kwonly("intent_id", "str | None", None), + _kwonly("scope", "dict[str, object] | None", None), + _kwonly("intent", "str | None", None), + _kwonly("expected_effects", 
"Sequence[str] | None", None), + _kwonly("diff_ref", "str | None", None), + _kwonly("changed_files", "Sequence[str] | None", None), + ), "get_remediation": ( _kwonly("finding_id", "str"), _kwonly("run_id", "str | None", None), diff --git a/codeclone/surfaces/mcp/session.py b/codeclone/surfaces/mcp/session.py index 3de4ce24..813bd64a 100644 --- a/codeclone/surfaces/mcp/session.py +++ b/codeclone/surfaces/mcp/session.py @@ -10,10 +10,13 @@ from ...report.meta import build_report_meta as _build_report_meta from ...report.meta import current_report_timestamp_utc as _current_report_timestamp_utc from . import _session_helpers as _helpers +from ._blast_radius import BlastRadiusResult +from ._intent import IntentRecord from ._session_baseline import ( resolve_clone_baseline_state, resolve_metrics_baseline_state, ) +from ._session_intent_mixin import _MCPSessionIntentMixin from ._session_shared import ( _REPORT_DUMMY_PATH, DEFAULT_BLOCK_MIN_LOC, @@ -54,7 +57,6 @@ process, report, ) -from ._session_state_mixin import _MCPSessionStateMixin __all__ = [ "DEFAULT_MCP_HISTORY_LIMIT", @@ -75,13 +77,19 @@ ] -class MCPSession(_MCPSessionStateMixin): +class MCPSession(_MCPSessionIntentMixin): def __init__(self, *, history_limit: int = DEFAULT_MCP_HISTORY_LIMIT) -> None: self._runs = CodeCloneMCPRunStore(history_limit=history_limit) self._state_lock = RLock() self._review_state: dict[str, OrderedDict[str, str | None]] = {} self._last_gate_results: dict[str, dict[str, object]] = {} self._spread_max_cache: dict[str, int] = {} + self._blast_radius_cache: dict[ + tuple[str, tuple[str, ...], str], + BlastRadiusResult, + ] = {} + self._active_intents: dict[str, IntentRecord] = {} + self._intent_sequence = 0 def analyze_repository(self, request: MCPAnalysisRequest) -> dict[str, object]: self._validate_analysis_request(request) diff --git a/docs/book/20-mcp-interface.md b/docs/book/20-mcp-interface.md index a54070b6..863c926e 100644 --- a/docs/book/20-mcp-interface.md +++ 
b/docs/book/20-mcp-interface.md @@ -2,7 +2,7 @@ ## Purpose -Define the current public MCP surface in the CodeClone `2.0` release line. +Define the current public MCP surface in the CodeClone `2.1` release line. The MCP layer is optional, read-only, and built on the same canonical pipeline/report contracts as the CLI. It does not create a second analysis @@ -51,7 +51,7 @@ Current server characteristics: ## Tools -Current tool set: `21` tools. +Current tool set: `23` tools. The MCP surface is intentionally triage-first: analyze first, summarize/triage second, then drill into one finding or one hotspot family. @@ -64,6 +64,7 @@ second, then drill into one finding or one hotspot family. | `analyze_changed_paths` | `root`, `changed_paths` or `git_diff_ref`, `analysis_mode`, thresholds, `api_surface`, `coverage_xml`, `cache_policy` | Diff-aware analysis with changed-files projection over the same canonical run/report contract. | | `get_run_summary` | `run_id` | Cheapest run-level snapshot. Start here after analysis when you need health, findings, baseline/cache status, and inventory in compact form. | | `get_production_triage` | `run_id`, `max_hotspots`, `max_suggestions` | Production-first first-pass view over one stored run. | +| `get_blast_radius` | `run_id`, `files`, `depth`, `include` | Derived pre-change risk boundary: direct dependents, clone cohorts, coverage gaps, risk signals, and do-not-touch paths. | | `help` | `topic`, `detail` | Bounded workflow/contract guidance for supported MCP topics. | | `compare_runs` | `run_id_before`, `run_id_after`, `focus` | Run-to-run delta view over findings and health; returns `incomparable` when roots/settings differ. | | `evaluate_gates` | `run_id`, gate flags, threshold overrides, `coverage_min` | Evaluate CI/gating decisions against a stored run without mutating process or repo state. | @@ -95,6 +96,7 @@ second, then drill into one finding or one hotspot family. 
|--------------------------|--------------------------------|-------------------------------------------------------------------------------------| | `mark_finding_reviewed` | `finding_id`, `run_id`, `note` | Mark a finding as reviewed in the current in-memory MCP session. | | `list_reviewed_findings` | `run_id` | Return reviewed markers currently held in process memory. | +| `manage_change_intent` | `action`, `run_id`, `intent_id`, `scope`, `changed_files` or `diff_ref` | Declare, inspect, check, or clear session-local change intent for governed edits. | | `clear_session_runs` | none | Clear in-memory run history and session-local review state for this server process. | ## Resources @@ -128,6 +130,9 @@ Resources are deterministic read-only projections over stored runs. provided it must also be absolute. - `git_diff_ref` is validated before any subprocess call. - Review markers are session-local in-memory state only. +- Change intent and blast-radius cache state are session-local in-memory state + only; they do not enter canonical report integrity, baseline, or cache + artifacts. - Run history is process-local and does not survive restart. - Missing optional MCP dependency is surfaced explicitly by the launcher. - `metrics_detail(family="security_surfaces")` exposes a compact, report-only diff --git a/docs/mcp.md b/docs/mcp.md index bdeda025..0fad642e 100644 --- a/docs/mcp.md +++ b/docs/mcp.md @@ -103,7 +103,7 @@ Run retention is bounded: default `4`, max `10` (`--history-limit`). If a tool request omits `processes`, MCP defers process-count policy to the core CodeClone runtime. -Current CodeClone `2.0` MCP surface: `21` tools, `7` fixed resources, and `3` +Current CodeClone `2.1` MCP surface: `23` tools, `7` fixed resources, and `3` run-scoped URI templates. ## Tool surface @@ -114,6 +114,7 @@ run-scoped URI templates. 
| `analyze_changed_paths` | Diff-aware analysis via `changed_paths` or `git_diff_ref`; compact changed-files snapshot | | `get_run_summary` | Cheapest run snapshot: health, findings, baseline, inventory, active thresholds | | `get_production_triage` | Production-first view: health, hotspots, suggestions, active thresholds; best first pass for noisy repos | +| `get_blast_radius` | Pre-change risk boundary: direct dependents, clone cohorts, coverage gaps, and do-not-touch paths | | `help` | Semantic guide for workflow, analysis profile, baseline, suppressions, review state, changed-scope | | `compare_runs` | Run-to-run delta: regressions, improvements, health change | | `list_findings` | Filtered, paginated findings; use after hotspots or `check_*` | @@ -130,6 +131,7 @@ run-scoped URI templates. | `generate_pr_summary` | PR-friendly markdown or JSON summary | | `mark_finding_reviewed` | Session-local review marker (in-memory) | | `list_reviewed_findings` | List reviewed findings for a run | +| `manage_change_intent` | Declare, inspect, check, or clear session-local edit scope intent | | `clear_session_runs` | Reset in-memory runs and session state | > `check_*` tools query stored runs only. Call `analyze_repository` or @@ -206,8 +208,9 @@ trigger analysis. `codeclone://latest/*` always resolves to the most recent run registered in the current MCP server session. A later `analyze_repository` or `analyze_changed_paths` call moves that pointer. -`mark_finding_reviewed` and `clear_session_runs` mutate only in-memory session -state. They never touch source files, baselines, cache, or report artifacts. +`mark_finding_reviewed`, `manage_change_intent`, and `clear_session_runs` +mutate only in-memory session state. They never touch source files, baselines, +cache, or report artifacts. ## Recommended workflows @@ -338,7 +341,8 @@ If `codeclone-mcp` is not on `PATH`, use an absolute path to the launcher. 
## Security - Read-only by design: no source mutation, no baseline/cache writes. -- Run history and review markers are in-memory only — lost on process stop. +- Run history, review markers, and change intents are in-memory only — lost on + process stop. - Repository access is limited to what the server process can read locally. - `streamable-http` binds to loopback by default; `--allow-remote` is explicit opt-in. diff --git a/tests/fixtures/contract_snapshots/mcp_tool_schemas.json b/tests/fixtures/contract_snapshots/mcp_tool_schemas.json index 4cf9e3ba..46d8d2a5 100644 --- a/tests/fixtures/contract_snapshots/mcp_tool_schemas.json +++ b/tests/fixtures/contract_snapshots/mcp_tool_schemas.json @@ -1,25 +1,28 @@ [ { + "name": "analyze_changed_paths", "input_schema": { "properties": { - "analysis_mode": { - "default": "full", - "title": "Analysis Mode", + "root": { + "title": "Root", "type": "string" }, - "api_surface": { + "changed_paths": { "anyOf": [ { - "type": "boolean" + "items": { + "type": "string" + }, + "type": "array" }, { "type": "null" } ], "default": null, - "title": "Api Surface" + "title": "Changed Paths" }, - "baseline_path": { + "git_diff_ref": { "anyOf": [ { "type": "string" @@ -29,9 +32,19 @@ } ], "default": null, - "title": "Baseline Path" + "title": "Git Diff Ref" }, - "block_min_loc": { + "analysis_mode": { + "default": "full", + "title": "Analysis Mode", + "type": "string" + }, + "respect_pyproject": { + "default": true, + "title": "Respect Pyproject", + "type": "boolean" + }, + "processes": { "anyOf": [ { "type": "integer" @@ -41,9 +54,9 @@ } ], "default": null, - "title": "Block Min Loc" + "title": "Processes" }, - "block_min_stmt": { + "min_loc": { "anyOf": [ { "type": "integer" @@ -53,41 +66,33 @@ } ], "default": null, - "title": "Block Min Stmt" + "title": "Min Loc" }, - "cache_path": { + "min_stmt": { "anyOf": [ { - "type": "string" + "type": "integer" }, { "type": "null" } ], "default": null, - "title": "Cache Path" - }, - "cache_policy": 
{ - "default": "reuse", - "title": "Cache Policy", - "type": "string" + "title": "Min Stmt" }, - "changed_paths": { + "block_min_loc": { "anyOf": [ { - "items": { - "type": "string" - }, - "type": "array" + "type": "integer" }, { "type": "null" } ], "default": null, - "title": "Changed Paths" + "title": "Block Min Loc" }, - "cohesion_threshold": { + "block_min_stmt": { "anyOf": [ { "type": "integer" @@ -97,9 +102,9 @@ } ], "default": null, - "title": "Cohesion Threshold" + "title": "Block Min Stmt" }, - "complexity_threshold": { + "segment_min_loc": { "anyOf": [ { "type": "integer" @@ -109,9 +114,9 @@ } ], "default": null, - "title": "Complexity Threshold" + "title": "Segment Min Loc" }, - "coupling_threshold": { + "segment_min_stmt": { "anyOf": [ { "type": "integer" @@ -121,19 +126,19 @@ } ], "default": null, - "title": "Coupling Threshold" + "title": "Segment Min Stmt" }, - "coverage_min": { + "api_surface": { "anyOf": [ { - "type": "integer" + "type": "boolean" }, { "type": "null" } ], "default": null, - "title": "Coverage Min" + "title": "Api Surface" }, "coverage_xml": { "anyOf": [ @@ -147,19 +152,19 @@ "default": null, "title": "Coverage Xml" }, - "git_diff_ref": { + "coverage_min": { "anyOf": [ { - "type": "string" + "type": "integer" }, { "type": "null" } ], "default": null, - "title": "Git Diff Ref" + "title": "Coverage Min" }, - "max_baseline_size_mb": { + "complexity_threshold": { "anyOf": [ { "type": "integer" @@ -169,9 +174,9 @@ } ], "default": null, - "title": "Max Baseline Size Mb" + "title": "Complexity Threshold" }, - "max_cache_size_mb": { + "coupling_threshold": { "anyOf": [ { "type": "integer" @@ -181,45 +186,45 @@ } ], "default": null, - "title": "Max Cache Size Mb" + "title": "Coupling Threshold" }, - "metrics_baseline_path": { + "cohesion_threshold": { "anyOf": [ { - "type": "string" + "type": "integer" }, { "type": "null" } ], "default": null, - "title": "Metrics Baseline Path" + "title": "Cohesion Threshold" }, - "min_loc": { + 
"baseline_path": { "anyOf": [ { - "type": "integer" + "type": "string" }, { "type": "null" } ], "default": null, - "title": "Min Loc" + "title": "Baseline Path" }, - "min_stmt": { + "metrics_baseline_path": { "anyOf": [ { - "type": "integer" + "type": "string" }, { "type": "null" } ], "default": null, - "title": "Min Stmt" + "title": "Metrics Baseline Path" }, - "processes": { + "max_baseline_size_mb": { "anyOf": [ { "type": "integer" @@ -229,30 +234,26 @@ } ], "default": null, - "title": "Processes" - }, - "respect_pyproject": { - "default": true, - "title": "Respect Pyproject", - "type": "boolean" + "title": "Max Baseline Size Mb" }, - "root": { - "title": "Root", + "cache_policy": { + "default": "reuse", + "title": "Cache Policy", "type": "string" }, - "segment_min_loc": { + "cache_path": { "anyOf": [ { - "type": "integer" + "type": "string" }, { "type": "null" } ], "default": null, - "title": "Segment Min Loc" + "title": "Cache Path" }, - "segment_min_stmt": { + "max_cache_size_mb": { "anyOf": [ { "type": "integer" @@ -262,7 +263,7 @@ } ], "default": null, - "title": "Segment Min Stmt" + "title": "Max Cache Size Mb" } }, "required": [ @@ -270,30 +271,42 @@ ], "title": "analyze_changed_pathsArguments", "type": "object" - }, - "name": "analyze_changed_paths" + } }, { + "name": "analyze_repository", "input_schema": { "properties": { + "root": { + "title": "Root", + "type": "string" + }, "analysis_mode": { "default": "full", "title": "Analysis Mode", "type": "string" }, - "api_surface": { + "respect_pyproject": { + "default": true, + "title": "Respect Pyproject", + "type": "boolean" + }, + "changed_paths": { "anyOf": [ { - "type": "boolean" + "items": { + "type": "string" + }, + "type": "array" }, { "type": "null" } ], "default": null, - "title": "Api Surface" + "title": "Changed Paths" }, - "baseline_path": { + "git_diff_ref": { "anyOf": [ { "type": "string" @@ -303,9 +316,9 @@ } ], "default": null, - "title": "Baseline Path" + "title": "Git Diff Ref" }, - 
"block_min_loc": { + "processes": { "anyOf": [ { "type": "integer" @@ -315,9 +328,9 @@ } ], "default": null, - "title": "Block Min Loc" + "title": "Processes" }, - "block_min_stmt": { + "min_loc": { "anyOf": [ { "type": "integer" @@ -327,41 +340,33 @@ } ], "default": null, - "title": "Block Min Stmt" + "title": "Min Loc" }, - "cache_path": { + "min_stmt": { "anyOf": [ { - "type": "string" + "type": "integer" }, { "type": "null" } ], "default": null, - "title": "Cache Path" - }, - "cache_policy": { - "default": "reuse", - "title": "Cache Policy", - "type": "string" + "title": "Min Stmt" }, - "changed_paths": { + "block_min_loc": { "anyOf": [ { - "items": { - "type": "string" - }, - "type": "array" + "type": "integer" }, { "type": "null" } ], "default": null, - "title": "Changed Paths" + "title": "Block Min Loc" }, - "cohesion_threshold": { + "block_min_stmt": { "anyOf": [ { "type": "integer" @@ -371,9 +376,9 @@ } ], "default": null, - "title": "Cohesion Threshold" + "title": "Block Min Stmt" }, - "complexity_threshold": { + "segment_min_loc": { "anyOf": [ { "type": "integer" @@ -383,9 +388,9 @@ } ], "default": null, - "title": "Complexity Threshold" + "title": "Segment Min Loc" }, - "coupling_threshold": { + "segment_min_stmt": { "anyOf": [ { "type": "integer" @@ -395,19 +400,19 @@ } ], "default": null, - "title": "Coupling Threshold" + "title": "Segment Min Stmt" }, - "coverage_min": { + "api_surface": { "anyOf": [ { - "type": "integer" + "type": "boolean" }, { "type": "null" } ], "default": null, - "title": "Coverage Min" + "title": "Api Surface" }, "coverage_xml": { "anyOf": [ @@ -421,19 +426,19 @@ "default": null, "title": "Coverage Xml" }, - "git_diff_ref": { + "coverage_min": { "anyOf": [ { - "type": "string" + "type": "integer" }, { "type": "null" } ], "default": null, - "title": "Git Diff Ref" + "title": "Coverage Min" }, - "max_baseline_size_mb": { + "complexity_threshold": { "anyOf": [ { "type": "integer" @@ -443,9 +448,9 @@ } ], "default": null, - 
"title": "Max Baseline Size Mb" + "title": "Complexity Threshold" }, - "max_cache_size_mb": { + "coupling_threshold": { "anyOf": [ { "type": "integer" @@ -455,45 +460,45 @@ } ], "default": null, - "title": "Max Cache Size Mb" + "title": "Coupling Threshold" }, - "metrics_baseline_path": { + "cohesion_threshold": { "anyOf": [ { - "type": "string" + "type": "integer" }, { "type": "null" } ], "default": null, - "title": "Metrics Baseline Path" + "title": "Cohesion Threshold" }, - "min_loc": { + "baseline_path": { "anyOf": [ { - "type": "integer" + "type": "string" }, { "type": "null" } ], "default": null, - "title": "Min Loc" + "title": "Baseline Path" }, - "min_stmt": { + "metrics_baseline_path": { "anyOf": [ { - "type": "integer" + "type": "string" }, { "type": "null" } ], "default": null, - "title": "Min Stmt" + "title": "Metrics Baseline Path" }, - "processes": { + "max_baseline_size_mb": { "anyOf": [ { "type": "integer" @@ -503,30 +508,26 @@ } ], "default": null, - "title": "Processes" - }, - "respect_pyproject": { - "default": true, - "title": "Respect Pyproject", - "type": "boolean" + "title": "Max Baseline Size Mb" }, - "root": { - "title": "Root", + "cache_policy": { + "default": "reuse", + "title": "Cache Policy", "type": "string" }, - "segment_min_loc": { + "cache_path": { "anyOf": [ { - "type": "integer" + "type": "string" }, { "type": "null" } ], "default": null, - "title": "Segment Min Loc" + "title": "Cache Path" }, - "segment_min_stmt": { + "max_cache_size_mb": { "anyOf": [ { "type": "integer" @@ -536,7 +537,7 @@ } ], "default": null, - "title": "Segment Min Stmt" + "title": "Max Cache Size Mb" } }, "required": [ @@ -544,13 +545,13 @@ ], "title": "analyze_repositoryArguments", "type": "object" - }, - "name": "analyze_repository" + } }, { + "name": "check_clones", "input_schema": { "properties": { - "clone_type": { + "run_id": { "anyOf": [ { "type": "string" @@ -560,19 +561,9 @@ } ], "default": null, - "title": "Clone Type" - }, - "detail_level": { - 
"default": "summary", - "title": "Detail Level", - "type": "string" - }, - "max_results": { - "default": 10, - "title": "Max Results", - "type": "integer" + "title": "Run Id" }, - "path": { + "root": { "anyOf": [ { "type": "string" @@ -582,9 +573,9 @@ } ], "default": null, - "title": "Path" + "title": "Root" }, - "root": { + "path": { "anyOf": [ { "type": "string" @@ -594,9 +585,9 @@ } ], "default": null, - "title": "Root" + "title": "Path" }, - "run_id": { + "clone_type": { "anyOf": [ { "type": "string" @@ -606,7 +597,7 @@ } ], "default": null, - "title": "Run Id" + "title": "Clone Type" }, "source_kind": { "anyOf": [ @@ -619,27 +610,27 @@ ], "default": null, "title": "Source Kind" + }, + "max_results": { + "default": 10, + "title": "Max Results", + "type": "integer" + }, + "detail_level": { + "default": "summary", + "title": "Detail Level", + "type": "string" } }, "title": "check_clonesArguments", "type": "object" - }, - "name": "check_clones" + } }, { + "name": "check_cohesion", "input_schema": { "properties": { - "detail_level": { - "default": "summary", - "title": "Detail Level", - "type": "string" - }, - "max_results": { - "default": 10, - "title": "Max Results", - "type": "integer" - }, - "path": { + "run_id": { "anyOf": [ { "type": "string" @@ -649,7 +640,7 @@ } ], "default": null, - "title": "Path" + "title": "Run Id" }, "root": { "anyOf": [ @@ -663,7 +654,7 @@ "default": null, "title": "Root" }, - "run_id": { + "path": { "anyOf": [ { "type": "string" @@ -673,40 +664,40 @@ } ], "default": null, - "title": "Run Id" + "title": "Path" + }, + "max_results": { + "default": 10, + "title": "Max Results", + "type": "integer" + }, + "detail_level": { + "default": "summary", + "title": "Detail Level", + "type": "string" } }, "title": "check_cohesionArguments", "type": "object" - }, - "name": "check_cohesion" + } }, { + "name": "check_complexity", "input_schema": { "properties": { - "detail_level": { - "default": "summary", - "title": "Detail Level", - "type": 
"string" - }, - "max_results": { - "default": 10, - "title": "Max Results", - "type": "integer" - }, - "min_complexity": { + "run_id": { "anyOf": [ { - "type": "integer" + "type": "string" }, { "type": "null" } ], "default": null, - "title": "Min Complexity" + "title": "Run Id" }, - "path": { + "root": { "anyOf": [ { "type": "string" @@ -716,9 +707,9 @@ } ], "default": null, - "title": "Path" + "title": "Root" }, - "root": { + "path": { "anyOf": [ { "type": "string" @@ -728,40 +719,40 @@ } ], "default": null, - "title": "Root" + "title": "Path" }, - "run_id": { + "min_complexity": { "anyOf": [ { - "type": "string" + "type": "integer" }, { "type": "null" } ], "default": null, - "title": "Run Id" + "title": "Min Complexity" + }, + "max_results": { + "default": 10, + "title": "Max Results", + "type": "integer" + }, + "detail_level": { + "default": "summary", + "title": "Detail Level", + "type": "string" } }, "title": "check_complexityArguments", "type": "object" - }, - "name": "check_complexity" + } }, { + "name": "check_coupling", "input_schema": { "properties": { - "detail_level": { - "default": "summary", - "title": "Detail Level", - "type": "string" - }, - "max_results": { - "default": 10, - "title": "Max Results", - "type": "integer" - }, - "path": { + "run_id": { "anyOf": [ { "type": "string" @@ -771,7 +762,7 @@ } ], "default": null, - "title": "Path" + "title": "Run Id" }, "root": { "anyOf": [ @@ -785,7 +776,7 @@ "default": null, "title": "Root" }, - "run_id": { + "path": { "anyOf": [ { "type": "string" @@ -795,28 +786,28 @@ } ], "default": null, - "title": "Run Id" + "title": "Path" + }, + "max_results": { + "default": 10, + "title": "Max Results", + "type": "integer" + }, + "detail_level": { + "default": "summary", + "title": "Detail Level", + "type": "string" } }, "title": "check_couplingArguments", "type": "object" - }, - "name": "check_coupling" + } }, { + "name": "check_dead_code", "input_schema": { "properties": { - "detail_level": { - "default": 
"normal", - "title": "Detail Level", - "type": "string" - }, - "max_results": { - "default": 10, - "title": "Max Results", - "type": "integer" - }, - "min_severity": { + "run_id": { "anyOf": [ { "type": "string" @@ -826,9 +817,9 @@ } ], "default": null, - "title": "Min Severity" + "title": "Run Id" }, - "path": { + "root": { "anyOf": [ { "type": "string" @@ -838,9 +829,9 @@ } ], "default": null, - "title": "Path" + "title": "Root" }, - "root": { + "path": { "anyOf": [ { "type": "string" @@ -850,9 +841,9 @@ } ], "default": null, - "title": "Root" + "title": "Path" }, - "run_id": { + "min_severity": { "anyOf": [ { "type": "string" @@ -862,28 +853,37 @@ } ], "default": null, - "title": "Run Id" + "title": "Min Severity" + }, + "max_results": { + "default": 10, + "title": "Max Results", + "type": "integer" + }, + "detail_level": { + "default": "normal", + "title": "Detail Level", + "type": "string" } }, "title": "check_dead_codeArguments", "type": "object" - }, - "name": "check_dead_code" + } }, { + "name": "clear_session_runs", "input_schema": { "properties": {}, "title": "clear_session_runsArguments", "type": "object" - }, - "name": "clear_session_runs" + } }, { + "name": "compare_runs", "input_schema": { "properties": { - "focus": { - "default": "all", - "title": "Focus", + "run_id_before": { + "title": "Run Id Before", "type": "string" }, "run_id_after": { @@ -898,8 +898,9 @@ "default": null, "title": "Run Id After" }, - "run_id_before": { - "title": "Run Id Before", + "focus": { + "default": "all", + "title": "Focus", "type": "string" } }, @@ -908,20 +909,32 @@ ], "title": "compare_runsArguments", "type": "object" - }, - "name": "compare_runs" + } }, { + "name": "evaluate_gates", "input_schema": { "properties": { - "coverage_min": { - "default": 50, - "title": "Coverage Min", - "type": "integer" + "run_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Run Id" }, - "fail_cohesion": { + "fail_on_new": { + 
"default": false, + "title": "Fail On New", + "type": "boolean" + }, + "fail_threshold": { "default": -1, - "title": "Fail Cohesion", + "title": "Fail Threshold", "type": "integer" }, "fail_complexity": { @@ -934,6 +947,11 @@ "title": "Fail Coupling", "type": "integer" }, + "fail_cohesion": { + "default": -1, + "title": "Fail Cohesion", + "type": "integer" + }, "fail_cycles": { "default": false, "title": "Fail Cycles", @@ -949,29 +967,24 @@ "title": "Fail Health", "type": "integer" }, - "fail_on_api_break": { - "default": false, - "title": "Fail On Api Break", - "type": "boolean" - }, - "fail_on_docstring_regression": { + "fail_on_new_metrics": { "default": false, - "title": "Fail On Docstring Regression", + "title": "Fail On New Metrics", "type": "boolean" }, - "fail_on_new": { + "fail_on_typing_regression": { "default": false, - "title": "Fail On New", + "title": "Fail On Typing Regression", "type": "boolean" }, - "fail_on_new_metrics": { + "fail_on_docstring_regression": { "default": false, - "title": "Fail On New Metrics", + "title": "Fail On Docstring Regression", "type": "boolean" }, - "fail_on_typing_regression": { + "fail_on_api_break": { "default": false, - "title": "Fail On Typing Regression", + "title": "Fail On Api Break", "type": "boolean" }, "fail_on_untested_hotspots": { @@ -979,9 +992,9 @@ "title": "Fail On Untested Hotspots", "type": "boolean" }, - "fail_threshold": { + "min_typing_coverage": { "default": -1, - "title": "Fail Threshold", + "title": "Min Typing Coverage", "type": "integer" }, "min_docstring_coverage": { @@ -989,11 +1002,20 @@ "title": "Min Docstring Coverage", "type": "integer" }, - "min_typing_coverage": { - "default": -1, - "title": "Min Typing Coverage", + "coverage_min": { + "default": 50, + "title": "Coverage Min", "type": "integer" - }, + } + }, + "title": "evaluate_gatesArguments", + "type": "object" + } + }, + { + "name": "generate_pr_summary", + "input_schema": { + "properties": { "run_id": { "anyOf": [ { @@ -1005,16 
+1027,7 @@ ], "default": null, "title": "Run Id" - } - }, - "title": "evaluate_gatesArguments", - "type": "object" - }, - "name": "evaluate_gates" - }, - { - "input_schema": { - "properties": { + }, "changed_paths": { "anyOf": [ { @@ -1030,11 +1043,6 @@ "default": null, "title": "Changed Paths" }, - "format": { - "default": "markdown", - "title": "Format", - "type": "string" - }, "git_diff_ref": { "anyOf": [ { @@ -1047,6 +1055,27 @@ "default": null, "title": "Git Diff Ref" }, + "format": { + "default": "markdown", + "title": "Format", + "type": "string" + } + }, + "title": "generate_pr_summaryArguments", + "type": "object" + } + }, + { + "name": "get_blast_radius", + "input_schema": { + "properties": { + "files": { + "items": { + "type": "string" + }, + "title": "Files", + "type": "array" + }, "run_id": { "anyOf": [ { @@ -1058,21 +1087,39 @@ ], "default": null, "title": "Run Id" + }, + "depth": { + "default": "direct", + "title": "Depth", + "type": "string" + }, + "include": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Include" } }, - "title": "generate_pr_summaryArguments", + "required": [ + "files" + ], + "title": "get_blast_radiusArguments", "type": "object" - }, - "name": "generate_pr_summary" + } }, { + "name": "get_finding", "input_schema": { "properties": { - "detail_level": { - "default": "normal", - "title": "Detail Level", - "type": "string" - }, "finding_id": { "title": "Finding Id", "type": "string" @@ -1088,6 +1135,11 @@ ], "default": null, "title": "Run Id" + }, + "detail_level": { + "default": "normal", + "title": "Detail Level", + "type": "string" } }, "required": [ @@ -1095,22 +1147,12 @@ ], "title": "get_findingArguments", "type": "object" - }, - "name": "get_finding" + } }, { + "name": "get_production_triage", "input_schema": { "properties": { - "max_hotspots": { - "default": 3, - "title": "Max Hotspots", - "type": "integer" - }, - "max_suggestions": { 
- "default": 3, - "title": "Max Suggestions", - "type": "integer" - }, "run_id": { "anyOf": [ { @@ -1122,21 +1164,26 @@ ], "default": null, "title": "Run Id" + }, + "max_hotspots": { + "default": 3, + "title": "Max Hotspots", + "type": "integer" + }, + "max_suggestions": { + "default": 3, + "title": "Max Suggestions", + "type": "integer" } }, "title": "get_production_triageArguments", "type": "object" - }, - "name": "get_production_triage" + } }, { + "name": "get_remediation", "input_schema": { "properties": { - "detail_level": { - "default": "normal", - "title": "Detail Level", - "type": "string" - }, "finding_id": { "title": "Finding Id", "type": "string" @@ -1152,6 +1199,11 @@ ], "default": null, "title": "Run Id" + }, + "detail_level": { + "default": "normal", + "title": "Detail Level", + "type": "string" } }, "required": [ @@ -1159,13 +1211,13 @@ ], "title": "get_remediationArguments", "type": "object" - }, - "name": "get_remediation" + } }, { + "name": "get_report_section", "input_schema": { "properties": { - "family": { + "run_id": { "anyOf": [ { "type": "string" @@ -1175,19 +1227,14 @@ } ], "default": null, - "title": "Family" - }, - "limit": { - "default": 50, - "title": "Limit", - "type": "integer" + "title": "Run Id" }, - "offset": { - "default": 0, - "title": "Offset", - "type": "integer" + "section": { + "default": "all", + "title": "Section", + "type": "string" }, - "path": { + "family": { "anyOf": [ { "type": "string" @@ -1197,9 +1244,9 @@ } ], "default": null, - "title": "Path" + "title": "Family" }, - "run_id": { + "path": { "anyOf": [ { "type": "string" @@ -1209,20 +1256,25 @@ } ], "default": null, - "title": "Run Id" + "title": "Path" }, - "section": { - "default": "all", - "title": "Section", - "type": "string" + "offset": { + "default": 0, + "title": "Offset", + "type": "integer" + }, + "limit": { + "default": 50, + "title": "Limit", + "type": "integer" } }, "title": "get_report_sectionArguments", "type": "object" - }, - "name": 
"get_report_section" + } }, { + "name": "get_run_summary", "input_schema": { "properties": { "run_id": { @@ -1240,20 +1292,20 @@ }, "title": "get_run_summaryArguments", "type": "object" - }, - "name": "get_run_summary" + } }, { + "name": "help", "input_schema": { "properties": { + "topic": { + "title": "Topic", + "type": "string" + }, "detail": { "default": "compact", "title": "Detail", "type": "string" - }, - "topic": { - "title": "Topic", - "type": "string" } }, "required": [ @@ -1261,13 +1313,13 @@ ], "title": "helpArguments", "type": "object" - }, - "name": "help" + } }, { + "name": "list_findings", "input_schema": { "properties": { - "category": { + "run_id": { "anyOf": [ { "type": "string" @@ -1277,39 +1329,26 @@ } ], "default": null, - "title": "Category" + "title": "Run Id" }, - "changed_paths": { + "family": { + "default": "all", + "title": "Family", + "type": "string" + }, + "category": { "anyOf": [ { - "items": { - "type": "string" - }, - "type": "array" + "type": "string" }, { "type": "null" } ], "default": null, - "title": "Changed Paths" - }, - "detail_level": { - "default": "summary", - "title": "Detail Level", - "type": "string" - }, - "exclude_reviewed": { - "default": false, - "title": "Exclude Reviewed", - "type": "boolean" - }, - "family": { - "default": "all", - "title": "Family", - "type": "string" + "title": "Category" }, - "git_diff_ref": { + "severity": { "anyOf": [ { "type": "string" @@ -1319,48 +1358,51 @@ } ], "default": null, - "title": "Git Diff Ref" - }, - "limit": { - "default": 50, - "title": "Limit", - "type": "integer" + "title": "Severity" }, - "max_results": { + "source_kind": { "anyOf": [ { - "type": "integer" + "type": "string" }, { "type": "null" } ], "default": null, - "title": "Max Results" + "title": "Source Kind" }, "novelty": { "default": "all", "title": "Novelty", "type": "string" }, - "offset": { - "default": 0, - "title": "Offset", - "type": "integer" + "sort_by": { + "default": "default", + "title": "Sort By", + 
"type": "string" }, - "run_id": { + "detail_level": { + "default": "summary", + "title": "Detail Level", + "type": "string" + }, + "changed_paths": { "anyOf": [ { - "type": "string" + "items": { + "type": "string" + }, + "type": "array" }, { "type": "null" } ], "default": null, - "title": "Run Id" + "title": "Changed Paths" }, - "severity": { + "git_diff_ref": { "anyOf": [ { "type": "string" @@ -1370,58 +1412,79 @@ } ], "default": null, - "title": "Severity" + "title": "Git Diff Ref" }, - "sort_by": { - "default": "default", - "title": "Sort By", - "type": "string" + "exclude_reviewed": { + "default": false, + "title": "Exclude Reviewed", + "type": "boolean" }, - "source_kind": { + "offset": { + "default": 0, + "title": "Offset", + "type": "integer" + }, + "limit": { + "default": 50, + "title": "Limit", + "type": "integer" + }, + "max_results": { "anyOf": [ { - "type": "string" + "type": "integer" }, { "type": "null" } ], "default": null, - "title": "Source Kind" + "title": "Max Results" } }, "title": "list_findingsArguments", "type": "object" - }, - "name": "list_findings" + } }, { + "name": "list_hotspots", "input_schema": { "properties": { - "changed_paths": { + "kind": { + "title": "Kind", + "type": "string" + }, + "run_id": { "anyOf": [ { - "items": { - "type": "string" - }, - "type": "array" + "type": "string" }, { "type": "null" } ], "default": null, - "title": "Changed Paths" + "title": "Run Id" }, "detail_level": { "default": "summary", "title": "Detail Level", "type": "string" }, - "exclude_reviewed": { - "default": false, - "title": "Exclude Reviewed", - "type": "boolean" + "changed_paths": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Changed Paths" }, "git_diff_ref": { "anyOf": [ @@ -1435,9 +1498,10 @@ "default": null, "title": "Git Diff Ref" }, - "kind": { - "title": "Kind", - "type": "string" + "exclude_reviewed": { + "default": false, + "title": "Exclude 
Reviewed", + "type": "boolean" }, "limit": { "default": 10, @@ -1455,7 +1519,19 @@ ], "default": null, "title": "Max Results" - }, + } + }, + "required": [ + "kind" + ], + "title": "list_hotspotsArguments", + "type": "object" + } + }, + { + "name": "list_reviewed_findings", + "input_schema": { + "properties": { "run_id": { "anyOf": [ { @@ -1469,17 +1545,18 @@ "title": "Run Id" } }, - "required": [ - "kind" - ], - "title": "list_hotspotsArguments", + "title": "list_reviewed_findingsArguments", "type": "object" - }, - "name": "list_hotspots" + } }, { + "name": "manage_change_intent", "input_schema": { "properties": { + "action": { + "title": "Action", + "type": "string" + }, "run_id": { "anyOf": [ { @@ -1491,21 +1568,103 @@ ], "default": null, "title": "Run Id" + }, + "intent_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Intent Id" + }, + "scope": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Scope" + }, + "intent": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Intent" + }, + "expected_effects": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Expected Effects" + }, + "diff_ref": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Diff Ref" + }, + "changed_files": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Changed Files" } }, - "title": "list_reviewed_findingsArguments", + "required": [ + "action" + ], + "title": "manage_change_intentArguments", "type": "object" - }, - "name": "list_reviewed_findings" + } }, { + "name": "mark_finding_reviewed", "input_schema": { "properties": { "finding_id": { "title": "Finding Id", "type": 
"string" }, - "note": { + "run_id": { "anyOf": [ { "type": "string" @@ -1515,9 +1674,9 @@ } ], "default": null, - "title": "Note" + "title": "Run Id" }, - "run_id": { + "note": { "anyOf": [ { "type": "string" @@ -1527,7 +1686,7 @@ } ], "default": null, - "title": "Run Id" + "title": "Note" } }, "required": [ @@ -1535,7 +1694,6 @@ ], "title": "mark_finding_reviewedArguments", "type": "object" - }, - "name": "mark_finding_reviewed" + } } ] diff --git a/tests/test_mcp_server.py b/tests/test_mcp_server.py index 7106b833..85c6fcd5 100644 --- a/tests/test_mcp_server.py +++ b/tests/test_mcp_server.py @@ -124,6 +124,7 @@ def test_mcp_server_exposes_expected_read_only_tools() -> None: "help", "get_run_summary", "get_production_triage", + "get_blast_radius", "evaluate_gates", "get_report_section", "list_findings", @@ -139,6 +140,7 @@ def test_mcp_server_exposes_expected_read_only_tools() -> None: "generate_pr_summary", "mark_finding_reviewed", "list_reviewed_findings", + "manage_change_intent", } for name, tool in tools.items(): assert tool.annotations is not None @@ -154,6 +156,7 @@ def test_mcp_server_exposes_expected_read_only_tools() -> None: "check_dead_code", "get_run_summary", "get_production_triage", + "get_blast_radius", "evaluate_gates", "help", "get_report_section", @@ -167,7 +170,12 @@ def test_mcp_server_exposes_expected_read_only_tools() -> None: } ) assert tool.annotations.destructiveHint is ( - name in {"mark_finding_reviewed", "clear_session_runs"} + name + in { + "mark_finding_reviewed", + "manage_change_intent", + "clear_session_runs", + } ) assert tool.annotations.idempotentHint is True assert "cache_policy='off'" in str(tools["analyze_repository"].description) @@ -189,6 +197,8 @@ def test_mcp_server_exposes_expected_read_only_tools() -> None: assert "default first-pass review" in str( tools["get_production_triage"].description ) + assert "structural risk boundary" in str(tools["get_blast_radius"].description) + assert "Intent is session-local" in 
str(tools["manage_change_intent"].description) assert "bounded guidance, not a full manual" in str(tools["help"].description) assert "workflow, analysis_profile, suppressions, baseline" in str( tools["help"].description @@ -326,6 +336,46 @@ def test_mcp_server_tool_roundtrip_and_resources(tmp_path: Path) -> None: assert production_triage["run_id"] == run_id assert _mapping_child(production_triage, "cache")["freshness"] + blast_radius = _structured_tool_result( + asyncio.run( + server.call_tool( + "get_blast_radius", + {"run_id": run_id, "files": ["pkg/dup.py"]}, + ) + ) + ) + assert blast_radius["origin"] == ["pkg/dup.py"] + assert blast_radius["radius_level"] in {"low", "medium", "high"} + + change_intent = _structured_tool_result( + asyncio.run( + server.call_tool( + "manage_change_intent", + { + "action": "declare", + "run_id": run_id, + "scope": {"allowed_files": ["pkg/dup.py"]}, + "intent": "review duplicate fixture", + }, + ) + ) + ) + intent_id = str(change_intent["intent_id"]) + intent_check = _structured_tool_result( + asyncio.run( + server.call_tool( + "manage_change_intent", + { + "action": "check", + "intent_id": intent_id, + "changed_files": ["pkg/dup.py"], + }, + ) + ) + ) + assert change_intent["status"] == "active" + assert intent_check["status"] == "clean" + latest_report_resource = list( asyncio.run(server.read_resource("codeclone://latest/report.json")) ) @@ -576,6 +626,7 @@ def test_mcp_server_tool_roundtrip_and_resources(tmp_path: Path) -> None: asyncio.run(server.call_tool("clear_session_runs", {})) ) assert cast(int, cleared["cleared_runs"]) >= 1 + assert cast(int, cleared["cleared_intents"]) >= 1 assert run_id in cast("list[str]", cleared["cleared_run_ids"]) from mcp.server.fastmcp.exceptions import ResourceError diff --git a/tests/test_mcp_service.py b/tests/test_mcp_service.py index 40ed9ef4..c10c4e3f 100644 --- a/tests/test_mcp_service.py +++ b/tests/test_mcp_service.py @@ -18,6 +18,7 @@ import pytest +import 
codeclone.surfaces.mcp._blast_radius as mcp_blast_radius_mod import codeclone.surfaces.mcp._session_baseline as mcp_baseline_mod import codeclone.surfaces.mcp._session_finding_mixin as mcp_finding_mod import codeclone.surfaces.mcp._session_helpers as mcp_helpers_mod @@ -172,6 +173,147 @@ def _dummy_run_record(root: Path, run_id: str) -> MCPRunRecord: ) +def _blast_radius_report_document(digest: str = "digest-a") -> dict[str, object]: + return { + "integrity": { + "digest": { + "value": digest, + "algorithm": "sha256", + "verified": True, + } + }, + "inventory": { + "file_registry": { + "items": [ + "pkg/a.py", + "pkg/b.py", + "pkg/c.py", + "pkg/clone_peer.py", + "tests/test_a.py", + ] + } + }, + "metrics": { + "families": { + "dependencies": { + "items": [ + { + "source": "pkg.b", + "target": "pkg.a", + "import_type": "import", + "line": 1, + }, + { + "source": "pkg.c", + "target": "pkg.b", + "import_type": "from_import", + "line": 2, + }, + ], + "cycles": [["pkg.a", "pkg.b"]], + "longest_chains": [], + }, + "complexity": { + "items": [ + { + "relative_path": "pkg/b.py", + "risk": "high", + "cyclomatic_complexity": 25, + } + ] + }, + "coupling": { + "items": [ + { + "relative_path": "pkg/c.py", + "risk": "high", + "cbo": 12, + } + ] + }, + "coverage_join": { + "items": [ + { + "relative_path": "pkg/a.py", + "coverage_hotspot": True, + "scope_gap_hotspot": False, + } + ] + }, + "overloaded_modules": { + "items": [ + { + "module": "pkg.b", + "relative_path": "pkg/b.py", + "candidate_status": "candidate", + } + ] + }, + "security_surfaces": { + "items": [ + { + "relative_path": "pkg/c.py", + "category": "network_boundary", + } + ] + }, + } + }, + "findings": { + "groups": { + "clones": { + "functions": [ + { + "id": "clone:function:g1", + "family": "clone", + "category": "function", + "novelty": "new", + "items": [ + {"relative_path": "pkg/a.py"}, + {"relative_path": "pkg/clone_peer.py"}, + ], + }, + { + "id": "clone:function:g2", + "family": "clone", + "category": 
"function", + "novelty": "known", + "items": [{"relative_path": "pkg/c.py"}], + }, + ], + "blocks": [], + "segments": [], + "suppressed": { + "function": [ + { + "id": "clone:function:fixture", + "items": [{"relative_path": "tests/test_a.py"}], + } + ], + "block": [], + "segment": [], + }, + }, + "design": {"groups": []}, + "dead_code": {"groups": []}, + "structural": {"groups": []}, + } + }, + } + + +def _blast_radius_run_record( + root: Path, + run_id: str = "abcdef1234567890", + *, + digest: str = "digest-a", +) -> MCPRunRecord: + return replace( + _dummy_run_record(root, run_id), + report_document=_blast_radius_report_document(digest), + ) + + def _two_clone_fixture_roots(tmp_path: Path) -> tuple[Path, Path]: first_root = tmp_path / "first" second_root = tmp_path / "second" @@ -1935,6 +2077,159 @@ def _raise_subprocess(*args: object, **kwargs: object) -> object: mcp_shared_mod.CodeCloneMCPRunStore(history_limit=11) +def test_mcp_blast_radius_projection_is_deterministic() -> None: + report_document = _blast_radius_report_document() + + direct = mcp_blast_radius_mod.compute_blast_radius( + run_id="abcdef12", + report_document=report_document, + files=("pkg/a.py",), + depth="direct", + ) + transitive = mcp_blast_radius_mod.compute_blast_radius( + run_id="abcdef12", + report_document=report_document, + files=("pkg/a.py",), + depth="transitive", + ) + + assert direct.direct_dependents == ("pkg/b.py",) + assert direct.transitive_dependents == () + assert direct.clone_cohort_members == ("pkg/clone_peer.py",) + assert direct.in_dependency_cycle == ("pkg/a.py",) + assert direct.radius_level == "medium" + assert direct.structural_risk["high_complexity_in_blast_zone"] == ["pkg/b.py"] + assert direct.structural_risk["low_coverage_in_blast_zone"] == ["pkg/a.py"] + assert transitive.direct_dependents == ("pkg/b.py",) + assert transitive.transitive_dependents == ("pkg/c.py",) + assert direct.to_payload(include=("do_not_touch",))["direct_dependents"] == [] + + +def 
test_mcp_service_get_blast_radius_uses_cache_and_include_filter( + tmp_path: Path, +) -> None: + service = CodeCloneMCPService(history_limit=2) + record = _blast_radius_run_record(tmp_path) + service._runs.register(record) + + first = service.get_blast_radius(files=("pkg/a.py",), depth="transitive") + second = service.get_blast_radius( + files=("pkg/a.py",), + depth="transitive", + include=("do_not_touch",), + ) + + assert first["run_id"] == "abcdef12" + assert first["origin"] == ["pkg/a.py"] + assert first["direct_dependents"] == ["pkg/b.py"] + assert first["transitive_dependents"] == ["pkg/c.py"] + assert first["clone_cohort_members"] == ["pkg/clone_peer.py"] + assert cast(dict[str, object], first["structural_risk"])[ + "overloaded_modules_in_blast_zone" + ] == ["pkg/b.py"] + assert second["direct_dependents"] == [] + assert cast("list[dict[str, object]]", second["do_not_touch"]) + assert len(service._blast_radius_cache) == 1 + with pytest.raises(MCPServiceContractError, match="requires at least one file"): + service.get_blast_radius(files=()) + with pytest.raises(MCPServiceContractError, match="Invalid value for depth"): + service.get_blast_radius(files=("pkg/a.py",), depth="full") + + +def test_mcp_service_manage_change_intent_lifecycle(tmp_path: Path) -> None: + service = CodeCloneMCPService(history_limit=2) + record = _blast_radius_run_record(tmp_path) + service._runs.register(record) + + declared = service.manage_change_intent( + action="declare", + run_id="abcdef12", + scope={ + "allowed_files": ["pkg/a.py"], + "allowed_related": ["tests/test_a.py"], + "forbidden": ["pkg/c.py"], + }, + intent="adjust pkg.a behavior", + expected_effects=["no new clone group", "no baseline update"], + ) + intent_id = str(declared["intent_id"]) + + assert declared["status"] == "active" + assert cast(dict[str, object], declared["scope"])["forbidden"] == [ + ".cache/codeclone/**", + "codeclone.baseline.json", + "pkg/c.py", + ] + assert ( + cast(dict[str, object], 
declared["blast_radius_summary"])[ + "direct_dependents_count" + ] + == 1 + ) + assert cast("list[dict[str, object]]", declared["do_not_touch"]) + + fetched = service.manage_change_intent(action="get", intent_id=intent_id) + assert fetched["intent_id"] == intent_id + expanded = service.manage_change_intent( + action="check", + intent_id=intent_id, + changed_files=["pkg/a.py", "tests/test_a.py"], + ) + assert expanded["status"] == "expanded" + assert expanded["required_action"] is None + + violated = service.manage_change_intent( + action="check", + intent_id=intent_id, + changed_files=["pkg/a.py", "pkg/c.py", "pkg/unplanned.py"], + ) + assert violated["status"] == "violated" + assert violated["required_action"] == "human_approval" + assert violated["forbidden_touched"] == ["pkg/c.py"] + assert violated["unexpected_files"] == ["pkg/unplanned.py"] + + cleared = service.manage_change_intent(action="clear", intent_id=intent_id) + assert cleared == {"cleared": 1, "cleared_intent_ids": [intent_id]} + with pytest.raises(MCPServiceContractError, match="No active change intent"): + service.manage_change_intent(action="get", run_id="abcdef12") + + +def test_mcp_service_manage_change_intent_validation_expiry_and_prune( + tmp_path: Path, +) -> None: + service = CodeCloneMCPService(history_limit=1) + record = _blast_radius_run_record(tmp_path) + service._runs.register(record) + + with pytest.raises(MCPServiceContractError, match="allowed_files"): + service.manage_change_intent( + action="declare", + scope={"allowed_files": []}, + intent="missing scope", + ) + with pytest.raises(MCPServiceContractError, match="Invalid value for action"): + service.manage_change_intent(action="unknown") + + declared = service.manage_change_intent( + action="declare", + scope={"allowed_files": ["pkg/a.py"]}, + intent="change pkg.a", + ) + intent_id = str(declared["intent_id"]) + with pytest.raises(MCPServiceContractError, match="requires diff_ref"): + service.manage_change_intent(action="check", 
intent_id=intent_id) + + service._runs.register(_blast_radius_run_record(tmp_path, digest="digest-b")) + expired = service.manage_change_intent(action="get", intent_id=intent_id) + assert expired["status"] == "expired" + + service._runs.register( + _blast_radius_run_record(tmp_path, run_id="fedcba9876543210") + ) + service._prune_session_state() + assert service._active_intents == {} + + def test_mcp_service_branch_helpers_on_real_runs( tmp_path: Path, monkeypatch: pytest.MonkeyPatch, @@ -2980,12 +3275,32 @@ def test_mcp_service_clear_session_runs_clears_in_memory_state(tmp_path: Path) - note="triaged", ) service.evaluate_gates(MCPGateRequest(run_id=run_id, fail_threshold=0)) + service.get_blast_radius(files=("pkg/dup.py",), run_id=run_id) + service.manage_change_intent( + action="declare", + run_id=run_id, + scope={"allowed_files": ["pkg/dup.py"]}, + intent="touch duplicate fixture", + ) cleared = service.clear_session_runs() - assert cleared["cleared_runs"] == 1 - assert cleared["cleared_review_entries"] == 1 - assert cleared["cleared_gate_results"] == 1 + assert { + key: cleared[key] + for key in ( + "cleared_runs", + "cleared_review_entries", + "cleared_gate_results", + "cleared_blast_radius_entries", + "cleared_intents", + ) + } == { + "cleared_runs": 1, + "cleared_review_entries": 1, + "cleared_gate_results": 1, + "cleared_blast_radius_entries": 1, + "cleared_intents": 1, + } with pytest.raises(MCPRunNotFoundError): service.get_run_summary() From 84d46885b3ef977ff31c58fddfd6d0765b8fc7a8 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Fri, 22 May 2026 20:14:21 +0500 Subject: [PATCH 003/318] feat(mcp): separate blast radius review context --- CHANGELOG.md | 3 +- codeclone/surfaces/mcp/_blast_radius.py | 177 ++++++++++++++---- .../surfaces/mcp/_session_intent_mixin.py | 6 + codeclone/surfaces/mcp/server.py | 5 +- docs/book/20-mcp-interface.md | 40 ++-- docs/mcp.md | 57 +++--- tests/test_mcp_server.py | 3 + tests/test_mcp_service.py | 106 ++++++++++- 8 files 
changed, 313 insertions(+), 84 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5f5f6fe4..5c0a7951 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,8 @@ - Add MCP `get_blast_radius` as a deterministic pre-change projection over the canonical report: direct dependents, clone cohorts, dependency-cycle - membership, coverage/risk signals, and do-not-touch paths. + membership, coverage/risk signals, actionable do-not-touch paths, and + bounded review-only context. - Add MCP `manage_change_intent` for session-local change intent lifecycle: declare intended scope, inspect active intent, check actual changed files against scope, and clear intent state. diff --git a/codeclone/surfaces/mcp/_blast_radius.py b/codeclone/surfaces/mcp/_blast_radius.py index 05b31b44..5df8f34b 100644 --- a/codeclone/surfaces/mcp/_blast_radius.py +++ b/codeclone/surfaces/mcp/_blast_radius.py @@ -9,7 +9,6 @@ from collections import deque from collections.abc import Mapping, Sequence from dataclasses import dataclass -from fnmatch import fnmatchcase from typing import Final, Literal BlastRadiusDepth = Literal["direct", "transitive"] @@ -19,6 +18,7 @@ "coverage", "risk_signals", "do_not_touch", + "review_context", "cycles", ] @@ -30,6 +30,7 @@ "coverage", "risk_signals", "do_not_touch", + "review_context", "cycles", } ) @@ -39,12 +40,14 @@ "coverage", "risk_signals", "do_not_touch", + "review_context", "cycles", ) DEFAULT_DO_NOT_TOUCH_PATTERNS: Final[tuple[str, ...]] = ( "codeclone.baseline.json", ".cache/codeclone/**", ) +MAX_CONTEXT_ITEMS: Final[int] = 20 @dataclass(frozen=True, slots=True) @@ -59,6 +62,7 @@ class BlastRadiusResult: in_dependency_cycle: tuple[str, ...] structural_risk: dict[str, list[str]] do_not_touch: tuple[dict[str, str], ...] + review_context: tuple[dict[str, str], ...] guardrails: tuple[str, ...] 
def to_payload( @@ -79,6 +83,10 @@ def to_payload( "overloaded_modules_in_blast_zone", ): structural_risk.pop(key, None) + do_not_touch = self.do_not_touch if "do_not_touch" in include_set else () + review_context = self.review_context if "review_context" in include_set else () + do_not_touch_payload = _bounded_entries(do_not_touch) + review_context_payload = _bounded_entries(review_context) return { "run_id": self.run_id, "origin": list(self.origin), @@ -101,10 +109,15 @@ def to_payload( list(self.in_dependency_cycle) if "cycles" in include_set else [] ), "structural_risk": structural_risk, - "do_not_touch": ( - [dict(item) for item in self.do_not_touch] - if "do_not_touch" in include_set - else [] + "do_not_touch": do_not_touch_payload, + "do_not_touch_summary": _entry_summary( + entries=do_not_touch, + shown=len(do_not_touch_payload), + ), + "review_context": review_context_payload, + "review_context_summary": _entry_summary( + entries=review_context, + shown=len(review_context_payload), ), "guardrails": list(self.guardrails), } @@ -164,8 +177,38 @@ def _dedupe_sorted(values: Sequence[str] | set[str]) -> tuple[str, ...]: return tuple(sorted({value for value in values if value})) -def _path_matches_glob(path: str, patterns: Sequence[str]) -> bool: - return any(fnmatchcase(path, pattern) for pattern in patterns) +def _bounded_entries( + entries: Sequence[Mapping[str, str]], + *, + limit: int = MAX_CONTEXT_ITEMS, +) -> list[dict[str, str]]: + return [dict(item) for item in entries[:limit]] + + +def _count_by_field( + entries: Sequence[Mapping[str, str]], + *, + field: str, +) -> dict[str, int]: + counts: dict[str, int] = {} + for entry in entries: + key = str(entry.get(field, "")).strip() or "unknown" + counts[key] = counts.get(key, 0) + 1 + return dict(sorted(counts.items(), key=lambda item: (-item[1], item[0]))) + + +def _entry_summary( + *, + entries: Sequence[Mapping[str, str]], + shown: int, +) -> dict[str, object]: + return { + "total": len(entries), + 
"shown": shown, + "truncated": shown < len(entries), + "top_categories": _count_by_field(entries, field="category"), + "top_reasons": _count_by_field(entries, field="reason"), + } def _item_path(item: Mapping[str, object]) -> str: @@ -444,80 +487,141 @@ def _all_finding_groups( return tuple(result) -def _append_do_not_touch( - entries: dict[str, str], +def _append_boundary_entry( + entries: dict[str, dict[str, str]], *, path: str, reason: str, + category: str, + severity: str, ) -> None: if not path: return - entries.setdefault(path, reason) + entries.setdefault( + path, + { + "path": path, + "reason": reason, + "category": category, + "severity": severity, + }, + ) -def _compute_do_not_touch( +def _append_review_entry( + entries: dict[tuple[str, str, str], dict[str, str]], + *, + path: str, + reason: str, + category: str, + severity: str = "context", +) -> None: + if not path: + return + entries.setdefault( + (path, category, reason), + { + "path": path, + "reason": reason, + "category": category, + "severity": severity, + }, + ) + + +def _compute_change_boundaries( *, report_document: Mapping[str, object], origin_paths: Sequence[str], blast_zone_paths: set[str], forbidden_patterns: Sequence[str], allowed_scope: Sequence[str] = (), -) -> tuple[dict[str, str], ...]: - entries: dict[str, str] = {} +) -> tuple[tuple[dict[str, str], ...], tuple[dict[str, str], ...]]: + do_not_touch_entries: dict[str, dict[str, str]] = {} + review_entries: dict[tuple[str, str, str], dict[str, str]] = {} origin_set = set(origin_paths) allowed_set = set(allowed_scope) for pattern in DEFAULT_DO_NOT_TOUCH_PATTERNS: - _append_do_not_touch( - entries, + _append_boundary_entry( + do_not_touch_entries, path=pattern, reason=( "baseline, cache, and generated CodeClone state require explicit " "separate changes" ), + category="baseline_or_generated_state", + severity="hard", ) for pattern in forbidden_patterns: - _append_do_not_touch(entries, path=pattern, reason="declared forbidden path") + 
_append_boundary_entry( + do_not_touch_entries, + path=pattern, + reason="declared forbidden path", + category="explicit_forbidden", + severity="hard", + ) for group in _all_finding_groups(report_document): if str(group.get("novelty", "")).strip() != "known": continue for path in _finding_paths(group): - if path not in origin_set: - _append_do_not_touch( - entries, + if path in blast_zone_paths and path not in origin_set: + _append_review_entry( + review_entries, path=path, reason="known baseline debt outside declared origin", + category="known_baseline_debt", ) for group in _suppressed_clone_buckets(report_document): for path in _finding_paths(group): - _append_do_not_touch( - entries, - path=path, - reason="golden fixture clone suppression surface", - ) + if path in blast_zone_paths: + _append_review_entry( + review_entries, + path=path, + reason="golden fixture clone suppression surface", + category="golden_fixture_surface", + ) metrics = _as_mapping(report_document.get("metrics")) families = _as_mapping(metrics.get("families")) - for family_name, reason in ( - ("security_surfaces", "report-only security boundary inventory"), - ("overloaded_modules", "report-only design signal"), + for family_name, reason, category in ( + ( + "security_surfaces", + "report-only security boundary inventory", + "security_boundary_context", + ), + ("overloaded_modules", "report-only design signal", "report_only_context"), ): family = _as_mapping(families.get(family_name)) for raw_item in _as_sequence(family.get("items")): path = _item_path(_as_mapping(raw_item)) - if path and path not in origin_set: - _append_do_not_touch(entries, path=path, reason=reason) + if path in blast_zone_paths and path not in origin_set: + _append_review_entry( + review_entries, + path=path, + reason=reason, + category=category, + ) if allowed_set: for path in blast_zone_paths: if path not in allowed_set: - _append_do_not_touch( - entries, + _append_boundary_entry( + do_not_touch_entries, path=path, 
reason="affected by blast radius but outside declared edit scope", + category="affected_but_not_allowed", + severity="requires_expansion", ) - return tuple( - {"path": path, "reason": entries[path]} - for path in sorted(entries) - if path and (_path_matches_glob(path, forbidden_patterns) or path in entries) + do_not_touch = tuple( + do_not_touch_entries[path] for path in sorted(do_not_touch_entries) if path + ) + review_context = tuple( + entry + for entry in sorted( + review_entries.values(), + key=lambda item: (item["path"], item["category"], item["reason"]), + ) + if entry["path"] not in do_not_touch_entries ) + return do_not_touch, review_context def _guardrails( @@ -602,7 +706,7 @@ def compute_blast_radius( report_document=report_document, blast_zone_paths=zone, ) - do_not_touch = _compute_do_not_touch( + do_not_touch, review_context = _compute_change_boundaries( report_document=report_document, origin_paths=origin_paths, blast_zone_paths=zone, @@ -620,5 +724,6 @@ def compute_blast_radius( in_dependency_cycle=dependency_cycle_members, structural_risk=risk, do_not_touch=do_not_touch, + review_context=review_context, guardrails=_guardrails(radius_level=radius_level, do_not_touch=do_not_touch), ) diff --git a/codeclone/surfaces/mcp/_session_intent_mixin.py b/codeclone/surfaces/mcp/_session_intent_mixin.py index 5b477fa6..4f3d683a 100644 --- a/codeclone/surfaces/mcp/_session_intent_mixin.py +++ b/codeclone/surfaces/mcp/_session_intent_mixin.py @@ -134,6 +134,9 @@ def _declare_change_intent( short_run_id=_helpers._short_run_id(record.run_id) ) payload["do_not_touch"] = blast_payload["do_not_touch"] + payload["do_not_touch_summary"] = blast_payload["do_not_touch_summary"] + payload["review_context"] = blast_payload["review_context"] + payload["review_context_summary"] = blast_payload["review_context_summary"] return payload def _check_change_intent( @@ -284,6 +287,9 @@ def _blast_radius_summary( ) ), "do_not_touch_count": 
len(_as_sequence(blast_payload.get("do_not_touch"))), + "review_context_count": len( + _as_sequence(blast_payload.get("review_context")) + ), } def _intent_check_result( diff --git a/codeclone/surfaces/mcp/server.py b/codeclone/surfaces/mcp/server.py index 1392437a..9e8ee3df 100644 --- a/codeclone/surfaces/mcp/server.py +++ b/codeclone/surfaces/mcp/server.py @@ -354,8 +354,9 @@ def get_production_triage( description=( "Return the deterministic structural risk boundary for changing " "the given files. Shows direct dependents, clone cohort members, " - "coverage gaps, and do-not-touch paths. Derived from the canonical " - "report; no new analysis is performed." + "coverage gaps, actionable do-not-touch paths, and review-only " + "context. Derived from the canonical report; no new analysis is " + "performed." ), annotations=read_only_tool, structured_output=True, diff --git a/docs/book/20-mcp-interface.md b/docs/book/20-mcp-interface.md index 863c926e..36cecce3 100644 --- a/docs/book/20-mcp-interface.md +++ b/docs/book/20-mcp-interface.md @@ -56,18 +56,26 @@ Current tool set: `23` tools. The MCP surface is intentionally triage-first: analyze first, summarize/triage second, then drill into one finding or one hotspot family. +`get_blast_radius` keeps hard guardrails separate from review context. +`do_not_touch` is limited to actionable negative context such as baselines, +generated CodeClone state, explicit forbidden paths, or files affected by the +blast radius but outside the declared edit scope. Report-only signals such as +security boundary inventory and overloaded-module candidates are returned as +`review_context`, not as edit prohibitions. Long context sections include +`total`, `shown`, and `truncated` summaries. 
+ ### Analysis and run-level tools -| Tool | Key parameters | Purpose | -|-------------------------|------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------| -| `analyze_repository` | `root`, `analysis_mode`, thresholds, `api_surface`, `coverage_xml`, `baseline_path`, `metrics_baseline_path`, `cache_policy` | Full deterministic analysis of one repo root; registers the latest in-memory run. | -| `analyze_changed_paths` | `root`, `changed_paths` or `git_diff_ref`, `analysis_mode`, thresholds, `api_surface`, `coverage_xml`, `cache_policy` | Diff-aware analysis with changed-files projection over the same canonical run/report contract. | -| `get_run_summary` | `run_id` | Cheapest run-level snapshot. Start here after analysis when you need health, findings, baseline/cache status, and inventory in compact form. | -| `get_production_triage` | `run_id`, `max_hotspots`, `max_suggestions` | Production-first first-pass view over one stored run. | -| `get_blast_radius` | `run_id`, `files`, `depth`, `include` | Derived pre-change risk boundary: direct dependents, clone cohorts, coverage gaps, risk signals, and do-not-touch paths. | -| `help` | `topic`, `detail` | Bounded workflow/contract guidance for supported MCP topics. | -| `compare_runs` | `run_id_before`, `run_id_after`, `focus` | Run-to-run delta view over findings and health; returns `incomparable` when roots/settings differ. | -| `evaluate_gates` | `run_id`, gate flags, threshold overrides, `coverage_min` | Evaluate CI/gating decisions against a stored run without mutating process or repo state. 
| +| Tool | Key parameters | Purpose | +|-------------------------|------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------| +| `analyze_repository` | `root`, `analysis_mode`, thresholds, `api_surface`, `coverage_xml`, `baseline_path`, `metrics_baseline_path`, `cache_policy` | Full deterministic analysis of one repo root; registers the latest in-memory run. | +| `analyze_changed_paths` | `root`, `changed_paths` or `git_diff_ref`, `analysis_mode`, thresholds, `api_surface`, `coverage_xml`, `cache_policy` | Diff-aware analysis with changed-files projection over the same canonical run/report contract. | +| `get_run_summary` | `run_id` | Cheapest run-level snapshot. Start here after analysis when you need health, findings, baseline/cache status, and inventory in compact form. | +| `get_production_triage` | `run_id`, `max_hotspots`, `max_suggestions` | Production-first first-pass view over one stored run. | +| `get_blast_radius` | `run_id`, `files`, `depth`, `include` | Derived pre-change risk boundary: direct dependents, clone cohorts, coverage gaps, risk signals, actionable do-not-touch paths, and review-only context. | +| `help` | `topic`, `detail` | Bounded workflow/contract guidance for supported MCP topics. | +| `compare_runs` | `run_id_before`, `run_id_after`, `focus` | Run-to-run delta view over findings and health; returns `incomparable` when roots/settings differ. | +| `evaluate_gates` | `run_id`, gate flags, threshold overrides, `coverage_min` | Evaluate CI/gating decisions against a stored run without mutating process or repo state. | ### Report and finding projection tools @@ -92,12 +100,12 @@ second, then drill into one finding or one hotspot family. 
### Session-local tools -| Tool | Key parameters | Purpose | -|--------------------------|--------------------------------|-------------------------------------------------------------------------------------| -| `mark_finding_reviewed` | `finding_id`, `run_id`, `note` | Mark a finding as reviewed in the current in-memory MCP session. | -| `list_reviewed_findings` | `run_id` | Return reviewed markers currently held in process memory. | -| `manage_change_intent` | `action`, `run_id`, `intent_id`, `scope`, `changed_files` or `diff_ref` | Declare, inspect, check, or clear session-local change intent for governed edits. | -| `clear_session_runs` | none | Clear in-memory run history and session-local review state for this server process. | +| Tool | Key parameters | Purpose | +|--------------------------|-------------------------------------------------------------------------|-------------------------------------------------------------------------------------| +| `mark_finding_reviewed` | `finding_id`, `run_id`, `note` | Mark a finding as reviewed in the current in-memory MCP session. | +| `list_reviewed_findings` | `run_id` | Return reviewed markers currently held in process memory. | +| `manage_change_intent` | `action`, `run_id`, `intent_id`, `scope`, `changed_files` or `diff_ref` | Declare, inspect, check, or clear session-local change intent for governed edits. | +| `clear_session_runs` | none | Clear in-memory run history and session-local review state for this server process. | ## Resources diff --git a/docs/mcp.md b/docs/mcp.md index 0fad642e..61c2d5ce 100644 --- a/docs/mcp.md +++ b/docs/mcp.md @@ -108,31 +108,31 @@ run-scoped URI templates. 
## Tool surface -| Tool | Purpose | -|--------------------------|----------------------------------------------------------------------------------------------------------| -| `analyze_repository` | Full analysis → compact summary; use `get_run_summary` or `get_production_triage` as the first pass | -| `analyze_changed_paths` | Diff-aware analysis via `changed_paths` or `git_diff_ref`; compact changed-files snapshot | -| `get_run_summary` | Cheapest run snapshot: health, findings, baseline, inventory, active thresholds | -| `get_production_triage` | Production-first view: health, hotspots, suggestions, active thresholds; best first pass for noisy repos | -| `get_blast_radius` | Pre-change risk boundary: direct dependents, clone cohorts, coverage gaps, and do-not-touch paths | -| `help` | Semantic guide for workflow, analysis profile, baseline, suppressions, review state, changed-scope | -| `compare_runs` | Run-to-run delta: regressions, improvements, health change | -| `list_findings` | Filtered, paginated findings; use after hotspots or `check_*` | -| `get_finding` | Single finding detail by id; defaults to `normal` detail level | -| `get_remediation` | Remediation payload for one finding | -| `list_hotspots` | Priority-ranked hotspot views; preferred before broad listing | -| `get_report_section` | Read report sections; `metrics_detail` is paginated with family/path filters | -| `evaluate_gates` | Evaluate CI gating decisions | -| `check_clones` | Clone findings only; narrower than `list_findings` | -| `check_complexity` | Complexity hotspots only | -| `check_coupling` | Coupling hotspots only | -| `check_cohesion` | Cohesion hotspots only | -| `check_dead_code` | Dead-code findings only | -| `generate_pr_summary` | PR-friendly markdown or JSON summary | -| `mark_finding_reviewed` | Session-local review marker (in-memory) | -| `list_reviewed_findings` | List reviewed findings for a run | -| `manage_change_intent` | Declare, inspect, check, or clear session-local 
edit scope intent | -| `clear_session_runs` | Reset in-memory runs and session state | +| Tool | Purpose | +|--------------------------|-----------------------------------------------------------------------------------------------------------------------------------| +| `analyze_repository` | Full analysis → compact summary; use `get_run_summary` or `get_production_triage` as the first pass | +| `analyze_changed_paths` | Diff-aware analysis via `changed_paths` or `git_diff_ref`; compact changed-files snapshot | +| `get_run_summary` | Cheapest run snapshot: health, findings, baseline, inventory, active thresholds | +| `get_production_triage` | Production-first view: health, hotspots, suggestions, active thresholds; best first pass for noisy repos | +| `get_blast_radius` | Pre-change risk boundary: direct dependents, clone cohorts, coverage gaps, actionable do-not-touch paths, and review-only context | +| `help` | Semantic guide for workflow, analysis profile, baseline, suppressions, review state, changed-scope | +| `compare_runs` | Run-to-run delta: regressions, improvements, health change | +| `list_findings` | Filtered, paginated findings; use after hotspots or `check_*` | +| `get_finding` | Single finding detail by id; defaults to `normal` detail level | +| `get_remediation` | Remediation payload for one finding | +| `list_hotspots` | Priority-ranked hotspot views; preferred before broad listing | +| `get_report_section` | Read report sections; `metrics_detail` is paginated with family/path filters | +| `evaluate_gates` | Evaluate CI gating decisions | +| `check_clones` | Clone findings only; narrower than `list_findings` | +| `check_complexity` | Complexity hotspots only | +| `check_coupling` | Coupling hotspots only | +| `check_cohesion` | Cohesion hotspots only | +| `check_dead_code` | Dead-code findings only | +| `generate_pr_summary` | PR-friendly markdown or JSON summary | +| `mark_finding_reviewed` | Session-local review marker (in-memory) | +| 
`list_reviewed_findings` | List reviewed findings for a run | +| `manage_change_intent` | Declare, inspect, check, or clear session-local edit scope intent | +| `clear_session_runs` | Reset in-memory runs and session state | > `check_*` tools query stored runs only. Call `analyze_repository` or > `analyze_changed_paths` first. @@ -140,6 +140,11 @@ run-scoped URI templates. **Payload conventions:** - `check_*` responses include only the relevant health dimension. +- `get_blast_radius` separates edit prohibitions from context: + `do_not_touch` contains actionable negative context such as baselines, + generated CodeClone state, explicit forbidden paths, and affected files + outside declared scope. Report-only signals are returned as `review_context`. + Long context sections include `total`, `shown`, and `truncated` summaries. - Empty design `check_*` responses may also include a compact `threshold_context` (`metric`, `threshold`, `measured_units`, `highest_below_threshold`) to show whether the run is genuinely quiet or @@ -350,7 +355,7 @@ If `codeclone-mcp` is not on `PATH`, use an absolute path to the launcher. 
| Problem | Fix | |-----------------------------------------------------------|--------------------------------------------------------------------------------| -| `CodeClone MCP support requires the optional 'mcp' extra` | `uv tool install "codeclone[mcp]"` or `uv pip install "codeclone[mcp]"` | +| `CodeClone MCP support requires the optional 'mcp' extra` | `uv tool install "codeclone[mcp]"` or `uv pip install "codeclone[mcp]"` | | Client cannot find `codeclone-mcp` | `uv tool install "codeclone[mcp]"` or use an absolute launcher path | | Client only accepts remote MCP | Use `streamable-http` transport | | Agent reads stale results | Call `analyze_repository` again; `latest` always points to the most recent run | diff --git a/tests/test_mcp_server.py b/tests/test_mcp_server.py index 85c6fcd5..84677173 100644 --- a/tests/test_mcp_server.py +++ b/tests/test_mcp_server.py @@ -198,6 +198,7 @@ def test_mcp_server_exposes_expected_read_only_tools() -> None: tools["get_production_triage"].description ) assert "structural risk boundary" in str(tools["get_blast_radius"].description) + assert "review-only context" in str(tools["get_blast_radius"].description) assert "Intent is session-local" in str(tools["manage_change_intent"].description) assert "bounded guidance, not a full manual" in str(tools["help"].description) assert "workflow, analysis_profile, suppressions, baseline" in str( @@ -346,6 +347,8 @@ def test_mcp_server_tool_roundtrip_and_resources(tmp_path: Path) -> None: ) assert blast_radius["origin"] == ["pkg/dup.py"] assert blast_radius["radius_level"] in {"low", "medium", "high"} + assert "review_context" in blast_radius + assert "do_not_touch_summary" in blast_radius change_intent = _structured_tool_result( asyncio.run( diff --git a/tests/test_mcp_service.py b/tests/test_mcp_service.py index c10c4e3f..f49c59c0 100644 --- a/tests/test_mcp_service.py +++ b/tests/test_mcp_service.py @@ -2100,9 +2100,68 @@ def test_mcp_blast_radius_projection_is_deterministic() -> 
None: assert direct.radius_level == "medium" assert direct.structural_risk["high_complexity_in_blast_zone"] == ["pkg/b.py"] assert direct.structural_risk["low_coverage_in_blast_zone"] == ["pkg/a.py"] + assert [item["path"] for item in direct.do_not_touch] == [ + ".cache/codeclone/**", + "codeclone.baseline.json", + ] + assert direct.review_context == ( + { + "path": "pkg/b.py", + "reason": "report-only design signal", + "category": "report_only_context", + "severity": "context", + }, + ) assert transitive.direct_dependents == ("pkg/b.py",) assert transitive.transitive_dependents == ("pkg/c.py",) - assert direct.to_payload(include=("do_not_touch",))["direct_dependents"] == [] + assert [item["category"] for item in transitive.review_context] == [ + "report_only_context", + "known_baseline_debt", + "security_boundary_context", + ] + do_not_touch_only = direct.to_payload(include=("do_not_touch",)) + assert do_not_touch_only["direct_dependents"] == [] + assert do_not_touch_only["review_context"] == [] + assert ( + cast(dict[str, object], do_not_touch_only["do_not_touch_summary"])["total"] == 2 + ) + + +def test_mcp_blast_radius_payload_bounds_context_sections() -> None: + review_context = tuple( + { + "path": f"pkg/context_{index}.py", + "reason": "report-only design signal", + "category": "report_only_context", + "severity": "context", + } + for index in range(25) + ) + result = mcp_blast_radius_mod.BlastRadiusResult( + run_id="abcdef12", + origin=("pkg/a.py",), + depth="direct", + radius_level="low", + direct_dependents=(), + transitive_dependents=(), + clone_cohort_members=(), + in_dependency_cycle=(), + structural_risk={}, + do_not_touch=(), + review_context=review_context, + guardrails=(), + ) + + payload = result.to_payload(include=("review_context",)) + + assert len(cast("list[dict[str, object]]", payload["review_context"])) == 20 + assert cast(dict[str, object], payload["review_context_summary"]) == { + "total": 25, + "shown": 20, + "truncated": True, + 
"top_categories": {"report_only_context": 25}, + "top_reasons": {"report-only design signal": 25}, + } def test_mcp_service_get_blast_radius_uses_cache_and_include_filter( @@ -2127,8 +2186,22 @@ def test_mcp_service_get_blast_radius_uses_cache_and_include_filter( assert cast(dict[str, object], first["structural_risk"])[ "overloaded_modules_in_blast_zone" ] == ["pkg/b.py"] + assert [ + item["category"] + for item in cast("list[dict[str, str]]", first["review_context"]) + ] == [ + "report_only_context", + "known_baseline_debt", + "security_boundary_context", + ] assert second["direct_dependents"] == [] - assert cast("list[dict[str, object]]", second["do_not_touch"]) + assert [ + item["path"] for item in cast("list[dict[str, str]]", second["do_not_touch"]) + ] == [ + ".cache/codeclone/**", + "codeclone.baseline.json", + ] + assert second["review_context"] == [] assert len(service._blast_radius_cache) == 1 with pytest.raises(MCPServiceContractError, match="requires at least one file"): service.get_blast_radius(files=()) @@ -2166,7 +2239,34 @@ def test_mcp_service_manage_change_intent_lifecycle(tmp_path: Path) -> None: ] == 1 ) - assert cast("list[dict[str, object]]", declared["do_not_touch"]) + assert ( + cast(dict[str, object], declared["blast_radius_summary"])["do_not_touch_count"] + == 5 + ) + assert ( + cast(dict[str, object], declared["blast_radius_summary"])[ + "review_context_count" + ] + == 1 + ) + assert [ + item["category"] + for item in cast("list[dict[str, str]]", declared["do_not_touch"]) + ] == [ + "baseline_or_generated_state", + "baseline_or_generated_state", + "affected_but_not_allowed", + "explicit_forbidden", + "affected_but_not_allowed", + ] + assert cast("list[dict[str, str]]", declared["review_context"]) == [ + { + "path": "tests/test_a.py", + "reason": "golden fixture clone suppression surface", + "category": "golden_fixture_surface", + "severity": "context", + } + ] fetched = service.manage_change_intent(action="get", intent_id=intent_id) assert 
fetched["intent_id"] == intent_id From ab9c020ac4590a4c042ae0d2fa94d20598aaf428 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Fri, 22 May 2026 20:14:45 +0500 Subject: [PATCH 004/318] docs: describe change controller alpha --- README.md | 65 +++++++++++++++++--- docs/README-pypi.md | 36 ++++++++--- docs/README.md | 25 ++++++-- docs/book/24-structural-change-controller.md | 54 ++++++++++++++++ docs/book/README.md | 1 + mkdocs.yml | 1 + 6 files changed, 160 insertions(+), 22 deletions(-) create mode 100644 docs/book/24-structural-change-controller.md diff --git a/README.md b/README.md index b2f25858..8b7ebb03 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ > -

A structural review layer for Python — baseline-aware, deterministic, built for CI and AI agents

+

Structural change controller for Python — deterministic, baseline-aware, built for CI and AI agents

[![][pypi-shield]][pypi-link] [![][status-shield]][pypi-link] [![][downloads-shield]][pypi-link] [![][python-shield]][pypi-link] [![][score-shield]][score-link] [![][license-shield]][license-link] @@ -26,8 +26,17 @@ --- -CodeClone adds a **control layer** between analysis and CI: it **isolates structural regressions** -from historical debt, so merges are blocked only by **what actually got worse**. +> [!NOTE] +> This README tracks the in-development **v2.1** line. +> For the latest stable release, see the +> [`v2.0.2` README](https://github.com/orenlab/codeclone/blob/v2.0.2/README.md) +> and the +> [`v2.0.2` docs](https://github.com/orenlab/codeclone/tree/v2.0.2/docs). + +CodeClone is a **structural change controller** for Python. It starts before the +first edit — when an agent declares what it intends to change — maps the +structural blast radius, verifies that the patch stayed inside its declared +boundary, and leaves an auditable receipt. **One canonical analysis.** The same **deterministic facts** across CLI, HTML reports, IDE, and MCP — for both **human reviewers** and **AI agents**. @@ -35,11 +44,41 @@ IDE, and MCP — for both **human reviewers** and **AI agents**. Docs: [orenlab.github.io/codeclone](https://orenlab.github.io/codeclone/) · Live sample report: [orenlab.github.io/codeclone/examples/report/](https://orenlab.github.io/codeclone/examples/report/) +## Change Controller + +When an AI agent edits code, CodeClone governs the structural boundary: + +| Step | Tool | What it does | +|------|------|-------------| +| 1. Declare intent | `manage_change_intent` | Agent states what it plans to change, which files, and why | +| 2. Map blast radius | `get_blast_radius` | Reverse imports, clone cohorts, dependency cycles, do-not-touch signals | +| 3. Check patch contract | planned | Pre-edit regression budget with headroom; post-edit boundary verification | +| 4. 
Generate receipt | planned | Auditable artifact linking intent, scope, patch status, and structural delta | +| 5. Validate claims | planned | Cross-check the agent's review text against the canonical report | + +Each step is deterministic — structural facts from the canonical report, no LLM inference. + +The v2.1 alpha starts with two live MCP tools, `manage_change_intent` and +`get_blast_radius`, composed over the existing read-only analysis surface. +Patch contract, receipt, and claim guard tools are planned follow-ups in the +same controller line. Controller state is session-local and in-memory — no +files created, no repo state mutated. + +Change controller docs: [Structural Change Controller](https://orenlab.github.io/codeclone/book/24-structural-change-controller/) + ## Features -**Control & governance** +**Change control** + +- **Intent declaration** — agent states what it plans to change; CodeClone tracks scope, expiry, and status +- **Blast radius** — structural risk projection: reverse imports, clone cohorts, dependency cycles, do-not-touch signals +- **Patch contract** — planned pre-edit regression budget and post-edit boundary verification +- **Review receipt** — planned auditable artifact linking intent, scope, patch verification, and structural delta +- **Claim guard** — planned citation-based validation of review text against the canonical report -- **Baseline governance** — separates accepted **legacy** debt from **new regressions**; CI fails only on what changed +**Baseline governance** + +- **Regression isolation** — separates accepted **legacy** debt from **new regressions**; CI fails only on what changed - **CI-first** — deterministic output, stable ordering, exit code contract, pre-commit support - **Reports** — interactive HTML, JSON, Markdown, SARIF, and text from one canonical report @@ -55,7 +94,7 @@ Live sample report: [orenlab.github.io/codeclone/examples/report/](https://orenl **Surfaces & integrations** -- **MCP control surface** — 
triage-first agent and IDE interface over the same canonical pipeline; read-only by contract +- **MCP control surface** — 23-tool agent and IDE interface over the same canonical pipeline; read-only by contract - **IDE & agent clients** — VS Code extension, Claude Desktop bundle, and Codex plugin over the same MCP contract **Performance** @@ -207,7 +246,7 @@ repos: ## MCP Control Surface -Triage-first MCP server for AI agents and IDE clients, built on the same canonical pipeline as the CLI. +23-tool MCP server for AI agents and IDE clients, built on the same canonical pipeline as the CLI. Read-only by contract: never mutates source, baselines, or repo state. ```bash @@ -218,6 +257,11 @@ codeclone-mcp --transport stdio codeclone-mcp --transport streamable-http ``` +21 analysis and triage tools provide the canonical read-only surface. 2 phase-1 +change controller tools (`manage_change_intent`, `get_blast_radius`) compose +over that surface to govern the structural boundary of AI-assisted changes. +Patch contract, review receipt, and claim guard are planned v2.1 follow-ups. + > [!WARNING] > Analysis tools require an absolute repository root. Relative roots such as `.` are rejected. > Keep `stdio` as the default transport for local IDE and agent clients; HTTP exposure beyond @@ -269,7 +313,7 @@ Config reference: [Config and defaults](https://orenlab.github.io/codeclone/book ## Baseline Workflow -Baselines capture the current duplication state. Once committed, they become the CI reference point. +Baselines capture the current structural state. Once committed, they become the CI reference point. 
- Clones are classified as **NEW** (not in baseline) or **KNOWN** (accepted debt) - `--update-baseline` writes both clone and metrics snapshots @@ -319,7 +363,7 @@ Top-level keys: `report_schema_version`, `meta`, `inventory`, `findings`, `metri { "report_schema_version": "2.11", "meta": { - "codeclone_version": "2.0.2", + "codeclone_version": "2.1.0a1", "project_name": "...", "scan_root": ".", "...": "..." @@ -440,6 +484,7 @@ Performance claims are backed by the reproducible benchmark workflow documented Full docs and contract book: [orenlab.github.io/codeclone](https://orenlab.github.io/codeclone/) Quick links: +[Change Controller](https://orenlab.github.io/codeclone/book/24-structural-change-controller/) · [Baseline](https://orenlab.github.io/codeclone/book/06-baseline/) · [Report](https://orenlab.github.io/codeclone/book/08-report/) · [Metrics & gates](https://orenlab.github.io/codeclone/book/15-metrics-and-quality-gates/) · @@ -477,4 +522,4 @@ Versions released before this change remain under their original license terms. [score-link]: #how-it-works [license-link]: #license [tests-link]: https://github.com/orenlab/codeclone/actions/workflows/tests.yml -[benchmark-link]: #benchmarking +[benchmark-link]: https://github.com/orenlab/codeclone/actions/workflows/benchmark.yml diff --git a/docs/README-pypi.md b/docs/README-pypi.md index 62f01eb9..c7d8542e 100644 --- a/docs/README-pypi.md +++ b/docs/README-pypi.md @@ -17,7 +17,7 @@

- A structural review layer for Python — baseline-aware, deterministic, built for CI and AI agents + Structural change controller for Python — deterministic, baseline-aware, built for CI and AI agents

@@ -27,9 +27,10 @@ Python

-CodeClone adds a control layer between analysis and CI: it isolates structural -regressions from historical debt, so merges are blocked only by what actually -got worse. +CodeClone is a structural change controller for Python. It starts before the +first edit — when an agent declares what it intends to change — maps the +structural blast radius, verifies that the patch stayed inside its declared +boundary, and leaves an auditable receipt. The same analysis pipeline powers CLI reports, CI checks, the MCP server, and native IDE/agent clients — so humans and AI agents operate on identical, @@ -40,10 +41,31 @@ deterministic facts. - Source: - Issues: +## Change Controller + +When an AI agent edits code, CodeClone governs the structural boundary: + +1. **Declare intent** — agent states what it plans to change, which files, and why +2. **Map blast radius** — reverse imports, clone cohorts, dependency cycles, do-not-touch signals +3. **Check patch contract** — planned pre-edit regression budget and post-edit boundary verification +4. **Generate receipt** — planned auditable artifact: intent + scope + patch status + structural delta +5. **Validate claims** — planned cross-check of review text against the canonical report + +Each step is deterministic — structural facts, no LLM inference. 
+ +Docs: <https://orenlab.github.io/codeclone/book/24-structural-change-controller/> + ## Features -**Control & governance** -- **Baseline governance** — separates accepted **legacy** debt from **new regressions**; CI fails only on what changed +**Change control** +- **Intent declaration** — agent states what it plans to change; CodeClone tracks scope, expiry, and status +- **Blast radius** — structural risk projection: reverse imports, clone cohorts, dependency cycles, do-not-touch signals +- **Patch contract** — planned pre-edit regression budget and post-edit boundary verification +- **Review receipt** — planned auditable artifact linking intent, scope, patch verification, and structural delta +- **Claim guard** — planned citation-based validation of review text against the canonical report + +**Baseline governance** +- **Regression isolation** — separates accepted **legacy** debt from **new regressions**; CI fails only on what changed - **CI-first** — deterministic output, stable ordering, exit code contract, pre-commit support - **Reports** — interactive HTML, JSON, Markdown, SARIF, and text from one canonical report @@ -56,7 +78,7 @@ deterministic facts. - **Security Surfaces** — report-only inventory of security-relevant capability boundaries without vulnerability claims **Surfaces & integrations** -- **MCP control surface** — triage-first agent and IDE interface over the same canonical pipeline; read-only by contract +- **MCP control surface** — 23-tool agent and IDE interface over the same canonical pipeline; read-only by contract - **IDE & agent clients** — VS Code extension, Claude Desktop bundle, and Codex plugin over the same MCP contract **Performance** diff --git a/docs/README.md b/docs/README.md index bbd2b6ed..5c85d44d 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,11 +1,13 @@ # CodeClone Docs -> Deterministic structural review for Python codebases. +> Structural change controller for Python codebases. > One canonical analysis across CI, HTML reports, IDEs, and AI agents. 
-CodeClone is a structural review layer for Python focused on deterministic -analysis, baseline-aware governance, and review surfaces for both humans and -AI-assisted workflows. +CodeClone is a structural change controller for Python. It starts before the +first edit — when an agent declares what it intends to change — maps the +structural blast radius, verifies that the patch stayed inside its declared +boundary, and leaves an auditable receipt. The same deterministic facts power +CI gates, human reviews, and AI-assisted workflows. This documentation site has two complementary layers: @@ -24,12 +26,20 @@ This documentation site has two complementary layers: ### New to CodeClone? -Understand the deterministic review model and governance philosophy. +Understand the deterministic change control model and governance philosophy. - [Contracts and guarantees](book/00-intro.md) - [Architecture map (components + ownership)](book/01-architecture-map.md) - [Terminology](book/02-terminology.md) +### Governing AI-assisted changes? + +Understand the structural change controller: intent, blast radius, patch contract, +review receipt, and claim guard. + +- [Structural Change Controller](book/24-structural-change-controller.md) +- [MCP interface contract](book/20-mcp-interface.md) + ### Integrating into CI? Set up baseline-aware gating and deterministic review flows. 
@@ -68,6 +78,7 @@ The Contracts Book defines: - determinism guarantees - trust and compatibility rules - review surface contracts +- change controller workflow and tool contracts ### Core Contracts @@ -78,6 +89,10 @@ The Contracts Book defines: - [Cache contract (schema v2.8)](book/07-cache.md) - [Report contract (schema v2.11)](book/08-report.md) +### Change Controller + +- [Structural Change Controller](book/24-structural-change-controller.md) + ### Interfaces - [CLI behavior, modes, and UX](book/09-cli.md) diff --git a/docs/book/24-structural-change-controller.md b/docs/book/24-structural-change-controller.md new file mode 100644 index 00000000..1e9ca2fd --- /dev/null +++ b/docs/book/24-structural-change-controller.md @@ -0,0 +1,54 @@ +# Structural Change Controller + +CodeClone v2.1 adds a session-local MCP control layer for AI-assisted edits. +The controller is not a second analyzer and does not persist state. It composes +over stored MCP runs and the canonical report contract. + +## Status + +The v2.1 alpha starts with the pre-change phase: + +| Phase | Status | MCP surface | +|-------|--------|-------------| +| Intent declaration | Live in `2.1.0a1` | `manage_change_intent` | +| Blast radius | Live in `2.1.0a1` | `get_blast_radius` | +| Patch contract | Planned | `check_patch_contract` | +| Review receipt | Planned | `create_review_receipt` | +| Claim guard | Planned | `validate_review_claims` | + +Planned tools are roadmap items until implemented and tested. Public clients +must not assume they exist in the current MCP tool list. + +## Contract + +- The canonical report remains the source of truth. +- Controller state is session-local and in-memory. +- MCP must not mutate source files, baselines, cache, reports, or repo state. +- Tools derive responses from existing run/report facts rather than LLM + inference. +- Report-only context is review context, not an edit prohibition. + +## Pre-Change Workflow + +1. 
Run `analyze_repository` or `analyze_changed_paths`. +2. Declare scope with `manage_change_intent(action="declare")`. +3. Inspect the returned `blast_radius_summary`. +4. Optionally call `get_blast_radius` for full dependent/context detail. +5. After editing, call `manage_change_intent(action="check")` with + `changed_files` or `diff_ref`. + +`manage_change_intent` can return `clean`, `expanded`, `violated`, or +`expired`. Expiry means the report digest changed since declaration. + +## Blast Radius Payload + +`get_blast_radius` separates hard edit guardrails from review context: + +- `do_not_touch`: actionable negative context such as baseline/cache state, + explicit forbidden paths, or affected files outside declared scope. +- `review_context`: report-only facts such as security boundary inventory, + overloaded-module candidates, known baseline debt, and golden fixture + surfaces. + +Long context sections are bounded and include summaries with `total`, `shown`, +and `truncated`. diff --git a/docs/book/README.md b/docs/book/README.md index 7ed5a711..5a82a206 100644 --- a/docs/book/README.md +++ b/docs/book/README.md @@ -37,6 +37,7 @@ If a statement is not enforced by code/tests, it is explicitly marked as non-con - [21-vscode-extension.md](21-vscode-extension.md) - [22-claude-desktop-bundle.md](22-claude-desktop-bundle.md) - [23-codex-plugin.md](23-codex-plugin.md) +- [24-structural-change-controller.md](24-structural-change-controller.md) - [10-html-render.md](10-html-render.md) ### System properties diff --git a/mkdocs.yml b/mkdocs.yml index 29f2025f..11215d22 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -106,6 +106,7 @@ nav: - VS Code Extension: book/21-vscode-extension.md - Claude Desktop Bundle: book/22-claude-desktop-bundle.md - Codex Plugin: book/23-codex-plugin.md + - Structural Change Controller: book/24-structural-change-controller.md - HTML Render: book/10-html-render.md - System Properties: From b992813d81e4bc1fdb867c46bc88fe3d0f3d4a88 Mon Sep 17 
00:00:00 2001 From: Den Rozhnovskiy Date: Fri, 22 May 2026 20:15:04 +0500 Subject: [PATCH 005/318] chore(claude): refresh change controller bundle metadata --- extensions/claude-desktop-codeclone/README.md | 26 ++++++- .../claude-desktop-codeclone/manifest.json | 76 +++++++++++++++---- .../claude-desktop-codeclone/package.json | 4 +- .../test/manifest.test.js | 2 +- uv.lock | 6 +- 5 files changed, 92 insertions(+), 22 deletions(-) diff --git a/extensions/claude-desktop-codeclone/README.md b/extensions/claude-desktop-codeclone/README.md index 83da9e9e..499a7f7a 100644 --- a/extensions/claude-desktop-codeclone/README.md +++ b/extensions/claude-desktop-codeclone/README.md @@ -1,9 +1,9 @@ # CodeClone for Claude Desktop -Local MCP bundle wrapper for `codeclone-mcp` — installs as a `.mcpb` package -instead of manual JSON editing. +Structural change controller for Python — local MCP bundle wrapper for +`codeclone-mcp`. Installs as a `.mcpb` package instead of manual JSON editing. -Same canonical MCP surface used by CLI, VS Code, Codex, and Claude Code. +Same canonical 23-tool MCP surface used by CLI, VS Code, Codex, and Claude Code. Read-only, baseline-aware, local stdio only. As the local `codeclone-mcp` server gains new canonical surfaces, the bundle exposes them without adding a second client-side interpretation layer. @@ -53,6 +53,25 @@ command** in the extension settings to an absolute path. ## Usage +### Change controller workflow + +```text +# 1. Analyze the repository +Use CodeClone to analyze this repository. + +# 2. Declare intent before editing +Declare a change intent for refactoring codeclone/core/parser.py — I plan to +extract the CFG builder into a separate module. + +# 3. Check blast radius +Show the blast radius for codeclone/core/parser.py. + +# 4. After editing — verify the patch +Check my change intent against the current diff. 
+``` + +### Analysis and review + ```text # Conservative first pass Use CodeClone to analyze this repository and show the top production hotspots. @@ -84,4 +103,5 @@ npm run pack # build .mcpb - [Claude Desktop bundle guide](https://orenlab.github.io/codeclone/claude-desktop-bundle/) - [MCP usage guide](https://orenlab.github.io/codeclone/mcp/) +- [Change controller docs](https://orenlab.github.io/codeclone/book/24-structural-change-controller/) - [Issues](https://github.com/orenlab/codeclone/issues) diff --git a/extensions/claude-desktop-codeclone/manifest.json b/extensions/claude-desktop-codeclone/manifest.json index ae51e65b..4ed8d2dc 100644 --- a/extensions/claude-desktop-codeclone/manifest.json +++ b/extensions/claude-desktop-codeclone/manifest.json @@ -2,9 +2,9 @@ "manifest_version": "0.3", "name": "codeclone", "display_name": "CodeClone", - "version": "2.0.0", - "description": "Baseline-aware structural review for Claude Desktop through a local CodeClone MCP launcher.", - "long_description": "CodeClone for Claude Desktop wraps the local codeclone-mcp launcher as an MCP bundle. It keeps Claude on the same canonical MCP surface used by the CLI, HTML report, VS Code extension, and Codex plugin — read-only, baseline-aware, local stdio only.", + "version": "2.1.0", + "description": "Structural change controller for Python — deterministic, baseline-aware, built for CI and AI agents.", + "long_description": "CodeClone for Claude Desktop wraps the local codeclone-mcp launcher as an MCP bundle. It keeps Claude on the same canonical 23-tool MCP surface used by the CLI, HTML report, VS Code extension, and Codex plugin — read-only, baseline-aware, local stdio only. 
The v2.1 change controller starts with intent declaration and blast radius; patch contract, review receipt, and claim guard are planned follow-ups.", "author": { "name": "Den Rozhnovskiy", "email": "pytelemonbot@mail.ru", @@ -25,38 +25,76 @@ "mcp", "claude-desktop", "codeclone", + "change-controller", "structural-review", "baseline-aware", + "blast-radius", "code-quality" ], "tools": [ { "name": "analyze_repository", - "description": "Run a baseline-aware CodeClone analysis for the current repository." + "description": "Run a deterministic CodeClone analysis and register it as the latest MCP run." }, { "name": "analyze_changed_paths", - "description": "Run changed-files analysis for PR-style structural review." + "description": "Run a deterministic CodeClone analysis and return a changed-files projection." + }, + { + "name": "get_run_summary", + "description": "Cheapest run-level snapshot: health, findings, baseline, inventory." }, { "name": "get_production_triage", - "description": "Return the cheapest production-first hotspot and health snapshot." + "description": "Production-first triage: health, hotspots, suggestions, source-kind counters." }, { - "name": "get_run_summary", - "description": "Cheapest run snapshot: health, findings, baseline, inventory." + "name": "get_blast_radius", + "description": "Structural risk boundary: dependents, clone cohorts, do-not-touch paths, review context." + }, + { + "name": "manage_change_intent", + "description": "Change intent lifecycle: declare scope, get status, check diff, clear intent." }, { "name": "list_hotspots", "description": "Priority-ranked hotspot views by kind." }, + { + "name": "list_findings", + "description": "List canonical finding groups with filters, pagination, and summary cards." + }, { "name": "get_finding", - "description": "Open one canonical finding by id." + "description": "Return a single canonical finding group by short or full id." 
}, { "name": "get_remediation", - "description": "Return remediation guidance for one canonical finding." + "description": "Return actionable remediation guidance for a single finding." + }, + { + "name": "get_report_section", + "description": "Return a canonical report section for the latest or specified run." + }, + { + "name": "check_clones", + "description": "Return clone findings from a compatible stored run." + }, + { + "name": "check_complexity", + "description": "Return complexity hotspots from a compatible stored run." + }, + { + "name": "check_coupling", + "description": "Return coupling hotspots from a compatible stored run." + }, + { + "name": "check_cohesion", + "description": "Return cohesion hotspots from a compatible stored run." + }, + { + "name": "check_dead_code", + "description": "Return dead-code findings from a compatible stored run." }, { "name": "compare_runs", @@ -64,15 +102,27 @@ }, { "name": "evaluate_gates", - "description": "Evaluate CI gating decisions for the current run." + "description": "Evaluate CI gate conditions against an existing MCP run." }, { "name": "generate_pr_summary", - "description": "PR-friendly markdown or JSON summary of the analysis." + "description": "PR-friendly markdown or JSON summary of changed files." + }, + { + "name": "mark_finding_reviewed", + "description": "Mark a finding as reviewed in the current in-memory MCP session." + }, + { + "name": "list_reviewed_findings", + "description": "List in-memory reviewed findings for the current or specified run." + }, + { + "name": "clear_session_runs", + "description": "Clear all in-memory MCP analysis runs and ephemeral session state." }, { "name": "help", - "description": "Semantic guide for workflow, analysis profile, baseline, coverage, and review state." + "description": "Explain a CodeClone workflow or contract topic and suggest the safest next step." 
} ], "tools_generated": true, diff --git a/extensions/claude-desktop-codeclone/package.json b/extensions/claude-desktop-codeclone/package.json index dd3847cb..38ef49f6 100644 --- a/extensions/claude-desktop-codeclone/package.json +++ b/extensions/claude-desktop-codeclone/package.json @@ -1,8 +1,8 @@ { "name": "@orenlab/codeclone-claude-desktop", - "version": "2.0.0", + "version": "2.1.0", "private": true, - "description": "Claude Desktop MCP bundle wrapper for the local CodeClone MCP launcher.", + "description": "Claude Desktop MCP bundle — structural change controller for Python.", "license": "MPL-2.0", "type": "commonjs", "engines": { diff --git a/extensions/claude-desktop-codeclone/test/manifest.test.js b/extensions/claude-desktop-codeclone/test/manifest.test.js index c619f64e..1df293d2 100644 --- a/extensions/claude-desktop-codeclone/test/manifest.test.js +++ b/extensions/claude-desktop-codeclone/test/manifest.test.js @@ -32,6 +32,6 @@ test("manifest keeps the setup surface bounded and local", () => { ]); assert.equal(manifest.documentation, "https://orenlab.github.io/codeclone/claude-desktop-bundle/"); assert.equal(manifest.tools_generated, true); - assert.equal(manifest.tools.length, 11); + assert.equal(manifest.tools.length, 23); assert.equal("instructions" in manifest, false); }); diff --git a/uv.lock b/uv.lock index 46605ece..88e110c1 100644 --- a/uv.lock +++ b/uv.lock @@ -912,11 +912,11 @@ wheels = [ [[package]] name = "more-itertools" -version = "11.0.2" +version = "11.1.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a2/f7/139d22fef48ac78127d18e01d80cf1be40236ae489769d17f35c3d425293/more_itertools-11.0.2.tar.gz", hash = "sha256:392a9e1e362cbc106a2457d37cabf9b36e5e12efd4ebff1654630e76597df804", size = 144659, upload-time = "2026-04-09T15:01:33.297Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/de/1d/f4da6f02cdffe04d6362210b807146a26044c88d839208aec273bb0d9184/more_itertools-11.1.0.tar.gz", hash = "sha256:48e8f4d9e7e5878571ecf6f2b4e57634f93cd474cc8cfbd2376f2d11b396e30d", size = 145772, upload-time = "2026-05-22T14:14:29.909Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/cb/98/6af411189d9413534c3eb691182bff1f5c6d44ed2f93f2edfe52a1bbceb8/more_itertools-11.0.2-py3-none-any.whl", hash = "sha256:6e35b35f818b01f691643c6c611bc0902f2e92b46c18fffa77ae1e7c46e912e4", size = 71939, upload-time = "2026-04-09T15:01:32.21Z" }, + { url = "https://files.pythonhosted.org/packages/e8/3d/1087453384dbde46a8c7f9356eead2c58be8a7bf156bca40243377c85715/more_itertools-11.1.0-py3-none-any.whl", hash = "sha256:4b65538ae22f6fed0ce4874efd317463a7489796a0939fa66824dd542125a192", size = 72226, upload-time = "2026-05-22T14:14:28.824Z" }, ] [[package]] From 94fc1fc37ced84e1109c4475d51d3d17ac8abbf9 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Fri, 22 May 2026 20:39:37 +0500 Subject: [PATCH 006/318] feat(mcp): add patch contract checks --- codeclone/surfaces/mcp/_patch_contract.py | 182 ++++++ .../mcp/_session_patch_contract_mixin.py | 600 ++++++++++++++++++ codeclone/surfaces/mcp/server.py | 32 + codeclone/surfaces/mcp/service.py | 16 + codeclone/surfaces/mcp/session.py | 4 +- .../contract_snapshots/mcp_tool_schemas.json | 96 +++ tests/test_mcp_server.py | 17 + tests/test_mcp_service.py | 329 ++++++++++ 8 files changed, 1274 insertions(+), 2 deletions(-) create mode 100644 codeclone/surfaces/mcp/_patch_contract.py create mode 100644 codeclone/surfaces/mcp/_session_patch_contract_mixin.py diff --git a/codeclone/surfaces/mcp/_patch_contract.py b/codeclone/surfaces/mcp/_patch_contract.py new file mode 100644 index 00000000..2e3cb754 --- /dev/null +++ b/codeclone/surfaces/mcp/_patch_contract.py @@ -0,0 +1,182 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Mapping +from dataclasses import dataclass +from enum import Enum +from typing import Final, Literal + +from ...contracts import DEFAULT_COVERAGE_MIN + +StrictnessProfile = Literal["ci", "strict", "relaxed"] +PatchContractMode = Literal["budget", "verify"] + +VALID_PATCH_CONTRACT_MODES: Final[frozenset[str]] = frozenset({"budget", "verify"}) +VALID_STRICTNESS_PROFILES: Final[frozenset[str]] = frozenset( + {"ci", "strict", "relaxed"} +) + + +class PatchContractStatus(str, Enum): + ACCEPTED = "accepted" + VIOLATED = "violated" + UNVERIFIED = "unverified" + EXPIRED = "expired" + + +@dataclass(frozen=True, slots=True) +class PatchBudgets: + clone_regression: int = 0 + dead_code_regression: bool = False + dependency_cycle: bool = False + coverage_hotspot: bool = False + complexity_delta: int = -1 + coupling_delta: int = -1 + cohesion_delta: int = -1 + health_floor: int = -1 + typing_regression: bool = False + docstring_regression: bool = False + api_break: bool = False + coverage_min: int = DEFAULT_COVERAGE_MIN + + def to_payload(self) -> dict[str, object]: + return { + "clone_regression": self.clone_regression, + "dead_code_regression": self.dead_code_regression, + "dependency_cycle": self.dependency_cycle, + "coverage_hotspot": self.coverage_hotspot, + "complexity_delta": self.complexity_delta, + "coupling_delta": self.coupling_delta, + "cohesion_delta": self.cohesion_delta, + "health_floor": self.health_floor, + "typing_regression": self.typing_regression, + "docstring_regression": self.docstring_regression, + "api_break": self.api_break, + "coverage_min": self.coverage_min, + } + + +STRICT_BUDGETS: Final[PatchBudgets] = PatchBudgets( + clone_regression=0, + dead_code_regression=True, + dependency_cycle=True, + 
coverage_hotspot=True, + complexity_delta=10, + coupling_delta=5, + cohesion_delta=3, + health_floor=70, + typing_regression=True, + docstring_regression=True, + api_break=True, + coverage_min=80, +) + +RELAXED_BUDGETS: Final[PatchBudgets] = PatchBudgets( + clone_regression=-1, + dead_code_regression=False, + dependency_cycle=False, + coverage_hotspot=False, + complexity_delta=-1, + coupling_delta=-1, + cohesion_delta=-1, + health_floor=-1, + typing_regression=False, + docstring_regression=False, + api_break=False, + coverage_min=-1, +) + + +def budgets_from_request( + *, + coverage_min: int | None, + complexity_threshold: int | None, + coupling_threshold: int | None, + cohesion_threshold: int | None, +) -> PatchBudgets: + return PatchBudgets( + clone_regression=0, + complexity_delta=_none_to_unlimited(complexity_threshold), + coupling_delta=_none_to_unlimited(coupling_threshold), + cohesion_delta=_none_to_unlimited(cohesion_threshold), + coverage_min=coverage_min if coverage_min is not None else DEFAULT_COVERAGE_MIN, + ) + + +def budgets_for_strictness( + *, + strictness: StrictnessProfile, + coverage_min: int | None, + complexity_threshold: int | None, + coupling_threshold: int | None, + cohesion_threshold: int | None, +) -> PatchBudgets: + if strictness == "strict": + return STRICT_BUDGETS + if strictness == "relaxed": + return RELAXED_BUDGETS + return budgets_from_request( + coverage_min=coverage_min, + complexity_threshold=complexity_threshold, + coupling_threshold=coupling_threshold, + cohesion_threshold=cohesion_threshold, + ) + + +def detect_baseline_abuse( + *, + before_gate_would_fail: bool, + after_gate_would_fail: bool, + after_baseline_status: str, + regressions: int, + changed_files: int, + intent_available: bool, +) -> dict[str, object]: + baseline_updated = after_baseline_status == "updated" + triggers: list[str] = [] + if baseline_updated and (regressions > 0 or changed_files > 0): + triggers.append("baseline_changed_with_functional_code") + if 
baseline_updated and regressions > 0: + triggers.append("baseline_updated_while_regressions_present") + if baseline_updated and not intent_available: + triggers.append("baseline_updated_without_intent") + if baseline_updated and before_gate_would_fail and not after_gate_would_fail: + triggers.append("ci_greened_by_accepting_debt") + return { + "detected": bool(triggers), + "triggers": triggers, + } + + +def baseline_status(report_document: Mapping[str, object]) -> str: + meta = _as_mapping(report_document.get("meta")) + baseline = _as_mapping(meta.get("baseline")) + return str(baseline.get("status", "")).strip() + + +def _none_to_unlimited(value: int | None) -> int: + return value if value is not None else -1 + + +def _as_mapping(value: object) -> Mapping[str, object]: + return value if isinstance(value, Mapping) else {} + + +__all__ = [ + "RELAXED_BUDGETS", + "STRICT_BUDGETS", + "VALID_PATCH_CONTRACT_MODES", + "VALID_STRICTNESS_PROFILES", + "PatchBudgets", + "PatchContractMode", + "PatchContractStatus", + "StrictnessProfile", + "baseline_status", + "budgets_for_strictness", + "detect_baseline_abuse", +] diff --git a/codeclone/surfaces/mcp/_session_patch_contract_mixin.py b/codeclone/surfaces/mcp/_session_patch_contract_mixin.py new file mode 100644 index 00000000..b0cc2141 --- /dev/null +++ b/codeclone/surfaces/mcp/_session_patch_contract_mixin.py @@ -0,0 +1,600 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Mapping, Sequence + +from ...utils.coerce import as_int as _coerce_int +from . 
import _session_helpers as _helpers +from ._intent import IntentRecord, IntentStatus +from ._patch_contract import ( + VALID_PATCH_CONTRACT_MODES, + VALID_STRICTNESS_PROFILES, + PatchBudgets, + PatchContractMode, + PatchContractStatus, + StrictnessProfile, + baseline_status, + budgets_for_strictness, + detect_baseline_abuse, +) +from ._session_intent_mixin import _MCPSessionIntentMixin +from ._session_shared import ( + CodeCloneMCPRunStore, + MCPGateRequest, + MCPRunNotFoundError, + MCPRunRecord, + MCPServiceContractError, +) + +MAX_WORSENED_ITEMS = 20 + + +class _MCPSessionPatchContractMixin(_MCPSessionIntentMixin): + _runs: CodeCloneMCPRunStore + _active_intents: dict[str, IntentRecord] + + def check_patch_contract( + self, + *, + mode: str, + run_id: str | None = None, + before_run_id: str | None = None, + after_run_id: str | None = None, + intent_id: str | None = None, + strictness: str = "ci", + diff_ref: str | None = None, + changed_files: Sequence[str] | None = None, + ) -> dict[str, object]: + validated_mode = self._validated_patch_contract_mode(mode) + validated_strictness = self._validated_strictness(strictness) + if validated_mode == "budget": + return self._patch_contract_budget( + run_id=run_id, + intent_id=intent_id, + strictness=validated_strictness, + ) + return self._patch_contract_verify( + before_run_id=before_run_id, + after_run_id=after_run_id, + intent_id=intent_id, + strictness=validated_strictness, + diff_ref=diff_ref, + changed_files=changed_files, + ) + + def _patch_contract_budget( + self, + *, + run_id: str | None, + intent_id: str | None, + strictness: StrictnessProfile, + ) -> dict[str, object]: + record = self._runs.get(run_id) + intent = self._optional_intent(record=record, intent_id=intent_id) + budgets = self._budgets_for_record(record=record, strictness=strictness) + current_state = self._current_state(record) + gate_preview = self._gate_preview(record=record, budgets=budgets) + return { + "mode": "budget", + "run_id": 
_helpers._short_run_id(record.run_id), + "strictness": strictness, + "intent_id": intent.intent_id if intent is not None else None, + "scope": "changed" if intent is not None else "full", + "declared_scope": ( + intent.scope.to_payload() if intent is not None else None + ), + "blast_radius_summary": ( + intent.blast_radius_summary if intent is not None else None + ), + "budgets": budgets.to_payload(), + "current_state": current_state, + "headroom": self._headroom(budgets=budgets, current_state=current_state), + "gate_preview": gate_preview, + "message": self._budget_message( + strictness=strictness, + gate_preview=gate_preview, + ), + } + + def _patch_contract_verify( + self, + *, + before_run_id: str | None, + after_run_id: str | None, + intent_id: str | None, + strictness: StrictnessProfile, + diff_ref: str | None, + changed_files: Sequence[str] | None, + ) -> dict[str, object]: + if before_run_id is None: + return self._unverified_patch_contract(reason="no_before_run") + try: + before = self._runs.get(before_run_id) + except MCPRunNotFoundError: + return self._unverified_patch_contract(reason="no_before_run") + if after_run_id is None: + return self._unverified_patch_contract(reason="no_after_run") + try: + after = self._runs.get(after_run_id) + except MCPRunNotFoundError: + return self._unverified_patch_contract( + reason="no_after_run", + before=before, + ) + compare_payload = self.compare_runs( + run_id_before=before.run_id, + run_id_after=after.run_id, + focus="all", + ) + if not bool(compare_payload.get("comparable")): + return self._unverified_patch_contract( + reason="incomparable_runs", + before=before, + after=after, + structural_delta=self._structural_delta(compare_payload), + ) + intent = self._optional_intent(record=before, intent_id=intent_id) + if intent is not None and self._is_intent_expired(record=before, intent=intent): + return self._expired_patch_contract( + before=before, after=after, intent=intent + ) + actual_changed_files = 
self._patch_changed_files( + after=after, + diff_ref=diff_ref, + changed_files=changed_files, + ) + scope_check = ( + self._scope_check_payload(intent=intent, actual=actual_changed_files) + if intent is not None + else None + ) + budgets = self._budgets_for_record(record=after, strictness=strictness) + before_gate = self._gate_preview(record=before, budgets=budgets) + after_gate = self._gate_preview(record=after, budgets=budgets) + structural_delta = self._structural_delta(compare_payload) + regressions = _as_sequence(structural_delta.get("regressions")) + baseline_abuse = detect_baseline_abuse( + before_gate_would_fail=bool(before_gate["would_fail"]), + after_gate_would_fail=bool(after_gate["would_fail"]), + after_baseline_status=baseline_status(after.report_document), + regressions=len(regressions), + changed_files=len(actual_changed_files), + intent_available=intent is not None, + ) + violations = self._contract_violations( + structural_delta=structural_delta, + gate_preview=after_gate, + scope_check=scope_check, + baseline_abuse=baseline_abuse, + ) + blocking_violations = () if strictness == "relaxed" else violations + status = ( + PatchContractStatus.VIOLATED.value + if blocking_violations + else PatchContractStatus.ACCEPTED.value + ) + return { + "mode": "verify", + "status": status, + "reason": None, + "before": self._run_ref_payload(before), + "after": self._run_ref_payload(after), + "intent_id": intent.intent_id if intent is not None else None, + "strictness": strictness, + "structural_delta": structural_delta, + "worsened": self._worsened_symbols(before=before, after=after), + "scope_check": scope_check, + "gate_preview": after_gate, + "baseline_abuse": baseline_abuse, + "contract_violations": list(violations), + "blocking_violations": list(blocking_violations), + "message": self._verify_message(status=status, violations=violations), + } + + def _validated_patch_contract_mode(self, mode: str) -> PatchContractMode: + if mode not in 
VALID_PATCH_CONTRACT_MODES: + expected = ", ".join(sorted(VALID_PATCH_CONTRACT_MODES)) + raise MCPServiceContractError( + f"Invalid value for mode: {mode!r}. Expected one of: {expected}." + ) + return "verify" if mode == "verify" else "budget" + + def _validated_strictness(self, strictness: str) -> StrictnessProfile: + if strictness not in VALID_STRICTNESS_PROFILES: + expected = ", ".join(sorted(VALID_STRICTNESS_PROFILES)) + raise MCPServiceContractError( + "Invalid value for strictness: " + f"{strictness!r}. Expected one of: {expected}." + ) + if strictness == "strict": + return "strict" + if strictness == "relaxed": + return "relaxed" + return "ci" + + def _optional_intent( + self, + *, + record: MCPRunRecord, + intent_id: str | None, + ) -> IntentRecord | None: + if intent_id is not None: + _, intent = self._resolve_intent(run_id=None, intent_id=intent_id) + return intent + with self._state_lock: + matching = [ + intent + for intent in self._active_intents.values() + if intent.run_id == record.run_id + ] + return matching[-1] if matching else None + + def _budgets_for_record( + self, + *, + record: MCPRunRecord, + strictness: StrictnessProfile, + ) -> PatchBudgets: + request = record.request + return budgets_for_strictness( + strictness=strictness, + coverage_min=request.coverage_min, + complexity_threshold=request.complexity_threshold, + coupling_threshold=request.coupling_threshold, + cohesion_threshold=request.cohesion_threshold, + ) + + def _gate_request( + self, *, record: MCPRunRecord, budgets: PatchBudgets + ) -> MCPGateRequest: + clone_budget = budgets.clone_regression + return MCPGateRequest( + run_id=record.run_id, + fail_on_new=clone_budget == 0, + fail_threshold=-1, + fail_complexity=budgets.complexity_delta, + fail_coupling=budgets.coupling_delta, + fail_cohesion=budgets.cohesion_delta, + fail_cycles=budgets.dependency_cycle, + fail_dead_code=budgets.dead_code_regression, + fail_health=budgets.health_floor, + 
fail_on_typing_regression=budgets.typing_regression, + fail_on_docstring_regression=budgets.docstring_regression, + fail_on_api_break=budgets.api_break, + fail_on_untested_hotspots=budgets.coverage_hotspot, + coverage_min=budgets.coverage_min, + ) + + def _gate_preview( + self, + *, + record: MCPRunRecord, + budgets: PatchBudgets, + ) -> dict[str, object]: + gate_result = self._evaluate_gate_snapshot( + record=record, + request=self._gate_request(record=record, budgets=budgets), + ) + return { + "would_fail": gate_result.exit_code != 0, + "exit_code": gate_result.exit_code, + "reasons": list(gate_result.reasons), + } + + def _current_state(self, record: MCPRunRecord) -> dict[str, object]: + report_document = record.report_document + return { + "health_score": _helpers._summary_health_score(record.summary), + "complexity_max": self._family_max( + report_document, + family="complexity", + keys=("cyclomatic_complexity", "complexity", "value"), + ), + "coupling_max": self._family_max( + report_document, + family="coupling", + keys=("cbo", "coupling", "value"), + ), + "cohesion_max": self._family_max( + report_document, + family="cohesion", + keys=("lcom4", "cohesion", "value"), + ), + "dependency_cycles": len(self._dependency_cycles(report_document)), + "clone_groups": record.func_clones_count + record.block_clones_count, + "dead_code_high_confidence": self._dead_code_high_confidence( + report_document + ), + } + + def _headroom( + self, + *, + budgets: PatchBudgets, + current_state: Mapping[str, object], + ) -> dict[str, object]: + return { + "complexity_headroom": self._threshold_headroom( + budget=budgets.complexity_delta, + current=_coerce_int(current_state.get("complexity_max")), + ), + "coupling_headroom": self._threshold_headroom( + budget=budgets.coupling_delta, + current=_coerce_int(current_state.get("coupling_max")), + ), + "cohesion_headroom": self._threshold_headroom( + budget=budgets.cohesion_delta, + current=_coerce_int(current_state.get("cohesion_max")), 
+ ), + "health_headroom": ( + _coerce_int(current_state.get("health_score")) - budgets.health_floor + if budgets.health_floor >= 0 + and current_state.get("health_score") is not None + else None + ), + } + + def _patch_changed_files( + self, + *, + after: MCPRunRecord, + diff_ref: str | None, + changed_files: Sequence[str] | None, + ) -> tuple[str, ...]: + if changed_files: + return self._normalize_changed_paths( + root_path=after.root, paths=changed_files + ) + if diff_ref is not None: + return self._git_diff_paths(root_path=after.root, git_diff_ref=diff_ref) + return tuple(after.changed_paths) + + def _scope_check_payload( + self, + *, + intent: IntentRecord, + actual: Sequence[str], + ) -> dict[str, object]: + check_result = self._intent_check_result(intent=intent, actual=actual) + return check_result.to_payload() + + def _contract_violations( + self, + *, + structural_delta: Mapping[str, object], + gate_preview: Mapping[str, object], + scope_check: Mapping[str, object] | None, + baseline_abuse: Mapping[str, object], + ) -> tuple[str, ...]: + violations: list[str] = [] + if _as_sequence(structural_delta.get("regressions")): + violations.append("structural_regressions") + if bool(gate_preview.get("would_fail")): + violations.append("gate_failures") + if ( + scope_check is not None + and scope_check.get("status") == IntentStatus.VIOLATED.value + ): + violations.append("scope_violation") + violations.extend( + f"baseline_abuse:{trigger}" + for trigger in _as_sequence(baseline_abuse.get("triggers")) + ) + return tuple(violations) + + def _structural_delta( + self, compare_payload: Mapping[str, object] + ) -> dict[str, object]: + return { + "regressions": list(_as_sequence(compare_payload.get("regressions"))), + "improvements": list(_as_sequence(compare_payload.get("improvements"))), + "health_delta": compare_payload.get("health_delta"), + "verdict": str(compare_payload.get("verdict", "")), + } + + def _worsened_symbols( + self, + *, + before: MCPRunRecord, + after: 
MCPRunRecord, + ) -> list[dict[str, object]]: + worsened: list[dict[str, object]] = [] + for family, value_keys in ( + ("complexity", ("cyclomatic_complexity", "complexity", "value")), + ("coupling", ("cbo", "coupling", "value")), + ("cohesion", ("lcom4", "cohesion", "value")), + ): + before_items = self._metric_item_index( + before.report_document, + family=family, + value_keys=value_keys, + ) + after_items = self._metric_item_index( + after.report_document, + family=family, + value_keys=value_keys, + ) + for key, after_value in after_items.items(): + before_value = before_items.get(key) + if before_value is not None and after_value > before_value: + path, symbol = key + worsened.append( + { + "family": family, + "path": path, + "symbol": symbol, + "before": before_value, + "after": after_value, + "delta": after_value - before_value, + } + ) + return sorted( + worsened, + key=lambda item: ( + -_coerce_int(item.get("delta")), + str(item.get("family", "")), + str(item.get("path", "")), + str(item.get("symbol", "")), + ), + )[:MAX_WORSENED_ITEMS] + + def _metric_item_index( + self, + report_document: Mapping[str, object], + *, + family: str, + value_keys: Sequence[str], + ) -> dict[tuple[str, str], int]: + result: dict[tuple[str, str], int] = {} + for item in self._metric_family_items(report_document, family=family): + path = self._item_path(item) + symbol = self._item_symbol(item) + value = self._first_int(item, keys=value_keys) + if path or symbol: + result[(path, symbol)] = value + return result + + def _metric_family_items( + self, + report_document: Mapping[str, object], + *, + family: str, + ) -> tuple[Mapping[str, object], ...]: + metrics = _as_mapping(report_document.get("metrics")) + families = _as_mapping(metrics.get("families")) + family_payload = _as_mapping(families.get(family)) + return tuple( + _as_mapping(item) for item in _as_sequence(family_payload.get("items")) + ) + + def _family_max( + self, + report_document: Mapping[str, object], + *, + family: 
str, + keys: Sequence[str], + ) -> int: + values = [ + self._first_int(item, keys=keys) + for item in self._metric_family_items(report_document, family=family) + ] + return max(values, default=0) + + def _dead_code_high_confidence(self, report_document: Mapping[str, object]) -> int: + return sum( + 1 + for item in self._metric_family_items(report_document, family="dead_code") + if str(item.get("confidence", "")).strip().lower() == "high" + ) + + def _dependency_cycles( + self, + report_document: Mapping[str, object], + ) -> tuple[object, ...]: + metrics = _as_mapping(report_document.get("metrics")) + families = _as_mapping(metrics.get("families")) + dependencies = _as_mapping(families.get("dependencies")) + return tuple(_as_sequence(dependencies.get("cycles"))) + + def _first_int(self, item: Mapping[str, object], *, keys: Sequence[str]) -> int: + for key in keys: + if key in item: + return _coerce_int(item.get(key)) + return 0 + + def _item_path(self, item: Mapping[str, object]) -> str: + for key in ("relative_path", "path", "filepath", "file"): + value = str(item.get(key, "")).strip() + if value: + return value.replace("\\", "/") + return "" + + def _item_symbol(self, item: Mapping[str, object]) -> str: + for key in ("qualname", "symbol", "name", "class_name", "function"): + value = str(item.get(key, "")).strip() + if value: + return value + return "" + + def _threshold_headroom(self, *, budget: int, current: int) -> int | None: + return budget - current if budget >= 0 else None + + def _run_ref_payload(self, record: MCPRunRecord) -> dict[str, object]: + return { + "run_id": _helpers._short_run_id(record.run_id), + "health": _helpers._summary_health_score(record.summary), + } + + def _unverified_patch_contract( + self, + *, + reason: str, + before: MCPRunRecord | None = None, + after: MCPRunRecord | None = None, + structural_delta: Mapping[str, object] | None = None, + ) -> dict[str, object]: + return { + "mode": "verify", + "status": 
PatchContractStatus.UNVERIFIED.value, + "reason": reason, + "before": self._run_ref_payload(before) if before is not None else None, + "after": self._run_ref_payload(after) if after is not None else None, + "structural_delta": dict(structural_delta or {}), + "contract_violations": [], + "message": f"Patch contract unverified: {reason}.", + } + + def _expired_patch_contract( + self, + *, + before: MCPRunRecord, + after: MCPRunRecord, + intent: IntentRecord, + ) -> dict[str, object]: + return { + "mode": "verify", + "status": PatchContractStatus.EXPIRED.value, + "reason": "report_digest_mismatch", + "before": self._run_ref_payload(before), + "after": self._run_ref_payload(after), + "intent_id": intent.intent_id, + "contract_violations": ["intent_expired"], + "message": ( + "Patch contract expired: intent was declared for another report digest." + ), + } + + def _budget_message( + self, + *, + strictness: StrictnessProfile, + gate_preview: Mapping[str, object], + ) -> str: + if strictness == "relaxed": + return "Relaxed patch budget is advisory; gate failures are not blocking." + if gate_preview.get("would_fail"): + return "Current run is already outside the selected patch budget." + return "Current run is inside the selected patch budget." + + def _verify_message(self, *, status: str, violations: Sequence[str]) -> str: + if status == PatchContractStatus.ACCEPTED.value: + return "Patch contract accepted." 
+ return "Patch contract violated: " + ", ".join(violations) + + +def _as_mapping(value: object) -> Mapping[str, object]: + return value if isinstance(value, Mapping) else {} + + +def _as_sequence(value: object) -> Sequence[object]: + if isinstance(value, Sequence) and not isinstance(value, (str, bytes, bytearray)): + return value + return () + + +__all__ = ["_MCPSessionPatchContractMixin"] diff --git a/codeclone/surfaces/mcp/server.py b/codeclone/surfaces/mcp/server.py index 9e8ee3df..7616ea83 100644 --- a/codeclone/surfaces/mcp/server.py +++ b/codeclone/surfaces/mcp/server.py @@ -374,6 +374,38 @@ def get_blast_radius( include=include, ) + @tool( + title="Check Patch Contract", + description=( + "Pre-edit budget query (mode='budget') or post-edit structural " + "verification (mode='verify'). Composes stored runs, gate " + "evaluation, run comparison, and session-local change intent " + "without running analysis or mutating repository state." + ), + annotations=read_only_tool, + structured_output=True, + ) + def check_patch_contract( + mode: str, + run_id: str | None = None, + before_run_id: str | None = None, + after_run_id: str | None = None, + intent_id: str | None = None, + strictness: str = "ci", + diff_ref: str | None = None, + changed_files: list[str] | None = None, + ) -> dict[str, object]: + return service.check_patch_contract( + mode=mode, + run_id=run_id, + before_run_id=before_run_id, + after_run_id=after_run_id, + intent_id=intent_id, + strictness=strictness, + diff_ref=diff_ref, + changed_files=changed_files, + ) + @tool( title="Help", description=( diff --git a/codeclone/surfaces/mcp/service.py b/codeclone/surfaces/mcp/service.py index dfdd3570..be72ee3a 100644 --- a/codeclone/surfaces/mcp/service.py +++ b/codeclone/surfaces/mcp/service.py @@ -64,6 +64,12 @@ def manage_change_intent( ) -> dict[str, object]: return self._run_dict("manage_change_intent", **params) + def check_patch_contract( + self: _RunDictService, + **params: object, + ) -> dict[str, 
object]: + return self._run_dict("check_patch_contract", **params) + def generate_pr_summary( self: _RunDictService, **params: object, @@ -223,6 +229,16 @@ def _apply_public_method_signatures() -> None: _kwonly("depth", "str", "direct"), _kwonly("include", "Sequence[str] | None", None), ), + "check_patch_contract": ( + _kwonly("mode", "str"), + _kwonly("run_id", "str | None", None), + _kwonly("before_run_id", "str | None", None), + _kwonly("after_run_id", "str | None", None), + _kwonly("intent_id", "str | None", None), + _kwonly("strictness", "str", "ci"), + _kwonly("diff_ref", "str | None", None), + _kwonly("changed_files", "Sequence[str] | None", None), + ), "manage_change_intent": ( _kwonly("action", "str"), _kwonly("run_id", "str | None", None), diff --git a/codeclone/surfaces/mcp/session.py b/codeclone/surfaces/mcp/session.py index 813bd64a..89cc276c 100644 --- a/codeclone/surfaces/mcp/session.py +++ b/codeclone/surfaces/mcp/session.py @@ -16,7 +16,7 @@ resolve_clone_baseline_state, resolve_metrics_baseline_state, ) -from ._session_intent_mixin import _MCPSessionIntentMixin +from ._session_patch_contract_mixin import _MCPSessionPatchContractMixin from ._session_shared import ( _REPORT_DUMMY_PATH, DEFAULT_BLOCK_MIN_LOC, @@ -77,7 +77,7 @@ ] -class MCPSession(_MCPSessionIntentMixin): +class MCPSession(_MCPSessionPatchContractMixin): def __init__(self, *, history_limit: int = DEFAULT_MCP_HISTORY_LIMIT) -> None: self._runs = CodeCloneMCPRunStore(history_limit=history_limit) self._state_lock = RLock() diff --git a/tests/fixtures/contract_snapshots/mcp_tool_schemas.json b/tests/fixtures/contract_snapshots/mcp_tool_schemas.json index 46d8d2a5..2ca786c9 100644 --- a/tests/fixtures/contract_snapshots/mcp_tool_schemas.json +++ b/tests/fixtures/contract_snapshots/mcp_tool_schemas.json @@ -870,6 +870,102 @@ "type": "object" } }, + { + "name": "check_patch_contract", + "input_schema": { + "properties": { + "mode": { + "title": "Mode", + "type": "string" + }, + "run_id": { + 
"anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Run Id" + }, + "before_run_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Before Run Id" + }, + "after_run_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "After Run Id" + }, + "intent_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Intent Id" + }, + "strictness": { + "default": "ci", + "title": "Strictness", + "type": "string" + }, + "diff_ref": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Diff Ref" + }, + "changed_files": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Changed Files" + } + }, + "required": [ + "mode" + ], + "title": "check_patch_contractArguments", + "type": "object" + } + }, { "name": "clear_session_runs", "input_schema": { diff --git a/tests/test_mcp_server.py b/tests/test_mcp_server.py index 84677173..b832bff2 100644 --- a/tests/test_mcp_server.py +++ b/tests/test_mcp_server.py @@ -125,6 +125,7 @@ def test_mcp_server_exposes_expected_read_only_tools() -> None: "get_run_summary", "get_production_triage", "get_blast_radius", + "check_patch_contract", "evaluate_gates", "get_report_section", "list_findings", @@ -157,6 +158,7 @@ def test_mcp_server_exposes_expected_read_only_tools() -> None: "get_run_summary", "get_production_triage", "get_blast_radius", + "check_patch_contract", "evaluate_gates", "help", "get_report_section", @@ -199,6 +201,7 @@ def test_mcp_server_exposes_expected_read_only_tools() -> None: ) assert "structural risk boundary" in str(tools["get_blast_radius"].description) assert "review-only context" in str(tools["get_blast_radius"].description) + assert "mode='budget'" in 
str(tools["check_patch_contract"].description) assert "Intent is session-local" in str(tools["manage_change_intent"].description) assert "bounded guidance, not a full manual" in str(tools["help"].description) assert "workflow, analysis_profile, suppressions, baseline" in str( @@ -378,6 +381,20 @@ def test_mcp_server_tool_roundtrip_and_resources(tmp_path: Path) -> None: ) assert change_intent["status"] == "active" assert intent_check["status"] == "clean" + patch_budget = _structured_tool_result( + asyncio.run( + server.call_tool( + "check_patch_contract", + { + "mode": "budget", + "run_id": run_id, + "intent_id": intent_id, + }, + ) + ) + ) + assert patch_budget["mode"] == "budget" + assert patch_budget["intent_id"] == intent_id latest_report_resource = list( asyncio.run(server.read_resource("codeclone://latest/report.json")) diff --git a/tests/test_mcp_service.py b/tests/test_mcp_service.py index f49c59c0..2c9be6ed 100644 --- a/tests/test_mcp_service.py +++ b/tests/test_mcp_service.py @@ -6,6 +6,7 @@ from __future__ import annotations +import copy import importlib import json import subprocess @@ -19,6 +20,7 @@ import pytest import codeclone.surfaces.mcp._blast_radius as mcp_blast_radius_mod +import codeclone.surfaces.mcp._patch_contract as mcp_patch_contract_mod import codeclone.surfaces.mcp._session_baseline as mcp_baseline_mod import codeclone.surfaces.mcp._session_finding_mixin as mcp_finding_mod import codeclone.surfaces.mcp._session_helpers as mcp_helpers_mod @@ -314,6 +316,71 @@ def _blast_radius_run_record( ) +def _patch_contract_report_document( + *, + digest: str, + include_regression: bool, + complexity: int, + baseline_status: str = "ok", +) -> dict[str, object]: + report_document = copy.deepcopy(_blast_radius_report_document(digest)) + report_document["meta"] = {"baseline": {"status": baseline_status}} + findings = cast("dict[str, object]", report_document["findings"]) + groups = cast("dict[str, object]", findings["groups"]) + clones = cast("dict[str, 
object]", groups["clones"]) + functions = cast( + "list[dict[str, object]]", + clones["functions"], + ) + if not include_regression: + del functions[1:] + metrics = cast("dict[str, object]", report_document["metrics"]) + families = cast("dict[str, object]", metrics["families"]) + complexity_family = cast("dict[str, object]", families["complexity"]) + complexity_items = cast( + "list[dict[str, object]]", + complexity_family["items"], + ) + complexity_items[0]["qualname"] = "pkg.b.handle" + complexity_items[0]["cyclomatic_complexity"] = complexity + return report_document + + +def _patch_contract_run_record( + root: Path, + *, + run_id: str, + digest: str, + include_regression: bool, + complexity: int, + health: int = 80, + baseline_status: str = "ok", + request: MCPAnalysisRequest | None = None, + new_func: frozenset[str] = frozenset(), +) -> MCPRunRecord: + return replace( + _dummy_run_record(root, run_id), + request=request or MCPAnalysisRequest(root=str(root), respect_pyproject=False), + report_document=_patch_contract_report_document( + digest=digest, + include_regression=include_regression, + complexity=complexity, + baseline_status=baseline_status, + ), + summary={"run_id": run_id, "health": {"score": health, "grade": "B"}}, + func_clones_count=2 if include_regression else 1, + block_clones_count=0, + new_func=new_func, + ) + + +def _payload_dicts( + payload: Mapping[str, object], + keys: tuple[str, ...], +) -> tuple[dict[str, object], ...]: + return tuple(cast("dict[str, object]", payload[key]) for key in keys) + + def _two_clone_fixture_roots(tmp_path: Path) -> tuple[Path, Path]: first_root = tmp_path / "first" second_root = tmp_path / "second" @@ -2330,6 +2397,268 @@ def test_mcp_service_manage_change_intent_validation_expiry_and_prune( assert service._active_intents == {} +def test_mcp_patch_contract_profiles_and_baseline_abuse() -> None: + ci_budget = mcp_patch_contract_mod.budgets_for_strictness( + strictness="ci", + coverage_min=None, + 
complexity_threshold=10, + coupling_threshold=5, + cohesion_threshold=3, + ) + strict_budget = mcp_patch_contract_mod.budgets_for_strictness( + strictness="strict", + coverage_min=None, + complexity_threshold=None, + coupling_threshold=None, + cohesion_threshold=None, + ) + relaxed_budget = mcp_patch_contract_mod.budgets_for_strictness( + strictness="relaxed", + coverage_min=80, + complexity_threshold=10, + coupling_threshold=5, + cohesion_threshold=3, + ) + + assert { + "clone_regression": ci_budget.clone_regression, + "complexity_delta": ci_budget.complexity_delta, + "coupling_delta": ci_budget.coupling_delta, + "cohesion_delta": ci_budget.cohesion_delta, + } == { + "clone_regression": 0, + "complexity_delta": 10, + "coupling_delta": 5, + "cohesion_delta": 3, + } + assert { + "strict_dead_code": strict_budget.dead_code_regression, + "strict_health_floor": strict_budget.health_floor, + "relaxed_clone_regression": relaxed_budget.clone_regression, + "relaxed_coverage_min": relaxed_budget.coverage_min, + } == { + "strict_dead_code": True, + "strict_health_floor": 70, + "relaxed_clone_regression": -1, + "relaxed_coverage_min": -1, + } + abuse = mcp_patch_contract_mod.detect_baseline_abuse( + before_gate_would_fail=True, + after_gate_would_fail=False, + after_baseline_status="updated", + regressions=2, + changed_files=1, + intent_available=False, + ) + assert abuse == { + "detected": True, + "triggers": [ + "baseline_changed_with_functional_code", + "baseline_updated_while_regressions_present", + "baseline_updated_without_intent", + "ci_greened_by_accepting_debt", + ], + } + assert ( + mcp_patch_contract_mod.baseline_status( + {"meta": {"baseline": {"status": "updated"}}} + ) + == "updated" + ) + + +def test_mcp_service_check_patch_contract_budget_uses_intent_and_gate_preview( + tmp_path: Path, +) -> None: + service = CodeCloneMCPService(history_limit=2) + request = MCPAnalysisRequest( + root=str(tmp_path), + respect_pyproject=False, + complexity_threshold=10, + 
coupling_threshold=5, + cohesion_threshold=3, + coverage_min=80, + ) + record = _patch_contract_run_record( + tmp_path, + run_id="abcdef1234567890", + digest="budget-digest", + include_regression=False, + complexity=6, + health=90, + request=request, + ) + service._runs.register(record) + declared = service.manage_change_intent( + action="declare", + run_id="abcdef12", + scope={"allowed_files": ["pkg/a.py"]}, + intent="adjust pkg.a behavior", + ) + + payload = service.check_patch_contract( + mode="budget", + run_id="abcdef12", + intent_id=str(declared["intent_id"]), + ) + budgets, current_state, headroom, gate_preview = _payload_dicts( + payload, + ("budgets", "current_state", "headroom", "gate_preview"), + ) + + assert payload["run_id"] == "abcdef12" + assert payload["strictness"] == "ci" + assert payload["scope"] == "changed" + assert payload["declared_scope"] == { + "allowed_files": ["pkg/a.py"], + "allowed_related": [], + "forbidden": [".cache/codeclone/**", "codeclone.baseline.json"], + } + assert ( + cast("dict[str, object]", payload["blast_radius_summary"])["radius_level"] + == "medium" + ) + assert { + "clone_regression": budgets["clone_regression"], + "complexity_delta": budgets["complexity_delta"], + "coverage_min": budgets["coverage_min"], + "complexity_max": current_state["complexity_max"], + "clone_groups": current_state["clone_groups"], + } == { + "clone_regression": 0, + "complexity_delta": 10, + "coverage_min": 80, + "complexity_max": 6, + "clone_groups": 1, + } + assert headroom["complexity_headroom"] == 4 + assert gate_preview["would_fail"] is False + + relaxed = service.check_patch_contract( + mode="budget", + run_id="abcdef12", + strictness="relaxed", + ) + assert cast("dict[str, object]", relaxed["budgets"])["clone_regression"] == -1 + assert cast("dict[str, object]", relaxed["gate_preview"])["would_fail"] is False + assert "advisory" in str(relaxed["message"]) + with pytest.raises(MCPServiceContractError, match="Invalid value for strictness"): 
+ service.check_patch_contract(mode="budget", strictness="wild") + with pytest.raises(MCPServiceContractError, match="Invalid value for mode"): + service.check_patch_contract(mode="inspect") + + +def test_mcp_service_check_patch_contract_verify_composes_existing_primitives( + tmp_path: Path, +) -> None: + service = CodeCloneMCPService(history_limit=4) + before = _patch_contract_run_record( + tmp_path, + run_id="before1234567890", + digest="before-digest", + include_regression=False, + complexity=6, + health=85, + ) + after = _patch_contract_run_record( + tmp_path, + run_id="after1234567890", + digest="after-digest", + include_regression=True, + complexity=14, + health=70, + baseline_status="updated", + new_func=frozenset({"clone:function:g2"}), + ) + service._runs.register(before) + declared = service.manage_change_intent( + action="declare", + run_id="before12", + scope={"allowed_files": ["pkg/a.py"]}, + intent="adjust pkg.a behavior", + expected_effects=["no new clone group"], + ) + service._runs.register(after) + + verified = service.check_patch_contract( + mode="verify", + before_run_id="before12", + after_run_id="after12", + intent_id=str(declared["intent_id"]), + changed_files=["pkg/a.py"], + ) + structural_delta, gate_preview, scope_check, baseline_abuse = _payload_dicts( + verified, + ("structural_delta", "gate_preview", "scope_check", "baseline_abuse"), + ) + worsened = cast("list[dict[str, object]]", verified["worsened"]) + + assert verified["status"] == "violated" + assert len(cast("list[dict[str, object]]", structural_delta["regressions"])) == 1 + assert structural_delta["verdict"] == "regressed" + assert gate_preview["would_fail"] is True + assert scope_check["status"] == "clean" + assert scope_check["actual_changed_files"] == ["pkg/a.py"] + assert baseline_abuse["triggers"] == [ + "baseline_changed_with_functional_code", + "baseline_updated_while_regressions_present", + ] + assert worsened[0] == { + "family": "complexity", + "path": "pkg/b.py", + 
"symbol": "pkg.b.handle", + "before": 6, + "after": 14, + "delta": 8, + } + assert verified["contract_violations"] == [ + "structural_regressions", + "gate_failures", + "baseline_abuse:baseline_changed_with_functional_code", + "baseline_abuse:baseline_updated_while_regressions_present", + ] + assert verified["blocking_violations"] == verified["contract_violations"] + + relaxed = service.check_patch_contract( + mode="verify", + before_run_id="before12", + after_run_id="after12", + intent_id=str(declared["intent_id"]), + strictness="relaxed", + changed_files=["pkg/a.py"], + ) + assert relaxed["status"] == "accepted" + assert relaxed["contract_violations"] == [ + "structural_regressions", + "baseline_abuse:baseline_changed_with_functional_code", + "baseline_abuse:baseline_updated_while_regressions_present", + ] + assert relaxed["blocking_violations"] == [] + + no_before = service.check_patch_contract(mode="verify") + no_after = service.check_patch_contract( + mode="verify", + before_run_id="before12", + ) + unknown_before = service.check_patch_contract( + mode="verify", + before_run_id="missing", + ) + unknown_after = service.check_patch_contract( + mode="verify", + before_run_id="before12", + after_run_id="missing", + ) + assert no_before["status"] == "unverified" + assert no_before["reason"] == "no_before_run" + assert no_after["status"] == "unverified" + assert no_after["reason"] == "no_after_run" + assert unknown_before["status"] == "unverified" + assert unknown_before["reason"] == "no_before_run" + assert unknown_after["status"] == "unverified" + assert unknown_after["reason"] == "no_after_run" + + def test_mcp_service_branch_helpers_on_real_runs( tmp_path: Path, monkeypatch: pytest.MonkeyPatch, From d67776e0a5156cced6957fe8f9f91006af922838 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Fri, 22 May 2026 20:39:49 +0500 Subject: [PATCH 007/318] docs: document patch contract surface --- CHANGELOG.md | 3 + README.md | 89 +++++++++++--------- docs/README-pypi.md 
| 14 +-- docs/book/20-mcp-interface.md | 3 +- docs/book/24-structural-change-controller.md | 21 +++-- docs/mcp.md | 6 +- 6 files changed, 80 insertions(+), 56 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5c0a7951..82c6af96 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,9 @@ - Add MCP `manage_change_intent` for session-local change intent lifecycle: declare intended scope, inspect active intent, check actual changed files against scope, and clear intent state. +- Add MCP `check_patch_contract` with read-only `budget` and `verify` modes: + pre-edit gate budget/headroom, post-edit before/after comparison, gate + preview, intent-scope validation, and baseline-abuse signals. ### Internal diff --git a/README.md b/README.md index 8b7ebb03..ff10b9c7 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,20 @@ --- +CodeClone is a **structural change controller** for Python — deterministic static analysis that +combines clone detection, code-quality metrics, and baseline-aware CI gating with first-class +governance for AI coding agents. + +In the current v2.1 alpha, CodeClone records the declared intent before the first edit, maps the +structural blast radius, and verifies explicit before/after runs against the patch contract. +Receipt and claim-guard tools are planned next. + +**One canonical analysis, many surfaces.** CLI, HTML reports, IDE, and MCP all read the same +deterministic facts — for both human reviewers and AI agents. + +Docs: [orenlab.github.io/codeclone](https://orenlab.github.io/codeclone/) · +Live sample report: [orenlab.github.io/codeclone/examples/report/](https://orenlab.github.io/codeclone/examples/report/) + > [!NOTE] > This README tracks the in-development **v2.1** line. > For the latest stable release, see the @@ -33,36 +47,24 @@ > and the > [`v2.0.2` docs](https://github.com/orenlab/codeclone/tree/v2.0.2/docs). -CodeClone is a **structural change controller** for Python. 
It starts before the -first edit — when an agent declares what it intends to change — maps the -structural blast radius, verifies that the patch stayed inside its declared -boundary, and leaves an auditable receipt. - -**One canonical analysis.** The same **deterministic facts** across CLI, HTML reports, -IDE, and MCP — for both **human reviewers** and **AI agents**. - -Docs: [orenlab.github.io/codeclone](https://orenlab.github.io/codeclone/) · -Live sample report: [orenlab.github.io/codeclone/examples/report/](https://orenlab.github.io/codeclone/examples/report/) - ## Change Controller -When an AI agent edits code, CodeClone governs the structural boundary: +When an AI agent edits code, CodeClone governs the structural boundary across five stages: -| Step | Tool | What it does | -|------|------|-------------| -| 1. Declare intent | `manage_change_intent` | Agent states what it plans to change, which files, and why | -| 2. Map blast radius | `get_blast_radius` | Reverse imports, clone cohorts, dependency cycles, do-not-touch signals | -| 3. Check patch contract | planned | Pre-edit regression budget with headroom; post-edit boundary verification | -| 4. Generate receipt | planned | Auditable artifact linking intent, scope, patch status, and structural delta | -| 5. Validate claims | planned | Cross-check the agent's review text against the canonical report | +| Step | Tool | What it does | +|-------------------------|------------------------|------------------------------------------------------------------------------| +| 1. Declare intent | `manage_change_intent` | Agent states what it plans to change, which files, and why | +| 2. Map blast radius | `get_blast_radius` | Reverse imports, clone cohorts, dependency cycles, do-not-touch signals | +| 3. Check patch contract | `check_patch_contract` | Pre-edit regression budget with headroom; post-edit boundary verification | +| 4. 
Generate receipt | _planned_ | Auditable artifact linking intent, scope, patch status, and structural delta | +| 5. Validate claims | _planned_ | Cross-check the agent's review text against the canonical report | -Each step is deterministic — structural facts from the canonical report, no LLM inference. +Every step is deterministic — structural facts from the canonical report, no LLM inference. -The v2.1 alpha starts with two live MCP tools, `manage_change_intent` and -`get_blast_radius`, composed over the existing read-only analysis surface. -Patch contract, receipt, and claim guard tools are planned follow-ups in the -same controller line. Controller state is session-local and in-memory — no -files created, no repo state mutated. +The v2.1 alpha ships steps 1–3 as live MCP tools (`manage_change_intent`, `get_blast_radius`, +`check_patch_contract`) composed over the existing read-only analysis surface. Steps 4–5 are +planned follow-ups in the same controller line. Controller state is session-local and +in-memory — no files created, no repo state mutated. 
Change controller docs: [Structural Change Controller](https://orenlab.github.io/codeclone/book/24-structural-change-controller/) @@ -72,14 +74,14 @@ Change controller docs: [Structural Change Controller](https://orenlab.github.io - **Intent declaration** — agent states what it plans to change; CodeClone tracks scope, expiry, and status - **Blast radius** — structural risk projection: reverse imports, clone cohorts, dependency cycles, do-not-touch signals -- **Patch contract** — planned pre-edit regression budget and post-edit boundary verification -- **Review receipt** — planned auditable artifact linking intent, scope, patch verification, and structural delta -- **Claim guard** — planned citation-based validation of review text against the canonical report +- **Patch contract** — pre-edit regression budget and post-edit boundary verification over explicit before/after runs +- **Review receipt** _(planned)_ — auditable artifact linking intent, scope, patch verification, and structural delta +- **Claim guard** _(planned)_ — citation-based validation of review text against the canonical report **Baseline governance** - **Regression isolation** — separates accepted **legacy** debt from **new regressions**; CI fails only on what changed -- **CI-first** — deterministic output, stable ordering, exit code contract, pre-commit support +- **CI-first** — deterministic output, stable ordering, exit-code contract, pre-commit support - **Reports** — interactive HTML, JSON, Markdown, SARIF, and text from one canonical report **Detection & analysis** @@ -90,11 +92,11 @@ Change controller docs: [Structural Change Controller](https://orenlab.github.io profile, dead code, health score, and overloaded-module profiling - **Adoption & API** — type/docstring annotation coverage, public API surface inventory and baseline diff - **Coverage Join** — fuse external Cobertura XML into the current run to surface coverage hotspots and scope gaps -- **Security Surfaces** — report-only inventory 
of security-relevant capability boundaries without vulnerability claims +- **Security surfaces** — report-only inventory of security-relevant capability boundaries (no vulnerability claims) **Surfaces & integrations** -- **MCP control surface** — 23-tool agent and IDE interface over the same canonical pipeline; read-only by contract +- **MCP control surface** — 24-tool agent and IDE interface over the same canonical pipeline; read-only by contract - **IDE & agent clients** — VS Code extension, Claude Desktop bundle, and Codex plugin over the same MCP contract **Performance** @@ -103,11 +105,15 @@ Change controller docs: [Structural Change Controller](https://orenlab.github.io ## How It Works +CodeClone runs a single deterministic pipeline and emits one canonical JSON report. Every +other surface — HTML, Markdown, SARIF, MCP, IDE — is a projection of that report, so structural +facts stay consistent across consumers. +
Pipeline overview
CodeClone pipeline @@ -142,7 +148,7 @@ uvx codeclone@latest . ## Quick Start ```bash -codeclone . # analyze +codeclone . # analyze current directory codeclone . --html # HTML report codeclone . --html --open-html-report # open in browser codeclone . --json --md --sarif --text # all formats @@ -199,8 +205,8 @@ CodeClone ships a composite GitHub Action for PR and CI workflows: pr-comment: "true" ``` -It can run baseline-aware gating, generate JSON and SARIF reports, upload SARIF to GitHub Code Scanning, -and post or update a PR summary comment. +It runs baseline-aware gating, generates JSON and SARIF reports, uploads SARIF to GitHub Code +Scanning, and posts or updates a PR summary comment. Action docs: [.github/actions/codeclone/README.md](https://github.com/orenlab/codeclone/blob/main/.github/actions/codeclone/README.md) @@ -246,8 +252,8 @@ repos: ## MCP Control Surface -23-tool MCP server for AI agents and IDE clients, built on the same canonical pipeline as the CLI. -Read-only by contract: never mutates source, baselines, or repo state. +A 24-tool MCP server for AI agents and IDE clients, built on the same canonical pipeline as the CLI. +Read-only by contract — never mutates source, baselines, or repo state. ```bash # local stdio clients @@ -257,10 +263,10 @@ codeclone-mcp --transport stdio codeclone-mcp --transport streamable-http ``` -21 analysis and triage tools provide the canonical read-only surface. 2 phase-1 -change controller tools (`manage_change_intent`, `get_blast_radius`) compose -over that surface to govern the structural boundary of AI-assisted changes. -Patch contract, review receipt, and claim guard are planned v2.1 follow-ups. +Of the 24 tools, 21 expose the canonical read-only analysis and triage surface. The remaining +three — `manage_change_intent`, `get_blast_radius`, and `check_patch_contract` — are the change +controller, composed over that surface to govern the structural boundary of AI-assisted edits. 
+Review receipt and claim guard tools are planned v2.1 follow-ups. > [!WARNING] > Analysis tools require an absolute repository root. Relative roots such as `.` are rejected. @@ -507,7 +513,6 @@ Versions released before this change remain under their original license terms. - **Licenses:** [MPL-2.0](https://github.com/orenlab/codeclone/blob/main/LICENSE) · [MIT docs](https://github.com/orenlab/codeclone/blob/main/LICENSE-MIT) · [Scope map](https://github.com/orenlab/codeclone/blob/main/LICENSES.md) - [pypi-shield]: https://img.shields.io/pypi/v/codeclone?style=flat-square&color=6366f1 [status-shield]: https://img.shields.io/pypi/status/codeclone?style=flat-square&color=6366f1 [downloads-shield]: https://img.shields.io/pypi/dm/codeclone?style=flat-square&color=6366f1 diff --git a/docs/README-pypi.md b/docs/README-pypi.md index c7d8542e..8ce5cc3e 100644 --- a/docs/README-pypi.md +++ b/docs/README-pypi.md @@ -27,10 +27,10 @@ Python

-CodeClone is a structural change controller for Python. It starts before the -first edit — when an agent declares what it intends to change — maps the -structural blast radius, verifies that the patch stayed inside its declared -boundary, and leaves an auditable receipt. +CodeClone is a structural change controller for Python. The v2.1 alpha starts +before the first edit — when an agent declares what it intends to change — +maps the structural blast radius, and verifies explicit before/after runs +against the patch contract. Receipt and claim-guard tools are planned next. The same analysis pipeline powers CLI reports, CI checks, the MCP server, and native IDE/agent clients — so humans and AI agents operate on identical, @@ -47,7 +47,7 @@ When an AI agent edits code, CodeClone governs the structural boundary: 1. **Declare intent** — agent states what it plans to change, which files, and why 2. **Map blast radius** — reverse imports, clone cohorts, dependency cycles, do-not-touch signals -3. **Check patch contract** — planned pre-edit regression budget and post-edit boundary verification +3. **Check patch contract** — pre-edit regression budget and post-edit boundary verification 4. **Generate receipt** — planned auditable artifact: intent + scope + patch status + structural delta 5. 
**Validate claims** — planned cross-check of review text against the canonical report @@ -60,7 +60,7 @@ Docs: Date: Fri, 22 May 2026 20:39:59 +0500 Subject: [PATCH 008/318] chore(claude): expose patch contract metadata --- extensions/claude-desktop-codeclone/README.md | 2 +- extensions/claude-desktop-codeclone/manifest.json | 6 +++++- extensions/claude-desktop-codeclone/test/manifest.test.js | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/extensions/claude-desktop-codeclone/README.md b/extensions/claude-desktop-codeclone/README.md index 499a7f7a..b2136f0e 100644 --- a/extensions/claude-desktop-codeclone/README.md +++ b/extensions/claude-desktop-codeclone/README.md @@ -3,7 +3,7 @@ Structural change controller for Python — local MCP bundle wrapper for `codeclone-mcp`. Installs as a `.mcpb` package instead of manual JSON editing. -Same canonical 23-tool MCP surface used by CLI, VS Code, Codex, and Claude Code. +Same canonical 24-tool MCP surface used by CLI, VS Code, Codex, and Claude Code. Read-only, baseline-aware, local stdio only. As the local `codeclone-mcp` server gains new canonical surfaces, the bundle exposes them without adding a second client-side interpretation layer. diff --git a/extensions/claude-desktop-codeclone/manifest.json b/extensions/claude-desktop-codeclone/manifest.json index 4ed8d2dc..dcc87a8f 100644 --- a/extensions/claude-desktop-codeclone/manifest.json +++ b/extensions/claude-desktop-codeclone/manifest.json @@ -4,7 +4,7 @@ "display_name": "CodeClone", "version": "2.1.0", "description": "Structural change controller for Python — deterministic, baseline-aware, built for CI and AI agents.", - "long_description": "CodeClone for Claude Desktop wraps the local codeclone-mcp launcher as an MCP bundle. It keeps Claude on the same canonical 23-tool MCP surface used by the CLI, HTML report, VS Code extension, and Codex plugin — read-only, baseline-aware, local stdio only. 
The v2.1 change controller starts with intent declaration and blast radius; patch contract, review receipt, and claim guard are planned follow-ups.", + "long_description": "CodeClone for Claude Desktop wraps the local codeclone-mcp launcher as an MCP bundle. It keeps Claude on the same canonical 24-tool MCP surface used by the CLI, HTML report, VS Code extension, and Codex plugin — read-only, baseline-aware, local stdio only. The v2.1 change controller includes intent declaration, blast radius, and patch contract; review receipt and claim guard are planned follow-ups.", "author": { "name": "Den Rozhnovskiy", "email": "pytelemonbot@mail.ru", @@ -56,6 +56,10 @@ "name": "manage_change_intent", "description": "Change intent lifecycle: declare scope, get status, check diff, clear intent." }, + { + "name": "check_patch_contract", + "description": "Patch contract checks: budget before editing, verify before/after runs after editing." + }, { "name": "list_hotspots", "description": "Priority-ranked hotspot views by kind." 
diff --git a/extensions/claude-desktop-codeclone/test/manifest.test.js b/extensions/claude-desktop-codeclone/test/manifest.test.js index 1df293d2..c12dd24e 100644 --- a/extensions/claude-desktop-codeclone/test/manifest.test.js +++ b/extensions/claude-desktop-codeclone/test/manifest.test.js @@ -32,6 +32,6 @@ test("manifest keeps the setup surface bounded and local", () => { ]); assert.equal(manifest.documentation, "https://orenlab.github.io/codeclone/claude-desktop-bundle/"); assert.equal(manifest.tools_generated, true); - assert.equal(manifest.tools.length, 23); + assert.equal(manifest.tools.length, 24); assert.equal("instructions" in manifest, false); }); From 65bd7fcade7d99249af107f0799092fd79d74991 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sat, 23 May 2026 11:39:04 +0500 Subject: [PATCH 009/318] docs(plugin): add change control skill --- .../skills/codeclone-change-control/SKILL.md | 194 ++++++++++++++++++ 1 file changed, 194 insertions(+) create mode 100644 plugins/codeclone/skills/codeclone-change-control/SKILL.md diff --git a/plugins/codeclone/skills/codeclone-change-control/SKILL.md b/plugins/codeclone/skills/codeclone-change-control/SKILL.md new file mode 100644 index 00000000..dd4e9dee --- /dev/null +++ b/plugins/codeclone/skills/codeclone-change-control/SKILL.md @@ -0,0 +1,194 @@ +--- +name: codeclone-change-control +description: Use when Codex should modify a Python repository through CodeClone MCP — intent-first change workflow, blast radius, scoped edits, patch verification, and review receipt. +--- + +# CodeClone Change Control + +Use this skill when the task requires changing files in a Python repository with +CodeClone MCP available. + +This is not a passive review workflow. CodeClone starts before a diff exists: +declare intent, inspect structural risk, edit inside scope, then verify the +patch. + +## Activation contract + +Use this workflow whenever this skill is selected for a repository edit. 
Start +with a pre-edit MCP run and keep the returned `run_id` and `intent_id` for +verification. If a required MCP tool is unavailable in the connected server, +continue with the available steps and state which step was skipped. + +Do not downgrade the task to an ordinary edit after this skill has been +selected. The only valid reasons to skip the workflow are: no repository files +will be changed, CodeClone MCP is unavailable, or the user explicitly asks for +analysis only. + +## Rules + +- Use MCP tools only when invoked through the CodeClone plugin. +- If no latest MCP run exists, call `analyze_repository` yourself before + declaring intent. +- Declare intent before editing. +- Do not silently expand scope. +- Treat blast-radius dependents and clone cohorts as review context, not + permission to modify. +- Treat `do_not_touch` as a boundary unless the user explicitly expands scope. +- Treat `review_context` as context, not an edit ban. +- Do not update baselines, cache, or generated reports as part of a functional + change. +- Do not fall back to CLI or local report files. +- CodeClone is the source of truth — do not reinterpret findings independently. +- Never auto-suppress findings or mutate CodeClone baseline state. + +## Workflow + +``` +analyze_repository +→ manage_change_intent(action="declare") +→ get_blast_radius +→ check_patch_contract(mode="budget") +→ edit code +→ analyze_repository +→ manage_change_intent(action="check") +→ check_patch_contract(mode="verify") +→ validate_review_claims +→ create_review_receipt +``` + +Use the full path for normal code changes. Skip only steps that are unavailable +in the connected CodeClone MCP server. +Keep the pre-edit `run_id` as `before_run_id`; verify against the explicit +after-run produced after the edit. 
+ +## Intent first + +Before editing, call: + +``` +manage_change_intent(action="declare") +``` + +Declare: + +- intended files +- allowed related files +- forbidden files +- short intent +- expected effects + +Example expected effects: + +- no new clone group +- no new dead code +- no dependency cycle +- no baseline update + +## Scope expansion + +If the fix requires a file outside declared scope: + +1. stop; +2. explain why the extra file is needed; +3. redeclare intent with the expanded scope; +4. continue only after the new intent is active. + +A patch that fixes the issue but expands scope silently is a failed patch. + +## Blast radius + +Use: + +``` +get_blast_radius +``` + +Read the response this way: + +- `direct_dependents` / `transitive_dependents`: review before changing public + behavior +- `clone_cohort_members`: comparison context, not automatic edit targets +- `structural_risk`: risk context for review priority +- `do_not_touch`: paths that require explicit approval or a separate workflow +- `review_context`: supporting context, not a ban + +## Patch budget + +Before editing, call: + +``` +check_patch_contract(mode="budget") +``` + +Use the returned budget as the edit boundary. Do not introduce new clone groups, +dead code, dependency cycles, API breaks, or baseline changes unless explicitly +allowed. + +## Patch verification + +After editing, run analysis again, then call: + +``` +manage_change_intent(action="check") +check_patch_contract(mode="verify") +``` + +If the result is `unverified`, report what is missing. Do not claim the patch is +verified. + +If the result is `violated`, stop and explain the violation instead of continuing +to broaden the patch. 
+ +## Claim discipline + +When writing a summary, call: + +``` +validate_review_claims +``` + +Do not claim: + +- report-only signals are CI failures +- Security Surfaces are vulnerabilities +- known baseline debt is a new regression +- dead code exists where runtime reachability evidence says otherwise +- a fix is verified without an after-run and patch contract check + +## Review receipt + +At the end, call: + +``` +create_review_receipt +``` + +The final user summary should include: + +- declared scope +- scope expansion, if any +- blast radius summary +- patch contract status +- remaining human decisions +- receipt location or payload, if returned + +## Success criteria + +The task is complete only when: + +- intent was declared before editing +- blast radius was inspected +- edits stayed inside declared scope, or expansion was explicit +- patch contract was checked +- baseline/cache/generated state was not changed accidentally +- claims were validated when a review summary was written +- a review receipt was created when available + +## Non-goals + +- Do not use this skill for quick hotspot snapshots; use `codeclone-hotspots`. +- Do not use this skill for passive structural review with no edits; use + `codeclone-review`. +- Do not auto-fix unrelated findings. +- Do not turn report-only context into gates. +- Do not make baseline refresh part of a functional patch. 
From 37ecc6a1d0d951ca8bad05b629f14e22d2c70358 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sat, 23 May 2026 12:46:48 +0500 Subject: [PATCH 010/318] feat(mcp): add review receipts --- CHANGELOG.md | 4 + README.md | 28 +- codeclone/surfaces/mcp/_review_receipt.py | 364 ++++++++++++++++ .../mcp/_session_review_receipt_mixin.py | 387 ++++++++++++++++++ codeclone/surfaces/mcp/server.py | 27 ++ codeclone/surfaces/mcp/service.py | 13 + codeclone/surfaces/mcp/session.py | 4 +- docs/README-pypi.md | 9 +- docs/book/20-mcp-interface.md | 9 +- docs/book/24-structural-change-controller.md | 37 +- docs/mcp.md | 7 +- extensions/claude-desktop-codeclone/README.md | 5 +- .../claude-desktop-codeclone/manifest.json | 6 +- .../test/manifest.test.js | 2 +- .../contract_snapshots/mcp_tool_schemas.json | 48 +++ tests/test_mcp_server.py | 28 ++ tests/test_mcp_service.py | 364 ++++++++++++++-- 17 files changed, 1279 insertions(+), 63 deletions(-) create mode 100644 codeclone/surfaces/mcp/_review_receipt.py create mode 100644 codeclone/surfaces/mcp/_session_review_receipt_mixin.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 82c6af96..df87453d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,10 @@ - Add MCP `check_patch_contract` with read-only `budget` and `verify` modes: pre-edit gate budget/headroom, post-edit before/after comparison, gate preview, intent-scope validation, and baseline-abuse signals. +- Add MCP `create_review_receipt` for deterministic markdown/JSON audit + artifacts that compose report provenance, intent scope, blast radius, + reviewed findings, structural delta, patch-contract status, human decision + points, and claims-not-made without mutating repository state. ### Internal diff --git a/README.md b/README.md index ff10b9c7..87c126ff 100644 --- a/README.md +++ b/README.md @@ -31,8 +31,8 @@ combines clone detection, code-quality metrics, and baseline-aware CI gating wit governance for AI coding agents. 
In the current v2.1 alpha, CodeClone records the declared intent before the first edit, maps the -structural blast radius, and verifies explicit before/after runs against the patch contract. -Receipt and claim-guard tools are planned next. +structural blast radius, verifies explicit before/after runs against the patch contract, and +generates auditable review receipts. The claim-guard tool is planned next. **One canonical analysis, many surfaces.** CLI, HTML reports, IDE, and MCP all read the same deterministic facts — for both human reviewers and AI agents. @@ -56,15 +56,15 @@ When an AI agent edits code, CodeClone governs the structural boundary across fi | 1. Declare intent | `manage_change_intent` | Agent states what it plans to change, which files, and why | | 2. Map blast radius | `get_blast_radius` | Reverse imports, clone cohorts, dependency cycles, do-not-touch signals | | 3. Check patch contract | `check_patch_contract` | Pre-edit regression budget with headroom; post-edit boundary verification | -| 4. Generate receipt | _planned_ | Auditable artifact linking intent, scope, patch status, and structural delta | +| 4. Generate receipt | `create_review_receipt` | Auditable artifact linking intent, scope, patch status, and structural delta | | 5. Validate claims | _planned_ | Cross-check the agent's review text against the canonical report | Every step is deterministic — structural facts from the canonical report, no LLM inference. -The v2.1 alpha ships steps 1–3 as live MCP tools (`manage_change_intent`, `get_blast_radius`, -`check_patch_contract`) composed over the existing read-only analysis surface. Steps 4–5 are -planned follow-ups in the same controller line. Controller state is session-local and -in-memory — no files created, no repo state mutated. +The v2.1 alpha ships steps 1–4 as live MCP tools (`manage_change_intent`, `get_blast_radius`, +`check_patch_contract`, `create_review_receipt`) composed over the existing read-only analysis +surface. 
Step 5 is a planned follow-up in the same controller line. Controller state is +session-local and in-memory — no files created, no repo state mutated. Change controller docs: [Structural Change Controller](https://orenlab.github.io/codeclone/book/24-structural-change-controller/) @@ -75,7 +75,7 @@ Change controller docs: [Structural Change Controller](https://orenlab.github.io - **Intent declaration** — agent states what it plans to change; CodeClone tracks scope, expiry, and status - **Blast radius** — structural risk projection: reverse imports, clone cohorts, dependency cycles, do-not-touch signals - **Patch contract** — pre-edit regression budget and post-edit boundary verification over explicit before/after runs -- **Review receipt** _(planned)_ — auditable artifact linking intent, scope, patch verification, and structural delta +- **Review receipt** — auditable artifact linking intent, scope, patch verification, and structural delta - **Claim guard** _(planned)_ — citation-based validation of review text against the canonical report **Baseline governance** @@ -96,7 +96,7 @@ Change controller docs: [Structural Change Controller](https://orenlab.github.io **Surfaces & integrations** -- **MCP control surface** — 24-tool agent and IDE interface over the same canonical pipeline; read-only by contract +- **MCP control surface** — 25-tool agent and IDE interface over the same canonical pipeline; read-only by contract - **IDE & agent clients** — VS Code extension, Claude Desktop bundle, and Codex plugin over the same MCP contract **Performance** @@ -252,7 +252,7 @@ repos: ## MCP Control Surface -A 24-tool MCP server for AI agents and IDE clients, built on the same canonical pipeline as the CLI. +A 25-tool MCP server for AI agents and IDE clients, built on the same canonical pipeline as the CLI. Read-only by contract — never mutates source, baselines, or repo state. 
```bash @@ -263,10 +263,10 @@ codeclone-mcp --transport stdio codeclone-mcp --transport streamable-http ``` -Of the 24 tools, 21 expose the canonical read-only analysis and triage surface. The remaining -three — `manage_change_intent`, `get_blast_radius`, and `check_patch_contract` — are the change -controller, composed over that surface to govern the structural boundary of AI-assisted edits. -Review receipt and claim guard tools are planned v2.1 follow-ups. +Of the 25 tools, 21 expose the canonical read-only analysis and triage surface. The remaining +four — `manage_change_intent`, `get_blast_radius`, `check_patch_contract`, and +`create_review_receipt` — are the change controller, composed over that surface to govern the +structural boundary of AI-assisted edits. The claim guard tool is a planned v2.1 follow-up. > [!WARNING] > Analysis tools require an absolute repository root. Relative roots such as `.` are rejected. diff --git a/codeclone/surfaces/mcp/_review_receipt.py b/codeclone/surfaces/mcp/_review_receipt.py new file mode 100644 index 00000000..870f1b6d --- /dev/null +++ b/codeclone/surfaces/mcp/_review_receipt.py @@ -0,0 +1,364 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Mapping, Sequence +from enum import Enum +from typing import Final, Literal + +from ...contracts import REPORT_SCHEMA_VERSION + +RECEIPT_VERSION: Final = "1" +ReceiptFormat = Literal["json", "markdown"] +VALID_RECEIPT_FORMATS: Final[frozenset[str]] = frozenset({"json", "markdown"}) +MAX_HUMAN_DECISION_POINTS: Final = 10 + + +class ReceiptVerdict(str, Enum): + CLEAN = "clean" + INCOMPLETE = "incomplete" + NEEDS_ATTENTION = "needs_attention" + + +class ReceiptPatchStatus(str, Enum): + ACCEPTED = "accepted" + VIOLATED = "violated" + NOT_CHECKED = "not_checked" + + +CLAIMS_NOT_MADE: Final[tuple[dict[str, str], ...]] = ( + { + "claim_type": "security_vulnerability", + "reason": ( + "Security Surfaces are report-only trust-boundary inventory, " + "not vulnerability claims." + ), + }, + { + "claim_type": "baseline_regression", + "reason": ( + "Known baseline debt was not treated as a new regression; " + "novelty='known' remains baseline context." 
+ ), + }, + { + "claim_type": "report_only_ci_failure", + "reason": ("Report-only signals were not treated as CI gate failures."), + }, +) + + +def derive_baseline_status(report_document: Mapping[str, object]) -> str: + meta = _as_mapping(report_document.get("meta")) + baseline = _as_mapping(meta.get("baseline")) + if not bool(baseline.get("loaded", False)): + return "not_loaded" + status = str(baseline.get("status", "")).strip().lower() + if bool(baseline.get("trusted_for_diff", False)) or status == "ok": + return "trusted" + return "untrusted" + + +def derive_patch_status( + *, + gate_result: Mapping[str, object] | None, + intent_check_status: str | None, + regressions: int, + has_structural_delta: bool, +) -> str: + if intent_check_status == "violated": + return ReceiptPatchStatus.VIOLATED.value + if gate_result is not None and bool(gate_result.get("would_fail")): + return ReceiptPatchStatus.VIOLATED.value + if regressions > 0: + return ReceiptPatchStatus.VIOLATED.value + if gate_result is None and intent_check_status is None and not has_structural_delta: + return ReceiptPatchStatus.NOT_CHECKED.value + return ReceiptPatchStatus.ACCEPTED.value + + +def derive_human_decision_points( + *, + changed_findings: Sequence[Mapping[str, object]], + intent_status: str | None, +) -> list[dict[str, object]]: + points: list[dict[str, object]] = [] + for finding in changed_findings: + if str(finding.get("family", "")).strip() == "clone": + points.append( + _decision_point( + category="clone_divergence", + finding_id=str(finding.get("id", "")), + reason=( + "Clone cohort member was in changed scope; " + "confirm divergence is intentional." + ), + ) + ) + if str(finding.get("novelty", "")).strip() == "known": + points.append( + _decision_point( + category="baseline_debt_touched", + finding_id=str(finding.get("id", "")), + reason=( + "Known baseline finding was in changed scope; " + "confirm whether the patch addresses or preserves it." 
+ ), + ) + ) + if intent_status == "expanded": + points.append( + _decision_point( + category="scope_expansion", + finding_id="", + reason=( + "Edit scope expanded beyond declared files; " + "human confirmation is required." + ), + ) + ) + return _numbered_decisions(points[:MAX_HUMAN_DECISION_POINTS]) + + +def derive_claims_not_made( + report_document: Mapping[str, object], +) -> list[dict[str, object]]: + claims: list[dict[str, object]] = [dict(item) for item in CLAIMS_NOT_MADE] + if _suppressed_clone_count(report_document) > 0: + claims.append( + { + "claim_type": "suppressed_clone_regression", + "reason": ( + "Suppressed clone groups were not counted as active new " + "regressions." + ), + } + ) + return claims + + +def receipt_verdict( + *, + reviewed_count: int, + gate_relevant_count: int, + patch_status: str, + human_decision_count: int, +) -> str: + if patch_status == ReceiptPatchStatus.VIOLATED.value: + return ReceiptVerdict.NEEDS_ATTENTION.value + if human_decision_count > 0: + return ReceiptVerdict.NEEDS_ATTENTION.value + if patch_status == ReceiptPatchStatus.NOT_CHECKED.value: + return ReceiptVerdict.INCOMPLETE.value + if gate_relevant_count > 0 and reviewed_count < gate_relevant_count: + return ReceiptVerdict.INCOMPLETE.value + return ReceiptVerdict.CLEAN.value + + +def render_receipt_markdown(receipt: Mapping[str, object]) -> str: + provenance = _as_mapping(receipt.get("provenance")) + scope = _optional_mapping(receipt.get("scope")) + blast_radius = _optional_mapping(receipt.get("blast_radius")) + reviewed = _as_mapping(receipt.get("reviewed_evidence")) + patch = _optional_mapping(receipt.get("patch_contract")) + structural_delta = _as_mapping(receipt.get("structural_delta")) + health = _as_mapping(receipt.get("health")) + decisions = _mapping_rows(receipt.get("human_decision_points")) + claims = _mapping_rows(receipt.get("claims_not_made")) + + lines = [ + "## CodeClone Agent Review Receipt", + "", + f"**Report:** `{provenance.get('report_digest', 
'unknown')}`", + ( + f"**Schema:** " + f"`{provenance.get('report_schema_version', REPORT_SCHEMA_VERSION)}`" + ), + f"**Baseline:** {provenance.get('baseline_status', 'unknown')}", + "**Review contract:** v1", + "", + "---", + "", + "### Scope", + ] + if scope is None: + lines.append("No intent declared.") + else: + lines.extend( + [ + f"**Intent:** {scope.get('intent_description') or 'none'}", + f"**Status:** {scope.get('intent_status') or 'unknown'}", + f"**Declared files:** {_inline_paths(scope.get('declared_files'))}", + f"**Changed files:** {_inline_paths(scope.get('changed_files'))}", + f"**Unexpected files:** {_inline_paths(scope.get('unexpected_files'))}", + ] + ) + lines.extend(["", "### Blast Radius"]) + if blast_radius is None: + lines.append("Not available.") + else: + lines.extend( + [ + f"**Level:** {blast_radius.get('radius_level', 'unknown')}", + ( + f"**Direct dependents:** " + f"{blast_radius.get('direct_dependents_count', 0)}" + ), + ( + f"**Clone cohort members:** " + f"{blast_radius.get('clone_cohort_members_count', 0)}" + ), + ( + f"**Do-not-touch entries:** " + f"{blast_radius.get('do_not_touch_count', 0)}" + ), + ] + ) + lines.extend(["", "### Reviewed Evidence"]) + lines.append( + f"**Reviewed:** {reviewed.get('reviewed_count', 0)} / " + f"{reviewed.get('total_gate_relevant', 0)} gate-relevant findings" + ) + for item in _mapping_rows(reviewed.get("items")): + note = item.get("note") + suffix = f" - note: {note}" if note else "" + lines.append( + f"- `{item.get('finding_id', '')}`: {item.get('kind', 'finding')}" + f" ({item.get('severity', 'info')}){suffix}" + ) + if not _mapping_rows(reviewed.get("items")): + lines.append("- none") + lines.extend(["", "### Patch Contract"]) + if patch is None: + lines.append("Not available.") + else: + lines.extend( + [ + f"**Status:** {patch.get('status', 'not_checked')}", + f"**Regressions:** {patch.get('regressions', 0)}", + f"**Improvements:** {patch.get('improvements', 0)}", + f"**Health delta:** 
{_signed_delta(patch.get('health_delta'))}", + ] + ) + lines.extend( + [ + "", + "### Structural Delta", + f"**Verdict:** {structural_delta.get('verdict', 'stable')}", + f"**Health delta:** {_signed_delta(structural_delta.get('health_delta'))}", + "", + "### Human Decisions Requested", + ] + ) + if decisions: + lines.extend( + f"- **{decision.get('id', '')}:** {decision.get('reason', '')}" + for decision in decisions + ) + else: + lines.append("- none") + lines.extend(["", "### Claims Not Made"]) + lines.extend(f"- {claim.get('reason', '')}" for claim in claims) + lines.extend( + [ + "", + f"**Health:** {health.get('score', 'n/a')}/100 " + f"({health.get('grade', 'n/a')})", + f"**Receipt verdict:** {receipt.get('verdict', 'incomplete')}", + "", + f"*Generated by CodeClone | run: `{provenance.get('run_id', 'unknown')}` | " + f"{receipt.get('generated_at_utc', '')}*", + ] + ) + return "\n".join(lines) + + +def _decision_point( + *, + category: str, + finding_id: str, + reason: str, +) -> dict[str, object]: + return { + "id": "", + "finding_id": finding_id, + "reason": reason, + "category": category, + } + + +def _numbered_decisions( + points: Sequence[Mapping[str, object]], +) -> list[dict[str, object]]: + return [ + { + "id": f"D-{index}", + "finding_id": str(point.get("finding_id", "")), + "reason": str(point.get("reason", "")), + "category": str(point.get("category", "")), + } + for index, point in enumerate(points, start=1) + ] + + +def _suppressed_clone_count(report_document: Mapping[str, object]) -> int: + findings = _as_mapping(report_document.get("findings")) + groups = _as_mapping(findings.get("groups")) + clones = _as_mapping(groups.get("clones")) + suppressed = _as_mapping(clones.get("suppressed")) + return sum( + len(_as_sequence(suppressed.get(kind))) + for kind in ("function", "block", "segment") + ) + + +def _inline_paths(value: object) -> str: + paths = [str(item) for item in _as_sequence(value) if str(item)] + if not paths: + return "none" + return 
", ".join(f"`{path}`" for path in paths) + + +def _signed_delta(value: object) -> str: + if isinstance(value, int): + return f"{value:+d}" + return "n/a" + + +def _optional_mapping(value: object) -> Mapping[str, object] | None: + return value if isinstance(value, Mapping) else None + + +def _as_mapping(value: object) -> Mapping[str, object]: + return value if isinstance(value, Mapping) else {} + + +def _as_sequence(value: object) -> Sequence[object]: + if isinstance(value, Sequence) and not isinstance(value, (str, bytes, bytearray)): + return value + return () + + +def _mapping_rows(value: object) -> list[Mapping[str, object]]: + return [_as_mapping(item) for item in _as_sequence(value)] + + +__all__ = [ + "CLAIMS_NOT_MADE", + "MAX_HUMAN_DECISION_POINTS", + "RECEIPT_VERSION", + "VALID_RECEIPT_FORMATS", + "ReceiptFormat", + "ReceiptPatchStatus", + "ReceiptVerdict", + "derive_baseline_status", + "derive_claims_not_made", + "derive_human_decision_points", + "derive_patch_status", + "receipt_verdict", + "render_receipt_markdown", +] diff --git a/codeclone/surfaces/mcp/_session_review_receipt_mixin.py b/codeclone/surfaces/mcp/_session_review_receipt_mixin.py new file mode 100644 index 00000000..aba10d16 --- /dev/null +++ b/codeclone/surfaces/mcp/_session_review_receipt_mixin.py @@ -0,0 +1,387 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections import OrderedDict +from collections.abc import Mapping + +from ...contracts import REPORT_SCHEMA_VERSION +from ...utils.coerce import as_int as _coerce_int +from . 
import _session_helpers as _helpers +from ._intent import IntentRecord +from ._review_receipt import ( + RECEIPT_VERSION, + VALID_RECEIPT_FORMATS, + derive_baseline_status, + derive_claims_not_made, + derive_human_decision_points, + derive_patch_status, + receipt_verdict, + render_receipt_markdown, +) +from ._session_patch_contract_mixin import _MCPSessionPatchContractMixin +from ._session_shared import ( + CodeCloneMCPRunStore, + MCPRunRecord, + MCPServiceContractError, +) + + +class _MCPSessionReviewReceiptMixin(_MCPSessionPatchContractMixin): + _runs: CodeCloneMCPRunStore + _active_intents: dict[str, IntentRecord] + _review_state: dict[str, OrderedDict[str, str | None]] + _last_gate_results: dict[str, dict[str, object]] + + def create_review_receipt( + self, + *, + run_id: str | None = None, + intent_id: str | None = None, + format: str = "markdown", + include_blast_radius: bool = True, + include_patch_contract: bool = True, + ) -> dict[str, object]: + output_format = self._validated_receipt_format(format) + record = self._runs.get(run_id) + intent = self._receipt_intent(record=record, intent_id=intent_id) + changed_paths = self._receipt_changed_paths(record=record, intent=intent) + changed_findings = self._receipt_changed_findings( + record=record, + changed_paths=changed_paths, + ) + structural_delta = self._receipt_structural_delta(record) + reviewed_evidence = self._reviewed_evidence(record) + patch_contract = ( + self._receipt_patch_contract( + record=record, + intent=intent, + structural_delta=structural_delta, + changed_paths=changed_paths, + ) + if include_patch_contract + else None + ) + human_decisions = derive_human_decision_points( + changed_findings=changed_findings, + intent_status=self._intent_status(intent), + ) + patch_status = ( + str(patch_contract.get("status", "not_checked")) + if patch_contract is not None + else "not_checked" + ) + receipt: dict[str, object] = { + "receipt_version": RECEIPT_VERSION, + "generated_at_utc": 
self._receipt_generated_at(record), + "provenance": self._receipt_provenance(record), + "scope": self._receipt_scope(intent), + "blast_radius": ( + self._receipt_blast_radius(intent) if include_blast_radius else None + ), + "reviewed_evidence": reviewed_evidence, + "patch_contract": patch_contract, + "structural_delta": structural_delta, + "human_decision_points": human_decisions, + "claims_not_made": derive_claims_not_made(record.report_document), + "health": self._receipt_health(record), + "verdict": receipt_verdict( + reviewed_count=_coerce_int(reviewed_evidence.get("reviewed_count")), + gate_relevant_count=_coerce_int( + reviewed_evidence.get("total_gate_relevant") + ), + patch_status=patch_status, + human_decision_count=len(human_decisions), + ), + } + if output_format == "json": + return receipt + return { + "run_id": _helpers._short_run_id(record.run_id), + "format": output_format, + "content": render_receipt_markdown(receipt), + "receipt": receipt, + } + + def _validated_receipt_format(self, value: str) -> str: + if value not in VALID_RECEIPT_FORMATS: + expected = ", ".join(sorted(VALID_RECEIPT_FORMATS)) + raise MCPServiceContractError( + f"Invalid value for format: {value!r}. Expected one of: {expected}." + ) + return "json" if value == "json" else "markdown" + + def _receipt_intent( + self, + *, + record: MCPRunRecord, + intent_id: str | None, + ) -> IntentRecord | None: + intent_record: MCPRunRecord | None = None + intent: IntentRecord | None + if intent_id is not None: + intent_record, intent = self._resolve_intent( + run_id=None, + intent_id=intent_id, + ) + else: + intent = self._optional_intent(record=record, intent_id=None) + if intent is not None and intent.run_id != record.run_id: + intent_record = intent_record or self._runs.get(intent.run_id) + if intent_record.root != record.root: + raise MCPServiceContractError( + "Receipt intent must belong to the selected run or the same root." 
+ ) + return intent + + def _receipt_changed_paths( + self, + *, + record: MCPRunRecord, + intent: IntentRecord | None, + ) -> tuple[str, ...]: + if intent is not None and intent.check_result is not None: + return tuple(intent.check_result.actual_changed_files) + return tuple(record.changed_paths) + + def _receipt_changed_findings( + self, + *, + record: MCPRunRecord, + changed_paths: tuple[str, ...], + ) -> list[dict[str, object]]: + if not changed_paths: + return [] + findings = self._base_findings(record) + return [ + finding + for finding in findings + if self._finding_touches_paths( + finding=finding, + changed_paths=changed_paths, + ) + ] + + def _receipt_provenance(self, record: MCPRunRecord) -> dict[str, object]: + return { + "report_digest": self._receipt_digest(record), + "report_schema_version": REPORT_SCHEMA_VERSION, + "baseline_status": derive_baseline_status(record.report_document), + "run_id": _helpers._short_run_id(record.run_id), + "root": str(record.root), + } + + def _receipt_digest(self, record: MCPRunRecord) -> str: + integrity = _helpers._as_mapping(record.report_document.get("integrity")) + digest = _helpers._as_mapping(integrity.get("digest")) + algorithm = str(digest.get("algorithm", "sha256")).strip() or "sha256" + return f"{algorithm}:{_helpers._report_digest(record.report_document)}" + + def _receipt_generated_at(self, record: MCPRunRecord) -> str: + meta = _helpers._as_mapping(record.report_document.get("meta")) + value = str(meta.get("report_generated_at_utc", "")).strip() + if value: + return value + runtime = _helpers._as_mapping(meta.get("runtime")) + value = str(runtime.get("report_generated_at_utc", "")).strip() + if value: + return value + return str(record.summary.get("analysis_started_at_utc", "")).strip() + + def _receipt_scope(self, intent: IntentRecord | None) -> dict[str, object] | None: + if intent is None: + return None + check = intent.check_result + return { + "intent_id": intent.intent_id, + "intent_status": 
self._intent_status(intent), + "intent_description": intent.intent_description, + "declared_files": list(intent.scope.allowed_files), + "changed_files": list(check.actual_changed_files) if check else [], + "unexpected_files": list(check.unexpected_files) if check else [], + "forbidden_touched": list(check.forbidden_touched) if check else [], + } + + def _intent_status(self, intent: IntentRecord | None) -> str | None: + if intent is None: + return None + if intent.check_result is not None: + return intent.check_result.status.value + return intent.status.value + + def _receipt_blast_radius( + self, + intent: IntentRecord | None, + ) -> dict[str, object] | None: + if intent is None or not intent.blast_radius_summary: + return None + summary = intent.blast_radius_summary + return { + "radius_level": summary.get("radius_level", "unknown"), + "direct_dependents_count": _coerce_int( + summary.get("direct_dependents_count") + ), + "clone_cohort_members_count": _coerce_int( + summary.get("clone_cohort_members_count") + ), + "do_not_touch_count": _coerce_int(summary.get("do_not_touch_count")), + } + + def _reviewed_evidence(self, record: MCPRunRecord) -> dict[str, object]: + findings = self._base_findings(record) + gate_relevant = [ + finding + for finding in findings + if str(finding.get("novelty", "")) == "new" + or str(finding.get("severity", "")) in {"critical", "warning"} + ] + with self._state_lock: + review_items = tuple( + self._review_state.get(record.run_id, OrderedDict()).items() + ) + items: list[dict[str, object]] = [] + for canonical_id, note in review_items: + finding = self._finding_by_id(record=record, canonical_id=canonical_id) + if finding is None: + continue + summary = self._finding_summary_card(record, finding) + items.append( + { + "finding_id": self._short_finding_id(record, canonical_id), + "kind": str(summary.get("kind") or "finding"), + "severity": str(summary.get("severity") or "info"), + "note": note, + } + ) + return { + "total_gate_relevant": 
len(gate_relevant), + "reviewed_count": len(items), + "items": items, + } + + def _finding_by_id( + self, + *, + record: MCPRunRecord, + canonical_id: str, + ) -> dict[str, object] | None: + for finding in self._base_findings(record): + if str(finding.get("id", "")) == canonical_id: + return finding + return None + + def _receipt_structural_delta(self, record: MCPRunRecord) -> dict[str, object]: + previous = self._previous_run_for_root(record) + if previous is None: + return { + "available": False, + "regressions": 0, + "improvements": 0, + "health_delta": None, + "verdict": "not_available", + } + compare_payload = self.compare_runs( + run_id_before=previous.run_id, + run_id_after=record.run_id, + focus="all", + ) + return { + "available": bool(compare_payload.get("comparable")), + "regressions": len( + _helpers._as_sequence(compare_payload.get("regressions")) + ), + "improvements": len( + _helpers._as_sequence(compare_payload.get("improvements")) + ), + "health_delta": compare_payload.get("health_delta"), + "verdict": str(compare_payload.get("verdict", "stable")), + } + + def _receipt_patch_contract( + self, + *, + record: MCPRunRecord, + intent: IntentRecord | None, + structural_delta: Mapping[str, object], + changed_paths: tuple[str, ...], + ) -> dict[str, object]: + with self._state_lock: + gate_result = self._last_gate_results.get(record.run_id) + gate_payload = dict(gate_result) if gate_result is not None else None + regressions = _coerce_int(structural_delta.get("regressions")) + intent_check_status = ( + intent.check_result.status.value + if intent is not None and intent.check_result is not None + else None + ) + baseline_abuse = self._receipt_baseline_abuse_detected( + record=record, + regressions=regressions, + changed_files=len(changed_paths), + ) + contract_violations = self._receipt_contract_violations( + gate_result=gate_payload, + intent_check_status=intent_check_status, + regressions=regressions, + baseline_abuse=baseline_abuse, + ) + return { + 
"status": derive_patch_status( + gate_result=gate_payload, + intent_check_status=intent_check_status, + regressions=regressions, + has_structural_delta=bool(structural_delta.get("available")), + ), + "regressions": regressions, + "improvements": _coerce_int(structural_delta.get("improvements")), + "health_delta": structural_delta.get("health_delta"), + "contract_violations": contract_violations, + "baseline_abuse_detected": baseline_abuse, + } + + def _receipt_baseline_abuse_detected( + self, + *, + record: MCPRunRecord, + regressions: int, + changed_files: int, + ) -> bool: + meta = _helpers._as_mapping(record.report_document.get("meta")) + baseline = _helpers._as_mapping(meta.get("baseline")) + return str(baseline.get("status", "")).strip() == "updated" and ( + regressions > 0 or changed_files > 0 + ) + + def _receipt_contract_violations( + self, + *, + gate_result: Mapping[str, object] | None, + intent_check_status: str | None, + regressions: int, + baseline_abuse: bool, + ) -> list[str]: + violations: list[str] = [] + if regressions > 0: + violations.append("structural_regressions") + if gate_result is not None and bool(gate_result.get("would_fail")): + violations.append("gate_failures") + if intent_check_status == "violated": + violations.append("scope_violation") + if baseline_abuse: + violations.append("baseline_abuse") + return violations + + def _receipt_health(self, record: MCPRunRecord) -> dict[str, object]: + health = _helpers._summary_health_payload(record.summary) + return { + "score": health.get("score"), + "grade": health.get("grade"), + "delta": _helpers._summary_health_delta(record.summary), + } + + +__all__ = ["_MCPSessionReviewReceiptMixin"] diff --git a/codeclone/surfaces/mcp/server.py b/codeclone/surfaces/mcp/server.py index 7616ea83..6cfdb421 100644 --- a/codeclone/surfaces/mcp/server.py +++ b/codeclone/surfaces/mcp/server.py @@ -406,6 +406,33 @@ def check_patch_contract( changed_files=changed_files, ) + @tool( + title="Create Review 
Receipt", + description=( + "Generate a deterministic, auditable review receipt from stored " + "MCP state: report provenance, intent scope, blast radius, " + "reviewed findings, patch contract status, human decision points, " + "and claims-not-made. Output markdown or JSON without mutating " + "repository state." + ), + annotations=read_only_tool, + structured_output=True, + ) + def create_review_receipt( + run_id: str | None = None, + intent_id: str | None = None, + format: str = "markdown", + include_blast_radius: bool = True, + include_patch_contract: bool = True, + ) -> dict[str, object]: + return service.create_review_receipt( + run_id=run_id, + intent_id=intent_id, + format=format, + include_blast_radius=include_blast_radius, + include_patch_contract=include_patch_contract, + ) + @tool( title="Help", description=( diff --git a/codeclone/surfaces/mcp/service.py b/codeclone/surfaces/mcp/service.py index be72ee3a..cda1d6e5 100644 --- a/codeclone/surfaces/mcp/service.py +++ b/codeclone/surfaces/mcp/service.py @@ -70,6 +70,12 @@ def check_patch_contract( ) -> dict[str, object]: return self._run_dict("check_patch_contract", **params) + def create_review_receipt( + self: _RunDictService, + **params: object, + ) -> dict[str, object]: + return self._run_dict("create_review_receipt", **params) + def generate_pr_summary( self: _RunDictService, **params: object, @@ -239,6 +245,13 @@ def _apply_public_method_signatures() -> None: _kwonly("diff_ref", "str | None", None), _kwonly("changed_files", "Sequence[str] | None", None), ), + "create_review_receipt": ( + _kwonly("run_id", "str | None", None), + _kwonly("intent_id", "str | None", None), + _kwonly("format", "str", "markdown"), + _kwonly("include_blast_radius", "bool", True), + _kwonly("include_patch_contract", "bool", True), + ), "manage_change_intent": ( _kwonly("action", "str"), _kwonly("run_id", "str | None", None), diff --git a/codeclone/surfaces/mcp/session.py b/codeclone/surfaces/mcp/session.py index 
89cc276c..29b6cdf4 100644 --- a/codeclone/surfaces/mcp/session.py +++ b/codeclone/surfaces/mcp/session.py @@ -16,7 +16,7 @@ resolve_clone_baseline_state, resolve_metrics_baseline_state, ) -from ._session_patch_contract_mixin import _MCPSessionPatchContractMixin +from ._session_review_receipt_mixin import _MCPSessionReviewReceiptMixin from ._session_shared import ( _REPORT_DUMMY_PATH, DEFAULT_BLOCK_MIN_LOC, @@ -77,7 +77,7 @@ ] -class MCPSession(_MCPSessionPatchContractMixin): +class MCPSession(_MCPSessionReviewReceiptMixin): def __init__(self, *, history_limit: int = DEFAULT_MCP_HISTORY_LIMIT) -> None: self._runs = CodeCloneMCPRunStore(history_limit=history_limit) self._state_lock = RLock() diff --git a/docs/README-pypi.md b/docs/README-pypi.md index 8ce5cc3e..a2cbaccb 100644 --- a/docs/README-pypi.md +++ b/docs/README-pypi.md @@ -30,7 +30,8 @@ CodeClone is a structural change controller for Python. The v2.1 alpha starts before the first edit — when an agent declares what it intends to change — maps the structural blast radius, and verifies explicit before/after runs -against the patch contract. Receipt and claim-guard tools are planned next. +against the patch contract. It also generates auditable review receipts; the +claim-guard tool is planned next. The same analysis pipeline powers CLI reports, CI checks, the MCP server, and native IDE/agent clients — so humans and AI agents operate on identical, @@ -48,7 +49,7 @@ When an AI agent edits code, CodeClone governs the structural boundary: 1. **Declare intent** — agent states what it plans to change, which files, and why 2. **Map blast radius** — reverse imports, clone cohorts, dependency cycles, do-not-touch signals 3. **Check patch contract** — pre-edit regression budget and post-edit boundary verification -4. **Generate receipt** — planned auditable artifact: intent + scope + patch status + structural delta +4. **Generate receipt** — auditable artifact: intent + scope + patch status + structural delta 5. 
**Validate claims** — planned cross-check of review text against the canonical report Each step is deterministic — structural facts, no LLM inference. @@ -61,7 +62,7 @@ Docs: { ]); assert.equal(manifest.documentation, "https://orenlab.github.io/codeclone/claude-desktop-bundle/"); assert.equal(manifest.tools_generated, true); - assert.equal(manifest.tools.length, 24); + assert.equal(manifest.tools.length, 25); assert.equal("instructions" in manifest, false); }); diff --git a/tests/fixtures/contract_snapshots/mcp_tool_schemas.json b/tests/fixtures/contract_snapshots/mcp_tool_schemas.json index 2ca786c9..2d52350f 100644 --- a/tests/fixtures/contract_snapshots/mcp_tool_schemas.json +++ b/tests/fixtures/contract_snapshots/mcp_tool_schemas.json @@ -1007,6 +1007,54 @@ "type": "object" } }, + { + "name": "create_review_receipt", + "input_schema": { + "properties": { + "run_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Run Id" + }, + "intent_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Intent Id" + }, + "format": { + "default": "markdown", + "title": "Format", + "type": "string" + }, + "include_blast_radius": { + "default": true, + "title": "Include Blast Radius", + "type": "boolean" + }, + "include_patch_contract": { + "default": true, + "title": "Include Patch Contract", + "type": "boolean" + } + }, + "title": "create_review_receiptArguments", + "type": "object" + } + }, { "name": "evaluate_gates", "input_schema": { diff --git a/tests/test_mcp_server.py b/tests/test_mcp_server.py index b832bff2..25f8f607 100644 --- a/tests/test_mcp_server.py +++ b/tests/test_mcp_server.py @@ -126,6 +126,7 @@ def test_mcp_server_exposes_expected_read_only_tools() -> None: "get_production_triage", "get_blast_radius", "check_patch_contract", + "create_review_receipt", "evaluate_gates", "get_report_section", "list_findings", @@ -159,6 +160,7 @@ def 
test_mcp_server_exposes_expected_read_only_tools() -> None: "get_production_triage", "get_blast_radius", "check_patch_contract", + "create_review_receipt", "evaluate_gates", "help", "get_report_section", @@ -202,6 +204,8 @@ def test_mcp_server_exposes_expected_read_only_tools() -> None: assert "structural risk boundary" in str(tools["get_blast_radius"].description) assert "review-only context" in str(tools["get_blast_radius"].description) assert "mode='budget'" in str(tools["check_patch_contract"].description) + assert "auditable review receipt" in str(tools["create_review_receipt"].description) + assert "claims-not-made" in str(tools["create_review_receipt"].description) assert "Intent is session-local" in str(tools["manage_change_intent"].description) assert "bounded guidance, not a full manual" in str(tools["help"].description) assert "workflow, analysis_profile, suppressions, baseline" in str( @@ -608,6 +612,18 @@ def test_mcp_server_tool_roundtrip_and_resources(tmp_path: Path) -> None: ) ) ) + receipt = _structured_tool_result( + asyncio.run( + server.call_tool( + "create_review_receipt", + { + "run_id": run_id, + "intent_id": intent_id, + "format": "markdown", + }, + ) + ) + ) assert complexity["check"] == "complexity" assert cast(int, clones["total"]) >= 1 assert coupling["check"] == "coupling" @@ -622,6 +638,18 @@ def test_mcp_server_tool_roundtrip_and_resources(tmp_path: Path) -> None: assert reviewed_finding["priority"] == summary_finding["priority"] assert reviewed_finding["locations"] == summary_finding["locations"] assert "## CodeClone Summary" in str(pr_summary["content"]) + assert receipt["format"] == "markdown" + assert "## CodeClone Agent Review Receipt" in str(receipt["content"]) + receipt_payload = cast("dict[str, object]", receipt["receipt"]) + assert cast("dict[str, object]", receipt_payload["scope"])["intent_id"] == ( + intent_id + ) + assert ( + cast("dict[str, object]", receipt_payload["reviewed_evidence"])[ + "reviewed_count" + ] + == 1 + ) 
run_summary_resource = list( asyncio.run(server.read_resource(f"codeclone://runs/{run_id}/summary")) diff --git a/tests/test_mcp_service.py b/tests/test_mcp_service.py index 2c9be6ed..bdff4b89 100644 --- a/tests/test_mcp_service.py +++ b/tests/test_mcp_service.py @@ -21,6 +21,7 @@ import codeclone.surfaces.mcp._blast_radius as mcp_blast_radius_mod import codeclone.surfaces.mcp._patch_contract as mcp_patch_contract_mod +import codeclone.surfaces.mcp._review_receipt as mcp_review_receipt_mod import codeclone.surfaces.mcp._session_baseline as mcp_baseline_mod import codeclone.surfaces.mcp._session_finding_mixin as mcp_finding_mod import codeclone.surfaces.mcp._session_helpers as mcp_helpers_mod @@ -324,7 +325,13 @@ def _patch_contract_report_document( baseline_status: str = "ok", ) -> dict[str, object]: report_document = copy.deepcopy(_blast_radius_report_document(digest)) - report_document["meta"] = {"baseline": {"status": baseline_status}} + report_document["meta"] = { + "baseline": { + "loaded": bool(baseline_status), + "status": baseline_status, + }, + "runtime": {"report_generated_at_utc": "2026-05-23T12:00:00Z"}, + } findings = cast("dict[str, object]", report_document["findings"]) groups = cast("dict[str, object]", findings["groups"]) clones = cast("dict[str, object]", groups["clones"]) @@ -374,6 +381,56 @@ def _patch_contract_run_record( ) +def _patch_contract_before_after_records( + root: Path, + *, + before_health: int, +) -> tuple[MCPRunRecord, MCPRunRecord]: + before = _patch_contract_run_record( + root, + run_id="before1234567890", + digest="before-digest", + include_regression=False, + complexity=6, + health=before_health, + ) + after = _patch_contract_run_record( + root, + run_id="after1234567890", + digest="after-digest", + include_regression=True, + complexity=14, + health=70, + baseline_status="updated", + new_func=frozenset({"clone:function:g2"}), + ) + return before, after + + +def _declare_pkg_a_intent(service: CodeCloneMCPService) -> dict[str, 
object]: + return service.manage_change_intent( + action="declare", + run_id="before12", + scope={"allowed_files": ["pkg/a.py"]}, + intent="adjust pkg.a behavior", + expected_effects=["no new clone group"], + ) + + +def _seed_patch_contract_intent( + service: CodeCloneMCPService, + root: Path, + *, + before_health: int, +) -> tuple[MCPRunRecord, dict[str, object]]: + before, after = _patch_contract_before_after_records( + root, + before_health=before_health, + ) + service._runs.register(before) + return after, _declare_pkg_a_intent(service) + + def _payload_dicts( payload: Mapping[str, object], keys: tuple[str, ...], @@ -2552,31 +2609,10 @@ def test_mcp_service_check_patch_contract_verify_composes_existing_primitives( tmp_path: Path, ) -> None: service = CodeCloneMCPService(history_limit=4) - before = _patch_contract_run_record( + after, declared = _seed_patch_contract_intent( + service, tmp_path, - run_id="before1234567890", - digest="before-digest", - include_regression=False, - complexity=6, - health=85, - ) - after = _patch_contract_run_record( - tmp_path, - run_id="after1234567890", - digest="after-digest", - include_regression=True, - complexity=14, - health=70, - baseline_status="updated", - new_func=frozenset({"clone:function:g2"}), - ) - service._runs.register(before) - declared = service.manage_change_intent( - action="declare", - run_id="before12", - scope={"allowed_files": ["pkg/a.py"]}, - intent="adjust pkg.a behavior", - expected_effects=["no new clone group"], + before_health=85, ) service._runs.register(after) @@ -2649,14 +2685,276 @@ def test_mcp_service_check_patch_contract_verify_composes_existing_primitives( before_run_id="before12", after_run_id="missing", ) - assert no_before["status"] == "unverified" - assert no_before["reason"] == "no_before_run" - assert no_after["status"] == "unverified" - assert no_after["reason"] == "no_after_run" - assert unknown_before["status"] == "unverified" - assert unknown_before["reason"] == "no_before_run" - 
assert unknown_after["status"] == "unverified" - assert unknown_after["reason"] == "no_after_run" + unverified_cases = [ + (no_before, "no_before_run"), + (no_after, "no_after_run"), + (unknown_before, "no_before_run"), + (unknown_after, "no_after_run"), + ] + for payload, reason in unverified_cases: + assert payload["status"] == "unverified" + assert payload["reason"] == reason + + +def test_mcp_review_receipt_helpers_are_bounded_and_contract_aware() -> None: + assert ( + mcp_review_receipt_mod.derive_baseline_status( + {"meta": {"baseline": {"loaded": True, "status": "ok"}}} + ) + == "trusted" + ) + assert ( + mcp_review_receipt_mod.derive_baseline_status( + {"meta": {"baseline": {"loaded": True, "status": "integrity_failed"}}} + ) + == "untrusted" + ) + assert mcp_review_receipt_mod.derive_baseline_status({"meta": {}}) == "not_loaded" + + assert ( + mcp_review_receipt_mod.derive_patch_status( + gate_result={"would_fail": False}, + intent_check_status="clean", + regressions=0, + has_structural_delta=True, + ) + == "accepted" + ) + assert ( + mcp_review_receipt_mod.derive_patch_status( + gate_result=None, + intent_check_status="violated", + regressions=0, + has_structural_delta=False, + ) + == "violated" + ) + assert ( + mcp_review_receipt_mod.derive_patch_status( + gate_result={"would_fail": True}, + intent_check_status="clean", + regressions=0, + has_structural_delta=False, + ) + == "violated" + ) + assert ( + mcp_review_receipt_mod.derive_patch_status( + gate_result=None, + intent_check_status=None, + regressions=1, + has_structural_delta=True, + ) + == "violated" + ) + assert ( + mcp_review_receipt_mod.derive_patch_status( + gate_result=None, + intent_check_status=None, + regressions=0, + has_structural_delta=False, + ) + == "not_checked" + ) + + decisions = mcp_review_receipt_mod.derive_human_decision_points( + changed_findings=[ + { + "id": f"clone:function:{index}", + "family": "clone", + "novelty": "known", + } + for index in range(12) + ], + 
intent_status="expanded", + ) + assert len(decisions) == mcp_review_receipt_mod.MAX_HUMAN_DECISION_POINTS + assert decisions[0] == { + "id": "D-1", + "finding_id": "clone:function:0", + "reason": ( + "Clone cohort member was in changed scope; " + "confirm divergence is intentional." + ), + "category": "clone_divergence", + } + + claims = mcp_review_receipt_mod.derive_claims_not_made( + _blast_radius_report_document() + ) + assert [claim["claim_type"] for claim in claims] == [ + "security_vulnerability", + "baseline_regression", + "report_only_ci_failure", + "suppressed_clone_regression", + ] + assert ( + mcp_review_receipt_mod.receipt_verdict( + reviewed_count=1, + gate_relevant_count=1, + patch_status="accepted", + human_decision_count=0, + ) + == "clean" + ) + assert ( + mcp_review_receipt_mod.receipt_verdict( + reviewed_count=0, + gate_relevant_count=1, + patch_status="accepted", + human_decision_count=0, + ) + == "incomplete" + ) + assert ( + mcp_review_receipt_mod.receipt_verdict( + reviewed_count=1, + gate_relevant_count=1, + patch_status="violated", + human_decision_count=0, + ) + == "needs_attention" + ) + + +def test_mcp_service_create_review_receipt_minimal_and_deterministic( + tmp_path: Path, +) -> None: + service = CodeCloneMCPService(history_limit=2) + record = _patch_contract_run_record( + tmp_path, + run_id="receipt1234567890", + digest="receipt-digest", + include_regression=False, + complexity=6, + health=92, + ) + service._runs.register(record) + + first = service.create_review_receipt(run_id="receipt12", format="json") + second = service.create_review_receipt(run_id="receipt12", format="json") + + assert first == second + assert first["receipt_version"] == mcp_review_receipt_mod.RECEIPT_VERSION + assert first["generated_at_utc"] == "2026-05-23T12:00:00Z" + provenance = cast("dict[str, object]", first["provenance"]) + assert provenance["report_digest"] == "sha256:receipt-digest" + assert provenance["baseline_status"] == "trusted" + assert 
first["scope"] is None + assert first["blast_radius"] is None + assert cast("dict[str, object]", first["patch_contract"])["status"] == ( + "not_checked" + ) + assert cast("dict[str, object]", first["structural_delta"])["available"] is False + assert cast("dict[str, object]", first["reviewed_evidence"]) == { + "total_gate_relevant": 1, + "reviewed_count": 0, + "items": [], + } + assert first["verdict"] == "incomplete" + assert copy.deepcopy(record.report_document) == record.report_document + + compact = service.create_review_receipt( + run_id="receipt12", + format="json", + include_blast_radius=False, + include_patch_contract=False, + ) + assert compact["blast_radius"] is None + assert compact["patch_contract"] is None + assert compact["verdict"] == "incomplete" + + markdown = service.create_review_receipt(run_id="receipt12") + assert markdown["run_id"] == "receipt1" + assert markdown["format"] == "markdown" + assert "## CodeClone Agent Review Receipt" in str(markdown["content"]) + assert "No intent declared." 
in str(markdown["content"]) + + with pytest.raises(MCPServiceContractError, match="Invalid value for format"): + service.create_review_receipt(run_id="receipt12", format="yaml") + + +def test_mcp_service_create_review_receipt_full_post_edit_workflow( + tmp_path: Path, +) -> None: + service = CodeCloneMCPService(history_limit=4) + after, declared = _seed_patch_contract_intent( + service, + tmp_path, + before_health=88, + ) + intent_id = str(declared["intent_id"]) + intent_check = service.manage_change_intent( + action="check", + intent_id=intent_id, + changed_files=["pkg/a.py"], + ) + service._runs.register(after) + reviewed = service.mark_finding_reviewed( + run_id="after12", + finding_id="clone:function:g1", + note="reviewed clone split", + ) + + receipt = service.create_review_receipt( + run_id="after12", + intent_id=intent_id, + format="json", + ) + provenance, scope, blast, reviewed_evidence, patch, delta, health = _payload_dicts( + receipt, + ( + "provenance", + "scope", + "blast_radius", + "reviewed_evidence", + "patch_contract", + "structural_delta", + "health", + ), + ) + decisions = cast("list[dict[str, object]]", receipt["human_decision_points"]) + claims = cast("list[dict[str, object]]", receipt["claims_not_made"]) + + assert intent_check["status"] == "clean" + assert reviewed["reviewed"] is True + assert provenance["report_digest"] == "sha256:after-digest" + assert provenance["baseline_status"] == "untrusted" + assert scope["intent_id"] == intent_id + assert scope["intent_status"] == "clean" + assert scope["declared_files"] == ["pkg/a.py"] + assert scope["changed_files"] == ["pkg/a.py"] + assert blast["radius_level"] == "medium" + assert blast["direct_dependents_count"] == 1 + assert reviewed_evidence["reviewed_count"] == 1 + assert cast("list[dict[str, object]]", reviewed_evidence["items"])[0] == { + "finding_id": "fn:g1", + "kind": "function_clone", + "severity": "info", + "note": "reviewed clone split", + } + assert delta["available"] is True + 
assert delta["regressions"] == 1 + assert patch["status"] == "violated" + assert patch["regressions"] == 1 + assert patch["contract_violations"] == [ + "structural_regressions", + "baseline_abuse", + ] + assert patch["baseline_abuse_detected"] is True + assert decisions[0]["category"] == "clone_divergence" + assert claims[0]["claim_type"] == "security_vulnerability" + assert health == {"score": 70, "grade": "B", "delta": 0} + assert receipt["verdict"] == "needs_attention" + + markdown = service.create_review_receipt( + run_id="after12", + intent_id=intent_id, + format="markdown", + ) + content = str(markdown["content"]) + assert "**Status:** violated" in content + assert "reviewed clone split" in content def test_mcp_service_branch_helpers_on_real_runs( From 3f8370705246c0097e08c7e72e052396c6345799 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sun, 24 May 2026 15:16:01 +0500 Subject: [PATCH 011/318] feat(mcp): add workspace intent registry Add disk-backed workspace intent records for multi-agent coordination, harden patch-contract payload semantics, and pin active intent runs so verification does not lose its before-run. 
--- AGENTS.md | 16 +- CHANGELOG.md | 11 +- CLAUDE.md | 40 ++ README.md | 34 +- codeclone/surfaces/mcp/_intent.py | 1 + codeclone/surfaces/mcp/_patch_contract.py | 41 +- .../surfaces/mcp/_session_intent_mixin.py | 263 ++++++- codeclone/surfaces/mcp/_session_shared.py | 100 ++- .../surfaces/mcp/_session_state_mixin.py | 23 + codeclone/surfaces/mcp/_workspace_intents.py | 639 ++++++++++++++++++ codeclone/surfaces/mcp/server.py | 28 +- codeclone/surfaces/mcp/service.py | 2 + codeclone/surfaces/mcp/session.py | 6 + docs/book/20-mcp-interface.md | 20 +- docs/book/24-structural-change-controller.md | 59 +- docs/mcp.md | 47 +- .../skills/codeclone-change-control/SKILL.md | 55 +- .../contract_snapshots/mcp_tool_schemas.json | 24 + tests/test_mcp_server.py | 8 +- tests/test_mcp_service.py | 149 +++- tests/test_workspace_intents.py | 199 ++++++ 21 files changed, 1667 insertions(+), 98 deletions(-) create mode 100644 CLAUDE.md create mode 100644 codeclone/surfaces/mcp/_workspace_intents.py create mode 100644 tests/test_workspace_intents.py diff --git a/AGENTS.md b/AGENTS.md index 4bf90480..527b57ec 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -66,8 +66,10 @@ Key artifacts: `codeclone-mcp` - `plugins/codeclone/` + `.agents/plugins/marketplace.json` — stable Codex plugin as a native local discovery layer over `codeclone-mcp`, with a bundled CodeClone review skill -- MCP runs are in-memory only; review markers and change intents are - session-local and must never leak into baseline/cache/report artifacts +- MCP runs are in-memory only. Review markers are session-local. Change intent + truth is session-local, with optional ephemeral workspace coordination records + under `.cache/codeclone/intents/`; none of this may leak into + baseline/cache/report artifacts. - `docs/`, `mkdocs.yml`, `.github/workflows/docs.yml` — published documentation site and docs build pipeline --- @@ -232,8 +234,9 @@ Reports come in: MCP is a separate optional interface, not a report format. 
It must remain a read-only agent layer over the same canonical report/baseline/cache contracts. -Session review markers and change intents are allowed only as ephemeral MCP -process state. +Session review markers and change intent truth are ephemeral MCP process state. +Workspace intent registry files under `.cache/codeclone/intents/` are advisory +coordination state only, not analysis cache or report truth. ### Report invariants @@ -359,7 +362,7 @@ Before cutting a release: - Don’t embed suppressions into baseline unless explicitly designed as a versioned contract. - Don’t introduce nondeterministic ordering (dict iteration, set ordering, filesystem traversal without sort). - Don’t make the base `codeclone` install depend on optional MCP runtime packages. -- Don’t let MCP mutate baselines, source files, or repo state. +- Don’t let MCP mutate baselines, source files, reports, or analysis cache data. - Don’t let MCP re-synthesize design findings from raw metrics; read canonical `findings.groups.design` only. --- @@ -451,7 +454,8 @@ Use this map to route changes to the right owner module. - `codeclone/report/*.py` (other modules) — deterministic report support slices such as explainability, suggestions, merge, overview, findings helpers, and source-kind routing. - `codeclone/surfaces/mcp/service.py` — typed, in-process MCP service over the current pipeline/report contracts; - keep it deterministic and read-only except for session-local in-memory markers. + keep it deterministic and read-only except for session-local state and + ephemeral workspace intent records under `.cache/codeclone/intents/`. - `codeclone/surfaces/mcp/server.py` — optional MCP launcher/server wiring, transport config, and MCP tool/resource registration; keep dependency loading lazy so base installs/CI do not require MCP runtime packages. 
- `tests/test_mcp_service.py`, `tests/test_mcp_server.py` — MCP contract and integration tests; run these when diff --git a/CHANGELOG.md b/CHANGELOG.md index df87453d..89d15556 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,9 @@ - Add MCP `manage_change_intent` for session-local change intent lifecycle: declare intended scope, inspect active intent, check actual changed files against scope, and clear intent state. +- Add a workspace intent registry under `.cache/codeclone/intents/` so separate + MCP stdio processes can see advisory multi-agent edit intents before + declaring overlapping scope. - Add MCP `check_patch_contract` with read-only `budget` and `verify` modes: pre-edit gate budget/headroom, post-edit before/after comparison, gate preview, intent-scope validation, and baseline-abuse signals. @@ -25,7 +28,13 @@ - Keep intent and blast-radius cache state in MCP process memory only; they do not mutate source files, baselines, cache artifacts, reports, or canonical - report integrity. + report integrity. Workspace intent files are ephemeral coordination state, + not analysis cache or report truth. +- Keep patch-contract budget payloads explicit: disabled numeric thresholds are + `null` in MCP payloads, and boolean enforcement policies use `forbid_*` + names. +- Pin MCP runs referenced by active change intents so bounded run-history + pruning cannot drop the declared before-run before verification. - Mark the package as `2.1.0a1` with the PyPI alpha classifier while v2.1 controller features are under development. diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 00000000..4de09c6e --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,40 @@ +# CodeClone Project Rules + +## Change control workflow + +This repository uses CodeClone MCP for structural change control. +Before editing any files, follow this workflow: + +1. 
Check workspace: `manage_change_intent(action="list_workspace", + root="<absolute-repo-root>")` + — if other agents have active intents, review their scope +2. Run analysis: `analyze_repository(root="<absolute-repo-root>")` +3. Declare intent: `manage_change_intent(action="declare", scope={...})` + — if `concurrent_intents` is non-empty, narrow scope or ask the user +4. Check blast radius: `get_blast_radius(files=[...])` +5. Check budget: `check_patch_contract(mode="budget")` +6. Edit files within declared scope only +7. Re-run analysis: `analyze_repository(root="<absolute-repo-root>")` +8. Verify: `manage_change_intent(action="check", ...)` then + `check_patch_contract(mode="verify")` +9. Clear intent: `manage_change_intent(action="clear")` + +### Rules + +- Never edit files without declaring intent first. +- Never silently expand scope — redeclare with expanded scope. +- Treat `do_not_touch` as a hard boundary. +- Treat `review_context` as context, not an edit ban. +- Do not update baselines, cache, or generated reports as part of a + functional change. +- If `list_workspace` shows another agent working on overlapping files, + stop and coordinate with the user before proceeding. +- CodeClone is the source of truth — do not reinterpret findings. + +### When to skip + +Skip this workflow only when: + +- No repository files will be changed (read-only tasks, specs only) +- CodeClone MCP is not available +- The user explicitly asks for analysis only diff --git a/README.md b/README.md index 87c126ff..ea17964c 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,9 @@ governance for AI coding agents. In the current v2.1 alpha, CodeClone records the declared intent before the first edit, maps the structural blast radius, verifies explicit before/after runs against the patch contract, and -generates auditable review receipts. The claim-guard tool is planned next. +generates auditable review receipts. It also exposes an advisory workspace intent registry so +parallel agents can see overlapping edit scopes before they start. 
The claim-guard tool is +planned next. **One canonical analysis, many surfaces.** CLI, HTML reports, IDE, and MCP all read the same deterministic facts — for both human reviewers and AI agents. @@ -53,18 +55,20 @@ When an AI agent edits code, CodeClone governs the structural boundary across fi | Step | Tool | What it does | |-------------------------|------------------------|------------------------------------------------------------------------------| -| 1. Declare intent | `manage_change_intent` | Agent states what it plans to change, which files, and why | -| 2. Map blast radius | `get_blast_radius` | Reverse imports, clone cohorts, dependency cycles, do-not-touch signals | -| 3. Check patch contract | `check_patch_contract` | Pre-edit regression budget with headroom; post-edit boundary verification | -| 4. Generate receipt | `create_review_receipt` | Auditable artifact linking intent, scope, patch status, and structural delta | -| 5. Validate claims | _planned_ | Cross-check the agent's review text against the canonical report | +| 1. Check workspace | `manage_change_intent` | Agent sees other active workspace intents before editing | +| 2. Declare intent | `manage_change_intent` | Agent states what it plans to change, which files, and why | +| 3. Map blast radius | `get_blast_radius` | Reverse imports, clone cohorts, dependency cycles, do-not-touch signals | +| 4. Check patch contract | `check_patch_contract` | Pre-edit regression budget with headroom; post-edit boundary verification | +| 5. Generate receipt | `create_review_receipt` | Auditable artifact linking intent, scope, patch status, and structural delta | +| 6. Validate claims | _planned_ | Cross-check the agent's review text against the canonical report | Every step is deterministic — structural facts from the canonical report, no LLM inference. 
-The v2.1 alpha ships steps 1–4 as live MCP tools (`manage_change_intent`, `get_blast_radius`, +The v2.1 alpha ships steps 1–5 as live MCP tools (`manage_change_intent`, `get_blast_radius`, `check_patch_contract`, `create_review_receipt`) composed over the existing read-only analysis -surface. Step 5 is a planned follow-up in the same controller line. Controller state is -session-local and in-memory — no files created, no repo state mutated. +surface. Claim validation is a planned follow-up in the same controller line. Intent truth is +session-local; workspace coordination records are ephemeral files under `.cache/codeclone/intents/`. +CodeClone still never mutates source files, baselines, reports, or analysis cache data. Change controller docs: [Structural Change Controller](https://orenlab.github.io/codeclone/book/24-structural-change-controller/) @@ -73,6 +77,7 @@ Change controller docs: [Structural Change Controller](https://orenlab.github.io **Change control** - **Intent declaration** — agent states what it plans to change; CodeClone tracks scope, expiry, and status +- **Workspace intent registry** — advisory multi-agent visibility for overlapping edit scopes - **Blast radius** — structural risk projection: reverse imports, clone cohorts, dependency cycles, do-not-touch signals - **Patch contract** — pre-edit regression budget and post-edit boundary verification over explicit before/after runs - **Review receipt** — auditable artifact linking intent, scope, patch verification, and structural delta @@ -253,7 +258,8 @@ repos: ## MCP Control Surface A 25-tool MCP server for AI agents and IDE clients, built on the same canonical pipeline as the CLI. -Read-only by contract — never mutates source, baselines, or repo state. +Read-only for source, baselines, reports, and analysis cache data. The change controller may write +ephemeral coordination records under `.cache/codeclone/intents/`. 
```bash # local stdio clients @@ -263,10 +269,10 @@ codeclone-mcp --transport stdio codeclone-mcp --transport streamable-http ``` -Of the 25 tools, 21 expose the canonical read-only analysis and triage surface. The remaining -four — `manage_change_intent`, `get_blast_radius`, `check_patch_contract`, and -`create_review_receipt` — are the change controller, composed over that surface to govern the -structural boundary of AI-assisted edits. The claim guard tool is a planned v2.1 follow-up. +The controller tools — `manage_change_intent`, `get_blast_radius`, +`check_patch_contract`, and `create_review_receipt` — are composed over the same +canonical surface to govern the structural boundary of AI-assisted edits. The +claim guard tool is a planned v2.1 follow-up. > [!WARNING] > Analysis tools require an absolute repository root. Relative roots such as `.` are rejected. diff --git a/codeclone/surfaces/mcp/_intent.py b/codeclone/surfaces/mcp/_intent.py index f869ee9e..ae7d338f 100644 --- a/codeclone/surfaces/mcp/_intent.py +++ b/codeclone/surfaces/mcp/_intent.py @@ -25,6 +25,7 @@ "out_of_scope_production_change_requires_human", "new_structural_regression_forbidden", "report_only_claims_forbidden", + "concurrent_workspace_intent_conflict_requires_review", ) diff --git a/codeclone/surfaces/mcp/_patch_contract.py b/codeclone/surfaces/mcp/_patch_contract.py index 2e3cb754..cccf60c1 100644 --- a/codeclone/surfaces/mcp/_patch_contract.py +++ b/codeclone/surfaces/mcp/_patch_contract.py @@ -45,19 +45,32 @@ class PatchBudgets: coverage_min: int = DEFAULT_COVERAGE_MIN def to_payload(self) -> dict[str, object]: + disabled = tuple( + name + for name, value in ( + ("clone_regression", self.clone_regression), + ("complexity_delta", self.complexity_delta), + ("coupling_delta", self.coupling_delta), + ("cohesion_delta", self.cohesion_delta), + ("health_floor", self.health_floor), + ("coverage_min", self.coverage_min), + ) + if value < 0 + ) return { - "clone_regression": 
self.clone_regression, - "dead_code_regression": self.dead_code_regression, - "dependency_cycle": self.dependency_cycle, - "coverage_hotspot": self.coverage_hotspot, - "complexity_delta": self.complexity_delta, - "coupling_delta": self.coupling_delta, - "cohesion_delta": self.cohesion_delta, - "health_floor": self.health_floor, - "typing_regression": self.typing_regression, - "docstring_regression": self.docstring_regression, - "api_break": self.api_break, - "coverage_min": self.coverage_min, + "clone_regression": _none_if_unlimited(self.clone_regression), + "forbid_dead_code_regression": self.dead_code_regression, + "forbid_dependency_cycle": self.dependency_cycle, + "forbid_coverage_hotspot": self.coverage_hotspot, + "complexity_delta": _none_if_unlimited(self.complexity_delta), + "coupling_delta": _none_if_unlimited(self.coupling_delta), + "cohesion_delta": _none_if_unlimited(self.cohesion_delta), + "health_floor": _none_if_unlimited(self.health_floor), + "forbid_typing_regression": self.typing_regression, + "forbid_docstring_regression": self.docstring_regression, + "forbid_api_break": self.api_break, + "coverage_min": _none_if_unlimited(self.coverage_min), + "disabled": list(disabled), } @@ -163,6 +176,10 @@ def _none_to_unlimited(value: int | None) -> int: return value if value is not None else -1 +def _none_if_unlimited(value: int) -> int | None: + return value if value >= 0 else None + + def _as_mapping(value: object) -> Mapping[str, object]: return value if isinstance(value, Mapping) else {} diff --git a/codeclone/surfaces/mcp/_session_intent_mixin.py b/codeclone/surfaces/mcp/_session_intent_mixin.py index 4f3d683a..3a035481 100644 --- a/codeclone/surfaces/mcp/_session_intent_mixin.py +++ b/codeclone/surfaces/mcp/_session_intent_mixin.py @@ -6,10 +6,12 @@ from __future__ import annotations +import os from collections.abc import Mapping, Sequence from dataclasses import replace from datetime import datetime, timezone from fnmatch import fnmatchcase +from 
pathlib import Path from . import _session_helpers as _helpers from ._intent import ( @@ -25,15 +27,39 @@ from ._session_blast_radius_mixin import _MCPSessionBlastRadiusMixin from ._session_shared import ( CodeCloneMCPRunStore, + MCPRunNotFoundError, MCPRunRecord, MCPServiceContractError, ) +from ._workspace_intents import ( + WorkspaceIntentRecord, + WorkspaceIntentStatus, + compute_scope_digest, + detect_conflicts, + expires_at, + find_workspace_intent, + format_utc, + gc_workspace, + is_orphaned, + list_workspace_intents, + remove_workspace_intent, + remove_workspace_record, + resolved_ttl_seconds, + stale_reason, + update_workspace_intent_status, + utc_now, + workspace_status_counts, + write_workspace_intent, +) class _MCPSessionIntentMixin(_MCPSessionBlastRadiusMixin): _runs: CodeCloneMCPRunStore _active_intents: dict[str, IntentRecord] _intent_sequence: int + _agent_pid: int + _agent_start_epoch: int + _agent_label: str def manage_change_intent( self, @@ -46,6 +72,8 @@ def manage_change_intent( expected_effects: Sequence[str] | None = None, diff_ref: str | None = None, changed_files: Sequence[str] | None = None, + root: str | None = None, + ttl_seconds: int | None = None, ) -> dict[str, object]: match action: case "declare": @@ -54,6 +82,7 @@ def manage_change_intent( scope=scope, intent=intent, expected_effects=expected_effects, + ttl_seconds=ttl_seconds, ) case "get": record, active_intent = self._resolve_intent( @@ -73,10 +102,21 @@ def manage_change_intent( ) case "clear": return self._clear_change_intent(intent_id=intent_id) + case "list_workspace": + return self._list_workspace_intents(root=root) + case "gc_workspace": + return self._gc_workspace_intents(root=root) + case "reset_workspace": + return self._reset_workspace_intent( + root=root, + intent_id=intent_id, + ttl_seconds=ttl_seconds, + ) case _: raise MCPServiceContractError( "Invalid value for action: " - f"{action!r}. Expected one of: check, clear, declare, get." + f"{action!r}. 
Expected one of: check, clear, declare, " + "gc_workspace, get, list_workspace, reset_workspace." ) def _declare_change_intent( @@ -86,6 +126,7 @@ def _declare_change_intent( scope: dict[str, object] | None, intent: str | None, expected_effects: Sequence[str] | None, + ttl_seconds: int | None, ) -> dict[str, object]: record = self._runs.get(run_id) try: @@ -101,28 +142,34 @@ def _declare_change_intent( files=normalized_scope.allowed_paths, depth="direct", forbidden_patterns=normalized_scope.forbidden, - allowed_scope=normalized_scope.allowed_paths, ) blast_payload = blast.to_payload() blast_summary = self._blast_radius_summary( blast_payload=blast_payload, scope=normalized_scope, ) + ttl = resolved_ttl_seconds( + ttl_seconds, + env_value=os.environ.get("CODECLONE_INTENT_TTL_SECONDS"), + ) + replaced_intents: list[IntentRecord] = [] with self._state_lock: for existing_id, existing in tuple(self._active_intents.items()): if existing.run_id == record.run_id: self._active_intents.pop(existing_id, None) + replaced_intents.append(existing) self._intent_sequence += 1 intent_id = ( f"intent-{_helpers._short_run_id(record.run_id)}-" f"{self._intent_sequence:03d}" ) + declared_at = _utc_now() record_payload = IntentRecord( intent_id=intent_id, run_id=record.run_id, report_digest=self._report_digest_value(record), status=IntentStatus.ACTIVE, - declared_at_utc=_utc_now(), + declared_at_utc=declared_at, scope=normalized_scope, intent_description=description, expected_effects=normalized_expected_effects, @@ -130,6 +177,29 @@ def _declare_change_intent( blast_radius_summary=blast_summary, ) self._active_intents[intent_id] = record_payload + self._runs.pin(record.run_id) + workspace_record = self._workspace_record_from_intent( + record=record, + intent=record_payload, + ttl_seconds=ttl, + ) + for replaced_intent in replaced_intents: + remove_workspace_intent( + root=record.root, + pid=self._agent_pid, + start_epoch=self._agent_start_epoch, + intent_id=replaced_intent.intent_id, + 
) + workspace_existing = list_workspace_intents(root=record.root) + workspace_registered = write_workspace_intent( + root=record.root, + record=workspace_record, + ) + concurrent_intents = detect_conflicts( + new_scope=normalized_scope.to_payload(), + existing=workspace_existing, + own_pid=self._agent_pid, + ) payload = record_payload.to_payload( short_run_id=_helpers._short_run_id(record.run_id) ) @@ -137,6 +207,9 @@ def _declare_change_intent( payload["do_not_touch_summary"] = blast_payload["do_not_touch_summary"] payload["review_context"] = blast_payload["review_context"] payload["review_context_summary"] = blast_payload["review_context_summary"] + payload["workspace_registered"] = workspace_registered + payload["concurrent_intents"] = concurrent_intents + payload["ttl_seconds"] = ttl return payload def _check_change_intent( @@ -157,6 +230,9 @@ def _check_change_intent( ) if self._is_intent_expired(record=record, intent=active_intent): expired = replace(active_intent, status=IntentStatus.EXPIRED) + with self._state_lock: + self._active_intents[expired.intent_id] = expired + self._sync_workspace_intent_status(record=record, intent=expired) return expired.to_payload( short_run_id=_helpers._short_run_id(record.run_id) ) @@ -173,6 +249,7 @@ def _check_change_intent( ) with self._state_lock: self._active_intents[updated.intent_id] = updated + self._sync_workspace_intent_status(record=record, intent=updated) payload = check_result.to_payload() payload["intent_id"] = updated.intent_id return payload @@ -180,19 +257,42 @@ def _check_change_intent( def _clear_change_intent(self, *, intent_id: str | None) -> dict[str, object]: with self._state_lock: removed_ids: tuple[str, ...] + removed_intents: tuple[IntentRecord, ...] 
if intent_id is not None: if intent_id not in self._active_intents: raise MCPServiceContractError( f"Unknown change intent id: {intent_id}" ) removed_ids = (intent_id,) - self._active_intents.pop(intent_id, None) + removed = self._active_intents.pop(intent_id) + removed_intents = (removed,) else: removed_ids = tuple(self._active_intents) + removed_intents = tuple(self._active_intents.values()) self._active_intents.clear() + workspace_targets: tuple[tuple[Path, str], ...] = tuple( + (record.root, removed_intent.intent_id) + for removed_intent in removed_intents + for record in (self._optional_run_record(removed_intent.run_id),) + if record is not None + ) + for removed_intent in removed_intents: + self._runs.unpin(removed_intent.run_id) + workspace_cleared = True + for root_path, removed_intent_id in workspace_targets: + workspace_cleared = ( + remove_workspace_intent( + root=root_path, + pid=self._agent_pid, + start_epoch=self._agent_start_epoch, + intent_id=removed_intent_id, + ) + and workspace_cleared + ) return { "cleared": len(removed_ids), "cleared_intent_ids": list(removed_ids), + "workspace_cleared": workspace_cleared, } def _resolve_intent( @@ -226,6 +326,9 @@ def _intent_payload_with_expiry( ) -> dict[str, object]: if self._is_intent_expired(record=record, intent=intent): intent = replace(intent, status=IntentStatus.EXPIRED) + with self._state_lock: + self._active_intents[intent.intent_id] = intent + self._sync_workspace_intent_status(record=record, intent=intent) return intent.to_payload(short_run_id=_helpers._short_run_id(record.run_id)) def _is_intent_expired( @@ -244,6 +347,148 @@ def _report_digest_value(self, record: MCPRunRecord) -> str: return value return record.run_id + def _workspace_record_from_intent( + self, + *, + record: MCPRunRecord, + intent: IntentRecord, + ttl_seconds: int, + ) -> WorkspaceIntentRecord: + scope_payload = intent.scope.to_payload() + declared_at = _parse_utc(intent.declared_at_utc) or utc_now() + return 
WorkspaceIntentRecord( + intent_id=intent.intent_id, + agent_pid=self._agent_pid, + agent_start_epoch=self._agent_start_epoch, + agent_label=self._agent_label, + run_id=record.run_id, + declared_at_utc=format_utc(declared_at), + expires_at_utc=expires_at( + declared_at=declared_at, + ttl_seconds=ttl_seconds, + ), + ttl_seconds=ttl_seconds, + status=intent.status.value, + intent=intent.intent_description, + scope=scope_payload, + scope_digest=compute_scope_digest(scope_payload), + blast_radius_summary=dict(intent.blast_radius_summary or {}), + ) + + def _sync_workspace_intent_status( + self, + *, + record: MCPRunRecord, + intent: IntentRecord, + ) -> None: + update_workspace_intent_status( + root=record.root, + pid=self._agent_pid, + start_epoch=self._agent_start_epoch, + intent_id=intent.intent_id, + new_status=intent.status.value, + ) + + def _list_workspace_intents(self, *, root: str | None) -> dict[str, object]: + root_path = self._resolve_workspace_root(root) + counts = workspace_status_counts(root=root_path) + records = list_workspace_intents(root=root_path) + return { + "workspace_intents": [ + item.to_payload( + own_pid=self._agent_pid, + own_start_epoch=self._agent_start_epoch, + ) + for item in records + ], + "stale_count": counts["stale_count"], + "orphaned_count": counts["orphaned_count"], + "total_agents": len({item.agent_pid for item in records}), + "own_pid": self._agent_pid, + "own_start_epoch": self._agent_start_epoch, + } + + def _gc_workspace_intents(self, *, root: str | None) -> dict[str, object]: + return gc_workspace(root=self._resolve_workspace_root(root)) + + def _reset_workspace_intent( + self, + *, + root: str | None, + intent_id: str | None, + ttl_seconds: int | None, + ) -> dict[str, object]: + if intent_id is None: + raise MCPServiceContractError( + "action='reset_workspace' requires intent_id." 
+ ) + root_path = self._resolve_workspace_root(root) + found = find_workspace_intent(root=root_path, intent_id=intent_id) + if found is None: + raise MCPServiceContractError(f"Unknown workspace intent id: {intent_id}") + _, workspace_record = found + reason = stale_reason(workspace_record) + is_own = ( + workspace_record.agent_pid == self._agent_pid + and workspace_record.agent_start_epoch == self._agent_start_epoch + ) + if reason in {"expired", "orphaned"}: + removed = remove_workspace_record(root=root_path, record=workspace_record) + return { + "intent_id": workspace_record.intent_id, + "action_taken": "removed" if removed else "failed", + "reason": reason, + } + if not is_own and not is_orphaned(workspace_record): + return { + "intent_id": workspace_record.intent_id, + "action_taken": "rejected", + "reason": "foreign_live_intent", + "agent_pid": workspace_record.agent_pid, + "agent_start_epoch": workspace_record.agent_start_epoch, + "agent_label": workspace_record.agent_label, + "message": ( + "Intent belongs to a live agent. Coordinate with the owning " + "agent or user before resetting it." 
+ ), + } + ttl = resolved_ttl_seconds( + ttl_seconds, + env_value=os.environ.get("CODECLONE_INTENT_TTL_SECONDS"), + ) + updated = update_workspace_intent_status( + root=root_path, + pid=workspace_record.agent_pid, + start_epoch=workspace_record.agent_start_epoch, + intent_id=workspace_record.intent_id, + new_status=WorkspaceIntentStatus.ACTIVE.value, + ttl_seconds=ttl, + ) + latest = find_workspace_intent(root=root_path, intent_id=intent_id) + latest_record = latest[1] if latest is not None else workspace_record + return { + "intent_id": workspace_record.intent_id, + "action_taken": "reset" if updated else "failed", + "new_status": latest_record.status, + "new_expires_at_utc": latest_record.expires_at_utc, + } + + def _resolve_workspace_root(self, root: str | None) -> Path: + if root is not None: + return _helpers._resolve_root(root) + try: + return self._runs.get(None).root + except MCPRunNotFoundError as exc: + raise MCPServiceContractError( + "Workspace intent actions require root or a latest MCP run." 
+ ) from exc + + def _optional_run_record(self, run_id: str) -> MCPRunRecord | None: + try: + return self._runs.get(run_id) + except MCPRunNotFoundError: + return None + def _blast_radius_summary( self, *, @@ -358,4 +603,14 @@ def _utc_now() -> str: ) +def _parse_utc(value: str) -> datetime | None: + try: + parsed = datetime.fromisoformat(value.replace("Z", "+00:00")) + except ValueError: + return None + if parsed.tzinfo is None: + return None + return parsed.astimezone(timezone.utc) + + __all__ = ["_MCPSessionIntentMixin"] diff --git a/codeclone/surfaces/mcp/_session_shared.py b/codeclone/surfaces/mcp/_session_shared.py index 97be9961..41f77cdd 100644 --- a/codeclone/surfaces/mcp/_session_shared.py +++ b/codeclone/surfaces/mcp/_session_shared.py @@ -129,6 +129,7 @@ "latest_runs", "review_state", "changed_scope", + "change_control", ] HelpDetail = Literal["compact", "normal"] MetricsDetailFamily = Literal[ @@ -236,6 +237,7 @@ "latest_runs", "review_state", "changed_scope", + "change_control", } ) _VALID_HELP_DETAILS = frozenset({"compact", "normal"}) @@ -373,6 +375,10 @@ class MCPHelpTopicSpec: f"{_MCP_BOOK_URL}19-inline-suppressions/", ) _MCP_GUIDE_DOC_LINK: Final[tuple[str, str]] = ("MCP usage guide", _MCP_GUIDE_URL) +_CHANGE_CONTROL_DOC_LINK: Final[tuple[str, str]] = ( + "Structural change controller", + f"{_MCP_BOOK_URL}24-structural-change-controller/", +) _HELP_TOPIC_SPECS: Final[dict[str, MCPHelpTopicSpec]] = { "workflow": MCPHelpTopicSpec( summary=( @@ -730,6 +736,65 @@ class MCPHelpTopicSpec: ), ), ), + "change_control": MCPHelpTopicSpec( + summary=( + "Change control is the edit-time MCP workflow: inspect concurrent " + "workspace intents, declare scope, read blast radius and patch " + "budget, then verify the finished patch." + ), + key_points=( + ( + "Start with manage_change_intent(action='list_workspace', " + "root=...) before analysis so active agents are visible early." 
+ ), + ( + "Run analyze_repository, then declare intent with allowed_files, " + "allowed_related, and forbidden paths before editing." + ), + ( + "Use get_blast_radius and check_patch_contract(mode='budget') " + "as the pre-edit boundary." + ), + ( + "Hard overlaps mean two agents claimed the same primary file; " + "soft overlaps mean primary files overlap related context." + ), + ( + "After editing, re-run analysis, check intent scope, verify " + "the patch contract, and clear the intent." + ), + ( + "Use reset_workspace for interrupted own, expired, or orphaned " + "intents; foreign live intents require coordination." + ), + ), + recommended_tools=( + "manage_change_intent", + "analyze_repository", + "get_blast_radius", + "check_patch_contract", + "create_review_receipt", + ), + doc_links=(_CHANGE_CONTROL_DOC_LINK, _MCP_INTERFACE_DOC_LINK), + warnings=( + ( + "The workspace registry is advisory coordination state under " + ".cache/codeclone/intents/, not analysis truth." + ), + ( + "Do not treat review_context as a ban or concurrent_intents as " + "an automatic blocker without human or orchestrator policy." + ), + ), + anti_patterns=( + "Editing files before declaring intent.", + "Silently expanding scope after a hard overlap or scope violation.", + ( + "Resetting a foreign live intent instead of coordinating with " + "the owning agent or user." 
+ ), + ), + ), } @@ -1022,6 +1087,7 @@ def __init__(self, *, history_limit: int = DEFAULT_MCP_HISTORY_LIMIT) -> None: self._lock = RLock() self._records: OrderedDict[str, MCPRunRecord] = OrderedDict() self._latest_run_id: str | None = None + self._pinned_run_ids: set[str] = set() def register(self, record: MCPRunRecord) -> MCPRunRecord: with self._lock: @@ -1029,8 +1095,7 @@ def register(self, record: MCPRunRecord) -> MCPRunRecord: self._records[record.run_id] = record self._records.move_to_end(record.run_id) self._latest_run_id = record.run_id - while len(self._records) > self._history_limit: - self._records.popitem(last=False) + self._prune_unpinned_locked() return record def get(self, run_id: str | None = None) -> MCPRunRecord: @@ -1060,13 +1125,44 @@ def records(self) -> tuple[MCPRunRecord, ...]: with self._lock: return tuple(self._records.values()) + def pin(self, run_id: str) -> str: + with self._lock: + resolved_run_id = self._resolve_run_id(run_id) + if resolved_run_id is None: + raise MCPRunNotFoundError("No matching MCP analysis run is available.") + self._pinned_run_ids.add(resolved_run_id) + return resolved_run_id + + def unpin(self, run_id: str) -> None: + with self._lock: + resolved_run_id = self._resolve_run_id(run_id) or run_id + self._pinned_run_ids.discard(resolved_run_id) + self._prune_unpinned_locked() + def clear(self) -> tuple[str, ...]: with self._lock: removed_run_ids = tuple(self._records.keys()) self._records.clear() + self._pinned_run_ids.clear() self._latest_run_id = None return removed_run_ids + def _prune_unpinned_locked(self) -> None: + while self._unpinned_count_locked() > self._history_limit: + for run_id in tuple(self._records): + if run_id in self._pinned_run_ids: + continue + self._records.pop(run_id, None) + if self._latest_run_id == run_id: + self._latest_run_id = next(reversed(self._records), None) + break + else: + break + self._pinned_run_ids.intersection_update(self._records) + + def _unpinned_count_locked(self) -> int: + 
return sum(1 for run_id in self._records if run_id not in self._pinned_run_ids) + __all__ = [ "CATEGORY_CLONE", diff --git a/codeclone/surfaces/mcp/_session_state_mixin.py b/codeclone/surfaces/mcp/_session_state_mixin.py index d1309ecd..03dfa8d1 100644 --- a/codeclone/surfaces/mcp/_session_state_mixin.py +++ b/codeclone/surfaces/mcp/_session_state_mixin.py @@ -73,6 +73,7 @@ load_pyproject_config, paginate, ) +from ._workspace_intents import remove_workspace_intent class _MCPSessionChangedProjectionMixin(_MCPSessionFindingMixin): @@ -81,6 +82,9 @@ class _MCPSessionChangedProjectionMixin(_MCPSessionFindingMixin): _review_state: dict[str, OrderedDict[str, str | None]] _last_gate_results: dict[str, dict[str, object]] _spread_max_cache: dict[str, int] + _active_intents: dict[str, IntentRecord] + _agent_pid: int + _agent_start_epoch: int def _build_changed_projection( self, @@ -1114,6 +1118,13 @@ def generate_pr_summary( } def clear_session_runs(self) -> dict[str, object]: + workspace_targets: list[tuple[Path, str]] = [] + for intent in self._active_intents.values(): + try: + record = self._runs.get(intent.run_id) + except MCPServiceContractError: + continue + workspace_targets.append((record.root, intent.intent_id)) removed_run_ids = self._runs.clear() with self._state_lock: cleared_review_entries = sum( @@ -1129,6 +1140,17 @@ def clear_session_runs(self) -> dict[str, object]: self._blast_radius_cache.clear() self._active_intents.clear() self._intent_sequence = 0 + workspace_cleared = True + for root_path, intent_id in workspace_targets: + workspace_cleared = ( + remove_workspace_intent( + root=root_path, + pid=self._agent_pid, + start_epoch=self._agent_start_epoch, + intent_id=intent_id, + ) + and workspace_cleared + ) return { "cleared_runs": len(removed_run_ids), "cleared_run_ids": [ @@ -1139,6 +1161,7 @@ def clear_session_runs(self) -> dict[str, object]: "cleared_spread_cache_entries": cleared_spread_cache_entries, "cleared_blast_radius_entries": 
cleared_blast_radius_entries, "cleared_intents": cleared_intents, + "workspace_cleared": workspace_cleared, } def read_resource(self, uri: str) -> str: diff --git a/codeclone/surfaces/mcp/_workspace_intents.py b/codeclone/surfaces/mcp/_workspace_intents.py new file mode 100644 index 00000000..248c50bd --- /dev/null +++ b/codeclone/surfaces/mcp/_workspace_intents.py @@ -0,0 +1,639 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import hashlib +import hmac +import os +from collections.abc import Mapping, Sequence +from dataclasses import dataclass, replace +from datetime import datetime, timedelta, timezone +from enum import Enum +from pathlib import Path +from typing import Final + +from ...cache.integrity import canonical_json +from ...utils.json_io import read_json_object, write_json_document_atomically + +REGISTRY_VERSION: Final = "1" +REGISTRY_DIR_PARTS: Final = (".cache", "codeclone", "intents") +DEFAULT_TTL_SECONDS: Final = 3600 +MIN_TTL_SECONDS: Final = 60 +MAX_TTL_SECONDS: Final = 86400 +_HEX_DIGEST_LENGTH: Final = 64 + + +class WorkspaceIntentStatus(str, Enum): + ACTIVE = "active" + CLEAN = "clean" + EXPANDED = "expanded" + VIOLATED = "violated" + EXPIRED = "expired" + ORPHANED = "orphaned" + + +@dataclass(frozen=True, slots=True) +class WorkspaceIntentRecord: + intent_id: str + agent_pid: int + agent_start_epoch: int + agent_label: str + run_id: str + declared_at_utc: str + expires_at_utc: str + ttl_seconds: int + status: str + intent: str + scope: dict[str, object] + scope_digest: str + blast_radius_summary: dict[str, object] + + def unsigned_payload(self) -> dict[str, object]: + return { + "registry_version": REGISTRY_VERSION, + "intent_id": self.intent_id, + "agent_pid": 
self.agent_pid, + "agent_start_epoch": self.agent_start_epoch, + "agent_label": self.agent_label, + "run_id": self.run_id, + "declared_at_utc": self.declared_at_utc, + "expires_at_utc": self.expires_at_utc, + "ttl_seconds": self.ttl_seconds, + "status": self.status, + "intent": self.intent, + "scope": self.scope, + "scope_digest": self.scope_digest, + "blast_radius_summary": self.blast_radius_summary, + } + + def signed_payload(self) -> dict[str, object]: + payload = self.unsigned_payload() + payload["integrity"] = {"payload_sha256": compute_intent_digest(payload)} + return payload + + def to_payload( + self, + *, + own_pid: int | None = None, + own_start_epoch: int | None = None, + ) -> dict[str, object]: + payload = self.unsigned_payload() + payload["is_own"] = self.agent_pid == own_pid and ( + own_start_epoch is None or self.agent_start_epoch == own_start_epoch + ) + return payload + + +def registry_dir(root: Path) -> Path: + return root.joinpath(*REGISTRY_DIR_PARTS) + + +def intent_filename(*, pid: int, start_epoch: int, intent_id: str) -> str: + return f"{pid}-{start_epoch}-{intent_id}.json" + + +def intent_path( + *, + root: Path, + pid: int, + start_epoch: int, + intent_id: str, +) -> Path: + return registry_dir(root) / intent_filename( + pid=pid, + start_epoch=start_epoch, + intent_id=intent_id, + ) + + +def utc_now() -> datetime: + return datetime.now(timezone.utc).replace(microsecond=0) + + +def format_utc(value: datetime) -> str: + return ( + value.astimezone(timezone.utc) + .replace(microsecond=0) + .isoformat() + .replace( + "+00:00", + "Z", + ) + ) + + +def resolved_ttl_seconds(value: object = None, *, env_value: object = None) -> int: + raw = value if value is not None else env_value + if raw is None: + return DEFAULT_TTL_SECONDS + if isinstance(raw, bool): + return DEFAULT_TTL_SECONDS + try: + parsed = int(str(raw).strip()) + except ValueError: + return DEFAULT_TTL_SECONDS + return min(MAX_TTL_SECONDS, max(MIN_TTL_SECONDS, parsed)) + + +def 
expires_at(*, declared_at: datetime, ttl_seconds: int) -> str: + return format_utc(declared_at + timedelta(seconds=ttl_seconds)) + + +def compute_scope_digest(scope: Mapping[str, object]) -> str: + return hashlib.sha256(canonical_json(dict(scope)).encode("utf-8")).hexdigest() + + +def compute_intent_digest(data: Mapping[str, object]) -> str: + digestable = {key: value for key, value in data.items() if key != "integrity"} + return hashlib.sha256(canonical_json(digestable).encode("utf-8")).hexdigest() + + +def verify_intent_integrity(data: Mapping[str, object]) -> bool: + integrity = _as_mapping(data.get("integrity")) + stored = integrity.get("payload_sha256") + if not _is_hex_digest(stored): + return False + expected = compute_intent_digest(data) + return hmac.compare_digest(str(stored), expected) + + +def validate_workspace_record(data: object) -> WorkspaceIntentRecord | None: + if not isinstance(data, Mapping): + return None + if not all(isinstance(key, str) for key in data): + return None + if not verify_intent_integrity(data): + return None + if data.get("registry_version") != REGISTRY_VERSION: + return None + intent_id = _required_string(data.get("intent_id")) + agent_pid = _positive_int(data.get("agent_pid")) + agent_start_epoch = _positive_int(data.get("agent_start_epoch")) + agent_label = _string_value(data.get("agent_label")) + run_id = _required_string(data.get("run_id")) + declared_at_utc = _required_string(data.get("declared_at_utc")) + expires_at_utc = _required_string(data.get("expires_at_utc")) + ttl_seconds = _positive_int(data.get("ttl_seconds")) + status = _required_string(data.get("status")) + intent = _required_string(data.get("intent")) + scope = _valid_scope(data.get("scope")) + scope_digest = data.get("scope_digest") + blast_radius_summary = _dict_payload(data.get("blast_radius_summary")) + if ( + intent_id is None + or agent_pid is None + or agent_start_epoch is None + or run_id is None + or declared_at_utc is None + or expires_at_utc is None 
+ or ttl_seconds is None + or status not in _valid_status_values() + or intent is None + or scope is None + or not _is_hex_digest(scope_digest) + or blast_radius_summary is None + ): + return None + if _parse_utc(declared_at_utc) is None or _parse_utc(expires_at_utc) is None: + return None + if compute_scope_digest(scope) != str(scope_digest): + return None + return WorkspaceIntentRecord( + intent_id=intent_id, + agent_pid=agent_pid, + agent_start_epoch=agent_start_epoch, + agent_label=agent_label, + run_id=run_id, + declared_at_utc=declared_at_utc, + expires_at_utc=expires_at_utc, + ttl_seconds=ttl_seconds, + status=status, + intent=intent, + scope=scope, + scope_digest=str(scope_digest), + blast_radius_summary=blast_radius_summary, + ) + + +def write_workspace_intent(*, root: Path, record: WorkspaceIntentRecord) -> bool: + try: + write_json_document_atomically( + path=intent_path( + root=root, + pid=record.agent_pid, + start_epoch=record.agent_start_epoch, + intent_id=record.intent_id, + ), + document=record.signed_payload(), + sort_keys=True, + trailing_newline=True, + ) + except OSError: + return False + return True + + +def update_workspace_intent_status( + *, + root: Path, + pid: int, + start_epoch: int, + intent_id: str, + new_status: str, + ttl_seconds: int | None = None, +) -> bool: + found = find_workspace_intent(root=root, intent_id=intent_id) + if found is None: + return False + path, record = found + if record.agent_pid != pid or record.agent_start_epoch != start_epoch: + return False + updated = _updated_record(record, new_status=new_status, ttl_seconds=ttl_seconds) + try: + write_json_document_atomically( + path=path, + document=updated.signed_payload(), + sort_keys=True, + trailing_newline=True, + ) + except OSError: + return False + return True + + +def remove_workspace_intent( + *, + root: Path, + pid: int, + start_epoch: int, + intent_id: str, +) -> bool: + path = intent_path( + root=root, + pid=pid, + start_epoch=start_epoch, + 
intent_id=intent_id, + ) + try: + path.unlink(missing_ok=True) + except OSError: + return False + return True + + +def remove_workspace_record(*, root: Path, record: WorkspaceIntentRecord) -> bool: + return remove_workspace_intent( + root=root, + pid=record.agent_pid, + start_epoch=record.agent_start_epoch, + intent_id=record.intent_id, + ) + + +def list_workspace_intents( + *, + root: Path, + exclude_stale: bool = True, +) -> tuple[WorkspaceIntentRecord, ...]: + records = [ + record + for _, record in _valid_registry_entries(root) + if not exclude_stale or stale_reason(record) is None + ] + return tuple(sorted(records, key=_record_sort_key)) + + +def find_workspace_intent( + *, + root: Path, + intent_id: str, +) -> tuple[Path, WorkspaceIntentRecord] | None: + matches = [ + (path, record) + for path, record in _valid_registry_entries(root) + if record.intent_id == intent_id + ] + if not matches: + return None + return sorted(matches, key=lambda item: _record_sort_key(item[1]))[-1] + + +def workspace_status_counts(*, root: Path) -> dict[str, int]: + records = [record for _, record in _valid_registry_entries(root)] + stale_records = [record for record in records if stale_reason(record) is not None] + return { + "stale_count": len(stale_records), + "orphaned_count": sum(1 for record in records if is_orphaned(record)), + "total_agents": len({record.agent_pid for record in records}), + } + + +def detect_conflicts( + *, + new_scope: Mapping[str, object], + existing: Sequence[WorkspaceIntentRecord], + own_pid: int, +) -> list[dict[str, object]]: + new_allowed, new_related = _scope_file_sets(new_scope) + conflicts: list[dict[str, object]] = [] + for record in existing: + if record.agent_pid == own_pid or stale_reason(record) is not None: + continue + existing_allowed, existing_related = _scope_file_sets(record.scope) + hard_overlap = tuple(sorted(new_allowed.intersection(existing_allowed))) + soft_overlap = tuple( + sorted( + 
new_allowed.intersection(existing_related).union( + new_related.intersection(existing_allowed) + ) + ) + ) + if hard_overlap or soft_overlap: + conflicts.append( + { + "intent_id": record.intent_id, + "agent_pid": record.agent_pid, + "agent_start_epoch": record.agent_start_epoch, + "agent_label": record.agent_label, + "intent": record.intent, + "overlap_type": _overlap_type( + hard=bool(hard_overlap), + soft=bool(soft_overlap), + ), + "hard_overlap": list(hard_overlap), + "soft_overlap": list(soft_overlap), + "declared_at_utc": record.declared_at_utc, + "expires_at_utc": record.expires_at_utc, + } + ) + return sorted( + conflicts, + key=lambda item: ( + str(item["overlap_type"]), + str(item["agent_label"]), + _sort_agent_pid(item.get("agent_pid")), + str(item["intent_id"]), + ), + ) + + +def gc_workspace(*, root: Path) -> dict[str, object]: + removed_ids: list[str] = [] + removed_reasons: dict[str, str] = {} + corrupted_filenames: list[str] = [] + for path in _registry_files(root): + payload = _read_payload(path) + record = validate_workspace_record(payload) if payload is not None else None + if record is None: + if _unlink(path): + corrupted_filenames.append(path.name) + continue + reason = stale_reason(record) + if reason is None: + continue + if _unlink(path): + removed_ids.append(record.intent_id) + removed_reasons[record.intent_id] = reason + remaining = len(list_workspace_intents(root=root, exclude_stale=False)) + return { + "removed": len(removed_ids), + "removed_intent_ids": removed_ids, + "removed_reasons": removed_reasons, + "corrupted_removed": len(corrupted_filenames), + "corrupted_filenames": corrupted_filenames, + "remaining": remaining, + } + + +def is_stale(record: WorkspaceIntentRecord) -> bool: + return stale_reason(record) is not None + + +def stale_reason(record: WorkspaceIntentRecord) -> str | None: + if record.status == WorkspaceIntentStatus.EXPIRED.value: + return "expired" + if record.status == WorkspaceIntentStatus.ORPHANED.value: + return 
"orphaned" + expires = _parse_utc(record.expires_at_utc) + if expires is None or expires <= utc_now(): + return "expired" + if is_orphaned(record): + return "orphaned" + return None + + +def is_orphaned(record: WorkspaceIntentRecord) -> bool: + return not _is_pid_alive(record.agent_pid) + + +def _is_pid_alive(pid: int) -> bool: + if pid <= 0: + return False + try: + os.kill(pid, 0) + except ProcessLookupError: + return False + except PermissionError: + return True + except OSError: + return True + return True + + +def _updated_record( + record: WorkspaceIntentRecord, + *, + new_status: str, + ttl_seconds: int | None, +) -> WorkspaceIntentRecord: + if ttl_seconds is None: + return replace(record, status=new_status) + declared_at = utc_now() + return replace( + record, + declared_at_utc=format_utc(declared_at), + expires_at_utc=expires_at(declared_at=declared_at, ttl_seconds=ttl_seconds), + ttl_seconds=ttl_seconds, + status=new_status, + ) + + +def _valid_registry_entries( + root: Path, +) -> tuple[tuple[Path, WorkspaceIntentRecord], ...]: + entries: list[tuple[Path, WorkspaceIntentRecord]] = [] + for path in _registry_files(root): + payload = _read_payload(path) + record = validate_workspace_record(payload) if payload is not None else None + if record is not None: + entries.append((path, record)) + return tuple(entries) + + +def _registry_files(root: Path) -> tuple[Path, ...]: + directory = registry_dir(root) + try: + return tuple(sorted(directory.glob("*.json"))) + except OSError: + return () + + +def _read_payload(path: Path) -> dict[str, object] | None: + try: + return read_json_object(path) + except (OSError, TypeError, ValueError): + return None + + +def _unlink(path: Path) -> bool: + try: + path.unlink(missing_ok=True) + except OSError: + return False + return True + + +def _record_sort_key(record: WorkspaceIntentRecord) -> tuple[str, int, str]: + return (record.declared_at_utc, record.agent_pid, record.intent_id) + + +def _as_mapping(value: object) -> 
Mapping[str, object]: + return value if isinstance(value, Mapping) else {} + + +def _dict_payload(value: object) -> dict[str, object] | None: + if not isinstance(value, Mapping): + return None + if not all(isinstance(key, str) for key in value): + return None + return dict(value) + + +def _string_value(value: object) -> str: + return value if isinstance(value, str) else "" + + +def _required_string(value: object) -> str | None: + if not isinstance(value, str): + return None + text = value.strip() + return text or None + + +def _positive_int(value: object) -> int | None: + if isinstance(value, bool) or not isinstance(value, int) or value <= 0: + return None + return value + + +def _is_hex_digest(value: object) -> bool: + if not isinstance(value, str) or len(value) != _HEX_DIGEST_LENGTH: + return False + return all(char in "0123456789abcdef" for char in value.lower()) + + +def _valid_status_values() -> frozenset[str]: + return frozenset(status.value for status in WorkspaceIntentStatus) + + +def _valid_scope(value: object) -> dict[str, object] | None: + if not isinstance(value, Mapping): + return None + if not all(isinstance(key, str) for key in value): + return None + allowed = _valid_path_list(value.get("allowed_files"), required=True) + if allowed is None: + return None + related = _valid_path_list(value.get("allowed_related", ()), required=False) + forbidden = _valid_path_list(value.get("forbidden", ()), required=False) + if related is None or forbidden is None: + return None + return { + "allowed_files": allowed, + "allowed_related": related, + "forbidden": forbidden, + } + + +def _valid_path_list(value: object, *, required: bool) -> list[str] | None: + if not isinstance(value, Sequence) or isinstance(value, (str, bytes, bytearray)): + return None + paths: list[str] = [] + for item in value: + if not isinstance(item, str): + return None + path = item.replace("\\", "/").strip() + if not path: + continue + if Path(path).is_absolute() or ".." 
in Path(path).parts: + return None + paths.append(path.rstrip("/")) + deduped = sorted(set(paths)) + if required and not deduped: + return None + return deduped + + +def _scope_file_sets(scope: Mapping[str, object]) -> tuple[set[str], set[str]]: + allowed = set(_valid_path_list(scope.get("allowed_files"), required=False) or []) + related = set( + _valid_path_list(scope.get("allowed_related", ()), required=False) or [] + ) + return allowed, related + + +def _parse_utc(value: str) -> datetime | None: + try: + parsed = datetime.fromisoformat(value.replace("Z", "+00:00")) + except ValueError: + return None + if parsed.tzinfo is None: + return None + return parsed.astimezone(timezone.utc) + + +def _sort_agent_pid(value: object) -> int: + return value if isinstance(value, int) and not isinstance(value, bool) else 0 + + +def _overlap_type(*, hard: bool, soft: bool) -> str: + if hard and soft: + return "both" + return "hard" if hard else "soft" + + +__all__ = [ + "DEFAULT_TTL_SECONDS", + "MAX_TTL_SECONDS", + "MIN_TTL_SECONDS", + "REGISTRY_VERSION", + "WorkspaceIntentRecord", + "WorkspaceIntentStatus", + "compute_intent_digest", + "compute_scope_digest", + "detect_conflicts", + "expires_at", + "find_workspace_intent", + "format_utc", + "gc_workspace", + "intent_filename", + "intent_path", + "is_orphaned", + "is_stale", + "list_workspace_intents", + "registry_dir", + "remove_workspace_intent", + "remove_workspace_record", + "resolved_ttl_seconds", + "stale_reason", + "update_workspace_intent_status", + "utc_now", + "validate_workspace_record", + "verify_intent_integrity", + "workspace_status_counts", + "write_workspace_intent", +] diff --git a/codeclone/surfaces/mcp/server.py b/codeclone/surfaces/mcp/server.py index 6cfdb421..76738ea9 100644 --- a/codeclone/surfaces/mcp/server.py +++ b/codeclone/surfaces/mcp/server.py @@ -44,8 +44,13 @@ "bounded metrics drill-down, and prefer generate_pr_summary(format='markdown') " "unless machine JSON is required. 
Coverage join accepts external Cobertura " "XML as a current-run signal and does not become baseline truth. Pass an " - "absolute repository root to analysis tools. This server never updates " - "baselines and never mutates source files." + "absolute repository root to analysis tools. For file edits, call " + "manage_change_intent(action='list_workspace', root=...) before analysis, " + "then analyze, declare intent, inspect blast radius and patch budget, edit " + "within scope, re-analyze, verify, and clear intent. If concurrent intents " + "overlap, narrow scope or coordinate. This server never updates baselines " + "and never mutates source files, analysis cache, or reports; it may write " + "ephemeral workspace coordination state under .cache/codeclone/intents/." ) _MCP_INSTALL_HINT = ( "CodeClone MCP support requires the optional 'mcp' extra. " @@ -441,7 +446,8 @@ def create_review_receipt( "canonical doc links. Use this when workflow or contract meaning " "is unclear. This is bounded guidance, not a full manual. " "Supported topics: workflow, analysis_profile, suppressions, " - "baseline, coverage, latest_runs, review_state, changed_scope." + "baseline, coverage, latest_runs, review_state, changed_scope, " + "change_control." ), annotations=read_only_tool, structured_output=True, @@ -871,10 +877,14 @@ def list_reviewed_findings(run_id: str | None = None) -> dict[str, object]: title="Manage Change Intent", description=( "Manage the agent change intent lifecycle for the current MCP " - "session. Actions: 'declare' to declare intended scope before " - "editing, 'get' to retrieve active intent, 'check' to verify " - "actual diff against declared scope, and 'clear' to remove intent. " - "Intent is session-local and in-memory." + "session and optional workspace registry. 
Actions: 'list_workspace' " + "to inspect concurrent workspace intents, 'declare' to declare " + "intended scope before editing, 'get' to retrieve active intent, " + "'check' to verify actual diff against declared scope, 'clear' to " + "remove intent, 'gc_workspace' to clean stale registry files, and " + "'reset_workspace' for interrupted-session recovery. In-memory " + "intent state remains session-local; workspace coordination state " + "is ephemeral under .cache/codeclone/intents/." ), annotations=session_tool, structured_output=True, @@ -888,6 +898,8 @@ def manage_change_intent( expected_effects: list[str] | None = None, diff_ref: str | None = None, changed_files: list[str] | None = None, + root: str | None = None, + ttl_seconds: int | None = None, ) -> dict[str, object]: return service.manage_change_intent( action=action, @@ -898,6 +910,8 @@ def manage_change_intent( expected_effects=expected_effects, diff_ref=diff_ref, changed_files=changed_files, + root=root, + ttl_seconds=ttl_seconds, ) @tool( diff --git a/codeclone/surfaces/mcp/service.py b/codeclone/surfaces/mcp/service.py index cda1d6e5..0966032a 100644 --- a/codeclone/surfaces/mcp/service.py +++ b/codeclone/surfaces/mcp/service.py @@ -261,6 +261,8 @@ def _apply_public_method_signatures() -> None: _kwonly("expected_effects", "Sequence[str] | None", None), _kwonly("diff_ref", "str | None", None), _kwonly("changed_files", "Sequence[str] | None", None), + _kwonly("root", "str | None", None), + _kwonly("ttl_seconds", "int | None", None), ), "get_remediation": ( _kwonly("finding_id", "str"), diff --git a/codeclone/surfaces/mcp/session.py b/codeclone/surfaces/mcp/session.py index 29b6cdf4..118c728d 100644 --- a/codeclone/surfaces/mcp/session.py +++ b/codeclone/surfaces/mcp/session.py @@ -6,6 +6,9 @@ from __future__ import annotations +import os +import time + from ...cache.store import resolve_cache_status from ...report.meta import build_report_meta as _build_report_meta from ...report.meta import 
current_report_timestamp_utc as _current_report_timestamp_utc @@ -90,6 +93,9 @@ def __init__(self, *, history_limit: int = DEFAULT_MCP_HISTORY_LIMIT) -> None: ] = {} self._active_intents: dict[str, IntentRecord] = {} self._intent_sequence = 0 + self._agent_pid = os.getpid() + self._agent_start_epoch = int(time.time()) + self._agent_label = os.environ.get("CODECLONE_AGENT_LABEL", "") def analyze_repository(self, request: MCPAnalysisRequest) -> dict[str, object]: self._validate_analysis_request(request) diff --git a/docs/book/20-mcp-interface.md b/docs/book/20-mcp-interface.md index e09b5c2b..12543d0f 100644 --- a/docs/book/20-mcp-interface.md +++ b/docs/book/20-mcp-interface.md @@ -58,11 +58,15 @@ second, then drill into one finding or one hotspot family. `get_blast_radius` keeps hard guardrails separate from review context. `do_not_touch` is limited to actionable negative context such as baselines, -generated CodeClone state, explicit forbidden paths, or files affected by the -blast radius but outside the declared edit scope. Report-only signals such as -security boundary inventory and overloaded-module candidates are returned as -`review_context`, not as edit prohibitions. Long context sections include -`total`, `shown`, and `truncated` summaries. +generated CodeClone state, and explicit forbidden paths. Report-only signals +such as security boundary inventory and overloaded-module candidates are +returned as `review_context`, not as edit prohibitions. Long context sections +include `total`, `shown`, and `truncated` summaries. + +`manage_change_intent` is session-local for intent truth, but v2.1 also writes +best-effort workspace coordination records under `.cache/codeclone/intents/`. +Those records are advisory multi-agent visibility only; MCP still never updates +source files, baselines, reports, or analysis cache data. `create_review_receipt` is a read-only audit artifact. 
It composes stored report provenance, optional intent/blast-radius state, reviewed findings, @@ -112,7 +116,7 @@ does not persist outside the MCP session. |--------------------------|-------------------------------------------------------------------------|-------------------------------------------------------------------------------------| | `mark_finding_reviewed` | `finding_id`, `run_id`, `note` | Mark a finding as reviewed in the current in-memory MCP session. | | `list_reviewed_findings` | `run_id` | Return reviewed markers currently held in process memory. | -| `manage_change_intent` | `action`, `run_id`, `intent_id`, `scope`, `changed_files` or `diff_ref` | Declare, inspect, check, or clear session-local change intent for governed edits. | +| `manage_change_intent` | `action`, `root`, `run_id`, `intent_id`, `scope`, `ttl_seconds`, `changed_files` or `diff_ref` | Declare/check/clear session-local intent and list/gc/reset workspace coordination records. | | `clear_session_runs` | none | Clear in-memory run history and session-local review state for this server process. | ## Resources @@ -134,8 +138,8 @@ Resources are deterministic read-only projections over stored runs. ## Contract rules -- MCP is read-only with respect to source files, baselines, cache artifacts, - and report artifacts. +- MCP is read-only with respect to source files, baselines, analysis cache + artifacts such as `cache.json`, and report artifacts. - MCP reuses the same canonical report document as CLI/JSON/HTML/SARIF. - Finding ids, ordering, and summary data are deterministic projections over the stored run. diff --git a/docs/book/24-structural-change-controller.md b/docs/book/24-structural-change-controller.md index d7683183..990bd4db 100644 --- a/docs/book/24-structural-change-controller.md +++ b/docs/book/24-structural-change-controller.md @@ -1,13 +1,13 @@ # Structural Change Controller -CodeClone v2.1 adds a session-local MCP control layer for AI-assisted edits. 
-The controller is not a second analyzer and does not persist state. It composes -over stored MCP runs and the canonical report contract. +CodeClone v2.1 adds an MCP control layer for AI-assisted edits. The controller +is not a second analyzer. It composes over stored MCP runs and the canonical +report contract. ## Status The v2.1 alpha currently includes intent, blast-radius, patch-contract checks, -and review receipts: +review receipts, and workspace intent visibility: | Phase | Status | MCP surface | |-------|--------|-------------| @@ -15,6 +15,7 @@ and review receipts: | Blast radius | Live in `2.1.0a1` | `get_blast_radius` | | Patch contract | Live in `2.1.0a1` | `check_patch_contract` | | Review receipt | Live in `2.1.0a1` | `create_review_receipt` | +| Workspace intent registry | Live in `2.1.0a1` | `manage_change_intent` | | Claim guard | Planned | `validate_review_claims` | Claim guard is a roadmap item until implemented and tested. Public clients @@ -23,26 +24,33 @@ must not assume it exists in the current MCP tool list. ## Contract - The canonical report remains the source of truth. -- Controller state is session-local and in-memory. -- MCP must not mutate source files, baselines, cache, reports, or repo state. +- Intent truth is session-local and in-memory. +- MCP may write ephemeral workspace coordination records under + `.cache/codeclone/intents/`. +- MCP must not mutate source files, baselines, reports, or analysis cache data. - Tools derive responses from existing run/report facts rather than LLM inference. - Report-only context is review context, not an edit prohibition. ## Pre-Change Workflow -1. Run `analyze_repository` or `analyze_changed_paths`. -2. Declare scope with `manage_change_intent(action="declare")`. -3. Inspect the returned `blast_radius_summary`. -4. Optionally call `get_blast_radius` for full dependent/context detail. -5. Call `check_patch_contract(mode="budget")` to inspect the active regression +1. 
Call `manage_change_intent(action="list_workspace", root="/abs/repo")` to + see active intents from other agents before analysis. +2. Run `analyze_repository` or `analyze_changed_paths`. +3. Declare scope with `manage_change_intent(action="declare")`. +4. If `concurrent_intents` is non-empty, narrow scope or coordinate before + editing. +5. Inspect the returned `blast_radius_summary`. +6. Optionally call `get_blast_radius` for full dependent/context detail. +7. Call `check_patch_contract(mode="budget")` to inspect the active regression budget and metric headroom before editing. -6. After editing, call `manage_change_intent(action="check")` with +8. After editing, call `manage_change_intent(action="check")` with `changed_files` or `diff_ref`. -7. Run analysis again, then call `check_patch_contract(mode="verify")` with +9. Run analysis again, then call `check_patch_contract(mode="verify")` with explicit `before_run_id` and `after_run_id`. -8. Call `create_review_receipt` to collect provenance, scope, blast radius, +10. Call `create_review_receipt` to collect provenance, scope, blast radius, reviewed findings, patch status, human decision points, and claims-not-made. +11. Call `manage_change_intent(action="clear")` when the edit is complete. `manage_change_intent` can return `clean`, `expanded`, `violated`, or `expired`. Expiry means the report digest changed since declaration. @@ -53,12 +61,16 @@ previews gates, validates scope when intent is available, and reports baseline abuse signals. Missing before or after runs return `status="unverified"` with `reason="no_before_run"` or `reason="no_after_run"`. +Budget payloads use `null` for disabled numeric thresholds rather than sentinel +values. Boolean policy gates are named `forbid_*`, for example +`forbid_dead_code_regression`. 
+ ## Blast Radius Payload `get_blast_radius` separates hard edit guardrails from review context: - `do_not_touch`: actionable negative context such as baseline/cache state, - explicit forbidden paths, or affected files outside declared scope. + generated CodeClone state, or explicit forbidden paths. - `review_context`: report-only facts such as security boundary inventory, overloaded-module candidates, known baseline debt, and golden fixture surfaces. @@ -66,6 +78,23 @@ abuse signals. Missing before or after runs return `status="unverified"` with Long context sections are bounded and include summaries with `total`, `shown`, and `truncated`. +## Workspace Intent Registry + +`manage_change_intent` also supports workspace actions for multi-agent +coordination: + +- `list_workspace`: list active workspace intent records from all agents for a + repository root. +- `gc_workspace`: remove expired, orphaned, or corrupted registry records. +- `reset_workspace`: recover an own, expired, or orphaned intent. Foreign live + intents are rejected and require coordination. + +Registry files live under `.cache/codeclone/intents/` and are protected with a +SHA-256 integrity digest over canonical JSON. This detects accidental +corruption, not malicious tampering by a user with write access. Conflicts are +advisory: hard overlap means two agents claimed the same primary file; soft +overlap means primary files overlap related context. + ## Review Receipt Payload `create_review_receipt` returns `format="markdown"` by default and can return a diff --git a/docs/mcp.md b/docs/mcp.md index 32526eec..b8547524 100644 --- a/docs/mcp.md +++ b/docs/mcp.md @@ -117,7 +117,7 @@ run-scoped URI templates. 
| `get_blast_radius` | Pre-change risk boundary: direct dependents, clone cohorts, coverage gaps, actionable do-not-touch paths, and review-only context | | `check_patch_contract` | Pre-edit regression budget or post-edit before/after verification over stored runs, gates, intent scope, and baseline-abuse signals | | `create_review_receipt` | Deterministic markdown or JSON audit artifact: provenance, scope, blast radius, reviewed findings, patch status, human decisions, and claims-not-made | -| `help` | Semantic guide for workflow, analysis profile, baseline, suppressions, review state, changed-scope | +| `help` | Semantic guide for workflow, change control, analysis profile, baseline, suppressions, review state, changed-scope | | `compare_runs` | Run-to-run delta: regressions, improvements, health change | | `list_findings` | Filtered, paginated findings; use after hotspots or `check_*` | | `get_finding` | Single finding detail by id; defaults to `normal` detail level | @@ -133,7 +133,7 @@ run-scoped URI templates. | `generate_pr_summary` | PR-friendly markdown or JSON summary | | `mark_finding_reviewed` | Session-local review marker (in-memory) | | `list_reviewed_findings` | List reviewed findings for a run | -| `manage_change_intent` | Declare, inspect, check, or clear session-local edit scope intent | +| `manage_change_intent` | Declare/check/clear edit intent; list/gc/reset ephemeral workspace intent records for multi-agent coordination | | `clear_session_runs` | Reset in-memory runs and session state | > `check_*` tools query stored runs only. Call `analyze_repository` or @@ -144,12 +144,17 @@ run-scoped URI templates. - `check_*` responses include only the relevant health dimension. - `get_blast_radius` separates edit prohibitions from context: `do_not_touch` contains actionable negative context such as baselines, - generated CodeClone state, explicit forbidden paths, and affected files - outside declared scope. 
Report-only signals are returned as `review_context`. - Long context sections include `total`, `shown`, and `truncated` summaries. + generated CodeClone state, and explicit forbidden paths. Report-only signals + are returned as `review_context`. Long context sections include `total`, + `shown`, and `truncated` summaries. - `check_patch_contract` does not run analysis. `mode="budget"` reads the selected stored run and optional intent; `mode="verify"` compares explicit before/after stored runs and returns `unverified` when either side is missing. + Disabled numeric thresholds are `null`; boolean policy gates use `forbid_*` + names. +- `manage_change_intent(action="list_workspace", root=...)` reads + `.cache/codeclone/intents/` to show active intents from other agents. The + registry is advisory coordination state, not analysis truth. - `create_review_receipt` does not run analysis or mutate state. It composes stored report provenance, optional intent/blast-radius state, reviewed findings, structural delta, patch-contract status, and explicit @@ -222,9 +227,10 @@ trigger analysis. `codeclone://latest/*` always resolves to the most recent run registered in the current MCP server session. A later `analyze_repository` or `analyze_changed_paths` call moves that pointer. -`mark_finding_reviewed`, `manage_change_intent`, and `clear_session_runs` -mutate only in-memory session state. They never touch source files, baselines, -cache, or report artifacts. +`mark_finding_reviewed` and most `manage_change_intent` state are in-memory. +Workspace intent records are the exception: they are ephemeral coordination +files under `.cache/codeclone/intents/`. MCP still never touches source files, +baselines, report artifacts, or analysis cache data. 
## Recommended workflows @@ -238,7 +244,23 @@ analyze_repository → get_run_summary or get_production_triage ### Semantic uncertainty recovery ``` -help(topic="workflow" | "analysis_profile" | "baseline" | "coverage" | "suppressions" | "latest_runs" | "review_state" | "changed_scope") +help(topic="workflow" | "change_control" | "analysis_profile" | "baseline" | "coverage" | "suppressions" | "latest_runs" | "review_state" | "changed_scope") +``` + +### Change-control edit workflow + +``` +manage_change_intent(action="list_workspace", root="/abs/repo") +→ analyze_repository +→ manage_change_intent(action="declare", scope={...}) +→ get_blast_radius +→ check_patch_contract(mode="budget") +→ edit within scope +→ analyze_repository +→ manage_change_intent(action="check", changed_files=[...]) +→ check_patch_contract(mode="verify", before_run_id=..., after_run_id=...) +→ create_review_receipt +→ manage_change_intent(action="clear") ``` ### Full repository review @@ -354,9 +376,10 @@ If `codeclone-mcp` is not on `PATH`, use an absolute path to the launcher. ## Security -- Read-only by design: no source mutation, no baseline/cache writes. -- Run history, review markers, and change intents are in-memory only — lost on - process stop. +- Read-only with respect to source, baselines, reports, and analysis cache data. +- Run history and review markers are in-memory only. Change intents are + in-memory for session truth, with optional ephemeral coordination records + under `.cache/codeclone/intents/`. - Repository access is limited to what the server process can read locally. - `streamable-http` binds to loopback by default; `--allow-remote` is explicit opt-in. diff --git a/plugins/codeclone/skills/codeclone-change-control/SKILL.md b/plugins/codeclone/skills/codeclone-change-control/SKILL.md index dd4e9dee..9e9888ad 100644 --- a/plugins/codeclone/skills/codeclone-change-control/SKILL.md +++ b/plugins/codeclone/skills/codeclone-change-control/SKILL.md @@ -15,9 +15,10 @@ patch. 
## Activation contract Use this workflow whenever this skill is selected for a repository edit. Start -with a pre-edit MCP run and keep the returned `run_id` and `intent_id` for -verification. If a required MCP tool is unavailable in the connected server, -continue with the available steps and state which step was skipped. +with a workspace intent check, then run pre-edit analysis and keep the returned +`run_id` and `intent_id` for verification. If a required MCP tool is unavailable +in the connected server, continue with the available steps and state which step +was skipped. Do not downgrade the task to an ordinary edit after this skill has been selected. The only valid reasons to skip the workflow are: no repository files @@ -27,10 +28,14 @@ analysis only. ## Rules - Use MCP tools only when invoked through the CodeClone plugin. -- If no latest MCP run exists, call `analyze_repository` yourself before - declaring intent. +- Call `manage_change_intent(action="list_workspace", root=...)` before + analysis when the connected server supports it. +- If no latest MCP run exists after the workspace check, call + `analyze_repository` yourself before declaring intent. - Declare intent before editing. - Do not silently expand scope. +- If concurrent workspace intents overlap your files, narrow scope or + coordinate before editing. - Treat blast-radius dependents and clone cohorts as review context, not permission to modify. - Treat `do_not_touch` as a boundary unless the user explicitly expands scope. @@ -44,7 +49,8 @@ analysis only. ## Workflow ``` -analyze_repository +manage_change_intent(action="list_workspace", root=...) +→ analyze_repository → manage_change_intent(action="declare") → get_blast_radius → check_patch_contract(mode="budget") @@ -54,13 +60,46 @@ analyze_repository → check_patch_contract(mode="verify") → validate_review_claims → create_review_receipt +→ manage_change_intent(action="clear") ``` -Use the full path for normal code changes. 
Skip only steps that are unavailable -in the connected CodeClone MCP server. +Older MCP servers may not support `list_workspace`, `validate_review_claims`, +or `create_review_receipt`. Skip only unavailable steps and say so explicitly. Keep the pre-edit `run_id` as `before_run_id`; verify against the explicit after-run produced after the edit. +## Workspace check + +Before analysis, call: + +``` +manage_change_intent(action="list_workspace", root="/absolute/repo") +``` + +If it returns active intents from other agents, compare their `scope` to your +planned files. A hard overlap means another agent claimed the same primary file. +A soft overlap means your primary file is in another agent's related context, or +the reverse. In either case, narrow scope or coordinate before editing. + +## Legacy workflow + +Use this only when `list_workspace` is unavailable in the connected MCP server: + +``` +analyze_repository +→ manage_change_intent(action="declare") +→ get_blast_radius +→ check_patch_contract(mode="budget") +→ edit code +→ analyze_repository +→ manage_change_intent(action="check") +→ check_patch_contract(mode="verify") +→ validate_review_claims +→ create_review_receipt +``` + +Still clear any declared intent when the server supports `clear`. 
+ ## Intent first Before editing, call: diff --git a/tests/fixtures/contract_snapshots/mcp_tool_schemas.json b/tests/fixtures/contract_snapshots/mcp_tool_schemas.json index 2d52350f..10554aa0 100644 --- a/tests/fixtures/contract_snapshots/mcp_tool_schemas.json +++ b/tests/fixtures/contract_snapshots/mcp_tool_schemas.json @@ -1791,6 +1791,30 @@ ], "default": null, "title": "Changed Files" + }, + "root": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Root" + }, + "ttl_seconds": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Ttl Seconds" } }, "required": [ diff --git a/tests/test_mcp_server.py b/tests/test_mcp_server.py index 25f8f607..866b23fe 100644 --- a/tests/test_mcp_server.py +++ b/tests/test_mcp_server.py @@ -115,6 +115,10 @@ def test_mcp_server_exposes_expected_read_only_tools() -> None: assert "default or pyproject-resolved thresholds for the first pass" in str( server.instructions ) + assert "manage_change_intent(action='list_workspace', root=...)" in str( + server.instructions + ) + assert ".cache/codeclone/intents/" in str(server.instructions) tools = {tool.name: tool for tool in asyncio.run(server.list_tools())} assert set(tools) == { @@ -206,11 +210,13 @@ def test_mcp_server_exposes_expected_read_only_tools() -> None: assert "mode='budget'" in str(tools["check_patch_contract"].description) assert "auditable review receipt" in str(tools["create_review_receipt"].description) assert "claims-not-made" in str(tools["create_review_receipt"].description) - assert "Intent is session-local" in str(tools["manage_change_intent"].description) + assert "list_workspace" in str(tools["manage_change_intent"].description) + assert ".cache/codeclone/intents/" in str(tools["manage_change_intent"].description) assert "bounded guidance, not a full manual" in str(tools["help"].description) assert "workflow, analysis_profile, suppressions, baseline" in str( 
tools["help"].description ) + assert "change_control" in str(tools["help"].description) assert init_options.server_version == CODECLONE_VERSION assert "Prefer list_hotspots or focused check_* tools" in str( tools["list_findings"].description diff --git a/tests/test_mcp_service.py b/tests/test_mcp_service.py index bdff4b89..2e84bd19 100644 --- a/tests/test_mcp_service.py +++ b/tests/test_mcp_service.py @@ -28,6 +28,7 @@ import codeclone.surfaces.mcp._session_runtime as mcp_runtime_mod import codeclone.surfaces.mcp._session_shared as mcp_shared_mod import codeclone.surfaces.mcp._session_state_mixin as mcp_state_mod +import codeclone.surfaces.mcp._workspace_intents as mcp_workspace_intents_mod import codeclone.surfaces.mcp.session as mcp_session_mod from codeclone.baseline import Baseline, current_python_tag from codeclone.baseline.metrics_baseline import MetricsBaseline @@ -853,6 +854,13 @@ def test_mcp_service_help_validates_topic_and_detail() -> None: f"v{BASELINE_SCHEMA_VERSION}" in point for point in cast("list[str]", baseline_help["key_points"]) ) + change_control = service.get_help(topic="change_control", detail="normal") + assert "list_workspace" in str(change_control["key_points"]) + assert "manage_change_intent" in cast( + "list[str]", + change_control["recommended_tools"], + ) + assert "foreign live intent" in str(change_control["anti_patterns"]) with pytest.raises(MCPServiceContractError, match="Invalid value for topic"): service.get_help(topic="gates") @@ -2197,6 +2205,17 @@ def _raise_subprocess(*args: object, **kwargs: object) -> object: assert tuple(record.run_id for record in store.records()) == ("second",) with pytest.raises(MCPRunNotFoundError): store.get("first") + + pinned_store = mcp_shared_mod.CodeCloneMCPRunStore(history_limit=1) + pinned_store.register(first) + pinned_store.pin("first") + pinned_store.register(second) + assert tuple(record.run_id for record in pinned_store.records()) == ( + "first", + "second", + ) + 
pinned_store.unpin("first") + assert tuple(record.run_id for record in pinned_store.records()) == ("second",) with pytest.raises(ValueError): mcp_shared_mod.CodeCloneMCPRunStore(history_limit=11) @@ -2365,13 +2384,13 @@ def test_mcp_service_manage_change_intent_lifecycle(tmp_path: Path) -> None: ) assert ( cast(dict[str, object], declared["blast_radius_summary"])["do_not_touch_count"] - == 5 + == 3 ) assert ( cast(dict[str, object], declared["blast_radius_summary"])[ "review_context_count" ] - == 1 + == 2 ) assert [ item["category"] @@ -2379,18 +2398,24 @@ def test_mcp_service_manage_change_intent_lifecycle(tmp_path: Path) -> None: ] == [ "baseline_or_generated_state", "baseline_or_generated_state", - "affected_but_not_allowed", "explicit_forbidden", - "affected_but_not_allowed", ] assert cast("list[dict[str, str]]", declared["review_context"]) == [ + { + "path": "pkg/b.py", + "reason": "report-only design signal", + "category": "report_only_context", + "severity": "context", + }, { "path": "tests/test_a.py", "reason": "golden fixture clone suppression surface", "category": "golden_fixture_surface", "severity": "context", - } + }, ] + assert declared["workspace_registered"] is True + assert declared["concurrent_intents"] == [] fetched = service.manage_change_intent(action="get", intent_id=intent_id) assert fetched["intent_id"] == intent_id @@ -2413,11 +2438,94 @@ def test_mcp_service_manage_change_intent_lifecycle(tmp_path: Path) -> None: assert violated["unexpected_files"] == ["pkg/unplanned.py"] cleared = service.manage_change_intent(action="clear", intent_id=intent_id) - assert cleared == {"cleared": 1, "cleared_intent_ids": [intent_id]} + assert cleared == { + "cleared": 1, + "cleared_intent_ids": [intent_id], + "workspace_cleared": True, + } with pytest.raises(MCPServiceContractError, match="No active change intent"): service.manage_change_intent(action="get", run_id="abcdef12") +def test_mcp_service_workspace_intent_registry_detects_concurrent_agents( + 
tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr(mcp_workspace_intents_mod, "_is_pid_alive", lambda pid: True) + first = CodeCloneMCPService(history_limit=2) + second = CodeCloneMCPService(history_limit=2) + first._agent_pid, first._agent_start_epoch, first._agent_label = ( + 11111, + 100, + "agent-a", + ) + second._agent_pid, second._agent_start_epoch, second._agent_label = ( + 22222, + 200, + "agent-b", + ) + record = _blast_radius_run_record(tmp_path) + first._runs.register(record) + second._runs.register(record) + + declared_first = first.manage_change_intent( + action="declare", + scope={ + "allowed_files": ["pkg/a.py"], + "allowed_related": ["tests/test_a.py"], + }, + intent="first agent edits pkg.a", + ) + first_intent_id = str(declared_first["intent_id"]) + + workspace = second.manage_change_intent( + action="list_workspace", + root=str(tmp_path), + ) + workspace_intents = cast( + "list[dict[str, object]]", + workspace["workspace_intents"], + ) + assert workspace["total_agents"] == 1 + assert workspace_intents[0]["agent_label"] == "agent-a" + assert workspace_intents[0]["is_own"] is False + + hard_conflict = second.manage_change_intent( + action="declare", + scope={"allowed_files": ["pkg/a.py"]}, + intent="second agent also edits pkg.a", + ) + assert cast("list[dict[str, object]]", hard_conflict["concurrent_intents"])[0][ + "hard_overlap" + ] == ["pkg/a.py"] + + soft_conflict = second.manage_change_intent( + action="declare", + scope={"allowed_files": ["tests/test_a.py"]}, + intent="second agent edits related tests", + ) + assert ( + cast("list[dict[str, object]]", soft_conflict["concurrent_intents"])[0][ + "overlap_type" + ] + == "soft" + ) + + rejected = second.manage_change_intent( + action="reset_workspace", + root=str(tmp_path), + intent_id=first_intent_id, + ) + assert rejected["action_taken"] == "rejected" + assert rejected["reason"] == "foreign_live_intent" + + cleared = first.manage_change_intent( + 
action="clear", + intent_id=first_intent_id, + ) + assert cleared["workspace_cleared"] is True + + def test_mcp_service_manage_change_intent_validation_expiry_and_prune( tmp_path: Path, ) -> None: @@ -2443,6 +2551,9 @@ def test_mcp_service_manage_change_intent_validation_expiry_and_prune( with pytest.raises(MCPServiceContractError, match="requires diff_ref"): service.manage_change_intent(action="check", intent_id=intent_id) + service._runs.register(_blast_radius_run_record(tmp_path)) + assert service._runs.get("abcdef12").run_id == "abcdef1234567890" + service._runs.register(_blast_radius_run_record(tmp_path, digest="digest-b")) expired = service.manage_change_intent(action="get", intent_id=intent_id) assert expired["status"] == "expired" @@ -2451,7 +2562,13 @@ def test_mcp_service_manage_change_intent_validation_expiry_and_prune( _blast_radius_run_record(tmp_path, run_id="fedcba9876543210") ) service._prune_session_state() - assert service._active_intents == {} + assert intent_id in service._active_intents + assert service._runs.get("abcdef12").run_id == "abcdef1234567890" + + service.manage_change_intent(action="clear", intent_id=intent_id) + service._prune_session_state() + with pytest.raises(MCPRunNotFoundError): + service._runs.get("abcdef12") def test_mcp_patch_contract_profiles_and_baseline_abuse() -> None: @@ -2575,16 +2692,28 @@ def test_mcp_service_check_patch_contract_budget_uses_intent_and_gate_preview( cast("dict[str, object]", payload["blast_radius_summary"])["radius_level"] == "medium" ) + assert ( + cast("dict[str, object]", payload["blast_radius_summary"])["do_not_touch_count"] + == 2 + ) + assert ( + cast("dict[str, object]", payload["blast_radius_summary"])[ + "review_context_count" + ] + == 1 + ) assert { "clone_regression": budgets["clone_regression"], "complexity_delta": budgets["complexity_delta"], "coverage_min": budgets["coverage_min"], + "forbid_dead_code_regression": budgets["forbid_dead_code_regression"], "complexity_max": 
current_state["complexity_max"], "clone_groups": current_state["clone_groups"], } == { "clone_regression": 0, "complexity_delta": 10, "coverage_min": 80, + "forbid_dead_code_regression": False, "complexity_max": 6, "clone_groups": 1, } @@ -2596,7 +2725,9 @@ def test_mcp_service_check_patch_contract_budget_uses_intent_and_gate_preview( run_id="abcdef12", strictness="relaxed", ) - assert cast("dict[str, object]", relaxed["budgets"])["clone_regression"] == -1 + relaxed_budgets = cast("dict[str, object]", relaxed["budgets"]) + assert relaxed_budgets["clone_regression"] is None + assert "clone_regression" in cast("list[str]", relaxed_budgets["disabled"]) assert cast("dict[str, object]", relaxed["gate_preview"])["would_fail"] is False assert "advisory" in str(relaxed["message"]) with pytest.raises(MCPServiceContractError, match="Invalid value for strictness"): @@ -4020,6 +4151,7 @@ def test_mcp_service_clear_session_runs_clears_in_memory_state(tmp_path: Path) - "cleared_gate_results", "cleared_blast_radius_entries", "cleared_intents", + "workspace_cleared", ) } == { "cleared_runs": 1, @@ -4027,6 +4159,7 @@ def test_mcp_service_clear_session_runs_clears_in_memory_state(tmp_path: Path) - "cleared_gate_results": 1, "cleared_blast_radius_entries": 1, "cleared_intents": 1, + "workspace_cleared": True, } with pytest.raises(MCPRunNotFoundError): service.get_run_summary() diff --git a/tests/test_workspace_intents.py b/tests/test_workspace_intents.py new file mode 100644 index 00000000..942a19e5 --- /dev/null +++ b/tests/test_workspace_intents.py @@ -0,0 +1,199 @@ +from __future__ import annotations + +import os +from datetime import timedelta +from pathlib import Path + +import pytest + +from codeclone.surfaces.mcp import _workspace_intents as workspace_intents +from codeclone.surfaces.mcp._workspace_intents import WorkspaceIntentRecord +from codeclone.utils.json_io import read_json_object, write_json_document_atomically + + +def _record( + *, + intent_id: str = 
"intent-abcdef12-001", + pid: int | None = None, + start_epoch: int = 100, + status: str = "active", + scope: dict[str, object] | None = None, + expires_delta: timedelta = timedelta(hours=1), +) -> WorkspaceIntentRecord: + declared_at = workspace_intents.utc_now() + scope_payload = scope or { + "allowed_files": ["pkg/a.py"], + "allowed_related": ["tests/test_a.py"], + "forbidden": [".cache/codeclone/**", "codeclone.baseline.json"], + } + return WorkspaceIntentRecord( + intent_id=intent_id, + agent_pid=pid or os.getpid(), + agent_start_epoch=start_epoch, + agent_label="agent-a", + run_id="abcdef1234567890", + declared_at_utc=workspace_intents.format_utc(declared_at), + expires_at_utc=workspace_intents.format_utc(declared_at + expires_delta), + ttl_seconds=3600, + status=status, + intent="edit pkg.a", + scope=scope_payload, + scope_digest=workspace_intents.compute_scope_digest(scope_payload), + blast_radius_summary={"radius_level": "medium"}, + ) + + +def test_workspace_intent_write_validate_update_and_remove(tmp_path: Path) -> None: + record = _record() + + assert workspace_intents.write_workspace_intent(root=tmp_path, record=record) + records = workspace_intents.list_workspace_intents(root=tmp_path) + assert records == (record,) + assert workspace_intents.find_workspace_intent( + root=tmp_path, + intent_id=record.intent_id, + ) == ( + workspace_intents.intent_path( + root=tmp_path, + pid=record.agent_pid, + start_epoch=record.agent_start_epoch, + intent_id=record.intent_id, + ), + record, + ) + + assert workspace_intents.update_workspace_intent_status( + root=tmp_path, + pid=record.agent_pid, + start_epoch=record.agent_start_epoch, + intent_id=record.intent_id, + new_status="clean", + ) + updated = workspace_intents.list_workspace_intents(root=tmp_path)[0] + assert updated.status == "clean" + assert workspace_intents.verify_intent_integrity(updated.signed_payload()) + + assert workspace_intents.remove_workspace_intent( + root=tmp_path, + pid=record.agent_pid, + 
start_epoch=record.agent_start_epoch, + intent_id=record.intent_id, + ) + assert workspace_intents.list_workspace_intents(root=tmp_path) == () + + +def test_workspace_intent_validation_rejects_tampered_and_invalid_paths( + tmp_path: Path, +) -> None: + record = _record() + assert workspace_intents.write_workspace_intent(root=tmp_path, record=record) + path = workspace_intents.intent_path( + root=tmp_path, + pid=record.agent_pid, + start_epoch=record.agent_start_epoch, + intent_id=record.intent_id, + ) + payload = read_json_object(path) + payload["intent"] = "tampered" + write_json_document_atomically(path, payload, sort_keys=True) + + assert workspace_intents.list_workspace_intents(root=tmp_path) == () + gc_payload = workspace_intents.gc_workspace(root=tmp_path) + assert gc_payload["corrupted_removed"] == 1 + assert gc_payload["corrupted_filenames"] == [path.name] + + invalid_scope: dict[str, object] = { + "allowed_files": [str(tmp_path / "abs.py")], + "allowed_related": [], + "forbidden": [], + } + invalid = _record(scope=invalid_scope) + signed = invalid.signed_payload() + assert workspace_intents.validate_workspace_record(signed) is None + + traversal_scope: dict[str, object] = { + "allowed_files": ["../outside.py"], + "allowed_related": [], + "forbidden": [], + } + traversal = _record(scope=traversal_scope) + assert ( + workspace_intents.validate_workspace_record(traversal.signed_payload()) is None + ) + + +def test_workspace_intent_stale_orphan_and_gc( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + expired = _record( + intent_id="intent-expired-001", + expires_delta=timedelta(seconds=-1), + ) + orphaned = _record( + intent_id="intent-orphaned-001", + pid=999999, + start_epoch=101, + ) + active = _record(intent_id="intent-active-001", start_epoch=102) + for record in (expired, orphaned, active): + assert workspace_intents.write_workspace_intent(root=tmp_path, record=record) + + monkeypatch.setattr( + workspace_intents, + "_is_pid_alive", + 
lambda pid: pid != orphaned.agent_pid, + ) + + assert workspace_intents.stale_reason(expired) == "expired" + assert workspace_intents.stale_reason(orphaned) == "orphaned" + assert workspace_intents.list_workspace_intents(root=tmp_path) == (active,) + + gc_payload = workspace_intents.gc_workspace(root=tmp_path) + assert gc_payload["removed"] == 2 + assert gc_payload["removed_reasons"] == { + expired.intent_id: "expired", + orphaned.intent_id: "orphaned", + } + assert workspace_intents.list_workspace_intents(root=tmp_path) == (active,) + + +def test_workspace_intent_conflict_detection() -> None: + existing = _record() + + hard = workspace_intents.detect_conflicts( + new_scope={ + "allowed_files": ["pkg/a.py"], + "allowed_related": [], + "forbidden": [], + }, + existing=(existing,), + own_pid=123456, + ) + assert hard[0]["overlap_type"] == "hard" + assert hard[0]["hard_overlap"] == ["pkg/a.py"] + + soft = workspace_intents.detect_conflicts( + new_scope={ + "allowed_files": ["tests/test_a.py"], + "allowed_related": [], + "forbidden": [], + }, + existing=(existing,), + own_pid=123456, + ) + assert soft[0]["overlap_type"] == "soft" + assert soft[0]["soft_overlap"] == ["tests/test_a.py"] + + assert ( + workspace_intents.detect_conflicts( + new_scope={ + "allowed_files": ["pkg/a.py"], + "allowed_related": [], + "forbidden": [], + }, + existing=(existing,), + own_pid=existing.agent_pid, + ) + == [] + ) From 769e2059d55f024c2ff74db6217983e973ea99da Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sun, 24 May 2026 16:03:00 +0500 Subject: [PATCH 012/318] feat(mcp): add claim guard --- CHANGELOG.md | 5 + README.md | 28 +- codeclone/surfaces/mcp/_claim_guard.py | 543 ++++++++++++++++++ .../mcp/_session_claim_guard_mixin.py | 73 +++ codeclone/surfaces/mcp/_session_shared.py | 4 +- codeclone/surfaces/mcp/server.py | 34 +- codeclone/surfaces/mcp/service.py | 11 + codeclone/surfaces/mcp/session.py | 4 +- docs/README-pypi.md | 8 +- docs/README.md | 5 +- 
docs/book/01-architecture-map.md | 14 +- docs/book/14-compatibility-and-versioning.md | 6 +- docs/book/20-mcp-interface.md | 13 +- docs/book/24-structural-change-controller.md | 29 +- docs/book/28-claim-guard.md | 77 +++ docs/mcp.md | 9 +- mkdocs.yml | 1 + .../contract_snapshots/mcp_tool_schemas.json | 33 ++ .../public_api_surface.json | 20 + tests/test_mcp_server.py | 20 + tests/test_mcp_service.py | 189 ++++++ 21 files changed, 1083 insertions(+), 43 deletions(-) create mode 100644 codeclone/surfaces/mcp/_claim_guard.py create mode 100644 codeclone/surfaces/mcp/_session_claim_guard_mixin.py create mode 100644 docs/book/28-claim-guard.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 89d15556..ccaed279 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,11 @@ artifacts that compose report provenance, intent scope, blast radius, reviewed findings, structural delta, patch-contract status, human decision points, and claims-not-made without mutating repository state. +- Add MCP `validate_review_claims` as a deterministic, citation-based claim + guard for review text. It flags overclaims such as Security Surfaces called + vulnerabilities, report-only families called CI failures, known findings + called new regressions, dead-code certainty despite runtime reachability + evidence, and fixes claimed before post-patch verification. ### Internal diff --git a/README.md b/README.md index ea17964c..1693df4d 100644 --- a/README.md +++ b/README.md @@ -33,8 +33,8 @@ governance for AI coding agents. In the current v2.1 alpha, CodeClone records the declared intent before the first edit, maps the structural blast radius, verifies explicit before/after runs against the patch contract, and generates auditable review receipts. It also exposes an advisory workspace intent registry so -parallel agents can see overlapping edit scopes before they start. The claim-guard tool is -planned next. 
+parallel agents can see overlapping edit scopes before they start, and validates cited review +claims against the canonical report so agents do not overstate report-only signals or known debt. **One canonical analysis, many surfaces.** CLI, HTML reports, IDE, and MCP all read the same deterministic facts — for both human reviewers and AI agents. @@ -60,15 +60,15 @@ When an AI agent edits code, CodeClone governs the structural boundary across fi | 3. Map blast radius | `get_blast_radius` | Reverse imports, clone cohorts, dependency cycles, do-not-touch signals | | 4. Check patch contract | `check_patch_contract` | Pre-edit regression budget with headroom; post-edit boundary verification | | 5. Generate receipt | `create_review_receipt` | Auditable artifact linking intent, scope, patch status, and structural delta | -| 6. Validate claims | _planned_ | Cross-check the agent's review text against the canonical report | +| 6. Validate claims | `validate_review_claims` | Cross-check cited review text against canonical report semantics | Every step is deterministic — structural facts from the canonical report, no LLM inference. -The v2.1 alpha ships steps 1–5 as live MCP tools (`manage_change_intent`, `get_blast_radius`, -`check_patch_contract`, `create_review_receipt`) composed over the existing read-only analysis -surface. Claim validation is a planned follow-up in the same controller line. Intent truth is -session-local; workspace coordination records are ephemeral files under `.cache/codeclone/intents/`. -CodeClone still never mutates source files, baselines, reports, or analysis cache data. +The v2.1 alpha ships all six steps as live MCP tools (`manage_change_intent`, `get_blast_radius`, +`check_patch_contract`, `create_review_receipt`, `validate_review_claims`) composed over the +existing read-only analysis surface. Intent truth is session-local; workspace coordination records +are ephemeral files under `.cache/codeclone/intents/`. 
CodeClone still never mutates source files, +baselines, reports, or analysis cache data. Change controller docs: [Structural Change Controller](https://orenlab.github.io/codeclone/book/24-structural-change-controller/) @@ -81,7 +81,7 @@ Change controller docs: [Structural Change Controller](https://orenlab.github.io - **Blast radius** — structural risk projection: reverse imports, clone cohorts, dependency cycles, do-not-touch signals - **Patch contract** — pre-edit regression budget and post-edit boundary verification over explicit before/after runs - **Review receipt** — auditable artifact linking intent, scope, patch verification, and structural delta -- **Claim guard** _(planned)_ — citation-based validation of review text against the canonical report +- **Claim guard** — citation-based validation of review text against canonical report semantics **Baseline governance** @@ -101,7 +101,7 @@ Change controller docs: [Structural Change Controller](https://orenlab.github.io **Surfaces & integrations** -- **MCP control surface** — 25-tool agent and IDE interface over the same canonical pipeline; read-only by contract +- **MCP control surface** — 26-tool agent and IDE interface over the same canonical pipeline; read-only by contract - **IDE & agent clients** — VS Code extension, Claude Desktop bundle, and Codex plugin over the same MCP contract **Performance** @@ -257,7 +257,7 @@ repos: ## MCP Control Surface -A 25-tool MCP server for AI agents and IDE clients, built on the same canonical pipeline as the CLI. +A 26-tool MCP server for AI agents and IDE clients, built on the same canonical pipeline as the CLI. Read-only for source, baselines, reports, and analysis cache data. The change controller may write ephemeral coordination records under `.cache/codeclone/intents/`. 
@@ -270,9 +270,9 @@ codeclone-mcp --transport streamable-http ``` The controller tools — `manage_change_intent`, `get_blast_radius`, -`check_patch_contract`, and `create_review_receipt` — are composed over the same -canonical surface to govern the structural boundary of AI-assisted edits. The -claim guard tool is a planned v2.1 follow-up. +`check_patch_contract`, `create_review_receipt`, and `validate_review_claims` — +are composed over the same canonical surface to govern the structural boundary +and review discipline of AI-assisted edits. > [!WARNING] > Analysis tools require an absolute repository root. Relative roots such as `.` are rejected. diff --git a/codeclone/surfaces/mcp/_claim_guard.py b/codeclone/surfaces/mcp/_claim_guard.py new file mode 100644 index 00000000..5ba78536 --- /dev/null +++ b/codeclone/surfaces/mcp/_claim_guard.py @@ -0,0 +1,543 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import re +from collections.abc import Mapping, Sequence +from dataclasses import dataclass +from typing import Final, Literal + +MAX_REVIEW_CLAIM_TEXT_CHARS: Final = 50_000 +TEXT_WINDOW_RADIUS: Final = 80 +SECURITY_SURFACES_FAMILY: Final = "security_surfaces" + +CitationKind = Literal["finding", "metric_family"] + +SECURITY_OVERCLAIM_KEYWORDS: Final = ( + "vulnerab", + "exploit", + "attack", + "cve", + "threat", + "security flaw", + "security bug", + "security issue", +) +GATE_OVERCLAIM_KEYWORDS: Final = ( + "fail", + "block", + "gate", + "ci ", + "ci-", + "pipeline", + "break build", + "must fix", + "blocking", +) +REGRESSION_OVERCLAIM_KEYWORDS: Final = ( + "new ", + "regress", + "introduc", + "just appeared", + "added", + "caused by", + "broke", +) +DEAD_CODE_CERTAINTY_KEYWORDS: Final = ( + "dead", + "unused", + "unreachable", + "remove", + "delete", + "safe to remove", + "definitely dead", +) +FIX_OVERCLAIM_KEYWORDS: Final = ( + "fixed", + "resolved", + "eliminated", + "removed the", + "cleaned up", + "refactored away", + "no longer", +) + +_UNKNOWN_SHORT_FINDING_RE: Final = re.compile(r"\bF-\d+\b", re.IGNORECASE) +_LITERAL_BOUNDARY_CHARS: Final = r"A-Za-z0-9_:" +_SENTENCE_BOUNDARIES: Final = ".!?\n" + + +@dataclass(frozen=True, slots=True) +class Citation: + cited_id: str + kind: CitationKind + text_window: str + start_offset: int + end_offset: int + + +@dataclass(frozen=True, slots=True) +class Violation: + pattern: str + claim: str + cited_id: str + reason: str + source_flag: str + + +@dataclass(frozen=True, slots=True) +class ReportContext: + findings: Mapping[str, Mapping[str, object]] + short_to_canonical: Mapping[str, str] + reachable_qualnames: frozenset[str] + report_only_families: frozenset[str] + has_comparison_run: bool + metric_families: frozenset[str] + + +def validate_claims( + *, + text: str, + report_context: ReportContext, + 
require_citations: bool = True, +) -> dict[str, object]: + citations = extract_citations(text, report_context=report_context) + violations = _violations_for_citations( + citations=citations, + report_context=report_context, + ) + warnings = _warnings_for_text( + text=text, + citations=citations, + report_context=report_context, + require_citations=require_citations, + ) + violation_keys = { + (violation.pattern, violation.cited_id, violation.claim) + for violation in violations + } + return { + "valid": len(violations) == 0, + "citations_found": len(citations), + "violations": [_violation_payload(violation) for violation in violations], + "warnings": warnings, + "validated_citations": [ + { + "cited_id": citation.cited_id, + "kind": citation.kind, + "valid": not any( + key[1] == citation.cited_id and key[2] == citation.text_window + for key in violation_keys + ), + } + for citation in citations + ], + } + + +def validate_text_input(text: object) -> str: + if not isinstance(text, str): + raise ValueError("text must be a string.") + cleaned = text.strip() + if not cleaned: + raise ValueError("text must not be empty.") + if len(text) > MAX_REVIEW_CLAIM_TEXT_CHARS: + raise ValueError( + "text exceeds the maximum supported length " + f"({MAX_REVIEW_CLAIM_TEXT_CHARS} characters)." 
+ ) + return text + + +def extract_citations( + text: str, + *, + report_context: ReportContext, +) -> tuple[Citation, ...]: + citations: list[Citation] = [] + known_finding_ids = { + *report_context.findings.keys(), + *report_context.short_to_canonical.keys(), + } + for finding_id in sorted(known_finding_ids): + canonical_id = report_context.short_to_canonical.get(finding_id, finding_id) + if canonical_id not in report_context.findings: + continue + citations.extend( + Citation( + cited_id=canonical_id, + kind="finding", + text_window=text_window(text, match.start(), match.end()), + start_offset=match.start(), + end_offset=match.end(), + ) + for match in _find_literal_matches(text, finding_id) + ) + for family_name in sorted(report_context.metric_families): + pattern = re.compile( + rf"\b{re.escape(family_name)}\b", + flags=re.IGNORECASE, + ) + citations.extend( + Citation( + cited_id=family_name, + kind="metric_family", + text_window=text_window(text, match.start(), match.end()), + start_offset=match.start(), + end_offset=match.end(), + ) + for match in pattern.finditer(text) + ) + return tuple( + sorted( + _dedupe_citations(citations), + key=lambda item: ( + item.start_offset, + item.end_offset, + item.kind, + item.cited_id, + ), + ) + ) + + +def text_window( + text: str, + start_offset: int, + end_offset: int, + *, + radius: int = TEXT_WINDOW_RADIUS, +) -> str: + bound_start = max(0, start_offset - radius) + bound_end = min(len(text), end_offset + radius) + sentence_start = max( + ( + text.rfind(boundary, bound_start, start_offset) + for boundary in _SENTENCE_BOUNDARIES + ), + default=-1, + ) + start = max(bound_start, sentence_start + 1) + sentence_ends = [ + candidate + for boundary in _SENTENCE_BOUNDARIES + if (candidate := text.find(boundary, end_offset, bound_end)) != -1 + ] + end = min(sentence_ends) + 1 if sentence_ends else bound_end + return text[start:end].strip() + + +def _violations_for_citations( + *, + citations: Sequence[Citation], + 
report_context: ReportContext, +) -> tuple[Violation, ...]: + checks = ( + _check_security_vulnerability_overclaim, + _check_report_only_gate_overclaim, + _check_known_debt_overclaim, + _check_dead_code_reachability_overclaim, + _check_fix_without_verification, + ) + violations: list[Violation] = [] + for check in checks: + violations.extend(check(citations=citations, report_context=report_context)) + return tuple( + sorted( + _dedupe_violations(violations), + key=lambda item: (item.pattern, item.cited_id, item.claim), + ) + ) + + +def _check_security_vulnerability_overclaim( + *, + citations: Sequence[Citation], + report_context: ReportContext, +) -> tuple[Violation, ...]: + violations: list[Violation] = [] + for citation in citations: + if ( + citation.kind != "metric_family" + or citation.cited_id != SECURITY_SURFACES_FAMILY + ): + continue + if not _contains_keyword(citation.text_window, SECURITY_OVERCLAIM_KEYWORDS): + continue + violations.append( + Violation( + pattern="P-1", + claim=citation.text_window, + cited_id=citation.cited_id, + reason=( + "Security Surfaces are report-only trust-boundary inventory, " + "not vulnerability claims." + ), + source_flag="security_surfaces.gate_keys=()", + ) + ) + return tuple(violations) + + +def _check_report_only_gate_overclaim( + *, + citations: Sequence[Citation], + report_context: ReportContext, +) -> tuple[Violation, ...]: + violations: list[Violation] = [] + for citation in citations: + if citation.kind != "metric_family": + continue + if citation.cited_id not in report_context.report_only_families: + continue + if not _contains_keyword(citation.text_window, GATE_OVERCLAIM_KEYWORDS): + continue + violations.append( + Violation( + pattern="P-2", + claim=citation.text_window, + cited_id=citation.cited_id, + reason=( + f"'{citation.cited_id}' is a report-only signal " + "(gate_keys=()). It cannot fail CI or block a pipeline." 
+ ), + source_flag=f"{citation.cited_id}.gate_keys=()", + ) + ) + return tuple(violations) + + +def _check_known_debt_overclaim( + *, + citations: Sequence[Citation], + report_context: ReportContext, +) -> tuple[Violation, ...]: + violations: list[Violation] = [] + for citation in citations: + if citation.kind != "finding": + continue + finding = report_context.findings.get(citation.cited_id) + if finding is None or str(finding.get("novelty", "")) != "known": + continue + if not _contains_keyword(citation.text_window, REGRESSION_OVERCLAIM_KEYWORDS): + continue + violations.append( + Violation( + pattern="P-3", + claim=citation.text_window, + cited_id=citation.cited_id, + reason=( + "This finding has novelty='known'; it exists in baseline " + "and cannot be described as a new regression." + ), + source_flag="finding.novelty='known'", + ) + ) + return tuple(violations) + + +def _check_dead_code_reachability_overclaim( + *, + citations: Sequence[Citation], + report_context: ReportContext, +) -> tuple[Violation, ...]: + violations: list[Violation] = [] + for citation in citations: + if citation.kind != "finding": + continue + finding = report_context.findings.get(citation.cited_id) + if finding is None or not _is_dead_code_finding(citation.cited_id, finding): + continue + if not _contains_keyword(citation.text_window, DEAD_CODE_CERTAINTY_KEYWORDS): + continue + reachable = sorted( + qualname + for qualname in _extract_qualnames_from_finding(citation.cited_id, finding) + if qualname in report_context.reachable_qualnames + ) + if not reachable: + continue + violations.append( + Violation( + pattern="P-4", + claim=citation.text_window, + cited_id=citation.cited_id, + reason=( + f"'{reachable[0]}' has runtime reachability evidence; " + "it must not be claimed as definitely dead code." 
+ ), + source_flag="runtime_reachability.evidence_present", + ) + ) + return tuple(violations) + + +def _check_fix_without_verification( + *, + citations: Sequence[Citation], + report_context: ReportContext, +) -> tuple[Violation, ...]: + if report_context.has_comparison_run: + return () + violations: list[Violation] = [] + for citation in citations: + if citation.kind != "finding" or not _contains_keyword( + citation.text_window, + FIX_OVERCLAIM_KEYWORDS, + ): + continue + violations.append( + Violation( + pattern="P-5", + claim=citation.text_window, + cited_id=citation.cited_id, + reason=( + "Fix claimed but no post-patch analysis run is available. " + "Run analysis after editing and verify the patch contract." + ), + source_flag="session.comparison_run_available=false", + ) + ) + return tuple(violations) + + +def _warnings_for_text( + *, + text: str, + citations: Sequence[Citation], + report_context: ReportContext, + require_citations: bool, +) -> list[dict[str, str]]: + warnings: list[dict[str, str]] = [] + if require_citations and not citations: + warnings.append( + { + "type": "no_citations", + "message": ( + "No known CodeClone finding IDs or metric family citations " + "were found in the text." + ), + } + ) + for match in _UNKNOWN_SHORT_FINDING_RE.finditer(text): + cited_id = match.group(0).upper() + if cited_id not in report_context.short_to_canonical: + warnings.append( + { + "type": "unknown_finding", + "message": ( + f"Finding citation '{cited_id}' is not present in this run." + ), + } + ) + return warnings + + +def _find_literal_matches(text: str, literal: str) -> tuple[re.Match[str], ...]: + pattern = re.compile( + rf"(? 
bool: + lowered = text.casefold() + return any(keyword.casefold() in lowered for keyword in keywords) + + +def _dedupe_citations(citations: Sequence[Citation]) -> tuple[Citation, ...]: + seen: set[tuple[str, str, int, int]] = set() + deduped: list[Citation] = [] + for citation in citations: + key = ( + citation.kind, + citation.cited_id.casefold(), + citation.start_offset, + citation.end_offset, + ) + if key in seen: + continue + seen.add(key) + deduped.append(citation) + return tuple(deduped) + + +def _dedupe_violations(violations: Sequence[Violation]) -> tuple[Violation, ...]: + seen: set[tuple[str, str, str, str]] = set() + deduped: list[Violation] = [] + for violation in violations: + key = ( + violation.pattern, + violation.cited_id, + violation.claim, + violation.source_flag, + ) + if key in seen: + continue + seen.add(key) + deduped.append(violation) + return tuple(deduped) + + +def _violation_payload(violation: Violation) -> dict[str, str]: + return { + "pattern": violation.pattern, + "claim": violation.claim, + "cited_id": violation.cited_id, + "reason": violation.reason, + "source_flag": violation.source_flag, + } + + +def _is_dead_code_finding( + finding_id: str, + finding: Mapping[str, object], +) -> bool: + return ( + finding_id.startswith("dead_code:") + or str(finding.get("family", "")) == "dead_code" + or str(finding.get("category", "")) == "dead_code" + ) + + +def _extract_qualnames_from_finding( + finding_id: str, + finding: Mapping[str, object], +) -> frozenset[str]: + qualnames: set[str] = set() + _collect_qualname_fields(finding, qualnames) + for item in _as_sequence(finding.get("items")): + if isinstance(item, Mapping): + _collect_qualname_fields(item, qualnames) + if finding_id.startswith("dead_code:"): + _, _, remainder = finding_id.partition(":") + if remainder: + qualnames.add(remainder) + return frozenset(sorted(qualnames)) + + +def _collect_qualname_fields( + payload: Mapping[str, object], + qualnames: set[str], +) -> None: + for 
field_name in ( + "qualname", + "target_qualname", + "symbol", + "name", + "subject_key", + ): + value = str(payload.get(field_name, "")).strip() + if value: + qualnames.add(value) + + +def _as_sequence(value: object) -> Sequence[object]: + return value if isinstance(value, Sequence) and not isinstance(value, str) else () diff --git a/codeclone/surfaces/mcp/_session_claim_guard_mixin.py b/codeclone/surfaces/mcp/_session_claim_guard_mixin.py new file mode 100644 index 00000000..64900c3a --- /dev/null +++ b/codeclone/surfaces/mcp/_session_claim_guard_mixin.py @@ -0,0 +1,73 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from ...metrics.registry import METRIC_FAMILIES +from . import _session_helpers as _helpers +from ._claim_guard import ( + ReportContext, + validate_claims, + validate_text_input, +) +from ._session_review_receipt_mixin import _MCPSessionReviewReceiptMixin +from ._session_shared import MCPRunRecord, MCPServiceContractError + + +class _MCPSessionClaimGuardMixin(_MCPSessionReviewReceiptMixin): + def validate_review_claims( + self, + *, + text: str, + run_id: str | None = None, + require_citations: bool = True, + ) -> dict[str, object]: + try: + validated_text = validate_text_input(text) + except ValueError as exc: + raise MCPServiceContractError(str(exc)) from exc + record = self._runs.get(run_id) + context = self._claim_guard_context(record) + payload = validate_claims( + text=validated_text, + report_context=context, + require_citations=bool(require_citations), + ) + return {"run_id": _helpers._short_run_id(record.run_id), **payload} + + def _claim_guard_context(self, record: MCPRunRecord) -> ReportContext: + _canonical_to_short, short_to_canonical = self._finding_id_maps(record) + 
findings = { + canonical_id: dict(finding) + for finding in self._base_findings(record) + if (canonical_id := str(finding.get("id", "")).strip()) + } + return ReportContext( + findings=findings, + short_to_canonical=short_to_canonical, + reachable_qualnames=self._reachable_qualnames(record), + report_only_families=frozenset( + sorted( + family.name + for family in METRIC_FAMILIES.values() + if not family.gate_keys + ) + ), + has_comparison_run=self._previous_run_for_root(record) is not None, + metric_families=frozenset(sorted(METRIC_FAMILIES)), + ) + + def _reachable_qualnames(self, record: MCPRunRecord) -> frozenset[str]: + project_metrics = record.project_metrics + if project_metrics is None: + return frozenset() + return frozenset( + sorted( + str(getattr(fact, "target_qualname", "")).strip() + for fact in getattr(project_metrics, "runtime_reachability", ()) + if str(getattr(fact, "target_qualname", "")).strip() + ) + ) diff --git a/codeclone/surfaces/mcp/_session_shared.py b/codeclone/surfaces/mcp/_session_shared.py index 41f77cdd..5aec3cef 100644 --- a/codeclone/surfaces/mcp/_session_shared.py +++ b/codeclone/surfaces/mcp/_session_shared.py @@ -761,7 +761,8 @@ class MCPHelpTopicSpec: ), ( "After editing, re-run analysis, check intent scope, verify " - "the patch contract, and clear the intent." + "the patch contract, validate review claims, and clear the " + "intent." ), ( "Use reset_workspace for interrupted own, expired, or orphaned " @@ -773,6 +774,7 @@ class MCPHelpTopicSpec: "analyze_repository", "get_blast_radius", "check_patch_contract", + "validate_review_claims", "create_review_receipt", ), doc_links=(_CHANGE_CONTROL_DOC_LINK, _MCP_INTERFACE_DOC_LINK), diff --git a/codeclone/surfaces/mcp/server.py b/codeclone/surfaces/mcp/server.py index 76738ea9..27b18435 100644 --- a/codeclone/surfaces/mcp/server.py +++ b/codeclone/surfaces/mcp/server.py @@ -47,10 +47,11 @@ "absolute repository root to analysis tools. 
For file edits, call " "manage_change_intent(action='list_workspace', root=...) before analysis, " "then analyze, declare intent, inspect blast radius and patch budget, edit " - "within scope, re-analyze, verify, and clear intent. If concurrent intents " - "overlap, narrow scope or coordinate. This server never updates baselines " - "and never mutates source files, analysis cache, or reports; it may write " - "ephemeral workspace coordination state under .cache/codeclone/intents/." + "within scope, re-analyze, verify, validate review claims, and clear intent. " + "If concurrent intents overlap, narrow scope or coordinate. This server never " + "updates baselines and never mutates source files, analysis cache, or reports; " + "it may write ephemeral workspace coordination state under " + ".cache/codeclone/intents/." ) _MCP_INSTALL_HINT = ( "CodeClone MCP support requires the optional 'mcp' extra. " @@ -438,6 +439,31 @@ def create_review_receipt( include_patch_contract=include_patch_contract, ) + @tool( + title="Validate Review Claims", + description=( + "Validate cited review text against canonical report semantics. " + "Detects deterministic mischaracterizations: Security Surfaces " + "called vulnerabilities, report-only signals called CI failures, " + "known baseline debt called new regressions, dead code claimed " + "where runtime reachability evidence exists, and fixes claimed " + "without post-patch verification. Structural citation matching; " + "not NLP." 
+ ), + annotations=read_only_tool, + structured_output=True, + ) + def validate_review_claims( + text: str, + run_id: str | None = None, + require_citations: bool = True, + ) -> dict[str, object]: + return service.validate_review_claims( + text=text, + run_id=run_id, + require_citations=require_citations, + ) + @tool( title="Help", description=( diff --git a/codeclone/surfaces/mcp/service.py b/codeclone/surfaces/mcp/service.py index 0966032a..869b7058 100644 --- a/codeclone/surfaces/mcp/service.py +++ b/codeclone/surfaces/mcp/service.py @@ -76,6 +76,12 @@ def create_review_receipt( ) -> dict[str, object]: return self._run_dict("create_review_receipt", **params) + def validate_review_claims( + self: _RunDictService, + **params: object, + ) -> dict[str, object]: + return self._run_dict("validate_review_claims", **params) + def generate_pr_summary( self: _RunDictService, **params: object, @@ -252,6 +258,11 @@ def _apply_public_method_signatures() -> None: _kwonly("include_blast_radius", "bool", True), _kwonly("include_patch_contract", "bool", True), ), + "validate_review_claims": ( + _kwonly("text", "str"), + _kwonly("run_id", "str | None", None), + _kwonly("require_citations", "bool", True), + ), "manage_change_intent": ( _kwonly("action", "str"), _kwonly("run_id", "str | None", None), diff --git a/codeclone/surfaces/mcp/session.py b/codeclone/surfaces/mcp/session.py index 118c728d..e553079c 100644 --- a/codeclone/surfaces/mcp/session.py +++ b/codeclone/surfaces/mcp/session.py @@ -19,7 +19,7 @@ resolve_clone_baseline_state, resolve_metrics_baseline_state, ) -from ._session_review_receipt_mixin import _MCPSessionReviewReceiptMixin +from ._session_claim_guard_mixin import _MCPSessionClaimGuardMixin from ._session_shared import ( _REPORT_DUMMY_PATH, DEFAULT_BLOCK_MIN_LOC, @@ -80,7 +80,7 @@ ] -class MCPSession(_MCPSessionReviewReceiptMixin): +class MCPSession(_MCPSessionClaimGuardMixin): def __init__(self, *, history_limit: int = DEFAULT_MCP_HISTORY_LIMIT) -> None: 
self._runs = CodeCloneMCPRunStore(history_limit=history_limit) self._state_lock = RLock() diff --git a/docs/README-pypi.md b/docs/README-pypi.md index a2cbaccb..02c38ea3 100644 --- a/docs/README-pypi.md +++ b/docs/README-pypi.md @@ -31,7 +31,7 @@ CodeClone is a structural change controller for Python. The v2.1 alpha starts before the first edit — when an agent declares what it intends to change — maps the structural blast radius, and verifies explicit before/after runs against the patch contract. It also generates auditable review receipts; the -claim-guard tool is planned next. +claim guard validates cited review claims against canonical report semantics. The same analysis pipeline powers CLI reports, CI checks, the MCP server, and native IDE/agent clients — so humans and AI agents operate on identical, @@ -50,7 +50,7 @@ When an AI agent edits code, CodeClone governs the structural boundary: 2. **Map blast radius** — reverse imports, clone cohorts, dependency cycles, do-not-touch signals 3. **Check patch contract** — pre-edit regression budget and post-edit boundary verification 4. **Generate receipt** — auditable artifact: intent + scope + patch status + structural delta -5. **Validate claims** — planned cross-check of review text against the canonical report +5. **Validate claims** — citation-based cross-check of review text against the canonical report Each step is deterministic — structural facts, no LLM inference. 
@@ -63,7 +63,7 @@ Docs: 'dict[str, object]'" }, + { + "name": "check_patch_contract", + "signature": "(self, *, mode: 'str', run_id: 'str | None' = None, before_run_id: 'str | None' = None, after_run_id: 'str | None' = None, intent_id: 'str | None' = None, strictness: 'str' = 'ci', diff_ref: 'str | None' = None, changed_files: 'Sequence[str] | None' = None) -> 'dict[str, object]'" + }, { "name": "clear_session_runs", "signature": "(self) -> 'dict[str, object]'" @@ -43,6 +47,10 @@ "name": "compare_runs", "signature": "(self, *, run_id_before: 'str', run_id_after: 'str | None' = None, focus: 'ComparisonFocus' = 'all') -> 'dict[str, object]'" }, + { + "name": "create_review_receipt", + "signature": "(self, *, run_id: 'str | None' = None, intent_id: 'str | None' = None, format: 'str' = 'markdown', include_blast_radius: 'bool' = True, include_patch_contract: 'bool' = True) -> 'dict[str, object]'" + }, { "name": "evaluate_gates", "signature": "(self, request: 'MCPGateRequest') -> 'dict[str, object]'" @@ -51,6 +59,10 @@ "name": "generate_pr_summary", "signature": "(self, *, run_id: 'str | None' = None, changed_paths: 'Sequence[str]' = (), git_diff_ref: 'str | None' = None, format: 'PRSummaryFormat' = 'markdown') -> 'dict[str, object]'" }, + { + "name": "get_blast_radius", + "signature": "(self, *, files: 'Sequence[str]', run_id: 'str | None' = None, depth: 'str' = 'direct', include: 'Sequence[str] | None' = None) -> 'dict[str, object]'" + }, { "name": "get_finding", "signature": "(self, *, finding_id: 'str', run_id: 'str | None' = None, detail_level: 'DetailLevel' = 'normal') -> 'dict[str, object]'" @@ -87,6 +99,10 @@ "name": "list_reviewed_findings", "signature": "(self, *, run_id: 'str | None' = None) -> 'dict[str, object]'" }, + { + "name": "manage_change_intent", + "signature": "(self, *, action: 'str', run_id: 'str | None' = None, intent_id: 'str | None' = None, scope: 'dict[str, object] | None' = None, intent: 'str | None' = None, expected_effects: 'Sequence[str] | 
None' = None, diff_ref: 'str | None' = None, changed_files: 'Sequence[str] | None' = None, root: 'str | None' = None, ttl_seconds: 'int | None' = None) -> 'dict[str, object]'" + }, { "name": "mark_finding_reviewed", "signature": "(self, *, finding_id: 'str', run_id: 'str | None' = None, note: 'str | None' = None) -> 'dict[str, object]'" @@ -94,6 +110,10 @@ { "name": "read_resource", "signature": "(self, uri: 'str') -> 'str'" + }, + { + "name": "validate_review_claims", + "signature": "(self, *, text: 'str', run_id: 'str | None' = None, require_citations: 'bool' = True) -> 'dict[str, object]'" } ] } diff --git a/tests/test_mcp_server.py b/tests/test_mcp_server.py index 866b23fe..83a5c360 100644 --- a/tests/test_mcp_server.py +++ b/tests/test_mcp_server.py @@ -119,6 +119,7 @@ def test_mcp_server_exposes_expected_read_only_tools() -> None: server.instructions ) assert ".cache/codeclone/intents/" in str(server.instructions) + assert "validate review claims" in str(server.instructions) tools = {tool.name: tool for tool in asyncio.run(server.list_tools())} assert set(tools) == { @@ -131,6 +132,7 @@ def test_mcp_server_exposes_expected_read_only_tools() -> None: "get_blast_radius", "check_patch_contract", "create_review_receipt", + "validate_review_claims", "evaluate_gates", "get_report_section", "list_findings", @@ -165,6 +167,7 @@ def test_mcp_server_exposes_expected_read_only_tools() -> None: "get_blast_radius", "check_patch_contract", "create_review_receipt", + "validate_review_claims", "evaluate_gates", "help", "get_report_section", @@ -210,6 +213,10 @@ def test_mcp_server_exposes_expected_read_only_tools() -> None: assert "mode='budget'" in str(tools["check_patch_contract"].description) assert "auditable review receipt" in str(tools["create_review_receipt"].description) assert "claims-not-made" in str(tools["create_review_receipt"].description) + assert "Structural citation matching" in str( + tools["validate_review_claims"].description + ) + assert "not NLP" in 
str(tools["validate_review_claims"].description) assert "list_workspace" in str(tools["manage_change_intent"].description) assert ".cache/codeclone/intents/" in str(tools["manage_change_intent"].description) assert "bounded guidance, not a full manual" in str(tools["help"].description) @@ -630,6 +637,17 @@ def test_mcp_server_tool_roundtrip_and_resources(tmp_path: Path) -> None: ) ) ) + claim_guard = _structured_tool_result( + asyncio.run( + server.call_tool( + "validate_review_claims", + { + "run_id": run_id, + "text": "security_surfaces is boundary inventory.", + }, + ) + ) + ) assert complexity["check"] == "complexity" assert cast(int, clones["total"]) >= 1 assert coupling["check"] == "coupling" @@ -645,6 +663,8 @@ def test_mcp_server_tool_roundtrip_and_resources(tmp_path: Path) -> None: assert reviewed_finding["locations"] == summary_finding["locations"] assert "## CodeClone Summary" in str(pr_summary["content"]) assert receipt["format"] == "markdown" + assert claim_guard["valid"] is True + assert claim_guard["citations_found"] == 1 assert "## CodeClone Agent Review Receipt" in str(receipt["content"]) receipt_payload = cast("dict[str, object]", receipt["receipt"]) assert cast("dict[str, object]", receipt_payload["scope"])["intent_id"] == ( diff --git a/tests/test_mcp_service.py b/tests/test_mcp_service.py index 2e84bd19..0bd657d5 100644 --- a/tests/test_mcp_service.py +++ b/tests/test_mcp_service.py @@ -20,6 +20,7 @@ import pytest import codeclone.surfaces.mcp._blast_radius as mcp_blast_radius_mod +import codeclone.surfaces.mcp._claim_guard as mcp_claim_guard_mod import codeclone.surfaces.mcp._patch_contract as mcp_patch_contract_mod import codeclone.surfaces.mcp._review_receipt as mcp_review_receipt_mod import codeclone.surfaces.mcp._session_baseline as mcp_baseline_mod @@ -318,6 +319,58 @@ def _blast_radius_run_record( ) +def _claim_guard_context( + *, has_comparison_run: bool = False +) -> mcp_claim_guard_mod.ReportContext: + findings: dict[str, dict[str, 
object]] = { + "clone:function:g1": { + "id": "clone:function:g1", + "family": "clone", + "category": "function", + "novelty": "new", + }, + "clone:function:g2": { + "id": "clone:function:g2", + "family": "clone", + "category": "function", + "novelty": "known", + }, + "dead_code:pkg.routes:handler": { + "id": "dead_code:pkg.routes:handler", + "family": "dead_code", + "category": "dead_code", + "novelty": "new", + "items": [{"qualname": "pkg.routes:handler"}], + }, + } + return mcp_claim_guard_mod.ReportContext( + findings=findings, + short_to_canonical={ + "F-1": "clone:function:g1", + "F-2": "clone:function:g2", + "F-3": "dead_code:pkg.routes:handler", + }, + reachable_qualnames=frozenset({"pkg.routes:handler"}), + report_only_families=frozenset({"overloaded_modules", "security_surfaces"}), + has_comparison_run=has_comparison_run, + metric_families=frozenset( + { + "api_surface", + "cohesion", + "complexity", + "coupling", + "coverage_adoption", + "coverage_join", + "dead_code", + "dependencies", + "health", + "overloaded_modules", + "security_surfaces", + } + ), + ) + + def _patch_contract_report_document( *, digest: str, @@ -2827,6 +2880,142 @@ def test_mcp_service_check_patch_contract_verify_composes_existing_primitives( assert payload["reason"] == reason +def test_claim_guard_detects_deterministic_overclaims() -> None: + payload = mcp_claim_guard_mod.validate_claims( + text=( + "security_surfaces found vulnerabilities. " + "overloaded_modules will fail CI. " + "F-2 is a new regression. " + "F-3 is dead and safe to remove. " + "F-1 fixed the issue." 
+ ), + report_context=_claim_guard_context(), + ) + + violations = cast("list[dict[str, object]]", payload["violations"]) + validated = cast("list[dict[str, object]]", payload["validated_citations"]) + assert payload["valid"] is False + assert {str(item["pattern"]) for item in violations} == { + "P-1", + "P-2", + "P-3", + "P-4", + "P-5", + } + assert payload["citations_found"] == 5 + assert all(not item["valid"] for item in validated) + + +def test_claim_guard_keeps_report_only_and_gate_eligible_semantics_separate() -> None: + payload = mcp_claim_guard_mod.validate_claims( + text=( + "security_surfaces is boundary inventory. " + "coverage_join can fail the coverage hotspot gate. " + "F-2 remains known baseline debt." + ), + report_context=_claim_guard_context(has_comparison_run=True), + ) + + assert payload["valid"] is True + assert payload["violations"] == [] + assert payload["warnings"] == [] + assert payload["citations_found"] == 3 + + +def test_claim_guard_no_citations_warning_and_unknown_short_id() -> None: + warning_payload = mcp_claim_guard_mod.validate_claims( + text="General review text without CodeClone citations.", + report_context=_claim_guard_context(), + ) + relaxed_payload = mcp_claim_guard_mod.validate_claims( + text="General review text without CodeClone citations.", + report_context=_claim_guard_context(), + require_citations=False, + ) + unknown_payload = mcp_claim_guard_mod.validate_claims( + text="F-999 looks new.", + report_context=_claim_guard_context(), + ) + + assert warning_payload["valid"] is True + assert cast("list[dict[str, str]]", warning_payload["warnings"])[0]["type"] == ( + "no_citations" + ) + assert relaxed_payload["warnings"] == [] + unknown_warnings = cast("list[dict[str, str]]", unknown_payload["warnings"]) + assert [item["type"] for item in unknown_warnings] == [ + "no_citations", + "unknown_finding", + ] + + +def test_mcp_service_validate_review_claims_contract(tmp_path: Path) -> None: + service = 
CodeCloneMCPService(history_limit=4) + record = _blast_radius_run_record(tmp_path, run_id="claimguard1234567890") + service._runs.register(record) + before_state_keys = set(service.__dict__) + before_report = copy.deepcopy(record.report_document) + + text = ( + "clone:function:g2 is a new regression. " + "security_surfaces found vulnerabilities. " + "overloaded_modules will fail CI." + ) + first = service.validate_review_claims(run_id="claimguard", text=text) + second = service.validate_review_claims(run_id="claimguard", text=text) + + assert first == second + assert first["run_id"] == "claimgua" + assert first["valid"] is False + violations = cast("list[dict[str, object]]", first["violations"]) + assert {str(item["pattern"]) for item in violations} == {"P-1", "P-2", "P-3"} + assert record.report_document == before_report + assert set(service.__dict__) == before_state_keys + + +def test_mcp_service_validate_review_claims_fix_verification_and_inputs( + tmp_path: Path, +) -> None: + service = CodeCloneMCPService(history_limit=4) + service._runs.register( + _blast_radius_run_record(tmp_path, run_id="claimbefore123456") + ) + + unverified = service.validate_review_claims( + run_id="claimbefore", + text="clone:function:g1 fixed the issue.", + ) + service._runs.register( + _blast_radius_run_record(tmp_path, run_id="claimafter123456") + ) + verified = service.validate_review_claims( + run_id="claimafter", + text="clone:function:g1 fixed the issue.", + ) + + unverified_violations = cast( + "list[dict[str, object]]", + unverified["violations"], + ) + assert [item["pattern"] for item in unverified_violations] == ["P-5"] + assert verified["violations"] == [] + assert verified["valid"] is True + assert ( + service.validate_review_claims( + run_id="claimafter", + text='{"summary":"security_surfaces is boundary inventory"}', + require_citations=False, + )["valid"] + is True + ) + with pytest.raises(MCPServiceContractError, match="must not be empty"): + 
service.validate_review_claims(text=" ") + with pytest.raises(MCPServiceContractError, match="maximum supported length"): + service.validate_review_claims(text="x" * 50_001) + with pytest.raises(MCPRunNotFoundError): + service.validate_review_claims(run_id="missing", text="security_surfaces") + + def test_mcp_review_receipt_helpers_are_bounded_and_contract_aware() -> None: assert ( mcp_review_receipt_mod.derive_baseline_status( From 0ed4ec88d26b503c1c4b4fee579260abbafd74b5 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sun, 24 May 2026 16:30:51 +0500 Subject: [PATCH 013/318] feat(mcp): add graceful workspace intent cleanup on process shutdown Zero-trust safe_remove_own_intent() validates path containment, symlink indirection, and file type before unlinking. FastMCP lifespan teardown calls shutdown_cleanup() on stdin EOF; SIGTERM handler converts to SystemExit so async teardown runs. Existing GC remains the fallback for SIGKILL and crashes. --- codeclone/surfaces/mcp/_workspace_intents.py | 68 ++++ codeclone/surfaces/mcp/server.py | 33 +- codeclone/surfaces/mcp/service.py | 27 ++ .../public_api_surface.json | 4 + tests/test_mcp_shutdown.py | 363 ++++++++++++++++++ 5 files changed, 494 insertions(+), 1 deletion(-) create mode 100644 tests/test_mcp_shutdown.py diff --git a/codeclone/surfaces/mcp/_workspace_intents.py b/codeclone/surfaces/mcp/_workspace_intents.py index 248c50bd..99544e57 100644 --- a/codeclone/surfaces/mcp/_workspace_intents.py +++ b/codeclone/surfaces/mcp/_workspace_intents.py @@ -497,6 +497,73 @@ def _unlink(path: Path) -> bool: return True +def _is_safe_intent_path(expected: Path, registry: Path) -> bool: + """Return True only if *expected* is safe to delete. + + Checks (all must pass): + 1. *expected* is an absolute path. + 2. *expected* resolves to itself — no symlink indirection. + 3. Resolved path is strictly inside *registry* directory. + 4. Filename matches the ``{pid}-{start_epoch}-{intent_id}.json`` pattern. + 5. 
Target is a regular file (not a directory, device, or pipe). + """ + try: + if not expected.is_absolute(): + return False + resolved = expected.resolve(strict=False) + resolved_registry = registry.resolve(strict=False) + if resolved != expected: + return False + if not resolved.is_relative_to(resolved_registry): + return False + name = expected.name + if not name.endswith(".json") or name.count("-") < 2: + return False + if expected.exists() and not expected.is_file(): + return False + except (OSError, ValueError): + return False + return True + + +def safe_remove_own_intent( + *, + root: Path, + pid: int, + start_epoch: int, + intent_id: str, +) -> bool: + """Remove a workspace intent file ONLY if it belongs to the caller. + + Safety checks (all must pass): + 1. *root* is an absolute path. + 2. Constructed path resolves inside ``registry_dir(root)``. + 3. No symlink indirection (resolved == constructed). + 4. Target is a regular file. + 5. Filename matches expected pattern. + + Returns True if the file was removed or is already absent. + Returns False if any safety check fails (file is NOT removed). + Never raises. 
+ """ + try: + if not root.is_absolute(): + return False + registry = registry_dir(root) + expected = intent_path( + root=root, + pid=pid, + start_epoch=start_epoch, + intent_id=intent_id, + ) + if not _is_safe_intent_path(expected, registry): + return False + expected.unlink(missing_ok=True) + except Exception: + return False + return True + + def _record_sort_key(record: WorkspaceIntentRecord) -> tuple[str, int, str]: return (record.declared_at_utc, record.agent_pid, record.intent_id) @@ -629,6 +696,7 @@ def _overlap_type(*, hard: bool, soft: bool) -> str: "remove_workspace_intent", "remove_workspace_record", "resolved_ttl_seconds", + "safe_remove_own_intent", "stale_reason", "update_workspace_intent_status", "utc_now", diff --git a/codeclone/surfaces/mcp/server.py b/codeclone/surfaces/mcp/server.py index 27b18435..b438f9eb 100644 --- a/codeclone/surfaces/mcp/server.py +++ b/codeclone/surfaces/mcp/server.py @@ -9,7 +9,8 @@ import argparse import ipaddress import sys -from collections.abc import Callable +from collections.abc import AsyncIterator, Callable +from contextlib import asynccontextmanager from typing import TYPE_CHECKING, Literal, TypeVar from ... import __version__ @@ -147,9 +148,16 @@ def build_mcp_server( runtime_fastmcp, read_only_tool, analysis_tool, session_tool = _load_mcp_runtime() service = CodeCloneMCPService(history_limit=_validated_history_limit(history_limit)) + + @asynccontextmanager + async def _lifespan(_app: FastMCP) -> AsyncIterator[dict[str, object]]: + yield {} + service.shutdown_cleanup() + mcp = runtime_fastmcp( name="CodeClone", instructions=_SERVER_INSTRUCTIONS, + lifespan=_lifespan, website_url=DOCS_URL, host=host, port=port, @@ -1142,7 +1150,30 @@ def _host_is_loopback(host: str) -> bool: return False +def _install_sigterm_handler() -> None: + """Convert SIGTERM to SystemExit so async teardown runs. 
+ + Python's default SIGTERM handler (SIG_DFL) terminates the process + immediately — no ``finally`` blocks, no ``atexit``, no async + context manager teardown. By raising :class:`SystemExit`, the + event loop unwinds normally and the FastMCP lifespan teardown + (which cleans workspace intent files) gets a chance to execute. + + Only installed on platforms that support SIGTERM (not Windows). + """ + import signal as _signal + + if not hasattr(_signal, "SIGTERM"): + return # pragma: no cover + + def _handler(_signum: int, _frame: object) -> None: + raise SystemExit(0) + + _signal.signal(_signal.SIGTERM, _handler) + + def main() -> None: + _install_sigterm_handler() args = build_parser().parse_args() if ( args.transport == "streamable-http" diff --git a/codeclone/surfaces/mcp/service.py b/codeclone/surfaces/mcp/service.py index 869b7058..266de2d0 100644 --- a/codeclone/surfaces/mcp/service.py +++ b/codeclone/surfaces/mcp/service.py @@ -8,10 +8,12 @@ import inspect from typing import Protocol +from ._workspace_intents import safe_remove_own_intent from .session import ( DEFAULT_MCP_HISTORY_LIMIT, MCPAnalysisRequest, MCPGateRequest, + MCPServiceContractError, MCPSession, ) from .tools._base import run_kw @@ -152,6 +154,31 @@ def clear_session_runs(self) -> dict[str, object]: def read_resource(self, uri: str) -> str: return self._session_cls.read_resource(self, uri) + def shutdown_cleanup(self) -> None: + """Best-effort cleanup of workspace intent files owned by this process. + + Called from FastMCP lifespan teardown at process exit. Removes + only files that THIS process created — identified by matching + PID + start_epoch + intent_id. Never raises. Does not write to + stdout/stderr (the pipe may already be closed). 
+ """ + try: + with self._state_lock: + snapshot = dict(self._active_intents) + for intent_id, intent in snapshot.items(): + try: + run = self._runs.get(intent.run_id) + except (MCPServiceContractError, Exception): + continue + safe_remove_own_intent( + root=run.root, + pid=self._agent_pid, + start_epoch=self._agent_start_epoch, + intent_id=intent_id, + ) + except Exception: + pass + _EMPTY = inspect.Signature.empty diff --git a/tests/fixtures/contract_snapshots/public_api_surface.json b/tests/fixtures/contract_snapshots/public_api_surface.json index 73cb5105..e880bcd9 100644 --- a/tests/fixtures/contract_snapshots/public_api_surface.json +++ b/tests/fixtures/contract_snapshots/public_api_surface.json @@ -111,6 +111,10 @@ "name": "read_resource", "signature": "(self, uri: 'str') -> 'str'" }, + { + "name": "shutdown_cleanup", + "signature": "(self) -> 'None'" + }, { "name": "validate_review_claims", "signature": "(self, *, text: 'str', run_id: 'str | None' = None, require_citations: 'bool' = True) -> 'dict[str, object]'" diff --git a/tests/test_mcp_shutdown.py b/tests/test_mcp_shutdown.py new file mode 100644 index 00000000..17104ae6 --- /dev/null +++ b/tests/test_mcp_shutdown.py @@ -0,0 +1,363 @@ +"""Tests for Phase 7 — graceful MCP process shutdown. + +Validates that ``safe_remove_own_intent`` enforces zero-trust path +safety, and that ``CodeCloneMCPService.shutdown_cleanup`` removes only +files owned by the current process. 
+""" + +from __future__ import annotations + +import os +from datetime import timedelta +from pathlib import Path + +import pytest + +from codeclone.surfaces.mcp import _workspace_intents as workspace_intents +from codeclone.surfaces.mcp._workspace_intents import ( + WorkspaceIntentRecord, + _is_safe_intent_path, + intent_path, + registry_dir, + safe_remove_own_intent, + write_workspace_intent, +) +from codeclone.surfaces.mcp.service import CodeCloneMCPService +from codeclone.surfaces.mcp.session import MCPAnalysisRequest + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _record( + *, + intent_id: str = "intent-abcdef12-001", + pid: int | None = None, + start_epoch: int = 100, +) -> WorkspaceIntentRecord: + declared_at = workspace_intents.utc_now() + scope_payload: dict[str, object] = { + "allowed_files": ["pkg/a.py"], + "allowed_related": ["tests/test_a.py"], + "forbidden": [".cache/codeclone/**", "codeclone.baseline.json"], + } + return WorkspaceIntentRecord( + intent_id=intent_id, + agent_pid=pid or os.getpid(), + agent_start_epoch=start_epoch, + agent_label="test-agent", + run_id="abcdef1234567890", + declared_at_utc=workspace_intents.format_utc(declared_at), + expires_at_utc=workspace_intents.format_utc(declared_at + timedelta(hours=1)), + ttl_seconds=3600, + status="active", + intent="test intent", + scope=scope_payload, + scope_digest=workspace_intents.compute_scope_digest(scope_payload), + blast_radius_summary={"radius_level": "low"}, + ) + + +def _svc() -> CodeCloneMCPService: + return CodeCloneMCPService(history_limit=5) + + +def _analysis_request(root: str) -> MCPAnalysisRequest: + return MCPAnalysisRequest(root=root) + + +# --------------------------------------------------------------------------- +# _is_safe_intent_path +# --------------------------------------------------------------------------- + + +def 
test_safe_path_accepts_valid_intent_path(tmp_path: Path) -> None: + registry = registry_dir(tmp_path) + registry.mkdir(parents=True, exist_ok=True) + expected = intent_path( + root=tmp_path, + pid=123, + start_epoch=456, + intent_id="intent-aaa-001", + ) + assert _is_safe_intent_path(expected, registry) is True + + +def test_safe_path_rejects_relative(tmp_path: Path) -> None: + registry = registry_dir(tmp_path) + assert ( + _is_safe_intent_path( + Path("relative/123-456-intent-aaa-001.json"), + registry, + ) + is False + ) + + +@pytest.mark.parametrize( + "target_relative_to", + ["outside", "inside"], + ids=["symlink-outside-registry", "symlink-inside-registry"], +) +def test_safe_path_rejects_symlink( + tmp_path: Path, + target_relative_to: str, +) -> None: + registry = registry_dir(tmp_path) + registry.mkdir(parents=True, exist_ok=True) + parent = tmp_path if target_relative_to == "outside" else registry + target = parent / "real-target.json" + target.write_text("{}") + symlink = registry / "123-456-intent-aaa-001.json" + symlink.symlink_to(target) + assert _is_safe_intent_path(symlink, registry) is False + + +def test_safe_path_rejects_directory(tmp_path: Path) -> None: + registry = registry_dir(tmp_path) + registry.mkdir(parents=True, exist_ok=True) + (registry / "123-456-intent-aaa-001.json").mkdir() + assert ( + _is_safe_intent_path( + registry / "123-456-intent-aaa-001.json", + registry, + ) + is False + ) + + +def test_safe_path_rejects_outside_registry(tmp_path: Path) -> None: + registry = registry_dir(tmp_path) + outside = tmp_path / "123-456-intent-aaa-001.json" + assert _is_safe_intent_path(outside, registry) is False + + +def test_safe_path_rejects_non_json_extension(tmp_path: Path) -> None: + registry = registry_dir(tmp_path) + assert _is_safe_intent_path(registry / "123-456-x.txt", registry) is False + + +def test_safe_path_rejects_filename_without_dashes(tmp_path: Path) -> None: + registry = registry_dir(tmp_path) + assert 
_is_safe_intent_path(registry / "nodashes.json", registry) is False + + +# --------------------------------------------------------------------------- +# safe_remove_own_intent +# --------------------------------------------------------------------------- + + +def test_safe_remove_own_file(tmp_path: Path) -> None: + pid, epoch = os.getpid(), 100 + intent_id = "intent-abcdef12-001" + record = _record(pid=pid, start_epoch=epoch, intent_id=intent_id) + assert write_workspace_intent(root=tmp_path, record=record) + path = intent_path(root=tmp_path, pid=pid, start_epoch=epoch, intent_id=intent_id) + assert path.exists() + assert safe_remove_own_intent( + root=tmp_path, + pid=pid, + start_epoch=epoch, + intent_id=intent_id, + ) + assert not path.exists() + + +def test_safe_remove_missing_file_returns_true(tmp_path: Path) -> None: + registry_dir(tmp_path).mkdir(parents=True, exist_ok=True) + assert safe_remove_own_intent( + root=tmp_path, + pid=1, + start_epoch=1, + intent_id="intent-gone-001", + ) + + +def test_safe_remove_does_not_touch_foreign_pid(tmp_path: Path) -> None: + foreign_pid, own_pid = 999999, os.getpid() + intent_id = "intent-foreign-001" + record = _record(pid=foreign_pid, start_epoch=200, intent_id=intent_id) + assert write_workspace_intent(root=tmp_path, record=record) + foreign_path = intent_path( + root=tmp_path, + pid=foreign_pid, + start_epoch=200, + intent_id=intent_id, + ) + assert foreign_path.exists() + safe_remove_own_intent( + root=tmp_path, + pid=own_pid, + start_epoch=200, + intent_id=intent_id, + ) + assert foreign_path.exists() + + +def test_safe_remove_does_not_touch_foreign_epoch(tmp_path: Path) -> None: + pid = os.getpid() + intent_id = "intent-epoch-001" + record = _record(pid=pid, start_epoch=200, intent_id=intent_id) + assert write_workspace_intent(root=tmp_path, record=record) + real_path = intent_path( + root=tmp_path, + pid=pid, + start_epoch=200, + intent_id=intent_id, + ) + assert real_path.exists() + safe_remove_own_intent( + 
root=tmp_path, + pid=pid, + start_epoch=999, + intent_id=intent_id, + ) + assert real_path.exists() + + +def test_safe_remove_rejects_relative_root() -> None: + assert ( + safe_remove_own_intent( + root=Path("relative/path"), + pid=1, + start_epoch=1, + intent_id="intent-rel-001", + ) + is False + ) + + +def test_safe_remove_rejects_symlink_escape(tmp_path: Path) -> None: + registry = registry_dir(tmp_path) + registry.mkdir(parents=True, exist_ok=True) + outside = tmp_path / "outside-secret.json" + outside.write_text("important data") + pid, epoch = os.getpid(), 100 + intent_id = "intent-sym-001" + symlink = intent_path( + root=tmp_path, pid=pid, start_epoch=epoch, intent_id=intent_id + ) + symlink.symlink_to(outside) + assert ( + safe_remove_own_intent( + root=tmp_path, + pid=pid, + start_epoch=epoch, + intent_id=intent_id, + ) + is False + ) + assert outside.exists() + assert symlink.is_symlink() + + +def test_safe_remove_rejects_directory_target(tmp_path: Path) -> None: + registry = registry_dir(tmp_path) + registry.mkdir(parents=True, exist_ok=True) + pid, epoch = os.getpid(), 100 + intent_id = "intent-dir-001" + dir_path = intent_path( + root=tmp_path, + pid=pid, + start_epoch=epoch, + intent_id=intent_id, + ) + dir_path.mkdir() + assert ( + safe_remove_own_intent( + root=tmp_path, + pid=pid, + start_epoch=epoch, + intent_id=intent_id, + ) + is False + ) + assert dir_path.is_dir() + + +# --------------------------------------------------------------------------- +# shutdown_cleanup — integration +# --------------------------------------------------------------------------- + + +def test_shutdown_cleanup_removes_own_intents(tmp_path: Path) -> None: + svc = _svc() + run_id = str(svc.analyze_repository(_analysis_request(str(tmp_path)))["run_id"]) + decl = svc.manage_change_intent( + action="declare", + run_id=run_id, + root=str(tmp_path), + scope={"allowed_files": ["pkg/a.py"], "allowed_related": [], "forbidden": []}, + intent="test shutdown cleanup", + ) + path 
= intent_path( + root=tmp_path, + pid=svc._agent_pid, + start_epoch=svc._agent_start_epoch, + intent_id=str(decl["intent_id"]), + ) + assert path.exists() + svc.shutdown_cleanup() + assert not path.exists() + + +def test_shutdown_cleanup_noop_without_intents() -> None: + _svc().shutdown_cleanup() # must not raise + + +def test_shutdown_cleanup_is_idempotent(tmp_path: Path) -> None: + svc = _svc() + run_id = str(svc.analyze_repository(_analysis_request(str(tmp_path)))["run_id"]) + svc.manage_change_intent( + action="declare", + run_id=run_id, + root=str(tmp_path), + scope={"allowed_files": ["pkg/b.py"], "allowed_related": [], "forbidden": []}, + intent="idempotent test", + ) + svc.shutdown_cleanup() + svc.shutdown_cleanup() # second call — no error + + +def test_shutdown_cleanup_skips_on_run_error( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + svc = _svc() + run_id = str(svc.analyze_repository(_analysis_request(str(tmp_path)))["run_id"]) + svc.manage_change_intent( + action="declare", + run_id=run_id, + root=str(tmp_path), + scope={"allowed_files": ["pkg/c.py"], "allowed_related": [], "forbidden": []}, + intent="error test", + ) + monkeypatch.setattr( + svc._runs, + "get", + lambda *_a, **_kw: (_ for _ in ()).throw(RuntimeError("boom")), + ) + svc.shutdown_cleanup() # must not raise + + +# --------------------------------------------------------------------------- +# SIGTERM handler +# --------------------------------------------------------------------------- + + +def test_sigterm_handler_raises_system_exit() -> None: + import signal + + from codeclone.surfaces.mcp.server import _install_sigterm_handler + + old = signal.getsignal(signal.SIGTERM) + try: + _install_sigterm_handler() + handler = signal.getsignal(signal.SIGTERM) + assert handler is not signal.SIG_DFL + with pytest.raises(SystemExit) as exc_info: + handler(signal.SIGTERM, None) # type: ignore[misc,operator] + assert exc_info.value.code == 0 + finally: + signal.signal(signal.SIGTERM, 
old) From 4f86373667547ccc6dec08e273cc4a22b05ac671 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sun, 24 May 2026 21:27:41 +0500 Subject: [PATCH 014/318] feat(cli): add change controller query modes --- CHANGELOG.md | 5 + README.md | 7 +- codeclone/config/spec.py | 27 ++ codeclone/surfaces/cli/blast_radius.py | 242 ++++++++++++++ codeclone/surfaces/cli/changed_scope.py | 8 +- codeclone/surfaces/cli/execution.py | 43 ++- codeclone/surfaces/cli/patch_verify.py | 297 ++++++++++++++++++ codeclone/surfaces/cli/types.py | 3 + codeclone/surfaces/cli/workflow.py | 177 +++++++++-- codeclone/ui_messages/__init__.py | 56 ++++ docs/README-pypi.md | 5 + docs/book/03-contracts-exit-codes.md | 3 + docs/book/09-cli.md | 17 + docs/book/24-structural-change-controller.md | 50 ++- .../fixtures/contract_snapshots/cli_help.txt | 15 +- tests/test_cli_blast_radius.py | 107 +++++++ tests/test_cli_patch_verify.py | 193 ++++++++++++ tests/test_cli_unit.py | 3 + 18 files changed, 1200 insertions(+), 58 deletions(-) create mode 100644 codeclone/surfaces/cli/blast_radius.py create mode 100644 codeclone/surfaces/cli/patch_verify.py create mode 100644 tests/test_cli_blast_radius.py create mode 100644 tests/test_cli_patch_verify.py diff --git a/CHANGELOG.md b/CHANGELOG.md index ccaed279..9dbf52ab 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,9 @@ vulnerabilities, report-only families called CI failures, known findings called new regressions, dead-code certainty despite runtime reachability evidence, and fixes claimed before post-patch verification. +- Add CLI controller query modes: `--blast-radius FILE [FILE...]` for + terminal pre-change boundary review and `--patch-verify` for trusted-baseline + patch verification with `ci`, `strict`, and `relaxed` profiles. ### Internal @@ -42,6 +45,8 @@ pruning cannot drop the declared before-run before verification. - Mark the package as `2.1.0a1` with the PyPI alpha classifier while v2.1 controller features are under development. 
+- Keep CLI controller query modes read-only by skipping baseline, report, and + analysis-cache writes. ## [2.0.2] - 2026-05-19 diff --git a/README.md b/README.md index 1693df4d..118d43ff 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ combines clone detection, code-quality metrics, and baseline-aware CI gating wit governance for AI coding agents. In the current v2.1 alpha, CodeClone records the declared intent before the first edit, maps the -structural blast radius, verifies explicit before/after runs against the patch contract, and +structural blast radius, verifies patches against the patch contract, and generates auditable review receipts. It also exposes an advisory workspace intent registry so parallel agents can see overlapping edit scopes before they start, and validates cited review claims against the canonical report so agents do not overstate report-only signals or known debt. @@ -82,6 +82,7 @@ Change controller docs: [Structural Change Controller](https://orenlab.github.io - **Patch contract** — pre-edit regression budget and post-edit boundary verification over explicit before/after runs - **Review receipt** — auditable artifact linking intent, scope, patch verification, and structural delta - **Claim guard** — citation-based validation of review text against canonical report semantics +- **CLI controller queries** — `--blast-radius` before edits and `--patch-verify` before push **Baseline governance** @@ -172,6 +173,10 @@ codeclone . --changed-only --diff-against main # shorthand: diff source for changed-scope review codeclone . --paths-from-git-diff HEAD~1 + +# structural change controller queries +codeclone . --blast-radius codeclone/core/parser.py +codeclone . --patch-verify --diff-against HEAD~1 ```
diff --git a/codeclone/config/spec.py b/codeclone/config/spec.py index 798e2bf4..23e74807 100644 --- a/codeclone/config/spec.py +++ b/codeclone/config/spec.py @@ -231,6 +231,33 @@ def _option( metavar="GIT_REF", help_text=ui.HELP_PATHS_FROM_GIT_DIFF, ), + _option( + dest="blast_radius", + group="Analysis", + cli_kind="value", + flags=("--blast-radius",), + default=None, + nargs="+", + metavar="FILE", + help_text=ui.HELP_BLAST_RADIUS, + ), + _option( + dest="patch_verify", + group="Analysis", + cli_kind="store_true", + flags=("--patch-verify",), + default=False, + help_text=ui.HELP_PATCH_VERIFY, + ), + _option( + dest="strictness", + group="Analysis", + cli_kind="value", + flags=("--strictness",), + default="ci", + metavar="LEVEL", + help_text=ui.HELP_STRICTNESS, + ), _option( dest="cache_path", group="Analysis", diff --git a/codeclone/surfaces/cli/blast_radius.py b/codeclone/surfaces/cli/blast_radius.py new file mode 100644 index 00000000..6a0ce17e --- /dev/null +++ b/codeclone/surfaces/cli/blast_radius.py @@ -0,0 +1,242 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import sys +from collections.abc import Mapping, Sequence +from pathlib import Path, PurePosixPath + +from ... 
import ui_messages as ui +from ...contracts import ExitCode +from ...utils.coerce import as_mapping as _as_mapping +from ...utils.coerce import as_sequence as _as_sequence +from ..mcp._blast_radius import BlastRadiusResult, compute_blast_radius +from .types import PrinterLike + +_RISK_STYLES = { + "low": "green", + "medium": "yellow", + "high": "bold red", + "critical": "bold white on red", +} +_MAX_RENDERED_ITEMS = 20 + + +def _report_run_id(report_document: Mapping[str, object]) -> str: + integrity = _as_mapping(report_document.get("integrity")) + digest = _as_mapping(integrity.get("digest")) + value = str(digest.get("value", "")).strip() + return value or "cli-blast-radius" + + +def _inventory_paths(report_document: Mapping[str, object]) -> frozenset[str]: + inventory = _as_mapping(report_document.get("inventory")) + file_registry = _as_mapping(inventory.get("file_registry")) + return frozenset( + str(item).replace("\\", "/").strip("/") + for item in _as_sequence(file_registry.get("items")) + if str(item).strip() + ) + + +def _normalize_cli_path(raw_path: object) -> str: + text = str(raw_path).replace("\\", "/").strip() + if not text: + raise ValueError("empty path") + if Path(text).is_absolute() or PurePosixPath(text).is_absolute(): + raise ValueError("absolute paths are not accepted") + normalized = str(PurePosixPath(text)) + parts = PurePosixPath(normalized).parts + if normalized in {"", "."} or any(part == ".." 
for part in parts): + raise ValueError("paths must stay inside the scan root") + return normalized.removeprefix("./").strip("/") + + +def _validated_origin_paths( + *, + report_document: Mapping[str, object], + files: Sequence[object], + console: PrinterLike, + quiet: bool, +) -> tuple[str, ...]: + known_paths = _inventory_paths(report_document) + valid: set[str] = set() + skipped: list[str] = [] + invalid: list[str] = [] + for raw_path in files: + try: + relative_path = _normalize_cli_path(raw_path) + except ValueError as exc: + invalid.append(f"{raw_path}: {exc}") + continue + if relative_path not in known_paths: + skipped.append(relative_path) + continue + valid.add(relative_path) + + if invalid: + rendered = "\n".join(f" - {item}" for item in invalid[:10]) + if len(invalid) > 10: + rendered += f"\n ... and {len(invalid) - 10} more" + console.print( + ui.fmt_contract_error("Invalid --blast-radius path selection:\n" + rendered) + ) + sys.exit(ExitCode.CONTRACT_ERROR) + + if skipped and not quiet: + rendered = ", ".join(skipped[:5]) + if len(skipped) > 5: + rendered += f", ... and {len(skipped) - 5} more" + console.print( + ui.fmt_cli_runtime_warning( + f"Blast radius skipped files outside analysis inventory: {rendered}" + ) + ) + + if not valid: + console.print( + ui.fmt_contract_error( + "--blast-radius requires at least one file from the analysis inventory." + ) + ) + sys.exit(ExitCode.CONTRACT_ERROR) + return tuple(sorted(valid)) + + +def _style(value: str, *, styles: Mapping[str, str]) -> str: + style = styles.get(value, "") + return f"[{style}]{value}[/{style}]" if style else value + + +def _print_items( + *, + console: PrinterLike, + title: str, + items: Sequence[str], +) -> None: + console.print(f" [bold]{title} ({len(items)}):[/bold]") + if not items: + console.print(" [dim]none[/dim]") + return + for item in items[:_MAX_RENDERED_ITEMS]: + console.print(f" {item}") + if len(items) > _MAX_RENDERED_ITEMS: + console.print(f" [dim]... 
and {len(items) - _MAX_RENDERED_ITEMS} more[/dim]") + + +def _print_entries( + *, + console: PrinterLike, + title: str, + entries: Sequence[Mapping[str, str]], +) -> None: + """Print a titled list of entries, truncated to _MAX_RENDERED_ITEMS.""" + console.print(f" [bold]{title} ({len(entries)}):[/bold]") + if not entries: + console.print(" [dim]none[/dim]") + return + for entry in entries[:_MAX_RENDERED_ITEMS]: + path = str(entry.get("path", "")).strip() + reason = str(entry.get("reason", "")).strip() + severity = str(entry.get("severity", "")).strip() + suffix = f" \\[{severity}]" if severity else "" + console.print(f" {path} [dim]{reason}{suffix}[/dim]") + if len(entries) > _MAX_RENDERED_ITEMS: + console.print( + f" [dim]... and {len(entries) - _MAX_RENDERED_ITEMS} more[/dim]" + ) + + +def _contract_error_result(*, console: PrinterLike, message: str) -> int: + console.print(ui.fmt_contract_error(message)) + return int(ExitCode.CONTRACT_ERROR) + + +def _render_quiet_result(*, console: PrinterLike, result: BlastRadiusResult) -> int: + console.print( + ui.fmt_blast_radius_compact( + level=result.radius_level, + dependents=len(result.direct_dependents), + cohorts=len(result.clone_cohort_members), + cycles=len(result.in_dependency_cycle), + do_not_touch=len(result.do_not_touch), + ) + ) + return int(ExitCode.SUCCESS) + + +def render_blast_radius( + *, + console: PrinterLike, + report_document: Mapping[str, object] | None, + files: Sequence[object], + root_path: Path, + quiet: bool, +) -> int: + _ = root_path + if report_document is None: + return _contract_error_result( + console=console, + message="Blast radius requires a canonical report document.", + ) + + origin_paths = _validated_origin_paths( + report_document=report_document, + files=files, + console=console, + quiet=quiet, + ) + result = compute_blast_radius( + run_id=_report_run_id(report_document), + report_document=report_document, + files=origin_paths, + ) + + if quiet: + return _render_quiet_result(console=console, result=result) + + 
console.print() + console.print(f"[bold]{ui.BLAST_RADIUS_TITLE}[/bold]") + console.print() + console.print(f" [bold]Files:[/bold] {', '.join(result.origin)}") + console.print( + f" [bold]Risk level:[/bold] {_style(result.radius_level, styles=_RISK_STYLES)}" + ) + console.print() + _print_items( + console=console, + title="Direct dependents", + items=result.direct_dependents, + ) + _print_items( + console=console, + title="Clone cohort members", + items=result.clone_cohort_members, + ) + _print_items( + console=console, + title="Dependency cycles", + items=result.in_dependency_cycle, + ) + _print_entries( + console=console, + title="Do not touch", + entries=result.do_not_touch, + ) + _print_entries( + console=console, + title="Review context", + entries=result.review_context, + ) + if result.guardrails: + console.print(" [bold]Guardrails:[/bold]") + for guardrail in result.guardrails: + console.print(f" - {guardrail}") + return int(ExitCode.SUCCESS) + + +__all__ = ["render_blast_radius"] diff --git a/codeclone/surfaces/cli/changed_scope.py b/codeclone/surfaces/cli/changed_scope.py index 4a47d231..6cba7ea3 100644 --- a/codeclone/surfaces/cli/changed_scope.py +++ b/codeclone/surfaces/cli/changed_scope.py @@ -29,6 +29,8 @@ def _validate_changed_scope_args(*, args: object) -> str | None: console = require_status_console(cli_state.get_console()) diff_against = optional_text_attr(args, "diff_against") paths_from_git_diff = optional_text_attr(args, "paths_from_git_diff") + if bool_attr(args, "blast_radius"): + return None if diff_against and paths_from_git_diff: console.print( ui.fmt_contract_error( @@ -39,7 +41,11 @@ def _validate_changed_scope_args(*, args: object) -> str | None: if paths_from_git_diff: set_bool_attr(args, "changed_only", True) return paths_from_git_diff - if diff_against and not bool_attr(args, "changed_only"): + if ( + diff_against + and not bool_attr(args, "changed_only") + and not bool_attr(args, "patch_verify") + ): 
console.print(ui.fmt_contract_error("--diff-against requires --changed-only.")) sys.exit(ExitCode.CONTRACT_ERROR) if bool_attr(args, "changed_only") and not diff_against: diff --git a/codeclone/surfaces/cli/execution.py b/codeclone/surfaces/cli/execution.py index f4445ba1..d58711f5 100644 --- a/codeclone/surfaces/cli/execution.py +++ b/codeclone/surfaces/cli/execution.py @@ -35,6 +35,22 @@ from .types import require_status_console +def _save_cache_after_analysis( + *, + cache: Cache, + analysis_result: AnalysisResult, + cache_update_segment_projection_fn: Callable[[Cache, AnalysisResult], None], + printer: object, +) -> None: + cache_update_segment_projection_fn(cache, analysis_result) + try: + cache.save() + except CacheError as exc: + require_status_console(printer).print( + ui.fmt_cli_runtime_warning(ui.fmt_cache_save_failed(exc)) + ) + + def run_analysis_stages( *, args: object, @@ -65,6 +81,9 @@ def _require_rich_console(value: object) -> RichConsole: printer = require_status_console(cli_state.get_console()) use_status = not bool_attr(args, "quiet") and not bool_attr(args, "no_progress") + write_cache = not ( + bool_attr(args, "blast_radius") or bool_attr(args, "patch_verify") + ) try: if use_status: @@ -154,22 +173,26 @@ def _require_rich_console(value: object) -> RichConsole: discovery=discovery_result, processing=processing_result, ) - cache_update_segment_projection_fn(cache, analysis_result) - try: - cache.save() - except CacheError as exc: - printer.print(ui.fmt_cli_runtime_warning(ui.fmt_cache_save_failed(exc))) + if write_cache: + _save_cache_after_analysis( + cache=cache, + analysis_result=analysis_result, + cache_update_segment_projection_fn=cache_update_segment_projection_fn, + printer=printer, + ) else: analysis_result = analyze_fn( boot=boot, discovery=discovery_result, processing=processing_result, ) - cache_update_segment_projection_fn(cache, analysis_result) - try: - cache.save() - except CacheError as exc: - 
printer.print(ui.fmt_cli_runtime_warning(ui.fmt_cache_save_failed(exc))) + if write_cache: + _save_cache_after_analysis( + cache=cache, + analysis_result=analysis_result, + cache_update_segment_projection_fn=cache_update_segment_projection_fn, + printer=printer, + ) coverage_join = getattr(analysis_result, "coverage_join", None) if ( diff --git a/codeclone/surfaces/cli/patch_verify.py b/codeclone/surfaces/cli/patch_verify.py new file mode 100644 index 00000000..f2977c65 --- /dev/null +++ b/codeclone/surfaces/cli/patch_verify.py @@ -0,0 +1,297 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Sequence + +from ... import ui_messages as ui +from ...contracts import ExitCode +from ...core._types import AnalysisResult +from ...report.gates.evaluator import ( + GateResult, + GateState, + MetricGateConfig, + evaluate_gate_state, + gate_state_from_project_metrics, +) +from ...utils.coerce import as_int as _as_int +from ..mcp._patch_contract import ( + VALID_STRICTNESS_PROFILES, + StrictnessProfile, + budgets_for_strictness, +) +from .baseline_state import CloneBaselineState +from .post_run import DiffContext +from .types import CLIArgsLike, PrinterLike + +_STATUS_STYLES = { + "accepted": "bold green", + "violated": "bold red", + "unverified": "yellow", +} + + +def validate_strictness(value: str) -> StrictnessProfile: + if value not in VALID_STRICTNESS_PROFILES: + expected = ", ".join(sorted(VALID_STRICTNESS_PROFILES)) + raise ValueError(f"Invalid --strictness value: {value!r}. 
Expected {expected}.") + if value == "strict": + return "strict" + if value == "relaxed": + return "relaxed" + return "ci" + + +def _threshold_or_none(value: object) -> int | None: + threshold = _as_int(value, -1) + return threshold if threshold >= 0 else None + + +def _health_after(analysis: AnalysisResult) -> int: + metrics = analysis.project_metrics + if metrics is None: + return 0 + return max(int(metrics.health.total), 0) + + +def _health_delta(metrics_diff: object | None) -> int: + if metrics_diff is None: + return 0 + return _as_int(getattr(metrics_diff, "health_delta", 0), 0) + + +def _metric_gate_config( + *, + args: CLIArgsLike, + strictness: StrictnessProfile, +) -> MetricGateConfig: + if strictness == "ci": + return MetricGateConfig( + fail_complexity=int(args.fail_complexity), + fail_coupling=int(args.fail_coupling), + fail_cohesion=int(args.fail_cohesion), + fail_cycles=bool(args.fail_cycles), + fail_dead_code=bool(args.fail_dead_code), + fail_health=int(args.fail_health), + fail_on_new_metrics=bool(args.fail_on_new_metrics), + fail_on_typing_regression=bool(args.fail_on_typing_regression), + fail_on_docstring_regression=bool(args.fail_on_docstring_regression), + fail_on_api_break=bool(args.fail_on_api_break), + fail_on_untested_hotspots=bool(args.fail_on_untested_hotspots), + min_typing_coverage=int(args.min_typing_coverage), + min_docstring_coverage=int(args.min_docstring_coverage), + coverage_min=int(args.coverage_min), + fail_on_new=True, + fail_threshold=-1, + ) + + budgets = budgets_for_strictness( + strictness=strictness, + coverage_min=int(args.coverage_min), + complexity_threshold=_threshold_or_none(args.fail_complexity), + coupling_threshold=_threshold_or_none(args.fail_coupling), + cohesion_threshold=_threshold_or_none(args.fail_cohesion), + ) + return MetricGateConfig( + fail_complexity=budgets.complexity_delta, + fail_coupling=budgets.coupling_delta, + fail_cohesion=budgets.cohesion_delta, + fail_cycles=budgets.dependency_cycle, + 
fail_dead_code=budgets.dead_code_regression, + fail_health=budgets.health_floor, + fail_on_new_metrics=( + budgets.typing_regression + or budgets.docstring_regression + or budgets.api_break + ), + fail_on_typing_regression=budgets.typing_regression, + fail_on_docstring_regression=budgets.docstring_regression, + fail_on_api_break=budgets.api_break, + fail_on_untested_hotspots=budgets.coverage_hotspot, + min_typing_coverage=int(args.min_typing_coverage), + min_docstring_coverage=int(args.min_docstring_coverage), + coverage_min=budgets.coverage_min, + fail_on_new=budgets.clone_regression == 0, + fail_threshold=-1, + ) + + +def _gate_state( + *, + analysis: AnalysisResult, + diff_context: DiffContext, +) -> GateState: + clone_total = analysis.func_clones_count + analysis.block_clones_count + if analysis.project_metrics is None: + return GateState( + clone_new_count=diff_context.new_clones_count, + clone_total=clone_total, + ) + return gate_state_from_project_metrics( + project_metrics=analysis.project_metrics, + coverage_join=analysis.coverage_join, + metrics_diff=diff_context.metrics_diff, + clone_new_count=diff_context.new_clones_count, + clone_total=clone_total, + ) + + +def _evaluate_patch_gates( + *, + args: CLIArgsLike, + strictness: StrictnessProfile, + analysis: AnalysisResult, + diff_context: DiffContext, +) -> GateResult: + return evaluate_gate_state( + state=_gate_state(analysis=analysis, diff_context=diff_context), + config=_metric_gate_config(args=args, strictness=strictness), + ) + + +def _status_text(status: str) -> str: + style = _STATUS_STYLES.get(status) + return f"[{style}]{status}[/{style}]" if style else status + + +def _gate_status(gate_result: GateResult) -> str: + return "FAIL" if gate_result.exit_code != 0 else "pass" + + +def _contract_violations( + *, + diff_context: DiffContext, + gate_result: GateResult, +) -> tuple[str, ...]: + violations: list[str] = [] + if diff_context.new_clones_count > 0: + violations.append("structural_regressions") 
+ if gate_result.exit_code != 0: + violations.append("gate_failures") + return tuple(violations) + + +def _render_reasons( + *, + console: PrinterLike, + title: str, + values: Sequence[str], +) -> None: + console.print(f" [bold]{title}:[/bold]") + if not values: + console.print(" [dim]none[/dim]") + return + for value in values: + console.print(f" - {value}") + + +def render_patch_verify( + *, + console: PrinterLike, + args: CLIArgsLike, + strictness: str, + analysis: AnalysisResult, + diff_context: DiffContext, + baseline_state: CloneBaselineState, + quiet: bool, +) -> int: + try: + validated_strictness = validate_strictness(strictness) + except ValueError as exc: + console.print(ui.fmt_contract_error(str(exc))) + return int(ExitCode.CONTRACT_ERROR) + + if not baseline_state.trusted_for_diff: + console.print( + ui.fmt_contract_error( + "Patch verify requires a trusted baseline. " + "Run codeclone . --update-baseline first." + ) + ) + return int(ExitCode.CONTRACT_ERROR) + + gate_result = _evaluate_patch_gates( + args=args, + strictness=validated_strictness, + analysis=analysis, + diff_context=diff_context, + ) + violations = _contract_violations( + diff_context=diff_context, + gate_result=gate_result, + ) + status = "violated" if violations else "accepted" + exit_code = ( + int(ExitCode.GATING_FAILURE) + if violations and validated_strictness != "relaxed" + else int(ExitCode.SUCCESS) + ) + health_after = _health_after(analysis) + health_before = health_after - _health_delta(diff_context.metrics_diff) + gate_status = _gate_status(gate_result) + + if quiet: + console.print( + ui.fmt_patch_verify_compact( + status=status, + health_before=health_before, + health_after=health_after, + regressions=diff_context.new_clones_count, + gate_status=gate_status, + ) + ) + return exit_code + + from rich.rule import Rule + + console.print() + console.print(Rule(ui.PATCH_VERIFY_TITLE)) + console.print() + console.print(f" [bold]Strictness:[/bold] {validated_strictness}") + 
console.print(f" [bold]Status:[/bold] {_status_text(status)}") + console.print() + console.print( + f" [bold]Health:[/bold] {health_before} -> {health_after} " + f"(delta: {health_after - health_before})" + ) + console.print() + console.print(" [bold]Structural delta:[/bold]") + console.print(f" Regressions: {diff_context.new_clones_count}") + console.print(" Improvements: 0") + verdict = "regressed" if diff_context.new_clones_count > 0 else "stable" + console.print(f" Verdict: {verdict}") + console.print() + console.print( + f" [bold]Gate preview:[/bold] {gate_status} (exit {gate_result.exit_code})" + ) + if gate_result.reasons: + for reason in gate_result.reasons: + console.print(f" - {reason}") + console.print() + _render_reasons( + console=console, + title="Contract violations", + values=violations, + ) + console.print() + if status == "accepted": + console.print(" [bold green]Patch contract accepted.[/bold green]") + elif validated_strictness == "relaxed": + console.print( + " [yellow]Patch contract has advisory violations " + "but relaxed mode exits 0.[/yellow]" + ) + else: + console.print(" [bold red]Patch contract violated.[/bold red]") + return exit_code + + +__all__ = [ + "VALID_STRICTNESS_PROFILES", + "render_patch_verify", + "validate_strictness", +] diff --git a/codeclone/surfaces/cli/types.py b/codeclone/surfaces/cli/types.py index 3e464b2d..2a913650 100644 --- a/codeclone/surfaces/cli/types.py +++ b/codeclone/surfaces/cli/types.py @@ -78,6 +78,9 @@ class CLIArgsLike(Protocol): changed_only: bool diff_against: str | None paths_from_git_diff: str | None + blast_radius: tuple[str, ...] | list[str] | None + patch_verify: bool + strictness: str skip_metrics: bool skip_dead_code: bool skip_dependencies: bool diff --git a/codeclone/surfaces/cli/workflow.py b/codeclone/surfaces/cli/workflow.py index bed4dd4d..42b7e758 100644 --- a/codeclone/surfaces/cli/workflow.py +++ b/codeclone/surfaces/cli/workflow.py @@ -43,6 +43,8 @@ from . 
import state as cli_state from . import summary as cli_summary from . import tips as cli_tips +from .attrs import bool_attr +from .patch_verify import VALID_STRICTNESS_PROFILES from .types import CLIArgsLike, StatusConsole, require_status_console __all__ = [ @@ -68,6 +70,7 @@ "_resolve_metrics_baseline_state", "_rich_progress_symbols", "_run_analysis_stages", + "_validate_controller_query_flags", "_validate_report_ui_flags", "_write_report_outputs", "analyze", @@ -165,6 +168,89 @@ def _make_console(*, no_color: bool) -> object: LEGACY_CACHE_PATH = cli_state.LEGACY_CACHE_PATH +def _controller_query_mode(args: object) -> bool: + return bool_attr(args, "blast_radius") or bool_attr(args, "patch_verify") + + +def _validate_controller_query_flags( + *, + args: object, + report_outputs_requested: bool = False, + strictness_explicit: bool = False, +) -> None: + printer = _console() + blast_radius = bool_attr(args, "blast_radius") + patch_verify = bool_attr(args, "patch_verify") + strictness = str(getattr(args, "strictness", "ci") or "ci") + if strictness not in VALID_STRICTNESS_PROFILES: + expected = ", ".join(sorted(VALID_STRICTNESS_PROFILES)) + printer.print( + ui.fmt_contract_error( + f"Invalid --strictness value: {strictness!r}. Expected {expected}." 
+ ) + ) + sys.exit(ExitCode.CONTRACT_ERROR) + if strictness_explicit and not patch_verify: + printer.print( + ui.fmt_contract_error("--strictness is only valid with --patch-verify.") + ) + sys.exit(ExitCode.CONTRACT_ERROR) + if blast_radius and patch_verify: + printer.print( + ui.fmt_contract_error("Use --blast-radius or --patch-verify, not both.") + ) + sys.exit(ExitCode.CONTRACT_ERROR) + if not (blast_radius or patch_verify): + return + if bool_attr(args, "update_baseline") or bool_attr(args, "update_metrics_baseline"): + printer.print( + ui.fmt_contract_error("Controller query modes cannot update baselines.") + ) + sys.exit(ExitCode.CONTRACT_ERROR) + if report_outputs_requested: + printer.print( + ui.fmt_contract_error( + "Controller query modes are terminal-only and cannot be combined " + "with report output flags." + ) + ) + sys.exit(ExitCode.CONTRACT_ERROR) + + +def _run_controller_query( + *, + args: CLIArgsLike, + report_document: dict[str, object] | None, + root_path: Path, + analysis_result: AnalysisResult, + diff_context: cli_post_run.DiffContext, + baseline_state: cli_baseline_state.CloneBaselineState, +) -> int | None: + if bool_attr(args, "blast_radius"): + from .blast_radius import render_blast_radius + + return render_blast_radius( + console=_console(), + report_document=report_document, + files=tuple(getattr(args, "blast_radius", ()) or ()), + root_path=root_path, + quiet=args.quiet, + ) + if not bool_attr(args, "patch_verify"): + return None + from .patch_verify import render_patch_verify + + return render_patch_verify( + console=_console(), + args=args, + strictness=str(getattr(args, "strictness", "ci") or "ci"), + analysis=analysis_result, + diff_context=diff_context, + baseline_state=baseline_state, + quiet=args.quiet, + ) + + def print_banner(*, root: Path | None = None) -> None: _set_console(console) _print_banner_impl(root=root) @@ -273,6 +359,9 @@ def _main_impl() -> None: explicit_cli_dests = collect_explicit_cli_dests(ap, argv=raw_argv) 
report_path_origins = _report_path_origins(raw_argv) report_generated_at_utc = cli_meta_mod._current_report_timestamp_utc() + strictness_explicit = any( + arg == "--strictness" or arg.startswith("--strictness=") for arg in raw_argv + ) cache_path_from_args = any( arg in {"--cache-dir", "--cache-path"} or arg.startswith(("--cache-dir=", "--cache-path=")) @@ -298,6 +387,10 @@ def _main_impl() -> None: config_values=pyproject_config, explicit_cli_dests=explicit_cli_dests, ) + _validate_controller_query_flags( + args=args, + strictness_explicit=strictness_explicit, + ) git_diff_ref = _validate_changed_scope_args(args=args) changed_paths = ( _git_diff_changed_paths(root_path=root_path, git_diff_ref=git_diff_ref) @@ -337,6 +430,19 @@ def _main_impl() -> None: report_generated_at_utc=report_generated_at_utc, ) _validate_report_ui_flags(args=args, output_paths=output_paths) + _validate_controller_query_flags( + args=args, + report_outputs_requested=bool( + output_paths.html + or output_paths.json + or output_paths.md + or output_paths.sarif + or output_paths.text + or bool_attr(args, "open_html_report") + or bool_attr(args, "timestamped_report_paths") + ), + strictness_explicit=strictness_explicit, + ) cache_path = _resolve_cache_path( root_path=root_path, args=args, @@ -427,35 +533,36 @@ def _main_impl() -> None: discovery_result=discovery_result, processing_result=processing_result, ) - _print_summary( - console=_console(), - quiet=args.quiet, - files_found=discovery_result.files_found, - files_analyzed=processing_result.files_analyzed, - cache_hits=discovery_result.cache_hits, - files_skipped=processing_result.files_skipped, - analyzed_lines=summary_counts["analyzed_lines"], - analyzed_functions=summary_counts["analyzed_functions"], - analyzed_methods=summary_counts["analyzed_methods"], - analyzed_classes=summary_counts["analyzed_classes"], - func_clones_count=analysis_result.func_clones_count, - block_clones_count=analysis_result.block_clones_count, - 
segment_clones_count=analysis_result.segment_clones_count, - suppressed_golden_fixture_groups=len( - getattr(analysis_result, "suppressed_clone_groups", ()) - ), - suppressed_segment_groups=analysis_result.suppressed_segment_groups, - new_clones_count=diff_context.new_clones_count, - ) - print_metrics_if_available( - args=args, - analysis=analysis_result, - metrics_diff=diff_context.metrics_diff, - api_surface_diff_available=diff_context.api_surface_diff_available, - console=_console(), - build_metrics_snapshot_fn=build_metrics_snapshot, - print_metrics_fn=_print_metrics, - ) + if not _controller_query_mode(args): + _print_summary( + console=_console(), + quiet=args.quiet, + files_found=discovery_result.files_found, + files_analyzed=processing_result.files_analyzed, + cache_hits=discovery_result.cache_hits, + files_skipped=processing_result.files_skipped, + analyzed_lines=summary_counts["analyzed_lines"], + analyzed_functions=summary_counts["analyzed_functions"], + analyzed_methods=summary_counts["analyzed_methods"], + analyzed_classes=summary_counts["analyzed_classes"], + func_clones_count=analysis_result.func_clones_count, + block_clones_count=analysis_result.block_clones_count, + segment_clones_count=analysis_result.segment_clones_count, + suppressed_golden_fixture_groups=len( + getattr(analysis_result, "suppressed_clone_groups", ()) + ), + suppressed_segment_groups=analysis_result.suppressed_segment_groups, + new_clones_count=diff_context.new_clones_count, + ) + print_metrics_if_available( + args=args, + analysis=analysis_result, + metrics_diff=diff_context.metrics_diff, + api_surface_diff_available=diff_context.api_surface_diff_available, + console=_console(), + build_metrics_snapshot_fn=build_metrics_snapshot, + print_metrics_fn=_print_metrics, + ) report_artifacts = report( boot=boot, @@ -469,8 +576,18 @@ def _main_impl() -> None: metrics_diff=diff_context.metrics_diff, coverage_adoption_diff_available=diff_context.coverage_adoption_diff_available, 
api_surface_diff_available=diff_context.api_surface_diff_available, - include_report_document=bool(changed_paths), + include_report_document=bool(changed_paths) or _controller_query_mode(args), + ) + controller_exit_code = _run_controller_query( + args=args, + report_document=report_artifacts.report_document, + root_path=root_path, + analysis_result=analysis_result, + diff_context=diff_context, + baseline_state=baseline_state, ) + if controller_exit_code is not None: + sys.exit(controller_exit_code) changed_clone_gate = resolve_changed_clone_gate( args=args, report_document=report_artifacts.report_document, diff --git a/codeclone/ui_messages/__init__.py b/codeclone/ui_messages/__init__.py index fa94dafb..27ad98e8 100644 --- a/codeclone/ui_messages/__init__.py +++ b/codeclone/ui_messages/__init__.py @@ -68,6 +68,18 @@ "Shorthand for --changed-only using `git diff --name-only `.\n" "Useful for PR and CI review flows." ) +HELP_BLAST_RADIUS = ( + "Show structural blast radius for the given files.\n" + "Runs analysis first, then projects dependents, clone cohorts,\n" + "risk signals, and do-not-touch boundaries." +) +HELP_PATCH_VERIFY = ( + "Verify the current patch against the trusted baseline budget.\n" + "Runs analysis, checks baseline regressions and gate status, then exits." +) +HELP_STRICTNESS = ( + "Strictness profile for --patch-verify: ci, strict, or relaxed.\nDefault: ci." +) HELP_CACHE_PATH = ( "Path to the cache file.\n" "If FILE is omitted, uses /.cache/codeclone/cache.json." 
@@ -213,6 +225,8 @@ SUMMARY_TITLE = "Summary" METRICS_TITLE = "Metrics" CHANGED_SCOPE_TITLE = "Changed Scope" +BLAST_RADIUS_TITLE = "Blast Radius" +PATCH_VERIFY_TITLE = "Patch Verify" CLI_LAYOUT_MAX_WIDTH = 80 @@ -253,6 +267,14 @@ SUMMARY_COMPACT_CHANGED_SCOPE = ( "Changed paths={paths} findings={findings} new={new} known={known}" ) +SUMMARY_COMPACT_BLAST_RADIUS = ( + "blast-radius: {level} | dependents={dependents} cohorts={cohorts} " + "cycles={cycles} do-not-touch={do_not_touch}" +) +SUMMARY_COMPACT_PATCH_VERIFY = ( + "patch-verify: {status} | health={health_before}->{health_after} " + "regressions={regressions} gates={gate_status}" +) WARN_SUMMARY_ACCOUNTING_MISMATCH = ( "Summary accounting mismatch: " @@ -940,6 +962,40 @@ def fmt_changed_scope_compact( ) +def fmt_blast_radius_compact( + *, + level: str, + dependents: int, + cohorts: int, + cycles: int, + do_not_touch: int, +) -> str: + return SUMMARY_COMPACT_BLAST_RADIUS.format( + level=level, + dependents=dependents, + cohorts=cohorts, + cycles=cycles, + do_not_touch=do_not_touch, + ) + + +def fmt_patch_verify_compact( + *, + status: str, + health_before: int, + health_after: int, + regressions: int, + gate_status: str, +) -> str: + return SUMMARY_COMPACT_PATCH_VERIFY.format( + status=status, + health_before=health_before, + health_after=health_after, + regressions=regressions, + gate_status=gate_status, + ) + + def fmt_pipeline_done(elapsed: float) -> str: return f" [dim]Pipeline done in {elapsed:.2f}s[/dim]" diff --git a/docs/README-pypi.md b/docs/README-pypi.md index 02c38ea3..4ed57abe 100644 --- a/docs/README-pypi.md +++ b/docs/README-pypi.md @@ -32,6 +32,8 @@ before the first edit — when an agent declares what it intends to change — maps the structural blast radius, and verifies explicit before/after runs against the patch contract. It also generates auditable review receipts; the claim guard validates cited review claims against canonical report semantics. 
+The CLI exposes `--blast-radius` and `--patch-verify` for the same highest-value +controller checks in terminal workflows. The same analysis pipeline powers CLI reports, CI checks, the MCP server, and native IDE/agent clients — so humans and AI agents operate on identical, @@ -64,6 +66,7 @@ Docs: `. Useful for PR and CI review flows. + --blast-radius FILE [FILE ...] + Show structural blast radius for the given files. + Runs analysis first, then projects dependents, clone cohorts, + risk signals, and do-not-touch boundaries. + --patch-verify Verify the current patch against the trusted baseline budget. + Runs analysis, checks baseline regressions and gate status, then exits. + --strictness LEVEL Strictness profile for --patch-verify: ci, strict, or relaxed. + Default: ci. --cache-path [FILE] Path to the cache file. If FILE is omitted, uses /.cache/codeclone/cache.json. --cache-dir [FILE] Legacy alias for --cache-path. diff --git a/tests/test_cli_blast_radius.py b/tests/test_cli_blast_radius.py new file mode 100644 index 00000000..8cb18268 --- /dev/null +++ b/tests/test_cli_blast_radius.py @@ -0,0 +1,107 @@ +from __future__ import annotations + +from pathlib import Path + +import pytest + +from codeclone.contracts import ExitCode +from codeclone.surfaces.cli.blast_radius import render_blast_radius + + +class _RecordingPrinter: + def __init__(self) -> None: + self.lines: list[str] = [] + + def print(self, *objects: object, **kwargs: object) -> None: + self.lines.append(" ".join(str(item) for item in objects)) + + @property + def text(self) -> str: + return "\n".join(self.lines) + + +def _report_document() -> dict[str, object]: + return { + "integrity": {"digest": {"value": "a" * 64}}, + "inventory": { + "file_registry": { + "items": ["pkg/a.py", "pkg/b.py", "pkg/c.py"], + }, + }, + "metrics": { + "families": { + "dependencies": { + "items": [{"source": "pkg.b", "target": "pkg.a"}], + "cycles": [], + }, + "complexity": {"items": []}, + "coupling": {"items": []}, + 
"coverage_join": {"items": []}, + "overloaded_modules": {"items": []}, + "security_surfaces": {"items": []}, + }, + }, + "findings": { + "groups": { + "clones": { + "functions": [], + "blocks": [], + "segments": [], + "suppressed": {}, + }, + "structural": {"groups": []}, + "dead_code": {"groups": []}, + "design": {"groups": []}, + }, + }, + } + + +def test_blast_radius_quiet_output_uses_canonical_projection(tmp_path: Path) -> None: + printer = _RecordingPrinter() + + exit_code = render_blast_radius( + console=printer, + report_document=_report_document(), + files=("pkg/a.py",), + root_path=tmp_path, + quiet=True, + ) + + assert exit_code == int(ExitCode.SUCCESS) + assert printer.text == ( + "blast-radius: medium | dependents=1 cohorts=0 cycles=0 do-not-touch=2" + ) + + +def test_blast_radius_rejects_absolute_paths(tmp_path: Path) -> None: + printer = _RecordingPrinter() + + with pytest.raises(SystemExit) as exc: + render_blast_radius( + console=printer, + report_document=_report_document(), + files=(str(tmp_path / "pkg" / "a.py"),), + root_path=tmp_path, + quiet=True, + ) + + assert exc.value.code == int(ExitCode.CONTRACT_ERROR) + assert "CONTRACT ERROR:" in printer.text + assert "absolute paths are not accepted" in printer.text + + +def test_blast_radius_requires_at_least_one_inventory_file(tmp_path: Path) -> None: + printer = _RecordingPrinter() + + with pytest.raises(SystemExit) as exc: + render_blast_radius( + console=printer, + report_document=_report_document(), + files=("pkg/missing.py",), + root_path=tmp_path, + quiet=True, + ) + + assert exc.value.code == int(ExitCode.CONTRACT_ERROR) + assert "--blast-radius requires at least one file" in printer.text diff --git a/tests/test_cli_patch_verify.py b/tests/test_cli_patch_verify.py new file mode 100644 index 00000000..5c2ecbb5 --- /dev/null +++ b/tests/test_cli_patch_verify.py @@ -0,0 +1,193 @@ +from __future__ import annotations + +from argparse import Namespace +from types import SimpleNamespace +from typing 
import Any, cast + +import pytest + +import codeclone.surfaces.cli.changed_scope as cli_changed_scope +import codeclone.surfaces.cli.workflow as cli_workflow +from codeclone.contracts import ExitCode +from codeclone.core._types import AnalysisResult +from codeclone.surfaces.cli.patch_verify import ( + render_patch_verify, + validate_strictness, +) +from codeclone.surfaces.cli.post_run import DiffContext + + +class _RecordingPrinter: + def __init__(self) -> None: + self.lines: list[str] = [] + + def print(self, *objects: object, **kwargs: object) -> None: + self.lines.append(" ".join(str(item) for item in objects)) + + @property + def text(self) -> str: + return "\n".join(self.lines) + + +def _args(**overrides: object) -> Namespace: + values: dict[str, object] = { + "fail_complexity": -1, + "fail_coupling": -1, + "fail_cohesion": -1, + "fail_cycles": False, + "fail_dead_code": False, + "fail_health": -1, + "fail_on_new_metrics": False, + "fail_on_typing_regression": False, + "fail_on_docstring_regression": False, + "fail_on_api_break": False, + "fail_on_untested_hotspots": False, + "min_typing_coverage": -1, + "min_docstring_coverage": -1, + "coverage_min": 50, + } + values.update(overrides) + return Namespace(**values) + + +def _analysis(*, function_clones: int = 0) -> AnalysisResult: + return AnalysisResult( + func_groups={}, + block_groups={}, + block_groups_report={}, + segment_groups={}, + suppressed_segment_groups=0, + block_group_facts={}, + func_clones_count=function_clones, + block_clones_count=0, + segment_clones_count=0, + files_analyzed_or_cached=1, + project_metrics=None, + metrics_payload=None, + suggestions=(), + segment_groups_raw_digest="", + ) + + +def _diff_context(*, new_clones: int = 0) -> DiffContext: + return DiffContext( + new_func={f"func-{index}" for index in range(new_clones)}, + new_block=set(), + new_clones_count=new_clones, + metrics_diff=None, + coverage_adoption_diff_available=False, + api_surface_diff_available=False, + ) + + +def 
_baseline_state(*, trusted: bool = True) -> object: + return SimpleNamespace(trusted_for_diff=trusted) + + +def test_patch_verify_accepts_clean_patch_quiet() -> None: + printer = _RecordingPrinter() + + exit_code = render_patch_verify( + console=printer, + args=cast(Any, _args()), + strictness="ci", + analysis=_analysis(), + diff_context=_diff_context(), + baseline_state=cast(Any, _baseline_state()), + quiet=True, + ) + + assert exit_code == int(ExitCode.SUCCESS) + assert printer.text == ( + "patch-verify: accepted | health=0->0 regressions=0 gates=pass" + ) + + +@pytest.mark.parametrize( + ("strictness", "expected_code", "expected_text"), + [ + ( + "ci", + int(ExitCode.GATING_FAILURE), + "patch-verify: violated | health=0->0 regressions=1 gates=FAIL", + ), + ( + "relaxed", + int(ExitCode.SUCCESS), + "patch-verify: violated | health=0->0 regressions=1 gates=pass", + ), + ], +) +def test_patch_verify_reports_clone_regressions_by_strictness( + strictness: str, + expected_code: int, + expected_text: str, +) -> None: + printer = _RecordingPrinter() + + exit_code = render_patch_verify( + console=printer, + args=cast(Any, _args()), + strictness=strictness, + analysis=_analysis(function_clones=1), + diff_context=_diff_context(new_clones=1), + baseline_state=cast(Any, _baseline_state()), + quiet=True, + ) + + assert exit_code == expected_code + assert printer.text == expected_text + + +def test_patch_verify_requires_trusted_baseline() -> None: + printer = _RecordingPrinter() + + exit_code = render_patch_verify( + console=printer, + args=cast(Any, _args()), + strictness="ci", + analysis=_analysis(), + diff_context=_diff_context(), + baseline_state=cast(Any, _baseline_state(trusted=False)), + quiet=True, + ) + + assert exit_code == int(ExitCode.CONTRACT_ERROR) + assert "Patch verify requires a trusted baseline" in printer.text + + +def test_patch_verify_validates_strictness_values() -> None: + assert validate_strictness("ci") == "ci" + assert validate_strictness("strict") == 
"strict" + assert validate_strictness("relaxed") == "relaxed" + with pytest.raises(ValueError, match="Invalid --strictness value"): + validate_strictness("nope") + + +def test_patch_verify_allows_diff_against_without_changed_only() -> None: + cli_workflow.console = cli_workflow._make_plain_console() + args = Namespace( + changed_only=False, + diff_against="HEAD~1", + paths_from_git_diff=None, + patch_verify=True, + blast_radius=None, + ) + + assert cli_changed_scope._validate_changed_scope_args(args=args) == "HEAD~1" + + +def test_controller_query_flags_reject_mutually_exclusive_modes() -> None: + cli_workflow.console = cli_workflow._make_plain_console() + args = Namespace( + blast_radius=("pkg/a.py",), + patch_verify=True, + strictness="ci", + update_baseline=False, + update_metrics_baseline=False, + ) + + with pytest.raises(SystemExit) as exc: + cli_workflow._validate_controller_query_flags(args=args) + + assert exc.value.code == int(ExitCode.CONTRACT_ERROR) diff --git a/tests/test_cli_unit.py b/tests/test_cli_unit.py index 49fd70aa..0aaab1c2 100644 --- a/tests/test_cli_unit.py +++ b/tests/test_cli_unit.py @@ -544,6 +544,9 @@ def test_cli_help_text_consistency( "--changed-only", "--diff-against GIT_REF", "--paths-from-git-diff GIT_REF", + "--blast-radius FILE [FILE ...]", + "--patch-verify", + "--strictness LEVEL", "Baselines and CI:", "Quality gates:", "Analysis stages:", From 4af34131f2310f08bdad215454aa67513d5b1561 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sun, 24 May 2026 22:24:32 +0500 Subject: [PATCH 015/318] feat(mcp): add lease-based intent recovery --- CHANGELOG.md | 7 + .../surfaces/mcp/_session_intent_mixin.py | 463 +++++++++++++++++- .../mcp/_session_patch_contract_mixin.py | 4 + codeclone/surfaces/mcp/_session_shared.py | 10 +- codeclone/surfaces/mcp/_workspace_intents.py | 274 ++++++++++- codeclone/surfaces/mcp/server.py | 3 +- docs/book/24-structural-change-controller.md | 18 +- tests/test_mcp_server.py | 1 + tests/test_mcp_service.py | 
244 ++++++++- tests/test_mcp_shutdown.py | 3 + tests/test_workspace_intents.py | 166 +++++++ 11 files changed, 1141 insertions(+), 52 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9dbf52ab..3100041e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,6 +31,10 @@ - Add CLI controller query modes: `--blast-radius FILE [FILE...]` for terminal pre-change boundary review and `--patch-verify` for trusted-baseline patch verification with `ci`, `strict`, and `relaxed` profiles. +- Add lease-aware workspace intent recovery for MCP change control. Intent + records now carry renewable ownership leases, `list_workspace` distinguishes + own/recoverable/foreign-active records, and `manage_change_intent` can + explicitly recover stale intents without killing another MCP process. ### Internal @@ -47,6 +51,9 @@ controller features are under development. - Keep CLI controller query modes read-only by skipping baseline, report, and analysis-cache writes. +- Keep workspace intent registry upgrades versioned and backward-readable: + registry v2 records add lease and report-digest fields, while v1 records are + accepted with conservative lease defaults until natural expiry. 
## [2.0.2] - 2026-05-19 diff --git a/codeclone/surfaces/mcp/_session_intent_mixin.py b/codeclone/surfaces/mcp/_session_intent_mixin.py index 3a035481..f3107445 100644 --- a/codeclone/surfaces/mcp/_session_intent_mixin.py +++ b/codeclone/surfaces/mcp/_session_intent_mixin.py @@ -8,8 +8,8 @@ import os from collections.abc import Mapping, Sequence -from dataclasses import replace -from datetime import datetime, timezone +from dataclasses import dataclass, replace +from datetime import datetime, timedelta, timezone from fnmatch import fnmatchcase from pathlib import Path @@ -32,18 +32,21 @@ MCPServiceContractError, ) from ._workspace_intents import ( + IntentOwnership, WorkspaceIntentRecord, WorkspaceIntentStatus, + classify_intent_ownership, compute_scope_digest, detect_conflicts, expires_at, find_workspace_intent, format_utc, gc_workspace, - is_orphaned, list_workspace_intents, remove_workspace_intent, remove_workspace_record, + renew_workspace_intent_lease, + resolved_lease_seconds, resolved_ttl_seconds, stale_reason, update_workspace_intent_status, @@ -53,6 +56,19 @@ ) +@dataclass(frozen=True, slots=True) +class _RecoveryTarget: + root_path: Path + workspace_record: WorkspaceIntentRecord + now: datetime + + +@dataclass(frozen=True, slots=True) +class _RecoveryRun: + record: MCPRunRecord + report_digest: str + + class _MCPSessionIntentMixin(_MCPSessionBlastRadiusMixin): _runs: CodeCloneMCPRunStore _active_intents: dict[str, IntentRecord] @@ -61,6 +77,24 @@ class _MCPSessionIntentMixin(_MCPSessionBlastRadiusMixin): _agent_start_epoch: int _agent_label: str + def get_blast_radius( + self, + *, + files: Sequence[str], + run_id: str | None = None, + depth: str = "direct", + include: Sequence[str] | None = None, + ) -> dict[str, object]: + record = self._runs.get(run_id) + payload = super().get_blast_radius( + files=files, + run_id=record.run_id, + depth=depth, + include=include, + ) + self._renew_lease_for_run(record=record) + return payload + def manage_change_intent( 
self, *, @@ -106,6 +140,12 @@ def manage_change_intent( return self._list_workspace_intents(root=root) case "gc_workspace": return self._gc_workspace_intents(root=root) + case "recover": + return self._recover_change_intent( + root=root, + run_id=run_id, + intent_id=intent_id, + ) case "reset_workspace": return self._reset_workspace_intent( root=root, @@ -116,7 +156,7 @@ def manage_change_intent( raise MCPServiceContractError( "Invalid value for action: " f"{action!r}. Expected one of: check, clear, declare, " - "gc_workspace, get, list_workspace, reset_workspace." + "gc_workspace, get, list_workspace, recover, reset_workspace." ) def _declare_change_intent( @@ -199,6 +239,7 @@ def _declare_change_intent( new_scope=normalized_scope.to_payload(), existing=workspace_existing, own_pid=self._agent_pid, + own_start_epoch=self._agent_start_epoch, ) payload = record_payload.to_payload( short_run_id=_helpers._short_run_id(record.run_id) @@ -228,6 +269,7 @@ def _check_change_intent( run_id=run_id, intent_id=intent_id, ) + self._renew_lease_if_active(record=record, intent=active_intent) if self._is_intent_expired(record=record, intent=active_intent): expired = replace(active_intent, status=IntentStatus.EXPIRED) with self._state_lock: @@ -329,6 +371,8 @@ def _intent_payload_with_expiry( with self._state_lock: self._active_intents[intent.intent_id] = intent self._sync_workspace_intent_status(record=record, intent=intent) + else: + self._renew_lease_if_active(record=record, intent=intent) return intent.to_payload(short_run_id=_helpers._short_run_id(record.run_id)) def _is_intent_expired( @@ -373,6 +417,11 @@ def _workspace_record_from_intent( scope=scope_payload, scope_digest=compute_scope_digest(scope_payload), blast_radius_summary=dict(intent.blast_radius_summary or {}), + lease_renewed_at_utc=format_utc(declared_at), + lease_seconds=resolved_lease_seconds( + env_value=os.environ.get("CODECLONE_INTENT_LEASE_SECONDS"), + ), + report_digest=intent.report_digest, ) def 
_sync_workspace_intent_status( @@ -389,18 +438,50 @@ def _sync_workspace_intent_status( new_status=intent.status.value, ) + def _renew_lease_if_active( + self, + *, + record: MCPRunRecord, + intent: IntentRecord, + ) -> None: + try: + renew_workspace_intent_lease( + root=record.root, + pid=self._agent_pid, + start_epoch=self._agent_start_epoch, + intent_id=intent.intent_id, + ) + except Exception: + return + + def _renew_lease_for_run(self, *, record: MCPRunRecord) -> None: + with self._state_lock: + intents = tuple( + intent + for intent in self._active_intents.values() + if intent.run_id == record.run_id + ) + for intent in intents: + self._renew_lease_if_active(record=record, intent=intent) + def _list_workspace_intents(self, *, root: str | None) -> dict[str, object]: root_path = self._resolve_workspace_root(root) counts = workspace_status_counts(root=root_path) - records = list_workspace_intents(root=root_path) + records = list_workspace_intents(root=root_path, exclude_stale=False) + now = utc_now() return { "workspace_intents": [ item.to_payload( own_pid=self._agent_pid, own_start_epoch=self._agent_start_epoch, + now=now, ) for item in records ], + "recovery_available": self._recovery_available_payload( + records=records, + now=now, + ), "stale_count": counts["stale_count"], "orphaned_count": counts["orphaned_count"], "total_agents": len({item.agent_pid for item in records}), @@ -411,6 +492,276 @@ def _list_workspace_intents(self, *, root: str | None) -> dict[str, object]: def _gc_workspace_intents(self, *, root: str | None) -> dict[str, object]: return gc_workspace(root=self._resolve_workspace_root(root)) + def _recover_change_intent( + self, + *, + root: str | None, + run_id: str | None, + intent_id: str | None, + ) -> dict[str, object]: + request_error = self._recovery_required_fields_error( + root=root, + run_id=run_id, + intent_id=intent_id, + ) + if request_error is not None: + return request_error + assert root is not None + assert run_id is not None + 
assert intent_id is not None + target = self._recovery_target(root=root, intent_id=intent_id) + if isinstance(target, dict): + return target + recovery_run = self._recovery_run(run_id=run_id, target=target) + if isinstance(recovery_run, dict): + return recovery_run + recovered = self._activate_recovered_intent( + target=target, + recovery_run=recovery_run, + ) + if isinstance(recovered, dict): + return recovered + workspace_update = self._rewrite_recovered_workspace_record( + target=target, + recovery_run=recovery_run, + recovered=recovered, + ) + if isinstance(workspace_update, dict): + return workspace_update + recovered_at, previous_removed = workspace_update + return self._recovered_payload( + target=target, + recovery_run=recovery_run, + recovered=recovered, + recovered_at=recovered_at, + previous_removed=previous_removed, + ) + + def _recovery_required_fields_error( + self, + *, + root: str | None, + run_id: str | None, + intent_id: str | None, + ) -> dict[str, object] | None: + if intent_id is None: + return self._recovery_rejected( + intent_id=None, + reason="missing_intent_id", + message="action='recover' requires intent_id.", + ) + if run_id is None: + return self._recovery_rejected( + intent_id=intent_id, + reason="missing_run_id", + message="action='recover' requires run_id.", + ) + if root is None: + return self._recovery_rejected( + intent_id=intent_id, + reason="missing_root", + message="action='recover' requires root.", + ) + return None + + def _recovery_target( + self, + *, + root: str, + intent_id: str, + ) -> _RecoveryTarget | dict[str, object]: + root_path = self._resolve_workspace_root(root) + found = find_workspace_intent(root=root_path, intent_id=intent_id) + if found is None: + return self._recovery_rejected( + intent_id=intent_id, + reason="not_found", + message=f"No workspace intent found for intent_id: {intent_id}.", + ) + _, workspace_record = found + now = utc_now() + ownership = classify_intent_ownership( + workspace_record, + 
own_pid=self._agent_pid, + own_start_epoch=self._agent_start_epoch, + now=now, + ) + if ownership not in {IntentOwnership.RECOVERABLE, IntentOwnership.OWN_STALE}: + return self._recovery_rejected( + intent_id=intent_id, + reason="not_recoverable", + message=self._recovery_rejection_message(ownership), + details={"ownership": ownership.value}, + ) + return _RecoveryTarget( + root_path=root_path, + workspace_record=workspace_record, + now=now, + ) + + def _recovery_run( + self, + *, + run_id: str, + target: _RecoveryTarget, + ) -> _RecoveryRun | dict[str, object]: + workspace_record = target.workspace_record + try: + record = self._runs.get(run_id) + except MCPRunNotFoundError: + return self._recovery_rejected( + intent_id=workspace_record.intent_id, + reason="run_not_available", + message=( + f"Run {run_id} is not available in this session. " + "Run analyze_repository first." + ), + ) + report_digest = self._report_digest_value(record) + if report_digest != workspace_record.report_digest: + return self._recovery_rejected( + intent_id=workspace_record.intent_id, + reason="report_digest_mismatch", + message=( + "Report digest does not match. The analysis run may have " + "changed since the intent was declared." 
+ ), + details={ + "expected": workspace_record.report_digest, + "actual": report_digest, + }, + ) + if ( + compute_scope_digest(workspace_record.scope) + != workspace_record.scope_digest + ): + return self._recovery_rejected( + intent_id=workspace_record.intent_id, + reason="scope_digest_mismatch", + message="Workspace intent scope digest does not match.", + ) + return _RecoveryRun(record=record, report_digest=report_digest) + + def _activate_recovered_intent( + self, + *, + target: _RecoveryTarget, + recovery_run: _RecoveryRun, + ) -> IntentRecord | dict[str, object]: + workspace_record = target.workspace_record + with self._state_lock: + if workspace_record.intent_id in self._active_intents: + return self._recovery_rejected( + intent_id=workspace_record.intent_id, + reason="already_active", + message=( + f"Intent {workspace_record.intent_id} is already active " + "in this session." + ), + ) + try: + scope = normalize_intent_scope(workspace_record.scope) + except ValueError as exc: + return self._recovery_rejected( + intent_id=workspace_record.intent_id, + reason="invalid_scope", + message=str(exc), + ) + recovered = IntentRecord( + intent_id=workspace_record.intent_id, + run_id=recovery_run.record.run_id, + report_digest=recovery_run.report_digest, + status=IntentStatus.ACTIVE, + declared_at_utc=workspace_record.declared_at_utc, + scope=scope, + intent_description=workspace_record.intent, + expected_effects=(), + guards=DEFAULT_INTENT_GUARDS, + blast_radius_summary=dict(workspace_record.blast_radius_summary), + ) + self._active_intents[workspace_record.intent_id] = recovered + self._runs.pin(recovery_run.record.run_id) + return recovered + + def _rewrite_recovered_workspace_record( + self, + *, + target: _RecoveryTarget, + recovery_run: _RecoveryRun, + recovered: IntentRecord, + ) -> tuple[str, bool] | dict[str, object]: + workspace_record = target.workspace_record + recovered_at = format_utc(target.now) + updated_workspace_record = replace( + workspace_record, 
+ agent_pid=self._agent_pid, + agent_start_epoch=self._agent_start_epoch, + agent_label=self._agent_label, + status=WorkspaceIntentStatus.ACTIVE.value, + lease_renewed_at_utc=recovered_at, + report_digest=recovery_run.report_digest, + ) + if not write_workspace_intent( + root=target.root_path, + record=updated_workspace_record, + ): + self._rollback_recovered_intent(recovered) + return self._recovery_rejected( + intent_id=workspace_record.intent_id, + reason="workspace_rewrite_failed", + message="Failed to rewrite workspace intent owner.", + ) + previous_removed = True + if ( + workspace_record.agent_pid != self._agent_pid + or workspace_record.agent_start_epoch != self._agent_start_epoch + ): + previous_removed = remove_workspace_record( + root=target.root_path, + record=workspace_record, + ) + return recovered_at, previous_removed + + def _rollback_recovered_intent(self, recovered: IntentRecord) -> None: + with self._state_lock: + self._active_intents.pop(recovered.intent_id, None) + self._runs.unpin(recovered.run_id) + + def _recovered_payload( + self, + *, + target: _RecoveryTarget, + recovery_run: _RecoveryRun, + recovered: IntentRecord, + recovered_at: str, + previous_removed: bool, + ) -> dict[str, object]: + workspace_record = target.workspace_record + return { + "intent_id": recovered.intent_id, + "action_taken": "recovered", + "run_id": _helpers._short_run_id(recovery_run.record.run_id), + "scope": recovered.scope.to_payload(), + "previous_owner": { + "agent_pid": workspace_record.agent_pid, + "agent_start_epoch": workspace_record.agent_start_epoch, + "agent_label": workspace_record.agent_label, + "lease_renewed_at_utc": workspace_record.lease_renewed_at_utc, + }, + "new_owner": { + "agent_pid": self._agent_pid, + "agent_start_epoch": self._agent_start_epoch, + "agent_label": self._agent_label, + }, + "recovered_at_utc": recovered_at, + "previous_workspace_record_removed": previous_removed, + "next_steps": [ + "Run manage_change_intent(action='get') to 
inspect recovered state.", + "Run check_patch_contract(mode='budget') to verify patch budget.", + "Continue editing within declared scope.", + ], + } + def _reset_workspace_intent( self, *, @@ -427,29 +778,42 @@ def _reset_workspace_intent( if found is None: raise MCPServiceContractError(f"Unknown workspace intent id: {intent_id}") _, workspace_record = found - reason = stale_reason(workspace_record) - is_own = ( - workspace_record.agent_pid == self._agent_pid - and workspace_record.agent_start_epoch == self._agent_start_epoch + now = utc_now() + ownership = classify_intent_ownership( + workspace_record, + own_pid=self._agent_pid, + own_start_epoch=self._agent_start_epoch, + now=now, ) - if reason in {"expired", "orphaned"}: + if ownership in {IntentOwnership.EXPIRED, IntentOwnership.RECOVERABLE}: removed = remove_workspace_record(root=root_path, record=workspace_record) + reason = ( + "expired" + if ownership == IntentOwnership.EXPIRED + else stale_reason(workspace_record) or "recoverable" + ) return { "intent_id": workspace_record.intent_id, "action_taken": "removed" if removed else "failed", "reason": reason, } - if not is_own and not is_orphaned(workspace_record): + if ownership == IntentOwnership.FOREIGN_ACTIVE: return { "intent_id": workspace_record.intent_id, "action_taken": "rejected", - "reason": "foreign_live_intent", + "reason": "foreign_active", + "ownership": ownership.value, "agent_pid": workspace_record.agent_pid, "agent_start_epoch": workspace_record.agent_start_epoch, "agent_label": workspace_record.agent_label, + "escalation_hint": ( + "This intent belongs to a live process with a valid lease. " + "Do NOT kill the process. Ask the user to confirm whether " + "this is an abandoned session or a parallel agent." + ), "message": ( - "Intent belongs to a live agent. Coordinate with the owning " - "agent or user before resetting it." + "Intent has a valid lease from a live process. Coordinate " + "with the owning agent or user before resetting it." 
), } ttl = resolved_ttl_seconds( @@ -473,6 +837,77 @@ def _reset_workspace_intent( "new_expires_at_utc": latest_record.expires_at_utc, } + def _recovery_available_payload( + self, + *, + records: Sequence[WorkspaceIntentRecord], + now: datetime, + ) -> list[dict[str, object]]: + available: list[dict[str, object]] = [] + for record in records: + ownership = classify_intent_ownership( + record, + own_pid=self._agent_pid, + own_start_epoch=self._agent_start_epoch, + now=now, + ) + if ownership != IntentOwnership.RECOVERABLE: + continue + if self._optional_run_record(record.run_id) is None: + continue + available.append( + { + "intent_id": record.intent_id, + "run_id": _helpers._short_run_id(record.run_id), + "scope_digest": record.scope_digest, + "previous_agent_label": record.agent_label, + "lease_expired_at_utc": self._lease_expired_at_utc(record), + "hint": ("Use action='recover' with matching run_id to reclaim."), + } + ) + return sorted( + available, + key=lambda item: ( + str(item["previous_agent_label"]), + str(item["intent_id"]), + ), + ) + + def _lease_expired_at_utc(self, record: WorkspaceIntentRecord) -> str | None: + renewed_at = _parse_utc(record.lease_renewed_at_utc) + if renewed_at is None: + return None + return format_utc(renewed_at + timedelta(seconds=record.lease_seconds)) + + def _recovery_rejected( + self, + *, + intent_id: str | None, + reason: str, + message: str, + details: Mapping[str, object] | None = None, + ) -> dict[str, object]: + return { + "intent_id": intent_id, + "action_taken": "recovery_rejected", + "reason": reason, + "message": message, + "details": dict(details or {}), + } + + def _recovery_rejection_message(self, ownership: IntentOwnership) -> str: + if ownership == IntentOwnership.FOREIGN_ACTIVE: + return ( + "Intent has a valid lease from a live process. Cannot recover. " + "Use action='list_workspace' to inspect, then coordinate with " + "the user." 
+ ) + if ownership == IntentOwnership.EXPIRED: + return "Intent has expired (TTL). Declare a new intent instead." + if ownership == IntentOwnership.OWN_ACTIVE: + return "Intent is already actively owned by this session." + return "Intent is not recoverable." + def _resolve_workspace_root(self, root: str | None) -> Path: if root is not None: return _helpers._resolve_root(root) diff --git a/codeclone/surfaces/mcp/_session_patch_contract_mixin.py b/codeclone/surfaces/mcp/_session_patch_contract_mixin.py index b0cc2141..62ae33e3 100644 --- a/codeclone/surfaces/mcp/_session_patch_contract_mixin.py +++ b/codeclone/surfaces/mcp/_session_patch_contract_mixin.py @@ -76,6 +76,8 @@ def _patch_contract_budget( ) -> dict[str, object]: record = self._runs.get(run_id) intent = self._optional_intent(record=record, intent_id=intent_id) + if intent is not None: + self._renew_lease_if_active(record=record, intent=intent) budgets = self._budgets_for_record(record=record, strictness=strictness) current_state = self._current_state(record) gate_preview = self._gate_preview(record=record, budgets=budgets) @@ -139,6 +141,8 @@ def _patch_contract_verify( structural_delta=self._structural_delta(compare_payload), ) intent = self._optional_intent(record=before, intent_id=intent_id) + if intent is not None: + self._renew_lease_if_active(record=before, intent=intent) if intent is not None and self._is_intent_expired(record=before, intent=intent): return self._expired_patch_contract( before=before, after=after, intent=intent diff --git a/codeclone/surfaces/mcp/_session_shared.py b/codeclone/surfaces/mcp/_session_shared.py index 5aec3cef..3df54269 100644 --- a/codeclone/surfaces/mcp/_session_shared.py +++ b/codeclone/surfaces/mcp/_session_shared.py @@ -747,6 +747,11 @@ class MCPHelpTopicSpec: "Start with manage_change_intent(action='list_workspace', " "root=...) before analysis so active agents are visible early." 
), + ( + "Recover ownership only when list_workspace marks an intent " + "recoverable and the matching run is available; do not kill " + "foreign active MCP processes." + ), ( "Run analyze_repository, then declare intent with allowed_files, " "allowed_related, and forbidden paths before editing." @@ -765,8 +770,9 @@ class MCPHelpTopicSpec: "intent." ), ( - "Use reset_workspace for interrupted own, expired, or orphaned " - "intents; foreign live intents require coordination." + "Use reset_workspace for interrupted own, expired, or " + "recoverable registry records; foreign active intents require " + "coordination." ), ), recommended_tools=( diff --git a/codeclone/surfaces/mcp/_workspace_intents.py b/codeclone/surfaces/mcp/_workspace_intents.py index 99544e57..0a00c8b3 100644 --- a/codeclone/surfaces/mcp/_workspace_intents.py +++ b/codeclone/surfaces/mcp/_workspace_intents.py @@ -19,11 +19,15 @@ from ...cache.integrity import canonical_json from ...utils.json_io import read_json_object, write_json_document_atomically -REGISTRY_VERSION: Final = "1" +LEGACY_REGISTRY_VERSION: Final = "1" +REGISTRY_VERSION: Final = "2" REGISTRY_DIR_PARTS: Final = (".cache", "codeclone", "intents") DEFAULT_TTL_SECONDS: Final = 3600 MIN_TTL_SECONDS: Final = 60 MAX_TTL_SECONDS: Final = 86400 +DEFAULT_LEASE_SECONDS: Final = 300 +MIN_LEASE_SECONDS: Final = 60 +MAX_LEASE_SECONDS: Final = 3600 _HEX_DIGEST_LENGTH: Final = 64 @@ -36,6 +40,14 @@ class WorkspaceIntentStatus(str, Enum): ORPHANED = "orphaned" +class IntentOwnership(str, Enum): + OWN_ACTIVE = "own_active" + OWN_STALE = "own_stale" + RECOVERABLE = "recoverable" + FOREIGN_ACTIVE = "foreign_active" + EXPIRED = "expired" + + @dataclass(frozen=True, slots=True) class WorkspaceIntentRecord: intent_id: str @@ -51,6 +63,9 @@ class WorkspaceIntentRecord: scope: dict[str, object] scope_digest: str blast_radius_summary: dict[str, object] + lease_renewed_at_utc: str + lease_seconds: int + report_digest: str def unsigned_payload(self) -> dict[str, 
object]: return { @@ -68,6 +83,9 @@ def unsigned_payload(self) -> dict[str, object]: "scope": self.scope, "scope_digest": self.scope_digest, "blast_radius_summary": self.blast_radius_summary, + "lease_renewed_at_utc": self.lease_renewed_at_utc, + "lease_seconds": self.lease_seconds, + "report_digest": self.report_digest, } def signed_payload(self) -> dict[str, object]: @@ -80,14 +98,79 @@ def to_payload( *, own_pid: int | None = None, own_start_epoch: int | None = None, + now: datetime | None = None, ) -> dict[str, object]: - payload = self.unsigned_payload() - payload["is_own"] = self.agent_pid == own_pid and ( - own_start_epoch is None or self.agent_start_epoch == own_start_epoch + current_time = now or utc_now() + ownership = classify_intent_ownership( + self, + own_pid=own_pid or 0, + own_start_epoch=own_start_epoch or 0, + now=current_time, ) + payload = self.unsigned_payload() + payload["ownership"] = ownership.value + payload["is_own"] = ownership in { + IntentOwnership.OWN_ACTIVE, + IntentOwnership.OWN_STALE, + } + lease_expiry = _lease_expiry(self) + if lease_expiry is not None: + remaining = int((lease_expiry - current_time).total_seconds()) + payload["lease_expires_in_seconds"] = max(0, remaining) + if ownership == IntentOwnership.FOREIGN_ACTIVE: + payload["escalation_hint"] = ( + "This intent belongs to a live process with a valid lease. " + "Do NOT kill the process. Ask the user to confirm whether " + "this is an abandoned session or a parallel agent." 
+ ) return payload +def classify_intent_ownership( + record: WorkspaceIntentRecord, + *, + own_pid: int, + own_start_epoch: int, + now: datetime, +) -> IntentOwnership: + expires = _parse_utc(record.expires_at_utc) + if expires is None or expires <= now: + return IntentOwnership.EXPIRED + + is_own = record.agent_pid == own_pid and record.agent_start_epoch == own_start_epoch + lease_expiry = _lease_expiry(record) + lease_valid = lease_expiry is not None and lease_expiry > now + if is_own: + return IntentOwnership.OWN_ACTIVE if lease_valid else IntentOwnership.OWN_STALE + if not lease_valid: + return IntentOwnership.RECOVERABLE + if not _is_pid_alive(record.agent_pid): + return IntentOwnership.RECOVERABLE + return IntentOwnership.FOREIGN_ACTIVE + + +def _lease_expiry(record: WorkspaceIntentRecord) -> datetime | None: + renewed_at = _parse_utc(record.lease_renewed_at_utc) + if renewed_at is None: + return None + return renewed_at + timedelta(seconds=record.lease_seconds) + + +def _is_lease_expired(record: WorkspaceIntentRecord) -> bool: + lease_expiry = _lease_expiry(record) + return lease_expiry is None or lease_expiry <= utc_now() + + +def resolved_lease_seconds(value: object = None, *, env_value: object = None) -> int: + return _resolved_seconds( + value=value, + env_value=env_value, + default=DEFAULT_LEASE_SECONDS, + minimum=MIN_LEASE_SECONDS, + maximum=MAX_LEASE_SECONDS, + ) + + def registry_dir(root: Path) -> Path: return root.joinpath(*REGISTRY_DIR_PARTS) @@ -127,16 +210,33 @@ def format_utc(value: datetime) -> str: def resolved_ttl_seconds(value: object = None, *, env_value: object = None) -> int: + return _resolved_seconds( + value=value, + env_value=env_value, + default=DEFAULT_TTL_SECONDS, + minimum=MIN_TTL_SECONDS, + maximum=MAX_TTL_SECONDS, + ) + + +def _resolved_seconds( + *, + value: object, + env_value: object, + default: int, + minimum: int, + maximum: int, +) -> int: raw = value if value is not None else env_value if raw is None: - return 
DEFAULT_TTL_SECONDS + return default if isinstance(raw, bool): - return DEFAULT_TTL_SECONDS + return default try: parsed = int(str(raw).strip()) except ValueError: - return DEFAULT_TTL_SECONDS - return min(MAX_TTL_SECONDS, max(MIN_TTL_SECONDS, parsed)) + return default + return min(maximum, max(minimum, parsed)) def expires_at(*, declared_at: datetime, ttl_seconds: int) -> str: @@ -168,7 +268,8 @@ def validate_workspace_record(data: object) -> WorkspaceIntentRecord | None: return None if not verify_intent_integrity(data): return None - if data.get("registry_version") != REGISTRY_VERSION: + version = data.get("registry_version") + if version not in {REGISTRY_VERSION, LEGACY_REGISTRY_VERSION}: return None intent_id = _required_string(data.get("intent_id")) agent_pid = _positive_int(data.get("agent_pid")) @@ -183,22 +284,45 @@ def validate_workspace_record(data: object) -> WorkspaceIntentRecord | None: scope = _valid_scope(data.get("scope")) scope_digest = data.get("scope_digest") blast_radius_summary = _dict_payload(data.get("blast_radius_summary")) - if ( - intent_id is None - or agent_pid is None - or agent_start_epoch is None - or run_id is None - or declared_at_utc is None - or expires_at_utc is None - or ttl_seconds is None - or status not in _valid_status_values() - or intent is None - or scope is None - or not _is_hex_digest(scope_digest) - or blast_radius_summary is None + lease_fields = _lease_fields_for_version( + data=data, + version=str(version), + declared_at_utc=declared_at_utc, + ) + if lease_fields is None: + return None + lease_renewed_at_utc, lease_seconds, report_digest = lease_fields + if _record_required_value_missing( + intent_id, + agent_pid, + agent_start_epoch, + run_id, + declared_at_utc, + expires_at_utc, + ttl_seconds, + intent, + blast_radius_summary, ): return None - if _parse_utc(declared_at_utc) is None or _parse_utc(expires_at_utc) is None: + assert intent_id is not None + assert agent_pid is not None + assert agent_start_epoch is not 
None + assert run_id is not None + assert declared_at_utc is not None + assert expires_at_utc is not None + assert ttl_seconds is not None + assert intent is not None + assert blast_radius_summary is not None + if status not in _valid_status_values() or scope is None: + return None + assert status is not None + if not _is_hex_digest(scope_digest): + return None + if not _valid_record_dates( + declared_at_utc, + expires_at_utc, + lease_renewed_at_utc, + ): return None if compute_scope_digest(scope) != str(scope_digest): return None @@ -216,9 +340,39 @@ def validate_workspace_record(data: object) -> WorkspaceIntentRecord | None: scope=scope, scope_digest=str(scope_digest), blast_radius_summary=blast_radius_summary, + lease_renewed_at_utc=lease_renewed_at_utc, + lease_seconds=lease_seconds, + report_digest=report_digest, ) +def _lease_fields_for_version( + *, + data: Mapping[str, object], + version: str, + declared_at_utc: str | None, +) -> tuple[str, int, str] | None: + if version == REGISTRY_VERSION: + lease_renewed_at_utc = _required_string(data.get("lease_renewed_at_utc")) + lease_seconds = _valid_lease_seconds(data.get("lease_seconds")) + report_digest = _required_string(data.get("report_digest")) + else: + lease_renewed_at_utc = declared_at_utc + lease_seconds = DEFAULT_LEASE_SECONDS + report_digest = _string_value(data.get("report_digest")) + if lease_renewed_at_utc is None or lease_seconds is None or report_digest is None: + return None + return lease_renewed_at_utc, lease_seconds, report_digest + + +def _record_required_value_missing(*values: object) -> bool: + return any(value is None for value in values) + + +def _valid_record_dates(*values: str) -> bool: + return all(_parse_utc(value) is not None for value in values) + + def write_workspace_intent(*, root: Path, record: WorkspaceIntentRecord) -> bool: try: write_json_document_atomically( @@ -265,6 +419,36 @@ def update_workspace_intent_status( return True +def renew_workspace_intent_lease( + *, + root: 
Path, + pid: int, + start_epoch: int, + intent_id: str, +) -> bool: + found = find_workspace_intent(root=root, intent_id=intent_id) + if found is None: + return False + path, record = found + if record.agent_pid != pid or record.agent_start_epoch != start_epoch: + return False + now = utc_now() + expires = _parse_utc(record.expires_at_utc) + if expires is None or expires <= now: + return False + updated = replace(record, lease_renewed_at_utc=format_utc(now)) + try: + write_json_document_atomically( + path=path, + document=updated.signed_payload(), + sort_keys=True, + trailing_newline=True, + ) + except OSError: + return False + return True + + def remove_workspace_intent( *, root: Path, @@ -337,11 +521,19 @@ def detect_conflicts( new_scope: Mapping[str, object], existing: Sequence[WorkspaceIntentRecord], own_pid: int, + own_start_epoch: int, ) -> list[dict[str, object]]: new_allowed, new_related = _scope_file_sets(new_scope) conflicts: list[dict[str, object]] = [] + now = utc_now() for record in existing: - if record.agent_pid == own_pid or stale_reason(record) is not None: + ownership = classify_intent_ownership( + record, + own_pid=own_pid, + own_start_epoch=own_start_epoch, + now=now, + ) + if ownership != IntentOwnership.FOREIGN_ACTIVE: continue existing_allowed, existing_related = _scope_file_sets(record.scope) hard_overlap = tuple(sorted(new_allowed.intersection(existing_allowed))) @@ -392,7 +584,7 @@ def gc_workspace(*, root: Path) -> dict[str, object]: if _unlink(path): corrupted_filenames.append(path.name) continue - reason = stale_reason(record) + reason = _gc_removal_reason(record) if reason is None: continue if _unlink(path): @@ -409,6 +601,18 @@ def gc_workspace(*, root: Path) -> dict[str, object]: } +def _gc_removal_reason(record: WorkspaceIntentRecord) -> str | None: + reason = stale_reason(record) + if reason == "lease_expired" and not _ttl_expired(record): + return None + return reason + + +def _ttl_expired(record: WorkspaceIntentRecord) -> bool: + 
expires = _parse_utc(record.expires_at_utc) + return expires is None or expires <= utc_now() + + def is_stale(record: WorkspaceIntentRecord) -> bool: return stale_reason(record) is not None @@ -423,6 +627,8 @@ def stale_reason(record: WorkspaceIntentRecord) -> str | None: return "expired" if is_orphaned(record): return "orphaned" + if _is_lease_expired(record): + return "lease_expired" return None @@ -458,6 +664,7 @@ def _updated_record( declared_at_utc=format_utc(declared_at), expires_at_utc=expires_at(declared_at=declared_at, ttl_seconds=ttl_seconds), ttl_seconds=ttl_seconds, + lease_renewed_at_utc=format_utc(declared_at), status=new_status, ) @@ -597,6 +804,15 @@ def _positive_int(value: object) -> int | None: return value +def _valid_lease_seconds(value: object) -> int | None: + parsed = _positive_int(value) + if parsed is None: + return None + if parsed < MIN_LEASE_SECONDS or parsed > MAX_LEASE_SECONDS: + return None + return parsed + + def _is_hex_digest(value: object) -> bool: if not isinstance(value, str) or len(value) != _HEX_DIGEST_LENGTH: return False @@ -674,12 +890,18 @@ def _overlap_type(*, hard: bool, soft: bool) -> str: __all__ = [ + "DEFAULT_LEASE_SECONDS", "DEFAULT_TTL_SECONDS", + "LEGACY_REGISTRY_VERSION", + "MAX_LEASE_SECONDS", "MAX_TTL_SECONDS", + "MIN_LEASE_SECONDS", "MIN_TTL_SECONDS", "REGISTRY_VERSION", + "IntentOwnership", "WorkspaceIntentRecord", "WorkspaceIntentStatus", + "classify_intent_ownership", "compute_intent_digest", "compute_scope_digest", "detect_conflicts", @@ -695,6 +917,8 @@ def _overlap_type(*, hard: bool, soft: bool) -> str: "registry_dir", "remove_workspace_intent", "remove_workspace_record", + "renew_workspace_intent_lease", + "resolved_lease_seconds", "resolved_ttl_seconds", "safe_remove_own_intent", "stale_reason", diff --git a/codeclone/surfaces/mcp/server.py b/codeclone/surfaces/mcp/server.py index b438f9eb..32536ffd 100644 --- a/codeclone/surfaces/mcp/server.py +++ b/codeclone/surfaces/mcp/server.py @@ -915,7 +915,8 
@@ def list_reviewed_findings(run_id: str | None = None) -> dict[str, object]: "to inspect concurrent workspace intents, 'declare' to declare " "intended scope before editing, 'get' to retrieve active intent, " "'check' to verify actual diff against declared scope, 'clear' to " - "remove intent, 'gc_workspace' to clean stale registry files, and " + "remove intent, 'gc_workspace' to clean stale registry files, " + "'recover' to explicitly reclaim a stale leased intent, and " "'reset_workspace' for interrupted-session recovery. In-memory " "intent state remains session-local; workspace coordination state " "is ephemeral under .cache/codeclone/intents/." diff --git a/docs/book/24-structural-change-controller.md b/docs/book/24-structural-change-controller.md index 59b0eb19..95e14a61 100644 --- a/docs/book/24-structural-change-controller.md +++ b/docs/book/24-structural-change-controller.md @@ -18,6 +18,7 @@ queries: | Patch contract | Live in `2.1.0a1` | MCP `check_patch_contract`, CLI `--patch-verify` | | Review receipt | Live in `2.1.0a1` | MCP `create_review_receipt` | | Workspace intent registry | Live in `2.1.0a1` | MCP `manage_change_intent` | +| Lease and recovery | Live in `2.1.0a1` | MCP `manage_change_intent` | | Claim guard | Live in `2.1.0a1` | MCP `validate_review_claims` | ## Contract @@ -57,6 +58,9 @@ report output flags and baseline update flags. 1. Call `manage_change_intent(action="list_workspace", root="/abs/repo")` to see active intents from other agents before analysis. + If it returns `ownership="recoverable"` for a matching run, use + `manage_change_intent(action="recover")` instead of killing another MCP + process or redeclaring blindly. 2. Run `analyze_repository` or `analyze_changed_paths`. 3. Declare scope with `manage_change_intent(action="declare")`. 4. 
If `concurrent_intents` is non-empty, narrow scope or coordinate before @@ -108,8 +112,11 @@ coordination: - `list_workspace`: list active workspace intent records from all agents for a repository root. - `gc_workspace`: remove expired, orphaned, or corrupted registry records. -- `reset_workspace`: recover an own, expired, or orphaned intent. Foreign live - intents are rejected and require coordination. +- `recover`: explicitly reclaim a stale leased intent when the caller has the + matching run and report digest in the current MCP session. +- `reset_workspace`: reset an own intent or remove expired/recoverable + registry records. Foreign active intents are rejected and require + coordination. Registry files live under `.cache/codeclone/intents/` and are protected with a SHA-256 integrity digest over canonical JSON. This detects accidental @@ -117,6 +124,13 @@ corruption, not malicious tampering by a user with write access. Conflicts are advisory: hard overlap means two agents claimed the same primary file; soft overlap means primary files overlap related context. +Each registry record has a TTL and a shorter renewable lease. TTL is the hard +maximum lifetime of the record. The lease is the ownership freshness signal: +active MCP interactions renew it, while detached processes stop renewing and +become recoverable after the lease window. A foreign active record has a live +lease and should be coordinated with the user; CodeClone does not ask agents to +kill the owning process. 
+ ## Review Receipt Payload `create_review_receipt` returns `format="markdown"` by default and can return a diff --git a/tests/test_mcp_server.py b/tests/test_mcp_server.py index 83a5c360..76fbd7be 100644 --- a/tests/test_mcp_server.py +++ b/tests/test_mcp_server.py @@ -218,6 +218,7 @@ def test_mcp_server_exposes_expected_read_only_tools() -> None: ) assert "not NLP" in str(tools["validate_review_claims"].description) assert "list_workspace" in str(tools["manage_change_intent"].description) + assert "recover" in str(tools["manage_change_intent"].description) assert ".cache/codeclone/intents/" in str(tools["manage_change_intent"].description) assert "bounded guidance, not a full manual" in str(tools["help"].description) assert "workflow, analysis_profile, suppressions, baseline" in str( diff --git a/tests/test_mcp_service.py b/tests/test_mcp_service.py index 0bd657d5..4b4de600 100644 --- a/tests/test_mcp_service.py +++ b/tests/test_mcp_service.py @@ -13,6 +13,7 @@ from collections import OrderedDict from collections.abc import Mapping from dataclasses import replace +from datetime import timedelta from pathlib import Path from types import SimpleNamespace from typing import Any, cast @@ -2500,11 +2501,12 @@ def test_mcp_service_manage_change_intent_lifecycle(tmp_path: Path) -> None: service.manage_change_intent(action="get", run_id="abcdef12") -def test_mcp_service_workspace_intent_registry_detects_concurrent_agents( +def _paired_blast_services( tmp_path: Path, - monkeypatch: pytest.MonkeyPatch, -) -> None: - monkeypatch.setattr(mcp_workspace_intents_mod, "_is_pid_alive", lambda pid: True) + *, + first_digest: str = "digest-a", + second_digest: str | None = None, +) -> tuple[CodeCloneMCPService, CodeCloneMCPService]: first = CodeCloneMCPService(history_limit=2) second = CodeCloneMCPService(history_limit=2) first._agent_pid, first._agent_start_epoch, first._agent_label = ( @@ -2517,9 +2519,76 @@ def 
test_mcp_service_workspace_intent_registry_detects_concurrent_agents( 200, "agent-b", ) - record = _blast_radius_run_record(tmp_path) - first._runs.register(record) - second._runs.register(record) + first._runs.register(_blast_radius_run_record(tmp_path, digest=first_digest)) + second._runs.register( + _blast_radius_run_record(tmp_path, digest=second_digest or first_digest) + ) + return first, second + + +def _stale_workspace_intent( + tmp_path: Path, + *, + intent_id: str, +) -> mcp_workspace_intents_mod.WorkspaceIntentRecord: + found = mcp_workspace_intents_mod.find_workspace_intent( + root=tmp_path, + intent_id=intent_id, + ) + assert found is not None + _, workspace_record = found + stale_record = replace( + workspace_record, + lease_renewed_at_utc=mcp_workspace_intents_mod.format_utc( + mcp_workspace_intents_mod.utc_now() - timedelta(minutes=10) + ), + lease_seconds=mcp_workspace_intents_mod.MIN_LEASE_SECONDS, + ) + assert mcp_workspace_intents_mod.write_workspace_intent( + root=tmp_path, + record=stale_record, + ) + return stale_record + + +def _single_service_with_stale_intent( + tmp_path: Path, +) -> tuple[CodeCloneMCPService, str, mcp_workspace_intents_mod.WorkspaceIntentRecord]: + service = CodeCloneMCPService(history_limit=2) + service._agent_pid, service._agent_start_epoch = 11111, 100 + service._runs.register(_blast_radius_run_record(tmp_path)) + declared = service.manage_change_intent( + action="declare", + scope={"allowed_files": ["pkg/a.py"]}, + intent="change pkg.a", + ) + intent_id = str(declared["intent_id"]) + return ( + service, + intent_id, + _stale_workspace_intent( + tmp_path, + intent_id=intent_id, + ), + ) + + +def _lease_expires_at( + record: mcp_workspace_intents_mod.WorkspaceIntentRecord, +) -> str: + renewed_at = mcp_workspace_intents_mod._parse_utc(record.lease_renewed_at_utc) + assert renewed_at is not None + return mcp_workspace_intents_mod.format_utc( + renewed_at + timedelta(seconds=record.lease_seconds) + ) + + +def 
test_mcp_service_workspace_intent_registry_detects_concurrent_agents( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr(mcp_workspace_intents_mod, "_is_pid_alive", lambda pid: True) + first, second = _paired_blast_services(tmp_path) declared_first = first.manage_change_intent( action="declare", @@ -2542,6 +2611,8 @@ def test_mcp_service_workspace_intent_registry_detects_concurrent_agents( assert workspace["total_agents"] == 1 assert workspace_intents[0]["agent_label"] == "agent-a" assert workspace_intents[0]["is_own"] is False + assert workspace_intents[0]["ownership"] == "foreign_active" + assert "Do NOT kill" in str(workspace_intents[0]["escalation_hint"]) hard_conflict = second.manage_change_intent( action="declare", @@ -2570,7 +2641,8 @@ def test_mcp_service_workspace_intent_registry_detects_concurrent_agents( intent_id=first_intent_id, ) assert rejected["action_taken"] == "rejected" - assert rejected["reason"] == "foreign_live_intent" + assert rejected["reason"] == "foreign_active" + assert "Do NOT kill" in str(rejected["escalation_hint"]) cleared = first.manage_change_intent( action="clear", @@ -2579,6 +2651,162 @@ def test_mcp_service_workspace_intent_registry_detects_concurrent_agents( assert cleared["workspace_cleared"] is True +def test_mcp_service_workspace_intent_recovery_after_lease_expiry( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr(mcp_workspace_intents_mod, "_is_pid_alive", lambda pid: True) + first, second = _paired_blast_services(tmp_path) + + declared = first.manage_change_intent( + action="declare", + scope={"allowed_files": ["pkg/a.py"]}, + intent="first agent edits pkg.a", + ) + intent_id = str(declared["intent_id"]) + + rejected = second.manage_change_intent( + action="recover", + root=str(tmp_path), + run_id="abcdef12", + intent_id=intent_id, + ) + assert rejected["action_taken"] == "recovery_rejected" + assert rejected["reason"] == "not_recoverable" + assert 
cast("dict[str, object]", rejected["details"])["ownership"] == ( + "foreign_active" + ) + + found = mcp_workspace_intents_mod.find_workspace_intent( + root=tmp_path, + intent_id=intent_id, + ) + assert found is not None + _, workspace_record = found + stale_record = replace( + workspace_record, + lease_renewed_at_utc=mcp_workspace_intents_mod.format_utc( + mcp_workspace_intents_mod.utc_now() - timedelta(minutes=10) + ), + lease_seconds=mcp_workspace_intents_mod.MIN_LEASE_SECONDS, + ) + assert mcp_workspace_intents_mod.write_workspace_intent( + root=tmp_path, + record=stale_record, + ) + + workspace = second.manage_change_intent( + action="list_workspace", + root=str(tmp_path), + ) + workspace_intents = cast( + "list[dict[str, object]]", + workspace["workspace_intents"], + ) + assert workspace_intents[0]["ownership"] == "recoverable" + assert cast("list[dict[str, object]]", workspace["recovery_available"]) == [ + { + "intent_id": intent_id, + "run_id": "abcdef12", + "scope_digest": stale_record.scope_digest, + "previous_agent_label": "agent-a", + "lease_expired_at_utc": _lease_expires_at(stale_record), + "hint": "Use action='recover' with matching run_id to reclaim.", + } + ] + + recovered = second.manage_change_intent( + action="recover", + root=str(tmp_path), + run_id="abcdef12", + intent_id=intent_id, + ) + assert recovered["action_taken"] == "recovered" + assert recovered["previous_owner"] == { + "agent_pid": 11111, + "agent_start_epoch": 100, + "agent_label": "agent-a", + "lease_renewed_at_utc": stale_record.lease_renewed_at_utc, + } + assert recovered["new_owner"] == { + "agent_pid": 22222, + "agent_start_epoch": 200, + "agent_label": "agent-b", + } + assert ( + second.manage_change_intent(action="get", intent_id=intent_id)["status"] + == "active" + ) + + latest = mcp_workspace_intents_mod.find_workspace_intent( + root=tmp_path, + intent_id=intent_id, + ) + assert latest is not None + _, latest_record = latest + assert latest_record.agent_pid == 22222 + assert 
latest_record.agent_start_epoch == 200 + assert latest_record.status == "active" + + +def test_mcp_service_workspace_intent_recovery_rejects_digest_mismatch( + tmp_path: Path, +) -> None: + first, second = _paired_blast_services( + tmp_path, + first_digest="digest-a", + second_digest="digest-b", + ) + + declared = first.manage_change_intent( + action="declare", + scope={"allowed_files": ["pkg/a.py"]}, + intent="first agent edits pkg.a", + ) + intent_id = str(declared["intent_id"]) + _stale_workspace_intent(tmp_path, intent_id=intent_id) + + rejected = second.manage_change_intent( + action="recover", + root=str(tmp_path), + run_id="abcdef12", + intent_id=intent_id, + ) + + assert rejected["action_taken"] == "recovery_rejected" + assert rejected["reason"] == "report_digest_mismatch" + + +def test_mcp_service_workspace_intent_get_renews_lease(tmp_path: Path) -> None: + service, intent_id, stale_record = _single_service_with_stale_intent(tmp_path) + + service.manage_change_intent(action="get", intent_id=intent_id) + + latest = mcp_workspace_intents_mod.find_workspace_intent( + root=tmp_path, + intent_id=intent_id, + ) + assert latest is not None + _, latest_record = latest + assert latest_record.lease_renewed_at_utc != stale_record.lease_renewed_at_utc + + +def test_mcp_service_patch_contract_renews_workspace_intent_lease( + tmp_path: Path, +) -> None: + service, intent_id, stale_record = _single_service_with_stale_intent(tmp_path) + + service.check_patch_contract(mode="budget", intent_id=intent_id) + + latest = mcp_workspace_intents_mod.find_workspace_intent( + root=tmp_path, + intent_id=intent_id, + ) + assert latest is not None + _, latest_record = latest + assert latest_record.lease_renewed_at_utc != stale_record.lease_renewed_at_utc + + def test_mcp_service_manage_change_intent_validation_expiry_and_prune( tmp_path: Path, ) -> None: diff --git a/tests/test_mcp_shutdown.py b/tests/test_mcp_shutdown.py index 17104ae6..d9ccb193 100644 --- a/tests/test_mcp_shutdown.py 
+++ b/tests/test_mcp_shutdown.py @@ -56,6 +56,9 @@ def _record( scope=scope_payload, scope_digest=workspace_intents.compute_scope_digest(scope_payload), blast_radius_summary={"radius_level": "low"}, + lease_renewed_at_utc=workspace_intents.format_utc(declared_at), + lease_seconds=workspace_intents.DEFAULT_LEASE_SECONDS, + report_digest="digest-a", ) diff --git a/tests/test_workspace_intents.py b/tests/test_workspace_intents.py index 942a19e5..313c82db 100644 --- a/tests/test_workspace_intents.py +++ b/tests/test_workspace_intents.py @@ -19,6 +19,9 @@ def _record( status: str = "active", scope: dict[str, object] | None = None, expires_delta: timedelta = timedelta(hours=1), + lease_renewed_delta: timedelta = timedelta(), + lease_seconds: int = workspace_intents.DEFAULT_LEASE_SECONDS, + report_digest: str = "digest-a", ) -> WorkspaceIntentRecord: declared_at = workspace_intents.utc_now() scope_payload = scope or { @@ -40,6 +43,11 @@ def _record( scope=scope_payload, scope_digest=workspace_intents.compute_scope_digest(scope_payload), blast_radius_summary={"radius_level": "medium"}, + lease_renewed_at_utc=workspace_intents.format_utc( + declared_at + lease_renewed_delta + ), + lease_seconds=lease_seconds, + report_digest=report_digest, ) @@ -158,6 +166,161 @@ def test_workspace_intent_stale_orphan_and_gc( assert workspace_intents.list_workspace_intents(root=tmp_path) == (active,) +def test_workspace_intent_lease_expiry_is_recoverable_not_gc( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + record = _record( + intent_id="intent-lease-expired-001", + lease_renewed_delta=timedelta(minutes=-10), + lease_seconds=workspace_intents.MIN_LEASE_SECONDS, + ) + assert workspace_intents.write_workspace_intent(root=tmp_path, record=record) + monkeypatch.setattr(workspace_intents, "_is_pid_alive", lambda pid: True) + + assert workspace_intents.stale_reason(record) == "lease_expired" + assert workspace_intents.list_workspace_intents(root=tmp_path) == () + assert 
workspace_intents.list_workspace_intents( + root=tmp_path, + exclude_stale=False, + ) == (record,) + + gc_payload = workspace_intents.gc_workspace(root=tmp_path) + assert gc_payload["removed"] == 0 + assert workspace_intents.list_workspace_intents( + root=tmp_path, + exclude_stale=False, + ) == (record,) + + +def test_workspace_intent_ownership_classification( + monkeypatch: pytest.MonkeyPatch, +) -> None: + now = workspace_intents.utc_now() + own = _record(pid=111, start_epoch=100) + own_stale = _record( + pid=111, + start_epoch=100, + lease_renewed_delta=timedelta(minutes=-10), + lease_seconds=workspace_intents.MIN_LEASE_SECONDS, + ) + foreign = _record(pid=222, start_epoch=200) + expired = _record(expires_delta=timedelta(seconds=-1)) + + monkeypatch.setattr(workspace_intents, "_is_pid_alive", lambda pid: pid != 333) + + assert ( + workspace_intents.classify_intent_ownership( + own, + own_pid=111, + own_start_epoch=100, + now=now, + ) + == workspace_intents.IntentOwnership.OWN_ACTIVE + ) + assert ( + workspace_intents.classify_intent_ownership( + own_stale, + own_pid=111, + own_start_epoch=100, + now=now, + ) + == workspace_intents.IntentOwnership.OWN_STALE + ) + assert ( + workspace_intents.classify_intent_ownership( + foreign, + own_pid=111, + own_start_epoch=100, + now=now, + ) + == workspace_intents.IntentOwnership.FOREIGN_ACTIVE + ) + dead_pid = _record(pid=333, start_epoch=300) + assert ( + workspace_intents.classify_intent_ownership( + dead_pid, + own_pid=111, + own_start_epoch=100, + now=now, + ) + == workspace_intents.IntentOwnership.RECOVERABLE + ) + assert ( + workspace_intents.classify_intent_ownership( + expired, + own_pid=expired.agent_pid, + own_start_epoch=expired.agent_start_epoch, + now=now, + ) + == workspace_intents.IntentOwnership.EXPIRED + ) + + +def test_workspace_intent_renew_lease_updates_timestamp(tmp_path: Path) -> None: + record = _record( + lease_renewed_delta=timedelta(minutes=-2), + 
lease_seconds=workspace_intents.DEFAULT_LEASE_SECONDS, + ) + assert workspace_intents.write_workspace_intent(root=tmp_path, record=record) + + assert workspace_intents.renew_workspace_intent_lease( + root=tmp_path, + pid=record.agent_pid, + start_epoch=record.agent_start_epoch, + intent_id=record.intent_id, + ) + updated = workspace_intents.list_workspace_intents(root=tmp_path)[0] + assert updated.lease_renewed_at_utc != record.lease_renewed_at_utc + assert workspace_intents.verify_intent_integrity(updated.signed_payload()) + + +def test_workspace_intent_renew_lease_rejects_foreign_owner(tmp_path: Path) -> None: + record = _record() + assert workspace_intents.write_workspace_intent(root=tmp_path, record=record) + + assert ( + workspace_intents.renew_workspace_intent_lease( + root=tmp_path, + pid=record.agent_pid + 1, + start_epoch=record.agent_start_epoch, + intent_id=record.intent_id, + ) + is False + ) + assert workspace_intents.list_workspace_intents(root=tmp_path)[0] == record + + +def test_workspace_intent_v1_record_defaults_lease_fields() -> None: + record = _record() + payload = { + "registry_version": workspace_intents.LEGACY_REGISTRY_VERSION, + "intent_id": record.intent_id, + "agent_pid": record.agent_pid, + "agent_start_epoch": record.agent_start_epoch, + "agent_label": record.agent_label, + "run_id": record.run_id, + "declared_at_utc": record.declared_at_utc, + "expires_at_utc": record.expires_at_utc, + "ttl_seconds": record.ttl_seconds, + "status": record.status, + "intent": record.intent, + "scope": record.scope, + "scope_digest": record.scope_digest, + "blast_radius_summary": record.blast_radius_summary, + } + payload["integrity"] = { + "payload_sha256": workspace_intents.compute_intent_digest(payload) + } + + validated = workspace_intents.validate_workspace_record(payload) + + assert validated is not None + assert validated.lease_renewed_at_utc == record.declared_at_utc + assert validated.lease_seconds == workspace_intents.DEFAULT_LEASE_SECONDS + 
assert validated.report_digest == "" + + def test_workspace_intent_conflict_detection() -> None: existing = _record() @@ -169,6 +332,7 @@ def test_workspace_intent_conflict_detection() -> None: }, existing=(existing,), own_pid=123456, + own_start_epoch=999, ) assert hard[0]["overlap_type"] == "hard" assert hard[0]["hard_overlap"] == ["pkg/a.py"] @@ -181,6 +345,7 @@ def test_workspace_intent_conflict_detection() -> None: }, existing=(existing,), own_pid=123456, + own_start_epoch=999, ) assert soft[0]["overlap_type"] == "soft" assert soft[0]["soft_overlap"] == ["tests/test_a.py"] @@ -194,6 +359,7 @@ def test_workspace_intent_conflict_detection() -> None: }, existing=(existing,), own_pid=existing.agent_pid, + own_start_epoch=existing.agent_start_epoch, ) == [] ) From 2577681bc54c0a855d50296b2ff927280d18c05a Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sun, 24 May 2026 22:41:06 +0500 Subject: [PATCH 016/318] docs: restructure MCP docs and expand CLAUDE.md directives Rewrite MCP documentation (docs/mcp.md, book/20, book/28) with mermaid diagrams, phase flow charts, and structured tables. Enable mermaid in mkdocs.yml. Expand CLAUDE.md with spec discipline, validation workflow, hard boundaries, and commit style sections. Exclude mkdocs.yml from check-yaml (!!python/name tag). 
--- .pre-commit-config.yaml | 1 + CLAUDE.md | 145 +++++-- docs/book/20-mcp-interface.md | 309 ++++++++------- docs/book/28-claim-guard.md | 146 ++++++-- docs/mcp.md | 687 ++++++++++++++++++++-------------- mkdocs.yml | 6 +- 6 files changed, 808 insertions(+), 486 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index cffc75e5..d0d20fc7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -10,6 +10,7 @@ repos: - id: check-added-large-files - id: check-toml - id: check-yaml + exclude: ^mkdocs\.yml$ - repo: local hooks: diff --git a/CLAUDE.md b/CLAUDE.md index 4de09c6e..6f71594c 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,40 +1,131 @@ -# CodeClone Project Rules +# CodeClone — Claude Code Directives + +## Identity + +CodeClone: deterministic structural controller for Python. +Full architecture, contracts, and agent playbook → `AGENTS.md`. +Code is the source of truth. If docs and code diverge, follow code. + +## Default role + +**Specs and validation only.** Do not edit production code unless the +user explicitly permits it for a specific task. "Реализуй" / "Implement" +is explicit permission. "Проверь" / "Validate" is not. + +When permitted to edit code, follow the change control workflow below. ## Change control workflow -This repository uses CodeClone MCP for structural change control. -Before editing any files, follow this workflow: - -1. Check workspace: `manage_change_intent(action="list_workspace", - root="")` - — if other agents have active intents, review their scope -2. Run analysis: `analyze_repository(root="")` -3. Declare intent: `manage_change_intent(action="declare", scope={...})` - — if `concurrent_intents` is non-empty, narrow scope or ask the user -4. Check blast radius: `get_blast_radius(files=[...])` -5. Check budget: `check_patch_contract(mode="budget")` -6. Edit files within declared scope only -7. Re-run analysis: `analyze_repository(root="")` -8. 
Verify: `manage_change_intent(action="check", ...)` then +Before editing any repository files: + +1. `manage_change_intent(action="list_workspace", root="")` + — if `foreign_active` intents overlap, **stop and ask the user** +2. `analyze_repository(root="")` +3. `manage_change_intent(action="declare", scope={...})` + — if `concurrent_intents` non-empty, narrow scope or ask +4. `get_blast_radius(files=[...])` +5. `check_patch_contract(mode="budget")` +6. Edit within declared scope only +7. `analyze_repository(root="")` — re-run after edits +8. `manage_change_intent(action="check", ...)` then `check_patch_contract(mode="verify")` -9. Clear intent: `manage_change_intent(action="clear")` +9. `manage_change_intent(action="clear")` ### Rules - Never edit files without declaring intent first. - Never silently expand scope — redeclare with expanded scope. -- Treat `do_not_touch` as a hard boundary. -- Treat `review_context` as context, not an edit ban. -- Do not update baselines, cache, or generated reports as part of a - functional change. -- If `list_workspace` shows another agent working on overlapping files, - stop and coordinate with the user before proceeding. -- CodeClone is the source of truth — do not reinterpret findings. +- `do_not_touch` is a hard boundary. `review_context` is context, not a ban. +- Do not update baselines, cache, or generated reports. +- If `list_workspace` shows overlapping foreign intent, stop and coordinate. +- CodeClone findings are the source of truth — do not reinterpret. +- Live foreign intent means **stop**, not kill. Never suggest killing + a process without explicit user confirmation that the PID is abandoned. ### When to skip -Skip this workflow only when: +- Read-only tasks (analysis, validation, research) +- CodeClone MCP not available +- User explicitly says analysis-only + +## Spec writing discipline + +Specs are disposable implementation briefs, not documentation. +They are deleted after implementation and validation. 
+ +### Invariants + +- **One model per decision.** If the spec describes alternative + approaches, choose one and close the others. Never leave two + incompatible paths in the same section. +- **Verify against code.** Every function signature, data model, and + behavior claim in the spec must be verified against current code + before writing. Read the source, do not assume. +- **No aspirational APIs.** If a function doesn't exist yet, say so. + Do not describe it as if it does. +- **Decision table for state machines.** If the spec introduces states + or classifications, provide an exhaustive decision table. Every + input combination must map to exactly one output. +- **Dependency direction explicit.** List what each new file imports + and what imports it. Verify against the architecture rules in + `AGENTS.md` §14. + +### Self-check before delivery + +Before presenting a spec, verify: + +1. Are there two conflicting approaches in the same spec? → pick one. +2. Does every code snippet match the actual codebase API? → read source. +3. Is every state transition deterministic? → write the decision table. +4. Can the implementer follow this without interpreting ambiguity? → if + unclear, it's wrong. + +## Validation discipline + +When validating an implementation against a spec: + +1. Read all implementation files (not just grep). +2. Cross-reference every spec requirement against code. +3. Run the relevant tests: `uv run pytest -q `. +4. Run `uv run pre-commit run --all-files` if the user asks to commit. +5. Check MCP tool visibility if a new tool was added. +6. Report: conformant / improved / divergent / missing — with evidence. + +## Verification commands + +```bash +# Always +uv run pre-commit run --all-files + +# MCP changes +uv run pytest -q tests/test_mcp_service.py tests/test_mcp_server.py + +# Full suite +uv run pytest -q +``` + +See `AGENTS.md` §3 for surface-specific commands. + +## Hard boundaries + +- Never update golden snapshots to "fix" tests. 
+- Never change fingerprint semantics without `FINGERPRINT_VERSION` review. +- Never make base `codeclone` depend on MCP runtime packages. +- Never let MCP mutate baselines, source files, reports, or cache. +- Never iterate sets/dicts without sorting when output order matters. +- Never introduce `Any` in core/domain code without narrowing it immediately. +- Never create `*.md` specs inside `docs/` — use `specs/` directory. +- Version constants live in `codeclone/contracts/__init__.py` — always + read from there, never copy from another doc. + +## Commit style + +``` +feat(scope): short imperative description + +Optional body with context. +``` -- No repository files will be changed (read-only tasks, specs only) -- CodeClone MCP is not available -- The user explicitly asks for analysis only +Scopes: `mcp`, `cli`, `core`, `baseline`, `cache`, `report`, `html`, +`metrics`, `docs`, `vscode`, `codex`, `claude-desktop`. +Prefixes: `feat`, `fix`, `refactor`, `test`, `docs`, `chore`. diff --git a/docs/book/20-mcp-interface.md b/docs/book/20-mcp-interface.md index 662e546f..4ac29569 100644 --- a/docs/book/20-mcp-interface.md +++ b/docs/book/20-mcp-interface.md @@ -2,188 +2,231 @@ ## Purpose -Define the current public MCP surface in the CodeClone `2.1` release line. +Define the public MCP surface in the CodeClone `2.1` release line. The MCP layer is optional, read-only, and built on the same canonical pipeline/report contracts as the CLI. It does not create a second analysis engine or a second persistence model. -!!! note "Read-only integration contract" - MCP surfaces the same canonical report and run state as the CLI and HTML - report. It must not mutate source, baseline, cache, or report artifacts. +!!! note "Integration surface, not a second analyzer" +MCP composes over the canonical report and run state shared by CLI, HTML, +and SARIF. It never mutates source files, baselines, analysis cache, or +report artifacts. 
+ +--- ## Public surface -- package extra: `codeclone[mcp]` -- launcher: `codeclone-mcp` -- server wiring: `codeclone/surfaces/mcp/server.py` -- in-process service/session: `codeclone/surfaces/mcp/service.py`, - `codeclone/surfaces/mcp/session.py` +| Artifact | Path | +|-------------------|--------------------------------------------------------------------------| +| Package extra | `codeclone[mcp]` | +| Launcher | `codeclone-mcp` | +| Server wiring | `codeclone/surfaces/mcp/server.py` | +| Service / session | `codeclone/surfaces/mcp/service.py`, `codeclone/surfaces/mcp/session.py` | + +--- ## Shape +```mermaid +graph LR + subgraph Server["codeclone-mcp"] + T["Transport
stdio · streamable-http"] + SVC["Service<br/>tool routing, shutdown"] + SESS["Session
runs, intents, markers"] + end + + T --> SVC --> SESS + SESS -->|"reads"| RP["Canonical Report"] + SESS -->|"writes"| WIR[".cache/codeclone/intents/"] + + style Server stroke:#6366f1,stroke-width:2px + style WIR fill:#fef9c3 +``` + Current server characteristics: -- optional dependency; base `codeclone` install does not require MCP runtime -- transports: - - `stdio` - - `streamable-http` -- run storage: - - in-memory only - - bounded by `--history-limit` - - latest-run pointer is process-local -- roots: - - analysis tools require an absolute repository root - - relative roots such as `.` are rejected -- analysis modes: - - `full` - - `clones_only` -- cache policies: - - `reuse` - - `off` - - `refresh` is rejected by the read-only MCP service contract; use `reuse` - or `off` +- **Optional dependency** — base `codeclone` install does not require MCP + runtime packages. +- **Transports** — `stdio` (default), `streamable-http`. +- **Run storage** — in-memory only, bounded by `--history-limit` (default 4, + max 10). Latest-run pointer is process-local. +- **Roots** — analysis tools require an absolute repository root. Relative + roots such as `.` are rejected. +- **Analysis modes** — `full`, `clones_only`. +- **Cache policies** — `reuse`, `off`. `refresh` is rejected by the read-only + MCP service contract. !!! warning "Absolute roots and remote exposure" - Analysis tools require an absolute repository root, and HTTP exposure - beyond loopback is intentionally explicit. Keep `stdio` as the default for - local IDE and agent clients. +Analysis tools require an absolute repository root. HTTP exposure beyond +loopback requires explicit `--allow-remote` and has no built-in +authentication. -## Tools +--- -Current tool set: `26` tools. - -The MCP surface is intentionally triage-first: analyze first, summarize/triage -second, then drill into one finding or one hotspot family. +## Tools -`get_blast_radius` keeps hard guardrails separate from review context. 
-`do_not_touch` is limited to actionable negative context such as baselines, -generated CodeClone state, and explicit forbidden paths. Report-only signals -such as security boundary inventory and overloaded-module candidates are -returned as `review_context`, not as edit prohibitions. Long context sections -include `total`, `shown`, and `truncated` summaries. +Current tool set: **26 tools** organized by workflow phase. -`manage_change_intent` is session-local for intent truth, but v2.1 also writes -best-effort workspace coordination records under `.cache/codeclone/intents/`. -Those records are advisory multi-agent visibility only; MCP still never updates -source files, baselines, reports, or analysis cache data. +```mermaid +graph LR + A["1. Analyze"] --> T["2. Triage"] + T --> D["3. Drill down"] + T --> F["4. Focused checks"] + D --> CC["5. Change control"] + F --> CC + CC --> S["6. Session"] -`create_review_receipt` is a read-only audit artifact. It composes stored -report provenance, optional intent/blast-radius state, reviewed findings, -structural delta, patch-contract status, human decision points, and -claims-not-made into markdown or JSON. It does not enter report integrity and -does not persist outside the MCP session. + style A fill:#dbeafe + style T fill:#dbeafe + style CC fill:#f0fdf4 +``` -`validate_review_claims` is a read-only claim guard over stored run semantics. -It validates cited review text using deterministic string matching around -finding ids and metric family names. It is not an NLP fact checker and it is not -a CI gate. +The surface is intentionally triage-first: analyze → summarize/triage → +drill into one finding or one hotspot family. 
### Analysis and run-level tools -| Tool | Key parameters | Purpose | -|-------------------------|------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------| -| `analyze_repository` | `root`, `analysis_mode`, thresholds, `api_surface`, `coverage_xml`, `baseline_path`, `metrics_baseline_path`, `cache_policy` | Full deterministic analysis of one repo root; registers the latest in-memory run. | -| `analyze_changed_paths` | `root`, `changed_paths` or `git_diff_ref`, `analysis_mode`, thresholds, `api_surface`, `coverage_xml`, `cache_policy` | Diff-aware analysis with changed-files projection over the same canonical run/report contract. | -| `get_run_summary` | `run_id` | Cheapest run-level snapshot. Start here after analysis when you need health, findings, baseline/cache status, and inventory in compact form. | -| `get_production_triage` | `run_id`, `max_hotspots`, `max_suggestions` | Production-first first-pass view over one stored run. | -| `get_blast_radius` | `run_id`, `files`, `depth`, `include` | Derived pre-change risk boundary: direct dependents, clone cohorts, coverage gaps, risk signals, actionable do-not-touch paths, and review-only context. | -| `check_patch_contract` | `mode`, `run_id`, `before_run_id`, `after_run_id`, `intent_id`, `strictness`, `changed_files` or `diff_ref` | Pre-edit regression budget or post-edit verification over stored runs, gate evaluation, change intent scope, and baseline-abuse signals. | -| `create_review_receipt` | `run_id`, `intent_id`, `format`, `include_blast_radius`, `include_patch_contract` | Deterministic audit artifact over stored run/session state; returns markdown or JSON without mutating artifacts. 
| -| `validate_review_claims` | `text`, `run_id`, `require_citations` | Citation-based validator for review text; flags deterministic mischaracterizations of report-only signals, known debt, reachability, or unverified fixes. | -| `help` | `topic`, `detail` | Bounded workflow/contract guidance for supported MCP topics. | -| `compare_runs` | `run_id_before`, `run_id_after`, `focus` | Run-to-run delta view over findings and health; returns `incomparable` when roots/settings differ. | -| `evaluate_gates` | `run_id`, gate flags, threshold overrides, `coverage_min` | Evaluate CI/gating decisions against a stored run without mutating process or repo state. | +| Tool | Key parameters | Purpose | +|-------------------------|------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------| +| `analyze_repository` | `root`, `analysis_mode`, thresholds, `api_surface`, `coverage_xml`, `baseline_path`, `metrics_baseline_path`, `cache_policy` | Full deterministic analysis; registers an in-memory run | +| `analyze_changed_paths` | `root`, `changed_paths` or `git_diff_ref`, `analysis_mode`, thresholds, `api_surface`, `coverage_xml`, `cache_policy` | Diff-aware analysis with changed-files projection | +| `get_run_summary` | `run_id` | Cheapest run-level snapshot: health, findings, baseline/cache status | +| `get_production_triage` | `run_id`, `max_hotspots`, `max_suggestions` | Production-first first-pass view | +| `compare_runs` | `run_id_before`, `run_id_after`, `focus` | Run-to-run delta; returns `incomparable` when roots/settings differ | +| `evaluate_gates` | `run_id`, gate flags, threshold overrides, `coverage_min` | Preview CI gating decisions without mutating state | +| `help` | `topic`, `detail` | Bounded workflow/contract guidance | ### Report and finding projection tools -| Tool | Key parameters | Purpose | 
-|-----------------------|------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------| -| `get_report_section` | `run_id`, `section`, `family`, `path`, `offset`, `limit` | Read canonical report sections; `metrics_detail` is the bounded/paginated drill-down path. | -| `list_findings` | `run_id`, `family`, `category`, `severity`, `source_kind`, `novelty`, `sort_by`, `detail_level`, changed-scope filters, pagination | Deterministic filtered finding list over canonical stored findings. | -| `get_finding` | `finding_id`, `run_id`, `detail_level` | Return one canonical finding group by short or full id. | -| `get_remediation` | `finding_id`, `run_id`, `detail_level` | Return the remediation/explainability packet for one finding. | -| `list_hotspots` | `kind`, `run_id`, `detail_level`, changed-scope filters, pagination | Return one derived hotspot list such as `most_actionable` or `production_hotspots`. | -| `generate_pr_summary` | `run_id`, `changed_paths`, `git_diff_ref`, `format` | PR-oriented summary for changed scope; `markdown` is the default human/LLM-facing format. 
| +| Tool | Key parameters | Purpose | +|-----------------------|------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------| +| `get_report_section` | `run_id`, `section`, `family`, `path`, `offset`, `limit` | Read report sections; `metrics_detail` is paginated | +| `list_findings` | `run_id`, `family`, `category`, `severity`, `source_kind`, `novelty`, `sort_by`, `detail_level`, changed-scope filters, pagination | Filtered, paginated finding list | +| `get_finding` | `finding_id`, `run_id`, `detail_level` | One canonical finding by short or full ID | +| `get_remediation` | `finding_id`, `run_id`, `detail_level` | Remediation/explainability for one finding | +| `list_hotspots` | `kind`, `run_id`, `detail_level`, changed-scope filters, pagination | Priority-ranked hotspot views by kind | +| `generate_pr_summary` | `run_id`, `changed_paths`, `git_diff_ref`, `format` | PR-oriented markdown or JSON summary | ### Focused check tools -| Tool | Key parameters | Purpose | -|--------------------|-------------------------------------------------------------------------------------------------|-------------------------------------------------------| -| `check_clones` | `run_id` or absolute `root`, `path`, `clone_type`, `source_kind`, `max_results`, `detail_level` | Narrow clone-only query over a compatible stored run. | -| `check_complexity` | `run_id` or absolute `root`, `path`, `min_complexity`, `max_results`, `detail_level` | Narrow complexity-hotspot query. | -| `check_coupling` | `run_id` or absolute `root`, `path`, `max_results`, `detail_level` | Narrow coupling-hotspot query. | -| `check_cohesion` | `run_id` or absolute `root`, `path`, `max_results`, `detail_level` | Narrow cohesion-hotspot query. | -| `check_dead_code` | `run_id` or absolute `root`, `path`, `min_severity`, `max_results`, `detail_level` | Narrow dead-code query. 
| +| Tool | Key parameters | Purpose | +|--------------------|----------------------------------------------------------------------------------------|--------------------------| +| `check_clones` | `run_id` or `root`, `path`, `clone_type`, `source_kind`, `max_results`, `detail_level` | Narrow clone-only query | +| `check_complexity` | `run_id` or `root`, `path`, `min_complexity`, `max_results`, `detail_level` | Complexity hotspot query | +| `check_coupling` | `run_id` or `root`, `path`, `max_results`, `detail_level` | Coupling hotspot query | +| `check_cohesion` | `run_id` or `root`, `path`, `max_results`, `detail_level` | Cohesion hotspot query | +| `check_dead_code` | `run_id` or `root`, `path`, `min_severity`, `max_results`, `detail_level` | Dead code query | + +### Change control tools + +| Tool | Key parameters | Purpose | +|--------------------------|-------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------| +| `manage_change_intent` | `action`, `root`, `run_id`, `intent_id`, `scope`, `ttl_seconds`, `changed_files` or `diff_ref` | Intent lifecycle: declare, get, check, clear, list_workspace, gc_workspace, reset_workspace | +| `get_blast_radius` | `run_id`, `files`, `depth`, `include` | Pre-change risk boundary: dependents, clone cohorts, do-not-touch, review context | +| `check_patch_contract` | `mode`, `run_id`, `before_run_id`, `after_run_id`, `intent_id`, `strictness`, `changed_files` or `diff_ref` | Budget query or post-edit verification | +| `create_review_receipt` | `run_id`, `intent_id`, `format`, `include_blast_radius`, `include_patch_contract` | Deterministic audit artifact: provenance, scope, reviewed findings, patch status | +| `validate_review_claims` | `text`, `run_id`, `require_citations` | Citation-based overclaim detection against stored run semantics | + +??? 
info "Blast radius: do_not_touch vs review_context" +`do_not_touch` is limited to actionable negative context: baselines, +generated CodeClone state, explicit forbidden paths. Report-only signals +such as security boundary inventory and overloaded-module candidates are +returned as `review_context` — information, not edit prohibitions. Long +context sections include `total`, `shown`, and `truncated` summaries. + +??? info "Patch contract modes" +**Budget** reads one stored run and optional intent. Shows regression +headroom per quality dimension before editing. **Verify** compares +explicit before/after stored runs, previews gates, validates scope, and +reports baseline-abuse signals. Missing runs return +`status="unverified"`. ### Session-local tools -| Tool | Key parameters | Purpose | -|--------------------------|-------------------------------------------------------------------------|-------------------------------------------------------------------------------------| -| `mark_finding_reviewed` | `finding_id`, `run_id`, `note` | Mark a finding as reviewed in the current in-memory MCP session. | -| `list_reviewed_findings` | `run_id` | Return reviewed markers currently held in process memory. | -| `manage_change_intent` | `action`, `root`, `run_id`, `intent_id`, `scope`, `ttl_seconds`, `changed_files` or `diff_ref` | Declare/check/clear session-local intent and list/gc/reset workspace coordination records. | -| `clear_session_runs` | none | Clear in-memory run history and session-local review state for this server process. 
| +| Tool | Key parameters | Purpose | +|--------------------------|--------------------------------|-----------------------------------------| +| `mark_finding_reviewed` | `finding_id`, `run_id`, `note` | Session-local review marker (in-memory) | +| `list_reviewed_findings` | `run_id` | List reviewed markers for a run | +| `clear_session_runs` | — | Reset in-memory runs and session state | + +--- ## Resources -Resources are deterministic read-only projections over stored runs. - -| URI | Purpose | -|---------------------------------------------------|-------------------------------------------------------------| -| `codeclone://latest/summary` | Compact summary for the latest stored run. | -| `codeclone://latest/report.json` | Canonical JSON report for the latest stored run. | -| `codeclone://latest/health` | Health/metrics snapshot for the latest stored run. | -| `codeclone://latest/gates` | Last gate-evaluation result produced in this MCP session. | -| `codeclone://latest/changed` | Changed-files projection for the latest diff-aware run. | -| `codeclone://latest/triage` | Production-first triage payload for the latest stored run. | -| `codeclone://schema` | Canonical report schema-style descriptor. | -| `codeclone://runs/{run_id}/summary` | Compact summary for one specific stored run. | -| `codeclone://runs/{run_id}/report.json` | Canonical JSON report for one specific stored run. | -| `codeclone://runs/{run_id}/findings/{finding_id}` | Canonical JSON finding payload for one specific stored run. | +Resources are deterministic read-only projections over stored runs. They do +not trigger analysis. 
+ +### Fixed resources (7) + +| URI | Content | +|----------------------------------|-------------------------------------------------| +| `codeclone://latest/summary` | Compact summary for the latest stored run | +| `codeclone://latest/report.json` | Canonical JSON report for the latest stored run | +| `codeclone://latest/health` | Health/metrics snapshot | +| `codeclone://latest/gates` | Last gate-evaluation result | +| `codeclone://latest/changed` | Changed-files projection | +| `codeclone://latest/triage` | Production-first triage payload | +| `codeclone://schema` | Canonical report shape descriptor | + +### Run-scoped templates (3) + +| URI template | Content | +|---------------------------------------------------|---------------------------------| +| `codeclone://runs/{run_id}/summary` | Summary for a specific run | +| `codeclone://runs/{run_id}/report.json` | Report for a specific run | +| `codeclone://runs/{run_id}/findings/{finding_id}` | One finding from a specific run | + +`codeclone://latest/*` always resolves to the most recent run. + +--- ## Contract rules -- MCP is read-only with respect to source files, baselines, analysis cache - artifacts such as `cache.json`, and report artifacts. +- MCP is **read-only** with respect to source files, baselines, analysis + cache (`cache.json`), and report artifacts. - MCP reuses the same canonical report document as CLI/JSON/HTML/SARIF. -- Finding ids, ordering, and summary data are deterministic projections over +- Finding IDs, ordering, and summary data are deterministic projections over the stored run. - `analyze_changed_paths` requires either explicit `changed_paths` or `git_diff_ref`. -- `analyze_repository` and `analyze_changed_paths` require an absolute `root`. -- `check_*` tools may resolve against an existing stored run, but if `root` is - provided it must also be absolute. +- Analysis tools require an absolute `root`. 
+- `check_*` tools may resolve against a stored run; if `root` is provided it + must be absolute. - `git_diff_ref` is validated before any subprocess call. - Review markers are session-local in-memory state only. -- Change intent and blast-radius cache state are session-local in-memory state - only; they do not enter canonical report integrity, baseline, or cache - artifacts. +- Change intent, blast-radius cache, and workspace registry state do not + enter canonical report integrity, baseline, or cache artifacts. - Run history is process-local and does not survive restart. - Missing optional MCP dependency is surfaced explicitly by the launcher. - `metrics_detail(family="security_surfaces")` exposes a compact, report-only - inventory of exact security-relevant capability surfaces. It does not claim + inventory of security-relevant capability surfaces. It does not claim vulnerabilities or exploitability. -- `validate_review_claims` detects overclaims such as Security Surfaces called - vulnerabilities, report-only families called CI failures, known baseline debt - called new regressions, dead-code certainty where runtime reachability - evidence exists, and fixes claimed before a post-patch run exists. +- `validate_review_claims` detects deterministic overclaims. See + [28-claim-guard.md](28-claim-guard.md) for the full pattern catalog. 
+ +--- ## Security model -- default transport is local `stdio` -- non-local HTTP exposure requires explicit `--allow-remote` -- server runtime is loaded lazily so base installs and normal CI do not require - MCP packages -- MCP must not mutate repo state or synthesize findings outside canonical - report facts +| Property | Guarantee | +|-------------------|---------------------------------------------------------------------------------| +| Default transport | Local `stdio` | +| Remote exposure | Explicit `--allow-remote` required for non-loopback | +| Lazy loading | Base installs and CI do not require MCP packages | +| Read-only | Never mutates repo state or synthesizes findings outside canonical report facts | + +--- ## Determinism -- run identity is derived from canonical report integrity -- summary, hotspots, findings, and remediation payloads are deterministic - projections over stored run state -- MCP must not create MCP-only analysis semantics or MCP-only gate semantics +- Run identity is derived from canonical report integrity digest. +- Summary, hotspots, findings, and remediation payloads are deterministic + projections over stored run state. +- MCP must not create MCP-only analysis semantics or MCP-only gate + semantics. + +--- ## Locked by tests @@ -191,10 +234,12 @@ Resources are deterministic read-only projections over stored runs. 
- `tests/test_mcp_server.py` - `tests/test_mcp_tool_schema_snapshot.py` +--- + ## See also -- [09-cli.md](09-cli.md) -- [08-report.md](08-report.md) -- [14-compatibility-and-versioning.md](14-compatibility-and-versioning.md) -- [28-claim-guard.md](28-claim-guard.md) -- [../mcp.md](../mcp.md) +- [28-claim-guard.md](28-claim-guard.md) — citation-based review validation +- [24-structural-change-controller.md](24-structural-change-controller.md) — change control workflow +- [09-cli.md](09-cli.md) — CLI reference +- [08-report.md](08-report.md) — canonical report schema +- [MCP deep dive](../mcp.md) — architecture, client setup, workflows, and prompt patterns diff --git a/docs/book/28-claim-guard.md b/docs/book/28-claim-guard.md index 4012073f..f7cd5b05 100644 --- a/docs/book/28-claim-guard.md +++ b/docs/book/28-claim-guard.md @@ -6,72 +6,136 @@ Define the `validate_review_claims` MCP tool in the CodeClone `2.1` release line. Claim guard keeps review text disciplined. It validates cited claims against -semantic flags already present in stored MCP runs. It does not perform free-form -NLP, source analysis, or fact checking. +semantic flags already present in stored MCP runs. It does not perform +free-form NLP, source analysis, or fact checking. + +--- ## Public surface -- MCP tool: `validate_review_claims` -- service method: `CodeCloneMCPService.validate_review_claims` -- session mixin: `codeclone/surfaces/mcp/_session_claim_guard_mixin.py` -- pure validator: `codeclone/surfaces/mcp/_claim_guard.py` +| Artifact | Path | +|----------------|--------------------------------------------------------| +| MCP tool | `validate_review_claims` | +| Service method | `CodeCloneMCPService.validate_review_claims` | +| Session mixin | `codeclone/surfaces/mcp/_session_claim_guard_mixin.py` | +| Pure validator | `codeclone/surfaces/mcp/_claim_guard.py` | + +--- + +## Validation pipeline + +```mermaid +graph LR + T["Review text"] --> E["Extract citations
finding IDs, metric families"] + W["Text window<br/>±80 chars around citation"] + P["Pattern checks
P-1 … P-5"] + P --> V{"Violations?"} + V -->|"yes"| INV["valid: false"] + V -->|"no"| OK["valid: true"] + + style INV fill:#fee2e2 + style OK fill:#f0fdf4 +``` + +The pipeline is fully deterministic: + +1. Resolve the stored run. +2. Index canonical and short finding IDs from the canonical report. +3. Read metric-family gate semantics from the metric registry. +4. Extract citations from the supplied text. +5. Check keyword patterns inside a bounded text window around each citation. + +--- ## Parameters -| Parameter | Type | Default | Meaning | -|-----------|------|---------|---------| -| `text` | `str` | required | Markdown, plain text, or JSON string to validate. | -| `run_id` | `str \| None` | latest | Stored MCP run whose report semantics are used. | -| `require_citations` | `bool` | `true` | Warn when no known finding ids or metric family names are cited. | +| Parameter | Type | Default | Meaning | +|---------------------|---------------|----------|-----------------------------------------------------------------| +| `text` | `str` | required | Markdown, plain text, or JSON string to validate | +| `run_id` | `str \| None` | latest | Stored MCP run whose report semantics are used | +| `require_citations` | `bool` | `true` | Warn when no known finding IDs or metric family names are cited | +!!! info "Text limits" Text must be non-empty and at most `50,000` characters. -## Contract +--- -The tool is read-only. It does not mutate source files, baselines, reports, -analysis cache, review markers, or change intents. +## Contract -Validation is deterministic: +The tool is **read-only**. It does not mutate source files, baselines, +reports, analysis cache, review markers, or change intents. -1. Resolve the stored run. -2. Index canonical and short finding ids from the canonical report. -3. Read metric-family gate semantics from CodeClone's metric registry. -4. Extract citations from the supplied text. -5. 
Check conservative keyword patterns inside a bounded sentence/window around - each citation. +### Response shape -The response contains: +| Field | Type | Meaning | +|-----------------------|--------|--------------------------------------| +| `valid` | `bool` | `true` when no violations were found | +| `citations_found` | `int` | Number of recognized citations | +| `violations` | `list` | Deterministic overclaim records | +| `warnings` | `list` | Missing or unknown citations | +| `validated_citations` | `list` | Per-citation validity summary | -- `valid`: `true` when no violations were found. -- `citations_found`: number of recognized citations. -- `violations`: deterministic overclaim records. -- `warnings`: missing or unknown citations. -- `validated_citations`: per-citation validity summary. +Warnings do not make the response invalid. Only violations set +`valid=false`. -Warnings do not make the response invalid. +--- ## Patterns -| Pattern | Meaning | -|---------|---------| -| `P-1` | Security Surfaces were described as vulnerabilities or exploitability. | -| `P-2` | A report-only metric family was described as a CI failure or blocking gate. | -| `P-3` | A finding with `novelty="known"` was described as new or introduced. | -| `P-4` | Dead-code certainty was claimed despite runtime reachability evidence. | -| `P-5` | A finding was claimed fixed/resolved before a post-patch run was available. | +Five deterministic overclaim patterns, each checking keyword proximity +around cited finding IDs or metric family names: + +### P-1: Security surface overclaim + +Security Surfaces described as vulnerabilities or exploitability. +Security Surfaces are a **report-only boundary inventory** — they show +where security-relevant capabilities exist, not whether they are +exploitable. + +### P-2: Gate overclaim + +A report-only metric family described as a CI failure or blocking gate. +Not all metric families participate in gating; report-only families are +informational. 
+ +### P-3: Regression overclaim + +A finding with `novelty="known"` described as new or introduced. Known +findings are accepted baseline debt, not new regressions. + +### P-4: Dead code certainty overclaim + +Dead-code certainty claimed despite runtime reachability evidence. When +framework reachability patterns match a dead-code candidate, certainty +claims are invalid. + +### P-5: Fix overclaim + +A finding claimed as fixed or resolved before a post-patch run is +available. Without a comparison run, fix claims cannot be verified. + +--- ## Non-goals -Claim guard is not: +!!! warning "What claim guard is not" +- Not a vulnerability scanner +- Not a CI gate +- Not an LLM fact checker +- Not proof that uncited text is correct +- Not a replacement for `check_patch_contract` -- a vulnerability scanner -- a CI gate -- an LLM fact checker -- a proof that uncited text is correct -- a replacement for `check_patch_contract` +--- ## Locked by tests - `tests/test_mcp_service.py` - `tests/test_mcp_server.py` - `tests/test_mcp_tool_schema_snapshot.py` + +--- + +## See also + +- [20-mcp-interface.md](20-mcp-interface.md) — full MCP tool and resource contract +- [MCP deep dive](../mcp.md) — architecture, workflows, prompt patterns diff --git a/docs/mcp.md b/docs/mcp.md index 5c038bc3..df44b18d 100644 --- a/docs/mcp.md +++ b/docs/mcp.md @@ -1,225 +1,360 @@ -# MCP Usage Guide +# MCP for AI Agents CodeClone MCP is a **read-only, baseline-aware** analysis server for AI agents -and MCP-capable clients. It exposes the deterministic pipeline without mutating -source files, baselines, cache, or report artifacts. Session-local review/run -state is mutable in memory only. +and MCP-capable clients. It exposes the same deterministic pipeline as the CLI +without mutating source files, baselines, cache, or report artifacts. Works with any MCP-capable client regardless of backend model. -!!! 
note "Read-only by contract" - MCP is an integration surface over the same canonical pipeline and report - contracts as the CLI. It does not create a second analysis engine or write - back to repository state. +--- + +## Architecture + +### Where MCP fits + +MCP is an **integration surface**, not a second analyzer. It composes over the +same canonical pipeline and report contracts as the CLI and HTML report. + +```mermaid +graph LR + A[Source Code] --> B[Core Pipeline] + B --> C[Canonical Report] + C --> D[CLI] + C --> E[HTML] + C --> F[MCP] + C --> G[SARIF] + + style F stroke:#6366f1,stroke-width:2px +``` + +### Session architecture + +Every `codeclone-mcp` process owns an isolated session. Session state lives +entirely in process memory and does not survive restart. + +```mermaid +graph TD + subgraph MCPSession["MCPSession (in-memory)"] + RS[Run Store
bounded history] + AI[Active Intents
change control] + RM[Review Markers
session-local] + BRC[Blast Radius Cache] + GR[Gate Results] + end + + subgraph Disk["Disk (ephemeral)"] + WIR[".cache/codeclone/intents/
Workspace Intent Registry"] + end + + MCPSession -->|"writes coordination records"| Disk + MCPSession -->|"never writes"| BL[Baselines] + MCPSession -->|"never writes"| CA[Cache] + MCPSession -->|"never writes"| RP[Reports] + MCPSession -->|"never writes"| SC[Source Files] + + style BL fill:#fee2e2 + style CA fill:#fee2e2 + style RP fill:#fee2e2 + style SC fill:#fee2e2 +``` + +### Mixin chain + +The session is composed from focused mixins, each owning one capability +layer. The chain is append-only: new phases extend the top without modifying +existing mixins. + +```mermaid +graph BT + F["_MCPSessionFindingMixin
finding queries, id maps"] + CP["_MCPSessionChangedProjectionMixin
changed-files projection"] + AA["_MCPSessionAnalysisArgsMixin
analysis parameter resolution"] + RSB["_MCPSessionRunSummaryBuilderMixin
summary construction"] + SM["_MCPSessionSummaryMixin
triage, hotspots, PR summary"] + RPM["_MCPSessionReportMixin
report sections, metrics detail"] + STM["_MCPSessionStateMixin
gates, review markers, runs"] + BR["_MCPSessionBlastRadiusMixin
blast radius computation"] + IM["_MCPSessionIntentMixin
change intent lifecycle"] + PC["_MCPSessionPatchContractMixin
budget and verify"] + RR["_MCPSessionReviewReceiptMixin
audit receipt composition"] + CG["_MCPSessionClaimGuardMixin
citation-based validation"] + S["MCPSession"] + + F --> CP --> AA --> RSB --> SM --> RPM --> STM --> BR --> IM --> PC --> RR --> CG --> S + + style S stroke:#6366f1,stroke-width:2px + style CG fill:#f0fdf4 + style RR fill:#f0fdf4 + style PC fill:#f0fdf4 + style IM fill:#f0fdf4 + style BR fill:#f0fdf4 +``` + +--- ## Install === "Standalone tool" - ```bash title="Install the MCP launcher as a standalone tool" + ```bash uv tool install "codeclone[mcp]" ``` -=== "Existing environment" +=== "Project environment" - ```bash title="Install the MCP extra into the current environment" + ```bash uv pip install "codeclone[mcp]" ``` -## Quick client setup +--- -If `codeclone-mcp` is already on your `PATH`, both Claude Code and Codex can -register it directly as a local stdio server. +## Client setup -### Claude Code +All clients use the same server. Only the registration format differs. -```bash -claude mcp add codeclone -- codeclone-mcp --transport stdio -claude mcp list -``` +=== "Claude Code" -Use `--scope project` if you want Claude Code to store the shared config in -`.mcp.json` for the repository instead of your local user state. + ```bash + claude mcp add codeclone -- codeclone-mcp --transport stdio + ``` -### Codex + Use `--scope project` to store config in `.mcp.json` for the repository. -```bash -codex mcp add codeclone -- codeclone-mcp --transport stdio -codex mcp list -``` +=== "Codex" + + ```bash + codex mcp add codeclone -- codeclone-mcp --transport stdio + ``` -If you installed CodeClone into a project virtual environment rather than a -global tool path, use the full launcher path instead of bare `codeclone-mcp`. + A native plugin also ships in `plugins/codeclone/`. + See [Codex plugin guide](codex-plugin.md). -### Codex plugin +=== "Cursor" -A native Codex plugin ships in `plugins/codeclone/` with repo-local -discovery, a `.mcp.json` definition, and two skills (review + hotspots). -See [Codex plugin guide](codex-plugin.md). 
+ Add to `.cursor/mcp.json`: -### Claude Desktop bundle + ```json + { + "mcpServers": { + "codeclone": { + "command": "codeclone-mcp", + "args": ["--transport", "stdio"] + } + } + } + ``` -A local `.mcpb` bundle ships in `extensions/claude-desktop-codeclone/` with -pre-loaded instructions and auto-discovery of the launcher. -See [Claude Desktop bundle guide](claude-desktop-bundle.md). +=== "Claude Desktop" -## Start the server + A local `.mcpb` bundle ships in `extensions/claude-desktop-codeclone/`. + See [Claude Desktop bundle guide](claude-desktop-bundle.md). -**Local agents** (Claude Code, Codex, Copilot Chat, Gemini CLI): +=== "JSON config (generic)" -```bash title="Start a local stdio MCP server" -codeclone-mcp --transport stdio -``` + ```json + { + "mcpServers": { + "codeclone": { + "command": "codeclone-mcp", + "args": ["--transport", "stdio"] + } + } + } + ``` + + Works with Copilot Chat, Gemini CLI, and other MCP-capable clients. + +If `codeclone-mcp` is not on `PATH`, use the full launcher path. -MCP analysis tools require an absolute repository root. Relative roots such as -`.` are rejected, because the server process working directory may differ from -the client workspace. The same absolute-path rule applies to `check_*` tools -when a `root` filter is provided. +--- -!!! note "Absolute roots are required" - MCP tool requests must pass an absolute repository root. This keeps runs - deterministic across clients whose working directories may differ from the - visible workspace path. 
+## Server -**Remote / HTTP-only clients:** +### Transports -```bash title="Start the optional HTTP transport locally" +| Transport | Default | Use case | +|-------------------|---------|---------------------------------| +| `stdio` | Yes | Local agents, IDEs, CLI clients | +| `streamable-http` | No | Remote clients, Responses API | + +```bash title="Local (default)" +codeclone-mcp --transport stdio +``` + +```bash title="HTTP (loopback only)" codeclone-mcp --transport streamable-http --host 127.0.0.1 --port 8000 ``` !!! warning "Remote exposure is opt-in" - Non-loopback hosts require `--allow-remote`, and the built-in HTTP server - does not provide authentication. Use it only on trusted networks or behind - your own authenticated reverse proxy. +Non-loopback hosts require `--allow-remote`. The built-in HTTP server +has no authentication. Use it only on trusted networks or behind an +authenticated reverse proxy. + +### Run retention + +Run history is bounded: default `4`, max `10` (`--history-limit`). +Runs are in-memory only and do not survive process restart. -Non-loopback hosts require `--allow-remote` (no built-in auth). -When `--allow-remote` is enabled, any reachable network client can trigger -CPU-intensive analysis, read results, and probe repository-relative paths -through MCP request parameters. Use it only on trusted networks. For anything -production-adjacent, put the server behind a firewall or a reverse proxy with -authentication. +### Absolute roots -Run retention is bounded: default `4`, max `10` (`--history-limit`). -If a tool request omits `processes`, MCP defers process-count policy to the -core CodeClone runtime. +All analysis tools require an **absolute** repository root. Relative roots +like `.` are rejected because the server working directory may differ from +the client workspace. -Current CodeClone `2.1` MCP surface: `26` tools, `7` fixed resources, and `3` -run-scoped URI templates. 
+--- ## Tool surface -| Tool | Purpose | -|--------------------------|-----------------------------------------------------------------------------------------------------------------------------------| -| `analyze_repository` | Full analysis → compact summary; use `get_run_summary` or `get_production_triage` as the first pass | -| `analyze_changed_paths` | Diff-aware analysis via `changed_paths` or `git_diff_ref`; compact changed-files snapshot | -| `get_run_summary` | Cheapest run snapshot: health, findings, baseline, inventory, active thresholds | -| `get_production_triage` | Production-first view: health, hotspots, suggestions, active thresholds; best first pass for noisy repos | -| `get_blast_radius` | Pre-change risk boundary: direct dependents, clone cohorts, coverage gaps, actionable do-not-touch paths, and review-only context | -| `check_patch_contract` | Pre-edit regression budget or post-edit before/after verification over stored runs, gates, intent scope, and baseline-abuse signals | -| `create_review_receipt` | Deterministic markdown or JSON audit artifact: provenance, scope, blast radius, reviewed findings, patch status, human decisions, and claims-not-made | -| `validate_review_claims` | Citation-based validator for review text against stored run semantics; not NLP or a CI gate | -| `help` | Semantic guide for workflow, change control, analysis profile, baseline, suppressions, review state, changed-scope | -| `compare_runs` | Run-to-run delta: regressions, improvements, health change | -| `list_findings` | Filtered, paginated findings; use after hotspots or `check_*` | -| `get_finding` | Single finding detail by id; defaults to `normal` detail level | -| `get_remediation` | Remediation payload for one finding | -| `list_hotspots` | Priority-ranked hotspot views; preferred before broad listing | -| `get_report_section` | Read report sections; `metrics_detail` is paginated with family/path filters | -| `evaluate_gates` | Evaluate CI gating decisions | 
-| `check_clones` | Clone findings only; narrower than `list_findings` | -| `check_complexity` | Complexity hotspots only | -| `check_coupling` | Coupling hotspots only | -| `check_cohesion` | Cohesion hotspots only | -| `check_dead_code` | Dead-code findings only | -| `generate_pr_summary` | PR-friendly markdown or JSON summary | -| `mark_finding_reviewed` | Session-local review marker (in-memory) | -| `list_reviewed_findings` | List reviewed findings for a run | -| `manage_change_intent` | Declare/check/clear edit intent; list/gc/reset ephemeral workspace intent records for multi-agent coordination | -| `clear_session_runs` | Reset in-memory runs and session state | - -> `check_*` tools query stored runs only. Call `analyze_repository` or -> `analyze_changed_paths` first. - -**Payload conventions:** - -- `check_*` responses include only the relevant health dimension. -- `get_blast_radius` separates edit prohibitions from context: - `do_not_touch` contains actionable negative context such as baselines, - generated CodeClone state, and explicit forbidden paths. Report-only signals - are returned as `review_context`. Long context sections include `total`, - `shown`, and `truncated` summaries. -- `check_patch_contract` does not run analysis. `mode="budget"` reads the - selected stored run and optional intent; `mode="verify"` compares explicit - before/after stored runs and returns `unverified` when either side is missing. - Disabled numeric thresholds are `null`; boolean policy gates use `forbid_*` - names. -- `manage_change_intent(action="list_workspace", root=...)` reads - `.cache/codeclone/intents/` to show active intents from other agents. The - registry is advisory coordination state, not analysis truth. -- `create_review_receipt` does not run analysis or mutate state. It composes - stored report provenance, optional intent/blast-radius state, reviewed - findings, structural delta, patch-contract status, and explicit - claims-not-made into markdown or JSON. 
-- `validate_review_claims` does not run analysis or mutate state. It detects - deterministic overclaims in cited review text: Security Surfaces called - vulnerabilities, report-only families called gates, known findings called new - regressions, reachable dead-code candidates called definitely dead, or fixes - claimed before a post-patch run exists. -- Empty design `check_*` responses may also include a compact - `threshold_context` (`metric`, `threshold`, `measured_units`, - `highest_below_threshold`) to show whether the run is genuinely quiet or - simply below the active threshold. -- Finding responses use short MCP IDs and relative paths by default; - `detail_level=full` restores the compatibility payload with URIs. -- Summary and triage projections keep interpretation compact: `health_scope` - explains what the health score covers, `focus` explains the active view, and - `new_by_source_kind` attributes new findings without widening the payload. -- When baseline comparison is untrusted, summary and triage also expose - `baseline.compared_without_valid_baseline` plus baseline/runtime python tags. -- Summary `diff` also carries compact adoption/API deltas: - `typing_param_permille_delta`, `typing_return_permille_delta`, - `docstring_permille_delta`, `api_breaking_changes`, and `new_api_symbols`. -- When `analyze_repository` or `analyze_changed_paths` receives - `coverage_xml`, summaries include compact `coverage_join` facts. The XML path - may be absolute or relative to the analysis root, and the join remains a - current-run signal rather than baseline truth. -- Run summaries may also include compact `security_surfaces` facts: - item count, category count, production/test split, and `report_only=true`. - This layer inventories exact security-relevant capability surfaces and trust - boundaries; it does not claim vulnerabilities or exploitability. -- When `respect_pyproject=true`, MCP also applies `golden_fixture_paths`. 
- Fully matching golden-fixture clone groups are excluded from active clone and - gate projections but remain visible in the canonical report under the - optional `findings.groups.clones.suppressed.*` bucket. -- Invalid Cobertura XML does not fail `analyze_*`; summaries expose - `coverage_join.status="invalid"` plus `invalid_reason`. Coverage hotspot gate - preview still requires a valid join. -- Run IDs are 8-char hex handles; finding IDs are short prefixed forms. - Both accept the full canonical form as input. -- `metrics_detail(family="overloaded_modules")` exposes the report-only - module-hotspot layer without turning it into findings or gate data. -- `metrics_detail` also accepts `coverage_adoption`, `coverage_join`, - `security_surfaces`, and - `api_surface`. -- `help(topic=...)` is static: meaning, anti-patterns, next step, doc links. -- Start with repo defaults or `pyproject`-resolved thresholds, then lower them - only for an explicit higher-sensitivity exploratory pass. +Current surface: **26 tools**, **7 fixed resources**, **3 URI templates**. + +The surface is organized by workflow phase. Start at the top, drill down +as needed. + +### Phase 1: Analyze + +| Tool | Purpose | +|-------------------------|---------------------------------------------------| +| `analyze_repository` | Full deterministic analysis of one repo root | +| `analyze_changed_paths` | Diff-aware analysis with changed-files projection | + +Both register the result as an in-memory run. All other tools read from +stored runs. + +### Phase 2: Triage + +| Tool | Purpose | +|-------------------------|------------------------------------------------------------| +| `get_run_summary` | Cheapest snapshot: health, findings, baseline status | +| `get_production_triage` | Production-first view: hotspots, suggestions, thresholds | +| `list_hotspots` | Priority-ranked hotspot views by kind | +| `compare_runs` | Run-to-run delta: regressions, improvements, health change | + +!!! 
tip "Start here" +After analysis, call `get_run_summary` or `get_production_triage` first. +Prefer `list_hotspots` or `check_*` before broad `list_findings` calls. + +### Phase 3: Drill down + +| Tool | Purpose | +|-----------------------|-------------------------------------------------------------| +| `list_findings` | Filtered, paginated findings with novelty and scope filters | +| `get_finding` | Single finding detail by short or canonical ID | +| `get_remediation` | Remediation and explainability for one finding | +| `get_report_section` | Read report sections; `metrics_detail` is paginated | +| `evaluate_gates` | Preview CI gating decisions without mutating state | +| `generate_pr_summary` | PR-friendly markdown or JSON summary | + +### Phase 4: Focused checks + +Narrow queries over a single quality dimension. Cheaper than `list_findings` +when you know which dimension to inspect. + +| Tool | Dimension | +|--------------------|--------------------------------| +| `check_clones` | Clone groups | +| `check_complexity` | Cyclomatic complexity hotspots | +| `check_coupling` | Afferent/efferent coupling | +| `check_cohesion` | Module cohesion | +| `check_dead_code` | Dead code candidates | + +### Phase 5: Change control + +The structural change controller workflow. These tools compose over stored +runs and session state without running analysis or mutating the repository. 
+ +```mermaid +sequenceDiagram + participant A as Agent + participant M as MCP Server + participant D as Disk Registry + + A->>M: list_workspace(root) + M->>D: read .cache/codeclone/intents/ + D-->>M: active intents + M-->>A: workspace state + + A->>M: analyze_repository(root) + M-->>A: run registered + + A->>M: declare(scope, intent) + M->>D: write intent record + M-->>A: intent_id, blast_radius, concurrent_intents + + A->>M: get_blast_radius(files) + M-->>A: do_not_touch, review_context + + A->>M: check_patch_contract(mode=budget) + M-->>A: regression budget, headroom + + Note over A: Edit files within scope + + A->>M: analyze_repository(root) + M-->>A: after-run registered + + A->>M: check(changed_files) + M-->>A: clean / expanded / violated + + A->>M: check_patch_contract(mode=verify) + M-->>A: accepted / violated + + A->>M: validate_review_claims(text) + M-->>A: valid / violations + + A->>M: create_review_receipt + M-->>A: audit artifact + + A->>M: clear + M->>D: remove intent record +``` + +| Tool | Purpose | +|--------------------------|---------------------------------------------------------------------------------------------| +| `manage_change_intent` | Intent lifecycle: declare, get, check, clear, list_workspace, gc_workspace, reset_workspace | +| `get_blast_radius` | Pre-change risk boundary: dependents, clone cohorts, do-not-touch, review context | +| `check_patch_contract` | Budget query (`mode=budget`) or post-edit verification (`mode=verify`) | +| `create_review_receipt` | Deterministic audit artifact: provenance, scope, reviewed findings, patch status | +| `validate_review_claims` | Citation-based overclaim detection against stored run semantics | + +??? info "Blast radius: do_not_touch vs review_context" +`do_not_touch` contains actionable edit prohibitions: baselines, generated +state, forbidden paths. `review_context` contains report-only signals: +security boundary inventory, overloaded-module candidates, known baseline +debt. 
Review context is information, not an edit ban. + +??? info "Patch contract modes" +**Budget** reads one stored run and optional intent. Shows regression +headroom per quality dimension before editing. **Verify** compares explicit +before/after stored runs, previews gates, validates scope, and reports +baseline-abuse signals. Missing runs return `status=unverified`. + +### Phase 6: Session management + +| Tool | Purpose | +|--------------------------|-----------------------------------------| +| `mark_finding_reviewed` | Session-local review marker (in-memory) | +| `list_reviewed_findings` | List reviewed findings for a run | +| `clear_session_runs` | Reset in-memory runs and session state | +| `help` | Bounded workflow and contract guidance | + +--- ## Resource surface -Fixed resources: +Resources are read-only views over stored runs. They do not trigger analysis. + +### Fixed resources -| Resource | Content | -|----------------------------------|--------------------------------------------| -| `codeclone://latest/summary` | Latest run summary | -| `codeclone://latest/triage` | Latest production-first triage | -| `codeclone://latest/report.json` | Full canonical report | -| `codeclone://latest/health` | Health score and dimensions | -| `codeclone://latest/gates` | Last gate evaluation result | -| `codeclone://latest/changed` | Changed-files projection (diff-aware runs) | -| `codeclone://schema` | Canonical report shape descriptor | +| URI | Content | +|----------------------------------|-----------------------------------| +| `codeclone://latest/summary` | Latest run summary | +| `codeclone://latest/triage` | Latest production-first triage | +| `codeclone://latest/report.json` | Full canonical report | +| `codeclone://latest/health` | Health score and dimensions | +| `codeclone://latest/gates` | Last gate evaluation result | +| `codeclone://latest/changed` | Changed-files projection | +| `codeclone://schema` | Canonical report shape descriptor | -Run-scoped 
resource templates: +### Run-scoped templates | URI template | Content | |---------------------------------------------------|---------------------------------| @@ -227,181 +362,163 @@ Run-scoped resource templates: | `codeclone://runs/{run_id}/report.json` | Report for a specific run | | `codeclone://runs/{run_id}/findings/{finding_id}` | One finding from a specific run | -Resources and URI templates are read-only views over stored runs; they do not -trigger analysis. - -`codeclone://latest/*` always resolves to the most recent run registered in the -current MCP server session. A later `analyze_repository` or -`analyze_changed_paths` call moves that pointer. -`mark_finding_reviewed` and most `manage_change_intent` state are in-memory. -Workspace intent records are the exception: they are ephemeral coordination -files under `.cache/codeclone/intents/`. MCP still never touches source files, -baselines, report artifacts, or analysis cache data. - -## Recommended workflows - -### Budget-aware first pass +`codeclone://latest/*` always resolves to the most recent run. A later +`analyze_repository` or `analyze_changed_paths` call moves the pointer. -``` -analyze_repository → get_run_summary or get_production_triage -→ list_hotspots or check_* → get_finding → get_remediation -``` - -### Semantic uncertainty recovery +--- -``` -help(topic="workflow" | "change_control" | "analysis_profile" | "baseline" | "coverage" | "suppressions" | "latest_runs" | "review_state" | "changed_scope") -``` +## Workflows -### Change-control edit workflow +### Health check ``` -manage_change_intent(action="list_workspace", root="/abs/repo") -→ analyze_repository -→ manage_change_intent(action="declare", scope={...}) -→ get_blast_radius -→ check_patch_contract(mode="budget") -→ edit within scope -→ analyze_repository -→ manage_change_intent(action="check", changed_files=[...]) -→ check_patch_contract(mode="verify", before_run_id=..., after_run_id=...) 
-→ validate_review_claims(text="...") -→ create_review_receipt -→ manage_change_intent(action="clear") +analyze_repository + -> get_run_summary or get_production_triage + -> list_hotspots or check_* + -> get_finding -> get_remediation ``` -### Full repository review +### PR review ``` -analyze_repository → get_production_triage -→ list_hotspots(kind="highest_priority") → get_finding → evaluate_gates +analyze_changed_paths(changed_paths=[...] or git_diff_ref="HEAD~1") + -> list_findings(sort_by="priority") + -> get_finding -> get_remediation + -> generate_pr_summary ``` -### Conservative first pass, then deeper review +### Change control ``` -analyze_repository(api_surface=true) # when you need API inventory/diff -→ help(topic="analysis_profile") when you need finer-grained local review -→ analyze_repository(min_loc=..., min_stmt=..., ...) as an explicit higher-sensitivity pass -→ compare_runs +manage_change_intent(action="list_workspace") + -> analyze_repository + -> manage_change_intent(action="declare", scope={...}) + -> get_blast_radius(files=[...]) + -> check_patch_contract(mode="budget") + -> [edit within scope] + -> analyze_repository + -> manage_change_intent(action="check", changed_files=[...]) + -> check_patch_contract(mode="verify", before_run_id=..., after_run_id=...) + -> validate_review_claims(text="...") + -> create_review_receipt + -> manage_change_intent(action="clear") ``` -### Coverage hotspot review +### Coverage review ``` analyze_repository(coverage_xml="coverage.xml") -→ metrics_detail(family="coverage_join") -→ evaluate_gates(fail_on_untested_hotspots=true, coverage_min=50) - -Coverage Join in MCP separates measured `coverage_hotspots` from -`scope_gap_hotspots` (functions outside the supplied `coverage.xml` scope). 
+ -> get_report_section(section="metrics_detail", family="coverage_join") + -> evaluate_gates(fail_on_untested_hotspots=true, coverage_min=50) ``` -### Changed-files review (PR / patch) +### Session review loop ``` -analyze_changed_paths → get_report_section(section="changed") -→ list_findings(changed_paths=..., sort_by="priority") → get_remediation → generate_pr_summary +list_findings -> get_finding -> mark_finding_reviewed + -> list_findings(exclude_reviewed=true) -> ... + -> clear_session_runs ``` -### Session-based review loop - -``` -list_findings → get_finding → mark_finding_reviewed -→ list_findings(exclude_reviewed=true) → … → clear_session_runs -``` +--- ## Prompt patterns Good prompts include **scope**, **goal**, and **constraint**: -```text -# Health check +```text title="Health check" Use codeclone MCP to analyze this repository. Give me a concise structural health summary and the top findings to look at first. +``` -# Changed-files review +```text title="Changed-files review" Use codeclone MCP in changed-files mode for my latest edits. Focus only on findings that touch changed files and rank them by priority. +``` -# Gate preview +```text title="Gate preview" Run codeclone through MCP and preview gating with fail_on_new. Explain the exact reasons. Do not change any files. +``` -# AI-generated code check +```text title="AI-generated code check" I added code with an AI agent. Use codeclone MCP to check for new structural drift. Separate accepted baseline debt from new regressions. ``` -**Tips:** - +!!! tip "Best practices" - Use `analyze_changed_paths` for PRs, not full analysis. - Prefer `get_run_summary` or `get_production_triage` as the first pass. - Prefer `list_hotspots` or narrow `check_*` tools before broad `list_findings`. - Use `get_finding` / `get_remediation` for one finding instead of raising - `detail_level` on larger lists. 
-- Keep `git_diff_ref` to a safe single revision expression; option-like, - whitespace-containing, and punctuated shell-style inputs are rejected. +`detail_level` on larger lists. - Pass an absolute `root` — MCP rejects relative roots like `.`. -- Use `coverage_xml` only with `analysis_mode="full"`; clones-only analysis does - not collect the function-span facts needed for coverage join. -- Use `"production-only"` / `source_kind` filters to cut test/fixture noise. +- Use `coverage_xml` only with `analysis_mode="full"`. +- Use `source_kind="production-only"` to cut test/fixture noise. - Use `mark_finding_reviewed` + `exclude_reviewed=true` in long sessions. -## Client configuration +--- -All clients use the same server — only the registration format differs. +## Payload conventions -### JSON clients (Claude Code, Copilot Chat, Gemini CLI) +Short reference for response structure patterns across the tool surface. -```json -{ - "mcpServers": { - "codeclone": { - "command": "codeclone-mcp", - "args": [ - "--transport", - "stdio" - ] - } - } -} -``` +**IDs** — Run IDs are 8-char hex handles. Finding IDs are short prefixed +forms. Both accept the full canonical form as input. -### Codex / OpenAI +**Detail levels** — `summary` (default for lists), `normal` (default for +single finding), `full` (compatibility payload with URIs). -```toml -[mcp_servers.codeclone] -enabled = true -command = "codeclone-mcp" -args = ["--transport", "stdio"] -``` +**Pagination** — `list_findings`, `list_hotspots`, and +`get_report_section(section="metrics_detail")` support `offset` and `limit`. -For the Responses API or remote-only clients, use `streamable-http`. +**Changed-scope filters** — `list_findings`, `list_hotspots`, and +`generate_pr_summary` accept `changed_paths` or `git_diff_ref` for PR +projection. -If `codeclone-mcp` is not on `PATH`, use an absolute path to the launcher. 
+**Threshold context** — Empty `check_*` responses include +`threshold_context` showing whether the run is genuinely quiet or simply +below the active threshold. + +**Budget nulls** — `check_patch_contract` uses `null` for disabled numeric +thresholds. Boolean policy gates use `forbid_*` names. + +**Long context** — `do_not_touch`, `review_context`, and similar sections +include `total`, `shown`, and `truncated` summaries. + +--- ## Security -- Read-only with respect to source, baselines, reports, and analysis cache data. -- Run history and review markers are in-memory only. Change intents are - in-memory for session truth, with optional ephemeral coordination records - under `.cache/codeclone/intents/`. -- Repository access is limited to what the server process can read locally. -- `streamable-http` binds to loopback by default; `--allow-remote` is explicit opt-in. +| Property | Guarantee | +|-------------------|------------------------------------------------------------| +| Read-only | Never mutates source, baseline, cache, or report artifacts | +| Default transport | Local `stdio` | +| Remote exposure | Explicit `--allow-remote` required for non-loopback | +| Lazy loading | Base `codeclone` install does not require MCP packages | +| Repository access | Limited to what the server process can read locally | +| Session state | In-memory only; does not survive restart | +| Workspace intents | Ephemeral coordination under `.cache/codeclone/intents/` | + +--- ## Troubleshooting -| Problem | Fix | -|-----------------------------------------------------------|--------------------------------------------------------------------------------| -| `CodeClone MCP support requires the optional 'mcp' extra` | `uv tool install "codeclone[mcp]"` or `uv pip install "codeclone[mcp]"` | -| Client cannot find `codeclone-mcp` | `uv tool install "codeclone[mcp]"` or use an absolute launcher path | -| Client only accepts remote MCP | Use `streamable-http` transport | -| Agent reads 
stale results | Call `analyze_repository` again; `latest` always points to the most recent run | -| `changed_paths` rejected | Pass a `list[str]` of repo-relative paths, not a comma-separated string | +| Problem | Fix | +|-----------------------------------------------------------|---------------------------------------------------------| +| `CodeClone MCP support requires the optional 'mcp' extra` | `uv tool install "codeclone[mcp]"` | +| Client cannot find `codeclone-mcp` | `uv tool install "codeclone[mcp]"` or use absolute path | +| Client only accepts remote MCP | Use `streamable-http` transport | +| Agent reads stale results | Call `analyze_repository` again | +| `changed_paths` rejected | Pass a `list[str]` of repo-relative paths | +| Relative root rejected | Use absolute path, not `.` | + +--- ## See also -- [book/20-mcp-interface.md](book/20-mcp-interface.md) — formal interface contract -- [book/08-report.md](book/08-report.md) — canonical report contract -- [book/09-cli.md](book/09-cli.md) — CLI reference +- [MCP Interface Contract](book/20-mcp-interface.md) — formal tool and resource contract +- [Structural Change Controller](book/24-structural-change-controller.md) — change control workflow +- [Claim Guard](book/28-claim-guard.md) — citation-based review validation +- [CLI Reference](book/09-cli.md) — command-line interface +- [Report Contract](book/08-report.md) — canonical report schema diff --git a/mkdocs.yml b/mkdocs.yml index aea988c6..16f72981 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -76,7 +76,11 @@ markdown_extensions: - pymdownx.inlinehilite - - pymdownx.superfences + - pymdownx.superfences: + custom_fences: + - name: mermaid + class: mermaid + format: !!python/name:pymdownx.superfences.fence_code_format - pymdownx.tabbed: alternate_style: true From 82d432b29dd4cff9e0a05939aba5899b2562cc7b Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sun, 24 May 2026 22:46:36 +0500 Subject: [PATCH 017/318] fix(cli): remove unreachable return and fix 
nargs/version tracking Remove dead return after _contract_error_result() in blast_radius.py which already returns ExitCode.CONTRACT_ERROR. Handle nargs="+" in test_options_spec_coverage._cli_sample() so argparse list result matches expected value. Sync GitHub Action default package version to 2.1.0a1 in both _action_impl.py and action.yml. --- .github/actions/codeclone/_action_impl.py | 2 +- .github/actions/codeclone/action.yml | 2 +- codeclone/surfaces/cli/blast_radius.py | 1 - tests/test_options_spec_coverage.py | 2 ++ 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/actions/codeclone/_action_impl.py b/.github/actions/codeclone/_action_impl.py index bfb2d364..692f8a28 100644 --- a/.github/actions/codeclone/_action_impl.py +++ b/.github/actions/codeclone/_action_impl.py @@ -25,7 +25,7 @@ from typing import Literal COMMENT_MARKER = "" -DEFAULT_CODECLONE_PACKAGE_VERSION = "2.0.2" +DEFAULT_CODECLONE_PACKAGE_VERSION = "2.1.0a1" @dataclass(frozen=True, slots=True) diff --git a/.github/actions/codeclone/action.yml b/.github/actions/codeclone/action.yml index eff91fde..24aa8478 100644 --- a/.github/actions/codeclone/action.yml +++ b/.github/actions/codeclone/action.yml @@ -18,7 +18,7 @@ inputs: package-version: description: "CodeClone version from PyPI for remote installs (ignored when the action runs from the checked-out CodeClone repo)" required: false - default: "2.0.2" + default: "2.1.0a1" path: description: "Project root" diff --git a/codeclone/surfaces/cli/blast_radius.py b/codeclone/surfaces/cli/blast_radius.py index 6a0ce17e..b12a4f40 100644 --- a/codeclone/surfaces/cli/blast_radius.py +++ b/codeclone/surfaces/cli/blast_radius.py @@ -182,7 +182,6 @@ def render_blast_radius( console=console, message="Blast radius requires a canonical report document.", ) - return int(ExitCode.CONTRACT_ERROR) origin_paths = _validated_origin_paths( report_document=report_document, diff --git a/tests/test_options_spec_coverage.py 
b/tests/test_options_spec_coverage.py index 850d311b..392d317f 100644 --- a/tests/test_options_spec_coverage.py +++ b/tests/test_options_spec_coverage.py @@ -28,6 +28,8 @@ def _cli_sample(option: OptionSpec) -> tuple[tuple[str, ...], object]: return ((option.flags[0],), False) if option.value_type is int: return ((option.flags[0], "7"), 7) + if option.nargs == "+": + return ((option.flags[0], "sample-value"), ["sample-value"]) return ((option.flags[0], "sample-value"), "sample-value") From 55231a52c8595a1e329ce5fe4466d43bcce724c6 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Mon, 25 May 2026 08:29:17 +0500 Subject: [PATCH 018/318] docs(mcp): tighten change-control workflow guidance --- CLAUDE.md | 32 ++++++++++++--- docs/book/24-structural-change-controller.md | 39 +++++++++++-------- docs/mcp.md | 13 ++++--- .../skills/codeclone-change-control/SKILL.md | 31 ++++++++++----- 4 files changed, 77 insertions(+), 38 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 6f71594c..767da93c 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -16,6 +16,11 @@ When permitted to edit code, follow the change control workflow below. ## Change control workflow +This workflow is mandatory protocol, not advisory text. Do not skip, replace, +reorder, or approximate these steps. If a required MCP call fails or is +unavailable, stop and report the blocker instead of continuing as a normal +edit. + Before editing any repository files: 1. `manage_change_intent(action="list_workspace", root="")` @@ -27,18 +32,35 @@ Before editing any repository files: 5. `check_patch_contract(mode="budget")` 6. Edit within declared scope only 7. `analyze_repository(root="")` — re-run after edits -8. `manage_change_intent(action="check", ...)` then - `check_patch_contract(mode="verify")` -9. `manage_change_intent(action="clear")` +8. 
`manage_change_intent(action="check", intent_id=..., changed_files=[...])` + — pass the original `intent_id` explicitly and provide either + `changed_files` or `diff_ref` (the intent is bound to the before-run; + without `intent_id`, `_resolve_intent` looks up the latest run and + misses it) +9. `check_patch_contract(mode="verify", before_run_id=..., + after_run_id=..., intent_id=...)` — verify compares the intent's + `report_digest` against the before-run; redeclare on the after-run + would cause an `expired` mismatch +10. `manage_change_intent(action="clear")` ### Rules -- Never edit files without declaring intent first. -- Never silently expand scope — redeclare with expanded scope. +- MUST NOT edit files without declaring intent first. +- MUST NOT silently expand scope — redeclare with expanded scope before + editing the extra file. +- MUST NOT redeclare on the after-run. Re-declare only to expand scope before + editing or to start a separate change. +- MUST NOT call the `check` action without exactly one changed-scope source: + `changed_files` or `diff_ref`. +- After re-analyze, pass `intent_id` explicitly to + `check`/`get`/`verify` — otherwise `_resolve_intent` resolves by + latest run_id and misses intents bound to the before-run. - `do_not_touch` is a hard boundary. `review_context` is context, not a ban. - Do not update baselines, cache, or generated reports. - If `list_workspace` shows overlapping foreign intent, stop and coordinate. - CodeClone findings are the source of truth — do not reinterpret. +- If `check_patch_contract(mode="verify")` returns `unverified` or `violated`, + do not claim the patch is verified. - Live foreign intent means **stop**, not kill. Never suggest killing a process without explicit user confirmation that the PID is abandoned. 
diff --git a/docs/book/24-structural-change-controller.md b/docs/book/24-structural-change-controller.md index 95e14a61..5d577c1c 100644 --- a/docs/book/24-structural-change-controller.md +++ b/docs/book/24-structural-change-controller.md @@ -11,15 +11,15 @@ The v2.1 alpha currently includes intent, blast-radius, patch-contract checks, review receipts, workspace intent visibility, claim guard, and CLI controller queries: -| Phase | Status | Surface | -|-------|--------|---------| -| Intent declaration | Live in `2.1.0a1` | MCP `manage_change_intent` | -| Blast radius | Live in `2.1.0a1` | MCP `get_blast_radius`, CLI `--blast-radius` | -| Patch contract | Live in `2.1.0a1` | MCP `check_patch_contract`, CLI `--patch-verify` | -| Review receipt | Live in `2.1.0a1` | MCP `create_review_receipt` | -| Workspace intent registry | Live in `2.1.0a1` | MCP `manage_change_intent` | -| Lease and recovery | Live in `2.1.0a1` | MCP `manage_change_intent` | -| Claim guard | Live in `2.1.0a1` | MCP `validate_review_claims` | +| Phase | Status | Surface | +|---------------------------|-------------------|--------------------------------------------------| +| Intent declaration | Live in `2.1.0a1` | MCP `manage_change_intent` | +| Blast radius | Live in `2.1.0a1` | MCP `get_blast_radius`, CLI `--blast-radius` | +| Patch contract | Live in `2.1.0a1` | MCP `check_patch_contract`, CLI `--patch-verify` | +| Review receipt | Live in `2.1.0a1` | MCP `create_review_receipt` | +| Workspace intent registry | Live in `2.1.0a1` | MCP `manage_change_intent` | +| Lease and recovery | Live in `2.1.0a1` | MCP `manage_change_intent` | +| Claim guard | Live in `2.1.0a1` | MCP `validate_review_claims` | ## Contract @@ -69,14 +69,19 @@ report output flags and baseline update flags. 6. Optionally call `get_blast_radius` for full dependent/context detail. 7. Call `check_patch_contract(mode="budget")` to inspect the active regression budget and metric headroom before editing. -8. 
After editing, call `manage_change_intent(action="check")` with - `changed_files` or `diff_ref`. -9. Run analysis again, then call `check_patch_contract(mode="verify")` with - explicit `before_run_id` and `after_run_id`. -10. Call `validate_review_claims` before publishing a review summary. -11. Call `create_review_receipt` to collect provenance, scope, blast radius, - reviewed findings, patch status, human decision points, and claims-not-made. -12. Call `manage_change_intent(action="clear")` when the edit is complete. +8. Run analysis again after editing (produces the after-run). +9. Call `manage_change_intent(action="check", intent_id=..., changed_files=...)` + with the original `intent_id`. Use `diff_ref=...` instead of + `changed_files=...` when the changed set should come from git. The intent + stays bound to the before-run; `verify` compares its `report_digest` against + the before-run, so redeclaring on the after-run would cause an `expired` + mismatch. +10. Call `check_patch_contract(mode="verify", before_run_id=..., + after_run_id=..., intent_id=...)`. +11. Call `validate_review_claims` before publishing a review summary. +12. Call `create_review_receipt` to collect provenance, scope, blast radius, + reviewed findings, patch status, human decision points, and claims-not-made. +13. Call `manage_change_intent(action="clear")` when the edit is complete. `manage_change_intent` can return `clean`, `expanded`, `violated`, or `expired`. Expiry means the report digest changed since declaration. 
diff --git a/docs/mcp.md b/docs/mcp.md index df44b18d..fb088bb2 100644 --- a/docs/mcp.md +++ b/docs/mcp.md @@ -289,12 +289,13 @@ sequenceDiagram Note over A: Edit files within scope A->>M: analyze_repository(root) - M-->>A: after-run registered + M-->>A: after_run_id registered - A->>M: check(changed_files) + A->>M: check(intent_id, changed_files or diff_ref) + Note right of M: intent stays on before-run; changed scope is explicit M-->>A: clean / expanded / violated - A->>M: check_patch_contract(mode=verify) + A->>M: check_patch_contract(mode=verify, before_run_id, after_run_id, intent_id) M-->>A: accepted / violated A->>M: validate_review_claims(text) @@ -396,9 +397,9 @@ manage_change_intent(action="list_workspace") -> get_blast_radius(files=[...]) -> check_patch_contract(mode="budget") -> [edit within scope] - -> analyze_repository - -> manage_change_intent(action="check", changed_files=[...]) - -> check_patch_contract(mode="verify", before_run_id=..., after_run_id=...) + -> analyze_repository # after-run + -> manage_change_intent(action="check", intent_id=..., changed_files=[...]) + -> check_patch_contract(mode="verify", before_run_id=..., after_run_id=..., intent_id=...) -> validate_review_claims(text="...") -> create_review_receipt -> manage_change_intent(action="clear") diff --git a/plugins/codeclone/skills/codeclone-change-control/SKILL.md b/plugins/codeclone/skills/codeclone-change-control/SKILL.md index 9e9888ad..6981be86 100644 --- a/plugins/codeclone/skills/codeclone-change-control/SKILL.md +++ b/plugins/codeclone/skills/codeclone-change-control/SKILL.md @@ -50,19 +50,26 @@ analysis only. ``` manage_change_intent(action="list_workspace", root=...) 
-→ analyze_repository -→ manage_change_intent(action="declare") +→ analyze_repository # before-run +→ manage_change_intent(action="declare") # intent bound to before-run → get_blast_radius → check_patch_contract(mode="budget") → edit code -→ analyze_repository -→ manage_change_intent(action="check") -→ check_patch_contract(mode="verify") +→ analyze_repository # after-run +→ manage_change_intent(action="check", intent_id=..., changed_files=[...]) +→ check_patch_contract(mode="verify", before_run_id=..., after_run_id=..., intent_id=...) → validate_review_claims → create_review_receipt → manage_change_intent(action="clear") ``` +The intent stays bound to the before-run. After re-analyze, pass `intent_id` +explicitly to `check` and `verify`; without it, `_resolve_intent` resolves by +latest run id and misses the intent. Do not redeclare on the after-run: +`verify` compares the intent's `report_digest` against the before-run, and a +redeclared intent would cause an `expired` mismatch. Use `diff_ref=...` instead +of `changed_files=[...]` when the changed set should come from git. + Older MCP servers may not support `list_workspace`, `validate_review_claims`, or `create_review_receipt`. Skip only unavailable steps and say so explicitly. Keep the pre-edit `run_id` as `before_run_id`; verify against the explicit @@ -92,8 +99,8 @@ analyze_repository → check_patch_contract(mode="budget") → edit code → analyze_repository -→ manage_change_intent(action="check") -→ check_patch_contract(mode="verify") +→ manage_change_intent(action="check", intent_id=..., changed_files=[...]) +→ check_patch_contract(mode="verify", before_run_id=..., after_run_id=..., intent_id=...) → validate_review_claims → create_review_receipt ``` @@ -165,13 +172,17 @@ allowed. 
## Patch verification -After editing, run analysis again, then call: +After editing, run analysis again, then pass the original `intent_id` +explicitly: ``` -manage_change_intent(action="check") -check_patch_contract(mode="verify") +manage_change_intent(action="check", intent_id=..., changed_files=[...]) +check_patch_contract(mode="verify", before_run_id=..., after_run_id=..., intent_id=...) ``` +Use `diff_ref=...` instead of `changed_files=[...]` when the changed set should +come from git. + If the result is `unverified`, report what is missing. Do not claim the patch is verified. From 5a0bb91ed74e9c2b0e0bc49a9827fb6ec73b0814 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Mon, 25 May 2026 08:42:30 +0500 Subject: [PATCH 019/318] docs(claude): require change-control completion --- CLAUDE.md | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/CLAUDE.md b/CLAUDE.md index 767da93c..c0d09f09 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -13,6 +13,8 @@ user explicitly permits it for a specific task. "Реализуй" / "Implement" is explicit permission. "Проверь" / "Validate" is not. When permitted to edit code, follow the change control workflow below. +Creating or editing a spec is also a repository edit. "Spec only" is not a +reason to skip change control. ## Change control workflow @@ -41,7 +43,7 @@ Before editing any repository files: after_run_id=..., intent_id=...)` — verify compares the intent's `report_digest` against the before-run; redeclare on the after-run would cause an `expired` mismatch -10. `manage_change_intent(action="clear")` +10. `manage_change_intent(action="clear", intent_id=...)` ### Rules @@ -52,6 +54,8 @@ Before editing any repository files: editing or to start a separate change. - MUST NOT call the `check` action without exactly one changed-scope source: `changed_files` or `diff_ref`. +- MUST clear the original intent by explicit `intent_id` after successful + verification. 
- After re-analyze, pass `intent_id` explicitly to `check`/`get`/`verify` — otherwise `_resolve_intent` resolves by latest run_id and misses intents bound to the before-run. @@ -61,9 +65,28 @@ Before editing any repository files: - CodeClone findings are the source of truth — do not reinterpret. - If `check_patch_contract(mode="verify")` returns `unverified` or `violated`, do not claim the patch is verified. +- Leaving an active or recoverable own intent behind is a blocked cleanup, not + a completed task. - Live foreign intent means **stop**, not kill. Never suggest killing a process without explicit user confirmation that the PID is abandoned. +### Completion gate + +Do not say "done", "implemented", "validated", "verified", "ready", or +equivalent unless all of these are true: + +1. an after-run was created after the last edit; +2. `manage_change_intent(action="check", intent_id=..., changed_files=...)` + or `diff_ref=...` returned `clean`; +3. `check_patch_contract(mode="verify", before_run_id=..., + after_run_id=..., intent_id=...)` returned `accepted`; +4. any final summary claims passed `validate_review_claims`; +5. `manage_change_intent(action="clear", intent_id=...)` succeeded. + +If any item cannot be completed, report `BLOCKED` or `UNVERIFIED`, include the +`intent_id`, and state the exact missing step. Do not present the work as +finished. 
+ ### When to skip - Read-only tasks (analysis, validation, research) From 458e4737f8b32bf52136c2cd0a702a0bb14faefc Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Mon, 25 May 2026 09:42:22 +0500 Subject: [PATCH 020/318] feat(integrations): add local sync tooling --- .../.cursor-plugin/plugin.json | 27 + plugins/cursor-codeclone/assets/icon.png | Bin 0 -> 2079 bytes plugins/cursor-codeclone/assets/logo.png | Bin 0 -> 2079 bytes plugins/cursor-codeclone/mcp.json | 13 + .../rules/codeclone-python.mdc | 16 + .../rules/codeclone-workflow.mdc | 29 + .../skills/codeclone-change-control/SKILL.md | 244 ++++++++ .../skills/codeclone-hotspots/SKILL.md | 58 ++ .../skills/codeclone-review/SKILL.md | 82 +++ scripts/sync_integrations.py | 534 ++++++++++++++++++ tests/test_cursor_plugin.py | 98 ++++ tests/test_sync_integrations.py | 325 +++++++++++ 12 files changed, 1426 insertions(+) create mode 100644 plugins/cursor-codeclone/.cursor-plugin/plugin.json create mode 100644 plugins/cursor-codeclone/assets/icon.png create mode 100644 plugins/cursor-codeclone/assets/logo.png create mode 100644 plugins/cursor-codeclone/mcp.json create mode 100644 plugins/cursor-codeclone/rules/codeclone-python.mdc create mode 100644 plugins/cursor-codeclone/rules/codeclone-workflow.mdc create mode 100644 plugins/cursor-codeclone/skills/codeclone-change-control/SKILL.md create mode 100644 plugins/cursor-codeclone/skills/codeclone-hotspots/SKILL.md create mode 100644 plugins/cursor-codeclone/skills/codeclone-review/SKILL.md create mode 100644 scripts/sync_integrations.py create mode 100644 tests/test_cursor_plugin.py create mode 100644 tests/test_sync_integrations.py diff --git a/plugins/cursor-codeclone/.cursor-plugin/plugin.json b/plugins/cursor-codeclone/.cursor-plugin/plugin.json new file mode 100644 index 00000000..b72128c0 --- /dev/null +++ b/plugins/cursor-codeclone/.cursor-plugin/plugin.json @@ -0,0 +1,27 @@ +{ + "name": "codeclone", + "version": "2.1.0a1", + "description": "Baseline-aware 
structural code quality analysis for Cursor through the local CodeClone MCP server.", + "author": { + "name": "Den Rozhnovskiy", + "email": "pytelemonbot@mail.ru", + "url": "https://github.com/orenlab" + }, + "homepage": "https://orenlab.github.io/codeclone/cursor-plugin/", + "repository": "https://github.com/orenlab/codeclone", + "license": "MPL-2.0", + "keywords": [ + "codeclone", + "cursor", + "mcp", + "structural-review", + "baseline-aware", + "code-quality", + "clone-detection", + "python" + ], + "rules": "rules/", + "skills": "skills/", + "mcpServers": "mcp.json", + "logo": "assets/logo.png" +} diff --git a/plugins/cursor-codeclone/assets/icon.png b/plugins/cursor-codeclone/assets/icon.png new file mode 100644 index 0000000000000000000000000000000000000000..31388ba302274bfebc1e05666adc73d42c3227b9 GIT binary patch literal 2079 zcmb`Ii&qm@9>;$(o!|si!aHJ-AOxZy!lDtB#|X&#WkCU7VTA_YAh4RQg-oEZmY1vr zQNRXQM1ht-wLGLkSQn(k24vSoQA$`rT}03b3V|dY+<#!to}GKn%;(`+MB4 zBf|Vw=$h*Sz>2^C-_HOL3K76{sL#HX8+?Fe&jWq8M+t^r4|@f(qd)C_y8-L;Htm#q zGaMWe^zw>L+u;KZ`-;fTCThRmIL9^plFog?X6acC9~7s&qWvtkwHoEG+*iUUw==@;{Iw?uy7#skz*|MO#ff6IvG_OGAU$-+wSI4;}Ye%?Q|VXpp^ z>B#VH7dpEnOJEnP7FXw5`eCc( z0r_7%9NGC(@?&RnVoE7F^*FkzAyCV$lShqlv}IO8=g7>>i4MhLjNG21?&VcuAdGA%jkA&r7?Q|X0)sL~CI+bPT|WbvR%w3{VD2(5`t-{_a)E zKga(f+f~}?UN+M9<)pG**5;Um1G_|1C2L($M}L199SnlCDJQQ*t4<~q`RVv$&}Elt zjY^Hjy>>4;trY|V^Ifg5S}>|?uX;e*mc(jaJC^pEay!z#2!)=Vn&_Oxt`K^=EpWW( z4A5Ei-d=dgP!I|+r#ky8Iwm)ZtWko8#JrX~n*yYAFg=4-Yn?=gJ_}X(hru7$B>RG( z7WhGt`+qfnC53RP*Z3hAxlvF0uw!Z`Ac{_{2|W}8X1NbSJPpAC_TVe_N z_ujw)D_}@N`PYT_g{pbU{EI&AeMzA9vc^H}CC-D0E6~26X`a{j(zt282KuVR$JMC@ zP-{S}mtPMT_KAa3kDYr>XFsk$WvPK4U8xf)7lCg_8Ji{xymC?z^Fme&S(YlyzX^pTXu!ke; zycYyLcI1U#-b-gAP~w$cN;APEMvl_@Q|9#+R^o71mzd8}IZ{BUU?GeGoPxy@ILP!U zP#03Lw56$Eg39xQ2luA0D1&|;m>kc4FyY6+NDX@9(9!R9H+x&dj2W?`Jih7ns`W@^ z2NZYAz3!)fOaX|b2Fijw(iEYS@Kh&lbrImwEMg6|7SK1G27eK?93YhuRIb=4{t!pm z8a7zrRH4xy8Rk>DgdNijqFkX(G&|~tyzPqz8R(${kg+*tE}jizCM0jAxr#}Ld75G{ z>rWnBRO*V}*o9_W8X){9=E 
z_#5MA#c0eRHMJGJj3im-oc4mR1G1bg>d7NT2M(46LEDfY#IhN)+_;7<%Lvq% zOZQD@a1?8Fa zuA4Y9WJIjjmhCnmZ`r}P9UP8=g&Z};I<6acFVhkUID@(-i^WjofmHn~sdd7Lyv3%v z6C4hKnwd84I~LUNpl0$+-i|vPRtpTtC}yHNRpLugWssjOHN=noknF~iuQ7ia>s@tj z0d0xM=ckP3-%Cd-*7>=(rFK7S2R_v-_;SHhGwGFso4)Z$ba%k^05*jy_HD22Yjv^E zrz?CZ`r5^!8p+X;ZOCq|s0|eyHja4l&`bSf>)qcQD<<@rv=-GT`oW5rleIrLx|pZi zDAdrWzWTOCdd<+JJlIF*U@toU3h9zBG+*vydZUtn$`)lu%JWRD*pivHH7=C(zG9@o z^0ArDum#>Sl+>vopR0IO%(`DRlAaO5HNR-knb0L}oyfwCEph9tZ@T2(2~NlU-FN)C zXdofErDf0M4H?t0&i(sIv%KkJ{7=IFi@P;qs}?;$(o!|si!aHJ-AOxZy!lDtB#|X&#WkCU7VTA_YAh4RQg-oEZmY1vr zQNRXQM1ht-wLGLkSQn(k24vSoQA$`rT}03b3V|dY+<#!to}GKn%;(`+MB4 zBf|Vw=$h*Sz>2^C-_HOL3K76{sL#HX8+?Fe&jWq8M+t^r4|@f(qd)C_y8-L;Htm#q zGaMWe^zw>L+u;KZ`-;fTCThRmIL9^plFog?X6acC9~7s&qWvtkwHoEG+*iUUw==@;{Iw?uy7#skz*|MO#ff6IvG_OGAU$-+wSI4;}Ye%?Q|VXpp^ z>B#VH7dpEnOJEnP7FXw5`eCc( z0r_7%9NGC(@?&RnVoE7F^*FkzAyCV$lShqlv}IO8=g7>>i4MhLjNG21?&VcuAdGA%jkA&r7?Q|X0)sL~CI+bPT|WbvR%w3{VD2(5`t-{_a)E zKga(f+f~}?UN+M9<)pG**5;Um1G_|1C2L($M}L199SnlCDJQQ*t4<~q`RVv$&}Elt zjY^Hjy>>4;trY|V^Ifg5S}>|?uX;e*mc(jaJC^pEay!z#2!)=Vn&_Oxt`K^=EpWW( z4A5Ei-d=dgP!I|+r#ky8Iwm)ZtWko8#JrX~n*yYAFg=4-Yn?=gJ_}X(hru7$B>RG( z7WhGt`+qfnC53RP*Z3hAxlvF0uw!Z`Ac{_{2|W}8X1NbSJPpAC_TVe_N z_ujw)D_}@N`PYT_g{pbU{EI&AeMzA9vc^H}CC-D0E6~26X`a{j(zt282KuVR$JMC@ zP-{S}mtPMT_KAa3kDYr>XFsk$WvPK4U8xf)7lCg_8Ji{xymC?z^Fme&S(YlyzX^pTXu!ke; zycYyLcI1U#-b-gAP~w$cN;APEMvl_@Q|9#+R^o71mzd8}IZ{BUU?GeGoPxy@ILP!U zP#03Lw56$Eg39xQ2luA0D1&|;m>kc4FyY6+NDX@9(9!R9H+x&dj2W?`Jih7ns`W@^ z2NZYAz3!)fOaX|b2Fijw(iEYS@Kh&lbrImwEMg6|7SK1G27eK?93YhuRIb=4{t!pm z8a7zrRH4xy8Rk>DgdNijqFkX(G&|~tyzPqz8R(${kg+*tE}jizCM0jAxr#}Ld75G{ z>rWnBRO*V}*o9_W8X){9=E z_#5MA#c0eRHMJGJj3im-oc4mR1G1bg>d7NT2M(46LEDfY#IhN)+_;7<%Lvq% zOZQD@a1?8Fa zuA4Y9WJIjjmhCnmZ`r}P9UP8=g&Z};I<6acFVhkUID@(-i^WjofmHn~sdd7Lyv3%v z6C4hKnwd84I~LUNpl0$+-i|vPRtpTtC}yHNRpLugWssjOHN=noknF~iuQ7ia>s@tj z0d0xM=ckP3-%Cd-*7>=(rFK7S2R_v-_;SHhGwGFso4)Z$ba%k^05*jy_HD22Yjv^E zrz?CZ`r5^!8p+X;ZOCq|s0|eyHja4l&`bSf>)qcQD<<@rv=-GT`oW5rleIrLx|pZi 
zDAdrWzWTOCdd<+JJlIF*U@toU3h9zBG+*vydZUtn$`)lu%JWRD*pivHH7=C(zG9@o z^0ArDum#>Sl+>vopR0IO%(`DRlAaO5HNR-knb0L}oyfwCEph9tZ@T2(2~NlU-FN)C zXdofErDf0M4H?t0&i(sIv%KkJ{7=IFi@P;qs}? Path: + return base_dir / f"codeclone-{target_name}" + + +def validate_source(root: Path, allow_dirty: bool) -> SourceInfo: + source_root = root.resolve() + if not (source_root / ".git").exists(): + raise SyncValidationError(f"source {source_root} is not a git repository") + + commit_full = _run_git(source_root, ("rev-parse", "HEAD")) + commit_short = _run_git(source_root, ("rev-parse", "--short", "HEAD")) + dirty = bool(_run_git(source_root, ("status", "--porcelain"))) + if dirty and not allow_dirty: + raise SyncValidationError( + "source tree is dirty (use --allow-dirty to override)" + ) + + return SourceInfo( + commit_short=commit_short, + commit_full=commit_full, + dirty=dirty, + version=_read_version(source_root), + ) + + +def validate_target(path: Path, target_name: str) -> None: + expected_name = f"codeclone-{target_name}" + if path.name != expected_name: + raise SyncValidationError(f"target {path} does not look like {expected_name}") + if not path.exists() or not path.is_dir() or not (path / ".git").exists(): + raise SyncValidationError(f"target {path} does not exist or is not a git repo") + + +def sync_target( + *, + source_root: Path, + target_root: Path, + target: SyncTarget, + allow_dirty: bool, + dry_run: bool, +) -> SyncResult: + source_info = validate_source(source_root, allow_dirty=allow_dirty) + validate_target(target_root, target.name) + _validate_target_definition(target) + + source_root = source_root.resolve() + target_root = target_root.resolve() + denylist = GLOBAL_DENYLIST + target.denylist + source_pairs = _resolve_source_pairs( + source_root=source_root, + target_root=target_root, + target=target, + denylist=denylist, + ) + deletable_paths = _deletable_paths( + source_root=source_root, + target_root=target_root, + target=target, + denylist=denylist, + ) + + 
files_deleted = sum(_count_existing_files(path) for path in deletable_paths) + files_copied = len(source_pairs) + manifest_path = target_root / MANIFEST_NAME + + if not dry_run: + try: + for path in deletable_paths: + _delete_path(path=path, target_root=target_root) + for source_path, destination_path in source_pairs: + _copy_file( + source_path=source_path, + destination_path=destination_path, + target_root=target_root, + ) + manifest = _make_manifest( + source_info=source_info, + target=target, + files_copied=files_copied, + files_deleted=files_deleted, + ) + write_manifest(target_root=target_root, manifest=manifest) + except OSError as exc: + raise SyncCopyError(str(exc)) from exc + + return SyncResult( + target_name=target.name, + files_copied=files_copied, + files_deleted=files_deleted, + manifest_path=manifest_path, + dry_run=dry_run, + ) + + +def write_manifest(*, target_root: Path, manifest: SyncManifest) -> Path: + manifest_path = target_root / MANIFEST_NAME + payload = { + "source_repository": manifest.source_repository, + "source_commit": manifest.source_commit, + "source_commit_full": manifest.source_commit_full, + "source_dirty": manifest.source_dirty, + "codeclone_version": manifest.codeclone_version, + "target": manifest.target, + "synced_at_utc": manifest.synced_at_utc, + "source_paths": list(manifest.source_paths), + "files_copied": manifest.files_copied, + "files_deleted": manifest.files_deleted, + } + _write_json_atomically(manifest_path, payload) + return manifest_path + + +def main(argv: Sequence[str] | None = None) -> int: + parser = argparse.ArgumentParser(description="Sync CodeClone integration repos.") + target_group = parser.add_mutually_exclusive_group(required=True) + target_group.add_argument( + "--target", + choices=tuple(SYNC_TARGETS), + help="sync one target", + ) + target_group.add_argument( + "--all", + action="store_true", + help="sync all targets", + ) + parser.add_argument( + "--base-dir", + type=Path, + default=Path(".."), + 
help="parent directory of distribution repos", + ) + parser.add_argument( + "--allow-dirty", + action="store_true", + help="allow sync from a dirty source tree", + ) + parser.add_argument( + "--dry-run", + action="store_true", + help="print planned operation counts without writing", + ) + args = parser.parse_args(argv) + + source_root = Path.cwd().resolve() + base_dir = _resolve_base_dir(source_root=source_root, base_dir=args.base_dir) + selected = tuple(SYNC_TARGETS) if args.all else (str(args.target),) + + try: + for target_name in selected: + target = SYNC_TARGETS[target_name] + target_root = resolve_target_path(target_name, base_dir).resolve() + result = sync_target( + source_root=source_root, + target_root=target_root, + target=target, + allow_dirty=bool(args.allow_dirty), + dry_run=bool(args.dry_run), + ) + _print_result(result=result, target_root=target_root) + except SyncValidationError as exc: + print(f"error: {exc}", file=sys.stderr) + return 1 + except SyncCopyError as exc: + print(f"error: {exc}", file=sys.stderr) + return 2 + + return 0 + + +def _run_git(root: Path, args: Sequence[str]) -> str: + try: + result = subprocess.run( + ("git", *args), + cwd=root, + check=True, + capture_output=True, + text=True, + ) + except subprocess.CalledProcessError as exc: + message = exc.stderr.strip() or exc.stdout.strip() or str(exc) + raise SyncValidationError(message) from exc + return result.stdout.strip() + + +def _read_version(root: Path) -> str: + text = (root / "pyproject.toml").read_text(encoding="utf-8") + match = re.search(r'^version\s*=\s*"([^"]+)"', text, re.MULTILINE) + if match is None: + raise SyncValidationError("cannot read version from pyproject.toml") + return match.group(1) + + +def _resolve_base_dir(*, source_root: Path, base_dir: Path) -> Path: + if base_dir.is_absolute(): + return base_dir.resolve() + return (source_root / base_dir).resolve() + + +def _validate_target_definition(target: SyncTarget) -> None: + for source, destination in 
target.copies: + _validate_relative_path(source, field="source", allow_dot=False) + _validate_relative_path(destination, field="target", allow_dot=True) + for generated in target.generated: + _validate_relative_path(generated, field="generated", allow_dot=False) + + +def _validate_relative_path(path: str, *, field: str, allow_dot: bool) -> None: + if not path: + raise SyncValidationError(f"{field} path is empty") + candidate = Path(path) + if candidate.is_absolute(): + raise SyncValidationError(f"{field} path must be relative: {path}") + if path == "." and allow_dot: + return + if path == ".": + raise SyncValidationError(f"{field} path cannot be '.': {path}") + if ".." in candidate.parts: + raise SyncValidationError(f"path traversal in {field} path: {path}") + + +def _resolve_source_pairs( + *, + source_root: Path, + target_root: Path, + target: SyncTarget, + denylist: tuple[str, ...], +) -> list[tuple[Path, Path]]: + pairs: list[tuple[Path, Path]] = [] + for source_rel, destination_rel in target.copies: + source_path = _resolve_inside(root=source_root, relative=source_rel) + if not source_path.exists(): + raise SyncValidationError(f"source path does not exist: {source_rel}") + destination_base = ( + target_root + if destination_rel == "." + else _resolve_inside(root=target_root, relative=destination_rel) + ) + if source_path.is_dir(): + for file_path in _iter_source_files(source_path, denylist): + relative_file = file_path.relative_to(source_path) + pairs.append((file_path, destination_base / relative_file)) + elif source_path.is_file(): + relative_name = source_path.name if destination_rel == "." 
else "" + destination_path = ( + destination_base / relative_name if relative_name else destination_base + ) + if not _is_denied(source_path.name, denylist): + pairs.append((source_path, destination_path)) + else: + raise SyncValidationError(f"unsupported source path: {source_rel}") + return sorted(pairs, key=lambda item: item[1].as_posix()) + + +def _deletable_paths( + *, + source_root: Path, + target_root: Path, + target: SyncTarget, + denylist: tuple[str, ...], +) -> list[Path]: + paths: list[Path] = [] + for source_rel, destination_rel in target.copies: + if destination_rel != ".": + paths.append(_resolve_inside(root=target_root, relative=destination_rel)) + continue + source_path = _resolve_inside(root=source_root, relative=source_rel) + if source_path.is_dir(): + paths.extend( + target_root / child.name + for child in sorted(source_path.iterdir(), key=lambda path: path.name) + if not _is_denied(child.name, denylist) + ) + elif source_path.is_file() and not _is_denied(source_path.name, denylist): + paths.append(target_root / source_path.name) + + paths.extend( + _resolve_inside(root=target_root, relative=generated) + for generated in target.generated + ) + + return sorted(set(paths), key=lambda path: path.as_posix()) + + +def _resolve_inside(*, root: Path, relative: str) -> Path: + _validate_relative_path(relative, field="path", allow_dot=True) + resolved_root = root.resolve() + resolved = (resolved_root / relative).resolve() + if not _is_relative_to(resolved, resolved_root): + raise SyncValidationError(f"path escapes target root: {relative}") + return resolved + + +def _is_relative_to(path: Path, root: Path) -> bool: + try: + path.relative_to(root) + except ValueError: + return False + return True + + +def _iter_source_files(source_path: Path, denylist: tuple[str, ...]) -> list[Path]: + files: list[Path] = [] + for current_root, dirnames, filenames in os.walk(source_path): + current = Path(current_root) + relative_root = current.relative_to(source_path) + 
dirnames[:] = [ + dirname + for dirname in sorted(dirnames) + if not _is_denied(_join_relative(relative_root, dirname), denylist) + ] + for filename in sorted(filenames): + file_path = current / filename + relative_file = _join_relative(relative_root, filename) + if _is_denied(relative_file, denylist): + continue + if file_path.is_symlink(): + raise SyncValidationError(f"refusing to copy symlink: {file_path}") + files.append(file_path) + return files + + +def _join_relative(relative_root: Path, name: str) -> str: + if str(relative_root) == ".": + return name + return (relative_root / name).as_posix() + + +def _is_denied(relative_path: str, denylist: tuple[str, ...]) -> bool: + normalized = relative_path.replace("\\", "/") + for pattern in denylist: + if fnmatch(normalized, pattern): + return True + if pattern.endswith("/**"): + prefix = pattern[:-3] + if normalized == prefix or normalized.startswith(f"{prefix}/"): + return True + return False + + +def _count_existing_files(path: Path) -> int: + if not path.exists(): + return 0 + if path.is_file() or path.is_symlink(): + return 1 + return sum(1 for child in path.rglob("*") if child.is_file() or child.is_symlink()) + + +def _delete_path(*, path: Path, target_root: Path) -> None: + if path == target_root: + raise SyncCopyError("refusing to delete target root") + if not _is_relative_to(path.resolve(), target_root.resolve()): + raise SyncCopyError(f"refusing to delete outside target root: {path}") + if path.is_dir() and not path.is_symlink(): + shutil.rmtree(path) + elif path.exists(): + path.unlink() + + +def _copy_file( + *, + source_path: Path, + destination_path: Path, + target_root: Path, +) -> None: + if not _is_relative_to(destination_path.resolve().parent, target_root.resolve()): + raise SyncCopyError(f"refusing to copy outside target root: {destination_path}") + destination_path.parent.mkdir(parents=True, exist_ok=True) + shutil.copy2(source_path, destination_path) + + +def _make_manifest( + *, + source_info: 
SourceInfo, + target: SyncTarget, + files_copied: int, + files_deleted: int, +) -> SyncManifest: + return SyncManifest( + source_repository=SOURCE_REPOSITORY, + source_commit=source_info.commit_short, + source_commit_full=source_info.commit_full, + source_dirty=source_info.dirty, + codeclone_version=source_info.version, + target=target.name, + synced_at_utc=datetime.now(timezone.utc).replace(microsecond=0).isoformat(), + source_paths=tuple(source for source, _ in target.copies), + files_copied=files_copied, + files_deleted=files_deleted, + ) + + +def _write_json_atomically(path: Path, payload: dict[str, object]) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + fd, tmp_name = tempfile.mkstemp( + prefix=f".{path.name}.", + suffix=".tmp", + dir=path.parent, + text=True, + ) + tmp_path = Path(tmp_name) + try: + with os.fdopen(fd, "w", encoding="utf-8") as handle: + json.dump(payload, handle, indent=2, sort_keys=True) + handle.write("\n") + handle.flush() + os.fsync(handle.fileno()) + os.replace(tmp_path, path) + except Exception: + tmp_path.unlink(missing_ok=True) + raise + + +def _print_result(*, result: SyncResult, target_root: Path) -> None: + prefix = "dry-run" if result.dry_run else "sync" + copied = "to copy" if result.dry_run else "copied" + deleted = "to delete" if result.dry_run else "deleted" + print(f"{prefix}: {result.target_name} -> {target_root}") + print(f" manifest: {result.manifest_path}") + print( + f" result: {result.files_copied} {copied}, {result.files_deleted} {deleted}" + ) + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/tests/test_cursor_plugin.py b/tests/test_cursor_plugin.py new file mode 100644 index 00000000..c7f32cd0 --- /dev/null +++ b/tests/test_cursor_plugin.py @@ -0,0 +1,98 @@ +from __future__ import annotations + +import json +import re +from pathlib import Path + + +def _load_json(path: Path) -> object: + return json.loads(path.read_text(encoding="utf-8")) + + +def _frontmatter(text: str) -> 
dict[str, str]: + match = re.match(r"^---\n(?P<body>.*?)\n---\n", text, re.DOTALL) + assert match is not None + fields: dict[str, str] = {} + for line in match.group("body").splitlines(): + key, value = line.split(":", 1) + fields[key.strip()] = value.strip().strip('"') + return fields + + +def test_cursor_plugin_json_is_valid() -> None: + root = Path(__file__).resolve().parents[1] + plugin_root = root / "plugins" / "cursor-codeclone" + manifest = _load_json(plugin_root / ".cursor-plugin" / "plugin.json") + + assert isinstance(manifest, dict) + assert manifest["name"] == "codeclone" + assert manifest["version"] == "2.1.0a1" + assert manifest["license"] == "MPL-2.0" + assert manifest["rules"] == "rules/" + assert manifest["skills"] == "skills/" + assert manifest["mcpServers"] == "mcp.json" + assert manifest["logo"] == "assets/logo.png" + assert (plugin_root / "assets" / "logo.png").is_file() + assert (plugin_root / "assets" / "icon.png").is_file() + + +def test_cursor_mcp_json_is_valid() -> None: + root = Path(__file__).resolve().parents[1] + plugin_root = root / "plugins" / "cursor-codeclone" + mcp_config = _load_json(plugin_root / "mcp.json") + + assert isinstance(mcp_config, dict) + server = mcp_config["mcpServers"]["codeclone"] + assert server == { + "type": "stdio", + "command": "codeclone-mcp", + "args": ["--transport", "stdio"], + "env": {}, + } + + +def test_cursor_rules_have_valid_frontmatter() -> None: + root = Path(__file__).resolve().parents[1] + rules_root = root / "plugins" / "cursor-codeclone" / "rules" + workflow = (rules_root / "codeclone-workflow.mdc").read_text(encoding="utf-8") + python = (rules_root / "codeclone-python.mdc").read_text(encoding="utf-8") + + workflow_fields = _frontmatter(workflow) + python_fields = _frontmatter(python) + assert workflow_fields["alwaysApply"] == "true" + assert "CodeClone MCP integration rules" in workflow_fields["description"] + assert python_fields["globs"] == "**/*.py" + assert "Use MCP tools only" in workflow + 
assert "Do not fall back to CLI or local report files." in workflow + assert "Run CodeClone analysis before making structural changes." in python + + +def test_cursor_skills_match_codex_skills() -> None: + root = Path(__file__).resolve().parents[1] + cursor_skills = root / "plugins" / "cursor-codeclone" / "skills" + codex_skills = root / "plugins" / "codeclone" / "skills" + + for skill_name in ( + "codeclone-review", + "codeclone-hotspots", + "codeclone-change-control", + ): + cursor_text = (cursor_skills / skill_name / "SKILL.md").read_text( + encoding="utf-8" + ) + codex_text = (codex_skills / skill_name / "SKILL.md").read_text( + encoding="utf-8" + ) + assert _frontmatter(cursor_text) == _frontmatter(codex_text) + + +def test_cursor_plugin_version_matches_pyproject() -> None: + root = Path(__file__).resolve().parents[1] + manifest = _load_json( + root / "plugins" / "cursor-codeclone" / ".cursor-plugin" / "plugin.json" + ) + pyproject = (root / "pyproject.toml").read_text(encoding="utf-8") + match = re.search(r'^version\s*=\s*"([^"]+)"', pyproject, re.MULTILINE) + assert match is not None + assert isinstance(manifest, dict) + assert manifest["version"] == match.group(1) diff --git a/tests/test_sync_integrations.py b/tests/test_sync_integrations.py new file mode 100644 index 00000000..b0051a53 --- /dev/null +++ b/tests/test_sync_integrations.py @@ -0,0 +1,325 @@ +from __future__ import annotations + +import json +import subprocess +from dataclasses import replace +from pathlib import Path + +import pytest + +from scripts.sync_integrations import ( + SYNC_TARGETS, + SyncTarget, + SyncValidationError, + main, + sync_target, + validate_source, + validate_target, +) + + +def _write(path: Path, text: str = "content\n") -> None: + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(text, encoding="utf-8") + + +def _git(path: Path, *args: str) -> None: + subprocess.run( + ("git", *args), + cwd=path, + check=True, + capture_output=True, + text=True, + ) + + 
+def _init_git(path: Path) -> None: + path.mkdir(parents=True, exist_ok=True) + _git(path, "init") + _git(path, "config", "user.email", "tests@example.invalid") + _git(path, "config", "user.name", "CodeClone Tests") + + +def _commit_all(path: Path) -> None: + _git(path, "add", ".") + _git(path, "commit", "-m", "fixture") + + +def _make_source(tmp_path: Path) -> Path: + source = tmp_path / "source" + _init_git(source) + _write( + source / "pyproject.toml", + '[project]\nname = "codeclone"\nversion = "9.8.7"\n', + ) + _write(source / "plugins" / "codeclone" / "README.md", "# Codex\n") + _write(source / "plugins" / "codeclone" / "skills" / "review" / "SKILL.md") + _write(source / ".agents" / "plugins" / "marketplace.json", '{"plugins":[]}\n') + _write( + source / "extensions" / "claude-desktop-codeclone" / "manifest.json", + "{}\n", + ) + _write(source / "extensions" / "vscode-codeclone" / "package.json", "{}\n") + _write(source / "extensions" / "vscode-codeclone" / "src" / "extension.js") + _write( + source / "plugins" / "cursor-codeclone" / ".cursor-plugin" / "plugin.json", + "{}\n", + ) + _write(source / "plugins" / "cursor-codeclone" / "rules" / "workflow.mdc") + _commit_all(source) + return source + + +def _make_target(tmp_path: Path, name: str) -> Path: + target = tmp_path / f"codeclone-{name}" + _init_git(target) + return target + + +def _load_manifest(target: Path) -> dict[str, object]: + payload = json.loads((target / "SYNC_MANIFEST.json").read_text(encoding="utf-8")) + assert isinstance(payload, dict) + return payload + + +def test_sync_copies_files_and_writes_manifest(tmp_path: Path) -> None: + source = _make_source(tmp_path) + target = _make_target(tmp_path, "codex") + + result = sync_target( + source_root=source, + target_root=target, + target=SYNC_TARGETS["codex"], + allow_dirty=False, + dry_run=False, + ) + + assert result.files_copied == 3 + assert result.files_deleted == 0 + assert (target / "plugins" / "codeclone" / "README.md").is_file() + assert 
(target / ".agents" / "plugins" / "marketplace.json").is_file() + manifest = _load_manifest(target) + assert { + "source_repository": manifest["source_repository"], + "source_dirty": manifest["source_dirty"], + "codeclone_version": manifest["codeclone_version"], + "target": manifest["target"], + "files_copied": manifest["files_copied"], + "files_deleted": manifest["files_deleted"], + } == { + "source_repository": "orenlab/codeclone", + "source_dirty": False, + "codeclone_version": "9.8.7", + "target": "codex", + "files_copied": 3, + "files_deleted": 0, + } + + +def test_sync_deletes_only_allowlisted_paths(tmp_path: Path) -> None: + source = _make_source(tmp_path) + target = _make_target(tmp_path, "codex") + _write(target / "plugins" / "codeclone" / "stale.txt") + _write(target / ".github" / "workflows" / "ci.yml") + _write(target / "KEEP.md") + _write(target / "SYNC_MANIFEST.json", "{}\n") + + result = sync_target( + source_root=source, + target_root=target, + target=SYNC_TARGETS["codex"], + allow_dirty=False, + dry_run=False, + ) + + assert result.files_deleted == 2 + assert not (target / "plugins" / "codeclone" / "stale.txt").exists() + assert (target / ".github" / "workflows" / "ci.yml").is_file() + assert (target / "KEEP.md").is_file() + + +def test_sync_respects_global_denylist(tmp_path: Path) -> None: + source = _make_source(tmp_path) + _write(source / "plugins" / "codeclone" / "__pycache__" / "x.pyc") + _write(source / "plugins" / "codeclone" / ".DS_Store") + _write(source / "plugins" / "codeclone" / "node_modules" / "pkg" / "index.js") + _commit_all(source) + target = _make_target(tmp_path, "codex") + + sync_target( + source_root=source, + target_root=target, + target=SYNC_TARGETS["codex"], + allow_dirty=False, + dry_run=False, + ) + + assert not (target / "plugins" / "codeclone" / "__pycache__").exists() + assert not (target / "plugins" / "codeclone" / ".DS_Store").exists() + assert not (target / "plugins" / "codeclone" / "node_modules").exists() + + +def 
test_sync_respects_per_target_denylist(tmp_path: Path) -> None: + source = _make_source(tmp_path) + _write(source / "extensions" / "vscode-codeclone" / "secret" / "token.txt") + _commit_all(source) + target = _make_target(tmp_path, "vscode") + target_def = replace(SYNC_TARGETS["vscode"], denylist=("secret/**",)) + + sync_target( + source_root=source, + target_root=target, + target=target_def, + allow_dirty=False, + dry_run=False, + ) + + assert (target / "package.json").is_file() + assert not (target / "secret").exists() + + +def test_sync_dry_run_does_not_write(tmp_path: Path) -> None: + source = _make_source(tmp_path) + target = _make_target(tmp_path, "codex") + + result = sync_target( + source_root=source, + target_root=target, + target=SYNC_TARGETS["codex"], + allow_dirty=False, + dry_run=True, + ) + + assert result.dry_run is True + assert result.files_copied == 3 + assert not (target / "plugins").exists() + assert not (target / "SYNC_MANIFEST.json").exists() + + +def test_sync_rejects_dirty_source_without_flag(tmp_path: Path) -> None: + source = _make_source(tmp_path) + _write(source / "dirty.txt") + + with pytest.raises(SyncValidationError, match="source tree is dirty"): + validate_source(source, allow_dirty=False) + + +def test_sync_allows_dirty_source_with_flag(tmp_path: Path) -> None: + source = _make_source(tmp_path) + _write(source / "dirty.txt") + + source_info = validate_source(source, allow_dirty=True) + + assert source_info.dirty is True + + +def test_sync_rejects_missing_target(tmp_path: Path) -> None: + with pytest.raises(SyncValidationError, match="does not exist"): + validate_target(tmp_path / "codeclone-codex", "codex") + + +def test_sync_rejects_non_git_target(tmp_path: Path) -> None: + target = tmp_path / "codeclone-codex" + target.mkdir() + + with pytest.raises(SyncValidationError, match="not a git repo"): + validate_target(target, "codex") + + +def test_sync_rejects_path_traversal(tmp_path: Path) -> None: + source = _make_source(tmp_path) + 
target = _make_target(tmp_path, "bad") + bad_target = SyncTarget( + name="bad", + copies=(("plugins/codeclone", "../outside"),), + generated=("SYNC_MANIFEST.json",), + ) + + with pytest.raises(SyncValidationError, match="path traversal"): + sync_target( + source_root=source, + target_root=target, + target=bad_target, + allow_dirty=False, + dry_run=False, + ) + + +def test_sync_all_targets(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + source = _make_source(tmp_path) + base_dir = tmp_path / "targets" + for name in SYNC_TARGETS: + _make_target(base_dir, name) + monkeypatch.chdir(source) + + exit_code = main(["--all", "--base-dir", str(base_dir)]) + + assert exit_code == 0 + for name in SYNC_TARGETS: + assert (base_dir / f"codeclone-{name}" / "SYNC_MANIFEST.json").is_file() + + +def test_manifest_fields(tmp_path: Path) -> None: + source = _make_source(tmp_path) + target = _make_target(tmp_path, "cursor") + + sync_target( + source_root=source, + target_root=target, + target=SYNC_TARGETS["cursor"], + allow_dirty=False, + dry_run=False, + ) + + manifest = _load_manifest(target) + assert set(manifest) == { + "codeclone_version", + "files_copied", + "files_deleted", + "source_commit", + "source_commit_full", + "source_dirty", + "source_paths", + "source_repository", + "synced_at_utc", + "target", + } + assert isinstance(manifest["source_commit"], str) + assert isinstance(manifest["source_commit_full"], str) + assert isinstance(manifest["source_dirty"], bool) + assert isinstance(manifest["source_paths"], list) + assert isinstance(manifest["files_copied"], int) + assert isinstance(manifest["files_deleted"], int) + + +def test_flat_layout_copies_to_root(tmp_path: Path) -> None: + source = _make_source(tmp_path) + target = _make_target(tmp_path, "vscode") + + sync_target( + source_root=source, + target_root=target, + target=SYNC_TARGETS["vscode"], + allow_dirty=False, + dry_run=False, + ) + + assert (target / "package.json").is_file() + assert (target / "src" / 
"extension.js").is_file() + assert not (target / "extensions").exists() + + +def test_nested_layout_preserves_structure(tmp_path: Path) -> None: + source = _make_source(tmp_path) + target = _make_target(tmp_path, "codex") + + sync_target( + source_root=source, + target_root=target, + target=SYNC_TARGETS["codex"], + allow_dirty=False, + dry_run=False, + ) + + assert (target / "plugins" / "codeclone" / "README.md").is_file() + assert not (target / "README.md").exists() From 733bf493e66730c776408eedc439500967630613 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Mon, 25 May 2026 09:42:54 +0500 Subject: [PATCH 021/318] docs: refresh project onboarding --- README.md | 494 +++++++++------------------------------- docs/README-pypi.md | 159 ++----------- docs/README.md | 138 +++++------ docs/codex-plugin.md | 52 +++-- docs/getting-started.md | 234 +++++++++++++++++++ mkdocs.yml | 1 + 6 files changed, 450 insertions(+), 628 deletions(-) create mode 100644 docs/getting-started.md diff --git a/README.md b/README.md index 118d43ff..80281fa7 100644 --- a/README.md +++ b/README.md @@ -16,134 +16,40 @@ > -

Structural change controller for Python — deterministic, baseline-aware, built for CI and AI agents

+

Structural change controller for Python

-[![][pypi-shield]][pypi-link] [![][status-shield]][pypi-link] [![][downloads-shield]][pypi-link] [![][python-shield]][pypi-link] [![][score-shield]][score-link] [![][license-shield]][license-link] - -[![][tests-shield]][tests-link] [![][benchmark-shield]][benchmark-link] +[![][pypi-shield]][pypi-link] [![][python-shield]][pypi-link] [![][downloads-shield]][pypi-link] [![][tests-shield]][tests-link] [![][license-shield]][license-link] --- -CodeClone is a **structural change controller** for Python — deterministic static analysis that -combines clone detection, code-quality metrics, and baseline-aware CI gating with first-class -governance for AI coding agents. - -In the current v2.1 alpha, CodeClone records the declared intent before the first edit, maps the -structural blast radius, verifies patches against the patch contract, and -generates auditable review receipts. It also exposes an advisory workspace intent registry so -parallel agents can see overlapping edit scopes before they start, and validates cited review -claims against the canonical report so agents do not overstate report-only signals or known debt. +Deterministic static analysis that combines clone detection, code-quality metrics, +and baseline-aware CI gating — with a structural change controller for AI coding agents. -**One canonical analysis, many surfaces.** CLI, HTML reports, IDE, and MCP all read the same -deterministic facts — for both human reviewers and AI agents. +One canonical analysis, many surfaces: CLI, HTML reports, MCP server, IDE extensions. +Humans and agents operate on the same deterministic facts. 
-Docs: [orenlab.github.io/codeclone](https://orenlab.github.io/codeclone/) · -Live sample report: [orenlab.github.io/codeclone/examples/report/](https://orenlab.github.io/codeclone/examples/report/) +Docs: [orenlab.github.io/codeclone](https://orenlab.github.io/codeclone/) · +[Live report](https://orenlab.github.io/codeclone/examples/report/) > [!NOTE] > This README tracks the in-development **v2.1** line. -> For the latest stable release, see the -> [`v2.0.2` README](https://github.com/orenlab/codeclone/blob/v2.0.2/README.md) -> and the -> [`v2.0.2` docs](https://github.com/orenlab/codeclone/tree/v2.0.2/docs). - -## Change Controller - -When an AI agent edits code, CodeClone governs the structural boundary across five stages: - -| Step | Tool | What it does | -|-------------------------|------------------------|------------------------------------------------------------------------------| -| 1. Check workspace | `manage_change_intent` | Agent sees other active workspace intents before editing | -| 2. Declare intent | `manage_change_intent` | Agent states what it plans to change, which files, and why | -| 3. Map blast radius | `get_blast_radius` | Reverse imports, clone cohorts, dependency cycles, do-not-touch signals | -| 4. Check patch contract | `check_patch_contract` | Pre-edit regression budget with headroom; post-edit boundary verification | -| 5. Generate receipt | `create_review_receipt` | Auditable artifact linking intent, scope, patch status, and structural delta | -| 6. Validate claims | `validate_review_claims` | Cross-check cited review text against canonical report semantics | - -Every step is deterministic — structural facts from the canonical report, no LLM inference. - -The v2.1 alpha ships all six steps as live MCP tools (`manage_change_intent`, `get_blast_radius`, -`check_patch_contract`, `create_review_receipt`, `validate_review_claims`) composed over the -existing read-only analysis surface. 
Intent truth is session-local; workspace coordination records -are ephemeral files under `.cache/codeclone/intents/`. CodeClone still never mutates source files, -baselines, reports, or analysis cache data. - -Change controller docs: [Structural Change Controller](https://orenlab.github.io/codeclone/book/24-structural-change-controller/) - -## Features - -**Change control** - -- **Intent declaration** — agent states what it plans to change; CodeClone tracks scope, expiry, and status -- **Workspace intent registry** — advisory multi-agent visibility for overlapping edit scopes -- **Blast radius** — structural risk projection: reverse imports, clone cohorts, dependency cycles, do-not-touch signals -- **Patch contract** — pre-edit regression budget and post-edit boundary verification over explicit before/after runs -- **Review receipt** — auditable artifact linking intent, scope, patch verification, and structural delta -- **Claim guard** — citation-based validation of review text against canonical report semantics -- **CLI controller queries** — `--blast-radius` before edits and `--patch-verify` before push - -**Baseline governance** - -- **Regression isolation** — separates accepted **legacy** debt from **new regressions**; CI fails only on what changed -- **CI-first** — deterministic output, stable ordering, exit-code contract, pre-commit support -- **Reports** — interactive HTML, JSON, Markdown, SARIF, and text from one canonical report - -**Detection & analysis** - -- **Clone detection** — function (CFG fingerprint), block (statement windows), and segment (report-only) clones -- **Structural findings** — duplicated branch families, clone guard/exit divergence, and clone-cohort drift -- **Quality metrics** — cyclomatic complexity, coupling (CBO), cohesion (LCOM4), dependency cycles, adaptive depth - profile, dead code, health score, and overloaded-module profiling -- **Adoption & API** — type/docstring annotation coverage, public API surface inventory and baseline 
diff -- **Coverage Join** — fuse external Cobertura XML into the current run to surface coverage hotspots and scope gaps -- **Security surfaces** — report-only inventory of security-relevant capability boundaries (no vulnerability claims) - -**Surfaces & integrations** - -- **MCP control surface** — 26-tool agent and IDE interface over the same canonical pipeline; read-only by contract -- **IDE & agent clients** — VS Code extension, Claude Desktop bundle, and Codex plugin over the same MCP contract - -**Performance** - -- **Fast** — incremental caching, parallel processing, warm-run optimization - -## How It Works - -CodeClone runs a single deterministic pipeline and emits one canonical JSON report. Every -other surface — HTML, Markdown, SARIF, MCP, IDE — is a projection of that report, so structural -facts stay consistent across consumers. - -
-Pipeline overview -
-CodeClone pipeline diagram -
- -Architecture: [Architecture narrative](https://orenlab.github.io/codeclone/architecture/) · -CFG semantics: [CFG semantics](https://orenlab.github.io/codeclone/cfg/) +> For the latest stable release see the +> [`v2.0.2` README](https://github.com/orenlab/codeclone/blob/v2.0.2/README.md). -## Installation +## Install ```bash -# recommended -uv tool install codeclone +uv tool install codeclone # recommended +pip install codeclone # or pip -# pip -pip install codeclone - -# with MCP server +# with MCP server for AI agents / IDE uv tool install "codeclone[mcp]" -pip install "codeclone[mcp]" ```
-Run without install +Run without installing ```bash uvx codeclone@latest . @@ -155,28 +61,18 @@ uvx codeclone@latest . ```bash codeclone . # analyze current directory -codeclone . --html # HTML report -codeclone . --html --open-html-report # open in browser -codeclone . --json --md --sarif --text # all formats -codeclone . --ci # CI mode +codeclone . --html --open-html-report # HTML report in browser +codeclone . --ci # CI mode (baseline-aware gating) ```
-More examples +More commands ```bash -# timestamped report snapshots -codeclone . --html --json --timestamped-report-paths - -# changed-scope gating against git diff -codeclone . --changed-only --diff-against main - -# shorthand: diff source for changed-scope review -codeclone . --paths-from-git-diff HEAD~1 - -# structural change controller queries -codeclone . --blast-radius codeclone/core/parser.py -codeclone . --patch-verify --diff-against HEAD~1 +codeclone . --json --md --sarif --text # all report formats +codeclone . --changed-only --diff-against main # changed-scope review +codeclone . --blast-radius codeclone/core/parser.py # structural risk map +codeclone . --patch-verify --diff-against HEAD~1 # patch verification ```
@@ -191,22 +87,15 @@ codeclone . --update-baseline codeclone . --ci ``` -> [!TIP] -> Run `codeclone . --update-baseline` once after install to establish your CI reference point. -> Commit the baseline file — it becomes the contract CI enforces on every push. +`--ci` equals `--fail-on-new --no-color --quiet`. When a trusted metrics baseline +is present, it also enables `--fail-on-new-metrics`. -
-What --ci enables - -The `--ci` preset equals `--fail-on-new --no-color --quiet`. -When a trusted metrics baseline is loaded, CI mode also enables `--fail-on-new-metrics`. - -
+> [!TIP] +> Run `codeclone . --update-baseline` once after install. Commit the baseline +> file — it becomes the contract CI enforces on every push. ### GitHub Action -CodeClone ships a composite GitHub Action for PR and CI workflows: - ```yaml - uses: orenlab/codeclone/.github/actions/codeclone@v2 with: @@ -215,35 +104,19 @@ CodeClone ships a composite GitHub Action for PR and CI workflows: pr-comment: "true" ``` -It runs baseline-aware gating, generates JSON and SARIF reports, uploads SARIF to GitHub Code -Scanning, and posts or updates a PR summary comment. - -Action -docs: [.github/actions/codeclone/README.md](https://github.com/orenlab/codeclone/blob/main/.github/actions/codeclone/README.md) +Runs gating, generates reports, uploads SARIF to Code Scanning, posts a PR summary. +[Action docs](https://github.com/orenlab/codeclone/blob/main/.github/actions/codeclone/README.md) ### Quality Gates ```bash -# Metrics thresholds -codeclone . --fail-complexity 20 --fail-coupling 10 --fail-cohesion 4 --fail-health 60 - -# Structural policies -codeclone . --fail-cycles --fail-dead-code - -# Regression detection vs baseline -codeclone . --fail-on-new-metrics - -# Adoption and API governance -codeclone . --min-typing-coverage 80 --min-docstring-coverage 60 -codeclone . --fail-on-typing-regression --fail-on-docstring-regression -codeclone . --api-surface --update-metrics-baseline -codeclone . --fail-on-api-break - -# Coverage Join — fuse external Cobertura XML into the review -codeclone . --coverage coverage.xml --fail-on-untested-hotspots --coverage-min 50 +codeclone . --fail-complexity 20 --fail-coupling 10 --fail-cohesion 4 +codeclone . --fail-cycles --fail-dead-code --fail-health 60 +codeclone . --fail-on-new-metrics --fail-on-typing-regression +codeclone . 
--coverage coverage.xml --fail-on-untested-hotspots ``` -Gate details: [Metrics and quality gates](https://orenlab.github.io/codeclone/book/15-metrics-and-quality-gates/) +[Gate reference](https://orenlab.github.io/codeclone/book/15-metrics-and-quality-gates/) ### Pre-commit @@ -260,282 +133,123 @@ repos: types: [ python ] ``` -## MCP Control Surface +## What It Detects -A 26-tool MCP server for AI agents and IDE clients, built on the same canonical pipeline as the CLI. -Read-only for source, baselines, reports, and analysis cache data. The change controller may write -ephemeral coordination records under `.cache/codeclone/intents/`. +| Category | What | +|----------|------| +| **Clones** | Function clones (CFG fingerprint), block clones (statement windows), segment clones (report-only) | +| **Structural** | Duplicated branch families, clone guard/exit divergence, clone-cohort drift | +| **Quality metrics** | Cyclomatic complexity, coupling (CBO), cohesion (LCOM4), dependency cycles, dead code, health score | +| **Adoption** | Type annotation and docstring coverage, public API surface inventory | +| **Coverage Join** | Fuses external Cobertura XML to surface coverage hotspots and scope gaps | +| **Security surfaces** | Report-only inventory of security-relevant capability boundaries | -```bash -# local stdio clients -codeclone-mcp --transport stdio +**Baseline governance** separates accepted legacy debt from new regressions — +CI fails only on what changed. Reports render in HTML, JSON, Markdown, SARIF, +and text from one canonical JSON payload. 
-# remote / HTTP-only clients -codeclone-mcp --transport streamable-http -``` +## Change Controller + +The v2.1 structural change controller governs AI-assisted edits across five stages: + +| Stage | Tool | Purpose | +|-------|------|---------| +| Declare intent | `manage_change_intent` | Agent states scope before editing | +| Map blast radius | `get_blast_radius` | Reverse imports, clone cohorts, do-not-touch | +| Check patch contract | `check_patch_contract` | Pre-edit budget / post-edit verification | +| Generate receipt | `create_review_receipt` | Auditable artifact: intent + scope + delta | +| Validate claims | `validate_review_claims` | Cross-check review text against report | + +Every step is deterministic — structural facts from the canonical report, no LLM inference. +Intent is session-local; workspace coordination is ephemeral under `.cache/codeclone/intents/`. -The controller tools — `manage_change_intent`, `get_blast_radius`, -`check_patch_contract`, `create_review_receipt`, and `validate_review_claims` — -are composed over the same canonical surface to govern the structural boundary -and review discipline of AI-assisted edits. +[Change controller docs](https://orenlab.github.io/codeclone/book/24-structural-change-controller/) + +## MCP Server + +26-tool read-only MCP server for AI agents and IDE clients. + +```bash +codeclone-mcp --transport stdio # local clients +codeclone-mcp --transport streamable-http # remote / HTTP clients +``` > [!WARNING] -> Analysis tools require an absolute repository root. Relative roots such as `.` are rejected. -> Keep `stdio` as the default transport for local IDE and agent clients; HTTP exposure beyond -> loopback requires explicit `--allow-remote`. +> Analysis tools require an absolute repository root. Relative roots like `.` are rejected. 
-[MCP usage guide](https://orenlab.github.io/codeclone/mcp/) · +[MCP usage guide](https://orenlab.github.io/codeclone/mcp/) · [MCP interface contract](https://orenlab.github.io/codeclone/book/20-mcp-interface/) -### Native Client Surfaces - -| Surface | Location | Purpose | -|---------------------------|------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------| -| **VS Code extension** | [VS Code Marketplace](https://marketplace.visualstudio.com/items?itemName=orenlab.codeclone) | Triage-first structural review in the editor | -| **Claude Desktop bundle** | [`extensions/claude-desktop-codeclone/`](https://github.com/orenlab/codeclone/tree/main/extensions/claude-desktop-codeclone) | Local `.mcpb` install with pre-loaded instructions | -| **Codex plugin** | [`plugins/codeclone/`](https://github.com/orenlab/codeclone/tree/main/plugins/codeclone) | Native discovery, two skills, and MCP definition | +### Native Clients -All three are native clients over the same `codeclone-mcp` contract — no second analysis engine. 
+| Surface | Install | Docs | +|---------|---------|------| +| **VS Code** | [Marketplace](https://marketplace.visualstudio.com/items?itemName=orenlab.codeclone) | [Guide](https://orenlab.github.io/codeclone/book/21-vscode-extension/) | +| **Claude Desktop** | [`extensions/claude-desktop-codeclone/`](https://github.com/orenlab/codeclone/tree/main/extensions/claude-desktop-codeclone) | [Guide](https://orenlab.github.io/codeclone/book/22-claude-desktop-bundle/) | +| **Codex** | [`plugins/codeclone/`](https://github.com/orenlab/codeclone/tree/main/plugins/codeclone) | [Guide](https://orenlab.github.io/codeclone/book/23-codex-plugin/) | -[VS Code extension docs](https://orenlab.github.io/codeclone/book/21-vscode-extension/) · -[Claude Desktop docs](https://orenlab.github.io/codeclone/book/22-claude-desktop-bundle/) · -[Codex plugin docs](https://orenlab.github.io/codeclone/book/23-codex-plugin/) +All clients connect to the same `codeclone-mcp` contract — no second analysis engine. ## Configuration -CodeClone loads project-level configuration from `pyproject.toml`: - ```toml [tool.codeclone] +baseline = "codeclone.baseline.json" min_loc = 10 min_stmt = 6 -baseline = "codeclone.baseline.json" -golden_fixture_paths = ["tests/fixtures/golden_*"] -skip_metrics = false -quiet = false -html_out = ".cache/codeclone/report.html" -json_out = ".cache/codeclone/report.json" -md_out = ".cache/codeclone/report.md" -sarif_out = ".cache/codeclone/report.sarif" -text_out = ".cache/codeclone/report.txt" block_min_loc = 20 block_min_stmt = 8 -segment_min_loc = 20 -segment_min_stmt = 10 ``` Precedence: CLI flags > `pyproject.toml` > built-in defaults. - -Config reference: [Config and defaults](https://orenlab.github.io/codeclone/book/04-config-and-defaults/) - -## Baseline Workflow - -Baselines capture the current structural state. Once committed, they become the CI reference point. 
- -- Clones are classified as **NEW** (not in baseline) or **KNOWN** (accepted debt) -- `--update-baseline` writes both clone and metrics snapshots -- Trust is verified via `generator`, `fingerprint_version`, and `payload_sha256` -- In `--ci` mode, an untrusted baseline is a contract error (exit 2) - -Full contract: [Baseline contract](https://orenlab.github.io/codeclone/book/06-baseline/) - -## Exit Codes - -| Code | Meaning | -|------|-------------------------------------------------------------------------------| -| `0` | Success | -| `2` | Contract error — untrusted baseline, invalid config, unreadable sources in CI | -| `3` | Gating failure — new clones or metric threshold exceeded | -| `5` | Internal error | - -Contract errors (`2`) take precedence over gating failures (`3`). - -Full policy: [Exit codes and failure policy](https://orenlab.github.io/codeclone/book/03-contracts-exit-codes/) +[Config reference](https://orenlab.github.io/codeclone/book/04-config-and-defaults/) ## Reports -| Format | Flag | Default path | -|----------|-----------|---------------------------------| -| HTML | `--html` | `.cache/codeclone/report.html` | -| JSON | `--json` | `.cache/codeclone/report.json` | -| Markdown | `--md` | `.cache/codeclone/report.md` | -| SARIF | `--sarif` | `.cache/codeclone/report.sarif` | -| Text | `--text` | `.cache/codeclone/report.txt` | - -All formats are rendered from one canonical JSON report. -`--open-html-report` opens the HTML in the default browser. -`--timestamped-report-paths` appends a UTC timestamp to default filenames. 
+| Format | Flag | Default path | +|--------|------|--------------| +| HTML | `--html` | `.cache/codeclone/report.html` | +| JSON | `--json` | `.cache/codeclone/report.json` | +| Markdown | `--md` | `.cache/codeclone/report.md` | +| SARIF | `--sarif` | `.cache/codeclone/report.sarif` | +| Text | `--text` | `.cache/codeclone/report.txt` | -Report contract: [Report contract](https://orenlab.github.io/codeclone/book/08-report/) · +All formats render from one canonical JSON report. +[Report contract](https://orenlab.github.io/codeclone/book/08-report/) · [HTML render](https://orenlab.github.io/codeclone/book/10-html-render/) -
-Canonical JSON report shape (v2.11) - -Full schema contract: [Report contract](https://orenlab.github.io/codeclone/book/08-report/) - -Top-level keys: `report_schema_version`, `meta`, `inventory`, `findings`, `metrics`, `derived`, `integrity`. - -```json -{ - "report_schema_version": "2.11", - "meta": { - "codeclone_version": "2.1.0a1", - "project_name": "...", - "scan_root": ".", - "...": "..." - }, - "inventory": { - "files": {}, - "code": {}, - "file_registry": { - "encoding": "relative_path", - "items": [] - } - }, - "findings": { - "summary": {}, - "groups": { - "clones": { - "functions": [], - "blocks": [], - "segments": [] - }, - "structural": { - "groups": [] - }, - "dead_code": { - "groups": [] - }, - "design": { - "groups": [] - } - } - }, - "metrics": { - "summary": { - "coverage_adoption": {}, - "coverage_join": {}, - "api_surface": {} - }, - "families": { - "coverage_adoption": {}, - "coverage_join": {}, - "api_surface": {} - } - }, - "derived": { - "suggestions": [], - "overview": { - "families": {}, - "top_risks": [], - "health_snapshot": {}, - "directory_hotspots": {} - }, - "hotlists": { - "most_actionable_ids": [], - "highest_spread_ids": [], - "production_hotspot_ids": [] - } - }, - "integrity": { - "canonicalization": { - "version": "1", - "scope": "canonical_only" - }, - "digest": { - "algorithm": "sha256", - "verified": true, - "value": "..." - } - } -} -``` - -
- -## Inline Suppressions - -When a symbol is invoked through runtime dynamics (framework callbacks, plugin loading, reflection), -suppress the known false positive at the declaration site: - -```python -# codeclone: ignore[dead-code] -def handle_exception(exc: Exception) -> None: - ... - - -class Middleware: # codeclone: ignore[dead-code] - ... -``` - -Suppression contract: [Inline suppressions](https://orenlab.github.io/codeclone/book/19-inline-suppressions/) · -[Dead-code contract](https://orenlab.github.io/codeclone/book/16-dead-code-contract/) - -## Benchmarking - -
-Reproducible Docker benchmark - -```bash -./benchmarks/run_docker_benchmark.sh -``` - -The wrapper builds `benchmarks/Dockerfile`, runs isolated container benchmarks, and writes results to -`.cache/benchmarks/codeclone-benchmark.json`. - -Use environment overrides to pin the benchmark envelope: - -```bash -CPUSET=0 CPUS=1.0 MEMORY=2g RUNS=16 WARMUPS=4 \ - ./benchmarks/run_docker_benchmark.sh -``` - -Performance claims are backed by the reproducible benchmark workflow documented in -[Benchmarking contract](https://orenlab.github.io/codeclone/book/18-benchmarking/). - -
- -## Documentation +## Exit Codes -Full docs and contract book: [orenlab.github.io/codeclone](https://orenlab.github.io/codeclone/) +| Code | Meaning | +|------|---------| +| `0` | Success | +| `2` | Contract error — untrusted baseline, invalid config | +| `3` | Gating failure — new clones or threshold exceeded | +| `5` | Internal error | -Quick links: -[Change Controller](https://orenlab.github.io/codeclone/book/24-structural-change-controller/) · -[Baseline](https://orenlab.github.io/codeclone/book/06-baseline/) · -[Report](https://orenlab.github.io/codeclone/book/08-report/) · -[Metrics & gates](https://orenlab.github.io/codeclone/book/15-metrics-and-quality-gates/) · -[MCP](https://orenlab.github.io/codeclone/book/20-mcp-interface/) · -[CLI](https://orenlab.github.io/codeclone/book/09-cli/) +Contract errors (`2`) take precedence over gating failures (`3`). ## License - **Code:** MPL-2.0 (`LICENSE`) -- **Documentation and docs-site content:** MIT (`LICENSE-MIT`) - -Versions released before this change remain under their original license terms. 
+- **Documentation:** MIT (`LICENSE-MIT`) ## Links -- **Docs:** -- **Issues:** -- **Discussions:** -- **PyPI:** -- **Licenses:** [MPL-2.0](https://github.com/orenlab/codeclone/blob/main/LICENSE) · [MIT docs](https://github.com/orenlab/codeclone/blob/main/LICENSE-MIT) · [Scope map](https://github.com/orenlab/codeclone/blob/main/LICENSES.md) +[Docs](https://orenlab.github.io/codeclone/) · +[PyPI](https://pypi.org/project/codeclone/) · +[Issues](https://github.com/orenlab/codeclone/issues) · +[Discussions](https://github.com/orenlab/codeclone/discussions) · +[License scope map](https://github.com/orenlab/codeclone/blob/main/LICENSES.md) [pypi-shield]: https://img.shields.io/pypi/v/codeclone?style=flat-square&color=6366f1 -[status-shield]: https://img.shields.io/pypi/status/codeclone?style=flat-square&color=6366f1 [downloads-shield]: https://img.shields.io/pypi/dm/codeclone?style=flat-square&color=6366f1 [python-shield]: https://img.shields.io/pypi/pyversions/codeclone?style=flat-square&color=6366f1 -[score-shield]: https://img.shields.io/badge/codeclone-90%20(A)-6366f1?style=flat-square [license-shield]: https://img.shields.io/badge/license-MPL--2.0-6366f1?style=flat-square [tests-shield]: https://img.shields.io/github/actions/workflow/status/orenlab/codeclone/tests.yml?branch=main&style=flat-square&label=tests -[benchmark-shield]: https://img.shields.io/github/actions/workflow/status/orenlab/codeclone/benchmark.yml?style=flat-square&label=benchmark [pypi-link]: https://pypi.org/project/codeclone/ -[score-link]: #how-it-works [license-link]: #license [tests-link]: https://github.com/orenlab/codeclone/actions/workflows/tests.yml -[benchmark-link]: https://github.com/orenlab/codeclone/actions/workflows/benchmark.yml diff --git a/docs/README-pypi.md b/docs/README-pypi.md index 4ed57abe..cd6596b2 100644 --- a/docs/README-pypi.md +++ b/docs/README-pypi.md @@ -17,76 +17,17 @@

- Structural change controller for Python — deterministic, baseline-aware, built for CI and AI agents + Structural change controller for Python

PyPI - Tests - Benchmark Python + Tests

-CodeClone is a structural change controller for Python. The v2.1 alpha starts -before the first edit — when an agent declares what it intends to change — -maps the structural blast radius, and verifies explicit before/after runs -against the patch contract. It also generates auditable review receipts; the -claim guard validates cited review claims against canonical report semantics. -The CLI exposes `--blast-radius` and `--patch-verify` for the same highest-value -controller checks in terminal workflows. - -The same analysis pipeline powers CLI reports, CI checks, the MCP server, and -native IDE/agent clients — so humans and AI agents operate on identical, -deterministic facts. - -- Documentation: -- Live sample report: -- Source: -- Issues: - -## Change Controller - -When an AI agent edits code, CodeClone governs the structural boundary: - -1. **Declare intent** — agent states what it plans to change, which files, and why -2. **Map blast radius** — reverse imports, clone cohorts, dependency cycles, do-not-touch signals -3. **Check patch contract** — pre-edit regression budget and post-edit boundary verification -4. **Generate receipt** — auditable artifact: intent + scope + patch status + structural delta -5. **Validate claims** — citation-based cross-check of review text against the canonical report - -Each step is deterministic — structural facts, no LLM inference. 
- -Docs: - -## Features - -**Change control** -- **Intent declaration** — agent states what it plans to change; CodeClone tracks scope, expiry, and status -- **Blast radius** — structural risk projection: reverse imports, clone cohorts, dependency cycles, do-not-touch signals -- **Patch contract** — pre-edit regression budget and post-edit boundary verification over explicit before/after runs -- **Review receipt** — auditable artifact linking intent, scope, patch verification, and structural delta -- **Claim guard** — citation-based validation of review text against canonical report semantics -- **CLI controller queries** — blast radius before edits and patch verification before push - -**Baseline governance** -- **Regression isolation** — separates accepted **legacy** debt from **new regressions**; CI fails only on what changed -- **CI-first** — deterministic output, stable ordering, exit code contract, pre-commit support -- **Reports** — interactive HTML, JSON, Markdown, SARIF, and text from one canonical report - -**Detection & analysis** -- **Clone detection** — function (CFG fingerprint), block (statement windows), and segment (report-only) clones -- **Structural findings** — duplicated branch families, clone guard/exit divergence, and clone-cohort drift -- **Quality metrics** — cyclomatic complexity, coupling (CBO), cohesion (LCOM4), dependency cycles, adaptive depth profile, dead code, health score, and overloaded-module profiling -- **Adoption & API** — type/docstring annotation coverage, public API surface inventory and baseline diff -- **Coverage Join** — fuse external Cobertura XML into the current run to surface coverage hotspots and scope gaps -- **Security Surfaces** — report-only inventory of security-relevant capability boundaries without vulnerability claims - -**Surfaces & integrations** -- **MCP control surface** — 26-tool agent and IDE interface over the same canonical pipeline; read-only by contract -- **IDE & agent clients** — VS Code 
extension, Claude Desktop bundle, and Codex plugin over the same MCP contract - -**Performance** -- **Fast** — incremental caching, parallel processing, warm-run optimization +Deterministic static analysis that combines clone detection, code-quality metrics, +and baseline-aware CI gating — with a structural change controller for AI coding agents. ## Quick Start @@ -95,98 +36,36 @@ uv tool install codeclone codeclone . # analyze codeclone . --html # HTML report -codeclone . --html --open-html-report -codeclone . --json --md --sarif --text codeclone . --ci # CI mode -codeclone . --blast-radius codeclone/core/parser.py -codeclone . --patch-verify --diff-against HEAD~1 -``` - -Run without installing: - -```bash -uvx codeclone@latest . -``` - -## CI Workflow - -```bash -# 1. Generate and commit the baseline -codeclone . --update-baseline - -# 2. Enforce it in CI -codeclone . --ci -``` - -`--ci` equals `--fail-on-new --no-color --quiet`. When a trusted metrics -baseline is loaded, CI mode also enables `--fail-on-new-metrics`. - -Exit codes: - -| Code | Meaning | -|------|-------------------------------------------------------------------------------| -| `0` | Success | -| `2` | Contract error — untrusted baseline, invalid config, unreadable sources in CI | -| `3` | Gating failure — new clones or metric threshold exceeded | -| `5` | Internal error | - -Contract errors (`2`) take precedence over gating failures (`3`). - -## Reports - -```bash -codeclone . --html -codeclone . --json -codeclone . --md -codeclone . --sarif -codeclone . --text ``` -All formats are rendered from one canonical report payload. 
+## Key Capabilities -Report contract: +- **Clone detection** — function (CFG fingerprint), block, and segment clones +- **Quality metrics** — complexity, coupling, cohesion, dead code, health score +- **Baseline governance** — separates legacy debt from new regressions; CI fails only on what changed +- **Change controller** — intent declaration, blast radius, patch contract, review receipt for AI agents +- **MCP server** — 26-tool read-only interface for IDE and agent clients +- **Reports** — HTML, JSON, Markdown, SARIF, text from one canonical payload -## MCP and Native Clients +## MCP Server ```bash uv tool install "codeclone[mcp]" - codeclone-mcp --transport stdio ``` -The MCP server is read-only by contract: it never mutates source files, -baselines, cache, or repository state. - -| Surface | Link | -|-----------------------|--------------------------------------------------------------------------------------| -| VS Code extension | | -| Claude Desktop bundle | | -| Codex plugin | | - -MCP docs: - -## Configuration - -```toml -[tool.codeclone] -baseline = "codeclone.baseline.json" -min_loc = 10 -min_stmt = 6 -block_min_loc = 20 -block_min_stmt = 8 -fail_on_new = true -fail_cycles = true -fail_dead_code = true -fail_health = 80 -``` +Native clients: VS Code extension, Claude Desktop bundle, Codex plugin. -Precedence: CLI flags > `pyproject.toml` > built-in defaults. +## Links -Config reference: +- Documentation: +- Source: +- Issues: ## License -- Code: MPL-2.0 (`LICENSE`) -- Documentation and docs-site content: MIT (`LICENSE-MIT`) +- Code: MPL-2.0 +- Documentation: MIT License scope map: diff --git a/docs/README.md b/docs/README.md index 37d32ef8..6f69e14f 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,69 +1,61 @@ # CodeClone Docs -> Structural change controller for Python codebases. -> One canonical analysis across CI, HTML reports, IDEs, and AI agents. 
+> Structural change controller for Python — +> deterministic, baseline-aware, built for CI and AI agents. -CodeClone is a structural change controller for Python. It starts before the -first edit — when an agent declares what it intends to change — maps the -structural blast radius, verifies that the patch stayed inside its declared -boundary, leaves an auditable receipt, and validates cited review claims against -canonical report semantics. The same deterministic facts power CI gates, human -reviews, and AI-assisted workflows. +CodeClone runs one deterministic analysis pipeline and emits a canonical JSON +report. Every surface — CLI, HTML, MCP, IDE — is a projection of that report. +Humans and AI agents operate on the same structural facts. -This documentation site has two complementary layers: - -- **Contracts Book** — canonical behavioral contracts derived from code and locked tests -- **Deep Dives** — architecture, CFG semantics, integrations, and operational rationale +The v2.1 change controller starts before the first edit: an agent declares what +it intends to change, CodeClone maps the structural blast radius, verifies the +patch against the declared boundary, and generates an auditable review receipt. !!! note "Licensing" - CodeClone source code is licensed under MPL-2.0. - - Documentation content under `docs/` and the published docs site are - licensed under MIT. + Source code: MPL-2.0. Documentation and docs-site content: MIT. --- -## Start Here - -### New to CodeClone? - -Understand the deterministic change control model and governance philosophy. - -- [Contracts and guarantees](book/00-intro.md) -- [Architecture map (components + ownership)](book/01-architecture-map.md) -- [Terminology](book/02-terminology.md) +## Getting Started -### Governing AI-assisted changes? 
+| Goal | Start here | +|------|-----------| +| First install and run | [Getting started](getting-started.md) | +| Understand the model | [Contracts and guarantees](book/00-intro.md) | +| Terminology lookup | [Terminology](book/02-terminology.md) | -Understand the structural change controller: intent, blast radius, patch contract, -review receipt, and claim guard. +## CI and Gating -- [Structural Change Controller](book/24-structural-change-controller.md) -- [MCP interface contract](book/20-mcp-interface.md) +| Goal | Start here | +|------|-----------| +| Baseline-aware CI | [Getting started: CI setup](getting-started.md#ci-setup) | +| Exit codes and failure policy | [Exit codes](book/03-contracts-exit-codes.md) | +| Quality gates and metrics | [Metrics and gates](book/15-metrics-and-quality-gates.md) | +| Baseline contract | [Baseline](book/06-baseline.md) | -### Integrating into CI? +## AI Agent Governance -Set up baseline-aware gating and deterministic review flows. +| Goal | Start here | +|------|-----------| +| Change controller workflow | [Structural Change Controller](book/24-structural-change-controller.md) | +| MCP interface contract | [MCP interface](book/20-mcp-interface.md) | +| MCP usage guide | [MCP guide](mcp.md) | -- [Exit codes and failure policy](book/03-contracts-exit-codes.md) -- [Metrics mode and quality gates](book/15-metrics-and-quality-gates.md) -- [Baseline contract](book/06-baseline.md) +## IDE and Agent Clients -### Using IDEs or AI agents? +| Surface | Usage guide | Contract | +|---------|------------|----------| +| VS Code extension | [Guide](vscode-extension.md) | [Contract](book/21-vscode-extension.md) | +| Claude Desktop bundle | [Guide](claude-desktop-bundle.md) | [Contract](book/22-claude-desktop-bundle.md) | +| Codex plugin | [Guide](codex-plugin.md) | [Contract](book/23-codex-plugin.md) | -Understand the canonical review surfaces and MCP contract. 
+## Reports -- [MCP interface contract](book/20-mcp-interface.md) -- [VS Code extension](book/21-vscode-extension.md) -- [Codex plugin](book/23-codex-plugin.md) - -### Reviewing reports? - -Explore the canonical report model and rendered review surfaces. - -- [Report contract](book/08-report.md) -- [HTML report rendering](book/10-html-render.md) -- [Live sample report](examples/report.md) +| Goal | Start here | +|------|-----------| +| Report model and schema | [Report contract](book/08-report.md) | +| HTML rendering | [HTML render](book/10-html-render.md) | +| Live sample | [Sample report](examples/report.md) | --- @@ -71,16 +63,6 @@ Explore the canonical report model and rendered review surfaces. Contract-first documentation derived from code and locked tests. -The Contracts Book defines: - -- schemas and typed contracts -- baseline and cache semantics -- exit codes and CI behavior -- determinism guarantees -- trust and compatibility rules -- review surface contracts -- change controller workflow and tool contracts - ### Core Contracts - [Exit codes and failure policy](book/03-contracts-exit-codes.md) @@ -123,17 +105,15 @@ The Contracts Book defines: ## Deep Dives -Narrative documentation covering architecture, integrations, and operational usage. 
- - [Architecture narrative](architecture.md) - [CFG design and semantics](cfg.md) -- [MCP integration for AI agents and clients](mcp.md) -- [VS Code extension usage guide](vscode-extension.md) -- [Claude Desktop bundle usage guide](claude-desktop-bundle.md) -- [Codex plugin usage guide](codex-plugin.md) -- [SARIF integration for IDE/code-scanning use](sarif.md) +- [MCP integration guide](mcp.md) +- [VS Code extension usage](vscode-extension.md) +- [Claude Desktop bundle usage](claude-desktop-bundle.md) +- [Codex plugin usage](codex-plugin.md) +- [SARIF integration](sarif.md) -### Operational and legal +### Operational - [Privacy Policy](privacy-policy.md) - [Terms of Use](terms-of-use.md) @@ -151,23 +131,15 @@ Narrative documentation covering architecture, integrations, and operational usa ## Local Preview -=== "Build the site" - - ```bash title="Validate the docs site" - uv run --with mkdocs --with mkdocs-material mkdocs build --strict - ``` +```bash +# Build the site +uv run --with mkdocs --with mkdocs-material mkdocs build --strict -=== "Build the site and sample report" - - ```bash title="Generate the live sample report into the built site" - uv run --with mkdocs --with mkdocs-material mkdocs build --strict - uv run python scripts/build_docs_example_report.py --output-dir site/examples/report/live - ``` +# Build with live sample report +uv run --with mkdocs --with mkdocs-material mkdocs build --strict +uv run python scripts/build_docs_example_report.py --output-dir site/examples/report/live +``` !!! note "Generated output" - `site/` is generated output used for local preview and GitHub Pages - publishing. It is not committed to git. - -GitHub Pages publishing is handled by -[`docs.yml`](https://github.com/orenlab/codeclone/blob/main/.github/workflows/docs.yml) -via a custom Actions workflow. + `site/` is generated output used for local preview and GitHub Pages publishing. + It is not committed to git. 
diff --git a/docs/codex-plugin.md b/docs/codex-plugin.md index 01504188..34fa9a46 100644 --- a/docs/codex-plugin.md +++ b/docs/codex-plugin.md @@ -5,14 +5,15 @@ Repo-local discovery via `.agents/plugins/marketplace.json`. ## What ships in the plugin -| File | Purpose | -|------------------------------|----------------------------------------------------| -| `.codex-plugin/plugin.json` | Plugin metadata, prompts, instructions | -| `.mcp.json` | Workspace-first MCP launcher definition | -| `scripts/launch_mcp` | Shell-free launcher wrapper for Codex | -| `skills/codeclone-review/` | Conservative-first full review skill | -| `skills/codeclone-hotspots/` | Quick hotspot discovery skill | -| `assets/` | Plugin branding | +| File | Purpose | +|------|---------| +| `.codex-plugin/plugin.json` | Plugin metadata, prompts, instructions | +| `.mcp.json` | Workspace-first MCP launcher definition | +| `scripts/launch_mcp` | Shell-free launcher wrapper for Codex | +| `skills/codeclone-review/` | Conservative-first full review skill | +| `skills/codeclone-hotspots/` | Quick hotspot discovery skill | +| `skills/codeclone-change-control/` | Intent-first change workflow skill | +| `assets/` | Plugin branding | ## Install @@ -35,22 +36,43 @@ Manual MCP registration without the plugin: codex mcp add codeclone -- codeclone-mcp --transport stdio ``` +## Skills + +### codeclone-review + +Full structural review: clone triage, changed-scope review, health-oriented +refactor planning. Starts conservative with default thresholds, supports +deeper follow-up with lowered thresholds and run comparison. + +### codeclone-hotspots + +Quick quality snapshot: health check, top risks, single-metric queries. +The cheapest useful path — `analyze_repository` then `get_production_triage`. + +### codeclone-change-control + +Intent-first change workflow for repository edits. 
Declares scope before +editing, maps blast radius, verifies the patch against the contract, generates +a review receipt, and validates cited review claims. This is the governance +skill — use it whenever the task requires changing files. + ## Runtime model Additive — Codex discovers the plugin from `.agents/plugins/marketplace.json`, -gets a local MCP definition and two skills. New canonical MCP surfaces from the -local `codeclone-mcp` version flow through directly, including `Coverage Join` +gets a local MCP definition and three skills. New canonical MCP surfaces from the +local `codeclone-mcp` version flow through directly, including Coverage Join facts and the optional `coverage` help topic when supported. The plugin does not mutate `~/.codex/config.toml` or install a second server binary. ## Current limits -- if you already registered `codeclone-mcp` manually, keep only one setup path - to avoid duplicate MCP surfaces -- the bundled `.mcp.json` prefers `.venv`, then a Poetry env, then `PATH` -- the bundled launcher stays shell-free and local-stdio-only +- If you already registered `codeclone-mcp` manually, keep only one setup path + to avoid duplicate MCP surfaces. +- The bundled `.mcp.json` prefers `.venv`, then a Poetry env, then `PATH`. +- The bundled launcher stays shell-free and local-stdio-only. -For the underlying interface contract, see: +## Further reading - [MCP usage guide](mcp.md) - [MCP interface contract](book/20-mcp-interface.md) +- [Structural Change Controller](book/24-structural-change-controller.md) diff --git a/docs/getting-started.md b/docs/getting-started.md new file mode 100644 index 00000000..e4c2d910 --- /dev/null +++ b/docs/getting-started.md @@ -0,0 +1,234 @@ +# Getting Started + +Install CodeClone, run your first analysis, set up CI gating, and connect +an MCP client — in that order. 
+ +## Install + +=== "uv (recommended)" + + ```bash + uv tool install codeclone + ``` + +=== "pip" + + ```bash + pip install codeclone + ``` + +=== "Run without installing" + + ```bash + uvx codeclone@latest . + ``` + +To use the MCP server (AI agents, IDE extensions), install the `mcp` extra: + +```bash +uv tool install "codeclone[mcp]" +# or +pip install "codeclone[mcp]" +``` + +## First Run + +```bash +codeclone . +``` + +This analyzes the current directory and prints a summary to stdout. +For an HTML report: + +```bash +codeclone . --html --open-html-report +``` + +Other formats — all rendered from one canonical JSON report: + +```bash +codeclone . --json # JSON +codeclone . --md # Markdown +codeclone . --sarif # SARIF (IDE / Code Scanning) +codeclone . --text # plain text +``` + +### Changed-scope review + +Analyze only files changed relative to a branch: + +```bash +codeclone . --changed-only --diff-against main +``` + +Or from a recent commit: + +```bash +codeclone . --paths-from-git-diff HEAD~1 +``` + +## CI Setup + +### 1. Create a baseline + +```bash +codeclone . --update-baseline +``` + +By default this writes `codeclone.baseline.json`, the unified clone and metrics +baseline. Commit it to the repository — it becomes the contract CI enforces. +If you use `--metrics-baseline` to redirect metric state, commit that file too. + +### 2. Run in CI + +```bash +codeclone . --ci +``` + +`--ci` equals `--fail-on-new --no-color --quiet`. When a trusted metrics +baseline is present, CI mode also enables `--fail-on-new-metrics`. + +Baseline governance: new clones and metric regressions fail the build; +accepted legacy debt passes. CI sees only what changed. + +### 3. Quality gates + +Add thresholds for stricter enforcement: + +```bash +codeclone . --fail-complexity 20 --fail-coupling 10 --fail-cohesion 4 +codeclone . --fail-cycles --fail-dead-code --fail-health 60 +codeclone . --fail-on-typing-regression --fail-on-docstring-regression +codeclone . 
--coverage coverage.xml --fail-on-untested-hotspots +``` + +See [Metrics and quality gates](book/15-metrics-and-quality-gates.md) for the +full gate reference. + +### GitHub Action + +```yaml +- uses: orenlab/codeclone/.github/actions/codeclone@v2 + with: + fail-on-new: "true" + sarif: "true" + pr-comment: "true" +``` + +Runs gating, generates reports, uploads SARIF to Code Scanning, and posts a +PR summary comment. +[Action docs](https://github.com/orenlab/codeclone/blob/main/.github/actions/codeclone/README.md) + +### Pre-commit hook + +```yaml +repos: + - repo: local + hooks: + - id: codeclone + name: CodeClone + entry: codeclone + language: system + pass_filenames: false + args: [ ".", "--ci" ] + types: [ python ] +``` + +### Exit codes + +| Code | Meaning | +|------|---------| +| `0` | Success | +| `2` | Contract error — untrusted baseline, invalid config | +| `3` | Gating failure — new clones or threshold exceeded | +| `5` | Internal error | + +Contract errors (`2`) take precedence over gating failures (`3`). +See [Exit codes](book/03-contracts-exit-codes.md). + +## MCP Setup + +The MCP server exposes 26 read-only tools over the same canonical pipeline. + +### Start the server + +```bash +codeclone-mcp --transport stdio # local clients (IDE, agents) +codeclone-mcp --transport streamable-http # remote / HTTP clients +``` + +!!! warning + Analysis tools require an **absolute** repository root. + Relative roots like `.` are rejected. + +### Connect a client + +=== "VS Code" + + Install from the + [VS Code Marketplace](https://marketplace.visualstudio.com/items?itemName=orenlab.codeclone). + The extension connects to `codeclone-mcp` automatically. + + See [VS Code extension guide](vscode-extension.md). + +=== "Claude Desktop" + + Use the pre-built bundle in + [`extensions/claude-desktop-codeclone/`](https://github.com/orenlab/codeclone/tree/main/extensions/claude-desktop-codeclone). + + See [Claude Desktop guide](claude-desktop-bundle.md). 
+ +=== "Codex" + + The plugin is in + [`plugins/codeclone/`](https://github.com/orenlab/codeclone/tree/main/plugins/codeclone). + Codex discovers it from `.agents/plugins/marketplace.json`. + + See [Codex plugin guide](codex-plugin.md). + +=== "Manual registration" + + ```bash + # Codex + codex mcp add codeclone -- codeclone-mcp --transport stdio + + # Any MCP client + codeclone-mcp --transport stdio + ``` + +### Change controller (AI agents) + +When an AI agent edits code, the MCP change controller governs the structural +boundary: + +1. **Declare intent** — scope, files, and purpose +2. **Map blast radius** — reverse imports, clone cohorts, do-not-touch +3. **Check patch contract** — pre-edit budget, post-edit verification +4. **Generate receipt** — auditable artifact +5. **Validate claims** — cross-check review text against report + +See [Structural Change Controller](book/24-structural-change-controller.md). + +## Configuration + +CodeClone loads project configuration from `pyproject.toml`: + +```toml +[tool.codeclone] +baseline = "codeclone.baseline.json" +min_loc = 10 +min_stmt = 6 +block_min_loc = 20 +block_min_stmt = 8 +``` + +Precedence: CLI flags > `pyproject.toml` > built-in defaults. + +See [Config and defaults](book/04-config-and-defaults.md). 
+ +## Next Steps + +- [Architecture narrative](architecture.md) — how the pipeline works +- [Baseline contract](book/06-baseline.md) — trust model and schema +- [MCP interface contract](book/20-mcp-interface.md) — tool surface and guarantees +- [Report contract](book/08-report.md) — canonical JSON schema diff --git a/mkdocs.yml b/mkdocs.yml index 16f72981..cf2de392 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -87,6 +87,7 @@ markdown_extensions: nav: - Home: README.md + - Getting Started: getting-started.md - Contracts Book: - Overview: book/README.md From 43c2f0a4351f4ef6775922baece07f7042a8691d Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Mon, 25 May 2026 10:11:23 +0500 Subject: [PATCH 022/318] chore(docs): restructure docs --- README.md | 289 ++++++++++++++------ docs/book/23-codex-plugin.md | 29 +- docs/codex-plugin.md | 27 +- docs/getting-started.md | 10 +- docs/mcp.md | 8 +- plugins/codeclone/.codex-plugin/plugin.json | 5 +- plugins/codeclone/README.md | 44 +-- tests/test_codex_plugin.py | 68 +++-- 8 files changed, 340 insertions(+), 140 deletions(-) diff --git a/README.md b/README.md index 80281fa7..37a84ca6 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ > -

Structural change controller for Python

+

Structural Change Controller for AI-assisted Python development

[![][pypi-shield]][pypi-link] [![][python-shield]][pypi-link] [![][downloads-shield]][pypi-link] [![][tests-shield]][tests-link] [![][license-shield]][license-link] @@ -24,11 +24,20 @@ --- -Deterministic static analysis that combines clone detection, code-quality metrics, -and baseline-aware CI gating — with a structural change controller for AI coding agents. +CodeClone is a **deterministic structural review layer for Python**. -One canonical analysis, many surfaces: CLI, HTML reports, MCP server, IDE extensions. -Humans and agents operate on the same deterministic facts. +It gives humans and AI coding agents one canonical view of structural code quality: +clone findings, code-health metrics, baseline-aware CI gates, coverage context, +public API changes, and a **Structural Change Controller** that starts before a +diff exists. + +The controller lets agents declare intent, inspect structural blast radius, +stay inside explicit edit boundaries, verify the patch after editing, and leave +an auditable review receipt. + +One canonical analysis, many surfaces: **CLI, HTML reports, JSON, SARIF, MCP, +VS Code, Claude Desktop, Codex, and CI**. Humans and agents operate on the same +deterministic facts. Docs: [orenlab.github.io/codeclone](https://orenlab.github.io/codeclone/) · [Live report](https://orenlab.github.io/codeclone/examples/report/) @@ -38,13 +47,38 @@ Docs: [orenlab.github.io/codeclone](https://orenlab.github.io/codeclone/) &middo > For the latest stable release see the > [`v2.0.2` README](https://github.com/orenlab/codeclone/blob/v2.0.2/README.md). +## Why CodeClone + +AI coding agents do not just write code faster. They also expand scope faster. + +A prompt asks for one change. The agent edits the target file, touches another +module because it is "related", updates a helper, changes tests, and the final +diff still looks plausible. The problem is not speed. The problem is silent +scope expansion. 
+ +CodeClone introduces a Structural Change Controller for that workflow: + +```text +declare intent +→ inspect blast radius +→ constrain edit scope +→ edit +→ verify patch contract +→ validate claims +→ leave review receipt +``` + +CodeClone does not replace the agent and does not use LLM judgment to decide +what is safe. It gives the agent deterministic structural boundaries before the +diff exists, then verifies whether the resulting patch stayed inside them. + ## Install ```bash uv tool install codeclone # recommended -pip install codeclone # or pip +pip install codeclone # or pip -# with MCP server for AI agents / IDE +# with MCP server for AI agents / IDE clients uv tool install "codeclone[mcp]" ``` @@ -62,28 +96,66 @@ uvx codeclone@latest . ```bash codeclone . # analyze current directory codeclone . --html --open-html-report # HTML report in browser -codeclone . --ci # CI mode (baseline-aware gating) +codeclone . --ci # CI mode: baseline-aware gating ```
More commands ```bash -codeclone . --json --md --sarif --text # all report formats -codeclone . --changed-only --diff-against main # changed-scope review -codeclone . --blast-radius codeclone/core/parser.py # structural risk map -codeclone . --patch-verify --diff-against HEAD~1 # patch verification +codeclone . --json --md --sarif --text # all report formats +codeclone . --changed-only --diff-against main # changed-scope review + +# Structural Change Controller CLI surface +codeclone . --blast-radius codeclone/core/parser.py +codeclone . --patch-verify --diff-against HEAD~1 ```
-## CI Integration +## Structural Change Controller + +The Controller governs AI-assisted edits before they become invisible diffs. + +| Stage | Surface | Purpose | +|----------------------|-------------------------------------------|-------------------------------------------------------------------------| +| Declare intent | `manage_change_intent` | Agent states intended scope before editing | +| Map blast radius | `get_blast_radius` / `--blast-radius` | Reverse imports, clone cohorts, review context, do-not-touch boundaries | +| Check patch contract | `check_patch_contract` / `--patch-verify` | Pre-edit budget and post-edit structural verification | +| Generate receipt | `create_review_receipt` | Auditable artifact: intent, scope, blast radius, patch outcome | +| Validate claims | `validate_review_claims` | Cross-check review text against cited report facts | +| Coordinate workspace | workspace intent registry | Make active declared scopes visible across MCP processes | + +Every step is deterministic: structural facts come from the canonical report, +not from LLM inference. + +Intent execution is session-local. Cross-agent visibility is optional, +advisory, TTL/lease-bound, and stored as ephemeral workspace coordination state +under `.cache/codeclone/intents/`. CodeClone never mutates source files, +baselines, generated reports, or analysis cache through MCP. 
+ +[Structural Change Controller docs](https://orenlab.github.io/codeclone/book/24-structural-change-controller/) + +## What CodeClone Reviews + +| Category | What | +|-------------------------|--------------------------------------------------------------------------------------------------------------------------------| +| **Clone structure** | Function clones using CFG fingerprints, block clones using statement windows, segment clones as report-only review context | +| **Structural findings** | Duplicated branch families, clone guard/exit divergence, clone-cohort drift | +| **Quality metrics** | Cyclomatic complexity, coupling (CBO), cohesion (LCOM4), dependency cycles, adaptive dependency depth, dead code, health score | +| **Baseline governance** | Separates accepted legacy debt from new regressions so CI fails only on what got worse | +| **Coverage Join** | Fuses external Cobertura XML into the current run to surface untested hotspots and coverage scope gaps | +| **Adoption and API** | Type/docstring adoption, public API surface inventory, baseline-aware API break detection | +| **Security Surfaces** | Report-only inventory of security-relevant capability boundaries without vulnerability claims | +| **Design signals** | Overloaded modules and other report-only structural review context | + +## Baseline-Aware CI ```bash # 1. Generate baseline (commit to repo) codeclone . --update-baseline -# 2. Add to CI pipeline +# 2. Enforce it in CI codeclone . --ci ``` @@ -104,16 +176,25 @@ is present, it also enables `--fail-on-new-metrics`. pr-comment: "true" ``` -Runs gating, generates reports, uploads SARIF to Code Scanning, posts a PR summary. +Runs gating, generates reports, uploads SARIF to GitHub Code Scanning, and posts +or updates a PR summary. + [Action docs](https://github.com/orenlab/codeclone/blob/main/.github/actions/codeclone/README.md) ### Quality Gates ```bash +# Structural thresholds codeclone . 
--fail-complexity 20 --fail-coupling 10 --fail-cohesion 4 codeclone . --fail-cycles --fail-dead-code --fail-health 60 -codeclone . --fail-on-new-metrics --fail-on-typing-regression -codeclone . --coverage coverage.xml --fail-on-untested-hotspots + +# Baseline-aware metric regression detection +codeclone . --fail-on-new-metrics +codeclone . --fail-on-typing-regression --fail-on-docstring-regression + +# API and coverage governance +codeclone . --api-surface --fail-on-api-break +codeclone . --coverage coverage.xml --fail-on-untested-hotspots --coverage-min 50 ``` [Gate reference](https://orenlab.github.io/codeclone/book/15-metrics-and-quality-gates/) @@ -133,106 +214,152 @@ repos: types: [ python ] ``` -## What It Detects - -| Category | What | -|----------|------| -| **Clones** | Function clones (CFG fingerprint), block clones (statement windows), segment clones (report-only) | -| **Structural** | Duplicated branch families, clone guard/exit divergence, clone-cohort drift | -| **Quality metrics** | Cyclomatic complexity, coupling (CBO), cohesion (LCOM4), dependency cycles, dead code, health score | -| **Adoption** | Type annotation and docstring coverage, public API surface inventory | -| **Coverage Join** | Fuses external Cobertura XML to surface coverage hotspots and scope gaps | -| **Security surfaces** | Report-only inventory of security-relevant capability boundaries | - -**Baseline governance** separates accepted legacy debt from new regressions — -CI fails only on what changed. Reports render in HTML, JSON, Markdown, SARIF, -and text from one canonical JSON payload. 
- -## Change Controller - -The v2.1 structural change controller governs AI-assisted edits across five stages: +## MCP Control Surface -| Stage | Tool | Purpose | -|-------|------|---------| -| Declare intent | `manage_change_intent` | Agent states scope before editing | -| Map blast radius | `get_blast_radius` | Reverse imports, clone cohorts, do-not-touch | -| Check patch contract | `check_patch_contract` | Pre-edit budget / post-edit verification | -| Generate receipt | `create_review_receipt` | Auditable artifact: intent + scope + delta | -| Validate claims | `validate_review_claims` | Cross-check review text against report | +CodeClone ships a 26-tool MCP control surface for AI agents and IDE clients. -Every step is deterministic — structural facts from the canonical report, no LLM inference. -Intent is session-local; workspace coordination is ephemeral under `.cache/codeclone/intents/`. - -[Change controller docs](https://orenlab.github.io/codeclone/book/24-structural-change-controller/) - -## MCP Server - -26-tool read-only MCP server for AI agents and IDE clients. +Canonical analysis remains read-only by contract: MCP tools never mutate source +files, baselines, generated reports, or analysis cache. Controller state is +session-local or ephemeral workspace coordination state. ```bash -codeclone-mcp --transport stdio # local clients -codeclone-mcp --transport streamable-http # remote / HTTP clients +codeclone-mcp --transport stdio # local clients +codeclone-mcp --transport streamable-http # HTTP transport ``` > [!WARNING] > Analysis tools require an absolute repository root. Relative roots like `.` are rejected. +> Keep `stdio` as the default transport for local IDE and agent clients. HTTP exposure beyond +> loopback requires explicit `--allow-remote`. 
[MCP usage guide](https://orenlab.github.io/codeclone/mcp/) · [MCP interface contract](https://orenlab.github.io/codeclone/book/20-mcp-interface/) -### Native Clients +### Native Agent and IDE Clients -| Surface | Install | Docs | -|---------|---------|------| -| **VS Code** | [Marketplace](https://marketplace.visualstudio.com/items?itemName=orenlab.codeclone) | [Guide](https://orenlab.github.io/codeclone/book/21-vscode-extension/) | +| Surface | Install | Docs | +|--------------------|------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------| +| **VS Code** | [Marketplace](https://marketplace.visualstudio.com/items?itemName=orenlab.codeclone) | [Guide](https://orenlab.github.io/codeclone/book/21-vscode-extension/) | | **Claude Desktop** | [`extensions/claude-desktop-codeclone/`](https://github.com/orenlab/codeclone/tree/main/extensions/claude-desktop-codeclone) | [Guide](https://orenlab.github.io/codeclone/book/22-claude-desktop-bundle/) | -| **Codex** | [`plugins/codeclone/`](https://github.com/orenlab/codeclone/tree/main/plugins/codeclone) | [Guide](https://orenlab.github.io/codeclone/book/23-codex-plugin/) | +| **Codex** | [`orenlab/codeclone-codex`](https://github.com/orenlab/codeclone-codex) | [Guide](https://orenlab.github.io/codeclone/book/23-codex-plugin/) | All clients connect to the same `codeclone-mcp` contract — no second analysis engine. +## Reports + +All report formats render from one canonical JSON payload. + +| Format | Flag | Default path | +|----------|-----------|---------------------------------| +| HTML | `--html` | `.cache/codeclone/report.html` | +| JSON | `--json` | `.cache/codeclone/report.json` | +| Markdown | `--md` | `.cache/codeclone/report.md` | +| SARIF | `--sarif` | `.cache/codeclone/report.sarif` | +| Text | `--text` | `.cache/codeclone/report.txt` | + +```bash +codeclone . 
--html --json --md --sarif --text +``` + +[Report contract](https://orenlab.github.io/codeclone/book/08-report/) · +[HTML render](https://orenlab.github.io/codeclone/book/10-html-render/) + ## Configuration +CodeClone loads project-level configuration from `pyproject.toml`. + ```toml [tool.codeclone] baseline = "codeclone.baseline.json" + min_loc = 10 min_stmt = 6 + block_min_loc = 20 block_min_stmt = 8 + +segment_min_loc = 20 +segment_min_stmt = 10 + +golden_fixture_paths = ["tests/fixtures/golden_*"] + +html_out = ".cache/codeclone/report.html" +json_out = ".cache/codeclone/report.json" +md_out = ".cache/codeclone/report.md" +sarif_out = ".cache/codeclone/report.sarif" +text_out = ".cache/codeclone/report.txt" ``` Precedence: CLI flags > `pyproject.toml` > built-in defaults. + [Config reference](https://orenlab.github.io/codeclone/book/04-config-and-defaults/) -## Reports +## Exit Codes -| Format | Flag | Default path | -|--------|------|--------------| -| HTML | `--html` | `.cache/codeclone/report.html` | -| JSON | `--json` | `.cache/codeclone/report.json` | -| Markdown | `--md` | `.cache/codeclone/report.md` | -| SARIF | `--sarif` | `.cache/codeclone/report.sarif` | -| Text | `--text` | `.cache/codeclone/report.txt` | +| Code | Meaning | +|------|-------------------------------------------------------------------------------| +| `0` | Success | +| `2` | Contract error — untrusted baseline, invalid config, unreadable sources in CI | +| `3` | Gating failure — new clones or quality threshold exceeded | +| `5` | Internal error | -All formats render from one canonical JSON report. -[Report contract](https://orenlab.github.io/codeclone/book/08-report/) · -[HTML render](https://orenlab.github.io/codeclone/book/10-html-render/) +Contract errors (`2`) take precedence over gating failures (`3`). 
-## Exit Codes +[Exit code policy](https://orenlab.github.io/codeclone/book/03-contracts-exit-codes/) -| Code | Meaning | -|------|---------| -| `0` | Success | -| `2` | Contract error — untrusted baseline, invalid config | -| `3` | Gating failure — new clones or threshold exceeded | -| `5` | Internal error | +## Inline Suppressions + +When a symbol is invoked through runtime dynamics — framework callbacks, +plugin loading, reflection — suppress a known false positive at the declaration +site: + +```python +# codeclone: ignore[dead-code] +def handle_exception(exc: Exception) -> None: + ... -Contract errors (`2`) take precedence over gating failures (`3`). + +class Middleware: # codeclone: ignore[dead-code] + ... +``` + +[Inline suppressions](https://orenlab.github.io/codeclone/book/19-inline-suppressions/) · +[Dead-code contract](https://orenlab.github.io/codeclone/book/16-dead-code-contract/) + +## Benchmarking + +```bash +./benchmarks/run_docker_benchmark.sh +``` + +The Docker benchmark writes reproducible results to +`.cache/benchmarks/codeclone-benchmark.json`. 
+ +```bash +CPUSET=0 CPUS=1.0 MEMORY=2g RUNS=16 WARMUPS=4 \ + ./benchmarks/run_docker_benchmark.sh +``` + +[Benchmarking contract](https://orenlab.github.io/codeclone/book/18-benchmarking/) + +## Documentation + +Full docs and contract book: [orenlab.github.io/codeclone](https://orenlab.github.io/codeclone/) + +Quick links: +[Baseline](https://orenlab.github.io/codeclone/book/06-baseline/) · +[Report](https://orenlab.github.io/codeclone/book/08-report/) · +[Metrics & gates](https://orenlab.github.io/codeclone/book/15-metrics-and-quality-gates/) · +[MCP](https://orenlab.github.io/codeclone/book/20-mcp-interface/) · +[Structural Change Controller](https://orenlab.github.io/codeclone/book/24-structural-change-controller/) · +[CLI](https://orenlab.github.io/codeclone/book/09-cli/) ## License - **Code:** MPL-2.0 (`LICENSE`) -- **Documentation:** MIT (`LICENSE-MIT`) +- **Documentation and docs-site content:** MIT (`LICENSE-MIT`) + +Versions released before the license change remain under their original terms. ## Links @@ -243,13 +370,21 @@ Contract errors (`2`) take precedence over gating failures (`3`). 
[License scope map](https://github.com/orenlab/codeclone/blob/main/LICENSES.md) + [pypi-shield]: https://img.shields.io/pypi/v/codeclone?style=flat-square&color=6366f1 + [downloads-shield]: https://img.shields.io/pypi/dm/codeclone?style=flat-square&color=6366f1 + [python-shield]: https://img.shields.io/pypi/pyversions/codeclone?style=flat-square&color=6366f1 + [license-shield]: https://img.shields.io/badge/license-MPL--2.0-6366f1?style=flat-square + [tests-shield]: https://img.shields.io/github/actions/workflow/status/orenlab/codeclone/tests.yml?branch=main&style=flat-square&label=tests + [pypi-link]: https://pypi.org/project/codeclone/ + [license-link]: #license + [tests-link]: https://github.com/orenlab/codeclone/actions/workflows/tests.yml diff --git a/docs/book/23-codex-plugin.md b/docs/book/23-codex-plugin.md index 5e36abd6..545133cb 100644 --- a/docs/book/23-codex-plugin.md +++ b/docs/book/23-codex-plugin.md @@ -2,11 +2,11 @@ ## Purpose -Document the current contract and behavior of the Codex plugin shipped in -`plugins/codeclone/`. +Document the current contract and behavior of the Codex plugin sourced from +`plugins/codeclone/` and distributed through `orenlab/codeclone-codex`. -This chapter describes the plugin as a local Codex discovery and guidance layer -over existing CodeClone MCP contracts. +This chapter describes the plugin as a Codex discovery and guidance layer over +existing CodeClone MCP contracts. !!! note "Guidance layer only" The plugin contributes discovery metadata, a local MCP definition, and @@ -17,8 +17,10 @@ over existing CodeClone MCP contracts. 
The Codex plugin is: -- a repo-local Codex plugin under `plugins/` -- backed by `.agents/plugins/marketplace.json` +- sourced from `plugins/codeclone/` in this monorepo +- distributed as `orenlab/codeclone-codex` +- backed by `.agents/plugins/marketplace.json` for local development and + packaging - read-only with respect to repository state - a composition of local MCP server metadata plus Codex skill guidance - a native Codex setup surface, not a second extension model @@ -26,7 +28,7 @@ The Codex plugin is: ## Source of truth The plugin delegates analysis to the existing `codeclone-mcp` launcher and -guides usage through a plugin-bundled skill. +guides usage through bundled skills. New canonical MCP surfaces flow through from the resolved local server version. That includes current-run metric families such as `Coverage Join` and the @@ -47,9 +49,10 @@ The plugin currently provides: - `.mcp.json` - `scripts/launch_mcp` - `README.md` -- two bundled skills: +- three bundled skills: - `codeclone-review` - `codeclone-hotspots` + - `codeclone-change-control` - a repo-local marketplace entry in `.agents/plugins/marketplace.json` ## Runtime model @@ -65,18 +68,20 @@ The plugin surface is additive: plugin The plugin does not rewrite user config or install CodeClone automatically. +Public users install the distribution package with +`marketplace add orenlab/codeclone-codex`. ## Design rules -- **Codex-native packaging**: use `plugins/` plus `.agents/plugins/marketplace.json` - for discovery. +- **Codex-native packaging**: keep source under `plugins/` and publish the + marketplace distribution through `orenlab/codeclone-codex`. - **Canonical MCP first**: all analysis still flows through `codeclone-mcp`. - **Skill guidance, not analysis logic**: the skill teaches conservative-first CodeClone review but does not create new findings. - **No hidden installation side effects**: the plugin does not patch `~/.codex/config.toml`. 
-- **Repo-local clarity**: the plugin is meant to travel with the repository as - a native Codex surface. +- **Source clarity**: the monorepo copy is the source; the public install + surface is the `orenlab/codeclone-codex` distribution. - **Launcher honesty**: the plugin assumes `codeclone-mcp` is already installable in the current workspace or reachable on `PATH`, and prefers the workspace environment when one is present. diff --git a/docs/codex-plugin.md b/docs/codex-plugin.md index 34fa9a46..788415ea 100644 --- a/docs/codex-plugin.md +++ b/docs/codex-plugin.md @@ -1,7 +1,7 @@ # Codex Plugin -CodeClone ships a native Codex plugin in `plugins/codeclone/`. -Repo-local discovery via `.agents/plugins/marketplace.json`. +CodeClone ships a native Codex plugin. Source lives in `plugins/codeclone/`; +public installs use the distribution repo `orenlab/codeclone-codex`. ## What ships in the plugin @@ -17,6 +17,15 @@ Repo-local discovery via `.agents/plugins/marketplace.json`. ## Install +Install the plugin from the Codex marketplace: + +```bash +marketplace add orenlab/codeclone-codex +``` + +The plugin expects a local `codeclone-mcp` command. Install CodeClone with the +MCP extra in the workspace or globally: + ```bash uv venv uv pip install --python .venv/bin/python "codeclone[mcp]" @@ -58,11 +67,15 @@ skill — use it whenever the task requires changing files. ## Runtime model -Additive — Codex discovers the plugin from `.agents/plugins/marketplace.json`, -gets a local MCP definition and three skills. New canonical MCP surfaces from the -local `codeclone-mcp` version flow through directly, including Coverage Join -facts and the optional `coverage` help topic when supported. The plugin does -not mutate `~/.codex/config.toml` or install a second server binary. +Additive — the marketplace install provides a local MCP definition and three +skills. 
New canonical MCP surfaces from the local `codeclone-mcp` version flow +through directly, including Coverage Join facts and the optional `coverage` +help topic when supported. The plugin does not mutate `~/.codex/config.toml` or +install a second server binary. + +`.agents/plugins/marketplace.json` is the monorepo-local source entry used for +development and packaging into `orenlab/codeclone-codex`; it is not the public +install path. ## Current limits diff --git a/docs/getting-started.md b/docs/getting-started.md index e4c2d910..55479141 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -180,9 +180,13 @@ codeclone-mcp --transport streamable-http # remote / HTTP clients === "Codex" - The plugin is in - [`plugins/codeclone/`](https://github.com/orenlab/codeclone/tree/main/plugins/codeclone). - Codex discovers it from `.agents/plugins/marketplace.json`. + ```bash + marketplace add orenlab/codeclone-codex + ``` + + The source plugin lives in + [`plugins/codeclone/`](https://github.com/orenlab/codeclone/tree/main/plugins/codeclone); + the marketplace distribution is `orenlab/codeclone-codex`. See [Codex plugin guide](codex-plugin.md). diff --git a/docs/mcp.md b/docs/mcp.md index fb088bb2..f09c3950 100644 --- a/docs/mcp.md +++ b/docs/mcp.md @@ -122,11 +122,17 @@ All clients use the same server. Only the registration format differs. === "Codex" + ```bash + marketplace add orenlab/codeclone-codex + ``` + + The native plugin includes the MCP definition and CodeClone skills. + Manual MCP registration without the plugin is also valid: + ```bash codex mcp add codeclone -- codeclone-mcp --transport stdio ``` - A native plugin also ships in `plugins/codeclone/`. See [Codex plugin guide](codex-plugin.md). 
=== "Cursor" diff --git a/plugins/codeclone/.codex-plugin/plugin.json b/plugins/codeclone/.codex-plugin/plugin.json index 0d199931..a5724d88 100644 --- a/plugins/codeclone/.codex-plugin/plugin.json +++ b/plugins/codeclone/.codex-plugin/plugin.json @@ -25,7 +25,7 @@ "interface": { "displayName": "CodeClone", "shortDescription": "Structural code quality analysis for Python.", - "longDescription": "CodeClone for Codex — clone detection, quality metrics, and baseline-aware governance over the canonical codeclone-mcp server. Ships a review skill and a hotspot discovery skill for conservative-first, triage-first structural review.", + "longDescription": "CodeClone for Codex — clone detection, quality metrics, and baseline-aware governance over the canonical codeclone-mcp server. Ships review, hotspot discovery, and change-control skills for conservative-first structural review and intent-first repository edits.", "developerName": "OrenLab", "category": "Developer Tools", "capabilities": [ @@ -39,7 +39,8 @@ "defaultPrompt": [ "Use CodeClone to analyze this repo and show the top production hotspots.", "Run a changed-files CodeClone review for my current diff.", - "Check CodeClone health and explain what to fix first." + "Check CodeClone health and explain what to fix first.", + "Use CodeClone change control for this repository edit." 
], "brandColor": "#6366f1", "composerIcon": "./assets/icon.png", diff --git a/plugins/codeclone/README.md b/plugins/codeclone/README.md index aa03b9c5..cc5762f6 100644 --- a/plugins/codeclone/README.md +++ b/plugins/codeclone/README.md @@ -9,28 +9,29 @@ directly, including `Coverage Join` facts and the optional `coverage` help topic ## What ships here -| File | Purpose | -|------------------------------|----------------------------------------------------| -| `.codex-plugin/plugin.json` | Plugin metadata and prompts | -| `.mcp.json` | Local stdio MCP definition | -| `scripts/launch_mcp` | Shell-free workspace-first launcher bootstrap | -| `skills/codeclone-review/` | Conservative-first full review skill | -| `skills/codeclone-hotspots/` | Quick hotspot discovery skill | -| `assets/` | Plugin branding | +| File | Purpose | +|---------------------------------------|-----------------------------------------------| +| `.codex-plugin/plugin.json` | Plugin metadata, prompts, and instructions | +| `.mcp.json` | Local stdio MCP definition | +| `scripts/launch_mcp` | Shell-free workspace-first launcher bootstrap | +| `skills/codeclone-review/` | Conservative-first full review skill | +| `skills/codeclone-hotspots/` | Quick hotspot discovery skill | +| `skills/codeclone-change-control/` | Intent-first change workflow skill | +| `assets/` | Plugin branding | `plugin.json` keeps the machine identifier as lowercase `codeclone`; the user-facing label stays in `interface.displayName` as `CodeClone`. ## Install -The plugin prefers a workspace launcher first: +Install the distribution package from the Codex marketplace: -1. `./.venv/bin/codeclone-mcp` -2. the current Poetry environment launcher -3. `codeclone-mcp` from `PATH` +```bash +marketplace add orenlab/codeclone-codex +``` -The bundled Codex launcher is a small repo-local Python wrapper, not a shell -snippet. It keeps the same workspace-first order without relying on `sh -lc`. 
+This plugin does not install the MCP server binary. Install CodeClone with the +MCP extra in the workspace or globally. Recommended workspace-local setup: @@ -49,8 +50,15 @@ uv tool install "codeclone[mcp]" codeclone-mcp --help ``` -Codex discovers the plugin from `.agents/plugins/marketplace.json`. -It does not rewrite `~/.codex/config.toml`. +The bundled Codex launcher is a small repo-local Python wrapper, not a shell +snippet. It prefers a workspace `.venv`, then the current Poetry environment, +then `codeclone-mcp` from `PATH`, without relying on `sh -lc`. + +`.agents/plugins/marketplace.json` is the monorepo-local source entry used for +development and distribution packaging. Public installs should use +`marketplace add orenlab/codeclone-codex`. + +The plugin does not rewrite `~/.codex/config.toml`. If you prefer manual MCP registration instead: @@ -67,6 +75,10 @@ current-run metrics surfaces. **codeclone-hotspots** — quick quality snapshot: health check, top risks, single-metric queries, pre-merge sanity checks, coverage/adoption/API snapshots. +**codeclone-change-control** — intent-first workflow for repository edits: +workspace intent check, blast radius, patch contract verification, claim guard, +and review receipt. 
+ ## Links - [Codex plugin guide](https://orenlab.github.io/codeclone/codex-plugin/) diff --git a/tests/test_codex_plugin.py b/tests/test_codex_plugin.py index ebd5a313..0f646eda 100644 --- a/tests/test_codex_plugin.py +++ b/tests/test_codex_plugin.py @@ -8,6 +8,11 @@ def _load_json(path: Path) -> object: return json.loads(path.read_text(encoding="utf-8")) +def _assert_contains_all(text: str, needles: tuple[str, ...]) -> None: + for needle in needles: + assert needle in text + + def test_codex_plugin_manifest_is_consistent() -> None: root = Path(__file__).resolve().parents[1] plugin_root = root / "plugins" / "codeclone" @@ -46,11 +51,12 @@ def test_codex_plugin_manifest_is_consistent() -> None: ) assert interface["composerIcon"] == "./assets/icon.png" assert interface["logo"] == "./assets/logo.png" + assert "change-control skills" in interface["longDescription"] assert (plugin_root / "assets" / "icon.png").is_file() assert (plugin_root / "assets" / "logo.png").is_file() prompts = interface["defaultPrompt"] assert isinstance(prompts, list) - assert len(prompts) == 3 + assert len(prompts) == 4 assert all(isinstance(prompt, str) and 0 < len(prompt) <= 128 for prompt in prompts) @@ -93,30 +99,46 @@ def test_codex_plugin_skill_exists() -> None: plugin_root = root / "plugins" / "codeclone" skill_path = plugin_root / "skills" / "codeclone-review" / "SKILL.md" hotspot_skill_path = plugin_root / "skills" / "codeclone-hotspots" / "SKILL.md" + change_control_skill_path = ( + plugin_root / "skills" / "codeclone-change-control" / "SKILL.md" + ) skill_text = skill_path.read_text(encoding="utf-8") hotspot_skill_text = hotspot_skill_path.read_text(encoding="utf-8") + change_control_skill_text = change_control_skill_path.read_text(encoding="utf-8") manifest = _load_json(plugin_root / ".codex-plugin" / "plugin.json") assert isinstance(manifest, dict) - for needle in ( - "name: codeclone-review", - "conservative first pass", - 'help(topic="analysis_profile")', - 
'help(topic="coverage")', - 'get_report_section(section="metrics")', - "Use MCP tools only", - "Do not fall back to CLI or local report files.", - ): - assert needle in skill_text - - for needle in ( - "name: codeclone-hotspots", - 'get_report_section(section="metrics")', - 'help(topic="coverage")', - "Use MCP tools only", - "Do not fall back to CLI or local report files.", - ): - assert needle in hotspot_skill_text + _assert_contains_all( + skill_text, + ( + "name: codeclone-review", + "conservative first pass", + 'help(topic="analysis_profile")', + 'help(topic="coverage")', + 'get_report_section(section="metrics")', + "Use MCP tools only", + "Do not fall back to CLI or local report files.", + ), + ) + _assert_contains_all( + hotspot_skill_text, + ( + "name: codeclone-hotspots", + 'get_report_section(section="metrics")', + 'help(topic="coverage")', + "Use MCP tools only", + "Do not fall back to CLI or local report files.", + ), + ) + _assert_contains_all( + change_control_skill_text, + ( + "name: codeclone-change-control", + "Declare intent before editing.", + 'check_patch_contract(mode="budget")', + "create_review_receipt", + ), + ) assert "Use MCP tools only." 
in manifest["instructions"] assert 'get_report_section(section="metrics")' in manifest["instructions"] @@ -130,12 +152,14 @@ def test_codex_plugin_readme_and_docs_exist() -> None: readme_text = (plugin_root / "README.md").read_text(encoding="utf-8") assert "# CodeClone for Codex" in readme_text + assert "marketplace add orenlab/codeclone-codex" in readme_text assert "codex mcp add codeclone -- codeclone-mcp --transport stdio" in readme_text assert "does not rewrite `~/.codex/config.toml`" in readme_text - assert "The plugin prefers a workspace launcher first" in readme_text - assert "the current Poetry environment launcher" in readme_text + assert "prefers a workspace `.venv`" in readme_text + assert "current Poetry environment" in readme_text assert "without relying on `sh -lc`" in readme_text assert 'uv tool install "codeclone[mcp]"' in readme_text + assert "codeclone-change-control" in readme_text assert (root / "docs" / "codex-plugin.md").is_file() assert (root / "docs" / "terms-of-use.md").is_file() From caff8fca9bf5be5cc0646d3ae40c2eec5b912c27 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Mon, 25 May 2026 10:15:16 +0500 Subject: [PATCH 023/318] fix(integration): mypy fix: Source file found twice under different module names: "sync_integrations" and "scripts.sync_integrations" --- scripts/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 scripts/__init__.py diff --git a/scripts/__init__.py b/scripts/__init__.py new file mode 100644 index 00000000..e69de29b From 5244abaae7c9ab2be793fe327530dc6221ff3744 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Mon, 25 May 2026 10:57:08 +0500 Subject: [PATCH 024/318] fix(mcp): add space-variant citation matching and scope format hint Claim guard now matches metric family names written with spaces (e.g. "security surfaces") in addition to underscored identifiers. Intent declare error shows the expected scope format to reduce wasted agent context on invalid input. 
--- codeclone/surfaces/mcp/_claim_guard.py | 32 +++++++++++++++----------- codeclone/surfaces/mcp/_intent.py | 4 +++- tests/test_mcp_service.py | 21 +++++++++++++++++ 3 files changed, 43 insertions(+), 14 deletions(-) diff --git a/codeclone/surfaces/mcp/_claim_guard.py b/codeclone/surfaces/mcp/_claim_guard.py index 5ba78536..5780f3e4 100644 --- a/codeclone/surfaces/mcp/_claim_guard.py +++ b/codeclone/surfaces/mcp/_claim_guard.py @@ -178,20 +178,17 @@ def extract_citations( for match in _find_literal_matches(text, finding_id) ) for family_name in sorted(report_context.metric_families): - pattern = re.compile( - rf"\b{re.escape(family_name)}\b", - flags=re.IGNORECASE, - ) - citations.extend( - Citation( - cited_id=family_name, - kind="metric_family", - text_window=text_window(text, match.start(), match.end()), - start_offset=match.start(), - end_offset=match.end(), + for variant in _metric_family_patterns(family_name): + citations.extend( + Citation( + cited_id=family_name, + kind="metric_family", + text_window=text_window(text, match.start(), match.end()), + start_offset=match.start(), + end_offset=match.end(), + ) + for match in variant.finditer(text) ) - for match in pattern.finditer(text) - ) return tuple( sorted( _dedupe_citations(citations), @@ -437,6 +434,15 @@ def _warnings_for_text( return warnings +def _metric_family_patterns(family_name: str) -> tuple[re.Pattern[str], ...]: + canonical = re.compile(rf"\b{re.escape(family_name)}\b", flags=re.IGNORECASE) + if "_" not in family_name: + return (canonical,) + spaced_escaped = re.escape(family_name).replace("_", r"\s+") + spaced = re.compile(rf"\b{spaced_escaped}\b", flags=re.IGNORECASE) + return (canonical, spaced) + + def _find_literal_matches(text: str, literal: str) -> tuple[re.Match[str], ...]: pattern = re.compile( rf"(? tuple[str, . 
def normalize_intent_scope(scope: object) -> IntentScope: if not isinstance(scope, Mapping): - raise ValueError("scope must be an object with allowed_files.") + raise ValueError( + 'scope must be an object, e.g. {"allowed_files": ["path/to/file.py"]}.' + ) allowed_files = _normalize_required_paths( scope.get("allowed_files"), field_name="allowed_files", diff --git a/tests/test_mcp_service.py b/tests/test_mcp_service.py index 4b4de600..fb258fe8 100644 --- a/tests/test_mcp_service.py +++ b/tests/test_mcp_service.py @@ -22,6 +22,7 @@ import codeclone.surfaces.mcp._blast_radius as mcp_blast_radius_mod import codeclone.surfaces.mcp._claim_guard as mcp_claim_guard_mod +import codeclone.surfaces.mcp._intent as mcp_intent_mod import codeclone.surfaces.mcp._patch_contract as mcp_patch_contract_mod import codeclone.surfaces.mcp._review_receipt as mcp_review_receipt_mod import codeclone.surfaces.mcp._session_baseline as mcp_baseline_mod @@ -3134,6 +3135,26 @@ def test_claim_guard_detects_deterministic_overclaims() -> None: assert all(not item["valid"] for item in validated) +def test_normalize_intent_scope_hint_on_invalid_type() -> None: + """Non-dict scope gives an actionable error with format example.""" + with pytest.raises(ValueError, match=r"allowed_files"): + mcp_intent_mod.normalize_intent_scope(["pkg/a.py"]) + + +def test_claim_guard_detects_space_variant_overclaims() -> None: + """Underscore-to-space fallback catches natural-language metric family names.""" + payload = mcp_claim_guard_mod.validate_claims( + text=( + "security surfaces found vulnerabilities. overloaded modules will fail CI." 
+ ), + report_context=_claim_guard_context(), + ) + violations = cast("list[dict[str, object]]", payload["violations"]) + assert payload["valid"] is False + assert {str(item["pattern"]) for item in violations} == {"P-1", "P-2"} + assert payload["citations_found"] == 2 + + def test_claim_guard_keeps_report_only_and_gate_eligible_semantics_separate() -> None: payload = mcp_claim_guard_mod.validate_claims( text=( From 8ccdf8f79c7d8f841f637bdf873eefb23110587e Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Mon, 25 May 2026 17:19:00 +0500 Subject: [PATCH 025/318] test(mcp): cover change-control CLI contracts --- tests/test_cli_blast_radius.py | 177 +++++++++++++++++++++++++- tests/test_cli_patch_verify.py | 220 +++++++++++++++++++++++++++++++++ tests/test_mcp_service.py | 60 +++++++++ 3 files changed, 454 insertions(+), 3 deletions(-) diff --git a/tests/test_cli_blast_radius.py b/tests/test_cli_blast_radius.py index 8cb18268..6cd6844c 100644 --- a/tests/test_cli_blast_radius.py +++ b/tests/test_cli_blast_radius.py @@ -91,17 +91,188 @@ def test_blast_radius_rejects_absolute_paths(tmp_path: Path) -> None: assert "absolute paths are not accepted" in printer.text -def test_blast_radius_requires_at_least_one_inventory_file(tmp_path: Path) -> None: +@pytest.mark.parametrize( + ("files", "expected_message"), + [ + (("pkg/missing.py",), "--blast-radius requires at least one file"), + (("",), "empty path"), + (("../escape.py",), "paths must stay inside the scan root"), + ], +) +def test_blast_radius_rejects_invalid_input( + tmp_path: Path, + files: tuple[str, ...], + expected_message: str, +) -> None: printer = _RecordingPrinter() with pytest.raises(SystemExit) as exc: render_blast_radius( console=printer, report_document=_report_document(), - files=("pkg/missing.py",), + files=files, root_path=tmp_path, quiet=True, ) assert exc.value.code == int(ExitCode.CONTRACT_ERROR) - assert "--blast-radius requires at least one file" in printer.text + assert expected_message in 
printer.text + + +def test_blast_radius_warns_on_skipped_files(tmp_path: Path) -> None: + printer = _RecordingPrinter() + + exit_code = render_blast_radius( + console=printer, + report_document=_report_document(), + files=("pkg/a.py", "pkg/not_here.py"), + root_path=tmp_path, + quiet=False, + ) + + assert exit_code == int(ExitCode.SUCCESS) + assert "skipped files outside analysis inventory" in printer.text + + +def test_blast_radius_none_report_returns_contract_error(tmp_path: Path) -> None: + printer = _RecordingPrinter() + + exit_code = render_blast_radius( + console=printer, + report_document=None, + files=("pkg/a.py",), + root_path=tmp_path, + quiet=True, + ) + + assert exit_code == int(ExitCode.CONTRACT_ERROR) + assert "Blast radius requires a canonical report" in printer.text + + +def test_blast_radius_verbose_output_renders_all_sections(tmp_path: Path) -> None: + printer = _RecordingPrinter() + + exit_code = render_blast_radius( + console=printer, + report_document=_report_document(), + files=("pkg/a.py",), + root_path=tmp_path, + quiet=False, + ) + + assert exit_code == int(ExitCode.SUCCESS) + text = printer.text + expected_sections = ( + "Blast Radius", + "pkg/a.py", + "Risk level:", + "Direct dependents", + "Clone cohort members", + "Dependency cycles", + "Do not touch", + "Review context", + ) + for section in expected_sections: + assert section in text, f"Missing section: {section}" + + +def test_blast_radius_verbose_with_guardrails(tmp_path: Path) -> None: + """Verbose mode also renders guardrails when present.""" + printer = _RecordingPrinter() + + exit_code = render_blast_radius( + console=printer, + report_document=_report_document(), + files=("pkg/a.py",), + root_path=tmp_path, + quiet=False, + ) + + assert exit_code == int(ExitCode.SUCCESS) + assert "Guardrails:" in printer.text + + +def _report_document_many_files() -> dict[str, object]: + """Report with >20 inventory files to exercise truncation rendering.""" + files = [f"pkg/f{index:03d}.py" 
for index in range(25)] + deps = [ + {"source": f"pkg.f{index:03d}", "target": "pkg.f000"} for index in range(1, 25) + ] + return { + "integrity": {"digest": {"value": "b" * 64}}, + "inventory": {"file_registry": {"items": files}}, + "metrics": { + "families": { + "dependencies": {"items": deps, "cycles": []}, + "complexity": {"items": []}, + "coupling": {"items": []}, + "coverage_join": {"items": []}, + "overloaded_modules": {"items": []}, + "security_surfaces": {"items": []}, + }, + }, + "findings": { + "groups": { + "clones": { + "functions": [], + "blocks": [], + "segments": [], + "suppressed": {}, + }, + "structural": {"groups": []}, + "dead_code": {"groups": []}, + "design": {"groups": []}, + }, + }, + } + + +def test_blast_radius_verbose_truncates_long_lists(tmp_path: Path) -> None: + """Items and entries exceeding _MAX_RENDERED_ITEMS are truncated.""" + printer = _RecordingPrinter() + + exit_code = render_blast_radius( + console=printer, + report_document=_report_document_many_files(), + files=("pkg/f000.py",), + root_path=tmp_path, + quiet=False, + ) + + assert exit_code == int(ExitCode.SUCCESS) + assert "... and" in printer.text + + +def test_blast_radius_skipped_warning_truncated(tmp_path: Path) -> None: + """More than 5 skipped files triggers truncation in warning.""" + extra_missing = [f"pkg/miss{i}.py" for i in range(7)] + printer = _RecordingPrinter() + + exit_code = render_blast_radius( + console=printer, + report_document=_report_document(), + files=("pkg/a.py", *extra_missing), + root_path=tmp_path, + quiet=False, + ) + + assert exit_code == int(ExitCode.SUCCESS) + assert "... 
and" in printer.text + + +def test_blast_radius_many_invalid_paths_truncated(tmp_path: Path) -> None: + """More than 10 invalid paths triggers truncation in error message.""" + bad_paths = [f"/abs/path{i}.py" for i in range(12)] + printer = _RecordingPrinter() + + with pytest.raises(SystemExit) as exc: + render_blast_radius( + console=printer, + report_document=_report_document(), + files=bad_paths, + root_path=tmp_path, + quiet=True, + ) + + assert exc.value.code == int(ExitCode.CONTRACT_ERROR) + assert "... and 2 more" in printer.text diff --git a/tests/test_cli_patch_verify.py b/tests/test_cli_patch_verify.py index 5c2ecbb5..02a740f2 100644 --- a/tests/test_cli_patch_verify.py +++ b/tests/test_cli_patch_verify.py @@ -10,6 +10,7 @@ import codeclone.surfaces.cli.workflow as cli_workflow from codeclone.contracts import ExitCode from codeclone.core._types import AnalysisResult +from codeclone.models import HealthScore, MetricsDiff, ProjectMetrics from codeclone.surfaces.cli.patch_verify import ( render_patch_verify, validate_strictness, @@ -164,6 +165,225 @@ def test_patch_verify_validates_strictness_values() -> None: validate_strictness("nope") +def test_patch_verify_rejects_invalid_strictness() -> None: + printer = _RecordingPrinter() + + exit_code = render_patch_verify( + console=printer, + args=cast(Any, _args()), + strictness="nope", + analysis=_analysis(), + diff_context=_diff_context(), + baseline_state=cast(Any, _baseline_state()), + quiet=True, + ) + + assert exit_code == int(ExitCode.CONTRACT_ERROR) + assert "Invalid --strictness value" in printer.text + + +def test_patch_verify_verbose_accepted() -> None: + printer = _RecordingPrinter() + + exit_code = render_patch_verify( + console=printer, + args=cast(Any, _args()), + strictness="ci", + analysis=_analysis(), + diff_context=_diff_context(), + baseline_state=cast(Any, _baseline_state()), + quiet=False, + ) + + assert exit_code == int(ExitCode.SUCCESS) + text = printer.text + assert "accepted" in 
text.lower() + expected_sections = ( + "Patch Verify", + "Strictness:", + "Health:", + "Structural delta:", + "Gate preview:", + "Contract violations:", + "Patch contract accepted", + ) + for section in expected_sections: + assert section in text, f"Missing section: {section}" + + +def test_patch_verify_verbose_violated() -> None: + printer = _RecordingPrinter() + + exit_code = render_patch_verify( + console=printer, + args=cast(Any, _args()), + strictness="ci", + analysis=_analysis(function_clones=1), + diff_context=_diff_context(new_clones=1), + baseline_state=cast(Any, _baseline_state()), + quiet=False, + ) + + assert exit_code == int(ExitCode.GATING_FAILURE) + text = printer.text + assert "violated" in text.lower() + assert "structural_regressions" in text + assert "Patch contract violated" in text + + +def test_patch_verify_verbose_relaxed_advisory() -> None: + printer = _RecordingPrinter() + + exit_code = render_patch_verify( + console=printer, + args=cast(Any, _args()), + strictness="relaxed", + analysis=_analysis(function_clones=1), + diff_context=_diff_context(new_clones=1), + baseline_state=cast(Any, _baseline_state()), + quiet=False, + ) + + assert exit_code == int(ExitCode.SUCCESS) + text = printer.text + assert "advisory violations" in text + assert "relaxed mode exits 0" in text + + +def test_patch_verify_strict_strictness_quiet_enforces_health() -> None: + """Strict mode with health_floor=70 and no metrics yields gate failure.""" + printer = _RecordingPrinter() + + exit_code = render_patch_verify( + console=printer, + args=cast(Any, _args()), + strictness="strict", + analysis=_analysis(), + diff_context=_diff_context(), + baseline_state=cast(Any, _baseline_state()), + quiet=True, + ) + + assert exit_code == int(ExitCode.GATING_FAILURE) + assert "violated" in printer.text + + +def _project_metrics(*, health: int = 85) -> ProjectMetrics: + return ProjectMetrics( + complexity_avg=5.0, + complexity_max=10, + high_risk_functions=(), + coupling_avg=3.0, + 
coupling_max=5, + high_risk_classes=(), + cohesion_avg=1.0, + cohesion_max=2, + low_cohesion_classes=(), + dependency_modules=3, + dependency_edges=2, + dependency_edge_list=(), + dependency_cycles=(), + dependency_max_depth=2, + dependency_longest_chains=(), + dead_code=(), + health=HealthScore(total=health, grade="A", dimensions={}), + ) + + +def _analysis_with_metrics( + *, health: int = 85, function_clones: int = 0 +) -> AnalysisResult: + return AnalysisResult( + func_groups={}, + block_groups={}, + block_groups_report={}, + segment_groups={}, + suppressed_segment_groups=0, + block_group_facts={}, + func_clones_count=function_clones, + block_clones_count=0, + segment_clones_count=0, + files_analyzed_or_cached=1, + project_metrics=_project_metrics(health=health), + metrics_payload=None, + suggestions=(), + segment_groups_raw_digest="", + ) + + +def test_patch_verify_with_project_metrics_quiet() -> None: + """Covers _health_after, _health_delta, and gate_state_from_project_metrics.""" + printer = _RecordingPrinter() + + exit_code = render_patch_verify( + console=printer, + args=cast(Any, _args()), + strictness="ci", + analysis=_analysis_with_metrics(health=85), + diff_context=_diff_context(), + baseline_state=cast(Any, _baseline_state()), + quiet=True, + ) + + assert exit_code == int(ExitCode.SUCCESS) + assert "health=85->85" in printer.text + assert "accepted" in printer.text + + +def test_patch_verify_with_project_metrics_verbose() -> None: + printer = _RecordingPrinter() + + exit_code = render_patch_verify( + console=printer, + args=cast(Any, _args()), + strictness="ci", + analysis=_analysis_with_metrics(health=85), + diff_context=_diff_context(), + baseline_state=cast(Any, _baseline_state()), + quiet=False, + ) + + assert exit_code == int(ExitCode.SUCCESS) + assert "85 -> 85" in printer.text + + +def _diff_context_with_metrics_diff( + *, new_clones: int = 0, health_delta: int = 0 +) -> DiffContext: + return DiffContext( + new_func={f"func-{index}" for index 
in range(new_clones)}, + new_block=set(), + new_clones_count=new_clones, + metrics_diff=MetricsDiff( + new_high_risk_functions=(), + new_high_coupling_classes=(), + new_cycles=(), + new_dead_code=(), + health_delta=health_delta, + ), + coverage_adoption_diff_available=False, + api_surface_diff_available=False, + ) + + +def test_patch_verify_health_delta_from_metrics_diff() -> None: + """Covers _health_delta with a real MetricsDiff object.""" + printer = _RecordingPrinter() + + exit_code = render_patch_verify( + console=printer, + args=cast(Any, _args()), + strictness="ci", + analysis=_analysis_with_metrics(health=85), + diff_context=_diff_context_with_metrics_diff(health_delta=5), + baseline_state=cast(Any, _baseline_state()), + quiet=True, + ) + + assert exit_code == int(ExitCode.SUCCESS) + assert "health=80->85" in printer.text + + def test_patch_verify_allows_diff_against_without_changed_only() -> None: cli_workflow.console = cli_workflow._make_plain_console() args = Namespace( diff --git a/tests/test_mcp_service.py b/tests/test_mcp_service.py index fb258fe8..488ba3ae 100644 --- a/tests/test_mcp_service.py +++ b/tests/test_mcp_service.py @@ -3141,6 +3141,66 @@ def test_normalize_intent_scope_hint_on_invalid_type() -> None: mcp_intent_mod.normalize_intent_scope(["pkg/a.py"]) +def test_normalize_intent_scope_edge_cases() -> None: + """Path normalization: traversal, absolute, leading ./ prefix.""" + # leading ./ stripped + scope = mcp_intent_mod.normalize_intent_scope({"allowed_files": ["./pkg/a.py"]}) + assert scope.allowed_files == ("pkg/a.py",) + + # absolute path rejected + with pytest.raises(ValueError, match="relative"): + mcp_intent_mod.normalize_intent_scope({"allowed_files": ["/abs/path.py"]}) + + # traversal rejected + with pytest.raises(ValueError, match="traversal"): + mcp_intent_mod.normalize_intent_scope({"allowed_files": ["../escape.py"]}) + + # string instead of list rejected for allowed_files + with pytest.raises(ValueError, match="list of relative 
paths"): + mcp_intent_mod.normalize_intent_scope({"allowed_files": "pkg/a.py"}) + + # optional paths: string instead of list rejected + with pytest.raises(ValueError, match="list of relative paths"): + mcp_intent_mod.normalize_intent_scope( + {"allowed_files": ["pkg/a.py"], "allowed_related": "pkg/b.py"} + ) + + +def test_normalize_expected_effects_rejects_string() -> None: + """expected_effects must be a list, not a bare string.""" + with pytest.raises(ValueError, match="list of strings"): + mcp_intent_mod.normalize_expected_effects("single effect") + + +def test_intent_record_payload_includes_check_result() -> None: + """IntentRecord.to_payload includes check_result when present.""" + check = mcp_intent_mod.IntentCheckResult( + status=mcp_intent_mod.IntentStatus.CLEAN, + declared_scope=("pkg/a.py",), + actual_changed_files=("pkg/a.py",), + unexpected_files=(), + forbidden_touched=(), + required_action=None, + message="clean", + ) + record = mcp_intent_mod.IntentRecord( + intent_id="test-001", + run_id="run1234", + report_digest="abc123", + status=mcp_intent_mod.IntentStatus.CLEAN, + declared_at_utc="2026-01-01T00:00:00Z", + scope=mcp_intent_mod.IntentScope(allowed_files=("pkg/a.py",)), + intent_description="test", + expected_effects=(), + guards=(), + check_result=check, + ) + payload = record.to_payload() + assert "check_result" in payload + check_payload = cast("Mapping[str, object]", payload["check_result"]) + assert check_payload["status"] == "clean" + + def test_claim_guard_detects_space_variant_overclaims() -> None: """Underscore-to-space fallback catches natural-language metric family names.""" payload = mcp_claim_guard_mod.validate_claims( From c6301da396aa79651874b229d1ff9b1fd5f1dcaf Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Mon, 25 May 2026 17:50:46 +0500 Subject: [PATCH 026/318] docs: fix admonition indentation across all doc files --- docs/book/20-mcp-interface.md | 32 ++++++++++++------------ docs/book/28-claim-guard.md | 12 ++++----- 
docs/mcp.md | 46 +++++++++++++++++------------------ 3 files changed, 45 insertions(+), 45 deletions(-) diff --git a/docs/book/20-mcp-interface.md b/docs/book/20-mcp-interface.md index 4ac29569..69b120bb 100644 --- a/docs/book/20-mcp-interface.md +++ b/docs/book/20-mcp-interface.md @@ -9,9 +9,9 @@ pipeline/report contracts as the CLI. It does not create a second analysis engine or a second persistence model. !!! note "Integration surface, not a second analyzer" -MCP composes over the canonical report and run state shared by CLI, HTML, -and SARIF. It never mutates source files, baselines, analysis cache, or -report artifacts. + MCP composes over the canonical report and run state shared by CLI, HTML, + and SARIF. It never mutates source files, baselines, analysis cache, or + report artifacts. --- @@ -58,9 +58,9 @@ Current server characteristics: MCP service contract. !!! warning "Absolute roots and remote exposure" -Analysis tools require an absolute repository root. HTTP exposure beyond -loopback requires explicit `--allow-remote` and has no built-in -authentication. + Analysis tools require an absolute repository root. HTTP exposure beyond + loopback requires explicit `--allow-remote` and has no built-in + authentication. --- @@ -129,18 +129,18 @@ drill into one finding or one hotspot family. | `validate_review_claims` | `text`, `run_id`, `require_citations` | Citation-based overclaim detection against stored run semantics | ??? info "Blast radius: do_not_touch vs review_context" -`do_not_touch` is limited to actionable negative context: baselines, -generated CodeClone state, explicit forbidden paths. Report-only signals -such as security boundary inventory and overloaded-module candidates are -returned as `review_context` — information, not edit prohibitions. Long -context sections include `total`, `shown`, and `truncated` summaries. + `do_not_touch` is limited to actionable negative context: baselines, + generated CodeClone state, explicit forbidden paths. 
Report-only signals + such as security boundary inventory and overloaded-module candidates are + returned as `review_context` — information, not edit prohibitions. Long + context sections include `total`, `shown`, and `truncated` summaries. ??? info "Patch contract modes" -**Budget** reads one stored run and optional intent. Shows regression -headroom per quality dimension before editing. **Verify** compares -explicit before/after stored runs, previews gates, validates scope, and -reports baseline-abuse signals. Missing runs return -`status="unverified"`. + **Budget** reads one stored run and optional intent. Shows regression + headroom per quality dimension before editing. **Verify** compares + explicit before/after stored runs, previews gates, validates scope, and + reports baseline-abuse signals. Missing runs return + `status="unverified"`. ### Session-local tools diff --git a/docs/book/28-claim-guard.md b/docs/book/28-claim-guard.md index f7cd5b05..05e20b8a 100644 --- a/docs/book/28-claim-guard.md +++ b/docs/book/28-claim-guard.md @@ -56,7 +56,7 @@ The pipeline is fully deterministic: | `require_citations` | `bool` | `true` | Warn when no known finding IDs or metric family names are cited | !!! info "Text limits" -Text must be non-empty and at most `50,000` characters. + Text must be non-empty and at most `50,000` characters. --- @@ -119,11 +119,11 @@ available. Without a comparison run, fix claims cannot be verified. ## Non-goals !!! 
warning "What claim guard is not" -- Not a vulnerability scanner -- Not a CI gate -- Not an LLM fact checker -- Not proof that uncited text is correct -- Not a replacement for `check_patch_contract` + - Not a vulnerability scanner + - Not a CI gate + - Not an LLM fact checker + - Not proof that uncited text is correct + - Not a replacement for `check_patch_contract` --- diff --git a/docs/mcp.md b/docs/mcp.md index f09c3950..49ae9d1e 100644 --- a/docs/mcp.md +++ b/docs/mcp.md @@ -192,9 +192,9 @@ codeclone-mcp --transport streamable-http --host 127.0.0.1 --port 8000 ``` !!! warning "Remote exposure is opt-in" -Non-loopback hosts require `--allow-remote`. The built-in HTTP server -has no authentication. Use it only on trusted networks or behind an -authenticated reverse proxy. + Non-loopback hosts require `--allow-remote`. The built-in HTTP server + has no authentication. Use it only on trusted networks or behind an + authenticated reverse proxy. ### Run retention @@ -236,8 +236,8 @@ stored runs. | `compare_runs` | Run-to-run delta: regressions, improvements, health change | !!! tip "Start here" -After analysis, call `get_run_summary` or `get_production_triage` first. -Prefer `list_hotspots` or `check_*` before broad `list_findings` calls. + After analysis, call `get_run_summary` or `get_production_triage` first. + Prefer `list_hotspots` or `check_*` before broad `list_findings` calls. ### Phase 3: Drill down @@ -298,7 +298,7 @@ sequenceDiagram M-->>A: after_run_id registered A->>M: check(intent_id, changed_files or diff_ref) - Note right of M: intent stays on before-run; changed scope is explicit + Note right of M: intent stays on before-run, changed scope is explicit M-->>A: clean / expanded / violated A->>M: check_patch_contract(mode=verify, before_run_id, after_run_id, intent_id) @@ -323,16 +323,16 @@ sequenceDiagram | `validate_review_claims` | Citation-based overclaim detection against stored run semantics | ??? 
info "Blast radius: do_not_touch vs review_context" -`do_not_touch` contains actionable edit prohibitions: baselines, generated -state, forbidden paths. `review_context` contains report-only signals: -security boundary inventory, overloaded-module candidates, known baseline -debt. Review context is information, not an edit ban. + `do_not_touch` contains actionable edit prohibitions: baselines, generated + state, forbidden paths. `review_context` contains report-only signals: + security boundary inventory, overloaded-module candidates, known baseline + debt. Review context is information, not an edit ban. ??? info "Patch contract modes" -**Budget** reads one stored run and optional intent. Shows regression -headroom per quality dimension before editing. **Verify** compares explicit -before/after stored runs, previews gates, validates scope, and reports -baseline-abuse signals. Missing runs return `status=unverified`. + **Budget** reads one stored run and optional intent. Shows regression + headroom per quality dimension before editing. **Verify** compares explicit + before/after stored runs, previews gates, validates scope, and reports + baseline-abuse signals. Missing runs return `status=unverified`. ### Phase 6: Session management @@ -454,15 +454,15 @@ Separate accepted baseline debt from new regressions. ``` !!! tip "Best practices" -- Use `analyze_changed_paths` for PRs, not full analysis. -- Prefer `get_run_summary` or `get_production_triage` as the first pass. -- Prefer `list_hotspots` or narrow `check_*` tools before broad `list_findings`. -- Use `get_finding` / `get_remediation` for one finding instead of raising -`detail_level` on larger lists. -- Pass an absolute `root` — MCP rejects relative roots like `.`. -- Use `coverage_xml` only with `analysis_mode="full"`. -- Use `source_kind="production-only"` to cut test/fixture noise. -- Use `mark_finding_reviewed` + `exclude_reviewed=true` in long sessions. 
+ - Use `analyze_changed_paths` for PRs, not full analysis. + - Prefer `get_run_summary` or `get_production_triage` as the first pass. + - Prefer `list_hotspots` or narrow `check_*` tools before broad `list_findings`. + - Use `get_finding` / `get_remediation` for one finding instead of raising + `detail_level` on larger lists. + - Pass an absolute `root` — MCP rejects relative roots like `.`. + - Use `coverage_xml` only with `analysis_mode="full"`. + - Use `source_kind="production-only"` to cut test/fixture noise. + - Use `mark_finding_reviewed` + `exclude_reviewed=true` in long sessions. --- From 3d75354c7db3417e96994aec833331fb2a63dbe3 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Mon, 25 May 2026 23:01:36 +0500 Subject: [PATCH 027/318] feat(controller): add session stats and lease-aware intents --- codeclone/config/spec.py | 8 + codeclone/surfaces/cli/session_stats.py | 438 ++++++++ codeclone/surfaces/cli/types.py | 1 + codeclone/surfaces/cli/workflow.py | 27 +- .../surfaces/mcp/_session_intent_mixin.py | 97 +- codeclone/surfaces/mcp/_session_shared.py | 10 +- codeclone/surfaces/mcp/_workspace_intents.py | 57 +- codeclone/surfaces/mcp/server.py | 13 +- codeclone/surfaces/mcp/service.py | 1 + codeclone/surfaces/mcp/session.py | 44 +- codeclone/ui_messages/__init__.py | 4 + .../fixtures/contract_snapshots/cli_help.txt | 4 +- .../contract_snapshots/mcp_tool_schemas.json | 12 + .../public_api_surface.json | 2 +- tests/test_cli_session_stats.py | 852 +++++++++++++++ tests/test_mcp_service.py | 996 +++++++++++++++++- tests/test_workspace_intents.py | 627 +++++++++++ 17 files changed, 3161 insertions(+), 32 deletions(-) create mode 100644 codeclone/surfaces/cli/session_stats.py create mode 100644 tests/test_cli_session_stats.py diff --git a/codeclone/config/spec.py b/codeclone/config/spec.py index 23e74807..95f20f07 100644 --- a/codeclone/config/spec.py +++ b/codeclone/config/spec.py @@ -258,6 +258,14 @@ def _option( metavar="LEVEL", 
help_text=ui.HELP_STRICTNESS, ), + _option( + dest="session_stats", + group="Analysis", + cli_kind="store_true", + flags=("--session-stats",), + default=False, + help_text=ui.HELP_SESSION_STATS, + ), _option( dest="cache_path", group="Analysis", diff --git a/codeclone/surfaces/cli/session_stats.py b/codeclone/surfaces/cli/session_stats.py new file mode 100644 index 00000000..b109e43d --- /dev/null +++ b/codeclone/surfaces/cli/session_stats.py @@ -0,0 +1,438 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import json +import os +import time +from collections import defaultdict +from collections.abc import Mapping +from dataclasses import dataclass +from datetime import datetime +from pathlib import Path +from typing import TYPE_CHECKING + +from ...contracts import ExitCode +from .types import PrinterLike + +if TYPE_CHECKING: + from ..mcp._workspace_intents import WorkspaceIntentRecord + +_REPORT_PATH_PARTS = (".cache", "codeclone", "report.json") +_MAX_ALLOWED_FILES_SHOWN = 5 + + +@dataclass(frozen=True, slots=True) +class _IntentSnapshot: + intent_id: str + status: str + ownership: str + scope_file_count: int + allowed_files: tuple[str, ...] + declared_at_utc: str + lease_remaining_seconds: int + + +@dataclass(frozen=True, slots=True) +class _AgentSnapshot: + pid: int + start_epoch: int + label: str + alive: bool + intents: tuple[_IntentSnapshot, ...] + + +@dataclass(frozen=True, slots=True) +class _SessionSnapshot: + root: Path + agents: tuple[_AgentSnapshot, ...] 
+ stale_count: int + expired_count: int + recoverable_count: int + latest_run_id: str | None + latest_run_health: int | None + latest_run_findings: int | None + latest_run_files: int | None + latest_run_age_seconds: int | None + cache_present: bool + workspace_health: str + + +def render_session_stats( + *, + console: PrinterLike, + root_path: Path, + quiet: bool, +) -> int: + """Render workspace session status. Returns ExitCode int.""" + try: + snapshot = _collect_session_snapshot(root_path) + except Exception as exc: + console.print(f"CONTRACT ERROR: failed to read session state: {exc}") + return int(ExitCode.CONTRACT_ERROR) + if quiet: + return _render_quiet(console, snapshot) + return _render_verbose(console, snapshot) + + +def _collect_session_snapshot(root_path: Path) -> _SessionSnapshot: + from ...surfaces.mcp._workspace_intents import ( + IntentOwnership, + classify_intent_ownership, + list_workspace_intents, + utc_now, + ) + + now = utc_now() + own_pid = os.getpid() + own_start_epoch = _process_start_epoch() + + try: + records = list_workspace_intents(root=root_path, exclude_stale=False) + except Exception: + records = () + + stale_count = 0 + expired_count = 0 + recoverable_count = 0 + agent_intents: dict[tuple[int, int], list[_IntentSnapshot]] = defaultdict(list) + agent_labels: dict[tuple[int, int], str] = {} + agent_alive: dict[tuple[int, int], bool] = {} + + for record in records: + ownership = classify_intent_ownership( + record, + own_pid=own_pid, + own_start_epoch=own_start_epoch, + now=now, + ) + + if ownership == IntentOwnership.EXPIRED: + expired_count += 1 + continue + if ownership == IntentOwnership.OWN_STALE: + stale_count += 1 + if ownership == IntentOwnership.RECOVERABLE: + recoverable_count += 1 + + lease_remaining = _lease_remaining_seconds(record, now) + scope = record.scope + allowed_files: list[str] = [] + if isinstance(scope, dict): + raw_files = scope.get("allowed_files") + if isinstance(raw_files, list): + allowed_files = [str(f) 
for f in raw_files] + + agent_key = (record.agent_pid, record.agent_start_epoch) + agent_labels[agent_key] = record.agent_label + if agent_key not in agent_alive: + agent_alive[agent_key] = _is_pid_alive(record.agent_pid) + + agent_intents[agent_key].append( + _IntentSnapshot( + intent_id=record.intent_id, + status=record.status, + ownership=ownership.value, + scope_file_count=len(allowed_files), + allowed_files=tuple(sorted(allowed_files)), + declared_at_utc=record.declared_at_utc, + lease_remaining_seconds=lease_remaining, + ) + ) + + agents: list[_AgentSnapshot] = [] + for agent_key in sorted(agent_intents): + pid, start_epoch = agent_key + agents.append( + _AgentSnapshot( + pid=pid, + start_epoch=start_epoch, + label=agent_labels.get(agent_key, ""), + alive=agent_alive.get(agent_key, False), + intents=tuple(agent_intents[agent_key]), + ) + ) + + ( + latest_run_id, + latest_run_health, + latest_run_findings, + latest_run_files, + latest_run_age_seconds, + cache_present, + ) = _read_cached_report(root_path) + + workspace_health = _classify_workspace_health( + agents=agents, + stale_count=stale_count, + expired_count=expired_count, + ) + + return _SessionSnapshot( + root=root_path, + agents=tuple(agents), + stale_count=stale_count, + expired_count=expired_count, + recoverable_count=recoverable_count, + latest_run_id=latest_run_id, + latest_run_health=latest_run_health, + latest_run_findings=latest_run_findings, + latest_run_files=latest_run_files, + latest_run_age_seconds=latest_run_age_seconds, + cache_present=cache_present, + workspace_health=workspace_health, + ) + + +def _render_quiet(console: PrinterLike, snapshot: _SessionSnapshot) -> int: + live_agents = [a for a in snapshot.agents if a.alive] + total_intents = sum(len(a.intents) for a in snapshot.agents) + parts = [ + f"session-stats: {snapshot.workspace_health}", + "|", + f"agents={len(live_agents)}", + f"intents={total_intents}", + f"stale={snapshot.stale_count}", + f"latest_run={snapshot.latest_run_id 
or 'none'}", + ] + if snapshot.latest_run_health is not None: + parts.append(f"health={snapshot.latest_run_health}") + console.print(" ".join(parts)) + return int(ExitCode.SUCCESS) + + +def _render_verbose(console: PrinterLike, snapshot: _SessionSnapshot) -> int: + console.print("[bold]╍╍╍ Session Stats ╍╍╍[/bold]") + console.print() + console.print(f" Workspace: {snapshot.root}") + + if snapshot.cache_present and snapshot.latest_run_id: + age_str = _format_age(snapshot.latest_run_age_seconds) + health_part = ( + f", health={snapshot.latest_run_health}" + if snapshot.latest_run_health is not None + else "" + ) + findings_part = ( + f", findings={snapshot.latest_run_findings}" + if snapshot.latest_run_findings is not None + else "" + ) + console.print( + f" Latest run: {snapshot.latest_run_id}" + f" ({age_str}{health_part}{findings_part})" + ) + if snapshot.latest_run_files is not None: + console.print( + f" Cache: report.json present" + f" ({snapshot.latest_run_files} files)" + ) + else: + console.print(" Latest run: none") + + console.print() + live_agents = [a for a in snapshot.agents if a.alive] + console.print(f" Active agents: {len(live_agents)}") + + for agent in live_agents: + label = agent.label or "unknown" + started_ago = _format_age(int(time.time()) - agent.start_epoch) + console.print(f" PID {agent.pid} ({label}) — started {started_ago}") + for intent in agent.intents: + file_count_label = f"{intent.scope_file_count} file" + ( + "s" if intent.scope_file_count != 1 else "" + ) + console.print( + f" {intent.intent_id} {intent.status} scope: {file_count_label}" + ) + shown_files = intent.allowed_files[:_MAX_ALLOWED_FILES_SHOWN] + if shown_files: + files_str = ", ".join(shown_files) + if len(intent.allowed_files) > _MAX_ALLOWED_FILES_SHOWN: + remaining = len(intent.allowed_files) - _MAX_ALLOWED_FILES_SHOWN + files_str += f" ... 
and {remaining} more" + console.print(f" allowed: {files_str}") + lease_str = _format_duration(intent.lease_remaining_seconds) + console.print(f" lease: {lease_str} remaining") + + console.print() + console.print(f" Stale intents: {snapshot.stale_count}") + console.print(f" Expired intents: {snapshot.expired_count}") + console.print(f" Recoverable: {snapshot.recoverable_count}") + console.print() + console.print(f" Workspace health: {snapshot.workspace_health}") + return int(ExitCode.SUCCESS) + + +def _classify_workspace_health( + *, + agents: list[_AgentSnapshot] | tuple[_AgentSnapshot, ...], + stale_count: int, + expired_count: int, +) -> str: + live_agents = [a for a in agents if a.alive] + if not live_agents: + return "idle" + + active_intent_agents = [ + agent + for agent in live_agents + if any(intent.status == "active" for intent in agent.intents) + ] + + if not active_intent_agents: + return "clean" + + if len(active_intent_agents) >= 2 and _has_scope_overlap(active_intent_agents): + return "contested" + + return "active" + + +def _has_scope_overlap(agents: list[_AgentSnapshot]) -> bool: + all_files: list[set[str]] = [] + for agent in agents: + agent_files: set[str] = set() + for intent in agent.intents: + if intent.status == "active": + agent_files.update(intent.allowed_files) + if agent_files: + all_files.append(agent_files) + + for i in range(len(all_files)): + for j in range(i + 1, len(all_files)): + if all_files[i] & all_files[j]: + return True + return False + + +def _read_cached_report( + root_path: Path, +) -> tuple[str | None, int | None, int | None, int | None, int | None, bool]: + report_path = root_path.joinpath(*_REPORT_PATH_PARTS) + if not report_path.is_file(): + return None, None, None, None, None, False + try: + with open(report_path, "rb") as fh: + data = json.load(fh) + except Exception: + return None, None, None, None, None, False + + run_id: str | None = None + health: int | None = None + findings: int | None = None + files: int | None 
= None + age_seconds: int | None = None + + data_mapping = data if isinstance(data, dict) else {} + digest_value = _string_field( + _mapping_at(data_mapping, ("integrity", "digest")), "value" + ) + if digest_value is not None and len(digest_value) >= 8: + run_id = digest_value[:8] + + files = _list_field_len( + _mapping_at(data_mapping, ("inventory", "file_registry")), + "items", + ) + if _mapping_at(data_mapping, ("metrics", "families")) is not None: + health = _int_field(_mapping_at(data_mapping, ("health",)), "score") + findings = _int_field(_mapping_at(data_mapping, ("findings",)), "total") + + try: + mtime = report_path.stat().st_mtime + age_seconds = max(0, int(time.time() - mtime)) + except OSError: + pass + + return run_id, health, findings, files, age_seconds, True + + +def _mapping_at( + payload: Mapping[str, object], + keys: tuple[str, ...], +) -> Mapping[str, object] | None: + current: object = payload + for key in keys: + if not isinstance(current, dict): + return None + current = current.get(key) + return current if isinstance(current, dict) else None + + +def _string_field(payload: Mapping[str, object] | None, key: str) -> str | None: + if payload is None: + return None + value = payload.get(key) + return value if isinstance(value, str) else None + + +def _int_field(payload: Mapping[str, object] | None, key: str) -> int | None: + if payload is None: + return None + value = payload.get(key) + return value if isinstance(value, int) else None + + +def _list_field_len(payload: Mapping[str, object] | None, key: str) -> int | None: + if payload is None: + return None + value = payload.get(key) + return len(value) if isinstance(value, list) else None + + +def _lease_remaining_seconds(record: WorkspaceIntentRecord, now: datetime) -> int: + from ...surfaces.mcp._workspace_intents import _lease_expiry + + expiry = _lease_expiry(record) + if expiry is None: + return 0 + delta = (expiry - now).total_seconds() + return max(0, int(delta)) + + +def 
_is_pid_alive(pid: int) -> bool: + if pid <= 0: + return False + try: + os.kill(pid, 0) + except ProcessLookupError: + return False + except PermissionError: + return True + return True + + +def _process_start_epoch() -> int: + return int(time.time()) + + +def _format_age(seconds: int | None) -> str: + if seconds is None or seconds < 0: + return "unknown" + if seconds < 60: + return f"{seconds}s ago" + minutes = seconds // 60 + if minutes < 60: + return f"{minutes}m ago" + hours = minutes // 60 + remaining_minutes = minutes % 60 + if remaining_minutes: + return f"{hours}h{remaining_minutes}m ago" + return f"{hours}h ago" + + +def _format_duration(seconds: int) -> str: + if seconds <= 0: + return "expired" + if seconds < 60: + return f"{seconds}s" + minutes = seconds // 60 + remaining_seconds = seconds % 60 + if remaining_seconds: + return f"{minutes}m{remaining_seconds}s" + return f"{minutes}m" diff --git a/codeclone/surfaces/cli/types.py b/codeclone/surfaces/cli/types.py index 2a913650..37842904 100644 --- a/codeclone/surfaces/cli/types.py +++ b/codeclone/surfaces/cli/types.py @@ -81,6 +81,7 @@ class CLIArgsLike(Protocol): blast_radius: tuple[str, ...] 
| list[str] | None patch_verify: bool strictness: str + session_stats: bool skip_metrics: bool skip_dead_code: bool skip_dependencies: bool diff --git a/codeclone/surfaces/cli/workflow.py b/codeclone/surfaces/cli/workflow.py index 42b7e758..36d4320a 100644 --- a/codeclone/surfaces/cli/workflow.py +++ b/codeclone/surfaces/cli/workflow.py @@ -169,7 +169,11 @@ def _make_console(*, no_color: bool) -> object: def _controller_query_mode(args: object) -> bool: - return bool_attr(args, "blast_radius") or bool_attr(args, "patch_verify") + return ( + bool_attr(args, "blast_radius") + or bool_attr(args, "patch_verify") + or bool_attr(args, "session_stats") + ) def _validate_controller_query_flags( @@ -195,12 +199,21 @@ def _validate_controller_query_flags( ui.fmt_contract_error("--strictness is only valid with --patch-verify.") ) sys.exit(ExitCode.CONTRACT_ERROR) + session_stats = bool_attr(args, "session_stats") + if session_stats and (blast_radius or patch_verify): + printer.print( + ui.fmt_contract_error( + "--session-stats cannot be combined with " + "--blast-radius or --patch-verify." 
+ ) + ) + sys.exit(ExitCode.CONTRACT_ERROR) if blast_radius and patch_verify: printer.print( ui.fmt_contract_error("Use --blast-radius or --patch-verify, not both.") ) sys.exit(ExitCode.CONTRACT_ERROR) - if not (blast_radius or patch_verify): + if not (blast_radius or patch_verify or session_stats): return if bool_attr(args, "update_baseline") or bool_attr(args, "update_metrics_baseline"): printer.print( @@ -391,6 +404,16 @@ def _main_impl() -> None: args=args, strictness_explicit=strictness_explicit, ) + if bool_attr(args, "session_stats"): + from .session_stats import render_session_stats + + sys.exit( + render_session_stats( + console=_console(), + root_path=root_path, + quiet=args.quiet, + ) + ) git_diff_ref = _validate_changed_scope_args(args=args) changed_paths = ( _git_diff_changed_paths(root_path=root_path, git_diff_ref=git_diff_ref) diff --git a/codeclone/surfaces/mcp/_session_intent_mixin.py b/codeclone/surfaces/mcp/_session_intent_mixin.py index f3107445..9168e01a 100644 --- a/codeclone/surfaces/mcp/_session_intent_mixin.py +++ b/codeclone/surfaces/mcp/_session_intent_mixin.py @@ -32,6 +32,9 @@ MCPServiceContractError, ) from ._workspace_intents import ( + DEFAULT_LEASE_SECONDS, + MAX_LEASE_SECONDS, + MIN_LEASE_SECONDS, IntentOwnership, WorkspaceIntentRecord, WorkspaceIntentStatus, @@ -108,6 +111,7 @@ def manage_change_intent( changed_files: Sequence[str] | None = None, root: str | None = None, ttl_seconds: int | None = None, + lease_seconds: int | None = None, ) -> dict[str, object]: match action: case "declare": @@ -136,6 +140,11 @@ def manage_change_intent( ) case "clear": return self._clear_change_intent(intent_id=intent_id) + case "renew": + return self._renew_change_intent( + intent_id=intent_id, + lease_seconds=lease_seconds, + ) case "list_workspace": return self._list_workspace_intents(root=root) case "gc_workspace": @@ -156,7 +165,8 @@ def manage_change_intent( raise MCPServiceContractError( "Invalid value for action: " f"{action!r}. 
Expected one of: check, clear, declare, " - "gc_workspace, get, list_workspace, recover, reset_workspace." + "gc_workspace, get, list_workspace, recover, renew, " + "reset_workspace." ) def _declare_change_intent( @@ -464,6 +474,60 @@ def _renew_lease_for_run(self, *, record: MCPRunRecord) -> None: for intent in intents: self._renew_lease_if_active(record=record, intent=intent) + def _renew_change_intent( + self, + *, + intent_id: str | None, + lease_seconds: int | None, + ) -> dict[str, object]: + if intent_id is None: + with self._state_lock: + all_intents = list(self._active_intents.values()) + if not all_intents: + raise MCPServiceContractError( + "action='renew' requires intent_id or an active intent." + ) + active_intent = all_intents[-1] + intent_id = active_intent.intent_id + record, active_intent = self._resolve_intent( + run_id=None, + intent_id=intent_id, + ) + renewed = renew_workspace_intent_lease( + root=record.root, + pid=self._agent_pid, + start_epoch=self._agent_start_epoch, + intent_id=active_intent.intent_id, + lease_seconds=lease_seconds, + ) + latest = ( + find_workspace_intent(root=record.root, intent_id=active_intent.intent_id) + if renewed + else None + ) + latest_record = latest[1] if latest is not None else None + effective_lease = ( + latest_record.lease_seconds + if latest_record is not None + else resolved_lease_seconds(lease_seconds) + ) + return { + "intent_id": active_intent.intent_id, + "status": active_intent.status.value, + "lease_renewed": renewed, + "lease_seconds": effective_lease, + "lease_expires_at_utc": ( + self._lease_expired_at_utc(latest_record) + if latest_record is not None + else None + ), + "lease_policy": { + "min_seconds": MIN_LEASE_SECONDS, + "default_seconds": DEFAULT_LEASE_SECONDS, + "max_seconds": MAX_LEASE_SECONDS, + }, + } + def _list_workspace_intents(self, *, root: str | None) -> dict[str, object]: root_path = self._resolve_workspace_root(root) counts = workspace_status_counts(root=root_path) @@ -797,22 
+861,31 @@ def _reset_workspace_intent( "action_taken": "removed" if removed else "failed", "reason": reason, } - if ownership == IntentOwnership.FOREIGN_ACTIVE: + if ownership in {IntentOwnership.FOREIGN_ACTIVE, IntentOwnership.FOREIGN_STALE}: + hint = ( + ( + "This intent belongs to a live process with a valid lease. " + "Do NOT kill the process. Ask the user to confirm whether " + "this is an abandoned session or a parallel agent." + ) + if ownership == IntentOwnership.FOREIGN_ACTIVE + else ( + "This intent belongs to a live process whose lease has expired. " + "The owner may still be working. Coordinate with the user " + "before resetting." + ) + ) return { "intent_id": workspace_record.intent_id, "action_taken": "rejected", - "reason": "foreign_active", + "reason": ownership.value, "ownership": ownership.value, "agent_pid": workspace_record.agent_pid, "agent_start_epoch": workspace_record.agent_start_epoch, "agent_label": workspace_record.agent_label, - "escalation_hint": ( - "This intent belongs to a live process with a valid lease. " - "Do NOT kill the process. Ask the user to confirm whether " - "this is an abandoned session or a parallel agent." - ), + "escalation_hint": hint, "message": ( - "Intent has a valid lease from a live process. Coordinate " + "Intent belongs to a live process. Coordinate " "with the owning agent or user before resetting it." ), } @@ -902,6 +975,12 @@ def _recovery_rejection_message(self, ownership: IntentOwnership) -> str: "Use action='list_workspace' to inspect, then coordinate with " "the user." ) + if ownership == IntentOwnership.FOREIGN_STALE: + return ( + "Intent belongs to a live process with an expired lease. " + "The owner may still be working. Coordinate with the user " + "before recovering." + ) if ownership == IntentOwnership.EXPIRED: return "Intent has expired (TTL). Declare a new intent instead." 
if ownership == IntentOwnership.OWN_ACTIVE: diff --git a/codeclone/surfaces/mcp/_session_shared.py b/codeclone/surfaces/mcp/_session_shared.py index 3df54269..6f8b8ff8 100644 --- a/codeclone/surfaces/mcp/_session_shared.py +++ b/codeclone/surfaces/mcp/_session_shared.py @@ -749,8 +749,8 @@ class MCPHelpTopicSpec: ), ( "Recover ownership only when list_workspace marks an intent " - "recoverable and the matching run is available; do not kill " - "foreign active MCP processes." + "recoverable and the matching run is available; live foreign " + "active or stale intents require coordination." ), ( "Run analyze_repository, then declare intent with allowed_files, " @@ -760,6 +760,10 @@ class MCPHelpTopicSpec: "Use get_blast_radius and check_patch_contract(mode='budget') " "as the pre-edit boundary." ), + ( + "Use manage_change_intent(action='renew') before long edits, " + "test runs, or other blind windows between MCP calls." + ), ( "Hard overlaps mean two agents claimed the same primary file; " "soft overlaps mean primary files overlap related context." @@ -771,7 +775,7 @@ class MCPHelpTopicSpec: ), ( "Use reset_workspace for interrupted own, expired, or " - "recoverable registry records; foreign active intents require " + "recoverable registry records; foreign live intents require " "coordination." 
), ), diff --git a/codeclone/surfaces/mcp/_workspace_intents.py b/codeclone/surfaces/mcp/_workspace_intents.py index 0a00c8b3..18d6811b 100644 --- a/codeclone/surfaces/mcp/_workspace_intents.py +++ b/codeclone/surfaces/mcp/_workspace_intents.py @@ -27,7 +27,7 @@ MAX_TTL_SECONDS: Final = 86400 DEFAULT_LEASE_SECONDS: Final = 300 MIN_LEASE_SECONDS: Final = 60 -MAX_LEASE_SECONDS: Final = 3600 +MAX_LEASE_SECONDS: Final = 600 _HEX_DIGEST_LENGTH: Final = 64 @@ -43,8 +43,9 @@ class WorkspaceIntentStatus(str, Enum): class IntentOwnership(str, Enum): OWN_ACTIVE = "own_active" OWN_STALE = "own_stale" - RECOVERABLE = "recoverable" FOREIGN_ACTIVE = "foreign_active" + FOREIGN_STALE = "foreign_stale" + RECOVERABLE = "recoverable" EXPIRED = "expired" @@ -123,6 +124,12 @@ def to_payload( "Do NOT kill the process. Ask the user to confirm whether " "this is an abandoned session or a parallel agent." ) + elif ownership == IntentOwnership.FOREIGN_STALE: + payload["escalation_hint"] = ( + "This intent belongs to a live process whose lease has expired. " + "The owner may still be working (context overflow, long edit, " + "test run). Coordinate with the user before proceeding." 
+ ) return payload @@ -142,11 +149,13 @@ def classify_intent_ownership( lease_valid = lease_expiry is not None and lease_expiry > now if is_own: return IntentOwnership.OWN_ACTIVE if lease_valid else IntentOwnership.OWN_STALE - if not lease_valid: - return IntentOwnership.RECOVERABLE - if not _is_pid_alive(record.agent_pid): - return IntentOwnership.RECOVERABLE - return IntentOwnership.FOREIGN_ACTIVE + if _is_pid_alive(record.agent_pid): + return ( + IntentOwnership.FOREIGN_ACTIVE + if lease_valid + else IntentOwnership.FOREIGN_STALE + ) + return IntentOwnership.RECOVERABLE def _lease_expiry(record: WorkspaceIntentRecord) -> datetime | None: @@ -425,6 +434,7 @@ def renew_workspace_intent_lease( pid: int, start_epoch: int, intent_id: str, + lease_seconds: int | None = None, ) -> bool: found = find_workspace_intent(root=root, intent_id=intent_id) if found is None: @@ -436,7 +446,14 @@ def renew_workspace_intent_lease( expires = _parse_utc(record.expires_at_utc) if expires is None or expires <= now: return False - updated = replace(record, lease_renewed_at_utc=format_utc(now)) + new_lease = ( + resolved_lease_seconds(lease_seconds) + if lease_seconds is not None + else record.lease_seconds + ) + updated = replace( + record, lease_renewed_at_utc=format_utc(now), lease_seconds=new_lease + ) try: write_json_document_atomically( path=path, @@ -516,6 +533,24 @@ def workspace_status_counts(*, root: Path) -> dict[str, int]: } +_CONFLICT_OWNERSHIP: frozenset[IntentOwnership] = frozenset( + { + IntentOwnership.FOREIGN_ACTIVE, + IntentOwnership.FOREIGN_STALE, + } +) + +_CONFLICT_SEVERITY: dict[IntentOwnership, str] = { + IntentOwnership.FOREIGN_ACTIVE: "active", + IntentOwnership.FOREIGN_STALE: "stale", +} + +_CONFLICT_ACTION: dict[IntentOwnership, str] = { + IntentOwnership.FOREIGN_ACTIVE: "stop_and_coordinate", + IntentOwnership.FOREIGN_STALE: "coordinate_or_recover", +} + + def detect_conflicts( *, new_scope: Mapping[str, object], @@ -533,7 +568,7 @@ def detect_conflicts( 
own_start_epoch=own_start_epoch, now=now, ) - if ownership != IntentOwnership.FOREIGN_ACTIVE: + if ownership not in _CONFLICT_OWNERSHIP: continue existing_allowed, existing_related = _scope_file_sets(record.scope) hard_overlap = tuple(sorted(new_allowed.intersection(existing_allowed))) @@ -552,6 +587,9 @@ def detect_conflicts( "agent_start_epoch": record.agent_start_epoch, "agent_label": record.agent_label, "intent": record.intent, + "ownership": ownership.value, + "severity": _CONFLICT_SEVERITY[ownership], + "recommended_action": _CONFLICT_ACTION[ownership], "overlap_type": _overlap_type( hard=bool(hard_overlap), soft=bool(soft_overlap), @@ -565,6 +603,7 @@ def detect_conflicts( return sorted( conflicts, key=lambda item: ( + str(item["severity"]), str(item["overlap_type"]), str(item["agent_label"]), _sort_agent_pid(item.get("agent_pid")), diff --git a/codeclone/surfaces/mcp/server.py b/codeclone/surfaces/mcp/server.py index 32536ffd..509158b7 100644 --- a/codeclone/surfaces/mcp/server.py +++ b/codeclone/surfaces/mcp/server.py @@ -48,7 +48,8 @@ "absolute repository root to analysis tools. For file edits, call " "manage_change_intent(action='list_workspace', root=...) before analysis, " "then analyze, declare intent, inspect blast radius and patch budget, edit " - "within scope, re-analyze, verify, validate review claims, and clear intent. " + "within scope, renew intent lease before long blind windows, re-analyze, " + "verify, validate review claims, and clear intent. " "If concurrent intents overlap, narrow scope or coordinate. This server never " "updates baselines and never mutates source files, analysis cache, or reports; " "it may write ephemeral workspace coordination state under " @@ -169,6 +170,9 @@ async def _lifespan(_app: FastMCP) -> AsyncIterator[dict[str, object]]: ) # FastMCP otherwise reports the `mcp` package version in initialize/serverInfo. 
mcp._mcp_server.version = __version__ + # Inject FastMCP reference so the service can lazily resolve the MCP + # clientInfo (name/version) for workspace intent agent_label fields. + service._fastmcp = mcp def tool(*args: object, **kwargs: object) -> Callable[[MCPCallable], MCPCallable]: decorator = mcp.tool(*args, **kwargs) # type: ignore[arg-type] @@ -915,8 +919,9 @@ def list_reviewed_findings(run_id: str | None = None) -> dict[str, object]: "to inspect concurrent workspace intents, 'declare' to declare " "intended scope before editing, 'get' to retrieve active intent, " "'check' to verify actual diff against declared scope, 'clear' to " - "remove intent, 'gc_workspace' to clean stale registry files, " - "'recover' to explicitly reclaim a stale leased intent, and " + "remove intent, 'renew' to refresh the active lease before long " + "edits or test runs, 'gc_workspace' to clean stale registry files, " + "'recover' to explicitly reclaim a recoverable intent, and " "'reset_workspace' for interrupted-session recovery. In-memory " "intent state remains session-local; workspace coordination state " "is ephemeral under .cache/codeclone/intents/." 
@@ -935,6 +940,7 @@ def manage_change_intent( changed_files: list[str] | None = None, root: str | None = None, ttl_seconds: int | None = None, + lease_seconds: int | None = None, ) -> dict[str, object]: return service.manage_change_intent( action=action, @@ -947,6 +953,7 @@ def manage_change_intent( changed_files=changed_files, root=root, ttl_seconds=ttl_seconds, + lease_seconds=lease_seconds, ) @tool( diff --git a/codeclone/surfaces/mcp/service.py b/codeclone/surfaces/mcp/service.py index 266de2d0..8a23f942 100644 --- a/codeclone/surfaces/mcp/service.py +++ b/codeclone/surfaces/mcp/service.py @@ -301,6 +301,7 @@ def _apply_public_method_signatures() -> None: _kwonly("changed_files", "Sequence[str] | None", None), _kwonly("root", "str | None", None), _kwonly("ttl_seconds", "int | None", None), + _kwonly("lease_seconds", "int | None", None), ), "get_remediation": ( _kwonly("finding_id", "str"), diff --git a/codeclone/surfaces/mcp/session.py b/codeclone/surfaces/mcp/session.py index e553079c..c1c858f4 100644 --- a/codeclone/surfaces/mcp/session.py +++ b/codeclone/surfaces/mcp/session.py @@ -95,7 +95,49 @@ def __init__(self, *, history_limit: int = DEFAULT_MCP_HISTORY_LIMIT) -> None: self._intent_sequence = 0 self._agent_pid = os.getpid() self._agent_start_epoch = int(time.time()) - self._agent_label = os.environ.get("CODECLONE_AGENT_LABEL", "") + self._agent_label_cache: str | None = None + self._fastmcp: object | None = None + + # ------------------------------------------------------------------ + # Agent label: lazy-resolved from MCP clientInfo on first access + # ------------------------------------------------------------------ + + @property + def _agent_label(self) -> str: + if self._agent_label_cache is None: + self._agent_label_cache = self._resolve_agent_label() + return self._agent_label_cache + + @_agent_label.setter + def _agent_label(self, value: str) -> None: + self._agent_label_cache = value + + def _resolve_agent_label(self) -> str: + """Build a 
human-readable agent label from MCP client metadata. + + Resolution order: + 1. MCP ``clientInfo`` from the protocol ``initialize`` handshake + (available after the first tool call) → ``"name/version"``. + 2. Fallback → ``"pid-"``. + """ + try: + get_context = getattr(self._fastmcp, "get_context", None) + if not callable(get_context): + return f"pid-{self._agent_pid}" + ctx = get_context() + session = getattr(ctx, "session", None) + params = getattr(session, "client_params", None) + info = getattr(params, "clientInfo", None) + name = getattr(info, "name", None) + if not isinstance(name, str) or not name: + return f"pid-{self._agent_pid}" + version = getattr(info, "version", None) + if isinstance(version, str) and version: + return f"{name}/{version}" + return name + except Exception: + pass + return f"pid-{self._agent_pid}" def analyze_repository(self, request: MCPAnalysisRequest) -> dict[str, object]: self._validate_analysis_request(request) diff --git a/codeclone/ui_messages/__init__.py b/codeclone/ui_messages/__init__.py index 27ad98e8..8b23ac44 100644 --- a/codeclone/ui_messages/__init__.py +++ b/codeclone/ui_messages/__init__.py @@ -80,6 +80,10 @@ HELP_STRICTNESS = ( "Strictness profile for --patch-verify: ci, strict, or relaxed.\nDefault: ci." ) +HELP_SESSION_STATS = ( + "Show workspace session status: active agents, intents, lease health.\n" + "Read-only, does not run analysis." +) HELP_CACHE_PATH = ( "Path to the cache file.\n" "If FILE is omitted, uses /.cache/codeclone/cache.json." 
diff --git a/tests/fixtures/contract_snapshots/cli_help.txt b/tests/fixtures/contract_snapshots/cli_help.txt index 1558a043..a1106cca 100644 --- a/tests/fixtures/contract_snapshots/cli_help.txt +++ b/tests/fixtures/contract_snapshots/cli_help.txt @@ -2,7 +2,7 @@ usage: codeclone [--min-loc MIN_LOC] [--min-stmt MIN_STMT] [--processes PROCESSES] [--changed-only | --no-changed-only] [--diff-against GIT_REF] [--paths-from-git-diff GIT_REF] [--blast-radius FILE [FILE ...]] [--patch-verify] - [--strictness LEVEL] [--cache-path [FILE]] + [--strictness LEVEL] [--session-stats] [--cache-path [FILE]] [--cache-dir [FILE]] [--max-cache-size-mb MB] [--baseline [FILE]] [--max-baseline-size-mb MB] [--update-baseline | --no-update-baseline] @@ -65,6 +65,8 @@ Analysis: Runs analysis, checks baseline regressions and gate status, then exits. --strictness LEVEL Strictness profile for --patch-verify: ci, strict, or relaxed. Default: ci. + --session-stats Show workspace session status: active agents, intents, lease health. + Read-only, does not run analysis. --cache-path [FILE] Path to the cache file. If FILE is omitted, uses /.cache/codeclone/cache.json. --cache-dir [FILE] Legacy alias for --cache-path. 
diff --git a/tests/fixtures/contract_snapshots/mcp_tool_schemas.json b/tests/fixtures/contract_snapshots/mcp_tool_schemas.json index abb04c87..5891a9fc 100644 --- a/tests/fixtures/contract_snapshots/mcp_tool_schemas.json +++ b/tests/fixtures/contract_snapshots/mcp_tool_schemas.json @@ -1815,6 +1815,18 @@ ], "default": null, "title": "Ttl Seconds" + }, + "lease_seconds": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Lease Seconds" } }, "required": [ diff --git a/tests/fixtures/contract_snapshots/public_api_surface.json b/tests/fixtures/contract_snapshots/public_api_surface.json index e880bcd9..81877b88 100644 --- a/tests/fixtures/contract_snapshots/public_api_surface.json +++ b/tests/fixtures/contract_snapshots/public_api_surface.json @@ -101,7 +101,7 @@ }, { "name": "manage_change_intent", - "signature": "(self, *, action: 'str', run_id: 'str | None' = None, intent_id: 'str | None' = None, scope: 'dict[str, object] | None' = None, intent: 'str | None' = None, expected_effects: 'Sequence[str] | None' = None, diff_ref: 'str | None' = None, changed_files: 'Sequence[str] | None' = None, root: 'str | None' = None, ttl_seconds: 'int | None' = None) -> 'dict[str, object]'" + "signature": "(self, *, action: 'str', run_id: 'str | None' = None, intent_id: 'str | None' = None, scope: 'dict[str, object] | None' = None, intent: 'str | None' = None, expected_effects: 'Sequence[str] | None' = None, diff_ref: 'str | None' = None, changed_files: 'Sequence[str] | None' = None, root: 'str | None' = None, ttl_seconds: 'int | None' = None, lease_seconds: 'int | None' = None) -> 'dict[str, object]'" }, { "name": "mark_finding_reviewed", diff --git a/tests/test_cli_session_stats.py b/tests/test_cli_session_stats.py new file mode 100644 index 00000000..e32eb003 --- /dev/null +++ b/tests/test_cli_session_stats.py @@ -0,0 +1,852 @@ +from __future__ import annotations + +import json +import os +import time +from datetime import 
datetime, timezone +from pathlib import Path +from unittest.mock import patch + +import pytest + +import codeclone.surfaces.cli.session_stats as session_stats_mod +from codeclone.contracts import ExitCode +from codeclone.surfaces.cli.session_stats import ( + _AgentSnapshot, + _classify_workspace_health, + _format_age, + _format_duration, + _IntentSnapshot, + _is_pid_alive, + _lease_remaining_seconds, + _read_cached_report, + render_session_stats, +) +from codeclone.surfaces.mcp._workspace_intents import ( + MIN_LEASE_SECONDS, + WorkspaceIntentRecord, + compute_scope_digest, + expires_at, + format_utc, + write_workspace_intent, +) + + +class _RecordingPrinter: + def __init__(self) -> None: + self.lines: list[str] = [] + + def print(self, *objects: object, **kwargs: object) -> None: + self.lines.append(" ".join(str(item) for item in objects)) + + @property + def text(self) -> str: + return "\n".join(self.lines) + + +def _write_intent_file( + intents_dir: Path, + *, + intent_id: str = "intent-aabb0011-001", + pid: int = 99999, + start_epoch: int | None = None, + status: str = "active", + label: str = "test-agent", + allowed_files: list[str] | None = None, + ttl_seconds: int = 3600, + lease_seconds: int = 300, +) -> Path: + """Write a synthetic workspace intent JSON file.""" + now_epoch = start_epoch or int(time.time()) + now_utc = datetime.fromtimestamp(now_epoch, tz=timezone.utc) + declared = format_utc(now_utc) + scope_files = allowed_files or ["src/a.py"] + scope: dict[str, object] = { + "allowed_files": scope_files, + "allowed_related": [], + "forbidden": [], + } + root = intents_dir.parent.parent.parent + record = WorkspaceIntentRecord( + intent_id=intent_id, + agent_pid=pid, + agent_start_epoch=now_epoch, + agent_label=label, + run_id="a" * 64, + declared_at_utc=declared, + expires_at_utc=expires_at(declared_at=now_utc, ttl_seconds=ttl_seconds), + ttl_seconds=ttl_seconds, + status=status, + intent="test intent", + scope=scope, + 
scope_digest=compute_scope_digest(scope), + blast_radius_summary={}, + lease_renewed_at_utc=declared, + lease_seconds=lease_seconds, + report_digest="a" * 64, + ) + assert write_workspace_intent(root=root, record=record) + return intents_dir / f"{pid}-{now_epoch}-{intent_id}.json" + + +def _write_report(root: Path, *, health: int = 90, files: int = 10) -> Path: + """Write a synthetic report.json.""" + report = { + "integrity": {"digest": {"value": "abcdef01" + "0" * 56}}, + "inventory": {"file_registry": {"items": [f"f{i}.py" for i in range(files)]}}, + "metrics": {"families": {}}, + "health": {"score": health, "grade": "A"}, + "findings": {"total": 0}, + } + report_dir = root / ".cache" / "codeclone" + report_dir.mkdir(parents=True, exist_ok=True) + report_path = report_dir / "report.json" + report_path.write_text(json.dumps(report)) + return report_path + + +def _write_report_payload(root: Path, payload: object) -> Path: + report_dir = root / ".cache" / "codeclone" + report_dir.mkdir(parents=True, exist_ok=True) + report_path = report_dir / "report.json" + report_path.write_text(json.dumps(payload)) + return report_path + + +def _render_session_stats_text(root: Path, *, quiet: bool) -> str: + printer = _RecordingPrinter() + exit_code = render_session_stats( + console=printer, + root_path=root, + quiet=quiet, + ) + assert exit_code == int(ExitCode.SUCCESS) + return printer.text + + +# ── Quiet mode tests ── + + +def test_session_stats_idle_quiet(tmp_path: Path) -> None: + printer = _RecordingPrinter() + + exit_code = render_session_stats( + console=printer, + root_path=tmp_path, + quiet=True, + ) + + assert exit_code == int(ExitCode.SUCCESS) + assert "session-stats: idle" in printer.text + assert "agents=0" in printer.text + assert "intents=0" in printer.text + assert "latest_run=none" in printer.text + + +def test_session_stats_active_quiet(tmp_path: Path) -> None: + intents_dir = tmp_path / ".cache" / "codeclone" / "intents" + intents_dir.mkdir(parents=True) + 
current_pid = os.getpid() + _write_intent_file( + intents_dir, + pid=current_pid, + start_epoch=int(time.time()), + status="active", + ) + text = _render_session_stats_text(tmp_path, quiet=True) + + assert "session-stats: active" in text + assert "agents=1" in text + assert "intents=1" in text + + +def test_session_stats_with_cached_report(tmp_path: Path) -> None: + _write_report(tmp_path, health=85, files=42) + printer = _RecordingPrinter() + + exit_code = render_session_stats( + console=printer, + root_path=tmp_path, + quiet=True, + ) + + assert exit_code == int(ExitCode.SUCCESS) + assert "latest_run=abcdef01" in printer.text + assert "health=85" in printer.text + + +def test_session_stats_stale_quiet(tmp_path: Path) -> None: + intents_dir = tmp_path / ".cache" / "codeclone" / "intents" + intents_dir.mkdir(parents=True) + _write_intent_file( + intents_dir, + pid=2, + start_epoch=1000000, + status="active", + lease_seconds=1, + ) + printer = _RecordingPrinter() + + with patch( + "codeclone.surfaces.mcp._workspace_intents._is_pid_alive", + return_value=False, + ): + exit_code = render_session_stats( + console=printer, + root_path=tmp_path, + quiet=True, + ) + + assert exit_code == int(ExitCode.SUCCESS) + assert "stale=" in printer.text + + +# ── Verbose mode tests ── + + +def test_session_stats_idle_verbose(tmp_path: Path) -> None: + printer = _RecordingPrinter() + + exit_code = render_session_stats( + console=printer, + root_path=tmp_path, + quiet=False, + ) + + assert exit_code == int(ExitCode.SUCCESS) + text = printer.text + assert "Session Stats" in text + assert "Active agents: 0" in text + assert "Workspace health: idle" in text + + +def test_session_stats_active_verbose(tmp_path: Path) -> None: + intents_dir = tmp_path / ".cache" / "codeclone" / "intents" + intents_dir.mkdir(parents=True) + current_pid = os.getpid() + _write_intent_file( + intents_dir, + pid=current_pid, + start_epoch=int(time.time()), + status="active", + allowed_files=["src/a.py", 
"src/b.py"], + ) + printer = _RecordingPrinter() + + exit_code = render_session_stats( + console=printer, + root_path=tmp_path, + quiet=False, + ) + + assert exit_code == int(ExitCode.SUCCESS) + text = printer.text + assert "Active agents: 1" in text + assert f"PID {current_pid}" in text + assert "src/a.py" in text + assert "Workspace health: active" in text + + +def test_session_stats_verbose_with_report(tmp_path: Path) -> None: + _write_report(tmp_path, health=92, files=100) + printer = _RecordingPrinter() + + exit_code = render_session_stats( + console=printer, + root_path=tmp_path, + quiet=False, + ) + + assert exit_code == int(ExitCode.SUCCESS) + text = printer.text + assert "abcdef01" in text + assert "health=92" in text + assert "100 files" in text + + +def test_session_stats_verbose_with_report_without_file_count( + tmp_path: Path, +) -> None: + report_dir = tmp_path / ".cache" / "codeclone" + report_dir.mkdir(parents=True) + (report_dir / "report.json").write_text( + json.dumps( + { + "integrity": {"digest": {"value": "12345678" + "0" * 56}}, + "metrics": {"families": {}}, + "health": {"score": 77}, + "findings": {"total": 3}, + } + ) + ) + text = _render_session_stats_text(tmp_path, quiet=False) + + assert "12345678" in text + assert "findings=3" in text + assert "Cache:" not in text + + +def test_session_stats_verbose_truncates_allowed_files(tmp_path: Path) -> None: + intents_dir = tmp_path / ".cache" / "codeclone" / "intents" + intents_dir.mkdir(parents=True) + _write_intent_file( + intents_dir, + pid=os.getpid(), + start_epoch=int(time.time()), + allowed_files=[f"src/{index}.py" for index in range(7)], + ) + printer = _RecordingPrinter() + + exit_code = render_session_stats( + console=printer, + root_path=tmp_path, + quiet=False, + ) + + assert exit_code == int(ExitCode.SUCCESS) + assert "and 2 more" in printer.text + + +def test_session_stats_verbose_handles_empty_allowed_files(tmp_path: Path) -> None: + printer = _RecordingPrinter() + snapshot = 
session_stats_mod._SessionSnapshot( + root=tmp_path, + agents=( + _AgentSnapshot( + pid=123, + start_epoch=int(time.time()), + label="agent", + alive=True, + intents=( + _IntentSnapshot( + intent_id="intent-empty-files", + status="active", + ownership="foreign_active", + scope_file_count=0, + allowed_files=(), + declared_at_utc="", + lease_remaining_seconds=0, + ), + ), + ), + ), + stale_count=0, + expired_count=0, + recoverable_count=0, + latest_run_id=None, + latest_run_health=None, + latest_run_findings=None, + latest_run_files=None, + latest_run_age_seconds=None, + cache_present=False, + workspace_health="active", + ) + + exit_code = session_stats_mod._render_verbose(printer, snapshot) + + assert exit_code == int(ExitCode.SUCCESS) + assert "scope: 0 files" in printer.text + assert "lease: expired remaining" in printer.text + assert "allowed:" not in printer.text + + +# ── Edge cases ── + + +def test_session_stats_no_cache_dir(tmp_path: Path) -> None: + printer = _RecordingPrinter() + + exit_code = render_session_stats( + console=printer, + root_path=tmp_path, + quiet=True, + ) + + assert exit_code == int(ExitCode.SUCCESS) + assert "idle" in printer.text + + +def test_session_stats_corrupt_intent_file(tmp_path: Path) -> None: + intents_dir = tmp_path / ".cache" / "codeclone" / "intents" + intents_dir.mkdir(parents=True) + (intents_dir / "999-999-intent-bad.json").write_text("{corrupt json!!!") + printer = _RecordingPrinter() + + exit_code = render_session_stats( + console=printer, + root_path=tmp_path, + quiet=True, + ) + + assert exit_code == int(ExitCode.SUCCESS) + assert "idle" in printer.text + + +def test_session_stats_corrupt_report(tmp_path: Path) -> None: + report_dir = tmp_path / ".cache" / "codeclone" + report_dir.mkdir(parents=True) + (report_dir / "report.json").write_text("NOT JSON") + printer = _RecordingPrinter() + + exit_code = render_session_stats( + console=printer, + root_path=tmp_path, + quiet=True, + ) + + assert exit_code == 
int(ExitCode.SUCCESS) + assert "latest_run=none" in printer.text + + +def test_session_stats_reader_failure_is_idle( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + def raise_reader_error(*args: object, **kwargs: object) -> tuple[object, ...]: + raise OSError("registry unavailable") + + monkeypatch.setattr( + "codeclone.surfaces.mcp._workspace_intents.list_workspace_intents", + raise_reader_error, + ) + printer = _RecordingPrinter() + + exit_code = render_session_stats( + console=printer, + root_path=tmp_path, + quiet=True, + ) + + assert exit_code == int(ExitCode.SUCCESS) + assert "session-stats: idle" in printer.text + + +def test_session_stats_contract_error( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + def raise_collection_error(root_path: Path) -> object: + raise RuntimeError("boom") + + monkeypatch.setattr( + session_stats_mod, + "_collect_session_snapshot", + raise_collection_error, + ) + printer = _RecordingPrinter() + + exit_code = render_session_stats( + console=printer, + root_path=tmp_path, + quiet=True, + ) + + assert exit_code == int(ExitCode.CONTRACT_ERROR) + assert "failed to read session state: boom" in printer.text + + +def test_session_stats_counts_expired_stale_and_recoverable( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + intents_dir = tmp_path / ".cache" / "codeclone" / "intents" + intents_dir.mkdir(parents=True) + own_start_epoch = int(time.time()) - MIN_LEASE_SECONDS - 10 + _write_intent_file( + intents_dir, + intent_id="intent-own-stale-001", + pid=os.getpid(), + start_epoch=own_start_epoch, + lease_seconds=MIN_LEASE_SECONDS, + ) + _write_intent_file( + intents_dir, + intent_id="intent-recoverable-001", + pid=999999, + start_epoch=own_start_epoch, + lease_seconds=300, + ) + _write_intent_file( + intents_dir, + intent_id="intent-expired-001", + pid=999998, + start_epoch=own_start_epoch - 4000, + ) + + monkeypatch.setattr( + session_stats_mod, "_process_start_epoch", lambda: 
own_start_epoch + ) + monkeypatch.setattr( + "codeclone.surfaces.mcp._workspace_intents._is_pid_alive", + lambda pid: pid == os.getpid(), + ) + + snapshot = session_stats_mod._collect_session_snapshot(tmp_path) + + assert snapshot.stale_count == 1 + assert snapshot.recoverable_count == 1 + assert snapshot.expired_count == 1 + + +def test_session_stats_groups_multiple_intents_per_agent( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + intents_dir = tmp_path / ".cache" / "codeclone" / "intents" + intents_dir.mkdir(parents=True) + start_epoch = int(time.time()) + for index in range(2): + _write_intent_file( + intents_dir, + intent_id=f"intent-same-agent-{index:03d}", + pid=os.getpid(), + start_epoch=start_epoch, + allowed_files=[f"src/{index}.py"], + ) + + monkeypatch.setattr(session_stats_mod, "_process_start_epoch", lambda: start_epoch) + snapshot = session_stats_mod._collect_session_snapshot(tmp_path) + + assert len(snapshot.agents) == 1 + assert len(snapshot.agents[0].intents) == 2 + + +# ── Data collection helpers ── + + +def test_read_cached_report_missing(tmp_path: Path) -> None: + run_id, _health, _findings, _files, _age, present = _read_cached_report(tmp_path) + assert run_id is None + assert not present + + +def test_read_cached_report_valid(tmp_path: Path) -> None: + _write_report(tmp_path, health=88, files=50) + run_id, health, _findings, files, age, present = _read_cached_report(tmp_path) + assert run_id == "abcdef01" + assert health == 88 + assert files == 50 + assert present is True + assert age is not None and age >= 0 + + +def test_read_cached_report_non_object_payload(tmp_path: Path) -> None: + _write_report_payload(tmp_path, []) + + run_id, _health, _findings, _files, age, present = _read_cached_report(tmp_path) + + assert (run_id, _health, _findings, _files) == (None, None, None, None) + assert age is not None + assert present is True + + +def test_read_cached_report_nested_type_mismatches(tmp_path: Path) -> None: + 
_write_report_payload( + tmp_path, + { + "integrity": {"digest": []}, + "inventory": {"file_registry": []}, + "metrics": {"families": []}, + "findings": [], + }, + ) + + run_id, _health, _findings, _files, age, present = _read_cached_report(tmp_path) + + assert (run_id, _health, _findings, _files) == (None, None, None, None) + assert age is not None + assert present is True + + +def test_read_cached_report_leaf_type_mismatches(tmp_path: Path) -> None: + _write_report_payload( + tmp_path, + { + "integrity": {"digest": {"value": 123}}, + "inventory": {"file_registry": {"items": "bad"}}, + "metrics": {"families": {}}, + "health": [], + "findings": {"total": "bad"}, + }, + ) + + run_id, _health, _findings, _files, age, present = _read_cached_report(tmp_path) + + assert (run_id, _health, _findings, _files) == (None, None, None, None) + assert age is not None + assert present is True + + +def test_read_cached_report_stat_failure( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + _write_report(tmp_path) + + def raise_stat_error(self: Path) -> object: + raise OSError("stat failed") + + monkeypatch.setattr(Path, "is_file", lambda self: True) + monkeypatch.setattr(Path, "stat", raise_stat_error) + + run_id, _health, _findings, _files, age, present = _read_cached_report(tmp_path) + + assert run_id == "abcdef01" + assert age is None + assert present is True + + +# ── Workspace health classification ── + + +def test_classify_idle_no_agents() -> None: + assert ( + _classify_workspace_health(agents=[], stale_count=0, expired_count=0) == "idle" + ) + + +def test_classify_clean_no_active_intents() -> None: + agent = _AgentSnapshot( + pid=1, + start_epoch=1, + label="a", + alive=True, + intents=( + _IntentSnapshot( + intent_id="i", + status="clean", + ownership="own_active", + scope_file_count=1, + allowed_files=("x.py",), + declared_at_utc="", + lease_remaining_seconds=60, + ), + ), + ) + assert ( + _classify_workspace_health(agents=[agent], stale_count=0, 
expired_count=0) + == "clean" + ) + + +def test_classify_active_with_active_intent() -> None: + agent = _AgentSnapshot( + pid=1, + start_epoch=1, + label="a", + alive=True, + intents=( + _IntentSnapshot( + intent_id="i", + status="active", + ownership="own_active", + scope_file_count=1, + allowed_files=("x.py",), + declared_at_utc="", + lease_remaining_seconds=60, + ), + ), + ) + assert ( + _classify_workspace_health(agents=[agent], stale_count=0, expired_count=0) + == "active" + ) + + +def test_classify_contested_overlapping_scope() -> None: + intent_a = _IntentSnapshot( + intent_id="ia", + status="active", + ownership="own_active", + scope_file_count=1, + allowed_files=("shared.py",), + declared_at_utc="", + lease_remaining_seconds=60, + ) + intent_b = _IntentSnapshot( + intent_id="ib", + status="active", + ownership="foreign_active", + scope_file_count=1, + allowed_files=("shared.py",), + declared_at_utc="", + lease_remaining_seconds=60, + ) + agent_a = _AgentSnapshot( + pid=1, start_epoch=1, label="a", alive=True, intents=(intent_a,) + ) + agent_b = _AgentSnapshot( + pid=2, start_epoch=2, label="b", alive=True, intents=(intent_b,) + ) + result = _classify_workspace_health( + agents=[agent_a, agent_b], stale_count=0, expired_count=0 + ) + assert result == "contested" + + +def test_classify_active_non_overlapping_agents() -> None: + intent_a = _IntentSnapshot( + intent_id="ia", + status="active", + ownership="own_active", + scope_file_count=1, + allowed_files=("a.py",), + declared_at_utc="", + lease_remaining_seconds=60, + ) + intent_b = _IntentSnapshot( + intent_id="ib", + status="active", + ownership="foreign_active", + scope_file_count=1, + allowed_files=("b.py",), + declared_at_utc="", + lease_remaining_seconds=60, + ) + agent_a = _AgentSnapshot( + pid=1, + start_epoch=1, + label="a", + alive=True, + intents=(intent_a,), + ) + agent_b = _AgentSnapshot( + pid=2, + start_epoch=2, + label="b", + alive=True, + intents=(intent_b,), + ) + + assert ( + 
_classify_workspace_health( + agents=[agent_a, agent_b], + stale_count=0, + expired_count=0, + ) + == "active" + ) + + +def test_classify_ignores_inactive_empty_scopes() -> None: + inactive = _IntentSnapshot( + intent_id="ia", + status="clean", + ownership="own_active", + scope_file_count=1, + allowed_files=("shared.py",), + declared_at_utc="", + lease_remaining_seconds=60, + ) + empty_active = _IntentSnapshot( + intent_id="ib", + status="active", + ownership="foreign_active", + scope_file_count=0, + allowed_files=(), + declared_at_utc="", + lease_remaining_seconds=60, + ) + agent_a = _AgentSnapshot( + pid=1, + start_epoch=1, + label="a", + alive=True, + intents=(inactive,), + ) + agent_b = _AgentSnapshot( + pid=2, + start_epoch=2, + label="b", + alive=True, + intents=(empty_active,), + ) + + assert ( + _classify_workspace_health( + agents=[agent_a, agent_b], + stale_count=0, + expired_count=0, + ) + == "active" + ) + + +# ── Formatting helpers ── + + +def test_format_age_seconds() -> None: + assert _format_age(30) == "30s ago" + + +def test_format_age_minutes() -> None: + assert _format_age(180) == "3m ago" + + +def test_format_age_hours() -> None: + assert _format_age(3660) == "1h1m ago" + + +def test_format_age_exact_hours() -> None: + assert _format_age(3600) == "1h ago" + + +def test_format_age_none() -> None: + assert _format_age(None) == "unknown" + + +def test_format_duration_expired() -> None: + assert _format_duration(0) == "expired" + + +def test_format_duration_seconds() -> None: + assert _format_duration(45) == "45s" + + +def test_format_duration_minutes() -> None: + assert _format_duration(125) == "2m5s" + + +def test_lease_remaining_handles_invalid_lease() -> None: + scope: dict[str, object] = { + "allowed_files": ["src/a.py"], + "allowed_related": [], + "forbidden": [], + } + now_epoch = int(time.time()) + now_utc = datetime.fromtimestamp(now_epoch, tz=timezone.utc) + record = WorkspaceIntentRecord( + intent_id="intent-invalid-lease-001", + 
agent_pid=os.getpid(), + agent_start_epoch=now_epoch, + agent_label="test-agent", + run_id="a" * 64, + declared_at_utc=format_utc(now_utc), + expires_at_utc=expires_at(declared_at=now_utc, ttl_seconds=3600), + ttl_seconds=3600, + status="active", + intent="test intent", + scope=scope, + scope_digest=compute_scope_digest(scope), + blast_radius_summary={}, + lease_renewed_at_utc="not-a-date", + lease_seconds=300, + report_digest="a" * 64, + ) + + assert _lease_remaining_seconds(record, now_utc) == 0 + + +def test_is_pid_alive_edges(monkeypatch: pytest.MonkeyPatch) -> None: + assert _is_pid_alive(0) is False + + def raise_process_lookup(pid: int, signal: int) -> None: + raise ProcessLookupError + + def raise_permission(pid: int, signal: int) -> None: + raise PermissionError + + monkeypatch.setattr(os, "kill", raise_process_lookup) + assert _is_pid_alive(123) is False + monkeypatch.setattr(os, "kill", raise_permission) + assert _is_pid_alive(123) is True diff --git a/tests/test_mcp_service.py b/tests/test_mcp_service.py index 488ba3ae..2050bad6 100644 --- a/tests/test_mcp_service.py +++ b/tests/test_mcp_service.py @@ -28,16 +28,20 @@ import codeclone.surfaces.mcp._session_baseline as mcp_baseline_mod import codeclone.surfaces.mcp._session_finding_mixin as mcp_finding_mod import codeclone.surfaces.mcp._session_helpers as mcp_helpers_mod +import codeclone.surfaces.mcp._session_intent_mixin as mcp_session_intent_mod import codeclone.surfaces.mcp._session_runtime as mcp_runtime_mod import codeclone.surfaces.mcp._session_shared as mcp_shared_mod import codeclone.surfaces.mcp._session_state_mixin as mcp_state_mod import codeclone.surfaces.mcp._workspace_intents as mcp_workspace_intents_mod +import codeclone.surfaces.mcp.server as mcp_server_mod +import codeclone.surfaces.mcp.service as mcp_service_mod import codeclone.surfaces.mcp.session as mcp_session_mod from codeclone.baseline import Baseline, current_python_tag -from codeclone.baseline.metrics_baseline import 
MetricsBaseline +from codeclone.baseline.metrics_baseline import MetricsBaseline, MetricsBaselineStatus from codeclone.cache.store import Cache from codeclone.config.pyproject_loader import ConfigValidationError from codeclone.contracts import BASELINE_SCHEMA_VERSION, REPORT_SCHEMA_VERSION +from codeclone.contracts.errors import BaselineValidationError from codeclone.models import MetricsDiff from codeclone.surfaces.mcp.service import CodeCloneMCPService from codeclone.surfaces.mcp.session import ( @@ -1763,6 +1767,7 @@ def test_mcp_service_build_args_handles_pyproject_and_invalid_settings( respect_pyproject=True, analysis_mode="clones_only", metrics_baseline_path="metrics.json", + coverage_xml="coverage.xml", ), ) assert args.min_loc == 12 @@ -1773,6 +1778,7 @@ def test_mcp_service_build_args_handles_pyproject_and_invalid_settings( assert str(args.baseline).endswith("conf-baseline.json") assert str(args.cache_path).endswith("conf-cache.json") assert str(args.metrics_baseline).endswith("metrics.json") + assert str(args.coverage_xml).endswith("coverage.xml") monkeypatch.setattr( mcp_state_mod, @@ -2325,6 +2331,93 @@ def test_mcp_blast_radius_projection_is_deterministic() -> None: ) +def test_mcp_blast_radius_high_scope_boundary_and_helper_edges() -> None: + report_document = copy.deepcopy(_blast_radius_report_document()) + registry = cast( + "dict[str, object]", + cast(dict[str, object], report_document["inventory"])["file_registry"], + ) + items = cast("list[str]", registry["items"]) + items.extend(f"pkg/dep_{index}.py" for index in range(6)) + dependencies = cast( + "dict[str, object]", + cast( + dict[str, object], + cast(dict[str, object], report_document["metrics"])["families"], + )["dependencies"], + ) + dependency_items = cast("list[dict[str, object]]", dependencies["items"]) + dependency_items.extend( + {"source": f"pkg.dep_{index}", "target": "pkg.a"} for index in range(6) + ) + + result = mcp_blast_radius_mod.compute_blast_radius( + run_id="abcdef12", + 
report_document=report_document, + files=("./pkg/a.py",), + depth="direct", + allowed_scope=("pkg/a.py",), + ) + risk_only = result.to_payload(include=("risk_signals",)) + + assert result.radius_level == "high" + assert ( + "high blast radius requires explicit human scope approval" in result.guardrails + ) + assert {item["category"] for item in result.do_not_touch} >= { + "baseline_or_generated_state", + "affected_but_not_allowed", + } + assert risk_only["do_not_touch"] == [] + assert risk_only["review_context"] == [] + assert "low_coverage_in_blast_zone" not in cast( + dict[str, object], + risk_only["structural_risk"], + ) + assert mcp_blast_radius_mod._as_int(True) == 1 + assert mcp_blast_radius_mod._as_int("bad", default=7) == 7 + assert mcp_blast_radius_mod._path_to_module("__init__.py") == "" + + +def test_mcp_blast_radius_private_edge_helpers() -> None: + assert mcp_blast_radius_mod._as_int(3) == 3 + assert mcp_blast_radius_mod._as_int(2.9) == 2 + assert mcp_blast_radius_mod._as_int(object(), default=4) == 4 + assert mcp_blast_radius_mod._normalize_relative_path(".") == "" + assert mcp_blast_radius_mod._path_to_module("pkg/data.txt") == "pkg.data.txt" + assert mcp_blast_radius_mod._item_path({}) == "" + assert mcp_blast_radius_mod._compute_transitive_dependents( + origin_modules=("pkg.a",), + reverse_graph={"pkg.a": {"pkg.b"}, "pkg.b": {"pkg.a", "pkg.c"}}, + ) == ("pkg.b", "pkg.c") + assert mcp_blast_radius_mod._guardrails( + radius_level="low", + do_not_touch=(), + ) == ( + "review direct dependents before editing public behavior", + "treat clone cohort members as comparison context, not automatic edit targets", + ) + + boundary_entries: dict[str, dict[str, str]] = {} + mcp_blast_radius_mod._append_boundary_entry( + boundary_entries, + path="", + reason="ignored", + category="ignored", + severity="hard", + ) + review_entries: dict[tuple[str, str, str], dict[str, str]] = {} + mcp_blast_radius_mod._append_review_entry( + review_entries, + path="", + 
reason="ignored", + category="ignored", + ) + + assert boundary_entries == {} + assert review_entries == {} + + def test_mcp_blast_radius_payload_bounds_context_sections() -> None: review_context = tuple( { @@ -2502,6 +2595,105 @@ def test_mcp_service_manage_change_intent_lifecycle(tmp_path: Path) -> None: service.manage_change_intent(action="get", run_id="abcdef12") +def test_mcp_service_wrapper_and_server_validation_edges( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + class BadSession: + def not_a_dict(self) -> list[str]: + return [] + + service = CodeCloneMCPService(history_limit=2) + service._session_cls = BadSession # type: ignore[assignment] + with pytest.raises(TypeError, match="must return a dict"): + service._run_dict("not_a_dict") + + cleanup_service = CodeCloneMCPService(history_limit=2) + cleanup_service._active_intents["intent-missing-run"] = mcp_intent_mod.IntentRecord( + intent_id="intent-missing-run", + run_id="missing-run", + report_digest="digest", + status=mcp_intent_mod.IntentStatus.ACTIVE, + declared_at_utc="2026-01-01T00:00:00Z", + scope=mcp_intent_mod.IntentScope(allowed_files=("pkg/a.py",)), + intent_description="cleanup", + expected_effects=(), + guards=(), + ) + cleanup_service.shutdown_cleanup() + + record = _blast_radius_run_record(tmp_path) + cleanup_service._runs.register(record) + cleanup_service._active_intents["intent-cleanup"] = mcp_intent_mod.IntentRecord( + intent_id="intent-cleanup", + run_id=record.run_id, + report_digest="digest", + status=mcp_intent_mod.IntentStatus.ACTIVE, + declared_at_utc="2026-01-01T00:00:00Z", + scope=mcp_intent_mod.IntentScope(allowed_files=("pkg/a.py",)), + intent_description="cleanup", + expected_effects=(), + guards=(), + ) + removed: list[str] = [] + + def fake_safe_remove_own_intent(**kwargs: object) -> bool: + removed.append(str(kwargs["intent_id"])) + return True + + monkeypatch.setattr( + mcp_service_mod, + "safe_remove_own_intent", + fake_safe_remove_own_intent, + ) + 
cleanup_service.shutdown_cleanup() + assert removed == ["intent-cleanup"] + + class BrokenLock: + def __enter__(self) -> None: + raise RuntimeError("lock unavailable") + + def __exit__(self, *args: object) -> None: + return None + + cleanup_service._state_lock = BrokenLock() + cleanup_service.shutdown_cleanup() + + clear_service = CodeCloneMCPService(history_limit=2) + clear_service._active_intents["intent-missing-run"] = mcp_intent_mod.IntentRecord( + intent_id="intent-missing-run", + run_id="missing-run", + report_digest="digest", + status=mcp_intent_mod.IntentStatus.ACTIVE, + declared_at_utc="2026-01-01T00:00:00Z", + scope=mcp_intent_mod.IntentScope(allowed_files=("pkg/a.py",)), + intent_description="cleanup", + expected_effects=(), + guards=(), + ) + + def raise_contract_error( + _store: object, + run_id: str | None = None, + ) -> MCPRunRecord: + raise MCPServiceContractError("missing run") + + monkeypatch.setattr(type(clear_service._runs), "get", raise_contract_error) + cleared = clear_service.clear_session_runs() + assert cleared["cleared_intents"] == 1 + + with pytest.raises(MCPServiceContractError, match="Invalid value for include"): + cleanup_service._validated_blast_radius_include(("bad",)) + + assert mcp_server_mod._validated_analysis_mode("clones_only") == "clones_only" + with pytest.raises(MCPServiceContractError, match="analysis_mode"): + mcp_server_mod._validated_analysis_mode("bad") + assert mcp_server_mod._validated_cache_policy("refresh") == "refresh" + assert mcp_server_mod._validated_cache_policy("off") == "off" + with pytest.raises(MCPServiceContractError, match="cache_policy"): + mcp_server_mod._validated_cache_policy("bad") + + def _paired_blast_services( tmp_path: Path, *, @@ -2574,6 +2766,20 @@ def _single_service_with_stale_intent( ) +def _recoverable_workspace_service( + tmp_path: Path, +) -> tuple[CodeCloneMCPService, str]: + first, second = _paired_blast_services(tmp_path) + declared = first.manage_change_intent( + action="declare", + 
scope={"allowed_files": ["pkg/a.py"]}, + intent="first agent edits pkg.a", + ) + intent_id = str(declared["intent_id"]) + _stale_workspace_intent(tmp_path, intent_id=intent_id) + return second, intent_id + + def _lease_expires_at( record: mcp_workspace_intents_mod.WorkspaceIntentRecord, ) -> str: @@ -2704,8 +2910,23 @@ def test_mcp_service_workspace_intent_recovery_after_lease_expiry( "list[dict[str, object]]", workspace["workspace_intents"], ) - assert workspace_intents[0]["ownership"] == "recoverable" - assert cast("list[dict[str, object]]", workspace["recovery_available"]) == [ + assert workspace_intents[0]["ownership"] == "foreign_stale" + assert "owner may still be working" in str(workspace_intents[0]["escalation_hint"]) + assert cast("list[dict[str, object]]", workspace["recovery_available"]) == [] + + monkeypatch.setattr(mcp_workspace_intents_mod, "_is_pid_alive", lambda pid: False) + recoverable_workspace = second.manage_change_intent( + action="list_workspace", + root=str(tmp_path), + ) + recoverable_intents = cast( + "list[dict[str, object]]", + recoverable_workspace["workspace_intents"], + ) + assert recoverable_intents[0]["ownership"] == "recoverable" + assert cast( + "list[dict[str, object]]", recoverable_workspace["recovery_available"] + ) == [ { "intent_id": intent_id, "run_id": "abcdef12", @@ -2778,6 +2999,160 @@ def test_mcp_service_workspace_intent_recovery_rejects_digest_mismatch( assert rejected["reason"] == "report_digest_mismatch" +def test_mcp_service_workspace_intent_recovery_request_edges( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + service = CodeCloneMCPService(history_limit=2) + + assert service.manage_change_intent(action="recover")["reason"] == ( + "missing_intent_id" + ) + assert ( + service.manage_change_intent( + action="recover", + intent_id="intent-missing", + )["reason"] + == "missing_run_id" + ) + assert ( + service.manage_change_intent( + action="recover", + intent_id="intent-missing", + run_id="abcdef12", 
+ )["reason"] + == "missing_root" + ) + with pytest.raises(MCPServiceContractError, match="require root"): + service.manage_change_intent(action="list_workspace") + + service._runs.register(_blast_radius_run_record(tmp_path)) + rejected = service.manage_change_intent( + action="recover", + root=str(tmp_path), + run_id="abcdef12", + intent_id="intent-missing", + ) + assert rejected["action_taken"] == "recovery_rejected" + assert rejected["reason"] == "not_found" + + second, intent_id = _recoverable_workspace_service(tmp_path) + + def reject_activation(**kwargs: object) -> dict[str, object]: + return { + "intent_id": intent_id, + "action_taken": "recovery_rejected", + "reason": "forced_activation_rejection", + } + + monkeypatch.setattr(second, "_activate_recovered_intent", reject_activation) + forced = second.manage_change_intent( + action="recover", + root=str(tmp_path), + run_id="abcdef12", + intent_id=intent_id, + ) + assert forced["reason"] == "forced_activation_rejection" + + +def test_mcp_service_workspace_intent_recovery_rejects_unavailable_run( + tmp_path: Path, +) -> None: + first = CodeCloneMCPService(history_limit=2) + second = CodeCloneMCPService(history_limit=2) + first._agent_pid, first._agent_start_epoch = 11111, 100 + second._agent_pid, second._agent_start_epoch = 22222, 200 + first._runs.register(_blast_radius_run_record(tmp_path)) + declared = first.manage_change_intent( + action="declare", + scope={"allowed_files": ["pkg/a.py"]}, + intent="first agent edits pkg.a", + ) + intent_id = str(declared["intent_id"]) + _stale_workspace_intent(tmp_path, intent_id=intent_id) + + rejected = second.manage_change_intent( + action="recover", + root=str(tmp_path), + run_id="abcdef12", + intent_id=intent_id, + ) + + assert rejected["action_taken"] == "recovery_rejected" + assert rejected["reason"] == "run_not_available" + + +def test_mcp_service_workspace_intent_recovery_rolls_back_on_rewrite_failure( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> 
None: + second, intent_id = _recoverable_workspace_service(tmp_path) + + def reject_workspace_write(*args: object, **kwargs: object) -> bool: + return False + + monkeypatch.setattr( + mcp_session_intent_mod, + "write_workspace_intent", + reject_workspace_write, + ) + + rejected = second.manage_change_intent( + action="recover", + root=str(tmp_path), + run_id="abcdef12", + intent_id=intent_id, + ) + + assert rejected["action_taken"] == "recovery_rejected" + assert rejected["reason"] == "workspace_rewrite_failed" + with pytest.raises(MCPServiceContractError, match="Unknown change intent"): + second.manage_change_intent(action="get", intent_id=intent_id) + + +def test_mcp_service_reset_workspace_edges( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + service, intent_id, _ = _single_service_with_stale_intent(tmp_path) + + with pytest.raises(MCPServiceContractError, match="requires intent_id"): + service.manage_change_intent(action="reset_workspace", root=str(tmp_path)) + with pytest.raises(MCPServiceContractError, match="Unknown workspace intent"): + service.manage_change_intent( + action="reset_workspace", + root=str(tmp_path), + intent_id="intent-missing", + ) + + reset = service.manage_change_intent( + action="reset_workspace", + root=str(tmp_path), + intent_id=intent_id, + ttl_seconds=mcp_workspace_intents_mod.MIN_TTL_SECONDS, + ) + assert reset["action_taken"] == "reset" + assert reset["new_status"] == "active" + + second, recoverable_id = _recoverable_workspace_service(tmp_path) + monkeypatch.setattr(mcp_workspace_intents_mod, "_is_pid_alive", lambda pid: False) + + removed = second.manage_change_intent( + action="reset_workspace", + root=str(tmp_path), + intent_id=recoverable_id, + ) + assert removed["action_taken"] == "removed" + assert removed["reason"] == "orphaned" + assert ( + mcp_workspace_intents_mod.find_workspace_intent( + root=tmp_path, + intent_id=recoverable_id, + ) + is None + ) + + def 
test_mcp_service_workspace_intent_get_renews_lease(tmp_path: Path) -> None: service, intent_id, stale_record = _single_service_with_stale_intent(tmp_path) @@ -2853,6 +3228,182 @@ def test_mcp_service_manage_change_intent_validation_expiry_and_prune( service._runs.get("abcdef12") +def test_mcp_service_manage_change_intent_additional_edges( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + service = CodeCloneMCPService(history_limit=2) + service._runs.register(_blast_radius_run_record(tmp_path, digest="")) + + with pytest.raises(MCPServiceContractError, match="requires intent text"): + service.manage_change_intent( + action="declare", + scope={"allowed_files": ["pkg/a.py"]}, + intent=" ", + ) + + first = service.manage_change_intent( + action="declare", + scope={"allowed_files": ["pkg/a.py"]}, + intent="first declaration", + ) + first_id = str(first["intent_id"]) + second = service.manage_change_intent( + action="declare", + scope={"allowed_files": ["pkg/a.py"]}, + intent="replacement declaration", + ) + second_id = str(second["intent_id"]) + assert first_id != second_id + with pytest.raises(MCPServiceContractError, match="Unknown change intent"): + service.manage_change_intent(action="clear", intent_id=first_id) + + def raise_runtime_error(*args: object, **kwargs: object) -> bool: + raise RuntimeError("lease unavailable") + + monkeypatch.setattr( + mcp_session_intent_mod, + "renew_workspace_intent_lease", + raise_runtime_error, + ) + assert service.manage_change_intent(action="get", intent_id=second_id)[ + "status" + ] == ("active") + service.get_blast_radius(files=("pkg/a.py",), run_id="abcdef12") + + service._runs.register(_blast_radius_run_record(tmp_path, digest="changed")) + expired = service.manage_change_intent( + action="check", + intent_id=second_id, + changed_files=["pkg/a.py"], + ) + assert expired["status"] == "expired" + + cleared = service.manage_change_intent(action="clear") + assert cleared["cleared_intent_ids"] == [second_id] + + 
assert ( + service.manage_change_intent( + action="gc_workspace", + root=str(tmp_path), + )["remaining"] + == 0 + ) + assert mcp_session_intent_mod._as_sequence("not-a-sequence") == () + assert mcp_session_intent_mod._parse_utc("not-a-date") is None + assert mcp_session_intent_mod._parse_utc("2026-01-01T00:00:00") is None + assert ( + service._recovery_rejection_message( + mcp_workspace_intents_mod.IntentOwnership.EXPIRED + ) + == "Intent has expired (TTL). Declare a new intent instead." + ) + assert ( + service._recovery_rejection_message( + mcp_workspace_intents_mod.IntentOwnership.OWN_ACTIVE + ) + == "Intent is already actively owned by this session." + ) + assert ( + service._recovery_rejection_message( + mcp_workspace_intents_mod.IntentOwnership.OWN_STALE + ) + == "Intent is not recoverable." + ) + + +def test_mcp_service_intent_recovery_internal_edges(tmp_path: Path) -> None: + service = CodeCloneMCPService(history_limit=2) + record = _blast_radius_run_record(tmp_path) + service._runs.register(record) + + with pytest.raises(MCPServiceContractError, match="No active change intent"): + service.manage_change_intent(action="get", run_id="abcdef12") + assert service._optional_run_record("missing") is None + + declared = service.manage_change_intent( + action="declare", + scope={"allowed_files": ["pkg/a.py"]}, + intent="recover internals", + ) + intent_id = str(declared["intent_id"]) + resolved_record, resolved_intent = service._resolve_intent( + run_id="abcdef12", + intent_id=None, + ) + assert resolved_record.run_id == record.run_id + assert resolved_intent.intent_id == intent_id + found = mcp_workspace_intents_mod.find_workspace_intent( + root=tmp_path, + intent_id=intent_id, + ) + assert found is not None + _, workspace_record = found + now = mcp_workspace_intents_mod.utc_now() + recovery_run = mcp_session_intent_mod._RecoveryRun( + record=record, + report_digest=service._report_digest_value(record), + ) + active_target = mcp_session_intent_mod._RecoveryTarget( 
+ root_path=tmp_path, + workspace_record=workspace_record, + now=now, + ) + + already_active = service._activate_recovered_intent( + target=active_target, + recovery_run=recovery_run, + ) + assert cast("dict[str, object]", already_active)["reason"] == "already_active" + + invalid_scope_target = mcp_session_intent_mod._RecoveryTarget( + root_path=tmp_path, + workspace_record=replace( + workspace_record, + intent_id="intent-invalid-scope", + scope={"allowed_files": []}, + ), + now=now, + ) + invalid_scope = service._activate_recovered_intent( + target=invalid_scope_target, + recovery_run=recovery_run, + ) + assert cast("dict[str, object]", invalid_scope)["reason"] == "invalid_scope" + + digest_mismatch_target = mcp_session_intent_mod._RecoveryTarget( + root_path=tmp_path, + workspace_record=replace(workspace_record, scope_digest="0" * 64), + now=now, + ) + digest_mismatch = service._recovery_run( + run_id="abcdef12", + target=digest_mismatch_target, + ) + assert cast("dict[str, object]", digest_mismatch)["reason"] == ( + "scope_digest_mismatch" + ) + + no_run_record = replace( + workspace_record, + intent_id="intent-no-run", + run_id="missing-run", + agent_pid=999999, + agent_start_epoch=999, + lease_renewed_at_utc=mcp_workspace_intents_mod.format_utc( + now - timedelta(minutes=10) + ), + lease_seconds=mcp_workspace_intents_mod.MIN_LEASE_SECONDS, + ) + assert service._recovery_available_payload(records=(no_run_record,), now=now) == [] + assert ( + service._lease_expired_at_utc( + replace(workspace_record, lease_renewed_at_utc="not-a-date") + ) + is None + ) + + def test_mcp_patch_contract_profiles_and_baseline_abuse() -> None: ci_budget = mcp_patch_contract_mod.budgets_for_strictness( strictness="ci", @@ -3109,6 +3660,194 @@ def test_mcp_service_check_patch_contract_verify_composes_existing_primitives( assert payload["reason"] == reason +def test_mcp_patch_contract_helper_edges( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + service = 
CodeCloneMCPService(history_limit=2) + record = _patch_contract_run_record( + tmp_path, + run_id="abcdef1234567890", + digest="helper-digest", + include_regression=False, + complexity=6, + health=90, + ) + service._runs.register(record) + + assert service._validated_strictness("strict") == "strict" + assert ( + service._budgets_for_record(record=record, strictness="strict").health_floor + == 70 + ) + assert service._patch_changed_files( + after=record, + diff_ref=None, + changed_files=None, + ) == tuple(record.changed_paths) + + def fake_git_diff_paths(*, root_path: Path, git_diff_ref: str) -> tuple[str, ...]: + return (f"{root_path.name}:{git_diff_ref}",) + + monkeypatch.setattr(service, "_git_diff_paths", fake_git_diff_paths) + assert service._patch_changed_files( + after=record, + diff_ref="HEAD~1", + changed_files=None, + ) == (f"{tmp_path.name}:HEAD~1",) + assert service._contract_violations( + structural_delta={"regressions": [{"id": "r1"}]}, + gate_preview={"would_fail": True}, + scope_check={"status": "violated"}, + baseline_abuse={"triggers": ["baseline_updated_without_intent"]}, + ) == ( + "structural_regressions", + "gate_failures", + "scope_violation", + "baseline_abuse:baseline_updated_without_intent", + ) + assert service._first_int({"value": "5"}, keys=("missing", "value")) == 5 + assert service._first_int({}, keys=("value",)) == 0 + assert service._item_path({"file": "pkg\\a.py"}) == "pkg/a.py" + assert service._item_path({}) == "" + assert service._item_symbol({"class_name": "Widget"}) == "Widget" + assert ( + service._metric_item_index( + {"metrics": {"families": {"complexity": {"items": [{"value": 10}]}}}}, + family="complexity", + value_keys=("value",), + ) + == {} + ) + assert service._threshold_headroom(budget=-1, current=10) is None + assert service._verify_message(status="violated", violations=("x", "y")) == ( + "Patch contract violated: x, y" + ) + assert ( + mcp_patch_contract_mod.detect_baseline_abuse( + before_gate_would_fail=False, + 
after_gate_would_fail=False, + after_baseline_status="ok", + regressions=0, + changed_files=0, + intent_available=True, + )["detected"] + is False + ) + assert mcp_patch_contract_mod.detect_baseline_abuse( + before_gate_would_fail=False, + after_gate_would_fail=False, + after_baseline_status="updated", + regressions=0, + changed_files=1, + intent_available=True, + )["triggers"] == ["baseline_changed_with_functional_code"] + + incomparable = service._unverified_patch_contract( + reason="incomparable_runs", + before=record, + after=record, + structural_delta={"verdict": "incomparable"}, + ) + assert incomparable["before"] == {"run_id": "abcdef12", "health": 90} + assert incomparable["after"] == {"run_id": "abcdef12", "health": 90} + assert incomparable["structural_delta"] == {"verdict": "incomparable"} + + expired = service._expired_patch_contract( + before=record, + after=record, + intent=mcp_intent_mod.IntentRecord( + intent_id="intent-expired", + run_id=record.run_id, + report_digest="old", + status=mcp_intent_mod.IntentStatus.EXPIRED, + declared_at_utc="2026-01-01T00:00:00Z", + scope=mcp_intent_mod.IntentScope(allowed_files=("pkg/a.py",)), + intent_description="expired", + expected_effects=(), + guards=(), + ), + ) + assert expired["status"] == "expired" + assert expired["contract_violations"] == ["intent_expired"] + + +def test_mcp_patch_contract_verify_incomparable_and_expired_edges( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + service = CodeCloneMCPService(history_limit=4) + before = _patch_contract_run_record( + tmp_path, + run_id="beforeedge123456", + digest="before-edge", + include_regression=False, + complexity=6, + health=90, + ) + after = _patch_contract_run_record( + tmp_path, + run_id="afteredge123456", + digest="after-edge", + include_regression=False, + complexity=6, + health=90, + ) + service._runs.register(before) + service._runs.register(after) + + def incomparable_compare( + *, + run_id_before: str, + run_id_after: str | 
None = None, + focus: str = "all", + ) -> dict[str, object]: + return { + "comparable": False, + "regressions": [], + "improvements": [], + "health_delta": None, + "verdict": f"{run_id_before}:{run_id_after}:{focus}", + } + + monkeypatch.setattr(service, "compare_runs", incomparable_compare) + incomparable = service.check_patch_contract( + mode="verify", + before_run_id="beforeedge", + after_run_id="afteredge", + ) + assert incomparable["status"] == "unverified" + assert incomparable["reason"] == "incomparable_runs" + + def stable_compare(**kwargs: object) -> dict[str, object]: + return { + "comparable": True, + "regressions": [], + "improvements": [], + "health_delta": 0, + "verdict": "stable", + } + + def always_expired(**kwargs: object) -> bool: + return True + + monkeypatch.setattr(service, "compare_runs", stable_compare) + declared = service.manage_change_intent( + action="declare", + run_id="beforeedge", + scope={"allowed_files": ["pkg/a.py"]}, + intent="expire verify", + ) + monkeypatch.setattr(service, "_is_intent_expired", always_expired) + expired = service.check_patch_contract( + mode="verify", + before_run_id="beforeedge", + after_run_id="afteredge", + intent_id=str(declared["intent_id"]), + ) + assert expired["status"] == "expired" + + def test_claim_guard_detects_deterministic_overclaims() -> None: payload = mcp_claim_guard_mod.validate_claims( text=( @@ -3135,6 +3874,173 @@ def test_claim_guard_detects_deterministic_overclaims() -> None: assert all(not item["valid"] for item in validated) +def test_claim_guard_input_warning_and_dedupe_edges() -> None: + with pytest.raises(ValueError, match="string"): + mcp_claim_guard_mod.validate_text_input(42) + with pytest.raises(ValueError, match="empty"): + mcp_claim_guard_mod.validate_text_input(" ") + with pytest.raises(ValueError, match="maximum"): + mcp_claim_guard_mod.validate_text_input( + "x" * (mcp_claim_guard_mod.MAX_REVIEW_CLAIM_TEXT_CHARS + 1) + ) + + base_context = 
_claim_guard_context(has_comparison_run=True) + context = mcp_claim_guard_mod.ReportContext( + findings=base_context.findings, + short_to_canonical={**base_context.short_to_canonical, "F-9": "missing:id"}, + reachable_qualnames=base_context.reachable_qualnames, + report_only_families=base_context.report_only_families, + has_comparison_run=True, + metric_families=base_context.metric_families, + ) + + payload = mcp_claim_guard_mod.validate_claims( + text="F-1 fixed. F-1 fixed again. F-9 is mapped away. F-99 is unknown.", + report_context=context, + require_citations=True, + ) + warnings = cast("list[dict[str, str]]", payload["warnings"]) + + assert payload["valid"] is True + assert payload["citations_found"] == 2 + assert warnings == [ + { + "type": "unknown_finding", + "message": "Finding citation 'F-99' is not present in this run.", + } + ] + assert mcp_claim_guard_mod._as_sequence("abc") == () + assert mcp_claim_guard_mod._extract_qualnames_from_finding( + "dead_code:pkg.mod:func", + {"items": [{"target_qualname": "pkg.mod:func"}]}, + ) == frozenset({"pkg.mod:func"}) + + no_reachability_context = mcp_claim_guard_mod.ReportContext( + findings=base_context.findings, + short_to_canonical=base_context.short_to_canonical, + reachable_qualnames=frozenset(), + report_only_families=base_context.report_only_families, + has_comparison_run=False, + metric_families=base_context.metric_families, + ) + no_reachability = mcp_claim_guard_mod.validate_claims( + text="F-3 is dead.", + report_context=no_reachability_context, + require_citations=False, + ) + assert no_reachability["valid"] is True + no_dead_keyword = mcp_claim_guard_mod.validate_claims( + text="F-3 is mentioned as a route handler.", + report_context=base_context, + require_citations=False, + ) + assert no_dead_keyword["valid"] is True + + duplicate_citation = mcp_claim_guard_mod.Citation( + cited_id="clone:function:g1", + kind="finding", + text_window="F-1 fixed", + start_offset=0, + end_offset=3, + ) + assert 
mcp_claim_guard_mod._dedupe_citations( + (duplicate_citation, duplicate_citation) + ) == (duplicate_citation,) + + duplicate_violation = mcp_claim_guard_mod.Violation( + pattern="P-X", + claim="claim", + cited_id="clone:function:g1", + reason="reason", + source_flag="source", + ) + assert mcp_claim_guard_mod._dedupe_violations( + (duplicate_violation, duplicate_violation) + ) == (duplicate_violation,) + + +def test_mcp_session_helper_private_edges( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + with pytest.raises(MCPServiceContractError, match="Invalid value"): + mcp_helpers_mod._validate_choice("kind", "bad", ("good",)) + assert mcp_helpers_mod._validate_optional_choice("kind", None, ("good",)) is None + assert [ + mcp_helpers_mod._metrics_detail_family(value) + for value in ( + "complexity", + "coupling", + "cohesion", + "coverage_adoption", + "coverage_join", + "dependencies", + "dead_code", + "api_surface", + "security_surfaces", + "god_modules", + "health", + "unknown", + ) + ] == [ + "complexity", + "coupling", + "cohesion", + "coverage_adoption", + "coverage_join", + "dependencies", + "dead_code", + "api_surface", + "security_surfaces", + "overloaded_modules", + "health", + None, + ] + assert mcp_helpers_mod._dict_rows("not-rows") == [] + assert mcp_helpers_mod._string_rows("not-rows") == [] + assert mcp_helpers_mod._summary_location_string({}) == "" + assert mcp_helpers_mod._normal_location_payload({}) == {} + record = replace( + _dummy_run_record(tmp_path, "helper1234567890"), + report_document={ + "metrics": { + "families": { + "coverage_join": { + "summary": { + "status": "ok", + "source": " coverage.xml ", + "invalid_reason": " ", + } + } + } + } + }, + ) + assert mcp_helpers_mod._summary_coverage_join_payload(record)["source"] == ( + "coverage.xml" + ) + + def raise_baseline_validation_error(*args: object, **kwargs: object) -> None: + raise BaselineValidationError( + "invalid metrics baseline", + status="invalid_json", + ) + + 
monkeypatch.setattr( + MetricsBaseline, + "load", + raise_baseline_validation_error, + ) + baseline_state = mcp_baseline_mod.resolve_metrics_baseline_state( + metrics_baseline_path=tmp_path / "metrics-baseline.json", + metrics_baseline_exists=True, + max_baseline_size_mb=1, + skip_metrics=False, + ) + assert baseline_state.loaded is False + assert baseline_state.status == MetricsBaselineStatus.INVALID_JSON + + def test_normalize_intent_scope_hint_on_invalid_type() -> None: """Non-dict scope gives an actionable error with format example.""" with pytest.raises(ValueError, match=r"allowed_files"): @@ -3143,6 +4049,8 @@ def test_normalize_intent_scope_hint_on_invalid_type() -> None: def test_normalize_intent_scope_edge_cases() -> None: """Path normalization: traversal, absolute, leading ./ prefix.""" + assert mcp_intent_mod._normalize_path(".") == "" + # leading ./ stripped scope = mcp_intent_mod.normalize_intent_scope({"allowed_files": ["./pkg/a.py"]}) assert scope.allowed_files == ("pkg/a.py",) @@ -3586,6 +4494,88 @@ def test_mcp_service_create_review_receipt_full_post_edit_workflow( assert "reviewed clone split" in content +def test_mcp_service_review_receipt_edge_helpers(tmp_path: Path) -> None: + service = CodeCloneMCPService(history_limit=4) + record = _patch_contract_run_record( + tmp_path, + run_id="receiptedge123456", + digest="receipt-edge", + include_regression=False, + complexity=6, + health=90, + ) + service._runs.register(record) + declared = service.manage_change_intent( + action="declare", + run_id="receiptedge", + scope={"allowed_files": ["pkg/a.py"]}, + intent="edge receipt", + ) + intent_id = str(declared["intent_id"]) + receipt = service.create_review_receipt( + run_id="receiptedge", + intent_id=intent_id, + format="json", + ) + scope = cast("dict[str, object]", receipt["scope"]) + assert scope["intent_status"] == "active" + assert scope["changed_files"] == [] + + service._review_state[record.run_id] = OrderedDict([("missing:finding", None)]) + 
assert service._reviewed_evidence(record)["items"] == [] + assert service._finding_by_id(record=record, canonical_id="missing:finding") is None + assert service._receipt_contract_violations( + gate_result={"would_fail": True}, + intent_check_status="violated", + regressions=1, + baseline_abuse=True, + ) == [ + "structural_regressions", + "gate_failures", + "scope_violation", + "baseline_abuse", + ] + + top_generated = replace( + record, + report_document={"meta": {"report_generated_at_utc": "2026-top"}}, + ) + fallback_generated = replace( + record, + report_document={}, + summary={**record.summary, "analysis_started_at_utc": "2026-fallback"}, + ) + assert service._receipt_generated_at(top_generated) == "2026-top" + assert service._receipt_generated_at(fallback_generated) == "2026-fallback" + assert ( + mcp_review_receipt_mod.receipt_verdict( + reviewed_count=0, + gate_relevant_count=0, + patch_status=mcp_review_receipt_mod.ReceiptPatchStatus.ACCEPTED.value, + human_decision_count=1, + ) + == mcp_review_receipt_mod.ReceiptVerdict.NEEDS_ATTENTION.value + ) + markdown_without_patch = mcp_review_receipt_mod.render_receipt_markdown( + { + "provenance": {"report_digest": "digest"}, + "reviewed_evidence": {}, + "structural_delta": {}, + "health": {}, + "human_decision_points": [], + "claims_not_made": [], + } + ) + assert "### Patch Contract\nNot available." 
in markdown_without_patch + + other_root = tmp_path / "other-root" + service._runs.register( + _blast_radius_run_record(other_root, run_id="otherroot123456789") + ) + with pytest.raises(MCPServiceContractError, match="same root"): + service.create_review_receipt(run_id="otherroot", intent_id=intent_id) + + def test_mcp_service_branch_helpers_on_real_runs( tmp_path: Path, monkeypatch: pytest.MonkeyPatch, diff --git a/tests/test_workspace_intents.py b/tests/test_workspace_intents.py index 313c82db..5a11119e 100644 --- a/tests/test_workspace_intents.py +++ b/tests/test_workspace_intents.py @@ -1,6 +1,7 @@ from __future__ import annotations import os +from dataclasses import replace from datetime import timedelta from pathlib import Path @@ -51,6 +52,31 @@ def _record( ) +def _signed_payload_with( + record: WorkspaceIntentRecord, + **updates: object, +) -> dict[str, object]: + payload = record.unsigned_payload() + payload.update(updates) + payload["integrity"] = { + "payload_sha256": workspace_intents.compute_intent_digest(payload) + } + return payload + + +def _signed_payload_without( + record: WorkspaceIntentRecord, + *keys: str, +) -> dict[str, object]: + payload = record.unsigned_payload() + for key in keys: + payload.pop(key, None) + payload["integrity"] = { + "payload_sha256": workspace_intents.compute_intent_digest(payload) + } + return payload + + def test_workspace_intent_write_validate_update_and_remove(tmp_path: Path) -> None: record = _record() @@ -90,6 +116,49 @@ def test_workspace_intent_write_validate_update_and_remove(tmp_path: Path) -> No assert workspace_intents.list_workspace_intents(root=tmp_path) == () +@pytest.mark.parametrize( + "payload", + [ + None, + {1: "not-a-string-key"}, + {"integrity": {"payload_sha256": "0" * 64}}, + _signed_payload_with(_record(), registry_version="9"), + _signed_payload_without(_record(), "intent_id"), + _signed_payload_with(_record(), agent_pid=True), + _signed_payload_with(_record(), agent_start_epoch=0), + 
_signed_payload_with(_record(), status="finished"), + _signed_payload_with(_record(), scope_digest="not-a-digest"), + _signed_payload_with(_record(), declared_at_utc="not-a-date"), + _signed_payload_with(_record(), lease_renewed_at_utc="not-a-date"), + _signed_payload_without(_record(), "lease_renewed_at_utc"), + _signed_payload_with(_record(), lease_seconds=1), + _signed_payload_with(_record(), lease_seconds=True), + _signed_payload_without(_record(), "report_digest"), + _signed_payload_with(_record(), blast_radius_summary=[]), + _signed_payload_with(_record(), scope=[]), + _signed_payload_with(_record(), scope={"allowed_files": []}), + _signed_payload_with( + _record(), + scope={"allowed_files": ["pkg/a.py"], "allowed_related": "tests/a.py"}, + ), + _signed_payload_with( + _record(), + scope={"allowed_files": ["pkg/a.py"], "forbidden": [1]}, + ), + ], +) +def test_workspace_intent_validation_rejects_malformed_payloads( + payload: object, +) -> None: + assert workspace_intents.validate_workspace_record(payload) is None + + +def test_workspace_intent_validation_rejects_scope_digest_mismatch() -> None: + payload = _signed_payload_with(_record(), scope_digest="0" * 64) + + assert workspace_intents.validate_workspace_record(payload) is None + + def test_workspace_intent_validation_rejects_tampered_and_invalid_paths( tmp_path: Path, ) -> None: @@ -193,6 +262,444 @@ def test_workspace_intent_lease_expiry_is_recoverable_not_gc( ) == (record,) +def test_workspace_intent_io_failure_paths( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + record = _record() + + assert ( + workspace_intents.find_workspace_intent( + root=tmp_path, + intent_id=record.intent_id, + ) + is None + ) + assert ( + workspace_intents.update_workspace_intent_status( + root=tmp_path, + pid=record.agent_pid, + start_epoch=record.agent_start_epoch, + intent_id=record.intent_id, + new_status="clean", + ) + is False + ) + assert ( + workspace_intents.renew_workspace_intent_lease( + 
root=tmp_path, + pid=record.agent_pid, + start_epoch=record.agent_start_epoch, + intent_id=record.intent_id, + ) + is False + ) + + assert workspace_intents.write_workspace_intent(root=tmp_path, record=record) + assert ( + workspace_intents.update_workspace_intent_status( + root=tmp_path, + pid=record.agent_pid + 1, + start_epoch=record.agent_start_epoch, + intent_id=record.intent_id, + new_status="clean", + ) + is False + ) + + expired = _record( + intent_id="intent-expired-lease", + expires_delta=timedelta(days=-1), + ) + assert workspace_intents.write_workspace_intent(root=tmp_path, record=expired) + assert ( + workspace_intents.renew_workspace_intent_lease( + root=tmp_path, + pid=expired.agent_pid, + start_epoch=expired.agent_start_epoch, + intent_id=expired.intent_id, + ) + is False + ) + + def raise_oserror(*args: object, **kwargs: object) -> None: + raise OSError("boom") + + monkeypatch.setattr( + workspace_intents, + "write_json_document_atomically", + raise_oserror, + ) + assert ( + workspace_intents.write_workspace_intent(root=tmp_path, record=_record()) + is False + ) + assert ( + workspace_intents.update_workspace_intent_status( + root=tmp_path, + pid=record.agent_pid, + start_epoch=record.agent_start_epoch, + intent_id=record.intent_id, + new_status="violated", + ) + is False + ) + assert ( + workspace_intents.renew_workspace_intent_lease( + root=tmp_path, + pid=record.agent_pid, + start_epoch=record.agent_start_epoch, + intent_id=record.intent_id, + ) + is False + ) + + monkeypatch.setattr(Path, "unlink", raise_oserror) + assert ( + workspace_intents.remove_workspace_intent( + root=tmp_path, + pid=record.agent_pid, + start_epoch=record.agent_start_epoch, + intent_id=record.intent_id, + ) + is False + ) + + +def test_workspace_intent_payload_and_helper_edge_cases(tmp_path: Path) -> None: + record = _record() + now = workspace_intents.utc_now() + + own_payload = record.to_payload( + own_pid=record.agent_pid, + own_start_epoch=record.agent_start_epoch, + 
now=now, + ) + assert own_payload["ownership"] == "own_active" + assert own_payload["is_own"] is True + assert isinstance(own_payload["lease_expires_in_seconds"], int) + + invalid_lease = replace(record, lease_renewed_at_utc="not-a-date") + invalid_payload = invalid_lease.to_payload( + own_pid=record.agent_pid, + own_start_epoch=record.agent_start_epoch, + now=now, + ) + assert invalid_payload["ownership"] == "own_stale" + assert "lease_expires_in_seconds" not in invalid_payload + + assert workspace_intents.resolved_ttl_seconds(True) == ( + workspace_intents.DEFAULT_TTL_SECONDS + ) + assert workspace_intents.resolved_ttl_seconds("bad") == ( + workspace_intents.DEFAULT_TTL_SECONDS + ) + assert workspace_intents.resolved_ttl_seconds("1") == ( + workspace_intents.MIN_TTL_SECONDS + ) + assert workspace_intents.resolved_ttl_seconds("999999") == ( + workspace_intents.MAX_TTL_SECONDS + ) + assert workspace_intents.resolved_lease_seconds("1") == ( + workspace_intents.MIN_LEASE_SECONDS + ) + assert workspace_intents.resolved_lease_seconds("999999") == ( + workspace_intents.MAX_LEASE_SECONDS + ) + assert workspace_intents.verify_intent_integrity({}) is False + assert ( + workspace_intents.safe_remove_own_intent( + root=Path("relative"), + pid=record.agent_pid, + start_epoch=record.agent_start_epoch, + intent_id=record.intent_id, + ) + is False + ) + + assert workspace_intents.write_workspace_intent(root=tmp_path, record=record) + assert workspace_intents.safe_remove_own_intent( + root=tmp_path, + pid=record.agent_pid, + start_epoch=record.agent_start_epoch, + intent_id=record.intent_id, + ) + assert not workspace_intents.intent_path( + root=tmp_path, + pid=record.agent_pid, + start_epoch=record.agent_start_epoch, + intent_id=record.intent_id, + ).exists() + + assert workspace_intents.write_workspace_intent(root=tmp_path, record=record) + assert workspace_intents.remove_workspace_record(root=tmp_path, record=record) + + +def test_workspace_intent_private_edge_helpers( + 
tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + expired_status = _record( + status=workspace_intents.WorkspaceIntentStatus.EXPIRED.value + ) + orphaned_status = _record( + status=workspace_intents.WorkspaceIntentStatus.ORPHANED.value + ) + + assert workspace_intents.is_stale(expired_status) + assert workspace_intents.stale_reason(expired_status) == "expired" + assert workspace_intents.stale_reason(orphaned_status) == "orphaned" + assert workspace_intents._is_pid_alive(0) is False + assert workspace_intents._dict_payload({1: "bad"}) is None + assert workspace_intents._valid_path_list( + ["", "pkg/a.py"], + required=True, + ) == ["pkg/a.py"] + assert workspace_intents._string_value(123) == "" + assert workspace_intents._required_string(" ") is None + assert workspace_intents._required_string("value") == "value" + assert workspace_intents._positive_int(True) is None + assert workspace_intents._positive_int(0) is None + assert workspace_intents._positive_int(5) == 5 + assert ( + workspace_intents._valid_lease_seconds(workspace_intents.MIN_LEASE_SECONDS - 1) + is None + ) + assert ( + workspace_intents._valid_lease_seconds(workspace_intents.MAX_LEASE_SECONDS + 1) + is None + ) + assert ( + workspace_intents._valid_lease_seconds(workspace_intents.MIN_LEASE_SECONDS) + == workspace_intents.MIN_LEASE_SECONDS + ) + assert workspace_intents._is_hex_digest(123) is False + assert workspace_intents._is_hex_digest("0" * 63) is False + assert workspace_intents._is_hex_digest("g" * 64) is False + assert workspace_intents._is_hex_digest("A" * 64) is True + assert workspace_intents._valid_status_values() == frozenset( + status.value for status in workspace_intents.WorkspaceIntentStatus + ) + assert workspace_intents._valid_scope([]) is None + assert workspace_intents._valid_scope({1: ["pkg/a.py"]}) is None + assert workspace_intents._valid_scope({"allowed_files": []}) is None + assert ( + workspace_intents._valid_scope( + {"allowed_files": ["pkg/a.py"], 
"allowed_related": "tests/a.py"} + ) + is None + ) + assert ( + workspace_intents._valid_scope( + {"allowed_files": ["pkg/a.py"], "forbidden": [1]} + ) + is None + ) + assert workspace_intents._valid_path_list("pkg/a.py", required=True) is None + assert workspace_intents._valid_path_list([1], required=True) is None + assert workspace_intents._valid_path_list(["/abs.py"], required=True) is None + assert workspace_intents._valid_path_list(["../abs.py"], required=True) is None + assert workspace_intents._valid_path_list(["pkg/a.py/"], required=True) == [ + "pkg/a.py" + ] + assert workspace_intents._scope_file_sets({"allowed_files": "pkg/a.py"}) == ( + set(), + set(), + ) + assert workspace_intents._parse_utc("2026-01-01T00:00:00") is None + assert workspace_intents._sort_agent_pid(True) == 0 + assert workspace_intents._sort_agent_pid("123") == 0 + assert workspace_intents._sort_agent_pid(123) == 123 + assert workspace_intents._overlap_type(hard=False, soft=True) == "soft" + + def raise_permission_error(pid: int, signal: int) -> None: + raise PermissionError + + def raise_oserror(pid: int, signal: int) -> None: + raise OSError + + def raise_process_lookup(pid: int, signal: int) -> None: + raise ProcessLookupError + + monkeypatch.setattr(os, "kill", raise_permission_error) + assert workspace_intents._is_pid_alive(123) is True + monkeypatch.setattr(os, "kill", raise_oserror) + assert workspace_intents._is_pid_alive(123) is True + monkeypatch.setattr(os, "kill", raise_process_lookup) + assert workspace_intents._is_pid_alive(123) is False + + path = tmp_path / "intent.json" + path.write_text("{}", "utf-8") + + def raise_unlink_oserror(self: Path, missing_ok: bool = False) -> None: + raise OSError("unlink failed") + + monkeypatch.setattr(Path, "unlink", raise_unlink_oserror) + assert workspace_intents._unlink(path) is False + + def raise_resolve_oserror(self: Path, strict: bool = False) -> Path: + raise OSError("resolve failed") + + monkeypatch.setattr(Path, "resolve", 
raise_resolve_oserror) + assert ( + workspace_intents._is_safe_intent_path( + tmp_path / "intent.json", + workspace_intents.registry_dir(tmp_path), + ) + is False + ) + + +def test_workspace_intent_safe_path_edge_helpers(tmp_path: Path) -> None: + registry = workspace_intents.registry_dir(tmp_path) + registry.mkdir(parents=True) + good = registry / "123-456-intent-good.json" + good.write_text("{}", encoding="utf-8") + + assert workspace_intents._is_safe_intent_path(good, registry) + assert ( + workspace_intents._is_safe_intent_path(Path("relative.json"), registry) is False + ) + assert ( + workspace_intents._is_safe_intent_path(tmp_path / "outside.json", registry) + is False + ) + assert ( + workspace_intents._is_safe_intent_path(registry / "bad.json", registry) is False + ) + + directory_target = registry / "123-456-intent-dir.json" + directory_target.mkdir() + assert workspace_intents._is_safe_intent_path(directory_target, registry) is False + + non_normalized = registry / ".." / "123-456-intent-other.json" + assert workspace_intents._is_safe_intent_path(non_normalized, registry) is False + + +def test_workspace_intent_registry_defensive_failure_edges( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + record = _record( + intent_id="intent-expired-cleanup", expires_delta=timedelta(days=-1) + ) + assert workspace_intents.write_workspace_intent(root=tmp_path, record=record) + path = workspace_intents.intent_path( + root=tmp_path, + pid=record.agent_pid, + start_epoch=record.agent_start_epoch, + intent_id=record.intent_id, + ) + + corrupted = workspace_intents.registry_dir(tmp_path) / "123-456-intent-bad.json" + corrupted.write_text("{", encoding="utf-8") + cleanup = workspace_intents.gc_workspace(root=tmp_path) + assert cleanup["corrupted_filenames"] == ["123-456-intent-bad.json"] + + monkeypatch.setattr(workspace_intents, "_unlink", lambda item: False) + assert workspace_intents.gc_workspace(root=tmp_path)["removed"] == 0 + + def 
raise_read_error(item: Path) -> dict[str, object]: + raise ValueError("bad json") + + monkeypatch.setattr(workspace_intents, "read_json_object", raise_read_error) + assert workspace_intents._read_payload(path) is None + + def raise_glob_error(self: Path, pattern: str) -> tuple[Path, ...]: + raise OSError("glob failed") + + monkeypatch.setattr(Path, "glob", raise_glob_error) + assert workspace_intents.list_workspace_intents(root=tmp_path) == () + assert workspace_intents._valid_scope({1: ["pkg/a.py"]}) is None + + def raise_safety_error(expected: Path, registry: Path) -> bool: + raise RuntimeError("safety check failed") + + monkeypatch.setattr(workspace_intents, "_is_safe_intent_path", raise_safety_error) + assert ( + workspace_intents.safe_remove_own_intent( + root=tmp_path, + pid=record.agent_pid, + start_epoch=record.agent_start_epoch, + intent_id=record.intent_id, + ) + is False + ) + + +def test_workspace_intent_foreign_stale_conflict_and_counts( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + active = _record(intent_id="intent-active-001", pid=111, start_epoch=100) + foreign_stale = _record( + intent_id="intent-stale-001", + pid=222, + start_epoch=200, + lease_renewed_delta=timedelta(minutes=-10), + lease_seconds=workspace_intents.MIN_LEASE_SECONDS, + ) + orphaned = _record(intent_id="intent-orphaned-001", pid=333, start_epoch=300) + for record in (active, foreign_stale, orphaned): + assert workspace_intents.write_workspace_intent(root=tmp_path, record=record) + + monkeypatch.setattr(workspace_intents, "_is_pid_alive", lambda pid: pid != 333) + + counts = workspace_intents.workspace_status_counts(root=tmp_path) + assert counts == {"stale_count": 2, "orphaned_count": 1, "total_agents": 3} + + payload = foreign_stale.to_payload( + own_pid=111, + own_start_epoch=100, + now=workspace_intents.utc_now(), + ) + assert payload["ownership"] == workspace_intents.IntentOwnership.FOREIGN_STALE.value + assert "owner may still be working" in 
str(payload["escalation_hint"]) + assert workspace_intents._gc_removal_reason(foreign_stale) is None + + conflicts = workspace_intents.detect_conflicts( + new_scope={ + "allowed_files": ["pkg/a.py"], + "allowed_related": [], + "forbidden": [], + }, + existing=(foreign_stale,), + own_pid=111, + own_start_epoch=100, + ) + assert conflicts == [ + { + "intent_id": foreign_stale.intent_id, + "agent_pid": 222, + "agent_start_epoch": 200, + "agent_label": "agent-a", + "intent": "edit pkg.a", + "ownership": workspace_intents.IntentOwnership.FOREIGN_STALE.value, + "severity": "stale", + "recommended_action": "coordinate_or_recover", + "overlap_type": "hard", + "hard_overlap": ["pkg/a.py"], + "soft_overlap": [], + "declared_at_utc": foreign_stale.declared_at_utc, + "expires_at_utc": foreign_stale.expires_at_utc, + } + ] + + assert ( + workspace_intents.detect_conflicts( + new_scope={ + "allowed_files": ["pkg/other.py"], + "allowed_related": [], + "forbidden": [], + }, + existing=(foreign_stale,), + own_pid=111, + own_start_epoch=100, + ) + == [] + ) + + def test_workspace_intent_ownership_classification( monkeypatch: pytest.MonkeyPatch, ) -> None: @@ -205,6 +712,12 @@ def test_workspace_intent_ownership_classification( lease_seconds=workspace_intents.MIN_LEASE_SECONDS, ) foreign = _record(pid=222, start_epoch=200) + foreign_stale = _record( + pid=222, + start_epoch=200, + lease_renewed_delta=timedelta(minutes=-10), + lease_seconds=workspace_intents.MIN_LEASE_SECONDS, + ) expired = _record(expires_delta=timedelta(seconds=-1)) monkeypatch.setattr(workspace_intents, "_is_pid_alive", lambda pid: pid != 333) @@ -236,6 +749,15 @@ def test_workspace_intent_ownership_classification( ) == workspace_intents.IntentOwnership.FOREIGN_ACTIVE ) + assert ( + workspace_intents.classify_intent_ownership( + foreign_stale, + own_pid=111, + own_start_epoch=100, + now=now, + ) + == workspace_intents.IntentOwnership.FOREIGN_STALE + ) dead_pid = _record(pid=333, start_epoch=300) assert ( 
workspace_intents.classify_intent_ownership( @@ -275,6 +797,25 @@ def test_workspace_intent_renew_lease_updates_timestamp(tmp_path: Path) -> None: assert workspace_intents.verify_intent_integrity(updated.signed_payload()) +def test_workspace_intent_update_status_can_extend_ttl(tmp_path: Path) -> None: + record = _record(lease_renewed_delta=timedelta(minutes=-2)) + assert workspace_intents.write_workspace_intent(root=tmp_path, record=record) + + assert workspace_intents.update_workspace_intent_status( + root=tmp_path, + pid=record.agent_pid, + start_epoch=record.agent_start_epoch, + intent_id=record.intent_id, + new_status="active", + ttl_seconds=workspace_intents.MIN_TTL_SECONDS, + ) + + updated = workspace_intents.list_workspace_intents(root=tmp_path)[0] + assert updated.ttl_seconds == workspace_intents.MIN_TTL_SECONDS + assert updated.lease_renewed_at_utc != record.lease_renewed_at_utc + assert updated.lease_renewed_at_utc == updated.declared_at_utc + + def test_workspace_intent_renew_lease_rejects_foreign_owner(tmp_path: Path) -> None: record = _record() assert workspace_intents.write_workspace_intent(root=tmp_path, record=record) @@ -363,3 +904,89 @@ def test_workspace_intent_conflict_detection() -> None: ) == [] ) + + both = workspace_intents.detect_conflicts( + new_scope={ + "allowed_files": ["pkg/a.py"], + "allowed_related": ["pkg/a.py"], + "forbidden": [], + }, + existing=(existing,), + own_pid=123456, + own_start_epoch=999, + ) + assert both[0]["overlap_type"] == "both" + + +def test_workspace_intent_regression_stale_lease_silent_overlap( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Regression: expired lease + alive PID must produce conflict, not silence. 
+ + Timeline from real incident: + T0: PID A declares intent over files X, lease=300s + T0+301s: PID A lease expires, PID A still alive + T0+352s: PID B declares intent over same files X + Expected: PID B sees concurrent_intents with ownership=foreign_stale + """ + agent_a = _record( + intent_id="intent-a-001", + pid=1000, + start_epoch=100, + scope={ + "allowed_files": ["src/shared.py"], + "allowed_related": [], + "forbidden": [], + }, + lease_renewed_delta=timedelta(minutes=-6), + lease_seconds=workspace_intents.DEFAULT_LEASE_SECONDS, + ) + monkeypatch.setattr(workspace_intents, "_is_pid_alive", lambda pid: True) + + conflicts = workspace_intents.detect_conflicts( + new_scope={ + "allowed_files": ["src/shared.py"], + "allowed_related": [], + "forbidden": [], + }, + existing=(agent_a,), + own_pid=2000, + own_start_epoch=200, + ) + + conflict = conflicts[0] + assert len(conflicts) == 1 + for key, expected in ( + ("ownership", "foreign_stale"), + ("severity", "stale"), + ("recommended_action", "coordinate_or_recover"), + ("hard_overlap", ["src/shared.py"]), + ): + assert conflict[key] == expected, f"{key}: {conflict[key]!r} != {expected!r}" + + +def test_workspace_intent_renew_lease_with_custom_seconds(tmp_path: Path) -> None: + """Explicit lease_seconds on renew updates the workspace record.""" + record = _record( + lease_renewed_delta=timedelta(minutes=-2), + lease_seconds=workspace_intents.DEFAULT_LEASE_SECONDS, + ) + assert workspace_intents.write_workspace_intent(root=tmp_path, record=record) + + assert workspace_intents.renew_workspace_intent_lease( + root=tmp_path, + pid=record.agent_pid, + start_epoch=record.agent_start_epoch, + intent_id=record.intent_id, + lease_seconds=workspace_intents.MAX_LEASE_SECONDS, + ) + updated = workspace_intents.list_workspace_intents(root=tmp_path)[0] + assert updated.lease_seconds == workspace_intents.MAX_LEASE_SECONDS + assert updated.lease_renewed_at_utc != record.lease_renewed_at_utc + + +def 
test_workspace_intent_max_lease_seconds_ceiling() -> None: + """MAX_LEASE_SECONDS is 600 (10 minutes), not 3600.""" + assert workspace_intents.MAX_LEASE_SECONDS == 600 + assert workspace_intents.resolved_lease_seconds(9999) == 600 + assert workspace_intents.resolved_lease_seconds(60) == 60 From 14a2b13358d1b75d5e5cc6019fc6c12894b1cecf Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Tue, 26 May 2026 11:07:28 +0500 Subject: [PATCH 028/318] feat(controller): add audit trail and rich session stats --- codeclone/audit/__init__.py | 79 ++++++ codeclone/audit/events.py | 268 ++++++++++++++++++ codeclone/audit/reader.py | 161 +++++++++++ codeclone/audit/schema.py | 116 ++++++++ codeclone/audit/validation.py | 174 ++++++++++++ codeclone/audit/writer.py | 164 +++++++++++ codeclone/config/spec.py | 37 +++ codeclone/surfaces/cli/audit.py | 233 +++++++++++++++ codeclone/surfaces/cli/session_stats.py | 137 ++++++++- codeclone/surfaces/cli/types.py | 5 + codeclone/surfaces/cli/workflow.py | 68 ++++- .../mcp/_session_claim_guard_mixin.py | 14 +- .../surfaces/mcp/_session_intent_mixin.py | 129 ++++++++- .../mcp/_session_patch_contract_mixin.py | 62 +++- .../mcp/_session_review_receipt_mixin.py | 24 +- codeclone/surfaces/mcp/session.py | 99 ++++++- codeclone/ui_messages/__init__.py | 4 + docs/book/04-config-and-defaults.md | 9 + pyproject.toml | 4 + .../fixtures/contract_snapshots/cli_help.txt | 9 +- tests/test_audit_schema.py | 85 ++++++ tests/test_audit_writer.py | 117 ++++++++ tests/test_cli_audit.py | 143 ++++++++++ tests/test_cli_session_stats.py | 29 ++ tests/test_mcp_service.py | 55 ++++ 25 files changed, 2194 insertions(+), 31 deletions(-) create mode 100644 codeclone/audit/__init__.py create mode 100644 codeclone/audit/events.py create mode 100644 codeclone/audit/reader.py create mode 100644 codeclone/audit/schema.py create mode 100644 codeclone/audit/validation.py create mode 100644 codeclone/audit/writer.py create mode 100644 codeclone/surfaces/cli/audit.py create 
mode 100644 tests/test_audit_schema.py create mode 100644 tests/test_audit_writer.py create mode 100644 tests/test_cli_audit.py diff --git a/codeclone/audit/__init__.py b/codeclone/audit/__init__.py new file mode 100644 index 00000000..edb30974 --- /dev/null +++ b/codeclone/audit/__init__.py @@ -0,0 +1,79 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from .events import ( + EVENT_BASELINE_ABUSE, + EVENT_BLAST_RADIUS, + EVENT_CLAIM_COMPLETED, + EVENT_CLAIM_VIOLATED, + EVENT_INTENT_CHECKED, + EVENT_INTENT_CLEARED, + EVENT_INTENT_DECLARED, + EVENT_INTENT_EXPANDED, + EVENT_INTENT_EXPIRED, + EVENT_INTENT_RENEWED, + EVENT_INTENT_VIOLATED, + EVENT_PATCH_BUDGET, + EVENT_PATCH_EXPIRED, + EVENT_PATCH_VERIFIED, + EVENT_PATCH_VIOLATED, + EVENT_RECEIPT_CREATED, + EVENT_WORKSPACE_CONFLICT, + EVENT_WORKSPACE_GC, + AuditEvent, + repo_root_digest, +) +from .validation import ( + DEFAULT_AUDIT_PATH, + DEFAULT_AUDIT_PAYLOADS, + DEFAULT_AUDIT_RETENTION_DAYS, + AuditConfigError, + AuditReadError, + AuditSchemaError, + AuditValidationError, + resolve_audit_path, + validate_payload_mode, + validate_retention_days, +) +from .writer import AuditWriter, NullAuditWriter, SqliteAuditWriter + +__all__ = [ + "DEFAULT_AUDIT_PATH", + "DEFAULT_AUDIT_PAYLOADS", + "DEFAULT_AUDIT_RETENTION_DAYS", + "EVENT_BASELINE_ABUSE", + "EVENT_BLAST_RADIUS", + "EVENT_CLAIM_COMPLETED", + "EVENT_CLAIM_VIOLATED", + "EVENT_INTENT_CHECKED", + "EVENT_INTENT_CLEARED", + "EVENT_INTENT_DECLARED", + "EVENT_INTENT_EXPANDED", + "EVENT_INTENT_EXPIRED", + "EVENT_INTENT_RENEWED", + "EVENT_INTENT_VIOLATED", + "EVENT_PATCH_BUDGET", + "EVENT_PATCH_EXPIRED", + "EVENT_PATCH_VERIFIED", + "EVENT_PATCH_VIOLATED", + "EVENT_RECEIPT_CREATED", + 
"EVENT_WORKSPACE_CONFLICT", + "EVENT_WORKSPACE_GC", + "AuditConfigError", + "AuditEvent", + "AuditReadError", + "AuditSchemaError", + "AuditValidationError", + "AuditWriter", + "NullAuditWriter", + "SqliteAuditWriter", + "repo_root_digest", + "resolve_audit_path", + "validate_payload_mode", + "validate_retention_days", +] diff --git a/codeclone/audit/events.py b/codeclone/audit/events.py new file mode 100644 index 00000000..edebb582 --- /dev/null +++ b/codeclone/audit/events.py @@ -0,0 +1,268 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import hashlib +import secrets +import time +from collections.abc import Mapping, Sequence +from dataclasses import dataclass +from pathlib import Path +from typing import Literal + +AuditSeverity = Literal["info", "warn", "error"] +AuditPayloadMode = Literal["off", "compact", "full"] + +EVENT_INTENT_DECLARED = "intent.declared" +EVENT_INTENT_CHECKED = "intent.checked" +EVENT_INTENT_EXPANDED = "intent.expanded" +EVENT_INTENT_VIOLATED = "intent.violated" +EVENT_INTENT_CLEARED = "intent.cleared" +EVENT_INTENT_RENEWED = "intent.renewed" +EVENT_INTENT_EXPIRED = "intent.expired" +EVENT_WORKSPACE_CONFLICT = "workspace.conflict_detected" +EVENT_WORKSPACE_GC = "workspace.gc_completed" +EVENT_BLAST_RADIUS = "blast_radius.computed" +EVENT_PATCH_BUDGET = "patch_budget.computed" +EVENT_PATCH_VERIFIED = "patch_contract.verified" +EVENT_PATCH_VIOLATED = "patch_contract.violated" +EVENT_PATCH_EXPIRED = "patch_contract.expired" +EVENT_CLAIM_COMPLETED = "claim_validation.completed" +EVENT_CLAIM_VIOLATED = "claim_validation.violated" +EVENT_RECEIPT_CREATED = "review_receipt.created" +EVENT_BASELINE_ABUSE = "baseline_abuse.detected" + +KNOWN_EVENT_TYPES = frozenset( + { + 
EVENT_INTENT_DECLARED, + EVENT_INTENT_CHECKED, + EVENT_INTENT_EXPANDED, + EVENT_INTENT_VIOLATED, + EVENT_INTENT_CLEARED, + EVENT_INTENT_RENEWED, + EVENT_INTENT_EXPIRED, + EVENT_WORKSPACE_CONFLICT, + EVENT_WORKSPACE_GC, + EVENT_BLAST_RADIUS, + EVENT_PATCH_BUDGET, + EVENT_PATCH_VERIFIED, + EVENT_PATCH_VIOLATED, + EVENT_PATCH_EXPIRED, + EVENT_CLAIM_COMPLETED, + EVENT_CLAIM_VIOLATED, + EVENT_RECEIPT_CREATED, + EVENT_BASELINE_ABUSE, + } +) + +PAYLOAD_MODES = frozenset({"off", "compact", "full"}) + + +@dataclass(frozen=True, slots=True) +class AuditEvent: + event_type: str + severity: AuditSeverity + repo_root_digest: str + agent_pid: int + agent_label: str + run_id: str | None = None + intent_id: str | None = None + report_digest: str | None = None + status: str | None = None + payload: Mapping[str, object] | None = None + + +def generate_event_id() -> str: + timestamp = format(int(time.time() * 1000), "x") + return f"evt_{timestamp}_{secrets.token_hex(2)}" + + +def repo_root_digest(root_path: Path) -> str: + return hashlib.sha256(str(root_path).encode("utf-8")).hexdigest()[:16] + + +def compact_payload_for_event( + *, + event_type: str, + payload: Mapping[str, object] | None, +) -> dict[str, object]: + if payload is None: + return {} + if event_type in { + EVENT_INTENT_DECLARED, + EVENT_INTENT_RENEWED, + EVENT_INTENT_EXPIRED, + }: + return _compact_intent_payload(payload) + if event_type in { + EVENT_INTENT_CHECKED, + EVENT_INTENT_EXPANDED, + EVENT_INTENT_VIOLATED, + }: + return _compact_check_payload(payload) + if event_type == EVENT_INTENT_CLEARED: + return { + "cleared": _int_value(payload.get("cleared")), + "workspace_cleared": bool(payload.get("workspace_cleared")), + } + if event_type == EVENT_WORKSPACE_CONFLICT: + return { + "concurrent_intents": _sequence_field_count( + payload, + "concurrent_intents", + ) + } + if event_type == EVENT_WORKSPACE_GC: + return { + "removed": _int_value(payload.get("removed")), + "stale_count": 
_int_value(payload.get("stale_count")), + "orphaned_count": _int_value(payload.get("orphaned_count")), + } + if event_type == EVENT_BLAST_RADIUS: + return _compact_blast_radius_payload(payload) + if event_type == EVENT_PATCH_BUDGET: + return _compact_budget_payload(payload) + if event_type in { + EVENT_PATCH_VERIFIED, + EVENT_PATCH_VIOLATED, + EVENT_PATCH_EXPIRED, + EVENT_BASELINE_ABUSE, + }: + return _compact_verify_payload(payload) + if event_type in {EVENT_CLAIM_COMPLETED, EVENT_CLAIM_VIOLATED}: + return { + "valid": bool(payload.get("valid")), + "violations": len(_sequence(payload.get("violations"))), + "warnings": len(_sequence(payload.get("warnings"))), + } + if event_type == EVENT_RECEIPT_CREATED: + receipt = _mapping(payload.get("receipt")) + return { + "format": str(payload.get("format", "")), + "verdict": str(receipt.get("verdict", "")), + "human_decisions": _sequence_field_count( + receipt, + "human_decision_points", + ), + } + return _compact_identifiers(payload) + + +def _compact_intent_payload(payload: Mapping[str, object]) -> dict[str, object]: + scope = _mapping(payload.get("scope")) + allowed = _sequence(scope.get("allowed_files")) + return { + "scope_file_count": len(allowed), + "concurrent_intents": len(_sequence(payload.get("concurrent_intents"))), + "workspace_registered": bool(payload.get("workspace_registered")), + "ttl_seconds": _int_value(payload.get("ttl_seconds")), + "lease_seconds": _int_value(payload.get("lease_seconds")), + } + + +def _compact_check_payload(payload: Mapping[str, object]) -> dict[str, object]: + return { + "status": str(payload.get("status", "")), + "unexpected_files": len(_sequence(payload.get("unexpected_files"))), + "forbidden_touched": len(_sequence(payload.get("forbidden_touched"))), + } + + +def _compact_blast_radius_payload(payload: Mapping[str, object]) -> dict[str, object]: + structural_risk = _mapping(payload.get("structural_risk")) + return { + "radius_level": str(payload.get("radius_level", "")), + 
"direct_dependents": len(_sequence(payload.get("direct_dependents"))), + "clone_cohort_members": len(_sequence(payload.get("clone_cohort_members"))), + "do_not_touch": len(_sequence(payload.get("do_not_touch"))), + "review_context": len(_sequence(payload.get("review_context"))), + "risk_keys": sorted(str(key) for key in structural_risk), + } + + +def _compact_budget_payload(payload: Mapping[str, object]) -> dict[str, object]: + blast = _mapping(payload.get("blast_radius_summary")) + gate = _mapping(payload.get("gate_preview")) + return { + "strictness": str(payload.get("strictness", "")), + "radius_level": str(blast.get("radius_level", "")), + "do_not_touch_count": _int_value(blast.get("do_not_touch_count")), + "review_context_count": _int_value(blast.get("review_context_count")), + "gate_would_fail": bool(gate.get("would_fail")), + } + + +def _compact_verify_payload(payload: Mapping[str, object]) -> dict[str, object]: + delta = _mapping(payload.get("structural_delta")) + baseline_abuse = _mapping(payload.get("baseline_abuse")) + return { + "status": str(payload.get("status", "")), + "regressions": len(_sequence(delta.get("regressions"))), + "improvements": len(_sequence(delta.get("improvements"))), + "health_delta": _int_or_none(delta.get("health_delta")), + "contract_violations": [ + str(item) for item in _sequence(payload.get("contract_violations")) + ], + "baseline_abuse": bool(baseline_abuse.get("detected")), + } + + +def _compact_identifiers(payload: Mapping[str, object]) -> dict[str, object]: + keys = ("mode", "status", "reason", "run_id", "intent_id") + return {key: payload[key] for key in keys if key in payload} + + +def _sequence_field_count(payload: Mapping[str, object], key: str) -> int: + return len(_sequence(payload.get(key))) + + +def _mapping(value: object) -> Mapping[str, object]: + return value if isinstance(value, Mapping) else {} + + +def _sequence(value: object) -> Sequence[object]: + if isinstance(value, str): + return () + return value if 
isinstance(value, Sequence) else () + + +def _int_value(value: object) -> int: + return value if isinstance(value, int) and not isinstance(value, bool) else 0 + + +def _int_or_none(value: object) -> int | None: + return value if isinstance(value, int) and not isinstance(value, bool) else None + + +__all__ = [ + "EVENT_BASELINE_ABUSE", + "EVENT_BLAST_RADIUS", + "EVENT_CLAIM_COMPLETED", + "EVENT_CLAIM_VIOLATED", + "EVENT_INTENT_CHECKED", + "EVENT_INTENT_CLEARED", + "EVENT_INTENT_DECLARED", + "EVENT_INTENT_EXPANDED", + "EVENT_INTENT_EXPIRED", + "EVENT_INTENT_RENEWED", + "EVENT_INTENT_VIOLATED", + "EVENT_PATCH_BUDGET", + "EVENT_PATCH_EXPIRED", + "EVENT_PATCH_VERIFIED", + "EVENT_PATCH_VIOLATED", + "EVENT_RECEIPT_CREATED", + "EVENT_WORKSPACE_CONFLICT", + "EVENT_WORKSPACE_GC", + "KNOWN_EVENT_TYPES", + "PAYLOAD_MODES", + "AuditEvent", + "AuditPayloadMode", + "AuditSeverity", + "compact_payload_for_event", + "generate_event_id", + "repo_root_digest", +] diff --git a/codeclone/audit/reader.py b/codeclone/audit/reader.py new file mode 100644 index 00000000..f36393ab --- /dev/null +++ b/codeclone/audit/reader.py @@ -0,0 +1,161 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import sqlite3 +from dataclasses import dataclass +from pathlib import Path + +from .schema import ensure_schema, get_meta +from .validation import AuditReadError, AuditSchemaError + + +@dataclass(frozen=True, slots=True) +class AuditRecord: + event_id: str + event_type: str + severity: str + created_at_utc: str + run_id: str | None + intent_id: str | None + status: str | None + agent_label: str + + +@dataclass(frozen=True, slots=True) +class AuditSummary: + db_path: Path + db_size_bytes: int + retention_days: int | None + total_events: int + intent_events: int + contract_events: int + receipt_events: int + violation_events: int + oldest_event_utc: str | None + latest_event_utc: str | None + events: tuple[AuditRecord, ...] + + +def read_audit_summary(*, db_path: Path, limit: int = 50) -> AuditSummary: + if not db_path.is_file(): + raise AuditReadError("no audit data") + try: + conn = sqlite3.connect(str(db_path)) + except sqlite3.Error as exc: + raise AuditReadError(f"cannot open audit database: {exc}") from exc + try: + ensure_schema(conn) + retention_days = _int_meta(conn, "retention_days") + total = _count(conn, "SELECT COUNT(*) FROM controller_events") + intent_events = _count( + conn, + "SELECT COUNT(*) FROM controller_events WHERE event_type LIKE 'intent.%'", + ) + contract_events = _count( + conn, + "SELECT COUNT(*) FROM controller_events " + "WHERE event_type IN (" + "'patch_budget.computed'," + "'patch_contract.verified'," + "'patch_contract.violated'," + "'patch_contract.expired'" + ")", + ) + receipt_events = _count( + conn, + "SELECT COUNT(*) FROM controller_events " + "WHERE event_type = 'review_receipt.created'", + ) + violation_events = _count( + conn, + "SELECT COUNT(*) FROM controller_events " + "WHERE severity IN ('warn', 'error')", + ) + oldest = _text_scalar(conn, "SELECT MIN(created_at_utc) FROM controller_events") + latest = 
_text_scalar(conn, "SELECT MAX(created_at_utc) FROM controller_events") + rows = conn.execute( + "SELECT event_id, event_type, severity, created_at_utc, run_id, " + "intent_id, status, agent_label " + "FROM controller_events " + "ORDER BY created_at_utc DESC, id DESC " + "LIMIT ?", + (max(1, int(limit)),), + ).fetchall() + except (sqlite3.Error, AuditSchemaError) as exc: + raise AuditReadError(f"cannot read audit database: {exc}") from exc + finally: + conn.close() + return AuditSummary( + db_path=db_path, + db_size_bytes=_db_size(db_path), + retention_days=retention_days, + total_events=total, + intent_events=intent_events, + contract_events=contract_events, + receipt_events=receipt_events, + violation_events=violation_events, + oldest_event_utc=oldest, + latest_event_utc=latest, + events=tuple(_record_from_row(row) for row in rows), + ) + + +def _record_from_row(row: tuple[object, ...]) -> AuditRecord: + return AuditRecord( + event_id=_str_or_empty(row[0]), + event_type=_str_or_empty(row[1]), + severity=_str_or_empty(row[2]), + created_at_utc=_str_or_empty(row[3]), + run_id=_str_or_none(row[4]), + intent_id=_str_or_none(row[5]), + status=_str_or_none(row[6]), + agent_label=_str_or_empty(row[7]), + ) + + +def _count(conn: sqlite3.Connection, sql: str) -> int: + value = conn.execute(sql).fetchone() + if value is None: + return 0 + item = value[0] + return item if isinstance(item, int) else 0 + + +def _text_scalar(conn: sqlite3.Connection, sql: str) -> str | None: + row = conn.execute(sql).fetchone() + if row is None: + return None + return _str_or_none(row[0]) + + +def _int_meta(conn: sqlite3.Connection, key: str) -> int | None: + value = get_meta(conn, key) + if value is None: + return None + try: + return int(value) + except ValueError: + return None + + +def _db_size(path: Path) -> int: + try: + return path.stat().st_size + except OSError: + return 0 + + +def _str_or_empty(value: object) -> str: + return value if isinstance(value, str) else "" + + +def 
_str_or_none(value: object) -> str | None: + return value if isinstance(value, str) else None + + +__all__ = ["AuditRecord", "AuditSummary", "read_audit_summary"] diff --git a/codeclone/audit/schema.py b/codeclone/audit/schema.py new file mode 100644 index 00000000..7b00394b --- /dev/null +++ b/codeclone/audit/schema.py @@ -0,0 +1,116 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import sqlite3 +from pathlib import Path + +from .. import __version__ +from ..report.meta import current_report_timestamp_utc +from .validation import AUDIT_SCHEMA_VERSION, AuditSchemaError + +_CREATE_EVENTS_SQL = """ +CREATE TABLE IF NOT EXISTS controller_events ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + event_id TEXT NOT NULL UNIQUE, + event_type TEXT NOT NULL, + severity TEXT NOT NULL DEFAULT 'info', + created_at_utc TEXT NOT NULL, + + repo_root_digest TEXT NOT NULL, + run_id TEXT, + intent_id TEXT, + report_digest TEXT, + agent_label TEXT NOT NULL DEFAULT '', + agent_pid INTEGER NOT NULL, + + status TEXT, + payload_json TEXT NOT NULL DEFAULT '{}' +) +""" + +_CREATE_META_SQL = """ +CREATE TABLE IF NOT EXISTS audit_meta ( + key TEXT PRIMARY KEY, + value TEXT NOT NULL +) +""" + +_INDEX_SQL = ( + "CREATE INDEX IF NOT EXISTS idx_events_intent ON controller_events(intent_id)", + "CREATE INDEX IF NOT EXISTS idx_events_run ON controller_events(run_id)", + "CREATE INDEX IF NOT EXISTS idx_events_type_time " + "ON controller_events(event_type, created_at_utc)", + "CREATE INDEX IF NOT EXISTS idx_events_created " + "ON controller_events(created_at_utc)", +) + + +def open_audit_db(path: Path) -> sqlite3.Connection: + path.parent.mkdir(parents=True, exist_ok=True) + conn = sqlite3.connect(str(path), isolation_level="DEFERRED", 
timeout=5.0) + try: + conn.execute("PRAGMA journal_mode=WAL") + conn.execute("PRAGMA synchronous=NORMAL") + conn.execute("PRAGMA foreign_keys=OFF") + conn.execute("PRAGMA busy_timeout=5000") + ensure_schema(conn) + except Exception: + conn.close() + raise + return conn + + +def ensure_schema(conn: sqlite3.Connection) -> None: + current = get_meta(conn, "schema_version") + if current is None: + create_schema_v1(conn) + return + if current == AUDIT_SCHEMA_VERSION: + return + raise AuditSchemaError(f"Unsupported audit schema version: {current}") + + +def create_schema_v1(conn: sqlite3.Connection) -> None: + conn.execute(_CREATE_EVENTS_SQL) + conn.execute(_CREATE_META_SQL) + for statement in _INDEX_SQL: + conn.execute(statement) + now = current_report_timestamp_utc() + seed_meta = { + "schema_version": AUDIT_SCHEMA_VERSION, + "generator": "codeclone", + "codeclone_version": __version__, + "created_at_utc": now, + } + conn.executemany( + "INSERT OR IGNORE INTO audit_meta(key, value) VALUES (?, ?)", + sorted(seed_meta.items()), + ) + conn.commit() + + +def get_meta(conn: sqlite3.Connection, key: str) -> str | None: + try: + row = conn.execute( + "SELECT value FROM audit_meta WHERE key = ?", + (key,), + ).fetchone() + except sqlite3.OperationalError: + return None + if row is None: + return None + value = row[0] + return value if isinstance(value, str) else None + + +__all__ = [ + "create_schema_v1", + "ensure_schema", + "get_meta", + "open_audit_db", +] diff --git a/codeclone/audit/validation.py b/codeclone/audit/validation.py new file mode 100644 index 00000000..8adf2748 --- /dev/null +++ b/codeclone/audit/validation.py @@ -0,0 +1,174 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from dataclasses import dataclass +from pathlib import Path + +from .events import KNOWN_EVENT_TYPES, PAYLOAD_MODES, AuditPayloadMode, AuditSeverity + +AUDIT_SCHEMA_VERSION = "1" +DEFAULT_AUDIT_PATH = ".cache/codeclone/audit.sqlite3" +DEFAULT_AUDIT_PAYLOADS: AuditPayloadMode = "compact" +DEFAULT_AUDIT_RETENTION_DAYS = 30 +MIN_AUDIT_RETENTION_DAYS = 1 +MAX_AUDIT_RETENTION_DAYS = 365 + +_VALID_AUDIT_SUFFIXES = frozenset({".sqlite3", ".db"}) +_MAX_EVENT_ID_LEN = 48 +_MAX_EVENT_TYPE_LEN = 64 +_MAX_SEVERITY_LEN = 8 +_MAX_TIMESTAMP_LEN = 40 +_MAX_DIGEST_LEN = 128 +_MAX_RUN_ID_LEN = 128 +_MAX_INTENT_ID_LEN = 128 +_MAX_AGENT_LABEL_LEN = 128 +_MAX_STATUS_LEN = 32 +MAX_PAYLOAD_JSON_LEN = 262_144 + + +class AuditConfigError(ValueError): + """Raised for invalid audit configuration.""" + + +class AuditValidationError(ValueError): + """Raised when an audit event row violates the storage contract.""" + + +class AuditSchemaError(RuntimeError): + """Raised for unsupported or corrupt audit database schemas.""" + + +class AuditReadError(RuntimeError): + """Raised when a CLI audit read cannot be completed safely.""" + + +@dataclass(frozen=True, slots=True) +class EventRow: + event_id: str + event_type: str + severity: AuditSeverity + created_at_utc: str + repo_root_digest: str + run_id: str | None + intent_id: str | None + report_digest: str | None + agent_label: str + agent_pid: int + status: str | None + payload_json: str + + def as_tuple(self) -> tuple[object, ...]: + return ( + self.event_id, + self.event_type, + self.severity, + self.created_at_utc, + self.repo_root_digest, + self.run_id, + self.intent_id, + self.report_digest, + self.agent_label, + self.agent_pid, + self.status, + self.payload_json, + ) + + +def resolve_audit_path(*, root_path: Path, value: object) -> Path: + if not isinstance(value, str): + raise AuditConfigError("audit_path must be a string") + 
raw = value.strip() + if not raw: + raise AuditConfigError("audit_path must not be empty") + path = Path(raw).expanduser() + if path.is_absolute(): + raise AuditConfigError("audit_path must be relative to the repository root") + if any(part in {"", ".", ".."} for part in path.parts): + raise AuditConfigError("audit_path must not contain empty, '.', or '..' parts") + if path.suffix not in _VALID_AUDIT_SUFFIXES: + raise AuditConfigError("audit_path must end with .sqlite3 or .db") + return root_path / path + + +def validate_payload_mode(value: object) -> AuditPayloadMode: + if value not in PAYLOAD_MODES: + expected = ", ".join(sorted(PAYLOAD_MODES)) + raise AuditConfigError(f"audit_payloads must be one of: {expected}") + if value == "off": + return "off" + if value == "full": + return "full" + return "compact" + + +def validate_retention_days(value: object) -> int: + if not isinstance(value, int) or isinstance(value, bool): + raise AuditConfigError("audit_retention_days must be an integer") + if not MIN_AUDIT_RETENTION_DAYS <= value <= MAX_AUDIT_RETENTION_DAYS: + raise AuditConfigError( + "audit_retention_days must be between " + f"{MIN_AUDIT_RETENTION_DAYS} and {MAX_AUDIT_RETENTION_DAYS}" + ) + return value + + +def validate_event_row(row: EventRow) -> None: + _validate_text(row.event_id, "event_id", max_len=_MAX_EVENT_ID_LEN) + _validate_text(row.event_type, "event_type", max_len=_MAX_EVENT_TYPE_LEN) + if row.event_type not in KNOWN_EVENT_TYPES: + raise AuditValidationError(f"unknown event_type: {row.event_type}") + _validate_text(row.severity, "severity", max_len=_MAX_SEVERITY_LEN) + if row.severity not in {"info", "warn", "error"}: + raise AuditValidationError(f"invalid severity: {row.severity}") + _validate_text(row.created_at_utc, "created_at_utc", max_len=_MAX_TIMESTAMP_LEN) + _validate_text(row.repo_root_digest, "repo_root_digest", max_len=_MAX_DIGEST_LEN) + _validate_optional_text(row.run_id, "run_id", max_len=_MAX_RUN_ID_LEN) + 
_validate_optional_text(row.intent_id, "intent_id", max_len=_MAX_INTENT_ID_LEN) + _validate_optional_text(row.report_digest, "report_digest", max_len=_MAX_DIGEST_LEN) + _validate_text(row.agent_label, "agent_label", max_len=_MAX_AGENT_LABEL_LEN) + if not isinstance(row.agent_pid, int) or isinstance(row.agent_pid, bool): + raise AuditValidationError("agent_pid must be an integer") + if row.agent_pid <= 0: + raise AuditValidationError("agent_pid must be positive") + _validate_optional_text(row.status, "status", max_len=_MAX_STATUS_LEN) + _validate_text(row.payload_json, "payload_json", max_len=MAX_PAYLOAD_JSON_LEN) + + +def _validate_optional_text(value: str | None, field: str, *, max_len: int) -> None: + if value is None: + return + _validate_text(value, field, max_len=max_len) + + +def _validate_text(value: str, field: str, *, max_len: int) -> None: + if not isinstance(value, str): + raise AuditValidationError(f"{field} must be a string") + if not value and field not in {"agent_label", "payload_json"}: + raise AuditValidationError(f"{field} must not be empty") + if len(value) > max_len: + raise AuditValidationError(f"{field} too long") + if "\x00" in value: + raise AuditValidationError(f"{field} contains NUL byte") + + +__all__ = [ + "AUDIT_SCHEMA_VERSION", + "DEFAULT_AUDIT_PATH", + "DEFAULT_AUDIT_PAYLOADS", + "DEFAULT_AUDIT_RETENTION_DAYS", + "MAX_PAYLOAD_JSON_LEN", + "AuditConfigError", + "AuditReadError", + "AuditSchemaError", + "AuditValidationError", + "EventRow", + "resolve_audit_path", + "validate_event_row", + "validate_payload_mode", + "validate_retention_days", +] diff --git a/codeclone/audit/writer.py b/codeclone/audit/writer.py new file mode 100644 index 00000000..0665037f --- /dev/null +++ b/codeclone/audit/writer.py @@ -0,0 +1,164 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import json +import threading +from datetime import datetime, timedelta, timezone +from pathlib import Path +from typing import Protocol + +from ..report.meta import current_report_timestamp_utc +from .events import ( + AuditEvent, + AuditPayloadMode, + compact_payload_for_event, + generate_event_id, +) +from .schema import open_audit_db +from .validation import EventRow, validate_event_row + +_INSERT_SQL = """ +INSERT INTO controller_events( + event_id, + event_type, + severity, + created_at_utc, + repo_root_digest, + run_id, + intent_id, + report_digest, + agent_label, + agent_pid, + status, + payload_json +) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) +""" + + +class AuditWriter(Protocol): + def emit(self, event: AuditEvent) -> None: ... + def close(self) -> None: ... + + +class NullAuditWriter: + def emit(self, event: AuditEvent) -> None: + return None + + def close(self) -> None: + return None + + +class SqliteAuditWriter: + def __init__( + self, + *, + db_path: Path, + payloads: AuditPayloadMode, + retention_days: int, + ) -> None: + self._conn = open_audit_db(db_path) + self._payloads = payloads + self._retention_days = retention_days + self._lock = threading.Lock() + self._closed = False + self._gc_counter = 0 + self._gc_interval = 100 + self._conn.execute( + "INSERT OR REPLACE INTO audit_meta(key, value) VALUES (?, ?)", + ("retention_days", str(retention_days)), + ) + self._conn.commit() + + def emit(self, event: AuditEvent) -> None: + try: + self._emit_impl(event) + except Exception: + return None + + def close(self) -> None: + with self._lock: + if self._closed: + return + try: + self._run_retention_gc() + finally: + self._conn.close() + self._closed = True + + def _emit_impl(self, event: AuditEvent) -> None: + row = event_to_row(event=event, payloads=self._payloads) + validate_event_row(row) + with self._lock: + if self._closed: + return + 
self._conn.execute(_INSERT_SQL, row.as_tuple()) + self._conn.commit() + self._gc_counter += 1 + if self._gc_counter >= self._gc_interval: + self._run_retention_gc() + self._gc_counter = 0 + + def _run_retention_gc(self) -> None: + cutoff = datetime.now(timezone.utc) - timedelta(days=self._retention_days) + cutoff_text = cutoff.replace(microsecond=0).isoformat().replace("+00:00", "Z") + self._conn.execute( + "DELETE FROM controller_events WHERE created_at_utc < ?", + (cutoff_text,), + ) + self._conn.commit() + + +def event_to_row(*, event: AuditEvent, payloads: AuditPayloadMode) -> EventRow: + payload_json = _payload_json(event=event, payloads=payloads) + return EventRow( + event_id=generate_event_id(), + event_type=event.event_type, + severity=event.severity, + created_at_utc=current_report_timestamp_utc(), + repo_root_digest=event.repo_root_digest, + run_id=event.run_id, + intent_id=event.intent_id, + report_digest=event.report_digest, + agent_label=event.agent_label, + agent_pid=event.agent_pid, + status=event.status, + payload_json=payload_json, + ) + + +def _payload_json(*, event: AuditEvent, payloads: AuditPayloadMode) -> str: + if payloads == "off": + return "{}" + payload = ( + event.payload + if payloads == "full" + else compact_payload_for_event( + event_type=event.event_type, + payload=event.payload, + ) + ) + if payload is None: + return "{}" + try: + return json.dumps( + payload, + sort_keys=True, + separators=(",", ":"), + ensure_ascii=True, + default=str, + ) + except (TypeError, ValueError): + return "{}" + + +__all__ = [ + "AuditWriter", + "NullAuditWriter", + "SqliteAuditWriter", + "event_to_row", +] diff --git a/codeclone/config/spec.py b/codeclone/config/spec.py index 95f20f07..96baf02e 100644 --- a/codeclone/config/spec.py +++ b/codeclone/config/spec.py @@ -4,6 +4,11 @@ from typing import Final, Literal from .. 
import ui_messages as ui +from ..audit.validation import ( + DEFAULT_AUDIT_PATH, + DEFAULT_AUDIT_PAYLOADS, + DEFAULT_AUDIT_RETENTION_DAYS, +) from ..contracts import ( DEFAULT_BASELINE_PATH, DEFAULT_BLOCK_MIN_LOC, @@ -266,6 +271,38 @@ def _option( default=False, help_text=ui.HELP_SESSION_STATS, ), + _option( + dest="audit", + group="Analysis", + cli_kind="store_true", + flags=("--audit",), + default=False, + help_text=ui.HELP_AUDIT, + ), + _option( + dest="audit_enabled", + group=None, + default=False, + pyproject_type=bool, + ), + _option( + dest="audit_path", + group=None, + default=DEFAULT_AUDIT_PATH, + pyproject_type=str, + ), + _option( + dest="audit_payloads", + group=None, + default=DEFAULT_AUDIT_PAYLOADS, + pyproject_type=str, + ), + _option( + dest="audit_retention_days", + group=None, + default=DEFAULT_AUDIT_RETENTION_DAYS, + pyproject_type=int, + ), _option( dest="cache_path", group="Analysis", diff --git a/codeclone/surfaces/cli/audit.py b/codeclone/surfaces/cli/audit.py new file mode 100644 index 00000000..6a38793c --- /dev/null +++ b/codeclone/surfaces/cli/audit.py @@ -0,0 +1,233 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from datetime import datetime, timezone +from pathlib import Path + +from ... 
import ui_messages as ui +from ...audit.reader import AuditSummary, read_audit_summary +from ...audit.validation import AuditConfigError, AuditReadError, resolve_audit_path +from ...contracts import ExitCode +from .types import PrinterLike + + +def render_audit( + *, + console: PrinterLike, + root_path: Path, + audit_enabled: bool, + audit_path: str, + quiet: bool, +) -> int: + if not audit_enabled: + console.print(ui.fmt_contract_error("audit is not enabled.")) + return int(ExitCode.CONTRACT_ERROR) + try: + db_path = resolve_audit_path(root_path=root_path, value=audit_path) + summary = read_audit_summary(db_path=db_path, limit=50) + except (AuditConfigError, AuditReadError) as exc: + console.print(ui.fmt_contract_error(str(exc))) + return int(ExitCode.CONTRACT_ERROR) + except Exception as exc: + console.print(ui.fmt_internal_error(exc)) + return int(ExitCode.INTERNAL_ERROR) + if quiet: + return _render_quiet(console=console, summary=summary) + return _render_verbose(console=console, summary=summary) + + +def _render_quiet(*, console: PrinterLike, summary: AuditSummary) -> int: + console.print( + "audit: " + f"{summary.total_events} events | " + f"intents={summary.intent_events} " + f"contracts={summary.contract_events} " + f"receipts={summary.receipt_events} " + f"violations={summary.violation_events} " + f"last={_relative_time(summary.latest_event_utc)}" + ) + return int(ExitCode.SUCCESS) + + +def _render_verbose(*, console: PrinterLike, summary: AuditSummary) -> int: + if _supports_rich(console): + return _render_verbose_rich(console=console, summary=summary) + + console.print("[bold]╍╍╍ Controller Audit Trail ╍╍╍[/bold]") + console.print() + console.print(f" Database: {summary.db_path} ({summary.total_events} events)") + if summary.retention_days is not None: + console.print(f" Retention: {summary.retention_days} days") + console.print(f" Oldest event: {summary.oldest_event_utc or 'none'}") + console.print(f" Latest event: {summary.latest_event_utc or 'none'}") 
+ console.print() + for event in summary.events: + console.print( + " " + f"{_short_time(event.created_at_utc):<16} " + f"{_short_type(event.event_type):<10} " + f"{event.intent_id or '-':<24} " + f"{event.status or '-':<10} " + f"{event.run_id or '-'}" + ) + console.print() + console.print( + " Summary: " + f"{summary.intent_events} intents, " + f"{summary.contract_events} contracts, " + f"{summary.receipt_events} receipts" + ) + console.print(f" Violations: {summary.violation_events}") + return int(ExitCode.SUCCESS) + + +def _render_verbose_rich(*, console: PrinterLike, summary: AuditSummary) -> int: + from rich import box + from rich.panel import Panel + from rich.rule import Rule + from rich.table import Table + from rich.text import Text + + console.print(Rule("Controller Audit Trail", style="dim", characters="─")) + + meta = Table.grid(padding=(0, 2)) + meta.add_column(style="dim", no_wrap=True) + meta.add_column() + meta.add_row( + "Database", + f"{summary.db_path} ({summary.total_events} events, " + f"{_format_bytes(summary.db_size_bytes)})", + ) + if summary.retention_days is not None: + meta.add_row("Retention", f"{summary.retention_days} days") + meta.add_row("Oldest event", summary.oldest_event_utc or "none") + meta.add_row("Latest event", summary.latest_event_utc or "none") + meta.add_row( + "Summary", + ( + f"{summary.intent_events} intents, " + f"{summary.contract_events} contracts, " + f"{summary.receipt_events} receipts" + ), + ) + meta.add_row( + "Violations", + Text( + str(summary.violation_events), + style="red" if summary.violation_events else "green", + ), + ) + console.print(Panel(meta, border_style="cyan")) + + table = Table(box=box.SIMPLE_HEAVY, expand=True) + table.add_column("Time", no_wrap=True) + table.add_column("Type", no_wrap=True) + table.add_column("Severity", no_wrap=True) + table.add_column("Intent", overflow="fold") + table.add_column("Status", no_wrap=True) + table.add_column("Run", no_wrap=True) + table.add_column("Agent", 
overflow="fold") + for event in summary.events: + table.add_row( + _short_time(event.created_at_utc), + _short_type(event.event_type), + Text(event.severity, style=_severity_style(event.severity)), + event.intent_id or "-", + event.status or "-", + _short_run(event.run_id), + event.agent_label or "-", + ) + console.print(table) + return int(ExitCode.SUCCESS) + + +def _supports_rich(console: PrinterLike) -> bool: + return console.__class__.__module__.startswith("rich.") + + +def _short_type(event_type: str) -> str: + aliases = { + "intent.declared": "decl", + "intent.checked": "check", + "intent.expanded": "expand", + "intent.violated": "intent!", + "intent.cleared": "clear", + "intent.renewed": "renew", + "blast_radius.computed": "radius", + "patch_budget.computed": "budget", + "patch_contract.verified": "verify", + "patch_contract.violated": "verify!", + "patch_contract.expired": "expired", + "claim_validation.completed": "claims", + "claim_validation.violated": "claims!", + "review_receipt.created": "receipt", + "baseline_abuse.detected": "baseline!", + "workspace.conflict_detected": "conflict", + "workspace.gc_completed": "gc", + } + return aliases.get(event_type, event_type.rsplit(".", maxsplit=1)[-1]) + + +def _short_run(run_id: str | None) -> str: + return run_id[:8] if run_id else "-" + + +def _short_time(value: str) -> str: + parsed = _parse_utc(value) + if parsed is None: + return value or "-" + now = datetime.now(timezone.utc) + if parsed.date() == now.date(): + return parsed.strftime("%H:%M today") + return parsed.strftime("%Y-%m-%d %H:%M") + + +def _relative_time(value: str | None) -> str: + parsed = _parse_utc(value or "") + if parsed is None: + return "none" + seconds = max(0, int((datetime.now(timezone.utc) - parsed).total_seconds())) + if seconds < 60: + return f"{seconds}s ago" + minutes = seconds // 60 + if minutes < 60: + return f"{minutes}m ago" + hours = minutes // 60 + if hours < 24: + return f"{hours}h ago" + return f"{hours // 24}d ago" + + 
+def _parse_utc(value: str) -> datetime | None: + if not value: + return None + try: + return datetime.fromisoformat(value.replace("Z", "+00:00")).astimezone( + timezone.utc + ) + except ValueError: + return None + + +def _format_bytes(value: int) -> str: + if value < 1024: + return f"{value} B" + kib = value / 1024 + if kib < 1024: + return f"{kib:.1f} KiB" + return f"{kib / 1024:.1f} MiB" + + +def _severity_style(value: str) -> str: + return {"info": "green", "warn": "yellow", "error": "bold red"}.get( + value, + "white", + ) + + +__all__ = ["render_audit"] diff --git a/codeclone/surfaces/cli/session_stats.py b/codeclone/surfaces/cli/session_stats.py index b109e43d..1a7bcf85 100644 --- a/codeclone/surfaces/cli/session_stats.py +++ b/codeclone/surfaces/cli/session_stats.py @@ -14,7 +14,7 @@ from dataclasses import dataclass from datetime import datetime from pathlib import Path -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any from ...contracts import ExitCode from .types import PrinterLike @@ -206,6 +206,8 @@ def _render_quiet(console: PrinterLike, snapshot: _SessionSnapshot) -> int: def _render_verbose(console: PrinterLike, snapshot: _SessionSnapshot) -> int: + if _supports_rich(console): + return _render_verbose_rich(console, snapshot) console.print("[bold]╍╍╍ Session Stats ╍╍╍[/bold]") console.print() console.print(f" Workspace: {snapshot.root}") @@ -268,6 +270,139 @@ def _render_verbose(console: PrinterLike, snapshot: _SessionSnapshot) -> int: return int(ExitCode.SUCCESS) +def _render_verbose_rich(console: PrinterLike, snapshot: _SessionSnapshot) -> int: + box, panel_cls, rule_cls, table_cls, text_cls = _rich_session_symbols() + + console.print(rule_cls("Session Stats", style="dim", characters="─")) + + summary = table_cls.grid(padding=(0, 2)) + summary.add_column(style="dim", no_wrap=True) + summary.add_column() + summary.add_row("Workspace", str(snapshot.root)) + if snapshot.cache_present and snapshot.latest_run_id: + run_text = 
_latest_run_text(snapshot) + summary.add_row("Latest run", run_text) + if snapshot.latest_run_files is not None: + summary.add_row( + "Cache", + f"report.json present ({snapshot.latest_run_files} files)", + ) + else: + summary.add_row("Latest run", "none") + summary.add_row("Active agents", str(len([a for a in snapshot.agents if a.alive]))) + summary.add_row("Stale intents", str(snapshot.stale_count)) + summary.add_row("Expired intents", str(snapshot.expired_count)) + summary.add_row("Recoverable", str(snapshot.recoverable_count)) + health_text = text_cls( + snapshot.workspace_health, + style=_health_style(snapshot.workspace_health), + ) + summary.add_row("Workspace health", health_text) + console.print( + panel_cls(summary, border_style=_health_style(snapshot.workspace_health)) + ) + + live_agents = [agent for agent in snapshot.agents if agent.alive] + if not live_agents: + console.print("[dim]No live workspace agents found.[/dim]") + return int(ExitCode.SUCCESS) + + table = table_cls( + title="Workspace intents", + box=box.SIMPLE_HEAVY, + show_lines=False, + expand=True, + ) + table.add_column("PID", no_wrap=True, style="dim") + table.add_column("Agent", overflow="fold") + table.add_column("Ownership", no_wrap=True) + table.add_column("Status", no_wrap=True) + table.add_column("Scope", justify="right", no_wrap=True) + table.add_column("Lease", no_wrap=True) + table.add_column("Files", overflow="fold") + + for agent in live_agents: + label = agent.label or "unknown" + for intent in agent.intents: + table.add_row( + str(agent.pid), + label, + text_cls(intent.ownership, style=_ownership_style(intent.ownership)), + text_cls(intent.status, style=_intent_status_style(intent.status)), + str(intent.scope_file_count), + _format_duration(intent.lease_remaining_seconds), + _allowed_files_label(intent.allowed_files), + ) + console.print(table) + return int(ExitCode.SUCCESS) + + +def _rich_session_symbols() -> tuple[Any, Any, Any, Any, Any]: + from rich import box + from 
rich.panel import Panel + from rich.rule import Rule + from rich.table import Table + from rich.text import Text + + return box, Panel, Rule, Table, Text + + +def _supports_rich(console: PrinterLike) -> bool: + return console.__class__.__module__.startswith("rich.") + + +def _latest_run_text(snapshot: _SessionSnapshot) -> str: + age_str = _format_age(snapshot.latest_run_age_seconds) + parts = [f"{snapshot.latest_run_id} ({age_str}"] + if snapshot.latest_run_health is not None: + parts.append(f", health={snapshot.latest_run_health}") + if snapshot.latest_run_findings is not None: + parts.append(f", findings={snapshot.latest_run_findings}") + parts.append(")") + return "".join(parts) + + +def _allowed_files_label(files: tuple[str, ...]) -> str: + if not files: + return "-" + shown = files[:_MAX_ALLOWED_FILES_SHOWN] + label = ", ".join(shown) + if len(files) > _MAX_ALLOWED_FILES_SHOWN: + label += f" ... and {len(files) - _MAX_ALLOWED_FILES_SHOWN} more" + return label + + +def _health_style(value: str) -> str: + return { + "idle": "dim", + "clean": "green", + "active": "cyan", + "contested": "yellow", + }.get(value, "cyan") + + +def _ownership_style(value: str) -> str: + if value.startswith("own"): + return "green" + if value == "foreign_stale": + return "yellow" + if value == "foreign_active": + return "cyan" + if value == "recoverable": + return "magenta" + return "dim" + + +def _intent_status_style(value: str) -> str: + return { + "active": "cyan", + "clean": "green", + "expanded": "yellow", + "violated": "red", + "expired": "dim", + }.get(value, "white") + + def _classify_workspace_health( *, agents: list[_AgentSnapshot] | tuple[_AgentSnapshot, ...], diff --git a/codeclone/surfaces/cli/types.py b/codeclone/surfaces/cli/types.py index 37842904..cacceab3 100644 --- a/codeclone/surfaces/cli/types.py +++ b/codeclone/surfaces/cli/types.py @@ -82,6 +82,11 @@ class CLIArgsLike(Protocol): patch_verify: bool strictness: str session_stats: bool + audit: bool + audit_enabled: 
bool + audit_path: str + audit_payloads: str + audit_retention_days: int skip_metrics: bool skip_dead_code: bool skip_dependencies: bool diff --git a/codeclone/surfaces/cli/workflow.py b/codeclone/surfaces/cli/workflow.py index 36d4320a..aba5ca52 100644 --- a/codeclone/surfaces/cli/workflow.py +++ b/codeclone/surfaces/cli/workflow.py @@ -173,6 +173,7 @@ def _controller_query_mode(args: object) -> bool: bool_attr(args, "blast_radius") or bool_attr(args, "patch_verify") or bool_attr(args, "session_stats") + or bool_attr(args, "audit") ) @@ -200,11 +201,19 @@ def _validate_controller_query_flags( ) sys.exit(ExitCode.CONTRACT_ERROR) session_stats = bool_attr(args, "session_stats") - if session_stats and (blast_radius or patch_verify): + audit = bool_attr(args, "audit") + if session_stats and (blast_radius or patch_verify or audit): printer.print( ui.fmt_contract_error( "--session-stats cannot be combined with " - "--blast-radius or --patch-verify." + "--audit, --blast-radius, or --patch-verify." + ) + ) + sys.exit(ExitCode.CONTRACT_ERROR) + if audit and (blast_radius or patch_verify): + printer.print( + ui.fmt_contract_error( + "--audit cannot be combined with --blast-radius or --patch-verify." 
) ) sys.exit(ExitCode.CONTRACT_ERROR) @@ -213,13 +222,24 @@ def _validate_controller_query_flags( ui.fmt_contract_error("Use --blast-radius or --patch-verify, not both.") ) sys.exit(ExitCode.CONTRACT_ERROR) - if not (blast_radius or patch_verify or session_stats): + if not (blast_radius or patch_verify or session_stats or audit): return if bool_attr(args, "update_baseline") or bool_attr(args, "update_metrics_baseline"): printer.print( ui.fmt_contract_error("Controller query modes cannot update baselines.") ) sys.exit(ExitCode.CONTRACT_ERROR) + if ( + bool_attr(args, "changed_only") + or getattr(args, "diff_against", None) + or getattr(args, "paths_from_git_diff", None) + ): + printer.print( + ui.fmt_contract_error( + "Controller query modes cannot be combined with changed-scope flags." + ) + ) + sys.exit(ExitCode.CONTRACT_ERROR) if report_outputs_requested: printer.print( ui.fmt_contract_error( @@ -264,6 +284,32 @@ def _run_controller_query( ) +def _run_pre_analysis_controller_query( + *, + args: CLIArgsLike, + root_path: Path, +) -> int | None: + if bool_attr(args, "session_stats"): + from .session_stats import render_session_stats + + return render_session_stats( + console=_console(), + root_path=root_path, + quiet=args.quiet, + ) + if bool_attr(args, "audit"): + from .audit import render_audit + + return render_audit( + console=_console(), + root_path=root_path, + audit_enabled=bool(getattr(args, "audit_enabled", False)), + audit_path=str(getattr(args, "audit_path", "")), + quiet=args.quiet, + ) + return None + + def print_banner(*, root: Path | None = None) -> None: _set_console(console) _print_banner_impl(root=root) @@ -404,16 +450,12 @@ def _main_impl() -> None: args=args, strictness_explicit=strictness_explicit, ) - if bool_attr(args, "session_stats"): - from .session_stats import render_session_stats - - sys.exit( - render_session_stats( - console=_console(), - root_path=root_path, - quiet=args.quiet, - ) - ) + pre_analysis_query_exit = 
_run_pre_analysis_controller_query( + args=args, + root_path=root_path, + ) + if pre_analysis_query_exit is not None: + sys.exit(pre_analysis_query_exit) git_diff_ref = _validate_changed_scope_args(args=args) changed_paths = ( _git_diff_changed_paths(root_path=root_path, git_diff_ref=git_diff_ref) diff --git a/codeclone/surfaces/mcp/_session_claim_guard_mixin.py b/codeclone/surfaces/mcp/_session_claim_guard_mixin.py index 64900c3a..fc234602 100644 --- a/codeclone/surfaces/mcp/_session_claim_guard_mixin.py +++ b/codeclone/surfaces/mcp/_session_claim_guard_mixin.py @@ -6,6 +6,7 @@ from __future__ import annotations +from ...audit import EVENT_CLAIM_COMPLETED, EVENT_CLAIM_VIOLATED from ...metrics.registry import METRIC_FAMILIES from . import _session_helpers as _helpers from ._claim_guard import ( @@ -36,7 +37,18 @@ def validate_review_claims( report_context=context, require_citations=bool(require_citations), ) - return {"run_id": _helpers._short_run_id(record.run_id), **payload} + result = {"run_id": _helpers._short_run_id(record.run_id), **payload} + valid = bool(result.get("valid")) + self._audit_emit( + root=record.root, + event_type=EVENT_CLAIM_COMPLETED if valid else EVENT_CLAIM_VIOLATED, + severity="info" if valid else "warn", + run_id=_helpers._short_run_id(record.run_id), + report_digest=self._report_digest_value(record), + status="valid" if valid else "violated", + payload=result, + ) + return result def _claim_guard_context(self, record: MCPRunRecord) -> ReportContext: _canonical_to_short, short_to_canonical = self._finding_id_maps(record) diff --git a/codeclone/surfaces/mcp/_session_intent_mixin.py b/codeclone/surfaces/mcp/_session_intent_mixin.py index 9168e01a..c8bbec5f 100644 --- a/codeclone/surfaces/mcp/_session_intent_mixin.py +++ b/codeclone/surfaces/mcp/_session_intent_mixin.py @@ -13,6 +13,18 @@ from fnmatch import fnmatchcase from pathlib import Path +from ...audit import ( + EVENT_BLAST_RADIUS, + EVENT_INTENT_CHECKED, + EVENT_INTENT_CLEARED, + 
EVENT_INTENT_DECLARED, + EVENT_INTENT_EXPANDED, + EVENT_INTENT_EXPIRED, + EVENT_INTENT_RENEWED, + EVENT_INTENT_VIOLATED, + EVENT_WORKSPACE_CONFLICT, + EVENT_WORKSPACE_GC, +) from . import _session_helpers as _helpers from ._intent import ( DEFAULT_INTENT_GUARDS, @@ -80,6 +92,20 @@ class _MCPSessionIntentMixin(_MCPSessionBlastRadiusMixin): _agent_start_epoch: int _agent_label: str + def _audit_emit( + self, + *, + root: Path, + event_type: str, + severity: str, + run_id: str | None = None, + intent_id: str | None = None, + report_digest: str | None = None, + status: str | None = None, + payload: Mapping[str, object] | None = None, + ) -> None: + raise NotImplementedError + def get_blast_radius( self, *, @@ -96,6 +122,15 @@ def get_blast_radius( include=include, ) self._renew_lease_for_run(record=record) + self._audit_emit( + root=record.root, + event_type=EVENT_BLAST_RADIUS, + severity="info", + run_id=_helpers._short_run_id(record.run_id), + report_digest=self._report_digest_value(record), + status=str(payload.get("radius_level", "")), + payload=payload, + ) return payload def manage_change_intent( @@ -261,6 +296,27 @@ def _declare_change_intent( payload["workspace_registered"] = workspace_registered payload["concurrent_intents"] = concurrent_intents payload["ttl_seconds"] = ttl + self._audit_emit( + root=record.root, + event_type=EVENT_INTENT_DECLARED, + severity="warn" if concurrent_intents else "info", + run_id=_helpers._short_run_id(record.run_id), + intent_id=record_payload.intent_id, + report_digest=record_payload.report_digest, + status=record_payload.status.value, + payload=payload, + ) + if concurrent_intents: + self._audit_emit( + root=record.root, + event_type=EVENT_WORKSPACE_CONFLICT, + severity="warn", + run_id=_helpers._short_run_id(record.run_id), + intent_id=record_payload.intent_id, + report_digest=record_payload.report_digest, + status="conflict", + payload={"concurrent_intents": concurrent_intents}, + ) return payload def _check_change_intent( @@ 
-285,9 +341,20 @@ def _check_change_intent( with self._state_lock: self._active_intents[expired.intent_id] = expired self._sync_workspace_intent_status(record=record, intent=expired) - return expired.to_payload( + payload = expired.to_payload( short_run_id=_helpers._short_run_id(record.run_id) ) + self._audit_emit( + root=record.root, + event_type=EVENT_INTENT_EXPIRED, + severity="warn", + run_id=_helpers._short_run_id(record.run_id), + intent_id=expired.intent_id, + report_digest=expired.report_digest, + status=expired.status.value, + payload=payload, + ) + return payload actual = ( self._normalize_changed_paths(root_path=record.root, paths=changed_files) if changed_files @@ -304,6 +371,20 @@ def _check_change_intent( self._sync_workspace_intent_status(record=record, intent=updated) payload = check_result.to_payload() payload["intent_id"] = updated.intent_id + event_type = { + IntentStatus.EXPANDED: EVENT_INTENT_EXPANDED, + IntentStatus.VIOLATED: EVENT_INTENT_VIOLATED, + }.get(check_result.status, EVENT_INTENT_CHECKED) + self._audit_emit( + root=record.root, + event_type=event_type, + severity="warn" if check_result.status != IntentStatus.CLEAN else "info", + run_id=_helpers._short_run_id(record.run_id), + intent_id=updated.intent_id, + report_digest=updated.report_digest, + status=check_result.status.value, + payload=payload, + ) return payload def _clear_change_intent(self, *, intent_id: str | None) -> dict[str, object]: @@ -322,8 +403,8 @@ def _clear_change_intent(self, *, intent_id: str | None) -> dict[str, object]: removed_ids = tuple(self._active_intents) removed_intents = tuple(self._active_intents.values()) self._active_intents.clear() - workspace_targets: tuple[tuple[Path, str], ...] = tuple( - (record.root, removed_intent.intent_id) + workspace_targets: tuple[tuple[Path, IntentRecord, str], ...] 
= tuple( + (record.root, removed_intent, self._report_digest_value(record)) for removed_intent in removed_intents for record in (self._optional_run_record(removed_intent.run_id),) if record is not None @@ -331,21 +412,33 @@ def _clear_change_intent(self, *, intent_id: str | None) -> dict[str, object]: for removed_intent in removed_intents: self._runs.unpin(removed_intent.run_id) workspace_cleared = True - for root_path, removed_intent_id in workspace_targets: + for root_path, removed_intent, _report_digest in workspace_targets: workspace_cleared = ( remove_workspace_intent( root=root_path, pid=self._agent_pid, start_epoch=self._agent_start_epoch, - intent_id=removed_intent_id, + intent_id=removed_intent.intent_id, ) and workspace_cleared ) - return { + payload = { "cleared": len(removed_ids), "cleared_intent_ids": list(removed_ids), "workspace_cleared": workspace_cleared, } + for root_path, removed_intent, report_digest in workspace_targets: + self._audit_emit( + root=root_path, + event_type=EVENT_INTENT_CLEARED, + severity="info", + run_id=_helpers._short_run_id(removed_intent.run_id), + intent_id=removed_intent.intent_id, + report_digest=report_digest, + status="cleared", + payload=payload, + ) + return payload def _resolve_intent( self, @@ -511,7 +604,7 @@ def _renew_change_intent( if latest_record is not None else resolved_lease_seconds(lease_seconds) ) - return { + payload: dict[str, object] = { "intent_id": active_intent.intent_id, "status": active_intent.status.value, "lease_renewed": renewed, @@ -527,6 +620,17 @@ def _renew_change_intent( "max_seconds": MAX_LEASE_SECONDS, }, } + self._audit_emit( + root=record.root, + event_type=EVENT_INTENT_RENEWED, + severity="info" if renewed else "warn", + run_id=_helpers._short_run_id(record.run_id), + intent_id=active_intent.intent_id, + report_digest=active_intent.report_digest, + status=active_intent.status.value, + payload=payload, + ) + return payload def _list_workspace_intents(self, *, root: str | None) -> 
dict[str, object]: root_path = self._resolve_workspace_root(root) @@ -554,7 +658,16 @@ def _list_workspace_intents(self, *, root: str | None) -> dict[str, object]: } def _gc_workspace_intents(self, *, root: str | None) -> dict[str, object]: - return gc_workspace(root=self._resolve_workspace_root(root)) + root_path = self._resolve_workspace_root(root) + payload = gc_workspace(root=root_path) + self._audit_emit( + root=root_path, + event_type=EVENT_WORKSPACE_GC, + severity="info", + status="completed", + payload=payload, + ) + return payload def _recover_change_intent( self, diff --git a/codeclone/surfaces/mcp/_session_patch_contract_mixin.py b/codeclone/surfaces/mcp/_session_patch_contract_mixin.py index 62ae33e3..775a49a3 100644 --- a/codeclone/surfaces/mcp/_session_patch_contract_mixin.py +++ b/codeclone/surfaces/mcp/_session_patch_contract_mixin.py @@ -8,6 +8,13 @@ from collections.abc import Mapping, Sequence +from ...audit import ( + EVENT_BASELINE_ABUSE, + EVENT_PATCH_BUDGET, + EVENT_PATCH_EXPIRED, + EVENT_PATCH_VERIFIED, + EVENT_PATCH_VIOLATED, +) from ...utils.coerce import as_int as _coerce_int from . 
import _session_helpers as _helpers from ._intent import IntentRecord, IntentStatus @@ -81,7 +88,7 @@ def _patch_contract_budget( budgets = self._budgets_for_record(record=record, strictness=strictness) current_state = self._current_state(record) gate_preview = self._gate_preview(record=record, budgets=budgets) - return { + payload: dict[str, object] = { "mode": "budget", "run_id": _helpers._short_run_id(record.run_id), "strictness": strictness, @@ -102,6 +109,17 @@ def _patch_contract_budget( gate_preview=gate_preview, ), } + self._audit_emit( + root=record.root, + event_type=EVENT_PATCH_BUDGET, + severity="warn" if bool(gate_preview.get("would_fail")) else "info", + run_id=_helpers._short_run_id(record.run_id), + intent_id=intent.intent_id if intent is not None else None, + report_digest=self._report_digest_value(record), + status="budget", + payload=payload, + ) + return payload def _patch_contract_verify( self, @@ -182,7 +200,7 @@ def _patch_contract_verify( if blocking_violations else PatchContractStatus.ACCEPTED.value ) - return { + payload: dict[str, object] = { "mode": "verify", "status": status, "reason": None, @@ -199,6 +217,33 @@ def _patch_contract_verify( "blocking_violations": list(blocking_violations), "message": self._verify_message(status=status, violations=violations), } + event_type = ( + EVENT_PATCH_VIOLATED + if status == PatchContractStatus.VIOLATED.value + else EVENT_PATCH_VERIFIED + ) + self._audit_emit( + root=after.root, + event_type=event_type, + severity="warn" if blocking_violations else "info", + run_id=_helpers._short_run_id(after.run_id), + intent_id=intent.intent_id if intent is not None else None, + report_digest=self._report_digest_value(after), + status=status, + payload=payload, + ) + if bool(baseline_abuse.get("detected")): + self._audit_emit( + root=after.root, + event_type=EVENT_BASELINE_ABUSE, + severity="error", + run_id=_helpers._short_run_id(after.run_id), + intent_id=intent.intent_id if intent is not None else None, + 
report_digest=self._report_digest_value(after), + status="detected", + payload=payload, + ) + return payload def _validated_patch_contract_mode(self, mode: str) -> PatchContractMode: if mode not in VALID_PATCH_CONTRACT_MODES: @@ -560,7 +605,7 @@ def _expired_patch_contract( after: MCPRunRecord, intent: IntentRecord, ) -> dict[str, object]: - return { + payload: dict[str, object] = { "mode": "verify", "status": PatchContractStatus.EXPIRED.value, "reason": "report_digest_mismatch", @@ -572,6 +617,17 @@ def _expired_patch_contract( "Patch contract expired: intent was declared for another report digest." ), } + self._audit_emit( + root=after.root, + event_type=EVENT_PATCH_EXPIRED, + severity="warn", + run_id=_helpers._short_run_id(after.run_id), + intent_id=intent.intent_id, + report_digest=self._report_digest_value(after), + status=PatchContractStatus.EXPIRED.value, + payload=payload, + ) + return payload def _budget_message( self, diff --git a/codeclone/surfaces/mcp/_session_review_receipt_mixin.py b/codeclone/surfaces/mcp/_session_review_receipt_mixin.py index aba10d16..bf06c371 100644 --- a/codeclone/surfaces/mcp/_session_review_receipt_mixin.py +++ b/codeclone/surfaces/mcp/_session_review_receipt_mixin.py @@ -9,6 +9,7 @@ from collections import OrderedDict from collections.abc import Mapping +from ...audit import EVENT_RECEIPT_CREATED from ...contracts import REPORT_SCHEMA_VERSION from ...utils.coerce import as_int as _coerce_int from . 
import _session_helpers as _helpers @@ -99,13 +100,34 @@ def create_review_receipt( ), } if output_format == "json": + self._audit_emit( + root=record.root, + event_type=EVENT_RECEIPT_CREATED, + severity="info", + run_id=_helpers._short_run_id(record.run_id), + intent_id=intent.intent_id if intent is not None else None, + report_digest=self._receipt_digest(record), + status=str(receipt.get("verdict", "")), + payload={"receipt": receipt, "format": output_format}, + ) return receipt - return { + payload: dict[str, object] = { "run_id": _helpers._short_run_id(record.run_id), "format": output_format, "content": render_receipt_markdown(receipt), "receipt": receipt, } + self._audit_emit( + root=record.root, + event_type=EVENT_RECEIPT_CREATED, + severity="info", + run_id=_helpers._short_run_id(record.run_id), + intent_id=intent.intent_id if intent is not None else None, + report_digest=self._receipt_digest(record), + status=str(receipt.get("verdict", "")), + payload=payload, + ) + return payload def _validated_receipt_format(self, value: str) -> str: if value not in VALID_RECEIPT_FORMATS: diff --git a/codeclone/surfaces/mcp/session.py b/codeclone/surfaces/mcp/session.py index c1c858f4..3c0959dd 100644 --- a/codeclone/surfaces/mcp/session.py +++ b/codeclone/surfaces/mcp/session.py @@ -8,8 +8,24 @@ import os import time +from collections.abc import Mapping +from pathlib import Path +from ...audit import ( + DEFAULT_AUDIT_PATH, + DEFAULT_AUDIT_PAYLOADS, + DEFAULT_AUDIT_RETENTION_DAYS, + AuditEvent, + AuditWriter, + NullAuditWriter, + SqliteAuditWriter, + repo_root_digest, + resolve_audit_path, + validate_payload_mode, + validate_retention_days, +) from ...cache.store import resolve_cache_status +from ...config.pyproject_loader import ConfigValidationError, load_pyproject_config from ...report.meta import build_report_meta as _build_report_meta from ...report.meta import current_report_timestamp_utc as _current_report_timestamp_utc from . 
import _session_helpers as _helpers @@ -81,7 +97,12 @@ class MCPSession(_MCPSessionClaimGuardMixin): - def __init__(self, *, history_limit: int = DEFAULT_MCP_HISTORY_LIMIT) -> None: + def __init__( + self, + *, + history_limit: int = DEFAULT_MCP_HISTORY_LIMIT, + audit_writer: AuditWriter | None = None, + ) -> None: self._runs = CodeCloneMCPRunStore(history_limit=history_limit) self._state_lock = RLock() self._review_state: dict[str, OrderedDict[str, str | None]] = {} @@ -97,6 +118,8 @@ def __init__(self, *, history_limit: int = DEFAULT_MCP_HISTORY_LIMIT) -> None: self._agent_start_epoch = int(time.time()) self._agent_label_cache: str | None = None self._fastmcp: object | None = None + self._audit_writer_override = audit_writer + self._audit_writers: dict[Path, AuditWriter] = {} # ------------------------------------------------------------------ # Agent label: lazy-resolved from MCP clientInfo on first access @@ -139,6 +162,80 @@ def _resolve_agent_label(self) -> str: pass return f"pid-{self._agent_pid}" + # ------------------------------------------------------------------ + # Audit trail: best-effort observer, never controller truth + # ------------------------------------------------------------------ + + def _audit_emit( + self, + *, + root: Path, + event_type: str, + severity: str, + run_id: str | None = None, + intent_id: str | None = None, + report_digest: str | None = None, + status: str | None = None, + payload: Mapping[str, object] | None = None, + ) -> None: + try: + writer = self._audit_writer_for_root(root) + writer.emit( + AuditEvent( + event_type=event_type, + severity="error" + if severity == "error" + else ("warn" if severity == "warn" else "info"), + repo_root_digest=repo_root_digest(root), + agent_pid=self._agent_pid, + agent_label=self._agent_label, + run_id=run_id, + intent_id=intent_id, + report_digest=report_digest, + status=status, + payload=payload, + ) + ) + except Exception: + return None + + def _audit_writer_for_root(self, root: Path) 
-> AuditWriter: + if self._audit_writer_override is not None: + return self._audit_writer_override + root_path = root.resolve() + cached = self._audit_writers.get(root_path) + if cached is not None: + return cached + writer = self._build_audit_writer(root_path) + self._audit_writers[root_path] = writer + return writer + + def _build_audit_writer(self, root: Path) -> AuditWriter: + try: + config = load_pyproject_config(root) + except (ConfigValidationError, OSError): + return NullAuditWriter() + if not bool(config.get("audit_enabled", False)): + return NullAuditWriter() + try: + db_path = resolve_audit_path( + root_path=root, + value=config.get("audit_path", DEFAULT_AUDIT_PATH), + ) + payloads = validate_payload_mode( + config.get("audit_payloads", DEFAULT_AUDIT_PAYLOADS) + ) + retention_days = validate_retention_days( + config.get("audit_retention_days", DEFAULT_AUDIT_RETENTION_DAYS) + ) + return SqliteAuditWriter( + db_path=db_path, + payloads=payloads, + retention_days=retention_days, + ) + except Exception: + return NullAuditWriter() + def analyze_repository(self, request: MCPAnalysisRequest) -> dict[str, object]: self._validate_analysis_request(request) root_path = _helpers._resolve_root(request.root) diff --git a/codeclone/ui_messages/__init__.py b/codeclone/ui_messages/__init__.py index 8b23ac44..6cc1cc5e 100644 --- a/codeclone/ui_messages/__init__.py +++ b/codeclone/ui_messages/__init__.py @@ -84,6 +84,10 @@ "Show workspace session status: active agents, intents, lease health.\n" "Read-only, does not run analysis." ) +HELP_AUDIT = ( + "Show local Controller audit trail from the configured audit database.\n" + "Read-only, does not run analysis." +) HELP_CACHE_PATH = ( "Path to the cache file.\n" "If FILE is omitted, uses /.cache/codeclone/cache.json." 
diff --git a/docs/book/04-config-and-defaults.md b/docs/book/04-config-and-defaults.md index cfa3dcfd..0710bd03 100644 --- a/docs/book/04-config-and-defaults.md +++ b/docs/book/04-config-and-defaults.md @@ -134,6 +134,15 @@ Report outputs and local UX: | `verbose` | `bool` | `false` | Enable more verbose CLI output | `-` | | `debug` | `bool` | `false` | Enable debug diagnostics | Also enabled by `CODECLONE_DEBUG=1` | +Controller audit trail: + +| Key | Type | Default | Meaning | Requires / Implies | +|------------------------|--------|--------------------------------------|-----------------------------------------------------------|-------------------------------------| +| `audit_enabled` | `bool` | `false` | Enable the optional local controller audit trail | Required for `--audit` output | +| `audit_path` | `str` | `.cache/codeclone/audit.sqlite3` | SQLite audit database path, relative to the analysis root | Used only when `audit_enabled=true` | +| `audit_payloads` | `str` | `compact` | Audit payload mode: `off`, `compact`, or `full` | Used only when `audit_enabled=true` | +| `audit_retention_days` | `int` | `30` | Retention window for audit rows | Used only when `audit_enabled=true` | + This is the exact accepted `[tool.codeclone]` key set from `codeclone/config/spec.py` and `codeclone/config/pyproject_loader.py`; unknown keys are contract errors. 
diff --git a/pyproject.toml b/pyproject.toml index 32acdb28..072604aa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -140,6 +140,10 @@ fail_on_new_metrics = true api_surface = true golden_fixture_paths = ["tests/fixtures/golden_*"] min_typing_coverage = 99 +audit_enabled = true +audit_path = ".cache/codeclone/audit.sqlite3" +audit_payloads = "compact" # "off" | "compact" | "full" +audit_retention_days = 30 [tool.coverage.report] diff --git a/tests/fixtures/contract_snapshots/cli_help.txt b/tests/fixtures/contract_snapshots/cli_help.txt index a1106cca..1019f3f8 100644 --- a/tests/fixtures/contract_snapshots/cli_help.txt +++ b/tests/fixtures/contract_snapshots/cli_help.txt @@ -2,9 +2,10 @@ usage: codeclone [--min-loc MIN_LOC] [--min-stmt MIN_STMT] [--processes PROCESSES] [--changed-only | --no-changed-only] [--diff-against GIT_REF] [--paths-from-git-diff GIT_REF] [--blast-radius FILE [FILE ...]] [--patch-verify] - [--strictness LEVEL] [--session-stats] [--cache-path [FILE]] - [--cache-dir [FILE]] [--max-cache-size-mb MB] - [--baseline [FILE]] [--max-baseline-size-mb MB] + [--strictness LEVEL] [--session-stats] [--audit] + [--cache-path [FILE]] [--cache-dir [FILE]] + [--max-cache-size-mb MB] [--baseline [FILE]] + [--max-baseline-size-mb MB] [--update-baseline | --no-update-baseline] [--metrics-baseline [FILE]] [--update-metrics-baseline | --no-update-metrics-baseline] @@ -67,6 +68,8 @@ Analysis: Default: ci. --session-stats Show workspace session status: active agents, intents, lease health. Read-only, does not run analysis. + --audit Show local Controller audit trail from the configured audit database. + Read-only, does not run analysis. --cache-path [FILE] Path to the cache file. If FILE is omitted, uses /.cache/codeclone/cache.json. --cache-dir [FILE] Legacy alias for --cache-path. 
diff --git a/tests/test_audit_schema.py b/tests/test_audit_schema.py new file mode 100644 index 00000000..ffbe5e51 --- /dev/null +++ b/tests/test_audit_schema.py @@ -0,0 +1,85 @@ +from __future__ import annotations + +import sqlite3 +from pathlib import Path + +import pytest + +from codeclone.audit.schema import ensure_schema, open_audit_db +from codeclone.audit.validation import ( + AUDIT_SCHEMA_VERSION, + AuditConfigError, + AuditSchemaError, + resolve_audit_path, + validate_payload_mode, + validate_retention_days, +) + + +def test_open_audit_db_creates_schema_and_meta(tmp_path: Path) -> None: + db_path = tmp_path / "audit.sqlite3" + + conn = open_audit_db(db_path) + try: + tables = { + row[0] + for row in conn.execute( + "SELECT name FROM sqlite_master WHERE type = 'table'" + ) + } + schema = conn.execute( + "SELECT value FROM audit_meta WHERE key = 'schema_version'" + ).fetchone() + finally: + conn.close() + + assert {"controller_events", "audit_meta"}.issubset(tables) + assert schema == (AUDIT_SCHEMA_VERSION,) + + +def test_ensure_schema_rejects_unknown_version(tmp_path: Path) -> None: + db_path = tmp_path / "audit.sqlite3" + conn = sqlite3.connect(db_path) + try: + conn.execute("CREATE TABLE audit_meta(key TEXT PRIMARY KEY, value TEXT)") + conn.execute( + "INSERT INTO audit_meta(key, value) VALUES ('schema_version', '999')" + ) + conn.commit() + + with pytest.raises(AuditSchemaError, match="Unsupported audit schema"): + ensure_schema(conn) + finally: + conn.close() + + +def test_resolve_audit_path_accepts_repo_relative_sqlite_path(tmp_path: Path) -> None: + resolved = resolve_audit_path( + root_path=tmp_path, + value=".cache/codeclone/audit.db", + ) + assert resolved == tmp_path / ".cache" / "codeclone" / "audit.db" + + +@pytest.mark.parametrize( + "value", + ["/tmp/audit.sqlite3", "../audit.sqlite3", "audit.txt"], +) +def test_resolve_audit_path_rejects_unsafe_values( + tmp_path: Path, + value: str, +) -> None: + with pytest.raises(AuditConfigError): + 
resolve_audit_path(root_path=tmp_path, value=value) + + +def test_payload_mode_and_retention_validation() -> None: + assert validate_payload_mode("off") == "off" + assert validate_payload_mode("compact") == "compact" + assert validate_payload_mode("full") == "full" + assert validate_retention_days(30) == 30 + + with pytest.raises(AuditConfigError): + validate_payload_mode("verbose") + with pytest.raises(AuditConfigError): + validate_retention_days(0) diff --git a/tests/test_audit_writer.py b/tests/test_audit_writer.py new file mode 100644 index 00000000..09b97b7f --- /dev/null +++ b/tests/test_audit_writer.py @@ -0,0 +1,117 @@ +from __future__ import annotations + +import json +import sqlite3 +from pathlib import Path + +import pytest + +from codeclone.audit.events import EVENT_INTENT_DECLARED, AuditEvent, repo_root_digest +from codeclone.audit.reader import read_audit_summary +from codeclone.audit.validation import ( + AuditValidationError, + EventRow, + validate_event_row, +) +from codeclone.audit.writer import NullAuditWriter, SqliteAuditWriter + + +def _event(root: Path, *, event_type: str = EVENT_INTENT_DECLARED) -> AuditEvent: + return AuditEvent( + event_type=event_type, + severity="info", + repo_root_digest=repo_root_digest(root), + agent_pid=123, + agent_label="test-agent", + run_id="run12345", + intent_id="intent-run12345-001", + report_digest="a" * 64, + status="active", + payload={ + "scope": {"allowed_files": ["pkg/a.py", "tests/test_a.py"]}, + "concurrent_intents": [], + "workspace_registered": True, + "ttl_seconds": 3600, + }, + ) + + +def _payloads(db_path: Path) -> list[dict[str, object]]: + conn = sqlite3.connect(db_path) + try: + rows = conn.execute("SELECT payload_json FROM controller_events").fetchall() + finally: + conn.close() + return [json.loads(row[0]) for row in rows] + + +def test_sqlite_writer_creates_db_and_emits_compact_event(tmp_path: Path) -> None: + db_path = tmp_path / "audit.sqlite3" + writer = SqliteAuditWriter(db_path=db_path, 
payloads="compact", retention_days=30) + try: + writer.emit(_event(tmp_path)) + finally: + writer.close() + + summary = read_audit_summary(db_path=db_path) + assert summary.total_events == 1 + assert summary.intent_events == 1 + assert summary.events[0].event_type == EVENT_INTENT_DECLARED + assert _payloads(db_path)[0]["scope_file_count"] == 2 + + +def test_sqlite_writer_payload_modes(tmp_path: Path) -> None: + off_path = tmp_path / "off.sqlite3" + full_path = tmp_path / "full.sqlite3" + off_writer = SqliteAuditWriter(db_path=off_path, payloads="off", retention_days=30) + full_writer = SqliteAuditWriter( + db_path=full_path, + payloads="full", + retention_days=30, + ) + try: + off_writer.emit(_event(tmp_path)) + full_writer.emit(_event(tmp_path)) + finally: + off_writer.close() + full_writer.close() + + assert _payloads(off_path) == [{}] + assert "scope" in _payloads(full_path)[0] + + +def test_sqlite_writer_emit_never_raises_for_invalid_event(tmp_path: Path) -> None: + db_path = tmp_path / "audit.sqlite3" + writer = SqliteAuditWriter(db_path=db_path, payloads="compact", retention_days=30) + try: + writer.emit(_event(tmp_path, event_type="unknown.event")) + finally: + writer.close() + + assert read_audit_summary(db_path=db_path).total_events == 0 + + +def test_null_writer_is_noop(tmp_path: Path) -> None: + writer = NullAuditWriter() + writer.emit(_event(tmp_path)) + writer.close() + + +def test_event_validation_rejects_unknown_type() -> None: + row = EventRow( + event_id="evt_1", + event_type="unknown.event", + severity="info", + created_at_utc="2026-05-25T00:00:00Z", + repo_root_digest="a" * 16, + run_id=None, + intent_id=None, + report_digest=None, + agent_label="agent", + agent_pid=1, + status=None, + payload_json="{}", + ) + + with pytest.raises(AuditValidationError, match="unknown event_type"): + validate_event_row(row) diff --git a/tests/test_cli_audit.py b/tests/test_cli_audit.py new file mode 100644 index 00000000..8e4852dd --- /dev/null +++ 
b/tests/test_cli_audit.py @@ -0,0 +1,143 @@ +from __future__ import annotations + +import io +from pathlib import Path +from typing import cast + +import pytest +from rich.console import Console + +from codeclone.audit.events import EVENT_PATCH_VERIFIED, AuditEvent, repo_root_digest +from codeclone.audit.writer import SqliteAuditWriter +from codeclone.contracts import ExitCode +from codeclone.surfaces.cli.audit import render_audit +from codeclone.surfaces.cli.types import PrinterLike + + +class _RecordingPrinter: + def __init__(self) -> None: + self.lines: list[str] = [] + + def print(self, *objects: object, **kwargs: object) -> None: + self.lines.append(" ".join(str(item) for item in objects)) + + @property + def text(self) -> str: + return "\n".join(self.lines) + + +def _write_audit_event(root: Path) -> None: + writer = SqliteAuditWriter( + db_path=root / ".cache" / "codeclone" / "audit.sqlite3", + payloads="compact", + retention_days=30, + ) + try: + writer.emit( + AuditEvent( + event_type=EVENT_PATCH_VERIFIED, + severity="info", + repo_root_digest=repo_root_digest(root), + agent_pid=123, + agent_label="test-agent", + run_id="abcdef123456", + intent_id="intent-abcdef12-001", + report_digest="a" * 64, + status="accepted", + payload={ + "status": "accepted", + "structural_delta": { + "regressions": [], + "improvements": [], + "health_delta": 0, + }, + "contract_violations": [], + "baseline_abuse": {"detected": False}, + }, + ) + ) + finally: + writer.close() + + +@pytest.mark.parametrize( + ("audit_enabled", "expected_message"), + [ + (False, "audit is not enabled"), + (True, "no audit data"), + ], +) +def test_audit_contract_errors( + tmp_path: Path, + *, + audit_enabled: bool, + expected_message: str, +) -> None: + printer = _RecordingPrinter() + + exit_code = render_audit( + console=printer, + root_path=tmp_path, + audit_enabled=audit_enabled, + audit_path=".cache/codeclone/audit.sqlite3", + quiet=True, + ) + + assert exit_code == int(ExitCode.CONTRACT_ERROR) + 
assert expected_message in printer.text + + +def test_audit_quiet_with_events(tmp_path: Path) -> None: + _write_audit_event(tmp_path) + printer = _RecordingPrinter() + + exit_code = render_audit( + console=printer, + root_path=tmp_path, + audit_enabled=True, + audit_path=".cache/codeclone/audit.sqlite3", + quiet=True, + ) + + assert exit_code == int(ExitCode.SUCCESS) + assert "audit: 1 events" in printer.text + assert "contracts=1" in printer.text + + +def test_audit_verbose_renders_plain_table(tmp_path: Path) -> None: + _write_audit_event(tmp_path) + printer = _RecordingPrinter() + + exit_code = render_audit( + console=printer, + root_path=tmp_path, + audit_enabled=True, + audit_path=".cache/codeclone/audit.sqlite3", + quiet=False, + ) + + assert exit_code == int(ExitCode.SUCCESS) + assert "Controller Audit Trail" in printer.text + assert "intent-abcdef12-001" in printer.text + assert "accepted" in printer.text + + +def test_audit_verbose_uses_rich_table(tmp_path: Path) -> None: + _write_audit_event(tmp_path) + output = io.StringIO() + console = Console(file=output, force_terminal=True, color_system=None, width=160) + + exit_code = render_audit( + console=cast(PrinterLike, console), + root_path=tmp_path, + audit_enabled=True, + audit_path=".cache/codeclone/audit.sqlite3", + quiet=False, + ) + + assert exit_code == int(ExitCode.SUCCESS) + text = output.getvalue() + assert "Controller Audit Trail" in text + assert "Workspace" not in text + assert "verify" in text + assert "accepted" in text diff --git a/tests/test_cli_session_stats.py b/tests/test_cli_session_stats.py index e32eb003..1b21ba9d 100644 --- a/tests/test_cli_session_stats.py +++ b/tests/test_cli_session_stats.py @@ -1,13 +1,16 @@ from __future__ import annotations +import io import json import os import time from datetime import datetime, timezone from pathlib import Path +from typing import cast from unittest.mock import patch import pytest +from rich.console import Console import 
codeclone.surfaces.cli.session_stats as session_stats_mod from codeclone.contracts import ExitCode @@ -22,6 +25,7 @@ _read_cached_report, render_session_stats, ) +from codeclone.surfaces.cli.types import PrinterLike from codeclone.surfaces.mcp._workspace_intents import ( MIN_LEASE_SECONDS, WorkspaceIntentRecord, @@ -264,6 +268,31 @@ def test_session_stats_verbose_with_report(tmp_path: Path) -> None: assert "100 files" in text +def test_session_stats_verbose_uses_rich_table(tmp_path: Path) -> None: + intents_dir = tmp_path / ".cache" / "codeclone" / "intents" + intents_dir.mkdir(parents=True) + _write_intent_file( + intents_dir, + pid=os.getpid(), + start_epoch=int(time.time()), + allowed_files=["src/a.py"], + ) + output = io.StringIO() + console = Console(file=output, force_terminal=True, color_system=None, width=100) + + exit_code = render_session_stats( + console=cast(PrinterLike, console), + root_path=tmp_path, + quiet=False, + ) + + assert exit_code == int(ExitCode.SUCCESS) + text = output.getvalue() + assert "Session Stats" in text + assert "Workspace intents" in text + assert "src/a.py" in text + + def test_session_stats_verbose_with_report_without_file_count( tmp_path: Path, ) -> None: diff --git a/tests/test_mcp_service.py b/tests/test_mcp_service.py index 2050bad6..b77f9e5c 100644 --- a/tests/test_mcp_service.py +++ b/tests/test_mcp_service.py @@ -36,6 +36,7 @@ import codeclone.surfaces.mcp.server as mcp_server_mod import codeclone.surfaces.mcp.service as mcp_service_mod import codeclone.surfaces.mcp.session as mcp_session_mod +from codeclone.audit.events import AuditEvent from codeclone.baseline import Baseline, current_python_tag from codeclone.baseline.metrics_baseline import MetricsBaseline, MetricsBaselineStatus from codeclone.cache.store import Cache @@ -477,6 +478,17 @@ def _declare_pkg_a_intent(service: CodeCloneMCPService) -> dict[str, object]: ) +class _RecordingAuditWriter: + def __init__(self) -> None: + self.events: list[AuditEvent] = [] + + 
def emit(self, event: AuditEvent) -> None: + self.events.append(event) + + def close(self) -> None: + return None + + def _seed_patch_contract_intent( service: CodeCloneMCPService, root: Path, @@ -750,6 +762,49 @@ def test_mcp_service_analyze_repository_registers_latest_run(tmp_path: Path) -> ) +def test_mcp_session_emits_audit_events_for_controller_flow(tmp_path: Path) -> None: + audit = _RecordingAuditWriter() + service = mcp_session_mod.MCPSession(history_limit=4, audit_writer=audit) + record = _blast_radius_run_record(tmp_path, run_id="audit1234567890") + service._runs.register(record) + + declared = service.manage_change_intent( + action="declare", + run_id=record.run_id, + scope={"allowed_files": ["pkg/a.py"]}, + intent="audit controller flow", + expected_effects=["no new clone group"], + ) + service.get_blast_radius(files=["pkg/a.py"], run_id=record.run_id) + service.check_patch_contract( + mode="budget", + run_id=record.run_id, + intent_id=str(declared["intent_id"]), + ) + service.manage_change_intent( + action="check", + intent_id=str(declared["intent_id"]), + changed_files=["pkg/a.py"], + ) + service.manage_change_intent( + action="clear", + intent_id=str(declared["intent_id"]), + ) + + event_types = [event.event_type for event in audit.events] + assert event_types == [ + "intent.declared", + "blast_radius.computed", + "patch_budget.computed", + "intent.checked", + "intent.cleared", + ] + assert {event.intent_id for event in audit.events if event.intent_id} == { + declared["intent_id"] + } + assert all(str(tmp_path) not in event.repo_root_digest for event in audit.events) + + def test_mcp_service_summary_explains_untrusted_baseline_python_tag_mismatch( tmp_path: Path, ) -> None: From 773a61a15992a4a2a51ef0f964ffc1e4d5d49a86 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Tue, 26 May 2026 11:08:14 +0500 Subject: [PATCH 029/318] docs(controller): refresh audit and integration guidance --- docs/README.md | 1 + docs/architecture.md | 2 +- 
docs/book/08-report.md | 2 +- docs/book/09-cli.md | 4 +++ docs/book/11-security-model.md | 4 +-- docs/book/20-mcp-interface.md | 5 ++- docs/book/24-structural-change-controller.md | 36 +++++++++++++++----- docs/book/appendix/a-status-enums.md | 13 +++++++ docs/book/appendix/b-schema-layouts.md | 2 +- docs/mcp.md | 8 ++++- 10 files changed, 60 insertions(+), 17 deletions(-) diff --git a/docs/README.md b/docs/README.md index 6f69e14f..14f4281e 100644 --- a/docs/README.md +++ b/docs/README.md @@ -83,6 +83,7 @@ Contract-first documentation derived from code and locked tests. - [VS Code extension contract](book/21-vscode-extension.md) - [Claude Desktop bundle contract](book/22-claude-desktop-bundle.md) - [Codex plugin contract](book/23-codex-plugin.md) +- [Claim Guard](book/28-claim-guard.md) - [HTML report rendering contract](book/10-html-render.md) ### System Properties diff --git a/docs/architecture.md b/docs/architecture.md index 5375db10..4a772611 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -221,7 +221,7 @@ Security boundaries: - Read-only by design — no tool mutates source files, baselines, or repo state. - `--allow-remote` guard required for non-local transports; default is `stdio`. -- `cache_policy=refresh` rejected to preserve read-only semantics. +- Cache policies `reuse`, `refresh`, and `off` are accepted by MCP. - Review markers are session-local in-memory state, never persisted. - Run history bounded by `--history-limit` to prevent unbounded memory growth. - `git_diff_ref` validated as a safe single revision expression before any diff --git a/docs/book/08-report.md b/docs/book/08-report.md index 4fc8e20e..9916e6c8 100644 --- a/docs/book/08-report.md +++ b/docs/book/08-report.md @@ -2,7 +2,7 @@ ## Purpose -Define the canonical report contract for the current 2.0 release line: report +Define the canonical report contract for the current `2.1` release line: report schema `2.11` plus deterministic text/Markdown/SARIF/HTML projections. 
## Public surface diff --git a/docs/book/09-cli.md b/docs/book/09-cli.md index 085e3803..21efd30e 100644 --- a/docs/book/09-cli.md +++ b/docs/book/09-cli.md @@ -28,6 +28,7 @@ CLI modes: - gating mode (`--ci`, `--fail-on-new`, explicit metric gates) - baseline update mode (`--update-baseline`, `--update-metrics-baseline`) - controller query mode (`--blast-radius`, `--patch-verify`) +- session query mode (`--session-stats`) Summary metrics include: @@ -76,6 +77,9 @@ Refs: - `--strictness {ci,strict,relaxed}` is valid only with `--patch-verify`. - controller query mode does not write reports, baselines, or analysis cache data. +- Session query mode is terminal-only: + - `--session-stats` shows workspace session status: active agents, intents, + and lease health. Read-only, does not run analysis. - Contract errors use `CONTRACT ERROR:`. - Gating failures use `GATING FAILURE:`. - Internal errors use `fmt_internal_error` and include traceback only in debug mode. diff --git a/docs/book/11-security-model.md b/docs/book/11-security-model.md index 5a34a19d..3ad7a68a 100644 --- a/docs/book/11-security-model.md +++ b/docs/book/11-security-model.md @@ -32,7 +32,7 @@ Security-relevant input classes: - MCP is read-only by design: no tool mutates source files, baselines, cache, or report artifacts. - `--allow-remote` is required for non-local transports. -- `cache_policy=refresh` is rejected by MCP. +- Cache policies `reuse`, `refresh`, and `off` are accepted by MCP. - Review markers are session-local in-memory state only. - `git_diff_ref` is validated as a safe single revision expression before any `git diff` subprocess call. 
@@ -65,7 +65,7 @@ Refs: | Oversized cache | Cache ignored | | HTML-injected payload in metadata/source | Escaped output | | `--allow-remote` not passed for HTTP | Transport rejected | -| `cache_policy=refresh` requested in MCP | Policy rejected | +| Invalid `cache_policy` requested in MCP | Policy rejected | | `git_diff_ref` fails validation | Parameter rejected | ## Determinism / canonicalization diff --git a/docs/book/20-mcp-interface.md b/docs/book/20-mcp-interface.md index 69b120bb..1b016cb4 100644 --- a/docs/book/20-mcp-interface.md +++ b/docs/book/20-mcp-interface.md @@ -54,8 +54,7 @@ Current server characteristics: - **Roots** — analysis tools require an absolute repository root. Relative roots such as `.` are rejected. - **Analysis modes** — `full`, `clones_only`. -- **Cache policies** — `reuse`, `off`. `refresh` is rejected by the read-only - MCP service contract. +- **Cache policies** — `reuse` (default), `refresh`, `off`. !!! warning "Absolute roots and remote exposure" Analysis tools require an absolute repository root. HTTP exposure beyond @@ -122,7 +121,7 @@ drill into one finding or one hotspot family. 
| Tool | Key parameters | Purpose | |--------------------------|-------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------| -| `manage_change_intent` | `action`, `root`, `run_id`, `intent_id`, `scope`, `ttl_seconds`, `changed_files` or `diff_ref` | Intent lifecycle: declare, get, check, clear, list_workspace, gc_workspace, reset_workspace | +| `manage_change_intent` | `action`, `root`, `run_id`, `intent_id`, `scope`, `ttl_seconds`, `lease_seconds`, `changed_files` or `diff_ref` | Intent lifecycle: declare, get, check, clear, renew, list_workspace, gc_workspace, recover, reset_workspace | | `get_blast_radius` | `run_id`, `files`, `depth`, `include` | Pre-change risk boundary: dependents, clone cohorts, do-not-touch, review context | | `check_patch_contract` | `mode`, `run_id`, `before_run_id`, `after_run_id`, `intent_id`, `strictness`, `changed_files` or `diff_ref` | Budget query or post-edit verification | | `create_review_receipt` | `run_id`, `intent_id`, `format`, `include_blast_radius`, `include_patch_contract` | Deterministic audit artifact: provenance, scope, reviewed findings, patch status | diff --git a/docs/book/24-structural-change-controller.md b/docs/book/24-structural-change-controller.md index 5d577c1c..c624942f 100644 --- a/docs/book/24-structural-change-controller.md +++ b/docs/book/24-structural-change-controller.md @@ -40,6 +40,7 @@ The CLI exposes read-only terminal projections for humans: codeclone . --blast-radius codeclone/core/parser.py codeclone . --patch-verify --diff-against HEAD~1 codeclone . --patch-verify --strictness relaxed +codeclone . --session-stats ``` `--blast-radius` runs normal analysis, builds the canonical report in memory, @@ -50,6 +51,9 @@ and the current working tree as after-state. It checks new clone regressions and the selected gate profile. 
`ci` is the default; `strict` applies tighter controller budgets; `relaxed` reports violations but exits `0`. +`--session-stats` shows workspace session status: active agents, intents, and +lease health. Read-only, does not run analysis. + CLI controller queries are terminal-only and read-only with respect to source files, baselines, reports, and analysis cache data. They are incompatible with report output flags and baseline update flags. @@ -116,12 +120,13 @@ coordination: - `list_workspace`: list active workspace intent records from all agents for a repository root. +- `renew`: refresh the active lease before long edits or test runs. - `gc_workspace`: remove expired, orphaned, or corrupted registry records. -- `recover`: explicitly reclaim a stale leased intent when the caller has the +- `recover`: explicitly reclaim a recoverable intent when the caller has the matching run and report digest in the current MCP session. - `reset_workspace`: reset an own intent or remove expired/recoverable - registry records. Foreign active intents are rejected and require - coordination. + registry records. Foreign active and foreign stale intents are rejected + and require coordination. Registry files live under `.cache/codeclone/intents/` and are protected with a SHA-256 integrity digest over canonical JSON. This detects accidental @@ -130,11 +135,26 @@ advisory: hard overlap means two agents claimed the same primary file; soft overlap means primary files overlap related context. Each registry record has a TTL and a shorter renewable lease. TTL is the hard -maximum lifetime of the record. The lease is the ownership freshness signal: -active MCP interactions renew it, while detached processes stop renewing and -become recoverable after the lease window. A foreign active record has a live -lease and should be coordinated with the user; CodeClone does not ask agents to -kill the owning process. +maximum lifetime of the record (default 3600s). 
The lease is the ownership +freshness signal (default 300s, max 600s): active MCP interactions auto-renew +it, while detached processes stop renewing and transition through ownership +states. + +??? info "Ownership classification" + + | State | PID alive | Lease valid | Meaning | + |------------------|-----------|-------------|------------------------------------------------------| + | `own_active` | own | yes | This session's active intent | + | `own_stale` | own | no | This session's intent with expired lease | + | `foreign_active` | foreign | yes | Another live process, active lease — coordinate | + | `foreign_stale` | foreign | no | Another live process, expired lease — coordinate | + | `recoverable` | dead | — | Owning process is dead; safe to reclaim | + | `expired` | — | — | TTL exceeded; eligible for garbage collection | + + A foreign active or foreign stale record should be coordinated with the + user; CodeClone does not ask agents to kill the owning process. Only + `recoverable` intents (dead PID) can be reclaimed without user + coordination. ## Review Receipt Payload diff --git a/docs/book/appendix/a-status-enums.md b/docs/book/appendix/a-status-enums.md index 6ce4d6fe..7f6800ce 100644 --- a/docs/book/appendix/a-status-enums.md +++ b/docs/book/appendix/a-status-enums.md @@ -9,6 +9,7 @@ Centralize machine-readable status sets used across baseline/cache/report/CLI co - Baseline statuses: `codeclone/baseline/trust.py:BaselineStatus` - Cache statuses: `codeclone/cache/versioning.py:CacheStatus` - Exit categories: `codeclone/contracts/__init__.py:ExitCode` +- Intent ownership: `codeclone/surfaces/mcp/_workspace_intents.py:IntentOwnership` ## Data model @@ -52,6 +53,18 @@ Defined by `BASELINE_UNTRUSTED_STATUSES`. 
- `3` gating failure - `5` internal error +### IntentOwnership + +- `own_active` +- `own_stale` +- `foreign_active` +- `foreign_stale` +- `recoverable` +- `expired` + +Semantics are defined in +[Structural Change Controller § Workspace Intent Registry](../24-structural-change-controller.md#workspace-intent-registry). + ## Contracts - Status values are serialized into report metadata. diff --git a/docs/book/appendix/b-schema-layouts.md b/docs/book/appendix/b-schema-layouts.md index 8a0ef55f..17f0868e 100644 --- a/docs/book/appendix/b-schema-layouts.md +++ b/docs/book/appendix/b-schema-layouts.md @@ -3,7 +3,7 @@ ## Purpose Compact structural layouts for baseline/cache/report contracts in the current -2.0 release line. +`2.1` release line. ## Baseline schema (`2.1`) diff --git a/docs/mcp.md b/docs/mcp.md index 49ae9d1e..15444347 100644 --- a/docs/mcp.md +++ b/docs/mcp.md @@ -293,6 +293,11 @@ sequenceDiagram M-->>A: regression budget, headroom Note over A: Edit files within scope + opt Long edit or test run + A->>M: renew(intent_id, lease_seconds) + M->>D: update lease timestamp + M-->>A: lease_renewed + end A->>M: analyze_repository(root) M-->>A: after_run_id registered @@ -316,7 +321,7 @@ sequenceDiagram | Tool | Purpose | |--------------------------|---------------------------------------------------------------------------------------------| -| `manage_change_intent` | Intent lifecycle: declare, get, check, clear, list_workspace, gc_workspace, reset_workspace | +| `manage_change_intent` | Intent lifecycle: declare, get, check, clear, renew, list_workspace, gc_workspace, recover, reset_workspace | | `get_blast_radius` | Pre-change risk boundary: dependents, clone cohorts, do-not-touch, review context | | `check_patch_contract` | Budget query (`mode=budget`) or post-edit verification (`mode=verify`) | | `create_review_receipt` | Deterministic audit artifact: provenance, scope, reviewed findings, patch status | @@ -403,6 +408,7 @@ 
manage_change_intent(action="list_workspace") -> get_blast_radius(files=[...]) -> check_patch_contract(mode="budget") -> [edit within scope] + -> manage_change_intent(action="renew", intent_id=...) # optional: long edits -> analyze_repository # after-run -> manage_change_intent(action="check", intent_id=..., changed_files=[...]) -> check_patch_contract(mode="verify", before_run_id=..., after_run_id=..., intent_id=...) From 3236b0926d95402559493a354107af205bdd0a2f Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Tue, 26 May 2026 11:42:28 +0500 Subject: [PATCH 030/318] fix(mcp): make patch verify scope-aware --- codeclone/surfaces/mcp/_patch_contract.py | 1 + .../surfaces/mcp/_session_intent_mixin.py | 8 + .../mcp/_session_patch_contract_mixin.py | 179 +++++++++++++-- codeclone/surfaces/mcp/_workspace_intents.py | 203 +++++++++++++++--- docs/book/20-mcp-interface.md | 8 +- docs/book/24-structural-change-controller.md | 183 +++++++++++++++- docs/book/appendix/a-status-enums.md | 12 ++ tests/test_mcp_service.py | 187 +++++++++++++++- tests/test_workspace_intents.py | 139 +++++++++++- 9 files changed, 863 insertions(+), 57 deletions(-) diff --git a/codeclone/surfaces/mcp/_patch_contract.py b/codeclone/surfaces/mcp/_patch_contract.py index cccf60c1..fb50a598 100644 --- a/codeclone/surfaces/mcp/_patch_contract.py +++ b/codeclone/surfaces/mcp/_patch_contract.py @@ -24,6 +24,7 @@ class PatchContractStatus(str, Enum): ACCEPTED = "accepted" + ACCEPTED_EXTERNAL = "accepted_with_external_changes" VIOLATED = "violated" UNVERIFIED = "unverified" EXPIRED = "expired" diff --git a/codeclone/surfaces/mcp/_session_intent_mixin.py b/codeclone/surfaces/mcp/_session_intent_mixin.py index c8bbec5f..c7670e0f 100644 --- a/codeclone/surfaces/mcp/_session_intent_mixin.py +++ b/codeclone/surfaces/mcp/_session_intent_mixin.py @@ -53,6 +53,7 @@ classify_intent_ownership, compute_scope_digest, detect_conflicts, + detect_workspace_relations, expires_at, find_workspace_intent, format_utc, @@ 
-286,6 +287,12 @@ def _declare_change_intent( own_pid=self._agent_pid, own_start_epoch=self._agent_start_epoch, ) + workspace_relations = detect_workspace_relations( + new_scope=normalized_scope.to_payload(), + existing=workspace_existing, + own_pid=self._agent_pid, + own_start_epoch=self._agent_start_epoch, + ) payload = record_payload.to_payload( short_run_id=_helpers._short_run_id(record.run_id) ) @@ -295,6 +302,7 @@ def _declare_change_intent( payload["review_context_summary"] = blast_payload["review_context_summary"] payload["workspace_registered"] = workspace_registered payload["concurrent_intents"] = concurrent_intents + payload["workspace_relations"] = workspace_relations payload["ttl_seconds"] = ttl self._audit_emit( root=record.root, diff --git a/codeclone/surfaces/mcp/_session_patch_contract_mixin.py b/codeclone/surfaces/mcp/_session_patch_contract_mixin.py index 775a49a3..afc12477 100644 --- a/codeclone/surfaces/mcp/_session_patch_contract_mixin.py +++ b/codeclone/surfaces/mcp/_session_patch_contract_mixin.py @@ -7,6 +7,7 @@ from __future__ import annotations from collections.abc import Mapping, Sequence +from fnmatch import fnmatchcase from ...audit import ( EVENT_BASELINE_ABUSE, @@ -17,7 +18,7 @@ ) from ...utils.coerce import as_int as _coerce_int from . 
import _session_helpers as _helpers -from ._intent import IntentRecord, IntentStatus +from ._intent import IntentRecord, IntentScope, IntentStatus from ._patch_contract import ( VALID_PATCH_CONTRACT_MODES, VALID_STRICTNESS_PROFILES, @@ -180,26 +181,54 @@ def _patch_contract_verify( after_gate = self._gate_preview(record=after, budgets=budgets) structural_delta = self._structural_delta(compare_payload) regressions = _as_sequence(structural_delta.get("regressions")) + intent_regressions, external_regressions = self._partition_regressions( + after=after, + regressions=regressions, + intent=intent, + ) + worsened = self._worsened_symbols(before=before, after=after) + intent_worsened, external_worsened = self._partition_worsened( + worsened=worsened, + intent=intent, + ) + before_gate_fails = bool(before_gate["would_fail"]) + after_gate_fails = bool(after_gate["would_fail"]) + gate_worsened = not before_gate_fails and after_gate_fails + intent_caused_gate_failure = ( + after_gate_fails + if intent is None + else bool(intent_regressions or intent_worsened) + ) + gate_contract_failure = ( + after_gate_fails + if intent is None + else gate_worsened and intent_caused_gate_failure + ) + external_gate_failure = ( + intent is not None and gate_worsened and not intent_caused_gate_failure + ) baseline_abuse = detect_baseline_abuse( - before_gate_would_fail=bool(before_gate["would_fail"]), - after_gate_would_fail=bool(after_gate["would_fail"]), + before_gate_would_fail=before_gate_fails, + after_gate_would_fail=after_gate_fails, after_baseline_status=baseline_status(after.report_document), regressions=len(regressions), changed_files=len(actual_changed_files), intent_available=intent is not None, ) violations = self._contract_violations( - structural_delta=structural_delta, - gate_preview=after_gate, + intent_regressions=intent_regressions, + gate_contract_failure=gate_contract_failure, scope_check=scope_check, baseline_abuse=baseline_abuse, ) blocking_violations = () if 
strictness == "relaxed" else violations - status = ( - PatchContractStatus.VIOLATED.value - if blocking_violations - else PatchContractStatus.ACCEPTED.value - ) + external_context = bool(external_regressions or external_gate_failure) + if blocking_violations: + status = PatchContractStatus.VIOLATED.value + elif external_context: + status = PatchContractStatus.ACCEPTED_EXTERNAL.value + else: + status = PatchContractStatus.ACCEPTED.value payload: dict[str, object] = { "mode": "verify", "status": status, @@ -209,9 +238,16 @@ def _patch_contract_verify( "intent_id": intent.intent_id if intent is not None else None, "strictness": strictness, "structural_delta": structural_delta, - "worsened": self._worsened_symbols(before=before, after=after), + "intent_regressions": intent_regressions, + "external_regressions": external_regressions, + "worsened": worsened, + "intent_worsened": intent_worsened, + "external_worsened": external_worsened, "scope_check": scope_check, + "before_gate": before_gate, "gate_preview": after_gate, + "gate_worsened": gate_worsened, + "intent_caused_gate_failure": intent_caused_gate_failure, "baseline_abuse": baseline_abuse, "contract_violations": list(violations), "blocking_violations": list(blocking_violations), @@ -412,18 +448,129 @@ def _scope_check_payload( check_result = self._intent_check_result(intent=intent, actual=actual) return check_result.to_payload() + def _partition_regressions( + self, + *, + after: MCPRunRecord, + regressions: Sequence[object], + intent: IntentRecord | None, + ) -> tuple[list[dict[str, object]], list[dict[str, object]]]: + if intent is None: + return ( + [ + self._regression_card_with_paths(regression, paths=frozenset()) + for regression in regressions + ], + [], + ) + path_index = self._finding_path_index(after) + intent_regressions: list[dict[str, object]] = [] + external_regressions: list[dict[str, object]] = [] + for regression in regressions: + regression_map = _as_mapping(regression) + regression_id = 
str(regression_map.get("id", "")).strip() + paths = path_index.get(regression_id, frozenset()) + card = self._regression_card_with_paths(regression_map, paths=paths) + if self._paths_in_intent_scope(paths=paths, scope=intent.scope): + intent_regressions.append(card) + else: + external_regressions.append(card) + return intent_regressions, external_regressions + + def _finding_path_index( + self, + record: MCPRunRecord, + ) -> dict[str, frozenset[str]]: + index: dict[str, frozenset[str]] = {} + for finding in self._base_findings(record): + finding_id = str(finding.get("id", "")).strip() + if not finding_id: + continue + paths = self._finding_paths(finding) + index[finding_id] = paths + index[self._short_finding_id(record, finding_id)] = paths + return index + + def _finding_paths(self, finding: Mapping[str, object]) -> frozenset[str]: + paths: set[str] = set() + for key in ("locations", "items"): + for item in _as_sequence(finding.get(key)): + item_map = _as_mapping(item) + for path_key in ("file", "relative_path", "path", "filepath"): + path = self._normalized_report_path(item_map.get(path_key)) + if path: + paths.add(path) + for path_key in ("file", "relative_path", "path", "filepath"): + path = self._normalized_report_path(finding.get(path_key)) + if path: + paths.add(path) + return frozenset(sorted(paths)) + + def _regression_card_with_paths( + self, + regression: object, + *, + paths: frozenset[str], + ) -> dict[str, object]: + card = dict(_as_mapping(regression)) + card["paths"] = sorted(paths) + return card + + def _partition_worsened( + self, + *, + worsened: Sequence[Mapping[str, object]], + intent: IntentRecord | None, + ) -> tuple[list[dict[str, object]], list[dict[str, object]]]: + if intent is None: + return ([dict(item) for item in worsened], []) + intent_worsened: list[dict[str, object]] = [] + external_worsened: list[dict[str, object]] = [] + for item in worsened: + item_copy = dict(item) + path = self._normalized_report_path(item.get("path")) + if 
not path or self._path_in_scope(path=path, scope=intent.scope): + intent_worsened.append(item_copy) + else: + external_worsened.append(item_copy) + return intent_worsened, external_worsened + + def _paths_in_intent_scope( + self, + *, + paths: frozenset[str], + scope: IntentScope, + ) -> bool: + if not paths: + return True + return any(self._path_in_scope(path=path, scope=scope) for path in paths) + + def _path_in_scope(self, *, path: str, scope: IntentScope) -> bool: + patterns = (*scope.allowed_files, *scope.allowed_related) + return any( + path == pattern or fnmatchcase(path, pattern) for pattern in patterns + ) + + def _normalized_report_path(self, value: object) -> str: + path = str(value or "").replace("\\", "/").strip() + if path == ".": + return "" + if path.startswith("./"): + path = path[2:] + return path.rstrip("/") + def _contract_violations( self, *, - structural_delta: Mapping[str, object], - gate_preview: Mapping[str, object], + intent_regressions: Sequence[object], + gate_contract_failure: bool, scope_check: Mapping[str, object] | None, baseline_abuse: Mapping[str, object], ) -> tuple[str, ...]: violations: list[str] = [] - if _as_sequence(structural_delta.get("regressions")): + if intent_regressions: violations.append("structural_regressions") - if bool(gate_preview.get("would_fail")): + if gate_contract_failure: violations.append("gate_failures") if ( scope_check is not None @@ -644,6 +791,8 @@ def _budget_message( def _verify_message(self, *, status: str, violations: Sequence[str]) -> str: if status == PatchContractStatus.ACCEPTED.value: return "Patch contract accepted." + if status == PatchContractStatus.ACCEPTED_EXTERNAL.value: + return "Patch contract accepted; external workspace changes detected." 
return "Patch contract violated: " + ", ".join(violations) diff --git a/codeclone/surfaces/mcp/_workspace_intents.py b/codeclone/surfaces/mcp/_workspace_intents.py index 18d6811b..38fc72e9 100644 --- a/codeclone/surfaces/mcp/_workspace_intents.py +++ b/codeclone/surfaces/mcp/_workspace_intents.py @@ -13,6 +13,7 @@ from dataclasses import dataclass, replace from datetime import datetime, timedelta, timezone from enum import Enum +from fnmatch import fnmatchcase from pathlib import Path from typing import Final @@ -558,8 +559,41 @@ def detect_conflicts( own_pid: int, own_start_epoch: int, ) -> list[dict[str, object]]: - new_allowed, new_related = _scope_file_sets(new_scope) + conflicts, _relations = _detect_scope_state( + new_scope=new_scope, + existing=existing, + own_pid=own_pid, + own_start_epoch=own_start_epoch, + ) + return conflicts + + +def detect_workspace_relations( + *, + new_scope: Mapping[str, object], + existing: Sequence[WorkspaceIntentRecord], + own_pid: int, + own_start_epoch: int, +) -> list[dict[str, object]]: + _conflicts, relations = _detect_scope_state( + new_scope=new_scope, + existing=existing, + own_pid=own_pid, + own_start_epoch=own_start_epoch, + ) + return relations + + +def _detect_scope_state( + *, + new_scope: Mapping[str, object], + existing: Sequence[WorkspaceIntentRecord], + own_pid: int, + own_start_epoch: int, +) -> tuple[list[dict[str, object]], list[dict[str, object]]]: + new_allowed, new_related, new_forbidden = _scope_all_sets(new_scope) conflicts: list[dict[str, object]] = [] + relations: list[dict[str, object]] = [] now = utc_now() for record in existing: ownership = classify_intent_ownership( @@ -570,7 +604,9 @@ def detect_conflicts( ) if ownership not in _CONFLICT_OWNERSHIP: continue - existing_allowed, existing_related = _scope_file_sets(record.scope) + existing_allowed, existing_related, existing_forbidden = _scope_all_sets( + record.scope + ) hard_overlap = tuple(sorted(new_allowed.intersection(existing_allowed))) 
soft_overlap = tuple( sorted( @@ -580,35 +616,138 @@ def detect_conflicts( ) ) if hard_overlap or soft_overlap: - conflicts.append( + conflict = _edit_overlap_payload( + record=record, + ownership=ownership, + hard_overlap=hard_overlap, + soft_overlap=soft_overlap, + ) + conflicts.append(conflict) + relations.append( { - "intent_id": record.intent_id, - "agent_pid": record.agent_pid, - "agent_start_epoch": record.agent_start_epoch, - "agent_label": record.agent_label, - "intent": record.intent, - "ownership": ownership.value, - "severity": _CONFLICT_SEVERITY[ownership], - "recommended_action": _CONFLICT_ACTION[ownership], - "overlap_type": _overlap_type( - hard=bool(hard_overlap), - soft=bool(soft_overlap), - ), - "hard_overlap": list(hard_overlap), - "soft_overlap": list(soft_overlap), - "declared_at_utc": record.declared_at_utc, - "expires_at_utc": record.expires_at_utc, + **conflict, + "relation": "edit_overlap", + "message": "Foreign agent has overlapping editable scope.", } ) - return sorted( - conflicts, - key=lambda item: ( - str(item["severity"]), - str(item["overlap_type"]), - str(item["agent_label"]), - _sort_agent_pid(item.get("agent_pid")), - str(item["intent_id"]), + continue + foreign_excludes = _forbidden_matches( + files=new_allowed, + patterns=existing_forbidden, + ) + if foreign_excludes: + relations.append( + _forbidden_relation_payload( + record=record, + ownership=ownership, + relation="foreign_excludes_target", + matching_patterns=foreign_excludes, + message=( + "Foreign agent explicitly excludes files in current scope." + ), + ) + ) + continue + target_excludes = _forbidden_matches( + files=existing_allowed, + patterns=new_forbidden, + ) + if target_excludes: + relations.append( + _forbidden_relation_payload( + record=record, + ownership=ownership, + relation="target_excludes_foreign", + matching_patterns=target_excludes, + message=( + "Current scope explicitly excludes files in foreign scope." 
+ ), + ) + ) + return ( + sorted(conflicts, key=_scope_state_sort_key), + sorted(relations, key=_scope_state_sort_key), + ) + + +def _edit_overlap_payload( + *, + record: WorkspaceIntentRecord, + ownership: IntentOwnership, + hard_overlap: Sequence[str], + soft_overlap: Sequence[str], +) -> dict[str, object]: + return { + "intent_id": record.intent_id, + "agent_pid": record.agent_pid, + "agent_start_epoch": record.agent_start_epoch, + "agent_label": record.agent_label, + "intent": record.intent, + "ownership": ownership.value, + "severity": _CONFLICT_SEVERITY[ownership], + "recommended_action": _CONFLICT_ACTION[ownership], + "overlap_type": _overlap_type( + hard=bool(hard_overlap), + soft=bool(soft_overlap), ), + "hard_overlap": list(hard_overlap), + "soft_overlap": list(soft_overlap), + "declared_at_utc": record.declared_at_utc, + "expires_at_utc": record.expires_at_utc, + } + + +def _forbidden_relation_payload( + *, + record: WorkspaceIntentRecord, + ownership: IntentOwnership, + relation: str, + matching_patterns: Sequence[str], + message: str, +) -> dict[str, object]: + return { + "intent_id": record.intent_id, + "agent_pid": record.agent_pid, + "agent_start_epoch": record.agent_start_epoch, + "agent_label": record.agent_label, + "intent": record.intent, + "ownership": ownership.value, + "relation": relation, + "severity": "info", + "matching_patterns": list(matching_patterns), + "message": message, + "declared_at_utc": record.declared_at_utc, + "expires_at_utc": record.expires_at_utc, + } + + +def _scope_state_sort_key( + item: Mapping[str, object], +) -> tuple[str, str, str, str, int, str]: + return ( + str(item.get("severity", "")), + str(item.get("relation", "")), + str(item.get("overlap_type", "")), + str(item.get("agent_label", "")), + _sort_agent_pid(item.get("agent_pid")), + str(item.get("intent_id", "")), + ) + + +def _forbidden_matches( + *, + files: set[str], + patterns: tuple[str, ...], +) -> tuple[str, ...]: + return tuple( + sorted( + { + pattern 
+ for pattern in patterns + for path in files + if fnmatchcase(path, pattern) + } + ) ) @@ -900,12 +1039,17 @@ def _valid_path_list(value: object, *, required: bool) -> list[str] | None: return deduped -def _scope_file_sets(scope: Mapping[str, object]) -> tuple[set[str], set[str]]: +def _scope_all_sets( + scope: Mapping[str, object], +) -> tuple[set[str], set[str], tuple[str, ...]]: allowed = set(_valid_path_list(scope.get("allowed_files"), required=False) or []) related = set( _valid_path_list(scope.get("allowed_related", ()), required=False) or [] ) - return allowed, related + forbidden = tuple( + _valid_path_list(scope.get("forbidden", ()), required=False) or [] + ) + return allowed, related, forbidden def _parse_utc(value: str) -> datetime | None: @@ -944,6 +1088,7 @@ def _overlap_type(*, hard: bool, soft: bool) -> str: "compute_intent_digest", "compute_scope_digest", "detect_conflicts", + "detect_workspace_relations", "expires_at", "find_workspace_intent", "format_utc", diff --git a/docs/book/20-mcp-interface.md b/docs/book/20-mcp-interface.md index 1b016cb4..c4322bc5 100644 --- a/docs/book/20-mcp-interface.md +++ b/docs/book/20-mcp-interface.md @@ -121,7 +121,7 @@ drill into one finding or one hotspot family. 
| Tool | Key parameters | Purpose | |--------------------------|-------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------| -| `manage_change_intent` | `action`, `root`, `run_id`, `intent_id`, `scope`, `ttl_seconds`, `lease_seconds`, `changed_files` or `diff_ref` | Intent lifecycle: declare, get, check, clear, renew, list_workspace, gc_workspace, recover, reset_workspace | +| `manage_change_intent` | `action`, `root`, `run_id`, `intent_id`, `scope`, `ttl_seconds`, `lease_seconds`, `changed_files` or `diff_ref` | Intent lifecycle: declare, get, check, clear, renew, list_workspace, gc_workspace, recover, reset_workspace. Declare returns `workspace_relations` with forbidden-scope signals | | `get_blast_radius` | `run_id`, `files`, `depth`, `include` | Pre-change risk boundary: dependents, clone cohorts, do-not-touch, review context | | `check_patch_contract` | `mode`, `run_id`, `before_run_id`, `after_run_id`, `intent_id`, `strictness`, `changed_files` or `diff_ref` | Budget query or post-edit verification | | `create_review_receipt` | `run_id`, `intent_id`, `format`, `include_blast_radius`, `include_patch_contract` | Deterministic audit artifact: provenance, scope, reviewed findings, patch status | @@ -141,6 +141,12 @@ drill into one finding or one hotspot family. reports baseline-abuse signals. Missing runs return `status="unverified"`. + When a change intent is active, verify mode attributes regressions and + gate changes to the declared scope. Intent-scope regressions produce + contract violations; external regressions are reported as informational + context. See + [Scope-Aware Patch Contract Verification](24-structural-change-controller.md#scope-aware-patch-contract-verification). 
+ ### Session-local tools | Tool | Key parameters | Purpose | diff --git a/docs/book/24-structural-change-controller.md b/docs/book/24-structural-change-controller.md index c624942f..cba712fe 100644 --- a/docs/book/24-structural-change-controller.md +++ b/docs/book/24-structural-change-controller.md @@ -11,15 +11,18 @@ The v2.1 alpha currently includes intent, blast-radius, patch-contract checks, review receipts, workspace intent visibility, claim guard, and CLI controller queries: -| Phase | Status | Surface | -|---------------------------|-------------------|--------------------------------------------------| -| Intent declaration | Live in `2.1.0a1` | MCP `manage_change_intent` | -| Blast radius | Live in `2.1.0a1` | MCP `get_blast_radius`, CLI `--blast-radius` | -| Patch contract | Live in `2.1.0a1` | MCP `check_patch_contract`, CLI `--patch-verify` | -| Review receipt | Live in `2.1.0a1` | MCP `create_review_receipt` | -| Workspace intent registry | Live in `2.1.0a1` | MCP `manage_change_intent` | -| Lease and recovery | Live in `2.1.0a1` | MCP `manage_change_intent` | -| Claim guard | Live in `2.1.0a1` | MCP `validate_review_claims` | +| Phase | Status | Surface | +|-----------------------------|-------------------|--------------------------------------------------| +| Intent declaration | Live in `2.1.0a1` | MCP `manage_change_intent` | +| Blast radius | Live in `2.1.0a1` | MCP `get_blast_radius`, CLI `--blast-radius` | +| Patch contract | Live in `2.1.0a1` | MCP `check_patch_contract`, CLI `--patch-verify` | +| Review receipt | Live in `2.1.0a1` | MCP `create_review_receipt` | +| Workspace intent registry | Live in `2.1.0a1` | MCP `manage_change_intent` | +| Lease and recovery | Live in `2.1.0a1` | MCP `manage_change_intent` | +| Claim guard | Live in `2.1.0a1` | MCP `validate_review_claims` | +| Scope-aware verification | Planned | MCP `check_patch_contract` | +| Workspace relations | Planned | MCP `manage_change_intent` | +| MCP payload token budget | 
Planned | Audit trail, CLI `--audit`, `--session-stats` | ## Contract @@ -197,3 +200,165 @@ The guard checks for deterministic overclaims: Warnings, such as missing or unknown citations, do not make the response invalid. Violations make `valid=false`. + +## Scope-Aware Patch Contract Verification + +When a change intent is active, `check_patch_contract(mode="verify")` attributes +regressions and gate changes to the declared scope rather than treating the +entire workspace as one undifferentiated surface. + +### Regression attribution + +Regressions from `compare_runs` are partitioned into two sets: + +- `intent_regressions` — findings whose file paths fall inside the declared + `allowed_files` or `allowed_related`. +- `external_regressions` — findings whose file paths are entirely outside + the declared scope. + +Only `intent_regressions` produce `structural_regressions` contract violations. +External regressions are reported as informational context without failing the +contract. + +Findings with no extractable file paths are conservatively classified as +intent-scope to avoid false-negative accepts. + +Without an active intent, all regressions are treated as intent-scope and +behavior is unchanged from the base contract. + +### Gate-delta logic + +Gate evaluation uses a two-layer attribution model: + +1. **Gate delta** — only gate *changes* between before-run and after-run are + contract-relevant. A gate that was already failing before the edit is + pre-existing, not a new violation. `gate_worsened` is true only when + `before_gate.would_fail` is false and `after_gate.would_fail` is true. + +2. **Gate attribution** — when `gate_worsened` is true and an intent is active, + the contract checks whether the gate-triggering signals come from intent + scope: intent-scope regressions or intent-scope worsened metric symbols. If + neither exists, the gate failure is external and does not produce a contract + violation. 
+ +### Status values + +| Status | Meaning | +|---------------------------------|--------------------------------------------------------| +| `accepted` | No intent-scope regressions, no gate worsening | +| `accepted_with_external_changes`| Intent scope is clean but external signals exist | +| `violated` | Intent-scope regressions, intent-caused gate failure, or scope violation | +| `unverified` | Missing before or after run | +| `expired` | Report digest mismatch since declaration | + +The `accepted_with_external_changes` status signals that another agent or +concurrent edit introduced regressions outside the current intent scope. The +verify response includes `intent_regressions`, `external_regressions`, +`intent_worsened`, `external_worsened`, `gate_worsened`, and `before_gate` +fields for full attribution visibility. + +??? info "Decision table" + + | Intent | Intent regressions | External regressions | Gate worsened | Intent caused gate | Scope check | Status | + |--------|--------------------|-----------------------|---------------|--------------------|-------------|----------------------------------| + | no | any | — | any | any | — | current logic unchanged | + | yes | > 0 | any | any | any | any | `violated` | + | yes | 0 | any | yes | yes | clean | `violated` | + | yes | 0 | any | yes | no | clean | `accepted_with_external_changes` | + | yes | 0 | > 0 | no | — | clean | `accepted_with_external_changes` | + | yes | 0 | 0 | no | — | clean | `accepted` | + | yes | 0 | any | any | any | violated | `violated` (scope violation) | + +### Baseline abuse + +`detect_baseline_abuse` stays workspace-global. Baseline hygiene is a +repository-level signal: if the baseline was updated while any regressions exist +(even external), that is suspicious regardless of whose regressions they are. + +## Workspace Relations + +`detect_workspace_relations` classifies the relationship between a new intent and existing +workspace intents. 
Beyond edit-overlap detection (hard and soft conflicts), +the classifier distinguishes forbidden-scope relationships: + +| Relation | Meaning | +|---------------------------|-------------------------------------------------------| +| `edit_overlap` | Both agents claim the same files (hard or soft) | +| `foreign_excludes_target` | Foreign `forbidden` matches current `allowed_files` | +| `target_excludes_foreign` | Current `forbidden` matches foreign `allowed_files` | + +Absence of a relation entry means disjoint scope. + +The `declare` response includes a `workspace_relations` field alongside the +existing `concurrent_intents`. `concurrent_intents` continues to contain only +edit overlaps for backward compatibility; `workspace_relations` provides the +full classification including forbidden-scope signals. + +This allows agents to distinguish three cases that were previously +indistinguishable: + +1. No overlap at all (disjoint). +2. No edit overlap, but the foreign agent explicitly excludes the current + agent's target files (`foreign_excludes_target`) — a positive coordination + signal. +3. No edit overlap, but the current agent explicitly excludes the foreign + agent's target files (`target_excludes_foreign`). + +## MCP Payload Token Budget + +The optional controller audit trail can estimate the token footprint of MCP +payloads returned to the agent. This is a deterministic estimate of how much +context window each tool response consumes, not actual model billing tokens. + +### Setup + +Token estimation requires two conditions: + +1. Audit trail enabled (`audit_enabled = true` in `pyproject.toml`). +2. The `codeclone[token-bench]` optional extra installed (provides `tiktoken`). + +Without `tiktoken`, the estimator falls back to a character-based approximation +(`ceil(characters / 4)`). Without audit enabled, no estimation runs. + +### How it works + +The estimation runs inside the audit writer's `event_to_row`, not in the MCP +tool call path. 
The MCP session has zero overhead when audit is disabled or +when `tiktoken` is not installed. + +Each audit event row includes three optional fields: + +- `estimated_tokens` — BPE token count (or character-based approximation). +- `token_encoding` — encoding name (`o200k_base` or `chars_approx`). +- `payload_characters` — character count of the canonical JSON payload. + +The estimation input is the full original payload (what the MCP client +receives), not the compact audit storage form. + +### CLI visibility + +The `--audit` Rich TUI renderer shows token columns when data is available: + +``` +Tokens Encoding Event + 412 o200k_base intent.declared + 890 o200k_base blast_radius.computed + 1204 o200k_base patch_contract.verified +``` + +The `--session-stats` command appends a summary line when audit token data +exists: + +``` +MCP payload footprint: ~3,816 tokens (o200k_base, 7 tool calls) +``` + +### Invariants + +- Token estimation never affects controller decisions, gate results, report + digests, or baseline trust. +- Any exception in the estimation path results in `NULL` values, not a failed + audit event write. +- The `codeclone/budget/` module never imports from `codeclone/surfaces/` or + `codeclone/audit/`. Dependency direction: `audit -> budget`, never reverse. +- Base `codeclone` never depends on `tiktoken`. The import is lazy and guarded. 
diff --git a/docs/book/appendix/a-status-enums.md b/docs/book/appendix/a-status-enums.md index 7f6800ce..6326d0a0 100644 --- a/docs/book/appendix/a-status-enums.md +++ b/docs/book/appendix/a-status-enums.md @@ -10,6 +10,7 @@ Centralize machine-readable status sets used across baseline/cache/report/CLI co - Cache statuses: `codeclone/cache/versioning.py:CacheStatus` - Exit categories: `codeclone/contracts/__init__.py:ExitCode` - Intent ownership: `codeclone/surfaces/mcp/_workspace_intents.py:IntentOwnership` +- Patch contract: `codeclone/surfaces/mcp/_patch_contract.py:PatchContractStatus` ## Data model @@ -65,6 +66,17 @@ Defined by `BASELINE_UNTRUSTED_STATUSES`. Semantics are defined in [Structural Change Controller § Workspace Intent Registry](../24-structural-change-controller.md#workspace-intent-registry). +### PatchContractStatus + +- `accepted` +- `accepted_with_external_changes` +- `violated` +- `unverified` +- `expired` + +Semantics are defined in +[Structural Change Controller § Scope-Aware Patch Contract Verification](../24-structural-change-controller.md#scope-aware-patch-contract-verification). + ## Contracts - Status values are serialized into report metadata. 
diff --git a/tests/test_mcp_service.py b/tests/test_mcp_service.py index b77f9e5c..06cca298 100644 --- a/tests/test_mcp_service.py +++ b/tests/test_mcp_service.py @@ -384,6 +384,8 @@ def _patch_contract_report_document( include_regression: bool, complexity: int, baseline_status: str = "ok", + regression_path: str = "pkg/a.py", + complexity_path: str = "pkg/b.py", ) -> dict[str, object]: report_document = copy.deepcopy(_blast_radius_report_document(digest)) report_document["meta"] = { @@ -402,6 +404,8 @@ def _patch_contract_report_document( ) if not include_regression: del functions[1:] + else: + functions[1]["items"] = [{"relative_path": regression_path}] metrics = cast("dict[str, object]", report_document["metrics"]) families = cast("dict[str, object]", metrics["families"]) complexity_family = cast("dict[str, object]", families["complexity"]) @@ -409,6 +413,8 @@ def _patch_contract_report_document( "list[dict[str, object]]", complexity_family["items"], ) + complexity_family["summary"] = {"max": complexity} + complexity_items[0]["relative_path"] = complexity_path complexity_items[0]["qualname"] = "pkg.b.handle" complexity_items[0]["cyclomatic_complexity"] = complexity return report_document @@ -425,6 +431,8 @@ def _patch_contract_run_record( baseline_status: str = "ok", request: MCPAnalysisRequest | None = None, new_func: frozenset[str] = frozenset(), + regression_path: str = "pkg/a.py", + complexity_path: str = "pkg/b.py", ) -> MCPRunRecord: return replace( _dummy_run_record(root, run_id), @@ -434,6 +442,8 @@ def _patch_contract_run_record( include_regression=include_regression, complexity=complexity, baseline_status=baseline_status, + regression_path=regression_path, + complexity_path=complexity_path, ), summary={"run_id": run_id, "health": {"score": health, "grade": "B"}}, func_clones_count=2 if include_regression else 1, @@ -446,6 +456,7 @@ def _patch_contract_before_after_records( root: Path, *, before_health: int, + regression_path: str = "pkg/a.py", ) -> 
tuple[MCPRunRecord, MCPRunRecord]: before = _patch_contract_run_record( root, @@ -454,6 +465,7 @@ def _patch_contract_before_after_records( include_regression=False, complexity=6, health=before_health, + regression_path=regression_path, ) after = _patch_contract_run_record( root, @@ -464,6 +476,7 @@ def _patch_contract_before_after_records( health=70, baseline_status="updated", new_func=frozenset({"clone:function:g2"}), + regression_path=regression_path, ) return before, after @@ -503,6 +516,62 @@ def _seed_patch_contract_intent( return after, _declare_pkg_a_intent(service) +def _verify_scope_contract_case( + root: Path, + *, + before_run_id: str, + after_run_id: str, + include_regression: bool, + regression_path: str = "pkg/a.py", + before_complexity: int = 6, + after_complexity: int = 6, + complexity_path: str = "pkg/b.py", + new_func: frozenset[str] = frozenset(), + request: MCPAnalysisRequest | None = None, + declare_intent: bool = True, +) -> dict[str, object]: + service = CodeCloneMCPService(history_limit=4) + before = _patch_contract_run_record( + root, + run_id=before_run_id, + digest=f"{before_run_id}-digest", + include_regression=False, + complexity=before_complexity, + health=90, + request=request, + ) + after = _patch_contract_run_record( + root, + run_id=after_run_id, + digest=f"{after_run_id}-digest", + include_regression=include_regression, + complexity=after_complexity, + health=90, + request=request, + new_func=new_func, + regression_path=regression_path, + complexity_path=complexity_path, + ) + service._runs.register(before) + service._runs.register(after) + intent_id: str | None = None + if declare_intent: + declared = service.manage_change_intent( + action="declare", + run_id=before_run_id[:8], + scope={"allowed_files": ["pkg/a.py"]}, + intent="edit pkg.a", + ) + intent_id = str(declared["intent_id"]) + return service.check_patch_contract( + mode="verify", + before_run_id=before_run_id[:8], + after_run_id=after_run_id[:8], + intent_id=intent_id, 
+ changed_files=["pkg/a.py"] if declare_intent else None, + ) + + def _payload_dicts( payload: Mapping[str, object], keys: tuple[str, ...], @@ -3557,6 +3626,7 @@ def test_mcp_service_check_patch_contract_budget_uses_intent_and_gate_preview( scope={"allowed_files": ["pkg/a.py"]}, intent="adjust pkg.a behavior", ) + assert declared["workspace_relations"] == [] payload = service.check_patch_contract( mode="budget", @@ -3715,6 +3785,119 @@ def test_mcp_service_check_patch_contract_verify_composes_existing_primitives( assert payload["reason"] == reason +def test_mcp_service_verify_external_regression_is_context( + tmp_path: Path, +) -> None: + verified = _verify_scope_contract_case( + tmp_path, + before_run_id="beforeext123456", + after_run_id="afterext1234567", + include_regression=True, + regression_path="pkg/b.py", + ) + + assert verified["status"] == "accepted_with_external_changes" + assert verified["contract_violations"] == [] + assert verified["blocking_violations"] == [] + assert verified["intent_regressions"] == [] + external = cast("list[dict[str, object]]", verified["external_regressions"]) + assert external == [ + { + "id": "fn:g2", + "kind": "function_clone", + "severity": "", + "paths": ["pkg/b.py"], + } + ] + assert cast("dict[str, object]", verified["structural_delta"])["regressions"] == [ + {"id": "fn:g2", "kind": "function_clone", "severity": ""} + ] + + +def test_mcp_service_verify_intent_regression_still_violates( + tmp_path: Path, +) -> None: + verified = _verify_scope_contract_case( + tmp_path, + before_run_id="beforeint123456", + after_run_id="afterint1234567", + include_regression=True, + regression_path="pkg/a.py", + ) + + assert verified["status"] == "violated" + assert verified["contract_violations"] == ["structural_regressions"] + assert verified["external_regressions"] == [] + intent_regressions = cast("list[dict[str, object]]", verified["intent_regressions"]) + assert intent_regressions[0]["paths"] == ["pkg/a.py"] + + +def 
test_mcp_service_verify_external_gate_worsening_is_context( + tmp_path: Path, +) -> None: + verified = _verify_scope_contract_case( + tmp_path, + before_run_id="beforegate12345", + after_run_id="aftergate123456", + include_regression=True, + regression_path="pkg/b.py", + new_func=frozenset({"clone:function:g2"}), + ) + + assert verified["status"] == "accepted_with_external_changes" + assert verified["gate_worsened"] is True + assert verified["intent_caused_gate_failure"] is False + assert verified["contract_violations"] == [] + + +def test_mcp_service_verify_external_worsened_symbol_is_context( + tmp_path: Path, +) -> None: + request = MCPAnalysisRequest( + root=str(tmp_path), + respect_pyproject=False, + complexity_threshold=10, + ) + verified = _verify_scope_contract_case( + tmp_path, + before_run_id="beforemetric123", + after_run_id="aftermetric1234", + include_regression=False, + before_complexity=6, + after_complexity=25, + request=request, + complexity_path="pkg/b.py", + ) + + assert verified["status"] == "accepted_with_external_changes" + assert verified["gate_worsened"] is True + assert verified["intent_caused_gate_failure"] is False + assert verified["intent_worsened"] == [] + external_worsened = cast("list[dict[str, object]]", verified["external_worsened"]) + assert external_worsened[0]["path"] == "pkg/b.py" + assert verified["contract_violations"] == [] + + +def test_mcp_service_verify_without_intent_keeps_workspace_global_gate_behavior( + tmp_path: Path, +) -> None: + verified = _verify_scope_contract_case( + tmp_path, + before_run_id="beforenointent1", + after_run_id="afternointent12", + include_regression=True, + regression_path="pkg/b.py", + new_func=frozenset({"clone:function:g2"}), + declare_intent=False, + ) + + assert verified["status"] == "violated" + assert verified["contract_violations"] == [ + "structural_regressions", + "gate_failures", + ] + + def test_mcp_patch_contract_helper_edges( tmp_path: Path, monkeypatch: pytest.MonkeyPatch, @@ 
-3751,8 +3934,8 @@ def fake_git_diff_paths(*, root_path: Path, git_diff_ref: str) -> tuple[str, ... changed_files=None, ) == (f"{tmp_path.name}:HEAD~1",) assert service._contract_violations( - structural_delta={"regressions": [{"id": "r1"}]}, - gate_preview={"would_fail": True}, + intent_regressions=[{"id": "r1"}], + gate_contract_failure=True, scope_check={"status": "violated"}, baseline_abuse={"triggers": ["baseline_updated_without_intent"]}, ) == ( diff --git a/tests/test_workspace_intents.py b/tests/test_workspace_intents.py index 5a11119e..aeb415b6 100644 --- a/tests/test_workspace_intents.py +++ b/tests/test_workspace_intents.py @@ -502,9 +502,10 @@ def test_workspace_intent_private_edge_helpers( assert workspace_intents._valid_path_list(["pkg/a.py/"], required=True) == [ "pkg/a.py" ] - assert workspace_intents._scope_file_sets({"allowed_files": "pkg/a.py"}) == ( + assert workspace_intents._scope_all_sets({"allowed_files": "pkg/a.py"}) == ( set(), set(), + (), ) assert workspace_intents._parse_utc("2026-01-01T00:00:00") is None assert workspace_intents._sort_agent_pid(True) == 0 @@ -918,6 +919,142 @@ def test_workspace_intent_conflict_detection() -> None: assert both[0]["overlap_type"] == "both" +def test_workspace_intent_workspace_relations_forbidden_patterns( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr(workspace_intents, "_is_pid_alive", lambda pid: True) + foreign = _record( + intent_id="intent-foreign-docs", + pid=111, + start_epoch=100, + scope={ + "allowed_files": ["pkg/a.py"], + "allowed_related": [], + "forbidden": ["docs/**"], + }, + ) + + relations = workspace_intents.detect_workspace_relations( + new_scope={ + "allowed_files": ["docs/readme.md"], + "allowed_related": [], + "forbidden": [], + }, + existing=(foreign,), + own_pid=222, + own_start_epoch=200, + ) + + assert ( + workspace_intents.detect_conflicts( + new_scope={ + "allowed_files": ["docs/readme.md"], + "allowed_related": [], + "forbidden": [], + }, + 
existing=(foreign,), + own_pid=222, + own_start_epoch=200, + ) + == [] + ) + assert relations == [ + { + "intent_id": "intent-foreign-docs", + "agent_pid": 111, + "agent_start_epoch": 100, + "agent_label": "agent-a", + "intent": "edit pkg.a", + "ownership": "foreign_active", + "relation": "foreign_excludes_target", + "severity": "info", + "matching_patterns": ["docs/**"], + "message": "Foreign agent explicitly excludes files in current scope.", + "declared_at_utc": foreign.declared_at_utc, + "expires_at_utc": foreign.expires_at_utc, + } + ] + + +def test_workspace_intent_workspace_relations_target_excludes_foreign( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr(workspace_intents, "_is_pid_alive", lambda pid: True) + foreign = _record( + intent_id="intent-foreign-docs", + pid=111, + start_epoch=100, + scope={ + "allowed_files": ["docs/readme.md"], + "allowed_related": [], + "forbidden": [], + }, + ) + + relations = workspace_intents.detect_workspace_relations( + new_scope={ + "allowed_files": ["pkg/a.py"], + "allowed_related": [], + "forbidden": ["docs/**"], + }, + existing=(foreign,), + own_pid=222, + own_start_epoch=200, + ) + + assert ( + workspace_intents.detect_conflicts( + new_scope={ + "allowed_files": ["pkg/a.py"], + "allowed_related": [], + "forbidden": ["docs/**"], + }, + existing=(foreign,), + own_pid=222, + own_start_epoch=200, + ) + == [] + ) + assert relations[0]["relation"] == "target_excludes_foreign" + assert relations[0]["matching_patterns"] == ["docs/**"] + + +def test_workspace_intent_workspace_relations_include_edit_overlap() -> None: + existing = _record() + + relations = workspace_intents.detect_workspace_relations( + new_scope={ + "allowed_files": ["pkg/a.py"], + "allowed_related": [], + "forbidden": [], + }, + existing=(existing,), + own_pid=123456, + own_start_epoch=999, + ) + + assert relations[0]["relation"] == "edit_overlap" + assert relations[0]["hard_overlap"] == ["pkg/a.py"] + + +def 
test_workspace_intent_workspace_relations_omit_disjoint_scope() -> None: + existing = _record() + + assert ( + workspace_intents.detect_workspace_relations( + new_scope={ + "allowed_files": ["pkg/other.py"], + "allowed_related": [], + "forbidden": [], + }, + existing=(existing,), + own_pid=123456, + own_start_epoch=999, + ) + == [] + ) + + def test_workspace_intent_regression_stale_lease_silent_overlap( monkeypatch: pytest.MonkeyPatch, ) -> None: From 9356f64a56732c92e38fe1762c2d13356a1438db Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Tue, 26 May 2026 11:55:45 +0500 Subject: [PATCH 031/318] chore(packaging): add token bench extra --- docs/book/24-structural-change-controller.md | 4 +- pyproject.toml | 3 + uv.lock | 190 ++++++++++++++++++- 3 files changed, 193 insertions(+), 4 deletions(-) diff --git a/docs/book/24-structural-change-controller.md b/docs/book/24-structural-change-controller.md index cba712fe..f80ea258 100644 --- a/docs/book/24-structural-change-controller.md +++ b/docs/book/24-structural-change-controller.md @@ -20,8 +20,8 @@ queries: | Workspace intent registry | Live in `2.1.0a1` | MCP `manage_change_intent` | | Lease and recovery | Live in `2.1.0a1` | MCP `manage_change_intent` | | Claim guard | Live in `2.1.0a1` | MCP `validate_review_claims` | -| Scope-aware verification | Planned | MCP `check_patch_contract` | -| Workspace relations | Planned | MCP `manage_change_intent` | +| Scope-aware verification | Live in `2.1.0a1` | MCP `check_patch_contract` | +| Workspace relations | Live in `2.1.0a1` | MCP `manage_change_intent` | | MCP payload token budget | Planned | Audit trail, CLI `--audit`, `--session-stats` | ## Contract diff --git a/pyproject.toml b/pyproject.toml index 072604aa..3f971a38 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -67,6 +67,9 @@ mcp = [ "mcp>=1.27.0,<2", "httpx>=0.27.1,<1", ] +token-bench = [ + "tiktoken>=0.8", +] dev = [ "pytest>=9.0.3", "pytest-cov>=7.1.0", diff --git a/uv.lock b/uv.lock index 
88e110c1..0ef5e65f 100644 --- a/uv.lock +++ b/uv.lock @@ -346,6 +346,9 @@ mcp = [ { name = "httpx" }, { name = "mcp" }, ] +token-bench = [ + { name = "tiktoken" }, +] [package.metadata] requires-dist = [ @@ -361,10 +364,11 @@ requires-dist = [ { name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=7.1.0" }, { name = "rich", specifier = ">=15.0.0" }, { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.15.14" }, + { name = "tiktoken", marker = "extra == 'token-bench'", specifier = ">=0.8" }, { name = "tomli", marker = "python_full_version < '3.11'", specifier = ">=2.0.1" }, { name = "twine", marker = "extra == 'dev'", specifier = ">=6.2.0" }, ] -provides-extras = ["mcp", "dev"] +provides-extras = ["mcp", "token-bench", "dev"] [[package]] name = "colorama" @@ -673,7 +677,7 @@ name = "importlib-metadata" version = "9.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "zipp" }, + { name = "zipp", marker = "python_full_version < '3.15'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/a9/01/15bb152d77b21318514a96f43af312635eb2500c96b55398d020c93d86ea/importlib_metadata-9.0.0.tar.gz", hash = "sha256:a4f57ab599e6a2e3016d7595cfd72eb4661a5106e787a95bcc90c7105b831efc", size = 56405, upload-time = "2026-03-20T06:42:56.999Z" } wheels = [ @@ -1538,6 +1542,127 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2c/58/ca301544e1fa93ed4f80d724bf5b194f6e4b945841c5bfd555878eea9fcb/referencing-0.37.0-py3-none-any.whl", hash = "sha256:381329a9f99628c9069361716891d34ad94af76e461dcb0335825aecc7692231", size = 26766, upload-time = "2025-10-13T15:30:47.625Z" }, ] +[[package]] +name = "regex" +version = "2026.5.9" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/dc/0e/49aee608ad09480e7fd276898c99ec6192985fa331abe4eb3a986094490b/regex-2026.5.9.tar.gz", hash = "sha256:a8234aa23ec39894bfe4a3f1b85616a7032481964a13ac6fc9f10de4f6fca270", size = 416074, upload-time 
= "2026-05-09T23:15:19.37Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fe/ed/0ad2c8edf634918eb4484365d3819fa7bd7f58daf807fe7fb21812c316e5/regex-2026.5.9-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a9e1328e17c84c1a5d22ec9f785ecef4a967fab9a42b6a8dc3bcbebd0a0c9e44", size = 489438, upload-time = "2026-05-09T23:11:29.374Z" }, + { url = "https://files.pythonhosted.org/packages/89/a9/4ed972ad263963b860b7c3e86e0e1bcc791def47b43b8c8efe57e710f139/regex-2026.5.9-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bfe1ce50cbfb569d74e1e4337da6468961f31dbea55fd85aa5de59c0947a805a", size = 291270, upload-time = "2026-05-09T23:11:33.254Z" }, + { url = "https://files.pythonhosted.org/packages/16/81/075930d9fa28c4ea1f53398dd015ee7c882f623539759113cda1257f4b82/regex-2026.5.9-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:15ee42209947f4ca045412eae98416317238163618ace2a8e54f99586a466733", size = 289198, upload-time = "2026-05-09T23:11:35.769Z" }, + { url = "https://files.pythonhosted.org/packages/d4/c8/5cdfbf0b5dc6599e1b6131eff43262e5275d4ec3469ce10216061659aadb/regex-2026.5.9-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b4bb445ff3f725f59df8f6014edb547ee928ec7023a774f6a39a3f953038cbb2", size = 784765, upload-time = "2026-05-09T23:11:37.689Z" }, + { url = "https://files.pythonhosted.org/packages/cd/ca/ae5fd6edc59b7f84b904b31d6ec39a860cbcecd10f64bd5a062ca83a4864/regex-2026.5.9-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:446ddd671e43ab535810c4b21cff7104945c701d4a14d1e6d1cd6f4e445a8bea", size = 852115, upload-time = "2026-05-09T23:11:39.973Z" }, + { url = "https://files.pythonhosted.org/packages/f6/ce/a91cf555afb51f3b74a182e24ba073b91ea7bb64592fc4b315c111bb19fd/regex-2026.5.9-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:7b92817338591505f282cf3864c145244b1edcf5381d237038df955001091538", size = 
899503, upload-time = "2026-05-09T23:11:42.48Z" }, + { url = "https://files.pythonhosted.org/packages/55/7f/725a0a2b245a4cf0c4bab29d0e97c74285d94136a65d1b55a6459a583502/regex-2026.5.9-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d6b8a143aca6c39b446ea8092cde25cc8fe9304d4f5fecfbc1a9dbb0282703c2", size = 794093, upload-time = "2026-05-09T23:11:44.681Z" }, + { url = "https://files.pythonhosted.org/packages/e3/2a/996efbd59ce6b5d4a09e3af6180ceb62af171f4a9a6fb557d2f0ae0d462b/regex-2026.5.9-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0f03aa6898aaaac4592479821df16e68e8d0e29e903e65d8f2dfb2f19028a989", size = 786234, upload-time = "2026-05-09T23:11:46.882Z" }, + { url = "https://files.pythonhosted.org/packages/4b/0a/8731e8b8806174c9cdd5903f80a14990331c1f42fc4209b540952e9e010d/regex-2026.5.9-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ed457d8e98ae812ed7732bef7bf78de78e834eae0372a74e23ca90ef21d910f9", size = 769895, upload-time = "2026-05-09T23:11:49.324Z" }, + { url = "https://files.pythonhosted.org/packages/9a/0b/932473194bd563f342a412ae2ffbbd6da608306a2bc4e99249a41c2b0b92/regex-2026.5.9-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:71b61c5bfe1c806332defc42ad6c780b3c55f661986d7f40283a3a88274b4c00", size = 774991, upload-time = "2026-05-09T23:11:51.261Z" }, + { url = "https://files.pythonhosted.org/packages/98/80/9523d196010031df25f7177ee0a467efbee436324038e5d99def17a57515/regex-2026.5.9-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:3b1e39888c5e0c7d92cea4fc777396c4a90363b05de75d02eb459a4752200808", size = 848790, upload-time = "2026-05-09T23:11:53.232Z" }, + { url = "https://files.pythonhosted.org/packages/3c/07/56987b35e89edf47e4a38cf2845aeee476bfa688a6bdbd3e820cda461dc1/regex-2026.5.9-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:6ba42b2e7e7f46cf68cc6a5ca36fa07959f9bbd9c6bdcc47b6ee76549a590248", size = 757679, upload-time = 
"2026-05-09T23:11:55.82Z" }, + { url = "https://files.pythonhosted.org/packages/04/2a/ff713fff0c566507c06a4ce2dc0ae8e7eeebc88811a95fc81cf1e7d534dd/regex-2026.5.9-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:c010eb8caca74bdb40c07498d7ece26b4428fd3f04aa8a72c9ac6f79e8faaac6", size = 837116, upload-time = "2026-05-09T23:11:57.934Z" }, + { url = "https://files.pythonhosted.org/packages/77/90/df6d982b03e3614785c6937ba51b57f6733d97d2ee1c9bc7531dbfab3a54/regex-2026.5.9-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:a6a563446a41adc451393dc6b8e6ad87979efaee3c8738690a8d1b08ebead1b4", size = 782081, upload-time = "2026-05-09T23:11:59.607Z" }, + { url = "https://files.pythonhosted.org/packages/c7/8a/4e88a5f7c3e98489aac4dd23142723d907b2a595b4a6abcbacabefeded09/regex-2026.5.9-cp310-cp310-win32.whl", hash = "sha256:954cc214c04663ee6d266fc61739cad83054683048de65c5bd1d640ad28098ac", size = 266247, upload-time = "2026-05-09T23:12:01.116Z" }, + { url = "https://files.pythonhosted.org/packages/6a/40/4b224cb0582b2dca1786726e6cdabe26abbf757d7f6718332f186da155d2/regex-2026.5.9-cp310-cp310-win_amd64.whl", hash = "sha256:b310768746dd314ea6e2ff4cc89ef215426813396ff4e94ee8e6f7096c8b6e03", size = 278416, upload-time = "2026-05-09T23:12:03.2Z" }, + { url = "https://files.pythonhosted.org/packages/12/4d/014fbe803204cab0947ee428f09f658a29632053dde1d3c6176bb4f0fd4c/regex-2026.5.9-cp310-cp310-win_arm64.whl", hash = "sha256:19c16ceb4a267a8789e25733e583983eeab9f0f8664e66b0bd1c5d21f14c2d4b", size = 270413, upload-time = "2026-05-09T23:12:04.649Z" }, + { url = "https://files.pythonhosted.org/packages/c2/dc/c1f2df4027e82fc54b5a473e4b250f5139faca49a0fbe29a48668d228f34/regex-2026.5.9-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ccf5249114cc3e772ecdd88a98a86eca0fd74c61ce32a94743758c083fc05d48", size = 489445, upload-time = "2026-05-09T23:12:06.111Z" }, + { url = 
"https://files.pythonhosted.org/packages/03/d2/59f01110660081cce9c0bc30ebd0b5ee250dacf658e3248ed92f01e0e8ee/regex-2026.5.9-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:46f1326ca6e65b0879d23ca302c0f2415aad42ff0309b9c818e7949fe19a41d8", size = 291271, upload-time = "2026-05-09T23:12:07.731Z" }, + { url = "https://files.pythonhosted.org/packages/58/b6/14b2c84ff90ddb370c81d27503f4a0fcf071496416f4855f6cc8c5d81c35/regex-2026.5.9-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ef31cbfe458e21c6122ba8150ff060e0c7789ed0d26eb423f25472584920b555", size = 289212, upload-time = "2026-05-09T23:12:09.266Z" }, + { url = "https://files.pythonhosted.org/packages/03/d0/4db86529117320de0c84afd90e70bb47434625875e34fcef9d8c127c5b16/regex-2026.5.9-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:992604d02e6d9c6d786c24a706a71ecffe1020fc1ef264044474cd81fa2c3919", size = 792310, upload-time = "2026-05-09T23:12:11.416Z" }, + { url = "https://files.pythonhosted.org/packages/07/78/fe4800cd322f862ecffd2d553409b20d80650e5ed71b9d178f853d020b82/regex-2026.5.9-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c9411dd64ca95477225734a93dfc8583b51916b8d5942f99d6cac21e09965451", size = 861721, upload-time = "2026-05-09T23:12:13.681Z" }, + { url = "https://files.pythonhosted.org/packages/b5/d0/b3618a895dd8feb897c61bb2954edd265e1767d82a01d53065d5871127a3/regex-2026.5.9-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:3dd4a3ff360dfb836fecdb93a4598f9d6e2ac81e3e397125145c6221bf58cf4c", size = 906460, upload-time = "2026-05-09T23:12:15.443Z" }, + { url = "https://files.pythonhosted.org/packages/33/6f/1481597e859ef19508b345eec4afd1416ed6e6b459c75a64026ef193aecf/regex-2026.5.9-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2a661a7d270a61f7cf460caee8b9fa2d5ef9e5c681234bcb9e0fe14f488e7dfc", size = 799843, 
upload-time = "2026-05-09T23:12:16.892Z" }, + { url = "https://files.pythonhosted.org/packages/73/59/955734c803f59108deccba3597ae440c76b62a652733c0006e6243758420/regex-2026.5.9-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f079e50a0d3cc3cd5091fa9ff45869a2e6b2cd35895731edafb0327901a8d86d", size = 773610, upload-time = "2026-05-09T23:12:19.127Z" }, + { url = "https://files.pythonhosted.org/packages/68/8f/70c04a236d651c81881dac42ef8538bddda6121434509d0a22d9e601503b/regex-2026.5.9-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:4ebe8f0b5ec5a5024dc4a4c59f444c4e9afc5f2abdbb8962065b75d27fb971f9", size = 781645, upload-time = "2026-05-09T23:12:20.806Z" }, + { url = "https://files.pythonhosted.org/packages/1d/96/05c7434d88185e5d27fe54aeb74df86bd77cd79f52f0b4eae54faa8fea70/regex-2026.5.9-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:97cf3bc1b7d7d2306772ec07366c80d9df00ff79e79cea32898883a646d2fae2", size = 854473, upload-time = "2026-05-09T23:12:22.465Z" }, + { url = "https://files.pythonhosted.org/packages/4e/c1/6e3d8202d981f3117004bf341ee74893ba4ba8a9fbaf4b94615846550a08/regex-2026.5.9-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:0f9eede6a5cbdc02d4978090186390936e1776a7d1359b21e41014c609880bcf", size = 763311, upload-time = "2026-05-09T23:12:24.351Z" }, + { url = "https://files.pythonhosted.org/packages/93/c7/e7737f1526b3fb32bd4c337fd6c71c3ebb5c8296fc34d11197e0955d2e35/regex-2026.5.9-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:01f0f5f55f4b64dacec85dc116d3c05fd23ad3ff037bbc73a2085775953c2611", size = 844593, upload-time = "2026-05-09T23:12:26.341Z" }, + { url = "https://files.pythonhosted.org/packages/a5/27/0daffb1a535bb39f422c3d200f4ab023c71110ad66a32b366bee708baba0/regex-2026.5.9-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1268eddd8486dc561d08eee1156e40aa3a8fe10f4bdec8fa653b455fcbffd12c", size = 789167, upload-time = "2026-05-09T23:12:27.975Z" }, + { url = 
"https://files.pythonhosted.org/packages/ce/fc/294fe4fac4f2ed67207b17471815870c1c45b3a489e08e0ac96daea16ef6/regex-2026.5.9-cp311-cp311-win32.whl", hash = "sha256:8676474c07469d6f33dd1085ca2cd45f65785f32518f2b20e36d9953ca07f994", size = 266249, upload-time = "2026-05-09T23:12:30.141Z" }, + { url = "https://files.pythonhosted.org/packages/d0/b0/8dce459f6245bcf8f6e9f23ac9569f1a0f15c131cc0745e82b43226204cf/regex-2026.5.9-cp311-cp311-win_amd64.whl", hash = "sha256:246de9d60aa3f8538b519834dd95cbf276ea263d6a7bd5a3666dc3fa0230505b", size = 278423, upload-time = "2026-05-09T23:12:31.676Z" }, + { url = "https://files.pythonhosted.org/packages/db/8d/f9aeff6ad63a3ef720386f2907e6d34a35a510a6e498ebad28b0fb3f6ab6/regex-2026.5.9-cp311-cp311-win_arm64.whl", hash = "sha256:d726ca3f0d76969bf1e8e477d160d3d666bbf999f6860bd314889e5345782046", size = 270420, upload-time = "2026-05-09T23:12:33.194Z" }, + { url = "https://files.pythonhosted.org/packages/50/9b/6550044bc44e17c84d312c031c2ec42fbdb6a4ec4e29093be3a172d08772/regex-2026.5.9-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:57eeeb05db7979413dec5438f2db21d7ecbba787cde7a711df1a6f6df672aa06", size = 490451, upload-time = "2026-05-09T23:12:34.72Z" }, + { url = "https://files.pythonhosted.org/packages/1e/95/fc7ba4303b5a0f92446a12ee6778ef2c6c799233f5060042a31bf390cfe9/regex-2026.5.9-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:398c521292f4c7fb807001dcd54694d3a1fcafc179a36ad9cc56f98df85930b6", size = 292112, upload-time = "2026-05-09T23:12:36.285Z" }, + { url = "https://files.pythonhosted.org/packages/54/4b/ee27938d1b2c443e89a9a10e00d2d19aa5ee300cd3d61140644e93bb083e/regex-2026.5.9-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f7a7c26137296beba7784de6eba69c6a93a63ccebc385e4962fe67e267a91225", size = 289599, upload-time = "2026-05-09T23:12:38.089Z" }, + { url = 
"https://files.pythonhosted.org/packages/d8/dd/ba103dc19614e25f3880800ca67ce093d6e21b325d72b8383c7bf906e9fa/regex-2026.5.9-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6441cc660d76107934a09c22167200839a0e89604a6297f78a974e66e931d2c0", size = 796732, upload-time = "2026-05-09T23:12:40.062Z" }, + { url = "https://files.pythonhosted.org/packages/cf/e7/f035b4fd858b050b0080bf302968dc0f59ba34e391872d54936758e6844e/regex-2026.5.9-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:91328f1c23d47595ca3ef0a7557fa129c5a23404b775c770697d2f35b33e0107", size = 865440, upload-time = "2026-05-09T23:12:42.059Z" }, + { url = "https://files.pythonhosted.org/packages/0a/51/8cd301ecc899aea28124357f729f4272f44de7806fc7ca02490bfbe253e8/regex-2026.5.9-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:93a7860539414dddaefba2b40f8771765ae17949d4c7182b876ce429e11a8309", size = 912329, upload-time = "2026-05-09T23:12:44.373Z" }, + { url = "https://files.pythonhosted.org/packages/cc/1e/3fbe2fa1e8cebd62f3bb7d3321cff1640aca2e240b51d9bd624aad949260/regex-2026.5.9-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dd2810d22146b6d838acc5ec15602cb6b47920aa4e33015df3868eedfd20bab8", size = 801239, upload-time = "2026-05-09T23:12:46.268Z" }, + { url = "https://files.pythonhosted.org/packages/17/2f/6f6008682bf2cf98040a0d3153a8e557b6ab728d7713d045cee4ce544ab8/regex-2026.5.9-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:daff2bdbaf1d23e52fdff7c0b7bc2048b68f978df6a4d107ac981f94caef2e66", size = 777054, upload-time = "2026-05-09T23:12:48.051Z" }, + { url = "https://files.pythonhosted.org/packages/19/2b/eee0d20a6842ba04df4b8847a920b57ef56853f14ef85405473e586b605a/regex-2026.5.9-cp312-cp312-musllinux_1_2_aarch64.whl", hash = 
"sha256:4eeb011098fcb77af513dcef521a3dbecbf8849b1e38940759d293b7a93f5026", size = 785098, upload-time = "2026-05-09T23:12:49.851Z" }, + { url = "https://files.pythonhosted.org/packages/4a/98/6fc1e6410feefb92159edaed5041992bfe390e8d26c721865434acbca558/regex-2026.5.9-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:ea9c8ecfa1b73c73b626534d6626e5340d429630943672b8480724f44e84b962", size = 860095, upload-time = "2026-05-09T23:12:51.666Z" }, + { url = "https://files.pythonhosted.org/packages/18/a3/bd855e0f2cb1a978ecf6fa6bb69632dd9c3f6ea3b81cde62fde14c9daec7/regex-2026.5.9-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:cd2846168eb9ee3c513902bc8225409cb1caab31d04728b145171fa1625d9621", size = 765762, upload-time = "2026-05-09T23:12:53.413Z" }, + { url = "https://files.pythonhosted.org/packages/dc/66/0ae8c092e60b14c79d24f8e0b7f0aea5bfbffdcab00b5483d13404d3c3a5/regex-2026.5.9-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:39617fb0cde9c0e6306dc70e3bfc096f3da793219879f7ae7aa341a69fbdcf6d", size = 852100, upload-time = "2026-05-09T23:12:55.256Z" }, + { url = "https://files.pythonhosted.org/packages/21/de/8dfde60fc1b21c946a893ba273403b72617edb261370cb1087099a83f088/regex-2026.5.9-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fd03c4f0e33280d15cae17159b899245d6b7c53d21def19b263b39655061f5ce", size = 789479, upload-time = "2026-05-09T23:12:57.573Z" }, + { url = "https://files.pythonhosted.org/packages/c3/1c/bdcc98f9a4af4fdd166c74941174619ccff4726d3ce32faa8e9a2ecd38dd/regex-2026.5.9-cp312-cp312-win32.whl", hash = "sha256:164eba9b755ea6f244b0d881196fbc1fac09714e9782c9e2732b813142033c8e", size = 266699, upload-time = "2026-05-09T23:12:59.14Z" }, + { url = "https://files.pythonhosted.org/packages/78/87/240d36864f9e48ace85f72e79ced97ceb7f27ce87739a947dcb834b4e6bc/regex-2026.5.9-cp312-cp312-win_amd64.whl", hash = "sha256:86f40a5d6444db30a125c9c9177e6b25dad981cbc37451fd838f145e6edac92e", size = 277783, upload-time = "2026-05-09T23:13:00.789Z" }, + { url = 
"https://files.pythonhosted.org/packages/4f/b5/7b30f312b0669dff5beebe5b0989dc2d1a312b1a44fab852199c387a5b96/regex-2026.5.9-cp312-cp312-win_arm64.whl", hash = "sha256:96f5f58b54a063d7ea9dca08e1cf57bfe10499c4d579ee672da284f57f5f0070", size = 270513, upload-time = "2026-05-09T23:13:02.426Z" }, + { url = "https://files.pythonhosted.org/packages/aa/da/797e91ecec6f84135da778ddce78c20e0af5d2a15c26f87a81bc3eadb6db/regex-2026.5.9-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:d626b84406444b165fc0ba981604edea39f0588ff1f92baa23fe50799ea9afdb", size = 490303, upload-time = "2026-05-09T23:13:04.382Z" }, + { url = "https://files.pythonhosted.org/packages/44/da/bf30abaaa737b58f4a4b8c4a03659e02fd92092c822e0197ed9e0daab917/regex-2026.5.9-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:d7bdc0ab8f3dd7e1b4f9ab88634e13374669db86bb3c72e8292f07ae313f539f", size = 292019, upload-time = "2026-05-09T23:13:06.022Z" }, + { url = "https://files.pythonhosted.org/packages/2d/e7/d0eaf5713828417b9e5648cf81fa9bacd4961f6ab98c380c2034f8716e35/regex-2026.5.9-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a8820737949116ffff55fe18f9fc644530063ba6ebfcb8314239416e78f1347c", size = 289468, upload-time = "2026-05-09T23:13:08.214Z" }, + { url = "https://files.pythonhosted.org/packages/d3/9b/b3fdd62b003baa1a9b593cd8c8699c9651c2e80cc21a5c715707983c42d7/regex-2026.5.9-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:aa0fbdbac82cb3e4450d0ccde7d7a35607f4cb2dd9fba4b8b69bfaf8c9fa6aed", size = 796749, upload-time = "2026-05-09T23:13:10.573Z" }, + { url = "https://files.pythonhosted.org/packages/d4/30/66ab84588765f5b4b271a9ca09ef7ce2b87caa95176ec3d2ad65d7bc4902/regex-2026.5.9-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:57e8915c7986aa33d25e4d3629cef711cd2863f2961b10409f0c04cb8b7d9020", size = 865445, upload-time = "2026-05-09T23:13:12.523Z" }, + { url = 
"https://files.pythonhosted.org/packages/1a/89/f05169e8588aac365f35ffc7f3bc3184f095ef4cfded7cfaa3c7fd5dbd89/regex-2026.5.9-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:508f56a89ba9cb26e4168cbc37dbd60a28d82430a9e18ad1d25fe0883c314ca2", size = 912322, upload-time = "2026-05-09T23:13:14.281Z" }, + { url = "https://files.pythonhosted.org/packages/30/e1/c93444052cf41581f3c884ab3fb5823daf0992f11cd4388d4275ca610558/regex-2026.5.9-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b6d189041f15691cfa2b6c4290448ec221244d225b3f5fe9e7771b34ffcdf6e2", size = 801269, upload-time = "2026-05-09T23:13:16.569Z" }, + { url = "https://files.pythonhosted.org/packages/50/fe/0cf96b882f540e62e8b9956599798203d599c44cf4c77917ca27400ff69b/regex-2026.5.9-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e82db382b44d0111b22601c509c89f64434816c9e0eef9d1989cda8cc6ff1c04", size = 777085, upload-time = "2026-05-09T23:13:18.675Z" }, + { url = "https://files.pythonhosted.org/packages/23/5c/d78d4924e7fc875557b9e9b768423925fdfaac5549d06da7810019a9bd26/regex-2026.5.9-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:2acfb48634f64996b57f90f39afa692ff362162722581921fe92239a59960f3c", size = 785153, upload-time = "2026-05-09T23:13:20.525Z" }, + { url = "https://files.pythonhosted.org/packages/bf/e0/5214774090e7b4524dcea3e3c4aa74141d43043f8beb49c1599db1c8b53a/regex-2026.5.9-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:d29eebfc9525db68cad3c97eedd7f754fa265aa5cd0cf4f863b2421e1b48fc9f", size = 860164, upload-time = "2026-05-09T23:13:22.263Z" }, + { url = "https://files.pythonhosted.org/packages/6e/e1/4a57a83350319b1271f0d7a249b8672513ed928b237a741631270de6caea/regex-2026.5.9-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:debb893095e944091c16e641a6e33c1b0f4cb61ab945ec5afbf53ce7068834d8", size = 765731, upload-time = "2026-05-09T23:13:24.277Z" }, + { url = 
"https://files.pythonhosted.org/packages/12/f4/499e74a20c156fc75836ee04a72a38d1a063978f600937f9760467beb1b0/regex-2026.5.9-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:d659eee77986549c9ea45b861c7567e44d6287c3dc9a4565478853f7b9fe2ff6", size = 852062, upload-time = "2026-05-09T23:13:26.125Z" }, + { url = "https://files.pythonhosted.org/packages/5b/92/7eebc0d0a01e78629695f342ba17e0deaff8fb45e79cc0d7b98287da6e3e/regex-2026.5.9-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:2efa205e6d98b24d1f3ab395c11aa15cdf10935bca283d0285e0499c284fba21", size = 789577, upload-time = "2026-05-09T23:13:27.814Z" }, + { url = "https://files.pythonhosted.org/packages/05/a4/018e71f7d2ad48c1ebe6d3ae0026f9b7cb4802fd15c7cc02fdf724355102/regex-2026.5.9-cp313-cp313-win32.whl", hash = "sha256:f3844f134e834076677dd369976e9f5068679fcb8e50102fdf6b7ac96a3ec127", size = 266691, upload-time = "2026-05-09T23:13:29.549Z" }, + { url = "https://files.pythonhosted.org/packages/e6/1d/861a93719fb9ee7dbfc3761b3797b7a3e112a5d42c6129459d2d741be9b5/regex-2026.5.9-cp313-cp313-win_amd64.whl", hash = "sha256:3527bb4942d2c14552155406cdedd906567456821848aed1cb4933a391bf5eca", size = 277747, upload-time = "2026-05-09T23:13:31.859Z" }, + { url = "https://files.pythonhosted.org/packages/d9/c6/0a2436ae4da1ba76e51cb98943c6838a9a721faa40ebe2dce07694ae34e3/regex-2026.5.9-cp313-cp313-win_arm64.whl", hash = "sha256:56a33f191f17d8c417f99945ebdc1e691d3af9605d86ec68c7e54a57e3e17af6", size = 270500, upload-time = "2026-05-09T23:13:33.525Z" }, + { url = "https://files.pythonhosted.org/packages/e8/e9/d21346f7b60ed58789371358ed66b09d00f832e1bd7c06e55d9da5679882/regex-2026.5.9-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:01f28d868834624c934b8d2e0aa1c8341337e37831f4a012f18a5afcba4cbaf3", size = 494172, upload-time = "2026-05-09T23:13:35.935Z" }, + { url = 
"https://files.pythonhosted.org/packages/c4/43/fd1177a2032037c681baecdb3422ee4e1424aec4e4f470ef47793d325274/regex-2026.5.9-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:48036f6374aaa79eb3b754ec29c61d1c6b1606749d705a13f8854fa2539671f6", size = 293952, upload-time = "2026-05-09T23:13:38.307Z" }, + { url = "https://files.pythonhosted.org/packages/f2/7d/9fbf919768368d3f8a4f6c692cf2aa61e482b2b81ec6a298ace4cbf02480/regex-2026.5.9-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b96350aa424e79d4fd6b567b344dcbe2b2d6bfc48dfe7717587e1fa6d43da6ff", size = 292314, upload-time = "2026-05-09T23:13:40.353Z" }, + { url = "https://files.pythonhosted.org/packages/e2/6c/e41bfeecb589716843e7c4df09ba46ff2a42961457afece19059d85caeef/regex-2026.5.9-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8f3af7a4903c5c04a11a196a5aa75cdd7dd3f8508132f9fb3259d9f5908e3b88", size = 811681, upload-time = "2026-05-09T23:13:42.543Z" }, + { url = "https://files.pythonhosted.org/packages/87/83/a5c1c525fba0aa656e88ad0face0b1829788ef4c2fb6b26df58aa1151b84/regex-2026.5.9-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7e87577720152d2caae19fe2baaf1f8d5ca12091e9e229f03915c37d1e4b9178", size = 871135, upload-time = "2026-05-09T23:13:44.326Z" }, + { url = "https://files.pythonhosted.org/packages/18/d4/80882e799e440dd878b0979cbebf8fa4d54624a332c83037c7a701649e3f/regex-2026.5.9-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:c8b9b9d294cfea3cd19c718ade7cc93492b2c4991abd9a68d0b3477ae6d8e100", size = 917265, upload-time = "2026-05-09T23:13:47.295Z" }, + { url = "https://files.pythonhosted.org/packages/ae/ff/8db60211e2286e396aad7dc7725356c502bff0901ea05bd6cdc2e1a042b9/regex-2026.5.9-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:728d8bfd28a8845c8b6bc5dc7ce010453d206396786c0765c2740cb65f37791e", size = 
816311, upload-time = "2026-05-09T23:13:49.885Z" }, + { url = "https://files.pythonhosted.org/packages/4c/47/742ef579c61730f8d268e5cf1f9ce0e37e2ea041ad0f5644724f2378e463/regex-2026.5.9-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:7e30b874d341fac767d7df5a0870540541c2c054b80cfaac116e8d367a8a7ff2", size = 785498, upload-time = "2026-05-09T23:13:52.25Z" }, + { url = "https://files.pythonhosted.org/packages/7f/ab/cb0999802dcb0fb95b1ab005e8d4163d8afdd67efc2cb6b6630ac13f8cb1/regex-2026.5.9-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:fd190e88a895a8901325fad284a3f74ea52b1da8525b76cc811fa9b1edf0ce2b", size = 801348, upload-time = "2026-05-09T23:13:54.127Z" }, + { url = "https://files.pythonhosted.org/packages/7d/62/8ca59a24c55bc34d166eefaf3717bd77772f329fdbf984d86581e0a3571c/regex-2026.5.9-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:8e76e8161ad00694cfce6767d5dea860c6391ac5b83e5c3a39661e696f11fc7e", size = 866493, upload-time = "2026-05-09T23:13:56.067Z" }, + { url = "https://files.pythonhosted.org/packages/8d/3d/30f2ae62cef3278bb5bb821f467277a55fb73f01032cf85997e15e8289a8/regex-2026.5.9-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:ddda5340e6c01a293027dd46232fa79eaff1b48058ce7a98f572b6445b088041", size = 772811, upload-time = "2026-05-09T23:13:57.867Z" }, + { url = "https://files.pythonhosted.org/packages/d8/ae/7d2089bcd78ad0c0161bc684339df50032acb438a7bd3305e7ddb1193cec/regex-2026.5.9-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:205109e96b3cf5adf8f4cd62bedde9487feb282b9497a3535451e5a24cd706a0", size = 856584, upload-time = "2026-05-09T23:13:59.679Z" }, + { url = "https://files.pythonhosted.org/packages/a9/29/92ff47f75990131ea4f24ba17819e5a9d141e10819807e09addd73409af6/regex-2026.5.9-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:dfbe4579b9f08036aa7d101d1835437a20783574ac66327e6b29b4018a138081", size = 803453, upload-time = "2026-05-09T23:14:01.978Z" }, + { url = 
"https://files.pythonhosted.org/packages/04/99/eff29f1037dcab36702c9ee5d6858cf1ce2336ea8ea2987f64245b99ea5e/regex-2026.5.9-cp313-cp313t-win32.whl", hash = "sha256:ed2c9e8068b614c574d8d30e543d617cf5379b0535d46f97ef00e904745a08b5", size = 269951, upload-time = "2026-05-09T23:14:03.661Z" }, + { url = "https://files.pythonhosted.org/packages/0e/9d/8870b8981d27b22cda77bb26a5ac7ebfa9c7d9e0dea195a834a82380e748/regex-2026.5.9-cp313-cp313t-win_amd64.whl", hash = "sha256:b46b0f094dc1d3b90356c85a0bd2c9bafc4a6a190b9d6f8ddd5a033b6e088ed4", size = 281240, upload-time = "2026-05-09T23:14:05.56Z" }, + { url = "https://files.pythonhosted.org/packages/72/b1/3379415e8f135c13ac551353397cc4fe97b4978f3cac73c5fcbcded548b8/regex-2026.5.9-cp313-cp313t-win_arm64.whl", hash = "sha256:872acc074bd29ffc9913ecdfedf6ea77502312ca44a4aa0d3779089c6069d8de", size = 272383, upload-time = "2026-05-09T23:14:07.843Z" }, + { url = "https://files.pythonhosted.org/packages/13/3e/9c3cd292d8808b3645a2ce517e200179b6d0e903f176300bd8b542e14de5/regex-2026.5.9-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:1bd7587a2948b4085195d5a3374eaf4a425dc3e55784c038175355ecf3bbbf8a", size = 490376, upload-time = "2026-05-09T23:14:09.64Z" }, + { url = "https://files.pythonhosted.org/packages/60/70/d43ee8a2ca0a8b68d167f21658b85520ac0574617c7f320367c5047f7556/regex-2026.5.9-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:dea2e88e1cce4522496cce630e11e67b98b7076620bc4336c3f674bc21a375f4", size = 291964, upload-time = "2026-05-09T23:14:11.424Z" }, + { url = "https://files.pythonhosted.org/packages/21/91/9d50b433828d8e74196904e168a43abf1e6e88b2a15d47ed742456720c37/regex-2026.5.9-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:2099f7e7ff7b6aa3192312650a56e91cc091e49d50b04e4f6f8b6e28b3b27f1c", size = 289682, upload-time = "2026-05-09T23:14:13.123Z" }, + { url = 
"https://files.pythonhosted.org/packages/3e/d2/b835e3cafbb9d977736912436259ff551d60919f7d7b3d37d46659c63564/regex-2026.5.9-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ecd353045824e4477562a2ac718c25799cdaaa41f7aa925a806a8a3e6848a5b9", size = 796996, upload-time = "2026-05-09T23:14:14.923Z" }, + { url = "https://files.pythonhosted.org/packages/2c/a6/9f992d00019166b9de01c546dd4549bc679f2a68df11b877740b0760b7c2/regex-2026.5.9-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:65c8c8c37377794bd5b2f3ebe51919042bf17aec802e23c833d89782ed0c78af", size = 866089, upload-time = "2026-05-09T23:14:17.757Z" }, + { url = "https://files.pythonhosted.org/packages/e0/08/4d32af657e049b19cb62b02e46e38fe1518797bfb2203ee93a510b21b0dc/regex-2026.5.9-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5b73ab8afcf66c622db143d1c6fda4e58e4d537ee4f125229ad47b1ab80f34c0", size = 911530, upload-time = "2026-05-09T23:14:20.353Z" }, + { url = "https://files.pythonhosted.org/packages/d9/27/2af43dd1dc201d1fecefda64a45f4ad0995855b92724f795a777b402ee69/regex-2026.5.9-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0de5cf193997384ed2ca6f1cd4f78055b255d93d82d5a8cd6ba0d11c10b167e4", size = 800643, upload-time = "2026-05-09T23:14:22.265Z" }, + { url = "https://files.pythonhosted.org/packages/a4/dd/23a249047013b5321d4a60c4d2437462086f601b061776a525e5fba2a59f/regex-2026.5.9-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:d641a8c9a61618047796d572a39a79b26167b0411d2c3031937b2fe2d081e2cf", size = 777223, upload-time = "2026-05-09T23:14:24.179Z" }, + { url = "https://files.pythonhosted.org/packages/94/6a/e85ed9538cd19586d0465076a4578a12e093ce776d15f3f8ce92733a8dd6/regex-2026.5.9-cp314-cp314-musllinux_1_2_aarch64.whl", hash = 
"sha256:24b2355ef5cc9aa5b8f07d17704face1c166fdcc2290fa7bd6e6c925655a8346", size = 785760, upload-time = "2026-05-09T23:14:26.065Z" }, + { url = "https://files.pythonhosted.org/packages/2a/c4/f25473209438638e947c55f9156fd8f236f74169229028cc99116380868e/regex-2026.5.9-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:a24852d3c29ad9e47593593d8a247c44ccc3d0548ef12c822d6ed0810affe676", size = 860891, upload-time = "2026-05-09T23:14:28.17Z" }, + { url = "https://files.pythonhosted.org/packages/f9/f7/f4f86e3c74419c37370e91f150ae0c2ef7d34b2e0e4cdd5da046a02e4022/regex-2026.5.9-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:916714069da19329ef7de197dcbc77bb3104145c7c2c864dbfbe318f46b88b14", size = 765891, upload-time = "2026-05-09T23:14:30.06Z" }, + { url = "https://files.pythonhosted.org/packages/26/70/704d8e13765939146b1cd0ef4e2feb71d7929727d2290f026eed10095955/regex-2026.5.9-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:fa411799ca8da32a8d38d020a88faa5b6f91657d284761352940ecf9f7c3bbdd", size = 851380, upload-time = "2026-05-09T23:14:32.123Z" }, + { url = "https://files.pythonhosted.org/packages/26/29/1a13582a8460038edc38e49f64ceb0dd7c60f5caba77571f4bf6601965d9/regex-2026.5.9-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:1e6da47d679b7010ef27556b6e0f99771b744936db1792a10ceac6547ae1503e", size = 789350, upload-time = "2026-05-09T23:14:34.799Z" }, + { url = "https://files.pythonhosted.org/packages/73/56/3dcafe34fc72e271d62ad9a291801e88a1457bb251c132f15fcc2e5aad1a/regex-2026.5.9-cp314-cp314-win32.whl", hash = "sha256:98bd73080e8756255137e1bd3f3f00295bbc5aa383c0e0f973920e9134d7c4ad", size = 272130, upload-time = "2026-05-09T23:14:36.729Z" }, + { url = "https://files.pythonhosted.org/packages/d0/9c/02eebf0be95efe416c664db7fb8b6b05b7a0b06a7544f2884f2558b0526f/regex-2026.5.9-cp314-cp314-win_amd64.whl", hash = "sha256:ff8d372ac2acdc048d1c19916f27ee61bc5722728458ba6ca5052f2c72d51763", size = 280999, upload-time = "2026-05-09T23:14:39.126Z" }, + { url = 
"https://files.pythonhosted.org/packages/70/5a/1dd1abee76cb7a846a0bcf42fdc87e5720c3c33c24f3e37814310a513d9f/regex-2026.5.9-cp314-cp314-win_arm64.whl", hash = "sha256:e1d93bf647916292e8edcec150c07ddf3dc50179ccaf770c04a7f9e452155372", size = 273500, upload-time = "2026-05-09T23:14:41.059Z" }, + { url = "https://files.pythonhosted.org/packages/86/c1/c5f619b0057a7965cb78ec559c1d7a45ce8c99a35bea95483d64959a93d9/regex-2026.5.9-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:83d0ee4a57d1c87cb549e195ec300b8f0ec3a82eba66d835e4e2ed8634fe4499", size = 494269, upload-time = "2026-05-09T23:14:42.869Z" }, + { url = "https://files.pythonhosted.org/packages/05/2c/5d01f1aee33de4bbe60c8452945bfc8477ca7c5ae4450f6bfe711036cb36/regex-2026.5.9-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:d3d7eb5c9a7f6df82ed3cfac9beb93882a5cbcb5b8b157b56cb2b3b276574ac1", size = 293954, upload-time = "2026-05-09T23:14:44.822Z" }, + { url = "https://files.pythonhosted.org/packages/7a/fe/e8988b2ae2108c6ef71bd4aa8d87fbe257976dd0810e826cd75f701c68b6/regex-2026.5.9-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:075160bf16658e16d35233300b8453aac25de4cbea808d22348b6979668e924d", size = 292405, upload-time = "2026-05-09T23:14:47.211Z" }, + { url = "https://files.pythonhosted.org/packages/79/34/d2b0937faa7859263f7f0a3c6b103a1296306be6952dc173d0154e9a2f49/regex-2026.5.9-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:45375819235558a4ff1c4971dc32881f022613abdb180128f5cb4768c1765a1c", size = 811855, upload-time = "2026-05-09T23:14:49.21Z" }, + { url = "https://files.pythonhosted.org/packages/80/fe/daf53a47457a8486db66c66c01ceb9c2303eecee3f87197f1e77eb1a736d/regex-2026.5.9-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ead4b163ac30a29574510cd4b3e2e985ac5290c05fc7095557d6a5f403fc31b5", size = 871189, upload-time = "2026-05-09T23:14:51.555Z" }, + { url = 
"https://files.pythonhosted.org/packages/1c/75/058fc4470cbfbf57d800aff1a0022b929a3f9fa553ee10a0cdf2070eb31f/regex-2026.5.9-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8c6e4218fbdfbcd4f6c19efca40930d24a621bf4b48cb76bc6640543bd28ef20", size = 917485, upload-time = "2026-05-09T23:14:53.633Z" }, + { url = "https://files.pythonhosted.org/packages/88/e7/179cfda3a28bc843b5c6cfe7f79f23489c791ed95f151083803660878432/regex-2026.5.9-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6351571c8a42b505eb555c0dc47d740d0fb66977dc142919eea6f4325b7c56a0", size = 816369, upload-time = "2026-05-09T23:14:56.198Z" }, + { url = "https://files.pythonhosted.org/packages/41/90/6f0cc422071688266d344fca8462d787cba0a2c144acb25721f9a61ec265/regex-2026.5.9-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:002205cafd2a9e78c6290c7d1df277bf3277b3b7a30e0b4bb0dac2e2e3f7cb2d", size = 785869, upload-time = "2026-05-09T23:14:58.602Z" }, + { url = "https://files.pythonhosted.org/packages/02/67/a31f1760f09c27b251ef39e9beb541f462cf977381d067faa764c2c0e393/regex-2026.5.9-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:8abd33fef90b2a9efac5557d6033ca82d1195ed3a15fea5af15ba7b463c6a63b", size = 801427, upload-time = "2026-05-09T23:15:00.642Z" }, + { url = "https://files.pythonhosted.org/packages/e3/c4/1a80654597b6bc1e1ea0494824c31200e8a956abe290afae9b19a166a148/regex-2026.5.9-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:31037c82eccb44b7ea2e9e221d7c01429430e989a1f4b91ea5a855f6017b509a", size = 866482, upload-time = "2026-05-09T23:15:03.384Z" }, + { url = "https://files.pythonhosted.org/packages/d1/11/960724e06482c08466ff5611e242e86f80062949cdf6b4b9cc317b9dd93d/regex-2026.5.9-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:5604dfd046dc37eca90250fc3be938b076c8059fa772ac0ed6f499b0f0fb0415", size = 773022, upload-time = "2026-05-09T23:15:05.625Z" }, + { url = 
"https://files.pythonhosted.org/packages/50/a8/a9979c3e7918280e93159ebcab5ef1a65116dd4f3bd6091be0eae4a126e8/regex-2026.5.9-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:0e1b1b4e496afbb24f4a62aba855ee4f88f25578927697b340702e48c9ee6bc2", size = 856642, upload-time = "2026-05-09T23:15:07.966Z" }, + { url = "https://files.pythonhosted.org/packages/fe/d4/a9b732f2f0072c0ab12227483abb24fffcb9f73f8a2b203df0a6d0434735/regex-2026.5.9-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:be3372b9df6ddecff6486d37e19095a7b4973137caf5512407a89f4455361f41", size = 803552, upload-time = "2026-05-09T23:15:10.215Z" }, + { url = "https://files.pythonhosted.org/packages/d5/fe/1b3113817447a1d4155e4ac76d2e072f42c0bcba2f43fa8a0e756ea2cd91/regex-2026.5.9-cp314-cp314t-win32.whl", hash = "sha256:3ddd90103f9e5c471c49c7852ecc1fe27c7e45eb99e977aefe7caa4e779f4f58", size = 275746, upload-time = "2026-05-09T23:15:12.609Z" }, + { url = "https://files.pythonhosted.org/packages/92/73/93d42045302636c91f2e5ef588b65b84b01428f28ec77de256b1dfdfbe5c/regex-2026.5.9-cp314-cp314t-win_amd64.whl", hash = "sha256:ca518ed29c46eecba6010b15f1b9a479314d2de409536e71b6a13aa04e3b8a77", size = 285685, upload-time = "2026-05-09T23:15:15.086Z" }, + { url = "https://files.pythonhosted.org/packages/da/80/35b4c33c804a165a7f55289afda3ea9e3eb6d15800341a2d66455c0f1f30/regex-2026.5.9-cp314-cp314t-win_arm64.whl", hash = "sha256:5e41809d2683fcde7d5a8c87a6567ba1fb1ce0de9f31bff578de00a4b2d76daa", size = 275713, upload-time = "2026-05-09T23:15:16.98Z" }, +] + [[package]] name = "requests" version = "2.34.2" @@ -1773,6 +1898,67 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ec/e1/b2df4bc09a1e51ff664c1e17018a4274b42e5e9352e4a478ea540512dc88/starlette-1.0.1-py3-none-any.whl", hash = "sha256:7c0e69b2ee1c848bd54669d908500117a3ee13de603a21427e5c6fc1adf98dcd", size = 72802, upload-time = "2026-05-21T21:58:56.551Z" }, ] +[[package]] +name = "tiktoken" +version = "0.13.0" +source = { registry = 
"https://pypi.org/simple" } +dependencies = [ + { name = "regex" }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e4/e5/5f3cb2159769d0f4324c0e9e87f9de3c4b1cd45848a96b2eb3566ad5ca77/tiktoken-0.13.0.tar.gz", hash = "sha256:c9435714c3a84c2319499de9a300c0e604449dd0799ff246458b3bb6a7f433c1", size = 38986, upload-time = "2026-05-15T04:51:27.153Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/38/e3/03c90dadcf5b3f82b83cee9adee60ef666b329c654f58c066af44eae0287/tiktoken-0.13.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:47b1df8d73390a24f94980c75158cdd5c56d256f16d55f30cb49c230caba9ba4", size = 1036627, upload-time = "2026-05-15T04:50:11.229Z" }, + { url = "https://files.pythonhosted.org/packages/5e/30/760463e5b2e8ad2bc229ae0a17ecb06727b6cbc094f08d8f65844315632e/tiktoken-0.13.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7d40c6c5aab171dcd6eb8455bc567bde404bb9def60cdb8c1299cc782b242bb9", size = 984699, upload-time = "2026-05-15T04:50:12.874Z" }, + { url = "https://files.pythonhosted.org/packages/de/8a/8895f342a6b6aabd1a358e672f6f077b3ae51d0c63ca605d142db3bcd8ab/tiktoken-0.13.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:9b842981fa91accdffd48ff6408a977b7a91c3fbda55d353c3c68114d5c9d69e", size = 1118690, upload-time = "2026-05-15T04:50:14.234Z" }, + { url = "https://files.pythonhosted.org/packages/51/e0/92557768fb0801f0d9dd9243cb9b6d342900b05e4b1006d4771f49ce233e/tiktoken-0.13.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:ed5a30027cb4d8c7ca8b273d4766f3db3cf58fad9e9f3b1a68a351ffb54873d5", size = 1138423, upload-time = "2026-05-15T04:50:15.668Z" }, + { url = "https://files.pythonhosted.org/packages/8f/b9/a3d99feeedb032ffd09cd6652077f86bdee9a70dd0b990b2b272b445d4c3/tiktoken-0.13.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:7ab10f4a21c2999846940113f6dbd72e0fa06a24119feddd74cc47e85818e06d", size = 1185077, upload-time = "2026-05-15T04:50:17.19Z" }, + { url = 
"https://files.pythonhosted.org/packages/cc/93/bab868277d475dc6d2aaacd34cdd239c282f4908dcc8702e0a3311a8e032/tiktoken-0.13.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:a2937ad042d49d50eac6e1ba07c5661d4bd3942a5b1e0c0d08475c4df83676e1", size = 1241702, upload-time = "2026-05-15T04:50:18.772Z" }, + { url = "https://files.pythonhosted.org/packages/c3/16/27e9f7e0ed76e501cfefc9fb2112df4c7bf70ca96945b15ecb7615aac860/tiktoken-0.13.0-cp310-cp310-win_amd64.whl", hash = "sha256:44733b99bfd72b590cd0936b1c01b3b4dd73122db2d544bc1ceeb18a7678c910", size = 876565, upload-time = "2026-05-15T04:50:20.268Z" }, + { url = "https://files.pythonhosted.org/packages/1a/4c/1bc81f4cd53e827c4ee67ca951b5935724716049452d8dfa09b8b82372bb/tiktoken-0.13.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:7bfe1849caa65d1e1d9871817170ec497bbb7984e182012e1bdce72f66608cdb", size = 1036353, upload-time = "2026-05-15T04:50:21.757Z" }, + { url = "https://files.pythonhosted.org/packages/75/91/10b9c7076bc02c246c853201fdbbe300a4b8c5ed7b84c25f7403f4e32655/tiktoken-0.13.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:91c180fe255bd5a86d8316210d2833a1d4d33d026cd86a67812f4773743c8d26", size = 984644, upload-time = "2026-05-15T04:50:23.256Z" }, + { url = "https://files.pythonhosted.org/packages/4e/e4/fceae98015fab47fcd49b8bd7f46145bcd187a47e0add1e5378ed67ef980/tiktoken-0.13.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:059c8ecf554eb5b41e6e054ba467b871b03277d267dee7244380aca4359747d4", size = 1119261, upload-time = "2026-05-15T04:50:24.348Z" }, + { url = "https://files.pythonhosted.org/packages/f9/39/fe42ad00de01a8c4a49ad8649a2c8a316835a9cad5961b11d21eac0020a5/tiktoken-0.13.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:36217497eaffc158607a3b26f065300db2aefd43b115263f3b9688ce38146173", size = 1138253, upload-time = "2026-05-15T04:50:25.505Z" }, + { url = 
"https://files.pythonhosted.org/packages/03/c4/ccee1ecccca107e9a16efcecdeeb964c325305038554d466ece65b42338f/tiktoken-0.13.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:303f7d91b4fce3baddbcde05c139091d4caa5026ac7214c1dc7ff7a71ee429ff", size = 1185747, upload-time = "2026-05-15T04:50:27.02Z" }, + { url = "https://files.pythonhosted.org/packages/9d/03/cd0cba295522b91eb55c6b2704f1df895f8226cfe60ab10d4d51d0cc9e69/tiktoken-0.13.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:5d48843bee149630eb735a99e1f4a85b47308d21868ea63163f6e87768d3cfed", size = 1241265, upload-time = "2026-05-15T04:50:28.815Z" }, + { url = "https://files.pythonhosted.org/packages/7e/25/a10efd564402d82c2ff50d12057353ace447aa8007deceaa48641f63d35c/tiktoken-0.13.0-cp311-cp311-win_amd64.whl", hash = "sha256:fc1c44cd37b43fc46bae593129164f4f281e82ea116b57a85aa81bda57eafc94", size = 876509, upload-time = "2026-05-15T04:50:30.026Z" }, + { url = "https://files.pythonhosted.org/packages/85/8e/144bde4e01df66b34bb865557c7cd754ed08b036217ebd79c9db5e9048a9/tiktoken-0.13.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:32ac870a806cfb260a02d0cb70426aef02e038297f8ad50df5040bb5af360791", size = 1034888, upload-time = "2026-05-15T04:50:31.579Z" }, + { url = "https://files.pythonhosted.org/packages/36/18/d4ac9d20956cdebca04841316660ed584c2fecdc2b81722a28bc7ad3b1e4/tiktoken-0.13.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4d9980f11429ed2d737c463bb1fb78cf330caa026adf002f714aced7849a687b", size = 982970, upload-time = "2026-05-15T04:50:32.961Z" }, + { url = "https://files.pythonhosted.org/packages/74/ed/6bb8d05b9f731f749fee5c6f5ca63e981143c826a5985877330507bd13b7/tiktoken-0.13.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:3f277ebea5edd7b8bf03c6f9431e1d67d517530115572b2dc1d465326e8f88c7", size = 1115741, upload-time = "2026-05-15T04:50:34.475Z" }, + { url = 
"https://files.pythonhosted.org/packages/34/de/2ca96b07a82d972b74fe4b46de055b79c904e45c7eab699354a0bfa697dc/tiktoken-0.13.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:a116178fa7e1b4065bff05214360373a65cac22f965be7b3f73d00a0dbfe7649", size = 1136523, upload-time = "2026-05-15T04:50:35.782Z" }, + { url = "https://files.pythonhosted.org/packages/ee/dc/9dafec002c2d4424378563cf4cf5c7fb93631d2a55013c8b87554ee4012c/tiktoken-0.13.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2c397ddda233208345b01bd30f2fca79ff730e55731d0108a603f9bc57f6af3b", size = 1181954, upload-time = "2026-05-15T04:50:36.99Z" }, + { url = "https://files.pythonhosted.org/packages/a1/d0/1f8578c45b2f24759b46f0b50d31878c63c73e6bf0f2227e10ec5c5408dc/tiktoken-0.13.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:95097e4f89b06403976e498abf61a0ee73a7497e73fb599cb211d8197a054d91", size = 1240069, upload-time = "2026-05-15T04:50:38.221Z" }, + { url = "https://files.pythonhosted.org/packages/aa/90/28d7f154888610aa9237e541986beb62b479df29d193a5a0617dbb1514d0/tiktoken-0.13.0-cp312-cp312-win_amd64.whl", hash = "sha256:8f2d16e7a7c783ad81f36e457d046d1f1c8af70b22aec8a13238efe531977c41", size = 874748, upload-time = "2026-05-15T04:50:39.587Z" }, + { url = "https://files.pythonhosted.org/packages/9c/83/b096c859c2a47c11731bf2f5885f4028b809dfe2396582883eed9cae372f/tiktoken-0.13.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5df5d1507bd245f1ccad4a074698240021239e455eb0bb4ced4e3d7181872154", size = 1034228, upload-time = "2026-05-15T04:50:40.988Z" }, + { url = "https://files.pythonhosted.org/packages/53/61/c68e123b6d753e3fc2751e9b18e732c9d8bf1e1926762e736eee935d931c/tiktoken-0.13.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:8fe806a50664e83a6ffd56cbd1e4f5dcc6cd32a3e7538f70dc38b1a271384545", size = 982978, upload-time = "2026-05-15T04:50:42.195Z" }, + { url = 
"https://files.pythonhosted.org/packages/ef/8b/96cc178cc584e65d363134500f297790b06cd48cdeb1e8fcf7bbe60f4715/tiktoken-0.13.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:125bc05005e747f993a83dc67934249932d6e4209854452cd4c0b1d53fba3ba2", size = 1116355, upload-time = "2026-05-15T04:50:43.564Z" }, + { url = "https://files.pythonhosted.org/packages/86/f5/bab735d2c72ea55404b295d02d092644eb5f7cc6205e34d35eb9abfb9ab2/tiktoken-0.13.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:5e6358911cab4adee6712da27d65573496a4f68cf8a2b5fca6a4ad10fc5748cf", size = 1135772, upload-time = "2026-05-15T04:50:44.782Z" }, + { url = "https://files.pythonhosted.org/packages/4e/b9/6de04ebdf904edfaad87788011b3735087a0c9ea671b9027e1e4e965e8c8/tiktoken-0.13.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:975cbd78d085d75d26b59660e262736dcaed1e35f8f142cd6291025c01d25486", size = 1182415, upload-time = "2026-05-15T04:50:46.422Z" }, + { url = "https://files.pythonhosted.org/packages/0d/9c/470a05f3b1caf038f44880e334d47ab674e0c80d514c66b375d14d5afa10/tiktoken-0.13.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:75ab9bc99fa020a4c283424590ecd7f3afd70c1c281cb3fa3192a6c3af9f9615", size = 1239879, upload-time = "2026-05-15T04:50:48.052Z" }, + { url = "https://files.pythonhosted.org/packages/42/a6/c1936d16055436cb32e6c6128d68629622e00f4768562f55653752d34768/tiktoken-0.13.0-cp313-cp313-win_amd64.whl", hash = "sha256:6b1615f0ff71953d19729ceb18865429c185b0a23c5353f1bbca34a394bf60f7", size = 874829, upload-time = "2026-05-15T04:50:49.202Z" }, + { url = "https://files.pythonhosted.org/packages/d6/07/acb5992c3772b5a36284f742cfb7a5895aa4471d1848ac31464ad50d7fdf/tiktoken-0.13.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:6eb4a5bfbc6426938026b1a334e898ac53541360d62d8c689870160cc80abd67", size = 1033600, upload-time = "2026-05-15T04:50:50.4Z" }, + { url = 
"https://files.pythonhosted.org/packages/14/e9/742e9aec30f59b9f161f7ff7cd072e02ea836c9e1c0854a8076dfcd40d5c/tiktoken-0.13.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:43cee3e5400573b2046fbf092cc7a5bc30164f9e4c95ce20714da929df48737a", size = 982516, upload-time = "2026-05-15T04:50:52.03Z" }, + { url = "https://files.pythonhosted.org/packages/72/74/ca1541b053e7648254d2e4b42a253e1bb4359f2c91a0a8d49228c794e1a0/tiktoken-0.13.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:7de52e3f566d19b3b11bd37eea552c6c305ad74081f736882bd44d148ed4c48d", size = 1115518, upload-time = "2026-05-15T04:50:53.543Z" }, + { url = "https://files.pythonhosted.org/packages/46/e3/93825eaf5a4a504795b787e5d5dea07fbeb3dabf97aa7b450be8bde59c89/tiktoken-0.13.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:51384448aa508e4df84c0f7c1dc3211c7f7b8096325660ee5fc82f3e11b381ce", size = 1136867, upload-time = "2026-05-15T04:50:55.191Z" }, + { url = "https://files.pythonhosted.org/packages/8c/46/002b68de6827091d5ae90b048f326e8aad8d953520950e5ce1508879414f/tiktoken-0.13.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:e28157350f7ebf35008dd8e9e0fdb621f976e4230c881099c85e8cf07eaa50e2", size = 1181826, upload-time = "2026-05-15T04:50:56.296Z" }, + { url = "https://files.pythonhosted.org/packages/db/c6/d393e3185a276505182f7abd93fe714f3c444a2be9180798fa052347504e/tiktoken-0.13.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:165cf1820ea4a354985c2490a5205d4cc74661c934aca79dd0368232fff94e0f", size = 1239489, upload-time = "2026-05-15T04:50:57.918Z" }, + { url = "https://files.pythonhosted.org/packages/b7/4d/bc07d1f1635d4897a202acc0ae11c2886eaa7325c359ba4741b47bf8e225/tiktoken-0.13.0-cp313-cp313t-win_amd64.whl", hash = "sha256:6c43a675ca14f6f2749ba7f12075d37456015a24b859f2517b9beb4ef30807ec", size = 873820, upload-time = "2026-05-15T04:50:59.528Z" }, + { url = 
"https://files.pythonhosted.org/packages/8c/93/0dd6adca026a616c3a92974566b43381eea4b475ce1f36c062b8271a9ac5/tiktoken-0.13.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:eaaaef47c2406277181d2086484c317bf7fc433e2d5d03ff94f56b0dcec87471", size = 1034977, upload-time = "2026-05-15T04:51:00.957Z" }, + { url = "https://files.pythonhosted.org/packages/d9/77/5ec6e6bc5b30bed6d93f7f2162d8f6b32437b3ba27cb527cfe004f6109c9/tiktoken-0.13.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:ca8b310bd93b3772cb1b7922d915446864860f562bdfe4825c63a0aed3fb28cd", size = 983635, upload-time = "2026-05-15T04:51:02.629Z" }, + { url = "https://files.pythonhosted.org/packages/94/b0/c8ae9aff00d625c50659b4513e707a0462c4bf5d4d6cc1b802103225c02e/tiktoken-0.13.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:32e0c12305105002c047b3bb1070b0dd9a73b0cb3b2856a8972b810e7a4f5881", size = 1116036, upload-time = "2026-05-15T04:51:04.082Z" }, + { url = "https://files.pythonhosted.org/packages/1b/ac/6a5dddd1d0a6018ecb389bd0353e6b4a515eb4d2286611bd0ace1937b9e1/tiktoken-0.13.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:5ba5fd62507a932d1241346179e3b39bc7bf7408f03c272652d93b3bedf5db24", size = 1135544, upload-time = "2026-05-15T04:51:05.229Z" }, + { url = "https://files.pythonhosted.org/packages/f4/b8/585032b4384b2f7dcdaddcb52865c83a701a420d09e3c2b4a2be1c450c57/tiktoken-0.13.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:d108bc2d470fc53c8ecd24f2c0fd2b5f98c33e87cdb6aa2e9b8c5dced703d273", size = 1182217, upload-time = "2026-05-15T04:51:06.517Z" }, + { url = "https://files.pythonhosted.org/packages/cd/b6/993ff1ded3958215fd341a847b8e5ffeb5de473f435296870d314fc91ac4/tiktoken-0.13.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:cb99cb5127449f58d0a2d5f5ccfb390d8dbdfd919c221246caaee29d8725ed51", size = 1239404, upload-time = "2026-05-15T04:51:07.843Z" }, + { url = 
"https://files.pythonhosted.org/packages/dd/3d/fef7e06e3b33e7538db0ced734cf9fe23b6832d2ac4990c119c377aec55e/tiktoken-0.13.0-cp314-cp314-win_amd64.whl", hash = "sha256:115c4f26ffa11caac8b54eea35c2ad38c612c20a48d35dd15d70a02ac6f51f58", size = 918686, upload-time = "2026-05-15T04:51:08.925Z" }, + { url = "https://files.pythonhosted.org/packages/c1/82/a7fc44582bc32ab00de988a2299bf77c077f59068b233109e34b7d6ca7e6/tiktoken-0.13.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:472527e9132952f2fbf77cd290658bacf003d4d5a3fabc18e5fbd407cbae4d9b", size = 1034454, upload-time = "2026-05-15T04:51:10.035Z" }, + { url = "https://files.pythonhosted.org/packages/37/d0/24d8a890c14f432a05cea669c17bebeaa99f96a7c79523b590f564246411/tiktoken-0.13.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:4e2f67d27c9626cdd25fe33d9313c5cdb3d8d82da646b68d6eb8e7e9c20e6448", size = 982976, upload-time = "2026-05-15T04:51:11.23Z" }, + { url = "https://files.pythonhosted.org/packages/49/b7/2ab43f62788a9266187a9bfc1d3af99ad83e5eaa25fbef168a69cd5ad14f/tiktoken-0.13.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:2b920b35805cd64585a37c3dc7ce65fba4d2d36016be01e1d7942482ca29093a", size = 1115526, upload-time = "2026-05-15T04:51:12.608Z" }, + { url = "https://files.pythonhosted.org/packages/64/39/1494321ed323ce7a14d88e3cd6cb9058625977df1c6961ddc492bd10a9f3/tiktoken-0.13.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:493af3aa28a4aaf2e3d2600a2ee717252c9bf5ab38fff94eb5a02db5ab77e5ad", size = 1136466, upload-time = "2026-05-15T04:51:13.926Z" }, + { url = "https://files.pythonhosted.org/packages/96/d9/dfd086aa2d918c563a140720e0ce296cada1634efd2783d5cf51e05f984e/tiktoken-0.13.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:6644c9c2b5cf3916f5a3641d7d12fdb3f006a7b3d9ff6acdaec44e29ab1ff91e", size = 1181863, upload-time = "2026-05-15T04:51:15.025Z" }, + { url = 
"https://files.pythonhosted.org/packages/2f/68/a18b4f307086954fdae32714cb4f85562e34f9d34ab206e61f1816aa6018/tiktoken-0.13.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5cb65b60b9408563676d874a3a4ee573370066f0dc4e29d84e82e989c6517424", size = 1239218, upload-time = "2026-05-15T04:51:16.103Z" }, + { url = "https://files.pythonhosted.org/packages/16/5b/f2aa703a4fc5d2dff73460a7d46cc2f3f44aa0f3dd8eeb20d2a0ecf68862/tiktoken-0.13.0-cp314-cp314t-win_amd64.whl", hash = "sha256:85b78cc3a2c3d48723ca751fa981f1fedccd54194ca0471b957364353a898b07", size = 918110, upload-time = "2026-05-15T04:51:17.237Z" }, +] + [[package]] name = "tomli" version = "2.4.1" From b719d5a7c7f454d440984fb4f92f8cb5804a8a29 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Tue, 26 May 2026 16:16:25 +0500 Subject: [PATCH 032/318] fix(cli): use wider console for --audit and --session-stats Move _configure_runtime_console before controller query dispatch so --audit and --session-stats use Rich instead of PlainConsole. Add CLI_AUDIT_MAX_WIDTH=120 for controller query screens while keeping CLI_LAYOUT_MAX_WIDTH=80 for the standard analysis output. 
--- codeclone/surfaces/cli/workflow.py | 20 ++++++++++++++++---- codeclone/ui_messages/__init__.py | 1 + 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/codeclone/surfaces/cli/workflow.py b/codeclone/surfaces/cli/workflow.py index aba5ca52..5a16b987 100644 --- a/codeclone/surfaces/cli/workflow.py +++ b/codeclone/surfaces/cli/workflow.py @@ -292,16 +292,28 @@ def _run_pre_analysis_controller_query( if bool_attr(args, "session_stats"): from .session_stats import render_session_stats + stats_console = require_status_console( + _make_rich_console( + no_color=args.no_color, + width=ui.CLI_AUDIT_MAX_WIDTH, + ) + ) return render_session_stats( - console=_console(), + console=stats_console, root_path=root_path, quiet=args.quiet, ) if bool_attr(args, "audit"): from .audit import render_audit + audit_console = require_status_console( + _make_rich_console( + no_color=args.no_color, + width=ui.CLI_AUDIT_MAX_WIDTH, + ) + ) return render_audit( - console=_console(), + console=audit_console, root_path=root_path, audit_enabled=bool(getattr(args, "audit_enabled", False)), audit_path=str(getattr(args, "audit_path", "")), @@ -450,6 +462,8 @@ def _main_impl() -> None: args=args, strictness_explicit=strictness_explicit, ) + _configure_runtime_flags(args) + _configure_runtime_console(args) pre_analysis_query_exit = _run_pre_analysis_controller_query( args=args, root_path=root_path, @@ -462,8 +476,6 @@ def _main_impl() -> None: if git_diff_ref is not None else () ) - _configure_runtime_flags(args) - _configure_runtime_console(args) _validate_numeric_args_or_exit( args=args, validate_numeric_args_fn=_validate_numeric_args, diff --git a/codeclone/ui_messages/__init__.py b/codeclone/ui_messages/__init__.py index 6cc1cc5e..0293269d 100644 --- a/codeclone/ui_messages/__init__.py +++ b/codeclone/ui_messages/__init__.py @@ -237,6 +237,7 @@ PATCH_VERIFY_TITLE = "Patch Verify" CLI_LAYOUT_MAX_WIDTH = 80 +CLI_AUDIT_MAX_WIDTH = 120 SUMMARY_LABEL_FILES_FOUND = "Files found" 
SUMMARY_LABEL_FILES_ANALYZED = " analyzed" From 42094b0a749960f10c93107a3c22252a22149cc7 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Tue, 26 May 2026 16:16:46 +0500 Subject: [PATCH 033/318] feat(audit): add MCP payload token budget estimation Estimate BPE token footprint of MCP JSON payloads in the audit trail using tiktoken (optional, via codeclone[token-bench]). Falls back to ceil(chars/4) approximation. Adds Tokens column and session footprint summary to --audit and --session-stats CLI views. Idempotent schema migration adds nullable token columns to existing v1 databases. --- benchmarks/mcp_token_budget.py | 271 +++++++++++++++++++ codeclone/audit/reader.py | 76 +++++- codeclone/audit/schema.py | 31 ++- codeclone/audit/validation.py | 6 + codeclone/audit/writer.py | 36 ++- codeclone/budget/__init__.py | 16 ++ codeclone/budget/estimator.py | 93 +++++++ codeclone/surfaces/cli/audit.py | 49 +++- codeclone/surfaces/cli/session_stats.py | 46 ++++ docs/book/24-structural-change-controller.md | 2 +- pyproject.toml | 4 +- tests/test_audit_writer.py | 162 +++++++++++ tests/test_token_estimator.py | 93 +++++++ uv.lock | 16 +- 14 files changed, 870 insertions(+), 31 deletions(-) create mode 100644 benchmarks/mcp_token_budget.py create mode 100644 codeclone/budget/__init__.py create mode 100644 codeclone/budget/estimator.py create mode 100644 tests/test_token_estimator.py diff --git a/benchmarks/mcp_token_budget.py b/benchmarks/mcp_token_budget.py new file mode 100644 index 00000000..5af434be --- /dev/null +++ b/benchmarks/mcp_token_budget.py @@ -0,0 +1,271 @@ +#!/usr/bin/env python3 +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Standalone benchmark for MCP payload token budget estimation. 
+ +Requires the ``codeclone[token-bench]`` extra (``tiktoken``). + +Usage:: + + uv run python benchmarks/mcp_token_budget.py +""" + +from __future__ import annotations + +import json +import sys + + +def main() -> None: + try: + from codeclone.budget.estimator import estimate_payload + except ImportError: + print( + "ERROR: tiktoken not installed. " + "Install with: uv pip install 'codeclone[token-bench]'", + file=sys.stderr, + ) + sys.exit(1) + + scenarios: dict[str, dict[str, object]] = { + "analyze_repository_small": _analyze_repository_small(), + "get_blast_radius_bounded": _blast_radius_bounded(), + "get_blast_radius_large_truncated": _blast_radius_large(), + "check_patch_contract_verify": _patch_contract_verify(), + "create_review_receipt_markdown": _review_receipt(), + "manage_change_intent_declare": _change_intent_declare(), + } + + results: dict[str, dict[str, int]] = {} + total_chars = 0 + total_tokens = 0 + + for name, payload in scenarios.items(): + estimate = estimate_payload(payload) + results[name] = { + "chars": estimate.characters, + "tokens": estimate.tokens, + } + total_chars += estimate.characters + total_tokens += estimate.tokens + + results["full_workflow_all_calls"] = { + "chars": total_chars, + "tokens": total_tokens, + } + + output = { + "encoder": "o200k_base", + "scenarios": results, + } + + print(json.dumps(output, indent=2)) + + +def _analyze_repository_small() -> dict[str, object]: + return { + "run_id": "abc12345", + "focus": "repository", + "version": "2.1.0a1", + "schema": "2.11", + "mode": "full", + "baseline": { + "loaded": True, + "status": "ok", + "trusted": True, + }, + "inventory": {"files": 120, "lines": 45000, "functions": 800, "classes": 90}, + "health": { + "score": 92, + "grade": "A", + "dimensions": { + "clones": 100, + "complexity": 75, + "coupling": 80, + "cohesion": 95, + "dead_code": 100, + "coverage": 85, + "dependencies": 90, + }, + }, + "findings": { + "total": 3, + "new": 1, + "known": 2, + "by_family": 
{"clones": 2, "dead_code": 1}, + }, + "warnings": [], + "failures": [], + } + + +def _blast_radius_bounded() -> dict[str, object]: + return { + "radius_level": "medium", + "direct_dependents": [ + { + "path": f"pkg/module_{i}.py", + "reason": "imports target", + "edge_type": "import", + } + for i in range(8) + ], + "clone_cohort_members": [ + { + "path": f"pkg/clone_{i}.py", + "finding_id": f"CCLONE00{i}", + "clone_type": "Type-2", + } + for i in range(3) + ], + "do_not_touch": [ + {"path": ".cache/codeclone/**", "reason": "generated state"}, + {"path": "codeclone.baseline.json", "reason": "baseline file"}, + ], + "review_context": [ + { + "path": f"pkg/context_{i}.py", + "reason": "report-only signal", + "category": "security_boundary", + } + for i in range(5) + ], + "structural_risk": { + "hub_dependents": 8, + "cohort_spread": 3, + }, + } + + +def _blast_radius_large() -> dict[str, object]: + base = _blast_radius_bounded() + base["direct_dependents"] = [ + { + "path": f"pkg/deep/sub/module_{i}.py", + "reason": "transitive import chain via pkg.core", + "edge_type": "import", + } + for i in range(50) + ] + base["review_context"] = [ + { + "path": f"pkg/large_context_{i}.py", + "reason": f"overloaded module candidate (score={0.7 + i * 0.01:.2f})", + "category": "overloaded_module", + } + for i in range(30) + ] + return base + + +def _patch_contract_verify() -> dict[str, object]: + return { + "mode": "verify", + "status": "accepted", + "before": {"run_id": "before12", "health": 90}, + "after": {"run_id": "after123", "health": 90}, + "strictness": "ci", + "structural_delta": { + "regressions": [], + "improvements": [ + {"id": "CCLONE001", "kind": "clone_group", "severity": "medium"} + ], + "health_delta": 0, + "verdict": "stable", + }, + "worsened": [], + "scope_check": { + "status": "clean", + "declared_scope": ["pkg/a.py", "pkg/b.py"], + "actual_changed_files": ["pkg/a.py"], + "unexpected_files": [], + "forbidden_touched": [], + }, + "gate_preview": {"would_fail": 
False, "exit_code": 0, "reasons": []}, + "baseline_abuse": {"detected": False, "triggers": []}, + "contract_violations": [], + "blocking_violations": [], + "message": "Patch contract accepted.", + } + + +def _review_receipt() -> dict[str, object]: + return { + "format": "markdown", + "receipt": { + "verdict": "clean", + "provenance": { + "digest": "a" * 64, + "schema_version": "2.11", + "baseline_trust": "ok", + "run_id": "abc12345", + "root": "/repo", + }, + "scope": { + "intent_id": "intent-abc-001", + "declared_files": ["pkg/a.py", "pkg/b.py"], + "changed_files": ["pkg/a.py"], + "unexpected_files": [], + }, + "blast_radius_summary": { + "level": "low", + "direct_dependents": 2, + "clone_cohorts": 0, + "do_not_touch": 3, + }, + "reviewed_findings": [ + { + "finding_id": "CCLONE001", + "reviewed": True, + "note": "Accepted: intentional parallel implementation", + } + ], + "patch_contract": { + "status": "accepted", + "violations": [], + }, + "human_decision_points": [ + "Clone divergence in pkg/a.py:func_a acknowledged", + ], + "claims_not_made": [ + "Security Surfaces are boundary inventory, not vulnerability claims", + "Report-only signals are not CI gates", + ], + }, + } + + +def _change_intent_declare() -> dict[str, object]: + return { + "intent_id": "intent-abc-001", + "run_id": "abc12345", + "status": "active", + "scope": { + "allowed_files": ["pkg/a.py", "pkg/b.py", "tests/test_a.py"], + "allowed_related": ["pkg/utils.py"], + "forbidden": [".cache/**", "codeclone.baseline.json"], + }, + "intent": "Refactor module A and B to reduce coupling", + "guards": [ + "scope_expansion_requires_explanation", + "baseline_update_forbidden", + "new_structural_regression_forbidden", + ], + "blast_radius_summary": { + "radius_level": "medium", + "direct_dependents_count": 5, + "clone_cohort_members_count": 1, + "do_not_touch_count": 3, + }, + "concurrent_intents": [], + "workspace_registered": True, + "ttl_seconds": 3600, + } + + +if __name__ == "__main__": + main() diff 
--git a/codeclone/audit/reader.py b/codeclone/audit/reader.py index f36393ab..97c3aafe 100644 --- a/codeclone/audit/reader.py +++ b/codeclone/audit/reader.py @@ -24,6 +24,9 @@ class AuditRecord: intent_id: str | None status: str | None agent_label: str + estimated_tokens: int | None = None + token_encoding: str | None = None + payload_characters: int | None = None @dataclass(frozen=True, slots=True) @@ -39,6 +42,9 @@ class AuditSummary: oldest_event_utc: str | None latest_event_utc: str | None events: tuple[AuditRecord, ...] + total_estimated_tokens: int | None = None + token_encoding: str | None = None + token_event_count: int = 0 def read_audit_summary(*, db_path: Path, limit: int = 50) -> AuditSummary: @@ -78,18 +84,33 @@ def read_audit_summary(*, db_path: Path, limit: int = 50) -> AuditSummary: ) oldest = _text_scalar(conn, "SELECT MIN(created_at_utc) FROM controller_events") latest = _text_scalar(conn, "SELECT MAX(created_at_utc) FROM controller_events") - rows = conn.execute( - "SELECT event_id, event_type, severity, created_at_utc, run_id, " - "intent_id, status, agent_label " - "FROM controller_events " - "ORDER BY created_at_utc DESC, id DESC " - "LIMIT ?", - (max(1, int(limit)),), - ).fetchall() + token_cols = _has_token_columns(conn) + if token_cols: + rows = conn.execute( + "SELECT event_id, event_type, severity, created_at_utc, run_id, " + "intent_id, status, agent_label, " + "estimated_tokens, token_encoding, payload_characters " + "FROM controller_events " + "ORDER BY created_at_utc DESC, id DESC " + "LIMIT ?", + (max(1, int(limit)),), + ).fetchall() + token_summary = _token_summary(conn) + else: + rows = conn.execute( + "SELECT event_id, event_type, severity, created_at_utc, run_id, " + "intent_id, status, agent_label " + "FROM controller_events " + "ORDER BY created_at_utc DESC, id DESC " + "LIMIT ?", + (max(1, int(limit)),), + ).fetchall() + token_summary = (None, None, 0) except (sqlite3.Error, AuditSchemaError) as exc: raise 
AuditReadError(f"cannot read audit database: {exc}") from exc finally: conn.close() + total_tokens, token_enc, token_event_cnt = token_summary return AuditSummary( db_path=db_path, db_size_bytes=_db_size(db_path), @@ -102,6 +123,9 @@ def read_audit_summary(*, db_path: Path, limit: int = 50) -> AuditSummary: oldest_event_utc=oldest, latest_event_utc=latest, events=tuple(_record_from_row(row) for row in rows), + total_estimated_tokens=total_tokens, + token_encoding=token_enc, + token_event_count=token_event_cnt, ) @@ -115,9 +139,41 @@ def _record_from_row(row: tuple[object, ...]) -> AuditRecord: intent_id=_str_or_none(row[5]), status=_str_or_none(row[6]), agent_label=_str_or_empty(row[7]), + estimated_tokens=_int_or_none(row[8]) if len(row) > 8 else None, + token_encoding=_str_or_none(row[9]) if len(row) > 9 else None, + payload_characters=_int_or_none(row[10]) if len(row) > 10 else None, ) +def _has_token_columns(conn: sqlite3.Connection) -> bool: + """Check whether the controller_events table has token columns.""" + columns = { + row[1] + for row in conn.execute("PRAGMA table_info(controller_events)").fetchall() + } + return "estimated_tokens" in columns + + +def _token_summary( + conn: sqlite3.Connection, +) -> tuple[int | None, str | None, int]: + """Aggregate token estimation data across all events.""" + row = conn.execute( + "SELECT SUM(estimated_tokens), COUNT(estimated_tokens) " + "FROM controller_events WHERE estimated_tokens IS NOT NULL" + ).fetchone() + if row is None or row[1] == 0: + return None, None, 0 + total_tokens = row[0] if isinstance(row[0], int) else None + event_count = row[1] if isinstance(row[1], int) else 0 + enc_row = conn.execute( + "SELECT token_encoding FROM controller_events " + "WHERE token_encoding IS NOT NULL LIMIT 1" + ).fetchone() + encoding = _str_or_none(enc_row[0]) if enc_row else None + return total_tokens, encoding, event_count + + def _count(conn: sqlite3.Connection, sql: str) -> int: value = conn.execute(sql).fetchone() if 
value is None: @@ -158,4 +214,8 @@ def _str_or_none(value: object) -> str | None: return value if isinstance(value, str) else None +def _int_or_none(value: object) -> int | None: + return value if isinstance(value, int) and not isinstance(value, bool) else None + + __all__ = ["AuditRecord", "AuditSummary", "read_audit_summary"] diff --git a/codeclone/audit/schema.py b/codeclone/audit/schema.py index 7b00394b..3b73e04f 100644 --- a/codeclone/audit/schema.py +++ b/codeclone/audit/schema.py @@ -29,7 +29,11 @@ agent_pid INTEGER NOT NULL, status TEXT, - payload_json TEXT NOT NULL DEFAULT '{}' + payload_json TEXT NOT NULL DEFAULT '{}', + + estimated_tokens INTEGER, + token_encoding TEXT, + payload_characters INTEGER ) """ @@ -70,9 +74,9 @@ def ensure_schema(conn: sqlite3.Connection) -> None: if current is None: create_schema_v1(conn) return - if current == AUDIT_SCHEMA_VERSION: - return - raise AuditSchemaError(f"Unsupported audit schema version: {current}") + if current != AUDIT_SCHEMA_VERSION: + raise AuditSchemaError(f"Unsupported audit schema version: {current}") + _migrate_v1_add_token_columns(conn) def create_schema_v1(conn: sqlite3.Connection) -> None: @@ -94,6 +98,25 @@ def create_schema_v1(conn: sqlite3.Connection) -> None: conn.commit() +def _migrate_v1_add_token_columns(conn: sqlite3.Connection) -> None: + """Add nullable token estimation columns to an existing v1 schema. + + Idempotent: checks which columns already exist before altering. 
+ """ + existing = { + row[1] + for row in conn.execute("PRAGMA table_info(controller_events)").fetchall() + } + for col, col_type in ( + ("estimated_tokens", "INTEGER"), + ("token_encoding", "TEXT"), + ("payload_characters", "INTEGER"), + ): + if col not in existing: + conn.execute(f"ALTER TABLE controller_events ADD COLUMN {col} {col_type}") + conn.commit() + + def get_meta(conn: sqlite3.Connection, key: str) -> str | None: try: row = conn.execute( diff --git a/codeclone/audit/validation.py b/codeclone/audit/validation.py index 8adf2748..15ad0f80 100644 --- a/codeclone/audit/validation.py +++ b/codeclone/audit/validation.py @@ -61,6 +61,9 @@ class EventRow: agent_pid: int status: str | None payload_json: str + estimated_tokens: int | None = None + token_encoding: str | None = None + payload_characters: int | None = None def as_tuple(self) -> tuple[object, ...]: return ( @@ -76,6 +79,9 @@ def as_tuple(self) -> tuple[object, ...]: self.agent_pid, self.status, self.payload_json, + self.estimated_tokens, + self.token_encoding, + self.payload_characters, ) diff --git a/codeclone/audit/writer.py b/codeclone/audit/writer.py index 0665037f..16da0071 100644 --- a/codeclone/audit/writer.py +++ b/codeclone/audit/writer.py @@ -8,9 +8,10 @@ import json import threading +from collections.abc import Mapping from datetime import datetime, timedelta, timezone from pathlib import Path -from typing import Protocol +from typing import TYPE_CHECKING, Protocol from ..report.meta import current_report_timestamp_utc from .events import ( @@ -22,6 +23,9 @@ from .schema import open_audit_db from .validation import EventRow, validate_event_row +if TYPE_CHECKING: + from ..budget.estimator import TokenEstimate + _INSERT_SQL = """ INSERT INTO controller_events( event_id, @@ -35,8 +39,11 @@ agent_label, agent_pid, status, - payload_json -) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
+ payload_json, + estimated_tokens, + token_encoding, + payload_characters +) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) """ @@ -115,6 +122,7 @@ def _run_retention_gc(self) -> None: def event_to_row(*, event: AuditEvent, payloads: AuditPayloadMode) -> EventRow: payload_json = _payload_json(event=event, payloads=payloads) + token_estimate = _estimate_payload_tokens(event.payload) return EventRow( event_id=generate_event_id(), event_type=event.event_type, @@ -128,9 +136,31 @@ def event_to_row(*, event: AuditEvent, payloads: AuditPayloadMode) -> EventRow: agent_pid=event.agent_pid, status=event.status, payload_json=payload_json, + estimated_tokens=token_estimate.tokens if token_estimate else None, + token_encoding=token_estimate.encoding if token_estimate else None, + payload_characters=token_estimate.characters if token_estimate else None, ) +def _estimate_payload_tokens( + payload: Mapping[str, object] | None, +) -> TokenEstimate | None: + """Estimate token count for the full original payload. + + Lazy import of ``codeclone.budget.estimator``. Any failure + (ImportError, encoding error, etc.) returns None — the audit writer + never fails because of token estimation. + """ + if payload is None: + return None + try: + from ..budget.estimator import estimate_payload + + return estimate_payload(payload) + except Exception: + return None + + def _payload_json(*, event: AuditEvent, payloads: AuditPayloadMode) -> str: if payloads == "off": return "{}" diff --git a/codeclone/budget/__init__.py b/codeclone/budget/__init__.py new file mode 100644 index 00000000..eb319e1b --- /dev/null +++ b/codeclone/budget/__init__.py @@ -0,0 +1,16 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""MCP payload token budget estimation (optional leaf module). + +Requires the ``codeclone[token-bench]`` extra for exact BPE counts. +Falls back to character-based approximation when ``tiktoken`` is absent. + +This module must not import from ``codeclone.surfaces`` or +``codeclone.audit``. Dependency direction: ``audit -> budget``. +""" + +from __future__ import annotations diff --git a/codeclone/budget/estimator.py b/codeclone/budget/estimator.py new file mode 100644 index 00000000..34f3e188 --- /dev/null +++ b/codeclone/budget/estimator.py @@ -0,0 +1,93 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Deterministic token-count estimator for MCP JSON payloads. + +Uses ``tiktoken`` when available, falls back to ``ceil(chars / 4)`` +character-based approximation otherwise. The payload is serialized to +canonical JSON (sorted keys, compact separators, no ASCII escaping) +before counting. + +This module is imported lazily by the audit writer. Base ``codeclone`` +never imports ``tiktoken``. +""" + +from __future__ import annotations + +import json +from collections.abc import Mapping +from dataclasses import dataclass + + +@dataclass(frozen=True, slots=True) +class TokenEstimate: + """Result of a payload token estimation.""" + + encoding: str # e.g. "o200k_base" or "chars_approx" + characters: int + tokens: int + method: str # "tiktoken" | "chars_approx" + + +def estimate_payload( + payload: Mapping[str, object], + *, + encoding: str = "o200k_base", +) -> TokenEstimate: + """Estimate token count for a canonical JSON payload. + + Uses tiktoken if available, falls back to character-based approximation. 
+ The payload is serialized to the same canonical form used by the audit + writer: sorted keys, compact separators, no ASCII escaping. + """ + text = _canonical_json(payload) + characters = len(text) + try: + return _tiktoken_estimate(text, encoding=encoding) + except _TiktokenUnavailable: + return TokenEstimate( + encoding="chars_approx", + characters=characters, + tokens=_approx_tokens(characters), + method="chars_approx", + ) + + +def _canonical_json(payload: Mapping[str, object]) -> str: + return json.dumps( + payload, + sort_keys=True, + separators=(",", ":"), + ensure_ascii=False, + default=str, + ) + + +class _TiktokenUnavailable(Exception): + pass + + +def _tiktoken_estimate(text: str, *, encoding: str) -> TokenEstimate: + try: + import tiktoken + except ImportError as exc: + raise _TiktokenUnavailable from exc + enc = tiktoken.get_encoding(encoding) + tokens = len(enc.encode(text)) + return TokenEstimate( + encoding=encoding, + characters=len(text), + tokens=tokens, + method="tiktoken", + ) + + +def _approx_tokens(characters: int) -> int: + """Rough approximation: 1 token ~ 4 characters for JSON.""" + return -(-characters // 4) # ceil division + + +__all__ = ["TokenEstimate", "estimate_payload"] diff --git a/codeclone/surfaces/cli/audit.py b/codeclone/surfaces/cli/audit.py index 6a38793c..68ab39c6 100644 --- a/codeclone/surfaces/cli/audit.py +++ b/codeclone/surfaces/cli/audit.py @@ -122,27 +122,48 @@ def _render_verbose_rich(*, console: PrinterLike, summary: AuditSummary) -> int: style="red" if summary.violation_events else "green", ), ) + if summary.total_estimated_tokens is not None and summary.token_event_count > 0: + enc_label = summary.token_encoding or "unknown" + meta.add_row( + "MCP token footprint", + f"~{summary.total_estimated_tokens:,} tokens " + f"({enc_label}, {summary.token_event_count} tool calls)", + ) console.print(Panel(meta, border_style="cyan")) - table = Table(box=box.SIMPLE_HEAVY, expand=True) + table = Table(box=box.SIMPLE_HEAVY) + 
table.add_column("Tokens", justify="right", no_wrap=True) table.add_column("Time", no_wrap=True) table.add_column("Type", no_wrap=True) table.add_column("Severity", no_wrap=True) - table.add_column("Intent", overflow="fold") + table.add_column("Intent", no_wrap=True) table.add_column("Status", no_wrap=True) table.add_column("Run", no_wrap=True) - table.add_column("Agent", overflow="fold") + table.add_column("Agent", no_wrap=True) for event in summary.events: table.add_row( + _format_tokens(event.estimated_tokens), _short_time(event.created_at_utc), _short_type(event.event_type), Text(event.severity, style=_severity_style(event.severity)), - event.intent_id or "-", + _short_intent(event.intent_id), event.status or "-", _short_run(event.run_id), - event.agent_label or "-", + _short_agent(event.agent_label), ) console.print(table) + + if summary.total_estimated_tokens is not None and summary.token_event_count > 0: + enc_label = summary.token_encoding or "unknown" + console.print( + Text( + f"Session MCP token footprint: " + f"~{summary.total_estimated_tokens:,} tokens " + f"({enc_label}, {summary.token_event_count} tool calls)", + style="dim", + ) + ) + return int(ExitCode.SUCCESS) @@ -173,6 +194,18 @@ def _short_type(event_type: str) -> str: return aliases.get(event_type, event_type.rsplit(".", maxsplit=1)[-1]) +def _short_intent(intent_id: str | None) -> str: + if not intent_id: + return "-" + return intent_id.removeprefix("intent-") + + +def _short_agent(agent_label: str | None) -> str: + if not agent_label: + return "-" + return agent_label.replace("claude-code/", "cc/") + + def _short_run(run_id: str | None) -> str: return run_id[:8] if run_id else "-" @@ -214,6 +247,12 @@ def _parse_utc(value: str) -> datetime | None: return None +def _format_tokens(value: int | None) -> str: + if value is None: + return "—" + return f"{value:,}" + + def _format_bytes(value: int) -> str: if value < 1024: return f"{value} B" diff --git a/codeclone/surfaces/cli/session_stats.py 
b/codeclone/surfaces/cli/session_stats.py index 1a7bcf85..aa8065fe 100644 --- a/codeclone/surfaces/cli/session_stats.py +++ b/codeclone/surfaces/cli/session_stats.py @@ -60,6 +60,9 @@ class _SessionSnapshot: latest_run_age_seconds: int | None cache_present: bool workspace_health: str + mcp_token_footprint: int | None = None + mcp_token_encoding: str | None = None + mcp_token_event_count: int = 0 def render_session_stats( @@ -172,6 +175,8 @@ def _collect_session_snapshot(root_path: Path) -> _SessionSnapshot: expired_count=expired_count, ) + mcp_tokens, mcp_enc, mcp_count = _read_audit_token_footprint(root_path) + return _SessionSnapshot( root=root_path, agents=tuple(agents), @@ -185,6 +190,9 @@ def _collect_session_snapshot(root_path: Path) -> _SessionSnapshot: latest_run_age_seconds=latest_run_age_seconds, cache_present=cache_present, workspace_health=workspace_health, + mcp_token_footprint=mcp_tokens, + mcp_token_encoding=mcp_enc, + mcp_token_event_count=mcp_count, ) @@ -265,6 +273,13 @@ def _render_verbose(console: PrinterLike, snapshot: _SessionSnapshot) -> int: console.print(f" Stale intents: {snapshot.stale_count}") console.print(f" Expired intents: {snapshot.expired_count}") console.print(f" Recoverable: {snapshot.recoverable_count}") + if snapshot.mcp_token_footprint is not None and snapshot.mcp_token_event_count > 0: + enc = snapshot.mcp_token_encoding or "unknown" + console.print( + f" MCP payload footprint: " + f"~{snapshot.mcp_token_footprint:,} tokens " + f"({enc}, {snapshot.mcp_token_event_count} tool calls)" + ) console.print() console.print(f" Workspace health: {snapshot.workspace_health}") return int(ExitCode.SUCCESS) @@ -293,6 +308,13 @@ def _render_verbose_rich(console: PrinterLike, snapshot: _SessionSnapshot) -> in summary.add_row("Stale intents", str(snapshot.stale_count)) summary.add_row("Expired intents", str(snapshot.expired_count)) summary.add_row("Recoverable", str(snapshot.recoverable_count)) + if snapshot.mcp_token_footprint is not None 
and snapshot.mcp_token_event_count > 0: + enc = snapshot.mcp_token_encoding or "unknown" + summary.add_row( + "MCP payload footprint", + f"~{snapshot.mcp_token_footprint:,} tokens " + f"({enc}, {snapshot.mcp_token_event_count} tool calls)", + ) health_text = text_cls( snapshot.workspace_health, style=_health_style(snapshot.workspace_health), @@ -546,6 +568,30 @@ def _process_start_epoch() -> int: return int(time.time()) +def _read_audit_token_footprint( + root_path: Path, +) -> tuple[int | None, str | None, int]: + """Read aggregate token estimation from audit trail, if available.""" + try: + from ...audit.reader import read_audit_summary + from ...audit.validation import resolve_audit_path + + db_path = resolve_audit_path( + root_path=root_path, + value=".cache/codeclone/audit.sqlite3", + ) + if not db_path.is_file(): + return None, None, 0 + summary = read_audit_summary(db_path=db_path, limit=1) + return ( + summary.total_estimated_tokens, + summary.token_encoding, + summary.token_event_count, + ) + except Exception: + return None, None, 0 + + def _format_age(seconds: int | None) -> str: if seconds is None or seconds < 0: return "unknown" diff --git a/docs/book/24-structural-change-controller.md b/docs/book/24-structural-change-controller.md index f80ea258..1f25b923 100644 --- a/docs/book/24-structural-change-controller.md +++ b/docs/book/24-structural-change-controller.md @@ -22,7 +22,7 @@ queries: | Claim guard | Live in `2.1.0a1` | MCP `validate_review_claims` | | Scope-aware verification | Live in `2.1.0a1` | MCP `check_patch_contract` | | Workspace relations | Live in `2.1.0a1` | MCP `manage_change_intent` | -| MCP payload token budget | Planned | Audit trail, CLI `--audit`, `--session-stats` | +| MCP payload token budget | Live in `2.1.0a1` | Audit trail, CLI `--audit`, `--session-stats` | ## Contract diff --git a/pyproject.toml b/pyproject.toml index 3f971a38..aae207cf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -68,7 +68,7 @@ mcp = [ 
"httpx>=0.27.1,<1", ] token-bench = [ - "tiktoken>=0.8", + "tiktoken>=0.13.0", ] dev = [ "pytest>=9.0.3", @@ -145,7 +145,7 @@ golden_fixture_paths = ["tests/fixtures/golden_*"] min_typing_coverage = 99 audit_enabled = true audit_path = ".cache/codeclone/audit.sqlite3" -audit_payloads = "compact" # "off" | "compact" | "full" +audit_payloads = "full" # "off" | "compact" | "full" audit_retention_days = 30 diff --git a/tests/test_audit_writer.py b/tests/test_audit_writer.py index 09b97b7f..f7767ca0 100644 --- a/tests/test_audit_writer.py +++ b/tests/test_audit_writer.py @@ -97,6 +97,168 @@ def test_null_writer_is_noop(tmp_path: Path) -> None: writer.close() +def test_audit_event_row_includes_token_fields(tmp_path: Path) -> None: + """Token estimation fields are populated when tiktoken is available.""" + db_path = tmp_path / "audit.sqlite3" + writer = SqliteAuditWriter(db_path=db_path, payloads="compact", retention_days=30) + try: + writer.emit(_event(tmp_path)) + finally: + writer.close() + + conn = sqlite3.connect(db_path) + try: + row = conn.execute( + "SELECT estimated_tokens, token_encoding, payload_characters " + "FROM controller_events" + ).fetchone() + finally: + conn.close() + + assert row is not None + estimated_tokens, token_encoding, payload_characters = row + assert isinstance(estimated_tokens, int) + assert estimated_tokens > 0 + assert isinstance(token_encoding, str) + assert token_encoding in {"o200k_base", "chars_approx"} + assert isinstance(payload_characters, int) + assert payload_characters > 0 + + +def test_audit_event_row_token_fields_null_when_no_payload(tmp_path: Path) -> None: + """Token columns are NULL when payload is None.""" + db_path = tmp_path / "audit.sqlite3" + writer = SqliteAuditWriter(db_path=db_path, payloads="compact", retention_days=30) + event = AuditEvent( + event_type=EVENT_INTENT_DECLARED, + severity="info", + repo_root_digest=repo_root_digest(tmp_path), + agent_pid=123, + agent_label="test-agent", + payload=None, + ) + try: + 
writer.emit(event) + finally: + writer.close() + + conn = sqlite3.connect(db_path) + try: + row = conn.execute( + "SELECT estimated_tokens, token_encoding, payload_characters " + "FROM controller_events" + ).fetchone() + finally: + conn.close() + + assert row is not None + assert row[0] is None # estimated_tokens + assert row[1] is None # token_encoding + assert row[2] is None # payload_characters + + +def test_token_estimation_failure_does_not_break_audit(tmp_path: Path) -> None: + """Audit event write succeeds even when estimation raises.""" + from unittest.mock import patch as mock_patch + + db_path = tmp_path / "audit.sqlite3" + writer = SqliteAuditWriter(db_path=db_path, payloads="compact", retention_days=30) + try: + with mock_patch( + "codeclone.audit.writer._estimate_payload_tokens", + side_effect=RuntimeError("boom"), + ): + writer.emit(_event(tmp_path)) + finally: + writer.close() + + # The event should still be written (emit swallows exceptions) + # but since _estimate_payload_tokens raised before event_to_row completed, + # the entire emit is swallowed by the outer try/except in emit(). + # This confirms the audit writer never crashes from estimation failures. 
+ summary = read_audit_summary(db_path=db_path) + assert summary.total_events <= 1 + + +def test_audit_schema_migration_adds_token_columns(tmp_path: Path) -> None: + """Existing v1 DB without token columns gets them after ensure_schema.""" + from codeclone.audit.schema import ensure_schema + from codeclone.audit.validation import AUDIT_SCHEMA_VERSION + + db_path = tmp_path / "audit.sqlite3" + conn = sqlite3.connect(db_path) + try: + # Create a v1 schema without token columns + conn.execute(""" + CREATE TABLE controller_events ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + event_id TEXT NOT NULL UNIQUE, + event_type TEXT NOT NULL, + severity TEXT NOT NULL DEFAULT 'info', + created_at_utc TEXT NOT NULL, + repo_root_digest TEXT NOT NULL, + run_id TEXT, + intent_id TEXT, + report_digest TEXT, + agent_label TEXT NOT NULL DEFAULT '', + agent_pid INTEGER NOT NULL, + status TEXT, + payload_json TEXT NOT NULL DEFAULT '{}' + ) + """) + conn.execute( + "CREATE TABLE audit_meta(key TEXT PRIMARY KEY, value TEXT NOT NULL)" + ) + conn.execute( + "INSERT INTO audit_meta(key, value) VALUES ('schema_version', ?)", + (AUDIT_SCHEMA_VERSION,), + ) + conn.commit() + + # Verify no token columns yet + cols_before = { + row[1] + for row in conn.execute("PRAGMA table_info(controller_events)").fetchall() + } + assert "estimated_tokens" not in cols_before + + # Run migration + ensure_schema(conn) + + # Verify columns added + cols_after = { + row[1] + for row in conn.execute("PRAGMA table_info(controller_events)").fetchall() + } + assert "estimated_tokens" in cols_after + assert "token_encoding" in cols_after + assert "payload_characters" in cols_after + + # Verify insert works with new columns + conn.execute( + "INSERT INTO controller_events" + "(event_id, event_type, severity, created_at_utc, " + "repo_root_digest, agent_label, agent_pid, " + "estimated_tokens, token_encoding, payload_characters) " + "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", + ( + "evt_test", + "intent.declared", + "info", + 
"2026-01-01T00:00:00Z", + "abc123", + "agent", + 1, + 42, + "o200k_base", + 168, + ), + ) + conn.commit() + finally: + conn.close() + + def test_event_validation_rejects_unknown_type() -> None: row = EventRow( event_id="evt_1", diff --git a/tests/test_token_estimator.py b/tests/test_token_estimator.py new file mode 100644 index 00000000..6ee40c09 --- /dev/null +++ b/tests/test_token_estimator.py @@ -0,0 +1,93 @@ +from __future__ import annotations + +from unittest.mock import patch + +import pytest + +from codeclone.budget.estimator import estimate_payload + + +def test_estimate_payload_with_tiktoken() -> None: + """Exact BPE estimation when tiktoken is available.""" + payload = { + "status": "accepted", + "health": 90, + "findings": {"total": 5, "new": 2}, + "message": "Patch contract accepted.", + } + result = estimate_payload(payload) + assert result.method == "tiktoken" + assert result.encoding == "o200k_base" + assert result.tokens > 0 + assert result.characters > 0 + assert result.tokens < result.characters + + +def test_estimate_payload_without_tiktoken() -> None: + """Character-based fallback when tiktoken import fails.""" + payload = {"key": "value", "number": 42} + with patch.dict("sys.modules", {"tiktoken": None}): + result = estimate_payload(payload) + assert result.method == "chars_approx" + assert result.encoding == "chars_approx" + assert result.tokens == -(-result.characters // 4) + + +def test_estimate_payload_canonical_json_determinism() -> None: + """Same content in different insertion order -> identical estimates.""" + payload_a = {"z_last": 1, "a_first": 2, "m_middle": 3} + payload_b = {"a_first": 2, "m_middle": 3, "z_last": 1} + result_a = estimate_payload(payload_a) + result_b = estimate_payload(payload_b) + assert result_a.tokens == result_b.tokens + assert result_a.characters == result_b.characters + + +def test_estimate_empty_payload() -> None: + """Empty dict produces minimal token count.""" + result = estimate_payload({}) + assert 
result.characters == 2 # "{}" + assert result.tokens >= 1 + + +def test_estimate_payload_custom_encoding() -> None: + """Custom encoding parameter is passed through.""" + result = estimate_payload({"key": "value"}, encoding="cl100k_base") + assert result.encoding == "cl100k_base" + assert result.method == "tiktoken" + assert result.tokens > 0 + + +def test_token_estimate_is_frozen() -> None: + """TokenEstimate is immutable.""" + result = estimate_payload({"x": 1}) + with pytest.raises(AttributeError): + result.tokens = 999 # type: ignore[misc] + + +def test_estimate_payload_with_nested_structures() -> None: + """Complex nested payloads produce reasonable estimates.""" + payload = { + "scope": { + "allowed_files": [f"pkg/module_{i}.py" for i in range(20)], + "forbidden": [".cache/**", "*.baseline.json"], + }, + "blast_radius": { + "level": "high", + "dependents": [ + {"path": f"dep_{i}.py", "reason": "import"} for i in range(5) + ], + }, + "gate_preview": {"would_fail": False, "reasons": []}, + } + result = estimate_payload(payload) + assert result.tokens > 50 + assert result.characters > 200 + + +def test_estimate_payload_with_unicode() -> None: + """Unicode content is handled correctly (ensure_ascii=False).""" + payload = {"message": "Результат: чистый", "emoji": "✅"} + result = estimate_payload(payload) + assert result.tokens > 0 + assert result.characters > 0 diff --git a/uv.lock b/uv.lock index 0ef5e65f..217567ce 100644 --- a/uv.lock +++ b/uv.lock @@ -364,7 +364,7 @@ requires-dist = [ { name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=7.1.0" }, { name = "rich", specifier = ">=15.0.0" }, { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.15.14" }, - { name = "tiktoken", marker = "extra == 'token-bench'", specifier = ">=0.8" }, + { name = "tiktoken", marker = "extra == 'token-bench'", specifier = ">=0.13.0" }, { name = "tomli", marker = "python_full_version < '3.11'", specifier = ">=2.0.1" }, { name = "twine", marker = "extra == 'dev'", 
specifier = ">=6.2.0" }, ] @@ -677,7 +677,7 @@ name = "importlib-metadata" version = "9.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "zipp", marker = "python_full_version < '3.15'" }, + { name = "zipp" }, ] sdist = { url = "https://files.pythonhosted.org/packages/a9/01/15bb152d77b21318514a96f43af312635eb2500c96b55398d020c93d86ea/importlib_metadata-9.0.0.tar.gz", hash = "sha256:a4f57ab599e6a2e3016d7595cfd72eb4661a5106e787a95bcc90c7105b831efc", size = 56405, upload-time = "2026-03-20T06:42:56.999Z" } wheels = [ @@ -1887,15 +1887,15 @@ wheels = [ [[package]] name = "starlette" -version = "1.0.1" +version = "1.1.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/08/a3/84e821cc54b4ab50ae6dbc6ac3800a651b65ec35f045cc73785380654057/starlette-1.0.1.tar.gz", hash = "sha256:512399c5f1de7fac99c88572212ded9ddeddef2fb32afa82d724000e88b38f4f", size = 2659596, upload-time = "2026-05-21T21:58:58.433Z" } +sdist = { url = "https://files.pythonhosted.org/packages/95/66/4d20cdf39a8d6a51e663b7038e3b828ff211d3891a43a713fe7e4643f3a8/starlette-1.1.0.tar.gz", hash = "sha256:e83c7fe0ddecd8719c5b840080325aec0260acec86e9832899e377b91d65e90f", size = 2660060, upload-time = "2026-05-23T16:55:41.376Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ec/e1/b2df4bc09a1e51ff664c1e17018a4274b42e5e9352e4a478ea540512dc88/starlette-1.0.1-py3-none-any.whl", hash = "sha256:7c0e69b2ee1c848bd54669d908500117a3ee13de603a21427e5c6fc1adf98dcd", size = 72802, upload-time = "2026-05-21T21:58:56.551Z" }, + { url = "https://files.pythonhosted.org/packages/93/79/920b8e0a8b20f793e8d64855095cb8febabf6175b8550b6f7a547d813891/starlette-1.1.0-py3-none-any.whl", hash = "sha256:7f0dfd38e428aad5cb6f9f667f0ca1d2d8ca3f3385dccac8305f79ec98458382", size = 72899, upload-time = "2026-05-23T16:55:39.201Z" }, ] 
[[package]] @@ -2065,16 +2065,16 @@ wheels = [ [[package]] name = "uvicorn" -version = "0.47.0" +version = "0.48.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, { name = "h11" }, { name = "typing-extensions", marker = "python_full_version < '3.11'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/f6/b1/8e7077a8641086aea449e1b5752a570f1b5906c64e0a33cd6d93b63a066b/uvicorn-0.47.0.tar.gz", hash = "sha256:7c9a0ea1a9414106bbab7324609c162d8fa0cdcdcb703060987269d77c7bb533", size = 90582, upload-time = "2026-05-14T18:16:54.455Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e6/bf/f6544ba992ddb9a6077343a576f9844f7f8f06ab819aefd00206e9255f18/uvicorn-0.48.0.tar.gz", hash = "sha256:a5504207195d08c2511bf9125ede5ac4a4b71725d519e758d01dcf0bc2d31c37", size = 91074, upload-time = "2026-05-24T12:08:41.925Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/15/41/ac2dfdbc1f60c7af4f994c7a335cfa7040c01642b605d65f611cecc2a1e4/uvicorn-0.47.0-py3-none-any.whl", hash = "sha256:2c5715bc12d1892d84752049f400cd1c3cb018514967fdfeb97640443a6a9432", size = 71301, upload-time = "2026-05-14T18:16:51.762Z" }, + { url = "https://files.pythonhosted.org/packages/01/be/72532be3da7acc5fdfbccdb95215cd04f995a0886532a5b423f929cda4cc/uvicorn-0.48.0-py3-none-any.whl", hash = "sha256:48097851328b87ec36117d3d575234519eb58c2b22d79666e9bbc6c49a761dad", size = 71410, upload-time = "2026-05-24T12:08:40.258Z" }, ] [[package]] From 388da5795e4fd5e034745afa501c3de790fe510b Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Tue, 26 May 2026 16:50:32 +0500 Subject: [PATCH 034/318] feat(cli): add MCP payload footprint analytics to audit screen Breakdown by event type (count, total, max tokens), top 5 expensive payloads with color-coded budget thresholds, workflow/single-payload budget warnings panel. New --audit-json flag for machine-readable JSON export (implies --audit). CLI help snapshot updated. 
--- codeclone/audit/reader.py | 150 +++++++++++++++++- codeclone/config/spec.py | 8 + codeclone/surfaces/cli/audit.py | 146 +++++++++++++++-- codeclone/surfaces/cli/workflow.py | 5 +- codeclone/ui_messages/__init__.py | 4 + .../fixtures/contract_snapshots/cli_help.txt | 4 +- 6 files changed, 299 insertions(+), 18 deletions(-) diff --git a/codeclone/audit/reader.py b/codeclone/audit/reader.py index 97c3aafe..e01f7a38 100644 --- a/codeclone/audit/reader.py +++ b/codeclone/audit/reader.py @@ -29,6 +29,40 @@ class AuditRecord: payload_characters: int | None = None +@dataclass(frozen=True, slots=True) +class TypeTokenProfile: + """Token stats for one event type.""" + + event_type: str + call_count: int + total_tokens: int + max_tokens: int + + +@dataclass(frozen=True, slots=True) +class TopPayload: + """A single expensive audit payload.""" + + event_type: str + event_id: str + estimated_tokens: int + created_at_utc: str + + +@dataclass(frozen=True, slots=True) +class PayloadFootprint: + """Aggregate payload cost analytics.""" + + encoding: str + tool_calls: int + total_tokens: int + avg_tokens: int + p95_tokens: int + max_tokens: int + by_type: tuple[TypeTokenProfile, ...] + top_payloads: tuple[TopPayload, ...] 
+ + @dataclass(frozen=True, slots=True) class AuditSummary: db_path: Path @@ -45,6 +79,7 @@ class AuditSummary: total_estimated_tokens: int | None = None token_encoding: str | None = None token_event_count: int = 0 + payload_footprint: PayloadFootprint | None = None def read_audit_summary(*, db_path: Path, limit: int = 50) -> AuditSummary: @@ -106,6 +141,7 @@ def read_audit_summary(*, db_path: Path, limit: int = 50) -> AuditSummary: (max(1, int(limit)),), ).fetchall() token_summary = (None, None, 0) + footprint = _read_payload_footprint(conn) if token_cols else None except (sqlite3.Error, AuditSchemaError) as exc: raise AuditReadError(f"cannot read audit database: {exc}") from exc finally: @@ -126,6 +162,7 @@ def read_audit_summary(*, db_path: Path, limit: int = 50) -> AuditSummary: total_estimated_tokens=total_tokens, token_encoding=token_enc, token_event_count=token_event_cnt, + payload_footprint=footprint, ) @@ -174,6 +211,109 @@ def _token_summary( return total_tokens, encoding, event_count +def payload_footprint_to_dict(fp: PayloadFootprint) -> dict[str, object]: + """Serialize PayloadFootprint to a JSON-safe dict.""" + return { + "encoding": fp.encoding, + "tool_calls": fp.tool_calls, + "total_tokens": fp.total_tokens, + "avg_tokens": fp.avg_tokens, + "p95_tokens": fp.p95_tokens, + "max_tokens": fp.max_tokens, + "by_type": { + tp.event_type: { + "count": tp.call_count, + "tokens": tp.total_tokens, + "max": tp.max_tokens, + } + for tp in fp.by_type + }, + "top_payloads": [ + { + "event_type": tp.event_type, + "tokens": tp.estimated_tokens, + "created_at_utc": tp.created_at_utc, + } + for tp in fp.top_payloads + ], + } + + +def _read_payload_footprint(conn: sqlite3.Connection) -> PayloadFootprint | None: + """Build aggregate payload analytics from token columns.""" + agg = conn.execute( + "SELECT COUNT(*), SUM(estimated_tokens), MAX(estimated_tokens) " + "FROM controller_events WHERE estimated_tokens IS NOT NULL" + ).fetchone() + if agg is None or agg[0] == 0: 
+ return None + tool_calls = agg[0] if isinstance(agg[0], int) else 0 + total_tokens = agg[1] if isinstance(agg[1], int) else 0 + max_tokens = agg[2] if isinstance(agg[2], int) else 0 + avg_tokens = total_tokens // tool_calls if tool_calls else 0 + + # p95: skip top 5% rows, take the next one + p95_offset = max(0, tool_calls * 5 // 100) + p95_row = conn.execute( + "SELECT estimated_tokens FROM controller_events " + "WHERE estimated_tokens IS NOT NULL " + "ORDER BY estimated_tokens DESC " + "LIMIT 1 OFFSET ?", + (p95_offset,), + ).fetchone() + p95_tokens = p95_row[0] if p95_row and isinstance(p95_row[0], int) else max_tokens + + # Breakdown by event_type + type_rows = conn.execute( + "SELECT event_type, COUNT(*), SUM(estimated_tokens), MAX(estimated_tokens) " + "FROM controller_events WHERE estimated_tokens IS NOT NULL " + "GROUP BY event_type ORDER BY SUM(estimated_tokens) DESC" + ).fetchall() + by_type = tuple( + TypeTokenProfile( + event_type=_str_or_empty(r[0]), + call_count=r[1] if isinstance(r[1], int) else 0, + total_tokens=r[2] if isinstance(r[2], int) else 0, + max_tokens=r[3] if isinstance(r[3], int) else 0, + ) + for r in type_rows + ) + + # Top 5 most expensive payloads + top_rows = conn.execute( + "SELECT event_type, event_id, estimated_tokens, created_at_utc " + "FROM controller_events WHERE estimated_tokens IS NOT NULL " + "ORDER BY estimated_tokens DESC LIMIT 5" + ).fetchall() + top_payloads = tuple( + TopPayload( + event_type=_str_or_empty(r[0]), + event_id=_str_or_empty(r[1]), + estimated_tokens=r[2] if isinstance(r[2], int) else 0, + created_at_utc=_str_or_empty(r[3]), + ) + for r in top_rows + ) + + # Encoding (single value for the session) + enc_row = conn.execute( + "SELECT token_encoding FROM controller_events " + "WHERE token_encoding IS NOT NULL LIMIT 1" + ).fetchone() + encoding = _str_or_none(enc_row[0]) if enc_row else "unknown" + + return PayloadFootprint( + encoding=encoding or "unknown", + tool_calls=tool_calls, + 
total_tokens=total_tokens, + avg_tokens=avg_tokens, + p95_tokens=p95_tokens, + max_tokens=max_tokens, + by_type=by_type, + top_payloads=top_payloads, + ) + + def _count(conn: sqlite3.Connection, sql: str) -> int: value = conn.execute(sql).fetchone() if value is None: @@ -218,4 +358,12 @@ def _int_or_none(value: object) -> int | None: return value if isinstance(value, int) and not isinstance(value, bool) else None -__all__ = ["AuditRecord", "AuditSummary", "read_audit_summary"] +__all__ = [ + "AuditRecord", + "AuditSummary", + "PayloadFootprint", + "TopPayload", + "TypeTokenProfile", + "payload_footprint_to_dict", + "read_audit_summary", +] diff --git a/codeclone/config/spec.py b/codeclone/config/spec.py index 96baf02e..eecd66c6 100644 --- a/codeclone/config/spec.py +++ b/codeclone/config/spec.py @@ -279,6 +279,14 @@ def _option( default=False, help_text=ui.HELP_AUDIT, ), + _option( + dest="audit_json", + group="Analysis", + cli_kind="store_true", + flags=("--audit-json",), + default=False, + help_text=ui.HELP_AUDIT_JSON, + ), _option( dest="audit_enabled", group=None, diff --git a/codeclone/surfaces/cli/audit.py b/codeclone/surfaces/cli/audit.py index 68ab39c6..7456b385 100644 --- a/codeclone/surfaces/cli/audit.py +++ b/codeclone/surfaces/cli/audit.py @@ -10,7 +10,12 @@ from pathlib import Path from ... 
import ui_messages as ui -from ...audit.reader import AuditSummary, read_audit_summary +from ...audit.reader import ( + AuditSummary, + PayloadFootprint, + payload_footprint_to_dict, + read_audit_summary, +) from ...audit.validation import AuditConfigError, AuditReadError, resolve_audit_path from ...contracts import ExitCode from .types import PrinterLike @@ -23,6 +28,7 @@ def render_audit( audit_enabled: bool, audit_path: str, quiet: bool, + json_summary: bool = False, ) -> int: if not audit_enabled: console.print(ui.fmt_contract_error("audit is not enabled.")) @@ -36,6 +42,8 @@ def render_audit( except Exception as exc: console.print(ui.fmt_internal_error(exc)) return int(ExitCode.INTERNAL_ERROR) + if json_summary: + return _render_json_summary(console=console, summary=summary) if quiet: return _render_quiet(console=console, summary=summary) return _render_verbose(console=console, summary=summary) @@ -54,6 +62,22 @@ def _render_quiet(*, console: PrinterLike, summary: AuditSummary) -> int: return int(ExitCode.SUCCESS) +def _render_json_summary(*, console: PrinterLike, summary: AuditSummary) -> int: + import json + + fp = summary.payload_footprint + data = { + "mcp_payload_footprint": payload_footprint_to_dict(fp) if fp else None, + "total_events": summary.total_events, + "intents": summary.intent_events, + "contracts": summary.contract_events, + "receipts": summary.receipt_events, + "violations": summary.violation_events, + } + console.print(json.dumps(data, indent=2), markup=False) + return int(ExitCode.SUCCESS) + + def _render_verbose(*, console: PrinterLike, summary: AuditSummary) -> int: if _supports_rich(console): return _render_verbose_rich(console=console, summary=summary) @@ -122,12 +146,11 @@ def _render_verbose_rich(*, console: PrinterLike, summary: AuditSummary) -> int: style="red" if summary.violation_events else "green", ), ) - if summary.total_estimated_tokens is not None and summary.token_event_count > 0: - enc_label = summary.token_encoding or 
"unknown" + fp = summary.payload_footprint + if fp is not None: meta.add_row( - "MCP token footprint", - f"~{summary.total_estimated_tokens:,} tokens " - f"({enc_label}, {summary.token_event_count} tool calls)", + "MCP payload footprint", + f"~{fp.total_tokens:,} tokens ({fp.encoding}, {fp.tool_calls} tool calls)", ) console.print(Panel(meta, border_style="cyan")) @@ -153,19 +176,112 @@ def _render_verbose_rich(*, console: PrinterLike, summary: AuditSummary) -> int: ) console.print(table) - if summary.total_estimated_tokens is not None and summary.token_event_count > 0: - enc_label = summary.token_encoding or "unknown" + if fp is not None: + _render_payload_analytics(console=console, fp=fp) + + return int(ExitCode.SUCCESS) + + +# Payload budget thresholds (tokens) +_SINGLE_PAYLOAD_OK = 500 +_SINGLE_PAYLOAD_WATCH = 1500 +_WORKFLOW_OK = 5000 +_WORKFLOW_WATCH = 15000 + + +def _render_payload_analytics( + *, + console: PrinterLike, + fp: PayloadFootprint, +) -> None: + from rich import box + from rich.panel import Panel + from rich.table import Table + from rich.text import Text + + # ── Aggregate stats ── + stats = Table.grid(padding=(0, 2)) + stats.add_column(style="dim", no_wrap=True) + stats.add_column(justify="right", no_wrap=True) + stats.add_row("Total tokens", f"~{fp.total_tokens:,}") + stats.add_row("Tool calls", str(fp.tool_calls)) + stats.add_row("Avg tokens/call", str(fp.avg_tokens)) + stats.add_row("p95 tokens", str(fp.p95_tokens)) + stats.add_row("Max tokens", str(fp.max_tokens)) + stats.add_row("Encoding", fp.encoding) + + # ── Breakdown by type ── + breakdown = Table(box=box.SIMPLE, show_edge=False) + breakdown.add_column("Type", no_wrap=True) + breakdown.add_column("Calls", justify="right", no_wrap=True) + breakdown.add_column("Total", justify="right", no_wrap=True) + breakdown.add_column("Max", justify="right", no_wrap=True) + for tp in fp.by_type: + breakdown.add_row( + _short_type(tp.event_type), + str(tp.call_count), + f"{tp.total_tokens:,}", + 
str(tp.max_tokens), + ) + + # ── Top payloads ── + top = Table(box=box.SIMPLE, show_edge=False) + top.add_column("#", justify="right", no_wrap=True, style="dim") + top.add_column("Type", no_wrap=True) + top.add_column("Tokens", justify="right", no_wrap=True) + top.add_column("Time", no_wrap=True) + for i, payload in enumerate(fp.top_payloads, 1): + style = ( + "bold red" + if payload.estimated_tokens > _SINGLE_PAYLOAD_WATCH + else "yellow" + if payload.estimated_tokens > _SINGLE_PAYLOAD_OK + else "" + ) + top.add_row( + str(i), + _short_type(payload.event_type), + Text(f"{payload.estimated_tokens:,}", style=style), + _short_time(payload.created_at_utc), + ) + + # ── Budget warnings ── + warnings: list[str] = [] + if fp.total_tokens > _WORKFLOW_WATCH: + warnings.append( + f"Workflow total {fp.total_tokens:,} tokens exceeds " + f"{_WORKFLOW_WATCH:,} threshold (heavy)" + ) + elif fp.total_tokens > _WORKFLOW_OK: + warnings.append( + f"Workflow total {fp.total_tokens:,} tokens exceeds " + f"{_WORKFLOW_OK:,} threshold (watch)" + ) + warnings.extend( + f"{_short_type(payload.event_type)} payload " + f"{payload.estimated_tokens:,} tokens (heavy)" + for payload in fp.top_payloads + if payload.estimated_tokens > _SINGLE_PAYLOAD_WATCH + ) + + # ── Render ── + console.print() + console.print(Panel(stats, title="MCP Payload Footprint", border_style="cyan")) + console.print(Panel(breakdown, title="Tokens by Type", border_style="dim")) + if fp.top_payloads: + console.print(Panel(top, title="Top Payloads", border_style="dim")) + if warnings: + warning_text = Text() + for w in warnings: + warning_text.append(f" ⚠ {w}\n", style="yellow") console.print( - Text( - f"Session MCP token footprint: " - f"~{summary.total_estimated_tokens:,} tokens " - f"({enc_label}, {summary.token_event_count} tool calls)", - style="dim", + Panel( + warning_text, + title="Payload Budget Warnings", + border_style="yellow", ) ) - return int(ExitCode.SUCCESS) - def _supports_rich(console: PrinterLike) -> 
bool: return console.__class__.__module__.startswith("rich.") diff --git a/codeclone/surfaces/cli/workflow.py b/codeclone/surfaces/cli/workflow.py index 5a16b987..2ce18424 100644 --- a/codeclone/surfaces/cli/workflow.py +++ b/codeclone/surfaces/cli/workflow.py @@ -174,6 +174,7 @@ def _controller_query_mode(args: object) -> bool: or bool_attr(args, "patch_verify") or bool_attr(args, "session_stats") or bool_attr(args, "audit") + or bool_attr(args, "audit_json") ) @@ -303,7 +304,8 @@ def _run_pre_analysis_controller_query( root_path=root_path, quiet=args.quiet, ) - if bool_attr(args, "audit"): + audit_json = bool_attr(args, "audit_json") + if bool_attr(args, "audit") or audit_json: from .audit import render_audit audit_console = require_status_console( @@ -318,6 +320,7 @@ def _run_pre_analysis_controller_query( audit_enabled=bool(getattr(args, "audit_enabled", False)), audit_path=str(getattr(args, "audit_path", "")), quiet=args.quiet, + json_summary=audit_json, ) return None diff --git a/codeclone/ui_messages/__init__.py b/codeclone/ui_messages/__init__.py index 0293269d..ba85796a 100644 --- a/codeclone/ui_messages/__init__.py +++ b/codeclone/ui_messages/__init__.py @@ -88,6 +88,10 @@ "Show local Controller audit trail from the configured audit database.\n" "Read-only, does not run analysis." ) +HELP_AUDIT_JSON = ( + "Output audit payload footprint as JSON.\n" + "Implies --audit. Useful for cross-repository comparison." +) HELP_CACHE_PATH = ( "Path to the cache file.\n" "If FILE is omitted, uses /.cache/codeclone/cache.json." 
diff --git a/tests/fixtures/contract_snapshots/cli_help.txt b/tests/fixtures/contract_snapshots/cli_help.txt index 1019f3f8..3d19615a 100644 --- a/tests/fixtures/contract_snapshots/cli_help.txt +++ b/tests/fixtures/contract_snapshots/cli_help.txt @@ -3,7 +3,7 @@ usage: codeclone [--min-loc MIN_LOC] [--min-stmt MIN_STMT] [--diff-against GIT_REF] [--paths-from-git-diff GIT_REF] [--blast-radius FILE [FILE ...]] [--patch-verify] [--strictness LEVEL] [--session-stats] [--audit] - [--cache-path [FILE]] [--cache-dir [FILE]] + [--audit-json] [--cache-path [FILE]] [--cache-dir [FILE]] [--max-cache-size-mb MB] [--baseline [FILE]] [--max-baseline-size-mb MB] [--update-baseline | --no-update-baseline] @@ -70,6 +70,8 @@ Analysis: Read-only, does not run analysis. --audit Show local Controller audit trail from the configured audit database. Read-only, does not run analysis. + --audit-json Output audit payload footprint as JSON. + Implies --audit. Useful for cross-repository comparison. --cache-path [FILE] Path to the cache file. If FILE is omitted, uses /.cache/codeclone/cache.json. --cache-dir [FILE] Legacy alias for --cache-path. 
From 1a26268069c1d5675226a77152a5ca82f4117432 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Tue, 26 May 2026 19:18:06 +0500 Subject: [PATCH 035/318] test(audit): restore token budget coverage --- tests/test_audit_writer.py | 397 ++++++++++++++ tests/test_cli_audit.py | 907 +++++++++++++++++++++++++++++++- tests/test_cli_session_stats.py | 235 +++++++++ 3 files changed, 1526 insertions(+), 13 deletions(-) diff --git a/tests/test_audit_writer.py b/tests/test_audit_writer.py index f7767ca0..91c81359 100644 --- a/tests/test_audit_writer.py +++ b/tests/test_audit_writer.py @@ -277,3 +277,400 @@ def test_event_validation_rejects_unknown_type() -> None: with pytest.raises(AuditValidationError, match="unknown event_type"): validate_event_row(row) + + +def test_close_is_idempotent(tmp_path: Path) -> None: + """Calling close() twice does not raise (line 93).""" + db_path = tmp_path / "audit.sqlite3" + writer = SqliteAuditWriter(db_path=db_path, payloads="compact", retention_days=30) + writer.emit(_event(tmp_path)) + writer.close() + writer.close() # second close is a no-op + + +def test_emit_on_closed_writer_is_silent(tmp_path: Path) -> None: + """Emit after close does not raise (lines 104-105).""" + db_path = tmp_path / "audit.sqlite3" + writer = SqliteAuditWriter(db_path=db_path, payloads="compact", retention_days=30) + writer.close() + writer.emit(_event(tmp_path)) # should not raise + + summary = read_audit_summary(db_path=db_path) + assert summary.total_events == 0 + + +def test_gc_triggers_after_interval(tmp_path: Path) -> None: + """Retention GC fires at the gc_interval boundary (lines 109-111).""" + db_path = tmp_path / "audit.sqlite3" + writer = SqliteAuditWriter( + db_path=db_path, + payloads="compact", + retention_days=30, + ) + # Lower the interval so GC triggers after 2 emits + writer._gc_interval = 2 + try: + writer.emit(_event(tmp_path)) + writer.emit(_event(tmp_path)) # triggers gc at counter==2 + writer.emit(_event(tmp_path)) # after gc reset + 
finally: + writer.close() + + summary = read_audit_summary(db_path=db_path) + assert summary.total_events == 3 + + +def test_token_estimation_exception_returns_none(tmp_path: Path) -> None: + """_estimate_payload_tokens returns None on exception (lines 160-161).""" + from codeclone.audit.writer import _estimate_payload_tokens + + # Valid payload should succeed + result = _estimate_payload_tokens({"key": "value"}) + assert result is not None + + # None payload returns None + assert _estimate_payload_tokens(None) is None + + +def test_payload_json_none_payload_compact_mode(tmp_path: Path) -> None: + """_payload_json returns '{}' for None payload in compact mode (line 176).""" + from codeclone.audit.writer import _payload_json + + event = AuditEvent( + event_type=EVENT_INTENT_DECLARED, + severity="info", + repo_root_digest="digest", + agent_pid=1, + agent_label="agent", + payload=None, + ) + assert _payload_json(event=event, payloads="compact") == "{}" + + +def test_payload_json_serialize_error_returns_empty(tmp_path: Path) -> None: + """_payload_json returns '{}' when JSON serialization fails (lines 185-186).""" + from unittest.mock import patch as mock_patch + + from codeclone.audit.writer import _payload_json + + event = AuditEvent( + event_type=EVENT_INTENT_DECLARED, + severity="info", + repo_root_digest="digest", + agent_pid=1, + agent_label="agent", + payload={"key": "value"}, + ) + with mock_patch("codeclone.audit.writer.json.dumps", side_effect=TypeError("boom")): + assert _payload_json(event=event, payloads="full") == "{}" + + +# ── events.py compact_payload_for_event coverage ── + + +def test_compact_payload_intent_checked() -> None: + """Exercise _compact_check_payload (line 106).""" + from codeclone.audit.events import EVENT_INTENT_CHECKED, compact_payload_for_event + + result = compact_payload_for_event( + event_type=EVENT_INTENT_CHECKED, + payload={ + "status": "clean", + "unexpected_files": ["a.py"], + "forbidden_touched": [], + }, + ) + assert 
result["status"] == "clean" + assert result["unexpected_files"] == 1 + + +def test_compact_payload_intent_cleared() -> None: + """Exercise intent cleared branch (lines 107-108).""" + from codeclone.audit.events import EVENT_INTENT_CLEARED, compact_payload_for_event + + result = compact_payload_for_event( + event_type=EVENT_INTENT_CLEARED, + payload={"cleared": 1, "workspace_cleared": True}, + ) + assert result["cleared"] == 1 + assert result["workspace_cleared"] is True + + +def test_compact_payload_workspace_conflict() -> None: + """Exercise workspace conflict branch (lines 112-113).""" + from codeclone.audit.events import ( + EVENT_WORKSPACE_CONFLICT, + compact_payload_for_event, + ) + + result = compact_payload_for_event( + event_type=EVENT_WORKSPACE_CONFLICT, + payload={"concurrent_intents": [{"id": "1"}, {"id": "2"}]}, + ) + assert result["concurrent_intents"] == 2 + + +def test_compact_payload_workspace_gc() -> None: + """Exercise workspace gc branch (lines 119-120).""" + from codeclone.audit.events import EVENT_WORKSPACE_GC, compact_payload_for_event + + result = compact_payload_for_event( + event_type=EVENT_WORKSPACE_GC, + payload={"removed": 3, "stale_count": 1, "orphaned_count": 2}, + ) + assert result["removed"] == 3 + assert result["stale_count"] == 1 + + +def test_compact_payload_claim_completed() -> None: + """Exercise claim validation completed branch (lines 136-137).""" + from codeclone.audit.events import EVENT_CLAIM_COMPLETED, compact_payload_for_event + + result = compact_payload_for_event( + event_type=EVENT_CLAIM_COMPLETED, + payload={ + "valid": True, + "violations": [], + "warnings": ["minor issue"], + }, + ) + assert result["valid"] is True + assert result["violations"] == 0 + assert result["warnings"] == 1 + + +def test_compact_payload_receipt_created() -> None: + """Exercise receipt created branch (lines 142-144).""" + from codeclone.audit.events import EVENT_RECEIPT_CREATED, compact_payload_for_event + + result = 
compact_payload_for_event( + event_type=EVENT_RECEIPT_CREATED, + payload={ + "format": "v2", + "receipt": { + "verdict": "approved", + "human_decision_points": ["a", "b"], + }, + }, + ) + assert result["format"] == "v2" + assert result["verdict"] == "approved" + assert result["human_decisions"] == 2 + + +def test_compact_payload_budget() -> None: + """Exercise budget payload branch (line 168).""" + from codeclone.audit.events import EVENT_PATCH_BUDGET, compact_payload_for_event + + result = compact_payload_for_event( + event_type=EVENT_PATCH_BUDGET, + payload={ + "strictness": "ci", + "blast_radius_summary": { + "radius_level": "low", + "do_not_touch_count": 2, + "review_context_count": 5, + }, + "gate_preview": {"would_fail": False}, + }, + ) + assert result["strictness"] == "ci" + assert result["radius_level"] == "low" + + +def test_sequence_helper_rejects_string() -> None: + """_sequence treats strings as empty (line 229).""" + from codeclone.audit.events import _sequence + + assert _sequence("hello") == () + assert _sequence([1, 2]) == [1, 2] + assert _sequence(None) == () + + +def test_sequence_field_count() -> None: + """Exercise _sequence_field_count (line 220).""" + from codeclone.audit.events import _sequence_field_count + + assert _sequence_field_count({"items": [1, 2, 3]}, "items") == 3 + assert _sequence_field_count({"items": "text"}, "items") == 0 + assert _sequence_field_count({}, "missing") == 0 + + +# ── validation.py edge cases ── + + +def test_resolve_audit_path_rejects_non_string(tmp_path: Path) -> None: + """resolve_audit_path raises for non-string value (line 90).""" + from codeclone.audit.validation import AuditConfigError, resolve_audit_path + + with pytest.raises(AuditConfigError, match="must be a string"): + resolve_audit_path(root_path=tmp_path, value=123) + + +def test_resolve_audit_path_rejects_empty(tmp_path: Path) -> None: + """resolve_audit_path raises for empty string (line 93).""" + from codeclone.audit.validation import 
AuditConfigError, resolve_audit_path + + with pytest.raises(AuditConfigError, match="must not be empty"): + resolve_audit_path(root_path=tmp_path, value=" ") + + +def test_validate_retention_days_rejects_non_int() -> None: + """validate_retention_days raises for non-integer (line 117).""" + from codeclone.audit.validation import AuditConfigError, validate_retention_days + + with pytest.raises(AuditConfigError, match="must be an integer"): + validate_retention_days("30") + + +def test_validate_event_row_rejects_invalid_severity() -> None: + """validate_event_row raises for invalid severity (line 133).""" + row = EventRow( + event_id="evt_1", + event_type="intent.declared", + severity="debug", # type: ignore[arg-type] + created_at_utc="2026-05-25T00:00:00Z", + repo_root_digest="a" * 16, + run_id=None, + intent_id=None, + report_digest=None, + agent_label="agent", + agent_pid=1, + status=None, + payload_json="{}", + ) + with pytest.raises(AuditValidationError, match="invalid severity"): + validate_event_row(row) + + +def test_validate_event_row_rejects_non_int_pid() -> None: + """validate_event_row raises for non-integer pid (line 141).""" + row = EventRow( + event_id="evt_1", + event_type="intent.declared", + severity="info", + created_at_utc="2026-05-25T00:00:00Z", + repo_root_digest="a" * 16, + run_id=None, + intent_id=None, + report_digest=None, + agent_label="agent", + agent_pid=True, + status=None, + payload_json="{}", + ) + with pytest.raises(AuditValidationError, match="agent_pid must be an integer"): + validate_event_row(row) + + +def test_validate_event_row_rejects_non_positive_pid() -> None: + """validate_event_row raises for non-positive pid (line 143).""" + row = EventRow( + event_id="evt_1", + event_type="intent.declared", + severity="info", + created_at_utc="2026-05-25T00:00:00Z", + repo_root_digest="a" * 16, + run_id=None, + intent_id=None, + report_digest=None, + agent_label="agent", + agent_pid=0, + status=None, + payload_json="{}", + ) + with 
pytest.raises(AuditValidationError, match="agent_pid must be positive"): + validate_event_row(row) + + +def test_validate_text_rejects_non_string() -> None: + """_validate_text raises for non-string (line 156).""" + from codeclone.audit.validation import AuditValidationError, _validate_text + + with pytest.raises(AuditValidationError, match="must be a string"): + _validate_text(123, "field", max_len=50) # type: ignore[arg-type] + + +def test_validate_text_rejects_empty() -> None: + """_validate_text raises for empty value (line 158).""" + from codeclone.audit.validation import AuditValidationError, _validate_text + + with pytest.raises(AuditValidationError, match="must not be empty"): + _validate_text("", "event_id", max_len=50) + + +def test_validate_text_rejects_too_long() -> None: + """_validate_text raises for too-long value (line 160).""" + from codeclone.audit.validation import AuditValidationError, _validate_text + + with pytest.raises(AuditValidationError, match="too long"): + _validate_text("x" * 200, "field", max_len=50) + + +def test_validate_text_rejects_nul_byte() -> None: + """_validate_text raises for NUL byte (line 162).""" + from codeclone.audit.validation import AuditValidationError, _validate_text + + with pytest.raises(AuditValidationError, match="contains NUL byte"): + _validate_text("abc\x00def", "field", max_len=50) + + +# ── writer.py: _estimate_payload_tokens exception ── + + +def test_estimate_payload_tokens_exception_returns_none() -> None: + """_estimate_payload_tokens returns None on estimation failure.""" + from unittest.mock import patch + + from codeclone.audit.writer import _estimate_payload_tokens + + with patch( + "codeclone.budget.estimator.estimate_payload", + side_effect=RuntimeError("boom"), + ): + result = _estimate_payload_tokens({"key": "value"}) + assert result is None + + +def test_payload_json_none_payload_full_mode() -> None: + """_payload_json returns '{}' when full-mode payload is None.""" + from 
codeclone.audit.writer import _payload_json + + none_payload_event = AuditEvent( + event_type="intent.declared", + severity="info", + repo_root_digest="a" * 16, + agent_pid=123, + agent_label="test-agent", + run_id="run123", + intent_id="intent-run123-001", + report_digest="b" * 64, + status="active", + payload=None, + ) + result = _payload_json(event=none_payload_event, payloads="full") + assert result == "{}" + + +# ── schema.py: open_audit_db exception path ── + + +def test_open_audit_db_exception_closes_connection(tmp_path: Path) -> None: + """open_audit_db closes connection on PRAGMA/schema failure (schema.py:66-68).""" + from unittest.mock import MagicMock, patch + + from codeclone.audit.schema import open_audit_db + + db_path = tmp_path / "subdir" / "audit.sqlite3" + + # Mock connect to return a connection that fails on execute + mock_conn = MagicMock() + mock_conn.execute.side_effect = sqlite3.OperationalError("disk error") + + with ( + patch("sqlite3.connect", return_value=mock_conn), + pytest.raises(sqlite3.OperationalError, match="disk error"), + ): + open_audit_db(db_path) + + mock_conn.close.assert_called_once() diff --git a/tests/test_cli_audit.py b/tests/test_cli_audit.py index 8e4852dd..9243eee4 100644 --- a/tests/test_cli_audit.py +++ b/tests/test_cli_audit.py @@ -1,17 +1,37 @@ from __future__ import annotations import io +import json +import sqlite3 +from datetime import datetime, timedelta, timezone from pathlib import Path -from typing import cast +from typing import TYPE_CHECKING, cast import pytest from rich.console import Console -from codeclone.audit.events import EVENT_PATCH_VERIFIED, AuditEvent, repo_root_digest +from codeclone.audit.events import ( + EVENT_BLAST_RADIUS, + EVENT_INTENT_DECLARED, + EVENT_PATCH_BUDGET, + EVENT_PATCH_VERIFIED, + AuditEvent, + repo_root_digest, +) + +if TYPE_CHECKING: + from codeclone.audit.events import AuditSeverity +from codeclone.audit.reader import ( + PayloadFootprint, + TopPayload, + TypeTokenProfile, + 
payload_footprint_to_dict, + read_audit_summary, +) from codeclone.audit.writer import SqliteAuditWriter from codeclone.contracts import ExitCode from codeclone.surfaces.cli.audit import render_audit -from codeclone.surfaces.cli.types import PrinterLike +from codeclone.surfaces.cli.types import CLIArgsLike, PrinterLike class _RecordingPrinter: @@ -26,7 +46,16 @@ def text(self) -> str: return "\n".join(self.lines) -def _write_audit_event(root: Path) -> None: +def _write_audit_event( + root: Path, + *, + event_type: str = EVENT_PATCH_VERIFIED, + severity: AuditSeverity = "info", + agent_label: str = "test-agent", + run_id: str = "abcdef123456", + intent_id: str = "intent-abcdef12-001", + status: str = "accepted", +) -> None: writer = SqliteAuditWriter( db_path=root / ".cache" / "codeclone" / "audit.sqlite3", payloads="compact", @@ -35,17 +64,17 @@ def _write_audit_event(root: Path) -> None: try: writer.emit( AuditEvent( - event_type=EVENT_PATCH_VERIFIED, - severity="info", + event_type=event_type, + severity=severity, repo_root_digest=repo_root_digest(root), agent_pid=123, - agent_label="test-agent", - run_id="abcdef123456", - intent_id="intent-abcdef12-001", + agent_label=agent_label, + run_id=run_id, + intent_id=intent_id, report_digest="a" * 64, - status="accepted", + status=status, payload={ - "status": "accepted", + "status": status, "structural_delta": { "regressions": [], "improvements": [], @@ -60,6 +89,95 @@ def _write_audit_event(root: Path) -> None: writer.close() +def _write_multiple_events(root: Path) -> None: + """Write events of different types to exercise by-type breakdown.""" + events: list[tuple[str, AuditSeverity, str]] = [ + (EVENT_PATCH_VERIFIED, "info", "accepted"), + (EVENT_PATCH_BUDGET, "info", "ok"), + (EVENT_INTENT_DECLARED, "info", "active"), + (EVENT_BLAST_RADIUS, "info", "computed"), + ] + writer = SqliteAuditWriter( + db_path=root / ".cache" / "codeclone" / "audit.sqlite3", + payloads="compact", + retention_days=30, + ) + try: + for 
event_type, severity, status in events: + writer.emit( + AuditEvent( + event_type=event_type, + severity=severity, + repo_root_digest=repo_root_digest(root), + agent_pid=123, + agent_label="claude-code/opus-4", + run_id="abcdef123456", + intent_id="intent-abcdef12-001", + report_digest="a" * 64, + status=status, + payload={ + "status": status, + "data": "x" * 200, # ensure non-trivial token count + }, + ) + ) + finally: + writer.close() + + +def _payload_footprint( + *, + event_type: str, + tool_calls: int, + total_tokens: int, + avg_tokens: int, + p95_tokens: int, + max_tokens: int, + top_payload_tokens: int | None, +) -> PayloadFootprint: + top_payloads = ( + () + if top_payload_tokens is None + else ( + TopPayload( + event_type=event_type, + event_id="evt_test_1", + estimated_tokens=top_payload_tokens, + created_at_utc="2026-05-26T10:00:00Z", + ), + ) + ) + return PayloadFootprint( + encoding="o200k_base", + tool_calls=tool_calls, + total_tokens=total_tokens, + avg_tokens=avg_tokens, + p95_tokens=p95_tokens, + max_tokens=max_tokens, + by_type=( + TypeTokenProfile( + event_type=event_type, + call_count=tool_calls, + total_tokens=total_tokens, + max_tokens=max_tokens, + ), + ), + top_payloads=top_payloads, + ) + + +def _render_payload_analytics_text(fp: PayloadFootprint) -> str: + from codeclone.surfaces.cli.audit import _render_payload_analytics + + output = io.StringIO() + console = Console(file=output, force_terminal=True, color_system=None, width=160) + _render_payload_analytics(console=cast(PrinterLike, console), fp=fp) + return output.getvalue() + + +# ── Contract error paths ── + + @pytest.mark.parametrize( ("audit_enabled", "expected_message"), [ @@ -87,6 +205,34 @@ def test_audit_contract_errors( assert expected_message in printer.text +def test_audit_internal_error_on_unexpected_exception( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Exercise the except Exception branch (lines 42-44).""" + from codeclone.surfaces.cli import audit 
as audit_mod + + def _boom(*, root_path: Path, value: str) -> Path: + msg = "simulated crash" + raise RuntimeError(msg) + + monkeypatch.setattr(audit_mod, "resolve_audit_path", _boom) + printer = _RecordingPrinter() + + exit_code = render_audit( + console=printer, + root_path=tmp_path, + audit_enabled=True, + audit_path="whatever", + quiet=True, + ) + + assert exit_code == int(ExitCode.INTERNAL_ERROR) + + +# ── Quiet mode ── + + def test_audit_quiet_with_events(tmp_path: Path) -> None: _write_audit_event(tmp_path) printer = _RecordingPrinter() @@ -104,6 +250,9 @@ def test_audit_quiet_with_events(tmp_path: Path) -> None: assert "contracts=1" in printer.text +# ── Plain (non-Rich) verbose ── + + def test_audit_verbose_renders_plain_table(tmp_path: Path) -> None: _write_audit_event(tmp_path) printer = _RecordingPrinter() @@ -122,10 +271,13 @@ def test_audit_verbose_renders_plain_table(tmp_path: Path) -> None: assert "accepted" in printer.text +# ── Rich verbose ── + + def test_audit_verbose_uses_rich_table(tmp_path: Path) -> None: _write_audit_event(tmp_path) output = io.StringIO() - console = Console(file=output, force_terminal=True, color_system=None, width=160) + console = Console(file=output, force_terminal=True, color_system=None, width=240) exit_code = render_audit( console=cast(PrinterLike, console), @@ -140,4 +292,733 @@ def test_audit_verbose_uses_rich_table(tmp_path: Path) -> None: assert "Controller Audit Trail" in text assert "Workspace" not in text assert "verify" in text - assert "accepted" in text + assert "accept" in text + + +def test_audit_rich_with_payload_footprint(tmp_path: Path) -> None: + """Rich path with multiple events exercises payload analytics panel.""" + _write_multiple_events(tmp_path) + output = io.StringIO() + console = Console(file=output, force_terminal=True, color_system=None, width=160) + + exit_code = render_audit( + console=cast(PrinterLike, console), + root_path=tmp_path, + audit_enabled=True, + 
audit_path=".cache/codeclone/audit.sqlite3", + quiet=False, + ) + + assert exit_code == int(ExitCode.SUCCESS) + text = output.getvalue() + assert "MCP Payload Footprint" in text + assert "Tokens by Type" in text + assert "Top Payloads" in text + + +@pytest.mark.parametrize( + ("total_tokens", "tool_calls", "avg_tokens", "p95_tokens", "max_tokens", "label"), + [ + (8000, 10, 800, 1200, 1400, "watch"), + (20000, 20, 1000, 1800, 2000, "heavy"), + ], +) +def test_audit_rich_payload_budget_warnings( + total_tokens: int, + tool_calls: int, + avg_tokens: int, + p95_tokens: int, + max_tokens: int, + label: str, +) -> None: + """Trigger workflow-level payload budget warnings.""" + fp = _payload_footprint( + event_type="patch_contract.verified", + tool_calls=tool_calls, + total_tokens=total_tokens, + avg_tokens=avg_tokens, + p95_tokens=p95_tokens, + max_tokens=max_tokens, + top_payload_tokens=max_tokens, + ) + text = _render_payload_analytics_text(fp) + assert "Payload Budget Warnings" in text + assert label in text + + +@pytest.mark.parametrize( + ( + "tool_calls", + "total_tokens", + "avg_tokens", + "p95_tokens", + "max_tokens", + "top_payload_tokens", + "shows_top_payloads", + ), + [ + (3, 900, 300, 400, 450, 450, True), + (1, 100, 100, 100, 100, None, False), + ], +) +def test_audit_rich_payload_under_budget_sections( + tool_calls: int, + total_tokens: int, + avg_tokens: int, + p95_tokens: int, + max_tokens: int, + top_payload_tokens: int | None, + *, + shows_top_payloads: bool, +) -> None: + """Under-budget payload analytics render optional sections correctly.""" + fp = _payload_footprint( + event_type="intent.declared", + tool_calls=tool_calls, + total_tokens=total_tokens, + avg_tokens=avg_tokens, + p95_tokens=p95_tokens, + max_tokens=max_tokens, + top_payload_tokens=top_payload_tokens, + ) + text = _render_payload_analytics_text(fp) + assert "Payload Budget Warnings" not in text + if shows_top_payloads: + assert "Top Payloads" in text + else: + assert "Top Payloads" 
not in text + assert "MCP Payload Footprint" in text + + +# ── JSON summary ── + + +def test_audit_json_summary_with_footprint(tmp_path: Path) -> None: + """Exercise _render_json_summary with payload footprint data.""" + _write_multiple_events(tmp_path) + printer = _RecordingPrinter() + + exit_code = render_audit( + console=printer, + root_path=tmp_path, + audit_enabled=True, + audit_path=".cache/codeclone/audit.sqlite3", + quiet=False, + json_summary=True, + ) + + assert exit_code == int(ExitCode.SUCCESS) + data = json.loads(printer.text) + assert "mcp_payload_footprint" in data + assert data["mcp_payload_footprint"] is not None + assert data["mcp_payload_footprint"]["tool_calls"] == 4 + assert "total_tokens" in data["mcp_payload_footprint"] + assert "by_type" in data["mcp_payload_footprint"] + assert "top_payloads" in data["mcp_payload_footprint"] + + +def test_audit_json_summary_without_footprint(tmp_path: Path) -> None: + """JSON summary with no token data yields null footprint.""" + _write_event_without_tokens(tmp_path) + printer = _RecordingPrinter() + + exit_code = render_audit( + console=printer, + root_path=tmp_path, + audit_enabled=True, + audit_path=".cache/codeclone/audit.sqlite3", + quiet=False, + json_summary=True, + ) + + assert exit_code == int(ExitCode.SUCCESS) + data = json.loads(printer.text) + assert data["mcp_payload_footprint"] is None + assert data["total_events"] == 1 + + +def _write_event_without_tokens(root: Path) -> None: + """Insert an event row directly with NULL token columns.""" + db_path = root / ".cache" / "codeclone" / "audit.sqlite3" + db_path.parent.mkdir(parents=True, exist_ok=True) + conn = sqlite3.connect(str(db_path)) + try: + from codeclone.audit.schema import ensure_schema + + ensure_schema(conn) + conn.execute( + "INSERT INTO controller_events " + "(event_id, event_type, severity, created_at_utc, " + "repo_root_digest, agent_label, agent_pid, status, run_id, intent_id) " + "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", + ( + 
"evt_no_tokens", + EVENT_PATCH_VERIFIED, + "info", + "2026-05-26T10:00:00Z", + "digest123", + "test-agent", + 123, + "accepted", + "run123", + "intent-test-001", + ), + ) + conn.commit() + finally: + conn.close() + + +# ── reader.py: payload_footprint_to_dict ── + + +def test_payload_footprint_to_dict_roundtrip() -> None: + fp = PayloadFootprint( + encoding="o200k_base", + tool_calls=5, + total_tokens=2500, + avg_tokens=500, + p95_tokens=800, + max_tokens=1000, + by_type=( + TypeTokenProfile( + event_type="intent.declared", + call_count=3, + total_tokens=1500, + max_tokens=700, + ), + TypeTokenProfile( + event_type="patch_contract.verified", + call_count=2, + total_tokens=1000, + max_tokens=1000, + ), + ), + top_payloads=( + TopPayload( + event_type="patch_contract.verified", + event_id="evt_1", + estimated_tokens=1000, + created_at_utc="2026-05-26T10:00:00Z", + ), + ), + ) + result = payload_footprint_to_dict(fp) + assert result["encoding"] == "o200k_base" + assert result["tool_calls"] == 5 + assert result["total_tokens"] == 2500 + assert result["p95_tokens"] == 800 + assert isinstance(result["by_type"], dict) + assert "intent.declared" in result["by_type"] + assert result["by_type"]["intent.declared"]["count"] == 3 + assert isinstance(result["top_payloads"], list) + assert len(result["top_payloads"]) == 1 + assert result["top_payloads"][0]["tokens"] == 1000 + # Verify JSON-serializable + json.dumps(result) + + +# ── reader.py: read_audit_summary with footprint ── + + +def test_read_audit_summary_includes_payload_footprint(tmp_path: Path) -> None: + _write_multiple_events(tmp_path) + db_path = tmp_path / ".cache" / "codeclone" / "audit.sqlite3" + summary = read_audit_summary(db_path=db_path, limit=50) + + assert summary.payload_footprint is not None + fp = summary.payload_footprint + assert fp.tool_calls == 4 + assert fp.total_tokens > 0 + assert fp.avg_tokens > 0 + assert fp.max_tokens >= fp.avg_tokens + assert fp.p95_tokens > 0 + assert len(fp.by_type) > 0 + 
assert len(fp.top_payloads) > 0 + assert fp.encoding != "unknown" + + +def test_read_audit_summary_no_tokens_yields_no_footprint(tmp_path: Path) -> None: + _write_event_without_tokens(tmp_path) + db_path = tmp_path / ".cache" / "codeclone" / "audit.sqlite3" + summary = read_audit_summary(db_path=db_path, limit=50) + + # Event has NULL estimated_tokens → footprint should be None + assert summary.payload_footprint is None + assert summary.total_events == 1 + + +# ── Helper functions ── + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + ("intent-abcdef12-001", "abcdef12-001"), + (None, "-"), + ("", "-"), + ("custom-id-123", "custom-id-123"), + ], +) +def test_short_intent(value: str | None, expected: str) -> None: + from codeclone.surfaces.cli.audit import _short_intent + + assert _short_intent(value) == expected + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + ("claude-code/opus-4", "cc/opus-4"), + (None, "-"), + ("", "-"), + ("test-agent", "test-agent"), + ], +) +def test_short_agent(value: str | None, expected: str) -> None: + from codeclone.surfaces.cli.audit import _short_agent + + assert _short_agent(value) == expected + + +@pytest.mark.parametrize( + ("delta", "suffix"), + [ + (timedelta(seconds=30), "s ago"), + (timedelta(minutes=15), "m ago"), + (timedelta(hours=5), "h ago"), + (timedelta(days=3), "d ago"), + ], +) +def test_relative_time_age_suffixes(delta: timedelta, suffix: str) -> None: + from codeclone.surfaces.cli.audit import _relative_time + + ts = (datetime.now(timezone.utc) - delta).isoformat() + assert _relative_time(ts).endswith(suffix) + + +@pytest.mark.parametrize("value", [None, ""]) +def test_relative_time_missing_values(value: str | None) -> None: + from codeclone.surfaces.cli.audit import _relative_time + + assert _relative_time(value) == "none" + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (datetime.now(timezone.utc).isoformat(), "today"), + ("2025-01-15T10:30:00Z", "2025-01-15"), + ("not-a-date", 
"not-a-date"), + ("", "-"), + ], +) +def test_short_time(value: str, expected: str) -> None: + from codeclone.surfaces.cli.audit import _short_time + + assert expected in _short_time(value) + + +class TestFormatTokens: + def test_none_returns_dash(self) -> None: + from codeclone.surfaces.cli.audit import _format_tokens + + assert _format_tokens(None) == "—" + + def test_formats_with_commas(self) -> None: + from codeclone.surfaces.cli.audit import _format_tokens + + assert _format_tokens(12345) == "12,345" + + def test_zero(self) -> None: + from codeclone.surfaces.cli.audit import _format_tokens + + assert _format_tokens(0) == "0" + + +class TestFormatBytes: + def test_bytes_range(self) -> None: + from codeclone.surfaces.cli.audit import _format_bytes + + assert _format_bytes(500) == "500 B" + + def test_kib_range(self) -> None: + from codeclone.surfaces.cli.audit import _format_bytes + + result = _format_bytes(2048) + assert "KiB" in result + + def test_mib_range(self) -> None: + from codeclone.surfaces.cli.audit import _format_bytes + + result = _format_bytes(2 * 1024 * 1024) + assert "MiB" in result + + +class TestSeverityStyle: + def test_known_severities(self) -> None: + from codeclone.surfaces.cli.audit import _severity_style + + assert _severity_style("info") == "green" + assert _severity_style("warn") == "yellow" + assert _severity_style("error") == "bold red" + + def test_unknown_severity(self) -> None: + from codeclone.surfaces.cli.audit import _severity_style + + assert _severity_style("debug") == "white" + + +# ── workflow.py: _validate_controller_query_flags ── + + +class TestControllerQueryFlagValidation: + """Cover the validation branches in _validate_controller_query_flags.""" + + @staticmethod + def _validate(**attrs: object) -> None: + from argparse import Namespace + + import codeclone.surfaces.cli.workflow as wf + + wf.console = wf._make_plain_console() + defaults: dict[str, object] = { + "blast_radius": None, + "patch_verify": False, + 
"session_stats": False, + "audit": False, + "audit_json": False, + "strictness": "ci", + "update_baseline": False, + "update_metrics_baseline": False, + "changed_only": False, + "diff_against": None, + "paths_from_git_diff": None, + } + defaults.update(attrs) + args = Namespace(**defaults) + wf._validate_controller_query_flags(args=args) + + def test_invalid_strictness(self) -> None: + with pytest.raises(SystemExit): + self._validate(patch_verify=True, strictness="ultra") + + def test_strictness_without_patch_verify(self) -> None: + from argparse import Namespace + + import codeclone.surfaces.cli.workflow as wf + + wf.console = wf._make_plain_console() + args = Namespace( + blast_radius=None, + patch_verify=False, + session_stats=False, + audit=True, + audit_json=False, + strictness="strict", + update_baseline=False, + update_metrics_baseline=False, + changed_only=False, + diff_against=None, + paths_from_git_diff=None, + ) + with pytest.raises(SystemExit): + wf._validate_controller_query_flags(args=args, strictness_explicit=True) + + def test_session_stats_with_audit(self) -> None: + with pytest.raises(SystemExit): + self._validate(session_stats=True, audit=True) + + def test_session_stats_with_blast_radius(self) -> None: + with pytest.raises(SystemExit): + self._validate( + session_stats=True, + blast_radius=("pkg/a.py",), + ) + + def test_audit_with_blast_radius(self) -> None: + with pytest.raises(SystemExit): + self._validate(audit=True, blast_radius=("pkg/a.py",)) + + def test_audit_with_patch_verify(self) -> None: + with pytest.raises(SystemExit): + self._validate(audit=True, patch_verify=True) + + def test_update_baseline_in_controller_mode(self) -> None: + with pytest.raises(SystemExit): + self._validate(audit=True, update_baseline=True) + + def test_update_metrics_baseline_in_controller_mode(self) -> None: + with pytest.raises(SystemExit): + self._validate(audit=True, update_metrics_baseline=True) + + def test_changed_only_in_controller_mode(self) -> None: 
+ with pytest.raises(SystemExit): + self._validate(audit=True, changed_only=True) + + def test_diff_against_in_controller_mode(self) -> None: + with pytest.raises(SystemExit): + self._validate(audit=True, diff_against="HEAD") + + def test_report_outputs_in_controller_mode(self) -> None: + from argparse import Namespace + + import codeclone.surfaces.cli.workflow as wf + + wf.console = wf._make_plain_console() + args = Namespace( + blast_radius=None, + patch_verify=False, + session_stats=False, + audit=True, + audit_json=False, + strictness="ci", + update_baseline=False, + update_metrics_baseline=False, + changed_only=False, + diff_against=None, + paths_from_git_diff=None, + ) + with pytest.raises(SystemExit): + wf._validate_controller_query_flags( + args=args, report_outputs_requested=True + ) + + def test_valid_audit_mode_passes(self) -> None: + # Should NOT raise + self._validate(audit=True) + + def test_valid_audit_json_mode_passes(self) -> None: + # Should NOT raise + self._validate(audit_json=True) + + def test_non_controller_mode_noop(self) -> None: + # No controller flags → should return without raising + self._validate() + + +# ── workflow.py: _run_pre_analysis_controller_query ── + + +class TestRunPreAnalysisControllerQuery: + """Cover _run_pre_analysis_controller_query branches (lines 294-317, 475).""" + + @staticmethod + def _make_args(**attrs: object) -> CLIArgsLike: + from argparse import Namespace + + defaults: dict[str, object] = { + "session_stats": False, + "audit": False, + "audit_json": False, + "no_color": True, + "quiet": True, + "audit_enabled": False, + "audit_path": ".cache/codeclone/audit.sqlite3", + } + defaults.update(attrs) + return cast(CLIArgsLike, Namespace(**defaults)) + + def test_session_stats_branch( + self, + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """Exercise session_stats branch (lines 294-302).""" + import codeclone.surfaces.cli.workflow as wf + + monkeypatch.setattr( + 
"codeclone.surfaces.cli.session_stats.render_session_stats", + lambda **kw: 0, + ) + args = self._make_args(session_stats=True) + result = wf._run_pre_analysis_controller_query(args=args, root_path=tmp_path) + assert result == 0 + + def test_audit_branch(self, tmp_path: Path) -> None: + """Exercise audit branch (lines 309-317).""" + import codeclone.surfaces.cli.workflow as wf + + args = self._make_args(audit=True, audit_enabled=False) + result = wf._run_pre_analysis_controller_query(args=args, root_path=tmp_path) + # audit_enabled=False → CONTRACT_ERROR + assert result == int(ExitCode.CONTRACT_ERROR) + + def test_audit_json_branch(self, tmp_path: Path) -> None: + """Exercise audit_json branch (lines 307-324).""" + import codeclone.surfaces.cli.workflow as wf + + args = self._make_args(audit_json=True, audit_enabled=False) + result = wf._run_pre_analysis_controller_query(args=args, root_path=tmp_path) + assert result == int(ExitCode.CONTRACT_ERROR) + + def test_no_controller_mode_returns_none(self, tmp_path: Path) -> None: + import codeclone.surfaces.cli.workflow as wf + + args = self._make_args() + result = wf._run_pre_analysis_controller_query(args=args, root_path=tmp_path) + assert result is None + + +class TestParseUtc: + def test_valid_iso(self) -> None: + from codeclone.surfaces.cli.audit import _parse_utc + + result = _parse_utc("2026-05-26T10:00:00Z") + assert result is not None + assert result.tzinfo is not None + + def test_empty(self) -> None: + from codeclone.surfaces.cli.audit import _parse_utc + + assert _parse_utc("") is None + + def test_invalid(self) -> None: + from codeclone.surfaces.cli.audit import _parse_utc + + assert _parse_utc("not-valid") is None + + +# ── reader.py: schema without token columns ── + + +def test_read_audit_summary_no_token_columns(tmp_path: Path) -> None: + """Exercise the no-token-columns branch (lines 135-143).""" + db_path = tmp_path / "audit.sqlite3" + conn = sqlite3.connect(str(db_path)) + try: + # Create schema 
without token columns + conn.execute(""" + CREATE TABLE IF NOT EXISTS controller_events ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + event_id TEXT NOT NULL, + event_type TEXT NOT NULL, + severity TEXT NOT NULL DEFAULT 'info', + created_at_utc TEXT NOT NULL, + repo_root_digest TEXT NOT NULL DEFAULT '', + run_id TEXT, + intent_id TEXT, + report_digest TEXT, + agent_label TEXT NOT NULL DEFAULT '', + agent_pid INTEGER NOT NULL DEFAULT 0, + status TEXT, + payload_json TEXT + ) + """) + conn.execute(""" + CREATE TABLE IF NOT EXISTS controller_meta ( + key TEXT PRIMARY KEY, + value TEXT NOT NULL + ) + """) + conn.execute( + "INSERT INTO controller_meta (key, value) VALUES ('schema_version', '1')" + ) + conn.execute( + "INSERT INTO controller_events " + "(event_id, event_type, severity, created_at_utc, " + "repo_root_digest, agent_label, agent_pid, status) " + "VALUES (?, ?, ?, ?, ?, ?, ?, ?)", + ( + "evt_old_1", + EVENT_PATCH_VERIFIED, + "info", + "2026-05-26T10:00:00Z", + "digest", + "agent", + 1, + "accepted", + ), + ) + conn.commit() + finally: + conn.close() + + summary = read_audit_summary(db_path=db_path, limit=50) + assert summary.total_events == 1 + assert summary.payload_footprint is None + assert summary.total_estimated_tokens is None + assert summary.token_encoding is None + assert summary.token_event_count == 0 + + +# ── reader.py: private helper edge cases ── + + +def test_reader_db_size_oserror(tmp_path: Path) -> None: + """_db_size returns 0 on missing path.""" + from codeclone.audit.reader import _db_size + + assert _db_size(tmp_path / "nonexistent.db") == 0 + + +def test_reader_int_or_none_bool() -> None: + """_int_or_none rejects bool (isinstance(True, int) is True in Python).""" + from codeclone.audit.reader import _int_or_none + + assert _int_or_none(True) is None + assert _int_or_none(False) is None + assert _int_or_none(42) == 42 + assert _int_or_none("text") is None + + +def test_reader_connect_error(tmp_path: Path) -> None: + """read_audit_summary 
wraps sqlite3.Error during connect (reader.py:90-91).""" + from unittest.mock import patch + + from codeclone.audit.validation import AuditReadError + + db_path = tmp_path / "audit.sqlite3" + db_path.write_text("") # file exists but triggers error + + with ( + patch("sqlite3.connect", side_effect=sqlite3.Error("disk I/O error")), + pytest.raises(AuditReadError, match="cannot open audit database"), + ): + read_audit_summary(db_path=db_path, limit=50) + + +def test_reader_count_none_result() -> None: + """_count returns 0 when fetchone yields None (reader.py:320).""" + from codeclone.audit.reader import _count + + conn = sqlite3.connect(":memory:") + # Empty table → COUNT(*) always returns a value, so we use a mock + from unittest.mock import MagicMock + + mock_conn = MagicMock() + mock_result = MagicMock() + mock_result.fetchone.return_value = None + mock_conn.execute.return_value = mock_result + assert _count(mock_conn, "SELECT COUNT(*) FROM t") == 0 + conn.close() + + +def test_reader_text_scalar_none_row() -> None: + """_text_scalar returns None when row is None (reader.py:328).""" + from unittest.mock import MagicMock + + from codeclone.audit.reader import _text_scalar + + mock_conn = MagicMock() + mock_result = MagicMock() + mock_result.fetchone.return_value = None + mock_conn.execute.return_value = mock_result + assert _text_scalar(mock_conn, "SELECT x FROM t") is None + + +def test_reader_int_meta_value_error() -> None: + """_int_meta returns None on non-numeric meta value (reader.py:338-339).""" + from codeclone.audit.reader import _int_meta + + conn = sqlite3.connect(":memory:") + conn.execute( + "CREATE TABLE IF NOT EXISTS audit_meta (key TEXT PRIMARY KEY, value TEXT)" + ) + conn.execute( + "INSERT INTO audit_meta (key, value) VALUES ('retention_days', 'not_a_number')" + ) + conn.commit() + result = _int_meta(conn, "retention_days") + assert result is None + conn.close() diff --git a/tests/test_cli_session_stats.py b/tests/test_cli_session_stats.py index 
1b21ba9d..23e917c1 100644 --- a/tests/test_cli_session_stats.py +++ b/tests/test_cli_session_stats.py @@ -128,6 +128,39 @@ def _render_session_stats_text(root: Path, *, quiet: bool) -> str: return printer.text +def _snapshot( + *, + agents: tuple[_AgentSnapshot, ...] = (), + workspace_health: str = "idle", + latest_run_id: str | None = None, + latest_run_health: int | None = None, + latest_run_findings: int | None = None, + latest_run_files: int | None = None, + latest_run_age_seconds: int | None = None, + cache_present: bool = False, + mcp_token_footprint: int | None = None, + mcp_token_encoding: str | None = None, + mcp_token_event_count: int = 0, +) -> session_stats_mod._SessionSnapshot: + return session_stats_mod._SessionSnapshot( + root=Path("/tmp/test"), + agents=agents, + stale_count=0, + expired_count=0, + recoverable_count=0, + latest_run_id=latest_run_id, + latest_run_health=latest_run_health, + latest_run_findings=latest_run_findings, + latest_run_files=latest_run_files, + latest_run_age_seconds=latest_run_age_seconds, + cache_present=cache_present, + workspace_health=workspace_health, + mcp_token_footprint=mcp_token_footprint, + mcp_token_encoding=mcp_token_encoding, + mcp_token_event_count=mcp_token_event_count, + ) + + # ── Quiet mode tests ── @@ -879,3 +912,205 @@ def raise_permission(pid: int, signal: int) -> None: assert _is_pid_alive(123) is False monkeypatch.setattr(os, "kill", raise_permission) assert _is_pid_alive(123) is True + + +# ── Token footprint in verbose plain mode ── + + +def test_session_stats_verbose_plain_with_token_footprint() -> None: + """Exercise plain verbose path with mcp_token_footprint (lines 277-278).""" + printer = _RecordingPrinter() + snapshot = _snapshot( + mcp_token_footprint=5000, + mcp_token_encoding="o200k_base", + mcp_token_event_count=10, + ) + + exit_code = session_stats_mod._render_verbose(printer, snapshot) + + assert exit_code == int(ExitCode.SUCCESS) + assert "MCP payload footprint" in printer.text + assert 
"5,000" in printer.text + assert "o200k_base" in printer.text + + +# ── Rich verbose with cached report + file count ── + + +def test_session_stats_rich_with_cached_report_and_files(tmp_path: Path) -> None: + """Exercise Rich path with latest_run_files (lines 298-301).""" + _write_report(tmp_path, health=92, files=100) + output = io.StringIO() + console = Console(file=output, force_terminal=True, color_system=None, width=120) + + exit_code = render_session_stats( + console=cast(PrinterLike, console), + root_path=tmp_path, + quiet=False, + ) + + assert exit_code == int(ExitCode.SUCCESS) + text = output.getvalue() + assert "report.json present" in text + assert "100 files" in text + + +# ── Rich verbose with token footprint ── + + +def test_session_stats_rich_with_token_footprint() -> None: + """Exercise Rich path with mcp_token_footprint (lines 312-313).""" + output = io.StringIO() + console = Console(file=output, force_terminal=True, color_system=None, width=120) + snapshot = _snapshot( + mcp_token_footprint=3000, + mcp_token_encoding="o200k_base", + mcp_token_event_count=7, + ) + + exit_code = session_stats_mod._render_verbose_rich( + cast(PrinterLike, console), snapshot + ) + + assert exit_code == int(ExitCode.SUCCESS) + text = output.getvalue() + assert "MCP payload footprint" in text + assert "3,000" in text + + +# ── Rich verbose with no live agents ── + + +def test_session_stats_rich_no_live_agents() -> None: + """Exercise Rich path with dead agent only (lines 329-330).""" + output = io.StringIO() + console = Console(file=output, force_terminal=True, color_system=None, width=120) + snapshot = _snapshot( + agents=( + _AgentSnapshot( + pid=999999, + start_epoch=int(time.time()), + label="dead-agent", + alive=False, + intents=(), + ), + ), + ) + + exit_code = session_stats_mod._render_verbose_rich( + cast(PrinterLike, console), snapshot + ) + + assert exit_code == int(ExitCode.SUCCESS) + text = output.getvalue() + assert "No live workspace agents found" in text 
+ + +# ── _latest_run_text with health and findings ── + + +def test_latest_run_text_with_health_and_findings() -> None: + """Exercise _latest_run_text branches (lines 377-384).""" + snapshot = _snapshot( + latest_run_id="abc12345", + latest_run_health=90, + latest_run_findings=5, + latest_run_files=100, + latest_run_age_seconds=120, + cache_present=True, + workspace_health="clean", + ) + + result = session_stats_mod._latest_run_text(snapshot) + + assert "abc12345" in result + assert "health=90" in result + assert "findings=5" in result + + +# ── _allowed_files_label ── + + +def test_allowed_files_label_empty() -> None: + """Exercise _allowed_files_label with empty tuple (line 389).""" + assert session_stats_mod._allowed_files_label(()) == "-" + + +def test_allowed_files_label_many() -> None: + """Exercise _allowed_files_label truncation (line 393).""" + files = tuple(f"src/{i}.py" for i in range(7)) + result = session_stats_mod._allowed_files_label(files) + assert "and 2 more" in result + + +# ── _ownership_style ── + + +def test_ownership_style_branches() -> None: + """Exercise all _ownership_style branches (lines 409-415).""" + assert session_stats_mod._ownership_style("own_active") == "green" + assert session_stats_mod._ownership_style("own_stale") == "green" + assert session_stats_mod._ownership_style("foreign_stale") == "yellow" + assert session_stats_mod._ownership_style("foreign_active") == "cyan" + assert session_stats_mod._ownership_style("recoverable") == "magenta" + assert session_stats_mod._ownership_style("unknown") == "dim" + + +# ── _resolve_mcp_tokens ── + + +def test_resolve_mcp_tokens_with_audit_data(tmp_path: Path) -> None: + """Exercise _resolve_mcp_tokens with existing audit DB (lines 585-592).""" + from codeclone.audit.events import ( + EVENT_PATCH_VERIFIED, + AuditEvent, + repo_root_digest, + ) + from codeclone.audit.writer import SqliteAuditWriter + + db_path = tmp_path / ".cache" / "codeclone" / "audit.sqlite3" + writer = 
SqliteAuditWriter(db_path=db_path, payloads="compact", retention_days=30) + try: + writer.emit( + AuditEvent( + event_type=EVENT_PATCH_VERIFIED, + severity="info", + repo_root_digest=repo_root_digest(tmp_path), + agent_pid=123, + agent_label="agent", + run_id="run123", + status="accepted", + payload={"data": "value"}, + ) + ) + finally: + writer.close() + + tokens, encoding, count = session_stats_mod._read_audit_token_footprint(tmp_path) + + assert tokens is not None + assert tokens > 0 + assert encoding is not None + assert count == 1 + + +def test_resolve_mcp_tokens_no_db(tmp_path: Path) -> None: + """_read_audit_token_footprint returns (None, None, 0) when no DB exists.""" + tokens, encoding, count = session_stats_mod._read_audit_token_footprint(tmp_path) + + assert tokens is None + assert encoding is None + assert count == 0 + + +def test_resolve_mcp_tokens_corrupt_db(tmp_path: Path) -> None: + """_read_audit_token_footprint tolerates corrupt audit storage.""" + db_path = tmp_path / ".cache" / "codeclone" / "audit.sqlite3" + db_path.parent.mkdir(parents=True) + db_path.write_text("NOT A DATABASE") + + tokens, encoding, count = session_stats_mod._read_audit_token_footprint(tmp_path) + + assert tokens is None + assert encoding is None + assert count == 0 From 4f04744eb02693c7762d1d75330736cb3566e87c Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Tue, 26 May 2026 19:32:32 +0500 Subject: [PATCH 036/318] test(mcp): cover session audit branches --- tests/test_mcp_service.py | 165 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 160 insertions(+), 5 deletions(-) diff --git a/tests/test_mcp_service.py b/tests/test_mcp_service.py index 06cca298..34175d0b 100644 --- a/tests/test_mcp_service.py +++ b/tests/test_mcp_service.py @@ -37,6 +37,7 @@ import codeclone.surfaces.mcp.service as mcp_service_mod import codeclone.surfaces.mcp.session as mcp_session_mod from codeclone.audit.events import AuditEvent +from codeclone.audit.writer import NullAuditWriter, 
SqliteAuditWriter from codeclone.baseline import Baseline, current_python_tag from codeclone.baseline.metrics_baseline import MetricsBaseline, MetricsBaselineStatus from codeclone.cache.store import Cache @@ -502,6 +503,18 @@ def close(self) -> None: return None +def _mcp_session_with_registered_run( + root: Path, + *, + run_id: str, +) -> tuple[mcp_session_mod.MCPSession, _RecordingAuditWriter, MCPRunRecord]: + audit = _RecordingAuditWriter() + service = mcp_session_mod.MCPSession(history_limit=4, audit_writer=audit) + record = _blast_radius_run_record(root, run_id=run_id) + service._runs.register(record) + return service, audit, record + + def _seed_patch_contract_intent( service: CodeCloneMCPService, root: Path, @@ -832,11 +845,10 @@ def test_mcp_service_analyze_repository_registers_latest_run(tmp_path: Path) -> def test_mcp_session_emits_audit_events_for_controller_flow(tmp_path: Path) -> None: - audit = _RecordingAuditWriter() - service = mcp_session_mod.MCPSession(history_limit=4, audit_writer=audit) - record = _blast_radius_run_record(tmp_path, run_id="audit1234567890") - service._runs.register(record) - + service, audit, record = _mcp_session_with_registered_run( + tmp_path, + run_id="audit1234567890", + ) declared = service.manage_change_intent( action="declare", run_id=record.run_id, @@ -874,6 +886,149 @@ def test_mcp_session_emits_audit_events_for_controller_flow(tmp_path: Path) -> N assert all(str(tmp_path) not in event.repo_root_digest for event in audit.events) +def test_mcp_session_resolves_agent_label_from_client_info() -> None: + service = mcp_session_mod.MCPSession(history_limit=4) + service._fastmcp = SimpleNamespace( + get_context=lambda: SimpleNamespace( + session=SimpleNamespace( + client_params=SimpleNamespace( + clientInfo=SimpleNamespace(name="codex", version="2.1") + ) + ) + ) + ) + assert service._resolve_agent_label() == "codex/2.1" + + service._fastmcp = SimpleNamespace( + get_context=lambda: SimpleNamespace( + session=SimpleNamespace( 
+ client_params=SimpleNamespace( + clientInfo=SimpleNamespace(name="codex", version="") + ) + ) + ) + ) + assert service._resolve_agent_label() == "codex" + + service._fastmcp = SimpleNamespace( + get_context=lambda: SimpleNamespace( + session=SimpleNamespace( + client_params=SimpleNamespace(clientInfo=SimpleNamespace(name="")) + ) + ) + ) + assert service._resolve_agent_label() == f"pid-{service._agent_pid}" + + +def test_mcp_session_audit_writer_config_paths( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + service = mcp_session_mod.MCPSession(history_limit=4) + configs: list[dict[str, object] | Exception] = [ + ConfigValidationError("bad config"), + {"audit_enabled": False}, + {"audit_enabled": True, "audit_path": "../bad.sqlite3"}, + { + "audit_enabled": True, + "audit_path": "audit.sqlite3", + "audit_payloads": "full", + "audit_retention_days": 1, + }, + ] + + def load_config(_root: Path) -> dict[str, object]: + value = configs.pop(0) + if isinstance(value, Exception): + raise value + return value + + monkeypatch.setattr(mcp_session_mod, "load_pyproject_config", load_config) + writer_types: list[type[object]] = [] + for _ in range(4): + writer = service._build_audit_writer(tmp_path) + writer_types.append(type(writer)) + writer.close() + + assert writer_types == [ + NullAuditWriter, + NullAuditWriter, + NullAuditWriter, + SqliteAuditWriter, + ] + + +def test_mcp_session_audit_writer_for_root_caches_writer( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + service = mcp_session_mod.MCPSession(history_limit=4) + monkeypatch.setattr( + mcp_session_mod, + "load_pyproject_config", + lambda _root: { + "audit_enabled": True, + "audit_path": "audit.sqlite3", + "audit_payloads": "compact", + "audit_retention_days": 1, + }, + ) + + first = service._audit_writer_for_root(tmp_path) + second = service._audit_writer_for_root(tmp_path) + + assert first is second + first.close() + + +def test_mcp_session_audit_emit_swallows_writer_errors( + 
tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + service = mcp_session_mod.MCPSession(history_limit=4) + + def raise_writer(_root: Path) -> _RecordingAuditWriter: + raise RuntimeError("audit unavailable") + + monkeypatch.setattr(service, "_audit_writer_for_root", raise_writer) + + service._audit_emit( + root=tmp_path, + event_type="intent.declared", + severity="warn", + payload={"status": "active"}, + ) + + +def test_mcp_session_renews_latest_active_intent(tmp_path: Path) -> None: + service, audit, record = _mcp_session_with_registered_run( + tmp_path, + run_id="renew1234567890", + ) + declared = service.manage_change_intent( + action="declare", + run_id=record.run_id, + scope={"allowed_files": ["pkg/a.py"]}, + intent="renew before long work", + expected_effects=["no new clone group"], + ) + + renewed = service.manage_change_intent(action="renew", lease_seconds=120) + + assert renewed["intent_id"] == declared["intent_id"] + assert renewed["lease_renewed"] is True + assert renewed["lease_seconds"] == 120 + assert renewed["lease_expires_at_utc"] is not None + assert audit.events[-1].event_type == "intent.renewed" + + +def test_mcp_session_renew_requires_active_intent() -> None: + service = mcp_session_mod.MCPSession(history_limit=4) + + with pytest.raises(MCPServiceContractError, match="requires intent_id"): + service.manage_change_intent(action="renew") + + def test_mcp_service_summary_explains_untrusted_baseline_python_tag_mismatch( tmp_path: Path, ) -> None: From 2b3aaf889f134ccfb5bd0aa52e6a3a9effaca153 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Tue, 26 May 2026 21:56:31 +0500 Subject: [PATCH 037/318] fix(ci)): fixed test crashes due to missing extras --- .github/workflows/tests.yml | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 6acb4a4d..382fa904 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -35,7 
+35,7 @@ jobs: enable-cache: true - name: Install dependencies - run: uv sync --extra dev --extra mcp + run: uv sync --extra dev --extra mcp --extra token-bench - name: Run tests # Smoke CLI tests intentionally disable subprocess coverage collection diff --git a/pyproject.toml b/pyproject.toml index aae207cf..cf5136e0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -144,7 +144,7 @@ api_surface = true golden_fixture_paths = ["tests/fixtures/golden_*"] min_typing_coverage = 99 audit_enabled = true -audit_path = ".cache/codeclone/audit.sqlite3" +audit_path = ".cache/codeclone/db/audit.sqlite3" audit_payloads = "full" # "off" | "compact" | "full" audit_retention_days = 30 From 300e2253b63baf32af44d142135adb8b95e60fa9 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Tue, 26 May 2026 21:56:49 +0500 Subject: [PATCH 038/318] chore(docs): update README.md --- README.md | 390 +++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 272 insertions(+), 118 deletions(-) diff --git a/README.md b/README.md index 37a84ca6..559b82cc 100644 --- a/README.md +++ b/README.md @@ -10,67 +10,66 @@ srcset="https://raw.githubusercontent.com/orenlab/codeclone/main/docs/assets/codeclone-wordmark.svg" > CodeClone

Structural Change Controller for AI-assisted Python development

+

Deterministic structural review, baseline-aware CI gates,
and explicit change boundaries for coding agents — one canonical report, every surface.

-[![][pypi-shield]][pypi-link] [![][python-shield]][pypi-link] [![][downloads-shield]][pypi-link] [![][tests-shield]][tests-link] [![][license-shield]][license-link] +[![][pypi-shield]][pypi-link] [![][status-shield]][pypi-link] [![][downloads-shield]][pypi-link] [![][python-shield]][pypi-link] [![][license-shield]][license-link] + +[![][tests-shield]][tests-link] [![][benchmark-shield]][benchmark-link] --- -CodeClone is a **deterministic structural review layer for Python**. - -It gives humans and AI coding agents one canonical view of structural code quality: -clone findings, code-health metrics, baseline-aware CI gates, coverage context, -public API changes, and a **Structural Change Controller** that starts before a -diff exists. +**CodeClone** is a deterministic structural review layer and change controller for Python. It gives human reviewers and +AI coding agents one canonical view of structural code quality — clone findings, complexity and coupling metrics, +baseline-aware CI gates, coverage join, public API diffs — and adds a **Structural Change Controller** that starts +working *before* a diff exists. -The controller lets agents declare intent, inspect structural blast radius, -stay inside explicit edit boundaries, verify the patch after editing, and leave -an auditable review receipt. +The Controller lets coding agents declare intent, inspect structural blast radius, stay inside explicit edit boundaries, +verify the patch after editing, validate review claims against the canonical report, and leave an auditable review +receipt. Every decision is grounded in deterministic structural facts — never in LLM judgment about what is "safe to +change". -One canonical analysis, many surfaces: **CLI, HTML reports, JSON, SARIF, MCP, -VS Code, Claude Desktop, Codex, and CI**. Humans and agents operate on the same -deterministic facts. +One analysis, many surfaces: **CLI, HTML, JSON, SARIF, Markdown, MCP, VS Code, Claude Desktop, Codex, GitHub Action, and +CI**. 
Humans and agents operate on the same facts. Docs: [orenlab.github.io/codeclone](https://orenlab.github.io/codeclone/) · -[Live report](https://orenlab.github.io/codeclone/examples/report/) +Live sample report: [orenlab.github.io/codeclone/examples/report/](https://orenlab.github.io/codeclone/examples/report/) > [!NOTE] > This README tracks the in-development **v2.1** line. -> For the latest stable release see the -> [`v2.0.2` README](https://github.com/orenlab/codeclone/blob/v2.0.2/README.md). +> For the latest stable release see the [`v2.0.2` README](https://github.com/orenlab/codeclone/blob/v2.0.2/README.md). ## Why CodeClone -AI coding agents do not just write code faster. They also expand scope faster. +AI coding agents do not just write code faster. They expand scope faster. -A prompt asks for one change. The agent edits the target file, touches another -module because it is "related", updates a helper, changes tests, and the final -diff still looks plausible. The problem is not speed. The problem is silent -scope expansion. +A prompt asks for one change. The agent edits the target file, touches another module because it looks "related", +updates a helper, rewrites a few tests — and the final diff still looks plausible. The problem is not speed. The problem +is **silent scope expansion**. -CodeClone introduces a Structural Change Controller for that workflow: +CodeClone provides a Structural Change Controller for that workflow: ```text declare intent -→ inspect blast radius -→ constrain edit scope -→ edit -→ verify patch contract -→ validate claims -→ leave review receipt + → inspect structural blast radius + → constrain edit scope + → edit + → verify patch contract + → validate review claims + → leave auditable receipt ``` -CodeClone does not replace the agent and does not use LLM judgment to decide -what is safe. It gives the agent deterministic structural boundaries before the -diff exists, then verifies whether the resulting patch stayed inside them. 
+CodeClone does not replace the agent and does not use LLM judgment to decide what is safe. It gives the agent +**deterministic structural boundaries before the diff exists**, then verifies whether the resulting patch stayed inside +them. The same control surface protects human reviewers, CI pipelines, and pre-merge gates. ## Install @@ -78,8 +77,12 @@ diff exists, then verifies whether the resulting patch stayed inside them. uv tool install codeclone # recommended pip install codeclone # or pip -# with MCP server for AI agents / IDE clients +# with MCP server for AI agents and IDE clients uv tool install "codeclone[mcp]" +pip install "codeclone[mcp]" + +# with token-accurate MCP payload sizing (adds tiktoken) +uv tool install "codeclone[mcp,token-bench]" ```
@@ -95,7 +98,8 @@ uvx codeclone@latest . ```bash codeclone . # analyze current directory -codeclone . --html --open-html-report # HTML report in browser +codeclone . --html --open-html-report # interactive HTML report +codeclone . --json --md --sarif --text # all report formats codeclone . --ci # CI mode: baseline-aware gating ``` @@ -103,71 +107,101 @@ codeclone . --ci # CI mode: baseline-aware gating More commands ```bash -codeclone . --json --md --sarif --text # all report formats -codeclone . --changed-only --diff-against main # changed-scope review +# Changed-scope review against a branch +codeclone . --changed-only --diff-against main +codeclone . --paths-from-git-diff HEAD~1 -# Structural Change Controller CLI surface +# Timestamped report snapshots +codeclone . --html --json --timestamped-report-paths + +# Structural Change Controller — CLI surface codeclone . --blast-radius codeclone/core/parser.py codeclone . --patch-verify --diff-against HEAD~1 ```
+## How It Works + +
+Pipeline overview +
+CodeClone pipeline — parse, analyze, fuse, report, gate +
+ +CodeClone produces **one canonical JSON report** and renders it through every surface — CLI, HTML, Markdown, SARIF, MCP, +IDE extensions, GitHub Action, CI. The same deterministic facts drive human review, baseline-aware gates, and agent +workflows. The canonical report is the source of truth; surfaces render, filter, and explain it without creating a +second analysis engine. + +Architecture: [Architecture narrative](https://orenlab.github.io/codeclone/architecture/) · +CFG semantics: [CFG semantics](https://orenlab.github.io/codeclone/cfg/) + ## Structural Change Controller -The Controller governs AI-assisted edits before they become invisible diffs. +The Controller governs AI-assisted edits before they become invisible diffs. Every stage is deterministic — structural +facts come from the canonical report, not from LLM inference. -| Stage | Surface | Purpose | -|----------------------|-------------------------------------------|-------------------------------------------------------------------------| -| Declare intent | `manage_change_intent` | Agent states intended scope before editing | -| Map blast radius | `get_blast_radius` / `--blast-radius` | Reverse imports, clone cohorts, review context, do-not-touch boundaries | -| Check patch contract | `check_patch_contract` / `--patch-verify` | Pre-edit budget and post-edit structural verification | -| Generate receipt | `create_review_receipt` | Auditable artifact: intent, scope, blast radius, patch outcome | -| Validate claims | `validate_review_claims` | Cross-check review text against cited report facts | -| Coordinate workspace | workspace intent registry | Make active declared scopes visible across MCP processes | +| Stage | Surface | Purpose | +|-----------------------------|--------------------------------------------------|-------------------------------------------------------------------------| +| **Declare intent** | `manage_change_intent` | Agent states intended scope and rationale before editing | +| 
**Map blast radius** | `get_blast_radius` · `--blast-radius` | Reverse imports, clone cohorts, review context, do-not-touch boundaries | +| **Check patch contract** | `check_patch_contract` · `--patch-verify` | Pre-edit budget check and post-edit structural verification | +| **Validate claims** | `validate_review_claims` | Cross-check review text against cited report facts | +| **Generate receipt** | `create_review_receipt` | Auditable artifact: intent, scope, blast radius, patch outcome | +| **Coordinate workspace** | workspace intent registry | Make active declared scopes visible across MCP processes | +| **Audit controller events** | optional audit trail | Record passive workflow events and MCP payload footprint when enabled | -Every step is deterministic: structural facts come from the canonical report, -not from LLM inference. +Intent execution is **session-local**. Cross-agent visibility is optional, advisory, TTL/lease-bound, and stored as +ephemeral workspace coordination state under `.cache/codeclone/intents/`. -Intent execution is session-local. Cross-agent visibility is optional, -advisory, TTL/lease-bound, and stored as ephemeral workspace coordination state -under `.cache/codeclone/intents/`. CodeClone never mutates source files, -baselines, generated reports, or analysis cache through MCP. +The optional audit trail records controller events and estimated MCP payload footprint when enabled. It is **not +canonical analysis truth** and does not affect gates, baselines, report digests, cache compatibility, or finding +identity. + +CodeClone never mutates source files, baselines, generated reports, or analysis cache through MCP — read-only by +contract. 
[Structural Change Controller docs](https://orenlab.github.io/codeclone/book/24-structural-change-controller/) ## What CodeClone Reviews -| Category | What | -|-------------------------|--------------------------------------------------------------------------------------------------------------------------------| -| **Clone structure** | Function clones using CFG fingerprints, block clones using statement windows, segment clones as report-only review context | -| **Structural findings** | Duplicated branch families, clone guard/exit divergence, clone-cohort drift | -| **Quality metrics** | Cyclomatic complexity, coupling (CBO), cohesion (LCOM4), dependency cycles, adaptive dependency depth, dead code, health score | -| **Baseline governance** | Separates accepted legacy debt from new regressions so CI fails only on what got worse | -| **Coverage Join** | Fuses external Cobertura XML into the current run to surface untested hotspots and coverage scope gaps | -| **Adoption and API** | Type/docstring adoption, public API surface inventory, baseline-aware API break detection | -| **Security Surfaces** | Report-only inventory of security-relevant capability boundaries without vulnerability claims | -| **Design signals** | Overloaded modules and other report-only structural review context | +| Category | What it covers | +|-------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------| +| **Clone detection** | Function clones via CFG fingerprints, block clones via statement windows, segment clones as report-only review context | +| **Structural findings** | Duplicated branch families, clone guard/exit divergence, clone-cohort drift | +| **Quality metrics** | Cyclomatic complexity, coupling (CBO), cohesion (LCOM4), dependency cycles, adaptive depth profile, dead code, overall health score, overloaded-module profile | +| **Baseline governance** | 
Separates accepted legacy debt from new regressions — CI fails only on what got worse | +| **Coverage Join** | Fuses external Cobertura XML into the current run to surface untested hotspots and coverage scope gaps | +| **Adoption & API** | Type and docstring annotation coverage, public API surface inventory, baseline-aware API break detection | +| **Security Surfaces** | Report-only inventory of security-relevant capability boundaries — no vulnerability claims | +| **Design signals** | Overloaded modules and other report-only structural review context | ## Baseline-Aware CI ```bash -# 1. Generate baseline (commit to repo) +# 1. Generate baseline once (commit it to your repo) codeclone . --update-baseline -# 2. Enforce it in CI +# 2. Enforce it on every push codeclone . --ci ``` -`--ci` equals `--fail-on-new --no-color --quiet`. When a trusted metrics baseline -is present, it also enables `--fail-on-new-metrics`. +`--ci` is equivalent to `--fail-on-new --no-color --quiet`. When a trusted metrics baseline is present, it also enables +`--fail-on-new-metrics`. > [!TIP] -> Run `codeclone . --update-baseline` once after install. Commit the baseline -> file — it becomes the contract CI enforces on every push. +> Run `codeclone . --update-baseline` once after install. Commit the baseline file — it becomes the contract CI enforces +> on every push, separating accepted legacy debt from real regressions. ### GitHub Action +CodeClone ships a composite Action for PR and CI workflows: + ```yaml - uses: orenlab/codeclone/.github/actions/codeclone@v2 with: @@ -176,24 +210,27 @@ is present, it also enables `--fail-on-new-metrics`. pr-comment: "true" ``` -Runs gating, generates reports, uploads SARIF to GitHub Code Scanning, and posts -or updates a PR summary. +The Action runs baseline-aware gating, generates JSON and SARIF reports, uploads SARIF to GitHub Code Scanning, and +posts or updates a PR summary comment. 
[Action docs](https://github.com/orenlab/codeclone/blob/main/.github/actions/codeclone/README.md) ### Quality Gates ```bash -# Structural thresholds -codeclone . --fail-complexity 20 --fail-coupling 10 --fail-cohesion 4 -codeclone . --fail-cycles --fail-dead-code --fail-health 60 +# Structural metric thresholds +codeclone . --fail-complexity 20 --fail-coupling 10 --fail-cohesion 4 --fail-health 60 +codeclone . --fail-cycles --fail-dead-code -# Baseline-aware metric regression detection +# Baseline-aware regression detection codeclone . --fail-on-new-metrics codeclone . --fail-on-typing-regression --fail-on-docstring-regression -# API and coverage governance +# Adoption and API governance +codeclone . --min-typing-coverage 80 --min-docstring-coverage 60 codeclone . --api-surface --fail-on-api-break + +# Coverage Join — fuse external Cobertura XML into the review codeclone . --coverage coverage.xml --fail-on-untested-hotspots --coverage-min 50 ``` @@ -216,38 +253,44 @@ repos: ## MCP Control Surface -CodeClone ships a 26-tool MCP control surface for AI agents and IDE clients. +CodeClone ships an MCP control surface for AI agents and IDE clients, built on the same canonical pipeline as the CLI. +Canonical analysis is **read-only by contract**: MCP tools never mutate source, baselines, generated reports, or +analysis cache. Controller state is session-local or ephemeral workspace coordination state. -Canonical analysis remains read-only by contract: MCP tools never mutate source -files, baselines, generated reports, or analysis cache. Controller state is -session-local or ephemeral workspace coordination state. +- **26 tools across 6 workflow phases** — *analyze → triage → drill down → focused checks → change control → session*. + Triage-first design avoids dumping the full report into agent context. 
+- **Stable read-only resources** — `codeclone://latest/*` and `codeclone://runs/{run_id}/*` URIs return deterministic + projections (summary, report, health, gates, changed, triage, schema) without re-triggering analysis. +- **Bounded in-memory run history** — `--history-limit` (default `4`, max `10`); session-local, does not survive + restart. +- **Run identity from canonical report integrity digest** — same digest, same run, same projection across every tool and + resource. ```bash -codeclone-mcp --transport stdio # local clients +codeclone-mcp --transport stdio # local clients (IDE, agents) codeclone-mcp --transport streamable-http # HTTP transport ``` > [!WARNING] -> Analysis tools require an absolute repository root. Relative roots like `.` are rejected. -> Keep `stdio` as the default transport for local IDE and agent clients. HTTP exposure beyond -> loopback requires explicit `--allow-remote`. +> Analysis tools require an absolute repository root. Relative roots such as `.` are rejected. +> Keep `stdio` as the default transport for local IDE and agent clients; HTTP exposure beyond loopback requires explicit `--allow-remote`. 
[MCP usage guide](https://orenlab.github.io/codeclone/mcp/) · [MCP interface contract](https://orenlab.github.io/codeclone/book/20-mcp-interface/) ### Native Agent and IDE Clients -| Surface | Install | Docs | -|--------------------|------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------| -| **VS Code** | [Marketplace](https://marketplace.visualstudio.com/items?itemName=orenlab.codeclone) | [Guide](https://orenlab.github.io/codeclone/book/21-vscode-extension/) | -| **Claude Desktop** | [`extensions/claude-desktop-codeclone/`](https://github.com/orenlab/codeclone/tree/main/extensions/claude-desktop-codeclone) | [Guide](https://orenlab.github.io/codeclone/book/22-claude-desktop-bundle/) | -| **Codex** | [`orenlab/codeclone-codex`](https://github.com/orenlab/codeclone-codex) | [Guide](https://orenlab.github.io/codeclone/book/23-codex-plugin/) | +| Surface | Install | Docs | +|---------------------------|------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------| +| **VS Code extension** | [VS Code Marketplace](https://marketplace.visualstudio.com/items?itemName=orenlab.codeclone) | [Guide](https://orenlab.github.io/codeclone/book/21-vscode-extension/) | +| **Claude Desktop bundle** | [`extensions/claude-desktop-codeclone/`](https://github.com/orenlab/codeclone/tree/main/extensions/claude-desktop-codeclone) | [Guide](https://orenlab.github.io/codeclone/book/22-claude-desktop-bundle/) | +| **Codex plugin** | [`orenlab/codeclone-codex`](https://github.com/orenlab/codeclone-codex) | [Guide](https://orenlab.github.io/codeclone/book/23-codex-plugin/) | All clients connect to the same `codeclone-mcp` contract — no second analysis engine. 
## Reports -All report formats render from one canonical JSON payload. +All formats render from one canonical JSON payload — same facts, different audiences. | Format | Flag | Default path | |----------|-----------|---------------------------------| @@ -261,12 +304,100 @@ All report formats render from one canonical JSON payload. codeclone . --html --json --md --sarif --text ``` +`--open-html-report` opens the HTML in the default browser. `--timestamped-report-paths` appends a UTC timestamp to +default filenames. + [Report contract](https://orenlab.github.io/codeclone/book/08-report/) · [HTML render](https://orenlab.github.io/codeclone/book/10-html-render/) +
+Canonical JSON report shape (v2.11) + +Top-level keys: `report_schema_version`, `meta`, `inventory`, `findings`, `metrics`, `derived`, `integrity`. + +```json +{ + "report_schema_version": "2.11", + "meta": { + "codeclone_version": "...", + "project_name": "...", + "scan_root": ".", + "...": "..." + }, + "inventory": { + "files": {}, + "code": {}, + "file_registry": { + "encoding": "relative_path", + "items": [] + } + }, + "findings": { + "summary": {}, + "groups": { + "clones": { + "functions": [], + "blocks": [], + "segments": [] + }, + "structural": { + "groups": [] + }, + "dead_code": { + "groups": [] + }, + "design": { + "groups": [] + } + } + }, + "metrics": { + "summary": { + "coverage_adoption": {}, + "coverage_join": {}, + "api_surface": {} + }, + "families": { + "coverage_adoption": {}, + "coverage_join": {}, + "api_surface": {} + } + }, + "derived": { + "suggestions": [], + "overview": { + "families": {}, + "top_risks": [], + "health_snapshot": {}, + "directory_hotspots": {} + }, + "hotlists": { + "most_actionable_ids": [], + "highest_spread_ids": [], + "production_hotspot_ids": [] + } + }, + "integrity": { + "canonicalization": { + "version": "1", + "scope": "canonical_only" + }, + "digest": { + "algorithm": "sha256", + "verified": true, + "value": "..." + } + } +} +``` + +Full schema contract: [Report contract](https://orenlab.github.io/codeclone/book/08-report/) + +
+ ## Configuration -CodeClone loads project-level configuration from `pyproject.toml`. +CodeClone loads project-level configuration from `pyproject.toml`: ```toml [tool.codeclone] @@ -282,18 +413,23 @@ segment_min_loc = 20 segment_min_stmt = 10 golden_fixture_paths = ["tests/fixtures/golden_*"] - -html_out = ".cache/codeclone/report.html" -json_out = ".cache/codeclone/report.json" -md_out = ".cache/codeclone/report.md" -sarif_out = ".cache/codeclone/report.sarif" -text_out = ".cache/codeclone/report.txt" ``` -Precedence: CLI flags > `pyproject.toml` > built-in defaults. +Precedence: **CLI flags > `pyproject.toml` > built-in defaults**. [Config reference](https://orenlab.github.io/codeclone/book/04-config-and-defaults/) +## Baseline Workflow + +Baselines capture the current duplication and metrics state. Once committed, they become the CI reference point. + +- Clones are classified as **NEW** (not in baseline) or **KNOWN** (accepted debt) +- `--update-baseline` writes both clone and metrics snapshots +- Trust is verified via `generator`, `fingerprint_version`, and `payload_sha256` +- In `--ci` mode, an untrusted baseline is a contract error (exit `2`) + +[Baseline contract](https://orenlab.github.io/codeclone/book/06-baseline/) + ## Exit Codes | Code | Meaning | @@ -309,9 +445,8 @@ Contract errors (`2`) take precedence over gating failures (`3`). 
## Inline Suppressions -When a symbol is invoked through runtime dynamics — framework callbacks, -plugin loading, reflection — suppress a known false positive at the declaration -site: +When a symbol is invoked through runtime dynamics — framework callbacks, plugin loading, reflection — suppress the known +false positive at the declaration site: ```python # codeclone: ignore[dead-code] @@ -326,21 +461,46 @@ class Middleware: # codeclone: ignore[dead-code] [Inline suppressions](https://orenlab.github.io/codeclone/book/19-inline-suppressions/) · [Dead-code contract](https://orenlab.github.io/codeclone/book/16-dead-code-contract/) +## Controller Audit and MCP Payload Footprint + +When enabled, the optional controller audit trail records passive workflow events: declared intents, blast radius +summaries, patch budget checks, patch verification, claim validation, review receipts, and workspace coordination +events. + +It can also record **estimated CodeClone MCP payload tokens** for each request. With the `codeclone[token-bench]` extra +installed, estimation uses `tiktoken` (`o200k_base`); without it, the estimator falls back to a character-based +approximation (`ceil(chars / 4)`). This measures the deterministic payload footprint emitted by CodeClone — not actual +model billing tokens for a full client session. + +Audit is **disabled by default** and never affects controller decisions, gates, baselines, report digests, cache +compatibility, or finding identity. + ## Benchmarking +
+Reproducible Docker benchmark + ```bash ./benchmarks/run_docker_benchmark.sh ``` -The Docker benchmark writes reproducible results to +The wrapper builds `benchmarks/Dockerfile`, runs isolated container benchmarks, and writes results to `.cache/benchmarks/codeclone-benchmark.json`. +Pin the benchmark envelope via environment overrides: + ```bash CPUSET=0 CPUS=1.0 MEMORY=2g RUNS=16 WARMUPS=4 \ ./benchmarks/run_docker_benchmark.sh ``` -[Benchmarking contract](https://orenlab.github.io/codeclone/book/18-benchmarking/) +The MCP token footprint benchmark estimates deterministic CodeClone-emitted payload size. It does not claim actual model +billing tokens. + +Performance claims are backed by the reproducible benchmark workflow documented in +the [Benchmarking contract](https://orenlab.github.io/codeclone/book/18-benchmarking/). + +
## Documentation @@ -359,32 +519,26 @@ Quick links: - **Code:** MPL-2.0 (`LICENSE`) - **Documentation and docs-site content:** MIT (`LICENSE-MIT`) -Versions released before the license change remain under their original terms. - ## Links -[Docs](https://orenlab.github.io/codeclone/) · -[PyPI](https://pypi.org/project/codeclone/) · -[Issues](https://github.com/orenlab/codeclone/issues) · -[Discussions](https://github.com/orenlab/codeclone/discussions) · -[License scope map](https://github.com/orenlab/codeclone/blob/main/LICENSES.md) +- **Docs:** <https://orenlab.github.io/codeclone/> +- **PyPI:** <https://pypi.org/project/codeclone/> +- **Issues:** <https://github.com/orenlab/codeclone/issues> +- **Discussions:** <https://github.com/orenlab/codeclone/discussions> +- **Licenses:** [MPL-2.0](https://github.com/orenlab/codeclone/blob/main/LICENSE) + · [MIT docs](https://github.com/orenlab/codeclone/blob/main/LICENSE-MIT) + · [Scope map](https://github.com/orenlab/codeclone/blob/main/LICENSES.md) - [pypi-shield]: https://img.shields.io/pypi/v/codeclone?style=flat-square&color=6366f1 - +[status-shield]: https://img.shields.io/pypi/status/codeclone?style=flat-square&color=6366f1 [downloads-shield]: https://img.shields.io/pypi/dm/codeclone?style=flat-square&color=6366f1 - [python-shield]: https://img.shields.io/pypi/pyversions/codeclone?style=flat-square&color=6366f1 - [license-shield]: https://img.shields.io/badge/license-MPL--2.0-6366f1?style=flat-square - [tests-shield]: https://img.shields.io/github/actions/workflow/status/orenlab/codeclone/tests.yml?branch=main&style=flat-square&label=tests - +[benchmark-shield]: https://img.shields.io/github/actions/workflow/status/orenlab/codeclone/benchmark.yml?style=flat-square&label=benchmark - [pypi-link]: https://pypi.org/project/codeclone/ - [license-link]: #license - [tests-link]: https://github.com/orenlab/codeclone/actions/workflows/tests.yml +[benchmark-link]: https://github.com/orenlab/codeclone/actions/workflows/benchmark.yml From e953d942de3bda12c402b525357519b650593271 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Tue, 26 May 2026 22:11:10 +0500 Subject: [PATCH 039/318] fix(ci): 
fixed test crashes due to missing extras --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 382fa904..dfe39a44 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -67,7 +67,7 @@ jobs: enable-cache: true - name: Install dependencies - run: uv sync --extra dev --extra mcp + run: uv sync --extra dev --extra mcp --extra token-bench - name: Ruff run: uv run ruff check . From 1bc9ffe4cff055125af3674aea424ef839490200 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Wed, 27 May 2026 14:08:00 +0500 Subject: [PATCH 040/318] fix(mcp): harden workspace intent registry against path traversal --- codeclone/surfaces/mcp/_workspace_intents.py | 27 +++++-- tests/test_workspace_intents.py | 75 ++++++++++++++++++++ 2 files changed, 96 insertions(+), 6 deletions(-) diff --git a/codeclone/surfaces/mcp/_workspace_intents.py b/codeclone/surfaces/mcp/_workspace_intents.py index 38fc72e9..2d2cf574 100644 --- a/codeclone/surfaces/mcp/_workspace_intents.py +++ b/codeclone/surfaces/mcp/_workspace_intents.py @@ -9,6 +9,7 @@ import hashlib import hmac import os +import re from collections.abc import Mapping, Sequence from dataclasses import dataclass, replace from datetime import datetime, timedelta, timezone @@ -30,6 +31,7 @@ MIN_LEASE_SECONDS: Final = 60 MAX_LEASE_SECONDS: Final = 600 _HEX_DIGEST_LENGTH: Final = 64 +_SAFE_INTENT_ID_RE: Final = re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9._-]{0,127}$") class WorkspaceIntentStatus(str, Enum): @@ -282,6 +284,8 @@ def validate_workspace_record(data: object) -> WorkspaceIntentRecord | None: if version not in {REGISTRY_VERSION, LEGACY_REGISTRY_VERSION}: return None intent_id = _required_string(data.get("intent_id")) + if not _is_safe_intent_id(intent_id): + return None agent_pid = _positive_int(data.get("agent_pid")) agent_start_epoch = _positive_int(data.get("agent_start_epoch")) agent_label = 
_string_value(data.get("agent_label")) @@ -474,17 +478,18 @@ def remove_workspace_intent( start_epoch: int, intent_id: str, ) -> bool: - path = intent_path( + """Remove a workspace intent file with path-containment safety. + + Delegates to :func:`safe_remove_own_intent` which validates that the + constructed path resolves inside the registry directory, rejects + symlink indirection, and checks filename structure before unlinking. + """ + return safe_remove_own_intent( root=root, pid=pid, start_epoch=start_epoch, intent_id=intent_id, ) - try: - path.unlink(missing_ok=True) - except OSError: - return False - return True def remove_workspace_record(*, root: Path, record: WorkspaceIntentRecord) -> bool: @@ -991,6 +996,16 @@ def _valid_lease_seconds(value: object) -> int | None: return parsed +def _is_safe_intent_id(value: object) -> bool: + """Return True if *value* is a safe intent identifier. + + Rejects path separators, traversal components, control characters, + and empty strings. Accepts only ``[a-zA-Z0-9._-]`` with an + alphanumeric first character, max 128 chars. 
+ """ + return isinstance(value, str) and _SAFE_INTENT_ID_RE.match(value) is not None + + def _is_hex_digest(value: object) -> bool: if not isinstance(value, str) or len(value) != _HEX_DIGEST_LENGTH: return False diff --git a/tests/test_workspace_intents.py b/tests/test_workspace_intents.py index aeb415b6..c73bb74e 100644 --- a/tests/test_workspace_intents.py +++ b/tests/test_workspace_intents.py @@ -1127,3 +1127,78 @@ def test_workspace_intent_max_lease_seconds_ceiling() -> None: assert workspace_intents.MAX_LEASE_SECONDS == 600 assert workspace_intents.resolved_lease_seconds(9999) == 600 assert workspace_intents.resolved_lease_seconds(60) == 60 + + +# ── intent_id format validation (path traversal hardening) ── + + +class TestSafeIntentId: + """_is_safe_intent_id rejects path traversal and control characters.""" + + @pytest.mark.parametrize( + "value", + [ + "intent-abcdef12-001", + "intent-run12345-003", + "simple", + "a", + "with.dot", + "with_underscore", + "A123-B456", + ], + ) + def test_accepts_safe_ids(self, value: str) -> None: + assert workspace_intents._is_safe_intent_id(value) is True + + @pytest.mark.parametrize( + ("value", "reason"), + [ + ("../../etc/passwd", "path traversal with ../"), + ("../target", "single-level traversal"), + ("foo/bar", "forward slash"), + ("foo\\bar", "backslash"), + ("", "empty string"), + ("-starts-with-dash", "leading dash"), + (".starts-with-dot", "leading dot"), + ("has\x00null", "NUL byte"), + ("has\nnewline", "newline"), + ("has space", "space"), + (None, "None value"), + (42, "integer"), + ("x" * 129, "too long (129 chars)"), + ], + ) + def test_rejects_unsafe_ids(self, value: object, reason: str) -> None: + assert workspace_intents._is_safe_intent_id(value) is False, reason + + def test_max_length_boundary(self) -> None: + assert workspace_intents._is_safe_intent_id("a" * 128) is True + assert workspace_intents._is_safe_intent_id("a" * 129) is False + + +def 
test_validate_workspace_record_rejects_traversal_intent_id() -> None: + """validate_workspace_record rejects intent_id with path separators.""" + malicious = _record(intent_id="../../etc/passwd") + payload = malicious.signed_payload() + assert workspace_intents.validate_workspace_record(payload) is None + + +def test_remove_workspace_intent_rejects_traversal(tmp_path: Path) -> None: + """remove_workspace_intent returns False for traversal intent_id. + + The function delegates to safe_remove_own_intent which validates + path containment. A crafted intent_id must not cause deletion + outside the registry directory. + """ + # Create a sentinel file that a traversal would target + sentinel = tmp_path / "do_not_delete.json" + sentinel.write_text("{}") + + result = workspace_intents.remove_workspace_intent( + root=tmp_path, + pid=1, + start_epoch=100, + intent_id="../../do_not_delete", + ) + assert result is False + assert sentinel.exists(), "sentinel file must survive traversal attempt" From 55e47b2816a556576ad291d1fc426501fb3893b9 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Wed, 27 May 2026 22:27:29 +0500 Subject: [PATCH 041/318] feat(vscode): add blast radius visualization and upgrade packaging toolchain MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce Show Blast Radius (SVG WebviewPanel) and Copy Blast Radius Brief (Markdown clipboard) commands driven by the get_blast_radius MCP tool. The webview uses enableScripts: false with a nonce-scoped CSP and no local resource roots. Upgrade @vscode/vsce 2.25.0 → 3.9.1, resolving transitive tmp path-traversal (GHSA-ph9p-34f9-6g65) and qs DoS (GHSA-q8mj-m7cp-5q26) vulnerabilities. Bump @types/node to 25.9.1 and typescript to 6.0.3. Extension version 0.2.7 → 0.3.0. 
--- extensions/vscode-codeclone/CHANGELOG.md | 16 + extensions/vscode-codeclone/README.md | 19 + extensions/vscode-codeclone/package-lock.json | 2848 +++++++++++++++-- extensions/vscode-codeclone/package.json | 34 +- extensions/vscode-codeclone/src/extension.js | 125 + extensions/vscode-codeclone/src/renderers.js | 340 ++ .../vscode-codeclone/test/renderers.test.js | 109 + 7 files changed, 3173 insertions(+), 318 deletions(-) diff --git a/extensions/vscode-codeclone/CHANGELOG.md b/extensions/vscode-codeclone/CHANGELOG.md index 3fe9bd26..c2a93fff 100644 --- a/extensions/vscode-codeclone/CHANGELOG.md +++ b/extensions/vscode-codeclone/CHANGELOG.md @@ -1,5 +1,21 @@ # Change Log +## 0.3.0 + +- add **Show Blast Radius** command — concentric SVG diagram of structural + impact for the active file, rendered in a secure WebviewPanel with no scripts + and nonce-scoped CSP +- add **Copy Blast Radius Brief** command — structured Markdown summary of + origin, dependents, clone cohort, risk signals, and guardrails copied to + clipboard +- both commands available from the editor title menu when a run is active and + the workspace is trusted +- bump minimum version to reflect the new MCP `get_blast_radius` dependency +- upgrade `@vscode/vsce` from `2.25.0` to `3.9.1`, resolving the transitive + `tmp` path-traversal (GHSA-ph9p-34f9-6g65) and `qs` DoS + (GHSA-q8mj-m7cp-5q26) vulnerabilities +- upgrade `@types/node` to `25.9.1` and `typescript` to `6.0.3` + ## 0.2.7 - surface Coverage Join review items in Hotspots when coverage data is available diff --git a/extensions/vscode-codeclone/README.md b/extensions/vscode-codeclone/README.md index 992274ca..484fe43e 100644 --- a/extensions/vscode-codeclone/README.md +++ b/extensions/vscode-codeclone/README.md @@ -18,6 +18,8 @@ and driven by the same canonical report as the CLI and HTML output. 
at a glance; report-only Security Surfaces and Overloaded Modules kept visually separate - **Baseline-aware** — distinguishes known debt from new regressions against the stored baseline - **Changed-files review** — `Review Changes` scopes analysis to the current diff via a configurable git ref +- **Blast Radius** — `Show Blast Radius` renders a concentric SVG diagram of structural + impact for the active file; `Copy Blast Radius Brief` puts a Markdown summary on the clipboard - **Coverage Join** — integrates `coverage.xml` to surface untested hotspots when available - **Source-first navigation** — `Reveal Source` opens the exact location; `Next / Previous Hotspot` steps through active targets in the editor @@ -98,6 +100,23 @@ Focus mode is explicit and persisted per workspace; `Recommended` is the default Bounded MCP session state: server availability, current run identity, reviewed findings, and help topics. Reviewed markers are session-local and do not mutate the repository or report. +### Blast Radius + +Visual structural impact analysis for the active file. + +- **Show Blast Radius** — opens a WebviewPanel with a concentric SVG diagram + showing origin, direct dependents, transitive dependents, and clone cohort. + Risk signals (complexity, coverage, overloaded modules) are overlaid as + colored dots. Do-not-touch boundaries and guardrails are listed below the + diagram. +- **Copy Blast Radius Brief** — copies a structured Markdown summary of the + same data to the clipboard for use in PR descriptions or review notes. + +Both commands are available from the editor title context menu and the command +palette when a run is active and the workspace is trusted. The webview uses +`enableScripts: false` and a nonce-scoped Content Security Policy with no +external resource access. 
+ --- ## Settings diff --git a/extensions/vscode-codeclone/package-lock.json b/extensions/vscode-codeclone/package-lock.json index b65a76cb..b8ba30d1 100644 --- a/extensions/vscode-codeclone/package-lock.json +++ b/extensions/vscode-codeclone/package-lock.json @@ -1,24 +1,234 @@ { "name": "codeclone", - "version": "0.2.7", + "version": "0.3.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "codeclone", - "version": "0.2.7", + "version": "0.3.0", "license": "MPL-2.0", "devDependencies": { - "@types/node": "^25.5.2", + "@types/node": "^25.9.1", "@types/vscode": "1.100.0", - "@vscode/vsce": "2.25.0", + "@vscode/vsce": "3.9.1", "esbuild": "^0.28.0", - "typescript": "^6.0.2" + "typescript": "^6.0.3" }, "engines": { "vscode": "^1.100.0" } }, + "node_modules/@azu/format-text": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/@azu/format-text/-/format-text-1.0.2.tgz", + "integrity": "sha512-Swi4N7Edy1Eqq82GxgEECXSSLyn6GOb5htRFPzBDdUkECGXtlf12ynO5oJSpWKPwCaUssOu7NfhDcCWpIC6Ywg==", + "dev": true, + "license": "BSD-3-Clause" + }, + "node_modules/@azu/style-format": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/@azu/style-format/-/style-format-1.0.1.tgz", + "integrity": "sha512-AHcTojlNBdD/3/KxIKlg8sxIWHfOtQszLvOpagLTO+bjC3u7SAszu1lf//u7JJC50aUSH+BVWDD/KvaA6Gfn5g==", + "dev": true, + "license": "WTFPL", + "dependencies": { + "@azu/format-text": "^1.0.1" + } + }, + "node_modules/@azure/abort-controller": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/@azure/abort-controller/-/abort-controller-2.1.2.tgz", + "integrity": "sha512-nBrLsEWm4J2u5LpAPjxADTlq3trDgVZZXHNKabeXZtpq3d3AbN/KGO82R87rdDz5/lYB024rtEf10/q0urNgsA==", + "dev": true, + "license": "MIT", + "dependencies": { + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@azure/core-auth": { + "version": "1.10.1", + "resolved": "https://registry.npmjs.org/@azure/core-auth/-/core-auth-1.10.1.tgz", + "integrity": 
"sha512-ykRMW8PjVAn+RS6ww5cmK9U2CyH9p4Q88YJwvUslfuMmN98w/2rdGRLPqJYObapBCdzBVeDgYWdJnFPFb7qzpg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@azure/abort-controller": "^2.1.2", + "@azure/core-util": "^1.13.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@azure/core-client": { + "version": "1.10.1", + "resolved": "https://registry.npmjs.org/@azure/core-client/-/core-client-1.10.1.tgz", + "integrity": "sha512-Nh5PhEOeY6PrnxNPsEHRr9eimxLwgLlpmguQaHKBinFYA/RU9+kOYVOQqOrTsCL+KSxrLLl1gD8Dk5BFW/7l/w==", + "dev": true, + "license": "MIT", + "dependencies": { + "@azure/abort-controller": "^2.1.2", + "@azure/core-auth": "^1.10.0", + "@azure/core-rest-pipeline": "^1.22.0", + "@azure/core-tracing": "^1.3.0", + "@azure/core-util": "^1.13.0", + "@azure/logger": "^1.3.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@azure/core-rest-pipeline": { + "version": "1.23.0", + "resolved": "https://registry.npmjs.org/@azure/core-rest-pipeline/-/core-rest-pipeline-1.23.0.tgz", + "integrity": "sha512-Evs1INHo+jUjwHi1T6SG6Ua/LHOQBCLuKEEE6efIpt4ZOoNonaT1kP32GoOcdNDbfqsD2445CPri3MubBy5DEQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@azure/abort-controller": "^2.1.2", + "@azure/core-auth": "^1.10.0", + "@azure/core-tracing": "^1.3.0", + "@azure/core-util": "^1.13.0", + "@azure/logger": "^1.3.0", + "@typespec/ts-http-runtime": "^0.3.4", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@azure/core-tracing": { + "version": "1.3.1", + "resolved": "https://registry.npmjs.org/@azure/core-tracing/-/core-tracing-1.3.1.tgz", + "integrity": "sha512-9MWKevR7Hz8kNzzPLfX4EAtGM2b8mr50HPDBvio96bURP/9C+HjdH3sBlLSNNrvRAr5/k/svoH457gB5IKpmwQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@azure/core-util": { + "version": "1.13.1", + "resolved": 
"https://registry.npmjs.org/@azure/core-util/-/core-util-1.13.1.tgz", + "integrity": "sha512-XPArKLzsvl0Hf0CaGyKHUyVgF7oDnhKoP85Xv6M4StF/1AhfORhZudHtOyf2s+FcbuQ9dPRAjB8J2KvRRMUK2A==", + "dev": true, + "license": "MIT", + "dependencies": { + "@azure/abort-controller": "^2.1.2", + "@typespec/ts-http-runtime": "^0.3.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@azure/identity": { + "version": "4.13.1", + "resolved": "https://registry.npmjs.org/@azure/identity/-/identity-4.13.1.tgz", + "integrity": "sha512-5C/2WD5Vb1lHnZS16dNQRPMjN6oV/Upba+C9nBIs15PmOi6A3ZGs4Lr2u60zw4S04gi+u3cEXiqTVP7M4Pz3kw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@azure/abort-controller": "^2.0.0", + "@azure/core-auth": "^1.9.0", + "@azure/core-client": "^1.9.2", + "@azure/core-rest-pipeline": "^1.17.0", + "@azure/core-tracing": "^1.0.0", + "@azure/core-util": "^1.11.0", + "@azure/logger": "^1.0.0", + "@azure/msal-browser": "^5.5.0", + "@azure/msal-node": "^5.1.0", + "open": "^10.1.0", + "tslib": "^2.2.0" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@azure/logger": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/@azure/logger/-/logger-1.3.0.tgz", + "integrity": "sha512-fCqPIfOcLE+CGqGPd66c8bZpwAji98tZ4JI9i/mlTNTlsIWslCfpg48s/ypyLxZTump5sypjrKn2/kY7q8oAbA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@typespec/ts-http-runtime": "^0.3.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@azure/msal-browser": { + "version": "5.11.0", + "resolved": "https://registry.npmjs.org/@azure/msal-browser/-/msal-browser-5.11.0.tgz", + "integrity": "sha512-zkGNYS3TwY8lUpPIafAmsFCYZbgFixY9y/LZB9GUg0IILoHTqpN26j5OrkL1AQThh/YdZsawe4iWXfp85lFVxg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@azure/msal-common": "16.6.2" + }, + "engines": { + "node": ">=0.8.0" + } + }, + "node_modules/@azure/msal-common": { + "version": "16.6.2", + "resolved": 
"https://registry.npmjs.org/@azure/msal-common/-/msal-common-16.6.2.tgz", + "integrity": "sha512-hQjjsekAjB00cM1EmatWJlzhEoK2Qhz7Rj5gvM6tYf8iL7RM3tkxlpU9fG0+ofkulzg9AEEA6dIEnSmDr5ZqUA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.8.0" + } + }, + "node_modules/@azure/msal-node": { + "version": "5.2.2", + "resolved": "https://registry.npmjs.org/@azure/msal-node/-/msal-node-5.2.2.tgz", + "integrity": "sha512-toS+2AePxqyzb0YOKttDOOiSl3jrkK9aiqIvpurpis0O34QcIS5gToqrgT39p04Dpxw3YoUU0lxJKTpSFFfA6Q==", + "dev": true, + "license": "MIT", + "dependencies": { + "@azure/msal-common": "16.6.2", + "jsonwebtoken": "^9.0.0" + }, + "engines": { + "node": ">=20" + } + }, + "node_modules/@babel/code-frame": { + "version": "7.29.7", + "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.29.7.tgz", + "integrity": "sha512-Aup7aUOfpbAUg2ROOJN6Iw5f9DMBlzu0mIkm/malLQFN/YQgO48wCj0Kxa3sEHJvPVFg7siR+qRInwXd2qhQKw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/helper-validator-identifier": "^7.29.7", + "js-tokens": "^4.0.0", + "picocolors": "^1.1.1" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-validator-identifier": { + "version": "7.29.7", + "resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.29.7.tgz", + "integrity": "sha512-qehxGkRj55h/ff8EMaJ+cYhyaKlHIxqYDn682wQD7RNp9UujOQsHog2uS0r2vzr4pW+sXf90NeeayjcNaX3fFg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, "node_modules/@esbuild/aix-ppc64": { "version": "0.28.0", "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.28.0.tgz", @@ -461,200 +671,828 @@ "node": ">=18" } }, - "node_modules/@types/node": { - "version": "25.5.2", - "resolved": "https://registry.npmjs.org/@types/node/-/node-25.5.2.tgz", - "integrity": "sha512-tO4ZIRKNC+MDWV4qKVZe3Ql/woTnmHDr5JD8UI5hn2pwBrHEwOEMZK7WlNb5RKB6EoJ02gwmQS9OrjuFnZYdpg==", + 
"node_modules/@isaacs/cliui": { + "version": "9.0.0", + "resolved": "https://registry.npmjs.org/@isaacs/cliui/-/cliui-9.0.0.tgz", + "integrity": "sha512-AokJm4tuBHillT+FpMtxQ60n8ObyXBatq7jD2/JA9dxbDDokKQm8KMht5ibGzLVU9IJDIKK4TPKgMHEYMn3lMg==", + "dev": true, + "license": "BlueOak-1.0.0", + "engines": { + "node": ">=18" + } + }, + "node_modules/@nodelib/fs.scandir": { + "version": "2.1.5", + "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz", + "integrity": "sha512-vq24Bq3ym5HEQm2NKCr3yXDwjc7vTsEThRDnkp2DK9p1uqLR+DHurm/NOTo0KG7HYHU7eppKZj3MyqYuMBf62g==", "dev": true, "license": "MIT", "dependencies": { - "undici-types": "~7.18.0" + "@nodelib/fs.stat": "2.0.5", + "run-parallel": "^1.1.9" + }, + "engines": { + "node": ">= 8" } }, - "node_modules/@types/vscode": { - "version": "1.100.0", - "resolved": "https://registry.npmjs.org/@types/vscode/-/vscode-1.100.0.tgz", - "integrity": "sha512-4uNyvzHoraXEeCamR3+fzcBlh7Afs4Ifjs4epINyUX/jvdk0uzLnwiDY35UKDKnkCHP5Nu3dljl2H8lR6s+rQw==", + "node_modules/@nodelib/fs.stat": { + "version": "2.0.5", + "resolved": "https://registry.npmjs.org/@nodelib/fs.stat/-/fs.stat-2.0.5.tgz", + "integrity": "sha512-RkhPPp2zrqDAQA/2jNhnztcPAlv64XdhIp7a7454A5ovI7Bukxgt7MX7udwAu3zg1DcpPU0rz3VV1SeaqvY4+A==", "dev": true, - "license": "MIT" + "license": "MIT", + "engines": { + "node": ">= 8" + } }, - "node_modules/@vscode/vsce": { - "version": "2.25.0", - "resolved": "https://registry.npmjs.org/@vscode/vsce/-/vsce-2.25.0.tgz", - "integrity": "sha512-VXMCGUaP6wKBadA7vFQdsksxkBAMoh4ecZgXBwauZMASAgnwYesHyLnqIyWYeRwjy2uEpitHvz/1w5ENnR30pg==", + "node_modules/@nodelib/fs.walk": { + "version": "1.2.8", + "resolved": "https://registry.npmjs.org/@nodelib/fs.walk/-/fs.walk-1.2.8.tgz", + "integrity": "sha512-oGB+UxlgWcgQkgwo8GcEGwemoTFt3FIO9ababBmaGwXIoBKZ+GTy0pP185beGg7Llih/NSHSV2XAs1lnznocSg==", "dev": true, "license": "MIT", "dependencies": { - "azure-devops-node-api": "^12.5.0", - "chalk": "^2.4.2", - "cheerio": 
"^1.0.0-rc.9", - "cockatiel": "^3.1.2", - "commander": "^6.2.1", - "form-data": "^4.0.0", - "glob": "^7.0.6", - "hosted-git-info": "^4.0.2", - "jsonc-parser": "^3.2.0", - "leven": "^3.1.0", - "markdown-it": "^12.3.2", - "mime": "^1.3.4", - "minimatch": "^3.0.3", - "parse-semver": "^1.1.1", - "read": "^1.0.7", - "semver": "^7.5.2", - "tmp": "^0.2.1", - "typed-rest-client": "^1.8.4", - "url-join": "^4.0.1", - "xml2js": "^0.5.0", - "yauzl": "^2.3.1", - "yazl": "^2.2.2" - }, - "bin": { - "vsce": "vsce" + "@nodelib/fs.scandir": "2.1.5", + "fastq": "^1.6.0" }, "engines": { - "node": ">= 16" - }, - "optionalDependencies": { - "keytar": "^7.7.0" + "node": ">= 8" } }, - "node_modules/ansi-styles": { - "version": "3.2.1", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-3.2.1.tgz", - "integrity": "sha512-VT0ZI6kZRdTh8YyJw3SMbYm/u+NqfsAxEpWO0Pf9sq8/e94WxxOpPKx9FR1FlyCtOVDNOQ+8ntlqFxiRc+r5qA==", + "node_modules/@secretlint/config-creator": { + "version": "10.2.2", + "resolved": "https://registry.npmjs.org/@secretlint/config-creator/-/config-creator-10.2.2.tgz", + "integrity": "sha512-BynOBe7Hn3LJjb3CqCHZjeNB09s/vgf0baBaHVw67w7gHF0d25c3ZsZ5+vv8TgwSchRdUCRrbbcq5i2B1fJ2QQ==", "dev": true, "license": "MIT", "dependencies": { - "color-convert": "^1.9.0" + "@secretlint/types": "^10.2.2" }, "engines": { - "node": ">=4" + "node": ">=20.0.0" } }, - "node_modules/argparse": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz", - "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==", - "dev": true, - "license": "Python-2.0" - }, - "node_modules/asynckit": { - "version": "0.4.0", - "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", - "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==", + "node_modules/@secretlint/config-loader": { + "version": "10.2.2", + "resolved": 
"https://registry.npmjs.org/@secretlint/config-loader/-/config-loader-10.2.2.tgz", + "integrity": "sha512-ndjjQNgLg4DIcMJp4iaRD6xb9ijWQZVbd9694Ol2IszBIbGPPkwZHzJYKICbTBmh6AH/pLr0CiCaWdGJU7RbpQ==", "dev": true, - "license": "MIT" + "license": "MIT", + "dependencies": { + "@secretlint/profiler": "^10.2.2", + "@secretlint/resolver": "^10.2.2", + "@secretlint/types": "^10.2.2", + "ajv": "^8.17.1", + "debug": "^4.4.1", + "rc-config-loader": "^4.1.3" + }, + "engines": { + "node": ">=20.0.0" + } }, - "node_modules/azure-devops-node-api": { - "version": "12.5.0", - "resolved": "https://registry.npmjs.org/azure-devops-node-api/-/azure-devops-node-api-12.5.0.tgz", - "integrity": "sha512-R5eFskGvOm3U/GzeAuxRkUsAl0hrAwGgWn6zAd2KrZmrEhWZVqLew4OOupbQlXUuojUzpGtq62SmdhJ06N88og==", + "node_modules/@secretlint/core": { + "version": "10.2.2", + "resolved": "https://registry.npmjs.org/@secretlint/core/-/core-10.2.2.tgz", + "integrity": "sha512-6rdwBwLP9+TO3rRjMVW1tX+lQeo5gBbxl1I5F8nh8bgGtKwdlCMhMKsBWzWg1ostxx/tIG7OjZI0/BxsP8bUgw==", "dev": true, "license": "MIT", "dependencies": { - "tunnel": "0.0.6", - "typed-rest-client": "^1.8.4" + "@secretlint/profiler": "^10.2.2", + "@secretlint/types": "^10.2.2", + "debug": "^4.4.1", + "structured-source": "^4.0.0" + }, + "engines": { + "node": ">=20.0.0" } }, - "node_modules/balanced-match": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", - "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", + "node_modules/@secretlint/formatter": { + "version": "10.2.2", + "resolved": "https://registry.npmjs.org/@secretlint/formatter/-/formatter-10.2.2.tgz", + "integrity": "sha512-10f/eKV+8YdGKNQmoDUD1QnYL7TzhI2kzyx95vsJKbEa8akzLAR5ZrWIZ3LbcMmBLzxlSQMMccRmi05yDQ5YDA==", "dev": true, - "license": "MIT" + "license": "MIT", + "dependencies": { + "@secretlint/resolver": "^10.2.2", + "@secretlint/types": "^10.2.2", + 
"@textlint/linter-formatter": "^15.2.0", + "@textlint/module-interop": "^15.2.0", + "@textlint/types": "^15.2.0", + "chalk": "^5.4.1", + "debug": "^4.4.1", + "pluralize": "^8.0.0", + "strip-ansi": "^7.1.0", + "table": "^6.9.0", + "terminal-link": "^4.0.0" + }, + "engines": { + "node": ">=20.0.0" + } }, - "node_modules/base64-js": { - "version": "1.5.1", - "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz", - "integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==", + "node_modules/@secretlint/formatter/node_modules/chalk": { + "version": "5.6.2", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-5.6.2.tgz", + "integrity": "sha512-7NzBL0rN6fMUW+f7A6Io4h40qQlG+xGmtMxfbnH/K7TAtt8JQWVQK+6g0UXKMeVJoyV5EkkNsErQ8pVD3bLHbA==", "dev": true, - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ], "license": "MIT", - "optional": true + "engines": { + "node": "^12.17.0 || ^14.13 || >=16.0.0" + }, + "funding": { + "url": "https://github.com/chalk/chalk?sponsor=1" + } }, - "node_modules/bl": { - "version": "4.1.0", - "resolved": "https://registry.npmjs.org/bl/-/bl-4.1.0.tgz", - "integrity": "sha512-1W07cM9gS6DcLperZfFSj+bWLtaPGSOHWhPiGzXmvVJbRLdG82sH/Kn8EtW1VqWVA54AKf2h5k5BbnIbwF3h6w==", + "node_modules/@secretlint/node": { + "version": "10.2.2", + "resolved": "https://registry.npmjs.org/@secretlint/node/-/node-10.2.2.tgz", + "integrity": "sha512-eZGJQgcg/3WRBwX1bRnss7RmHHK/YlP/l7zOQsrjexYt6l+JJa5YhUmHbuGXS94yW0++3YkEJp0kQGYhiw1DMQ==", "dev": true, "license": "MIT", - "optional": true, "dependencies": { - "buffer": "^5.5.0", - "inherits": "^2.0.4", - "readable-stream": "^3.4.0" + "@secretlint/config-loader": "^10.2.2", + "@secretlint/core": "^10.2.2", + "@secretlint/formatter": "^10.2.2", + "@secretlint/profiler": 
"^10.2.2", + "@secretlint/source-creator": "^10.2.2", + "@secretlint/types": "^10.2.2", + "debug": "^4.4.1", + "p-map": "^7.0.3" + }, + "engines": { + "node": ">=20.0.0" } }, - "node_modules/boolbase": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz", - "integrity": "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==", + "node_modules/@secretlint/profiler": { + "version": "10.2.2", + "resolved": "https://registry.npmjs.org/@secretlint/profiler/-/profiler-10.2.2.tgz", + "integrity": "sha512-qm9rWfkh/o8OvzMIfY8a5bCmgIniSpltbVlUVl983zDG1bUuQNd1/5lUEeWx5o/WJ99bXxS7yNI4/KIXfHexig==", "dev": true, - "license": "ISC" + "license": "MIT" }, - "node_modules/brace-expansion": { - "version": "1.1.13", - "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.13.tgz", - "integrity": "sha512-9ZLprWS6EENmhEOpjCYW2c8VkmOvckIJZfkr7rBW6dObmfgJ/L1GpSYW5Hpo9lDz4D1+n0Ckz8rU7FwHDQiG/w==", + "node_modules/@secretlint/resolver": { + "version": "10.2.2", + "resolved": "https://registry.npmjs.org/@secretlint/resolver/-/resolver-10.2.2.tgz", + "integrity": "sha512-3md0cp12e+Ae5V+crPQYGd6aaO7ahw95s28OlULGyclyyUtf861UoRGS2prnUrKh7MZb23kdDOyGCYb9br5e4w==", + "dev": true, + "license": "MIT" + }, + "node_modules/@secretlint/secretlint-formatter-sarif": { + "version": "10.2.2", + "resolved": "https://registry.npmjs.org/@secretlint/secretlint-formatter-sarif/-/secretlint-formatter-sarif-10.2.2.tgz", + "integrity": "sha512-ojiF9TGRKJJw308DnYBucHxkpNovDNu1XvPh7IfUp0A12gzTtxuWDqdpuVezL7/IP8Ua7mp5/VkDMN9OLp1doQ==", "dev": true, "license": "MIT", "dependencies": { - "balanced-match": "^1.0.0", - "concat-map": "0.0.1" + "node-sarif-builder": "^3.2.0" } }, - "node_modules/buffer": { - "version": "5.7.1", - "resolved": "https://registry.npmjs.org/buffer/-/buffer-5.7.1.tgz", - "integrity": "sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ==", + 
"node_modules/@secretlint/secretlint-rule-no-dotenv": { + "version": "10.2.2", + "resolved": "https://registry.npmjs.org/@secretlint/secretlint-rule-no-dotenv/-/secretlint-rule-no-dotenv-10.2.2.tgz", + "integrity": "sha512-KJRbIShA9DVc5Va3yArtJ6QDzGjg3PRa1uYp9As4RsyKtKSSZjI64jVca57FZ8gbuk4em0/0Jq+uy6485wxIdg==", "dev": true, - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ], "license": "MIT", - "optional": true, "dependencies": { - "base64-js": "^1.3.1", - "ieee754": "^1.1.13" + "@secretlint/types": "^10.2.2" + }, + "engines": { + "node": ">=20.0.0" } }, - "node_modules/buffer-crc32": { - "version": "0.2.13", - "resolved": "https://registry.npmjs.org/buffer-crc32/-/buffer-crc32-0.2.13.tgz", - "integrity": "sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ==", + "node_modules/@secretlint/secretlint-rule-preset-recommend": { + "version": "10.2.2", + "resolved": "https://registry.npmjs.org/@secretlint/secretlint-rule-preset-recommend/-/secretlint-rule-preset-recommend-10.2.2.tgz", + "integrity": "sha512-K3jPqjva8bQndDKJqctnGfwuAxU2n9XNCPtbXVI5JvC7FnQiNg/yWlQPbMUlBXtBoBGFYp08A94m6fvtc9v+zA==", "dev": true, "license": "MIT", "engines": { - "node": "*" + "node": ">=20.0.0" } }, - "node_modules/call-bind-apply-helpers": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz", + "node_modules/@secretlint/source-creator": { + "version": "10.2.2", + "resolved": "https://registry.npmjs.org/@secretlint/source-creator/-/source-creator-10.2.2.tgz", + "integrity": "sha512-h6I87xJfwfUTgQ7irWq7UTdq/Bm1RuQ/fYhA3dtTIAop5BwSFmZyrchph4WcoEvbN460BWKmk4RYSvPElIIvxw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@secretlint/types": "^10.2.2", + "istextorbinary": "^9.5.0" + }, + 
"engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@secretlint/types": { + "version": "10.2.2", + "resolved": "https://registry.npmjs.org/@secretlint/types/-/types-10.2.2.tgz", + "integrity": "sha512-Nqc90v4lWCXyakD6xNyNACBJNJ0tNCwj2WNk/7ivyacYHxiITVgmLUFXTBOeCdy79iz6HtN9Y31uw/jbLrdOAg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@sindresorhus/merge-streams": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/@sindresorhus/merge-streams/-/merge-streams-2.3.0.tgz", + "integrity": "sha512-LtoMMhxAlorcGhmFYI+LhPgbPZCkgP6ra1YL604EeF6U98pLlQ3iWIGMdWSC+vWmPBWBNgmDBAhnAobLROJmwg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/@textlint/ast-node-types": { + "version": "15.7.1", + "resolved": "https://registry.npmjs.org/@textlint/ast-node-types/-/ast-node-types-15.7.1.tgz", + "integrity": "sha512-Wii5UgUKFEh9Uv6wbq1zr4/Kf+dtjiUuzPrrXzKp8H+ifkvKNzi23V4Nz+6wVyHQn5T28AFuc8VH8OtzvGYecA==", + "dev": true, + "license": "MIT" + }, + "node_modules/@textlint/linter-formatter": { + "version": "15.7.1", + "resolved": "https://registry.npmjs.org/@textlint/linter-formatter/-/linter-formatter-15.7.1.tgz", + "integrity": "sha512-TdwZ/debWYFD05K3CcoHtwvnCrza29wZxD+BjDTk/V5N7iRqkK1dTTHSD4A8AIgROLiDkHJmIKQbasbmsg8AvA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@azu/format-text": "^1.0.2", + "@azu/style-format": "^1.0.1", + "@textlint/module-interop": "15.7.1", + "@textlint/resolver": "15.7.1", + "@textlint/types": "15.7.1", + "chalk": "^4.1.2", + "debug": "^4.4.3", + "js-yaml": "^4.1.1", + "lodash": "^4.18.1", + "pluralize": "^2.0.0", + "string-width": "^4.2.3", + "strip-ansi": "^6.0.1", + "table": "^6.9.0", + "text-table": "^0.2.0" + } + }, + "node_modules/@textlint/linter-formatter/node_modules/ansi-regex": { + "version": "5.0.1", + "resolved": 
"https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", + "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/@textlint/linter-formatter/node_modules/pluralize": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/pluralize/-/pluralize-2.0.0.tgz", + "integrity": "sha512-TqNZzQCD4S42De9IfnnBvILN7HAW7riLqsCyp8lgjXeysyPlX5HhqKAcJHHHb9XskE4/a+7VGC9zzx8Ls0jOAw==", + "dev": true, + "license": "MIT" + }, + "node_modules/@textlint/linter-formatter/node_modules/strip-ansi": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", + "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", + "dev": true, + "license": "MIT", + "dependencies": { + "ansi-regex": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/@textlint/module-interop": { + "version": "15.7.1", + "resolved": "https://registry.npmjs.org/@textlint/module-interop/-/module-interop-15.7.1.tgz", + "integrity": "sha512-Jg+sQW2L/cRJypk59wtcMUVVpt8vmit5ZMT3gUnFwevP3A6Qp1HfOtUy9ObT4hBX3lOSGT/ekcCDxR1pL7uH1g==", + "dev": true, + "license": "MIT" + }, + "node_modules/@textlint/resolver": { + "version": "15.7.1", + "resolved": "https://registry.npmjs.org/@textlint/resolver/-/resolver-15.7.1.tgz", + "integrity": "sha512-8XnO0pgF6mXnm41VvWmBbEIdGPhiCUt31uLZkOis1ECeg/1SoUcIT6Mx/F0e1rukq8l0UlOSeY9a31CsvRMK0g==", + "dev": true, + "license": "MIT" + }, + "node_modules/@textlint/types": { + "version": "15.7.1", + "resolved": "https://registry.npmjs.org/@textlint/types/-/types-15.7.1.tgz", + "integrity": "sha512-Vye/GmFNBTgVzZFtIFJTmLB+s2A7oIADxNG6r9UhfPuY+Czv0z5G3xeyFZZudPlfxURsKUyPIU5XsjOFqVp33A==", + "dev": true, + "license": "MIT", + "dependencies": { + "@textlint/ast-node-types": "15.7.1" + } + }, + "node_modules/@types/node": { + 
"version": "25.9.1", + "resolved": "https://registry.npmjs.org/@types/node/-/node-25.9.1.tgz", + "integrity": "sha512-xfrlY7UD5rMJk3ZVJP8BNzS28J36YJg+xp+LPXV1TdWxr8uMH5A860QNxYDGQe/ylDSgjxE52Q9VnO7p75tJxg==", + "dev": true, + "license": "MIT", + "dependencies": { + "undici-types": ">=7.24.0 <7.24.7" + } + }, + "node_modules/@types/normalize-package-data": { + "version": "2.4.4", + "resolved": "https://registry.npmjs.org/@types/normalize-package-data/-/normalize-package-data-2.4.4.tgz", + "integrity": "sha512-37i+OaWTh9qeK4LSHPsyRC7NahnGotNuZvjLSgcPzblpHB3rrCJxAOgI5gCdKm7coonsaX1Of0ILiTcnZjbfxA==", + "dev": true, + "license": "MIT" + }, + "node_modules/@types/sarif": { + "version": "2.1.7", + "resolved": "https://registry.npmjs.org/@types/sarif/-/sarif-2.1.7.tgz", + "integrity": "sha512-kRz0VEkJqWLf1LLVN4pT1cg1Z9wAuvI6L97V3m2f5B76Tg8d413ddvLBPTEHAZJlnn4XSvu0FkZtViCQGVyrXQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/@types/vscode": { + "version": "1.100.0", + "resolved": "https://registry.npmjs.org/@types/vscode/-/vscode-1.100.0.tgz", + "integrity": "sha512-4uNyvzHoraXEeCamR3+fzcBlh7Afs4Ifjs4epINyUX/jvdk0uzLnwiDY35UKDKnkCHP5Nu3dljl2H8lR6s+rQw==", + "dev": true, + "license": "MIT" + }, + "node_modules/@typespec/ts-http-runtime": { + "version": "0.3.5", + "resolved": "https://registry.npmjs.org/@typespec/ts-http-runtime/-/ts-http-runtime-0.3.5.tgz", + "integrity": "sha512-yURCknZhvywvQItHMMmFSo+fq5arCUIyz/CVk7jD89MSai7dkaX8ufjCWp3NttLojoTVbcE72ri+be/TnEbMHw==", + "dev": true, + "license": "MIT", + "dependencies": { + "http-proxy-agent": "^7.0.0", + "https-proxy-agent": "^7.0.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@vscode/vsce": { + "version": "3.9.1", + "resolved": "https://registry.npmjs.org/@vscode/vsce/-/vsce-3.9.1.tgz", + "integrity": "sha512-MPn5p+DoudI+3GfJSpAZZraE1lgLv0LcwbH3+xy7RgEhty3UIkmUMUA+5jPTDaxXae00AnX5u77FxGM8FhfKKA==", + "dev": true, + "license": "MIT", + "dependencies": { + 
"@azure/identity": "^4.1.0", + "@secretlint/node": "^10.1.2", + "@secretlint/secretlint-formatter-sarif": "^10.1.2", + "@secretlint/secretlint-rule-no-dotenv": "^10.1.2", + "@secretlint/secretlint-rule-preset-recommend": "^10.1.2", + "@vscode/vsce-sign": "^2.0.0", + "azure-devops-node-api": "^12.5.0", + "chalk": "^4.1.2", + "cheerio": "^1.0.0-rc.9", + "cockatiel": "^3.1.2", + "commander": "^12.1.0", + "form-data": "^4.0.0", + "glob": "^11.0.0", + "hosted-git-info": "^4.0.2", + "jsonc-parser": "^3.2.0", + "leven": "^3.1.0", + "markdown-it": "^14.1.0", + "mime": "^1.3.4", + "minimatch": "^3.0.3", + "parse-semver": "^1.1.1", + "read": "^1.0.7", + "secretlint": "^10.1.2", + "semver": "^7.5.2", + "tmp": "^0.2.3", + "typed-rest-client": "^1.8.4", + "url-join": "^4.0.1", + "xml2js": "^0.5.0", + "yauzl": "^3.2.1", + "yazl": "^2.2.2" + }, + "bin": { + "vsce": "vsce" + }, + "engines": { + "node": ">= 20" + }, + "optionalDependencies": { + "keytar": "^7.7.0" + } + }, + "node_modules/@vscode/vsce-sign": { + "version": "2.0.9", + "resolved": "https://registry.npmjs.org/@vscode/vsce-sign/-/vsce-sign-2.0.9.tgz", + "integrity": "sha512-8IvaRvtFyzUnGGl3f5+1Cnor3LqaUWvhaUjAYO8Y39OUYlOf3cRd+dowuQYLpZcP3uwSG+mURwjEBOSq4SOJ0g==", + "dev": true, + "hasInstallScript": true, + "license": "SEE LICENSE IN LICENSE.txt", + "optionalDependencies": { + "@vscode/vsce-sign-alpine-arm64": "2.0.6", + "@vscode/vsce-sign-alpine-x64": "2.0.6", + "@vscode/vsce-sign-darwin-arm64": "2.0.6", + "@vscode/vsce-sign-darwin-x64": "2.0.6", + "@vscode/vsce-sign-linux-arm": "2.0.6", + "@vscode/vsce-sign-linux-arm64": "2.0.6", + "@vscode/vsce-sign-linux-x64": "2.0.6", + "@vscode/vsce-sign-win32-arm64": "2.0.6", + "@vscode/vsce-sign-win32-x64": "2.0.6" + } + }, + "node_modules/@vscode/vsce-sign-alpine-arm64": { + "version": "2.0.6", + "resolved": "https://registry.npmjs.org/@vscode/vsce-sign-alpine-arm64/-/vsce-sign-alpine-arm64-2.0.6.tgz", + "integrity": 
"sha512-wKkJBsvKF+f0GfsUuGT0tSW0kZL87QggEiqNqK6/8hvqsXvpx8OsTEc3mnE1kejkh5r+qUyQ7PtF8jZYN0mo8Q==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "SEE LICENSE IN LICENSE.txt", + "optional": true, + "os": [ + "alpine" + ] + }, + "node_modules/@vscode/vsce-sign-alpine-x64": { + "version": "2.0.6", + "resolved": "https://registry.npmjs.org/@vscode/vsce-sign-alpine-x64/-/vsce-sign-alpine-x64-2.0.6.tgz", + "integrity": "sha512-YoAGlmdK39vKi9jA18i4ufBbd95OqGJxRvF3n6ZbCyziwy3O+JgOpIUPxv5tjeO6gQfx29qBivQ8ZZTUF2Ba0w==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "SEE LICENSE IN LICENSE.txt", + "optional": true, + "os": [ + "alpine" + ] + }, + "node_modules/@vscode/vsce-sign-darwin-arm64": { + "version": "2.0.6", + "resolved": "https://registry.npmjs.org/@vscode/vsce-sign-darwin-arm64/-/vsce-sign-darwin-arm64-2.0.6.tgz", + "integrity": "sha512-5HMHaJRIQuozm/XQIiJiA0W9uhdblwwl2ZNDSSAeXGO9YhB9MH5C4KIHOmvyjUnKy4UCuiP43VKpIxW1VWP4tQ==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "SEE LICENSE IN LICENSE.txt", + "optional": true, + "os": [ + "darwin" + ] + }, + "node_modules/@vscode/vsce-sign-darwin-x64": { + "version": "2.0.6", + "resolved": "https://registry.npmjs.org/@vscode/vsce-sign-darwin-x64/-/vsce-sign-darwin-x64-2.0.6.tgz", + "integrity": "sha512-25GsUbTAiNfHSuRItoQafXOIpxlYj+IXb4/qarrXu7kmbH94jlm5sdWSCKrrREs8+GsXF1b+l3OB7VJy5jsykw==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "SEE LICENSE IN LICENSE.txt", + "optional": true, + "os": [ + "darwin" + ] + }, + "node_modules/@vscode/vsce-sign-linux-arm": { + "version": "2.0.6", + "resolved": "https://registry.npmjs.org/@vscode/vsce-sign-linux-arm/-/vsce-sign-linux-arm-2.0.6.tgz", + "integrity": "sha512-UndEc2Xlq4HsuMPnwu7420uqceXjs4yb5W8E2/UkaHBB9OWCwMd3/bRe/1eLe3D8kPpxzcaeTyXiK3RdzS/1CA==", + "cpu": [ + "arm" + ], + "dev": true, + "license": "SEE LICENSE IN LICENSE.txt", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@vscode/vsce-sign-linux-arm64": { + "version": 
"2.0.6", + "resolved": "https://registry.npmjs.org/@vscode/vsce-sign-linux-arm64/-/vsce-sign-linux-arm64-2.0.6.tgz", + "integrity": "sha512-cfb1qK7lygtMa4NUl2582nP7aliLYuDEVpAbXJMkDq1qE+olIw/es+C8j1LJwvcRq1I2yWGtSn3EkDp9Dq5FdA==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "SEE LICENSE IN LICENSE.txt", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@vscode/vsce-sign-linux-x64": { + "version": "2.0.6", + "resolved": "https://registry.npmjs.org/@vscode/vsce-sign-linux-x64/-/vsce-sign-linux-x64-2.0.6.tgz", + "integrity": "sha512-/olerl1A4sOqdP+hjvJ1sbQjKN07Y3DVnxO4gnbn/ahtQvFrdhUi0G1VsZXDNjfqmXw57DmPi5ASnj/8PGZhAA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "SEE LICENSE IN LICENSE.txt", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@vscode/vsce-sign-win32-arm64": { + "version": "2.0.6", + "resolved": "https://registry.npmjs.org/@vscode/vsce-sign-win32-arm64/-/vsce-sign-win32-arm64-2.0.6.tgz", + "integrity": "sha512-ivM/MiGIY0PJNZBoGtlRBM/xDpwbdlCWomUWuLmIxbi1Cxe/1nooYrEQoaHD8ojVRgzdQEUzMsRbyF5cJJgYOg==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "SEE LICENSE IN LICENSE.txt", + "optional": true, + "os": [ + "win32" + ] + }, + "node_modules/@vscode/vsce-sign-win32-x64": { + "version": "2.0.6", + "resolved": "https://registry.npmjs.org/@vscode/vsce-sign-win32-x64/-/vsce-sign-win32-x64-2.0.6.tgz", + "integrity": "sha512-mgth9Kvze+u8CruYMmhHw6Zgy3GRX2S+Ed5oSokDEK5vPEwGGKnmuXua9tmFhomeAnhgJnL4DCna3TiNuGrBTQ==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "SEE LICENSE IN LICENSE.txt", + "optional": true, + "os": [ + "win32" + ] + }, + "node_modules/agent-base": { + "version": "7.1.4", + "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.4.tgz", + "integrity": "sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 14" + } + }, + "node_modules/ajv": { + "version": "8.20.0", + 
"resolved": "https://registry.npmjs.org/ajv/-/ajv-8.20.0.tgz", + "integrity": "sha512-Thbli+OlOj+iMPYFBVBfJ3OmCAnaSyNn4M1vz9T6Gka5Jt9ba/HIR56joy65tY6kx/FCF5VXNB819Y7/GUrBGA==", + "dev": true, + "license": "MIT", + "dependencies": { + "fast-deep-equal": "^3.1.3", + "fast-uri": "^3.0.1", + "json-schema-traverse": "^1.0.0", + "require-from-string": "^2.0.2" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/epoberezkin" + } + }, + "node_modules/ansi-escapes": { + "version": "7.3.0", + "resolved": "https://registry.npmjs.org/ansi-escapes/-/ansi-escapes-7.3.0.tgz", + "integrity": "sha512-BvU8nYgGQBxcmMuEeUEmNTvrMVjJNSH7RgW24vXexN4Ven6qCvy4TntnvlnwnMLTVlcRQQdbRY8NKnaIoeWDNg==", + "dev": true, + "license": "MIT", + "dependencies": { + "environment": "^1.0.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/ansi-regex": { + "version": "6.2.2", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.2.2.tgz", + "integrity": "sha512-Bq3SmSpyFHaWjPk8If9yc6svM8c56dB5BAtW4Qbw5jHTwwXXcTLoRMkpDJp6VL0XzlWaCHTXrkFURMYmD0sLqg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/ansi-regex?sponsor=1" + } + }, + "node_modules/ansi-styles": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", + "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "dev": true, + "license": "MIT", + "dependencies": { + "color-convert": "^2.0.1" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/argparse": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz", + "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==", + 
"dev": true, + "license": "Python-2.0" + }, + "node_modules/astral-regex": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/astral-regex/-/astral-regex-2.0.0.tgz", + "integrity": "sha512-Z7tMw1ytTXt5jqMcOP+OQteU1VuNK9Y02uuJtKQ1Sv69jXQKKg5cibLwGJow8yzZP+eAc18EmLGPal0bp36rvQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/asynckit": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", + "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==", + "dev": true, + "license": "MIT" + }, + "node_modules/azure-devops-node-api": { + "version": "12.5.0", + "resolved": "https://registry.npmjs.org/azure-devops-node-api/-/azure-devops-node-api-12.5.0.tgz", + "integrity": "sha512-R5eFskGvOm3U/GzeAuxRkUsAl0hrAwGgWn6zAd2KrZmrEhWZVqLew4OOupbQlXUuojUzpGtq62SmdhJ06N88og==", + "dev": true, + "license": "MIT", + "dependencies": { + "tunnel": "0.0.6", + "typed-rest-client": "^1.8.4" + } + }, + "node_modules/balanced-match": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", + "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", + "dev": true, + "license": "MIT" + }, + "node_modules/base64-js": { + "version": "1.5.1", + "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz", + "integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT", + "optional": true + }, + "node_modules/binaryextensions": { + "version": "6.11.0", + "resolved": 
"https://registry.npmjs.org/binaryextensions/-/binaryextensions-6.11.0.tgz", + "integrity": "sha512-sXnYK/Ij80TO3lcqZVV2YgfKN5QjUWIRk/XSm2J/4bd/lPko3lvk0O4ZppH6m+6hB2/GTu+ptNwVFe1xh+QLQw==", + "dev": true, + "license": "Artistic-2.0", + "dependencies": { + "editions": "^6.21.0" + }, + "engines": { + "node": ">=4" + }, + "funding": { + "url": "https://bevry.me/fund" + } + }, + "node_modules/bl": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/bl/-/bl-4.1.0.tgz", + "integrity": "sha512-1W07cM9gS6DcLperZfFSj+bWLtaPGSOHWhPiGzXmvVJbRLdG82sH/Kn8EtW1VqWVA54AKf2h5k5BbnIbwF3h6w==", + "dev": true, + "license": "MIT", + "optional": true, + "dependencies": { + "buffer": "^5.5.0", + "inherits": "^2.0.4", + "readable-stream": "^3.4.0" + } + }, + "node_modules/boolbase": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz", + "integrity": "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==", + "dev": true, + "license": "ISC" + }, + "node_modules/boundary": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/boundary/-/boundary-2.0.0.tgz", + "integrity": "sha512-rJKn5ooC9u8q13IMCrW0RSp31pxBCHE3y9V/tp3TdWSLf8Em3p6Di4NBpfzbJge9YjjFEsD0RtFEjtvHL5VyEA==", + "dev": true, + "license": "BSD-2-Clause" + }, + "node_modules/brace-expansion": { + "version": "1.1.15", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.15.tgz", + "integrity": "sha512-EwOCDEex4quD37XhqM3omwtMoJjr//isUZz1JopUNWms+4Z2ViyM/k1YIRePpoVNnQhENnxtFjLaxNHrT7xIUg==", + "dev": true, + "license": "MIT", + "dependencies": { + "balanced-match": "^1.0.0", + "concat-map": "0.0.1" + } + }, + "node_modules/braces": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.3.tgz", + "integrity": "sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==", + "dev": true, + "license": "MIT", + "dependencies": { + 
"fill-range": "^7.1.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/buffer": { + "version": "5.7.1", + "resolved": "https://registry.npmjs.org/buffer/-/buffer-5.7.1.tgz", + "integrity": "sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT", + "optional": true, + "dependencies": { + "base64-js": "^1.3.1", + "ieee754": "^1.1.13" + } + }, + "node_modules/buffer-crc32": { + "version": "0.2.13", + "resolved": "https://registry.npmjs.org/buffer-crc32/-/buffer-crc32-0.2.13.tgz", + "integrity": "sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": "*" + } + }, + "node_modules/buffer-equal-constant-time": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/buffer-equal-constant-time/-/buffer-equal-constant-time-1.0.1.tgz", + "integrity": "sha512-zRpUiDwd/xk6ADqPMATG8vc9VPrkck7T07OIx0gnjmJAnHnTVXNQG3vfvWNuiZIkwu9KrKdA1iJKfsfTVxE6NA==", + "dev": true, + "license": "BSD-3-Clause" + }, + "node_modules/bundle-name": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/bundle-name/-/bundle-name-4.1.0.tgz", + "integrity": "sha512-tjwM5exMg6BGRI+kNmTntNsvdZS1X8BFYS6tnJ2hdH0kVxM6/eVZ2xy+FqStSWvYmtfFMDLIxurorHwDKfDz5Q==", + "dev": true, + "license": "MIT", + "dependencies": { + "run-applescript": "^7.0.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/call-bind-apply-helpers": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz", "integrity": 
"sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==", "dev": true, "license": "MIT", @@ -684,18 +1522,20 @@ } }, "node_modules/chalk": { - "version": "2.4.2", - "resolved": "https://registry.npmjs.org/chalk/-/chalk-2.4.2.tgz", - "integrity": "sha512-Mti+f9lpJNcwF4tWV8/OrTTtF1gZi+f8FqlyAdouralcFWFQWF2+NgCHShjkCb+IFBLq9buZwE1xckQU4peSuQ==", + "version": "4.1.2", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz", + "integrity": "sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==", "dev": true, "license": "MIT", "dependencies": { - "ansi-styles": "^3.2.1", - "escape-string-regexp": "^1.0.5", - "supports-color": "^5.3.0" + "ansi-styles": "^4.1.0", + "supports-color": "^7.1.0" }, "engines": { - "node": ">=4" + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/chalk?sponsor=1" } }, "node_modules/cheerio": { @@ -761,19 +1601,22 @@ } }, "node_modules/color-convert": { - "version": "1.9.3", - "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-1.9.3.tgz", - "integrity": "sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg==", + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", + "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", "dev": true, "license": "MIT", "dependencies": { - "color-name": "1.1.3" + "color-name": "~1.1.4" + }, + "engines": { + "node": ">=7.0.0" } }, "node_modules/color-name": { - "version": "1.1.3", - "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.3.tgz", - "integrity": "sha512-72fSenhMw2HZMTVHeCA9KCmpEIbzWiQsjN+BHcBbS9vr1mtt+vJjPdksIBNUmKAW8TFUDPJK5SUU3QhE9NEXDw==", + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", + "integrity": 
"sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", "dev": true, "license": "MIT" }, @@ -791,13 +1634,13 @@ } }, "node_modules/commander": { - "version": "6.2.1", - "resolved": "https://registry.npmjs.org/commander/-/commander-6.2.1.tgz", - "integrity": "sha512-U7VdrJFnJgo4xjrHpTzu0yrHPGImdsmD95ZlgYSEajAn2JKzDhDTPG9kBTefmObL2w/ngeZnilk+OV9CG3d7UA==", + "version": "12.1.0", + "resolved": "https://registry.npmjs.org/commander/-/commander-12.1.0.tgz", + "integrity": "sha512-Vw8qHK3bZM9y/P10u3Vib8o/DdkvA2OtPtZvD871QKjy74Wj1WSKFILMPRPSdUSx5RFK1arlJzEtA4PkFgnbuA==", "dev": true, "license": "MIT", "engines": { - "node": ">= 6" + "node": ">=18" } }, "node_modules/concat-map": { @@ -807,6 +1650,21 @@ "dev": true, "license": "MIT" }, + "node_modules/cross-spawn": { + "version": "7.0.6", + "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz", + "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==", + "dev": true, + "license": "MIT", + "dependencies": { + "path-key": "^3.1.0", + "shebang-command": "^2.0.0", + "which": "^2.0.1" + }, + "engines": { + "node": ">= 8" + } + }, "node_modules/css-select": { "version": "5.2.2", "resolved": "https://registry.npmjs.org/css-select/-/css-select-5.2.2.tgz", @@ -837,6 +1695,24 @@ "url": "https://github.com/sponsors/fb55" } }, + "node_modules/debug": { + "version": "4.4.3", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz", + "integrity": "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==", + "dev": true, + "license": "MIT", + "dependencies": { + "ms": "^2.1.3" + }, + "engines": { + "node": ">=6.0" + }, + "peerDependenciesMeta": { + "supports-color": { + "optional": true + } + } + }, "node_modules/decompress-response": { "version": "6.0.0", "resolved": "https://registry.npmjs.org/decompress-response/-/decompress-response-6.0.0.tgz", @@ -865,6 +1741,49 @@ 
"node": ">=4.0.0" } }, + "node_modules/default-browser": { + "version": "5.5.0", + "resolved": "https://registry.npmjs.org/default-browser/-/default-browser-5.5.0.tgz", + "integrity": "sha512-H9LMLr5zwIbSxrmvikGuI/5KGhZ8E2zH3stkMgM5LpOWDutGM2JZaj460Udnf1a+946zc7YBgrqEWwbk7zHvGw==", + "dev": true, + "license": "MIT", + "dependencies": { + "bundle-name": "^4.1.0", + "default-browser-id": "^5.0.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/default-browser-id": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/default-browser-id/-/default-browser-id-5.0.1.tgz", + "integrity": "sha512-x1VCxdX4t+8wVfd1so/9w+vQ4vx7lKd2Qp5tDRutErwmR85OgmfX7RlLRMWafRMY7hbEiXIbudNrjOAPa/hL8Q==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/define-lazy-prop": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/define-lazy-prop/-/define-lazy-prop-3.0.0.tgz", + "integrity": "sha512-N+MeXYoqr3pOgn8xfyRPREN7gHakLYjhsHhWGT3fWAiL4IkAt0iDw14QiiEm2bE30c5XX5q0FtAA3CK5f9/BUg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/delayed-stream": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", @@ -960,6 +1879,40 @@ "node": ">= 0.4" } }, + "node_modules/ecdsa-sig-formatter": { + "version": "1.0.11", + "resolved": "https://registry.npmjs.org/ecdsa-sig-formatter/-/ecdsa-sig-formatter-1.0.11.tgz", + "integrity": "sha512-nagl3RYrbNv6kQkeJIpt6NJZy8twLB/2vtz6yN9Z4vRKHN4/QZJIEbqohALSgwKdnksuY3k5Addp5lg8sVoVcQ==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "safe-buffer": "^5.0.1" + } + }, + "node_modules/editions": { + "version": "6.22.0", + "resolved": 
"https://registry.npmjs.org/editions/-/editions-6.22.0.tgz", + "integrity": "sha512-UgGlf8IW75je7HZjNDpJdCv4cGJWIi6yumFdZ0R7A8/CIhQiWUjyGLCxdHpd8bmyD1gnkfUNK0oeOXqUS2cpfQ==", + "dev": true, + "license": "Artistic-2.0", + "dependencies": { + "version-range": "^4.15.0" + }, + "engines": { + "ecmascript": ">= es5", + "node": ">=4" + }, + "funding": { + "url": "https://bevry.me/fund" + } + }, + "node_modules/emoji-regex": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", + "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", + "dev": true, + "license": "MIT" + }, "node_modules/encoding-sniffer": { "version": "0.2.1", "resolved": "https://registry.npmjs.org/encoding-sniffer/-/encoding-sniffer-0.2.1.tgz", @@ -998,6 +1951,19 @@ "url": "https://github.com/fb55/entities?sponsor=1" } }, + "node_modules/environment": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/environment/-/environment-1.1.0.tgz", + "integrity": "sha512-xUtoPkMggbz0MPyPiIWr1Kp4aeWJjDZ6SMvURhimjdZgsRuDplF5/s9hcgGhyXMhs+6vpnuoiZ2kFiu3FMnS8Q==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/es-define-property": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz", @@ -1019,9 +1985,9 @@ } }, "node_modules/es-object-atoms": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.1.tgz", - "integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==", + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.2.tgz", + "integrity": "sha512-HWcBoN6NileqtSydK2FqHbS/LoDd2pqrnQHLyJzBj4kOp/ky2MWMN694xOfkK8/SnUsW2DH7EfyVlydKCsm1Zw==", "dev": true, "license": "MIT", 
"dependencies": { @@ -1089,16 +2055,6 @@ "@esbuild/win32-x64": "0.28.0" } }, - "node_modules/escape-string-regexp": { - "version": "1.0.5", - "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz", - "integrity": "sha512-vbRorB5FUQWvla16U8R/qgaFIya2qGzwDrNmCZuYKrbdSUMG6I1ZCGQRefkRVhuOkIGVne7BQ35DSfo1qvJqFg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=0.8.0" - } - }, "node_modules/expand-template": { "version": "2.0.3", "resolved": "https://registry.npmjs.org/expand-template/-/expand-template-2.0.3.tgz", @@ -1110,14 +2066,85 @@ "node": ">=6" } }, - "node_modules/fd-slicer": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/fd-slicer/-/fd-slicer-1.1.0.tgz", - "integrity": "sha512-cE1qsB/VwyQozZ+q1dGxR8LBYNZeofhEdUNGSMbQD3Gw2lAzX9Zb3uIU6Ebc/Fmyjo9AWWfnn0AUCHqtevs/8g==", + "node_modules/fast-deep-equal": { + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz", + "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==", + "dev": true, + "license": "MIT" + }, + "node_modules/fast-glob": { + "version": "3.3.3", + "resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-3.3.3.tgz", + "integrity": "sha512-7MptL8U0cqcFdzIzwOTHoilX9x5BrNqye7Z/LuC7kCMRio1EMSyqRK3BEAUD7sXRq4iT4AzTVuZdhgQ2TCvYLg==", "dev": true, "license": "MIT", "dependencies": { - "pend": "~1.2.0" + "@nodelib/fs.stat": "^2.0.2", + "@nodelib/fs.walk": "^1.2.3", + "glob-parent": "^5.1.2", + "merge2": "^1.3.0", + "micromatch": "^4.0.8" + }, + "engines": { + "node": ">=8.6.0" + } + }, + "node_modules/fast-uri": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/fast-uri/-/fast-uri-3.1.2.tgz", + "integrity": "sha512-rVjf7ArG3LTk+FS6Yw81V1DLuZl1bRbNrev6Tmd/9RaroeeRRJhAt7jg/6YFxbvAQXUCavSoZhPPj6oOx+5KjQ==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fastify" + }, + 
{ + "type": "opencollective", + "url": "https://opencollective.com/fastify" + } + ], + "license": "BSD-3-Clause" + }, + "node_modules/fastq": { + "version": "1.20.1", + "resolved": "https://registry.npmjs.org/fastq/-/fastq-1.20.1.tgz", + "integrity": "sha512-GGToxJ/w1x32s/D2EKND7kTil4n8OVk/9mycTc4VDza13lOvpUZTGX3mFSCtV9ksdGBVzvsyAVLM6mHFThxXxw==", + "dev": true, + "license": "ISC", + "dependencies": { + "reusify": "^1.0.4" + } + }, + "node_modules/fill-range": { + "version": "7.1.1", + "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz", + "integrity": "sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==", + "dev": true, + "license": "MIT", + "dependencies": { + "to-regex-range": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/foreground-child": { + "version": "3.3.1", + "resolved": "https://registry.npmjs.org/foreground-child/-/foreground-child-3.3.1.tgz", + "integrity": "sha512-gIXjKqtFuWEgzFRJA9WCQeSJLZDjgJUOMCMzxtvFq/37KojM1BFGufqsCy0r4qSQmYLsZYMeyRqzIWOMup03sw==", + "dev": true, + "license": "ISC", + "dependencies": { + "cross-spawn": "^7.0.6", + "signal-exit": "^4.0.1" + }, + "engines": { + "node": ">=14" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" } }, "node_modules/form-data": { @@ -1145,12 +2172,20 @@ "license": "MIT", "optional": true }, - "node_modules/fs.realpath": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", - "integrity": "sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==", + "node_modules/fs-extra": { + "version": "11.3.5", + "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-11.3.5.tgz", + "integrity": "sha512-eKpRKAovdpZtR1WopLHxlBWvAgPny3c4gX1G5Jhwmmw4XJj0ifSD5qB5TOo8hmA0wlRKDAOAhEE1yVPgs6Fgcg==", "dev": true, - "license": "ISC" + "license": "MIT", + "dependencies": { + "graceful-fs": "^4.2.0", + "jsonfile": "^6.0.1", + 
"universalify": "^2.0.0" + }, + "engines": { + "node": ">=14.14" + } }, "node_modules/function-bind": { "version": "1.1.2", @@ -1194,41 +2229,117 @@ "dev": true, "license": "MIT", "dependencies": { - "dunder-proto": "^1.0.1", - "es-object-atoms": "^1.0.0" + "dunder-proto": "^1.0.1", + "es-object-atoms": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/github-from-package": { + "version": "0.0.0", + "resolved": "https://registry.npmjs.org/github-from-package/-/github-from-package-0.0.0.tgz", + "integrity": "sha512-SyHy3T1v2NUXn29OsWdxmK6RwHD+vkj3v8en8AOBZ1wBQ/hCAQ5bAQTD02kW4W9tUp/3Qh6J8r9EvntiyCmOOw==", + "dev": true, + "license": "MIT", + "optional": true + }, + "node_modules/glob": { + "version": "11.1.0", + "resolved": "https://registry.npmjs.org/glob/-/glob-11.1.0.tgz", + "integrity": "sha512-vuNwKSaKiqm7g0THUBu2x7ckSs3XJLXE+2ssL7/MfTGPLLcrJQ/4Uq1CjPTtO5cCIiRxqvN6Twy1qOwhL0Xjcw==", + "deprecated": "Old versions of glob are not supported, and contain widely publicized security vulnerabilities, which have been fixed in the current version. Please update. 
Support for old versions may be purchased (at exorbitant rates) by contacting i@izs.me", + "dev": true, + "license": "BlueOak-1.0.0", + "dependencies": { + "foreground-child": "^3.3.1", + "jackspeak": "^4.1.1", + "minimatch": "^10.1.1", + "minipass": "^7.1.2", + "package-json-from-dist": "^1.0.0", + "path-scurry": "^2.0.0" + }, + "bin": { + "glob": "dist/esm/bin.mjs" + }, + "engines": { + "node": "20 || >=22" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/glob-parent": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz", + "integrity": "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==", + "dev": true, + "license": "ISC", + "dependencies": { + "is-glob": "^4.0.1" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/glob/node_modules/balanced-match": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-4.0.4.tgz", + "integrity": "sha512-BLrgEcRTwX2o6gGxGOCNyMvGSp35YofuYzw9h1IMTRmKqttAZZVU67bdb9Pr2vUHA8+j3i2tJfjO6C6+4myGTA==", + "dev": true, + "license": "MIT", + "engines": { + "node": "18 || 20 || >=22" + } + }, + "node_modules/glob/node_modules/brace-expansion": { + "version": "5.0.6", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-5.0.6.tgz", + "integrity": "sha512-kLpxurY4Z4r9sgMsyG0Z9uzsBlgiU/EFKhj/h91/8yHu0edo7XuixOIH3VcJ8kkxs6/jPzoI6U9Vj3WqbMQ94g==", + "dev": true, + "license": "MIT", + "dependencies": { + "balanced-match": "^4.0.2" }, "engines": { - "node": ">= 0.4" + "node": "18 || 20 || >=22" } }, - "node_modules/github-from-package": { - "version": "0.0.0", - "resolved": "https://registry.npmjs.org/github-from-package/-/github-from-package-0.0.0.tgz", - "integrity": "sha512-SyHy3T1v2NUXn29OsWdxmK6RwHD+vkj3v8en8AOBZ1wBQ/hCAQ5bAQTD02kW4W9tUp/3Qh6J8r9EvntiyCmOOw==", + "node_modules/glob/node_modules/minimatch": { + "version": "10.2.5", + 
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-10.2.5.tgz", + "integrity": "sha512-MULkVLfKGYDFYejP07QOurDLLQpcjk7Fw+7jXS2R2czRQzR56yHRveU5NDJEOviH+hETZKSkIk5c+T23GjFUMg==", "dev": true, - "license": "MIT", - "optional": true + "license": "BlueOak-1.0.0", + "dependencies": { + "brace-expansion": "^5.0.5" + }, + "engines": { + "node": "18 || 20 || >=22" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } }, - "node_modules/glob": { - "version": "7.2.3", - "resolved": "https://registry.npmjs.org/glob/-/glob-7.2.3.tgz", - "integrity": "sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q==", - "deprecated": "Old versions of glob are not supported, and contain widely publicized security vulnerabilities, which have been fixed in the current version. Please update. Support for old versions may be purchased (at exorbitant rates) by contacting i@izs.me", + "node_modules/globby": { + "version": "14.1.0", + "resolved": "https://registry.npmjs.org/globby/-/globby-14.1.0.tgz", + "integrity": "sha512-0Ia46fDOaT7k4og1PDW4YbodWWr3scS2vAr2lTbsplOt2WkKp0vQbkI9wKis/T5LV/dqPjO3bpS/z6GTJB82LA==", "dev": true, - "license": "ISC", + "license": "MIT", "dependencies": { - "fs.realpath": "^1.0.0", - "inflight": "^1.0.4", - "inherits": "2", - "minimatch": "^3.1.1", - "once": "^1.3.0", - "path-is-absolute": "^1.0.0" + "@sindresorhus/merge-streams": "^2.1.0", + "fast-glob": "^3.3.3", + "ignore": "^7.0.3", + "path-type": "^6.0.0", + "slash": "^5.1.0", + "unicorn-magic": "^0.3.0" }, "engines": { - "node": "*" + "node": ">=18" }, "funding": { - "url": "https://github.com/sponsors/isaacs" + "url": "https://github.com/sponsors/sindresorhus" } }, "node_modules/gopd": { @@ -1244,14 +2355,21 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/graceful-fs": { + "version": "4.2.11", + "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz", + "integrity": 
"sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==", + "dev": true, + "license": "ISC" + }, "node_modules/has-flag": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-3.0.0.tgz", - "integrity": "sha512-sKJf1+ceQBr4SMkvQnBDNDtf4TXpVhVGateu0t918bl30FnbE2m4vNLX+VWe/dpjlb+HugGYzW7uQXH98HPEYw==", + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", + "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==", "dev": true, "license": "MIT", "engines": { - "node": ">=4" + "node": ">=8" } }, "node_modules/has-symbols": { @@ -1284,9 +2402,9 @@ } }, "node_modules/hasown": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz", - "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==", + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.3.tgz", + "integrity": "sha512-ej4AhfhfL2Q2zpMmLo7U1Uv9+PyhIZpgQLGT1F9miIGmiCJIoCgSmczFdrc97mWT4kVY72KA+WnnhJ5pghSvSg==", "dev": true, "license": "MIT", "dependencies": { @@ -1342,6 +2460,34 @@ "url": "https://github.com/fb55/entities?sponsor=1" } }, + "node_modules/http-proxy-agent": { + "version": "7.0.2", + "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.2.tgz", + "integrity": "sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig==", + "dev": true, + "license": "MIT", + "dependencies": { + "agent-base": "^7.1.0", + "debug": "^4.3.4" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/https-proxy-agent": { + "version": "7.0.6", + "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.6.tgz", + "integrity": "sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==", + "dev": true, + "license": 
"MIT", + "dependencies": { + "agent-base": "^7.1.2", + "debug": "4" + }, + "engines": { + "node": ">= 14" + } + }, "node_modules/iconv-lite": { "version": "0.6.3", "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz", @@ -1377,16 +2523,27 @@ "license": "BSD-3-Clause", "optional": true }, - "node_modules/inflight": { - "version": "1.0.6", - "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz", - "integrity": "sha512-k92I/b08q4wvFscXCLvqfsHCrjrF7yiXsQuIVvVE7N82W3+aqpzuUdBbfhWcy/FZR3/4IgflMgKLOsvPDrGCJA==", - "deprecated": "This module is not supported, and leaks memory. Do not use it. Check out lru-cache if you want a good and tested way to coalesce async requests by a key value, which is much more comprehensive and powerful.", + "node_modules/ignore": { + "version": "7.0.5", + "resolved": "https://registry.npmjs.org/ignore/-/ignore-7.0.5.tgz", + "integrity": "sha512-Hs59xBNfUIunMFgWAbGX5cq6893IbWg4KnrjbYwX3tx0ztorVgTDA6B2sxf8ejHJ4wz8BqGUMYlnzNBer5NvGg==", "dev": true, - "license": "ISC", - "dependencies": { - "once": "^1.3.0", - "wrappy": "1" + "license": "MIT", + "engines": { + "node": ">= 4" + } + }, + "node_modules/index-to-position": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/index-to-position/-/index-to-position-1.2.0.tgz", + "integrity": "sha512-Yg7+ztRkqslMAS2iFaU+Oa4KTSidr63OsFGlOrJoW981kIYO3CGCS3wA95P1mUi/IVSJkn0D479KTJpVpvFNuw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" } }, "node_modules/inherits": { @@ -1394,7 +2551,8 @@ "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", "dev": true, - "license": "ISC" + "license": "ISC", + "optional": true }, "node_modules/ini": { "version": "1.3.8", @@ -1404,6 +2562,181 @@ "license": "ISC", "optional": true }, + 
"node_modules/is-docker": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/is-docker/-/is-docker-3.0.0.tgz", + "integrity": "sha512-eljcgEDlEns/7AXFosB5K/2nCM4P7FQPkGc/DWLy5rmFEWvZayGrik1d9/QIY5nJ4f9YsVvBkA6kJpHn9rISdQ==", + "dev": true, + "license": "MIT", + "bin": { + "is-docker": "cli.js" + }, + "engines": { + "node": "^12.20.0 || ^14.13.1 || >=16.0.0" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/is-extglob": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", + "integrity": "sha512-SbKbANkN603Vi4jEZv49LeVJMn4yGwsbzZworEoyEiutsN3nJYdbO36zfhGJ6QEDpOZIFkDtnq5JRxmvl3jsoQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/is-fullwidth-code-point": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", + "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/is-glob": { + "version": "4.0.3", + "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.3.tgz", + "integrity": "sha512-xelSayHH36ZgE7ZWhli7pW34hNbNl8Ojv5KVmkJD4hBdD3th8Tfk9vYasLM+mXWOZhFkgZfxhLSnrwRr4elSSg==", + "dev": true, + "license": "MIT", + "dependencies": { + "is-extglob": "^2.1.1" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/is-inside-container": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/is-inside-container/-/is-inside-container-1.0.0.tgz", + "integrity": "sha512-KIYLCCJghfHZxqjYBE7rEy0OBuTd5xCHS7tHVgvCLkx7StIoaxwNW3hCALgEUjFfeRk+MG/Qxmp/vtETEF3tRA==", + "dev": true, + "license": "MIT", + "dependencies": { + "is-docker": "^3.0.0" + }, + "bin": { + "is-inside-container": "cli.js" + }, + "engines": { + "node": ">=14.16" + }, + "funding": { + "url": 
"https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/is-number": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/is-number/-/is-number-7.0.0.tgz", + "integrity": "sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.12.0" + } + }, + "node_modules/is-wsl": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/is-wsl/-/is-wsl-3.1.1.tgz", + "integrity": "sha512-e6rvdUCiQCAuumZslxRJWR/Doq4VpPR82kqclvcS0efgt430SlGIk05vdCN58+VrzgtIcfNODjozVielycD4Sw==", + "dev": true, + "license": "MIT", + "dependencies": { + "is-inside-container": "^1.0.0" + }, + "engines": { + "node": ">=16" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/isexe": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", + "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==", + "dev": true, + "license": "ISC" + }, + "node_modules/istextorbinary": { + "version": "9.5.0", + "resolved": "https://registry.npmjs.org/istextorbinary/-/istextorbinary-9.5.0.tgz", + "integrity": "sha512-5mbUj3SiZXCuRf9fT3ibzbSSEWiy63gFfksmGfdOzujPjW3k+z8WvIBxcJHBoQNlaZaiyB25deviif2+osLmLw==", + "dev": true, + "license": "Artistic-2.0", + "dependencies": { + "binaryextensions": "^6.11.0", + "editions": "^6.21.0", + "textextensions": "^6.11.0" + }, + "engines": { + "node": ">=4" + }, + "funding": { + "url": "https://bevry.me/fund" + } + }, + "node_modules/jackspeak": { + "version": "4.2.3", + "resolved": "https://registry.npmjs.org/jackspeak/-/jackspeak-4.2.3.tgz", + "integrity": "sha512-ykkVRwrYvFm1nb2AJfKKYPr0emF6IiXDYUaFx4Zn9ZuIH7MrzEZ3sD5RlqGXNRpHtvUHJyOnCEFxOlNDtGo7wg==", + "dev": true, + "license": "BlueOak-1.0.0", + "dependencies": { + "@isaacs/cliui": "^9.0.0" + }, + "engines": { + "node": "20 || >=22" + }, + "funding": { + "url": 
"https://github.com/sponsors/isaacs" + } + }, + "node_modules/js-tokens": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", + "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/js-yaml": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.1.tgz", + "integrity": "sha512-qQKT4zQxXl8lLwBtHMWwaTcGfFOZviOJet3Oy/xmGk2gZH677CJM9EvtfdSkgWcATZhj/55JZ0rmy3myCT5lsA==", + "dev": true, + "license": "MIT", + "dependencies": { + "argparse": "^2.0.1" + }, + "bin": { + "js-yaml": "bin/js-yaml.js" + } + }, + "node_modules/json-schema-traverse": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz", + "integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==", + "dev": true, + "license": "MIT" + }, + "node_modules/json5": { + "version": "2.2.3", + "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.3.tgz", + "integrity": "sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg==", + "dev": true, + "license": "MIT", + "bin": { + "json5": "lib/cli.js" + }, + "engines": { + "node": ">=6" + } + }, "node_modules/jsonc-parser": { "version": "3.3.1", "resolved": "https://registry.npmjs.org/jsonc-parser/-/jsonc-parser-3.3.1.tgz", @@ -1411,6 +2744,65 @@ "dev": true, "license": "MIT" }, + "node_modules/jsonfile": { + "version": "6.2.1", + "resolved": "https://registry.npmjs.org/jsonfile/-/jsonfile-6.2.1.tgz", + "integrity": "sha512-zwOTdL3rFQ/lRdBnntKVOX6k5cKJwEc1HdilT71BWEu7J41gXIB2MRp+vxduPSwZJPWBxEzv4yH1wYLJGUHX4Q==", + "dev": true, + "license": "MIT", + "dependencies": { + "universalify": "^2.0.0" + }, + "optionalDependencies": { + "graceful-fs": "^4.1.6" + } + }, + "node_modules/jsonwebtoken": { + "version": "9.0.3", + "resolved": 
"https://registry.npmjs.org/jsonwebtoken/-/jsonwebtoken-9.0.3.tgz", + "integrity": "sha512-MT/xP0CrubFRNLNKvxJ2BYfy53Zkm++5bX9dtuPbqAeQpTVe0MQTFhao8+Cp//EmJp244xt6Drw/GVEGCUj40g==", + "dev": true, + "license": "MIT", + "dependencies": { + "jws": "^4.0.1", + "lodash.includes": "^4.3.0", + "lodash.isboolean": "^3.0.3", + "lodash.isinteger": "^4.0.4", + "lodash.isnumber": "^3.0.3", + "lodash.isplainobject": "^4.0.6", + "lodash.isstring": "^4.0.1", + "lodash.once": "^4.0.0", + "ms": "^2.1.1", + "semver": "^7.5.4" + }, + "engines": { + "node": ">=12", + "npm": ">=6" + } + }, + "node_modules/jwa": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/jwa/-/jwa-2.0.1.tgz", + "integrity": "sha512-hRF04fqJIP8Abbkq5NKGN0Bbr3JxlQ+qhZufXVr0DvujKy93ZCbXZMHDL4EOtodSbCWxOqR8MS1tXA5hwqCXDg==", + "dev": true, + "license": "MIT", + "dependencies": { + "buffer-equal-constant-time": "^1.0.1", + "ecdsa-sig-formatter": "1.0.11", + "safe-buffer": "^5.0.1" + } + }, + "node_modules/jws": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/jws/-/jws-4.0.1.tgz", + "integrity": "sha512-EKI/M/yqPncGUUh44xz0PxSidXFr/+r0pA70+gIYhjv+et7yxM+s29Y+VGDkovRofQem0fs7Uvf4+YmAdyRduA==", + "dev": true, + "license": "MIT", + "dependencies": { + "jwa": "^2.0.1", + "safe-buffer": "^5.0.1" + } + }, "node_modules/keytar": { "version": "7.9.0", "resolved": "https://registry.npmjs.org/keytar/-/keytar-7.9.0.tgz", @@ -1424,25 +2816,98 @@ "prebuild-install": "^7.0.1" } }, - "node_modules/leven": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/leven/-/leven-3.1.0.tgz", - "integrity": "sha512-qsda+H8jTaUaN/x5vzW2rzc+8Rw4TAQ/4KjB46IwK5VH+IlVeeeje/EoZRpiXvIqjFgK84QffqPztGI3VBLG1A==", + "node_modules/leven": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/leven/-/leven-3.1.0.tgz", + "integrity": "sha512-qsda+H8jTaUaN/x5vzW2rzc+8Rw4TAQ/4KjB46IwK5VH+IlVeeeje/EoZRpiXvIqjFgK84QffqPztGI3VBLG1A==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6" + 
} + }, + "node_modules/linkify-it": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/linkify-it/-/linkify-it-5.0.1.tgz", + "integrity": "sha512-wVoTjP4Q6R0NW5hiZkVJaFZPWgtXfoGF+6LucL3/FtiNjmcHhYjEr5f1Kqjirc1nBW07J/ZuRFumqr2oqccEWg==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/puzrin" + }, + { + "type": "github", + "url": "https://github.com/sponsors/markdown-it" + } + ], + "license": "MIT", + "dependencies": { + "uc.micro": "^2.0.0" + } + }, + "node_modules/lodash": { + "version": "4.18.1", + "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.18.1.tgz", + "integrity": "sha512-dMInicTPVE8d1e5otfwmmjlxkZoUpiVLwyeTdUsi/Caj/gfzzblBcCE5sRHV/AsjuCmxWrte2TNGSYuCeCq+0Q==", + "dev": true, + "license": "MIT" + }, + "node_modules/lodash.includes": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/lodash.includes/-/lodash.includes-4.3.0.tgz", + "integrity": "sha512-W3Bx6mdkRTGtlJISOvVD/lbqjTlPPUDTMnlXZFnVwi9NKJ6tiAk6LVdlhZMm17VZisqhKcgzpO5Wz91PCt5b0w==", + "dev": true, + "license": "MIT" + }, + "node_modules/lodash.isboolean": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/lodash.isboolean/-/lodash.isboolean-3.0.3.tgz", + "integrity": "sha512-Bz5mupy2SVbPHURB98VAcw+aHh4vRV5IPNhILUCsOzRmsTmSQ17jIuqopAentWoehktxGd9e/hbIXq980/1QJg==", + "dev": true, + "license": "MIT" + }, + "node_modules/lodash.isinteger": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/lodash.isinteger/-/lodash.isinteger-4.0.4.tgz", + "integrity": "sha512-DBwtEWN2caHQ9/imiNeEA5ys1JoRtRfY3d7V9wkqtbycnAmTvRRmbHKDV4a0EYc678/dia0jrte4tjYwVBaZUA==", + "dev": true, + "license": "MIT" + }, + "node_modules/lodash.isnumber": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/lodash.isnumber/-/lodash.isnumber-3.0.3.tgz", + "integrity": "sha512-QYqzpfwO3/CWf3XP+Z+tkQsfaLL/EnUlXWVkIk5FUPc4sBdTehEqZONuyRt2P67PXAk+NXmTBcc97zw9t1FQrw==", + "dev": true, + "license": "MIT" + }, + 
"node_modules/lodash.isplainobject": { + "version": "4.0.6", + "resolved": "https://registry.npmjs.org/lodash.isplainobject/-/lodash.isplainobject-4.0.6.tgz", + "integrity": "sha512-oSXzaWypCMHkPC3NvBEaPHf0KsA5mvPrOPgQWDsbg8n7orZ290M0BmC/jgRZ4vcJ6DTAhjrsSYgdsW/F+MFOBA==", + "dev": true, + "license": "MIT" + }, + "node_modules/lodash.isstring": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/lodash.isstring/-/lodash.isstring-4.0.1.tgz", + "integrity": "sha512-0wJxfxH1wgO3GrbuP+dTTk7op+6L41QCXbGINEmD+ny/G/eCqGzxyCsh7159S+mgDDcoarnBw6PC1PS5+wUGgw==", + "dev": true, + "license": "MIT" + }, + "node_modules/lodash.once": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/lodash.once/-/lodash.once-4.1.1.tgz", + "integrity": "sha512-Sb487aTOCr9drQVL8pIxOzVhafOjZN9UU54hiN8PU3uAiSV7lx1yYNpbNmex2PK6dSJoNTSJUUswT651yww3Mg==", "dev": true, - "license": "MIT", - "engines": { - "node": ">=6" - } + "license": "MIT" }, - "node_modules/linkify-it": { - "version": "3.0.3", - "resolved": "https://registry.npmjs.org/linkify-it/-/linkify-it-3.0.3.tgz", - "integrity": "sha512-ynTsyrFSdE5oZ/O9GEf00kPngmOfVwazR5GKDq6EYfhlpFug3J2zybX56a2PRRpc9P+FuSoGNAwjlbDs9jJBPQ==", + "node_modules/lodash.truncate": { + "version": "4.4.2", + "resolved": "https://registry.npmjs.org/lodash.truncate/-/lodash.truncate-4.4.2.tgz", + "integrity": "sha512-jttmRe7bRse52OsWIMDLaXxWqRAmtIUccAQ3garviCqJjafXOfNMO0yMfNpdD6zbGaTU0P5Nz7e7gAT6cKmJRw==", "dev": true, - "license": "MIT", - "dependencies": { - "uc.micro": "^1.0.1" - } + "license": "MIT" }, "node_modules/lru-cache": { "version": "6.0.0", @@ -1458,30 +2923,31 @@ } }, "node_modules/markdown-it": { - "version": "12.3.2", - "resolved": "https://registry.npmjs.org/markdown-it/-/markdown-it-12.3.2.tgz", - "integrity": "sha512-TchMembfxfNVpHkbtriWltGWc+m3xszaRD0CZup7GFFhzIgQqxIfn3eGj1yZpfuflzPvfkt611B2Q/Bsk1YnGg==", + "version": "14.2.0", + "resolved": "https://registry.npmjs.org/markdown-it/-/markdown-it-14.2.0.tgz", + "integrity": 
"sha512-1TGiQiJVRQ3NPmZH6sx5Cfnmg6GQm9jvC1ch4TK511NjSJvjzKLzn5pPfZRNZkRPZP0HqCioSndqH8v2nRaWVQ==", "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/puzrin" + }, + { + "type": "github", + "url": "https://github.com/sponsors/markdown-it" + } + ], "license": "MIT", "dependencies": { "argparse": "^2.0.1", - "entities": "~2.1.0", - "linkify-it": "^3.0.1", - "mdurl": "^1.0.1", - "uc.micro": "^1.0.5" + "entities": "^4.4.0", + "linkify-it": "^5.0.1", + "mdurl": "^2.0.0", + "punycode.js": "^2.3.1", + "uc.micro": "^2.1.0" }, "bin": { - "markdown-it": "bin/markdown-it.js" - } - }, - "node_modules/markdown-it/node_modules/entities": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/entities/-/entities-2.1.0.tgz", - "integrity": "sha512-hCx1oky9PFrJ611mf0ifBLBRW8lUUVRlFolb5gWRfIELabBlbp9xZvrqZLZAs+NxFnbfQoeGd8wDkygjg7U85w==", - "dev": true, - "license": "BSD-2-Clause", - "funding": { - "url": "https://github.com/fb55/entities?sponsor=1" + "markdown-it": "bin/markdown-it.mjs" } }, "node_modules/math-intrinsics": { @@ -1495,12 +2961,36 @@ } }, "node_modules/mdurl": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/mdurl/-/mdurl-1.0.1.tgz", - "integrity": "sha512-/sKlQJCBYVY9Ers9hqzKou4H6V5UWc/M59TH2dvkt+84itfnq7uFOMLpOiOS4ujvHP4etln18fmIxA5R5fll0g==", + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/mdurl/-/mdurl-2.0.0.tgz", + "integrity": "sha512-Lf+9+2r+Tdp5wXDXC4PcIBjTDtq4UKjCPMQhKIuzpJNW0b96kVqSwW0bT7FhRSfmAiFYgP+SCRvdrDozfh0U5w==", "dev": true, "license": "MIT" }, + "node_modules/merge2": { + "version": "1.4.1", + "resolved": "https://registry.npmjs.org/merge2/-/merge2-1.4.1.tgz", + "integrity": "sha512-8q7VEgMJW4J8tcfVPy8g09NcQwZdbwFEqhe/WZkoIzjn/3TGDwtOCYtXGxA3O8tPzpczCCDgv+P2P5y00ZJOOg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 8" + } + }, + "node_modules/micromatch": { + "version": "4.0.8", + "resolved": 
"https://registry.npmjs.org/micromatch/-/micromatch-4.0.8.tgz", + "integrity": "sha512-PXwfBhYu0hBCPw8Dn0E+WDYb7af3dSLVWKi3HGv84IdF4TyFoC0ysxFd0Goxw7nSv4T/PzEJQxsYsEiFCKo2BA==", + "dev": true, + "license": "MIT", + "dependencies": { + "braces": "^3.0.3", + "picomatch": "^2.3.1" + }, + "engines": { + "node": ">=8.6" + } + }, "node_modules/mime": { "version": "1.6.0", "resolved": "https://registry.npmjs.org/mime/-/mime-1.6.0.tgz", @@ -1575,6 +3065,16 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/minipass": { + "version": "7.1.3", + "resolved": "https://registry.npmjs.org/minipass/-/minipass-7.1.3.tgz", + "integrity": "sha512-tEBHqDnIoM/1rXME1zgka9g6Q2lcoCkxHLuc7ODJ5BxbP5d4c2Z5cGgtXAku59200Cx7diuHTOYfSBD8n6mm8A==", + "dev": true, + "license": "BlueOak-1.0.0", + "engines": { + "node": ">=16 || 14 >=14.17" + } + }, "node_modules/mkdirp-classic": { "version": "0.5.3", "resolved": "https://registry.npmjs.org/mkdirp-classic/-/mkdirp-classic-0.5.3.tgz", @@ -1583,6 +3083,13 @@ "license": "MIT", "optional": true }, + "node_modules/ms": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", + "dev": true, + "license": "MIT" + }, "node_modules/mute-stream": { "version": "0.0.8", "resolved": "https://registry.npmjs.org/mute-stream/-/mute-stream-0.0.8.tgz", @@ -1599,9 +3106,9 @@ "optional": true }, "node_modules/node-abi": { - "version": "3.89.0", - "resolved": "https://registry.npmjs.org/node-abi/-/node-abi-3.89.0.tgz", - "integrity": "sha512-6u9UwL0HlAl21+agMN3YAMXcKByMqwGx+pq+P76vii5f7hTPtKDp08/H9py6DY+cfDw7kQNTGEj/rly3IgbNQA==", + "version": "3.92.0", + "resolved": "https://registry.npmjs.org/node-abi/-/node-abi-3.92.0.tgz", + "integrity": "sha512-KdHvFWZjEKDf0cakgFjebl371GPsISX2oZHcuyKqM7DtogIsHrqKeLTo8wBHxaXRAQlY2PsPlZmfo+9ZCxEREQ==", "dev": true, "license": "MIT", "optional": true, @@ -1620,6 +3127,55 @@ 
"license": "MIT", "optional": true }, + "node_modules/node-sarif-builder": { + "version": "3.4.0", + "resolved": "https://registry.npmjs.org/node-sarif-builder/-/node-sarif-builder-3.4.0.tgz", + "integrity": "sha512-tGnJW6OKRii9u/b2WiUViTJS+h7Apxx17qsMUjsUeNDiMMX5ZFf8F8Fcz7PAQ6omvOxHZtvDTmOYKJQwmfpjeg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/sarif": "^2.1.7", + "fs-extra": "^11.1.1" + }, + "engines": { + "node": ">=20" + } + }, + "node_modules/normalize-package-data": { + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/normalize-package-data/-/normalize-package-data-6.0.2.tgz", + "integrity": "sha512-V6gygoYb/5EmNI+MEGrWkC+e6+Rr7mTmfHrxDbLzxQogBkgzo76rkok0Am6thgSF7Mv2nLOajAJj5vDJZEFn7g==", + "dev": true, + "license": "BSD-2-Clause", + "dependencies": { + "hosted-git-info": "^7.0.0", + "semver": "^7.3.5", + "validate-npm-package-license": "^3.0.4" + }, + "engines": { + "node": "^16.14.0 || >=18.0.0" + } + }, + "node_modules/normalize-package-data/node_modules/hosted-git-info": { + "version": "7.0.2", + "resolved": "https://registry.npmjs.org/hosted-git-info/-/hosted-git-info-7.0.2.tgz", + "integrity": "sha512-puUZAUKT5m8Zzvs72XWy3HtvVbTWljRE66cP60bxJzAqf2DgICo7lYTY2IHUmLnNpjYvw5bvmoHvPc0QO2a62w==", + "dev": true, + "license": "ISC", + "dependencies": { + "lru-cache": "^10.0.1" + }, + "engines": { + "node": "^16.14.0 || >=18.0.0" + } + }, + "node_modules/normalize-package-data/node_modules/lru-cache": { + "version": "10.4.3", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-10.4.3.tgz", + "integrity": "sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ==", + "dev": true, + "license": "ISC" + }, "node_modules/nth-check": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.1.1.tgz", @@ -1652,10 +3208,68 @@ "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==", "dev": true, 
"license": "ISC", + "optional": true, "dependencies": { "wrappy": "1" } }, + "node_modules/open": { + "version": "10.2.0", + "resolved": "https://registry.npmjs.org/open/-/open-10.2.0.tgz", + "integrity": "sha512-YgBpdJHPyQ2UE5x+hlSXcnejzAvD0b22U2OuAP+8OnlJT+PjWPxtgmGqKKc+RgTM63U9gN0YzrYc71R2WT/hTA==", + "dev": true, + "license": "MIT", + "dependencies": { + "default-browser": "^5.2.1", + "define-lazy-prop": "^3.0.0", + "is-inside-container": "^1.0.0", + "wsl-utils": "^0.1.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/p-map": { + "version": "7.0.4", + "resolved": "https://registry.npmjs.org/p-map/-/p-map-7.0.4.tgz", + "integrity": "sha512-tkAQEw8ysMzmkhgw8k+1U/iPhWNhykKnSk4Rd5zLoPJCuJaGRPo6YposrZgaxHKzDHdDWWZvE/Sk7hsL2X/CpQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/package-json-from-dist": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/package-json-from-dist/-/package-json-from-dist-1.0.1.tgz", + "integrity": "sha512-UEZIS3/by4OC8vL3P2dTXRETpebLI2NiI5vIrjaD/5UtrkFX/tNbwjTSRAGC/+7CAo2pIcBaRgWmcBBHcsaCIw==", + "dev": true, + "license": "BlueOak-1.0.0" + }, + "node_modules/parse-json": { + "version": "8.3.0", + "resolved": "https://registry.npmjs.org/parse-json/-/parse-json-8.3.0.tgz", + "integrity": "sha512-ybiGyvspI+fAoRQbIPRddCcSTV9/LsJbf0e/S85VLowVGzRmokfneg2kwVW/KU5rOXrPSbF1qAKPMgNTqqROQQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/code-frame": "^7.26.2", + "index-to-position": "^1.1.0", + "type-fest": "^4.39.1" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/parse-semver": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/parse-semver/-/parse-semver-1.1.1.tgz", @@ -1729,14 +3343,54 @@ "url": 
"https://github.com/fb55/entities?sponsor=1" } }, - "node_modules/path-is-absolute": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", - "integrity": "sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg==", + "node_modules/path-key": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", + "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==", "dev": true, "license": "MIT", "engines": { - "node": ">=0.10.0" + "node": ">=8" + } + }, + "node_modules/path-scurry": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/path-scurry/-/path-scurry-2.0.2.tgz", + "integrity": "sha512-3O/iVVsJAPsOnpwWIeD+d6z/7PmqApyQePUtCndjatj/9I5LylHvt5qluFaBT3I5h3r1ejfR056c+FCv+NnNXg==", + "dev": true, + "license": "BlueOak-1.0.0", + "dependencies": { + "lru-cache": "^11.0.0", + "minipass": "^7.1.2" + }, + "engines": { + "node": "18 || 20 || >=22" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/path-scurry/node_modules/lru-cache": { + "version": "11.5.1", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-11.5.1.tgz", + "integrity": "sha512-RPimw/7aMdv2oqRrxKwvZXcPfwBrn/JZ2xYcY9Hus/6LaS3VOAKVWKWgNLCFSiOm1ESXinjsDlidVU7JlnCN2A==", + "dev": true, + "license": "BlueOak-1.0.0", + "engines": { + "node": "20 || >=22" + } + }, + "node_modules/path-type": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/path-type/-/path-type-6.0.0.tgz", + "integrity": "sha512-Vj7sf++t5pBD637NSfkxpHSMfWaeig5+DKWLhcqIYx6mWQz5hdJTGDVMQiJcw1ZYkhs7AazKDGpRVji1LJCZUQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" } }, "node_modules/pend": { @@ -1746,6 +3400,36 @@ "dev": true, "license": "MIT" }, + "node_modules/picocolors": { + "version": 
"1.1.1", + "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz", + "integrity": "sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==", + "dev": true, + "license": "ISC" + }, + "node_modules/picomatch": { + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.2.tgz", + "integrity": "sha512-V7+vQEJ06Z+c5tSye8S+nHUfI51xoXIXjHQ99cQtKUkQqqO1kO/KCJUfZXuB47h/YBlDhah2H3hdUGXn8ie0oA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8.6" + }, + "funding": { + "url": "https://github.com/sponsors/jonschlinkert" + } + }, + "node_modules/pluralize": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/pluralize/-/pluralize-8.0.0.tgz", + "integrity": "sha512-Nc3IT5yHzflTfbjgqWcCPpo7DaKy4FnpB0l/zCAW0Tc7jxAiuqSxHasntB3D7887LSrA93kDJ9IXovxJYxyLCA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=4" + } + }, "node_modules/prebuild-install": { "version": "7.1.3", "resolved": "https://registry.npmjs.org/prebuild-install/-/prebuild-install-7.1.3.tgz", @@ -1787,10 +3471,20 @@ "once": "^1.3.1" } }, + "node_modules/punycode.js": { + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/punycode.js/-/punycode.js-2.3.1.tgz", + "integrity": "sha512-uxFIHU0YlHYhDQtV4R9J6a52SLx28BCjT+4ieh7IGbgwVJWO+km431c4yRlREUAsAmt/uMjQUyQHNEPf0M39CA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6" + } + }, "node_modules/qs": { - "version": "6.15.0", - "resolved": "https://registry.npmjs.org/qs/-/qs-6.15.0.tgz", - "integrity": "sha512-mAZTtNCeetKMH+pSjrb76NAM8V9a05I9aBZOHztWy/UqcJdQYNsf59vrRKWnojAT9Y+GbIvoTBC++CPHqpDBhQ==", + "version": "6.15.2", + "resolved": "https://registry.npmjs.org/qs/-/qs-6.15.2.tgz", + "integrity": "sha512-Rzq0KEyX/w/tEybncDgdkZrJgVUsUMk3xjh3t5bv3S1HTAtg+uOYt72+ZfwiQwKdysThkTBdL/rTi6HDmX9Ddw==", "dev": true, "license": "BSD-3-Clause", "dependencies": { @@ -1803,6 +3497,27 @@ "url": 
"https://github.com/sponsors/ljharb" } }, + "node_modules/queue-microtask": { + "version": "1.2.3", + "resolved": "https://registry.npmjs.org/queue-microtask/-/queue-microtask-1.2.3.tgz", + "integrity": "sha512-NuaNSa6flKT5JaSYQzJok04JzTL1CA6aGhv5rfLW3PgqA+M2ChpZQnAC8h8i4ZFkBS8X5RqkDBHA7r4hej3K9A==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT" + }, "node_modules/rc": { "version": "1.2.8", "resolved": "https://registry.npmjs.org/rc/-/rc-1.2.8.tgz", @@ -1820,6 +3535,19 @@ "rc": "cli.js" } }, + "node_modules/rc-config-loader": { + "version": "4.1.4", + "resolved": "https://registry.npmjs.org/rc-config-loader/-/rc-config-loader-4.1.4.tgz", + "integrity": "sha512-3GiwEzklkbXTDp52UR5nT8iXgYAx1V9ZG/kDZT7p60u2GCv2XTwQq4NzinMoMpNtXhmt3WkhYXcj6HH8HdwCEQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "debug": "^4.4.3", + "js-yaml": "^4.1.1", + "json5": "^2.2.3", + "require-from-string": "^2.0.2" + } + }, "node_modules/read": { "version": "1.0.7", "resolved": "https://registry.npmjs.org/read/-/read-1.0.7.tgz", @@ -1833,20 +3561,111 @@ "node": ">=0.8" } }, + "node_modules/read-pkg": { + "version": "9.0.1", + "resolved": "https://registry.npmjs.org/read-pkg/-/read-pkg-9.0.1.tgz", + "integrity": "sha512-9viLL4/n1BJUCT1NXVTdS1jtm80yDEgR5T4yCelII49Mbj0v1rZdKqj7zCiYdbB0CuCgdrvHcNogAKTFPBocFA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/normalize-package-data": "^2.4.3", + "normalize-package-data": "^6.0.0", + "parse-json": "^8.0.0", + "type-fest": "^4.6.0", + "unicorn-magic": "^0.1.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/read-pkg/node_modules/unicorn-magic": { + "version": "0.1.0", + "resolved": 
"https://registry.npmjs.org/unicorn-magic/-/unicorn-magic-0.1.0.tgz", + "integrity": "sha512-lRfVq8fE8gz6QMBuDM6a+LO3IAzTi05H6gCVaUpir2E1Rwpo4ZUog45KpNXKC/Mn3Yb9UDuHumeFTo9iV/D9FQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/readable-stream": { "version": "3.6.2", "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz", "integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==", "dev": true, "license": "MIT", - "optional": true, + "optional": true, + "dependencies": { + "inherits": "^2.0.3", + "string_decoder": "^1.1.1", + "util-deprecate": "^1.0.1" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/require-from-string": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/require-from-string/-/require-from-string-2.0.2.tgz", + "integrity": "sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/reusify": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/reusify/-/reusify-1.1.0.tgz", + "integrity": "sha512-g6QUff04oZpHs0eG5p83rFLhHeV00ug/Yf9nZM6fLeUrPguBTkTQOdpAWWspMh55TZfVQDPaN3NQJfbVRAxdIw==", + "dev": true, + "license": "MIT", + "engines": { + "iojs": ">=1.0.0", + "node": ">=0.10.0" + } + }, + "node_modules/run-applescript": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/run-applescript/-/run-applescript-7.1.0.tgz", + "integrity": "sha512-DPe5pVFaAsinSaV6QjQ6gdiedWDcRCbUuiQfQa2wmWV7+xC9bGulGI8+TdRmoFkAPaBXk8CrAbnlY2ISniJ47Q==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/run-parallel": { + "version": "1.2.0", + "resolved": 
"https://registry.npmjs.org/run-parallel/-/run-parallel-1.2.0.tgz", + "integrity": "sha512-5l4VyZR86LZ/lDxZTR6jqL8AFE2S0IFLMP26AbjsLVADxHdhB/c0GUsH+y39UfCi3dzz8OlQuPmnaJOMoDHQBA==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT", "dependencies": { - "inherits": "^2.0.3", - "string_decoder": "^1.1.1", - "util-deprecate": "^1.0.1" - }, - "engines": { - "node": ">= 6" + "queue-microtask": "^1.2.2" } }, "node_modules/safe-buffer": { @@ -1868,8 +3687,7 @@ "url": "https://feross.org/support" } ], - "license": "MIT", - "optional": true + "license": "MIT" }, "node_modules/safer-buffer": { "version": "2.1.2", @@ -1888,10 +3706,32 @@ "node": ">=11.0.0" } }, + "node_modules/secretlint": { + "version": "10.2.2", + "resolved": "https://registry.npmjs.org/secretlint/-/secretlint-10.2.2.tgz", + "integrity": "sha512-xVpkeHV/aoWe4vP4TansF622nBEImzCY73y/0042DuJ29iKIaqgoJ8fGxre3rVSHHbxar4FdJobmTnLp9AU0eg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@secretlint/config-creator": "^10.2.2", + "@secretlint/formatter": "^10.2.2", + "@secretlint/node": "^10.2.2", + "@secretlint/profiler": "^10.2.2", + "debug": "^4.4.1", + "globby": "^14.1.0", + "read-pkg": "^9.0.1" + }, + "bin": { + "secretlint": "bin/secretlint.js" + }, + "engines": { + "node": ">=20.0.0" + } + }, "node_modules/semver": { - "version": "7.7.4", - "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.4.tgz", - "integrity": "sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA==", + "version": "7.8.1", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.8.1.tgz", + "integrity": "sha512-rkVq3IXh+4FDGch+KwzX3aV9W3kO54GyEgpvBzSyctDA6Xtd7RJQV1xmXbeQp5v7+VzLOfVqiutSE6GICgPFvg==", "dev": true, "license": "ISC", "bin": { @@ -1901,6 +3741,29 
@@ "node": ">=10" } }, + "node_modules/shebang-command": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", + "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==", + "dev": true, + "license": "MIT", + "dependencies": { + "shebang-regex": "^3.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/shebang-regex": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz", + "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, "node_modules/side-channel": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/side-channel/-/side-channel-1.1.0.tgz", @@ -1922,14 +3785,14 @@ } }, "node_modules/side-channel-list": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/side-channel-list/-/side-channel-list-1.0.0.tgz", - "integrity": "sha512-FCLHtRD/gnpCiCHEiJLOwdmFP+wzCmDEkc9y7NsYxeF4u7Btsn1ZuwgwJGxImImHicJArLP4R0yX4c2KCrMrTA==", + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/side-channel-list/-/side-channel-list-1.0.1.tgz", + "integrity": "sha512-mjn/0bi/oUURjc5Xl7IaWi/OJJJumuoJFQJfDDyO46+hBWsfaVM65TBHq2eoZBhzl9EchxOijpkbRC8SVBQU0w==", "dev": true, "license": "MIT", "dependencies": { "es-errors": "^1.3.0", - "object-inspect": "^1.13.3" + "object-inspect": "^1.13.4" }, "engines": { "node": ">= 0.4" @@ -1977,6 +3840,19 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/signal-exit": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-4.1.0.tgz", + "integrity": "sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw==", + "dev": true, + "license": "ISC", + "engines": { + "node": ">=14" + }, + "funding": { + "url": 
"https://github.com/sponsors/isaacs" + } + }, "node_modules/simple-concat": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/simple-concat/-/simple-concat-1.0.1.tgz", @@ -2026,6 +3902,73 @@ "simple-concat": "^1.0.0" } }, + "node_modules/slash": { + "version": "5.1.0", + "resolved": "https://registry.npmjs.org/slash/-/slash-5.1.0.tgz", + "integrity": "sha512-ZA6oR3T/pEyuqwMgAKT0/hAv8oAXckzbkmR0UkUosQ+Mc4RxGoJkRmwHgHufaenlyAgE1Mxgpdcrf75y6XcnDg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=14.16" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/slice-ansi": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/slice-ansi/-/slice-ansi-4.0.0.tgz", + "integrity": "sha512-qMCMfhY040cVHT43K9BFygqYbUPFZKHOg7K73mtTWJRb8pyP3fzf4Ixd5SzdEJQ6MRUg/WBnOLxghZtKKurENQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "ansi-styles": "^4.0.0", + "astral-regex": "^2.0.0", + "is-fullwidth-code-point": "^3.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/slice-ansi?sponsor=1" + } + }, + "node_modules/spdx-correct": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/spdx-correct/-/spdx-correct-3.2.0.tgz", + "integrity": "sha512-kN9dJbvnySHULIluDHy32WHRUu3Og7B9sbY7tsFLctQkIqnMh3hErYgdMjTYuqmcXX+lK5T1lnUt3G7zNswmZA==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "spdx-expression-parse": "^3.0.0", + "spdx-license-ids": "^3.0.0" + } + }, + "node_modules/spdx-exceptions": { + "version": "2.5.0", + "resolved": "https://registry.npmjs.org/spdx-exceptions/-/spdx-exceptions-2.5.0.tgz", + "integrity": "sha512-PiU42r+xO4UbUS1buo3LPJkjlO7430Xn5SVAhdpzzsPHsjbYVflnnFdATgabnLude+Cqu25p6N+g2lw/PFsa4w==", + "dev": true, + "license": "CC-BY-3.0" + }, + "node_modules/spdx-expression-parse": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/spdx-expression-parse/-/spdx-expression-parse-3.0.1.tgz", + 
"integrity": "sha512-cbqHunsQWnJNE6KhVSMsMeH5H/L9EpymbzqTQ3uLwNCLZ1Q481oWaofqH7nO6V07xlXwY6PhQdQ2IedWx/ZK4Q==", + "dev": true, + "license": "MIT", + "dependencies": { + "spdx-exceptions": "^2.1.0", + "spdx-license-ids": "^3.0.0" + } + }, + "node_modules/spdx-license-ids": { + "version": "3.0.23", + "resolved": "https://registry.npmjs.org/spdx-license-ids/-/spdx-license-ids-3.0.23.tgz", + "integrity": "sha512-CWLcCCH7VLu13TgOH+r8p1O/Znwhqv/dbb6lqWy67G+pT1kHmeD/+V36AVb/vq8QMIQwVShJ6Ssl5FPh0fuSdw==", + "dev": true, + "license": "CC0-1.0" + }, "node_modules/string_decoder": { "version": "1.3.0", "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz", @@ -2037,6 +3980,60 @@ "safe-buffer": "~5.2.0" } }, + "node_modules/string-width": { + "version": "4.2.3", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", + "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", + "dev": true, + "license": "MIT", + "dependencies": { + "emoji-regex": "^8.0.0", + "is-fullwidth-code-point": "^3.0.0", + "strip-ansi": "^6.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/string-width/node_modules/ansi-regex": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", + "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/string-width/node_modules/strip-ansi": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", + "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", + "dev": true, + "license": "MIT", + "dependencies": { + "ansi-regex": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/strip-ansi": { + "version": "7.2.0", + "resolved": 
"https://registry.npmjs.org/strip-ansi/-/strip-ansi-7.2.0.tgz", + "integrity": "sha512-yDPMNjp4WyfYBkHnjIRLfca1i6KMyGCtsVgoKe/z1+6vukgaENdgGBZt+ZmKPc4gavvEZ5OgHfHdrazhgNyG7w==", + "dev": true, + "license": "MIT", + "dependencies": { + "ansi-regex": "^6.2.2" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/strip-ansi?sponsor=1" + } + }, "node_modules/strip-json-comments": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-2.0.1.tgz", @@ -2048,17 +4045,84 @@ "node": ">=0.10.0" } }, + "node_modules/structured-source": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/structured-source/-/structured-source-4.0.0.tgz", + "integrity": "sha512-qGzRFNJDjFieQkl/sVOI2dUjHKRyL9dAJi2gCPGJLbJHBIkyOHxjuocpIEfbLioX+qSJpvbYdT49/YCdMznKxA==", + "dev": true, + "license": "BSD-2-Clause", + "dependencies": { + "boundary": "^2.0.0" + } + }, "node_modules/supports-color": { - "version": "5.5.0", - "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-5.5.0.tgz", - "integrity": "sha512-QjVjwdXIt408MIiAqCX4oUKsgU2EqAGzs2Ppkm4aQYbjm+ZEWEcW4SfFNTr4uMNZma0ey4f5lgLrkB0aX0QMow==", + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", + "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==", "dev": true, "license": "MIT", "dependencies": { - "has-flag": "^3.0.0" + "has-flag": "^4.0.0" }, "engines": { - "node": ">=4" + "node": ">=8" + } + }, + "node_modules/supports-hyperlinks": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/supports-hyperlinks/-/supports-hyperlinks-3.2.0.tgz", + "integrity": "sha512-zFObLMyZeEwzAoKCyu1B91U79K2t7ApXuQfo8OuxwXLDgcKxuwM+YvcbIhm6QWqz7mHUH1TVytR1PwVVjEuMig==", + "dev": true, + "license": "MIT", + "dependencies": { + "has-flag": "^4.0.0", + "supports-color": "^7.0.0" + }, + "engines": { + "node": ">=14.18" + }, + 
"funding": { + "url": "https://github.com/chalk/supports-hyperlinks?sponsor=1" + } + }, + "node_modules/table": { + "version": "6.9.0", + "resolved": "https://registry.npmjs.org/table/-/table-6.9.0.tgz", + "integrity": "sha512-9kY+CygyYM6j02t5YFHbNz2FN5QmYGv9zAjVp4lCDjlCw7amdckXlEt/bjMhUIfj4ThGRE4gCUH5+yGnNuPo5A==", + "dev": true, + "license": "BSD-3-Clause", + "dependencies": { + "ajv": "^8.0.1", + "lodash.truncate": "^4.4.2", + "slice-ansi": "^4.0.0", + "string-width": "^4.2.3", + "strip-ansi": "^6.0.1" + }, + "engines": { + "node": ">=10.0.0" + } + }, + "node_modules/table/node_modules/ansi-regex": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", + "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/table/node_modules/strip-ansi": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", + "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", + "dev": true, + "license": "MIT", + "dependencies": { + "ansi-regex": "^5.0.1" + }, + "engines": { + "node": ">=8" } }, "node_modules/tar-fs": { @@ -2093,16 +4157,76 @@ "node": ">=6" } }, + "node_modules/terminal-link": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/terminal-link/-/terminal-link-4.0.0.tgz", + "integrity": "sha512-lk+vH+MccxNqgVqSnkMVKx4VLJfnLjDBGzH16JVZjKE2DoxP57s6/vt6JmXV5I3jBcfGrxNrYtC+mPtU7WJztA==", + "dev": true, + "license": "MIT", + "dependencies": { + "ansi-escapes": "^7.0.0", + "supports-hyperlinks": "^3.2.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/text-table": { + "version": "0.2.0", + "resolved": "https://registry.npmjs.org/text-table/-/text-table-0.2.0.tgz", + "integrity": 
"sha512-N+8UisAXDGk8PFXP4HAzVR9nbfmVJ3zYLAWiTIoqC5v5isinhr+r5uaO8+7r3BMfuNIufIsA7RdpVgacC2cSpw==", + "dev": true, + "license": "MIT" + }, + "node_modules/textextensions": { + "version": "6.11.0", + "resolved": "https://registry.npmjs.org/textextensions/-/textextensions-6.11.0.tgz", + "integrity": "sha512-tXJwSr9355kFJI3lbCkPpUH5cP8/M0GGy2xLO34aZCjMXBaK3SoPnZwr/oWmo1FdCnELcs4npdCIOFtq9W3ruQ==", + "dev": true, + "license": "Artistic-2.0", + "dependencies": { + "editions": "^6.21.0" + }, + "engines": { + "node": ">=4" + }, + "funding": { + "url": "https://bevry.me/fund" + } + }, "node_modules/tmp": { - "version": "0.2.5", - "resolved": "https://registry.npmjs.org/tmp/-/tmp-0.2.5.tgz", - "integrity": "sha512-voyz6MApa1rQGUxT3E+BK7/ROe8itEx7vD8/HEvt4xwXucvQ5G5oeEiHkmHZJuBO21RpOf+YYm9MOivj709jow==", + "version": "0.2.7", + "resolved": "https://registry.npmjs.org/tmp/-/tmp-0.2.7.tgz", + "integrity": "sha512-e0votIpp4Uo2AJYSzVHV6xCcawuiez3DzqDAbrTc3YxBkplN6e+dM13ZeIcZnDg/QpSuU2zfZ3rzwY8ukEnaXw==", "dev": true, "license": "MIT", "engines": { "node": ">=14.14" } }, + "node_modules/to-regex-range": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz", + "integrity": "sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "is-number": "^7.0.0" + }, + "engines": { + "node": ">=8.0" + } + }, + "node_modules/tslib": { + "version": "2.8.1", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", + "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", + "dev": true, + "license": "0BSD" + }, "node_modules/tunnel": { "version": "0.0.6", "resolved": "https://registry.npmjs.org/tunnel/-/tunnel-0.0.6.tgz", @@ -2127,6 +4251,19 @@ "node": "*" } }, + "node_modules/type-fest": { + "version": "4.41.0", + "resolved": 
"https://registry.npmjs.org/type-fest/-/type-fest-4.41.0.tgz", + "integrity": "sha512-TeTSQ6H5YHvpqVwBRcnLDCBnDOHWYu7IvGbHT6N8AOymcr9PJGjc1GTtiWZTYg0NCgYwvnYWEkVChQAr9bjfwA==", + "dev": true, + "license": "(MIT OR CC0-1.0)", + "engines": { + "node": ">=16" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/typed-rest-client": { "version": "1.8.11", "resolved": "https://registry.npmjs.org/typed-rest-client/-/typed-rest-client-1.8.11.tgz", @@ -2140,9 +4277,9 @@ } }, "node_modules/typescript": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/typescript/-/typescript-6.0.2.tgz", - "integrity": "sha512-bGdAIrZ0wiGDo5l8c++HWtbaNCWTS4UTv7RaTH/ThVIgjkveJt83m74bBHMJkuCbslY8ixgLBVZJIOiQlQTjfQ==", + "version": "6.0.3", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-6.0.3.tgz", + "integrity": "sha512-y2TvuxSZPDyQakkFRPZHKFm+KKVqIisdg9/CZwm9ftvKXLP8NRWj38/ODjNbr43SsoXqNuAisEf1GdCxqWcdBw==", "dev": true, "license": "Apache-2.0", "bin": { @@ -2154,9 +4291,9 @@ } }, "node_modules/uc.micro": { - "version": "1.0.6", - "resolved": "https://registry.npmjs.org/uc.micro/-/uc.micro-1.0.6.tgz", - "integrity": "sha512-8Y75pvTYkLJW2hWQHXxoqRgV7qb9B+9vFEtidML+7koHUFapnVJAZ6cKs+Qjz5Aw3aZWHMC6u0wJE3At+nSGwA==", + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/uc.micro/-/uc.micro-2.1.0.tgz", + "integrity": "sha512-ARDJmphmdvUk6Glw7y9DQ2bFkKBHwQHLi2lsaH6PPmz/Ka9sFOBsBluozhDltWmnv9u/cF6Rt87znRTPV+yp/A==", "dev": true, "license": "MIT" }, @@ -2168,9 +4305,9 @@ "license": "MIT" }, "node_modules/undici": { - "version": "7.24.7", - "resolved": "https://registry.npmjs.org/undici/-/undici-7.24.7.tgz", - "integrity": "sha512-H/nlJ/h0ggGC+uRL3ovD+G0i4bqhvsDOpbDv7At5eFLlj2b41L8QliGbnl2H7SnDiYhENphh1tQFJZf+MyfLsQ==", + "version": "7.26.0", + "resolved": "https://registry.npmjs.org/undici/-/undici-7.26.0.tgz", + "integrity": 
"sha512-3O9Tf67pGhgOv9jM35AbhkXAKi13f3oy3aE4CSgr+TckGeY+/iu97ZXN+J7DpHPzLbVApFd1IFhcnBjREYXYcg==", "dev": true, "license": "MIT", "engines": { @@ -2178,12 +4315,35 @@ } }, "node_modules/undici-types": { - "version": "7.18.2", - "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.18.2.tgz", - "integrity": "sha512-AsuCzffGHJybSaRrmr5eHr81mwJU3kjw6M+uprWvCXiNeN9SOGwQ3Jn8jb8m3Z6izVgknn1R0FTCEAP2QrLY/w==", + "version": "7.24.6", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.24.6.tgz", + "integrity": "sha512-WRNW+sJgj5OBN4/0JpHFqtqzhpbnV0GuB+OozA9gCL7a993SmU+1JBZCzLNxYsbMfIeDL+lTsphD5jN5N+n0zg==", "dev": true, "license": "MIT" }, + "node_modules/unicorn-magic": { + "version": "0.3.0", + "resolved": "https://registry.npmjs.org/unicorn-magic/-/unicorn-magic-0.3.0.tgz", + "integrity": "sha512-+QBBXBCvifc56fsbuxZQ6Sic3wqqc3WWaqxs58gvJrcOuN83HGTCwz3oS5phzU9LthRNE9VrJCFCLUgHeeFnfA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/universalify": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/universalify/-/universalify-2.0.1.tgz", + "integrity": "sha512-gptHNQghINnc/vTGIk0SOFGFNXw7JVrlRUtConJRlvaw6DuX0wO5Jeko9sWrMBhh+PsYAZ7oXAiOnf/UKogyiw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 10.0.0" + } + }, "node_modules/url-join": { "version": "4.0.1", "resolved": "https://registry.npmjs.org/url-join/-/url-join-4.0.1.tgz", @@ -2199,6 +4359,30 @@ "license": "MIT", "optional": true }, + "node_modules/validate-npm-package-license": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/validate-npm-package-license/-/validate-npm-package-license-3.0.4.tgz", + "integrity": "sha512-DpKm2Ui/xN7/HQKCtpZxoRWBhZ9Z0kqtygG8XCgNQ8ZlDnxuQmWhj566j8fN4Cu3/JmbhsDo7fcAJq4s9h27Ew==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "spdx-correct": "^3.0.0", + 
"spdx-expression-parse": "^3.0.0" + } + }, + "node_modules/version-range": { + "version": "4.15.0", + "resolved": "https://registry.npmjs.org/version-range/-/version-range-4.15.0.tgz", + "integrity": "sha512-Ck0EJbAGxHwprkzFO966t4/5QkRuzh+/I1RxhLgUKKwEn+Cd8NwM60mE3AqBZg5gYODoXW0EFsQvbZjRlvdqbg==", + "dev": true, + "license": "Artistic-2.0", + "engines": { + "node": ">=4" + }, + "funding": { + "url": "https://bevry.me/fund" + } + }, "node_modules/whatwg-encoding": { "version": "3.1.1", "resolved": "https://registry.npmjs.org/whatwg-encoding/-/whatwg-encoding-3.1.1.tgz", @@ -2223,12 +4407,45 @@ "node": ">=18" } }, + "node_modules/which": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", + "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", + "dev": true, + "license": "ISC", + "dependencies": { + "isexe": "^2.0.0" + }, + "bin": { + "node-which": "bin/node-which" + }, + "engines": { + "node": ">= 8" + } + }, "node_modules/wrappy": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==", "dev": true, - "license": "ISC" + "license": "ISC", + "optional": true + }, + "node_modules/wsl-utils": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/wsl-utils/-/wsl-utils-0.1.0.tgz", + "integrity": "sha512-h3Fbisa2nKGPxCpm89Hk33lBLsnaGBvctQopaBSOW/uIs6FTe1ATyAnKFJrzVs9vpGdsTe73WF3V4lIsk4Gacw==", + "dev": true, + "license": "MIT", + "dependencies": { + "is-wsl": "^3.1.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } }, "node_modules/xml2js": { "version": "0.5.0", @@ -2262,14 +4479,17 @@ "license": "ISC" }, "node_modules/yauzl": { - "version": "2.10.0", - "resolved": "https://registry.npmjs.org/yauzl/-/yauzl-2.10.0.tgz", - "integrity": 
"sha512-p4a9I6X6nu6IhoGmBqAcbJy1mlC4j27vEPZX9F4L4/vZT3Lyq1VkFHw/V/PUcB9Buo+DG3iHkT0x3Qya58zc3g==", + "version": "3.3.1", + "resolved": "https://registry.npmjs.org/yauzl/-/yauzl-3.3.1.tgz", + "integrity": "sha512-RNPCUkiE/ZgO4w8i9U5yDQVHaFDdnzaFANElRvpJteCspvmv2VqrRb9lvS6odVD+jqI/zDsxAHJVsafpcheVQQ==", "dev": true, "license": "MIT", "dependencies": { "buffer-crc32": "~0.2.3", - "fd-slicer": "~1.1.0" + "pend": "~1.2.0" + }, + "engines": { + "node": ">=12" } }, "node_modules/yazl": { diff --git a/extensions/vscode-codeclone/package.json b/extensions/vscode-codeclone/package.json index fdce31b9..f0313dbc 100644 --- a/extensions/vscode-codeclone/package.json +++ b/extensions/vscode-codeclone/package.json @@ -2,7 +2,7 @@ "name": "codeclone", "displayName": "CodeClone", "description": "Baseline-aware, triage-first structural review for Python, powered by CodeClone MCP.", - "version": "0.2.7", + "version": "0.3.0", "publisher": "orenlab", "license": "MPL-2.0", "repository": { @@ -359,6 +359,18 @@ "title": "Open Overview", "category": "CodeClone" }, + { + "command": "codeclone.showBlastRadius", + "title": "Show Blast Radius", + "category": "CodeClone", + "icon": "$(target)" + }, + { + "command": "codeclone.copyBlastRadiusBrief", + "title": "Copy Blast Radius Brief", + "category": "CodeClone", + "icon": "$(copy)" + }, { "command": "codeclone.clearSessionState", "title": "Clear Session", @@ -447,6 +459,10 @@ { "command": "codeclone.reviewSecuritySurface", "when": "false" + }, + { + "command": "codeclone.copyBlastRadiusBrief", + "when": "false" } ], "view/title": [ @@ -683,6 +699,16 @@ "command": "codeclone.copySecuritySurfaceBrief", "when": "editorTextFocus && codeclone.activeReviewTargetVisibleInEditor && codeclone.activeReviewTargetIsSecuritySurface", "group": "secondary@4" + }, + { + "command": "codeclone.showBlastRadius", + "when": "editorTextFocus && codeclone.hasRun && isWorkspaceTrusted", + "group": "secondary@6" + }, + { + "command": "codeclone.copyBlastRadiusBrief", 
+ "when": "editorTextFocus && codeclone.hasRun && isWorkspaceTrusted", + "group": "secondary@7" } ] }, @@ -832,10 +858,10 @@ } }, "devDependencies": { - "@types/node": "^25.5.2", + "@types/node": "^25.9.1", "@types/vscode": "1.100.0", - "@vscode/vsce": "2.25.0", + "@vscode/vsce": "3.9.1", "esbuild": "^0.28.0", - "typescript": "^6.0.2" + "typescript": "^6.0.3" } } diff --git a/extensions/vscode-codeclone/src/extension.js b/extensions/vscode-codeclone/src/extension.js index 74a2e93c..913ef6c9 100644 --- a/extensions/vscode-codeclone/src/extension.js +++ b/extensions/vscode-codeclone/src/extension.js @@ -1,5 +1,6 @@ "use strict"; +const crypto = require("node:crypto"); const fs = require("node:fs/promises"); const path = require("node:path"); /** @type {any} */ @@ -67,6 +68,8 @@ const { const {CodeCloneMcpClient, MCPClientError} = require("./mcpClient"); const { markdownBulletList, + renderBlastRadiusMarkdown, + renderBlastRadiusSvgHtml, renderCoverageJoinMarkdown, renderFindingMarkdown, renderOverloadedModuleMarkdown, @@ -377,6 +380,12 @@ class CodeCloneController { vscode.commands.registerCommand("codeclone.reviewSecuritySurface", (node) => this.reviewSecuritySurface(node) ), + vscode.commands.registerCommand("codeclone.showBlastRadius", () => + this.showBlastRadius() + ), + vscode.commands.registerCommand("codeclone.copyBlastRadiusBrief", () => + this.copyBlastRadiusBrief() + ), ]; this.context.subscriptions.push(...subscriptions); } @@ -2786,6 +2795,122 @@ class CodeCloneController { ); } + async showBlastRadius() { + const folder = this.getPreferredFolder(); + if (!folder) { + return; + } + if (!(await this.ensureWorkspaceTrust())) { + return; + } + const state = this.getWorkspaceState(folder); + if (!state.currentRunId) { + const choice = await vscode.window.showInformationMessage( + "No CodeClone run is available. 
Analyze the workspace first.", + "Analyze Workspace" + ); + if (choice === "Analyze Workspace") { + await this.analyzeWorkspace(); + } + return; + } + const files = this.resolveBlastRadiusFiles(folder); + if (files.length === 0) { + const input = await vscode.window.showInputBox({ + title: "Blast Radius", + prompt: "Enter a workspace-relative file path", + placeHolder: "src/module.py", + }); + if (!input || !input.trim()) { + return; + } + files.push(input.trim()); + } + try { + await this.ensureConnected(folder); + const payload = await this.client.callTool("get_blast_radius", { + files, + run_id: state.currentRunId, + depth: "transitive", + }); + const nonce = crypto.randomBytes(16).toString("hex"); + const panel = vscode.window.createWebviewPanel( + "codeclone.blastRadius", + `Blast Radius: ${files.map((f) => path.basename(f)).join(", ")}`, + vscode.ViewColumn.Beside, + { + enableScripts: false, + localResourceRoots: [], + } + ); + panel.iconPath = new vscode.ThemeIcon("target"); + panel.webview.html = renderBlastRadiusSvgHtml( + payload, + folder.name, + nonce + ); + } catch (error) { + this.handleError(error, "Could not compute blast radius."); + } + } + + async copyBlastRadiusBrief() { + const folder = this.getPreferredFolder(); + if (!folder) { + return; + } + const state = this.getWorkspaceState(folder); + if (!state.currentRunId) { + await vscode.window.showInformationMessage( + "No CodeClone run is available. Analyze the workspace first." 
+ ); + return; + } + const files = this.resolveBlastRadiusFiles(folder); + if (files.length === 0) { + const input = await vscode.window.showInputBox({ + title: "Blast Radius Brief", + prompt: "Enter a workspace-relative file path", + placeHolder: "src/module.py", + }); + if (!input || !input.trim()) { + return; + } + files.push(input.trim()); + } + try { + await this.ensureConnected(folder); + const payload = await this.client.callTool("get_blast_radius", { + files, + run_id: state.currentRunId, + depth: "transitive", + }); + const brief = renderBlastRadiusMarkdown(payload, folder.name); + await vscode.env.clipboard.writeText(brief); + await vscode.window.showInformationMessage( + `Copied blast radius brief for ${files.join(", ")}.` + ); + } catch (error) { + this.handleError(error, "Could not compute blast radius for brief."); + } + } + + /** + * @param {any} folder + * @returns {string[]} + */ + resolveBlastRadiusFiles(folder) { + const editor = vscode.window.activeTextEditor; + if (!editor) { + return []; + } + const relativePath = workspaceRelativePath(folder, editor.document.uri.fsPath); + if (relativePath && !relativePath.startsWith("..")) { + return [relativePath]; + } + return []; + } + async clearSessionState() { const folder = this.getPreferredFolder(); if (!folder) { diff --git a/extensions/vscode-codeclone/src/renderers.js b/extensions/vscode-codeclone/src/renderers.js index 9b254896..e32352ff 100644 --- a/extensions/vscode-codeclone/src/renderers.js +++ b/extensions/vscode-codeclone/src/renderers.js @@ -381,8 +381,348 @@ function renderSecuritySurfaceMarkdown(item) { ].join("\n"); } +function escapeHtml(text) { + return String(text) + .replace(/&/g, "&") + .replace(//g, ">") + .replace(/"/g, """); +} + +function blastRadiusFileListSection(title, items, open) { + if (items.length === 0) { + return ""; + } + const openAttr = open ? " open" : ""; + const listItems = items + .map((f) => `
  • ${escapeHtml(f)}
  • `) + .join(""); + return `${escapeHtml(title)} (${items.length})
      ${listItems}
    `; +} + +function renderBlastRadiusMarkdown(payload, workspaceName) { + const origin = safeArray(payload.origin); + const direct = safeArray(payload.direct_dependents); + const transitive = safeArray(payload.transitive_dependents); + const cloneCohort = safeArray(payload.clone_cohort_members); + const inCycle = safeArray(payload.in_dependency_cycle); + const risk = safeObject(payload.structural_risk); + const doNotTouch = safeArray(payload.do_not_touch); + const reviewContext = safeArray(payload.review_context); + const guardrails = safeArray(payload.guardrails); + const radiusLevel = capitalize(String(payload.radius_level || "unknown")); + + const lines = [ + "# Blast Radius", + "", + `- Run: \`${payload.run_id || "unknown"}\``, + `- Workspace: \`${workspaceName || "unknown"}\``, + `- Depth: ${payload.depth || "direct"}`, + `- Radius level: **${radiusLevel}**`, + `- Origin: ${origin.length} files`, + `- Direct dependents: ${direct.length}`, + `- Transitive dependents: ${transitive.length}`, + `- Clone cohort: ${cloneCohort.length}`, + ]; + if (origin.length > 0) { + lines.push( + "", + "## Origin files", + markdownBulletList(origin.map((f) => `\`${f}\``)) + ); + } + if (direct.length > 0) { + lines.push( + "", + "## Direct dependents", + markdownBulletList(direct.map((f) => `\`${f}\``)) + ); + } + if (transitive.length > 0) { + lines.push( + "", + "## Transitive dependents", + markdownBulletList(transitive.map((f) => `\`${f}\``)) + ); + } + if (cloneCohort.length > 0) { + lines.push( + "", + "## Clone cohort members", + markdownBulletList(cloneCohort.map((f) => `\`${f}\``)) + ); + } + if (inCycle.length > 0) { + lines.push( + "", + "## In dependency cycle", + markdownBulletList(inCycle.map((f) => `\`${f}\``)) + ); + } + const riskEntries = Object.entries(risk).filter( + ([, paths]) => safeArray(paths).length > 0 + ); + if (riskEntries.length > 0) { + lines.push("", "## Structural risk"); + for (const [key, paths] of riskEntries) { + lines.push( + "", + `### 
${humanizeIdentifier(key)}`, + markdownBulletList(safeArray(paths).map((f) => `\`${f}\``)) + ); + } + } + if (doNotTouch.length > 0) { + lines.push( + "", + "## Do not touch", + markdownBulletList( + doNotTouch.map( + (e) => + `\`${safeObject(e).path}\` — ${safeObject(e).reason}` + ) + ) + ); + } + if (reviewContext.length > 0) { + lines.push( + "", + "## Review context", + markdownBulletList( + reviewContext.map( + (e) => + `\`${safeObject(e).path}\` — ${safeObject(e).reason}` + ) + ) + ); + } + if (guardrails.length > 0) { + lines.push("", "## Guardrails", markdownBulletList(guardrails)); + } + return lines.join("\n"); +} + +function renderBlastRadiusSvgHtml(payload, workspaceName, nonce) { + const origin = safeArray(payload.origin); + const direct = safeArray(payload.direct_dependents); + const transitive = safeArray(payload.transitive_dependents); + const cloneCohort = safeArray(payload.clone_cohort_members); + const inCycle = safeArray(payload.in_dependency_cycle); + const risk = safeObject(payload.structural_risk); + const doNotTouch = safeArray(payload.do_not_touch); + const reviewContext = safeArray(payload.review_context); + const guardrails = safeArray(payload.guardrails); + const radiusLevel = String(payload.radius_level || "unknown").toLowerCase(); + const depth = String(payload.depth || "direct"); + const runId = String(payload.run_id || "unknown"); + + const hasDirect = direct.length > 0; + const hasTransitive = transitive.length > 0; + const hasClones = cloneCohort.length > 0; + + const cx = hasClones ? 260 : 300; + const cy = 170; + const originR = 50; + const directR = hasDirect ? 105 : 0; + const transitiveR = hasTransitive ? 155 : 0; + const outerR = transitiveR || directR || originR; + const svgWidth = hasClones ? 
600 : 520; + + let svgContent = ""; + + if (hasTransitive) { + svgContent += ``; + svgContent += `Transitive (${transitive.length})`; + } + if (hasDirect) { + svgContent += ``; + svgContent += `Direct (${direct.length})`; + } + svgContent += ``; + svgContent += `Origin`; + svgContent += `${origin.length} file${origin.length !== 1 ? "s" : ""}`; + + if (hasClones) { + const boxX = cx + outerR + 30; + const boxW = Math.max(svgWidth - boxX - 10, 80); + svgContent += ``; + svgContent += `Clone cohort`; + svgContent += `${cloneCohort.length}`; + svgContent += ``; + } + + if (inCycle.length > 0) { + svgContent += ``; + svgContent += `${inCycle.length} in cycle`; + } + + const legendY = cy + outerR + 25; + let legendX = 20; + const legendItems = [{cssClass: "ring-origin", label: "Origin"}]; + if (hasDirect) { + legendItems.push({cssClass: "ring-direct", label: "Direct"}); + } + if (hasTransitive) { + legendItems.push({cssClass: "ring-transitive", label: "Transitive"}); + } + if (hasClones) { + legendItems.push({cssClass: "clone-box", label: "Clones"}); + } + + for (const item of legendItems) { + svgContent += ``; + svgContent += `${escapeHtml(item.label)}`; + legendX += 18 + item.label.length * 7 + 16; + } + + const svgHeight = legendY + 30; + + const svg = [ + ``, + svgContent, + "", + ].join(""); + + const detailSections = []; + detailSections.push(blastRadiusFileListSection("Origin files", origin, true)); + if (hasDirect) { + detailSections.push(blastRadiusFileListSection("Direct dependents", direct, false)); + } + if (hasTransitive) { + detailSections.push(blastRadiusFileListSection("Transitive dependents", transitive, false)); + } + if (hasClones) { + detailSections.push(blastRadiusFileListSection("Clone cohort members", cloneCohort, false)); + } + if (inCycle.length > 0) { + detailSections.push(blastRadiusFileListSection("In dependency cycle", inCycle, false)); + } + + const riskEntries = Object.entries(risk).filter( + ([, paths]) => safeArray(paths).length > 0 + ); 
+ if (riskEntries.length > 0) { + let riskHtml = "

    Structural risk

    "; + for (const [key, paths] of riskEntries) { + const riskClass = key.includes("complexity") + ? "risk-high" + : key.includes("coverage") + ? "risk-coverage" + : key.includes("overloaded") + ? "risk-overloaded" + : "risk-high"; + riskHtml += `

    ${escapeHtml(humanizeIdentifier(key))}

    `; + for (const p of safeArray(paths)) { + riskHtml += `
    ${escapeHtml(p)}
    `; + } + riskHtml += "
    "; + } + detailSections.push(riskHtml); + } + + if (doNotTouch.length > 0) { + let html = `

    Do not touch (${doNotTouch.length})

    `; + for (const entry of doNotTouch) { + const e = safeObject(entry); + html += `
    ${escapeHtml(e.path)}`; + html += ` — ${escapeHtml(e.reason)}
    `; + } + detailSections.push(html); + } + + if (reviewContext.length > 0) { + let html = `

    Review context (${reviewContext.length})

    `; + for (const entry of reviewContext) { + const e = safeObject(entry); + html += `
    ${escapeHtml(e.path)}`; + html += ` — ${escapeHtml(e.reason)}
    `; + } + detailSections.push(html); + } + + let guardrailsHtml = ""; + if (guardrails.length > 0) { + const items = guardrails.map((g) => `
  • ${escapeHtml(g)}
  • `).join(""); + guardrailsHtml = `

    Guardrails

      ${items}
    `; + } + + return [ + "", + '', + "", + '', + '', + ``, + `", + "", + "", + '
    ', + "

    Blast Radius

    ", + `${escapeHtml(capitalize(radiusLevel))}`, + "
    ", + '
    ', + `Run: ${escapeHtml(runId)}`, + `Depth: ${escapeHtml(depth)}`, + `Workspace: ${escapeHtml(workspaceName)}`, + "
    ", + `
    ${svg}
    `, + detailSections.filter(Boolean).join("\n"), + guardrailsHtml, + "", + "", + ].join("\n"); +} + module.exports = { markdownBulletList, + renderBlastRadiusMarkdown, + renderBlastRadiusSvgHtml, renderFindingMarkdown, renderCoverageJoinMarkdown, renderOverloadedModuleMarkdown, diff --git a/extensions/vscode-codeclone/test/renderers.test.js b/extensions/vscode-codeclone/test/renderers.test.js index 6110b251..bb15071e 100644 --- a/extensions/vscode-codeclone/test/renderers.test.js +++ b/extensions/vscode-codeclone/test/renderers.test.js @@ -23,6 +23,8 @@ const { formatBaselineTags, } = require("../src/formatters"); const { + renderBlastRadiusMarkdown, + renderBlastRadiusSvgHtml, renderCoverageJoinMarkdown, renderSecuritySurfaceMarkdown, renderOverloadedModuleMarkdown, @@ -158,3 +160,110 @@ test("renderCoverageJoinMarkdown explains joined coverage review context", () => assert.match(markdown, /Coverage: 42%/); assert.match(markdown, /joined coverage review context/); }); + +test("renderBlastRadiusMarkdown produces a structured blast radius brief", () => { + const markdown = renderBlastRadiusMarkdown( + { + run_id: "abc123", + origin: ["src/core.py", "src/utils.py"], + depth: "transitive", + radius_level: "medium", + direct_dependents: ["src/cli.py", "src/api.py"], + transitive_dependents: ["tests/test_cli.py"], + clone_cohort_members: ["src/compat.py"], + in_dependency_cycle: [], + structural_risk: { + high_complexity_in_blast_zone: ["src/cli.py"], + high_coupling_in_blast_zone: [], + low_coverage_in_blast_zone: [], + overloaded_modules_in_blast_zone: [], + }, + do_not_touch: [ + { + path: "codeclone.baseline.json", + reason: "baseline state requires separate changes", + category: "baseline_or_generated_state", + severity: "hard", + }, + ], + review_context: [], + guardrails: [ + "review direct dependents before editing public behavior", + ], + }, + "demo-repo" + ); + + assert.match(markdown, /# Blast Radius/); + assert.match(markdown, /Run: `abc123`/); + 
assert.match(markdown, /Workspace: `demo-repo`/); + assert.match(markdown, /Radius level: \*\*Medium\*\*/); + assert.match(markdown, /Origin: 2 files/); + assert.match(markdown, /Direct dependents: 2/); + assert.match(markdown, /Transitive dependents: 1/); + assert.match(markdown, /Clone cohort: 1/); + assert.match(markdown, /`src\/core.py`/); + assert.match(markdown, /`src\/cli.py`/); + assert.match(markdown, /`src\/compat.py`/); + assert.match(markdown, /High complexity in blast zone/); + assert.match(markdown, /codeclone\.baseline\.json/); + assert.match(markdown, /review direct dependents/); +}); + +test("renderBlastRadiusSvgHtml produces valid HTML with SVG and CSP", () => { + const html = renderBlastRadiusSvgHtml( + { + run_id: "def456", + origin: ["src/engine.py"], + depth: "direct", + radius_level: "low", + direct_dependents: ["src/runner.py"], + transitive_dependents: [], + clone_cohort_members: [], + in_dependency_cycle: [], + structural_risk: {}, + do_not_touch: [], + review_context: [], + guardrails: [], + }, + "test-workspace", + "abc123nonce" + ); + + assert.match(html, //); + assert.match(html, /nonce-abc123nonce/); + assert.match(html, /Blast Radius/); + assert.match(html, /badge-low/); + assert.match(html, / { + const html = renderBlastRadiusSvgHtml( + { + run_id: "esc1", + origin: ['src/.py'], + depth: "direct", + radius_level: "high", + direct_dependents: [], + transitive_dependents: [], + clone_cohort_members: [], + in_dependency_cycle: [], + structural_risk: {}, + do_not_touch: [], + review_context: [], + guardrails: [], + }, + "xss-test", + "safenonce" + ); + + assert.doesNotMatch(html, /
    Workspace Intent Registry"] end - MCPSession -->|"writes coordination records"| Disk - MCPSession -->|"never writes"| BL[Baselines] - MCPSession -->|"never writes"| CA[Cache] - MCPSession -->|"never writes"| RP[Reports] - MCPSession -->|"never writes"| SC[Source Files] - - style BL fill:#fee2e2 - style CA fill:#fee2e2 - style RP fill:#fee2e2 - style SC fill:#fee2e2 + MCPSession -->|" writes coordination records "| Disk + MCPSession -->|" never writes "| BL[Baselines] + MCPSession -->|" never writes "| CA[Cache] + MCPSession -->|" never writes "| RP[Reports] + MCPSession -->|" never writes "| SC[Source Files] + style BL fill: #fee2e2 + style CA fill: #fee2e2 + style RP fill: #fee2e2 + style SC fill: #fee2e2 ``` ### Mixin chain @@ -79,15 +77,13 @@ graph BT RR["_MCPSessionReviewReceiptMixin
    audit receipt composition"] CG["_MCPSessionClaimGuardMixin
    citation-based validation"] S["MCPSession"] - F --> CP --> AA --> RSB --> SM --> RPM --> STM --> BR --> IM --> PC --> RR --> CG --> S - - style S stroke:#6366f1,stroke-width:2px - style CG fill:#f0fdf4 - style RR fill:#f0fdf4 - style PC fill:#f0fdf4 - style IM fill:#f0fdf4 - style BR fill:#f0fdf4 + style S stroke: #6366f1, stroke-width: 2px + style CG fill: #f0fdf4 + style RR fill: #f0fdf4 + style PC fill: #f0fdf4 + style IM fill: #f0fdf4 + style BR fill: #f0fdf4 ``` --- @@ -192,9 +188,9 @@ codeclone-mcp --transport streamable-http --host 127.0.0.1 --port 8000 ``` !!! warning "Remote exposure is opt-in" - Non-loopback hosts require `--allow-remote`. The built-in HTTP server - has no authentication. Use it only on trusted networks or behind an - authenticated reverse proxy. +Non-loopback hosts require `--allow-remote`. The built-in HTTP server +has no authentication. Use it only on trusted networks or behind an +authenticated reverse proxy. ### Run retention @@ -236,8 +232,8 @@ stored runs. | `compare_runs` | Run-to-run delta: regressions, improvements, health change | !!! tip "Start here" - After analysis, call `get_run_summary` or `get_production_triage` first. - Prefer `list_hotspots` or `check_*` before broad `list_findings` calls. +After analysis, call `get_run_summary` or `get_production_triage` first. +Prefer `list_hotspots` or `check_*` before broad `list_findings` calls. 
### Phase 3: Drill down @@ -273,71 +269,63 @@ sequenceDiagram participant A as Agent participant M as MCP Server participant D as Disk Registry - - A->>M: list_workspace(root) - M->>D: read .cache/codeclone/intents/ - D-->>M: active intents - M-->>A: workspace state - - A->>M: analyze_repository(root) - M-->>A: run registered - - A->>M: declare(scope, intent) - M->>D: write intent record - M-->>A: intent_id, blast_radius, concurrent_intents - - A->>M: get_blast_radius(files) - M-->>A: do_not_touch, review_context - - A->>M: check_patch_contract(mode=budget) - M-->>A: regression budget, headroom - + A ->> M: list_workspace(root) + M ->> D: read .cache/codeclone/intents/ + D -->> M: active intents + M -->> A: workspace state + A ->> M: analyze_repository(root) + M -->> A: run registered + A ->> M: declare(scope, intent) + M ->> D: write intent record + M -->> A: intent_id, blast_radius, concurrent_intents + A ->> M: get_blast_radius(files) + M -->> A: do_not_touch, review_context + A ->> M: check_patch_contract(mode=budget) + M -->> A: regression budget, headroom Note over A: Edit files within scope opt Long edit or test run - A->>M: renew(intent_id, lease_seconds) - M->>D: update lease timestamp - M-->>A: lease_renewed + A ->> M: renew(intent_id, lease_seconds) + M ->> D: update lease timestamp + M -->> A: lease_renewed end - A->>M: analyze_repository(root) - M-->>A: after_run_id registered - - A->>M: check(intent_id, changed_files or diff_ref) + A ->> M: analyze_repository(root) + M -->> A: after_run_id registered + A ->> M: check(intent_id, changed_files or diff_ref) Note right of M: intent stays on before-run, changed scope is explicit - M-->>A: clean / expanded / violated - - A->>M: check_patch_contract(mode=verify, before_run_id, after_run_id, intent_id) - M-->>A: accepted / violated - - A->>M: validate_review_claims(text) - M-->>A: valid / violations - - A->>M: create_review_receipt - M-->>A: audit artifact - - A->>M: clear - M->>D: remove intent record + M 
-->> A: clean / expanded / violated + A ->> M: check_patch_contract(mode=verify, before_run_id, after_run_id, intent_id) + M -->> A: accepted / violated + A ->> M: validate_review_claims(text) + M -->> A: valid / violations + A ->> M: create_review_receipt + M -->> A: audit artifact + A ->> M: clear + M ->> D: remove intent record ``` -| Tool | Purpose | -|--------------------------|---------------------------------------------------------------------------------------------| +| Tool | Purpose | +|--------------------------|-------------------------------------------------------------------------------------------------------------| | `manage_change_intent` | Intent lifecycle: declare, get, check, clear, renew, list_workspace, gc_workspace, recover, reset_workspace | -| `get_blast_radius` | Pre-change risk boundary: dependents, clone cohorts, do-not-touch, review context | -| `check_patch_contract` | Budget query (`mode=budget`) or post-edit verification (`mode=verify`) | -| `create_review_receipt` | Deterministic audit artifact: provenance, scope, reviewed findings, patch status | -| `validate_review_claims` | Citation-based overclaim detection against stored run semantics | +| `get_blast_radius` | Pre-change risk boundary: dependents, clone cohorts, do-not-touch, review context | +| `check_patch_contract` | Budget query (`mode=budget`) or post-edit verification (`mode=verify`) | +| `create_review_receipt` | Deterministic audit artifact: provenance, scope, reviewed findings, patch status, verification profile | +| `validate_review_claims` | Citation-based overclaim detection against stored run semantics | ??? info "Blast radius: do_not_touch vs review_context" - `do_not_touch` contains actionable edit prohibitions: baselines, generated - state, forbidden paths. `review_context` contains report-only signals: - security boundary inventory, overloaded-module candidates, known baseline - debt. Review context is information, not an edit ban. 
+`do_not_touch` contains actionable edit prohibitions: baselines, generated +state, forbidden paths. `review_context` contains report-only signals: +security boundary inventory, overloaded-module candidates, known baseline +debt. Review context is information, not an edit ban. ??? info "Patch contract modes" - **Budget** reads one stored run and optional intent. Shows regression - headroom per quality dimension before editing. **Verify** compares explicit - before/after stored runs, previews gates, validates scope, and reports - baseline-abuse signals. Missing runs return `status=unverified`. +**Budget** reads one stored run and optional intent. Shows regression +headroom per quality dimension before editing. **Verify** compares explicit +before/after stored runs, previews gates, validates scope, and reports +baseline-abuse signals. Verify derives a **verification profile** from +changed files — docs-only and non-Python patches skip structural checks; +Python source changes require a full after-run. Missing runs return +`status=unverified`. ### Phase 6: Session management @@ -460,15 +448,15 @@ Separate accepted baseline debt from new regressions. ``` !!! tip "Best practices" - - Use `analyze_changed_paths` for PRs, not full analysis. - - Prefer `get_run_summary` or `get_production_triage` as the first pass. - - Prefer `list_hotspots` or narrow `check_*` tools before broad `list_findings`. - - Use `get_finding` / `get_remediation` for one finding instead of raising - `detail_level` on larger lists. - - Pass an absolute `root` — MCP rejects relative roots like `.`. - - Use `coverage_xml` only with `analysis_mode="full"`. - - Use `source_kind="production-only"` to cut test/fixture noise. - - Use `mark_finding_reviewed` + `exclude_reviewed=true` in long sessions. +- Use `analyze_changed_paths` for PRs, not full analysis. +- Prefer `get_run_summary` or `get_production_triage` as the first pass. 
+- Prefer `list_hotspots` or narrow `check_*` tools before broad `list_findings`. +- Use `get_finding` / `get_remediation` for one finding instead of raising +`detail_level` on larger lists. +- Pass an absolute `root` — MCP rejects relative roots like `.`. +- Use `coverage_xml` only with `analysis_mode="full"`. +- Use `source_kind="production-only"` to cut test/fixture noise. +- Use `mark_finding_reviewed` + `exclude_reviewed=true` in long sessions. --- diff --git a/uv.lock b/uv.lock index 217567ce..453483b5 100644 --- a/uv.lock +++ b/uv.lock @@ -3,7 +3,8 @@ revision = 3 requires-python = ">=3.10" resolution-markers = [ "python_full_version >= '3.15'", - "python_full_version < '3.15'", + "python_full_version >= '3.11' and python_full_version < '3.15'", + "python_full_version < '3.11'", ] [[package]] @@ -381,115 +382,115 @@ wheels = [ [[package]] name = "coverage" -version = "7.14.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/23/7f/d0720730a397a999ffc0fd3f5bebef347338e3a47b727da66fbb228e2ff2/coverage-7.14.0.tar.gz", hash = "sha256:057a6af2f160a85384cde4ab36f0d2777bae1057bae255f95413cdd382aa5c74", size = 919489, upload-time = "2026-05-10T18:02:31.397Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/59/9d/7c83ef51c3eb495f10010094e661833588b7709946da634c8b66520b97c7/coverage-7.14.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:84c32d90bf4537f0e7b4dec9aaa9a938fb8205136b9d2ecf4d7629d5262dc075", size = 219668, upload-time = "2026-05-10T17:59:23.106Z" }, - { url = "https://files.pythonhosted.org/packages/24/34/898546aefbd28f0af131201d0dc852c9e976f817bd7d5bfb8dc4e02863bb/coverage-7.14.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7c843572c605ab51cfdb5c6b5f2586e2a8467c0d28eca4bdef4ec70c5fecbd82", size = 220192, upload-time = "2026-05-10T17:59:26.095Z" }, - { url = 
"https://files.pythonhosted.org/packages/df/4a/b457c88aca72b0df13a98167ebd5d947135ccd9881ea88ce6a570e13aa9b/coverage-7.14.0-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:0c451757d3fa2603354fdc789b5e58a0e327a117c370a40e3476ba4eabab228c", size = 246932, upload-time = "2026-05-10T17:59:27.806Z" }, - { url = "https://files.pythonhosted.org/packages/b5/d9/92600e89486fd074c50f0117422b2c9592c3e144e2f25bd5ac0bc62bc7a0/coverage-7.14.0-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:3fd43f0616e765ab78d069cf8358def7363957a45cee446d65c502dcfeea7893", size = 248762, upload-time = "2026-05-10T17:59:29.479Z" }, - { url = "https://files.pythonhosted.org/packages/0d/e1/9ea1eb9c311da7f15853559dc1d9d82bef88ecd3e59fbeb51f16bc2ffa91/coverage-7.14.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:731e535b1498b27d13594a0527a79b0510867b0ad891532be41cb883f2128e20", size = 250625, upload-time = "2026-05-10T17:59:31.33Z" }, - { url = "https://files.pythonhosted.org/packages/a5/03/57afca1b8106f8549a5329139315041fe166d6099bd9381346b9430dfbd1/coverage-7.14.0-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c7492f2d493b976941c7ca050f273cbda2f43c381124f7586a3e3c16d1804fec", size = 252539, upload-time = "2026-05-10T17:59:32.692Z" }, - { url = "https://files.pythonhosted.org/packages/57/5e/2e9fc63c9928119c1dbae02222be51407d3e7ebac5811ebbda4af3557795/coverage-7.14.0-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:dc38367eaa2abb1b766ac333142bce7655335a73537f5c8b75aaa89c2b987757", size = 247636, upload-time = "2026-05-10T17:59:34.599Z" }, - { url = "https://files.pythonhosted.org/packages/f0/e2/0b7898cda21041cc67546e19b80ba66cbbb47cbece52a76a5904de6a3aaf/coverage-7.14.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = 
"sha256:0a951308cde22cf77f953955a754d04dccb57fe3bb8e345d685778ed9fc1632a", size = 248666, upload-time = "2026-05-10T17:59:36.232Z" }, - { url = "https://files.pythonhosted.org/packages/d6/e3/d33662a2fdaef23229c15921f39c84ec38441f3069ba26e134ed402c833b/coverage-7.14.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:fab3877e4ebb06bd9d4d4d00ee53309ee5478e66873c66a382272e3ee33eb7ea", size = 246670, upload-time = "2026-05-10T17:59:38.029Z" }, - { url = "https://files.pythonhosted.org/packages/99/b2/533942c3bfbf6770b5c32d7f2ff029fe013dba31f3fe8b45cabbb250365e/coverage-7.14.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:b812eb847b19876ebf33fb6c4f11819af05ab6050b0bfa1bc53412ae81779adb", size = 250484, upload-time = "2026-05-10T17:59:39.974Z" }, - { url = "https://files.pythonhosted.org/packages/d8/00/15acbad83a96de13c73831486c7627bfed73dfaec53b04e4a6315edf3fd8/coverage-7.14.0-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:d9c8ef6ed820c433de075657d72dda1f89a2984955e58b8a75feb3f184250218", size = 246942, upload-time = "2026-05-10T17:59:41.659Z" }, - { url = "https://files.pythonhosted.org/packages/70/db/cef0228de493f2c740c760a9057a61d00c6849480073b70a75b87c7d4bab/coverage-7.14.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:d128b1bba9361fbaaf6a19e179e6cfd6a9103ce0c0555876f72780acc93efd85", size = 247544, upload-time = "2026-05-10T17:59:43.471Z" }, - { url = "https://files.pythonhosted.org/packages/77/a0/d9ef8e148f3025c2ae8401d77cda1502b6d2a4d8102603a8af31460aedb6/coverage-7.14.0-cp310-cp310-win32.whl", hash = "sha256:65f267ca1370726ec2c1aa38bbe4df9a71a740f22878d2d4bf59d71a4cd8d323", size = 222285, upload-time = "2026-05-10T17:59:44.908Z" }, - { url = "https://files.pythonhosted.org/packages/85/c0/30c454c7d3cf47b2805d4e06f12443f5eece8a5d030d3b0350e7b74ecb49/coverage-7.14.0-cp310-cp310-win_amd64.whl", hash = "sha256:b34ece8065914f938ed7f2c5872bb865336977a52919149846eac3744327267a", size = 223215, upload-time = "2026-05-10T17:59:46.779Z" }, - { url 
= "https://files.pythonhosted.org/packages/fc/e4/649c8d4f7f1709b6dbfc474358aa1bba02f67bcd52e2fec291a5014006cd/coverage-7.14.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6a78e2a9d9c5e3b8d4ab9b9d28c985ea66fced0a7d7c2aec1f216e03a2011480", size = 219795, upload-time = "2026-05-10T17:59:48.198Z" }, - { url = "https://files.pythonhosted.org/packages/7f/8d/46692d24b3f395d4cbf17bfcc57136b4f2f9c0c0df864b0bddfc1d71a014/coverage-7.14.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a1816c505187592dcd1c5a5f226601a549f70365fbd00930ac88b0c225b76bb4", size = 220299, upload-time = "2026-05-10T17:59:49.683Z" }, - { url = "https://files.pythonhosted.org/packages/12/c2/a40f5cb295bbcbb697a76947a56081c494c61950366294ee426ffe261099/coverage-7.14.0-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:d8e1762f0e9cbc26ec315471e7b47855218e833cd5a032d706fbf43845d878c7", size = 250721, upload-time = "2026-05-10T17:59:51.494Z" }, - { url = "https://files.pythonhosted.org/packages/fd/35/202235eb5c3c14c212462cd91d61b7386bf8fc44bc7a77f4742d2a69174b/coverage-7.14.0-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:9336e23e8bb3a3925398261385e2a1533957d3e760e91070dcb0e98bfa514eed", size = 252633, upload-time = "2026-05-10T17:59:53.244Z" }, - { url = "https://files.pythonhosted.org/packages/bb/80/5f596e8995785124ee191c42535664c5e62c65995b66f4ca21e28ae04c81/coverage-7.14.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9cd1169b2230f9cbe9c638ba38022ed7a2b1e641cc07f7cea0365e4be2a74980", size = 254743, upload-time = "2026-05-10T17:59:55.021Z" }, - { url = "https://files.pythonhosted.org/packages/1e/6d/0d178825be2350f0adb27984d0aa7cf84bbdab201f6fb926b535d23a8f5f/coverage-7.14.0-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d1bb3543b58fea74d2cd1abc4054cc927e4724687cb4560cd2ed88d2c7d820c0", size = 256700, 
upload-time = "2026-05-10T17:59:56.511Z" }, - { url = "https://files.pythonhosted.org/packages/19/5b/9e549c2f6e9dfea472adadba06c294e64735dabc2dd19015fac082095013/coverage-7.14.0-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a93bac2cb577ef60074999ed56d8a1535894398e2ed920d4185c3ec0c8864742", size = 250854, upload-time = "2026-05-10T17:59:57.94Z" }, - { url = "https://files.pythonhosted.org/packages/3d/1c/b94f9f5f36396021ee2f62c5834b12e6a3d31f0bed5d6fc6d1c3caec087c/coverage-7.14.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:5904abf7e18cddc463219b17552229650c6b79e061d31a1059283051169cf7d5", size = 252433, upload-time = "2026-05-10T17:59:59.688Z" }, - { url = "https://files.pythonhosted.org/packages/b5/cb/d192cd8e1345eccabc32016f2d39072ecd10cb4f4b983ed8d0ebdeaf00dc/coverage-7.14.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:741f57cddc9004a8c81b084660215f33a6b597dbe62c31386b983ee26310e327", size = 250494, upload-time = "2026-05-10T18:00:01.953Z" }, - { url = "https://files.pythonhosted.org/packages/53/c5/aac9f460a41d835dbddef1d377f105f6ac2311d0f3c1588e9f51046d8813/coverage-7.14.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:664123feb0929d7affc135717dbd70d61d98688a08ab1e5ba464739620c6252d", size = 254261, upload-time = "2026-05-10T18:00:03.779Z" }, - { url = "https://files.pythonhosted.org/packages/23/aa/7af7c0081980a9cb3d289c5a435a4b7657dcecbd128e25c580e6a50389b5/coverage-7.14.0-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:c83d2399a51bbec8429266905d33616f04bc5726b1138c35844d5fcd896b2e20", size = 250216, upload-time = "2026-05-10T18:00:05.262Z" }, - { url = "https://files.pythonhosted.org/packages/35/60/a4257538ce2f6b978aeb51870d6c4208c510928a03db7e0339bb625dccb7/coverage-7.14.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:bcb2e855b87321259a037429288ae85216d191c74de3e79bf57cd2bc0761992c", size = 251125, upload-time = "2026-05-10T18:00:06.858Z" }, - { url = 
"https://files.pythonhosted.org/packages/a1/ab/f91af47642ec1aa53490e835a95847168d9c77fc39aa58527604c051e145/coverage-7.14.0-cp311-cp311-win32.whl", hash = "sha256:731dc15b385ac52289743d476245b61e1a2927e803bef655b52bc3b2a75a21f3", size = 222300, upload-time = "2026-05-10T18:00:08.608Z" }, - { url = "https://files.pythonhosted.org/packages/f0/f0/a71ddbd874431e7a7cd96071f0c331cfbbad07704833c765d24ffbab8a67/coverage-7.14.0-cp311-cp311-win_amd64.whl", hash = "sha256:bfb0ed8ec5d25e93face268115d7964db9df8b9aae8edcde9ec6b16c726a7cc1", size = 223241, upload-time = "2026-05-10T18:00:10.746Z" }, - { url = "https://files.pythonhosted.org/packages/d8/6e/d9d312a5151a96cd110efee32efc3fc97b01ebd86203fe618ccb29cf4c92/coverage-7.14.0-cp311-cp311-win_arm64.whl", hash = "sha256:7ebb1c6df9f78046a1b1e0a89674cd4bf73b7c648914eebcf976a57fd99a5627", size = 221908, upload-time = "2026-05-10T18:00:12.242Z" }, - { url = "https://files.pythonhosted.org/packages/09/1e/2f996b2c8415cbb6f54b0f5ec1ee850c96d7911961afb4fc05f4a89d8c58/coverage-7.14.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7ffd19fc8aed057fd686a17a4935eef5f9859d69208f96310e893e64b9b6ccf5", size = 219967, upload-time = "2026-05-10T18:00:13.756Z" }, - { url = "https://files.pythonhosted.org/packages/34/23/35c7aea1274aef7525bdd2dc92f710bdde6d11652239d71d1ec450067939/coverage-7.14.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:829994cfe1aeb773ca27bf246d4badc1e764893e3bfb98fff820fcecd1ca4662", size = 220329, upload-time = "2026-05-10T18:00:15.264Z" }, - { url = "https://files.pythonhosted.org/packages/75/cf/a8f4b43a16e194b0261257ad28ded5853ec052570afef4a84e1d81189f3b/coverage-7.14.0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:b4f07cf7edcb7ec39431a5074d7ea83b29a9f71fcfc494f0f40af4e65180420f", size = 251839, upload-time = "2026-05-10T18:00:17.16Z" }, - { url = 
"https://files.pythonhosted.org/packages/69/ff/6699e7b71e60d3049eb2bdcbc95ee3f35707b2b0e48f32e9e63d3ce30c08/coverage-7.14.0-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:ca3d9cf2c32b521bd9518385608787fa86f38daf993695307531822c3430ed67", size = 254576, upload-time = "2026-05-10T18:00:18.829Z" }, - { url = "https://files.pythonhosted.org/packages/22/ec/c936d495fcd67f48f03a9c4ad3297ff80d1f222a5df3980f15b34c186c21/coverage-7.14.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:92af52828e7f29d827346b0294e5a0853fa206db77db0395b282918d41e28db9", size = 255690, upload-time = "2026-05-10T18:00:20.648Z" }, - { url = "https://files.pythonhosted.org/packages/5c/42/5af63f636cc62a4a2b1b3ba9146f6ee6f53a35a50d5cefc54d5670f60999/coverage-7.14.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7b2bb6c9d7e769360d0f20a0f219603fd64f0c8f97de17ab25853261602be0fb", size = 257949, upload-time = "2026-05-10T18:00:22.28Z" }, - { url = "https://files.pythonhosted.org/packages/26/d3/a225317bd2012132a27e1176d51660b826f99bb975876463c44ea0d7ee5a/coverage-7.14.0-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:1c9ed6ef99f88fb8c14aa8e2bf8eb0fe55fa2edfea68f8675d78741df1a5ac0e", size = 252242, upload-time = "2026-05-10T18:00:24.076Z" }, - { url = "https://files.pythonhosted.org/packages/f1/7f/9e65495298c3ea414742998539c37d048b5e81cc818fb1828cc6b51d10bf/coverage-7.14.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8231ade007f37959fbf58acc677f26b922c02eda6f0428ea307da0fd39681bf3", size = 253608, upload-time = "2026-05-10T18:00:25.588Z" }, - { url = "https://files.pythonhosted.org/packages/94/46/1522b524a35bdad22b2b8c4f9d32d0a104b524726ec380b2db68db1746f5/coverage-7.14.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:d8b013632cc1ce1d09dbe4f32667b4d320ec2f54fc326ebeffcd0b0bcc2bb6c4", size = 251753, upload-time 
= "2026-05-10T18:00:27.104Z" }, - { url = "https://files.pythonhosted.org/packages/f3/e9/cdf00d38817742c541ade405e115a3f7bf36e6f2a8b99d4f209861b85a2d/coverage-7.14.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:1733198802d71ec4c524f322e2867ee05c62e9e75df86bdca545407a221827d1", size = 255823, upload-time = "2026-05-10T18:00:29.038Z" }, - { url = "https://files.pythonhosted.org/packages/38/fc/5e7877cf5f902d08a17ff1c532511476d87e1bea355bd5028cb97f902e79/coverage-7.14.0-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:72a305291fa8ee01332f1aaf38b348ca34097f6aa0b0ef627eef2837e57bbba5", size = 251323, upload-time = "2026-05-10T18:00:30.647Z" }, - { url = "https://files.pythonhosted.org/packages/18/9d/50f05a72dff8487464fdd4178dda5daed642a060e60afb644e3d45123559/coverage-7.14.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fcaba850dd317c65423a9d63d88f9573c53b00354d6dd95724576cc98a131595", size = 253197, upload-time = "2026-05-10T18:00:32.211Z" }, - { url = "https://files.pythonhosted.org/packages/00/3f/6f61ffe6439df266c3cf60f5c99cfaa21103d0210d706a42fc6c30683ff8/coverage-7.14.0-cp312-cp312-win32.whl", hash = "sha256:5ac83957a80d0701310e96d8bec68cdcf4f90a7674b7d13f15a344315b41ab27", size = 222515, upload-time = "2026-05-10T18:00:33.717Z" }, - { url = "https://files.pythonhosted.org/packages/85/19/93853133df2cb371083285ef6a93982a0173e7a233b0f61373ba9fd30eb2/coverage-7.14.0-cp312-cp312-win_amd64.whl", hash = "sha256:70390b0da32cb90b501953716302906e8bcce087cb283e70d8c97729f22e92b2", size = 223324, upload-time = "2026-05-10T18:00:35.172Z" }, - { url = "https://files.pythonhosted.org/packages/74/18/9f7fe62f659f24b7a82a0be56bf94c1bd0a89e0ae7ab4c668f6e82404294/coverage-7.14.0-cp312-cp312-win_arm64.whl", hash = "sha256:91b993743d959b8be85b4abf9d5478216a69329c321efe5be0433c1a841d691d", size = 221944, upload-time = "2026-05-10T18:00:37.014Z" }, - { url = 
"https://files.pythonhosted.org/packages/6b/76/b7c66ee3c66e1b0f9d894c8125983aa0c03fb2336f2fd16559f9c966157f/coverage-7.14.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f2bbb8254370eb4c628ff3d6fa8a7f74ddc40565394d4f7ab791d1fe568e37ef", size = 219990, upload-time = "2026-05-10T18:00:38.887Z" }, - { url = "https://files.pythonhosted.org/packages/b3/af/e567cbad5ba69c013a50146dfa886dc7193361fda77521f51274ff620e1b/coverage-7.14.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:23b81107f46d3f21d0cbce30664fcec0f5d9f585638a67081750f99738f6bf66", size = 220365, upload-time = "2026-05-10T18:00:40.864Z" }, - { url = "https://files.pythonhosted.org/packages/44/6f/9ad575d505b4d805b254febc8a5b338a2efe278f8786e56ff1cb8413f9c3/coverage-7.14.0-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:22a7e06a5f11a757cdfe79018e9095f9f69ae283c5cd8123774c788deec8717b", size = 251363, upload-time = "2026-05-10T18:00:42.489Z" }, - { url = "https://files.pythonhosted.org/packages/6f/5f/b5370068b2f57787454592ed7dcd1002f0f1703b7db1fa30f6a325a4ca6e/coverage-7.14.0-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:9d1aa57a1dc8e05bdc42e81c5d671d849577aeedf279f4c449d6d286f9ed88ca", size = 253961, upload-time = "2026-05-10T18:00:44.079Z" }, - { url = "https://files.pythonhosted.org/packages/29/1e/51adf17738976e8f2b85ddef7b7aa12a0838b056c92f175941d8862767c1/coverage-7.14.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:90c1a51bcfddf645b3bb7ec333d9e94393a8e94f55642380fa8a9a5a9e636cb7", size = 255193, upload-time = "2026-05-10T18:00:45.623Z" }, - { url = "https://files.pythonhosted.org/packages/9e/7b/5bfd7ac1df3b881c2ac7a5cbc99c7609e6296c402f5ef587cd81c6f355b3/coverage-7.14.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a841fae2fadcae4f438d43b6ccc4aac2ad609f47cdb6cfdce60cbb3fe5ca7bc2", size = 257326, 
upload-time = "2026-05-10T18:00:47.173Z" }, - { url = "https://files.pythonhosted.org/packages/7d/38/1d37d316b174fad3843a1d76dbdfe4398771c9ecd0515935dd9ece9cd627/coverage-7.14.0-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:c79d2319cabef1fe8e86df73371126931550804738f78ad7d31e3aad85a67367", size = 251582, upload-time = "2026-05-10T18:00:49.152Z" }, - { url = "https://files.pythonhosted.org/packages/34/46/746704f95980ba220214e1a41e18cec5aea80a898eaa53c51bf2d645ff36/coverage-7.14.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:1b23b0c6f0b1db6ad769b7050c8b641c0bf215ded26c1816955b17b7f26edfa9", size = 253325, upload-time = "2026-05-10T18:00:51.252Z" }, - { url = "https://files.pythonhosted.org/packages/e1/b9/bbe87206d9687b192352f893797825b5f5b15ecd3aa9c68fbff0c074d77b/coverage-7.14.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:55d3089079ce181a4566b1065ab28d2575eb76d8ac8f81f4fcda2bf037fee087", size = 251291, upload-time = "2026-05-10T18:00:52.816Z" }, - { url = "https://files.pythonhosted.org/packages/46/57/b8cdb12ac0d73ef0243218bd5e22c9df8f92edab8018213a86aec67c5324/coverage-7.14.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:49c005cba1e2f9677fb2845dcdf9a2e72a52a17d63e8231aaaae35d9f50215ef", size = 255448, upload-time = "2026-05-10T18:00:54.548Z" }, - { url = "https://files.pythonhosted.org/packages/1f/d4/5002019538b2036ce3c84340f54d2fd5100d55b0a6b0894eee56128d03c7/coverage-7.14.0-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:9117377b823daa28aa8635fbb08cda1cd6be3d7143257345459559aeef852d52", size = 251110, upload-time = "2026-05-10T18:00:56.122Z" }, - { url = "https://files.pythonhosted.org/packages/37/53/20c5009477660f084e6ed60bc02a91894b8e234e617e86ecfd9aaf78e27b/coverage-7.14.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:7b79d646cf46d5cf9a9f40281d4441df5849e445726e369006d2b117710b33fe", size = 252885, upload-time = "2026-05-10T18:00:57.967Z" }, - { url = 
"https://files.pythonhosted.org/packages/ae/ab/3cf6427ac9c1f1db747dbb1ce71dde47984876d4c2cfd018a3fef0a78d4d/coverage-7.14.0-cp313-cp313-win32.whl", hash = "sha256:fb609b3658479e33f9516d46f1a89dbb9b6c261366e3a11844a96ec487533dae", size = 222539, upload-time = "2026-05-10T18:00:59.581Z" }, - { url = "https://files.pythonhosted.org/packages/8f/b8/9228523e80321c2cb4880d1f589bc0171f2f71432c35118ad04dc01decce/coverage-7.14.0-cp313-cp313-win_amd64.whl", hash = "sha256:0773d8329cf32b6fd222e4b52622c61fe8d503eb966cfc8d3c3c10c96266d50e", size = 223344, upload-time = "2026-05-10T18:01:01.531Z" }, - { url = "https://files.pythonhosted.org/packages/a3/99/118daa192f95e3a6cb2740100fbf8797cda1734b4134ef0b5d501a7fa8f3/coverage-7.14.0-cp313-cp313-win_arm64.whl", hash = "sha256:b4e26a0f1b696faf283bffe5b8569e44e336c582439df5d53281ab89ee0cba96", size = 221966, upload-time = "2026-05-10T18:01:03.16Z" }, - { url = "https://files.pythonhosted.org/packages/e6/f1/a46cc0c013be170216253184a32366d7cbdb9252feaec866b05c2d12a894/coverage-7.14.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:953f521ca9445300397e65fda3dca58b2dbd68fee983777420b57ac3c77e9f90", size = 220679, upload-time = "2026-05-10T18:01:05.058Z" }, - { url = "https://files.pythonhosted.org/packages/64/8c/9c30a3d311a34177fa432995be7fbfc64477d8bac5630bd38055b1c9b424/coverage-7.14.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:98af83fd65ae24b1fdd03aaead967a9f523bcd2f1aab2d4f3ffda65bb568a6f1", size = 221033, upload-time = "2026-05-10T18:01:07.002Z" }, - { url = "https://files.pythonhosted.org/packages/9a/cd/3fb5e06c3badefd0c1b47e2044fdca67f8220a4ec2e7fcfb476aa0a67c6c/coverage-7.14.0-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:668b92e6958c4db7cf92e81caac328dfbbdbb215db2850ad28f0cbe1eea0bfbd", size = 262333, upload-time = "2026-05-10T18:01:08.903Z" }, - { url = 
"https://files.pythonhosted.org/packages/a8/e6/fbc322325c7294d3e22c1ad6b79e45d0806b25228c8e5842aed6d8169aa7/coverage-7.14.0-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:9fbd898551762dea00d3fef2b1c4f99afd2c6a3ff952ea07d60a9bd5ed4f34bc", size = 264410, upload-time = "2026-05-10T18:01:10.531Z" }, - { url = "https://files.pythonhosted.org/packages/08/92/c497b264bec1673c47cc77e26f760fcda4654cabf1f39546d1a23a3b8c35/coverage-7.14.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:68af363c07ecd8d4b7d4043d85cb376d7d227eceb54e5323ee45da73dbd3e426", size = 266836, upload-time = "2026-05-10T18:01:12.19Z" }, - { url = "https://files.pythonhosted.org/packages/78/fc/045da320987f401af5d2815d351e8aa799aec859f60e29f445e3089eeedb/coverage-7.14.0-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:6e57054a583da8ac55edf24117ea4c9133032cfc4cf72aa2d48c1e5d4b52f899", size = 267974, upload-time = "2026-05-10T18:01:13.926Z" }, - { url = "https://files.pythonhosted.org/packages/1b/ae/227b1e379497fb7a4fc3286e620f80c8a1e7cec66d45695a01639eb1af65/coverage-7.14.0-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:cc3499459bbcdd51a65b64c35ab7ed2764eaf3cba826e0df3f1d7fe2e102b70b", size = 261578, upload-time = "2026-05-10T18:01:15.564Z" }, - { url = "https://files.pythonhosted.org/packages/a0/f5/3570342900f2acea31d33ff1590c5d8bac1a8e1a2e1c6d34a5d5e61de681/coverage-7.14.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:45899ec2138a4346ed34d601dedf5076fb74edf2d1dd9dc76a78e82397edee90", size = 264394, upload-time = "2026-05-10T18:01:17.607Z" }, - { url = "https://files.pythonhosted.org/packages/16/29/de1bbc01c935b28f89b1dc3db85b011c055e843a8e5e3b83141c3f80af7f/coverage-7.14.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:8767486808c436f05b23ab98eb963fb29185e32a9357a166971685cb3459900f", size = 262022, 
upload-time = "2026-05-10T18:01:19.304Z" }, - { url = "https://files.pythonhosted.org/packages/35/95/f53890b0bf2fc10ab168e05d38869215e73ca24c4cb521c3bb0eb62fe16b/coverage-7.14.0-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:a3b5ddfd6aa7ddad53ee3edb231e88a2151507a43229b7d71b953916deca127d", size = 265732, upload-time = "2026-05-10T18:01:21.494Z" }, - { url = "https://files.pythonhosted.org/packages/ed/ea/c919e259081dd2bdf0e43b87209709ba7ec2e4117c2a7f5185379c43463c/coverage-7.14.0-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:63df0fe568e698e1045792399f8ab6da3a6c2dce3182813fb92afa2641087b47", size = 260921, upload-time = "2026-05-10T18:01:23.533Z" }, - { url = "https://files.pythonhosted.org/packages/1a/2c/c2831889705a81dc5d1c6ca12e4d8e9b95dfc146d153488a6c0ea685d28e/coverage-7.14.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:827d6397dbd95144939b18f89edf31f63e1f99633e8d5f32f22ba8bdda567477", size = 263109, upload-time = "2026-05-10T18:01:25.165Z" }, - { url = "https://files.pythonhosted.org/packages/5a/a9/2fcae5003cac3d63fe344d2166243c2756935f48420863c5272b240d550b/coverage-7.14.0-cp313-cp313t-win32.whl", hash = "sha256:7bf43e000d24012599b879791cff41589af90674722421ef11b11a5431920bab", size = 223212, upload-time = "2026-05-10T18:01:27.157Z" }, - { url = "https://files.pythonhosted.org/packages/3f/bb/18e94d7b14b9b398164197114a587a04ab7c9fdbe1d237eef57311c5e883/coverage-7.14.0-cp313-cp313t-win_amd64.whl", hash = "sha256:3f5549365af25d770e06b1f8f5682d9a5637d06eb494db91c6fa75d3950cc917", size = 224272, upload-time = "2026-05-10T18:01:29.107Z" }, - { url = "https://files.pythonhosted.org/packages/db/56/4f14fad782b035c81c4ffd09159e7103d42bb1d93ac8496d04b90a11b7da/coverage-7.14.0-cp313-cp313t-win_arm64.whl", hash = "sha256:6d160217ec6fe890f16ad3a9531761589443749e448f91986c972714fad361c8", size = 222530, upload-time = "2026-05-10T18:01:31.151Z" }, - { url = 
"https://files.pythonhosted.org/packages/1c/18/b9a6586d73992807c26f9a5f274131be3d76b56b18a82b9392e2a25d2e45/coverage-7.14.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:9aed9fa983514ca032790f3fe0d1c0e42ca7e16b42432af1706b50a9a46bef5d", size = 220036, upload-time = "2026-05-10T18:01:33.057Z" }, - { url = "https://files.pythonhosted.org/packages/f3/9b/4165a1d56ddc302a0e2d518fd9d412a4fd0b57562618c78c5f21c57194f5/coverage-7.14.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:ba3b8390db29296dbbf49e91b6fe08f990743a90c8f447ba4c2ffc29670dfa63", size = 220368, upload-time = "2026-05-10T18:01:34.705Z" }, - { url = "https://files.pythonhosted.org/packages/69/aa/c12e52a5ba148d9995229d557e3be6e554fe469addc0e9241b2f0956d8ea/coverage-7.14.0-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:3a5d8e876dfa2f102e970b183863d6dedd023d3c0eeca1fe7a9787bc5f28b212", size = 251417, upload-time = "2026-05-10T18:01:36.949Z" }, - { url = "https://files.pythonhosted.org/packages/d7/51/ec641c26e6dca1b25a7d2035ba6ecb7c884ef1a100a9e42fbe4ce4405139/coverage-7.14.0-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:5ebb8f4614a3787d567e610bbfdf96a4798dd69a1afb1bd8ad228d4111fe6ff3", size = 253924, upload-time = "2026-05-10T18:01:38.985Z" }, - { url = "https://files.pythonhosted.org/packages/33/c4/59c3de0bd1b538824173fd518fed51c1ce740ca5ed68e74545983f4053a9/coverage-7.14.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b9bf47223dd8db3d4c4b2e443b02bace480d428f0822c3f991600448a176c97", size = 255269, upload-time = "2026-05-10T18:01:40.957Z" }, - { url = "https://files.pythonhosted.org/packages/7b/a9/36dfa153a62040296f6e7febfdb20a5720622f6ef5a81a41e8237b9a5344/coverage-7.14.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:3485a836550b303d006d57cc06e3d5afaabc642c77050b7c985a97b13e3776b8", size = 257583, 
upload-time = "2026-05-10T18:01:42.607Z" }, - { url = "https://files.pythonhosted.org/packages/26/7b/cc2c048d4114d9ab1c2409e9ee365e5ae10736df6dffcfc9444effa6c708/coverage-7.14.0-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:3e7e88110bae996d199d1693ca8ec3fd52441d426401ae963437598667b4c5eb", size = 251434, upload-time = "2026-05-10T18:01:44.537Z" }, - { url = "https://files.pythonhosted.org/packages/ee/df/6770eaa576e604575e9a78055313250faef5faa84bd6f71a39fece519c43/coverage-7.14.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:15228a6800ce7bdf1b74800595e56db7138cecb338fdbf044806e10dcf182dfe", size = 253280, upload-time = "2026-05-10T18:01:46.175Z" }, - { url = "https://files.pythonhosted.org/packages/ad/9e/1c0264514a3f98259a6d64765a397b2c8373e3ba59ee722a4802d3ec0c61/coverage-7.14.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:9d26ac7f5398bafc5b57421ad994e8a4749e8a7a0e62d05ec7d53014d5963bfa", size = 251241, upload-time = "2026-05-10T18:01:48.732Z" }, - { url = "https://files.pythonhosted.org/packages/64/16/4efdf3e3c4079cdbf0ece56a2fea872df9e8a3e15a13a0af4400e1075944/coverage-7.14.0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:2fb73254ff43c911c967a899e1359bc5049b4b115d6e8fbdde4937d0a2246cd5", size = 255516, upload-time = "2026-05-10T18:01:50.819Z" }, - { url = "https://files.pythonhosted.org/packages/93/69/b1de96346603881b3d1bc8d6447c83200e1c9700ffbaff926ba01ff5724c/coverage-7.14.0-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:454a380af72c6adada298ed270d38c7a391288198dbfb8467f786f588751a90c", size = 251059, upload-time = "2026-05-10T18:01:52.773Z" }, - { url = "https://files.pythonhosted.org/packages/a4/66/2881853e0363a5e0a724d1103e53650795367471b6afb234f8b49e713bc6/coverage-7.14.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:65c86fb646d2bd2972e96bd1a8b45817ed907cee68655d6295fe7ec031d04cca", size = 252716, upload-time = "2026-05-10T18:01:54.506Z" }, - { url = 
"https://files.pythonhosted.org/packages/55/5c/0d3305d002c41dcde873dbe456491e663dc55152ca526b630b5c47efd62f/coverage-7.14.0-cp314-cp314-win32.whl", hash = "sha256:6a6516b02a6101398e19a3f44820f69bab2590697f7def4331f668b14adaf828", size = 222788, upload-time = "2026-05-10T18:01:56.487Z" }, - { url = "https://files.pythonhosted.org/packages/f9/58/6e1b8f52fdc3184b47dc5037f5070d83a3d11042db1594b02d2a44d786c8/coverage-7.14.0-cp314-cp314-win_amd64.whl", hash = "sha256:45e0f79d8351fa76e256716df91eab12890d32678b9590df7ae1042e4bd4cf5d", size = 223600, upload-time = "2026-05-10T18:01:58.497Z" }, - { url = "https://files.pythonhosted.org/packages/00/70/a18c408e674bc26281cadaedc7351f929bd2094e191e4b15271c30b084cc/coverage-7.14.0-cp314-cp314-win_arm64.whl", hash = "sha256:4b899594a8b2d81e5cc064a0d7f9cac2081fed91049456cae7676787e41549c9", size = 222168, upload-time = "2026-05-10T18:02:00.411Z" }, - { url = "https://files.pythonhosted.org/packages/3d/89/2681f071d238b62aff8dfc2ab44fc24cfdb38d1c01f391a80522ff5d3a16/coverage-7.14.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:f580f8c80acd94ac72e863efe2cab791d8c38d153e0b463b92dfa000d5c84cd1", size = 220766, upload-time = "2026-05-10T18:02:02.313Z" }, - { url = "https://files.pythonhosted.org/packages/bd/c7/c987babafd9207ffa1995e1ef1f9b26762cf4963aa768a66b6f0501e4616/coverage-7.14.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:a2bd259c442cd43c49b30fbafc51776eb19ea396faf159d26a83e6a0a5f13b0c", size = 221035, upload-time = "2026-05-10T18:02:04.017Z" }, - { url = "https://files.pythonhosted.org/packages/5a/e9/d6a5ac3b333088143d6fc877d398a9a674dc03124a2f776e131f03864823/coverage-7.14.0-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:a706b908dfa85538863504c624b237a3cc34232bf403c057414ebfdb3b4d9f84", size = 262405, upload-time = "2026-05-10T18:02:05.915Z" }, - { url = 
"https://files.pythonhosted.org/packages/38/b1/e70838d29a7c08e22d44398a46db90815bbcbf28de06992bd9210d1a8d8e/coverage-7.14.0-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:7333cd944ee4393b9b3d3c1b598c936d4fc8d70573a4c7dacfec5590dd50e436", size = 264530, upload-time = "2026-05-10T18:02:07.582Z" }, - { url = "https://files.pythonhosted.org/packages/6b/73/5c31ef97763288d03d9995152b96d5475b527c63d91c84b01caea894b83a/coverage-7.14.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0f162bc9a15b82d947b02651b0c7e1609d6f7a8735ca330cfadec8481dd97d5a", size = 266932, upload-time = "2026-05-10T18:02:09.401Z" }, - { url = "https://files.pythonhosted.org/packages/e1/76/dd56d80f29c5f05b4d76f7e7c6d47cafacae017189c75c5759d24f9ff0cc/coverage-7.14.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:362cb78e01a5dc82009d88004cf60f2e6b6d6fcbfdec05b05af73b0abf40118f", size = 268062, upload-time = "2026-05-10T18:02:11.399Z" }, - { url = "https://files.pythonhosted.org/packages/6e/c7/27ba85cd5b95614f159ff93ebff1901584a8d192e2e5e24c4943a7453f59/coverage-7.14.0-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:acebd068fca5512c3a6fde9c045f901613478781a73f0e82b307b214daef23fb", size = 261504, upload-time = "2026-05-10T18:02:13.257Z" }, - { url = "https://files.pythonhosted.org/packages/13/2e/e8149f60ab5d5684c6eee881bdf34b127115cddbb958b196768dd9d63473/coverage-7.14.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:29fe3da551dface75deb2ccbf87b6b66e2e7ef38f6d89050b428be94afff3490", size = 264398, upload-time = "2026-05-10T18:02:15.063Z" }, - { url = "https://files.pythonhosted.org/packages/d9/7f/1261b025285323225f4b4abffa5a643649dfd67e25ddca7ebcbdea3b7cb3/coverage-7.14.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:b4cc4fce8672fffcb09b0eafc167b396b3ba53c4a7230f54b7aaffbf6c835fa9", size = 262000, 
upload-time = "2026-05-10T18:02:16.756Z" }, - { url = "https://files.pythonhosted.org/packages/d3/dc/829c54f60b9d08389439c00f813c752781c496fc5788c78d8006db4b4f2b/coverage-7.14.0-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:5d4a51aad8ba8bdcd2b8bd8f03d4aca19693fa2327a3470e4718a25b03481020", size = 265732, upload-time = "2026-05-10T18:02:18.817Z" }, - { url = "https://files.pythonhosted.org/packages/ed/b0/70bd1419941652fa062689cba9c3eeafb8f5e6fbb890bce41c3bdda5dbd6/coverage-7.14.0-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:9f323af3e1e4f68b60b7b247e37b8515563a61375518fa59de1af48ba28a3db6", size = 260847, upload-time = "2026-05-10T18:02:20.528Z" }, - { url = "https://files.pythonhosted.org/packages/f2/73/be40b2390656c654d35ea0015ea7ba3d945769cf80790ad5e0bb2d56d2ba/coverage-7.14.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:1a0abc7342ea9711c469dd8b821c6c311e6bc6aac1442e5fbd6b27fae0a8f3db", size = 263166, upload-time = "2026-05-10T18:02:22.337Z" }, - { url = "https://files.pythonhosted.org/packages/29/55/4a643f712fcf7cf2881f8ec1e0ccb7b164aff3108f69b51801246c8799f2/coverage-7.14.0-cp314-cp314t-win32.whl", hash = "sha256:a9f864ef57b7172e2db87a096642dd51e179e085ab6b2c371c29e885f65c8fb2", size = 223573, upload-time = "2026-05-10T18:02:24.11Z" }, - { url = "https://files.pythonhosted.org/packages/27/96/3acae5da0953be042c0b4dea6d6789d2f080701c77b88e44d5bd41b9219b/coverage-7.14.0-cp314-cp314t-win_amd64.whl", hash = "sha256:29943e552fdc08e082eb51400fb2f58e118a83b5542bd06531214e084399b644", size = 224680, upload-time = "2026-05-10T18:02:25.896Z" }, - { url = "https://files.pythonhosted.org/packages/93/3d/6ab5d2dd8325d838737c6f8d83d62eb6230e0d70b87b51b57bbfd08fa767/coverage-7.14.0-cp314-cp314t-win_arm64.whl", hash = "sha256:742a73ea621953b012f2c4c2219b512180dd84489acf5b1596b0aafc55b9100b", size = 222703, upload-time = "2026-05-10T18:02:27.822Z" }, - { url = 
"https://files.pythonhosted.org/packages/61/e8/cb8e80d6f9f55b99588625062822bf946cf03ed06315df4bd8397f5632a1/coverage-7.14.0-py3-none-any.whl", hash = "sha256:8de5b61163aee3d05c8a2beab6f47913df7981dad1baf82c414d99158c286ab1", size = 211764, upload-time = "2026-05-10T18:02:29.538Z" }, +version = "7.14.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/54/fd/0ab2772530e946e1be1abd0bc09e647ec9b02e88f0867857601fefca8953/coverage-7.14.1.tar.gz", hash = "sha256:30c08f7d90415aa98b3c990385dea2939b0da55f38515e5b369b83655f8523be", size = 920132, upload-time = "2026-05-26T20:41:36.783Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/92/69/0d2ef01ff4b8fcecd4cba920d11e92fa4f96ae412441d3b56a90a258e69b/coverage-7.14.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:3e3680291c4a1d0dadfa84a2c459576a4af5133abb617905714339a0c73138cf", size = 219722, upload-time = "2026-05-26T20:38:14.002Z" }, + { url = "https://files.pythonhosted.org/packages/f8/ae/9afdeaa31b9d9ce98124b6abf8bb49119bf71aecae04f8567c189d91299f/coverage-7.14.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a5274669f37f2343635a347b91a60777621341ab3378e9c6ac9335eee704bddf", size = 220240, upload-time = "2026-05-26T20:38:17.424Z" }, + { url = "https://files.pythonhosted.org/packages/51/69/c998589871df7ea7dba865cc5ee32b5a3e1d47ba6c68ef91104c7c46fa5e/coverage-7.14.1-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:cfe5a5fec635799ef33428f1e5e61bafa45a92a96190ba731561ba558ccc214d", size = 246981, upload-time = "2026-05-26T20:38:19.266Z" }, + { url = "https://files.pythonhosted.org/packages/fc/10/1c7d04c13040dac531d21b712bbe08f902e6dd9b58f5d77875c4d030f8f2/coverage-7.14.1-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:62a9f70b52e0b5a95cfef4a5c5641b06983cadc5e538a3feeb5c00211f523ac2", size = 248812, upload-time = "2026-05-26T20:38:20.75Z" }, + { url = 
"https://files.pythonhosted.org/packages/c1/65/2a38a4607ef27cadcfbcee034dba5830ae2569f90144a0f4c7dbf47d30b0/coverage-7.14.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3c18ebc343e15be53049b3a2dce38fe82d58f37e20ab9094b3a39c0aa4f6bb47", size = 250675, upload-time = "2026-05-26T20:38:22.159Z" }, + { url = "https://files.pythonhosted.org/packages/c9/a2/a446ed9752a4a59b79e0fb6cbb319f6facb2183045c0725462625e66f87e/coverage-7.14.1-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b84ffdf877644e7096aa936991efeed873f7f3df57b9cd001312b7668ab08550", size = 252590, upload-time = "2026-05-26T20:38:23.63Z" }, + { url = "https://files.pythonhosted.org/packages/9e/fd/e81fbd7ba752365546e9842b1cbdaad3d6919d2a522c590aef16a281ec5e/coverage-7.14.1-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e854312c4103f2ad4c0dc023b69b77ebfd2c89db5f86c4c94dc2353f9a92167e", size = 247691, upload-time = "2026-05-26T20:38:25.057Z" }, + { url = "https://files.pythonhosted.org/packages/53/35/f3c26fdaae9ea937d154ca4d372e5ea0a4167ff70d36c6074ac2eacb2f83/coverage-7.14.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:c643734307300234fafa36bf2a040a7235f8f177ea1fd6ec1423aea6fb7b929f", size = 248716, upload-time = "2026-05-26T20:38:26.406Z" }, + { url = "https://files.pythonhosted.org/packages/2e/14/940b6c49551fd343e8507ee2b0ba7af5d0aa04ed5bf768285cb7c72a9884/coverage-7.14.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:84ac9499e48700399a5dd0ea7085b5091961fec52c68d66b4ec0d3cf7f4441b1", size = 246721, upload-time = "2026-05-26T20:38:28.282Z" }, + { url = "https://files.pythonhosted.org/packages/aa/2c/40fc0634186c28292a662dff578866b3913983d6c375a3c2a74020938719/coverage-7.14.1-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:7f02d09f70776579b926d889a4c9c235070a1f47c40458aeaca563fae5acfdb5", size = 250533, upload-time = "2026-05-26T20:38:29.753Z" }, + { url 
= "https://files.pythonhosted.org/packages/de/e3/2c26bf1e811f9df991ff2a9bdddebdd13ee0665d564df7d05979f9146297/coverage-7.14.1-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:ce66d8e46da2bb5ee313a745cbd2e391d319176c1f7a9451bfcd3a2fb920859b", size = 246990, upload-time = "2026-05-26T20:38:31.516Z" }, + { url = "https://files.pythonhosted.org/packages/a8/b0/060260ef56bd92363ebdce0c7095ce422b06e69aae71828efeca473ab1ca/coverage-7.14.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:c912c259304cfb5ee584481cfb7ce1ff932b4d61e6c9140b8f19cb7b5ed82332", size = 247593, upload-time = "2026-05-26T20:38:33.065Z" }, + { url = "https://files.pythonhosted.org/packages/63/f3/501502046efeb0d6d94b5ca54941d95f1184183dd6bdb7f283985783bb4a/coverage-7.14.1-cp310-cp310-win32.whl", hash = "sha256:1238cb94638e610e972c60dac68e813f868dc7d6e982535270558443058d9d59", size = 222330, upload-time = "2026-05-26T20:38:35.36Z" }, + { url = "https://files.pythonhosted.org/packages/a0/5d/1bf99f2c558f128faf7906817ccbdb576ba815d3b41ce2ac1719b70a3663/coverage-7.14.1-cp310-cp310-win_amd64.whl", hash = "sha256:fc459e5d73be2d6332fcfe8dbf3d8994671fe33c700f4565988ecfa511547253", size = 223261, upload-time = "2026-05-26T20:38:37.196Z" }, + { url = "https://files.pythonhosted.org/packages/7d/d7/477ad149490e6cb849f28abea1dabb9c823cea72e7500c81b4240ce619c0/coverage-7.14.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:478b5bcd63c2e1357c5c7e16c070690df7b07f676b1c114d7b93e533c664309f", size = 219848, upload-time = "2026-05-26T20:38:38.715Z" }, + { url = "https://files.pythonhosted.org/packages/91/82/a5eb47257c50601bb7b9a9d2857c67b7a3a85ad74180eb2c98bb1fbe0ce5/coverage-7.14.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a24a81f9715ee42ef59a316cc11611c98fe23920f7c81861315c9f3ff4a230f4", size = 220354, upload-time = "2026-05-26T20:38:40.232Z" }, + { url = 
"https://files.pythonhosted.org/packages/43/8b/78419b5391a5cb706b6544390507e469d83ffc9a8248b02c4011aceb9365/coverage-7.14.1-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:196a13319ad88d6d8ef5ab489ec4f44ddde2143c0c7d5b27786f6c3ffd56a7e1", size = 250771, upload-time = "2026-05-26T20:38:41.782Z" }, + { url = "https://files.pythonhosted.org/packages/77/63/e77aaacd491182210d639636b7a8bba23ffffa9b82aa3762da9431855fa9/coverage-7.14.1-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:3d452fd08b5c72c5167c93e6867b5c08500bd40f2a21e1e854a500550b6cc36f", size = 252683, upload-time = "2026-05-26T20:38:43.305Z" }, + { url = "https://files.pythonhosted.org/packages/65/1c/a022e3cfbec2ac241640003cb3a817e161d9c7f5aa9b49173756cdc03204/coverage-7.14.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:23bf7fa51ac02e07fc7c96849b82946da47ae862dc8f86d183b2a4864fc38129", size = 254791, upload-time = "2026-05-26T20:38:45.361Z" }, + { url = "https://files.pythonhosted.org/packages/61/d6/967e408aca4c1ceb88cb0cc677169110ae7f5995fb5eaf5fb1f5a1bb8f5d/coverage-7.14.1-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:bcaa50684dcaadfa599ac48f81103c756d791cfd85c97203d2217c593d48b860", size = 256748, upload-time = "2026-05-26T20:38:46.91Z" }, + { url = "https://files.pythonhosted.org/packages/b8/be/869188f7fe28638078ec479331ace6dc5f7b40b7153eb616f47ab79404d8/coverage-7.14.1-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:4ea1c034f95c9b056e856b794630b17f9fa3d57e4800ff1e503d3be0f9c9078c", size = 250907, upload-time = "2026-05-26T20:38:48.493Z" }, + { url = "https://files.pythonhosted.org/packages/07/aa/adb7d3b4278d690e68703abcd76ab1b948242e3668d921711551b78f9ddb/coverage-7.14.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = 
"sha256:c7e057326434e441306226fbeb5d1aaf14a2637efe97ba668306635835f32ad7", size = 252483, upload-time = "2026-05-26T20:38:50.074Z" }, + { url = "https://files.pythonhosted.org/packages/43/61/331c74103c62dcb0c4b9b3a0de9a61aca016208b0a90f109592a9f9ecc28/coverage-7.14.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:59baf88468dbc8d63b1887afd92bda52e40bb1561696e5819670601403810cec", size = 250545, upload-time = "2026-05-26T20:38:51.613Z" }, + { url = "https://files.pythonhosted.org/packages/f6/b6/c5dae3c104d89be04828f61810e6b3473825482e4c288cc4ed04553e08ae/coverage-7.14.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:d34d75f892b3ab73ba11cab5442cce7b3e168fd64162b16f0e1e0d09c508edef", size = 254310, upload-time = "2026-05-26T20:38:53.503Z" }, + { url = "https://files.pythonhosted.org/packages/ad/a1/2b9d5863e3b83c01ad8199e3c597802fbb3a9dc90b058885804c20296d31/coverage-7.14.1-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:3a56abc20a472baf0304c455721bc601477440d28ecfde8a03dde79ede07e0df", size = 250266, upload-time = "2026-05-26T20:38:55.414Z" }, + { url = "https://files.pythonhosted.org/packages/7f/5e/0e511fbdb269359be26fe678a1c3fa1f2aa2a01573cc3f54268c8d6d4797/coverage-7.14.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6a3cb83d1552c0cd1b4906655b6a33fd4a8473229633a901c6b73bf86914dee9", size = 251174, upload-time = "2026-05-26T20:38:57.141Z" }, + { url = "https://files.pythonhosted.org/packages/85/10/e55307b622b3dd9671cb321824502dc10f93e72f2802b9946159a8edadeb/coverage-7.14.1-cp311-cp311-win32.whl", hash = "sha256:10274a1fbeb8ec5d72966e17bb198a3104257aca4ac09d98667c5f8aca8c8548", size = 222354, upload-time = "2026-05-26T20:38:58.727Z" }, + { url = "https://files.pythonhosted.org/packages/71/cf/107421693cfb71e4f1ca5bf70443f64d4161878068d07a3e51c7ad21d17b/coverage-7.14.1-cp311-cp311-win_amd64.whl", hash = "sha256:87ebdf787d4888e3f3f2d523eadc6e18c6d18c6d0eb173801a189641627fb37e", size = 223290, upload-time = "2026-05-26T20:39:00.413Z" }, + { url 
= "https://files.pythonhosted.org/packages/b8/1d/3e3644585eb29e9dafefb19555078529a4d7cce12bd21929664eea989277/coverage-7.14.1-cp311-cp311-win_arm64.whl", hash = "sha256:dd34767fa19848d35659ffc0a75314f58c7af3f1cd87ec521e8292a1238398a3", size = 221953, upload-time = "2026-05-26T20:39:02.159Z" }, + { url = "https://files.pythonhosted.org/packages/3d/b7/bdbb725ba02c5b42825b200c940f38b7a54fcad24627b7192f78f8110d76/coverage-7.14.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a06c76364a9360e33d6d23769aefdf7f66f38e2ffb60ceb1baaa4989d83b695c", size = 220022, upload-time = "2026-05-26T20:39:03.702Z" }, + { url = "https://files.pythonhosted.org/packages/72/81/fdc0898a55c6219223291ec1a1fe89966ef212ce82276aa0899df84b5de0/coverage-7.14.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fad54e871165f6ec2f536063ac74c3104508a12963e64072ba44bd822de52b0c", size = 220379, upload-time = "2026-05-26T20:39:05.381Z" }, + { url = "https://files.pythonhosted.org/packages/de/72/de048c4a25e13bce59ac6a339351c10bdf2515e07459afcdaf04dc3143a2/coverage-7.14.1-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:84b535f00655ecafe1d929d1fb00ed5d6fa3051ea643ab2c161a3887b86f294b", size = 251888, upload-time = "2026-05-26T20:39:07.367Z" }, + { url = "https://files.pythonhosted.org/packages/28/30/300c343f68beb9d4cbb64ec81e58c5b6b80b56927f72d2b38654ac26e013/coverage-7.14.1-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:6b6b0853b895fe0e98cbfc580d1ec3393d9302b4b1e96a77b3f5c91fdab899e6", size = 254624, upload-time = "2026-05-26T20:39:09.037Z" }, + { url = "https://files.pythonhosted.org/packages/b1/ed/7b25642496e8170b6bac14adce00537c6e5fa2d586159401a4de3e8b49e6/coverage-7.14.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:442cc9c952b2df400cda54bb04ab87330cf2cd08a8692cbbea36773531eb6f37", size = 255739, upload-time = "2026-05-26T20:39:10.889Z" }, + { url = 
"https://files.pythonhosted.org/packages/7f/a2/abd210b8c4e29c24e4624916db97bb519097a91034aaeb767f937e7da794/coverage-7.14.1-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8270544c361ed405a27a060dbc9ed2c124b084d96dfdc2d9a2510482aef981ad", size = 257998, upload-time = "2026-05-26T20:39:12.722Z" }, + { url = "https://files.pythonhosted.org/packages/7f/24/7c50beed3792fe62f6ce0545c6686ce83379719e2c0276179333d97eae92/coverage-7.14.1-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:48b283b1dd6372e8de2a7a9a4c4d5dc06f4d4fd209b876f3c88a7a205a0c8f84", size = 252296, upload-time = "2026-05-26T20:39:14.259Z" }, + { url = "https://files.pythonhosted.org/packages/15/05/0f874628ebcbfc77ead559ff210281ef06a97db08481832e7dd39274a135/coverage-7.14.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:5b0c99ba93a07d56f6df340bb79be53202a082b2fdb81bfe6190b741a3470d54", size = 253658, upload-time = "2026-05-26T20:39:15.923Z" }, + { url = "https://files.pythonhosted.org/packages/99/6f/ca6ad067364b337ef997802115e7ecad2abd2248b05471464b0dea02b4d4/coverage-7.14.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:e471bc5769ff073b058cfadb0d736b56ce067c8560eabeb0da88462df98c23e7", size = 251803, upload-time = "2026-05-26T20:39:17.537Z" }, + { url = "https://files.pythonhosted.org/packages/c0/30/b9b4d377cd9f40baf228068f5a81faf8450c6228503011bd499708483a50/coverage-7.14.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:f497a1ea81d4cd7c10ddcaa685135b9aabd291af3d55775a9ddf3cb7a364cdd9", size = 255873, upload-time = "2026-05-26T20:39:19.414Z" }, + { url = "https://files.pythonhosted.org/packages/3c/21/7c721a9e5e6bb88547d30a787aefb97512d3f54c1324c7488d9b3743f7f9/coverage-7.14.1-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:2222be86d0b54f5dd5a38f45f17f315f737245e857bf0bdedc70734f84a13c02", size = 251372, upload-time = "2026-05-26T20:39:21.169Z" }, + { url = 
"https://files.pythonhosted.org/packages/9d/8c/f8ae5a2200130e1503cd7661a6cd3b2b7bacef98277fbf3571fb13f8b766/coverage-7.14.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:85e85586565842f6932abebd4c18bcb1074223dc0b3576e7d173ca710622813a", size = 253245, upload-time = "2026-05-26T20:39:23.097Z" }, + { url = "https://files.pythonhosted.org/packages/34/62/70a9024672a5f6910517d9628c52c9afbdd3cf8f46426af52bb148a56fff/coverage-7.14.1-cp312-cp312-win32.whl", hash = "sha256:4a28fd227808366b196a75476dced2eb35b351d6766ba9c858dc93319e87f4f1", size = 222567, upload-time = "2026-05-26T20:39:24.868Z" }, + { url = "https://files.pythonhosted.org/packages/f6/81/8b7cd386839b039ebe1855733b9f9449a8dec5d79564018234f185a7fa70/coverage-7.14.1-cp312-cp312-win_amd64.whl", hash = "sha256:54acdb6674a4661768d7bf7db32dfb9f46ab1d764f8aba6df75ce1a6a088724e", size = 223372, upload-time = "2026-05-26T20:39:26.603Z" }, + { url = "https://files.pythonhosted.org/packages/ae/ba/b44d472022f620d289d95fa830143235c0c36461c6f2437ea8d51e5481ed/coverage-7.14.1-cp312-cp312-win_arm64.whl", hash = "sha256:99cd41ff91afd94896fea3bc002706b6ae4ce95727d06e4a0f39c0a8d8bd8b1a", size = 221989, upload-time = "2026-05-26T20:39:28.242Z" }, + { url = "https://files.pythonhosted.org/packages/8a/9e/5f6d56327c62b185225d145191c607e07515294a0aa6338e58805cd4a5ac/coverage-7.14.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:be9f2c802dcfce3f71298303aa5dad0dce440a76c52f2f60dacd8656dab78793", size = 220044, upload-time = "2026-05-26T20:39:29.902Z" }, + { url = "https://files.pythonhosted.org/packages/75/92/e82aca356744cbbc0f77a0b623e38918c1872361963413a3bab5d0340393/coverage-7.14.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:6223a72fd0e4c7156353ec0f08a5f93623e1d3034d0e2683b9bb8ea674131b1d", size = 220412, upload-time = "2026-05-26T20:39:31.561Z" }, + { url = 
"https://files.pythonhosted.org/packages/27/c9/385bde0bf7ed0f4bf3a7ee5367060a86b5d218718cfd6fb943c0f836b34f/coverage-7.14.1-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:7279d2110a28cebc738b6459ecda2771735a4c18465fbbd36b3288fe5ed92247", size = 251412, upload-time = "2026-05-26T20:39:33.337Z" }, + { url = "https://files.pythonhosted.org/packages/51/8c/23faf6a2343a0d17f960a4bd56c43bc7eb4cf312f774dd6ceebd82c7d8fc/coverage-7.14.1-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:9eeb3fcbc13ba40dfbdb22d01d196a28e9cef9ed4c29b60061a1e0e823a9929d", size = 254008, upload-time = "2026-05-26T20:39:35.009Z" }, + { url = "https://files.pythonhosted.org/packages/42/06/36f4aa9ca8a815e6036156e80706a67828bb97bd826948244f6996dda957/coverage-7.14.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5f0cfc27c539f07cf5c0a4cfe211d0b6cae039f8f40526dbaa71944e64b50a7b", size = 255241, upload-time = "2026-05-26T20:39:36.71Z" }, + { url = "https://files.pythonhosted.org/packages/ca/79/95266316352f90f6b1c6736bb413302edfde2453fb32422d3911642691b3/coverage-7.14.1-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:221c70f316241a78e77e607c227cefc8808d4e08f28d99c04f35694690e940be", size = 257373, upload-time = "2026-05-26T20:39:38.412Z" }, + { url = "https://files.pythonhosted.org/packages/e3/9c/58316d1f66c488b5fca8a0eb3e98348807813efa8a0d0833b9021be27488/coverage-7.14.1-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:da028256b04ec30e5e0114b6f76172938c313991f0a2d3d894271315cf5d5e43", size = 251635, upload-time = "2026-05-26T20:39:40.268Z" }, + { url = "https://files.pythonhosted.org/packages/ef/5a/ca2398a568e16fed7bb713e84ba3603a7164fb65779abe645c565ec890d5/coverage-7.14.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = 
"sha256:76a085d7005236a767e3426148b2c407e53ad61695c562f8a81da2d373324901", size = 253373, upload-time = "2026-05-26T20:39:42.145Z" }, + { url = "https://files.pythonhosted.org/packages/6e/2c/0396562c32deaebe7be51d865b3a41e9a87d7561acafe1a28f53b07e019a/coverage-7.14.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b553d04b5e778a8e56d57eb134aff42a92718ecba45e79c4764ecfa40efd92ff", size = 251341, upload-time = "2026-05-26T20:39:43.907Z" }, + { url = "https://files.pythonhosted.org/packages/fd/8f/a94f9221184c9cae1ee115820e3798e48b6b17777a9f19e46fb9a0c8dc74/coverage-7.14.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:46f714d2fb8ae2f4f29f23ada7f1e79b759fff5a70f94a1dac23af204c3ec9e4", size = 255497, upload-time = "2026-05-26T20:39:46.166Z" }, + { url = "https://files.pythonhosted.org/packages/71/69/505d70e47db1eaebcd002c39759707621ef184cd6b1ae084d9f41293f323/coverage-7.14.1-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:1896f5e19ff3f0431c7ce2172adc54890fd97f86b59ced8ca1649145d9ffe35d", size = 251159, upload-time = "2026-05-26T20:39:48.03Z" }, + { url = "https://files.pythonhosted.org/packages/e0/aa/58681c383aa33a9d2ed40a02d7a22fbf780d1fa4d575396365777828198c/coverage-7.14.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:62fd185ef9df3c33d1c8178c5af105f762afbad96038de9a4ae100aa6297ca33", size = 252934, upload-time = "2026-05-26T20:39:49.872Z" }, + { url = "https://files.pythonhosted.org/packages/eb/fd/11c928cd6bdffc7074bb5965c173d9ebf517fb00205e1da524b98d29ef92/coverage-7.14.1-cp313-cp313-win32.whl", hash = "sha256:ab4af6352741a604c431c6072fce5bee33bf0f20dc7a56618d6bf6bb89e9810c", size = 222584, upload-time = "2026-05-26T20:39:51.68Z" }, + { url = "https://files.pythonhosted.org/packages/6f/92/fb416fc26d340dcba19518c418d6048e913186e17243982c5e435e41fa7a/coverage-7.14.1-cp313-cp313-win_amd64.whl", hash = "sha256:7af486dabe8954d03b087f0021540897afe084f04e16ff5579e08cc46f871416", size = 223394, upload-time = "2026-05-26T20:39:53.472Z" }, + { url = 
"https://files.pythonhosted.org/packages/73/c6/02d56e3867972f77d5036de924643f26c056e848f00452cafb4dbc3c29b4/coverage-7.14.1-cp313-cp313-win_arm64.whl", hash = "sha256:2224f89ffd0c5605ccce1ed7a584da162bc7c55f601ab1c946bc9de31a486b42", size = 222015, upload-time = "2026-05-26T20:39:55.374Z" }, + { url = "https://files.pythonhosted.org/packages/4d/9e/fcc77914050df73f7662fa1f00902774c79c075a8388ab334074574bf77e/coverage-7.14.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:de286598cc65d2b489411174b1faec2f5a7775fb3201fd925db2a76b4030f37d", size = 220733, upload-time = "2026-05-26T20:39:57.189Z" }, + { url = "https://files.pythonhosted.org/packages/f7/67/2963cbdaf5cbadec44efa3a1e39eaa1f02df4079585f05387607a221e126/coverage-7.14.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:042c46ded7c288aeb07cf14a28b6c1e10b78fcba40171c3fa1e939377eeef0b5", size = 221086, upload-time = "2026-05-26T20:39:59.019Z" }, + { url = "https://files.pythonhosted.org/packages/c8/c5/8701645574e11881f2f47d8930f98bc48b5d43b25eb5b4430dfc4a2f9f48/coverage-7.14.1-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:f4ddbe407477f04c45115d1a4e5bc480f753553b534d338d4c3358b1cdd0ea52", size = 262381, upload-time = "2026-05-26T20:40:00.822Z" }, + { url = "https://files.pythonhosted.org/packages/7c/28/7a64d73598263e0c5abd5084211a8474488d31b3c552ff531c719dfcff62/coverage-7.14.1-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:d13e6725992e2d2fd7d81d4f5241952d13740121dfd501da09201be39b2c003a", size = 264458, upload-time = "2026-05-26T20:40:02.506Z" }, + { url = "https://files.pythonhosted.org/packages/fa/d8/4969179db9f7eb4df218e69540adf829d1c835f59452513d065d15446802/coverage-7.14.1-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f747dc8edcfe740130f28f32f3995e955494285717e86ee25af51db2219df08a", size = 266884, upload-time = "2026-05-26T20:40:04.421Z" }, + { url = 
"https://files.pythonhosted.org/packages/a6/78/a45d5794dbc9bafd97afc96a4377c86c7820d78b6cf51b89bc1d4e919275/coverage-7.14.1-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ced2f09ef276fd58611a1ef502164ad266d2b75174e5a40cabbdb4033f9f6cf2", size = 268022, upload-time = "2026-05-26T20:40:06.298Z" }, + { url = "https://files.pythonhosted.org/packages/21/cb/4f5e354e9e3e67af96bd4e57113e6db6b22298c7168b13eec408a549903d/coverage-7.14.1-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:b84800013769a78ccb9ef4659402e26d06867e337b61ec365f77ad008adea80e", size = 261631, upload-time = "2026-05-26T20:40:08.226Z" }, + { url = "https://files.pythonhosted.org/packages/ec/49/eced49af4cb996d5d8b7e94e736175c513e4facd3398507b89892b4326d8/coverage-7.14.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:ea8cd6ca0ee9f616aaef3afc6882e32c2cbf18b00d96313ffd76af650574034d", size = 264443, upload-time = "2026-05-26T20:40:10.137Z" }, + { url = "https://files.pythonhosted.org/packages/f1/d8/5603a88a7c5913a6b54f6cb1a8c46f7b39cbb30f27cd3f492908da09b2d7/coverage-7.14.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:aa5e304a873fabddc11e484e9b6b738bd38bd7bed17b09aa84eecf5332e8b8bb", size = 262069, upload-time = "2026-05-26T20:40:11.999Z" }, + { url = "https://files.pythonhosted.org/packages/f0/59/2ae3cb79da554a06c8619d6c88ea19dd1e4aed4b834b6a83bb1fa243bdc5/coverage-7.14.1-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:5a1c5215be81035e629d5bc756650634d0bf31991038db7a0eccb90f025ce16d", size = 265780, upload-time = "2026-05-26T20:40:13.858Z" }, + { url = "https://files.pythonhosted.org/packages/af/5f/b130c1dc999031f2648bd25317fbce505ad8d5562079b4ed81e736a84967/coverage-7.14.1-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:79058c47dae6788504b5effb319961bcd72d7240551464b91d474bc0ed186d69", size = 260970, upload-time = "2026-05-26T20:40:16.142Z" }, + { url = 
"https://files.pythonhosted.org/packages/87/d1/ec13ccddeb48ec963bdfa72a11224bac2584bd045ba13beca82f8113e9c7/coverage-7.14.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:370c5afae3fa0658e11694a32b24c2778f6bc2d17718121f94ee185e69f26b54", size = 263157, upload-time = "2026-05-26T20:40:18.382Z" }, + { url = "https://files.pythonhosted.org/packages/cf/c2/cd91ead503045161092d3845f7bb95ea2f25131ce96d3e314dd835d91b9c/coverage-7.14.1-cp313-cp313t-win32.whl", hash = "sha256:3758dd0a7f1fa57365ef2e781df0f0731d38b6e3772259d13dae4bd8a958d4b1", size = 223259, upload-time = "2026-05-26T20:40:20.381Z" }, + { url = "https://files.pythonhosted.org/packages/71/9f/1e28d97e6bd2c76b07f38b7c02870f1371255ff6717f54eca578fcbbdd0e/coverage-7.14.1-cp313-cp313t-win_amd64.whl", hash = "sha256:6ff665fb023a77386fe11685190cee1f60a7d635994a30d9b0a061533d470fce", size = 224320, upload-time = "2026-05-26T20:40:22.316Z" }, + { url = "https://files.pythonhosted.org/packages/a9/e0/d936e908f0e1efa55e52b91e01b52f1055cef5e1ab2718493390ed8e2fb8/coverage-7.14.1-cp313-cp313t-win_arm64.whl", hash = "sha256:17a5a241e5997621a956a7f402a7433ef4221e5152809b785bec79e2323799f1", size = 222577, upload-time = "2026-05-26T20:40:24.894Z" }, + { url = "https://files.pythonhosted.org/packages/d6/34/fc2f101b151af3799a101f0550b0454aa008afdc0add677394ec4aa8ea10/coverage-7.14.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:d5ed429d0b8edaac649e889b4ffcedb6c80b06629a3f93050e3dddfb99235bee", size = 220091, upload-time = "2026-05-26T20:40:27.249Z" }, + { url = "https://files.pythonhosted.org/packages/3d/a7/1ebae2ab5b961b5c79bb09fe7b3ac99edb190d8be4a8c510b2cf66f46468/coverage-7.14.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:8011224a62280e50dab346960c03cf47aca1a1e09e608c0fb33fd6e0cc8e9500", size = 220421, upload-time = "2026-05-26T20:40:30.084Z" }, + { url = 
"https://files.pythonhosted.org/packages/5e/90/92aca9cf0acc95123c96cd1eb1f08917897a7f5dee01e15738922971ec31/coverage-7.14.1-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:12c42ec1e14f553c4f817e989365982e646e27211f10a0f717855b94a79c8906", size = 251466, upload-time = "2026-05-26T20:40:32.542Z" }, + { url = "https://files.pythonhosted.org/packages/26/2b/78048cbe3b999f6cbf9cc0d90abba6a88a3e0863a8c1c6cbc762f3f8802f/coverage-7.14.1-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:06144cd511cf2624873a035c5069cf297144f6e77a73ee3d7a55b605ec5efb42", size = 253973, upload-time = "2026-05-26T20:40:34.473Z" }, + { url = "https://files.pythonhosted.org/packages/8e/21/c2e33b29d1cfde484a19d437afc343c6cd30b08d78cbbf9f5aff14e57b2b/coverage-7.14.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a311d8e1da24be5c1ccf85cbfb06315dbaa1703d5a1eab3f6432c72b837917c8", size = 255318, upload-time = "2026-05-26T20:40:38.154Z" }, + { url = "https://files.pythonhosted.org/packages/8e/ee/aad2f108d63b769121005302f16bf66db8625c88ceaba466942e09a2607e/coverage-7.14.1-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c79cead5b5bc584d9c71451cb984d0e3a84e0c0937379c8efcbf27c8d661b851", size = 257633, upload-time = "2026-05-26T20:40:40.164Z" }, + { url = "https://files.pythonhosted.org/packages/c2/f8/11a2c29b4fd76d9849f81d0bb812ec0017a9396df3217214e38934a8c837/coverage-7.14.1-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:dcbf65f1f66a26cdd88c35cf68fb4729c5d1cd2e88added72420541dfb212034", size = 251488, upload-time = "2026-05-26T20:40:42.631Z" }, + { url = "https://files.pythonhosted.org/packages/c9/b8/9a5820de4b8ac2b71d85e3b5fb49108d7469c665f0e2ad0dd7569023e305/coverage-7.14.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = 
"sha256:fd86572566fb40189a8260446158235159bc7a82dfbc87a3b39cf4fb57fcec1c", size = 253329, upload-time = "2026-05-26T20:40:45.208Z" }, + { url = "https://files.pythonhosted.org/packages/6b/ff/f33e4823667e27548e8fd8df44217515303f9808d0ff29817db56f87d990/coverage-7.14.1-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:7771b601718fdde84832c3a434ca9bbf4ae9adbc49d84198b4110700c3c77c36", size = 251291, upload-time = "2026-05-26T20:40:47.502Z" }, + { url = "https://files.pythonhosted.org/packages/68/9b/489db0ebb209054766b90a9014a45f6d26eb724c02ec21311c3733b5a644/coverage-7.14.1-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:39b21e212c55af06fa375e3dbf90a8a8e38792f3a910c580066d23563830ddd5", size = 255564, upload-time = "2026-05-26T20:40:49.372Z" }, + { url = "https://files.pythonhosted.org/packages/27/b5/16bc2d4c2409b23c7737edb68c83bc89e345f378050549fe1d75ac7d34d5/coverage-7.14.1-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:f2302660e32562a532b442480121aef8aa61a5bdb20b30bf0adab29f10a5a4b4", size = 251107, upload-time = "2026-05-26T20:40:51.677Z" }, + { url = "https://files.pythonhosted.org/packages/7d/0c/2629997469a00cd069d588a41c9dc887610f2775ae89d250c4791e65272a/coverage-7.14.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:03a6f93c1ec3b7f2e77b5dbcc5573a2c21f12529a5c6bbe0f16f72303cc2fa4d", size = 252764, upload-time = "2026-05-26T20:40:54.267Z" }, + { url = "https://files.pythonhosted.org/packages/d2/ee/f78d63c8f079e0d7211c7e2401fa17e311514534ba61bae03e4b287ce4ab/coverage-7.14.1-cp314-cp314-win32.whl", hash = "sha256:8a3ce026d73290f42f08dafecbd82c193a74df280461fbf97300fec51fd133ee", size = 222837, upload-time = "2026-05-26T20:40:56.496Z" }, + { url = "https://files.pythonhosted.org/packages/dc/b9/be539854f93a70dfbeec69117f33ec70dc42ff0b65b5b07ab8d40d04228e/coverage-7.14.1-cp314-cp314-win_amd64.whl", hash = "sha256:114c95ef29302423b87d159075805f4ab973254a2638a5d7d046c94887cc87d7", size = 223650, upload-time = "2026-05-26T20:40:58.351Z" }, + { url 
= "https://files.pythonhosted.org/packages/fe/9e/24e2842fef40f35ac82ba3a7719c8023d011bf3bf652d0675316a9d088a1/coverage-7.14.1-cp314-cp314-win_arm64.whl", hash = "sha256:a07891c3f4805442b31b71e84ba3cf29ed1aa9a428284e06deeb4b23e5b46343", size = 222218, upload-time = "2026-05-26T20:41:00.321Z" }, + { url = "https://files.pythonhosted.org/packages/0a/1d/ac0a9df5fe31c1e8bdd658074905fc12844a05c1a7e3fdb8417e97c31e23/coverage-7.14.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:1101a5ebb083aecb625ebb6209d4105b58f647b093cb2dc8122d7b33f743cfe1", size = 220822, upload-time = "2026-05-26T20:41:02.281Z" }, + { url = "https://files.pythonhosted.org/packages/32/cf/f964fd9aff20323f9f1a726c97135f8a76bcd87b92dad141a456a43f3c64/coverage-7.14.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:851b9e1e4e8a4608e77c79714b2e77c0970d2ed7202a05e92ae407817481887b", size = 221084, upload-time = "2026-05-26T20:41:04.593Z" }, + { url = "https://files.pythonhosted.org/packages/d8/5e/7e5ef2aba844de2b80d678619fcf0841b42e3f37f16411226f3fe4c1016f/coverage-7.14.1-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:d5b89cdfb2ee051b71e8c3c70bd81a9eff81100f736a269136fe1a68efe00474", size = 262454, upload-time = "2026-05-26T20:41:06.641Z" }, + { url = "https://files.pythonhosted.org/packages/64/62/75809bded87015cc4935524218a2a8ed8dd1a8498bfed30a2f4f7a4b4d34/coverage-7.14.1-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:0177614a0370f227888b4e436a7c55686d6a9f90eb1ade2b624ba685a1686e86", size = 264578, upload-time = "2026-05-26T20:41:08.556Z" }, + { url = "https://files.pythonhosted.org/packages/f3/42/d33392dc14633525012d2d504fa1a33b05538bf535f5c1d64675e5754b78/coverage-7.14.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2d69af5dea2de76fc485a83032a630523f985198b7e25be901ec60181587b01e", size = 266981, upload-time = "2026-05-26T20:41:10.824Z" }, + { url = 
"https://files.pythonhosted.org/packages/2a/49/0157c4428c2aca7f1e09d5565930586fd5ae36f1655f08b0daa7cf1fcae1/coverage-7.14.1-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:35ab22d91de736e8966b980dc355cbcdd2c6dbbcfe275f9a2991bc8a91b3df65", size = 268112, upload-time = "2026-05-26T20:41:12.966Z" }, + { url = "https://files.pythonhosted.org/packages/96/26/86b9ce71f4092b1ed325ce1421698081df1286b833400b6836912834d6e0/coverage-7.14.1-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:357d4e32935c36588aaba057d734fa32428c360c9fc2e4442afbf1b646beee6e", size = 261558, upload-time = "2026-05-26T20:41:15Z" }, + { url = "https://files.pythonhosted.org/packages/20/4c/c311210c5472cf5401d8422b0d7812cdd520f24417673afabda6c323faca/coverage-7.14.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:51bd64741cc6fa065abd300ede1afe5a5291ece9c31da8b24884deda48bcc3f8", size = 264447, upload-time = "2026-05-26T20:41:17.369Z" }, + { url = "https://files.pythonhosted.org/packages/fb/71/59513f8710ed3e6b0ac0a050a5b7e977bb9c9e880354863b5d00d8809256/coverage-7.14.1-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:9132cd363a68a4c3daa7c8704a654b1e39d3360f6f5b8ddd470608a945236c07", size = 262048, upload-time = "2026-05-26T20:41:19.309Z" }, + { url = "https://files.pythonhosted.org/packages/84/8d/bceed32dc494f5bbf50f775cd2e78ca814953942b5ea28d3c1c3ac316f14/coverage-7.14.1-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:07c6290b1697b862c0478eab545eec949a0d0e4d6d03497f446d706da3b4f2de", size = 265781, upload-time = "2026-05-26T20:41:21.559Z" }, + { url = "https://files.pythonhosted.org/packages/e7/c5/9348fe40dbfd4991aaf78df2c6c3098bfb2cc834d1fd362a64b4efef855a/coverage-7.14.1-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:5ea0c297e27133853b4d8a3eb799bff5a2dbd9f2f41537a240d337ac9b4df890", size = 260896, upload-time = "2026-05-26T20:41:23.428Z" }, + { url = 
"https://files.pythonhosted.org/packages/ca/92/1ea0f03929da7cf87206b1fa24f4c8e9c158be0455481af29ec0a1f3503f/coverage-7.14.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:01b7733daad0237daa01ef80fe2dfceffc911e6a17fa7b55d14aa8214eaaaecd", size = 263214, upload-time = "2026-05-26T20:41:25.419Z" }, + { url = "https://files.pythonhosted.org/packages/f6/a9/b2493c054c0e01a643266742ab45e15744e60743f9260cd930c7142b1124/coverage-7.14.1-cp314-cp314t-win32.whl", hash = "sha256:6adc5a36984624a70bf11d7184e20fa0a49aa7c47ffab43804106a1a695ea22e", size = 223624, upload-time = "2026-05-26T20:41:27.795Z" }, + { url = "https://files.pythonhosted.org/packages/fc/bd/3e1e6a57fccd2d7c83fcdf338e93ba98eb85c6e877dd34731ac585375490/coverage-7.14.1-cp314-cp314t-win_amd64.whl", hash = "sha256:ddf799247318f34dbcd2efa8c95a8d0642674e926bb1774cf9b63dfd2a389d1c", size = 224728, upload-time = "2026-05-26T20:41:30.098Z" }, + { url = "https://files.pythonhosted.org/packages/bb/d7/31066cf1d2f0c6c797fce911bcfa01dd35642dc6da992a950256097c5860/coverage-7.14.1-cp314-cp314t-win_arm64.whl", hash = "sha256:145986fe66647eb489f18d9a997567a3fd358584c4b5a808769113abc07466af", size = 222752, upload-time = "2026-05-26T20:41:32.123Z" }, + { url = "https://files.pythonhosted.org/packages/8a/3c/1a983b9a745d7f83d53f057bcc5bf79ba6a2bbc08266b3f0c7d6fe630c9b/coverage-7.14.1-py3-none-any.whl", hash = "sha256:a252f21c27e38347e60111a3266b03827422a7d5525951aceee313aa68bab1d2", size = 211815, upload-time = "2026-05-26T20:41:34.078Z" }, ] [package.optional-dependencies] @@ -568,11 +569,11 @@ wheels = [ [[package]] name = "docutils" -version = "0.22.4" +version = "0.23" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ae/b6/03bb70946330e88ffec97aefd3ea75ba575cb2e762061e0e62a213befee8/docutils-0.22.4.tar.gz", hash = "sha256:4db53b1fde9abecbb74d91230d32ab626d94f6badfc575d6db9194a49df29968", size = 2291750, upload-time = "2025-12-18T19:00:26.443Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/39/a4/5180d9afc57e8fca05601dd652bdff19604c218814037fe90ffc7625a50a/docutils-0.23.tar.gz", hash = "sha256:746f5060322511280a1e50eb76846ed6bf2342984b2ac04dc42caa1a8d78799e", size = 2303823, upload-time = "2026-05-27T17:41:06.934Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/02/10/5da547df7a391dcde17f59520a231527b8571e6f46fc8efb02ccb370ab12/docutils-0.22.4-py3-none-any.whl", hash = "sha256:d0013f540772d1420576855455d050a2180186c91c15779301ac2ccb3eeb68de", size = 633196, upload-time = "2025-12-18T19:00:18.077Z" }, + { url = "https://files.pythonhosted.org/packages/32/91/30151a39f7570f448ed84529390628a651d7f27c87d73c9b887f8189695e/docutils-0.23-py3-none-any.whl", hash = "sha256:25d013af9bf23bc1c7b2b093dff4208166c53a94786c9e447808335ef1185fea", size = 634701, upload-time = "2026-05-27T17:40:58.442Z" }, ] [[package]] @@ -746,7 +747,8 @@ dependencies = [ { name = "attrs" }, { name = "jsonschema-specifications" }, { name = "referencing" }, - { name = "rpds-py" }, + { name = "rpds-py", version = "0.30.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "rpds-py", version = "2026.5.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/b3/fc/e067678238fa451312d4c62bf6e6cf5ec56375422aee02f9cb5f909b3047/jsonschema-4.26.0.tar.gz", hash = "sha256:0c26707e2efad8aa1bfc5b7ce170f3fccc2e4918ff85989ba9ffa9facb2be326", size = 366583, upload-time = "2026-01-07T13:41:07.246Z" } wheels = [ @@ -1135,11 +1137,11 @@ wheels = [ [[package]] name = "platformdirs" -version = "4.9.6" +version = "4.10.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/9f/4a/0883b8e3802965322523f0b200ecf33d31f10991d0401162f4b23c698b42/platformdirs-4.9.6.tar.gz", hash = 
"sha256:3bfa75b0ad0db84096ae777218481852c0ebc6c727b3168c1b9e0118e458cf0a", size = 29400, upload-time = "2026-04-09T00:04:10.812Z" } +sdist = { url = "https://files.pythonhosted.org/packages/d7/47/e4501f49c178ae1d9f4a75073fda4204f52647993f075a9db4d14930e0c5/platformdirs-4.10.0.tar.gz", hash = "sha256:31e761a6a0ca04faf7353ea759bdba55652be214725111e5aac52dfa29d4bef7", size = 31224, upload-time = "2026-05-28T03:32:53.587Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/75/a6/a0a304dc33b49145b21f4808d763822111e67d1c3a32b524a1baf947b6e1/platformdirs-4.9.6-py3-none-any.whl", hash = "sha256:e61adb1d5e5cb3441b4b7710bea7e4c12250ca49439228cc1021c00dcfac0917", size = 21348, upload-time = "2026-04-09T00:04:09.463Z" }, + { url = "https://files.pythonhosted.org/packages/81/e6/cd9575ac904136b3cbf7aa7ee819ef86eedb7274e46f230e94ea4342e729/platformdirs-4.10.0-py3-none-any.whl", hash = "sha256:fb516cdb12eb0d857d0cd85a7c57cea4d060bee4578d6cf5a14dfdf8cbf8784a", size = 22743, upload-time = "2026-05-28T03:32:52.175Z" }, ] [[package]] @@ -1390,15 +1392,15 @@ wheels = [ [[package]] name = "python-discovery" -version = "1.3.1" +version = "1.4.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "filelock" }, { name = "platformdirs" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/48/60/e88788207d81e46362cfbef0d4aaf4c0f49efc3c12d4c3fa3f542c34ebec/python_discovery-1.3.1.tar.gz", hash = "sha256:62f6db28064c9613e7ca76cb3f00c38c839a07c31c00dfe7ed0986493d2150a6", size = 68011, upload-time = "2026-05-12T20:53:36.336Z" } +sdist = { url = "https://files.pythonhosted.org/packages/a6/12/38c1a0b1e64806780c9563e3fc9f6e472251839662587cfbe9bfaf2ae10a/python_discovery-1.4.0.tar.gz", hash = "sha256:eb8bc7daad3c226c147e45bb4e970a1feb1bf4048ee178e6db59e197b8010ce3", size = 68455, upload-time = "2026-05-28T01:15:37.639Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/b7/6f/a05a317a66fee0aad270011461f1a63a453ed12471249f172f7d2e2bc7b4/python_discovery-1.3.1-py3-none-any.whl", hash = "sha256:ed188687ebb3b82c01a17cd5ac62fc94d9f6487a7f1a0f9dfe89753fec91039c", size = 33185, upload-time = "2026-05-12T20:53:34.969Z" }, + { url = "https://files.pythonhosted.org/packages/c8/8d/3d316429f65029532bb1e28ff77b797d86b5ac3915bb44ca4e19aa283d43/python_discovery-1.4.0-py3-none-any.whl", hash = "sha256:26ed78d703e234879a66244c7d4114563fb13ec5cd30a2d1357e5fb4850782da", size = 33217, upload-time = "2026-05-28T01:15:36.573Z" }, ] [[package]] @@ -1534,7 +1536,8 @@ version = "0.37.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "attrs" }, - { name = "rpds-py" }, + { name = "rpds-py", version = "0.30.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "rpds-py", version = "2026.5.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/22/f5/df4e9027acead3ecc63e50fe1e36aca1523e1719559c499951bb4b53188f/referencing-0.37.0.tar.gz", hash = "sha256:44aefc3142c5b842538163acb373e24cce6632bd54bdb01b21ad5863489f50d8", size = 78036, upload-time = "2025-10-13T15:30:48.871Z" } @@ -1716,6 +1719,9 @@ wheels = [ name = "rpds-py" version = "0.30.0" source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.11'", +] sdist = { url = "https://files.pythonhosted.org/packages/20/af/3f2f423103f1113b36230496629986e0ef7e199d2aa8392452b484b38ced/rpds_py-0.30.0.tar.gz", hash = "sha256:dd8ff7cf90014af0c0f787eea34794ebf6415242ee1d6fa91eaba725cc441e84", size = 69469, upload-time = "2025-11-30T20:24:38.837Z" } wheels = [ { url = 
"https://files.pythonhosted.org/packages/06/0c/0c411a0ec64ccb6d104dcabe0e713e05e153a9a2c3c2bd2b32ce412166fe/rpds_py-0.30.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:679ae98e00c0e8d68a7fda324e16b90fd5260945b45d3b824c892cec9eea3288", size = 370490, upload-time = "2025-11-30T20:21:33.256Z" }, @@ -1834,6 +1840,147 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d1/b7/b95708304cd49b7b6f82fdd039f1748b66ec2b21d6a45180910802f1abf1/rpds_py-0.30.0-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:ac37f9f516c51e5753f27dfdef11a88330f04de2d564be3991384b2f3535d02e", size = 562191, upload-time = "2025-11-30T20:24:36.853Z" }, ] +[[package]] +name = "rpds-py" +version = "2026.5.1" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.15'", + "python_full_version >= '3.11' and python_full_version < '3.15'", +] +sdist = { url = "https://files.pythonhosted.org/packages/2e/43/25a8dcd3feedd735039a8f0b5b7e3b118232b5eae288c4fd9ab200d41094/rpds_py-2026.5.1.tar.gz", hash = "sha256:07b24fea40541e28570e5b795a4a38fbdcd12550c06bd0748005ecc8116ca256", size = 64459, upload-time = "2026-05-28T12:02:13.232Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4f/a0/acf8b6fc20bfdcd3a45bd3f57680fb198e157b7e997b9123b10763798bd2/rpds_py-2026.5.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:3397a5ed7174dc2786bb214030232fc36fe8e5584fec43a9952cc542b1a12036", size = 355609, upload-time = "2026-05-28T11:58:50.78Z" }, + { url = "https://files.pythonhosted.org/packages/b6/95/f8203fd997484b1690a6869cd0e503b6c3c6be55b0ecc36d1a491fe742f0/rpds_py-2026.5.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:99ab6ba7bfa2cb0f96a04e3652355bf04e3f51aceb1e943b8541dab7ba4828cc", size = 348460, upload-time = "2026-05-28T11:58:52.374Z" }, + { url = 
"https://files.pythonhosted.org/packages/33/8c/b47326ad2f0be545a5e5c1a55937a12afaea7d392ba2837bb9680f57e6c9/rpds_py-2026.5.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d0efbe45632665e53e3db8fe1e5692db58fc5cb9bab4459d570b83efefe11164", size = 381031, upload-time = "2026-05-28T11:58:53.775Z" }, + { url = "https://files.pythonhosted.org/packages/22/0b/e83bbd97ffac6f6389b605cd4e1c8ac5761dc7e977769c9255d8c5adb7bd/rpds_py-2026.5.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:01d17b29c0c23d82b1f4751147ec49cf451f1fc2554eb9ef5f957e55d2656ead", size = 387121, upload-time = "2026-05-28T11:58:55.243Z" }, + { url = "https://files.pythonhosted.org/packages/fd/0e/d285d1bc8864245919c61e1ca82263e4a66d337759c3a4cef72766ff9afc/rpds_py-2026.5.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7559f72b94ae52659086c595dfa017cde03155f7832071d30959049052cb3ece", size = 501026, upload-time = "2026-05-28T11:58:56.788Z" }, + { url = "https://files.pythonhosted.org/packages/86/06/ccb2109a1e543437b5e43816f2b43b9554cc6783145528a4e3711e05c011/rpds_py-2026.5.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9e25b7088f9ccbfc0dfcaa52bf969300ca229e10ecf758974ebcbb080a4b37bb", size = 391865, upload-time = "2026-05-28T11:58:58.298Z" }, + { url = "https://files.pythonhosted.org/packages/3d/33/237173db1cfef10105b3839a24de00eb8d2a523711add4632447cdf0aedd/rpds_py-2026.5.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:613fc4ee9eaef26dc5840666214dd6fbcebcf32f46e76f4abc473059f4e13dda", size = 378012, upload-time = "2026-05-28T11:58:59.589Z" }, + { url = "https://files.pythonhosted.org/packages/97/64/1eae54e34d5161f9969295e80bd6b62a55f2b6ac5f2a5b60d02c2140e758/rpds_py-2026.5.1-cp311-cp311-manylinux_2_31_riscv64.whl", hash = "sha256:85264a90ff4c05c1568dd65f5921c837614b67c60358fb4c17df3b7f2e90690a", size = 391111, upload-time = "2026-05-28T11:59:01.104Z" }, + { 
url = "https://files.pythonhosted.org/packages/d8/34/5bb334a5a0f65d77869217c4654f34c78a7d11b93938a3c076a2edeafc52/rpds_py-2026.5.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fe71bca7d547acb17027c7fd1624ff8aae623499c498d3e7011182c4de5c25e0", size = 409225, upload-time = "2026-05-28T11:59:02.433Z" }, + { url = "https://files.pythonhosted.org/packages/16/0f/007ec21283b5b040b4ec3bd95e0402591e22bfa7d5c93dfe01c465c2d2d7/rpds_py-2026.5.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:a05fa4f41f37ec97c9c260441a940450a192f78d774d2b097eee1379f1e1246a", size = 556487, upload-time = "2026-05-28T11:59:04.012Z" }, + { url = "https://files.pythonhosted.org/packages/ff/10/5437c94508169b6b22d8418fef7a66e9ffb5f3b9e9c94460f2eedafe06ff/rpds_py-2026.5.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:df1d2a1996755b24b9ecee92cb4d36c28f86f464a6a173349c26bab41e94b8c2", size = 620798, upload-time = "2026-05-28T11:59:05.485Z" }, + { url = "https://files.pythonhosted.org/packages/e0/d5/9937dce4d6bda74157b954e7d1460db05a22f5929dccfeeba1ed27a93df0/rpds_py-2026.5.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:8895840ac4809e5f60c88fd07617cd71326e73d6e5a8aa783c5c0f7c24985de2", size = 584053, upload-time = "2026-05-28T11:59:06.837Z" }, + { url = "https://files.pythonhosted.org/packages/6c/31/750617dd0ae1752471bf43f9e41d263398fae7cde7849d23b8574a70e617/rpds_py-2026.5.1-cp311-cp311-win32.whl", hash = "sha256:3684a59b158a7683aaeb8e25352e9a9dd2122cec78f2d8530266e4f91b4c7b3f", size = 214390, upload-time = "2026-05-28T11:59:08.402Z" }, + { url = "https://files.pythonhosted.org/packages/3c/bb/3dcab0e1d9516303f2eb672a5d6f62eca5a69e2886301e9c8c54b520c39b/rpds_py-2026.5.1-cp311-cp311-win_amd64.whl", hash = "sha256:7bd530e6a530bb3ea892f194fafa455f3516ac25ecf7143fd33c09be62b0470a", size = 231097, upload-time = "2026-05-28T11:59:09.786Z" }, + { url = 
"https://files.pythonhosted.org/packages/49/d6/c6bbf5cb1cf12b9732df8074b57f6ef8341ba884c95d40632ae8bddb44e4/rpds_py-2026.5.1-cp311-cp311-win_arm64.whl", hash = "sha256:0a5ae4dbe43c1076983b72616496919872ae7bbe7a1e21cc48336bc3154d130b", size = 226361, upload-time = "2026-05-28T11:59:11.079Z" }, + { url = "https://files.pythonhosted.org/packages/d4/e7/a78582dc57caa592dcc7d4fb69b61390561e908eb3d2f5df5928a8e354c0/rpds_py-2026.5.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:3abe24a66e57adcfa645d718063a5fa5103ecc71ddbf26d78af8f9368018ff1d", size = 353040, upload-time = "2026-05-28T11:59:12.531Z" }, + { url = "https://files.pythonhosted.org/packages/a3/43/35e3f136343aef451e545ce8c38d36c2f93c0ed88703db8b64ba2b205c68/rpds_py-2026.5.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:58b1d94308ddf0b1982f61f2eb54bf92997c9ece8a8093ef014250f4a517906c", size = 345775, upload-time = "2026-05-28T11:59:13.827Z" }, + { url = "https://files.pythonhosted.org/packages/20/e1/0f2160c5982d3157734d5cb3ed63d8b2d583a73c9864f77b666449f32cf8/rpds_py-2026.5.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0fa92420128dadce7f54bd73ba1825a273e9268fe9e35dbf7e6362890efa4e08", size = 376329, upload-time = "2026-05-28T11:59:15.271Z" }, + { url = "https://files.pythonhosted.org/packages/d0/11/ee0ba42aff83bf4effdbc576673c6be64c5e173978c3f6d537e94482f77d/rpds_py-2026.5.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ca653c6546386227cd9800d1bef6a348099acf8db4250341da6d90f663d6dfcb", size = 383539, upload-time = "2026-05-28T11:59:16.665Z" }, + { url = "https://files.pythonhosted.org/packages/11/df/d94aa6a499d4ac40afe2d7620f2c597fd3c0f182e854ad7cf3f596a81cb6/rpds_py-2026.5.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:66c93681c4729e4e3ecba31b8179fae083ff3118841672835140338b4b9867c1", size = 494674, upload-time = "2026-05-28T11:59:17.991Z" }, + { url = 
"https://files.pythonhosted.org/packages/1f/75/33d30f43bb2f458de11979486a591b1bf6e5651765ed1704c6197c2dc773/rpds_py-2026.5.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:40ff257542e04796880e011e15cd4dc21c2599975df2aaa8f2c8495ca574e1a5", size = 389268, upload-time = "2026-05-28T11:59:19.434Z" }, + { url = "https://files.pythonhosted.org/packages/f4/1e/2c9096fc19d5fd084b0184ca2b651e659aa0a37e6fdbecf6ece47f147fe1/rpds_py-2026.5.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b6825cc329b290e93c5f6a9be2393118a763f6ccf6abd83704e0c102ca583644", size = 376280, upload-time = "2026-05-28T11:59:21Z" }, + { url = "https://files.pythonhosted.org/packages/b9/e5/61ec9f8be8211ea7f48448195549e4aaf02004083475493b0e137702ecb2/rpds_py-2026.5.1-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:de42116e69cb53b911cc34aee5ab98f36c597b822545045d49e938818b99e5e4", size = 387233, upload-time = "2026-05-28T11:59:22.454Z" }, + { url = "https://files.pythonhosted.org/packages/0d/ca/bcec1005c4f4a234f92a29078631fee49206c7265ccae966f18fd332e80e/rpds_py-2026.5.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c0f920015df2a504bebaba6d4c31ccf3fcf942f92655c086da30b671aad19aa6", size = 405009, upload-time = "2026-05-28T11:59:23.845Z" }, + { url = "https://files.pythonhosted.org/packages/72/e6/4d5718c5cf26c522dc7c9999e238da1e77380b81d0c5d1df11e271ddfeb1/rpds_py-2026.5.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0408a24e44feb919423dc6d9da677cb5cddb894d2ca9e763967d156d9c60fab4", size = 553113, upload-time = "2026-05-28T11:59:25.184Z" }, + { url = "https://files.pythonhosted.org/packages/d4/25/2ee807bdb3e1f0b7eddf7782acd5665a8b5205a331a7d7244a52c4812fd9/rpds_py-2026.5.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:cea68bcd53467561ae2f96a6bdad1544299ba97b5b0ddcd5ac3d376e5c781c24", size = 618838, upload-time = "2026-05-28T11:59:26.749Z" }, + { url = 
"https://files.pythonhosted.org/packages/6a/c1/7d4c26f167f8c41501cc073d30ee22082b16ce358cf5b00ec97cbc7804ea/rpds_py-2026.5.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:4be8b1d2a705cc37d08256004e1d07de143fa0075c8e85a3df020b776f62b732", size = 582436, upload-time = "2026-05-28T11:59:28.11Z" }, + { url = "https://files.pythonhosted.org/packages/04/1d/9d12b0a337bab46f4769f8857f4007e3b2d639e14f9a44a0efe157696e64/rpds_py-2026.5.1-cp312-cp312-win32.whl", hash = "sha256:6736718bd4fc49cbcb538ba30516fdbef161522acefb739657d48b97bd864fed", size = 212734, upload-time = "2026-05-28T11:59:29.689Z" }, + { url = "https://files.pythonhosted.org/packages/c5/93/e4116f2de7f56bc7406a76033dc501811ddeb22b7f056b92d632871ebb0c/rpds_py-2026.5.1-cp312-cp312-win_amd64.whl", hash = "sha256:0a7d1eec967df0e9b22614a5e177622e0c89611d03727fa0cb48e45028907870", size = 229045, upload-time = "2026-05-28T11:59:31.033Z" }, + { url = "https://files.pythonhosted.org/packages/cb/53/6c3419d85eb2ec5938a37627c585b42d76a63bb731d6e42ed4b079ebf486/rpds_py-2026.5.1-cp312-cp312-win_arm64.whl", hash = "sha256:1841d067089e117142d79b98aa0df2f08b52f2ecc1819dd2700636c0db74a473", size = 223967, upload-time = "2026-05-28T11:59:32.318Z" }, + { url = "https://files.pythonhosted.org/packages/6c/32/14c961ad295f490eb0849ada8b79683e93a59b9de3afdd983eaf55fa6867/rpds_py-2026.5.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:efef4ac29c6ff495531eb17ee705b62841ecaa291b7c7077e848ea03e237164d", size = 352787, upload-time = "2026-05-28T11:59:33.655Z" }, + { url = "https://files.pythonhosted.org/packages/ca/bb/d1b85117967c11191441a7274ae616c65d93901d082c588f89a50a8da5ae/rpds_py-2026.5.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c39f5b67a8a2e67179ada2a954227d670fe65fa9098457f698f56ddf248709b3", size = 345179, upload-time = "2026-05-28T11:59:35Z" }, + { url = 
"https://files.pythonhosted.org/packages/7c/46/d84105f062e626a1b233f863907288a4708c2d833b8b4c6fb2764bc080c0/rpds_py-2026.5.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b5c30f3f04eef4fbd362226a6f31d7c8895ca4fbb6e0b790f6890a98d8da8559", size = 376173, upload-time = "2026-05-28T11:59:36.43Z" }, + { url = "https://files.pythonhosted.org/packages/e2/ae/469d7959ce5b1201e1de135dc735b86db3b35dd0d1734f6a44246d5f061c/rpds_py-2026.5.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:277f6c82f0580848796c7ecc8a7173aa3bfb928e4ff831261c2f60a81dc270db", size = 383162, upload-time = "2026-05-28T11:59:37.995Z" }, + { url = "https://files.pythonhosted.org/packages/dc/a2/57853d31a1116a561aa072794602ad3f6341e18d70a8523f1bd5b9fc1e5a/rpds_py-2026.5.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:63c2c4c213f1a4e3f3de28ecab029dbdee976324e729c0d7a55211be72576b02", size = 495093, upload-time = "2026-05-28T11:59:39.453Z" }, + { url = "https://files.pythonhosted.org/packages/99/63/3a8eabcad9314b7daf5c65f451d2c33d989235cd8a5762186cf2c3f5a4f8/rpds_py-2026.5.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3350ec808fb538fe71a1f94dfaa0e29c598dfad805ce49f0caec5ae3183c652b", size = 389829, upload-time = "2026-05-28T11:59:40.896Z" }, + { url = "https://files.pythonhosted.org/packages/4b/25/05678d97fc25e2622df14dc530fb82023174ecfff6733991ed0d78f167bd/rpds_py-2026.5.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1b964e3ab599e718dc46c018d104b1ebc007cbc6567d827c94a687fca56d77e", size = 374786, upload-time = "2026-05-28T11:59:42.626Z" }, + { url = "https://files.pythonhosted.org/packages/88/d1/8c90b6431e80a3b91b284a5c7c8c0c4f9c006444d90477a740d6e0f9c694/rpds_py-2026.5.1-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:19cb09fab7b7fc96b2a6e28f2e34b72a3705ff27b37edb77455316e5d3f3dc9b", size = 386920, upload-time = "2026-05-28T11:59:44.124Z" }, + { 
url = "https://files.pythonhosted.org/packages/ff/99/4638f672ab356682d633ee0da9255f5b67ce6efd0b85eb94ad3e255e65a5/rpds_py-2026.5.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:abe76bcdba31e576cb83eeb8797aa0d882b738fef6dc65d0601fc753806a5b46", size = 405059, upload-time = "2026-05-28T11:59:47.177Z" }, + { url = "https://files.pythonhosted.org/packages/66/3f/3546524b6eb4cc2e1f363a3d638fa52f6c24faae3500c25fb488b02f1740/rpds_py-2026.5.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:8bff7073db3899158fff55ebf57b113a67030af26f80a18978f9f0aa60250ddf", size = 553030, upload-time = "2026-05-28T11:59:48.603Z" }, + { url = "https://files.pythonhosted.org/packages/c6/c3/7b3388c796fcf471bd17194242d4dc1a7608567c0fa422bcc1c5e79f9c1e/rpds_py-2026.5.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:8ba264fa49be666cd9cc56bf34ec7002fb3d27a4aee5bcb4d43d0d18feb1bb6f", size = 618975, upload-time = "2026-05-28T11:59:50.314Z" }, + { url = "https://files.pythonhosted.org/packages/61/1e/a3cb07f2795075d1d88efddae2f541359fde5f08c81ee114c29c2949c90a/rpds_py-2026.5.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4860b603ddda0475a8885499b3729e90229d480105b42651962a5397d995fa89", size = 581178, upload-time = "2026-05-28T11:59:51.673Z" }, + { url = "https://files.pythonhosted.org/packages/a1/74/e758c03a5ef46f04c37f2651a2893db846d569ba8a7bca469d4b58939bcd/rpds_py-2026.5.1-cp313-cp313-win32.whl", hash = "sha256:7944270ae71383f6e2657dd7d5ce4eeb4ac2d0059a6738f0510583d462ab4842", size = 212481, upload-time = "2026-05-28T11:59:53.148Z" }, + { url = "https://files.pythonhosted.org/packages/70/ec/a2aca432db9c7359b40fa393eeeaa0d166c2f70175be956e75fa24197c44/rpds_py-2026.5.1-cp313-cp313-win_amd64.whl", hash = "sha256:88647f43a73c4e01be19b04ceef0c8d3a1958153604d13c773becd8016f2a0cf", size = 228519, upload-time = "2026-05-28T11:59:54.505Z" }, + { url = 
"https://files.pythonhosted.org/packages/29/60/a73bfdd45b096574556acf303bbd9fa9eed36ca8a818b514e2a5d5fe2b9d/rpds_py-2026.5.1-cp313-cp313-win_arm64.whl", hash = "sha256:453895624ecf7db7063b1004e44037522bbaef9ff6a945e59bc71662d7a03abd", size = 223446, upload-time = "2026-05-28T11:59:56.081Z" }, + { url = "https://files.pythonhosted.org/packages/18/e2/408105fd611823f00882aea810f3989a30d26b1bab8b6beb20f98c724e0e/rpds_py-2026.5.1-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:b4e4bc98639ec915f512fde3aa7a95e0041d95d9c3cc86eea841fa63cb1e8600", size = 355287, upload-time = "2026-05-28T11:59:57.448Z" }, + { url = "https://files.pythonhosted.org/packages/8d/58/5c4a43436843c90d0f6d19f82c200c80e3843ca9fa07b237623327f6d384/rpds_py-2026.5.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:cacedb7a6e167680acba45ad5716e89067d225dc80da0d7040cae8c81d4572fa", size = 347033, upload-time = "2026-05-28T11:59:58.881Z" }, + { url = "https://files.pythonhosted.org/packages/fb/c2/1a71acdacaf4e259b10278fb87b039ded3cf80041bcd89dd8a3ea702ded6/rpds_py-2026.5.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:68700371c5d7ae1412862ddfa719090925c93ecf351c566d66f09d04b136ea00", size = 376891, upload-time = "2026-05-28T12:00:00.516Z" }, + { url = "https://files.pythonhosted.org/packages/c2/c8/535f3d9b65addd8e28aa87b83c6e526799c3717a88273db8ea795beeef7a/rpds_py-2026.5.1-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:296c799becfa849c779c8725494fe9ed94959ed886787df4364b058465bad7f0", size = 385646, upload-time = "2026-05-28T12:00:02.394Z" }, + { url = "https://files.pythonhosted.org/packages/1c/91/dc033f313345c354ade914dbe73cdb90b615a4409ea02430d5356794f3d8/rpds_py-2026.5.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d3858b908218ee108d0bbfb2095ccc237648053c9bf98affad7cb079acaf1d97", size = 498830, upload-time = "2026-05-28T12:00:04.189Z" }, + { url = 
"https://files.pythonhosted.org/packages/27/fc/90fcbea459dbb8ddc18a2e0fd1de9412b48bc84ffff2db771cf714bacfd6/rpds_py-2026.5.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4fb8d2e7cb2f850b169806d61d1b991738acec96500a75c30f49caf064ce7cef", size = 392830, upload-time = "2026-05-28T12:00:05.797Z" }, + { url = "https://files.pythonhosted.org/packages/b2/1d/46cd11a228c9750684a798d98f878be6f614aa762438da7378f035e79e35/rpds_py-2026.5.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:27b74c10ed6a8f190f4287f53bcfea348b92a84a9c9f70d30183d1e6172d580d", size = 379613, upload-time = "2026-05-28T12:00:07.433Z" }, + { url = "https://files.pythonhosted.org/packages/24/4a/d9b0c6af3a1de03eb93741bbe8be2bdce84d8fda8224f3005451d86df389/rpds_py-2026.5.1-cp313-cp313t-manylinux_2_31_riscv64.whl", hash = "sha256:b9a6528956191c48c52294a592dbd4a8386d7048bdb25c0efcb6b966466c6d83", size = 388183, upload-time = "2026-05-28T12:00:09.227Z" }, + { url = "https://files.pythonhosted.org/packages/c5/b4/db7aaabdda6d020afc87d981bcc2f57a434c7dec60ecfc2ab3dd50b20351/rpds_py-2026.5.1-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:af03e34e860047bc7a352b842856fcf78798fbb81132cc98bd2f907ab4eb9cd2", size = 408578, upload-time = "2026-05-28T12:00:10.779Z" }, + { url = "https://files.pythonhosted.org/packages/08/d6/070f6a41cbb343e2ac4171859bf3f3623e0ab002f72619d6d505313ec2de/rpds_py-2026.5.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:fea6e836d10abbe191d557d33bd58bd5987725fe63aa1eefe557d230209855bd", size = 553573, upload-time = "2026-05-28T12:00:12.443Z" }, + { url = "https://files.pythonhosted.org/packages/75/ab/1a71ea3589c4345dac0a0518f0e6a031cb42689277851b683c46d27463a5/rpds_py-2026.5.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:fc0c0f878ea770a0a8a462456c5ad36fc9fe6358e6b76fdadc7f17575e0b8bf1", size = 620861, upload-time = "2026-05-28T12:00:14.09Z" }, + { url = 
"https://files.pythonhosted.org/packages/8a/22/9bf80a56069c0c443fcfefac639a86a744550a2898817a6dfd3e26654924/rpds_py-2026.5.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:e0b360f316d966b048b085857630b3cc51f3db2f07b06f440eac8f695374d1e3", size = 585633, upload-time = "2026-05-28T12:00:15.66Z" }, + { url = "https://files.pythonhosted.org/packages/da/68/3b2c0a75c9e04125696f84ebdbbf304acf5a40b58ba4481cdb98a922c3ba/rpds_py-2026.5.1-cp313-cp313t-win32.whl", hash = "sha256:a2999883eedf72fdfb7520b92c7d4ec2572a71ff40239377aa604cc529eecafc", size = 210074, upload-time = "2026-05-28T12:00:17.291Z" }, + { url = "https://files.pythonhosted.org/packages/e7/8b/609157d5a25d37d4f29f92840ba531f416907c34ae5c5739dd21fc2bef98/rpds_py-2026.5.1-cp313-cp313t-win_amd64.whl", hash = "sha256:e07be2a9d7122bd6e82dea89814ef8dc893feb1aae97fec1630f3263bbb30e55", size = 228635, upload-time = "2026-05-28T12:00:18.73Z" }, + { url = "https://files.pythonhosted.org/packages/d4/6f/19c1918a4b590d8de87e712e4abe4b3875771eff60216fb6153cf6665c68/rpds_py-2026.5.1-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:1f2c391c3059798093b65df23aca2cac150460ae9c630d99dec83d703d9485b9", size = 349756, upload-time = "2026-05-28T12:00:20.217Z" }, + { url = "https://files.pythonhosted.org/packages/e5/60/a06fe7da34eca79dacbf958a2ba0c6eea85bc2b29de20080bf40f72f66fa/rpds_py-2026.5.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:413b424f7c4ee65ab5e5be91f5731be0f8b41a1ee2b12dfe810d716312e95a78", size = 343831, upload-time = "2026-05-28T12:00:21.711Z" }, + { url = "https://files.pythonhosted.org/packages/bf/ec/b2333b97b90e2a6ef6ca8ad386ee284968e74bcfe113b3f1a8d9036429a9/rpds_py-2026.5.1-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2c595a1d9255dce0599e13130d1440ab2506654f2b50294226ee06402f8fef63", size = 375127, upload-time = "2026-05-28T12:00:23.326Z" }, + { url = 
"https://files.pythonhosted.org/packages/14/7f/e00aae54067f2b488c4637961d5f58204d470795fc791085fa3f15060d2e/rpds_py-2026.5.1-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1c27c5f6102eac8c03e7595a00827a53b271ba40a53b59ff8709170e0855ea4a", size = 379034, upload-time = "2026-05-28T12:00:24.89Z" }, + { url = "https://files.pythonhosted.org/packages/be/cc/423999bbb8ae8dc93c77fc1d5e984ade5eb89d237d3bb884ccfa72ae2890/rpds_py-2026.5.1-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6c7fcf61d44cacecaf3aea542b0e053db77972a4573e7ceda16fb2b399161195", size = 490823, upload-time = "2026-05-28T12:00:26.676Z" }, + { url = "https://files.pythonhosted.org/packages/0f/aa/c671bf660f12e68d3c52ff86c7066ed1372df5a0f4f2ff584e419b8207e7/rpds_py-2026.5.1-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2c817a189d4ee14290420e5ff051e4dd6baa13f3edf84685071dee07a6d538ee", size = 388144, upload-time = "2026-05-28T12:00:28.577Z" }, + { url = "https://files.pythonhosted.org/packages/19/c8/d63bb75b68afe77b229e3021c6031bcaf01da5db5b0e69d0d10f9ba679a7/rpds_py-2026.5.1-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21846aac0ed2e0589f38c12dc44e77bb64e494b771eadbcf169cba00566ba7ba", size = 371959, upload-time = "2026-05-28T12:00:30.304Z" }, + { url = "https://files.pythonhosted.org/packages/82/35/c51122014d8274ff37dc606d60049c3db7d83da02b5b282511e5a906a9a6/rpds_py-2026.5.1-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:b317c87a13f769a4e787819bd508aaa5d69aa09b0880de9af6d3a8a54571cdec", size = 383558, upload-time = "2026-05-28T12:00:31.764Z" }, + { url = "https://files.pythonhosted.org/packages/e3/f9/2790cb99c136a5363acdeacf5c27c56f3de0d4118a1f48fca83404c99c89/rpds_py-2026.5.1-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ce87129d9f2c14fa6c4a8601fb80eb4488c80d38a20cd13758ef11123e14995d", size = 402789, upload-time = "2026-05-28T12:00:33.247Z" }, + { url = 
"https://files.pythonhosted.org/packages/e5/1b/e4fb584f8c75d35c38150ff6a332cda949e6f97acba1f4fd123b14ab56fe/rpds_py-2026.5.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:9cdddb6c1207d284d94fd1530adf57fbd797fe7c4b8704ba85f49414f2557e7d", size = 551405, upload-time = "2026-05-28T12:00:34.819Z" }, + { url = "https://files.pythonhosted.org/packages/d8/f7/a6731b4216cb3793ea1af5391da240f5683dacc0d13e034fe5fc3503f240/rpds_py-2026.5.1-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:4e237e139f94d3c036fd28eb9f564c99055476ff4ff05cd42be55ce349b5aa02", size = 616975, upload-time = "2026-05-28T12:00:36.268Z" }, + { url = "https://files.pythonhosted.org/packages/2c/ea/2e051a81d95d8e63f4b35a1c463a87e8766bc3d083c067c5dfb6bf220747/rpds_py-2026.5.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:ed0954b524873214369184a9c82b0eaa45a3fbb9a798cd95b17e0d98499e7ea0", size = 578701, upload-time = "2026-05-28T12:00:37.82Z" }, + { url = "https://files.pythonhosted.org/packages/65/56/b5f6fdb2083e32bca8a8993d89e70db114b4756c9e2c38421328126689d2/rpds_py-2026.5.1-cp314-cp314-win32.whl", hash = "sha256:2d88621d6a7d4dfa633d21abe90f280bb205274e16b1d1e61c6ad4640b2453b7", size = 209806, upload-time = "2026-05-28T12:00:39.492Z" }, + { url = "https://files.pythonhosted.org/packages/fb/80/65a5aa96c155e611d1ed844e4e1f57f3e36b021f396d9f8585d756e6b90d/rpds_py-2026.5.1-cp314-cp314-win_amd64.whl", hash = "sha256:cef8ac28d26f4dda3533060c20fbf80a325458fa9fd23ea72a73cdfa8e978838", size = 225985, upload-time = "2026-05-28T12:00:40.94Z" }, + { url = "https://files.pythonhosted.org/packages/27/7c/ad185212e87b05f196daef92bc5f3caf07298eb47c295b5585c3dd3093ac/rpds_py-2026.5.1-cp314-cp314-win_arm64.whl", hash = "sha256:eaaea962c68cdc68d4a533ba985ab8e9484277910bbfaa2ab3ef7732667bfed8", size = 221219, upload-time = "2026-05-28T12:00:43.15Z" }, + { url = 
"https://files.pythonhosted.org/packages/23/58/e14ae18759020334646b031e708ab4158d653a938822bfb7b95ef2e93aa3/rpds_py-2026.5.1-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:21942f52dbbd5f8758bf021213d28bd45c39e873e65e2407faf5f1846f5761ad", size = 352148, upload-time = "2026-05-28T12:00:44.638Z" }, + { url = "https://files.pythonhosted.org/packages/31/9b/5f4a1e2f960bca3ac5d052b139dd31eed97b259f9d909173821760d542e8/rpds_py-2026.5.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:f414556f6e3958300ff941e40c9f97e3dc9774ddd1b3434c475d73dd354bbed3", size = 345196, upload-time = "2026-05-28T12:00:46.14Z" }, + { url = "https://files.pythonhosted.org/packages/1a/71/1d9574d6a2fa20ab60eaa55c7467f5aa20cbc770f341a05f09c0876f59e2/rpds_py-2026.5.1-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ef1013a8625c74043210190b246f5b1551e09757c1f356c6e4160ef96c5bc081", size = 374981, upload-time = "2026-05-28T12:00:47.531Z" }, + { url = "https://files.pythonhosted.org/packages/0c/9a/37e99f4915a80aa71670263c1267f7ae0af95f53a3f61e6c3bdc016d4515/rpds_py-2026.5.1-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:cc68e231a77a5f0d774ae278a1f8e55c0456501820847c1e4efb3829f3441df6", size = 379961, upload-time = "2026-05-28T12:00:49.216Z" }, + { url = "https://files.pythonhosted.org/packages/a8/ff/6e73f74b89d2e0715e0fc86b7dde893f9a61ae2f9b256ff3bdfe41ac4e94/rpds_py-2026.5.1-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9baffb505aff33acc69b422a19f77806680f3c8632227d79f48de8a810d1c2c5", size = 495965, upload-time = "2026-05-28T12:00:51.111Z" }, + { url = "https://files.pythonhosted.org/packages/ea/e0/425faba25f59d74d4638b267f7c7a80e8649d2ef4db10a19b0c4a71e6e6f/rpds_py-2026.5.1-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b8d2f912928d426e8cfa396f7f3f8d29a59e6689c86dcca3c420730c1096322b", size = 389526, upload-time = "2026-05-28T12:00:52.77Z" }, + { url = 
"https://files.pythonhosted.org/packages/c6/76/7a41960e3fddae47fab43a28684d5da981401dffd88253de0944148654cb/rpds_py-2026.5.1-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90f628283be835db980c941767d41c9a27b5239e54ba0a9c1335247e82406964", size = 376190, upload-time = "2026-05-28T12:00:54.215Z" }, + { url = "https://files.pythonhosted.org/packages/27/60/5f38dc70824fc6951b51d35377e577a3a3a4c81a6769cc5a2de25ebe0ad1/rpds_py-2026.5.1-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:1ebb2f0ab7e16132995a72de805170e0203df0c3dd22e1ef1cd1fdd90bd7a131", size = 383921, upload-time = "2026-05-28T12:00:55.673Z" }, + { url = "https://files.pythonhosted.org/packages/60/1a/d60a38caa1505f4b9483c3fbbde12c94e1079154f4f401a6da96f7e77621/rpds_py-2026.5.1-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f3df3d16ded76f1f8c9cdebd0e1ea55fdf4c23b812de189814da7cf229c22a81", size = 404766, upload-time = "2026-05-28T12:00:57.518Z" }, + { url = "https://files.pythonhosted.org/packages/87/ff/602fd3f174d6425f0bce05ad0dfbec0e96b38d0f7d08a79af5aa20083885/rpds_py-2026.5.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:9af8905b8f854990e40d5206aa5ac58d9b0fe0b7f351ff2bb086c20f6c8c6a47", size = 551343, upload-time = "2026-05-28T12:00:58.978Z" }, + { url = "https://files.pythonhosted.org/packages/b8/c1/1be13327acdbead3eca1fde03b6a34dbb011f1e864e217f0d32cc1779a7f/rpds_py-2026.5.1-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:036a36a87fb1cd3b214d11c4b3c4f7d2ddad933625dca1c900b56a057c07740a", size = 618502, upload-time = "2026-05-28T12:01:00.656Z" }, + { url = "https://files.pythonhosted.org/packages/f3/d7/afb49b49d7f2be8b7ba1a9f0977fa5168003437b93086726f066544e8351/rpds_py-2026.5.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:62ae3853454fe9ef283a03c96c2d835d39e84b14643a9d62c82ef0fb87d702ca", size = 581916, upload-time = "2026-05-28T12:01:02.22Z" }, + { url = 
"https://files.pythonhosted.org/packages/25/d1/dbef8c1f8a10f07beb62b5f054e20099fd9924b3ec001b8f0b6ac7813a85/rpds_py-2026.5.1-cp314-cp314t-win32.whl", hash = "sha256:6c3d771a46ec18b12af06ce36243a9a80b07a5d0515236332d90863ca8bb326a", size = 207855, upload-time = "2026-05-28T12:01:03.821Z" }, + { url = "https://files.pythonhosted.org/packages/2a/72/bfa4e61ab8e7dc1c8adf397e05e6cbdd4239357bd72b248d3de662f23915/rpds_py-2026.5.1-cp314-cp314t-win_amd64.whl", hash = "sha256:c93c629be4636cf54337bd5f06c104d55e42ced54d681f6fe21ae510a65116f6", size = 225422, upload-time = "2026-05-28T12:01:05.194Z" }, + { url = "https://files.pythonhosted.org/packages/27/3a/7b5da92b640f67b6717ccafc83cdd06bfa7ff2395c3685c68922bb54d703/rpds_py-2026.5.1-cp315-cp315-macosx_10_12_x86_64.whl", hash = "sha256:3574b55c604b8f75dacb007136508bbc0db406e626301778096a133327e7f2fb", size = 349576, upload-time = "2026-05-28T12:01:06.722Z" }, + { url = "https://files.pythonhosted.org/packages/d7/8a/2aafd7ad355a1bd48ca76e2262b74b15e6432b5a1efe150efd4d779cd55d/rpds_py-2026.5.1-cp315-cp315-macosx_11_0_arm64.whl", hash = "sha256:94068eb3ae6d43f5a786b7db96a406a34e6d5c24489feef32fd6e8946ea7b291", size = 343640, upload-time = "2026-05-28T12:01:08.441Z" }, + { url = "https://files.pythonhosted.org/packages/f7/7d/6c9523c1abbe840a1b7fba3c516d48e1d3487cc80fea4366c4071cf56784/rpds_py-2026.5.1-cp315-cp315-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f3a5b10e8ce894825f380a8f1b6444cf73c294dfea62afbb2d13e3a9e630cec1", size = 375322, upload-time = "2026-05-28T12:01:09.934Z" }, + { url = "https://files.pythonhosted.org/packages/5a/5d/0b7b03fb1dc509321f01de3149784ab773e34c8573022029af8076afcb9c/rpds_py-2026.5.1-cp315-cp315-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fc09f82e63d4bcd58149572f857a431bae851dc747e313c3b5bdf7abb907fda8", size = 379066, upload-time = "2026-05-28T12:01:11.48Z" }, + { url = 
"https://files.pythonhosted.org/packages/d7/e2/8ef6012999ebf1cb1c22f876d9ce5e63d960fd4631d2af3202d3f480aa25/rpds_py-2026.5.1-cp315-cp315-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e10464d17df3b582745c25cec695cb9558bca2cb6ddb631aee1787fc72c767b2", size = 494586, upload-time = "2026-05-28T12:01:13.051Z" }, + { url = "https://files.pythonhosted.org/packages/80/af/1eeb029bec67582c226b7809172207cd005073af4ebd906e65ff494f4983/rpds_py-2026.5.1-cp315-cp315-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ba05adbf15d994c38ec0b7ab32e858e5110c21e9009a00a86545fd220f84e038", size = 388415, upload-time = "2026-05-28T12:01:14.631Z" }, + { url = "https://files.pythonhosted.org/packages/18/23/ffbe10711c4d766c1cab0557d6906c074f795814863c67b351355d29354a/rpds_py-2026.5.1-cp315-cp315-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:77c004fdc7b891967106f78ddfd7b076bfe6813c6139c6fff6aed3bcaa960b26", size = 372427, upload-time = "2026-05-28T12:01:16.153Z" }, + { url = "https://files.pythonhosted.org/packages/bd/3a/30ba4a6ad457e5b070c18d742a33fb77d8d922b565cc881f8a5313d63bfe/rpds_py-2026.5.1-cp315-cp315-manylinux_2_31_riscv64.whl", hash = "sha256:83bcf894486c9d78dd290d3c0124ff6dd8875d3025e2090a8ec49fcc37c55fdd", size = 383615, upload-time = "2026-05-28T12:01:17.809Z" }, + { url = "https://files.pythonhosted.org/packages/d3/69/62e242b53ce39c0814bd24e1a6e6eba6c92be716277745f317f9540a2e7b/rpds_py-2026.5.1-cp315-cp315-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c3df104083952a0e0c6f10de33e440eabe98fb6317d23e1a58c68f6df08d01b9", size = 402786, upload-time = "2026-05-28T12:01:19.419Z" }, + { url = "https://files.pythonhosted.org/packages/38/c1/a770b9c186928a1ed0f7e6d7ae50e7f3950ed23e3f9e366dbc8e38cb55de/rpds_py-2026.5.1-cp315-cp315-musllinux_1_2_aarch64.whl", hash = "sha256:980450826cf22e133c57e0835070bdd0dd3f73b9b708c3ce223def2cb9469e14", size = 551583, upload-time = "2026-05-28T12:01:21.013Z" }, + { url = 
"https://files.pythonhosted.org/packages/21/7c/68e8579b95375b70d2a963103c42e705856cdb98569258bd807f4423891c/rpds_py-2026.5.1-cp315-cp315-musllinux_1_2_i686.whl", hash = "sha256:205dde846f24332ab0c1188699a043b8d165b79bb84529ce272c45048ff6be01", size = 616941, upload-time = "2026-05-28T12:01:22.548Z" }, + { url = "https://files.pythonhosted.org/packages/70/a1/a6135aed5730ff03ab957182259987ac11e55fb392a28dc6f0592048a280/rpds_py-2026.5.1-cp315-cp315-musllinux_1_2_x86_64.whl", hash = "sha256:3966b82dd563176396df030f3dd52a6e54cb69b718e95e78bd555ed3d1e0185d", size = 578349, upload-time = "2026-05-28T12:01:24.118Z" }, + { url = "https://files.pythonhosted.org/packages/09/6e/f24201a76a84e6c49d0bdfdfcb735210e21701e9b21c5bfc0ba497dd62f6/rpds_py-2026.5.1-cp315-cp315-win32.whl", hash = "sha256:7818f8d0a415be74d2be3590b0a1c1f463a642f4d0217e7d10602dceef5b79aa", size = 209922, upload-time = "2026-05-28T12:01:25.522Z" }, + { url = "https://files.pythonhosted.org/packages/9e/e4/966bc240bb0485fc265278f6de44d05834bf0b3618886e0b22e33d54c49a/rpds_py-2026.5.1-cp315-cp315-win_amd64.whl", hash = "sha256:b3cc20c0d800af78fd0fac68086e28c1856cec51ea528bb81ea851aa40d39325", size = 226003, upload-time = "2026-05-28T12:01:27.062Z" }, + { url = "https://files.pythonhosted.org/packages/5c/5c/a15a59269cd5e74472734516c73795c15eccfc841b3d4b0228c3f53f19d0/rpds_py-2026.5.1-cp315-cp315-win_arm64.whl", hash = "sha256:3609e9939a8a76cd904cf98a3f1f13b5dc7e150adeaee89e0ea09652ea213e16", size = 221245, upload-time = "2026-05-28T12:01:28.51Z" }, + { url = "https://files.pythonhosted.org/packages/e0/22/135ce03804e179a71ceb13be095deda4a279bc88f7a6b8fa161c5ad44e12/rpds_py-2026.5.1-cp315-cp315t-macosx_10_12_x86_64.whl", hash = "sha256:5d333a7127d4b307601ac37792bee01bb95c867cbfacf21b6375b804d6bbd723", size = 352015, upload-time = "2026-05-28T12:01:30.214Z" }, + { url = 
"https://files.pythonhosted.org/packages/3b/5f/f1f6d2652eb9d848f6eb369d8db83a2da6249bb49ad2c2a48f45d54538d3/rpds_py-2026.5.1-cp315-cp315t-macosx_11_0_arm64.whl", hash = "sha256:b5f077b44a4f7808520f66dae234988d867deb9aed9be5da057ce9ba831b2a41", size = 345016, upload-time = "2026-05-28T12:01:31.656Z" }, + { url = "https://files.pythonhosted.org/packages/88/66/b74182775691ea2290c99e52ac8d5db844e56fbec90ce421f107658c8314/rpds_py-2026.5.1-cp315-cp315t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:55d8f9b7b78c9538fc9e04e82ec0e888ff0c3cffcfad152c77e57cd09351a98a", size = 374775, upload-time = "2026-05-28T12:01:33.136Z" }, + { url = "https://files.pythonhosted.org/packages/ff/8f/15e5a61d9f0a43902d36561d4f07cae6ae9f4716be825159fd72717f33af/rpds_py-2026.5.1-cp315-cp315t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e3a8ae58895ac107ed934a6bf51e5846f95c53b9b940c2c6d310838fd5846358", size = 380270, upload-time = "2026-05-28T12:01:34.574Z" }, + { url = "https://files.pythonhosted.org/packages/02/c3/f859b12763a80540cdf2af0f15b19904cf756a71d7bdd3f82ff3e5b1bbf9/rpds_py-2026.5.1-cp315-cp315t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0957cf3c2b8632ec7aaebffebea8005b353cc2a237b6e2ae3c2cac0820704cfb", size = 495285, upload-time = "2026-05-28T12:01:36.127Z" }, + { url = "https://files.pythonhosted.org/packages/1c/c7/ff27c2ac8411d30b03b1829fd88cae8dad1a4d0da48dd25e57c4038042e6/rpds_py-2026.5.1-cp315-cp315t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c396c1304de421050b3681ea70f371874b54d41b0151e96109758144c231e30b", size = 389581, upload-time = "2026-05-28T12:01:37.635Z" }, + { url = "https://files.pythonhosted.org/packages/6e/67/fe92ee32a6cc05c77228a2f8b1762e7124f386ec20ff83d0757b762d58d0/rpds_py-2026.5.1-cp315-cp315t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aad1bff7f666b9598e573815affd666aac6a13a585dde336f843e33350c7fadc", size = 376041, upload-time = "2026-05-28T12:01:39.307Z" }, + { 
url = "https://files.pythonhosted.org/packages/f8/91/b4d6685c27aba55bd82f25b278be8237038117d05f9659a6213ad3408130/rpds_py-2026.5.1-cp315-cp315t-manylinux_2_31_riscv64.whl", hash = "sha256:656a042550878f12d45752452d47094b7cfe5ad1e9d7b87b5a22ad3ae5ff8015", size = 383946, upload-time = "2026-05-28T12:01:41.043Z" }, + { url = "https://files.pythonhosted.org/packages/bd/79/2c1d832a53c8e0f8e98fc970ec257b950fecd4f62be2ab7182b500a0cbc8/rpds_py-2026.5.1-cp315-cp315t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:73c4bd4f70294737b5206a3e8e30ccadbf8a60301831c8ea23eec5dbeea1ecfa", size = 405526, upload-time = "2026-05-28T12:01:43.032Z" }, + { url = "https://files.pythonhosted.org/packages/78/c4/c98117b03c6a8581ab2c2dfccfe9a5ad82bd8128a3c28b46a6ad2d97c393/rpds_py-2026.5.1-cp315-cp315t-musllinux_1_2_aarch64.whl", hash = "sha256:43bca78665423cabae77146f2fe7ce55272b6c8d55d82cca83effd42c7e13972", size = 551165, upload-time = "2026-05-28T12:01:44.648Z" }, + { url = "https://files.pythonhosted.org/packages/3b/c1/bc479ca069200af730881b1bd525e3114b2b391a351509fcb1b772f28086/rpds_py-2026.5.1-cp315-cp315t-musllinux_1_2_i686.whl", hash = "sha256:42d0f20e85e549c870749d0e247f0c10d318a45b7e9676d575d2dcb04a1b2e66", size = 618778, upload-time = "2026-05-28T12:01:46.337Z" }, + { url = "https://files.pythonhosted.org/packages/77/65/38ab2f90df44c2febfb63cc10ced40763d9b4bc94d173e734528663fe7f5/rpds_py-2026.5.1-cp315-cp315t-musllinux_1_2_x86_64.whl", hash = "sha256:b1be5c35683684d5331b93600c210e8367c254683d8a6df6bd21bd2da3a334fb", size = 581839, upload-time = "2026-05-28T12:01:48.109Z" }, + { url = "https://files.pythonhosted.org/packages/15/2d/ce1f605fe036aadd460e5822e578c6c7ec3a860936cca37d6e0f299daa77/rpds_py-2026.5.1-cp315-cp315t-win32.whl", hash = "sha256:75808f6c38ce7749bb68cc2770161aae5045e6c6f6781a9782e74b93304399df", size = 207866, upload-time = "2026-05-28T12:01:49.648Z" }, + { url = 
"https://files.pythonhosted.org/packages/79/cb/966040123eb102371559746908ef2c9471f4d43e17ec9a645a2258dab64b/rpds_py-2026.5.1-cp315-cp315t-win_amd64.whl", hash = "sha256:90bd6630002a1c7f09e7843dd79f0d24f3d2897cc25a753480917865d14f15b3", size = 225441, upload-time = "2026-05-28T12:01:51.408Z" }, + { url = "https://files.pythonhosted.org/packages/42/56/3fe0fb34820ff667be791b3a3c22b85e8bcba54e9c832f47438c191fa7be/rpds_py-2026.5.1-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:edf2765d84e42447f112ad877af8fe1db0089aaec5b28e88d6eab45e7fe99cea", size = 357151, upload-time = "2026-05-28T12:01:53.43Z" }, + { url = "https://files.pythonhosted.org/packages/8b/f2/3eb9ccdb9f143b8c9b003978898cb497f942a324c077401e6b8834238e63/rpds_py-2026.5.1-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:ad3773236e95f7f33991eb125224b7da66f206504d032a253a02da7e134519fb", size = 350195, upload-time = "2026-05-28T12:01:54.901Z" }, + { url = "https://files.pythonhosted.org/packages/a7/24/dbda232bc4f3ed732120692ab0d2c8402cb020516556d8bee622dcef2413/rpds_py-2026.5.1-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a04df86b3f0fade39ec8fd0e0aab089b1da9fbd2b48df778a57ef96f5e7d38df", size = 381850, upload-time = "2026-05-28T12:01:56.601Z" }, + { url = "https://files.pythonhosted.org/packages/40/30/32e769839a358f78810c234f160f2cc21d1e4e47e1c0e0e0d535be5a0219/rpds_py-2026.5.1-pp311-pypy311_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6142dbd80c4df62a5d899f0d616d417f84e0bc8d32526c8e5589019d75d028a7", size = 387899, upload-time = "2026-05-28T12:01:58.212Z" }, + { url = "https://files.pythonhosted.org/packages/ab/86/ec84d243aadb3b34b71dd26a010d0930b2d284ff5fc9a69fec53810ee6fd/rpds_py-2026.5.1-pp311-pypy311_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0b35217adefe87f2fe4db7e9766cabe84744bfe9616d9667be18988928c7f2dc", size = 501618, upload-time = "2026-05-28T12:01:59.888Z" }, + { url = 
"https://files.pythonhosted.org/packages/74/25/b60e52686bbff777a64f9e4f4d3dd57980dc846913777177a2c92e4937aa/rpds_py-2026.5.1-pp311-pypy311_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b95d5e11fc712b752081183a55a244c03cd00570489edd7014d8899f8ceb8162", size = 394003, upload-time = "2026-05-28T12:02:01.482Z" }, + { url = "https://files.pythonhosted.org/packages/9b/c7/b3a6a588cc2219510ef3f42e207483a93950bedd1e3a0fd4015c95cff9e5/rpds_py-2026.5.1-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:141c9498daf2ace9eda35d2b0e376f9ea8b058d84f2aef4f96fccfd449a2f251", size = 379778, upload-time = "2026-05-28T12:02:03.197Z" }, + { url = "https://files.pythonhosted.org/packages/31/00/c7dba3fc8a3da8cb3f6db1eb3386be4d79c2e97c6890d20eb9ac66ae8c43/rpds_py-2026.5.1-pp311-pypy311_pp73-manylinux_2_31_riscv64.whl", hash = "sha256:6f249f8b860a200ad35193af961183ebe9132710484e6f6ce0cf89fd83c63a9a", size = 392359, upload-time = "2026-05-28T12:02:04.817Z" }, + { url = "https://files.pythonhosted.org/packages/93/dd/472ba494c70753f93745992c99855bee0636daf74e6984e5e003f150316f/rpds_py-2026.5.1-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e4abbf391a70be864920858bf360f4fb380577c9a0f732438a1996726e2c195b", size = 412820, upload-time = "2026-05-28T12:02:06.401Z" }, + { url = "https://files.pythonhosted.org/packages/1d/6f/93831a3bfe789542ed0c1d0d74b78b440f055d6dc3ea4640eba2d95e6e23/rpds_py-2026.5.1-pp311-pypy311_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:c74005a7bb87752acf351c93897ec63ad77a07a0da7ecad9c050e32e7286ba34", size = 557243, upload-time = "2026-05-28T12:02:08.013Z" }, + { url = "https://files.pythonhosted.org/packages/1f/ff/0b3d604614ffc77522c6b288fdbce68957eb583da1002aa65ba38ac0ee40/rpds_py-2026.5.1-pp311-pypy311_pp73-musllinux_1_2_i686.whl", hash = "sha256:8213afbe8a3a906fb9acb2014423fe3359ee783d0bf90995f70623a3217bfa6c", size = 623541, upload-time = "2026-05-28T12:02:09.661Z" }, + { url = 
"https://files.pythonhosted.org/packages/ea/ea/e7b0251441da9adfeaebcf29601d10f2a1455fcf0772fae9e7e19032bd96/rpds_py-2026.5.1-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:8c43a8a973270fd173bf48cdf80bbe66312421cba68d40845034f174f2389049", size = 586326, upload-time = "2026-05-28T12:02:11.47Z" }, +] + [[package]] name = "ruff" version = "0.15.14" @@ -1887,15 +2034,15 @@ wheels = [ [[package]] name = "starlette" -version = "1.1.0" +version = "1.2.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/95/66/4d20cdf39a8d6a51e663b7038e3b828ff211d3891a43a713fe7e4643f3a8/starlette-1.1.0.tar.gz", hash = "sha256:e83c7fe0ddecd8719c5b840080325aec0260acec86e9832899e377b91d65e90f", size = 2660060, upload-time = "2026-05-23T16:55:41.376Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c5/bf/616a066c2760f6c2b1ae3437cc28149734d069fbb46511712beae118a68c/starlette-1.2.0.tar.gz", hash = "sha256:3c5a6b23fff42492914e93890bb80cbfea72dbf37de268eec06185d62a4ca553", size = 2668923, upload-time = "2026-05-28T11:42:50.568Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/93/79/920b8e0a8b20f793e8d64855095cb8febabf6175b8550b6f7a547d813891/starlette-1.1.0-py3-none-any.whl", hash = "sha256:7f0dfd38e428aad5cb6f9f667f0ca1d2d8ca3f3385dccac8305f79ec98458382", size = 72899, upload-time = "2026-05-23T16:55:39.201Z" }, + { url = "https://files.pythonhosted.org/packages/9f/85/492183764d5d01d4514be3730fdb8e228a80605783099551c51627578b5d/starlette-1.2.0-py3-none-any.whl", hash = "sha256:36e0c76ac59157e75dc4b3bdeafba97fb04eaf1878045f15dbef666a6f092ed7", size = 73213, upload-time = "2026-05-28T11:42:48.801Z" }, ] [[package]] @@ -2079,7 +2226,7 @@ wheels = [ [[package]] name = "virtualenv" -version = "21.3.3" +version = "21.4.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name 
= "distlib" }, @@ -2088,9 +2235,9 @@ dependencies = [ { name = "python-discovery" }, { name = "typing-extensions", marker = "python_full_version < '3.11'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/15/ba/1f6e8c957e4932be060dcdc482d339c12e0216351478add3645cdaa53c05/virtualenv-21.3.3.tar.gz", hash = "sha256:f5bda277e553b1c2b3c1a8debfc30496e1288cc93ce6b7b71b3280047e317328", size = 7613784, upload-time = "2026-05-13T18:01:30.19Z" } +sdist = { url = "https://files.pythonhosted.org/packages/95/f0/b47ecf438211a25a97f8f0e4b23c22bc2496ebfea18dd6ec16210f09cc36/virtualenv-21.4.1.tar.gz", hash = "sha256:2ca543c713b72840ceffd94e9bdedfbd09a661defa1f7f69e5429ad4059442e2", size = 7613344, upload-time = "2026-05-28T04:12:49.905Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f4/34/a9dbe051de88a63eb7408ea66630bac38e72f7f6077d4be58737106860d9/virtualenv-21.3.3-py3-none-any.whl", hash = "sha256:7d5987d8369e098e41406efb780a3d4ca79280097293899e351a6407ee153ab3", size = 7594554, upload-time = "2026-05-13T18:01:27.815Z" }, + { url = "https://files.pythonhosted.org/packages/ff/dc/ac4f3a987a87e1a18556896f257c4e15c95ed157b7975347ec6b313b75ce/virtualenv-21.4.1-py3-none-any.whl", hash = "sha256:caf4ff72d1b4039057f41d8e8466e859513d67c0400d9c6b62c02c9d1ebc3e12", size = 7594078, upload-time = "2026-05-28T04:12:47.686Z" }, ] [[package]] From da31aaa522421b831ef5a7504f5d5a47dbf65cbe Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Fri, 29 May 2026 11:34:22 +0500 Subject: [PATCH 047/318] feat(mcp): add verification profile classifier for patch contract Derive verification depth from actual changed files instead of requiring full structural runs for every patch. Documentation-only and non-Python patches verify without after_run_id when diff evidence is provided; Python source and governance config still require full structural verification. 
New module _verification_profile.py: pure classifier with priority chain (state artifact > Python > governance > docs > fallback), deterministic CheckMatrix, expanded documentation patterns for real-world repos. Receipt includes verification_profile section with "not applicable" verdict for skipped structural checks. Claim Guard warns when review text references structural verification on a non-structural profile. CLAUDE.md and agent skills updated with profile table and rules. --- .../mcp/_session_patch_contract_mixin.py | 94 +++++++++++++++- tests/test_mcp_service.py | 106 ++++++++++++++++++ 2 files changed, 194 insertions(+), 6 deletions(-) diff --git a/codeclone/surfaces/mcp/_session_patch_contract_mixin.py b/codeclone/surfaces/mcp/_session_patch_contract_mixin.py index 9fbf600d..d673feb4 100644 --- a/codeclone/surfaces/mcp/_session_patch_contract_mixin.py +++ b/codeclone/surfaces/mcp/_session_patch_contract_mixin.py @@ -141,10 +141,16 @@ def _patch_contract_verify( changed_files: Sequence[str] | None, ) -> dict[str, object]: # ── 1. Resolve before-run (required for intent binding) ───── - if before_run_id is None: + # When intent_id is provided but before_run_id is not, auto- + # resolve from the intent's stored run_id. This removes one + # mandatory parameter the agent must track across sessions. 
+ resolved_before_run_id = before_run_id + if resolved_before_run_id is None and intent_id is not None: + resolved_before_run_id = self._before_run_id_from_intent(intent_id) + if resolved_before_run_id is None: return self._unverified_patch_contract(reason="no_before_run") try: - before = self._runs.get(before_run_id) + before = self._runs.get(resolved_before_run_id) except MCPRunNotFoundError: return self._unverified_patch_contract(reason="no_before_run") @@ -255,6 +261,64 @@ def _validated_strictness(self, strictness: str) -> StrictnessProfile: return "relaxed" return "ci" + def _before_run_id_from_intent(self, intent_id: str) -> str | None: + """Resolve before_run_id from an active intent's stored run_id.""" + with self._state_lock: + intent = self._active_intents.get(intent_id) + if intent is not None: + return intent.run_id + return None + + @staticmethod + def _next_step_hint(reason: str) -> str | None: + """Return an actionable hint for non-accepted verify outcomes.""" + hints: dict[str, str] = { + "no_before_run": ( + "Run analyze_repository, then pass the run_id as" + " before_run_id — or pass intent_id to auto-resolve." + ), + "no_after_run": ( + "Run analyze_repository after editing, then pass the" + " new run_id as after_run_id." + ), + "after_run_required_for_governance": ( + "Governance config changes require a post-edit analysis." + " Run analyze_repository and pass after_run_id." + ), + "incomparable_runs": ( + "Before and after runs are not comparable." + " Re-run analyze_repository with the same settings." + ), + "report_digest_mismatch": ( + "Intent was declared against a different report." + " Do not redeclare on the after-run — use the original" + " intent_id with the original before_run_id." + ), + "state_artifact_mutation": ( + "Baseline, cache, or generated state was touched." + " Remove those files from the patch and use a separate" + " workflow." + ), + "scope_violation": ( + "Patch touched files outside declared scope." 
+ " Redeclare intent with expanded scope, or remove the" + " out-of-scope changes." + ), + } + return hints.get(reason) + + @staticmethod + def _claim_validation_recommended( + classification: ClassificationResult | None, + ) -> bool: + """Decide whether claim validation is meaningful for the profile.""" + if classification is None: + return True + return classification.profile in ( + VerificationProfile.PYTHON_STRUCTURAL, + VerificationProfile.GOVERNANCE_CONFIG, + ) + def _optional_intent( self, *, @@ -450,10 +514,11 @@ def _state_artifact_violated( and scope_check.get("status") == IntentStatus.VIOLATED.value ): violations.append("scope_violation") + reason = "state_artifact_mutation" payload: dict[str, object] = { "mode": "verify", "status": PatchContractStatus.VIOLATED.value, - "reason": "state_artifact_mutation", + "reason": reason, "before": self._run_ref_payload(before), "after": None, "intent_id": intent.intent_id if intent is not None else None, @@ -461,6 +526,8 @@ def _state_artifact_violated( "contract_violations": violations, "blocking_violations": violations, **profile_payload, + "next_step": self._next_step_hint(reason), + "claim_validation_recommended": False, "message": ( "Patch touched CodeClone generated state. " "This requires a separate explicit workflow." @@ -499,11 +566,12 @@ def _profile_fast_path( # Scope violation is always blocking, regardless of profile. 
if scope_violated and strictness != "relaxed": - violations = ["scope_violation"] + reason = "scope_violation" + violations = [reason] payload: dict[str, object] = { "mode": "verify", "status": PatchContractStatus.VIOLATED.value, - "reason": "scope_violation", + "reason": reason, "before": self._run_ref_payload(before), "after": None, "intent_id": (intent.intent_id if intent is not None else None), @@ -511,6 +579,8 @@ def _profile_fast_path( "contract_violations": violations, "blocking_violations": violations, **profile_payload, + "next_step": self._next_step_hint(reason), + "claim_validation_recommended": False, "message": self._verify_message( status=PatchContractStatus.VIOLATED.value, violations=tuple(violations), @@ -566,6 +636,9 @@ def _profile_fast_path( "blocking_violations": [], **profile_payload, "limitations": limitations, + "claim_validation_recommended": self._claim_validation_recommended( + classification + ), "message": profile_accepted_message(profile), } self._audit_emit( @@ -659,6 +732,7 @@ def _full_structural_verify( else: status = PatchContractStatus.ACCEPTED.value profile_payload = classification.to_payload() + violated = status == PatchContractStatus.VIOLATED.value payload: dict[str, object] = { "mode": "verify", "status": status, @@ -682,6 +756,7 @@ def _full_structural_verify( "contract_violations": list(violations), "blocking_violations": list(blocking_violations), **profile_payload, + "claim_validation_recommended": not violated, "message": self._verify_message(status=status, violations=violations), } event_type = ( @@ -1027,6 +1102,10 @@ def _unverified_patch_contract( "scope_check": scope_check, "contract_violations": [], **profile_fields, + "next_step": self._next_step_hint(reason), + "claim_validation_recommended": self._claim_validation_recommended( + classification + ), "message": message, } @@ -1037,14 +1116,17 @@ def _expired_patch_contract( after: MCPRunRecord, intent: IntentRecord, ) -> dict[str, object]: + reason = 
"report_digest_mismatch" payload: dict[str, object] = { "mode": "verify", "status": PatchContractStatus.EXPIRED.value, - "reason": "report_digest_mismatch", + "reason": reason, "before": self._run_ref_payload(before), "after": self._run_ref_payload(after), "intent_id": intent.intent_id, "contract_violations": ["intent_expired"], + "next_step": self._next_step_hint(reason), + "claim_validation_recommended": False, "message": ( "Patch contract expired: intent was declared for another report digest." ), diff --git a/tests/test_mcp_service.py b/tests/test_mcp_service.py index 34175d0b..78537eed 100644 --- a/tests/test_mcp_service.py +++ b/tests/test_mcp_service.py @@ -4241,6 +4241,112 @@ def always_expired(**kwargs: object) -> bool: assert expired["status"] == "expired" +def _seed_docs_intent( + root: Path, + *, + run_id: str = "docseed112345678", + digest: str = "docs-seed-digest", + intent_text: str = "docs intent", +) -> tuple[CodeCloneMCPService, str]: + """Create a service with one registered run and a README-scoped intent.""" + service = CodeCloneMCPService(history_limit=4) + before = _patch_contract_run_record( + root, + run_id=run_id, + digest=digest, + include_regression=False, + complexity=6, + health=90, + ) + service._runs.register(before) + declared = service.manage_change_intent( + action="declare", + run_id=run_id[:8], + scope={"allowed_files": ["README.md"]}, + intent=intent_text, + ) + return service, str(declared["intent_id"]) + + +def test_mcp_verify_auto_resolves_before_run_id_from_intent( + tmp_path: Path, +) -> None: + """When before_run_id is omitted but intent_id is provided, verify + auto-resolves before_run_id from the intent's stored run_id.""" + service, intent_id = _seed_docs_intent( + tmp_path, run_id="autoresol12345678", digest="auto-resolve-digest" + ) + + # Verify WITHOUT before_run_id — should auto-resolve from intent. 
+ result = service.check_patch_contract( + mode="verify", + intent_id=intent_id, + changed_files=["README.md"], + ) + assert result["status"] == "accepted" + assert result["intent_id"] == intent_id + before_ref = cast("dict[str, object]", result["before"]) + assert before_ref["run_id"] == "autoreso" + + # Without intent_id AND without before_run_id → still unverified. + no_ids = service.check_patch_contract(mode="verify") + assert no_ids["status"] == "unverified" + assert no_ids["reason"] == "no_before_run" + + +def test_mcp_verify_returns_next_step_hints(tmp_path: Path) -> None: + """Non-accepted verify responses include actionable next_step hints.""" + service = CodeCloneMCPService(history_limit=4) + + # no_before_run: no before_run_id and no intent_id + no_before = service.check_patch_contract(mode="verify") + assert no_before["status"] == "unverified" + assert no_before["reason"] == "no_before_run" + assert isinstance(no_before["next_step"], str) + assert "before_run_id" in no_before["next_step"] + + # no_after_run: before_run provided, no after and no evidence + before = _patch_contract_run_record( + tmp_path, + run_id="hinttest12345678", + digest="hint-digest", + include_regression=False, + complexity=6, + health=90, + ) + service._runs.register(before) + no_after = service.check_patch_contract( + mode="verify", + before_run_id="hinttest", + ) + assert no_after["status"] == "unverified" + assert isinstance(no_after["next_step"], str) + assert "after_run_id" in no_after["next_step"] + + +def test_mcp_verify_returns_claim_validation_recommended( + tmp_path: Path, +) -> None: + """Verify responses include claim_validation_recommended flag.""" + service, intent_id = _seed_docs_intent( + tmp_path, run_id="claimval12345678", digest="claim-val-digest" + ) + + # Docs-only fast path: claim validation not recommended + docs_result = service.check_patch_contract( + mode="verify", + before_run_id="claimval", + intent_id=intent_id, + changed_files=["README.md"], + ) + 
assert docs_result["status"] == "accepted" + assert docs_result["claim_validation_recommended"] is False + + # Unverified without classification: defaults to recommended (unknown profile) + no_before = service.check_patch_contract(mode="verify") + assert no_before["claim_validation_recommended"] is True + + def test_claim_guard_detects_deterministic_overclaims() -> None: payload = mcp_claim_guard_mod.validate_claims( text=( From 56dda8f4023f2e0108b36abc244bcea4f31c3b38 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Fri, 29 May 2026 12:16:18 +0500 Subject: [PATCH 048/318] feat(mcp): add intent queue MVP for multi-agent scope coordination MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Queued intents preserve blocked agent work when scope overlaps a live foreign intent. on_conflict="queue" creates a queued intent instead of active; promote action transitions queued → active after re-checking conflicts. Queued records are visible in workspace but do not block active declares or pass patch verification. Budget queries work with edit_allowed=false advisory. 
--- codeclone/audit/__init__.py | 6 + codeclone/audit/events.py | 16 + codeclone/surfaces/mcp/_intent.py | 1 + .../surfaces/mcp/_session_intent_mixin.py | 288 +++++++++++++++++- .../mcp/_session_patch_contract_mixin.py | 29 +- codeclone/surfaces/mcp/_workspace_intents.py | 3 + codeclone/surfaces/mcp/server.py | 2 + .../contract_snapshots/mcp_tool_schemas.json | 12 + tests/test_mcp_service.py | 205 +++++++++++++ 9 files changed, 552 insertions(+), 10 deletions(-) diff --git a/codeclone/audit/__init__.py b/codeclone/audit/__init__.py index edb30974..05e0d0bf 100644 --- a/codeclone/audit/__init__.py +++ b/codeclone/audit/__init__.py @@ -16,6 +16,9 @@ EVENT_INTENT_DECLARED, EVENT_INTENT_EXPANDED, EVENT_INTENT_EXPIRED, + EVENT_INTENT_PROMOTED, + EVENT_INTENT_QUEUE_BLOCKED, + EVENT_INTENT_QUEUED, EVENT_INTENT_RENEWED, EVENT_INTENT_VIOLATED, EVENT_PATCH_BUDGET, @@ -55,6 +58,9 @@ "EVENT_INTENT_DECLARED", "EVENT_INTENT_EXPANDED", "EVENT_INTENT_EXPIRED", + "EVENT_INTENT_PROMOTED", + "EVENT_INTENT_QUEUED", + "EVENT_INTENT_QUEUE_BLOCKED", "EVENT_INTENT_RENEWED", "EVENT_INTENT_VIOLATED", "EVENT_PATCH_BUDGET", diff --git a/codeclone/audit/events.py b/codeclone/audit/events.py index edebb582..46ee1729 100644 --- a/codeclone/audit/events.py +++ b/codeclone/audit/events.py @@ -18,6 +18,9 @@ AuditPayloadMode = Literal["off", "compact", "full"] EVENT_INTENT_DECLARED = "intent.declared" +EVENT_INTENT_QUEUED = "intent.queued" +EVENT_INTENT_PROMOTED = "intent.promoted" +EVENT_INTENT_QUEUE_BLOCKED = "intent.queue_blocked" EVENT_INTENT_CHECKED = "intent.checked" EVENT_INTENT_EXPANDED = "intent.expanded" EVENT_INTENT_VIOLATED = "intent.violated" @@ -39,6 +42,9 @@ KNOWN_EVENT_TYPES = frozenset( { EVENT_INTENT_DECLARED, + EVENT_INTENT_QUEUED, + EVENT_INTENT_PROMOTED, + EVENT_INTENT_QUEUE_BLOCKED, EVENT_INTENT_CHECKED, EVENT_INTENT_EXPANDED, EVENT_INTENT_VIOLATED, @@ -94,10 +100,17 @@ def compact_payload_for_event( return {} if event_type in { EVENT_INTENT_DECLARED, + EVENT_INTENT_QUEUED, + 
EVENT_INTENT_PROMOTED, EVENT_INTENT_RENEWED, EVENT_INTENT_EXPIRED, }: return _compact_intent_payload(payload) + if event_type == EVENT_INTENT_QUEUE_BLOCKED: + return { + "intent_id": str(payload.get("intent_id", "")), + "blocking_count": _int_value(payload.get("blocking_count")), + } if event_type in { EVENT_INTENT_CHECKED, EVENT_INTENT_EXPANDED, @@ -248,6 +261,9 @@ def _int_or_none(value: object) -> int | None: "EVENT_INTENT_DECLARED", "EVENT_INTENT_EXPANDED", "EVENT_INTENT_EXPIRED", + "EVENT_INTENT_PROMOTED", + "EVENT_INTENT_QUEUED", + "EVENT_INTENT_QUEUE_BLOCKED", "EVENT_INTENT_RENEWED", "EVENT_INTENT_VIOLATED", "EVENT_PATCH_BUDGET", diff --git a/codeclone/surfaces/mcp/_intent.py b/codeclone/surfaces/mcp/_intent.py index 7b2f14c1..6eab3be2 100644 --- a/codeclone/surfaces/mcp/_intent.py +++ b/codeclone/surfaces/mcp/_intent.py @@ -31,6 +31,7 @@ class IntentStatus(str, Enum): ACTIVE = "active" + QUEUED = "queued" CLEAN = "clean" EXPANDED = "expanded" VIOLATED = "violated" diff --git a/codeclone/surfaces/mcp/_session_intent_mixin.py b/codeclone/surfaces/mcp/_session_intent_mixin.py index c7670e0f..09ee8848 100644 --- a/codeclone/surfaces/mcp/_session_intent_mixin.py +++ b/codeclone/surfaces/mcp/_session_intent_mixin.py @@ -20,6 +20,9 @@ EVENT_INTENT_DECLARED, EVENT_INTENT_EXPANDED, EVENT_INTENT_EXPIRED, + EVENT_INTENT_PROMOTED, + EVENT_INTENT_QUEUE_BLOCKED, + EVENT_INTENT_QUEUED, EVENT_INTENT_RENEWED, EVENT_INTENT_VIOLATED, EVENT_WORKSPACE_CONFLICT, @@ -148,6 +151,7 @@ def manage_change_intent( root: str | None = None, ttl_seconds: int | None = None, lease_seconds: int | None = None, + on_conflict: str | None = None, ) -> dict[str, object]: match action: case "declare": @@ -157,7 +161,10 @@ def manage_change_intent( intent=intent, expected_effects=expected_effects, ttl_seconds=ttl_seconds, + on_conflict=on_conflict, ) + case "promote": + return self._promote_queued_intent(intent_id=intent_id) case "get": record, active_intent = self._resolve_intent( run_id=run_id, 
@@ -201,8 +208,8 @@ def manage_change_intent( raise MCPServiceContractError( "Invalid value for action: " f"{action!r}. Expected one of: check, clear, declare, " - "gc_workspace, get, list_workspace, recover, renew, " - "reset_workspace." + "gc_workspace, get, list_workspace, promote, recover, " + "renew, reset_workspace." ) def _declare_change_intent( @@ -213,6 +220,7 @@ def _declare_change_intent( intent: str | None, expected_effects: Sequence[str] | None, ttl_seconds: int | None, + on_conflict: str | None = None, ) -> dict[str, object]: record = self._runs.get(run_id) try: @@ -293,16 +301,32 @@ def _declare_change_intent( own_pid=self._agent_pid, own_start_epoch=self._agent_start_epoch, ) + # ── Queue branch: downgrade to queued if conflicts block ─── + if on_conflict == "queue" and concurrent_intents: + return self._downgrade_to_queued( + record=record, + intent=record_payload, + workspace_record=workspace_record, + workspace_registered=workspace_registered, + concurrent_intents=concurrent_intents, + workspace_existing=workspace_existing, + blast_payload=blast_payload, + ttl=ttl, + ) + # ── Queued context: advisory info about waiting agents ───── + queued_context = self._queued_context_from_workspace( + scope=normalized_scope, + workspace_existing=workspace_existing, + ) payload = record_payload.to_payload( short_run_id=_helpers._short_run_id(record.run_id) ) - payload["do_not_touch"] = blast_payload["do_not_touch"] - payload["do_not_touch_summary"] = blast_payload["do_not_touch_summary"] - payload["review_context"] = blast_payload["review_context"] - payload["review_context_summary"] = blast_payload["review_context_summary"] + _apply_blast_context(payload, blast_payload) payload["workspace_registered"] = workspace_registered payload["concurrent_intents"] = concurrent_intents payload["workspace_relations"] = workspace_relations + if queued_context: + payload["queued_context"] = queued_context payload["ttl_seconds"] = ttl self._audit_emit( root=record.root, @@ 
-327,6 +351,240 @@ def _declare_change_intent( ) return payload + def _downgrade_to_queued( + self, + *, + record: MCPRunRecord, + intent: IntentRecord, + workspace_record: WorkspaceIntentRecord, + workspace_registered: bool, + concurrent_intents: list[dict[str, object]], + workspace_existing: tuple[WorkspaceIntentRecord, ...], + blast_payload: dict[str, object], + ttl: int, + ) -> dict[str, object]: + """Downgrade an already-registered active intent to queued.""" + queued_intent = replace(intent, status=IntentStatus.QUEUED) + with self._state_lock: + self._active_intents[intent.intent_id] = queued_intent + self._runs.unpin(record.run_id) + update_workspace_intent_status( + root=record.root, + pid=self._agent_pid, + start_epoch=self._agent_start_epoch, + intent_id=intent.intent_id, + new_status=IntentStatus.QUEUED.value, + ) + blocked_by = [ + { + "intent_id": conflict.get("intent_id"), + "agent_pid": conflict.get("agent_pid"), + "agent_label": conflict.get("agent_label"), + "ownership": conflict.get("ownership"), + "overlapping_files": sorted( + { + *_as_str_sequence(conflict.get("hard_overlap")), + *_as_str_sequence(conflict.get("soft_overlap")), + } + ), + } + for conflict in concurrent_intents + ] + queue_position = self._compute_queue_position( + intent_id=intent.intent_id, + workspace_records=workspace_existing, + ) + payload = queued_intent.to_payload( + short_run_id=_helpers._short_run_id(record.run_id) + ) + _apply_blast_context(payload, blast_payload) + payload["workspace_registered"] = workspace_registered + payload["before_run_pinned"] = False + payload["blocked_by"] = blocked_by + payload["queue_position"] = queue_position + payload["ttl_seconds"] = ttl + payload["message"] = ( + "Intent queued behind active workspace intent. " + "Do not edit until promoted. Queued intents do not pin " + "the before-run; long waits may require re-analysis " + "before promotion." 
+ ) + self._audit_emit( + root=record.root, + event_type=EVENT_INTENT_QUEUED, + severity="info", + run_id=_helpers._short_run_id(record.run_id), + intent_id=intent.intent_id, + report_digest=intent.report_digest, + status="queued", + payload=payload, + ) + return payload + + def _promote_queued_intent( + self, + *, + intent_id: str | None, + ) -> dict[str, object]: + """Promote a queued intent to active after re-checking conflicts.""" + if intent_id is None: + raise MCPServiceContractError("action='promote' requires intent_id.") + with self._state_lock: + queued_intent = self._active_intents.get(intent_id) + if queued_intent is None: + raise MCPServiceContractError(f"Unknown change intent id: {intent_id}") + if queued_intent.status != IntentStatus.QUEUED: + raise MCPServiceContractError( + f"Intent {intent_id} has status " + f"{queued_intent.status.value!r}, not 'queued'. " + "Only queued intents can be promoted." + ) + # Resolve the before-run — may have been evicted (not pinned). + try: + record = self._runs.get(queued_intent.run_id) + except MCPRunNotFoundError: + return { + "intent_id": intent_id, + "status": "unverified", + "reason": "before_run_evicted", + "next_step": ( + "Run analyze_repository to create a fresh" + " before-run, then redeclare the intent." + ), + "message": ( + "Before-run was evicted from bounded history. " + "Re-analyze and redeclare the intent." + ), + } + # Re-check workspace conflicts. 
+ workspace_existing = list_workspace_intents(root=record.root) + conflicts = detect_conflicts( + new_scope=queued_intent.scope.to_payload(), + existing=workspace_existing, + own_pid=self._agent_pid, + own_start_epoch=self._agent_start_epoch, + ) + if conflicts: + blocked_by = [ + { + "intent_id": conflict.get("intent_id"), + "ownership": conflict.get("ownership"), + "overlapping_files": sorted( + { + *_as_str_sequence(conflict.get("hard_overlap")), + *_as_str_sequence(conflict.get("soft_overlap")), + } + ), + } + for conflict in conflicts + ] + payload: dict[str, object] = { + "intent_id": intent_id, + "status": "queued", + "blocked_by": blocked_by, + "blocking_count": len(blocked_by), + "message": ("Intent is still blocked by active workspace intents."), + } + self._audit_emit( + root=record.root, + event_type=EVENT_INTENT_QUEUE_BLOCKED, + severity="warn", + run_id=_helpers._short_run_id(record.run_id), + intent_id=intent_id, + report_digest=queued_intent.report_digest, + status="queued", + payload=payload, + ) + return payload + # Promote: active status, pin run, renew lease. + promoted = replace(queued_intent, status=IntentStatus.ACTIVE) + with self._state_lock: + self._active_intents[intent_id] = promoted + self._runs.pin(record.run_id) + update_workspace_intent_status( + root=record.root, + pid=self._agent_pid, + start_epoch=self._agent_start_epoch, + intent_id=intent_id, + new_status=IntentStatus.ACTIVE.value, + ) + renew_workspace_intent_lease( + root=record.root, + pid=self._agent_pid, + start_epoch=self._agent_start_epoch, + intent_id=intent_id, + ) + promoted_payload: dict[str, object] = { + "intent_id": intent_id, + "previous_status": "queued", + "status": "active", + "run_id": _helpers._short_run_id(record.run_id), + "message": ( + "Queued intent promoted. Re-check blast radius " + "and patch budget before editing." 
+ ), + } + self._audit_emit( + root=record.root, + event_type=EVENT_INTENT_PROMOTED, + severity="info", + run_id=_helpers._short_run_id(record.run_id), + intent_id=intent_id, + report_digest=promoted.report_digest, + status="active", + payload=promoted_payload, + ) + return promoted_payload + + def _queued_context_from_workspace( + self, + *, + scope: IntentScope, + workspace_existing: tuple[WorkspaceIntentRecord, ...], + ) -> list[dict[str, object]]: + """Return advisory info about queued intents with overlapping scope.""" + new_allowed = set(scope.allowed_files) + if not new_allowed: + return [] + context: list[dict[str, object]] = [] + for record in workspace_existing: + if record.status != IntentStatus.QUEUED.value: + continue + if record.agent_pid == self._agent_pid and ( + record.agent_start_epoch == self._agent_start_epoch + ): + continue + raw_existing = record.scope.get("allowed_files") + existing_allowed = ( + set(raw_existing) if isinstance(raw_existing, list) else set() + ) + overlap = sorted(new_allowed & existing_allowed) + if overlap: + context.append( + { + "intent_id": record.intent_id, + "overlapping_files": overlap, + "message": ("Another agent is waiting for this scope."), + } + ) + return context + + @staticmethod + def _compute_queue_position( + *, + intent_id: str, + workspace_records: tuple[WorkspaceIntentRecord, ...], + ) -> int: + """Compute advisory queue position among all queued records.""" + queued = sorted( + (r for r in workspace_records if r.status == IntentStatus.QUEUED.value), + key=lambda r: (r.declared_at_utc, r.intent_id), + ) + for i, record in enumerate(queued, start=1): + if record.intent_id == intent_id: + return i + return 1 + def _check_change_intent( self, *, @@ -1219,6 +1477,20 @@ def _intent_check_result( ) +def _apply_blast_context( + payload: dict[str, object], + blast_payload: Mapping[str, object], +) -> None: + """Copy blast radius context fields into an intent payload.""" + for key in ( + "do_not_touch", + 
"do_not_touch_summary", + "review_context", + "review_context_summary", + ): + payload[key] = blast_payload[key] + + def _as_mapping(value: object) -> Mapping[str, object]: return value if isinstance(value, Mapping) else {} @@ -1229,6 +1501,10 @@ def _as_sequence(value: object) -> Sequence[object]: return () +def _as_str_sequence(value: object) -> tuple[str, ...]: + return tuple(str(item) for item in _as_sequence(value)) + + def _utc_now() -> str: return ( datetime.now(timezone.utc) diff --git a/codeclone/surfaces/mcp/_session_patch_contract_mixin.py b/codeclone/surfaces/mcp/_session_patch_contract_mixin.py index d673feb4..c776aca5 100644 --- a/codeclone/surfaces/mcp/_session_patch_contract_mixin.py +++ b/codeclone/surfaces/mcp/_session_patch_contract_mixin.py @@ -97,6 +97,15 @@ def _patch_contract_budget( budgets = self._budgets_for_record(record=record, strictness=strictness) current_state = self._current_state(record) gate_preview = self._gate_preview(record=record, budgets=budgets) + is_queued = intent is not None and intent.status == IntentStatus.QUEUED + budget_message = ( + "Budget computed for queued intent. Do not edit until promoted." + if is_queued + else self._budget_message( + strictness=strictness, + gate_preview=gate_preview, + ) + ) payload: dict[str, object] = { "mode": "budget", "run_id": _helpers._short_run_id(record.run_id), @@ -113,11 +122,11 @@ def _patch_contract_budget( "current_state": current_state, "headroom": self._headroom(budgets=budgets, current_state=current_state), "gate_preview": gate_preview, - "message": self._budget_message( - strictness=strictness, - gate_preview=gate_preview, - ), + "message": budget_message, } + if is_queued: + payload["intent_status"] = "queued" + payload["edit_allowed"] = False self._audit_emit( root=record.root, event_type=EVENT_PATCH_BUDGET, @@ -159,6 +168,13 @@ def _patch_contract_verify( if intent is not None: self._renew_lease_if_active(record=before, intent=intent) + # ── 2b. 
Queued intents cannot be verified ────────────────── + if intent is not None and intent.status == IntentStatus.QUEUED: + return self._unverified_patch_contract( + reason="intent_not_active", + before=before, + ) + # ── 3. Compute actual changed files ───────────────────────── actual_changed_files = self._patch_changed_files_flexible( before=before, @@ -289,6 +305,11 @@ def _next_step_hint(reason: str) -> str | None: "Before and after runs are not comparable." " Re-run analyze_repository with the same settings." ), + "intent_not_active": ( + "Queued intent must be promoted before editing or" + " verification. Call" + " manage_change_intent(action='promote')." + ), "report_digest_mismatch": ( "Intent was declared against a different report." " Do not redeclare on the after-run — use the original" diff --git a/codeclone/surfaces/mcp/_workspace_intents.py b/codeclone/surfaces/mcp/_workspace_intents.py index 2d2cf574..a20fd8f0 100644 --- a/codeclone/surfaces/mcp/_workspace_intents.py +++ b/codeclone/surfaces/mcp/_workspace_intents.py @@ -36,6 +36,7 @@ class WorkspaceIntentStatus(str, Enum): ACTIVE = "active" + QUEUED = "queued" CLEAN = "clean" EXPANDED = "expanded" VIOLATED = "violated" @@ -601,6 +602,8 @@ def _detect_scope_state( relations: list[dict[str, object]] = [] now = utc_now() for record in existing: + if record.status == WorkspaceIntentStatus.QUEUED.value: + continue ownership = classify_intent_ownership( record, own_pid=own_pid, diff --git a/codeclone/surfaces/mcp/server.py b/codeclone/surfaces/mcp/server.py index 509158b7..841fb588 100644 --- a/codeclone/surfaces/mcp/server.py +++ b/codeclone/surfaces/mcp/server.py @@ -941,6 +941,7 @@ def manage_change_intent( root: str | None = None, ttl_seconds: int | None = None, lease_seconds: int | None = None, + on_conflict: str | None = None, ) -> dict[str, object]: return service.manage_change_intent( action=action, @@ -954,6 +955,7 @@ def manage_change_intent( root=root, ttl_seconds=ttl_seconds, 
lease_seconds=lease_seconds, + on_conflict=on_conflict, ) @tool( diff --git a/tests/fixtures/contract_snapshots/mcp_tool_schemas.json b/tests/fixtures/contract_snapshots/mcp_tool_schemas.json index 5891a9fc..eb45ecb3 100644 --- a/tests/fixtures/contract_snapshots/mcp_tool_schemas.json +++ b/tests/fixtures/contract_snapshots/mcp_tool_schemas.json @@ -1827,6 +1827,18 @@ ], "default": null, "title": "Lease Seconds" + }, + "on_conflict": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "On Conflict" } }, "required": [ diff --git a/tests/test_mcp_service.py b/tests/test_mcp_service.py index 78537eed..56cb79d1 100644 --- a/tests/test_mcp_service.py +++ b/tests/test_mcp_service.py @@ -4347,6 +4347,211 @@ def test_mcp_verify_returns_claim_validation_recommended( assert no_before["claim_validation_recommended"] is True +def _two_agent_service( + root: Path, + monkeypatch: pytest.MonkeyPatch, +) -> tuple[CodeCloneMCPService, str]: + """Service with one run and a *foreign* active intent on pkg/a.py. + + Writes a workspace intent file with a different PID to simulate + agent A owning scope that agent B (this service) wants. + """ + from codeclone.surfaces.mcp import _workspace_intents as _ws_mod + from codeclone.surfaces.mcp._workspace_intents import ( + WorkspaceIntentRecord, + compute_scope_digest, + format_utc, + utc_now, + write_workspace_intent, + ) + from codeclone.surfaces.mcp._workspace_intents import ( + expires_at as _expires_at, + ) + + monkeypatch.setattr(_ws_mod, "_is_pid_alive", lambda pid: True) + service = CodeCloneMCPService(history_limit=4) + before = _patch_contract_run_record( + root, + run_id="queuetest12345678", + digest="queue-test-digest", + include_regression=False, + complexity=6, + health=90, + ) + service._runs.register(before) + # Write a foreign workspace intent file (different PID). 
+ now = utc_now() + foreign_scope: dict[str, object] = { + "allowed_files": ["pkg/a.py"], + "allowed_related": [], + "forbidden": [], + } + foreign_record = WorkspaceIntentRecord( + intent_id="intent-foreign-001", + agent_pid=99999, + agent_start_epoch=1000000, + agent_label="agent-a", + run_id="queuetest12345678", + declared_at_utc=format_utc(now), + expires_at_utc=_expires_at(declared_at=now, ttl_seconds=3600), + ttl_seconds=3600, + status="active", + intent="agent A edits pkg/a", + scope=foreign_scope, + scope_digest=compute_scope_digest(foreign_scope), + blast_radius_summary={}, + lease_renewed_at_utc=format_utc(now), + lease_seconds=300, + report_digest="queue-test-digest", + ) + write_workspace_intent(root=root, record=foreign_record) + return service, "intent-foreign-001" + + +def test_mcp_declare_queued_on_conflict( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """on_conflict='queue' with overlap creates a queued intent.""" + service, _agent_a_id = _two_agent_service(tmp_path, monkeypatch) + + # Same-session overlap with on_conflict="queue" → queued. + queued = service.manage_change_intent( + action="declare", + run_id="queuetest", + scope={"allowed_files": ["pkg/a.py"]}, + intent="agent B wants pkg/a too", + on_conflict="queue", + ) + assert queued["status"] == "queued" + assert queued["before_run_pinned"] is False + assert isinstance(queued["blocked_by"], list) + assert len(queued["blocked_by"]) >= 1 + assert cast("int", queued["queue_position"]) >= 1 + + +def test_mcp_declare_queue_no_conflict_creates_active( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """on_conflict='queue' without overlap creates active, not queued.""" + service, _agent_a_id = _two_agent_service(tmp_path, monkeypatch) + + # No overlap → active even with on_conflict="queue". 
+ result = service.manage_change_intent( + action="declare", + run_id="queuetest", + scope={"allowed_files": ["pkg/b.py"]}, + intent="agent B edits non-overlapping file", + on_conflict="queue", + ) + assert result["status"] == "active" + + +def _declare_queued_pkg_a( + service: CodeCloneMCPService, + intent_text: str = "queued pkg/a intent", +) -> str: + """Declare a queued intent on pkg/a.py and return its ID.""" + queued = service.manage_change_intent( + action="declare", + run_id="queuetest", + scope={"allowed_files": ["pkg/a.py"]}, + intent=intent_text, + on_conflict="queue", + ) + assert queued["status"] == "queued" + return str(queued["intent_id"]) + + +def test_mcp_promote_queued_intent( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Promote transitions queued → active after blocker is cleared.""" + service, agent_a_id = _two_agent_service(tmp_path, monkeypatch) + queued_id = _declare_queued_pkg_a(service, "agent B queued") + + # Promote while blocker active → still queued. + still_blocked = service.manage_change_intent( + action="promote", + intent_id=queued_id, + ) + assert still_blocked["status"] == "queued" + assert cast("int", still_blocked["blocking_count"]) >= 1 + + # Clear blocker by removing foreign workspace file. + from codeclone.surfaces.mcp._workspace_intents import ( + remove_workspace_intent as _remove_ws, + ) + + _remove_ws( + root=tmp_path, + pid=99999, + start_epoch=1000000, + intent_id=agent_a_id, + ) + + # Now promote succeeds. 
+ promoted = service.manage_change_intent( + action="promote", + intent_id=queued_id, + ) + assert promoted["status"] == "active" + assert promoted["previous_status"] == "queued" + + +def test_mcp_verify_rejects_queued_intent( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """check_patch_contract(mode='verify') rejects queued intents.""" + service, _agent_a_id = _two_agent_service(tmp_path, monkeypatch) + queued_id = _declare_queued_pkg_a(service) + + result = service.check_patch_contract( + mode="verify", + before_run_id="queuetest", + intent_id=queued_id, + ) + assert result["status"] == "unverified" + assert result["reason"] == "intent_not_active" + assert "promote" in str(result.get("next_step", "")) + + +def test_mcp_budget_queued_intent_edit_not_allowed( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Budget for queued intent includes edit_allowed=False.""" + service, _agent_a_id = _two_agent_service(tmp_path, monkeypatch) + queued_id = _declare_queued_pkg_a(service, "queued budget test") + + budget = service.check_patch_contract( + mode="budget", + run_id="queuetest", + intent_id=queued_id, + ) + assert budget["intent_status"] == "queued" + assert budget["edit_allowed"] is False + + +def test_mcp_clear_queued_intent( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Clear removes queued intents the same as active ones.""" + service, _agent_a_id = _two_agent_service(tmp_path, monkeypatch) + queued_id = _declare_queued_pkg_a(service, "will be cleared") + + cleared = service.manage_change_intent( + action="clear", + intent_id=queued_id, + ) + assert cleared["cleared"] == 1 + assert queued_id in cast("list[str]", cleared["cleared_intent_ids"]) + + def test_claim_guard_detects_deterministic_overclaims() -> None: payload = mcp_claim_guard_mod.validate_claims( text=( From 982ddd9b16c888e7fcf45e0678a3f7527dd96635 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Fri, 29 May 2026 13:06:29 +0500 Subject: 
[PATCH 049/318] feat(mcp): add intent queue MVP for multi-agent scope coordination MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Queued intents preserve blocked agent work when scope overlaps a live foreign intent. on_conflict="queue" creates a queued intent instead of active; promote action transitions queued → active after re-checking conflicts. Queued records are visible in workspace but do not block active declares or pass patch verification. Budget queries work with edit_allowed=false advisory. --- CHANGELOG.md | 20 +++++ docs/book/20-mcp-interface.md | 20 +++-- docs/book/24-structural-change-controller.md | 95 ++++++++++++++++++++ docs/book/appendix/a-status-enums.md | 37 ++++++++ docs/mcp.md | 42 +++++++-- 5 files changed, 198 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9db526ca..6598fd18 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,22 @@ ### Added +- Add intent queue for multi-agent scope coordination. When scope overlaps a + foreign active intent, `manage_change_intent(action="declare", + on_conflict="queue")` creates a queued intent instead of reporting a conflict. + Queued intents are visible in workspace listings but do not own scope, do not + pin the before-run, and cannot pass patch verification. A separate + `manage_change_intent(action="promote")` transitions queued → active after + re-checking workspace conflicts, pinning the run, and renewing the lease. If + conflicts persist, promote returns `blocking_count` without state change. +- Add verify ergonomics: `check_patch_contract(mode="verify")` auto-resolves + `before_run_id` from the intent record when `intent_id` is provided but + `before_run_id` is omitted. Non-accepted verify responses include `next_step` + with an actionable hint for each failure reason and + `claim_validation_recommended` to advise whether `validate_review_claims` is + meaningful for the verification profile. 
+- Add `intent.queued`, `intent.promoted`, and `intent.queue_blocked` audit trail + events with compact payload handlers for MCP payload token budget tracking. - Add MCP `get_blast_radius` as a deterministic pre-change projection over the canonical report: direct dependents, clone cohorts, dependency-cycle membership, coverage/risk signals, actionable do-not-touch paths, and @@ -46,6 +62,10 @@ ### Internal +- Keep queued intents unpinned: active intents call `_runs.pin()` to prevent + eviction from bounded history, queued intents do not — pinning happens at + promotion. Conflict detection in `_detect_scope_state` skips records with + `status == "queued"` so queued records do not block active declares. - Keep intent and blast-radius cache state in MCP process memory only; they do not mutate source files, baselines, cache artifacts, reports, or canonical report integrity. Workspace intent files are ephemeral coordination state, diff --git a/docs/book/20-mcp-interface.md b/docs/book/20-mcp-interface.md index c4322bc5..b12d79b0 100644 --- a/docs/book/20-mcp-interface.md +++ b/docs/book/20-mcp-interface.md @@ -121,9 +121,9 @@ drill into one finding or one hotspot family. | Tool | Key parameters | Purpose | |--------------------------|-------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------| -| `manage_change_intent` | `action`, `root`, `run_id`, `intent_id`, `scope`, `ttl_seconds`, `lease_seconds`, `changed_files` or `diff_ref` | Intent lifecycle: declare, get, check, clear, renew, list_workspace, gc_workspace, recover, reset_workspace. 
Declare returns `workspace_relations` with forbidden-scope signals | +| `manage_change_intent` | `action`, `root`, `run_id`, `intent_id`, `scope`, `on_conflict`, `ttl_seconds`, `lease_seconds`, `changed_files` or `diff_ref` | Intent lifecycle: declare, get, check, clear, renew, promote, list_workspace, gc_workspace, recover, reset_workspace. `on_conflict="queue"` creates a queued intent when scope overlaps a foreign active. `action="promote"` transitions queued → active. Declare returns `workspace_relations` with forbidden-scope signals | | `get_blast_radius` | `run_id`, `files`, `depth`, `include` | Pre-change risk boundary: dependents, clone cohorts, do-not-touch, review context | -| `check_patch_contract` | `mode`, `run_id`, `before_run_id`, `after_run_id`, `intent_id`, `strictness`, `changed_files` or `diff_ref` | Budget query or post-edit verification | +| `check_patch_contract` | `mode`, `run_id`, `before_run_id`, `after_run_id`, `intent_id`, `strictness`, `changed_files` or `diff_ref` | Budget query or post-edit verification. Verify auto-resolves `before_run_id` from intent when omitted. Non-accepted responses include `next_step` hint and `claim_validation_recommended` flag | | `create_review_receipt` | `run_id`, `intent_id`, `format`, `include_blast_radius`, `include_patch_contract` | Deterministic audit artifact: provenance, scope, reviewed findings, patch status | | `validate_review_claims` | `text`, `run_id`, `require_citations` | Citation-based overclaim detection against stored run semantics | @@ -136,16 +136,20 @@ drill into one finding or one hotspot family. ??? info "Patch contract modes" **Budget** reads one stored run and optional intent. Shows regression - headroom per quality dimension before editing. **Verify** compares - explicit before/after stored runs, previews gates, validates scope, and - reports baseline-abuse signals. Missing runs return - `status="unverified"`. + headroom per quality dimension before editing. 
Queued intents return + `edit_allowed=false`. **Verify** compares explicit before/after stored + runs, previews gates, validates scope, and reports baseline-abuse + signals. When `intent_id` is provided but `before_run_id` is omitted, + verify auto-resolves the before-run from the intent record. Missing runs + return `status="unverified"`. Non-accepted responses include a + `next_step` hint and `claim_validation_recommended` flag. When a change intent is active, verify mode attributes regressions and gate changes to the declared scope. Intent-scope regressions produce contract violations; external regressions are reported as informational - context. See - [Scope-Aware Patch Contract Verification](24-structural-change-controller.md#scope-aware-patch-contract-verification). + context. Queued intents are rejected with `reason="intent_not_active"`. + See [Scope-Aware Patch Contract Verification](24-structural-change-controller.md#scope-aware-patch-contract-verification) + and [Verify Ergonomics](24-structural-change-controller.md#verify-ergonomics). ### Session-local tools diff --git a/docs/book/24-structural-change-controller.md b/docs/book/24-structural-change-controller.md index 9da19d7a..da415486 100644 --- a/docs/book/24-structural-change-controller.md +++ b/docs/book/24-structural-change-controller.md @@ -23,6 +23,8 @@ queries: | Scope-aware verification | Live in `2.1.0a1` | MCP `check_patch_contract` | | Workspace relations | Live in `2.1.0a1` | MCP `manage_change_intent` | | Verification profiles | Live in `2.1.0a1` | MCP `check_patch_contract` | +| Intent queue | Live in `2.1.0a1` | MCP `manage_change_intent` | +| Verify ergonomics | Live in `2.1.0a1` | MCP `check_patch_contract` | | MCP payload token budget | Live in `2.1.0a1` | Audit trail, CLI `--audit`, `--session-stats` | ## Contract @@ -356,6 +358,99 @@ indistinguishable: 3. No edit overlap, but the current agent explicitly excludes the foreign agent's target files (`target_excludes_foreign`). 
+## Intent Queue + +When multiple agents target overlapping scope, `manage_change_intent` supports +an advisory queue so a blocked agent can register its intent without failing. + +### Declare with queue + +`manage_change_intent(action="declare", on_conflict="queue")` first attempts a +normal declare. If `detect_conflicts` finds overlapping foreign active intents, +it downgrades the already-registered intent to `queued` instead of returning an +error. + +A queued intent: + +- Is visible in `list_workspace` as a workspace record with `status="queued"`. +- Does **not** own scope — conflict detection skips queued records. +- Does **not** pin the before-run — long waits may cause eviction from bounded + run history. +- Is rejected by `check_patch_contract(mode="verify")` (`reason="intent_not_active"`); + `check_patch_contract(mode="budget")` still responds, but with `edit_allowed=false`. +- Can be cleared via `manage_change_intent(action="clear")`. + +The declare response includes `blocked_by` (list of blocking intents with +`intent_id`, `agent_pid`, `agent_label`, `ownership`, `overlapping_files`) and +`queue_position` (deterministic ordering by `declared_at_utc`, then +`intent_id`). + +### Promote + +`manage_change_intent(action="promote", intent_id=...)` transitions a queued +intent to active: + +1. Validates the intent has `status="queued"`. +2. Resolves the before-run — if evicted, returns `status="unverified"` with + `reason="before_run_evicted"` and a `next_step` hint. +3. Re-checks workspace conflicts. If conflicts persist, returns `status="queued"` + with `blocking_count` and `blocked_by` without changing state. +4. On success: sets status to `active`, pins the run, renews the lease, and + updates the workspace record. + +### Queue semantic invariants + +- `queued` is a lifecycle status, not an ownership classification. Ownership + (`own_active`, `foreign_active`, etc.) and status (`active`, `queued`) are + orthogonal. +- Queued intents do not block other agents. `_detect_scope_state` skips records + with `status == "queued"`. 
+- Queue position is deterministic: sorted by `declared_at_utc`, then + `intent_id` as tiebreaker. + +### Audit events + +| Event | When | +|------------------------|--------------------------------| +| `intent.queued` | Declare downgrades to queued | +| `intent.promoted` | Promote succeeds | +| `intent.queue_blocked` | Promote blocked by conflicts | + +## Verify Ergonomics + +`check_patch_contract(mode="verify")` includes three ergonomic features that +reduce agent error and wasted context tokens. + +### Auto-resolve before_run_id + +When `intent_id` is provided but `before_run_id` is omitted, verify resolves +the before-run from the intent record's `run_id`. This eliminates the most +common agent error: forgetting to pass `before_run_id`. + +### Next-step hints + +Non-accepted verify responses include a `next_step` field with an actionable +hint matched to the failure reason: + +| Reason | Hint | +|-------------------------------------|---------------------------------------------------------| +| `no_before_run` | Run analysis or pass intent_id to auto-resolve | +| `no_after_run` | Run analysis after editing and pass after_run_id | +| `after_run_required_for_governance` | Governance changes require post-edit analysis | +| `incomparable_runs` | Re-run analysis with the same settings | +| `intent_not_active` | Queued intent must be promoted first | +| `report_digest_mismatch` | Use the original intent_id with the original before-run | +| `state_artifact_mutation` | Remove baseline/cache files from the patch | +| `scope_violation` | Redeclare intent with expanded scope | + +### Claim validation recommended + +The `claim_validation_recommended` boolean in verify responses advises whether +calling `validate_review_claims` is meaningful for the verification profile. +It is `true` for `python_structural` and `governance_config` profiles, `false` +for `documentation_only`, `non_python_patch`, `state_artifact_change`, and +non-accepted outcomes. 
+ ## MCP Payload Token Budget The optional controller audit trail can estimate the token footprint of MCP diff --git a/docs/book/appendix/a-status-enums.md b/docs/book/appendix/a-status-enums.md index 5c18e211..aeaa01b7 100644 --- a/docs/book/appendix/a-status-enums.md +++ b/docs/book/appendix/a-status-enums.md @@ -9,7 +9,9 @@ Centralize machine-readable status sets used across baseline/cache/report/CLI co - Baseline statuses: `codeclone/baseline/trust.py:BaselineStatus` - Cache statuses: `codeclone/cache/versioning.py:CacheStatus` - Exit categories: `codeclone/contracts/__init__.py:ExitCode` +- Intent status: `codeclone/surfaces/mcp/_intent.py:IntentStatus` - Intent ownership: `codeclone/surfaces/mcp/_workspace_intents.py:IntentOwnership` +- Workspace intent status: `codeclone/surfaces/mcp/_workspace_intents.py:WorkspaceIntentStatus` - Patch contract: `codeclone/surfaces/mcp/_patch_contract.py:PatchContractStatus` - Verification profile: `codeclone/surfaces/mcp/_verification_profile.py:VerificationProfile` @@ -55,6 +57,20 @@ Defined by `BASELINE_UNTRUSTED_STATUSES`. - `3` gating failure - `5` internal error +### IntentStatus + +- `active` +- `queued` +- `clean` +- `expanded` +- `violated` +- `unverified` +- `expired` + +Semantics are defined in +[Structural Change Controller § Pre-Change Workflow](../24-structural-change-controller.md#pre-change-workflow) +and [§ Intent Queue](../24-structural-change-controller.md#intent-queue). + ### IntentOwnership - `own_active` @@ -78,6 +94,27 @@ Semantics are defined in Semantics are defined in [Structural Change Controller § Scope-Aware Patch Contract Verification](../24-structural-change-controller.md#scope-aware-patch-contract-verification). +### WorkspaceIntentStatus + +- `active` +- `queued` + +Workspace registry records mirror the session-local `IntentStatus` for `active` +and `queued`. Other session-local statuses (`clean`, `expanded`, etc.) are not +persisted to the workspace registry. 
+ +### VerificationProfile + +- `state_artifact_change` +- `python_structural` +- `governance_config` +- `documentation_only` +- `non_python_patch` + +Priority-ordered. A single file from a higher-priority category overrides +the entire patch. Semantics are defined in +[Structural Change Controller § Verification Profiles](../24-structural-change-controller.md#verification-profiles). + ## Contracts - Status values are serialized into report metadata. diff --git a/docs/mcp.md b/docs/mcp.md index 5663dec9..25d9d8cb 100644 --- a/docs/mcp.md +++ b/docs/mcp.md @@ -278,6 +278,15 @@ sequenceDiagram A ->> M: declare(scope, intent) M ->> D: write intent record M -->> A: intent_id, blast_radius, concurrent_intents + alt Scope conflict with on_conflict="queue" + A ->> M: declare(scope, intent, on_conflict="queue") + M ->> D: write queued intent record + M -->> A: status=queued, blocked_by, queue_position + Note over A: Wait for foreign intent to clear + A ->> M: promote(intent_id) + M ->> D: re-check conflicts, update to active + M -->> A: status=active + end A ->> M: get_blast_radius(files) M -->> A: do_not_touch, review_context A ->> M: check_patch_contract(mode=budget) @@ -306,7 +315,7 @@ sequenceDiagram | Tool | Purpose | |--------------------------|-------------------------------------------------------------------------------------------------------------| -| `manage_change_intent` | Intent lifecycle: declare, get, check, clear, renew, list_workspace, gc_workspace, recover, reset_workspace | +| `manage_change_intent` | Intent lifecycle: declare, get, check, clear, renew, promote, list_workspace, gc_workspace, recover, reset_workspace | | `get_blast_radius` | Pre-change risk boundary: dependents, clone cohorts, do-not-touch, review context | | `check_patch_contract` | Budget query (`mode=budget`) or post-edit verification (`mode=verify`) | | `create_review_receipt` | Deterministic audit artifact: provenance, scope, reviewed findings, patch status, verification profile | 
@@ -320,11 +329,15 @@ debt. Review context is information, not an edit ban. ??? info "Patch contract modes" **Budget** reads one stored run and optional intent. Shows regression -headroom per quality dimension before editing. **Verify** compares explicit -before/after stored runs, previews gates, validates scope, and reports -baseline-abuse signals. Verify derives a **verification profile** from -changed files — docs-only and non-Python patches skip structural checks; -Python source changes require a full after-run. Missing runs return +headroom per quality dimension before editing. Queued intents return +`edit_allowed=false`. **Verify** compares explicit before/after stored +runs, previews gates, validates scope, and reports baseline-abuse signals. +When `intent_id` is provided but `before_run_id` is omitted, verify +auto-resolves the before-run from the intent record. Verify derives a +**verification profile** from changed files — docs-only and non-Python +patches skip structural checks; Python source changes require a full +after-run. Non-accepted responses include a `next_step` hint and a +`claim_validation_recommended` flag. Missing runs return `status=unverified`. ### Phase 6: Session management @@ -399,12 +412,25 @@ manage_change_intent(action="list_workspace") -> manage_change_intent(action="renew", intent_id=...) # optional: long edits -> analyze_repository # after-run -> manage_change_intent(action="check", intent_id=..., changed_files=[...]) - -> check_patch_contract(mode="verify", before_run_id=..., after_run_id=..., intent_id=...) - -> validate_review_claims(text="...") + -> check_patch_contract(mode="verify", after_run_id=..., intent_id=...) 
# before_run_id auto-resolved + -> validate_review_claims(text="...") # if claim_validation_recommended -> create_review_receipt -> manage_change_intent(action="clear") ``` +### Multi-agent queue + +``` +manage_change_intent(action="list_workspace") # foreign_active found + -> analyze_repository + -> manage_change_intent(action="declare", scope={...}, on_conflict="queue") # queued behind foreign + -> [wait for foreign intent to clear] + -> manage_change_intent(action="promote", intent_id=...) # queued → active + -> get_blast_radius(files=[...]) + -> check_patch_contract(mode="budget") + -> [edit within scope, then verify as normal] +``` + ### Coverage review ``` From 821ecb19defee0dd91dd54c4b2f5fca65c17159f Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Fri, 29 May 2026 16:12:56 +0500 Subject: [PATCH 050/318] feat(docs): align agent directives and skills with controller contracts --- CLAUDE.md | 132 ++++++++++++++---- .../skills/codeclone-change-control/SKILL.md | 127 +++++++++++------ .../skills/codeclone-change-control/SKILL.md | 127 +++++++++++------ 3 files changed, 273 insertions(+), 113 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index f0358dcf..074141c6 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -4,7 +4,8 @@ CodeClone: deterministic structural controller for Python. Full architecture, contracts, and agent playbook → `AGENTS.md`. -Code is the source of truth. If docs and code diverge, follow code. +Code is the implementation source of truth. If docs and code diverge, +follow code for implementation decisions and report the divergence. ## Default role @@ -18,31 +19,59 @@ reason to skip change control. ## Change control workflow -This workflow is mandatory protocol, not advisory text. Do not skip, replace, -reorder, or approximate these steps. If a required MCP call fails or is -unavailable, stop and report the blocker instead of continuing as a normal -edit. 
+The protocol below is mandatory, but the visible workflow depends on the +patch type: + +- **Python structural / governance config**: full before/after workflow. +- **Documentation-only**: lightweight verify; no after-run required when the + controller derives `documentation_only`. +- **Blocked follow-up**: queue the intent behind the foreign active intent; promote before + editing. +- **Read-only / spec validation**: no edit workflow unless repository files + change. + +Do not skip, replace, reorder, or approximate these steps. If a required MCP +call fails or is unavailable, stop and report the blocker instead of +continuing as a normal edit. Before editing any repository files: 1. `manage_change_intent(action="list_workspace", root="")` - — if `foreign_active` intents overlap, **stop and ask the user** + — if `foreign_active` intents overlap, do not edit. Prefer + `on_conflict="queue"` in step 3 for follow-up work that can wait. + Ask the user only if you need to edit immediately, recover/reset + another agent's intent, or touch a `do_not_touch` path 2. `analyze_repository(root="")` 3. `manage_change_intent(action="declare", scope={...})` - — if `concurrent_intents` non-empty, narrow scope or ask + — if `concurrent_intents` is non-empty, narrow scope or ask. + Use `on_conflict="queue"` to create a queued intent behind foreign + active intents instead of failing. A queued intent does not own scope + and cannot be verified — promote it first (step 3b) + 3b. *(only for queued intents)* When the foreign intent clears: + `manage_change_intent(action="promote", intent_id=...)` + — transitions queued → active, pins the run, renews the lease. + If the before-run was evicted, re-analyze and redeclare 4. `get_blast_radius(files=[...])` 5. `check_patch_contract(mode="budget")` 6. Edit within declared scope only -7. `analyze_repository(root="")` — re-run after edits +7. `analyze_repository(root="")` — re-run after edits for Python + structural and governance config changes. 
For documentation-only or + non-Python patches, this step may be skipped; the controller derives the + profile from actual changed files during verify. If unsure, re-run 8. `manage_change_intent(action="check", intent_id=..., changed_files=[...])` — pass the original `intent_id` explicitly and provide either `changed_files` or `diff_ref` (the intent is bound to the before-run; without `intent_id`, `_resolve_intent` looks up the latest run and misses it) -9. `check_patch_contract(mode="verify", before_run_id=..., - after_run_id=..., intent_id=...)` — verify compares the intent's - `report_digest` against the before-run; redeclare on the after-run - would cause an `expired` mismatch +9. `check_patch_contract(mode="verify", after_run_id=..., intent_id=...)` + — `before_run_id` auto-resolves from the intent when omitted. + `after_run_id` is required only when the derived verification profile + requires it (`python_structural`, `governance_config`). For + `documentation_only` and `non_python_patch`, pass `changed_files` or + `diff_ref` evidence and omit `after_run_id`. + Non-accepted responses include a `next_step` hint — follow it. + Verify compares the intent's `report_digest` against the before-run; + redeclaring on the after-run would cause an `expired` mismatch 10. `manage_change_intent(action="clear", intent_id=...)` ### Rules @@ -60,8 +89,17 @@ Before editing any repository files: `check`/`get`/`verify` — otherwise `_resolve_intent` resolves by latest run_id and misses intents bound to the before-run. - `do_not_touch` is a hard boundary. `review_context` is context, not a ban. -- Do not update baselines, cache, or generated reports. -- If `list_workspace` shows overlapping foreign intent, stop and coordinate. +- Do not update baselines, analysis cache, or generated reports. +- If `list_workspace` shows overlapping foreign intent, stop and coordinate — + or use `on_conflict="queue"` to queue behind it. +- MUST NOT edit while the intent is `queued`. Promote first. 
+- MUST NOT call verify on a queued intent — verify rejects with + `reason="intent_not_active"`. +- MAY call budget on a queued intent for planning only. Budget responses + for queued intents include `edit_allowed=false` and are not edit + permission. +- When verify returns a `next_step` hint, follow it — do not invent a + different recovery path. - CodeClone findings are the source of truth — do not reinterpret. - If `check_patch_contract(mode="verify")` returns `unverified` or `violated`, do not claim the patch is verified. @@ -70,17 +108,40 @@ Before editing any repository files: - Live foreign intent means **stop**, not kill. Never suggest killing a process without explicit user confirmation that the PID is abandoned. +### User escalation policy + +Run routine controller steps automatically. Queue blocked follow-up work +automatically when it can wait — do not ask before queueing. + +Ask the user only when: + +- scope expansion is required; +- a `do_not_touch` path must be touched; +- a live foreign intent overlaps and queue is not appropriate; +- patch contract returned `violated` or `unverified`; +- baseline, cache, or generated state would be modified; +- recovery or reset of another agent's intent is needed. + +Routine controller work is automatic. Boundary decisions require the user. + ### Completion gate Do not say "done", "implemented", "validated", "verified", "ready", or equivalent unless all of these are true: -1. an after-run was created after the last edit; +1. either: + - an after-run was created after the last edit (required for + `python_structural` and `governance_config` patches); or + - `check_patch_contract(mode="verify")` derived a profile that does not + require `after_run_id` (`documentation_only` or `non_python_patch`); 2. `manage_change_intent(action="check", intent_id=..., changed_files=...)` or `diff_ref=...` returned `clean`; -3. 
`check_patch_contract(mode="verify", before_run_id=..., - after_run_id=..., intent_id=...)` returned `accepted`; -4. any final summary claims passed `validate_review_claims`; +3. `check_patch_contract(mode="verify", intent_id=..., after_run_id=...)` + returned `accepted`; `after_run_id` is required only when the derived + verification profile requires it; +4. any final summary claims passed `validate_review_claims` — skip only + when `claim_validation_recommended` is explicitly `false` in the + controller response, not by agent judgment; 5. `manage_change_intent(action="clear", intent_id=...)` succeeded. If any item cannot be completed, report `BLOCKED` or `UNVERIFIED`, include the @@ -93,29 +154,34 @@ The controller derives a **verification profile** from actual changed files. The profile determines which structural checks apply. The agent does not choose the profile — it is computed by `check_patch_contract(mode="verify")`. -| Profile | When | `after_run` required | Structural checks | -|---|---|---|---| -| `python_structural` | any `.py` / `.pyi` touched | yes | all | -| `governance_config` | config files only (pyproject.toml, CI, Dockerfile…) | yes | not applicable | -| `documentation_only` | only docs files (`.md`, `.rst`, LICENSE…) | no | not applicable | -| `non_python_patch` | other files, no Python / docs | no | not applicable | -| `state_artifact_change` | baseline or cache touched | no (violated) | not applicable | +| Profile | When | `after_run` required | Structural checks | +|-------------------------|-----------------------------------------------------|----------------------|-------------------| +| `python_structural` | any `.py` / `.pyi` touched | yes | all | +| `governance_config` | config files only (pyproject.toml, CI, Dockerfile…) | yes | not applicable | +| `documentation_only` | only docs files (`.md`, `.rst`, LICENSE…) | no | not applicable | +| `non_python_patch` | other files, no Python / docs | no | not applicable | +| 
`state_artifact_change` | baseline or cache touched | no (violated) | not applicable | Key rules: - If **any** Python source, governance configuration, baseline, cache, or generated state files were touched, the lightweight path is not accepted. -- Documentation-only and non-Python patches can verify without `after_run_id` +- Documentation-only patches can verify without `after_run_id` when `changed_files` or `diff_ref` evidence is provided. +- Other non-Python patches may verify without `after_run_id`, but only + with controller-reported limitations. Do not present this as full + structural verification. - The agent MUST NOT claim which profile applies — CodeClone decides. - Receipts use "not applicable" for skipped structural checks, never "passed". -- Claim Guard warns when a review references structural verification but the - profile says structural checks were not applicable. +- Claim Guard may reject or warn on claims that exceed the derived profile. + For documentation-only patches, "no Python files touched" is allowed; + "no structural regressions" requires structural evidence from an after-run. ### When to skip - Read-only tasks (analysis, validation, research) -- CodeClone MCP not available +- CodeClone MCP not available and the task is read-only. For repository + edits that require change control, stop and report the blocker - User explicitly says analysis-only ## Spec writing discipline @@ -178,10 +244,14 @@ See `AGENTS.md` §3 for surface-specific commands. ## Hard boundaries -- Never update golden snapshots to "fix" tests. +- Never update golden snapshots merely to "fix" tests. Snapshot updates + require explicit user approval and a contract/schema change rationale. - Never change fingerprint semantics without `FINGERPRINT_VERSION` review. - Never make base `codeclone` depend on MCP runtime packages. -- Never let MCP mutate baselines, source files, reports, or cache. 
+- Never let MCP mutate baselines, source files, canonical reports, or + analysis cache. Ephemeral coordination state (workspace intents) and + audit trail under `.cache/codeclone/` are allowed only through the + controller and audit contracts. - Never iterate sets/dicts without sorting when output order matters. - Never introduce `Any` in core/domain code without narrowing it immediately. - Never create `*.md` specs inside `docs/` — use `specs/` directory. diff --git a/plugins/codeclone/skills/codeclone-change-control/SKILL.md b/plugins/codeclone/skills/codeclone-change-control/SKILL.md index 732306c3..3a155fbc 100644 --- a/plugins/codeclone/skills/codeclone-change-control/SKILL.md +++ b/plugins/codeclone/skills/codeclone-change-control/SKILL.md @@ -16,14 +16,16 @@ patch. Use this workflow whenever this skill is selected for a repository edit. Start with a workspace intent check, then run pre-edit analysis and keep the returned -`run_id` and `intent_id` for verification. If a required MCP tool is unavailable -in the connected server, continue with the available steps and state which step -was skipped. +`run_id` and `intent_id` for verification. If a required MCP tool is +unavailable, continue only for read-only analysis. For repository edits that +require change control, stop and report the blocker unless the unavailable tool +is explicitly optional or legacy-compatible. Do not downgrade the task to an ordinary edit after this skill has been selected. The only valid reasons to skip the workflow are: no repository files -will be changed, CodeClone MCP is unavailable, or the user explicitly asks for -analysis only. +will be changed, the user explicitly asks for analysis only, or CodeClone MCP +is unavailable and the task remains read-only. Do not perform repository edits +without change control after this skill is selected. ## Rules @@ -34,17 +36,23 @@ analysis only. `analyze_repository` yourself before declaring intent. - Declare intent before editing. 
- Do not silently expand scope. -- If concurrent workspace intents overlap your files, narrow scope or - coordinate before editing. +- If concurrent workspace intents overlap your files, prefer + `on_conflict="queue"` for follow-up work. Ask the user only when immediate + editing is required or queue is not appropriate. - Treat blast-radius dependents and clone cohorts as review context, not permission to modify. - Treat `do_not_touch` as a boundary unless the user explicitly expands scope. - Treat `review_context` as context, not an edit ban. -- Do not update baselines, cache, or generated reports as part of a functional - change. +- Do not update baselines, analysis cache, or generated reports as part of a + functional change. - Do not fall back to CLI or local report files. - CodeClone is the source of truth — do not reinterpret findings independently. - Never auto-suppress findings or mutate CodeClone baseline state. +- Run routine controller steps automatically. Queue blocked follow-up work + automatically — do not ask before queueing. Ask the user only when: scope + expansion is needed, a `do_not_touch` path must be touched, patch contract + returned `violated` or `unverified`, or baseline/cache/generated state would + be modified. ## Workflow @@ -54,11 +62,11 @@ manage_change_intent(action="list_workspace", root=...) → manage_change_intent(action="declare") # intent bound to before-run → get_blast_radius → check_patch_contract(mode="budget") -→ edit code -→ analyze_repository # after-run +→ edit declared files +→ analyze_repository # after-run (skip for docs-only) → manage_change_intent(action="check", intent_id=..., changed_files=[...]) -→ check_patch_contract(mode="verify", before_run_id=..., after_run_id=..., intent_id=...) -→ validate_review_claims +→ check_patch_contract(mode="verify", after_run_id=..., intent_id=...) 
# before_run_id auto-resolved +→ validate_review_claims # skip only if claim_validation_recommended is explicitly false → create_review_receipt → manage_change_intent(action="clear") ``` @@ -71,8 +79,10 @@ redeclared intent would cause an `expired` mismatch. Use `diff_ref=...` instead of `changed_files=[...]` when the changed set should come from git. Older MCP servers may not support `list_workspace`, `validate_review_claims`, -or `create_review_receipt`. Skip only unavailable steps and say so explicitly. -Keep the pre-edit `run_id` as `before_run_id`; verify against the explicit +or `create_review_receipt`. These legacy-compatible steps may be skipped when +unavailable, and the summary must say so explicitly. Do not skip core edit +control steps: `analyze_repository`, `declare`, `check`, and `verify`. Keep +the pre-edit `run_id` as `before_run_id`; verify against the explicit after-run produced after the edit. ## Workspace check @@ -86,7 +96,29 @@ manage_change_intent(action="list_workspace", root="/absolute/repo") If it returns active intents from other agents, compare their `scope` to your planned files. A hard overlap means another agent claimed the same primary file. A soft overlap means your primary file is in another agent's related context, or -the reverse. In either case, narrow scope or coordinate before editing. +the reverse. + +Do not ask the user before queueing blocked follow-up work that can wait. +Prefer `on_conflict="queue"` in the declare step to queue behind the foreign +intent. Ask the user only if immediate editing is required, recovery/reset is +needed, or a `do_not_touch` path must be touched: + +``` +manage_change_intent(action="declare", scope={...}, on_conflict="queue") +``` + +A queued intent does not own scope, does not pin the before-run, and cannot pass +verification. You may call `check_patch_contract(mode="budget")` on a queued +intent for planning, but it returns `edit_allowed=false`. 
When the foreign +intent clears, promote it: + +``` +manage_change_intent(action="promote", intent_id=...) +``` + +If promote returns `status="queued"` with `blocking_count`, the foreign intent is +still active — wait and retry. If it returns `reason="before_run_evicted"`, +re-analyze and redeclare the intent. ## Legacy workflow @@ -97,16 +129,15 @@ analyze_repository → manage_change_intent(action="declare") → get_blast_radius → check_patch_contract(mode="budget") -→ edit code +→ edit declared files → analyze_repository → manage_change_intent(action="check", intent_id=..., changed_files=[...]) → check_patch_contract(mode="verify", before_run_id=..., after_run_id=..., intent_id=...) → validate_review_claims → create_review_receipt +→ manage_change_intent(action="clear") # if supported ``` -Still clear any declared intent when the server supports `clear`. - ## Intent first Before editing, call: @@ -177,17 +208,23 @@ explicitly: ``` manage_change_intent(action="check", intent_id=..., changed_files=[...]) -check_patch_contract(mode="verify", before_run_id=..., after_run_id=..., intent_id=...) +check_patch_contract(mode="verify", intent_id=..., after_run_id=...) ``` +`before_run_id` auto-resolves from the intent record when `intent_id` is +provided. `after_run_id` is required only for `python_structural` and +`governance_config` profiles. For `documentation_only` and `non_python_patch`, +pass `changed_files` or `diff_ref` evidence and omit `after_run_id`. + Use `diff_ref=...` instead of `changed_files=[...]` when the changed set should come from git. -If the result is `unverified`, report what is missing. Do not claim the patch is -verified. +If the result is `unverified` or `violated`, read the `next_step` hint and +follow it. Do not claim the patch is verified. Do not invent a different +recovery path — the hint is deterministic and authoritative. -If the result is `violated`, stop and explain the violation instead of continuing -to broaden the patch. 
+If `claim_validation_recommended` is `true`, call `validate_review_claims` +before writing a summary. If it is explicitly `false`, skip claim validation. ## Verification profiles @@ -195,28 +232,32 @@ The controller derives a **verification profile** from actual changed files during `check_patch_contract(mode="verify")`. The profile determines which structural checks apply. The agent does not choose the profile. -| Profile | When | `after_run` required | Structural checks | -|---|---|---|---| -| `python_structural` | any `.py` / `.pyi` touched | yes | all | -| `governance_config` | config files only | yes | not applicable | -| `documentation_only` | only docs files | no | not applicable | -| `non_python_patch` | other files, no Python / docs | no | not applicable | -| `state_artifact_change` | baseline or cache touched | no (violated) | not applicable | +| Profile | When | `after_run` required | Structural checks | +|-------------------------|-------------------------------|----------------------|-------------------| +| `python_structural` | any `.py` / `.pyi` touched | yes | all | +| `governance_config` | config files only | yes | not applicable | +| `documentation_only` | only docs files | no | not applicable | +| `non_python_patch` | other files, no Python / docs | no | not applicable | +| `state_artifact_change` | baseline or CodeClone state/cache touched | no (violated) | not applicable | Rules: -- If any Python source, governance config, baseline, cache, or generated state +- If any Python source, governance config, baseline, CodeClone state/cache, or generated state was touched, the lightweight path is not accepted. -- Documentation-only and non-Python patches can verify without `after_run_id` - when `changed_files` or `diff_ref` evidence is provided. +- Documentation-only patches can verify without `after_run_id` when + `changed_files` or `diff_ref` evidence is provided. 
+- Other non-Python patches may verify without `after_run_id`, but only with + controller-reported limitations. Do not present this as full structural + verification. - Do not claim which profile applies — CodeClone decides. - Receipts use "not applicable" for skipped structural checks, never "passed". -- When writing review summaries for non-structural profiles, do not claim - structural verification was performed. +- Claim Guard may reject or warn on claims that exceed the derived profile. + For documentation-only patches, "no Python files touched" is allowed; + "no structural regressions" requires structural evidence from an after-run. ## Claim discipline -When writing a summary, call: +When writing a summary and `claim_validation_recommended` was `true`, call: ``` validate_review_claims @@ -251,13 +292,17 @@ The final user summary should include: The task is complete only when: -- intent was declared before editing +- intent was declared before editing; if queued, it was promoted before editing - blast radius was inspected - edits stayed inside declared scope, or expansion was explicit -- patch contract was checked +- patch contract was checked: either an after-run was created (Python + structural, governance config) or verify derived a profile that does not + require it (documentation-only, non-Python) - baseline/cache/generated state was not changed accidentally -- claims were validated when a review summary was written -- a review receipt was created when available +- claims were validated when `claim_validation_recommended` was explicitly + `true` in the controller response, not skipped by agent judgment +- a review receipt was created when the server supports it; if unsupported, + the final summary states that receipt creation was unavailable ## Non-goals diff --git a/plugins/cursor-codeclone/skills/codeclone-change-control/SKILL.md b/plugins/cursor-codeclone/skills/codeclone-change-control/SKILL.md index 732306c3..3a155fbc 100644 --- 
a/plugins/cursor-codeclone/skills/codeclone-change-control/SKILL.md +++ b/plugins/cursor-codeclone/skills/codeclone-change-control/SKILL.md @@ -16,14 +16,16 @@ patch. Use this workflow whenever this skill is selected for a repository edit. Start with a workspace intent check, then run pre-edit analysis and keep the returned -`run_id` and `intent_id` for verification. If a required MCP tool is unavailable -in the connected server, continue with the available steps and state which step -was skipped. +`run_id` and `intent_id` for verification. If a required MCP tool is +unavailable, continue only for read-only analysis. For repository edits that +require change control, stop and report the blocker unless the unavailable tool +is explicitly optional or legacy-compatible. Do not downgrade the task to an ordinary edit after this skill has been selected. The only valid reasons to skip the workflow are: no repository files -will be changed, CodeClone MCP is unavailable, or the user explicitly asks for -analysis only. +will be changed, the user explicitly asks for analysis only, or CodeClone MCP +is unavailable and the task remains read-only. Do not perform repository edits +without change control after this skill is selected. ## Rules @@ -34,17 +36,23 @@ analysis only. `analyze_repository` yourself before declaring intent. - Declare intent before editing. - Do not silently expand scope. -- If concurrent workspace intents overlap your files, narrow scope or - coordinate before editing. +- If concurrent workspace intents overlap your files, prefer + `on_conflict="queue"` for follow-up work. Ask the user only when immediate + editing is required or queue is not appropriate. - Treat blast-radius dependents and clone cohorts as review context, not permission to modify. - Treat `do_not_touch` as a boundary unless the user explicitly expands scope. - Treat `review_context` as context, not an edit ban. 
-- Do not update baselines, cache, or generated reports as part of a functional - change. +- Do not update baselines, analysis cache, or generated reports as part of a + functional change. - Do not fall back to CLI or local report files. - CodeClone is the source of truth — do not reinterpret findings independently. - Never auto-suppress findings or mutate CodeClone baseline state. +- Run routine controller steps automatically. Queue blocked follow-up work + automatically — do not ask before queueing. Ask the user only when: scope + expansion is needed, a `do_not_touch` path must be touched, patch contract + returned `violated` or `unverified`, or baseline/cache/generated state would + be modified. ## Workflow @@ -54,11 +62,11 @@ manage_change_intent(action="list_workspace", root=...) → manage_change_intent(action="declare") # intent bound to before-run → get_blast_radius → check_patch_contract(mode="budget") -→ edit code -→ analyze_repository # after-run +→ edit declared files +→ analyze_repository # after-run (skip for docs-only) → manage_change_intent(action="check", intent_id=..., changed_files=[...]) -→ check_patch_contract(mode="verify", before_run_id=..., after_run_id=..., intent_id=...) -→ validate_review_claims +→ check_patch_contract(mode="verify", after_run_id=..., intent_id=...) # before_run_id auto-resolved +→ validate_review_claims # skip only if claim_validation_recommended is explicitly false → create_review_receipt → manage_change_intent(action="clear") ``` @@ -71,8 +79,10 @@ redeclared intent would cause an `expired` mismatch. Use `diff_ref=...` instead of `changed_files=[...]` when the changed set should come from git. Older MCP servers may not support `list_workspace`, `validate_review_claims`, -or `create_review_receipt`. Skip only unavailable steps and say so explicitly. -Keep the pre-edit `run_id` as `before_run_id`; verify against the explicit +or `create_review_receipt`. 
These legacy-compatible steps may be skipped when +unavailable, and the summary must say so explicitly. Do not skip core edit +control steps: `analyze_repository`, `declare`, `check`, and `verify`. Keep +the pre-edit `run_id` as `before_run_id`; verify against the explicit after-run produced after the edit. ## Workspace check @@ -86,7 +96,29 @@ manage_change_intent(action="list_workspace", root="/absolute/repo") If it returns active intents from other agents, compare their `scope` to your planned files. A hard overlap means another agent claimed the same primary file. A soft overlap means your primary file is in another agent's related context, or -the reverse. In either case, narrow scope or coordinate before editing. +the reverse. + +Do not ask the user before queueing blocked follow-up work that can wait. +Prefer `on_conflict="queue"` in the declare step to queue behind the foreign +intent. Ask the user only if immediate editing is required, recovery/reset is +needed, or a `do_not_touch` path must be touched: + +``` +manage_change_intent(action="declare", scope={...}, on_conflict="queue") +``` + +A queued intent does not own scope, does not pin the before-run, and cannot pass +verification. You may call `check_patch_contract(mode="budget")` on a queued +intent for planning, but it returns `edit_allowed=false`. When the foreign +intent clears, promote it: + +``` +manage_change_intent(action="promote", intent_id=...) +``` + +If promote returns `status="queued"` with `blocking_count`, the foreign intent is +still active — wait and retry. If it returns `reason="before_run_evicted"`, +re-analyze and redeclare the intent. 
## Legacy workflow @@ -97,16 +129,15 @@ analyze_repository → manage_change_intent(action="declare") → get_blast_radius → check_patch_contract(mode="budget") -→ edit code +→ edit declared files → analyze_repository → manage_change_intent(action="check", intent_id=..., changed_files=[...]) → check_patch_contract(mode="verify", before_run_id=..., after_run_id=..., intent_id=...) → validate_review_claims → create_review_receipt +→ manage_change_intent(action="clear") # if supported ``` -Still clear any declared intent when the server supports `clear`. - ## Intent first Before editing, call: @@ -177,17 +208,23 @@ explicitly: ``` manage_change_intent(action="check", intent_id=..., changed_files=[...]) -check_patch_contract(mode="verify", before_run_id=..., after_run_id=..., intent_id=...) +check_patch_contract(mode="verify", intent_id=..., after_run_id=...) ``` +`before_run_id` auto-resolves from the intent record when `intent_id` is +provided. `after_run_id` is required only for `python_structural` and +`governance_config` profiles. For `documentation_only` and `non_python_patch`, +pass `changed_files` or `diff_ref` evidence and omit `after_run_id`. + Use `diff_ref=...` instead of `changed_files=[...]` when the changed set should come from git. -If the result is `unverified`, report what is missing. Do not claim the patch is -verified. +If the result is `unverified` or `violated`, read the `next_step` hint and +follow it. Do not claim the patch is verified. Do not invent a different +recovery path — the hint is deterministic and authoritative. -If the result is `violated`, stop and explain the violation instead of continuing -to broaden the patch. +If `claim_validation_recommended` is `true`, call `validate_review_claims` +before writing a summary. If it is explicitly `false`, skip claim validation. ## Verification profiles @@ -195,28 +232,32 @@ The controller derives a **verification profile** from actual changed files during `check_patch_contract(mode="verify")`. 
The profile determines which structural checks apply. The agent does not choose the profile. -| Profile | When | `after_run` required | Structural checks | -|---|---|---|---| -| `python_structural` | any `.py` / `.pyi` touched | yes | all | -| `governance_config` | config files only | yes | not applicable | -| `documentation_only` | only docs files | no | not applicable | -| `non_python_patch` | other files, no Python / docs | no | not applicable | -| `state_artifact_change` | baseline or cache touched | no (violated) | not applicable | +| Profile | When | `after_run` required | Structural checks | +|-------------------------|-------------------------------|----------------------|-------------------| +| `python_structural` | any `.py` / `.pyi` touched | yes | all | +| `governance_config` | config files only | yes | not applicable | +| `documentation_only` | only docs files | no | not applicable | +| `non_python_patch` | other files, no Python / docs | no | not applicable | +| `state_artifact_change` | baseline or CodeClone state/cache touched | no (violated) | not applicable | Rules: -- If any Python source, governance config, baseline, cache, or generated state +- If any Python source, governance config, baseline, CodeClone state/cache, or generated state was touched, the lightweight path is not accepted. -- Documentation-only and non-Python patches can verify without `after_run_id` - when `changed_files` or `diff_ref` evidence is provided. +- Documentation-only patches can verify without `after_run_id` when + `changed_files` or `diff_ref` evidence is provided. +- Other non-Python patches may verify without `after_run_id`, but only with + controller-reported limitations. Do not present this as full structural + verification. - Do not claim which profile applies — CodeClone decides. - Receipts use "not applicable" for skipped structural checks, never "passed". 
-- When writing review summaries for non-structural profiles, do not claim - structural verification was performed. +- Claim Guard may reject or warn on claims that exceed the derived profile. + For documentation-only patches, "no Python files touched" is allowed; + "no structural regressions" requires structural evidence from an after-run. ## Claim discipline -When writing a summary, call: +When writing a summary and `claim_validation_recommended` was `true`, call: ``` validate_review_claims @@ -251,13 +292,17 @@ The final user summary should include: The task is complete only when: -- intent was declared before editing +- intent was declared before editing; if queued, it was promoted before editing - blast radius was inspected - edits stayed inside declared scope, or expansion was explicit -- patch contract was checked +- patch contract was checked: either an after-run was created (Python + structural, governance config) or verify derived a profile that does not + require it (documentation-only, non-Python) - baseline/cache/generated state was not changed accidentally -- claims were validated when a review summary was written -- a review receipt was created when available +- claims were validated when `claim_validation_recommended` was explicitly + `true` in the controller response, not skipped by agent judgment +- a review receipt was created when the server supports it; if unsupported, + the final summary states that receipt creation was unavailable ## Non-goals From 97229e681c01a8cfabb281d7009d5707c5703900 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Fri, 29 May 2026 22:19:46 +0500 Subject: [PATCH 051/318] feat(mcp): add workflow consolidation tools MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce start_controlled_change and finish_controlled_change MCP tools that aggregate the 10-step atomic change control workflow into 3-4 calls per edit cycle. 
Workflow tools call existing internal methods without new engine logic — same audit events, same verification profiles, same boundary enforcement. --- AGENTS.md | 9 + CHANGELOG.md | 18 + CLAUDE.md | 192 ++++--- .../surfaces/mcp/_session_workflow_mixin.py | 493 ++++++++++++++++++ codeclone/surfaces/mcp/server.py | 83 ++- codeclone/surfaces/mcp/service.py | 32 ++ codeclone/surfaces/mcp/session.py | 4 +- docs/book/20-mcp-interface.md | 19 +- docs/book/24-structural-change-controller.md | 30 ++ docs/mcp.md | 43 +- .../skills/codeclone-change-control/SKILL.md | 271 +++++----- .../skills/codeclone-change-control/SKILL.md | 271 +++++----- .../contract_snapshots/mcp_tool_schemas.json | 158 ++++++ .../public_api_surface.json | 14 +- tests/test_codex_plugin.py | 6 +- tests/test_mcp_server.py | 10 +- 16 files changed, 1273 insertions(+), 380 deletions(-) create mode 100644 codeclone/surfaces/mcp/_session_workflow_mixin.py diff --git a/AGENTS.md b/AGENTS.md index 527b57ec..4a8c1420 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -238,6 +238,15 @@ Session review markers and change intent truth are ephemeral MCP process state. Workspace intent registry files under `.cache/codeclone/intents/` are advisory coordination state only, not analysis cache or report truth. +For file edits, agents should prefer the workflow tools +`start_controlled_change` and `finish_controlled_change` — they aggregate +workspace check, intent declaration, blast radius, budget, verification, +receipt, and cleanup into two calls. Atomic change control tools +(`manage_change_intent`, `get_blast_radius`, `check_patch_contract`, +`validate_review_claims`, `create_review_receipt`) remain available for +queue/promote/recover operations, deep inspection, and backward +compatibility with older MCP servers. + ### Report invariants - Ordering must be deterministic (stable sort keys). 
diff --git a/CHANGELOG.md b/CHANGELOG.md index 6598fd18..fd38c32f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,23 @@ # Changelog +## [2.1.0a2] - Unreleased + +### Added + +- Add `start_controlled_change` MCP tool — aggregates workspace check, + intent declaration, blast radius computation (direct + bounded transitive + for high-radius changes), and patch budget into a single pre-edit call. +- Add `finish_controlled_change` MCP tool — aggregates scope check, patch + verification, claim validation, review receipt, and intent cleanup into + a single post-edit call. + +### Changed + +- Agent workflow reduced from 7–11 MCP calls to 3–4 per edit cycle. + CLAUDE.md and plugin skills updated to prefer workflow tools. +- Atomic change control tools remain available for advanced/diagnostic + use and backward compatibility. + ## [2.1.0a1] - 2026-05-22 `2.1.0a1` opens the v2.1 alpha line for structural change control. diff --git a/CLAUDE.md b/CLAUDE.md index 074141c6..24002bd0 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -30,79 +30,100 @@ patch type: - **Read-only / spec validation**: no edit workflow unless repository files change. -Do not skip, replace, reorder, or approximate these steps. If a required MCP -call fails or is unavailable, stop and report the blocker instead of -continuing as a normal edit. +Do not skip, replace, reorder, or approximate the required steps for the +derived workflow profile. Steps explicitly marked as optional or +profile-dependent may be skipped only under the stated conditions. +If a required MCP call fails or is unavailable, stop and report the blocker +instead of continuing as a normal edit. Before editing any repository files: -1. `manage_change_intent(action="list_workspace", root="")` - — if `foreign_active` intents overlap, do not edit. Prefer - `on_conflict="queue"` in step 3 for follow-up work that can wait. 
- Ask the user only if you need to edit immediately, recover/reset - another agent's intent, or touch a `do_not_touch` path -2. `analyze_repository(root="")` -3. `manage_change_intent(action="declare", scope={...})` - — if `concurrent_intents` non-empty, narrow scope or ask. - Use `on_conflict="queue"` to create a queued intent behind foreign - active intents instead of failing. A queued intent does not own scope - and cannot be verified — promote it first (step 3b) - 3b. *(only for queued intents)* When the foreign intent clears: - `manage_change_intent(action="promote", intent_id=...)` - — transitions queued → active, pins the run, renews the lease. - If the before-run was evicted, re-analyze and redeclare -4. `get_blast_radius(files=[...])` -5. `check_patch_contract(mode="budget")` -6. Edit within declared scope only -7. `analyze_repository(root="")` — re-run after edits for Python - structural and governance config changes. For documentation-only or - non-Python patches, this step may be skipped; the controller derives the - profile from actual changed files during verify. If unsure, re-run -8. `manage_change_intent(action="check", intent_id=..., changed_files=[...])` - — pass the original `intent_id` explicitly and provide either - `changed_files` or `diff_ref` (the intent is bound to the before-run; - without `intent_id`, `_resolve_intent` looks up the latest run and - misses it) -9. `check_patch_contract(mode="verify", after_run_id=..., intent_id=...)` - — `before_run_id` auto-resolves from intent when omitted. - `after_run_id` is required only when the derived verification profile - requires it (`python_structural`, `governance_config`). For - `documentation_only` and `non_python_patch`, pass `changed_files` or - `diff_ref` evidence and omit `after_run_id`. - Non-accepted responses include `next_step` hint — follow it. - Verify compares the intent's `report_digest` against the before-run; - redeclare on the after-run would cause an `expired` mismatch -10. 
`manage_change_intent(action="clear", intent_id=...)` +1. `analyze_repository(root="")` + — if a valid recent run for the same absolute root already exists, skip +2. `start_controlled_change(root="", scope={...}, intent="...")` + — returns blast radius, budget, workspace state, intent_id + — if `status: "needs_analysis"`, run `analyze_repository` first + — if `status: "queued"`, do not edit; wait for promotion + — if `concurrent_intents` non-empty without queue, narrow scope or ask +3. Edit within declared scope only +4. `analyze_repository(root="")` + — after-run; required for Python structural and governance config changes. + May be skipped for documentation-only and other non-Python patches when + `finish` can verify from changed-file evidence +5. `finish_controlled_change(intent_id=..., changed_files=[...], after_run_id=...)` + — returns scope check, verification, receipt, and clears intent + — if `user_action_required: true`, stop and follow `next_step` + — if `status: "unverified"`, follow `next_step` hint + — `auto_clear=true` by default; intent cleared on accepted + +Workflow profiles determine which steps are needed: + +- **Python structural / governance config**: + `analyze` → `start` → edit → `analyze` → `finish(after_run_id=...)` +- **Documentation-only / non-Python**: + `analyze` → `start` → edit → `finish(changed_files=[...])` + For `non_python_patch`, report controller-stated limitations and do not + present the result as full structural verification. + +Queue/promote workflow (when `start` returns `status: "queued"`): + +1. `start_controlled_change(on_conflict="queue")` → `status: "queued"` +2. Wait for foreign intent to clear +3. 
`manage_change_intent(action="promote", intent_id=...)` + — edit only after promote returns `status: "active"` + — if `before_run_evicted`: re-analyze and re-start + +### Atomic workflow (fallback) + +Use the atomic workflow only when `start_controlled_change` or +`finish_controlled_change` are unavailable (older MCP servers), +for step-by-step debugging, or for recovery operations: + +``` +manage_change_intent(action="list_workspace", root=...) +→ analyze_repository +→ manage_change_intent(action="declare") +→ get_blast_radius +→ check_patch_contract(mode="budget") +→ edit declared files +→ analyze_repository +→ manage_change_intent(action="check", intent_id=..., changed_files=[...]) +→ check_patch_contract(mode="verify", after_run_id=..., intent_id=...) +→ validate_review_claims +→ create_review_receipt +→ manage_change_intent(action="clear") +``` ### Rules +- Prefer `start_controlled_change` / `finish_controlled_change` over + the atomic workflow. Use atomic tools only for queue/promote/recover + or when the workflow tools are unavailable. +- Do not mix workflow and atomic verification paths in the same edit + cycle. Queue/promote/recover operations via `manage_change_intent` + are allowed alongside workflow tools because workflow tools do not + expose those administrative transitions. +- `start_controlled_change` does not run analysis. Ensure a valid run + exists before calling it. +- `finish_controlled_change` does not run analysis. For Python + structural and governance config changes, run `analyze_repository` + after editing and pass `after_run_id`. - MUST NOT edit files without declaring intent first. -- MUST NOT silently expand scope — redeclare with expanded scope before - editing the extra file. -- MUST NOT redeclare on the after-run. Re-declare only to expand scope before - editing or to start a separate change. -- MUST NOT call the `check` action without exactly one changed-scope source: - `changed_files` or `diff_ref`. 
-- MUST clear the original intent by explicit `intent_id` after successful - verification. -- After re-analyze, pass `intent_id` explicitly to - `check`/`get`/`verify` — otherwise `_resolve_intent` resolves by - latest run_id and misses intents bound to the before-run. +- MUST NOT silently expand scope. If the fix requires files outside the + declared scope, stop before editing them. Expand scope only after user + approval unless the user already explicitly allowed expansion. Call + `start_controlled_change` again with the expanded scope to get a fresh + intent with updated blast radius and budget. Continue only when the + expanded intent is active. Do not edit extra files based on blast-radius + context alone. +- MUST NOT edit while intent is `queued`. Promote first. - `do_not_touch` is a hard boundary. `review_context` is context, not a ban. - Do not update baselines, analysis cache, or generated reports. -- If `list_workspace` shows overlapping foreign intent, stop and coordinate — - or use `on_conflict="queue"` to queue behind it. -- MUST NOT edit while intent is `queued`. Promote first. -- MUST NOT call verify on a queued intent — verify rejects with - `reason="intent_not_active"`. -- MAY call budget on a queued intent for planning only. Budget responses - for queued intents include `edit_allowed=false` and are not edit - permission. -- When verify returns a `next_step` hint, follow it — do not invent a - different recovery path. +- When `finish` or verify returns a `next_step` hint, follow it — do not + invent a different recovery path. - CodeClone findings are the source of truth — do not reinterpret. -- If `check_patch_contract(mode="verify")` returns `unverified` or `violated`, - do not claim the patch is verified. +- If `finish_controlled_change` returns `status: "unverified"` or + `"violated"`, do not claim the patch is verified. - Leaving an active or recoverable own intent behind is a blocked cleanup, not a completed task. 
- Live foreign intent means **stop**, not kill. Never suggest killing @@ -115,11 +136,14 @@ automatically when it can wait — do not ask before queueing. Ask the user only when: -- scope expansion is required; +- scope expansion is required and was not already explicitly allowed by + the user; - a `do_not_touch` path must be touched; - a live foreign intent overlaps and queue is not appropriate; -- patch contract returned `violated` or `unverified`; -- baseline, cache, or generated state would be modified; +- patch contract returned `violated`, or returned `unverified` and the + agent cannot execute the deterministic `next_step`; +- baseline, analysis cache, canonical reports, or generated state would + be modified; - recovery or reset of another agent's intent is needed. Routine controller work is automatic. Boundary decisions require the user. @@ -129,20 +153,24 @@ Routine controller work is automatic. Boundary decisions require the user. Do not say "done", "implemented", "validated", "verified", "ready", or equivalent unless all of these are true: -1. either: - - an after-run was created after the last edit (required for - `python_structural` and `governance_config` patches); or - - `check_patch_contract(mode="verify")` derived a profile that does not - require `after_run_id` (`documentation_only` or `non_python_patch`); -2. `manage_change_intent(action="check", intent_id=..., changed_files=...)` - or `diff_ref=...` returned `clean`; -3. `check_patch_contract(mode="verify", intent_id=..., after_run_id=...)` - returned `accepted`; `after_run_id` is required only when the derived - verification profile requires it; -4. any final summary claims passed `validate_review_claims` — skip only - when `claim_validation_recommended` is explicitly `false` in the - controller response, not by agent judgment; -5. `manage_change_intent(action="clear", intent_id=...)` succeeded. +1. 
`finish_controlled_change` returned `status: "accepted"` (or + `"accepted_with_external_changes"`); OR, in the atomic fallback + workflow, `manage_change_intent(action="check")` returned `clean` or + `expanded`, `check_patch_contract(mode="verify")` returned `accepted`, + and `manage_change_intent(action="clear")` succeeded; +2. `scope_check.status` is `"clean"` or `"expanded"`; +3. `intent_cleared` is `true` in the finish response; OR + `manage_change_intent(action="clear")` succeeded; +4. if `claims` is present in the finish response and `claims.valid` is + `false`, report the warnings — do not suppress; +5. claim validation was handled by `finish_controlled_change` when + `review_text` was provided and `claim_validation_recommended` was + `true`; for atomic workflow, final summary claims passed + `validate_review_claims` unless `claim_validation_recommended` was + explicitly `false`. + +If status is `accepted_with_external_changes`, report the external-change +advisory instead of presenting the patch as fully clean. If any item cannot be completed, report `BLOCKED` or `UNVERIFIED`, include the `intent_id`, and state the exact missing step. Do not present the work as @@ -152,7 +180,9 @@ finished. The controller derives a **verification profile** from actual changed files. The profile determines which structural checks apply. The agent does not choose -the profile — it is computed by `check_patch_contract(mode="verify")`. +the profile — it is computed by `finish_controlled_change` (through +`check_patch_contract(mode="verify")` internally), or directly by +`check_patch_contract(mode="verify")` in the atomic workflow. 
| Profile | When | `after_run` required | Structural checks | |-------------------------|-----------------------------------------------------|----------------------|-------------------| diff --git a/codeclone/surfaces/mcp/_session_workflow_mixin.py b/codeclone/surfaces/mcp/_session_workflow_mixin.py new file mode 100644 index 00000000..f50bb967 --- /dev/null +++ b/codeclone/surfaces/mcp/_session_workflow_mixin.py @@ -0,0 +1,493 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Workflow-level orchestration for agent change control. + +``start_controlled_change`` and ``finish_controlled_change`` aggregate +atomic change-control steps into two workflow calls. They call existing +internal methods only — no new engine logic. + +Design invariants (phase-16 spec): +- No implicit ``analyze_repository``. +- No hidden boundary decisions. +- ``check`` before ``verify`` is mandatory (check writes state). +- Changed files resolved once from exactly one source. +- ``auto_clear`` only on ``accepted`` / ``accepted_with_external_changes``. +- Audit events are emitted by the internal methods, not duplicated here. +""" + +from __future__ import annotations + +from collections.abc import Sequence +from pathlib import Path +from typing import Final + +from . 
import _session_helpers as _helpers +from ._blast_radius import BlastRadiusResult +from ._intent import IntentRecord, IntentStatus +from ._patch_contract import PatchContractStatus +from ._session_claim_guard_mixin import _MCPSessionClaimGuardMixin +from ._session_shared import ( + CodeCloneMCPRunStore, + MCPRunRecord, + MCPServiceContractError, +) + +TRANSITIVE_SUMMARY_LIMIT: Final[int] = 10 + +VALID_BLAST_RADIUS_DEPTHS: Final[frozenset[str]] = frozenset( + {"direct", "transitive", "auto"} +) + +_ACCEPTED_STATUSES: Final[frozenset[str]] = frozenset( + { + PatchContractStatus.ACCEPTED.value, + PatchContractStatus.ACCEPTED_EXTERNAL.value, + } +) + + +class _MCPSessionWorkflowMixin(_MCPSessionClaimGuardMixin): + """Workflow orchestration over atomic change-control primitives.""" + + _runs: CodeCloneMCPRunStore + _active_intents: dict[str, IntentRecord] + + # ------------------------------------------------------------------ + # start_controlled_change + # ------------------------------------------------------------------ + + def start_controlled_change( + self, + *, + root: str, + scope: dict[str, object], + intent: str, + expected_effects: Sequence[str] | None = None, + on_conflict: str | None = None, + strictness: str = "ci", + ttl_seconds: int | None = None, + blast_radius_depth: str = "auto", + ) -> dict[str, object]: + validated_depth = _validated_blast_radius_depth(blast_radius_depth) + root_path = _helpers._resolve_root(root) + + # 1. Workspace check + workspace = self._list_workspace_intents(root=root) + + # 2. Root-aware run resolution (not _runs.get(None) — multi-repo safe) + record = self._latest_run_for_root(root_path) + if record is None: + return { + "status": "needs_analysis", + "intent_id": None, + "edit_allowed": False, + "root": str(root_path), + "message": ( + "No analysis run available for this root. " + "Call analyze_repository first." + ), + "workspace": _workspace_summary(workspace), + } + + # 3. 
Declare intent + declare_payload = self._declare_change_intent( + run_id=record.run_id, + scope=scope, + intent=intent, + expected_effects=expected_effects, + ttl_seconds=ttl_seconds, + on_conflict=on_conflict, + ) + + intent_id = str(declare_payload.get("intent_id", "")) + status = str(declare_payload.get("status", "")) + + # Queued: no blast radius or budget + if status == IntentStatus.QUEUED.value: + return { + "intent_id": intent_id, + "status": "queued", + "run_id": _helpers._short_run_id(record.run_id), + "blocked_by": declare_payload.get("blocked_by", []), + "queue_position": declare_payload.get("queue_position", 1), + "before_run_pinned": declare_payload.get("before_run_pinned", False), + "edit_allowed": False, + "message": ( + "Intent queued behind active workspace intent. " + "Do not edit until promoted." + ), + } + + # 4. Blast radius (full payload, not just declare's subset) + with self._state_lock: + active_intent = self._active_intents.get(intent_id) + if active_intent is None: + raise MCPServiceContractError( + f"Intent {intent_id} not found after declare." + ) + + blast_result = self._blast_radius_result( + record=record, + files=active_intent.scope.allowed_paths, + depth="direct", + forbidden_patterns=active_intent.scope.forbidden, + ) + blast_payload = blast_result.to_payload() + + # 5. Transitive summary (auto-escalated or explicit) + transitive_summary = self._compute_transitive_summary( + record=record, + intent=active_intent, + blast_result=blast_result, + depth=validated_depth, + ) + if transitive_summary is not None: + blast_payload["transitive_summary"] = transitive_summary + + # 6. Budget + budget_payload = self._patch_contract_budget( + run_id=record.run_id, + intent_id=intent_id, + strictness=self._validated_strictness(strictness), + ) + + # 7. 
Compose response + return { + "intent_id": intent_id, + "status": "active", + "run_id": _helpers._short_run_id(record.run_id), + "workspace": _workspace_summary(workspace), + "blast_radius": blast_payload, + "budget": _budget_summary(budget_payload), + "scope": active_intent.scope.to_payload(), + "edit_allowed": True, + "message": self._start_message(blast_payload, budget_payload), + } + + # ------------------------------------------------------------------ + # finish_controlled_change + # ------------------------------------------------------------------ + + def finish_controlled_change( + self, + *, + intent_id: str, + changed_files: Sequence[str] | None = None, + diff_ref: str | None = None, + after_run_id: str | None = None, + review_text: str | None = None, + create_receipt: bool = True, + auto_clear: bool = True, + strictness: str = "ci", + ) -> dict[str, object]: + # 1. Resolve intent + record, active_intent = self._resolve_intent( + run_id=None, + intent_id=intent_id, + ) + + # Queued intents cannot be verified + if active_intent.status == IntentStatus.QUEUED: + return { + "intent_id": intent_id, + "status": "unverified", + "reason": "intent_not_active", + "scope_check": None, + "verification": None, + "claims": None, + "receipt": None, + "intent_cleared": False, + "user_action_required": False, + "next_step": ( + "Promote the queued intent before editing or verification." + ), + "message": ("Queued intent must be promoted before verification."), + } + + # 2. Resolve changed files — exactly one source + resolved_files = self._resolve_changed_files_once( + root_path=record.root, + changed_files=changed_files, + diff_ref=diff_ref, + ) + + # 3. 
Check (writes IntentRecord.check_result — required for receipt) + check_payload = self._check_change_intent( + run_id=None, + intent_id=intent_id, + diff_ref=None, + changed_files=resolved_files, + ) + check_status = str(check_payload.get("status", "")) + + # Expired intent + if check_status == IntentStatus.EXPIRED.value: + return { + "intent_id": intent_id, + "status": "expired", + "reason": "report_digest_mismatch", + "scope_check": check_payload, + "verification": None, + "claims": None, + "receipt": None, + "intent_cleared": False, + "user_action_required": False, + "next_step": ( + "Intent was declared against a different report. " + "Do not redeclare on the after-run — use the " + "original intent_id with the original before_run_id." + ), + "message": "Intent expired: report digest mismatch.", + } + + # 4. Scope violation — early exit + if check_status == IntentStatus.VIOLATED.value: + return { + "intent_id": intent_id, + "status": "violated", + "reason": "scope_violation", + "scope_check": check_payload, + "verification": None, + "claims": None, + "receipt": None, + "intent_cleared": False, + "user_action_required": True, + "next_step": ( + "Redeclare intent with expanded scope, or " + "remove the out-of-scope changes." + ), + "message": ("Patch touched files outside declared scope."), + } + + # 5. Verify (before_run_id auto-resolves from intent) + verify_payload = self._patch_contract_verify( + before_run_id=None, + after_run_id=after_run_id, + intent_id=intent_id, + strictness=self._validated_strictness(strictness), + diff_ref=None, + changed_files=resolved_files, + ) + verify_status = str(verify_payload.get("status", "")) + + # 6. 
Non-accepted verification — return without receipt/clear + if verify_status not in _ACCEPTED_STATUSES: + verify_reason = str(verify_payload.get("reason", "")) + return { + "intent_id": intent_id, + "status": verify_status, + "reason": verify_reason, + "scope_check": check_payload, + "verification": verify_payload, + "claims": None, + "receipt": None, + "intent_cleared": False, + "user_action_required": verify_status + == PatchContractStatus.VIOLATED.value, + "next_step": verify_payload.get("next_step"), + "message": str(verify_payload.get("message", "")), + } + + # 7. Claim validation (conditional) + claims_payload = self._conditional_claim_validation( + record=record, + verify_payload=verify_payload, + review_text=review_text, + ) + + # 8. Receipt (after claims, before clear) + receipt_payload: dict[str, object] | None = None + receipt_error: str | None = None + if create_receipt: + try: + receipt_payload = self.create_review_receipt( + run_id=record.run_id, + intent_id=intent_id, + ) + except MCPServiceContractError as exc: + receipt_error = str(exc) + + # 9. Auto-clear (only on accepted, only if receipt didn't fail) + intent_cleared = False + if auto_clear and verify_status in _ACCEPTED_STATUSES and receipt_error is None: + self._clear_change_intent(intent_id=intent_id) + intent_cleared = True + + # 10. 
Compose response + result: dict[str, object] = { + "intent_id": intent_id, + "status": verify_status, + "reason": verify_payload.get("reason"), + "scope_check": check_payload, + "verification": verify_payload, + "claims": claims_payload, + "receipt": receipt_payload, + "intent_cleared": intent_cleared, + "user_action_required": False, + "message": self._finish_message( + verify_status=verify_status, + intent_cleared=intent_cleared, + receipt_error=receipt_error, + ), + } + if receipt_error is not None: + result["receipt_error"] = receipt_error + return result + + # ------------------------------------------------------------------ + # Internal helpers (no new engine logic) + # ------------------------------------------------------------------ + + def _latest_run_for_root(self, root_path: Path) -> MCPRunRecord | None: + """Find the latest run matching the requested root (root-safe).""" + resolved = root_path.resolve() + latest: MCPRunRecord | None = None + for record in self._runs.records(): + if record.root == resolved: + latest = record + return latest + + def _resolve_changed_files_once( + self, + *, + root_path: Path, + changed_files: Sequence[str] | None, + diff_ref: str | None, + ) -> tuple[str, ...]: + """Resolve changed files from exactly one source. + + Contract: providing both or neither is a contract error. + ``diff_ref`` is resolved here and never passed further. + """ + has_files = changed_files is not None and len(changed_files) > 0 + has_ref = diff_ref is not None and str(diff_ref).strip() != "" + if has_files and has_ref: + raise MCPServiceContractError( + "finish_controlled_change requires exactly one of " + "changed_files or diff_ref, not both." + ) + if not has_files and not has_ref: + raise MCPServiceContractError( + "finish_controlled_change requires changed_files or diff_ref." 
+ ) + if has_ref: + return self._git_diff_paths(root_path=root_path, git_diff_ref=str(diff_ref)) + assert changed_files is not None + return self._normalize_changed_paths(root_path=root_path, paths=changed_files) + + def _compute_transitive_summary( + self, + *, + record: MCPRunRecord, + intent: IntentRecord, + blast_result: BlastRadiusResult, + depth: str, + ) -> dict[str, object] | None: + """Compute bounded transitive summary when appropriate.""" + needs_transitive = depth == "transitive" or ( + depth == "auto" and blast_result.radius_level == "high" + ) + if not needs_transitive: + return None + + transitive_result = self._blast_radius_result( + record=record, + files=intent.scope.allowed_paths, + depth="transitive", + forbidden_patterns=intent.scope.forbidden, + ) + all_transitive = transitive_result.transitive_dependents + shown = min(len(all_transitive), TRANSITIVE_SUMMARY_LIMIT) + return { + "total": len(all_transitive), + "shown": shown, + "truncated": shown < len(all_transitive), + "top_paths": list(all_transitive[:TRANSITIVE_SUMMARY_LIMIT]), + } + + def _conditional_claim_validation( + self, + *, + record: MCPRunRecord, + verify_payload: dict[str, object], + review_text: str | None, + ) -> dict[str, object] | None: + """Run claim validation only when both conditions are met.""" + if not review_text: + return None + if not verify_payload.get("claim_validation_recommended"): + return None + return self.validate_review_claims( + text=review_text, + run_id=record.run_id, + ) + + @staticmethod + def _start_message( + blast_payload: dict[str, object], + budget_payload: dict[str, object], + ) -> str: + parts: list[str] = ["Intent active."] + radius_level = str(blast_payload.get("radius_level", "low")) + if radius_level == "high": + parts.append("Blast radius is high — review transitive summary.") + gate = budget_payload.get("gate_preview") + if isinstance(gate, dict) and gate.get("would_fail"): + parts.append("Budget is already outside CI thresholds.") + else: 
+ parts.append("Budget is within CI thresholds.") + return " ".join(parts) + + @staticmethod + def _finish_message( + *, + verify_status: str, + intent_cleared: bool, + receipt_error: str | None, + ) -> str: + if receipt_error is not None: + return ( + "Change verified but receipt creation failed. " + "Intent not cleared for retry." + ) + if intent_cleared: + return "Change verified and completed. Intent cleared." + return f"Change verified (status: {verify_status}). Intent active." + + +def _validated_blast_radius_depth(value: str) -> str: + if value not in VALID_BLAST_RADIUS_DEPTHS: + expected = ", ".join(sorted(VALID_BLAST_RADIUS_DEPTHS)) + raise MCPServiceContractError( + f"Invalid value for blast_radius_depth: {value!r}. " + f"Expected one of: {expected}." + ) + return value + + +def _workspace_summary(workspace: dict[str, object]) -> dict[str, object]: + """Extract workspace summary for the start response.""" + return { + "concurrent_intents": workspace.get("workspace_intents", []), + "total_agents": workspace.get("total_agents", 0), + "stale_count": workspace.get("stale_count", 0), + } + + +def _budget_summary(budget_payload: dict[str, object]) -> dict[str, object]: + """Extract budget-relevant fields for the start response.""" + return { + "strictness": budget_payload.get("strictness"), + "budgets": budget_payload.get("budgets"), + "current_state": budget_payload.get("current_state"), + "headroom": budget_payload.get("headroom"), + "gate_preview": budget_payload.get("gate_preview"), + "message": budget_payload.get("message"), + } + + +__all__ = ["_MCPSessionWorkflowMixin"] diff --git a/codeclone/surfaces/mcp/server.py b/codeclone/surfaces/mcp/server.py index 841fb588..81331fb0 100644 --- a/codeclone/surfaces/mcp/server.py +++ b/codeclone/surfaces/mcp/server.py @@ -45,11 +45,12 @@ "bounded metrics drill-down, and prefer generate_pr_summary(format='markdown') " "unless machine JSON is required. 
Coverage join accepts external Cobertura " "XML as a current-run signal and does not become baseline truth. Pass an " - "absolute repository root to analysis tools. For file edits, call " - "manage_change_intent(action='list_workspace', root=...) before analysis, " - "then analyze, declare intent, inspect blast radius and patch budget, edit " - "within scope, renew intent lease before long blind windows, re-analyze, " - "verify, validate review claims, and clear intent. " + "absolute repository root to analysis tools. For file edits, prefer " + "start_controlled_change and finish_controlled_change for the complete " + "edit cycle. Use manage_change_intent for queue/promote/recover " + "operations. Atomic tools (get_blast_radius, check_patch_contract, " + "validate_review_claims, create_review_receipt) remain available for " + "advanced inspection and diagnostic use. " "If concurrent intents overlap, narrow scope or coordinate. This server never " "updates baselines and never mutates source files, analysis cache, or reports; " "it may write ephemeral workspace coordination state under " @@ -911,6 +912,78 @@ def mark_finding_reviewed( def list_reviewed_findings(run_id: str | None = None) -> dict[str, object]: return service.list_reviewed_findings(run_id=run_id) + @tool( + title="Start Controlled Change", + description=( + "Pre-edit workflow: check workspace for concurrent intents, " + "declare change intent with scope, compute blast radius " + "(direct + bounded transitive for high-radius changes), and " + "return patch budget — all in one call. Requires an existing " + "analysis run for the given root; call analyze_repository " + "first if needed. Returns intent_id for finish_controlled_change. " + "Does not run analysis implicitly." 
+ ), + annotations=session_tool, + structured_output=True, + ) + def start_controlled_change( + root: str, + scope: dict[str, object], + intent: str, + expected_effects: list[str] | None = None, + on_conflict: str | None = None, + strictness: str = "ci", + ttl_seconds: int | None = None, + blast_radius_depth: str = "auto", + ) -> dict[str, object]: + return service.start_controlled_change( + root=root, + scope=scope, + intent=intent, + expected_effects=expected_effects, + on_conflict=on_conflict, + strictness=strictness, + ttl_seconds=ttl_seconds, + blast_radius_depth=blast_radius_depth, + ) + + @tool( + title="Finish Controlled Change", + description=( + "Post-edit workflow: verify scope compliance, run patch " + "contract verification, validate review claims (when " + "review_text provided and recommended), generate review " + "receipt, and clear intent — all in one call. Pass the " + "intent_id from start_controlled_change. For Python " + "structural or governance config changes, pass after_run_id " + "from a post-edit analyze_repository call. For docs-only or " + "non-Python changes, changed_files or diff_ref evidence is " + "sufficient without after_run_id." 
+ ), + annotations=session_tool, + structured_output=True, + ) + def finish_controlled_change( + intent_id: str, + changed_files: list[str] | None = None, + diff_ref: str | None = None, + after_run_id: str | None = None, + review_text: str | None = None, + create_receipt: bool = True, + auto_clear: bool = True, + strictness: str = "ci", + ) -> dict[str, object]: + return service.finish_controlled_change( + intent_id=intent_id, + changed_files=changed_files, + diff_ref=diff_ref, + after_run_id=after_run_id, + review_text=review_text, + create_receipt=create_receipt, + auto_clear=auto_clear, + strictness=strictness, + ) + @tool( title="Manage Change Intent", description=( diff --git a/codeclone/surfaces/mcp/service.py b/codeclone/surfaces/mcp/service.py index 8a23f942..3fdff5fb 100644 --- a/codeclone/surfaces/mcp/service.py +++ b/codeclone/surfaces/mcp/service.py @@ -84,6 +84,18 @@ def validate_review_claims( ) -> dict[str, object]: return self._run_dict("validate_review_claims", **params) + def start_controlled_change( + self: _RunDictService, + **params: object, + ) -> dict[str, object]: + return self._run_dict("start_controlled_change", **params) + + def finish_controlled_change( + self: _RunDictService, + **params: object, + ) -> dict[str, object]: + return self._run_dict("finish_controlled_change", **params) + def generate_pr_summary( self: _RunDictService, **params: object, @@ -348,6 +360,26 @@ def _apply_public_method_signatures() -> None: _kwonly("run_id", "str | None", None), _kwonly("note", "str | None", None), ), + "start_controlled_change": ( + _kwonly("root", "str"), + _kwonly("scope", "dict[str, object]"), + _kwonly("intent", "str"), + _kwonly("expected_effects", "Sequence[str] | None", None), + _kwonly("on_conflict", "str | None", None), + _kwonly("strictness", "str", "ci"), + _kwonly("ttl_seconds", "int | None", None), + _kwonly("blast_radius_depth", "str", "auto"), + ), + "finish_controlled_change": ( + _kwonly("intent_id", "str"), + 
_kwonly("changed_files", "Sequence[str] | None", None), + _kwonly("diff_ref", "str | None", None), + _kwonly("after_run_id", "str | None", None), + _kwonly("review_text", "str | None", None), + _kwonly("create_receipt", "bool", True), + _kwonly("auto_clear", "bool", True), + _kwonly("strictness", "str", "ci"), + ), } self_param = inspect.Parameter("self", inspect.Parameter.POSITIONAL_OR_KEYWORD) for name, params in signature_specs.items(): diff --git a/codeclone/surfaces/mcp/session.py b/codeclone/surfaces/mcp/session.py index 3c0959dd..f200497c 100644 --- a/codeclone/surfaces/mcp/session.py +++ b/codeclone/surfaces/mcp/session.py @@ -35,7 +35,6 @@ resolve_clone_baseline_state, resolve_metrics_baseline_state, ) -from ._session_claim_guard_mixin import _MCPSessionClaimGuardMixin from ._session_shared import ( _REPORT_DUMMY_PATH, DEFAULT_BLOCK_MIN_LOC, @@ -76,6 +75,7 @@ process, report, ) +from ._session_workflow_mixin import _MCPSessionWorkflowMixin __all__ = [ "DEFAULT_MCP_HISTORY_LIMIT", @@ -96,7 +96,7 @@ ] -class MCPSession(_MCPSessionClaimGuardMixin): +class MCPSession(_MCPSessionWorkflowMixin): def __init__( self, *, diff --git a/docs/book/20-mcp-interface.md b/docs/book/20-mcp-interface.md index b12d79b0..cec3474c 100644 --- a/docs/book/20-mcp-interface.md +++ b/docs/book/20-mcp-interface.md @@ -117,15 +117,22 @@ drill into one finding or one hotspot family. 
| `check_cohesion` | `run_id` or `root`, `path`, `max_results`, `detail_level` | Cohesion hotspot query | | `check_dead_code` | `run_id` or `root`, `path`, `min_severity`, `max_results`, `detail_level` | Dead code query | -### Change control tools +### Workflow tools (preferred) + +| Tool | Key parameters | Purpose | +|-------------------------------|---------------------------------------------------------------------------|---------------------------------------------------------------------------------------| +| `start_controlled_change` | `root`, `scope`, `intent`, `expected_effects`, `on_conflict`, `strictness`, `blast_radius_depth` | Pre-edit: workspace check + declare + blast radius + budget in one call. Returns `intent_id` for `finish`. Does not run analysis | +| `finish_controlled_change` | `intent_id`, `changed_files` or `diff_ref`, `after_run_id`, `review_text`, `create_receipt`, `auto_clear` | Post-edit: scope check + verify + claims + receipt + clear in one call. `after_run_id` required for Python structural / governance config profiles | + +### Atomic change control tools (advanced / diagnostic) | Tool | Key parameters | Purpose | |--------------------------|-------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------| -| `manage_change_intent` | `action`, `root`, `run_id`, `intent_id`, `scope`, `on_conflict`, `ttl_seconds`, `lease_seconds`, `changed_files` or `diff_ref` | Intent lifecycle: declare, get, check, clear, renew, promote, list_workspace, gc_workspace, recover, reset_workspace. `on_conflict="queue"` creates a queued intent when scope overlaps a foreign active. `action="promote"` transitions queued → active. 
Declare returns `workspace_relations` with forbidden-scope signals | -| `get_blast_radius` | `run_id`, `files`, `depth`, `include` | Pre-change risk boundary: dependents, clone cohorts, do-not-touch, review context | -| `check_patch_contract` | `mode`, `run_id`, `before_run_id`, `after_run_id`, `intent_id`, `strictness`, `changed_files` or `diff_ref` | Budget query or post-edit verification. Verify auto-resolves `before_run_id` from intent when omitted. Non-accepted responses include `next_step` hint and `claim_validation_recommended` flag | -| `create_review_receipt` | `run_id`, `intent_id`, `format`, `include_blast_radius`, `include_patch_contract` | Deterministic audit artifact: provenance, scope, reviewed findings, patch status | -| `validate_review_claims` | `text`, `run_id`, `require_citations` | Citation-based overclaim detection against stored run semantics | +| `manage_change_intent` | `action`, `root`, `run_id`, `intent_id`, `scope`, `on_conflict`, `ttl_seconds`, `lease_seconds`, `changed_files` or `diff_ref` | Intent lifecycle: declare, get, check, clear, renew, promote, list_workspace, gc_workspace, recover, reset_workspace. Use for queue/promote/recover operations alongside workflow tools | +| `get_blast_radius` | `run_id`, `files`, `depth`, `include` | Pre-change risk boundary: full transitive graph, custom include filters | +| `check_patch_contract` | `mode`, `run_id`, `before_run_id`, `after_run_id`, `intent_id`, `strictness`, `changed_files` or `diff_ref` | Manual budget query or step-by-step verification | +| `create_review_receipt` | `run_id`, `intent_id`, `format`, `include_blast_radius`, `include_patch_contract` | Manual receipt generation | +| `validate_review_claims` | `text`, `run_id`, `require_citations` | Standalone citation-based overclaim detection | ??? 
info "Blast radius: do_not_touch vs review_context" `do_not_touch` is limited to actionable negative context: baselines, diff --git a/docs/book/24-structural-change-controller.md b/docs/book/24-structural-change-controller.md index da415486..ca445194 100644 --- a/docs/book/24-structural-change-controller.md +++ b/docs/book/24-structural-change-controller.md @@ -509,3 +509,33 @@ MCP payload footprint: ~3,816 tokens (o200k_base, 7 tool calls) - The `codeclone/budget/` module never imports from `codeclone/surfaces/` or `codeclone/audit/`. Dependency direction: `audit -> budget`, never reverse. - Base `codeclone` never depends on `tiktoken`. The import is lazy and guarded. + +## Workflow consolidation + +The atomic change control workflow requires 7–11 MCP tool calls per edit +cycle. Two **workflow-level tools** aggregate these steps while preserving +the same evidence, state updates, and boundary checks: + +| Tool | Replaces | Calls | +|------|----------|-------| +| `start_controlled_change` | workspace check + declare + blast radius + budget | 1 instead of 4 | +| `finish_controlled_change` | scope check + verify + claims + receipt + clear | 1 instead of 4–6 | + +Workflow tools are orchestration shortcuts. They call the same internal +methods as the atomic tools and emit the same semantic audit events. +`analyze_repository` remains a separate explicit call — workflow tools +never run analysis implicitly. + +**Tool tiers:** + +- **Normal workflow:** `analyze_repository`, `start_controlled_change`, + `finish_controlled_change` — every edit cycle. +- **Queue/recovery:** `manage_change_intent` (promote, recover, reset, + renew) — multi-agent coordination, crash recovery. +- **Advanced/diagnostic:** `get_blast_radius`, `check_patch_contract`, + `validate_review_claims`, `create_review_receipt` — deep inspection, + step-by-step debugging. + +The same semantic audit events are preserved regardless of which +approach the agent uses. 
Atomic tools remain available for backward +compatibility and advanced use cases. diff --git a/docs/mcp.md b/docs/mcp.md index 25d9d8cb..29f8d7e9 100644 --- a/docs/mcp.md +++ b/docs/mcp.md @@ -402,6 +402,37 @@ analyze_changed_paths(changed_paths=[...] or git_diff_ref="HEAD~1") ### Change control +```mermaid +sequenceDiagram + participant Agent + participant MCP as CodeClone MCP + + Note over Agent,MCP: Primary workflow (workflow tools) + Agent->>MCP: analyze_repository + MCP-->>Agent: run_id + Agent->>MCP: start_controlled_change(scope, intent) + MCP-->>Agent: intent_id, blast_radius, budget + Note over Agent: edit files + opt Python structural / governance config + Agent->>MCP: analyze_repository + MCP-->>Agent: after_run_id + end + Agent->>MCP: finish_controlled_change(intent_id, changed_files, after_run_id?) + MCP-->>Agent: status, receipt, intent_cleared +``` + +!!! info "Tool tiers" + + | Tier | Tools | When to use | + |------|-------|-------------| + | Normal workflow | `analyze_repository`, `start_controlled_change`, `finish_controlled_change` | Every edit cycle | + | Queue/recovery | `manage_change_intent` (promote, recover, reset, renew) | Multi-agent coordination, crash recovery | + | Advanced/diagnostic | `get_blast_radius`, `check_patch_contract`, `validate_review_claims`, `create_review_receipt` | Deep inspection, step-by-step debugging | + +### Detailed atomic workflow + +For older MCP servers or step-by-step debugging: + ``` manage_change_intent(action="list_workspace") -> analyze_repository @@ -409,10 +440,9 @@ manage_change_intent(action="list_workspace") -> get_blast_radius(files=[...]) -> check_patch_contract(mode="budget") -> [edit within scope] - -> manage_change_intent(action="renew", intent_id=...) # optional: long edits -> analyze_repository # after-run -> manage_change_intent(action="check", intent_id=..., changed_files=[...]) - -> check_patch_contract(mode="verify", after_run_id=..., intent_id=...) 
# before_run_id auto-resolved + -> check_patch_contract(mode="verify", after_run_id=..., intent_id=...) -> validate_review_claims(text="...") # if claim_validation_recommended -> create_review_receipt -> manage_change_intent(action="clear") @@ -421,14 +451,11 @@ manage_change_intent(action="list_workspace") ### Multi-agent queue ``` -manage_change_intent(action="list_workspace") # foreign_active found - -> analyze_repository - -> manage_change_intent(action="declare", scope={...}, on_conflict="queue") # queued behind foreign +start_controlled_change(scope={...}, on_conflict="queue") # queued behind foreign -> [wait for foreign intent to clear] -> manage_change_intent(action="promote", intent_id=...) # queued → active - -> get_blast_radius(files=[...]) - -> check_patch_contract(mode="budget") - -> [edit within scope, then verify as normal] + -> [edit within scope] + -> finish_controlled_change(intent_id=..., changed_files=[...]) # verify + clear ``` ### Coverage review diff --git a/plugins/codeclone/skills/codeclone-change-control/SKILL.md b/plugins/codeclone/skills/codeclone-change-control/SKILL.md index 3a155fbc..7b4db1eb 100644 --- a/plugins/codeclone/skills/codeclone-change-control/SKILL.md +++ b/plugins/codeclone/skills/codeclone-change-control/SKILL.md @@ -30,129 +30,123 @@ without change control after this skill is selected. ## Rules - Use MCP tools only when invoked through the CodeClone plugin. -- Call `manage_change_intent(action="list_workspace", root=...)` before - analysis when the connected server supports it. -- If no latest MCP run exists after the workspace check, call - `analyze_repository` yourself before declaring intent. -- Declare intent before editing. -- Do not silently expand scope. +- For workflow tools, `start_controlled_change` performs workspace + coordination. For atomic fallback, call + `manage_change_intent(action="list_workspace", root=...)` before + analysis when supported. 
+- If no valid recent run exists for the same absolute root, call + `analyze_repository` before `start_controlled_change`. +- Declare intent before editing; in the primary workflow this means + `start_controlled_change` must return `status: "active"` before edits. +- If the fix requires files outside declared scope, stop before editing + them. Get user approval unless expansion was already explicitly + allowed, then call `start_controlled_change` again with the expanded + scope. Continue only when the expanded intent is active. Do not edit + extra files based on blast-radius context alone. - If concurrent workspace intents overlap your files, prefer `on_conflict="queue"` for follow-up work. Ask the user only when immediate editing is required or queue is not appropriate. - Treat blast-radius dependents and clone cohorts as review context, not permission to modify. - Treat `do_not_touch` as a boundary unless the user explicitly expands scope. + Escalate to user only if the edit requires touching them. - Treat `review_context` as context, not an edit ban. -- Do not update baselines, analysis cache, or generated reports as part of a - functional change. +- Do not update baselines, CodeClone state/cache, analysis cache, canonical + reports, or generated state as part of a functional change. - Do not fall back to CLI or local report files. - CodeClone is the source of truth — do not reinterpret findings independently. - Never auto-suppress findings or mutate CodeClone baseline state. - Run routine controller steps automatically. Queue blocked follow-up work automatically — do not ask before queueing. Ask the user only when: scope - expansion is needed, a `do_not_touch` path must be touched, patch contract - returned `violated` or `unverified`, or baseline/cache/generated state would - be modified. 
+ expansion is needed and was not already explicitly allowed, a `do_not_touch` + path must be touched, patch contract returned `violated` or returned + `unverified` and the agent cannot execute the deterministic `next_step`, or + baseline/CodeClone state/cache/generated state would be modified. ## Workflow +### Primary workflow + ``` -manage_change_intent(action="list_workspace", root=...) -→ analyze_repository # before-run -→ manage_change_intent(action="declare") # intent bound to before-run -→ get_blast_radius -→ check_patch_contract(mode="budget") +analyze_repository # before-run +→ start_controlled_change(root=..., scope={...}, intent="...") → edit declared files -→ analyze_repository # after-run (skip for docs-only) -→ manage_change_intent(action="check", intent_id=..., changed_files=[...]) -→ check_patch_contract(mode="verify", after_run_id=..., intent_id=...) # before_run_id auto-resolved -→ validate_review_claims # skip only if claim_validation_recommended is explicitly false -→ create_review_receipt -→ manage_change_intent(action="clear") +→ analyze_repository # after-run (profile-dependent) +→ finish_controlled_change(intent_id=..., changed_files=[...], after_run_id=...) ``` -The intent stays bound to the before-run. After re-analyze, pass `intent_id` -explicitly to `check` and `verify`; without it, `_resolve_intent` resolves by -latest run id and misses the intent. Do not redeclare on the after-run: -`verify` compares the intent's `report_digest` against the before-run, and a -redeclared intent would cause an `expired` mismatch. Use `diff_ref=...` instead -of `changed_files=[...]` when the changed set should come from git. +Use this workflow when the connected MCP server supports +`start_controlled_change` and `finish_controlled_change`. -Older MCP servers may not support `list_workspace`, `validate_review_claims`, -or `create_review_receipt`. These legacy-compatible steps may be skipped when -unavailable, and the summary must say so explicitly. 
Do not skip core edit -control steps: `analyze_repository`, `declare`, `check`, and `verify`. Keep -the pre-edit `run_id` as `before_run_id`; verify against the explicit -after-run produced after the edit. +`start_controlled_change` returns workspace state, blast radius (direct +dependents, structural risk, do-not-touch, review context), and patch +budget in a single call. If `status: "needs_analysis"`, call +`analyze_repository` first. -## Workspace check +`finish_controlled_change` handles scope check, patch verification, +claim validation, review receipt, and intent cleanup. If +`user_action_required: true`, stop and follow the `next_step` hint. -Before analysis, call: +Workflow profiles determine which steps are needed: -``` -manage_change_intent(action="list_workspace", root="/absolute/repo") -``` +- **Python structural / governance config**: + `analyze` → `start` → edit → `analyze` → `finish(after_run_id=...)` +- **Documentation-only / non-Python**: + `analyze` → `start` → edit → `finish(changed_files=[...])` + For `non_python_patch`, report controller-stated limitations and do not + present the result as full structural verification. -If it returns active intents from other agents, compare their `scope` to your -planned files. A hard overlap means another agent claimed the same primary file. -A soft overlap means your primary file is in another agent's related context, or -the reverse. +Do not mix workflow and atomic verification paths in the same edit +cycle. Queue/promote/recover via `manage_change_intent` is allowed. -Do not ask the user before queueing blocked follow-up work that can wait. -Prefer `on_conflict="queue"` in the declare step to queue behind the foreign -intent. 
Ask the user only if immediate editing is required, recovery/reset is -needed, or a `do_not_touch` path must be touched: +### Queue/promote workflow -``` -manage_change_intent(action="declare", scope={...}, on_conflict="queue") -``` - -A queued intent does not own scope, does not pin the before-run, and cannot pass -verification. You may call `check_patch_contract(mode="budget")` on a queued -intent for planning, but it returns `edit_allowed=false`. When the foreign -intent clears, promote it: - -``` -manage_change_intent(action="promote", intent_id=...) -``` +When `start_controlled_change` returns `status: "queued"`: -If promote returns `status="queued"` with `blocking_count`, the foreign intent is -still active — wait and retry. If it returns `reason="before_run_evicted"`, -re-analyze and redeclare the intent. +1. Do not edit until promoted. +2. Wait for the foreign intent to clear. +3. `manage_change_intent(action="promote", intent_id=...)` + — edit only after promote returns `status: "active"` + — if `before_run_evicted`: re-analyze and re-start -## Legacy workflow +### Atomic workflow (fallback) -Use this only when `list_workspace` is unavailable in the connected MCP server: +Use the atomic workflow only when `start_controlled_change` or +`finish_controlled_change` are unavailable, or for advanced operations +(queue management, recovery, step-by-step debugging): ``` -analyze_repository +manage_change_intent(action="list_workspace", root=...) +→ analyze_repository → manage_change_intent(action="declare") → get_blast_radius → check_patch_contract(mode="budget") → edit declared files → analyze_repository → manage_change_intent(action="check", intent_id=..., changed_files=[...]) -→ check_patch_contract(mode="verify", before_run_id=..., after_run_id=..., intent_id=...) +→ check_patch_contract(mode="verify", after_run_id=..., intent_id=...) 
→ validate_review_claims → create_review_receipt -→ manage_change_intent(action="clear") # if supported +→ manage_change_intent(action="clear") ``` -## Intent first +Older MCP servers may not support `start_controlled_change`, +`finish_controlled_change`, `validate_review_claims`, or +`create_review_receipt`. Legacy-compatible steps may be skipped when +unavailable, and the summary must say so explicitly. -Before editing, call: +## Intent first -``` -manage_change_intent(action="declare") -``` +Before editing, call `start_controlled_change`. It declares the intent, +returns workspace coordination state, blast radius, and patch budget. -Declare: +Declare scope includes: -- intended files -- allowed related files -- forbidden files -- short intent -- expected effects +- intended files (`allowed_files`) +- allowed related files (`allowed_related`) +- forbidden files (`forbidden`) +- short intent description +- expected structural effects Example expected effects: @@ -161,76 +155,84 @@ Example expected effects: - no dependency cycle - no baseline update +Use `manage_change_intent(action="declare")` only in the atomic fallback +workflow or when explicitly following a controller-provided recovery path. + ## Scope expansion If the fix requires a file outside declared scope: 1. stop; 2. explain why the extra file is needed; -3. redeclare intent with the expanded scope; -4. continue only after the new intent is active. +3. get user approval unless the user already explicitly allowed expansion; +4. call `start_controlled_change` again with the expanded scope to get + a fresh intent with updated blast radius and budget; +5. continue only after the expanded intent is active. A patch that fixes the issue but expands scope silently is a failed patch. +Do not edit extra files based on blast-radius context alone. 
## Blast radius -Use: +`start_controlled_change` returns blast radius context in its response: +direct dependents, clone cohort members, structural risk signals, +do-not-touch paths, and review context. When the radius is high, a +bounded transitive summary is also included. -``` -get_blast_radius -``` +Use a separate `get_blast_radius(depth="transitive")` call only when +the bounded summary is insufficient and you need the full transitive +dependency graph. -Read the response this way: +Read the blast radius response this way: -- `direct_dependents` / `transitive_dependents`: review before changing public - behavior +- `direct_dependents`: review before changing public behavior - `clone_cohort_members`: comparison context, not automatic edit targets - `structural_risk`: risk context for review priority -- `do_not_touch`: paths that require explicit approval or a separate workflow +- `do_not_touch`: paths that require explicit approval; escalate to user + only if the edit requires touching them - `review_context`: supporting context, not a ban +- `transitive_summary`: downstream risk awareness (when present) ## Patch budget -Before editing, call: +Budget is included in the `start_controlled_change` response. Review that +budget before editing. Do not introduce new clone groups, dead code, +dependency cycles, API breaks, or baseline changes unless explicitly allowed. -``` -check_patch_contract(mode="budget") -``` - -Use the returned budget as the edit boundary. Do not introduce new clone groups, -dead code, dependency cycles, API breaks, or baseline changes unless explicitly -allowed. +Use `check_patch_contract(mode="budget")` only in the atomic fallback +workflow or for standalone planning, such as planning around a queued +intent. Budget on a queued intent is advisory and does not grant edit +permission. 
## Patch verification -After editing, run analysis again, then pass the original `intent_id` -explicitly: +After editing, call `finish_controlled_change`: ``` -manage_change_intent(action="check", intent_id=..., changed_files=[...]) -check_patch_contract(mode="verify", intent_id=..., after_run_id=...) +finish_controlled_change( + intent_id=..., + changed_files=[...], # or diff_ref=... + after_run_id=..., # required for python_structural / governance_config + review_text="...", # optional, for claim validation +) ``` -`before_run_id` auto-resolves from the intent record when `intent_id` is -provided. `after_run_id` is required only for `python_structural` and -`governance_config` profiles. For `documentation_only` and `non_python_patch`, -pass `changed_files` or `diff_ref` evidence and omit `after_run_id`. +The tool handles: scope check, patch contract verification, claim +validation, review receipt generation, and intent cleanup. -Use `diff_ref=...` instead of `changed_files=[...]` when the changed set should -come from git. +If the result is `user_action_required: true`, read the `next_step` +hint and follow it. Do not claim the patch is verified. -If the result is `unverified` or `violated`, read the `next_step` hint and -follow it. Do not claim the patch is verified. Do not invent a different -recovery path — the hint is deterministic and authoritative. - -If `claim_validation_recommended` is `true`, call `validate_review_claims` -before writing a summary. If it is explicitly `false`, skip claim validation. +If the result is `status: "unverified"`, the `next_step` hint tells +you what is missing (usually `after_run_id` for Python changes). ## Verification profiles The controller derives a **verification profile** from actual changed files -during `check_patch_contract(mode="verify")`. The profile determines which -structural checks apply. The agent does not choose the profile. 
+during `finish_controlled_change` (through the underlying verify path), or +directly during `check_patch_contract(mode="verify")` in the atomic workflow. +The profile determines which structural checks apply. The agent does not +choose the profile. | Profile | When | `after_run` required | Structural checks | |-------------------------|-------------------------------|----------------------|-------------------| @@ -257,11 +259,13 @@ Rules: ## Claim discipline -When writing a summary and `claim_validation_recommended` was `true`, call: +In the primary workflow, pass `review_text` to `finish_controlled_change` +when you want final summary claims validated. If claim validation is +recommended and `review_text` is provided, `finish` runs claim validation +and returns the result. -``` -validate_review_claims -``` +Use `validate_review_claims` directly only in the atomic fallback workflow +or when re-validating changed review text after `finish`. Do not claim: @@ -273,11 +277,10 @@ Do not claim: ## Review receipt -At the end, call: - -``` -create_review_receipt -``` +In the primary workflow, `finish_controlled_change` creates the review +receipt when `create_receipt=true` (default). Do not call +`create_review_receipt` separately unless using the atomic fallback +workflow or manually regenerating a receipt. 
The final user summary should include: @@ -286,23 +289,23 @@ The final user summary should include: - blast radius summary - patch contract status - remaining human decisions -- receipt location or payload, if returned +- receipt content, if returned in finish response ## Success criteria The task is complete only when: -- intent was declared before editing; if queued, it was promoted before editing -- blast radius was inspected +- `start_controlled_change` returned an active intent before editing; + if queued, it was promoted before editing +- blast radius was inspected (included in start response) - edits stayed inside declared scope, or expansion was explicit -- patch contract was checked: either an after-run was created (Python - structural, governance config) or verify derived a profile that does not - require it (documentation-only, non-Python) -- baseline/cache/generated state was not changed accidentally -- claims were validated when `claim_validation_recommended` was explicitly - `true` in the controller response, not skipped by agent judgment -- a review receipt was created when the server supports it; if unsupported, - the final summary states that receipt creation was unavailable +- `finish_controlled_change` returned `status: "accepted"` or + `"accepted_with_external_changes"`; `after_run_id` was provided when + required by the verification profile +- `intent_cleared` is `true` in the finish response +- baseline, CodeClone state/cache, and generated reports were not changed + accidentally +- if finish returned claims warnings, they were reported ## Non-goals diff --git a/plugins/cursor-codeclone/skills/codeclone-change-control/SKILL.md b/plugins/cursor-codeclone/skills/codeclone-change-control/SKILL.md index 3a155fbc..7b4db1eb 100644 --- a/plugins/cursor-codeclone/skills/codeclone-change-control/SKILL.md +++ b/plugins/cursor-codeclone/skills/codeclone-change-control/SKILL.md @@ -30,129 +30,123 @@ without change control after this skill is selected. 
## Rules - Use MCP tools only when invoked through the CodeClone plugin. -- Call `manage_change_intent(action="list_workspace", root=...)` before - analysis when the connected server supports it. -- If no latest MCP run exists after the workspace check, call - `analyze_repository` yourself before declaring intent. -- Declare intent before editing. -- Do not silently expand scope. +- For workflow tools, `start_controlled_change` performs workspace + coordination. For atomic fallback, call + `manage_change_intent(action="list_workspace", root=...)` before + analysis when supported. +- If no valid recent run exists for the same absolute root, call + `analyze_repository` before `start_controlled_change`. +- Declare intent before editing; in the primary workflow this means + `start_controlled_change` must return `status: "active"` before edits. +- If the fix requires files outside declared scope, stop before editing + them. Get user approval unless expansion was already explicitly + allowed, then call `start_controlled_change` again with the expanded + scope. Continue only when the expanded intent is active. Do not edit + extra files based on blast-radius context alone. - If concurrent workspace intents overlap your files, prefer `on_conflict="queue"` for follow-up work. Ask the user only when immediate editing is required or queue is not appropriate. - Treat blast-radius dependents and clone cohorts as review context, not permission to modify. - Treat `do_not_touch` as a boundary unless the user explicitly expands scope. + Escalate to user only if the edit requires touching them. - Treat `review_context` as context, not an edit ban. -- Do not update baselines, analysis cache, or generated reports as part of a - functional change. +- Do not update baselines, CodeClone state/cache, analysis cache, canonical + reports, or generated state as part of a functional change. - Do not fall back to CLI or local report files. 
- CodeClone is the source of truth — do not reinterpret findings independently. - Never auto-suppress findings or mutate CodeClone baseline state. - Run routine controller steps automatically. Queue blocked follow-up work automatically — do not ask before queueing. Ask the user only when: scope - expansion is needed, a `do_not_touch` path must be touched, patch contract - returned `violated` or `unverified`, or baseline/cache/generated state would - be modified. + expansion is needed and was not already explicitly allowed, a `do_not_touch` + path must be touched, patch contract returned `violated` or returned + `unverified` and the agent cannot execute the deterministic `next_step`, or + baseline/CodeClone state/cache/generated state would be modified. ## Workflow +### Primary workflow + ``` -manage_change_intent(action="list_workspace", root=...) -→ analyze_repository # before-run -→ manage_change_intent(action="declare") # intent bound to before-run -→ get_blast_radius -→ check_patch_contract(mode="budget") +analyze_repository # before-run +→ start_controlled_change(root=..., scope={...}, intent="...") → edit declared files -→ analyze_repository # after-run (skip for docs-only) -→ manage_change_intent(action="check", intent_id=..., changed_files=[...]) -→ check_patch_contract(mode="verify", after_run_id=..., intent_id=...) # before_run_id auto-resolved -→ validate_review_claims # skip only if claim_validation_recommended is explicitly false -→ create_review_receipt -→ manage_change_intent(action="clear") +→ analyze_repository # after-run (profile-dependent) +→ finish_controlled_change(intent_id=..., changed_files=[...], after_run_id=...) ``` -The intent stays bound to the before-run. After re-analyze, pass `intent_id` -explicitly to `check` and `verify`; without it, `_resolve_intent` resolves by -latest run id and misses the intent. 
Do not redeclare on the after-run: -`verify` compares the intent's `report_digest` against the before-run, and a -redeclared intent would cause an `expired` mismatch. Use `diff_ref=...` instead -of `changed_files=[...]` when the changed set should come from git. +Use this workflow when the connected MCP server supports +`start_controlled_change` and `finish_controlled_change`. -Older MCP servers may not support `list_workspace`, `validate_review_claims`, -or `create_review_receipt`. These legacy-compatible steps may be skipped when -unavailable, and the summary must say so explicitly. Do not skip core edit -control steps: `analyze_repository`, `declare`, `check`, and `verify`. Keep -the pre-edit `run_id` as `before_run_id`; verify against the explicit -after-run produced after the edit. +`start_controlled_change` returns workspace state, blast radius (direct +dependents, structural risk, do-not-touch, review context), and patch +budget in a single call. If `status: "needs_analysis"`, call +`analyze_repository` first. -## Workspace check +`finish_controlled_change` handles scope check, patch verification, +claim validation, review receipt, and intent cleanup. If +`user_action_required: true`, stop and follow the `next_step` hint. -Before analysis, call: +Workflow profiles determine which steps are needed: -``` -manage_change_intent(action="list_workspace", root="/absolute/repo") -``` +- **Python structural / governance config**: + `analyze` → `start` → edit → `analyze` → `finish(after_run_id=...)` +- **Documentation-only / non-Python**: + `analyze` → `start` → edit → `finish(changed_files=[...])` + For `non_python_patch`, report controller-stated limitations and do not + present the result as full structural verification. -If it returns active intents from other agents, compare their `scope` to your -planned files. A hard overlap means another agent claimed the same primary file. 
-A soft overlap means your primary file is in another agent's related context, or -the reverse. +Do not mix workflow and atomic verification paths in the same edit +cycle. Queue/promote/recover via `manage_change_intent` is allowed. -Do not ask the user before queueing blocked follow-up work that can wait. -Prefer `on_conflict="queue"` in the declare step to queue behind the foreign -intent. Ask the user only if immediate editing is required, recovery/reset is -needed, or a `do_not_touch` path must be touched: +### Queue/promote workflow -``` -manage_change_intent(action="declare", scope={...}, on_conflict="queue") -``` - -A queued intent does not own scope, does not pin the before-run, and cannot pass -verification. You may call `check_patch_contract(mode="budget")` on a queued -intent for planning, but it returns `edit_allowed=false`. When the foreign -intent clears, promote it: - -``` -manage_change_intent(action="promote", intent_id=...) -``` +When `start_controlled_change` returns `status: "queued"`: -If promote returns `status="queued"` with `blocking_count`, the foreign intent is -still active — wait and retry. If it returns `reason="before_run_evicted"`, -re-analyze and redeclare the intent. +1. Do not edit until promoted. +2. Wait for the foreign intent to clear. +3. `manage_change_intent(action="promote", intent_id=...)` + — edit only after promote returns `status: "active"` + — if `before_run_evicted`: re-analyze and re-start -## Legacy workflow +### Atomic workflow (fallback) -Use this only when `list_workspace` is unavailable in the connected MCP server: +Use the atomic workflow only when `start_controlled_change` or +`finish_controlled_change` is unavailable, or for advanced operations +(queue management, recovery, step-by-step debugging): ``` -manage_change_intent(action="list_workspace", root=...)
+→ analyze_repository → manage_change_intent(action="declare") → get_blast_radius → check_patch_contract(mode="budget") → edit declared files → analyze_repository → manage_change_intent(action="check", intent_id=..., changed_files=[...]) -→ check_patch_contract(mode="verify", before_run_id=..., after_run_id=..., intent_id=...) +→ check_patch_contract(mode="verify", after_run_id=..., intent_id=...) → validate_review_claims → create_review_receipt -→ manage_change_intent(action="clear") # if supported +→ manage_change_intent(action="clear") ``` -## Intent first +Older MCP servers may not support `start_controlled_change`, +`finish_controlled_change`, `validate_review_claims`, or +`create_review_receipt`. Legacy-compatible steps may be skipped when +unavailable, and the summary must say so explicitly. -Before editing, call: +## Intent first -``` -manage_change_intent(action="declare") -``` +Before editing, call `start_controlled_change`. It declares the intent and +returns workspace coordination state, blast radius, and patch budget. -Declare: +The declaration includes: -- intended files -- allowed related files -- forbidden files -- short intent -- expected effects +- intended files (`allowed_files`) +- allowed related files (`allowed_related`) +- forbidden files (`forbidden`) +- short intent description +- expected structural effects Example expected effects: @@ -161,76 +155,84 @@ Example expected effects: - no dependency cycle - no baseline update +Use `manage_change_intent(action="declare")` only in the atomic fallback +workflow or when explicitly following a controller-provided recovery path. + ## Scope expansion If the fix requires a file outside declared scope: 1. stop; 2. explain why the extra file is needed; -3. redeclare intent with the expanded scope; -4. continue only after the new intent is active. +3. get user approval unless the user already explicitly allowed expansion; +4. 
call `start_controlled_change` again with the expanded scope to get + a fresh intent with updated blast radius and budget; +5. continue only after the expanded intent is active. A patch that fixes the issue but expands scope silently is a failed patch. +Do not edit extra files based on blast-radius context alone. ## Blast radius -Use: +`start_controlled_change` returns blast radius context in its response: +direct dependents, clone cohort members, structural risk signals, +do-not-touch paths, and review context. When the radius is high, a +bounded transitive summary is also included. -``` -get_blast_radius -``` +Use a separate `get_blast_radius(depth="transitive")` call only when +the bounded summary is insufficient and you need the full transitive +dependency graph. -Read the response this way: +Read the blast radius response this way: -- `direct_dependents` / `transitive_dependents`: review before changing public - behavior +- `direct_dependents`: review before changing public behavior - `clone_cohort_members`: comparison context, not automatic edit targets - `structural_risk`: risk context for review priority -- `do_not_touch`: paths that require explicit approval or a separate workflow +- `do_not_touch`: paths that require explicit approval; escalate to user + only if the edit requires touching them - `review_context`: supporting context, not a ban +- `transitive_summary`: downstream risk awareness (when present) ## Patch budget -Before editing, call: +Budget is included in the `start_controlled_change` response. Review that +budget before editing. Do not introduce new clone groups, dead code, +dependency cycles, API breaks, or baseline changes unless explicitly allowed. -``` -check_patch_contract(mode="budget") -``` - -Use the returned budget as the edit boundary. Do not introduce new clone groups, -dead code, dependency cycles, API breaks, or baseline changes unless explicitly -allowed. 
+Use `check_patch_contract(mode="budget")` only in the atomic fallback +workflow or for standalone planning, such as planning around a queued +intent. Budget on a queued intent is advisory and does not grant edit +permission. ## Patch verification -After editing, run analysis again, then pass the original `intent_id` -explicitly: +After editing, call `finish_controlled_change`: ``` -manage_change_intent(action="check", intent_id=..., changed_files=[...]) -check_patch_contract(mode="verify", intent_id=..., after_run_id=...) +finish_controlled_change( + intent_id=..., + changed_files=[...], # or diff_ref=... + after_run_id=..., # required for python_structural / governance_config + review_text="...", # optional, for claim validation +) ``` -`before_run_id` auto-resolves from the intent record when `intent_id` is -provided. `after_run_id` is required only for `python_structural` and -`governance_config` profiles. For `documentation_only` and `non_python_patch`, -pass `changed_files` or `diff_ref` evidence and omit `after_run_id`. +The tool handles: scope check, patch contract verification, claim +validation, review receipt generation, and intent cleanup. -Use `diff_ref=...` instead of `changed_files=[...]` when the changed set should -come from git. +If the result is `user_action_required: true`, read the `next_step` +hint and follow it. Do not claim the patch is verified. -If the result is `unverified` or `violated`, read the `next_step` hint and -follow it. Do not claim the patch is verified. Do not invent a different -recovery path — the hint is deterministic and authoritative. - -If `claim_validation_recommended` is `true`, call `validate_review_claims` -before writing a summary. If it is explicitly `false`, skip claim validation. +If the result is `status: "unverified"`, the `next_step` hint tells +you what is missing (usually `after_run_id` for Python changes). 
## Verification profiles The controller derives a **verification profile** from actual changed files -during `check_patch_contract(mode="verify")`. The profile determines which -structural checks apply. The agent does not choose the profile. +during `finish_controlled_change` (through the underlying verify path), or +directly during `check_patch_contract(mode="verify")` in the atomic workflow. +The profile determines which structural checks apply. The agent does not +choose the profile. | Profile | When | `after_run` required | Structural checks | |-------------------------|-------------------------------|----------------------|-------------------| @@ -257,11 +259,13 @@ Rules: ## Claim discipline -When writing a summary and `claim_validation_recommended` was `true`, call: +In the primary workflow, pass `review_text` to `finish_controlled_change` +when you want final summary claims validated. If claim validation is +recommended and `review_text` is provided, `finish` runs claim validation +and returns the result. -``` -validate_review_claims -``` +Use `validate_review_claims` directly only in the atomic fallback workflow +or when re-validating changed review text after `finish`. Do not claim: @@ -273,11 +277,10 @@ Do not claim: ## Review receipt -At the end, call: - -``` -create_review_receipt -``` +In the primary workflow, `finish_controlled_change` creates the review +receipt when `create_receipt=true` (default). Do not call +`create_review_receipt` separately unless using the atomic fallback +workflow or manually regenerating a receipt. 
The final user summary should include: @@ -286,23 +289,23 @@ The final user summary should include: - blast radius summary - patch contract status - remaining human decisions -- receipt location or payload, if returned +- receipt content, if returned in finish response ## Success criteria The task is complete only when: -- intent was declared before editing; if queued, it was promoted before editing -- blast radius was inspected +- `start_controlled_change` returned an active intent before editing; + if queued, it was promoted before editing +- blast radius was inspected (included in start response) - edits stayed inside declared scope, or expansion was explicit -- patch contract was checked: either an after-run was created (Python - structural, governance config) or verify derived a profile that does not - require it (documentation-only, non-Python) -- baseline/cache/generated state was not changed accidentally -- claims were validated when `claim_validation_recommended` was explicitly - `true` in the controller response, not skipped by agent judgment -- a review receipt was created when the server supports it; if unsupported, - the final summary states that receipt creation was unavailable +- `finish_controlled_change` returned `status: "accepted"` or + `"accepted_with_external_changes"`; `after_run_id` was provided when + required by the verification profile +- `intent_cleared` is `true` in the finish response +- baseline, CodeClone state/cache, and generated reports were not changed + accidentally +- if finish returned claims warnings, they were reported ## Non-goals diff --git a/tests/fixtures/contract_snapshots/mcp_tool_schemas.json b/tests/fixtures/contract_snapshots/mcp_tool_schemas.json index eb45ecb3..b24b61dc 100644 --- a/tests/fixtures/contract_snapshots/mcp_tool_schemas.json +++ b/tests/fixtures/contract_snapshots/mcp_tool_schemas.json @@ -1156,6 +1156,88 @@ "type": "object" } }, + { + "name": "finish_controlled_change", + "input_schema": { + 
"properties": { + "intent_id": { + "title": "Intent Id", + "type": "string" + }, + "changed_files": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Changed Files" + }, + "diff_ref": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Diff Ref" + }, + "after_run_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "After Run Id" + }, + "review_text": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Review Text" + }, + "create_receipt": { + "default": true, + "title": "Create Receipt", + "type": "boolean" + }, + "auto_clear": { + "default": true, + "title": "Auto Clear", + "type": "boolean" + }, + "strictness": { + "default": "ci", + "title": "Strictness", + "type": "string" + } + }, + "required": [ + "intent_id" + ], + "title": "finish_controlled_changeArguments", + "type": "object" + } + }, { "name": "generate_pr_summary", "input_schema": { @@ -1888,6 +1970,82 @@ "type": "object" } }, + { + "name": "start_controlled_change", + "input_schema": { + "properties": { + "root": { + "title": "Root", + "type": "string" + }, + "scope": { + "additionalProperties": true, + "title": "Scope", + "type": "object" + }, + "intent": { + "title": "Intent", + "type": "string" + }, + "expected_effects": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Expected Effects" + }, + "on_conflict": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "On Conflict" + }, + "strictness": { + "default": "ci", + "title": "Strictness", + "type": "string" + }, + "ttl_seconds": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Ttl Seconds" + }, + 
"blast_radius_depth": { + "default": "auto", + "title": "Blast Radius Depth", + "type": "string" + } + }, + "required": [ + "root", + "scope", + "intent" + ], + "title": "start_controlled_changeArguments", + "type": "object" + } + }, { "name": "validate_review_claims", "input_schema": { diff --git a/tests/fixtures/contract_snapshots/public_api_surface.json b/tests/fixtures/contract_snapshots/public_api_surface.json index 81877b88..943b19e0 100644 --- a/tests/fixtures/contract_snapshots/public_api_surface.json +++ b/tests/fixtures/contract_snapshots/public_api_surface.json @@ -1,11 +1,11 @@ { + "codeclone_exports": [ + "__version__" + ], "main_exports": [ "main" ], "main_signature": "() -> 'None'", - "codeclone_exports": [ - "__version__" - ], "mcp_service_public_methods": [ { "name": "analyze_changed_paths", @@ -55,6 +55,10 @@ "name": "evaluate_gates", "signature": "(self, request: 'MCPGateRequest') -> 'dict[str, object]'" }, + { + "name": "finish_controlled_change", + "signature": "(self, *, intent_id: 'str', changed_files: 'Sequence[str] | None' = None, diff_ref: 'str | None' = None, after_run_id: 'str | None' = None, review_text: 'str | None' = None, create_receipt: 'bool' = True, auto_clear: 'bool' = True, strictness: 'str' = 'ci') -> 'dict[str, object]'" + }, { "name": "generate_pr_summary", "signature": "(self, *, run_id: 'str | None' = None, changed_paths: 'Sequence[str]' = (), git_diff_ref: 'str | None' = None, format: 'PRSummaryFormat' = 'markdown') -> 'dict[str, object]'" @@ -115,6 +119,10 @@ "name": "shutdown_cleanup", "signature": "(self) -> 'None'" }, + { + "name": "start_controlled_change", + "signature": "(self, *, root: 'str', scope: 'dict[str, object]', intent: 'str', expected_effects: 'Sequence[str] | None' = None, on_conflict: 'str | None' = None, strictness: 'str' = 'ci', ttl_seconds: 'int | None' = None, blast_radius_depth: 'str' = 'auto') -> 'dict[str, object]'" + }, { "name": "validate_review_claims", "signature": "(self, *, text: 'str', 
run_id: 'str | None' = None, require_citations: 'bool' = True) -> 'dict[str, object]'" diff --git a/tests/test_codex_plugin.py b/tests/test_codex_plugin.py index 36df2b24..5a057719 100644 --- a/tests/test_codex_plugin.py +++ b/tests/test_codex_plugin.py @@ -139,9 +139,9 @@ def test_codex_plugin_skill_exists() -> None: change_control_skill_text, ( "name: codeclone-change-control", - "Declare intent before editing.", - 'check_patch_contract(mode="budget")', - "create_review_receipt", + "Declare intent before editing", + "start_controlled_change", + "finish_controlled_change", ), ) diff --git a/tests/test_mcp_server.py b/tests/test_mcp_server.py index 76fbd7be..d862665c 100644 --- a/tests/test_mcp_server.py +++ b/tests/test_mcp_server.py @@ -115,11 +115,9 @@ def test_mcp_server_exposes_expected_read_only_tools() -> None: assert "default or pyproject-resolved thresholds for the first pass" in str( server.instructions ) - assert "manage_change_intent(action='list_workspace', root=...)" in str( - server.instructions - ) + assert "start_controlled_change" in str(server.instructions) + assert "finish_controlled_change" in str(server.instructions) assert ".cache/codeclone/intents/" in str(server.instructions) - assert "validate review claims" in str(server.instructions) tools = {tool.name: tool for tool in asyncio.run(server.list_tools())} assert set(tools) == { @@ -149,6 +147,8 @@ def test_mcp_server_exposes_expected_read_only_tools() -> None: "mark_finding_reviewed", "list_reviewed_findings", "manage_change_intent", + "start_controlled_change", + "finish_controlled_change", } for name, tool in tools.items(): assert tool.annotations is not None @@ -186,6 +186,8 @@ def test_mcp_server_exposes_expected_read_only_tools() -> None: "mark_finding_reviewed", "manage_change_intent", "clear_session_runs", + "start_controlled_change", + "finish_controlled_change", } ) assert tool.annotations.idempotentHint is True From e69c8bbbb8f64de1725adaa5701c57d6f2745d73 Mon Sep 17 00:00:00 
2001 From: Den Rozhnovskiy Date: Fri, 29 May 2026 23:03:08 +0500 Subject: [PATCH 052/318] docs(mcp): refine workflow directives, fix doc audit divergences --- AGENTS.md | 91 +++++++++---- CHANGELOG.md | 127 +++++------------- CLAUDE.md | 11 +- codeclone/surfaces/mcp/_blast_radius.py | 4 +- docs/book/03-contracts-exit-codes.md | 2 +- docs/book/07-cache.md | 2 +- docs/book/08-report.md | 2 +- docs/book/15-health-score.md | 2 +- docs/book/20-mcp-interface.md | 2 +- docs/book/appendix/b-schema-layouts.md | 4 +- docs/getting-started.md | 2 +- docs/mcp.md | 6 +- mkdocs.yml | 2 + .../skills/codeclone-change-control/SKILL.md | 14 +- .../skills/codeclone-change-control/SKILL.md | 14 +- 15 files changed, 144 insertions(+), 141 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 4a8c1420..f2de0dd7 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -14,7 +14,8 @@ If AGENTS.md and code diverge, follow code and update AGENTS.md accordingly. ## 1) Operating principles (non‑negotiable) 1. **Do not break CI contracts.** - - Treat baseline, cache, and report formats as **public APIs**. + - Treat baseline, analysis cache, canonical report formats, and documented + MCP tool payloads as **public APIs**. - Any contract change must be **versioned**, documented, and accompanied by tests. 2. **Determinism > cleverness.** @@ -34,12 +35,12 @@ If AGENTS.md and code diverge, follow code and update AGENTS.md accordingly. 6. **Fingerprint-adjacent optimization policy** - Performance work must not change AST normalization, fingerprint inputs, or clone identity semantics while - `FINGERPRINT_VERSION` remains unchanged. + `BASELINE_FINGERPRINT_VERSION` remains unchanged. - If a change in AST/core analysis can affect fingerprint bytes, clone identity, NEW vs KNOWN classification, or baseline compatibility semantics, it is not a routine optimization. 
It must be treated as an explicit fingerprint contract change and requires: - - `FINGERPRINT_VERSION` review or bump + - `BASELINE_FINGERPRINT_VERSION` review or bump - documentation updates - migration/release notes - explicit maintainer approval @@ -60,16 +61,22 @@ Key artifacts: - `codeclone.baseline.json` — trusted baseline snapshot (for CI comparisons) - `.cache/codeclone/cache.json` — analysis cache (integrity-checked) - `.cache/codeclone/report.html|report.json|report.md|report.sarif|report.txt` — reports -- `codeclone-mcp` — optional read-only MCP server (install via `codeclone[mcp]`) +- `codeclone-mcp` — optional MCP server: read-only with respect to source + files, baselines, canonical reports, and analysis cache; stateful only for + session-local review/controller state, ephemeral workspace intent + coordination, and optional audit trail (install via `codeclone[mcp]`) - `extensions/vscode-codeclone/` — stable VS Code extension as a native, read-only IDE client over `codeclone-mcp` - `extensions/claude-desktop-codeclone/` — stable Claude Desktop `.mcpb` bundle as a local install wrapper over `codeclone-mcp` - `plugins/codeclone/` + `.agents/plugins/marketplace.json` — stable Codex plugin as a native local discovery layer - over `codeclone-mcp`, with a bundled CodeClone review skill + over `codeclone-mcp`, with bundled CodeClone skills under `plugins/codeclone/skills/` (`codeclone-review`, + `codeclone-hotspots`, `codeclone-change-control`) - MCP runs are in-memory only. Review markers are session-local. Change intent truth is session-local, with optional ephemeral workspace coordination records under `.cache/codeclone/intents/`; none of this may leak into - baseline/cache/report artifacts. + baseline/cache/report artifacts. Optional audit trail is passive evidence + state and must not affect canonical report digests, baseline trust, cache + compatibility, or finding identity. 
- `docs/`, `mkdocs.yml`, `.github/workflows/docs.yml` — published documentation site and docs build pipeline --- @@ -82,7 +89,8 @@ Run these locally before proposing changes: uv run pre-commit run --all-files ``` -If you touched baseline/cache/report contracts, also run the repo’s audit runner (or the scenario script if present). +If you touched baseline/cache/report contracts or CLI/MCP audit surfaces, also exercise the CLI audit path +(`--audit` / `codeclone/surfaces/cli/audit.py`) or the relevant audit/MCP tests. If you touched `docs/`, `mkdocs.yml`, docs publishing workflow, or sample-report generation, also run: ```bash @@ -232,11 +240,13 @@ Reports come in: - SARIF (`--sarif`) - Text (`--text`) -MCP is a separate optional interface, not a report format. It must remain a -read-only agent layer over the same canonical report/baseline/cache contracts. -Session review markers and change intent truth are ephemeral MCP process state. -Workspace intent registry files under `.cache/codeclone/intents/` are advisory -coordination state only, not analysis cache or report truth. +MCP is a separate optional interface, not a report format. It must remain +read-only with respect to repository source, baselines, canonical reports, +generated reports, and analysis cache. Session-local controller state, +workspace intent records, and audit trail are allowed only through explicit +controller/audit contracts. Workspace intent registry files under +`.cache/codeclone/intents/` are advisory coordination state only, not analysis +cache or report truth. For file edits, agents should prefer the workflow tools `start_controlled_change` and `finish_controlled_change` — they aggregate @@ -307,6 +317,10 @@ No UI-only heuristics that affect gating. ## 8) How to propose changes (agent workflow) +For repository edits, follow `CLAUDE.md` / the active CodeClone change-control +skill first. This section describes what to report after the controlled change, +not a replacement workflow. 
+ When you implement something: 1. **State the intent** (what user-visible issue does it solve?) @@ -343,6 +357,16 @@ Changed-scope flags are contract-sensitive: - `--diff-against` requires `--changed-only`. - `--paths-from-git-diff` implies `--changed-only`. +Controller and workspace query flags (terminal-only; see `docs/book/09-cli.md` and +`tests/fixtures/contract_snapshots/cli_help.txt`): + +- `--blast-radius`, `--patch-verify`, `--strictness` — patch/blast-radius query +- `--session-stats`, `--audit`, `--audit-json` — workspace/audit query (read-only; + `--audit` requires `audit_enabled=true` in effective config) + +Full flag inventory and combination rules: `docs/book/09-cli.md`, +`docs/book/04-config-and-defaults.md`. + If you introduce a new exit reason, document it and add tests. --- @@ -371,7 +395,9 @@ Before cutting a release: - Don’t embed suppressions into baseline unless explicitly designed as a versioned contract. - Don’t introduce nondeterministic ordering (dict iteration, set ordering, filesystem traversal without sort). - Don’t make the base `codeclone` install depend on optional MCP runtime packages. -- Don’t let MCP mutate baselines, source files, reports, or analysis cache data. +- Don’t let MCP mutate source files, baselines, canonical reports, generated + reports, or analysis cache data. Ephemeral controller state and audit trail + are allowed only through explicit controller/audit contracts. - Don’t let MCP re-synthesize design findings from raw metrics; read canonical `findings.groups.design` only. --- @@ -386,8 +412,9 @@ Architecture is layered, but grounded in current code (not aspirational diagrams `pyproject.toml` loading, and CLI > pyproject > defaults resolution. - **Core orchestration** (`codeclone/core/*`) owns bootstrap → discovery → worker processing → project metrics → report/gate integration. It does not own shell UX. 
-- **Analysis layer** (`codeclone/analysis/*`, `codeclone/blocks/*`, `codeclone/paths/*`, `codeclone/qualnames/*`) - parses source, normalizes AST/CFG facts, extracts units, and prepares deterministic analysis inputs. +- **Analysis layer** (`codeclone/analysis/*`, `codeclone/blocks/*`, `codeclone/paths/*`, `codeclone/qualnames/*`, + `codeclone/scanner/*`) parses source, normalizes AST/CFG facts, extracts units, and prepares deterministic analysis + inputs. - **Clone/finding derivation layer** (`codeclone/findings/*`, `codeclone/metrics/*`) groups clones and computes structural and quality signals from already-extracted facts. - **Domain/contracts layer** (`codeclone/models.py`, `codeclone/contracts/*`, `codeclone/domain/*`) defines typed @@ -399,8 +426,13 @@ Architecture is layered, but grounded in current code (not aspirational diagrams and deterministic projections. - **HTML/UI rendering** (`codeclone/report/html/*`) renders views from canonical report/meta facts. HTML is render-only. -- **MCP agent interface** (`codeclone/surfaces/mcp/*`) exposes the same pipeline/report contracts as a deterministic, - read-only MCP surface for AI agents and MCP-capable clients. +- **MCP agent interface** (`codeclone/surfaces/mcp/*`) exposes the same pipeline/report contracts as a deterministic + MCP surface for AI agents and MCP-capable clients, read-only with respect to repository artifacts and stateful only + for session-local controller/review state, workspace intent coordination, and optional audit. +- **Audit trail** (`codeclone/audit/*`) stores optional passive evidence (SQLite by default via + `codeclone/surfaces/cli/audit.py` / MCP audit emit). It must not affect canonical report digests, baseline trust, + cache compatibility, or finding identity. +- **Patch budget helpers** (`codeclone/budget/*`) provide shared budget estimation for CLI/MCP patch-verify flows. 
- **Documentation/publishing surface** (`docs/`, `mkdocs.yml`, `.github/workflows/docs.yml`, `scripts/build_docs_example_report.py`) publishes contract docs and the live sample report. - **VS Code extension surface** (`extensions/vscode-codeclone/*`) is a native, workspace-only IDE client over @@ -408,7 +440,7 @@ Architecture is layered, but grounded in current code (not aspirational diagrams - **Claude Desktop bundle surface** (`extensions/claude-desktop-codeclone/*`) is a native `.mcpb` install wrapper for Claude Desktop that launches the same local `codeclone-mcp` server via local `stdio`. - **Codex plugin surface** (`plugins/codeclone/*`, `.agents/plugins/marketplace.json`) is a native local Codex plugin - over `codeclone-mcp`, with repo-local discovery metadata and a bundled CodeClone review skill. + over `codeclone-mcp`, with repo-local discovery metadata and bundled skills under `plugins/codeclone/skills/`. - **Tests-as-spec** (`tests/`) lock behavior, contracts, determinism, and architecture boundaries. Non-negotiable interpretation: @@ -431,13 +463,15 @@ Use this map to route changes to the right owner module. - `codeclone/surfaces/cli/workflow.py` — top-level CLI orchestration and exit routing. Add CLI control flow here, not in `main.py`. - `codeclone/surfaces/cli/*` — CLI support slices (startup, runtime, execution, post-run handling, summaries, - reports, changed-scope logic, baseline state, console helpers). Keep them orchestration/UX-focused. + reports, changed-scope logic, baseline state, audit rendering, console helpers). Keep them orchestration/UX-focused. - `codeclone/config/*` — parser construction, option specs/defaults, pyproject loading, config resolution. Do not duplicate option semantics elsewhere. - `codeclone/core/*` — canonical runtime pipeline and payload plumbing. Change integration flow here; do not move shell UX or HTML-only logic here. 
- `codeclone/analysis/*` — AST parsing, CFG/fingerprint preparation, declaration/reference collection, and unit - extraction. Change parsing/extraction semantics here; keep it independent from CLI/report/baseline UX. + extraction (`units.py`, `_module_walk.py`). Change parsing/extraction semantics here; keep it independent from + CLI/report/baseline UX. +- `codeclone/scanner/*` — Python file discovery helpers and module-name resolution used by core discovery. - `codeclone/findings/clones/grouping.py` + `codeclone/blocks/*` — clone grouping and block/segment mechanics. - `codeclone/findings/structural/detectors.py` — structural finding extraction/normalization policy; keep it factual and deterministic. @@ -467,6 +501,8 @@ Use this map to route changes to the right owner module. ephemeral workspace intent records under `.cache/codeclone/intents/`. - `codeclone/surfaces/mcp/server.py` — optional MCP launcher/server wiring, transport config, and MCP tool/resource registration; keep dependency loading lazy so base installs/CI do not require MCP runtime packages. +- `codeclone/audit/*` — audit event schema, validation, writer/reader; passive evidence only. +- `codeclone/budget/*` — patch/token budget estimation shared by CLI and MCP surfaces. - `tests/test_mcp_service.py`, `tests/test_mcp_server.py` — MCP contract and integration tests; run these when touching any MCP surface. - `codeclone/contracts/*` — version constants, schema types, exit enum, URLs, and typed exceptions. Treat as contract @@ -520,8 +556,8 @@ Inline suppressions are explicit local policy, not analysis truth. - inline comment on the declaration header closing line for multiline signatures - Binding is target-specific (`filepath`, `qualname`, declaration span, kind). No file-wide/global implicit scope. - Unknown/malformed directives are ignored safely; analysis must not fail because of suppression syntax issues. 
-- Current active semantic effect is dead-code suppression (`dead-code`) through `extractor.py` → - `DeadCandidate.suppressed_rules` → `metrics/dead_code.py`. +- Current active semantic effect is dead-code suppression (`dead-code`) through + `codeclone/analysis/_module_walk.py` → `DeadCandidate.suppressed_rules` → `codeclone/metrics/dead_code.py`. - Suppressed dead-code findings are excluded from active dead-code findings and health impact, but remain observable in report surfaces where implemented (JSON summary/details, text/markdown/html, CLI counters). - Suppressions must not silently alter unrelated finding families. @@ -538,9 +574,9 @@ If you change a contract-sensitive zone, route docs/tests/approval deliberately. | Cache schema/profile/integrity (`codeclone/cache/store.py`, `codeclone/cache/versioning.py`, `codeclone/cache/integrity.py`) | `docs/book/07-cache.md`, `docs/book/appendix/b-schema-layouts.md`, `CHANGELOG.md` | `tests/test_cache.py`, pipeline/CLI cache integration tests | cache schema/status/profile compatibility semantics change | cache payload/version/status semantics change | | Canonical report JSON shape (`codeclone/report/document/*`, report projections) | `docs/book/08-report.md` (+ `docs/book/10-html-render.md` if rendering contract impacted), `docs/sarif.md` when SARIF changes, `CHANGELOG.md` | `tests/test_report.py`, `tests/test_report_contract_coverage.py`, `tests/test_report_branch_invariants.py`, relevant report-format tests | finding/meta/summary schema changes | stable JSON fields/meaning/order guarantees change | | CLI flags/help/exit behavior (`codeclone/main.py`, `codeclone/surfaces/cli/*`, `codeclone/config/*`, `codeclone/contracts/*`) | `docs/book/09-cli.md`, `docs/book/03-contracts-exit-codes.md`, `README.md`, `CHANGELOG.md` | `tests/test_cli_unit.py`, `tests/test_cli_inprocess.py`, `tests/test_cli_smoke.py` | exit-code semantics, script-facing behavior, flag contracts change | user-visible CLI contract changes | -| 
Fingerprint-adjacent analysis (`extractor/cfg/normalize/grouping`) | `docs/book/05-core-pipeline.md`, `docs/cfg.md`, `docs/book/14-compatibility-and-versioning.md`, `CHANGELOG.md` | `tests/test_fingerprint.py`, `tests/test_extractor.py`, `tests/test_cfg.py`, golden tests (`tests/test_detector_golden.py`, `tests/test_golden_v2.py`) | always (see Section 1.6) | clone identity / NEW-vs-KNOWN / fingerprint inputs change | -| Suppression semantics/reporting (`codeclone/analysis/suppressions.py`, extractor dead-code wiring, report/UI counters) | `docs/book/19-inline-suppressions.md`, `docs/book/16-dead-code-contract.md`, `docs/book/08-report.md`, and interface docs if surfaced (`09-cli`, `10-html-render`) | `tests/test_suppressions.py`, `tests/test_extractor.py`, `tests/test_metrics_modules.py`, `tests/test_pipeline_metrics.py`, report/html/cli tests | declaration scope semantics, rule effect, or contract-visible counters/fields change | suppression changes alter active finding output or contract-visible report payload | -| MCP interface (`codeclone/surfaces/mcp/*`, packaging extra/launcher) | `README.md`, `docs/book/20-mcp-interface.md`, `docs/mcp.md`, `docs/book/01-architecture-map.md`, `docs/book/14-compatibility-and-versioning.md`, `CHANGELOG.md` | `tests/test_mcp_service.py`, `tests/test_mcp_server.py`, plus CLI/package tests if launcher/install semantics change | tool/resource shapes, read-only semantics, optional-dependency packaging behavior change | public MCP tool names, resource URIs, launcher/install behavior, or response semantics change | +| Fingerprint-adjacent analysis (`codeclone/analysis/units.py`, `codeclone/analysis/_module_walk.py`, `codeclone/analysis/cfg.py`, `codeclone/analysis/normalizer.py`, `codeclone/findings/clones/grouping.py`) | `docs/book/05-core-pipeline.md`, `docs/cfg.md`, `docs/book/14-compatibility-and-versioning.md`, `CHANGELOG.md` | `tests/test_fingerprint.py`, `tests/test_extractor.py`, `tests/test_cfg.py`, golden tests 
(`tests/test_detector_golden.py`, `tests/test_golden_v2.py`) | always (see Section 1.6) | clone identity / NEW-vs-KNOWN / fingerprint inputs change | +| Suppression semantics/reporting (`codeclone/analysis/suppressions.py`, `codeclone/analysis/_module_walk.py` dead-code wiring, report/UI counters) | `docs/book/19-inline-suppressions.md`, `docs/book/16-dead-code-contract.md`, `docs/book/08-report.md`, and interface docs if surfaced (`09-cli`, `10-html-render`) | `tests/test_suppressions.py`, `tests/test_extractor.py`, `tests/test_metrics_modules.py`, `tests/test_pipeline_metrics.py`, report/html/cli tests | declaration scope semantics, rule effect, or contract-visible counters/fields change | suppression changes alter active finding output or contract-visible report payload | +| MCP interface (`codeclone/surfaces/mcp/*`, packaging extra/launcher) | `README.md`, `docs/book/20-mcp-interface.md`, `docs/mcp.md`, `docs/book/01-architecture-map.md`, `docs/book/14-compatibility-and-versioning.md`, `CHANGELOG.md` | `tests/test_mcp_service.py`, `tests/test_mcp_server.py`, `tests/fixtures/contract_snapshots/mcp_tool_schemas.json`, plus CLI/package tests if launcher/install semantics change | tool/resource shapes, workflow tool payloads, repository-read-only semantics, optional-dependency packaging behavior change | public MCP tool names, workflow tool payloads, resource URIs, launcher/install behavior, or response semantics change | | VS Code extension surface (`extensions/vscode-codeclone/*`) | `README.md`, `docs/book/21-vscode-extension.md`, `docs/vscode-extension.md`, `docs/book/01-architecture-map.md`, `docs/README.md`, `CHANGELOG.md` | `node --check extensions/vscode-codeclone/src/support.js`, `node --check extensions/vscode-codeclone/src/mcpClient.js`, `node --check extensions/vscode-codeclone/src/extension.js`, `node --test extensions/vscode-codeclone/test/*.test.js`, plus local extension-host smoke and package smoke when surface/manifest/assets change | command/view 
UX, trust/runtime model, source-first review flow, or packaging metadata change | documented commands/views/setup/trust behavior, packaged assets, or publish metadata change | | Claude Desktop bundle surface (`extensions/claude-desktop-codeclone/*`) | `docs/book/22-claude-desktop-bundle.md`, `docs/claude-desktop-bundle.md`, `docs/mcp.md`, `docs/book/01-architecture-map.md`, `docs/README.md`, `CHANGELOG.md` | `node --check extensions/claude-desktop-codeclone/server/index.js`, `node --check extensions/claude-desktop-codeclone/src/launcher.js`, `node --check extensions/claude-desktop-codeclone/scripts/build-mcpb.mjs`, `node --test extensions/claude-desktop-codeclone/test/*.test.js`, plus `.mcpb` build smoke | bundle install/runtime model, launcher UX, local-stdio constraints, or bundle metadata change | documented Claude Desktop install/setup/runtime behavior or packaged bundle semantics change | | Codex plugin surface (`plugins/codeclone/*`, `.agents/plugins/marketplace.json`) | `docs/book/23-codex-plugin.md`, `docs/codex-plugin.md`, `docs/mcp.md`, `docs/book/01-architecture-map.md`, `docs/README.md`, `CHANGELOG.md` | `python3 -m json.tool plugins/codeclone/.codex-plugin/plugin.json`, `python3 -m json.tool plugins/codeclone/.mcp.json`, `python3 -m json.tool .agents/plugins/marketplace.json`, `tests/test_codex_plugin.py` | plugin discovery/runtime model, bundled MCP config, bundled skill behavior, or plugin metadata change | documented Codex plugin install/discovery/runtime behavior or plugin manifest/marketplace semantics change | @@ -578,7 +614,10 @@ Policy: - Cache schema/status/profile compatibility/integrity (`CACHE_VERSION` contract family). - Canonical report JSON schema/payload semantics (`REPORT_SCHEMA_VERSION` contract family). - Documented report projections and their machine/user-facing semantics (HTML/Markdown/SARIF/Text). -- Documented MCP launcher/install behavior, tool names, resource URIs, and read-only semantics. 
+- Documented MCP launcher/install behavior, tool names, resource URIs, and + repository-read-only semantics. +- Documented MCP workflow tools, verification profiles, workspace intent + coordination, queue/promote semantics, and review receipt payloads. - Session-local MCP review state semantics (`mark_finding_reviewed`, `exclude_reviewed`) as documented public behavior. - Documented VS Code extension behavior: commands, views, setup guidance, trusted-workspace model, and its baseline-aware triage workflow over MCP. diff --git a/CHANGELOG.md b/CHANGELOG.md index fd38c32f..defc90c3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,105 +1,46 @@ # Changelog -## [2.1.0a2] - Unreleased +## [2.1.0a1] - Unreleased -### Added - -- Add `start_controlled_change` MCP tool — aggregates workspace check, - intent declaration, blast radius computation (direct + bounded transitive - for high-radius changes), and patch budget into a single pre-edit call. -- Add `finish_controlled_change` MCP tool — aggregates scope check, patch - verification, claim validation, review receipt, and intent cleanup into - a single post-edit call. - -### Changed - -- Agent workflow reduced from 7–11 MCP calls to 3–4 per edit cycle. - CLAUDE.md and plugin skills updated to prefer workflow tools. -- Atomic change control tools remain available for advanced/diagnostic - use and backward compatibility. - -## [2.1.0a1] - 2026-05-22 - -`2.1.0a1` opens the v2.1 alpha line for structural change control. +`2.1.0a1` opens the v2.1 alpha line with the structural change controller: +intent-first edit workflow, blast radius, patch verification, claim guard, +review receipts, and workflow consolidation tools. ### Added -- Add intent queue for multi-agent scope coordination. When scope overlaps a - foreign active intent, `manage_change_intent(action="declare", - on_conflict="queue")` creates a queued intent instead of reporting a conflict. 
- Queued intents are visible in workspace listings but do not own scope, do not - pin the before-run, and cannot pass patch verification. A separate - `manage_change_intent(action="promote")` transitions queued → active after - re-checking workspace conflicts, pinning the run, and renewing the lease. If - conflicts persist, promote returns `blocking_count` without state change. -- Add verify ergonomics: `check_patch_contract(mode="verify")` auto-resolves - `before_run_id` from the intent record when `intent_id` is provided but - `before_run_id` is omitted. Non-accepted verify responses include `next_step` - with an actionable hint for each failure reason and - `claim_validation_recommended` to advise whether `validate_review_claims` is - meaningful for the verification profile. -- Add `intent.queued`, `intent.promoted`, and `intent.queue_blocked` audit trail - events with compact payload handlers for MCP payload token budget tracking. -- Add MCP `get_blast_radius` as a deterministic pre-change projection over the - canonical report: direct dependents, clone cohorts, dependency-cycle - membership, coverage/risk signals, actionable do-not-touch paths, and - bounded review-only context. -- Add MCP `manage_change_intent` for session-local change intent lifecycle: - declare intended scope, inspect active intent, check actual changed files - against scope, and clear intent state. -- Add a workspace intent registry under `.cache/codeclone/intents/` so separate - MCP stdio processes can see advisory multi-agent edit intents before - declaring overlapping scope. -- Add MCP `check_patch_contract` with read-only `budget` and `verify` modes: - pre-edit gate budget/headroom, post-edit before/after comparison, gate - preview, intent-scope validation, and baseline-abuse signals. 
-- Add MCP `create_review_receipt` for deterministic markdown/JSON audit - artifacts that compose report provenance, intent scope, blast radius, - reviewed findings, structural delta, patch-contract status, human decision - points, and claims-not-made without mutating repository state. -- Add MCP `validate_review_claims` as a deterministic, citation-based claim - guard for review text. It flags overclaims such as Security Surfaces called - vulnerabilities, report-only families called CI failures, known findings - called new regressions, dead-code certainty despite runtime reachability - evidence, and fixes claimed before post-patch verification. -- Add verification profile classifier for MCP patch contract. The controller - derives verification depth from actual changed files: `python_structural`, - `documentation_only`, `governance_config`, `non_python_patch`, and - `state_artifact_change`. Documentation-only and non-Python patches verify - without `after_run_id` when diff evidence is provided. Review receipts - include the profile section with "not applicable" for skipped structural - checks. Claim guard warns when review text references structural - verification on a non-structural profile. -- Add CLI controller query modes: `--blast-radius FILE [FILE...]` for - terminal pre-change boundary review and `--patch-verify` for trusted-baseline - patch verification with `ci`, `strict`, and `relaxed` profiles. -- Add lease-aware workspace intent recovery for MCP change control. Intent - records now carry renewable ownership leases, `list_workspace` distinguishes - own/recoverable/foreign-active records, and `manage_change_intent` can - explicitly recover stale intents without killing another MCP process. +- Structural change controller for MCP: 28 tools total, including + `start_controlled_change` and `finish_controlled_change` workflow tools + that reduce the edit cycle from 7–11 MCP calls to 3–4. 
+- Change intent lifecycle (`manage_change_intent`): declare scope, check + changed files, clear intent, queue behind foreign agents with + `on_conflict="queue"`, promote queued intents, recover stale intents. +- Workspace intent registry under `.cache/codeclone/intents/` for + multi-agent coordination across separate MCP stdio processes. +- Blast radius projection (`get_blast_radius`): direct/transitive + dependents, clone cohorts, structural risk, do-not-touch boundaries. +- Patch contract (`check_patch_contract`): pre-edit budget and post-edit + verification with profile-aware depth (python_structural, + documentation_only, governance_config, non_python_patch, + state_artifact_change). +- Claim guard (`validate_review_claims`): citation-based overclaim + detection against canonical report semantics. +- Review receipts (`create_review_receipt`): deterministic audit + artifacts with provenance, scope, patch status, and claims-not-made. +- Verify ergonomics: auto-resolve `before_run_id` from intent, `next_step` + hints, `claim_validation_recommended` flag. +- Lease-aware intent recovery: renewable ownership leases, + own/recoverable/foreign-active classification, explicit recovery. +- CLI controller query modes: `--blast-radius` and `--patch-verify`. +- Audit trail events for intent lifecycle and token budget tracking. ### Internal -- Keep queued intents unpinned: active intents call `_runs.pin()` to prevent - eviction from bounded history, queued intents do not — pinning happens at - promotion. Conflict detection in `_detect_scope_state` skips records with - `status == "queued"` so queued records do not block active declares. -- Keep intent and blast-radius cache state in MCP process memory only; they do - not mutate source files, baselines, cache artifacts, reports, or canonical - report integrity. Workspace intent files are ephemeral coordination state, - not analysis cache or report truth. 
-- Keep patch-contract budget payloads explicit: disabled numeric thresholds are - `null` in MCP payloads, and boolean enforcement policies use `forbid_*` - names. -- Pin MCP runs referenced by active change intents so bounded run-history - pruning cannot drop the declared before-run before verification. -- Mark the package as `2.1.0a1` with the PyPI alpha classifier while v2.1 - controller features are under development. -- Keep CLI controller query modes read-only by skipping baseline, report, and - analysis-cache writes. -- Keep workspace intent registry upgrades versioned and backward-readable: - registry v2 records add lease and report-digest fields, while v1 records are - accepted with conservative lease defaults until natural expiry. +- MCP session state (intents, blast-radius cache, review markers) is + process-local only; workspace intent files are ephemeral coordination + state, not analysis cache or report truth. +- Queued intents do not pin runs; pinning happens at promotion. +- Workspace intent registry v2 with lease and report-digest fields; + v1 records accepted with conservative defaults until expiry. ## [2.0.2] - 2026-05-19 diff --git a/CLAUDE.md b/CLAUDE.md index 24002bd0..194d75ba 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -52,9 +52,14 @@ Before editing any repository files: `finish` can verify from changed-file evidence 5. 
`finish_controlled_change(intent_id=..., changed_files=[...], after_run_id=...)` — returns scope check, verification, receipt, and clears intent - — if `user_action_required: true`, stop and follow `next_step` - — if `status: "unverified"`, follow `next_step` hint - — `auto_clear=true` by default; intent cleared on accepted + — if `status: "unverified"`, the intent stays active; follow `next_step` + (e.g., run `analyze_repository`), then call `finish` again on the + **same `intent_id`** with the missing evidence + — if `status: "violated"` (scope), the intent stays active; either + remove out-of-scope changes and retry `finish`, or expand scope via + `start_controlled_change` with a wider scope + — if `user_action_required: true`, stop and escalate to the user + — `auto_clear=true` by default; intent cleared only on accepted Workflow profiles determine which steps are needed: diff --git a/codeclone/surfaces/mcp/_blast_radius.py b/codeclone/surfaces/mcp/_blast_radius.py index 5df8f34b..a627176b 100644 --- a/codeclone/surfaces/mcp/_blast_radius.py +++ b/codeclone/surfaces/mcp/_blast_radius.py @@ -546,8 +546,8 @@ def _compute_change_boundaries( do_not_touch_entries, path=pattern, reason=( - "baseline, cache, and generated CodeClone state require explicit " - "separate changes" + "baseline, CodeClone state/cache, and generated artifacts " + "require explicit separate changes" ), category="baseline_or_generated_state", severity="hard", diff --git a/docs/book/03-contracts-exit-codes.md b/docs/book/03-contracts-exit-codes.md index 951a0584..6866fe55 100644 --- a/docs/book/03-contracts-exit-codes.md +++ b/docs/book/03-contracts-exit-codes.md @@ -37,7 +37,7 @@ Refs: - `codeclone/ui_messages/__init__.py:MARKER_CONTRACT_ERROR` - `codeclone/ui_messages/__init__.py:MARKER_INTERNAL_ERROR` - `codeclone/ui_messages/__init__.py:fmt_contract_error` -- `codeclone/ui_messages/__init__.py:fmt_gating_failure` +- `codeclone/report/gates/reasons.py:print_gating_failure_block` - 
`codeclone/ui_messages/__init__.py:fmt_internal_error` ## Invariants (MUST) diff --git a/docs/book/07-cache.md b/docs/book/07-cache.md index f38fd33e..b8b67ae1 100644 --- a/docs/book/07-cache.md +++ b/docs/book/07-cache.md @@ -94,7 +94,7 @@ CLI behavior: cache failures do not change exit code; analysis continues without Refs: - `codeclone/cache/versioning.py:CacheStatus` -- `codeclone/surfaces/cli/runtime.py:resolve_cache_status` +- `codeclone/cache/store.py:resolve_cache_status` ## Determinism / canonicalization diff --git a/docs/book/08-report.md b/docs/book/08-report.md index 9916e6c8..726b386e 100644 --- a/docs/book/08-report.md +++ b/docs/book/08-report.md @@ -93,7 +93,7 @@ Refs: Refs: - `codeclone/report/document/builder.py:build_report_document` -- `codeclone/report/derived.py:_health_snapshot` +- `codeclone/report/document/derived.py:_health_snapshot` - `codeclone/report/overview.py:materialize_report_overview` - `codeclone/report/suggestions.py:generate_suggestions` diff --git a/docs/book/15-health-score.md b/docs/book/15-health-score.md index 207d790b..1c41dd78 100644 --- a/docs/book/15-health-score.md +++ b/docs/book/15-health-score.md @@ -13,7 +13,7 @@ policy for future scoring-model expansion. - Canonical report surface: `codeclone/report/document/builder.py:build_report_document` - Health snapshot projections: - `codeclone/report/derived.py:_health_snapshot`, + `codeclone/report/document/derived.py:_health_snapshot`, `codeclone/report/overview.py:_health_snapshot` - CLI / HTML / MCP consumers: `codeclone/surfaces/cli/summary.py`, diff --git a/docs/book/20-mcp-interface.md b/docs/book/20-mcp-interface.md index cec3474c..9534bf30 100644 --- a/docs/book/20-mcp-interface.md +++ b/docs/book/20-mcp-interface.md @@ -65,7 +65,7 @@ Current server characteristics: ## Tools -Current tool set: **26 tools** organized by workflow phase. +Current tool set: **28 tools** organized by workflow phase. 
```mermaid graph LR diff --git a/docs/book/appendix/b-schema-layouts.md b/docs/book/appendix/b-schema-layouts.md index 17f0868e..92cd9d04 100644 --- a/docs/book/appendix/b-schema-layouts.md +++ b/docs/book/appendix/b-schema-layouts.md @@ -3,7 +3,9 @@ ## Purpose Compact structural layouts for baseline/cache/report contracts in the current -`2.1` release line. +`2.1` release line. Generator/package version in JSON examples is illustrative; +the actual version is defined in `codeclone/contracts/__init__.py` and +`pyproject.toml`. ## Baseline schema (`2.1`) diff --git a/docs/getting-started.md b/docs/getting-started.md index 55479141..0c8a9449 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -148,7 +148,7 @@ See [Exit codes](book/03-contracts-exit-codes.md). ## MCP Setup -The MCP server exposes 26 read-only tools over the same canonical pipeline. +The MCP server exposes 28 tools over the same canonical pipeline. ### Start the server diff --git a/docs/mcp.md b/docs/mcp.md index 29f8d7e9..b8efc577 100644 --- a/docs/mcp.md +++ b/docs/mcp.md @@ -76,9 +76,11 @@ graph BT PC["_MCPSessionPatchContractMixin
    budget and verify"] RR["_MCPSessionReviewReceiptMixin
    audit receipt composition"] CG["_MCPSessionClaimGuardMixin
    citation-based validation"] + WF["_MCPSessionWorkflowMixin
    start/finish orchestration"] S["MCPSession"] - F --> CP --> AA --> RSB --> SM --> RPM --> STM --> BR --> IM --> PC --> RR --> CG --> S + F --> CP --> AA --> RSB --> SM --> RPM --> STM --> BR --> IM --> PC --> RR --> CG --> WF --> S style S stroke: #6366f1, stroke-width: 2px + style WF fill: #eff6ff style CG fill: #f0fdf4 style RR fill: #f0fdf4 style PC fill: #f0fdf4 @@ -207,7 +209,7 @@ the client workspace. ## Tool surface -Current surface: **26 tools**, **7 fixed resources**, **3 URI templates**. +Current surface: **28 tools**, **7 fixed resources**, **3 URI templates**. The surface is organized by workflow phase. Start at the top, drill down as needed. diff --git a/mkdocs.yml b/mkdocs.yml index cf2de392..0802a8f7 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -111,6 +111,7 @@ nav: - VS Code Extension: book/21-vscode-extension.md - Claude Desktop Bundle: book/22-claude-desktop-bundle.md - Codex Plugin: book/23-codex-plugin.md + - Cursor Plugin: book/25-cursor-plugin.md - Structural Change Controller: book/24-structural-change-controller.md - Claim Guard: book/28-claim-guard.md - HTML Render: book/10-html-render.md @@ -141,6 +142,7 @@ nav: - VS Code Extension: vscode-extension.md - Claude Desktop Bundle: claude-desktop-bundle.md - Codex Plugin: codex-plugin.md + - Cursor Plugin: cursor-plugin.md - Privacy Policy: privacy-policy.md - Terms of Use: terms-of-use.md - SARIF for IDEs: sarif.md diff --git a/plugins/codeclone/skills/codeclone-change-control/SKILL.md b/plugins/codeclone/skills/codeclone-change-control/SKILL.md index 7b4db1eb..e7bc2d2b 100644 --- a/plugins/codeclone/skills/codeclone-change-control/SKILL.md +++ b/plugins/codeclone/skills/codeclone-change-control/SKILL.md @@ -220,11 +220,17 @@ finish_controlled_change( The tool handles: scope check, patch contract verification, claim validation, review receipt generation, and intent cleanup. -If the result is `user_action_required: true`, read the `next_step` -hint and follow it. 
Do not claim the patch is verified. +Intent stays active on non-accepted results — retry `finish` on the +**same `intent_id`** after resolving the issue: -If the result is `status: "unverified"`, the `next_step` hint tells -you what is missing (usually `after_run_id` for Python changes). +- `status: "unverified"` — follow `next_step` (e.g., run + `analyze_repository`, then call `finish` again with `after_run_id`) +- `status: "violated"` (scope) — either remove out-of-scope changes and + retry `finish`, or expand scope via `start_controlled_change` +- `user_action_required: true` — stop and escalate to the user + +Do not start a new cycle unless the intent is expired or scope must +change. Do not claim the patch is verified on non-accepted status. ## Verification profiles diff --git a/plugins/cursor-codeclone/skills/codeclone-change-control/SKILL.md b/plugins/cursor-codeclone/skills/codeclone-change-control/SKILL.md index 7b4db1eb..e7bc2d2b 100644 --- a/plugins/cursor-codeclone/skills/codeclone-change-control/SKILL.md +++ b/plugins/cursor-codeclone/skills/codeclone-change-control/SKILL.md @@ -220,11 +220,17 @@ finish_controlled_change( The tool handles: scope check, patch contract verification, claim validation, review receipt generation, and intent cleanup. -If the result is `user_action_required: true`, read the `next_step` -hint and follow it. Do not claim the patch is verified. +Intent stays active on non-accepted results — retry `finish` on the +**same `intent_id`** after resolving the issue: -If the result is `status: "unverified"`, the `next_step` hint tells -you what is missing (usually `after_run_id` for Python changes). 
+- `status: "unverified"` — follow `next_step` (e.g., run + `analyze_repository`, then call `finish` again with `after_run_id`) +- `status: "violated"` (scope) — either remove out-of-scope changes and + retry `finish`, or expand scope via `start_controlled_change` +- `user_action_required: true` — stop and escalate to the user + +Do not start a new cycle unless the intent is expired or scope must +change. Do not claim the patch is verified on non-accepted status. ## Verification profiles From ac7825e661e82485b585d1bf36319cff0e12ca0b Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Fri, 29 May 2026 23:21:46 +0500 Subject: [PATCH 053/318] feat(audit): move default audit DB to .cache/codeclone/db/audit.sqlite3 --- codeclone/audit/validation.py | 2 +- codeclone/surfaces/cli/session_stats.py | 4 ++-- tests/test_cli_audit.py | 26 ++++++++++++------------- tests/test_cli_session_stats.py | 4 ++-- 4 files changed, 18 insertions(+), 18 deletions(-) diff --git a/codeclone/audit/validation.py b/codeclone/audit/validation.py index 15ad0f80..52a17581 100644 --- a/codeclone/audit/validation.py +++ b/codeclone/audit/validation.py @@ -12,7 +12,7 @@ from .events import KNOWN_EVENT_TYPES, PAYLOAD_MODES, AuditPayloadMode, AuditSeverity AUDIT_SCHEMA_VERSION = "1" -DEFAULT_AUDIT_PATH = ".cache/codeclone/audit.sqlite3" +DEFAULT_AUDIT_PATH = ".cache/codeclone/db/audit.sqlite3" DEFAULT_AUDIT_PAYLOADS: AuditPayloadMode = "compact" DEFAULT_AUDIT_RETENTION_DAYS = 30 MIN_AUDIT_RETENTION_DAYS = 1 diff --git a/codeclone/surfaces/cli/session_stats.py b/codeclone/surfaces/cli/session_stats.py index aa8065fe..171b3347 100644 --- a/codeclone/surfaces/cli/session_stats.py +++ b/codeclone/surfaces/cli/session_stats.py @@ -574,11 +574,11 @@ def _read_audit_token_footprint( """Read aggregate token estimation from audit trail, if available.""" try: from ...audit.reader import read_audit_summary - from ...audit.validation import resolve_audit_path + from ...audit.validation import DEFAULT_AUDIT_PATH, 
resolve_audit_path db_path = resolve_audit_path( root_path=root_path, - value=".cache/codeclone/audit.sqlite3", + value=DEFAULT_AUDIT_PATH, ) if not db_path.is_file(): return None, None, 0 diff --git a/tests/test_cli_audit.py b/tests/test_cli_audit.py index 6d42bbbe..75c50101 100644 --- a/tests/test_cli_audit.py +++ b/tests/test_cli_audit.py @@ -62,7 +62,7 @@ def _write_audit_event( status: str = "accepted", ) -> None: writer = SqliteAuditWriter( - db_path=root / ".cache" / "codeclone" / "audit.sqlite3", + db_path=root / ".cache" / "codeclone" / "db" / "audit.sqlite3", payloads="compact", retention_days=30, ) @@ -103,7 +103,7 @@ def _write_multiple_events(root: Path) -> None: (EVENT_BLAST_RADIUS, "info", "computed"), ] writer = SqliteAuditWriter( - db_path=root / ".cache" / "codeclone" / "audit.sqlite3", + db_path=root / ".cache" / "codeclone" / "db" / "audit.sqlite3", payloads="compact", retention_days=30, ) @@ -202,7 +202,7 @@ def test_audit_contract_errors( console=printer, root_path=tmp_path, audit_enabled=audit_enabled, - audit_path=".cache/codeclone/audit.sqlite3", + audit_path=".cache/codeclone/db/audit.sqlite3", quiet=True, ) @@ -246,7 +246,7 @@ def test_audit_quiet_with_events(tmp_path: Path) -> None: console=printer, root_path=tmp_path, audit_enabled=True, - audit_path=".cache/codeclone/audit.sqlite3", + audit_path=".cache/codeclone/db/audit.sqlite3", quiet=True, ) @@ -266,7 +266,7 @@ def test_audit_verbose_renders_plain_table(tmp_path: Path) -> None: console=printer, root_path=tmp_path, audit_enabled=True, - audit_path=".cache/codeclone/audit.sqlite3", + audit_path=".cache/codeclone/db/audit.sqlite3", quiet=False, ) @@ -288,7 +288,7 @@ def test_audit_verbose_uses_rich_table(tmp_path: Path) -> None: console=cast(PrinterLike, console), root_path=tmp_path, audit_enabled=True, - audit_path=".cache/codeclone/audit.sqlite3", + audit_path=".cache/codeclone/db/audit.sqlite3", quiet=False, ) @@ -310,7 +310,7 @@ def test_audit_rich_with_payload_footprint(tmp_path: 
Path) -> None: console=cast(PrinterLike, console), root_path=tmp_path, audit_enabled=True, - audit_path=".cache/codeclone/audit.sqlite3", + audit_path=".cache/codeclone/db/audit.sqlite3", quiet=False, ) @@ -407,7 +407,7 @@ def test_audit_json_summary_with_footprint(tmp_path: Path) -> None: console=printer, root_path=tmp_path, audit_enabled=True, - audit_path=".cache/codeclone/audit.sqlite3", + audit_path=".cache/codeclone/db/audit.sqlite3", quiet=False, json_summary=True, ) @@ -431,7 +431,7 @@ def test_audit_json_summary_without_footprint(tmp_path: Path) -> None: console=printer, root_path=tmp_path, audit_enabled=True, - audit_path=".cache/codeclone/audit.sqlite3", + audit_path=".cache/codeclone/db/audit.sqlite3", quiet=False, json_summary=True, ) @@ -444,7 +444,7 @@ def test_audit_json_summary_without_footprint(tmp_path: Path) -> None: def _write_event_without_tokens(root: Path) -> None: """Insert an event row directly with NULL token columns.""" - db_path = root / ".cache" / "codeclone" / "audit.sqlite3" + db_path = root / ".cache" / "codeclone" / "db" / "audit.sqlite3" db_path.parent.mkdir(parents=True, exist_ok=True) conn = sqlite3.connect(str(db_path)) try: @@ -528,7 +528,7 @@ def test_payload_footprint_to_dict_roundtrip() -> None: def test_read_audit_summary_includes_payload_footprint(tmp_path: Path) -> None: _write_multiple_events(tmp_path) - db_path = tmp_path / ".cache" / "codeclone" / "audit.sqlite3" + db_path = tmp_path / ".cache" / "codeclone" / "db" / "audit.sqlite3" summary = read_audit_summary(db_path=db_path, limit=50) assert summary.payload_footprint is not None @@ -545,7 +545,7 @@ def test_read_audit_summary_includes_payload_footprint(tmp_path: Path) -> None: def test_read_audit_summary_no_tokens_yields_no_footprint(tmp_path: Path) -> None: _write_event_without_tokens(tmp_path) - db_path = tmp_path / ".cache" / "codeclone" / "audit.sqlite3" + db_path = tmp_path / ".cache" / "codeclone" / "db" / "audit.sqlite3" summary = 
read_audit_summary(db_path=db_path, limit=50) # Event has NULL estimated_tokens → footprint should be None @@ -819,7 +819,7 @@ def _make_args(**attrs: object) -> CLIArgsLike: "no_color": True, "quiet": True, "audit_enabled": False, - "audit_path": ".cache/codeclone/audit.sqlite3", + "audit_path": ".cache/codeclone/db/audit.sqlite3", } defaults.update(attrs) return cast(CLIArgsLike, Namespace(**defaults)) diff --git a/tests/test_cli_session_stats.py b/tests/test_cli_session_stats.py index bf8a46cb..ee182074 100644 --- a/tests/test_cli_session_stats.py +++ b/tests/test_cli_session_stats.py @@ -1073,7 +1073,7 @@ def test_resolve_mcp_tokens_with_audit_data(tmp_path: Path) -> None: ) from codeclone.audit.writer import SqliteAuditWriter - db_path = tmp_path / ".cache" / "codeclone" / "audit.sqlite3" + db_path = tmp_path / ".cache" / "codeclone" / "db" / "audit.sqlite3" writer = SqliteAuditWriter(db_path=db_path, payloads="compact", retention_days=30) try: writer.emit( @@ -1110,7 +1110,7 @@ def test_resolve_mcp_tokens_no_db(tmp_path: Path) -> None: def test_resolve_mcp_tokens_corrupt_db(tmp_path: Path) -> None: """_read_audit_token_footprint tolerates corrupt audit storage.""" - db_path = tmp_path / ".cache" / "codeclone" / "audit.sqlite3" + db_path = tmp_path / ".cache" / "codeclone" / "db" / "audit.sqlite3" db_path.parent.mkdir(parents=True) db_path.write_text("NOT A DATABASE") From e8d1121fa2557a7f1a09cb97c562a1fa19f66f48 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Fri, 29 May 2026 23:21:55 +0500 Subject: [PATCH 054/318] docs(cli): document audit flags and CLI-only options --- docs/book/04-config-and-defaults.md | 25 +++++++++++++++++++++++- docs/book/09-cli.md | 30 +++++++++++++++++++++++------ 2 files changed, 48 insertions(+), 7 deletions(-) diff --git a/docs/book/04-config-and-defaults.md b/docs/book/04-config-and-defaults.md index 0710bd03..5de4020a 100644 --- a/docs/book/04-config-and-defaults.md +++ b/docs/book/04-config-and-defaults.md @@ -139,7 +139,7 
@@ Controller audit trail: | Key | Type | Default | Meaning | Requires / Implies | |------------------------|--------|--------------------------------------|-----------------------------------------------------------|-------------------------------------| | `audit_enabled` | `bool` | `false` | Enable the optional local controller audit trail | Required for `--audit` output | -| `audit_path` | `str` | `.cache/codeclone/audit.sqlite3` | SQLite audit database path, relative to the analysis root | Used only when `audit_enabled=true` | +| `audit_path` | `str` | `.cache/codeclone/db/audit.sqlite3` | SQLite audit database path, relative to the analysis root; stored under `db/` to separate controller state from report/cache artifacts | Used only when `audit_enabled=true` | | `audit_payloads` | `str` | `compact` | Audit payload mode: `off`, `compact`, or `full` | Used only when `audit_enabled=true` | | `audit_retention_days` | `int` | `30` | Retention window for audit rows | Used only when `audit_enabled=true` | @@ -154,6 +154,29 @@ keys are contract errors. `--coverage FILE`. The same pattern applies to report outputs such as `html_out` ↔ `--html` and `json_out` ↔ `--json`. 
+CLI-only flags (no `[tool.codeclone]` key; authoritative spelling in +`tests/fixtures/contract_snapshots/cli_help.txt`): + +| CLI flag | Group | Meaning | +|----------|-------|---------| +| `--changed-only` | Analysis | Limit clone gating/summaries to git-selected files | +| `--diff-against GIT_REF` | Analysis | Resolve changed files from `git diff --name-only GIT_REF`; requires `--changed-only` | +| `--paths-from-git-diff GIT_REF` | Analysis | Shorthand for `--changed-only` + git diff selection | +| `--blast-radius FILE [FILE ...]` | Analysis | Render structural blast radius for given files after analysis | +| `--patch-verify` | Analysis | Verify current patch against trusted clone baseline budget | +| `--strictness LEVEL` | Analysis | `ci`, `strict`, or `relaxed`; valid only with `--patch-verify` (default: `ci`) | +| `--session-stats` | Analysis | Show workspace session status; read-only | +| `--audit` | Analysis | Show local Controller audit trail; requires `audit_enabled=true` | +| `--audit-json` | Analysis | JSON audit footprint; implies `--audit` | +| `--cache-dir [FILE]` | Analysis | Legacy alias for `--cache-path` | +| `--timestamped-report-paths` | Reporting | Append UTC timestamp to default report filenames | +| `--open-html-report` | Output and UI | Open generated HTML in browser; requires `--html` | +| `--progress` | Output and UI | Force-enable progress output | +| `--color` | Output and UI | Force-enable ANSI colors | + +Canonical help text, defaults, and exit-code epilog are locked by +`tests/test_cli_help_snapshot.py` and `tests/test_cli_unit.py::test_cli_help_text_consistency`. + !!! warning "Metrics-mode conflicts are enforced" Metrics update/gating flags are runtime contracts, not hints. 
Combinations such as `skip_metrics=true` together with metrics gating or metrics diff --git a/docs/book/09-cli.md b/docs/book/09-cli.md index 21efd30e..4810fda4 100644 --- a/docs/book/09-cli.md +++ b/docs/book/09-cli.md @@ -28,7 +28,7 @@ CLI modes: - gating mode (`--ci`, `--fail-on-new`, explicit metric gates) - baseline update mode (`--update-baseline`, `--update-metrics-baseline`) - controller query mode (`--blast-radius`, `--patch-verify`) -- session query mode (`--session-stats`) +- workspace query modes (`--session-stats`, `--audit`, `--audit-json`) Summary metrics include: @@ -74,12 +74,23 @@ Refs: - `--patch-verify` compares the current run against the trusted clone baseline, previews gate status, and exits `3` for blocking violations in `ci` or `strict` mode. - - `--strictness {ci,strict,relaxed}` is valid only with `--patch-verify`. - - controller query mode does not write reports, baselines, or analysis - cache data. - Session query mode is terminal-only: - `--session-stats` shows workspace session status: active agents, intents, and lease health. Read-only, does not run analysis. +- Audit query mode is terminal-only: + - `--audit` shows the local Controller audit trail from the configured audit + database. Read-only, does not run analysis. Requires `audit_enabled=true` + in effective configuration (`[tool.codeclone]` or resolved defaults). + - `--audit-json` outputs audit payload footprint as JSON. Implies `--audit`. + Useful for cross-repository comparison. +- Controller and workspace query flags are mutually exclusive where enforced: + - `--blast-radius` and `--patch-verify` cannot be combined. + - `--strictness {ci,strict,relaxed}` is valid only with `--patch-verify`. + - `--session-stats` cannot combine with `--audit`, `--blast-radius`, or + `--patch-verify`. + - `--audit` cannot combine with `--blast-radius` or `--patch-verify`. + - controller and workspace query modes do not write reports, baselines, or + analysis cache data. 
- Contract errors use `CONTRACT ERROR:`. - Gating failures use `GATING FAILURE:`. - Internal errors use `fmt_internal_error` and include traceback only in debug mode. @@ -90,6 +101,7 @@ Refs: - `codeclone/ui_messages/__init__.py:fmt_contract_error` - `codeclone/ui_messages/__init__.py:fmt_internal_error` - `codeclone/surfaces/cli/changed_scope.py:_validate_changed_scope_args` +- `codeclone/surfaces/cli/workflow.py:_validate_controller_query_flags` ## Invariants (MUST) @@ -98,9 +110,13 @@ Refs: - `--timestamped-report-paths` requires at least one requested report output. - `--changed-only` requires a diff source. - `--blast-radius` and `--patch-verify` are mutually exclusive. -- Controller query mode is incompatible with report output flags and baseline - update flags. +- `--session-stats` cannot combine with `--audit`, `--blast-radius`, or + `--patch-verify`. +- `--audit` cannot combine with `--blast-radius` or `--patch-verify`. +- Controller and workspace query modes are incompatible with report output flags, + baseline update flags, and changed-scope flags. - `--patch-verify` requires a trusted clone baseline. +- `--audit` requires `audit_enabled=true` in effective configuration. - Browser-open failure after successful HTML write is warning-only. - In gating mode, unreadable source files are contract errors with higher priority than clone/metric gate failures. 
@@ -123,6 +139,7 @@ Refs: | Invalid output extension/path | contract | `2` | | Invalid changed-scope flag combination | contract | `2` | | Invalid controller query flag combination | contract | `2` | +| `--audit` with `audit_enabled=false` | contract | `2` | | `--patch-verify` without trusted baseline | contract | `2` | | Baseline untrusted in CI/gating | contract | `2` | | Coverage/API regression gate without required baseline capability | contract | `2` | @@ -148,6 +165,7 @@ Refs: ## Locked by tests - `tests/test_cli_unit.py::test_cli_help_text_consistency` +- `tests/test_cli_help_snapshot.py::test_cli_help_snapshot` - `tests/test_cli_unit.py::test_argument_parser_contract_error_marker_for_invalid_args` - `tests/test_cli_inprocess.py::test_cli_summary_format_stable` - `tests/test_cli_inprocess.py::test_cli_unreadable_source_fails_in_ci_with_contract_error` From 0348f01131817b9134639e079e2a90e7ff6f6bab Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Fri, 29 May 2026 23:21:56 +0500 Subject: [PATCH 055/318] test(mcp): cover workflow mixin and patch-contract verify edges --- tests/test_mcp_service.py | 585 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 585 insertions(+) diff --git a/tests/test_mcp_service.py b/tests/test_mcp_service.py index 56cb79d1..fceaf332 100644 --- a/tests/test_mcp_service.py +++ b/tests/test_mcp_service.py @@ -32,6 +32,7 @@ import codeclone.surfaces.mcp._session_runtime as mcp_runtime_mod import codeclone.surfaces.mcp._session_shared as mcp_shared_mod import codeclone.surfaces.mcp._session_state_mixin as mcp_state_mod +import codeclone.surfaces.mcp._session_workflow_mixin as workflow_mod import codeclone.surfaces.mcp._workspace_intents as mcp_workspace_intents_mod import codeclone.surfaces.mcp.server as mcp_server_mod import codeclone.surfaces.mcp.service as mcp_service_mod @@ -4165,6 +4166,245 @@ def fake_git_diff_paths(*, root_path: Path, git_diff_ref: str) -> tuple[str, ... 
assert expired["contract_violations"] == ["intent_expired"] +def test_mcp_patch_contract_verify_profile_and_resolver_edges( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + service = CodeCloneMCPService(history_limit=4) + before = _patch_contract_run_record( + tmp_path, + run_id="resolver12345678", + digest="resolver-digest", + include_regression=False, + complexity=6, + health=90, + ) + service._runs.register(before) + + original_renew = service._renew_lease_if_active + renew_calls = 0 + + def _spy_renew( + *, + record: MCPRunRecord, + intent: mcp_intent_mod.IntentRecord, + ) -> None: + nonlocal renew_calls + renew_calls += 1 + original_renew(record=record, intent=intent) + + monkeypatch.setattr(service, "_renew_lease_if_active", _spy_renew) + declared = service.manage_change_intent( + action="declare", + run_id="resolver", + scope={"allowed_files": ["pkg/a.py"]}, + intent="budget renew path", + ) + service.check_patch_contract( + mode="budget", + run_id="resolver", + intent_id=str(declared["intent_id"]), + ) + assert renew_calls == 1 + + assert service._before_run_id_from_intent("intent-missing") is None + unknown_intent = service.check_patch_contract( + mode="verify", + intent_id="intent-missing", + ) + assert unknown_intent["status"] == "unverified" + assert unknown_intent["reason"] == "no_before_run" + + assert service._optional_after_run(None) is None + assert service._optional_after_run("missing-after") is None + + def fake_git_diff_paths(*, root_path: Path, git_diff_ref: str) -> tuple[str, ...]: + return (f"{root_path.name}:{git_diff_ref}",) + + monkeypatch.setattr(service, "_git_diff_paths", fake_git_diff_paths) + assert service._patch_changed_files_flexible( + before=before, + after_run_id="missing-after", + diff_ref=None, + changed_files=["README.md"], + ) == ("README.md",) + assert service._patch_changed_files_flexible( + before=before, + after_run_id="missing-after", + diff_ref="HEAD~1", + changed_files=None, + ) == 
(f"{tmp_path.name}:HEAD~1",) + + artifact_service = CodeCloneMCPService(history_limit=2) + artifact_before = _patch_contract_run_record( + tmp_path, + run_id="artifact123456789", + digest="artifact-digest", + include_regression=False, + complexity=6, + health=90, + ) + artifact_service._runs.register(artifact_before) + state_artifact = artifact_service.check_patch_contract( + mode="verify", + before_run_id="artifact1", + changed_files=["codeclone.baseline.json"], + ) + assert state_artifact["status"] == "violated" + assert state_artifact["reason"] == "state_artifact_mutation" + assert state_artifact["contract_violations"] == ["state_artifact_mutation"] + + scoped_artifact_service, scoped_intent = _seed_docs_intent( + tmp_path, + run_id="scopedart12345678", + digest="scoped-artifact-digest", + ) + scoped_violation = scoped_artifact_service.check_patch_contract( + mode="verify", + before_run_id="scopedart", + intent_id=scoped_intent, + changed_files=["codeclone.baseline.json"], + ) + assert scoped_violation["contract_violations"] == [ + "state_artifact_mutation", + "scope_violation", + ] + + governance = artifact_service.check_patch_contract( + mode="verify", + before_run_id="artifact1", + changed_files=["pyproject.toml"], + ) + assert governance["status"] == "unverified" + assert governance["reason"] == "after_run_required_for_governance" + + docs_service, docs_intent = _seed_docs_intent( + tmp_path, + run_id="scopeviol12345678", + digest="scope-violation-digest", + ) + scope_violation = docs_service.check_patch_contract( + mode="verify", + before_run_id="scopeviol", + intent_id=docs_intent, + changed_files=["pkg/a.py"], + strictness="ci", + ) + assert scope_violation["status"] == "violated" + assert scope_violation["reason"] == "scope_violation" + + report_document = copy.deepcopy(_blast_radius_report_document("path-index")) + findings = cast("dict[str, object]", report_document["findings"]) + groups = cast( + "dict[str, object]", cast("dict[str, object]", 
findings["groups"])["clones"] + ) + functions = cast("list[dict[str, object]]", groups["functions"]) + functions.append( + { + "id": "", + "items": [{"relative_path": "pkg/skip.py"}], + } + ) + functions.append( + { + "id": "clone:function:top", + "filepath": "pkg/top.py", + "items": [], + } + ) + path_record = replace( + before, + report_document=report_document, + ) + path_index = service._finding_path_index(path_record) + assert "" not in path_index + assert path_index["clone:function:top"] == frozenset({"pkg/top.py"}) + assert service._normalized_report_path(".") == "" + assert service._normalized_report_path("./pkg/a.py") == "pkg/a.py" + assert service._normalized_report_path("pkg/a.py/") == "pkg/a.py" + assert service._paths_in_intent_scope( + paths=frozenset(), + scope=mcp_intent_mod.IntentScope(allowed_files=("pkg/a.py",)), + ) + + after = _patch_contract_run_record( + tmp_path, + run_id="afterresolver1234", + digest="after-resolver", + include_regression=True, + complexity=6, + health=90, + regression_path="pkg/a.py", + ) + partition_service = CodeCloneMCPService(history_limit=4) + partition_service._runs.register(before) + partition_service._runs.register(after) + partition_service.manage_change_intent( + action="declare", + run_id="resolver", + scope={"allowed_files": ["pkg/a.py"]}, + intent="partition unknown regression id", + ) + + def unknown_regression_compare(**kwargs: object) -> dict[str, object]: + return { + "comparable": True, + "regressions": [{"id": "missing-regression-id", "kind": "function_clone"}], + "improvements": [], + "health_delta": 0, + "verdict": "regressed", + } + + monkeypatch.setattr(partition_service, "compare_runs", unknown_regression_compare) + partitioned = partition_service.check_patch_contract( + mode="verify", + before_run_id="resolver", + after_run_id="afterresolver", + changed_files=["pkg/a.py"], + ) + intent_regressions = cast( + "list[dict[str, object]]", + partitioned["intent_regressions"], + ) + assert 
intent_regressions[0]["paths"] == [] + assert partitioned["status"] == "violated" + + intent_scope = mcp_intent_mod.IntentScope(allowed_files=("pkg/a.py",)) + intent_worsened, external_worsened = service._partition_worsened( + worsened=[{"path": "pkg/b.py", "delta": 3}], + intent=mcp_intent_mod.IntentRecord( + intent_id="intent-partition", + run_id=before.run_id, + report_digest="digest", + status=mcp_intent_mod.IntentStatus.ACTIVE, + declared_at_utc="2026-01-01T00:00:00Z", + scope=intent_scope, + intent_description="partition", + expected_effects=(), + guards=(), + ), + ) + assert intent_worsened == [] + assert external_worsened[0]["path"] == "pkg/b.py" + + in_scope_worsened, out_scope_worsened = service._partition_worsened( + worsened=[{"path": "pkg/a.py", "delta": 1}, {"path": "", "delta": 2}], + intent=mcp_intent_mod.IntentRecord( + intent_id="intent-partition-in", + run_id=before.run_id, + report_digest="digest", + status=mcp_intent_mod.IntentStatus.ACTIVE, + declared_at_utc="2026-01-01T00:00:00Z", + scope=intent_scope, + intent_description="partition in scope", + expected_effects=(), + guards=(), + ), + ) + assert len(in_scope_worsened) == 2 + assert out_scope_worsened == [] + + def test_mcp_patch_contract_verify_incomparable_and_expired_edges( tmp_path: Path, monkeypatch: pytest.MonkeyPatch, @@ -7601,3 +7841,348 @@ def test_mcp_service_clone_only_short_id_fallback_branch( "clone:block:one": f"blk:{one_digest}|x1", "clone:block:two": f"blk:{two_digest}|x1", } + + +def _register_docs_patch_run(service: CodeCloneMCPService, root: Path) -> None: + service._runs.register( + _patch_contract_run_record( + root, + run_id="workflow123456789", + digest="workflow-docs-digest", + include_regression=False, + complexity=6, + health=90, + ) + ) + + +def _start_docs_workflow(service: CodeCloneMCPService, root: Path) -> str: + _register_docs_patch_run(service, root) + started = service.start_controlled_change( + root=str(root), + scope={"allowed_files": ["README.md"]}, + 
intent="docs patch", + ) + return str(started["intent_id"]) + + +def test_mcp_workflow_start_controlled_change_contract(tmp_path: Path) -> None: + service = CodeCloneMCPService(history_limit=4) + needs = service.start_controlled_change( + root=str(tmp_path), + scope={"allowed_files": ["README.md"]}, + intent="docs-only edit", + ) + assert needs["status"] == "needs_analysis" + assert needs["edit_allowed"] is False + + _register_docs_patch_run(service, tmp_path) + started = service.start_controlled_change( + root=str(tmp_path), + scope={"allowed_files": ["README.md"]}, + intent="update readme", + blast_radius_depth="transitive", + ) + assert started["status"] == "active" + blast = cast("dict[str, object]", started["blast_radius"]) + assert "transitive_summary" in blast + + with pytest.raises(MCPServiceContractError, match="blast_radius_depth"): + service.start_controlled_change( + root=str(tmp_path), + scope={"allowed_files": ["README.md"]}, + intent="invalid depth", + blast_radius_depth="wide", + ) + + +def test_mcp_workflow_start_queued_and_latest_run( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + service, _foreign_id = _two_agent_service(tmp_path, monkeypatch) + queued = service.start_controlled_change( + root=str(tmp_path), + scope={"allowed_files": ["pkg/a.py"]}, + intent="queued follow-up", + on_conflict="queue", + ) + assert queued["status"] == "queued" + assert "blast_radius" not in queued + + other_root = tmp_path / "other" + other_root.mkdir() + pinned = CodeCloneMCPService(history_limit=4) + pinned._runs.register( + _patch_contract_run_record( + other_root, + run_id="foreign1234567890", + digest="foreign-digest", + include_regression=False, + complexity=6, + health=90, + ) + ) + pinned._runs.register( + _patch_contract_run_record( + tmp_path, + run_id="older12345678901", + digest="older-digest", + include_regression=False, + complexity=6, + health=90, + ) + ) + pinned._runs.register( + _patch_contract_run_record( + tmp_path, + 
run_id="newer12345678901", + digest="newer-digest", + include_regression=False, + complexity=6, + health=91, + ) + ) + latest = pinned.start_controlled_change( + root=str(tmp_path), + scope={"allowed_files": ["README.md"]}, + intent="pin latest run", + ) + assert latest["run_id"] == "newer123" + + +def test_mcp_workflow_start_missing_intent_after_declare_raises( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + service = CodeCloneMCPService(history_limit=4) + _register_docs_patch_run(service, tmp_path) + original_declare = mcp_session_mod.MCPSession._declare_change_intent.__get__( + service, + type(service), + ) + + def declare_then_drop(**kwargs: object) -> dict[str, object]: + payload = cast( + "dict[str, object]", + original_declare(**kwargs), + ) + service._active_intents.pop(str(payload["intent_id"]), None) + return payload + + monkeypatch.setattr(service, "_declare_change_intent", declare_then_drop) + with pytest.raises(MCPServiceContractError, match="not found after declare"): + service.start_controlled_change( + root=str(tmp_path), + scope={"allowed_files": ["README.md"]}, + intent="broken declare", + ) + + +@pytest.mark.parametrize( + ("factory", "expected_status", "expected_reason"), + [ + ("queued", "unverified", "intent_not_active"), + ("scope", "violated", "scope_violation"), + ("expired", "expired", "report_digest_mismatch"), + ], +) +def test_mcp_workflow_finish_controlled_change_guardrails( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + factory: str, + expected_status: str, + expected_reason: str, +) -> None: + if factory == "queued": + service, _foreign_id = _two_agent_service(tmp_path, monkeypatch) + intent_id = _declare_queued_pkg_a(service) + evidence = {"changed_files": ["pkg/a.py"]} + else: + service, intent_id = _seed_docs_intent(tmp_path) + evidence = ( + {"changed_files": ["README.md"]} + if factory == "expired" + else {"changed_files": ["pkg/a.py"]} + ) + if factory == "expired": + monkeypatch.setattr(service, 
"_is_intent_expired", lambda **_: True) + + finished = service.finish_controlled_change(intent_id=intent_id, **evidence) + assert finished["status"] == expected_status + assert finished["reason"] == expected_reason + assert finished["intent_cleared"] is False + + +def test_mcp_workflow_finish_controlled_change_evidence_and_docs_path( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + service, intent_id = _seed_docs_intent(tmp_path) + with pytest.raises(MCPServiceContractError, match="exactly one"): + service.finish_controlled_change( + intent_id=intent_id, + changed_files=["README.md"], + diff_ref="HEAD~1", + ) + with pytest.raises(MCPServiceContractError, match="changed_files or diff_ref"): + service.finish_controlled_change(intent_id=intent_id) + + monkeypatch.setattr( + service, + "_git_diff_paths", + lambda *, root_path, git_diff_ref: ("README.md",), + ) + diff_finished = service.finish_controlled_change( + intent_id=intent_id, + diff_ref="HEAD~1", + create_receipt=False, + auto_clear=False, + ) + assert diff_finished["status"] == "accepted" + + service2 = CodeCloneMCPService(history_limit=4) + intent_id = _start_docs_workflow(service2, tmp_path) + cleared = service2.finish_controlled_change( + intent_id=intent_id, + changed_files=["README.md"], + ) + assert cleared["status"] == "accepted" + assert cleared["intent_cleared"] is True + + +def test_mcp_workflow_finish_python_structural_and_receipt_edges( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + service = CodeCloneMCPService(history_limit=4) + before, _after = _patch_contract_before_after_records( + tmp_path, + before_health=88, + ) + service._runs.register(before) + intent_id = str( + service.start_controlled_change( + root=str(tmp_path), + scope={"allowed_files": ["pkg/a.py"]}, + intent="edit pkg.a", + )["intent_id"] + ) + missing_after = service.finish_controlled_change( + intent_id=intent_id, + changed_files=["pkg/a.py"], + ) + assert missing_after["status"] == 
"unverified" + + stable_before = _patch_contract_run_record( + tmp_path, + run_id="before1234567890", + digest="before-digest", + include_regression=False, + complexity=6, + health=88, + ) + stable_after = _patch_contract_run_record( + tmp_path, + run_id="after1234567890", + digest="after-digest", + include_regression=False, + complexity=6, + health=88, + ) + stable = CodeCloneMCPService(history_limit=4) + stable._runs.register(stable_before) + stable_intent = str( + stable.start_controlled_change( + root=str(tmp_path), + scope={"allowed_files": ["pkg/a.py"]}, + intent="safe pkg.a tweak", + )["intent_id"] + ) + stable._runs.register(stable_after) + accepted = stable.finish_controlled_change( + intent_id=stable_intent, + changed_files=["pkg/a.py"], + after_run_id="after123", + ) + assert accepted["status"] in {"accepted", "accepted_with_external_changes"} + + docs_service, docs_intent = _seed_docs_intent(tmp_path) + + def fail_receipt(**kwargs: object) -> dict[str, object]: + raise MCPServiceContractError("receipt unavailable") + + monkeypatch.setattr(docs_service, "create_review_receipt", fail_receipt) + receipt_failed = docs_service.finish_controlled_change( + intent_id=docs_intent, + changed_files=["README.md"], + ) + assert receipt_failed["intent_cleared"] is False + assert receipt_failed["receipt_error"] == "receipt unavailable" + + monkeypatch.setattr( + docs_service, + "_patch_contract_verify", + lambda **_: { + "status": "accepted", + "claim_validation_recommended": True, + "message": "accepted", + }, + ) + monkeypatch.setattr( + docs_service, + "validate_review_claims", + lambda **_: {"valid": True, "citations_found": 1, "violations": []}, + ) + claims_run = docs_service.finish_controlled_change( + intent_id=docs_intent, + changed_files=["README.md"], + review_text="F-1 reviewed.", + create_receipt=False, + auto_clear=False, + ) + assert cast("dict[str, object]", claims_run["claims"])["valid"] is True + + +def 
test_mcp_workflow_helper_messages_and_validators() -> None: + assert workflow_mod._validated_blast_radius_depth("auto") == "auto" + with pytest.raises(MCPServiceContractError): + workflow_mod._validated_blast_radius_depth("invalid") + assert workflow_mod._workspace_summary({"total_agents": 2})["total_agents"] == 2 + assert workflow_mod._budget_summary({"gate_preview": {"would_fail": True}})[ + "gate_preview" + ] == {"would_fail": True} + assert ( + "high" + in workflow_mod._MCPSessionWorkflowMixin._start_message( + {"radius_level": "high"}, + {"gate_preview": {"would_fail": True}}, + ).lower() + ) + assert ( + "receipt creation failed" + in workflow_mod._MCPSessionWorkflowMixin._finish_message( + verify_status="accepted", + intent_cleared=False, + receipt_error="boom", + ).lower() + ) + + service = CodeCloneMCPService(history_limit=2) + record = _patch_contract_run_record( + Path("/tmp/root"), + run_id="claimskip1234567", + digest="claim-skip-digest", + include_regression=False, + complexity=6, + health=90, + ) + assert ( + workflow_mod._MCPSessionWorkflowMixin._conditional_claim_validation( + service, + record=record, + verify_payload={"claim_validation_recommended": False}, + review_text="F-1 reviewed.", + ) + is None + ) From 90e78b1956787c0cc6365cfd1597e2f7404d17a6 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Fri, 29 May 2026 23:21:59 +0500 Subject: [PATCH 056/318] docs(mcp): narrow read-only guarantees and cache_policy limits --- SECURITY.md | 14 +++++++++++--- docs/architecture.md | 12 +++++++++--- docs/book/11-security-model.md | 16 ++++++++++++---- docs/book/20-mcp-interface.md | 3 ++- docs/mcp.md | 3 ++- docs/terms-of-use.md | 9 +++++++-- 6 files changed, 43 insertions(+), 14 deletions(-) diff --git a/SECURITY.md b/SECURITY.md index 72cf77a7..a5f8259e 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -9,6 +9,7 @@ The following versions currently receive security updates: | Version | Supported | |---------|-----------| +| 2.1.x | Yes | | 2.0.x | Yes | | 
1.4.x | No | | 1.3.x | No | @@ -43,7 +44,9 @@ CodeClone operates purely on static input and follows a conservative execution m ### Baseline and cache integrity - Baseline files are schema/type validated with size limits and tamper-evident integrity fields - (`meta.generator` as trust gate, `meta.payload_sha256` as integrity hash in baseline schema `2.0`). + (`meta.generator` as trust gate, `meta.payload_sha256` as integrity hash in + baseline schema `2.1`; legacy `2.0` payloads remain readable under the trust + model). - Baseline integrity is tamper-evident (audit signal), not tamper-proof cryptographic signing. An actor who can rewrite baseline content and recompute `payload_sha256` can still alter it. - Baseline hash covers canonical clone payload (`clones.functions`, `clones.blocks`, @@ -64,8 +67,13 @@ CodeClone operates purely on static input and follows a conservative execution m CodeClone includes an optional read-only MCP server (`codeclone[mcp]`) that exposes analysis results over JSON-RPC (stdio transport). -- The MCP server is **read-only**: it never mutates baselines, source files, cache, or repo state. -- Session-local review markers are in-memory only and discarded on process exit. +- The MCP server is **read-only** with respect to source files, baselines, + analysis cache, and canonical report artifacts. +- Allowed repo-local writes are limited to ephemeral controller coordination + (`.cache/codeclone/intents/`) and optional audit trail + (`.cache/codeclone/db/audit.sqlite3` when `audit_enabled=true`). +- Session-local review markers and in-memory run history do not survive process + exit. - Tool arguments that accept git refs (`git_diff_ref`) are validated against a strict regex to prevent command injection via `subprocess` calls. 
- The MCP run store is bounded (`history_limit`) with FIFO eviction to prevent unbounded diff --git a/docs/architecture.md b/docs/architecture.md index 4a772611..bcf6e084 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -219,10 +219,16 @@ report document used by JSON/HTML/SARIF. Security boundaries: -- Read-only by design — no tool mutates source files, baselines, or repo state. +- Read-only with respect to source files, baselines, analysis cache + (`cache.json`), and canonical report artifacts. +- Allowed repo-local writes are limited to ephemeral controller coordination + (`.cache/codeclone/intents/`) and optional audit trail + (`.cache/codeclone/db/audit.sqlite3` when `audit_enabled=true`). +- Session-local review markers and in-memory run history do not survive + process restart. - `--allow-remote` guard required for non-local transports; default is `stdio`. -- Cache policies `reuse`, `refresh`, and `off` are accepted by MCP. -- Review markers are session-local in-memory state, never persisted. +- MCP accepts cache policies `reuse` and `off`; `refresh` is rejected at + runtime with a contract error. - Run history bounded by `--history-limit` to prevent unbounded memory growth. - `git_diff_ref` validated as a safe single revision expression before any `git diff` subprocess call. diff --git a/docs/book/11-security-model.md b/docs/book/11-security-model.md index 3ad7a68a..8615c985 100644 --- a/docs/book/11-security-model.md +++ b/docs/book/11-security-model.md @@ -29,11 +29,19 @@ Security-relevant input classes: - Sensitive root directories are blocked by scanner policy. - Symlink traversal outside the root is skipped. - HTML escapes text and attribute contexts before embedding. -- MCP is read-only by design: - no tool mutates source files, baselines, cache, or report artifacts. +- MCP is read-only with respect to source files, baselines, analysis cache + (`cache.json`), and canonical report artifacts. 
+- Allowed repo-local writes are limited to ephemeral controller coordination + (`.cache/codeclone/intents/`) and optional audit trail + (`.cache/codeclone/db/audit.sqlite3` when `audit_enabled=true`). +- Session-local review markers and in-memory run history do not survive + process restart. +- Five session/coordination tools are marked `destructiveHint` in MCP metadata + (`manage_change_intent`, `start_controlled_change`, + `finish_controlled_change`, `mark_finding_reviewed`, `clear_session_runs`). - `--allow-remote` is required for non-local transports. -- Cache policies `reuse`, `refresh`, and `off` are accepted by MCP. -- Review markers are session-local in-memory state only. +- MCP accepts cache policies `reuse` and `off`; `refresh` is rejected at + runtime with a contract error. - `git_diff_ref` is validated as a safe single revision expression before any `git diff` subprocess call. Refs: diff --git a/docs/book/20-mcp-interface.md b/docs/book/20-mcp-interface.md index 9534bf30..ed86f77f 100644 --- a/docs/book/20-mcp-interface.md +++ b/docs/book/20-mcp-interface.md @@ -214,6 +214,7 @@ not trigger analysis. - Change intent, blast-radius cache, and workspace registry state do not enter canonical report integrity, baseline, or cache artifacts. - Run history is process-local and does not survive restart. +- MCP accepts cache policies `reuse` and `off`; `refresh` is rejected at runtime. - Missing optional MCP dependency is surfaced explicitly by the launcher. - `metrics_detail(family="security_surfaces")` exposes a compact, report-only inventory of security-relevant capability surfaces. It does not claim @@ -230,7 +231,7 @@ not trigger analysis. 
| Default transport | Local `stdio` | | Remote exposure | Explicit `--allow-remote` required for non-loopback | | Lazy loading | Base installs and CI do not require MCP packages | -| Read-only | Never mutates repo state or synthesizes findings outside canonical report facts | +| Read-only | Never mutates source, baseline, cache, or report artifacts; optional ephemeral coordination under `.cache/codeclone/intents/` and audit DB when enabled | --- diff --git a/docs/mcp.md b/docs/mcp.md index b8efc577..717596b7 100644 --- a/docs/mcp.md +++ b/docs/mcp.md @@ -553,8 +553,9 @@ include `total`, `shown`, and `truncated` summaries. | Remote exposure | Explicit `--allow-remote` required for non-loopback | | Lazy loading | Base `codeclone` install does not require MCP packages | | Repository access | Limited to what the server process can read locally | -| Session state | In-memory only; does not survive restart | +| Session state | In-memory runs and review markers; do not survive restart | | Workspace intents | Ephemeral coordination under `.cache/codeclone/intents/` | +| Audit trail | Optional SQLite under `.cache/codeclone/db/` when enabled | --- diff --git a/docs/terms-of-use.md b/docs/terms-of-use.md index 0404835e..ef4ed121 100644 --- a/docs/terms-of-use.md +++ b/docs/terms-of-use.md @@ -39,7 +39,8 @@ Those platforms remain governed by their own applicable terms and policies. ## MCP and automation surfaces -The MCP interface is read-only by contract. +The MCP interface is read-only by contract with respect to source files, +baselines, analysis cache, and canonical report artifacts. CodeClone MCP integrations are intended for deterministic structural analysis, review, and triage workflows. 
They expose canonical findings, metrics, and @@ -48,9 +49,13 @@ review data, but do not mutate: - source files - git history - baselines -- repository state +- analysis cache or canonical report artifacts - CI configuration +Ephemeral controller coordination (`.cache/codeclone/intents/`) and optional +audit trail (`.cache/codeclone/db/audit.sqlite3` when `audit_enabled=true`) +are the only allowed repo-local writes. + Remote, shared, or network-exposed MCP deployments are the responsibility of the operator securing and governing those environments. From 78f28eca4a0c6b9045c3e4239a40c91e3ce5e768 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Fri, 29 May 2026 23:22:00 +0500 Subject: [PATCH 057/318] docs: report 28 MCP tools and document Cursor plugin setup --- AGENTS.md | 13 +++++++++++-- README.md | 2 +- docs/README-pypi.md | 2 +- docs/book/01-architecture-map.md | 2 +- docs/book/README.md | 1 + docs/getting-started.md | 11 +++++++++++ 6 files changed, 26 insertions(+), 5 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index f2de0dd7..b55b3091 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -71,6 +71,8 @@ Key artifacts: - `plugins/codeclone/` + `.agents/plugins/marketplace.json` — stable Codex plugin as a native local discovery layer over `codeclone-mcp`, with bundled CodeClone skills under `plugins/codeclone/skills/` (`codeclone-review`, `codeclone-hotspots`, `codeclone-change-control`) +- `plugins/cursor-codeclone/` — stable Cursor plugin as a native local discovery layer over `codeclone-mcp`, with + bundled skills, rules, hooks, and an agent definition - MCP runs are in-memory only. Review markers are session-local. Change intent truth is session-local, with optional ephemeral workspace coordination records under `.cache/codeclone/intents/`; none of this may leak into @@ -441,6 +443,8 @@ Architecture is layered, but grounded in current code (not aspirational diagrams Claude Desktop that launches the same local `codeclone-mcp` server via local `stdio`. 
- **Codex plugin surface** (`plugins/codeclone/*`, `.agents/plugins/marketplace.json`) is a native local Codex plugin over `codeclone-mcp`, with repo-local discovery metadata and bundled skills under `plugins/codeclone/skills/`. +- **Cursor plugin surface** (`plugins/cursor-codeclone/*`) is a native local Cursor plugin over `codeclone-mcp` with + bundled skills, rules, hooks, and an agent definition. - **Tests-as-spec** (`tests/`) lock behavior, contracts, determinism, and architecture boundaries. Non-negotiable interpretation: @@ -453,7 +457,9 @@ Non-negotiable interpretation: - The Claude Desktop bundle is a local setup surface over `codeclone-mcp` and must not introduce a second server or truth path. - The Codex plugin is a local discovery and guidance surface over `codeclone-mcp` and must not introduce a second - analyzer, MCP server, or truth path. + analyzer, MCP server, or truth path. +- The Cursor plugin is a local discovery and guidance surface over `codeclone-mcp` and must not introduce a second + analyzer, MCP server, or truth path. ## 13) Module map @@ -518,7 +524,9 @@ Use this map to route changes to the right owner module. - `extensions/claude-desktop-codeclone/*` — stable Claude Desktop bundle surface; keep it local-stdio-only, launcher-focused, and faithful to `codeclone-mcp` rather than re-implementing MCP semantics in the bundle layer. - `plugins/codeclone/*`, `.agents/plugins/marketplace.json` — stable Codex plugin surface; keep it Codex-native, - conservative-first, skills-guided, and faithful to `codeclone-mcp` rather than inventing plugin-only analysis logic. + conservative-first, skills-guided, and faithful to `codeclone-mcp` rather than inventing plugin-only analysis logic. +- `plugins/cursor-codeclone/*` — stable Cursor plugin surface; keep it Cursor-native, skills/rules/hooks-guided, and + faithful to `codeclone-mcp` rather than inventing plugin-only analysis logic. 
- `tests/` — executable specification: architecture rules, contracts, goldens, invariants, regressions. ## 14) Dependency direction @@ -580,6 +588,7 @@ If you change a contract-sensitive zone, route docs/tests/approval deliberately. | VS Code extension surface (`extensions/vscode-codeclone/*`) | `README.md`, `docs/book/21-vscode-extension.md`, `docs/vscode-extension.md`, `docs/book/01-architecture-map.md`, `docs/README.md`, `CHANGELOG.md` | `node --check extensions/vscode-codeclone/src/support.js`, `node --check extensions/vscode-codeclone/src/mcpClient.js`, `node --check extensions/vscode-codeclone/src/extension.js`, `node --test extensions/vscode-codeclone/test/*.test.js`, plus local extension-host smoke and package smoke when surface/manifest/assets change | command/view UX, trust/runtime model, source-first review flow, or packaging metadata change | documented commands/views/setup/trust behavior, packaged assets, or publish metadata change | | Claude Desktop bundle surface (`extensions/claude-desktop-codeclone/*`) | `docs/book/22-claude-desktop-bundle.md`, `docs/claude-desktop-bundle.md`, `docs/mcp.md`, `docs/book/01-architecture-map.md`, `docs/README.md`, `CHANGELOG.md` | `node --check extensions/claude-desktop-codeclone/server/index.js`, `node --check extensions/claude-desktop-codeclone/src/launcher.js`, `node --check extensions/claude-desktop-codeclone/scripts/build-mcpb.mjs`, `node --test extensions/claude-desktop-codeclone/test/*.test.js`, plus `.mcpb` build smoke | bundle install/runtime model, launcher UX, local-stdio constraints, or bundle metadata change | documented Claude Desktop install/setup/runtime behavior or packaged bundle semantics change | | Codex plugin surface (`plugins/codeclone/*`, `.agents/plugins/marketplace.json`) | `docs/book/23-codex-plugin.md`, `docs/codex-plugin.md`, `docs/mcp.md`, `docs/book/01-architecture-map.md`, `docs/README.md`, `CHANGELOG.md` | `python3 -m json.tool plugins/codeclone/.codex-plugin/plugin.json`, `python3 -m 
json.tool plugins/codeclone/.mcp.json`, `python3 -m json.tool .agents/plugins/marketplace.json`, `tests/test_codex_plugin.py` | plugin discovery/runtime model, bundled MCP config, bundled skill behavior, or plugin metadata change | documented Codex plugin install/discovery/runtime behavior or plugin manifest/marketplace semantics change | +| Cursor plugin surface (`plugins/cursor-codeclone/*`) | `docs/book/25-cursor-plugin.md`, `docs/cursor-plugin.md`, `docs/mcp.md`, `docs/book/01-architecture-map.md`, `docs/README.md`, `CHANGELOG.md` | `tests/test_cursor_plugin.py`, `tests/test_cursor_plugin_hooks.py` | plugin discovery/runtime model, bundled MCP config, bundled skill/rule/hook behavior, or plugin metadata change | documented Cursor plugin install/discovery/runtime behavior or plugin manifest semantics change | | Docs site / sample report publication (`docs/`, `mkdocs.yml`, `.github/workflows/docs.yml`, `scripts/build_docs_example_report.py`) | `docs/README.md`, `docs/publishing.md`, `docs/examples/report.md`, and any contract pages surfaced by the change, `CHANGELOG.md` when user-visible behavior changes | `mkdocs build --strict`, sample-report generation smoke path, and relevant report/html tests if generated examples or embeds change | published docs navigation, sample-report generation, or Pages workflow semantics change | published documentation behavior or sample-report generation contract changes | Golden rule: do not “fix” failures by snapshot refresh unless the underlying contract change is intentional, documented, diff --git a/README.md b/README.md index 559b82cc..a2562853 100644 --- a/README.md +++ b/README.md @@ -257,7 +257,7 @@ CodeClone ships an MCP control surface for AI agents and IDE clients, built on t Canonical analysis is **read-only by contract**: MCP tools never mutate source, baselines, generated reports, or analysis cache. Controller state is session-local or ephemeral workspace coordination state. 
-- **26 tools across 6 workflow phases** — *analyze → triage → drill down → focused checks → change control → session*. +- **28 tools across 6 workflow phases** — *analyze → triage → drill down → focused checks → change control → session*. Triage-first design avoids dumping the full report into agent context. - **Stable read-only resources** — `codeclone://latest/*` and `codeclone://runs/{run_id}/*` URIs return deterministic projections (summary, report, health, gates, changed, triage, schema) without re-triggering analysis. diff --git a/docs/README-pypi.md b/docs/README-pypi.md index cd6596b2..0f5923be 100644 --- a/docs/README-pypi.md +++ b/docs/README-pypi.md @@ -45,7 +45,7 @@ codeclone . --ci # CI mode - **Quality metrics** — complexity, coupling, cohesion, dead code, health score - **Baseline governance** — separates legacy debt from new regressions; CI fails only on what changed - **Change controller** — intent declaration, blast radius, patch contract, review receipt for AI agents -- **MCP server** — 26-tool read-only interface for IDE and agent clients +- **MCP server** — 28-tool read-only interface for IDE and agent clients - **Reports** — HTML, JSON, Markdown, SARIF, text from one canonical payload ## MCP Server diff --git a/docs/book/01-architecture-map.md b/docs/book/01-architecture-map.md index 841495c0..c63b1cc5 100644 --- a/docs/book/01-architecture-map.md +++ b/docs/book/01-architecture-map.md @@ -36,7 +36,7 @@ Main ownership layers: | Deterministic renderers | `codeclone/report/renderers/*` | Text/Markdown/SARIF/JSON projections over the canonical report | | HTML render layer | `codeclone/report/html/*` | Render-only HTML view over canonical report/meta facts | | MCP surface | `codeclone/surfaces/mcp/*` | Read-only MCP tools/resources, change-control projections, and claim validation over stored runs | -| Client surfaces | `extensions/vscode-codeclone/*`, `extensions/claude-desktop-codeclone/*`, `plugins/codeclone/*` | Native clients/install 
surfaces over `codeclone-mcp` | +| Client surfaces | `extensions/vscode-codeclone/*`, `extensions/claude-desktop-codeclone/*`, `plugins/codeclone/*`, `plugins/cursor-codeclone/*` | Native clients/install surfaces over `codeclone-mcp` | Refs: diff --git a/docs/book/README.md b/docs/book/README.md index e8be7547..a49870bf 100644 --- a/docs/book/README.md +++ b/docs/book/README.md @@ -39,6 +39,7 @@ If a statement is not enforced by code/tests, it is explicitly marked as non-con - [23-codex-plugin.md](23-codex-plugin.md) - [25-cursor-plugin.md](25-cursor-plugin.md) - [24-structural-change-controller.md](24-structural-change-controller.md) +- [28-claim-guard.md](28-claim-guard.md) - [10-html-render.md](10-html-render.md) ### System properties diff --git a/docs/getting-started.md b/docs/getting-started.md index 0c8a9449..be497a62 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -190,6 +190,17 @@ codeclone-mcp --transport streamable-http # remote / HTTP clients See [Codex plugin guide](codex-plugin.md). +=== "Cursor" + + Install from the monorepo path + [`plugins/cursor-codeclone/`](https://github.com/orenlab/codeclone/tree/main/plugins/cursor-codeclone) + (symlink into `.cursor/` or use Cursor local plugin discovery). + + The Cursor plugin is **not** listed in `.agents/plugins/marketplace.json`; + that file is Codex-only for local monorepo development. + + See [Cursor plugin guide](cursor-plugin.md). 
+ === "Manual registration" ```bash From 40dbd728b8e7e70446e1f8316dfc482ea93b8bd7 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Fri, 29 May 2026 23:22:10 +0500 Subject: [PATCH 058/318] feat(claude-desktop): refresh bundle manifest for 28-tool MCP surface --- extensions/claude-desktop-codeclone/README.md | 2 +- extensions/claude-desktop-codeclone/manifest.json | 14 +++++++++++++- .../claude-desktop-codeclone/test/manifest.test.js | 13 ++++++++++++- 3 files changed, 26 insertions(+), 3 deletions(-) diff --git a/extensions/claude-desktop-codeclone/README.md b/extensions/claude-desktop-codeclone/README.md index 9ad69732..33c4bf56 100644 --- a/extensions/claude-desktop-codeclone/README.md +++ b/extensions/claude-desktop-codeclone/README.md @@ -3,7 +3,7 @@ Structural change controller for Python — local MCP bundle wrapper for `codeclone-mcp`. Installs as a `.mcpb` package instead of manual JSON editing. -Same canonical 25-tool MCP surface used by CLI, VS Code, Codex, and Claude Code. +Same canonical 28-tool MCP surface used by CLI, VS Code, Codex, and Claude Code. Read-only, baseline-aware, local stdio only. As the local `codeclone-mcp` server gains new canonical surfaces, the bundle exposes them without adding a second client-side interpretation layer. diff --git a/extensions/claude-desktop-codeclone/manifest.json b/extensions/claude-desktop-codeclone/manifest.json index 3480368b..0c194207 100644 --- a/extensions/claude-desktop-codeclone/manifest.json +++ b/extensions/claude-desktop-codeclone/manifest.json @@ -4,7 +4,7 @@ "display_name": "CodeClone", "version": "2.1.0", "description": "Structural change controller for Python — deterministic, baseline-aware, built for CI and AI agents.", - "long_description": "CodeClone for Claude Desktop wraps the local codeclone-mcp launcher as an MCP bundle. It keeps Claude on the same canonical 25-tool MCP surface used by the CLI, HTML report, VS Code extension, and Codex plugin — read-only, baseline-aware, local stdio only. 
The v2.1 change controller includes intent declaration, blast radius, patch contract, and review receipt; claim guard is a planned follow-up.", + "long_description": "CodeClone for Claude Desktop wraps the local codeclone-mcp launcher as an MCP bundle. It keeps Claude on the same canonical 28-tool MCP surface used by the CLI, HTML report, VS Code extension, and Codex plugin — read-only, baseline-aware, local stdio only. The v2.1 change controller includes intent declaration, blast radius, patch contract, review receipt, and claim validation via validate_review_claims and finish_controlled_change.", "author": { "name": "Den Rozhnovskiy", "email": "pytelemonbot@mail.ru", @@ -56,6 +56,14 @@ "name": "manage_change_intent", "description": "Change intent lifecycle: declare scope, get status, check diff, clear intent." }, + { + "name": "start_controlled_change", + "description": "Pre-edit workflow: declare change intent, compute blast radius, and return patch budget in one call." + }, + { + "name": "finish_controlled_change", + "description": "Post-edit workflow: verify scope, run patch contract, validate claims, generate receipt, and clear intent." + }, { "name": "check_patch_contract", "description": "Patch contract checks: budget before editing, verify before/after runs after editing." @@ -64,6 +72,10 @@ "name": "create_review_receipt", "description": "Deterministic audit artifact: provenance, intent scope, blast radius, patch status, and claims-not-made." }, + { + "name": "validate_review_claims", + "description": "Validate cited review text against canonical report semantics and detect deterministic mischaracterizations." + }, { "name": "list_hotspots", "description": "Priority-ranked hotspot views by kind." 
diff --git a/extensions/claude-desktop-codeclone/test/manifest.test.js b/extensions/claude-desktop-codeclone/test/manifest.test.js index 903c8809..9d50f469 100644 --- a/extensions/claude-desktop-codeclone/test/manifest.test.js +++ b/extensions/claude-desktop-codeclone/test/manifest.test.js @@ -32,6 +32,17 @@ test("manifest keeps the setup surface bounded and local", () => { ]); assert.equal(manifest.documentation, "https://orenlab.github.io/codeclone/claude-desktop-bundle/"); assert.equal(manifest.tools_generated, true); - assert.equal(manifest.tools.length, 25); + assert.equal(manifest.tools.length, 28); assert.equal("instructions" in manifest, false); }); + +test("manifest tools match MCP contract snapshot", () => { + const snapshotPath = path.join( + rootDir, + "../../tests/fixtures/contract_snapshots/mcp_tool_schemas.json", + ); + const snapshot = JSON.parse(fs.readFileSync(snapshotPath, "utf8")); + const expected = snapshot.map((entry) => entry.name).sort(); + const actual = manifest.tools.map((entry) => entry.name).sort(); + assert.deepEqual(actual, expected); +}); From 039a2efcf1a0c11602c5f74667e7dcda93e38e31 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Fri, 29 May 2026 23:22:11 +0500 Subject: [PATCH 059/318] fix(vscode): block remote MCP transport args in extension launcher --- extensions/vscode-codeclone/src/support.js | 20 +++++++++++++++++++ .../vscode-codeclone/test/support.test.js | 12 +++++++++++ 2 files changed, 32 insertions(+) diff --git a/extensions/vscode-codeclone/src/support.js b/extensions/vscode-codeclone/src/support.js index 04a9468f..4f73a25f 100644 --- a/extensions/vscode-codeclone/src/support.js +++ b/extensions/vscode-codeclone/src/support.js @@ -91,6 +91,25 @@ function staleMessage(reason) { return "Review data may be stale because the workspace changed after this run."; } +const BLOCKED_MCP_ARGS = new Set([ + "--transport", + "--host", + "--port", + "--allow-remote", + "--json-response", + "--stateless-http", +]); + +function 
assertSafeMcpArgs(args) { + for (const arg of args) { + if (BLOCKED_MCP_ARGS.has(arg)) { + throw new Error( + `CodeClone MCP argument ${arg} is not allowed in the VS Code extension.` + ); + } + } +} + function normalizedLaunchSpec(spec) { const command = String(spec?.command || "").trim(); if (!command) { @@ -102,6 +121,7 @@ function normalizedLaunchSpec(spec) { .map((value) => value.trim()) .filter(Boolean) : []; + assertSafeMcpArgs(args); const cwd = String(spec?.cwd || "").trim(); if (!cwd) { throw new Error("CodeClone MCP launcher cwd must not be empty."); diff --git a/extensions/vscode-codeclone/test/support.test.js b/extensions/vscode-codeclone/test/support.test.js index e3e5e690..e193b7af 100644 --- a/extensions/vscode-codeclone/test/support.test.js +++ b/extensions/vscode-codeclone/test/support.test.js @@ -123,6 +123,18 @@ test("normalizedLaunchSpec trims arguments and rejects empty command or cwd", () ); }); +test("normalizedLaunchSpec rejects blocked remote transport args", () => { + assert.throws( + () => + normalizedLaunchSpec({ + command: "codeclone-mcp", + args: ["--transport", "streamable-http"], + cwd: "/tmp/workspace", + }), + /--transport/ + ); +}); + test("launchSpecOrigin makes launcher provenance explicit", () => { assert.equal( launchSpecOrigin({ From cd53f534b9be318986db6d0f6170cc06993980b8 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Fri, 29 May 2026 23:22:12 +0500 Subject: [PATCH 060/318] feat(cursor): add workspace-first MCP launcher and refresh plugin surface --- docs/book/25-cursor-plugin.md | 15 +++++++++++- docs/cursor-plugin.md | 10 ++++++++ plugins/cursor-codeclone/README.md | 12 ++++++++-- .../hooks/post-edit-reminder.py | 4 ++-- plugins/cursor-codeclone/mcp.json | 8 +++---- .../rules/codeclone-workflow.mdc | 5 ++++ .../cursor-codeclone/scripts/launch_mcp.py | 23 +++++++++++++++++++ tests/test_cursor_plugin.py | 7 +++--- 8 files changed, 71 insertions(+), 13 deletions(-) create mode 100644 
plugins/cursor-codeclone/scripts/launch_mcp.py diff --git a/docs/book/25-cursor-plugin.md b/docs/book/25-cursor-plugin.md index f71d1acb..0b3ad3a7 100644 --- a/docs/book/25-cursor-plugin.md +++ b/docs/book/25-cursor-plugin.md @@ -65,7 +65,8 @@ The plugin currently provides: The plugin surface is additive: -- `mcp.json` contributes a local stdio MCP server definition +- `mcp.json` contributes a local stdio MCP server definition via + `python3 ./scripts/launch_mcp.py` (workspace `.venv` → Poetry env → PATH) - the skills contribute workflow guidance and starter prompts - the rules enforce MCP-first discipline and Python-aware context - the hooks provide automated reminders for re-analysis and intent hygiene @@ -75,6 +76,18 @@ The plugin surface is additive: The plugin does not rewrite user config or install CodeClone automatically. +## Distribution + +- **Monorepo source:** `plugins/cursor-codeclone/` +- **Marketplace:** not listed in `.agents/plugins/marketplace.json` (that file is + Codex-only for local development) +- **Install path:** symlink skills/rules/MCP into `.cursor/` or register the + plugin directory through Cursor local plugin discovery +- **Standalone releases:** ship a full copy of + `plugins/codeclone/scripts/launch_mcp.py` inside + `plugins/cursor-codeclone/scripts/`; the monorepo entrypoint delegates to the + Codex plugin launcher to avoid duplicate logic during development + ## Skill contract Each skill follows these invariants: diff --git a/docs/cursor-plugin.md b/docs/cursor-plugin.md index f56f419f..8602d006 100644 --- a/docs/cursor-plugin.md +++ b/docs/cursor-plugin.md @@ -58,6 +58,16 @@ ln -sfn "$(pwd)/plugins/cursor-codeclone/agents/structural-reviewer.md" \ Add `.cursor/` to `.gitignore` if it is not already there. +!!! note "Marketplace" + The Cursor plugin is **not** listed in `.agents/plugins/marketplace.json`. + That file is Codex-only for local monorepo development. 
Install from + `plugins/cursor-codeclone/` via symlinks or Cursor local plugin discovery. + +The bundled `mcp.json` runs `python3 ./scripts/launch_mcp.py`, which resolves +`.venv` → Poetry env → `PATH`. In the monorepo that entrypoint delegates to +`plugins/codeclone/scripts/launch_mcp.py`; standalone plugin releases must ship +the full launcher body. + ### Personal (global) setup ```bash diff --git a/plugins/cursor-codeclone/README.md b/plugins/cursor-codeclone/README.md index a14faa9d..7ab097a7 100644 --- a/plugins/cursor-codeclone/README.md +++ b/plugins/cursor-codeclone/README.md @@ -76,12 +76,20 @@ deterministic findings with file paths and evidence, not opinions. ## MCP Server -The plugin bundles a stdio-based `codeclone-mcp` server configuration. The -server exposes 21+ tools for repository analysis, findings, blast radius, +The plugin bundles a stdio-based `codeclone-mcp` server configuration via +`python3 ./scripts/launch_mcp.py` (workspace `.venv` → Poetry env → `PATH`). +The server exposes 28 tools for repository analysis, findings, blast radius, change control, and review workflows. See the [MCP interface contract](https://orenlab.github.io/codeclone/book/20-mcp-interface/) for the full tool list. +## Distribution + +- **Monorepo source:** `plugins/cursor-codeclone/` +- **Not in** `.agents/plugins/marketplace.json` (Codex-only local marketplace) +- **Standalone releases:** embed the full launcher from + `plugins/codeclone/scripts/launch_mcp.py`; the monorepo uses a thin delegator + --- ## Local development diff --git a/plugins/cursor-codeclone/hooks/post-edit-reminder.py b/plugins/cursor-codeclone/hooks/post-edit-reminder.py index 2ec695e2..3cfc8aca 100644 --- a/plugins/cursor-codeclone/hooks/post-edit-reminder.py +++ b/plugins/cursor-codeclone/hooks/post-edit-reminder.py @@ -47,8 +47,8 @@ def main() -> None: "A Python file was edited. Consider re-running " "`analyze_repository` to check for structural " "regressions before finishing. 
If you have an active " - "change intent, pass `intent_id` explicitly to " - "`check` and `verify`." + "change intent, run `finish_controlled_change` with " + "the declared `intent_id`." ) } ) diff --git a/plugins/cursor-codeclone/mcp.json b/plugins/cursor-codeclone/mcp.json index d855245b..97afc081 100644 --- a/plugins/cursor-codeclone/mcp.json +++ b/plugins/cursor-codeclone/mcp.json @@ -1,10 +1,10 @@ { "mcpServers": { "codeclone": { - "type": "stdio", - "command": "codeclone-mcp", - "args": ["--transport", "stdio"], - "env": {} + "command": "python3", + "args": [ + "./scripts/launch_mcp.py" + ] } } } diff --git a/plugins/cursor-codeclone/rules/codeclone-workflow.mdc b/plugins/cursor-codeclone/rules/codeclone-workflow.mdc index c27c0d2c..3e666564 100644 --- a/plugins/cursor-codeclone/rules/codeclone-workflow.mdc +++ b/plugins/cursor-codeclone/rules/codeclone-workflow.mdc @@ -27,3 +27,8 @@ You have access to CodeClone through the local CodeClone MCP server. - Prefer `list_hotspots` or `check_*` before broad `list_findings`. - Use `get_finding` or `get_remediation` for one finding. - Use `source_kind` filters to cut test noise. + +## Change control + +- For repository edits, use `start_controlled_change` → edit → + `finish_controlled_change` (see the change-control skill). diff --git a/plugins/cursor-codeclone/scripts/launch_mcp.py b/plugins/cursor-codeclone/scripts/launch_mcp.py new file mode 100644 index 00000000..fd5a3e58 --- /dev/null +++ b/plugins/cursor-codeclone/scripts/launch_mcp.py @@ -0,0 +1,23 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy +"""Cursor plugin MCP launcher entrypoint. + +In the monorepo, delegates to the shared Codex plugin launcher so workspace +discovery logic stays in one place. 
Standalone plugin releases should ship the +full launcher body from ``plugins/codeclone/scripts/launch_mcp.py``. +""" + +from __future__ import annotations + +import runpy +from pathlib import Path + +SHARED_LAUNCHER = ( + Path(__file__).resolve().parents[2] / "codeclone" / "scripts" / "launch_mcp.py" +) + +if __name__ == "__main__": + runpy.run_path(str(SHARED_LAUNCHER), run_name="__main__") diff --git a/tests/test_cursor_plugin.py b/tests/test_cursor_plugin.py index c16cf415..b0718e1a 100644 --- a/tests/test_cursor_plugin.py +++ b/tests/test_cursor_plugin.py @@ -49,11 +49,10 @@ def test_cursor_mcp_json_is_valid() -> None: assert isinstance(mcp_config, dict) server = mcp_config["mcpServers"]["codeclone"] assert server == { - "type": "stdio", - "command": "codeclone-mcp", - "args": ["--transport", "stdio"], - "env": {}, + "command": "python3", + "args": ["./scripts/launch_mcp.py"], } + assert (plugin_root / "scripts" / "launch_mcp.py").is_file() def test_cursor_rules_have_valid_frontmatter() -> None: From 781bb44cc562a68435a711c2350146bebf449729 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Fri, 29 May 2026 23:22:14 +0500 Subject: [PATCH 061/318] chore(codex): bump plugin manifest version to 2.1.0a1 --- docs/book/23-codex-plugin.md | 3 ++- docs/codex-plugin.md | 4 ++++ plugins/codeclone/.codex-plugin/plugin.json | 2 +- tests/test_codex_plugin.py | 10 +++++++++- 4 files changed, 16 insertions(+), 3 deletions(-) diff --git a/docs/book/23-codex-plugin.md b/docs/book/23-codex-plugin.md index 545133cb..aa8c794f 100644 --- a/docs/book/23-codex-plugin.md +++ b/docs/book/23-codex-plugin.md @@ -45,7 +45,8 @@ It must not: The plugin currently provides: -- `.codex-plugin/plugin.json` +- `.codex-plugin/plugin.json` — version tracks the CodeClone package release + line in `pyproject.toml` (currently `2.1.0a1`) - `.mcp.json` - `scripts/launch_mcp` - `README.md` diff --git a/docs/codex-plugin.md b/docs/codex-plugin.md index 788415ea..48ef1d5b 100644 --- 
a/docs/codex-plugin.md +++ b/docs/codex-plugin.md @@ -23,6 +23,10 @@ Install the plugin from the Codex marketplace: marketplace add orenlab/codeclone-codex ``` +The plugin manifest version tracks the CodeClone package release line (currently +`2.1.0a1` in this monorepo). It describes the bundled guidance surface, not the +live MCP tool count — tools come from the resolved `codeclone-mcp` server. + The plugin expects a local `codeclone-mcp` command. Install CodeClone with the MCP extra in the workspace or globally: diff --git a/plugins/codeclone/.codex-plugin/plugin.json b/plugins/codeclone/.codex-plugin/plugin.json index a5724d88..bb82ce53 100644 --- a/plugins/codeclone/.codex-plugin/plugin.json +++ b/plugins/codeclone/.codex-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "codeclone", - "version": "2.0.0", + "version": "2.1.0a1", "description": "Baseline-aware structural code quality analysis for Codex through the local CodeClone MCP server.", "author": { "name": "Den Rozhnovskiy", diff --git a/tests/test_codex_plugin.py b/tests/test_codex_plugin.py index 5a057719..d9038e44 100644 --- a/tests/test_codex_plugin.py +++ b/tests/test_codex_plugin.py @@ -18,16 +18,24 @@ def _assert_contains_all(text: str, needles: tuple[str, ...]) -> None: assert needle in text +def _codeclone_package_version(root: Path) -> str: + for line in (root / "pyproject.toml").read_text(encoding="utf-8").splitlines(): + if line.startswith("version = "): + return line.split("=", 1)[1].strip().strip('"') + raise AssertionError("pyproject.toml version not found") + + def test_codex_plugin_manifest_is_consistent() -> None: root = Path(__file__).resolve().parents[1] plugin_root = root / "plugins" / "codeclone" manifest = _load_json(plugin_root / ".codex-plugin" / "plugin.json") marketplace = _load_json(root / ".agents" / "plugins" / "marketplace.json") + package_version = _codeclone_package_version(root) assert isinstance(manifest, dict) assert manifest["name"] == plugin_root.name assert 
manifest["name"] == "codeclone" - assert manifest["version"] == "2.0.0" + assert manifest["version"] == package_version assert manifest["skills"] == "./skills/" assert manifest["mcpServers"] == "./.mcp.json" assert manifest["license"] == "MPL-2.0" From fa743a959182974ebafd74d4aad715bc379f4bdc Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Fri, 29 May 2026 23:22:16 +0500 Subject: [PATCH 062/318] fix(sync): ship standalone launch_mcp.py when syncing cursor storefront --- scripts/sync_integrations.py | 8 +++++- tests/test_sync_integrations.py | 46 ++++++++++++++++++++++++++++++--- 2 files changed, 49 insertions(+), 5 deletions(-) diff --git a/scripts/sync_integrations.py b/scripts/sync_integrations.py index d379e393..0f09064e 100644 --- a/scripts/sync_integrations.py +++ b/scripts/sync_integrations.py @@ -93,7 +93,13 @@ class SyncResult: ), "cursor": SyncTarget( name="cursor", - copies=(("plugins/cursor-codeclone", "."),), + copies=( + ("plugins/cursor-codeclone", "."), + ( + "plugins/codeclone/scripts/launch_mcp.py", + "scripts/launch_mcp.py", + ), + ), generated=(MANIFEST_NAME,), ), } diff --git a/tests/test_sync_integrations.py b/tests/test_sync_integrations.py index f47983a7..4b57f3fe 100644 --- a/tests/test_sync_integrations.py +++ b/tests/test_sync_integrations.py @@ -59,7 +59,14 @@ def _make_source(tmp_path: Path) -> Path: ) _write(source / "plugins" / "codeclone" / "README.md", "# Codex\n") _write(source / "plugins" / "codeclone" / "skills" / "review" / "SKILL.md") - _write(source / ".agents" / "plugins" / "marketplace.json", '{"plugins":[]}\n') + _write( + source / "plugins" / "codeclone" / "scripts" / "launch_mcp.py", + "def resolve_launch_target():\n return None\n", + ) + _write( + source / ".agents" / "plugins" / "marketplace.json", + '{"plugins":[]}\n', + ) _write( source / "extensions" / "claude-desktop-codeclone" / "manifest.json", "{}\n", @@ -71,6 +78,10 @@ def _make_source(tmp_path: Path) -> Path: "{}\n", ) _write(source / "plugins" / 
"cursor-codeclone" / "rules" / "workflow.mdc") + _write( + source / "plugins" / "cursor-codeclone" / "scripts" / "launch_mcp.py", + "import runpy\n", + ) _commit_all(source) return source @@ -99,7 +110,7 @@ def test_sync_copies_files_and_writes_manifest(tmp_path: Path) -> None: dry_run=False, ) - assert result.files_copied == 3 + assert result.files_copied == 4 assert result.files_deleted == 0 assert (target / "plugins" / "codeclone" / "README.md").is_file() assert (target / ".agents" / "plugins" / "marketplace.json").is_file() @@ -116,7 +127,7 @@ def test_sync_copies_files_and_writes_manifest(tmp_path: Path) -> None: "source_dirty": False, "codeclone_version": "9.8.7", "target": "codex", - "files_copied": 3, + "files_copied": 4, "files_deleted": 0, } @@ -196,7 +207,7 @@ def test_sync_dry_run_does_not_write(tmp_path: Path) -> None: ) assert result.dry_run is True - assert result.files_copied == 3 + assert result.files_copied == 4 assert not (target / "plugins").exists() assert not (target / "SYNC_MANIFEST.json").exists() @@ -328,3 +339,30 @@ def test_nested_layout_preserves_structure(tmp_path: Path) -> None: assert (target / "plugins" / "codeclone" / "README.md").is_file() assert not (target / "README.md").exists() + + +def test_sync_source_paths_exist() -> None: + root = Path(__file__).resolve().parents[1] + for target in SYNC_TARGETS.values(): + for source_rel, _destination_rel in target.copies: + source_path = root / source_rel + assert source_path.exists(), ( + f"missing sync source {source_rel} for target {target.name}" + ) + + +def test_cursor_sync_ships_standalone_launcher(tmp_path: Path) -> None: + source = _make_source(tmp_path) + target = _make_target(tmp_path, "cursor") + + sync_target( + source_root=source, + target_root=target, + target=SYNC_TARGETS["cursor"], + allow_dirty=False, + dry_run=False, + ) + + launcher = (target / "scripts" / "launch_mcp.py").read_text(encoding="utf-8") + assert "resolve_launch_target" in launcher + assert "runpy" not in 
launcher From f337cc8aaff106534a6d323f7caca8887552c54e Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Fri, 29 May 2026 23:22:17 +0500 Subject: [PATCH 063/318] docs: fix baseline schema version in CONTRIBUTING --- CONTRIBUTING.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index cd105f66..a5829061 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -103,7 +103,7 @@ Such changes often require design-level discussion and may be staged across vers ### Baseline contract (v2) -- The baseline schema is versioned (`meta.schema_version`, currently `2.0`). +- The baseline schema is versioned (`meta.schema_version`, currently `2.1`). - Compatibility/trust gates include `schema_version`, `fingerprint_version`, `python_tag`, and `meta.generator.name`. - Integrity is tamper-evident via `meta.payload_sha256` over canonical payload. From 7969ee8a29a9ab49befb7786c24caeea76b793a1 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Fri, 29 May 2026 23:39:34 +0500 Subject: [PATCH 064/318] fix(integrations): harden extension and plugin security surfaces --- .../claude-desktop-codeclone/src/launcher.js | 96 ++++++++++++- extensions/vscode-codeclone/src/extension.js | 28 +++- extensions/vscode-codeclone/src/mcpClient.js | 4 +- extensions/vscode-codeclone/src/support.js | 132 +++++++++++++++++- .../vscode-codeclone/test/support.test.js | 63 ++++++++- plugins/codeclone/scripts/launch_mcp.py | 69 +++++++-- plugins/cursor-codeclone/hooks/_hook_io.py | 19 +++ plugins/cursor-codeclone/hooks/hooks.json | 4 +- .../hooks/post-edit-reminder.py | 10 +- .../hooks/session-cleanup-check.py | 9 +- tests/test_codex_plugin_launcher.py | 49 ++++++- tests/test_cursor_plugin_hooks.py | 21 +++ 12 files changed, 474 insertions(+), 30 deletions(-) create mode 100644 plugins/cursor-codeclone/hooks/_hook_io.py diff --git a/extensions/claude-desktop-codeclone/src/launcher.js b/extensions/claude-desktop-codeclone/src/launcher.js index 
509f86ac..feaec085 100644 --- a/extensions/claude-desktop-codeclone/src/launcher.js +++ b/extensions/claude-desktop-codeclone/src/launcher.js @@ -15,6 +15,33 @@ const BLOCKED_ARGS = new Set([ "--json-response", "--stateless-http", ]); +const SPAWN_ENV_EXACT_KEYS = new Set([ + "PATH", + "HOME", + "USERPROFILE", + "APPDATA", + "LOCALAPPDATA", + "SystemRoot", + "WINDIR", + "TEMP", + "TMP", + "LANG", + "LC_ALL", + "LC_CTYPE", + "TZ", + "TERM", + "PWD", + "OS", + "COMSPEC", + "PATHEXT", +]); +const SPAWN_ENV_PREFIXES = [ + "CODECLONE_", + "PYTHON", + "UV_", + "VIRTUAL_ENV", + "POETRY_", +]; /** * @typedef {{ @@ -116,6 +143,62 @@ function validateAdditionalArgs(args) { } } +/** + * @param {string} key + * @returns {boolean} + */ +function spawnEnvAllowsKey(key) { + if (SPAWN_ENV_EXACT_KEYS.has(key)) { + return true; + } + return SPAWN_ENV_PREFIXES.some((prefix) => key.startsWith(prefix)); +} + +/** + * @param {string | null | undefined} workspaceRoot + * @param {NodeJS.ProcessEnv} [baseEnv] + * @returns {NodeJS.ProcessEnv} + */ +function buildSpawnEnv(workspaceRoot, baseEnv = process.env) { + /** @type {NodeJS.ProcessEnv} */ + const env = {}; + for (const [key, value] of Object.entries(baseEnv)) { + if (typeof value === "string" && spawnEnvAllowsKey(key)) { + env[key] = value; + } + } + const root = normalizeConfiguredValue(workspaceRoot ?? 
""); + if (root && !normalizeConfiguredValue(env.CODECLONE_WORKSPACE_ROOT)) { + env.CODECLONE_WORKSPACE_ROOT = root; + } + return env; +} + +/** + * @param {string} command + * @param {string} root + * @returns {boolean} + */ +function isLauncherWithinWorkspace(command, root) { + const launcher = String(command || "").trim(); + const workspaceRoot = String(root || "").trim(); + if (!launcher || !workspaceRoot) { + return false; + } + try { + const resolvedCommand = fsSync.realpathSync(launcher); + const resolvedRoot = fsSync.realpathSync(workspaceRoot); + const relative = path.relative(resolvedRoot, resolvedCommand); + return ( + relative !== "" && + !relative.startsWith("..") && + !path.isAbsolute(relative) + ); + } catch { + return false; + } +} + /** * @param {string} command * @returns {void} @@ -302,6 +385,9 @@ async function candidateWorkspaceCommands(env, platform, cwd) { continue; } if (await fileExists(candidate.command)) { + if (!isLauncherWithinWorkspace(candidate.command, candidate.root)) { + continue; + } existing.push(candidate); seen.add(candidate.command); } @@ -323,6 +409,9 @@ async function candidateWorkspaceCommands(env, platform, cwd) { continue; } if (await fileExists(command)) { + if (!isLauncherWithinWorkspace(command, ancestor)) { + continue; + } existing.push({command, root: ancestor}); seen.add(command); } @@ -621,10 +710,7 @@ async function runProxy(options = {}) { } const spawnCwd = spec.cwd && spec.cwd.length > 0 ? spec.cwd : undefined; - const childEnv = {...process.env}; - if (spawnCwd && !normalizeConfiguredValue(childEnv.CODECLONE_WORKSPACE_ROOT)) { - childEnv.CODECLONE_WORKSPACE_ROOT = spawnCwd; - } + const childEnv = buildSpawnEnv(spawnCwd ?? 
null); /** @type {string} */ let resolvedCommand; @@ -689,9 +775,11 @@ async function runProxy(options = {}) { module.exports = { BLOCKED_ARGS, buildSetupMessage, + buildSpawnEnv, candidateAutoCommands, candidateWorkspaceCommands, exitProxy, + isLauncherWithinWorkspace, normalizeConfiguredValue, parseLauncherArgsJson, resolveLaunchSpec, diff --git a/extensions/vscode-codeclone/src/extension.js b/extensions/vscode-codeclone/src/extension.js index 913ef6c9..8ab0b3f3 100644 --- a/extensions/vscode-codeclone/src/extension.js +++ b/extensions/vscode-codeclone/src/extension.js @@ -105,6 +105,7 @@ const { STALE_REASON_EDITOR, STALE_REASON_WORKSPACE, isMinimumSupportedCodeCloneVersion, + isLauncherWithinWorkspace, launchSpecOrigin, resolveAnalysisSettings, sameAnalysisSettings, @@ -665,7 +666,10 @@ class CodeCloneController { })) ); const localLauncher = candidateChecks.find((entry) => entry.exists)?.candidate; - if (localLauncher) { + if ( + localLauncher && + isLauncherWithinWorkspace(localLauncher, folder.uri.fsPath) + ) { return normalizedLaunchSpec({ command: localLauncher, args: Array.isArray(configuredArgs) ? configuredArgs : [], @@ -691,6 +695,11 @@ class CodeCloneController { } async ensureConnected(folder) { + if (!(await this.ensureWorkspaceTrust())) { + throw new MCPClientError( + "CodeClone requires a trusted workspace before starting the local MCP server." 
+ ); + } const launchSpec = await this.resolveLaunchSpec(folder); if (this.client.isConnected() && this.connectionInfo.launchSpec) { const activeLaunchSpec = this.connectionInfo.launchSpec; @@ -2824,7 +2833,7 @@ class CodeCloneController { if (!input || !input.trim()) { return; } - files.push(input.trim()); + files.push(this.normalizeBlastRadiusFileInput(folder, input)); } try { await this.ensureConnected(folder); @@ -2859,6 +2868,9 @@ class CodeCloneController { if (!folder) { return; } + if (!(await this.ensureWorkspaceTrust())) { + return; + } const state = this.getWorkspaceState(folder); if (!state.currentRunId) { await vscode.window.showInformationMessage( @@ -2876,7 +2888,7 @@ class CodeCloneController { if (!input || !input.trim()) { return; } - files.push(input.trim()); + files.push(this.normalizeBlastRadiusFileInput(folder, input)); } try { await this.ensureConnected(folder); @@ -2911,6 +2923,16 @@ class CodeCloneController { return []; } + normalizeBlastRadiusFileInput(folder, input) { + const resolved = resolveWorkspacePath(folder.uri.fsPath, input); + if (!resolved) { + throw new MCPClientError( + "Blast radius path must be a workspace-relative file inside the open folder." 
+ ); + } + return path.relative(folder.uri.fsPath, resolved).split(path.sep).join("/"); + } + async clearSessionState() { const folder = this.getPreferredFolder(); if (!folder) { diff --git a/extensions/vscode-codeclone/src/mcpClient.js b/extensions/vscode-codeclone/src/mcpClient.js index 898a3e2d..10ea47d1 100644 --- a/extensions/vscode-codeclone/src/mcpClient.js +++ b/extensions/vscode-codeclone/src/mcpClient.js @@ -4,7 +4,7 @@ const {spawn} = require("node:child_process"); const {EventEmitter} = require("node:events"); const {version: EXTENSION_VERSION} = require("../package.json"); -const {logChannelMessage, trimTail} = require("./support"); +const {logChannelMessage, spawnEnvForMcp, trimTail} = require("./support"); const MCP_PROTOCOL_VERSION = "2025-03-26"; const REQUEST_TIMEOUT_MS = 5 * 60 * 1000; @@ -233,7 +233,7 @@ class CodeCloneMcpClient extends EventEmitter { await new Promise((resolve, reject) => { const child = spawn(launchSpec.command, launchSpec.args, { cwd: launchSpec.cwd, - env: process.env, + env: spawnEnvForMcp(launchSpec.cwd), shell: false, stdio: ["pipe", "pipe", "pipe"], }); diff --git a/extensions/vscode-codeclone/src/support.js b/extensions/vscode-codeclone/src/support.js index 4f73a25f..7612fbb2 100644 --- a/extensions/vscode-codeclone/src/support.js +++ b/extensions/vscode-codeclone/src/support.js @@ -1,5 +1,6 @@ "use strict"; +const fs = require("node:fs"); const path = require("node:path"); const STALE_REASON_EDITOR = "unsaved editor changes"; @@ -99,10 +100,54 @@ const BLOCKED_MCP_ARGS = new Set([ "--json-response", "--stateless-http", ]); +const STDIO_TRANSPORT_ARGS = Object.freeze(["--transport", "stdio"]); +const SPAWN_ENV_EXACT_KEYS = new Set([ + "PATH", + "HOME", + "USERPROFILE", + "APPDATA", + "LOCALAPPDATA", + "SystemRoot", + "WINDIR", + "TEMP", + "TMP", + "LANG", + "LC_ALL", + "LC_CTYPE", + "TZ", + "TERM", + "PWD", + "OS", + "COMSPEC", + "PATHEXT", +]); +const SPAWN_ENV_PREFIXES = [ + "CODECLONE_", + "PYTHON", + "UV_", + 
"VIRTUAL_ENV", + "POETRY_", +]; + +function hasPathSeparator(value) { + return value.includes("/") || value.includes("\\"); +} + +function validateConfiguredCommand(command) { + if (!command) { + return; + } + if (hasPathSeparator(command) && !path.isAbsolute(command)) { + throw new Error( + "Configured CodeClone launcher must be an absolute path or a bare command name." + ); + } +} function assertSafeMcpArgs(args) { for (const arg of args) { - if (BLOCKED_MCP_ARGS.has(arg)) { + const head = arg.split("=", 1)[0]; + if (BLOCKED_MCP_ARGS.has(head)) { throw new Error( `CodeClone MCP argument ${arg} is not allowed in the VS Code extension.` ); @@ -110,24 +155,99 @@ function assertSafeMcpArgs(args) { } } +function forceStdioTransportArgs(args) { + return [...args, ...STDIO_TRANSPORT_ARGS]; +} + +function lockResolvedCommand(command) { + if (!path.isAbsolute(command)) { + return command; + } + try { + const real = fs.realpathSync(command); + const stat = fs.statSync(real); + if (!stat.isFile()) { + throw new Error(`Resolved launcher is not a regular file: ${real}`); + } + return real; + } catch (error) { + if ( + error instanceof Error && + error.message.startsWith("Resolved launcher is not a regular file:") + ) { + throw error; + } + return command; + } +} + +function isLauncherWithinWorkspace(command, rootPath) { + const root = String(rootPath || "").trim(); + const launcher = String(command || "").trim(); + if (!root || !launcher) { + return false; + } + try { + const resolvedCommand = fs.realpathSync(launcher); + const resolvedRoot = fs.realpathSync(root); + const relative = path.relative(resolvedRoot, resolvedCommand); + return ( + relative !== "" && + !relative.startsWith("..") && + !path.isAbsolute(relative) + ); + } catch { + return false; + } +} + +function spawnEnvAllowsKey(key) { + if (SPAWN_ENV_EXACT_KEYS.has(key)) { + return true; + } + return SPAWN_ENV_PREFIXES.some((prefix) => key.startsWith(prefix)); +} + +function spawnEnvForMcp(workspaceRoot, baseEnv = 
process.env) { + /** @type {NodeJS.ProcessEnv} */ + const env = {}; + for (const [key, value] of Object.entries(baseEnv)) { + if (typeof value === "string" && spawnEnvAllowsKey(key)) { + env[key] = value; + } + } + const root = String(workspaceRoot || "").trim(); + if (root && !String(env.CODECLONE_WORKSPACE_ROOT || "").trim()) { + env.CODECLONE_WORKSPACE_ROOT = root; + } + return env; +} + function normalizedLaunchSpec(spec) { const command = String(spec?.command || "").trim(); if (!command) { throw new Error("CodeClone MCP launcher command must not be empty."); } - const args = Array.isArray(spec?.args) + validateConfiguredCommand(command); + const userArgs = Array.isArray(spec?.args) ? spec.args .filter((value) => typeof value === "string") .map((value) => value.trim()) .filter(Boolean) : []; - assertSafeMcpArgs(args); + assertSafeMcpArgs(userArgs); + const args = forceStdioTransportArgs(userArgs); const cwd = String(spec?.cwd || "").trim(); if (!cwd) { throw new Error("CodeClone MCP launcher cwd must not be empty."); } const source = String(spec?.source || "").trim(); - return {command, args, cwd, source}; + return { + command: lockResolvedCommand(command), + args, + cwd, + source, + }; } function trimTail(value, maxChars) { @@ -415,9 +535,11 @@ module.exports = { analysisThresholdOverrides, compareCodeCloneVersions, customAnalysisThresholds, + isLauncherWithinWorkspace, isMinimumSupportedCodeCloneVersion, launchSpecOrigin, locationsNeedDetailHydration, + lockResolvedCommand, normalizedLaunchSpec, normalizeAnalysisProfile, parseUtcTimestamp, @@ -428,8 +550,10 @@ module.exports = { resolveAnalysisSettings, sameAnalysisSettings, signedInteger, + spawnEnvForMcp, staleMessage, trimTail, unsupportedVersionMessage, + validateConfiguredCommand, workspaceLocalLauncherCandidates, }; diff --git a/extensions/vscode-codeclone/test/support.test.js b/extensions/vscode-codeclone/test/support.test.js index e193b7af..3d8f3d1e 100644 --- 
a/extensions/vscode-codeclone/test/support.test.js +++ b/extensions/vscode-codeclone/test/support.test.js @@ -3,6 +3,8 @@ const test = require("node:test"); const assert = require("node:assert/strict"); +const path = require("node:path"); + const { ANALYSIS_PROFILE_CUSTOM, ANALYSIS_PROFILE_DEEPER_REVIEW, @@ -16,6 +18,7 @@ const { analysisThresholdOverrides, compareCodeCloneVersions, customAnalysisThresholds, + isLauncherWithinWorkspace, isMinimumSupportedCodeCloneVersion, launchSpecOrigin, logChannelMessage, @@ -29,9 +32,11 @@ const { resolveAnalysisSettings, sameAnalysisSettings, signedInteger, + spawnEnvForMcp, staleMessage, trimTail, unsupportedVersionMessage, + validateConfiguredCommand, workspaceLocalLauncherCandidates, } = require("../src/support"); @@ -108,7 +113,7 @@ test("normalizedLaunchSpec trims arguments and rejects empty command or cwd", () }), { command: "codeclone-mcp", - args: ["--stdio"], + args: ["--stdio", "--transport", "stdio"], cwd: "/tmp/workspace", source: "", } @@ -133,6 +138,62 @@ test("normalizedLaunchSpec rejects blocked remote transport args", () => { }), /--transport/ ); + assert.throws( + () => + normalizedLaunchSpec({ + command: "codeclone-mcp", + args: ["--transport=streamable-http"], + cwd: "/tmp/workspace", + }), + /--transport=streamable-http/ + ); +}); + +test("validateConfiguredCommand rejects relative paths with separators", () => { + assert.throws( + () => validateConfiguredCommand("./codeclone-mcp"), + /absolute path or a bare command name/ + ); + assert.doesNotThrow(() => validateConfiguredCommand("codeclone-mcp")); +}); + +test("isLauncherWithinWorkspace rejects launchers outside the workspace root", () => { + const fs = require("node:fs"); + const os = require("node:os"); + const tmpRoot = fs.mkdtempSync(path.join(os.tmpdir(), "codeclone-vscode-")); + const workspace = path.join(tmpRoot, "workspace"); + const outside = path.join(tmpRoot, "outside"); + fs.mkdirSync(workspace, {recursive: true}); + fs.mkdirSync(outside, 
{recursive: true}); + const launcher = path.join(workspace, ".venv", "bin", "codeclone-mcp"); + const malicious = path.join(outside, "codeclone-mcp"); + fs.mkdirSync(path.dirname(launcher), {recursive: true}); + fs.writeFileSync(malicious, ""); + fs.symlinkSync(malicious, launcher); + try { + assert.equal(isLauncherWithinWorkspace(launcher, workspace), false); + fs.unlinkSync(launcher); + fs.writeFileSync(launcher, ""); + assert.equal(isLauncherWithinWorkspace(launcher, workspace), true); + } finally { + fs.rmSync(tmpRoot, {recursive: true, force: true}); + } +}); + +test("spawnEnvForMcp keeps launcher-relevant env keys only", () => { + const env = spawnEnvForMcp("/workspace/repo", { + PATH: "/bin", + HOME: "/home/user", + SECRET_TOKEN: "hidden", + CODECLONE_MCP_SHUTDOWN_GRACE_MS: "1000", + PYTHONPATH: "/tmp", + }); + assert.equal(env.PATH, "/bin"); + assert.equal(env.HOME, "/home/user"); + assert.equal(env.CODECLONE_MCP_SHUTDOWN_GRACE_MS, "1000"); + assert.equal(env.PYTHONPATH, "/tmp"); + assert.equal(env.CODECLONE_WORKSPACE_ROOT, "/workspace/repo"); + assert.equal(env.SECRET_TOKEN, undefined); }); test("launchSpecOrigin makes launcher provenance explicit", () => { diff --git a/plugins/codeclone/scripts/launch_mcp.py b/plugins/codeclone/scripts/launch_mcp.py index a3463b8f..79ae8888 100644 --- a/plugins/codeclone/scripts/launch_mcp.py +++ b/plugins/codeclone/scripts/launch_mcp.py @@ -17,6 +17,30 @@ PLUGIN_ROOT = Path(__file__).resolve().parents[1] REPO_ROOT = PLUGIN_ROOT.parents[1] TRANSPORT_ARGS = ("--transport", "stdio") +MAX_STDIN_BYTES = 65536 +_SPAWN_ENV_EXACT_KEYS = frozenset( + { + "PATH", + "HOME", + "USERPROFILE", + "APPDATA", + "LOCALAPPDATA", + "SystemRoot", + "WINDIR", + "TEMP", + "TMP", + "LANG", + "LC_ALL", + "LC_CTYPE", + "TZ", + "TERM", + "PWD", + "OS", + "COMSPEC", + "PATHEXT", + } +) +_SPAWN_ENV_PREFIXES = ("CODECLONE_", "PYTHON", "UV_", "VIRTUAL_ENV", "POETRY_") @dataclass(frozen=True) @@ -69,14 +93,40 @@ def 
workspace_local_launcher_candidates(root: Path) -> tuple[Path, ...]: ) +def launcher_within_workspace(candidate: Path, root: Path) -> bool: + if not candidate.is_file(): + return False + try: + candidate.resolve().relative_to(root.resolve()) + except ValueError: + return False + return True + + +def minimal_child_env( + env: Mapping[str, str], + workspace_root: Path | None, +) -> dict[str, str]: + child_env = { + key: value + for key, value in env.items() + if key in _SPAWN_ENV_EXACT_KEYS or key.startswith(_SPAWN_ENV_PREFIXES) + } + if workspace_root is not None and not _normalized_env_value( + child_env.get("CODECLONE_WORKSPACE_ROOT") + ): + child_env["CODECLONE_WORKSPACE_ROOT"] = str(workspace_root) + return child_env + + def resolve_workspace_local_launcher( roots: tuple[Path, ...], ) -> LaunchTarget | None: for root in roots: for candidate in workspace_local_launcher_candidates(root): - if candidate.is_file(): + if launcher_within_workspace(candidate, root): return LaunchTarget( - command=str(candidate), + command=str(candidate.resolve()), source="workspaceLocal", workspace_root=root, ) @@ -101,7 +151,7 @@ def resolve_poetry_launcher( candidate = candidate / script_dir / executable if candidate.is_file(): return LaunchTarget( - command=str(candidate), + command=str(candidate.resolve()), source="poetryEnv", workspace_root=root, ) @@ -173,13 +223,12 @@ def build_setup_message() -> str: def exec_launch_target(target: LaunchTarget, env: Mapping[str, str]) -> None: - child_env = dict(env) - if target.workspace_root is not None and not _normalized_env_value( - child_env.get("CODECLONE_WORKSPACE_ROOT") - ): - child_env["CODECLONE_WORKSPACE_ROOT"] = str(target.workspace_root) - argv = [target.command, *TRANSPORT_ARGS] - os.execvpe(target.command, argv, child_env) + command = target.command + if os.path.isabs(command): + command = str(Path(command).resolve()) + child_env = minimal_child_env(env, target.workspace_root) + argv = [command, *TRANSPORT_ARGS] + 
os.execvpe(command, argv, child_env) def main() -> int: diff --git a/plugins/cursor-codeclone/hooks/_hook_io.py b/plugins/cursor-codeclone/hooks/_hook_io.py new file mode 100644 index 00000000..c16cbc62 --- /dev/null +++ b/plugins/cursor-codeclone/hooks/_hook_io.py @@ -0,0 +1,19 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy +"""Shared hook I/O helpers for Cursor plugin hooks.""" + +from __future__ import annotations + +import sys + +MAX_STDIN_BYTES = 65536 + + +def read_bounded_stdin(max_bytes: int = MAX_STDIN_BYTES) -> str: + payload = sys.stdin.buffer.read(max_bytes + 1) + if len(payload) > max_bytes: + return "" + return payload.decode("utf-8", errors="replace") diff --git a/plugins/cursor-codeclone/hooks/hooks.json b/plugins/cursor-codeclone/hooks/hooks.json index 81037165..80858a67 100644 --- a/plugins/cursor-codeclone/hooks/hooks.json +++ b/plugins/cursor-codeclone/hooks/hooks.json @@ -3,14 +3,14 @@ "hooks": { "afterFileEdit": [ { - "command": "python3 ${CURSOR_PLUGIN_ROOT}/hooks/post-edit-reminder.py || python ${CURSOR_PLUGIN_ROOT}/hooks/post-edit-reminder.py", + "command": "python3 ${CURSOR_PLUGIN_ROOT}/hooks/post-edit-reminder.py", "type": "command", "timeout": 5 } ], "stop": [ { - "command": "python3 ${CURSOR_PLUGIN_ROOT}/hooks/session-cleanup-check.py || python ${CURSOR_PLUGIN_ROOT}/hooks/session-cleanup-check.py", + "command": "python3 ${CURSOR_PLUGIN_ROOT}/hooks/session-cleanup-check.py", "type": "command", "timeout": 5 } diff --git a/plugins/cursor-codeclone/hooks/post-edit-reminder.py b/plugins/cursor-codeclone/hooks/post-edit-reminder.py index 3cfc8aca..5e563068 100644 --- a/plugins/cursor-codeclone/hooks/post-edit-reminder.py +++ b/plugins/cursor-codeclone/hooks/post-edit-reminder.py @@ -17,11 +17,19 @@ import json 
import sys +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).resolve().parent)) +from _hook_io import read_bounded_stdin def main() -> None: + raw = read_bounded_stdin() + if not raw: + print("{}") + return try: - data = json.loads(sys.stdin.read()) + data = json.loads(raw) except (json.JSONDecodeError, OSError): print("{}") return diff --git a/plugins/cursor-codeclone/hooks/session-cleanup-check.py b/plugins/cursor-codeclone/hooks/session-cleanup-check.py index d335bebb..e046d03e 100644 --- a/plugins/cursor-codeclone/hooks/session-cleanup-check.py +++ b/plugins/cursor-codeclone/hooks/session-cleanup-check.py @@ -21,6 +21,9 @@ import sys from pathlib import Path +sys.path.insert(0, str(Path(__file__).resolve().parent)) +from _hook_io import read_bounded_stdin + _EMPTY = "{}" _WARNING = json.dumps( @@ -77,7 +80,11 @@ def _read_validated_transcript(stdin_payload: str) -> str | None: def main() -> None: - content = _read_validated_transcript(sys.stdin.read()) + raw = read_bounded_stdin() + if not raw: + print(_EMPTY) + return + content = _read_validated_transcript(raw) if content is None: print(_EMPTY) return diff --git a/tests/test_codex_plugin_launcher.py b/tests/test_codex_plugin_launcher.py index ed320da3..01c1df20 100644 --- a/tests/test_codex_plugin_launcher.py +++ b/tests/test_codex_plugin_launcher.py @@ -60,12 +60,57 @@ def test_resolve_launch_target_prefers_workspace_local_launcher(tmp_path: Path) ) assert target == launcher_mod.LaunchTarget( - command=str(launcher_path), + command=str(launcher_path.resolve()), source="workspaceLocal", workspace_root=workspace_root, ) +def test_resolve_launch_target_skips_workspace_local_launcher_outside_root( + tmp_path: Path, +) -> None: + workspace_root = tmp_path / "workspace" + outside_root = tmp_path / "outside" + workspace_root.mkdir(parents=True) + outside_root.mkdir(parents=True) + real_launcher = outside_root / "codeclone-mcp" + real_launcher.write_text("", encoding="utf-8") + symlink_path = 
launcher_mod.workspace_local_launcher_candidates(workspace_root)[0] + symlink_path.parent.mkdir(parents=True, exist_ok=True) + symlink_path.symlink_to(real_launcher) + + target = launcher_mod.resolve_launch_target( + env={"PWD": str(workspace_root)}, + cwd=str(workspace_root), + repo_root=workspace_root, + which=lambda _name: "/usr/local/bin/codeclone-mcp", + ) + + assert target == launcher_mod.LaunchTarget( + command="/usr/local/bin/codeclone-mcp", + source="path", + workspace_root=workspace_root, + ) + + +def test_minimal_child_env_filters_unrelated_secrets() -> None: + child_env = launcher_mod.minimal_child_env( + { + "PATH": "/bin", + "HOME": "/home/user", + "SECRET_TOKEN": "hidden", + "CODECLONE_WORKSPACE_ROOT": "", + "PYTHONPATH": "/tmp", + }, + workspace_root=Path("/workspace/repo"), + ) + assert child_env["PATH"] == "/bin" + assert child_env["HOME"] == "/home/user" + assert child_env["PYTHONPATH"] == "/tmp" + assert child_env["CODECLONE_WORKSPACE_ROOT"] == "/workspace/repo" + assert "SECRET_TOKEN" not in child_env + + def test_resolve_launch_target_prefers_poetry_before_path(tmp_path: Path) -> None: workspace_root = tmp_path / "workspace" poetry_root = tmp_path / "poetry-env" @@ -99,7 +144,7 @@ def fake_run(*_args: object, **kwargs: object) -> subprocess.CompletedProcess[st ) assert target == launcher_mod.LaunchTarget( - command=str(poetry_launcher), + command=str(poetry_launcher.resolve()), source="poetryEnv", workspace_root=workspace_root, ) diff --git a/tests/test_cursor_plugin_hooks.py b/tests/test_cursor_plugin_hooks.py index f0bbd65a..e27314fe 100644 --- a/tests/test_cursor_plugin_hooks.py +++ b/tests/test_cursor_plugin_hooks.py @@ -295,6 +295,27 @@ def test_hooks_referenced_scripts_exist() -> None: ) +def test_hooks_commands_avoid_shell_metacharacters() -> None: + hooks_json = json.loads((_HOOKS_DIR / "hooks.json").read_text(encoding="utf-8")) + for event, entries in hooks_json["hooks"].items(): + for entry in entries: + cmd = entry["command"] + 
assert "||" not in cmd, f"{event} hook still uses shell fallback: {cmd}" + assert "&&" not in cmd, f"{event} hook still uses shell chaining: {cmd}" + + +def test_post_edit_hook_rejects_oversized_stdin() -> None: + oversized = json.dumps({"path": "src/module.py"}) + (" " * 70000) + assert _run_hook(_POST_EDIT, oversized) == _EMPTY + + +def test_session_hook_rejects_oversized_stdin(tmp_path: Path) -> None: + transcript = tmp_path / "transcript.txt" + transcript.write_text('{"action":"declare"}\n', encoding="utf-8") + oversized = json.dumps({"transcript_path": str(transcript)}) + (" " * 70000) + assert _run_hook(_SESSION_CHECK, oversized) == _EMPTY + + def test_hooks_no_bash_scripts_remain() -> None: sh_files = list(_HOOKS_DIR.glob("*.sh")) assert sh_files == [], f"Stale bash scripts found: {sh_files}" From 6dfd588476e5ab9678780458c0d9bb525fdb14de Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Fri, 29 May 2026 23:42:02 +0500 Subject: [PATCH 065/318] fix(ci): align mypy with pyproject scope and exclude Cursor delegator --- .github/workflows/tests.yml | 2 +- pyproject.toml | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index dfe39a44..bbaf9677 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -73,4 +73,4 @@ jobs: run: uv run ruff check . - name: Mypy - run: uv run mypy . 
+ run: uv run mypy diff --git a/pyproject.toml b/pyproject.toml index cf5136e0..ca3571c1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -165,6 +165,7 @@ files = ["codeclone", "tests"] exclude = [ "^build/", "^site/", + "^plugins/cursor-codeclone/scripts/launch_mcp\\.py$", ] [tool.ruff] From 01d0ca8a032e8d6f26ddc1b2904bb63abbf67a6c Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sat, 30 May 2026 00:19:49 +0500 Subject: [PATCH 066/318] fix(scanner,core): block symlink reads outside repository root --- codeclone/core/worker.py | 15 +++++++++++---- codeclone/scanner/__init__.py | 12 ++++++++++++ 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/codeclone/core/worker.py b/codeclone/core/worker.py index 4cbd52ca..2f621b12 100644 --- a/codeclone/core/worker.py +++ b/codeclone/core/worker.py @@ -9,7 +9,6 @@ import inspect import os from collections.abc import Callable -from pathlib import Path from ..analysis.normalizer import NormalizationConfig from ..analysis.units import extract_units_and_stats_from_source @@ -20,7 +19,7 @@ DEFAULT_SEGMENT_MIN_LOC, DEFAULT_SEGMENT_MIN_STMT, ) -from ..scanner import module_name_from_path +from ..scanner import module_name_from_path, resolved_path_under_root from ._types import MAX_FILE_SIZE, FileProcessResult @@ -39,8 +38,16 @@ def process_file( segment_min_stmt: int = DEFAULT_SEGMENT_MIN_STMT, ) -> FileProcessResult: try: + resolved = resolved_path_under_root(filepath, root) + if resolved is None: + return FileProcessResult( + filepath=filepath, + success=False, + error="Source path resolves outside repository root.", + error_kind="source_read_error", + ) try: - stat_result = os.stat(filepath) + stat_result = os.stat(resolved) if stat_result.st_size > MAX_FILE_SIZE: return FileProcessResult( filepath=filepath, @@ -63,7 +70,7 @@ def process_file( "size": stat_result.st_size, } try: - source = Path(filepath).read_text("utf-8") + source = resolved.read_text("utf-8") except UnicodeDecodeError as exc: return 
FileProcessResult( filepath=filepath, diff --git a/codeclone/scanner/__init__.py b/codeclone/scanner/__init__.py index 4d89478d..66c427e0 100644 --- a/codeclone/scanner/__init__.py +++ b/codeclone/scanner/__init__.py @@ -57,6 +57,18 @@ def _is_under_root(path: Path, root: Path) -> bool: return False +def resolved_path_under_root(filepath: str, root: str) -> Path | None: + """Return the resolved source path when it stays under ``root``.""" + try: + root_path = Path(root).resolve() + resolved = Path(filepath).resolve() + except OSError: + return None + if _is_under_root(resolved, root_path): + return resolved + return None + + def _ensure_not_sensitive_root(*, rootp: Path, root_arg: str) -> None: root_str = str(rootp) temp_root = _get_tempdir() From 2b6d015a2690020c7030fff35cb49e4d8837062e Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sat, 30 May 2026 00:19:50 +0500 Subject: [PATCH 067/318] fix(html): build report modals without innerHTML --- codeclone/report/html/assets/js.py | 39 ++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/codeclone/report/html/assets/js.py b/codeclone/report/html/assets/js.py index aec07d01..e4c948e5 100644 --- a/codeclone/report/html/assets/js.py +++ b/codeclone/report/html/assets/js.py @@ -346,8 +346,19 @@ const group=btn.closest('.group'); if(!group)return; const d=group.dataset; - const items=[]; - function add(label,val){if(val)items.push('
    '+label+'
    '+val+'
    ')} + const body=dlg.querySelector('#modal-body'); + body.replaceChildren(); + const list=document.createElement('dl'); + list.className='info-dl'; + function add(label,val){ + if(!val)return; + const dt=document.createElement('dt'); + dt.textContent=label; + const dd=document.createElement('dd'); + dd.textContent=val; + list.appendChild(dt); + list.appendChild(dd); + } add('Match rule',d.matchRule); add('Block size',d.blockSize); add('Signature',d.signatureKind); @@ -361,11 +372,18 @@ add('Group arity',d.groupArity); add('Clone type',d.cloneType); add('Source kind',d.sourceKind); - if(d.spreadFiles)add('Spread',d.spreadFunctions+' fn / '+d.spreadFiles+' files'); + if(d.spreadFiles){ + add('Spread',d.spreadFunctions+' fn / '+d.spreadFiles+' files'); + } + if(list.childNodes.length){ + body.appendChild(list); + }else{ + const empty=document.createElement('p'); + empty.className='muted'; + empty.textContent='No metadata available.'; + body.appendChild(empty); + } dlg.querySelector('#modal-title').textContent='Group: '+groupId; - dlg.querySelector('#modal-body').innerHTML=items.length - ?'
    '+items.join('')+'
    ' - :'

    No metadata available.

    '; dlg.showModal(); }); })(); @@ -586,7 +604,14 @@ var tplId=btn.getAttribute('data-finding-why-btn'); var tpl=document.getElementById(tplId); if(!tpl)return; - body.innerHTML=tpl.innerHTML; + body.replaceChildren(); + if(tpl.content){ + body.appendChild(document.importNode(tpl.content,true)); + }else{ + Array.from(tpl.childNodes).forEach(function(node){ + body.appendChild(node.cloneNode(true)); + }); + } dlg.showModal(); }); })(); From fd6bd421f1077e9c99a85daf53675e8bca404fab Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sat, 30 May 2026 00:19:52 +0500 Subject: [PATCH 068/318] test(security): add trust-boundary invariant sentinels --- tests/test_security_invariants.py | 429 ++++++++++++++++++++++++++++++ 1 file changed, 429 insertions(+) create mode 100644 tests/test_security_invariants.py diff --git a/tests/test_security_invariants.py b/tests/test_security_invariants.py new file mode 100644 index 00000000..2d9476c6 --- /dev/null +++ b/tests/test_security_invariants.py @@ -0,0 +1,429 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy +"""Security invariant sentinels for CodeClone trust boundaries. + +These tests lock documented security behavior without changing production +contracts. They complement integration tests in ``test_security.py`` and +surface-specific suites (MCP, scanner, baseline, cache). 
+""" + +from __future__ import annotations + +import os +from pathlib import Path + +import pytest + +from codeclone.analysis.suppressions import ( + SUPPORTED_RULE_IDS, + extract_suppression_directives, +) +from codeclone.audit.validation import AuditConfigError, resolve_audit_path +from codeclone.cache.integrity import ( + sign_cache_payload, + verify_cache_payload_signature, +) +from codeclone.contracts.errors import ValidationError +from codeclone.report.html.primitives.escape import _escape_html +from codeclone.scanner import iter_py_files, resolved_path_under_root +from codeclone.surfaces.mcp._session_helpers import ( + _normalize_relative_path, + _resolve_optional_path, + _resolve_root, +) +from codeclone.surfaces.mcp.session import MCPServiceContractError +from codeclone.utils.git_diff import validate_git_diff_ref + +_REPO_ROOT = Path(__file__).resolve().parents[1] +_HTML_JS_PATH = _REPO_ROOT / "codeclone" / "report" / "html" / "assets" / "js.py" + + +def _symlink_or_skip(link: Path, target: Path) -> None: + if not hasattr(os, "symlink"): + pytest.skip("symlink is not supported on this platform") + try: + link.symlink_to(target) + except (OSError, NotImplementedError): + pytest.skip("symlink creation is not available in this environment") + + +# ── git diff ref validation (pre-subprocess gate) ───────────────────── + + +@pytest.mark.parametrize( + "ref", + [ + "HEAD", + "main", + "origin/main", + "v1.2.3", + "abc1234", + "HEAD~1", + "main^", + "release@{1}", + "abc..def", + ], +) +def test_validate_git_diff_ref_accepts_safe_revision_expressions(ref: str) -> None: + assert validate_git_diff_ref(ref) == ref + + +@pytest.mark.parametrize( + "ref", + [ + "", + " ", + " HEAD", + "HEAD ", + "HEAD\n", + "--cached", + "-", + "./main", + "../main", + "main;rm", + "main$(whoami)", + "main`id`", + "main|cat", + ], +) +def test_validate_git_diff_ref_rejects_unsafe_revision_expressions(ref: str) -> None: + with pytest.raises(ValueError, match="Invalid git diff ref"): + 
validate_git_diff_ref(ref) + + +# ── MCP path normalization and root resolution ─────────────────────── + + +@pytest.mark.parametrize( + ("path", "expected"), + [ + ("src/module.py", "src/module.py"), + ("./src/module.py", "src/module.py"), + ("src/nested/", "src/nested"), + (".", ""), + ], +) +def test_mcp_normalize_relative_path_accepts_in_repo_paths( + path: str, expected: str +) -> None: + assert _normalize_relative_path(path) == expected + + +@pytest.mark.parametrize( + "path", + [ + "../outside.py", + "src/../../outside.py", + "foo/../bar/../../etc/passwd", + ], +) +def test_mcp_normalize_relative_path_rejects_traversal(path: str) -> None: + with pytest.raises(MCPServiceContractError, match="path traversal not allowed"): + _normalize_relative_path(path) + + +def test_mcp_resolve_root_requires_absolute_existing_directory(tmp_path: Path) -> None: + assert _resolve_root(str(tmp_path.resolve())) == tmp_path.resolve() + + with pytest.raises(MCPServiceContractError, match="absolute repository root"): + _resolve_root("relative/path") + + with pytest.raises(MCPServiceContractError, match="absolute repository root"): + _resolve_root("") + + missing = tmp_path / "missing" + with pytest.raises(MCPServiceContractError, match="does not exist"): + _resolve_root(str(missing)) + + file_root = tmp_path / "file.py" + file_root.write_text("x = 1\n", encoding="utf-8") + with pytest.raises(MCPServiceContractError, match="not a directory"): + _resolve_root(str(file_root.resolve())) + + +def test_mcp_resolve_optional_path_keeps_documented_absolute_outside_root( + tmp_path: Path, +) -> None: + """Contract sentinel: optional MCP paths may point outside scan root today.""" + workspace = tmp_path / "workspace" + workspace.mkdir() + outside = tmp_path / "outside-cache.json" + outside.write_text("{}", encoding="utf-8") + + resolved = _resolve_optional_path(str(outside.resolve()), workspace) + assert resolved == outside.resolve() + + inside = workspace / "cache.json" + assert 
_resolve_optional_path("cache.json", workspace) == inside.resolve() + + +def test_mcp_resolve_optional_path_resolves_relative_under_root(tmp_path: Path) -> None: + workspace = tmp_path / "workspace" + nested = workspace / "nested" + nested.mkdir(parents=True) + target = nested / "coverage.xml" + target.write_text("", encoding="utf-8") + + assert _resolve_optional_path("nested/coverage.xml", workspace) == target.resolve() + + +# ── audit path containment (contrast with optional MCP paths) ──────── + + +def test_resolve_audit_path_rejects_absolute_and_traversal(tmp_path: Path) -> None: + root = tmp_path / "repo" + root.mkdir() + + assert ( + resolve_audit_path( + root_path=root, + value=".cache/codeclone/db/audit.sqlite3", + ) + == root / ".cache" / "codeclone" / "db" / "audit.sqlite3" + ) + + with pytest.raises(AuditConfigError, match="relative to the repository root"): + resolve_audit_path(root_path=root, value="/tmp/audit.sqlite3") + + with pytest.raises(AuditConfigError, match="must not contain"): + resolve_audit_path(root_path=root, value="../outside.db") + + with pytest.raises(AuditConfigError, match="must end with"): + resolve_audit_path(root_path=root, value="audit.json") + + +# ── scanner / worker path helpers ──────────────────────────────────── + + +def test_resolved_path_under_root_accepts_in_repo_paths(tmp_path: Path) -> None: + workspace = tmp_path / "workspace" + module = workspace / "pkg" / "mod.py" + module.parent.mkdir(parents=True) + module.write_text("x = 1\n", encoding="utf-8") + + resolved = resolved_path_under_root(str(module), str(workspace)) + assert resolved == module.resolve() + + +def test_resolved_path_under_root_rejects_outside_targets(tmp_path: Path) -> None: + workspace = tmp_path / "workspace" + outside = tmp_path / "outside" + workspace.mkdir() + outside.mkdir() + link = workspace / "linked.py" + outside_file = outside / "secret.py" + outside_file.write_text("x = 1\n", encoding="utf-8") + _symlink_or_skip(link, outside_file) + + 
assert resolved_path_under_root(str(link), str(workspace)) is None + + +@pytest.mark.parametrize( + "root", + ["/etc", "/proc", "/var"], +) +def test_iter_py_files_rejects_sensitive_roots(root: str) -> None: + if not Path(root).exists(): + pytest.skip(f"{root} is not available on this platform") + with pytest.raises(ValidationError): + list(iter_py_files(root)) + + +def test_iter_py_files_rejects_non_directory_root(tmp_path: Path) -> None: + file_path = tmp_path / "not-a-dir.py" + file_path.write_text("x = 1\n", encoding="utf-8") + with pytest.raises(ValidationError, match="Root must be a directory"): + list(iter_py_files(str(file_path))) + + +# ── HTML escaping invariants ───────────────────────────────────────── + + +@pytest.mark.parametrize( + ("raw", "expected_fragment"), + [ + ("", "<script>alert(1)</script>"), + ('" onclick="alert(1)', "" onclick="alert(1)"), + ("`backtick`", "`backtick`"), + ("\u2028line sep", "
line sep"), + ("\u2029para sep", "
para sep"), + (None, ""), + ], +) +def test_escape_html_neutralizes_html_metacharacters( + raw: object, expected_fragment: str +) -> None: + escaped = _escape_html(raw) + assert expected_fragment in escaped + if isinstance(raw, str) and "<" in raw: + assert "<" not in escaped + + +def test_html_report_js_avoids_dataset_innerhtml_regression() -> None: + """Regression guard for DOM XSS pattern in clone metrics modal.""" + source = _HTML_JS_PATH.read_text(encoding="utf-8") + assert "dlg.querySelector('#modal-body').innerHTML=items" not in source + assert "body.innerHTML=tpl.innerHTML" not in source + assert "document.importNode(tpl.content" in source + assert "list.className='info-dl'" in source + + +# ── cache integrity (checksum contract; not secret-keyed) ──────────── + + +def test_cache_signature_verification_uses_constant_time_compare() -> None: + payload: dict[str, object] = {"version": "test", "files": {}} + signature = sign_cache_payload(payload) + assert verify_cache_payload_signature(payload, signature) is True + assert verify_cache_payload_signature(payload, "0" * len(signature)) is False + + +def test_cache_signature_is_stable_for_canonical_payload() -> None: + payload: dict[str, object] = {"b": 2, "a": 1, "files": {}} + first = sign_cache_payload(payload) + second = sign_cache_payload({"a": 1, "b": 2, "files": {}}) + assert first == second + + +# ── suppressions: malformed input must not crash extraction ───────── + + +@pytest.mark.parametrize( + "source", + [ + "# codeclone: ignore[dead-code, unknown-rule]\n", + "# codeclone: ignore[not a rule!]\n", + "# codeclone ignore[dead-code]\n", + '"""\n# codeclone: ignore[dead-code]\n', + ], +) +def test_extract_suppression_directives_ignores_malformed_or_unknown_rules( + source: str, +) -> None: + directives = extract_suppression_directives(source) + rule_ids = {rule for directive in directives for rule in directive.rules} + assert rule_ids.issubset(SUPPORTED_RULE_IDS) + + +def 
test_extract_suppression_directives_accepts_supported_rule_ids() -> None: + source = "# codeclone: ignore[dead-code]\ndef keep():\n return 1\n" + directives = extract_suppression_directives(source) + assert len(directives) == 1 + assert directives[0].rules == ("dead-code",) + + +# ── scanner file-count cap (DoS guard) ─────────────────────────────── + + +def test_iter_py_files_rejects_excessive_file_count(tmp_path: Path) -> None: + for index in range(5): + (tmp_path / f"mod_{index}.py").write_text("x = 1\n", encoding="utf-8") + + assert len(list(iter_py_files(str(tmp_path), max_files=10))) == 5 + + with pytest.raises(ValidationError, match="File count exceeds limit"): + list(iter_py_files(str(tmp_path), max_files=3)) + + +# ── baseline integrity tamper detection ────────────────────────────── + + +def test_baseline_verify_integrity_rejects_tampered_clone_payload( + tmp_path: Path, +) -> None: + """Trusted baseline comparison must fail closed on payload tampering.""" + import json + + import codeclone.baseline as baseline_mod + import codeclone.baseline.clone_baseline as clone_baseline_mod + from codeclone.baseline import Baseline + from codeclone.contracts.errors import BaselineValidationError + + func_id = f"{'a' * 40}|0-19" + block_id = "|".join(["a" * 40, "b" * 40, "c" * 40, "d" * 40]) + payload = clone_baseline_mod._baseline_payload( + functions={func_id}, + blocks={block_id}, + generator="codeclone", + schema_version="2.1", + fingerprint_version="1", + python_tag=baseline_mod.current_python_tag(), + generator_version="2.1.0", + created_at="2026-02-08T11:43:16Z", + ) + baseline_path = tmp_path / "codeclone.baseline.json" + baseline_path.write_text(json.dumps(payload, ensure_ascii=False, indent=2), "utf-8") + + baseline = Baseline(baseline_path) + baseline.load() + baseline.verify_integrity() + + clones = payload["clones"] + assert isinstance(clones, dict) + clones["functions"] = [func_id, f"{'b' * 40}|20-39"] + baseline_path.write_text(json.dumps(payload, 
ensure_ascii=False, indent=2), "utf-8") + tampered = Baseline(baseline_path) + tampered.load() + with pytest.raises(BaselineValidationError, match="payload_sha256 mismatch") as exc: + tampered.verify_integrity() + assert exc.value.status == "integrity_failed" + + +# ── workspace intent registry path safety ──────────────────────────── + + +def test_workspace_intent_path_helper_rejects_escape_attempts( + tmp_path: Path, +) -> None: + from codeclone.surfaces.mcp._workspace_intents import ( + _is_safe_intent_path, + intent_path, + registry_dir, + ) + + registry = registry_dir(tmp_path) + registry.mkdir(parents=True, exist_ok=True) + valid = intent_path( + root=tmp_path, + pid=123, + start_epoch=456, + intent_id="intent-aaa-001", + ) + assert _is_safe_intent_path(valid, registry) is True + + assert ( + _is_safe_intent_path( + Path("../outside/123-456-intent-aaa-001.json"), + registry, + ) + is False + ) + + outside = tmp_path / "outside.json" + outside.write_text("{}", encoding="utf-8") + symlink = registry / "123-456-intent-aaa-001.json" + _symlink_or_skip(symlink, outside) + assert _is_safe_intent_path(symlink, registry) is False + + +# ── git diff ref: control characters and injection payloads ────────── + + +@pytest.mark.parametrize( + "ref", + [ + "HEAD\x00", + "main\r\n", + "refs/heads/main;id", + "$(curl attacker)", + "HEAD && git status", + ], +) +def test_validate_git_diff_ref_rejects_control_and_shell_metacharacters( + ref: str, +) -> None: + with pytest.raises(ValueError, match="Invalid git diff ref"): + validate_git_diff_ref(ref) From f85d0f6254e998d5eb5903d1d87b68cad7afdef5 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sat, 30 May 2026 00:19:53 +0500 Subject: [PATCH 069/318] test(security): extend HTML, MCP, and scanner security tests --- tests/test_security.py | 177 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 176 insertions(+), 1 deletion(-) diff --git a/tests/test_security.py b/tests/test_security.py index 18a74c96..aa7139d5 100644 
--- a/tests/test_security.py +++ b/tests/test_security.py @@ -17,7 +17,11 @@ from codeclone.core.worker import process_file from codeclone.report.explain import build_block_group_facts from codeclone.report.html import build_html_report -from codeclone.scanner import iter_py_files +from codeclone.report.renderers.markdown import render_markdown_report_document +from codeclone.report.renderers.sarif import render_sarif_report_document +from codeclone.scanner import iter_py_files, resolved_path_under_root +from codeclone.surfaces.mcp.service import CodeCloneMCPService +from codeclone.surfaces.mcp.session import MCPAnalysisRequest, MCPServiceContractError def test_scanner_path_traversal() -> None: @@ -70,6 +74,29 @@ def _huge_stat(path: str, *args: object, **kwargs: object) -> os.stat_result: os.remove(tmp_path) +def test_process_file_rejects_symlink_target_outside_root(tmp_path: Path) -> None: + workspace = tmp_path / "workspace" + outside = tmp_path / "outside" + workspace.mkdir() + outside.mkdir() + cfg = NormalizationConfig() + + module = workspace / "module.py" + module.write_text("x = 1\n", encoding="utf-8") + assert process_file(str(module), str(workspace), cfg, 0, 0).success is True + + outside_target = outside / "secret.py" + outside_target.write_text("y = 2\n", encoding="utf-8") + module.unlink() + module.symlink_to(outside_target) + + result = process_file(str(module), str(workspace), cfg, 0, 0) + assert result.success is False + assert result.error_kind == "source_read_error" + assert result.error is not None + assert "outside repository root" in result.error + + def test_html_report_escapes_user_content(tmp_path: Path) -> None: bad_path = tmp_path / 'x" onmouseover="alert(1).py' good_path = tmp_path / "y.py" @@ -105,3 +132,151 @@ def test_html_report_escapes_user_content(tmp_path: Path) -> None: assert 'onmouseover="alert(1)' not in html assert 'data-qualname="<script>alert(1)</script>"' in html assert "" onmouseover="alert(1).py" in html + + +def 
test_html_report_escapes_title_and_does_not_emit_raw_script(tmp_path: Path) -> None: + module = tmp_path / "mod.py" + module.write_text("def f():\n return 1\n", encoding="utf-8") + payload = "" + html = build_html_report( + func_groups={ + "k": [ + { + "qualname": payload, + "filepath": str(module), + "start_line": 1, + "end_line": 2, + "loc": 2, + } + ] + }, + block_groups={}, + segment_groups={}, + block_group_facts=build_block_group_facts({}), + title=payload, + ) + assert payload not in html + assert "<img src=x onerror=alert(1)>" in html + + +def test_markdown_and_sarif_projections_do_not_emit_raw_html_tags( + tmp_path: Path, +) -> None: + report_payload: dict[str, object] = { + "report_schema_version": "2.11", + "meta": {"generator": {"name": "codeclone", "version": "2.1.0"}}, + "inventory": {"files": 0, "lines": 0, "functions": 0, "classes": 0}, + "findings": { + "groups": { + "clones": {"functions": [], "blocks": [], "segments": []}, + "structural": [], + "design": [], + } + }, + "summary": {}, + "metrics": {}, + } + markdown = render_markdown_report_document(report_payload) + sarif = render_sarif_report_document(report_payload) + assert "" + html = build_html_report( + func_groups={ + "k": [ + { + "qualname": payload, + "filepath": str(module), + "start_line": 1, + "end_line": 2, + "loc": 2, + } + ] + }, + block_groups={}, + segment_groups={}, + block_group_facts=build_block_group_facts({}), + ) + assert payload not in html + assert "', + workspaceName: "demo", + nonce: "abc123", + result: { + response: { + mode: "search", + detail_level: "compact", + semantic: {used: false, reason: "disabled"}, + payload: { + records: [ + { + id: "mem-30febd83c0b14c0f9f0e2a1b3c4d5e6f", + type: "change_rationale", + status: "draft", + confidence: "medium", + statement: "Test ", + }, + ], + record_count: 1, + truncated: false, + retrieval_policy: {drafts_included: false}, + }, + }, + }, + }); + assert.doesNotMatch(html, /', + "nonce-abc" + ); + assert.match(html, /style-src 
'nonce-nonce-abc'/); + assert.match(html, /Workspace Session Stats/); + assert.doesNotMatch(html, /", duration_ms=1.0, status="ok" + ) + op = OperationView( + operation_id="o", + correlation_id="o", + surface="cli", + name="a&b", + started_at_utc="t", + duration_ms=1.0, + status="ok", + spans=(span,), + ) + trace = TraceView( + schema_version="1.0", + window_started_at_utc="t", + window_ended_at_utc="t", + aggregates=AggregatesView(operation_count=1, slowest=(op,)), + operation_tree=(op,), + ) + html = render_trace_html(trace) + assert "" not in html + assert "<script>" in html + assert "a&b" in html + + +def test_observability_main_writes_json_and_html(tmp_path: Path) -> None: + conn = open_observability_store(observability_store_path(tmp_path)) + try: + write_operation( + conn, + OperationRecord( + operation_id="A", + correlation_id="A", + surface="cli", + name="cli.analyze", + started_at_utc="2026-06-10T04:00:00Z", + duration_ms=285.0, + status="ok", + spans=( + SpanRecord( + span_id="s", + operation_id="A", + name="pipeline.analyze", + started_at_utc="2026-06-10T04:00:00Z", + duration_ms=188.0, + status="ok", + ), + ), + ), + ) + finally: + conn.close() + json_path = tmp_path / "trace.json" + html_path = tmp_path / "trace.html" + code = observability_main( + [ + "trace", + "--root", + str(tmp_path), + "--json", + str(json_path), + "--html", + str(html_path), + ] + ) + assert code == 0 + payload = json.loads(json_path.read_text(encoding="utf-8")) + assert payload["operation_tree"][0]["name"] == "cli.analyze" + assert "Platform Observability" in html_path.read_text(encoding="utf-8") + + +def test_observability_main_no_store( + tmp_path: Path, capsys: pytest.CaptureFixture[str] +) -> None: + code = observability_main(["trace", "--root", str(tmp_path)]) + assert code == 0 + assert "No observability store" in capsys.readouterr().out From 67955e0fb4872a47296c5fdee78963bb3848ab37 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Wed, 10 Jun 2026 09:57:40 +0500 
Subject: [PATCH 223/318] feat(core): correlate finish->worker across processes via env handoff --- codeclone/memory/jobs/spawn.py | 19 ++++ codeclone/memory/jobs/worker.py | 20 +++- codeclone/observability/__init__.py | 2 + codeclone/observability/runtime.py | 11 +++ tests/test_observability_correlation.py | 120 ++++++++++++++++++++++++ 5 files changed, 171 insertions(+), 1 deletion(-) create mode 100644 tests/test_observability_correlation.py diff --git a/codeclone/memory/jobs/spawn.py b/codeclone/memory/jobs/spawn.py index 433d9a56..3607b95f 100644 --- a/codeclone/memory/jobs/spawn.py +++ b/codeclone/memory/jobs/spawn.py @@ -6,11 +6,14 @@ from __future__ import annotations +import os import subprocess import sys from dataclasses import dataclass from pathlib import Path +from ...observability import current_operation_context + @dataclass(frozen=True, slots=True) class SpawnWorkerResult: @@ -19,6 +22,21 @@ class SpawnWorkerResult: pid: int | None +def _worker_env() -> dict[str, str] | None: + """Subprocess env carrying the observability correlation handoff, or ``None`` + to inherit the parent environment unchanged (no active operation). 
+ """ + context = current_operation_context() + if context is None: + return None + operation_id, correlation_id = context + return { + **os.environ, + "CODECLONE_OBSERVABILITY_CORRELATION_ID": correlation_id, + "CODECLONE_OBSERVABILITY_PARENT_OPERATION_ID": operation_id, + } + + def spawn_projection_jobs_worker(*, root_path: Path) -> SpawnWorkerResult: root = root_path.resolve() argv = [ @@ -35,6 +53,7 @@ def spawn_projection_jobs_worker(*, root_path: Path) -> SpawnWorkerResult: proc = subprocess.Popen( argv, cwd=root, + env=_worker_env(), start_new_session=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, diff --git a/codeclone/memory/jobs/worker.py b/codeclone/memory/jobs/worker.py index 31a2e95d..a4d07cce 100644 --- a/codeclone/memory/jobs/worker.py +++ b/codeclone/memory/jobs/worker.py @@ -6,6 +6,7 @@ from __future__ import annotations +import os import sqlite3 from collections.abc import Mapping from dataclasses import dataclass @@ -102,6 +103,17 @@ def _trajectory_reason_kind( return "first_index" +def _correlation_handoff() -> tuple[str | None, str | None]: + """Read the cross-process observability handoff the spawner injected, so the + worker operation links under the finish operation that triggered it. Returns + ``(correlation_id, parent_operation_id)``, both None for a standalone run. 
+ """ + return ( + os.environ.get("CODECLONE_OBSERVABILITY_CORRELATION_ID") or None, + os.environ.get("CODECLONE_OBSERVABILITY_PARENT_OPERATION_ID") or None, + ) + + def run_projection_job( conn: sqlite3.Connection, *, @@ -111,7 +123,13 @@ def run_projection_job( project: MemoryProject, stimulus: Mapping[str, object], ) -> tuple[ProjectionJobStatus, dict[str, object], str | None]: - with operation(name="memory.projection.job", surface="memory"): + correlation_id, parent_operation_id = _correlation_handoff() + with operation( + name="memory.projection.job", + surface="memory", + correlation_id=correlation_id, + parent_operation_id=parent_operation_id, + ): watermark = _trajectory_incremental_watermark(conn, project_id=project.id) with span( name="memory.trajectory.rebuild", diff --git a/codeclone/observability/__init__.py b/codeclone/observability/__init__.py index 7078ff50..806f8daa 100644 --- a/codeclone/observability/__init__.py +++ b/codeclone/observability/__init__.py @@ -19,6 +19,7 @@ SpanHandle, bind_root, bootstrap, + current_operation_context, is_observability_enabled, operation, payload_capture_enabled, @@ -31,6 +32,7 @@ "SpanHandle", "bind_root", "bootstrap", + "current_operation_context", "is_observability_enabled", "operation", "payload_capture_enabled", diff --git a/codeclone/observability/runtime.py b/codeclone/observability/runtime.py index 618cda9a..0694a77d 100644 --- a/codeclone/observability/runtime.py +++ b/codeclone/observability/runtime.py @@ -275,6 +275,16 @@ def is_observability_enabled() -> bool: return _ENABLED +def current_operation_context() -> tuple[str, str] | None: + """Return ``(operation_id, correlation_id)`` of the active operation for + cross-process handoff, or ``None`` when disabled or outside an operation. 
+ """ + op = _CURRENT_OP.get() + if op is None or not op.operation_id: + return None + return op.operation_id, op.correlation_id + + def bind_root(root: Path) -> None: """Bind the store to ``root`` if the active runtime has none yet (no-op when disabled). Lets a root-less MCP-server session open its store on the first @@ -402,6 +412,7 @@ def span( "SpanHandle", "bind_root", "bootstrap", + "current_operation_context", "is_observability_enabled", "operation", "payload_capture_enabled", diff --git a/tests/test_observability_correlation.py b/tests/test_observability_correlation.py new file mode 100644 index 00000000..fcba7e0a --- /dev/null +++ b/tests/test_observability_correlation.py @@ -0,0 +1,120 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import subprocess +from collections.abc import Iterator +from pathlib import Path +from unittest.mock import MagicMock + +import pytest + +import codeclone.memory.jobs.spawn as spawn +import codeclone.memory.jobs.worker as worker +from codeclone.config.observability import ObservabilityConfig +from codeclone.observability import ( + bootstrap, + current_operation_context, + operation, + shutdown, +) +from codeclone.observability.store.schema import ( + observability_store_path, + open_observability_store, +) + + +@pytest.fixture(autouse=True) +def _reset_runtime() -> Iterator[None]: + yield + shutdown() + + +def test_current_operation_context(tmp_path: Path) -> None: + assert current_operation_context() is None # disabled + bootstrap(ObservabilityConfig(enabled=True), root=tmp_path) + assert current_operation_context() is None # enabled, outside an operation + with operation(name="finish", surface="mcp", correlation_id="corr-A") as op: + assert 
current_operation_context() == (op.operation_id, "corr-A") + + +def test_run_projection_job_links_under_finish( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + monkeypatch.setenv("CODECLONE_OBSERVABILITY_CORRELATION_ID", "A-corr") + monkeypatch.setenv("CODECLONE_OBSERVABILITY_PARENT_OPERATION_ID", "A-op") + monkeypatch.setattr( + worker, "execute_trajectory_rebuild", lambda **_k: {"status": "ok"} + ) + monkeypatch.setattr( + worker, "execute_semantic_index_rebuild", lambda **_k: {"status": "ok"} + ) + monkeypatch.setattr( + worker, "execute_experience_distillation", lambda **_k: {"status": "ok"} + ) + + bootstrap(ObservabilityConfig(enabled=True), root=tmp_path) + worker.run_projection_job( + MagicMock(), + job_id="j1", + root_path=tmp_path, + config=MagicMock(), + project=MagicMock(), + stimulus={}, + ) + shutdown() + + conn = open_observability_store(observability_store_path(tmp_path)) + try: + row = conn.execute( + "SELECT name, correlation_id, parent_operation_id FROM platform_operations" + ).fetchone() + finally: + conn.close() + assert row == ("memory.projection.job", "A-corr", "A-op") + + +def test_spawn_injects_correlation_env( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + captured: dict[str, object] = {} + + def _fake_popen(argv: object, **kwargs: object) -> object: + captured["env"] = kwargs.get("env") + proc = MagicMock() + proc.pid = 4321 + return proc + + monkeypatch.setattr(subprocess, "Popen", _fake_popen) + bootstrap(ObservabilityConfig(enabled=True), root=tmp_path) + with operation( + name="finish_controlled_change", surface="mcp", correlation_id="A" + ) as op: + result = spawn.spawn_projection_jobs_worker(root_path=tmp_path) + + assert result.spawned is True + env = captured["env"] + assert isinstance(env, dict) + assert env["CODECLONE_OBSERVABILITY_CORRELATION_ID"] == "A" + assert env["CODECLONE_OBSERVABILITY_PARENT_OPERATION_ID"] == op.operation_id + + +def test_spawn_without_operation_inherits_env( + tmp_path: 
Path, monkeypatch: pytest.MonkeyPatch +) -> None: + captured: dict[str, object] = {} + + def _fake_popen(argv: object, **kwargs: object) -> object: + captured["env"] = kwargs.get("env") + proc = MagicMock() + proc.pid = 1 + return proc + + monkeypatch.setattr(subprocess, "Popen", _fake_popen) + # Observability disabled -> no active operation -> inherit parent env. + spawn.spawn_projection_jobs_worker(root_path=tmp_path) + assert captured["env"] is None From c599ba0cfbc1a45b414533e1672e84c4bb791cc8 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Wed, 10 Jun 2026 10:08:57 +0500 Subject: [PATCH 224/318] refactor(html): align observability trace report into a grid layout --- codeclone/observability/render_html.py | 187 ++++++++++++++----------- 1 file changed, 104 insertions(+), 83 deletions(-) diff --git a/codeclone/observability/render_html.py b/codeclone/observability/render_html.py index 413ab522..cd366d63 100644 --- a/codeclone/observability/render_html.py +++ b/codeclone/observability/render_html.py @@ -9,6 +9,8 @@ Self-contained single page: the CodeClone brand logo + brand tokens (Inter / JetBrains Mono / oklch indigo, auto dark-light), a focused embedded stylesheet, and inline SVG bars — no external assets, no ``report`` import, no JS required. +The trace is a column-aligned grid: names, bars, durations and metrics line up +across every row, and an operation's child operations nest under it. 
""" from __future__ import annotations @@ -35,88 +37,100 @@ :root{ --bg:oklch(15% 0.018 275);--surface:oklch(20% 0.022 275); --surface-2:oklch(24% 0.026 275);--border:oklch(31% 0.034 275); ---text:oklch(96% 0.010 275);--dim:oklch(74% 0.028 275);--mute:oklch(58% 0.030 275); ---accent:#818cf8;--track:oklch(28% 0.02 275); ---warn:#f59e0b;--ok:#34d399; +--text:oklch(96% 0.010 275);--dim:oklch(74% 0.028 275);--mute:oklch(56% 0.028 275); +--accent:#818cf8;--accent-soft:color-mix(in oklch,#818cf8 30%,transparent); +--track:oklch(28% 0.02 275);--warn:#f59e0b; --mcp:#818cf8;--cli:#2dd4bf;--memory:#fbbf24; --font:"Inter","Inter Variable",-apple-system,BlinkMacSystemFont,"Segoe UI", Roboto,sans-serif; --mono:"JetBrains Mono",ui-monospace,SFMono-Regular,Menlo,Consolas,monospace; ---r:10px; } @media (prefers-color-scheme:light){:root{ ---bg:oklch(98.5% 0.006 275);--surface:#fff;--surface-2:oklch(97.5% 0.006 275); +--bg:oklch(98.5% 0.006 275);--surface:#fff;--surface-2:oklch(97.3% 0.006 275); --border:oklch(89% 0.018 275);--text:oklch(24% 0.040 275); ---dim:oklch(44% 0.046 275);--mute:oklch(56% 0.040 275); ---accent:#4f46e5;--track:oklch(92% 0.012 275);--mcp:#4f46e5; ---cli:#0d9488;--memory:#b45309; +--dim:oklch(44% 0.046 275);--mute:oklch(55% 0.040 275); +--accent:#4f46e5;--accent-soft:color-mix(in oklch,#4f46e5 28%,transparent); +--track:oklch(92% 0.012 275);--mcp:#4f46e5;--cli:#0d9488;--memory:#b45309; }} html{-webkit-text-size-adjust:100%} body{background:var(--bg);color:var(--text);font-family:var(--font); -font-size:14px;line-height:1.55;-webkit-font-smoothing:antialiased; +font-size:14px;line-height:1.5;-webkit-font-smoothing:antialiased; padding:34px 20px 80px} -.wrap{max-width:980px;margin:0 auto} -a{color:var(--accent)} -.head{display:flex;align-items:center;gap:14px;margin-bottom:6px} +.wrap{max-width:1000px;margin:0 auto} +.head{display:flex;align-items:center;gap:13px;margin-bottom:5px} .logo{flex-shrink:0} -h1{font-size:20px;font-weight:650;letter-spacing:-0.01em} 
-.sub{color:var(--dim);font-size:13px;margin:0 0 26px 44px; -font-family:var(--mono)} +h1{font-size:20px;font-weight:600;letter-spacing:-0.01em} +.sub{color:var(--dim);font-size:12.5px;margin:0 0 28px 43px;font-family:var(--mono)} .sub b{color:var(--text);font-weight:550} -.grid{display:grid;grid-template-columns:repeat(4,1fr);gap:12px; -margin-bottom:30px} +.grid{display:grid;grid-template-columns:repeat(4,1fr);gap:12px;margin-bottom:32px} .card{background:var(--surface);border:1px solid var(--border); -border-radius:var(--r);padding:15px 16px} -.card .v{font-size:24px;font-weight:650;letter-spacing:-0.02em; -font-family:var(--mono)} -.card .l{color:var(--mute);font-size:11px;text-transform:uppercase; -letter-spacing:0.06em;margin-top:3px} +border-radius:10px;padding:14px 16px} +.card .v{font-size:23px;font-weight:600;letter-spacing:-0.02em;font-family:var(--mono)} +.card .l{color:var(--mute);font-size:10.5px;text-transform:uppercase; +letter-spacing:0.07em;margin-top:4px} .card.warn .v{color:var(--warn)} .card.accent .v{color:var(--accent)} -h2{font-size:12px;text-transform:uppercase;letter-spacing:0.07em; -color:var(--mute);font-weight:600;margin:0 0 11px 2px} -section{margin-bottom:28px} +h2{font-size:11px;text-transform:uppercase;letter-spacing:0.08em; +color:var(--mute);font-weight:600;margin:0 0 10px 2px} +section{margin-bottom:30px} .panel{background:var(--surface);border:1px solid var(--border); -border-radius:var(--r);overflow:hidden} -.row{display:flex;align-items:center;gap:11px;padding:9px 15px; -border-top:1px solid var(--border)} -.row:first-child{border-top:none} -.badge{font-size:10.5px;font-weight:600;font-family:var(--mono); -padding:2px 7px;border-radius:5px;text-transform:uppercase; -letter-spacing:0.03em;flex-shrink:0; -background:color-mix(in oklch,var(--c,var(--accent)) 16%,transparent); +border-radius:10px;overflow:hidden} +.badge{font-size:10px;font-weight:600;font-family:var(--mono);padding:2px 6px; 
+border-radius:5px;text-transform:uppercase;letter-spacing:0.03em; +justify-self:start; +background:color-mix(in oklch,var(--c,var(--accent)) 15%,transparent); color:var(--c,var(--accent))} .surf-mcp{--c:var(--mcp)}.surf-cli{--c:var(--cli)}.surf-memory{--c:var(--memory)} -.name{font-family:var(--mono);font-size:13px;flex:1;min-width:0; +.name{font-family:var(--mono);font-size:12.5px;min-width:0; overflow:hidden;text-overflow:ellipsis;white-space:nowrap} -.num{font-family:var(--mono);font-size:12.5px;color:var(--dim); -flex-shrink:0;text-align:right} -.bar{flex-shrink:0;display:block} +.dur{font-family:var(--mono);font-size:12.5px;text-align:right; +white-space:nowrap} .rss{font-family:var(--mono);font-size:11.5px;color:var(--warn); -flex-shrink:0;font-weight:550} -.chip{font-size:11px;font-family:var(--mono);padding:1px 7px;border-radius:20px; -background:var(--surface-2);color:var(--dim);flex-shrink:0; -border:1px solid var(--border)} -.chip.unknown{color:var(--warn); -background:color-mix(in oklch,var(--warn) 12%,transparent);border-color:transparent} -.kv{font-family:var(--mono);font-size:11px;color:var(--mute);margin-right:9px} -.kv b{color:var(--dim);font-weight:550} -.counters{flex-basis:100%;padding-left:1px;margin-top:-2px} -table{width:100%;border-collapse:collapse;font-size:13px} -th{text-align:left;padding:9px 15px;color:var(--mute);font-size:11px; -text-transform:uppercase;letter-spacing:0.05em;border-bottom:1px solid var(--border)} -td{padding:8px 15px;border-top:1px solid var(--border);font-family:var(--mono)} +text-align:right;white-space:nowrap;font-weight:550} +.bar{display:block;width:100%;height:7px} +.chip{font-size:10.5px;font-family:var(--mono);padding:1px 7px;border-radius:20px; +background:var(--surface-2);color:var(--dim);border:1px solid var(--border); +white-space:nowrap} +.chip.unknown{color:var(--warn);border-color:transparent; +background:color-mix(in oklch,var(--warn) 13%,transparent)} +.slow{display:grid; +grid-template-columns:58px 
minmax(0,1fr) 150px 56px 78px; +align-items:center;column-gap:13px;padding:9px 16px; +border-top:1px solid var(--border)} +.slow:first-child{border-top:none} +.slow .name{color:var(--text)} +.slow .dur{color:var(--dim)} +table{width:100%;border-collapse:collapse;font-size:12.5px} +th{text-align:left;padding:9px 16px;color:var(--mute);font-size:10.5px; +text-transform:uppercase;letter-spacing:0.05em; +border-bottom:1px solid var(--border)} +td{padding:8px 16px;border-top:1px solid var(--border);font-family:var(--mono)} td.t{font-family:var(--font)} th.r,td.r{text-align:right} -.tree{padding:6px 4px} -.op{margin:3px 0;border-radius:8px} -.op-head{display:flex;align-items:center;gap:11px;padding:8px 11px; -background:var(--surface-2);border-radius:8px;border:1px solid var(--border)} -.op>.span,.op>.op{margin-left:18px} -.span{display:flex;align-items:center;gap:11px;padding:6px 11px;flex-wrap:wrap} -.span .name{font-size:12.5px;color:var(--dim);flex:0 0 188px} -.empty{padding:26px;text-align:center;color:var(--mute);font-size:13px} -.foot{margin-top:34px;color:var(--mute);font-size:11.5px;text-align:center; +.tree{padding:8px} +.op{border:1px solid var(--border);border-radius:9px;overflow:hidden; +margin:7px 0;background:var(--surface)} +.op:first-child{margin-top:0} +.op .op{margin:8px 10px 10px 22px;border-left:2px solid var(--accent-soft)} +.op-head{display:flex;align-items:center;gap:10px;padding:9px 13px; +background:var(--surface-2)} +.op-head .name{flex:1;font-size:13px;font-weight:550;color:var(--text)} +.op-head .pay{font-family:var(--mono);font-size:11px;color:var(--mute); +white-space:nowrap} +.spans{padding:3px 0 5px} +.span{display:grid; +grid-template-columns:minmax(0,1fr) 150px 56px minmax(120px,0.9fr); +align-items:center;column-gap:13px;row-gap:1px;padding:4px 14px 4px 16px} +.span .name{grid-column:1;grid-row:1;color:var(--dim)} +.span .bar{grid-column:2;grid-row:1} +.span .dur{grid-column:3;grid-row:1;color:var(--dim)} +.span 
.smeta{grid-column:4;grid-row:1;display:flex;align-items:center; +gap:8px;min-width:0;overflow:hidden} +.span .counters{grid-column:2/-1;grid-row:2;font-family:var(--mono); +font-size:10.5px;color:var(--mute);display:flex;flex-wrap:wrap;gap:0 15px} +.kv b{color:var(--dim);font-weight:550;margin-right:4px} +.empty{padding:28px;text-align:center;color:var(--mute);font-size:13px} +.foot{margin-top:36px;color:var(--mute);font-size:11px;text-align:center; font-family:var(--mono)} """ @@ -144,14 +158,13 @@ def _bytes(value: int | None) -> str: def _bar(value: float, maximum: float, *, color: str = "var(--accent)") -> str: - width = 150 frac = value / maximum if maximum > 0 else 0.0 - fill = max(2.0, round(frac * width, 1)) + fill = max(1.5, round(frac * 100, 1)) return ( - f'' + '' + '' + f'' ) @@ -174,7 +187,7 @@ def _counters(counters: Mapping[str, int]) -> str: if not counters: return "" items = "".join( - f'{_esc(key)} {value}' + f"{_esc(key)}{value}" for key, value in sorted(counters.items()) ) return f'{items}' @@ -210,13 +223,12 @@ def _stat(value: str, label: str, variant: str = "") -> str: def _stats(agg: AggregatesView) -> str: - rss = _mb(agg.max_rss_delta_mb) unknown_variant = "warn" if agg.unknown_expensive_rebuild_count else "" anomaly_variant = "warn" if agg.anomaly_count else "" return ( '
    ' + _stat(str(agg.operation_count), "operations", "accent") - + _stat(rss, "peak rss Δ") + + _stat(_mb(agg.max_rss_delta_mb), "peak rss Δ") + _stat( str(agg.unknown_expensive_rebuild_count), "unknown heavy", @@ -232,10 +244,10 @@ def _slowest(agg: AggregatesView) -> str: return "" top = agg.slowest[0].duration_ms or 1.0 rows = "".join( - f'
    {_surface_badge(op.surface)}' - f'{_esc(op.name)}' - f"{_bar(op.duration_ms, top)}" - f'{_ms(op.duration_ms)}{_rss(op.rss_delta_mb)}
    ' + f'
    {_surface_badge(op.surface)}' + f'{_esc(op.name)}{_bar(op.duration_ms, top)}' + f'{_ms(op.duration_ms)}' + f'{_rss_value(op.rss_delta_mb)}
    ' for op in agg.slowest ) return ( @@ -243,6 +255,10 @@ def _slowest(agg: AggregatesView) -> str: ) +def _rss_value(value: float | None) -> str: + return "" if value is None or value < 0.05 else f"Δ{value:.1f} MB" + + def _mcp(tools: tuple[McpToolAggregate, ...]) -> str: if not tools: return "" @@ -263,37 +279,42 @@ def _mcp(tools: tuple[McpToolAggregate, ...]) -> str: def _payload(op: OperationView) -> str: - if op.response_bytes is None and op.request_bytes is None: - return "" parts = [] if op.request_bytes is not None: parts.append(f"↑{_bytes(op.request_bytes)}") if op.response_bytes is not None: parts.append(f"↓{_bytes(op.response_bytes)}") - return f'{" ".join(parts)}' + return f'{" ".join(parts)}' if parts else "" def _span_row(span: SpanView, op_duration: float) -> str: color = "var(--warn)" if span.reason_kind == "unknown" else "var(--cli)" + meta = _reason_chip(span.reason_kind) + _rss(span.rss_delta_mb) return ( f'
    {_esc(span.name)}' f"{_bar(span.duration_ms, op_duration, color=color)}" - f'{_ms(span.duration_ms)}' - f"{_reason_chip(span.reason_kind)}{_rss(span.rss_delta_mb)}" + f'{_ms(span.duration_ms)}' + f'{meta}' f"{_counters(span.counters)}
    " ) def _op_card(op: OperationView) -> str: op_duration = op.duration_ms or 1.0 - spans = "".join(_span_row(span, op_duration) for span in op.spans) - children = "".join(_op_card(child) for child in op.children) - return ( - f'
    {_surface_badge(op.surface)}' + head = ( + f'
    {_surface_badge(op.surface)}' f'{_esc(op.name)}' - f'{_ms(op.duration_ms)}' - f"{_rss(op.rss_delta_mb)}{_payload(op)}
    {spans}{children}
    " + f'{_ms(op.duration_ms)}' + f"{_rss(op.rss_delta_mb)}{_payload(op)}
    " + ) + spans = ( + f'
    {"".join(_span_row(s, op_duration) for s in op.spans)}' + "
    " + if op.spans + else "" ) + children = "".join(_op_card(child) for child in op.children) + return f'
    {head}{spans}{children}
    ' def _tree(trace: TraceView) -> str: From 82b42f8a42c7754e1bbef94848f082a83e0bd9ad Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Wed, 10 Jun 2026 23:34:44 +0500 Subject: [PATCH 225/318] feat(core): observability cost views and waterfall read model Extend the Phase 29 TraceView read model for the diagnosis cockpit: - SpanCostView with a deterministic no_op flag (productive counters present and summing to zero), AggregatesView.slowest_span / semantic_costs, and McpToolAggregate.p95_request_bytes / p95_response_tokens. - WaterfallGroup / WaterfallRow: one self-contained timeline per causal chain, offsets relative to the group start so an idle window never crushes the bars. SpanView gains started_at_utc for time positioning. Dataclasses are extended by appending defaulted fields only (render tests build McpToolAggregate positionally). --- codeclone/observability/store/reader.py | 138 +++++++++++++++++++++++- codeclone/observability/views.py | 60 +++++++++++ tests/test_observability_reader.py | 90 ++++++++++++++++ 3 files changed, 287 insertions(+), 1 deletion(-) diff --git a/codeclone/observability/store/reader.py b/codeclone/observability/store/reader.py index 41d1ad41..7accf597 100644 --- a/codeclone/observability/store/reader.py +++ b/codeclone/observability/store/reader.py @@ -14,6 +14,7 @@ import sqlite3 from collections import defaultdict +from datetime import datetime from pathlib import Path from typing import cast @@ -24,13 +25,21 @@ AggregatesView, McpToolAggregate, OperationView, + SpanCostView, SpanView, TraceView, + WaterfallGroup, + WaterfallRow, ) from .schema import observability_store_path _DEFAULT_WINDOW = 20 +# Counters whose presence marks a span as *meant* to do productive work; when +# they are all present-and-zero the span ran but touched nothing (a no-op). 
+_PRODUCTIVE_COUNTER_KEYS = ("embedded", "workflows_seen", "experiences_distilled") +_SEMANTIC_COST_LIMIT = 8 + def open_observability_store_readonly(root: Path) -> sqlite3.Connection | None: """Open the store read-only, or None when it does not exist yet.""" @@ -71,6 +80,7 @@ def _span_view(row: sqlite3.Row) -> SpanView: dedupe_key=row["dedupe_key"], counters=_parse_counters(row["counters_json"]), rss_delta_mb=row["rss_delta_mb"], + started_at_utc=str(row["started_at_utc"]), ) @@ -195,6 +205,28 @@ def build(operation_id: str) -> OperationView: return tuple(build(root) for root in sorted(children_ids[None], key=_order)) +def _span_cost_view(op: OperationView, span: SpanView) -> SpanCostView: + """Flatten a span with its owning operation's identity and classify whether + it did productive work (see ``SpanCostView.no_op``).""" + productive = [ + span.counters[key] for key in _PRODUCTIVE_COUNTER_KEYS if key in span.counters + ] + produced = sum(productive) + return SpanCostView( + span_id=span.span_id, + name=span.name, + surface=op.surface, + operation_id=op.operation_id, + operation_name=op.name, + duration_ms=span.duration_ms, + reason_kind=span.reason_kind, + rss_delta_mb=span.rss_delta_mb, + produced=produced, + skipped=int(span.counters.get("skipped_unchanged", 0)), + no_op=bool(productive) and produced == 0, + ) + + def _mcp_tool_aggregates(flat: list[OperationView]) -> tuple[McpToolAggregate, ...]: by_name: dict[str, list[OperationView]] = defaultdict(list) for view in flat: @@ -204,9 +236,15 @@ def _mcp_tool_aggregates(flat: list[OperationView]) -> tuple[McpToolAggregate, . 
for name in sorted(by_name): ops = by_name[name] durations = [op.duration_ms for op in ops] + requests = [ + float(op.request_bytes) for op in ops if op.request_bytes is not None + ] responses = [ float(op.response_bytes) for op in ops if op.response_bytes is not None ] + response_tokens = [ + float(op.response_tokens) for op in ops if op.response_tokens is not None + ] aggregates.append( McpToolAggregate( name=name, @@ -214,6 +252,8 @@ def _mcp_tool_aggregates(flat: list[OperationView]) -> tuple[McpToolAggregate, . p50_duration_ms=_percentile(durations, 0.5), p95_duration_ms=_percentile(durations, 0.95), p95_response_bytes=int(_percentile(responses, 0.95)), + p95_request_bytes=int(_percentile(requests, 0.95)), + p95_response_tokens=int(_percentile(response_tokens, 0.95)), ) ) return tuple(aggregates) @@ -242,6 +282,11 @@ def _aggregates( for span in spans if span.reason_kind == "unknown" ) + span_costs = sorted( + (_span_cost_view(op, span) for op in flat for span in op.spans), + key=lambda s: (-s.duration_ms, s.operation_id, s.span_id), + ) + semantic_costs = tuple(s for s in span_costs if s.surface == "memory") return AggregatesView( operation_count=len(flat), slowest=slowest, @@ -250,9 +295,98 @@ def _aggregates( anomaly_count=0, unknown_expensive_rebuild_count=unknown, mcp_tools=_mcp_tool_aggregates(flat), + slowest_span=span_costs[0] if span_costs else None, + semantic_costs=semantic_costs[:_SEMANTIC_COST_LIMIT], ) +def _epoch_ms(iso: str) -> float: + """Parse a store timestamp to epoch milliseconds (0.0 when absent/unparsable).""" + if not iso: + return 0.0 + try: + return datetime.fromisoformat(iso.replace("Z", "+00:00")).timestamp() * 1000.0 + except ValueError: + return 0.0 + + +def _wf_row( + *, + label: str, + surface: str, + kind: str, + depth: int, + start_iso: str, + duration_ms: float, + base_ms: float, + reason_kind: str | None = None, + status: str = "ok", +) -> WaterfallRow: + return WaterfallRow( + label=label, + surface=surface, + kind=kind, + 
depth=depth, + offset_ms=max(0.0, _epoch_ms(start_iso) - base_ms), + duration_ms=duration_ms, + reason_kind=reason_kind, + status=status, + ) + + +def _waterfall_rows( + op: OperationView, depth: int, base_ms: float +) -> list[WaterfallRow]: + rows = [ + _wf_row( + label=op.name, + surface=op.surface, + kind="operation", + depth=depth, + start_iso=op.started_at_utc, + duration_ms=op.duration_ms, + base_ms=base_ms, + status=op.status, + ) + ] + rows.extend( + _wf_row( + label=span.name, + surface=op.surface, + kind="span", + depth=depth + 1, + start_iso=span.started_at_utc, + duration_ms=span.duration_ms, + base_ms=base_ms, + reason_kind=span.reason_kind, + status=span.status, + ) + for span in op.spans + ) + for child in op.children: + rows.extend(_waterfall_rows(child, depth + 1, base_ms)) + return rows + + +def _waterfall_groups(tree: tuple[OperationView, ...]) -> tuple[WaterfallGroup, ...]: + """One self-contained timeline per causal chain (tree root); offsets are + relative to that root's start so a long-idle window never crushes the bars.""" + groups: list[WaterfallGroup] = [] + for root in tree: + base_ms = _epoch_ms(root.started_at_utc) + rows = tuple(_waterfall_rows(root, 0, base_ms)) + span_ms = max((row.offset_ms + row.duration_ms for row in rows), default=0.0) + groups.append( + WaterfallGroup( + correlation_id=root.correlation_id, + started_at_utc=root.started_at_utc, + duration_ms=span_ms, + rows=rows, + ) + ) + return tuple(groups) + + def build_trace_view( conn: sqlite3.Connection, *, @@ -276,14 +410,16 @@ def build_trace_view( ] by_id = {view.operation_id: view for view in flat} starts = [str(row["started_at_utc"]) for row in rows] + operation_tree = _build_forest(rows, spans_by_op) return TraceView( schema_version=PLATFORM_OBSERVABILITY_SCHEMA_VERSION, window_started_at_utc=min(starts) if starts else "", window_ended_at_utc=max(starts) if starts else "", aggregates=_aggregates(flat, spans_by_op), focus_operation=by_id.get(focus_id) if focus_id is 
not None else None, - operation_tree=_build_forest(rows, spans_by_op), + operation_tree=operation_tree, correlated_operations=tuple(flat), + waterfall=_waterfall_groups(operation_tree), ) diff --git a/codeclone/observability/views.py b/codeclone/observability/views.py index 7dd96a6f..1f7daf9b 100644 --- a/codeclone/observability/views.py +++ b/codeclone/observability/views.py @@ -28,6 +28,30 @@ class SpanView: dedupe_key: str | None = None counters: Mapping[str, int] = field(default_factory=dict) rss_delta_mb: float | None = None + started_at_utc: str = "" + + +@dataclass(frozen=True, slots=True) +class SpanCostView: + """A span flattened with its owning operation's identity, for the cockpit + cost views (slowest-span highlight, semantic/memory cost table). + + ``no_op`` is the deterministic answer to "did this span do productive work?": + true when the span declares productive counters and they sum to zero — a + rebuild/reindex that touched nothing yet still spent wall time and memory. + """ + + span_id: str + name: str + surface: str + operation_id: str + operation_name: str + duration_ms: float + reason_kind: str | None = None + rss_delta_mb: float | None = None + produced: int = 0 + skipped: int = 0 + no_op: bool = False @dataclass(frozen=True, slots=True) @@ -57,6 +81,8 @@ class McpToolAggregate: p50_duration_ms: float p95_duration_ms: float p95_response_bytes: int + p95_request_bytes: int = 0 + p95_response_tokens: int = 0 @dataclass(frozen=True, slots=True) @@ -68,6 +94,36 @@ class AggregatesView: anomaly_count: int = 0 unknown_expensive_rebuild_count: int = 0 mcp_tools: tuple[McpToolAggregate, ...] = () + slowest_span: SpanCostView | None = None + semantic_costs: tuple[SpanCostView, ...] = () + + +@dataclass(frozen=True, slots=True) +class WaterfallRow: + """One time-positioned bar in a waterfall: a span or operation placed at + ``offset_ms`` after its group's start, ``duration_ms`` wide. 
``depth`` nests + spans under their operation and child operations under their parent.""" + + label: str + surface: str + kind: str # "operation" | "span" + depth: int + offset_ms: float + duration_ms: float + reason_kind: str | None = None + status: str = "ok" + + +@dataclass(frozen=True, slots=True) +class WaterfallGroup: + """One correlated causal chain rendered as a self-contained timeline; every + row's ``offset_ms`` is relative to ``started_at_utc`` and bounded by + ``duration_ms`` (the group's own window, not the whole trace).""" + + correlation_id: str + started_at_utc: str + duration_ms: float + rows: tuple[WaterfallRow, ...] = () @dataclass(frozen=True, slots=True) @@ -80,12 +136,16 @@ class TraceView: focus_operation: OperationView | None = None operation_tree: tuple[OperationView, ...] = () correlated_operations: tuple[OperationView, ...] = () + waterfall: tuple[WaterfallGroup, ...] = () __all__ = [ "AggregatesView", "McpToolAggregate", "OperationView", + "SpanCostView", "SpanView", "TraceView", + "WaterfallGroup", + "WaterfallRow", ] diff --git a/tests/test_observability_reader.py b/tests/test_observability_reader.py index d8fe5c68..259f20fd 100644 --- a/tests/test_observability_reader.py +++ b/tests/test_observability_reader.py @@ -115,6 +115,29 @@ def test_build_trace_view_tree_and_aggregates(tmp_path: Path) -> None: assert agg.mcp_tools[0].name == "finish_controlled_change" assert agg.mcp_tools[0].p95_response_bytes == 900 + assert agg.slowest_span is not None + assert agg.slowest_span.name == "memory.semantic.reindex" + assert agg.slowest_span.operation_name == "memory.projection.job" + assert agg.slowest_span.produced == 1423 + assert agg.slowest_span.no_op is False + assert [s.name for s in agg.semantic_costs] == [ + "memory.semantic.reindex", + "memory.trajectory.rebuild", + ] + + # Waterfall: one timeline for the A->B chain; B starts 1s after the root A, + # spans nest a level deeper, offsets are relative to the group start. 
+ assert len(trace.waterfall) == 1 + group = trace.waterfall[0] + assert group.correlation_id == "A" + rows = {(row.label, row.kind): row for row in group.rows} + assert rows[("finish_controlled_change", "operation")].depth == 0 + assert rows[("finish_controlled_change", "operation")].offset_ms == 0.0 + job_row = rows[("memory.projection.job", "operation")] + assert job_row.depth == 1 + assert job_row.offset_ms == 1000.0 + assert rows[("memory.semantic.reindex", "span")].depth == 2 + def test_build_trace_view_focus_by_operation_id(tmp_path: Path) -> None: _seed(tmp_path) @@ -126,3 +149,70 @@ def test_build_trace_view_focus_by_operation_id(tmp_path: Path) -> None: read.close() assert trace.focus_operation is not None assert trace.focus_operation.operation_id == "B" + + +def test_no_op_span_and_mcp_payload_percentiles(tmp_path: Path) -> None: + conn = open_observability_store(observability_store_path(tmp_path)) + try: + write_operation( + conn, + OperationRecord( + operation_id="M", + correlation_id="M", + surface="mcp", + name="finish_controlled_change", + started_at_utc="2026-06-09T00:00:01Z", + duration_ms=120.0, + status="ok", + request_bytes=51, + response_bytes=1873, + request_tokens=13, + response_tokens=469, + ), + ) + write_operation( + conn, + OperationRecord( + operation_id="W", + correlation_id="M", + surface="memory", + name="memory.projection.job", + started_at_utc="2026-06-09T00:00:02Z", + duration_ms=900.0, + status="ok", + parent_operation_id="M", + spans=( + SpanRecord( + span_id="sx", + operation_id="W", + name="memory.semantic.reindex", + started_at_utc="2026-06-09T00:00:02Z", + duration_ms=850.0, + status="ok", + reason_kind="content_changed", + counters={"embedded": 0, "skipped_unchanged": 1423}, + ), + ), + ), + ) + finally: + conn.close() + + read = open_observability_store_readonly(tmp_path) + assert read is not None + try: + trace = build_trace_view(read, correlation_id="M") + finally: + read.close() + + tool = trace.aggregates.mcp_tools[0] 
+ assert tool.p95_request_bytes == 51 + assert tool.p95_response_bytes == 1873 + assert tool.p95_response_tokens == 469 + + costly = trace.aggregates.semantic_costs[0] + assert costly.name == "memory.semantic.reindex" + # embedded present and zero -> the reindex ran but produced nothing. + assert costly.no_op is True + assert costly.produced == 0 + assert costly.skipped == 1423 From ac6e090486dd09ae5f10e84211d32ce50b504edd Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Wed, 10 Jun 2026 23:36:07 +0500 Subject: [PATCH 226/318] feat(html): runtime-diagnosis cockpit with waterfall timeline Rewrite the observability HTML as a top-down diagnosis cockpit instead of a data dump: executive runtime summary (stat cards + slowest op / hottest span / peak memory highlights), correlated finish->worker event chains (horizontal causality breadcrumb + indented detail via a rail, no card-in-card), a memory-pipeline cost table that flags no-op-but-costly spans, an MCP tool matrix (latency + request/response/token payload), and a per-chain waterfall timeline that places bars by start offset and width by duration so the spawn handoff gap is visible. Self-contained branded SVG, auto dark/light, no JS. --- codeclone/observability/render_html.py | 530 ++++++++++++++++++------- tests/test_observability_render.py | 137 +++++++ 2 files changed, 517 insertions(+), 150 deletions(-) diff --git a/codeclone/observability/render_html.py b/codeclone/observability/render_html.py index cd366d63..49370d87 100644 --- a/codeclone/observability/render_html.py +++ b/codeclone/observability/render_html.py @@ -6,11 +6,16 @@ """Branded HTML renderer for the observability ``TraceView`` (Phase 29 output). -Self-contained single page: the CodeClone brand logo + brand tokens (Inter / -JetBrains Mono / oklch indigo, auto dark-light), a focused embedded stylesheet, -and inline SVG bars — no external assets, no ``report`` import, no JS required. 
-The trace is a column-aligned grid: names, bars, durations and metrics line up -across every row, and an operation's child operations nest under it. +A single self-contained page rendered as a *runtime-diagnosis cockpit*, not a +data dump. It is laid out for a top-down reading trajectory that answers the +operator's questions in order: an executive summary that names where time and +memory went; the correlated finish->worker event chains (a horizontal causality +breadcrumb plus indented detail — nesting is shown with an indent rail, never a +card inside a card); a memory-pipeline cost table that flags spans that ran but +produced nothing; and an MCP tool matrix that surfaces payload noise. + +CodeClone brand mark + brand tokens (Inter / JetBrains Mono / oklch indigo, auto +dark-light), inline SVG bars, no JS, no external assets, no ``report`` import. """ from __future__ import annotations @@ -18,7 +23,20 @@ from collections.abc import Mapping from html import escape -from .views import AggregatesView, McpToolAggregate, OperationView, SpanView, TraceView +from .views import ( + AggregatesView, + McpToolAggregate, + OperationView, + SpanCostView, + SpanView, + TraceView, + WaterfallGroup, + WaterfallRow, +) + +# A no-op span only deserves a "costly" warning once it has actually spent time. +_NOOP_COSTLY_MS = 50.0 +_KNOWN_SURFACES = frozenset({"mcp", "cli", "memory"}) # Reuse of the CodeClone brand mark (report/html/widgets/icons.py:BRAND_LOGO). 
_LOGO = ( @@ -40,6 +58,7 @@ --text:oklch(96% 0.010 275);--dim:oklch(74% 0.028 275);--mute:oklch(56% 0.028 275); --accent:#818cf8;--accent-soft:color-mix(in oklch,#818cf8 30%,transparent); --track:oklch(28% 0.02 275);--warn:#f59e0b; +--warn-soft:color-mix(in oklch,#f59e0b 14%,transparent); --mcp:#818cf8;--cli:#2dd4bf;--memory:#fbbf24; --font:"Inter","Inter Variable",-apple-system,BlinkMacSystemFont,"Segoe UI", Roboto,sans-serif; @@ -49,88 +68,116 @@ --bg:oklch(98.5% 0.006 275);--surface:#fff;--surface-2:oklch(97.3% 0.006 275); --border:oklch(89% 0.018 275);--text:oklch(24% 0.040 275); --dim:oklch(44% 0.046 275);--mute:oklch(55% 0.040 275); ---accent:#4f46e5;--accent-soft:color-mix(in oklch,#4f46e5 28%,transparent); ---track:oklch(92% 0.012 275);--mcp:#4f46e5;--cli:#0d9488;--memory:#b45309; +--accent:#4f46e5;--accent-soft:color-mix(in oklch,#4f46e5 26%,transparent); +--track:oklch(92% 0.012 275);--warn:#b45309; +--warn-soft:color-mix(in oklch,#b45309 12%,transparent); +--mcp:#4f46e5;--cli:#0d9488;--memory:#b45309; }} html{-webkit-text-size-adjust:100%} body{background:var(--bg);color:var(--text);font-family:var(--font); font-size:14px;line-height:1.5;-webkit-font-smoothing:antialiased; -padding:34px 20px 80px} -.wrap{max-width:1000px;margin:0 auto} +padding:36px 22px 90px} +.wrap{max-width:1040px;margin:0 auto} .head{display:flex;align-items:center;gap:13px;margin-bottom:5px} .logo{flex-shrink:0} h1{font-size:20px;font-weight:600;letter-spacing:-0.01em} -.sub{color:var(--dim);font-size:12.5px;margin:0 0 28px 43px;font-family:var(--mono)} +.sub{color:var(--dim);font-size:12.5px;margin:0 0 30px 43px;font-family:var(--mono)} .sub b{color:var(--text);font-weight:550} -.grid{display:grid;grid-template-columns:repeat(4,1fr);gap:12px;margin-bottom:32px} +section{margin-bottom:30px} +h2{font-size:11px;text-transform:uppercase;letter-spacing:0.09em; +color:var(--mute);font-weight:600;margin:0 0 4px 2px} +.shint{color:var(--mute);font-size:12px;margin:0 0 11px 2px} 
+.panel{background:var(--surface);border:1px solid var(--border); +border-radius:11px;overflow:hidden} +.grid{display:grid;grid-template-columns:repeat(4,1fr);gap:12px;margin-bottom:14px} .card{background:var(--surface);border:1px solid var(--border); -border-radius:10px;padding:14px 16px} -.card .v{font-size:23px;font-weight:600;letter-spacing:-0.02em;font-family:var(--mono)} +border-radius:11px;padding:14px 16px} +.card .v{font-size:24px;font-weight:600;letter-spacing:-0.02em; +font-family:var(--mono)} .card .l{color:var(--mute);font-size:10.5px;text-transform:uppercase; letter-spacing:0.07em;margin-top:4px} +.card.warn{border-color:var(--warn-soft)} .card.warn .v{color:var(--warn)} .card.accent .v{color:var(--accent)} -h2{font-size:11px;text-transform:uppercase;letter-spacing:0.08em; -color:var(--mute);font-weight:600;margin:0 0 10px 2px} -section{margin-bottom:30px} -.panel{background:var(--surface);border:1px solid var(--border); -border-radius:10px;overflow:hidden} -.badge{font-size:10px;font-weight:600;font-family:var(--mono);padding:2px 6px; -border-radius:5px;text-transform:uppercase;letter-spacing:0.03em; -justify-self:start; -background:color-mix(in oklch,var(--c,var(--accent)) 15%,transparent); +.lead{padding:4px 16px} +.lrow{display:grid;grid-template-columns:158px minmax(0,1fr) auto;align-items:center; +gap:14px;padding:11px 0;border-top:1px solid var(--border)} +.lrow:first-child{border-top:none} +.llabel{color:var(--mute);font-size:11px;text-transform:uppercase; +letter-spacing:0.05em} +.lval{display:flex;align-items:center;gap:9px;min-width:0} +.lname{font-family:var(--mono);font-size:13px;overflow:hidden; +text-overflow:ellipsis;white-space:nowrap} +.lin{color:var(--mute);font-size:11.5px;font-family:var(--mono)} +.lmetric{font-family:var(--mono);font-size:14px;font-weight:600;white-space:nowrap} +.badge{font-size:10px;font-weight:600;font-family:var(--mono);padding:2px 7px; 
+border-radius:5px;text-transform:uppercase;letter-spacing:0.03em;flex-shrink:0; +background:color-mix(in oklch,var(--c,var(--accent)) 16%,transparent); color:var(--c,var(--accent))} .surf-mcp{--c:var(--mcp)}.surf-cli{--c:var(--cli)}.surf-memory{--c:var(--memory)} -.name{font-family:var(--mono);font-size:12.5px;min-width:0; -overflow:hidden;text-overflow:ellipsis;white-space:nowrap} -.dur{font-family:var(--mono);font-size:12.5px;text-align:right; +.chip{font-size:10.5px;font-family:var(--mono);padding:1px 8px;border-radius:20px; +background:var(--surface-2);color:var(--dim);border:1px solid var(--border); white-space:nowrap} -.rss{font-family:var(--mono);font-size:11.5px;color:var(--warn); -text-align:right;white-space:nowrap;font-weight:550} +.chip.warn{color:var(--warn);border-color:transparent;background:var(--warn-soft); +font-weight:600} .bar{display:block;width:100%;height:7px} -.chip{font-size:10.5px;font-family:var(--mono);padding:1px 7px;border-radius:20px; -background:var(--surface-2);color:var(--dim);border:1px solid var(--border); +.dur{font-family:var(--mono);font-size:12.5px;text-align:right;white-space:nowrap; +color:var(--dim)} +.rss{font-family:var(--mono);font-size:11.5px;color:var(--warn);white-space:nowrap; +font-weight:550} +.meta{display:flex;align-items:center;justify-content:flex-end;gap:8px;min-width:0} +.pay{font-family:var(--mono);font-size:11px;color:var(--mute);white-space:nowrap} +.chain{padding:6px 16px 12px} +.group{padding:13px 0;border-top:1px solid var(--border)} +.group:first-child{border-top:none} +.crumb{display:flex;align-items:center;flex-wrap:wrap;gap:9px;margin-bottom:10px} +.crumb .node{display:flex;align-items:center;gap:7px} +.crumb .cname{font-family:var(--mono);font-size:12px;color:var(--text)} +.crumb .arrow{color:var(--mute);font-size:13px} +.oprow,.spanrow{display:grid; +grid-template-columns:minmax(0,1fr) 160px 64px minmax(92px,auto); +align-items:center;column-gap:14px;row-gap:2px;padding:5px 0} 
+.lead-cell{display:flex;align-items:center;gap:9px;min-width:0} +.opname{font-family:var(--mono);font-size:13px;font-weight:550;overflow:hidden; +text-overflow:ellipsis;white-space:nowrap} +.spanname{font-family:var(--mono);font-size:12px;color:var(--dim);overflow:hidden; +text-overflow:ellipsis;white-space:nowrap} +.tick{color:var(--accent);opacity:0.6;font-size:11px;flex-shrink:0} +.spanrow .counters{grid-column:2/-1;font-family:var(--mono);font-size:10.5px; +color:var(--mute);display:flex;flex-wrap:wrap;gap:0 15px} +.counters b{color:var(--dim);font-weight:550;margin-right:4px} +.spans{padding-left:17px} +.kids{margin-left:13px;padding-left:17px;border-left:2px solid var(--accent-soft)} +.wf{padding:8px 16px 12px} +.wf-group{padding:13px 0;border-top:1px solid var(--border)} +.wf-group:first-child{border-top:none} +.wf-cap{display:flex;align-items:center;gap:8px;margin-bottom:9px; +font-family:var(--mono);font-size:11px;color:var(--mute)} +.wf-cap b{color:var(--dim);font-weight:600} +.wf-row{display:grid;grid-template-columns:minmax(150px,238px) minmax(0,1fr) 58px; +align-items:center;column-gap:12px;padding:2px 0} +.wf-label{font-family:var(--mono);font-size:11.5px;overflow:hidden; +text-overflow:ellipsis;white-space:nowrap} +.wf-label.op{color:var(--text);font-weight:550} +.wf-label.span{color:var(--dim)} +.wf-track{position:relative;height:14px;background:var(--track);border-radius:4px} +.wf-bar{position:absolute;top:2px;height:10px;border-radius:3px; +background:var(--c,var(--accent))} +.wf-bar.span{top:3px;height:8px;opacity:0.8} +.wf-dur{font-family:var(--mono);font-size:11px;color:var(--mute);text-align:right; white-space:nowrap} -.chip.unknown{color:var(--warn);border-color:transparent; -background:color-mix(in oklch,var(--warn) 13%,transparent)} -.slow{display:grid; -grid-template-columns:58px minmax(0,1fr) 150px 56px 78px; -align-items:center;column-gap:13px;padding:9px 16px; -border-top:1px solid var(--border)} -.slow:first-child{border-top:none} 
-.slow .name{color:var(--text)} -.slow .dur{color:var(--dim)} table{width:100%;border-collapse:collapse;font-size:12.5px} th{text-align:left;padding:9px 16px;color:var(--mute);font-size:10.5px; text-transform:uppercase;letter-spacing:0.05em; -border-bottom:1px solid var(--border)} -td{padding:8px 16px;border-top:1px solid var(--border);font-family:var(--mono)} +border-bottom:1px solid var(--border);white-space:nowrap} +td{padding:9px 16px;border-top:1px solid var(--border);font-family:var(--mono); +white-space:nowrap} td.t{font-family:var(--font)} th.r,td.r{text-align:right} -.tree{padding:8px} -.op{border:1px solid var(--border);border-radius:9px;overflow:hidden; -margin:7px 0;background:var(--surface)} -.op:first-child{margin-top:0} -.op .op{margin:8px 10px 10px 22px;border-left:2px solid var(--accent-soft)} -.op-head{display:flex;align-items:center;gap:10px;padding:9px 13px; -background:var(--surface-2)} -.op-head .name{flex:1;font-size:13px;font-weight:550;color:var(--text)} -.op-head .pay{font-family:var(--mono);font-size:11px;color:var(--mute); -white-space:nowrap} -.spans{padding:3px 0 5px} -.span{display:grid; -grid-template-columns:minmax(0,1fr) 150px 56px minmax(120px,0.9fr); -align-items:center;column-gap:13px;row-gap:1px;padding:4px 14px 4px 16px} -.span .name{grid-column:1;grid-row:1;color:var(--dim)} -.span .bar{grid-column:2;grid-row:1} -.span .dur{grid-column:3;grid-row:1;color:var(--dim)} -.span .smeta{grid-column:4;grid-row:1;display:flex;align-items:center; -gap:8px;min-width:0;overflow:hidden} -.span .counters{grid-column:2/-1;grid-row:2;font-family:var(--mono); -font-size:10.5px;color:var(--mute);display:flex;flex-wrap:wrap;gap:0 15px} -.kv b{color:var(--dim);font-weight:550;margin-right:4px} -.empty{padding:28px;text-align:center;color:var(--mute);font-size:13px} -.foot{margin-top:36px;color:var(--mute);font-size:11px;text-align:center; +tr.flag td{background:var(--warn-soft)} +.muted{color:var(--mute)} 
+.empty{padding:30px;text-align:center;color:var(--mute);font-size:13px} +.foot{margin-top:38px;color:var(--mute);font-size:11px;text-align:center; font-family:var(--mono)} """ @@ -144,7 +191,9 @@ def _ms(value: float) -> str: def _mb(value: float | None) -> str: - return "—" if value is None else f"{value:.1f} MB" + if value is None: + return "—" + return f"{value / 1024:.1f} GB" if value >= 1024 else f"{value:.1f} MB" def _bytes(value: int | None) -> str: @@ -157,6 +206,12 @@ def _bytes(value: int | None) -> str: return f"{value} B" +def _tokens(value: int | None) -> str: + if not value: + return "—" + return f"{value / 1000:.1f}k" if value >= 1000 else str(value) + + def _bar(value: float, maximum: float, *, color: str = "var(--accent)") -> str: frac = value / maximum if maximum > 0 else 0.0 fill = max(1.5, round(frac * 100, 1)) @@ -168,9 +223,6 @@ def _bar(value: float, maximum: float, *, color: str = "var(--accent)") -> str: ) -_KNOWN_SURFACES = frozenset({"mcp", "cli", "memory"}) - - def _surface_badge(surface: str) -> str: cls = f"surf-{surface}" if surface in _KNOWN_SURFACES else "" return f'{_esc(surface)}' @@ -179,7 +231,7 @@ def _surface_badge(surface: str) -> str: def _reason_chip(reason_kind: str | None) -> str: if not reason_kind: return "" - extra = " unknown" if reason_kind == "unknown" else "" + extra = " warn" if reason_kind == "unknown" else "" return f'{_esc(reason_kind)}' @@ -193,10 +245,22 @@ def _counters(counters: Mapping[str, int]) -> str: return f'{items}' -def _rss(value: float | None) -> str: - if value is None or value < 0.05: - return "" - return f'Δ{value:.1f} MB' +def _rss_text(value: float | None) -> str: + return "" if value is None or value < 0.05 else f"Δ{_mb(value)}" + + +def _rss_badge(value: float | None) -> str: + text = _rss_text(value) + return f'{text}' if text else "" + + +def _payload(op: OperationView) -> str: + parts = [] + if op.request_bytes is not None: + parts.append(f"↑{_bytes(op.request_bytes)}") + if 
op.response_bytes is not None: + parts.append(f"↓{_bytes(op.response_bytes)}") + return f'{" ".join(parts)}' if parts else "" def _header(trace: TraceView) -> str: @@ -222,114 +286,279 @@ def _stat(value: str, label: str, variant: str = "") -> str: ) -def _stats(agg: AggregatesView) -> str: - unknown_variant = "warn" if agg.unknown_expensive_rebuild_count else "" - anomaly_variant = "warn" if agg.anomaly_count else "" +def _section(title: str, body: str, *, subtitle: str = "") -> str: + hint = f'

    {_esc(subtitle)}

    ' if subtitle else "" + return f"

    {_esc(title)}

    {hint}{body}
    " + + +def _table(headers: tuple[tuple[str, bool], ...], rows: str) -> str: + ths = "".join( + f'{_esc(label)}' if right else f"{_esc(label)}" + for label, right in headers + ) + return ( + f'
    {ths}' + f"{rows}
    " + ) + + +def _lead_row(label: str, value_html: str, metric: str) -> str: return ( + f'
    {_esc(label)}' + f'{value_html}' + f'{_esc(metric)}
    ' + ) + + +def _highlights(agg: AggregatesView) -> str: + rows: list[str] = [] + if agg.slowest: + op = agg.slowest[0] + rows.append( + _lead_row( + "Slowest operation", + f"{_surface_badge(op.surface)}" + f'{_esc(op.name)}', + _ms(op.duration_ms), + ) + ) + if agg.slowest_span is not None: + span = agg.slowest_span + reason = _reason_chip(span.reason_kind) + rows.append( + _lead_row( + "Hottest span", + f"{_surface_badge(span.surface)}" + f'{_esc(span.name)}' + f'in {_esc(span.operation_name)}{reason}', + _ms(span.duration_ms), + ) + ) + if agg.max_rss_delta_mb is not None: + rows.append( + _lead_row( + "Peak memory Δ", + 'resident set growth', + _mb(agg.max_rss_delta_mb), + ) + ) + return f'
    {"".join(rows)}
    ' if rows else "" + + +def _summary(trace: TraceView) -> str: + agg = trace.aggregates + costly = sum( + 1 + for span in agg.semantic_costs + if span.no_op and span.duration_ms >= _NOOP_COSTLY_MS + ) + unknown = agg.unknown_expensive_rebuild_count + cards = ( '
    ' + _stat(str(agg.operation_count), "operations", "accent") + _stat(_mb(agg.max_rss_delta_mb), "peak rss Δ") - + _stat( - str(agg.unknown_expensive_rebuild_count), - "unknown heavy", - unknown_variant, - ) - + _stat(str(agg.anomaly_count), "anomalies", anomaly_variant) + + _stat(str(costly), "costly no-ops", "warn" if costly else "") + + _stat(str(unknown), "unknown reason", "warn" if unknown else "") + "
    " ) + return _section("Runtime summary", cards + _highlights(agg)) + + +def _op_lineage(op: OperationView) -> list[OperationView]: + flat = [op] + for child in op.children: + flat.extend(_op_lineage(child)) + return flat -def _slowest(agg: AggregatesView) -> str: - if not agg.slowest: +def _breadcrumb(lineage: list[OperationView]) -> str: + if len(lineage) < 2: return "" - top = agg.slowest[0].duration_ms or 1.0 - rows = "".join( - f'
    {_surface_badge(op.surface)}' - f'{_esc(op.name)}{_bar(op.duration_ms, top)}' + nodes = ' '.join( + f'{_surface_badge(op.surface)}' + f'{_esc(op.name)}' + for op in lineage + ) + return f'
    {nodes}
    ' + + +def _op_row(op: OperationView, group_max: float) -> str: + return ( + '
    ' + f'{_surface_badge(op.surface)}{_esc(op.name)}' + f"{_bar(op.duration_ms, group_max)}" f'{_ms(op.duration_ms)}' - f'{_rss_value(op.rss_delta_mb)}
    ' - for op in agg.slowest + f'{_rss_badge(op.rss_delta_mb)}{_payload(op)}
    ' ) + + +def _span_row(span: SpanView, op_duration: float) -> str: + color = "var(--warn)" if span.reason_kind == "unknown" else "var(--accent)" + meta = f"{_reason_chip(span.reason_kind)}{_rss_badge(span.rss_delta_mb)}" return ( - f'

    Slowest operations

    {rows}
    ' + '
    ' + f'' + f'{_esc(span.name)}' + f"{_bar(span.duration_ms, op_duration, color=color)}" + f'{_ms(span.duration_ms)}' + f'{meta}{_counters(span.counters)}
    ' ) -def _rss_value(value: float | None) -> str: - return "" if value is None or value < 0.05 else f"Δ{value:.1f} MB" +def _op_block(op: OperationView, group_max: float) -> str: + op_duration = op.duration_ms or 1.0 + spans = "".join(_span_row(span, op_duration) for span in op.spans) + spans_block = f'
    {spans}
    ' if spans else "" + kids = "".join(_op_block(child, group_max) for child in op.children) + kids_block = f'
    {kids}
    ' if kids else "" + return ( + f'
    {_op_row(op, group_max)}{spans_block}
    {kids_block}' + ) -def _mcp(tools: tuple[McpToolAggregate, ...]) -> str: - if not tools: - return "" - rows = "".join( - f'{_esc(tool.name)}{tool.count}' - f'{_ms(tool.p50_duration_ms)}' - f'{_ms(tool.p95_duration_ms)}' - f'{_bytes(tool.p95_response_bytes)}' - for tool in tools +def _chain_group(root: OperationView) -> str: + lineage = _op_lineage(root) + group_max = max((op.duration_ms for op in lineage), default=1.0) or 1.0 + return ( + f'
    {_breadcrumb(lineage)}{_op_block(root, group_max)}
    ' + ) + + +def _chain(trace: TraceView) -> str: + if not trace.operation_tree: + body = ( + '
    ' + "No operations recorded yet.
    " + ) + return _section("Correlated event chains", body) + groups = "".join(_chain_group(op) for op in trace.operation_tree) + return _section( + "Correlated event chains", + f'
    {groups}
    ', + subtitle="What triggered what, across processes — finish → spawned worker.", + ) + + +def _semantic_row(span: SpanCostView) -> str: + costly = span.no_op and span.duration_ms >= _NOOP_COSTLY_MS + if costly: + verdict = 'no-op · costly' + elif span.no_op: + verdict = 'no-op' + else: + verdict = 'productive' + reason = ( + _esc(span.reason_kind) if span.reason_kind else '' ) return ( - '

    MCP tool payloads

    ' - '' - '' - '' - f"{rows}
    ToolCallsp50p95p95 response
    " + f'' + f'{_esc(span.name)}' + f'{_esc(span.operation_name)}' + f"{reason}" + f'{span.produced}' + f'{span.skipped}' + f'{_ms(span.duration_ms)}' + f'{_mb(span.rss_delta_mb)}' + f"{verdict}" ) -def _payload(op: OperationView) -> str: - parts = [] - if op.request_bytes is not None: - parts.append(f"↑{_bytes(op.request_bytes)}") - if op.response_bytes is not None: - parts.append(f"↓{_bytes(op.response_bytes)}") - return f'{" ".join(parts)}' if parts else "" +def _semantic(agg: AggregatesView) -> str: + if not agg.semantic_costs: + return "" + rows = "".join(_semantic_row(span) for span in agg.semantic_costs) + headers = ( + ("Span", False), + ("Operation", False), + ("Reason", False), + ("Produced", True), + ("Skipped", True), + ("Duration", True), + ("RSS Δ", True), + ("Verdict", False), + ) + return _section( + "Memory pipeline cost", + _table(headers, rows), + subtitle="Reindex and rebuild spans — flags work that ran but " + "produced nothing.", + ) -def _span_row(span: SpanView, op_duration: float) -> str: - color = "var(--warn)" if span.reason_kind == "unknown" else "var(--cli)" - meta = _reason_chip(span.reason_kind) + _rss(span.rss_delta_mb) +def _mcp_row(tool: McpToolAggregate) -> str: return ( - f'
    {_esc(span.name)}' - f"{_bar(span.duration_ms, op_duration, color=color)}" - f'{_ms(span.duration_ms)}' - f'{meta}' - f"{_counters(span.counters)}
    " + f'{_esc(tool.name)}' + f'{tool.count}' + f'{_ms(tool.p50_duration_ms)}' + f'{_ms(tool.p95_duration_ms)}' + f'{_bytes(tool.p95_request_bytes)}' + f'{_bytes(tool.p95_response_bytes)}' + f'{_tokens(tool.p95_response_tokens)}' ) -def _op_card(op: OperationView) -> str: - op_duration = op.duration_ms or 1.0 - head = ( - f'
    {_surface_badge(op.surface)}' - f'{_esc(op.name)}' - f'{_ms(op.duration_ms)}' - f"{_rss(op.rss_delta_mb)}{_payload(op)}
    " +def _mcp(tools: tuple[McpToolAggregate, ...]) -> str: + if not tools: + return "" + rows = "".join(_mcp_row(tool) for tool in tools) + headers = ( + ("Tool", False), + ("Calls", True), + ("p50", True), + ("p95", True), + ("↑ req p95", True), + ("↓ resp p95", True), + ("resp tok p95", True), ) - spans = ( - f'
    {"".join(_span_row(s, op_duration) for s in op.spans)}' - "
    " - if op.spans - else "" + return _section( + "MCP tool matrix", + _table(headers, rows), + subtitle="Per-tool latency and payload — spot tools that flood request " + "or response bytes.", ) - children = "".join(_op_card(child) for child in op.children) - return f'
    {head}{spans}{children}
    ' -def _tree(trace: TraceView) -> str: - if not trace.operation_tree: - return ( - '

    Trace

    ' - '
    No operations recorded yet.
    ' - "
    " - ) - cards = "".join(_op_card(op) for op in trace.operation_tree) - return f'

    Trace

    {cards}
    ' +def _wf_bar(row: WaterfallRow, total_ms: float) -> str: + span = total_ms if total_ms > 0 else 1.0 + left = round(min(row.offset_ms / span * 100, 99.0), 2) + width = max(0.6, round(row.duration_ms / span * 100, 2)) + kind = "op" if row.kind == "operation" else "span" + surf = f"surf-{row.surface}" if row.surface in _KNOWN_SURFACES else "" + tick = '' if kind == "span" else "" + return ( + '
    ' + f'' + f"{tick}{_esc(row.label)}" + f'
    ' + f'{_ms(row.duration_ms)}
    ' + ) + + +def _wf_group(group: WaterfallGroup) -> str: + rows = "".join(_wf_bar(row, group.duration_ms) for row in group.rows) + cid = group.correlation_id[:8] if group.correlation_id else "—" + return ( + f'
    {_esc(cid)}' + f"{_esc(group.started_at_utc)}" + f"span {_ms(group.duration_ms)}
    {rows}
    " + ) + + +def _waterfall(trace: TraceView) -> str: + if not trace.waterfall: + return "" + groups = "".join(_wf_group(group) for group in trace.waterfall) + return _section( + "Timeline", + f'
    {groups}
    ', + subtitle="Each causal chain on its own time axis — bars placed by start " + "offset, width by duration; a gap before a worker bar is the spawn handoff.", + ) def render_trace_html(trace: TraceView) -> str: - """Render a ``TraceView`` as a self-contained, branded HTML document.""" + """Render a ``TraceView`` as a self-contained, branded diagnosis cockpit.""" foot = f"CodeClone · platform observability · schema {_esc(trace.schema_version)}" return ( '' @@ -337,10 +566,11 @@ def render_trace_html(trace: TraceView) -> str: "CodeClone · Platform Observability" f'
    ' + _header(trace) - + _stats(trace.aggregates) - + _slowest(trace.aggregates) + + _summary(trace) + + _waterfall(trace) + + _chain(trace) + + _semantic(trace.aggregates) + _mcp(trace.aggregates.mcp_tools) - + _tree(trace) + f'

    {foot}

    ' + "
    " ) diff --git a/tests/test_observability_render.py b/tests/test_observability_render.py index eaea4f7d..93aaa0ba 100644 --- a/tests/test_observability_render.py +++ b/tests/test_observability_render.py @@ -23,8 +23,11 @@ AggregatesView, McpToolAggregate, OperationView, + SpanCostView, SpanView, TraceView, + WaterfallGroup, + WaterfallRow, ) from codeclone.surfaces.cli.observability import observability_main @@ -89,6 +92,140 @@ def test_render_trace_html_is_branded() -> None: assert "finish_controlled_change" in html +def _cockpit_trace() -> TraceView: + reindex = SpanView( + span_id="sx", + name="memory.semantic.reindex", + duration_ms=850.0, + status="ok", + reason_kind="content_changed", + counters={"embedded": 0, "skipped_unchanged": 1423}, + ) + worker = OperationView( + operation_id="W", + correlation_id="A", + surface="memory", + name="memory.projection.job", + started_at_utc="2026-06-10T04:00:01Z", + duration_ms=900.0, + status="ok", + parent_operation_id="A", + rss_delta_mb=512.0, + spans=(reindex,), + ) + finish = OperationView( + operation_id="A", + correlation_id="A", + surface="mcp", + name="finish_controlled_change", + started_at_utc="2026-06-10T04:00:00Z", + duration_ms=120.0, + status="ok", + request_bytes=51, + response_bytes=1873, + children=(worker,), + ) + costly = SpanCostView( + span_id="sx", + name="memory.semantic.reindex", + surface="memory", + operation_id="W", + operation_name="memory.projection.job", + duration_ms=850.0, + reason_kind="content_changed", + produced=0, + skipped=1423, + no_op=True, + ) + agg = AggregatesView( + operation_count=2, + slowest=(worker, finish), + max_rss_delta_mb=512.0, + mcp_tools=( + McpToolAggregate( + "finish_controlled_change", + 3, + 80.0, + 120.0, + 1873, + p95_request_bytes=51, + p95_response_tokens=469, + ), + ), + slowest_span=costly, + semantic_costs=(costly,), + ) + return TraceView( + schema_version="1.0", + window_started_at_utc="2026-06-10T04:00:00Z", + 
window_ended_at_utc="2026-06-10T04:00:02Z", + aggregates=agg, + operation_tree=(finish,), + correlated_operations=(finish, worker), + ) + + +def test_render_cockpit_sections() -> None: + html = render_trace_html(_cockpit_trace()) + # Section trajectory: summary -> chain -> memory cost -> MCP matrix. + assert "Runtime summary" in html + assert "Correlated event chains" in html + assert "Memory pipeline cost" in html + assert "MCP tool matrix" in html + # Cross-process correlation: a breadcrumb chains finish -> worker, and the + # worker nests under it via the indent rail (not a card inside a card). + assert "finish_controlled_change" in html + assert "memory.projection.job" in html + assert "→" in html + assert 'class="kids"' in html + # The reindex ran but embedded nothing -> flagged as a costly no-op. + assert "no-op" in html + assert "Hottest span" in html + # MCP matrix carries request bytes and response tokens, not just response bytes. + assert "51 B" in html + assert "469" in html + + +def test_render_waterfall_timeline() -> None: + group = WaterfallGroup( + correlation_id="corr1234abcd", + started_at_utc="2026-06-10T04:00:00Z", + duration_ms=1000.0, + rows=( + WaterfallRow( + label="finish_controlled_change", + surface="mcp", + kind="operation", + depth=0, + offset_ms=0.0, + duration_ms=120.0, + ), + WaterfallRow( + label="memory.projection.job", + surface="memory", + kind="operation", + depth=1, + offset_ms=300.0, + duration_ms=700.0, + ), + ), + ) + trace = TraceView( + schema_version="1.0", + window_started_at_utc="2026-06-10T04:00:00Z", + window_ended_at_utc="2026-06-10T04:00:01Z", + aggregates=AggregatesView(operation_count=2), + waterfall=(group,), + ) + html = render_trace_html(trace) + assert "Timeline" in html + assert "wf-bar" in html + # The worker bar is offset 300/1000 = 30% and 700/1000 = 70% wide. 
+ assert "left:30.0%" in html + assert "width:70.0%" in html + assert "memory.projection.job" in html + + def test_render_trace_html_escapes_user_text() -> None: span = SpanView( span_id="s", name="", duration_ms=1.0, status="ok" From 0e796dcbc4cc82e5add2e75584420e9a7aa1dbe6 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Wed, 10 Jun 2026 23:36:42 +0500 Subject: [PATCH 227/318] feat(core): record memory.projection.spawn op for the worker chain MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add operation B (memory.projection.spawn) in execute_enqueue_projection_rebuild, wrapping the spawn decision. It inherits the active finish op (A) as parent + correlation via current_operation_context(), so the env handoff parents the worker (C) under B — completing the cross-process A->B->C tree. Inert when observability is disabled. --- codeclone/memory/jobs/workflow.py | 21 ++++++++- tests/test_projection_spawn_guard.py | 67 ++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+), 2 deletions(-) diff --git a/codeclone/memory/jobs/workflow.py b/codeclone/memory/jobs/workflow.py index d437ea8a..497eb512 100644 --- a/codeclone/memory/jobs/workflow.py +++ b/codeclone/memory/jobs/workflow.py @@ -12,7 +12,13 @@ from ...config.memory import MemoryConfig, resolve_memory_config from ...config.observability import resolve_observability_config -from ...observability import bootstrap, is_observability_enabled, shutdown +from ...observability import ( + bootstrap, + current_operation_context, + is_observability_enabled, + operation, + shutdown, +) from ...utils.ci import is_ci_environment from ..exceptions import MemoryContractError from ..models import MemoryProject @@ -166,7 +172,18 @@ def execute_enqueue_projection_rebuild( # redundant overlapping process. 
spawn_skipped_reason = "worker_already_running" else: - spawn_result = spawn_projection_jobs_worker(root_path=resolved_root) + # Op B of the finish->spawn->worker chain (spec §4.3). The spawn + # decision becomes the active operation, inheriting the finish op (A) + # as parent + correlation, so the env handoff in spawn.py parents the + # worker (C) under B. Inert when observability is disabled. + parent = current_operation_context() + with operation( + name="memory.projection.spawn", + surface="memory", + parent_operation_id=parent[0] if parent else None, + correlation_id=parent[1] if parent else None, + ): + spawn_result = spawn_projection_jobs_worker(root_path=resolved_root) spawned = spawn_result.spawned worker_pid = spawn_result.pid return { diff --git a/tests/test_projection_spawn_guard.py b/tests/test_projection_spawn_guard.py index bee47d93..68eb4720 100644 --- a/tests/test_projection_spawn_guard.py +++ b/tests/test_projection_spawn_guard.py @@ -12,8 +12,10 @@ import pytest from codeclone.config.memory import MemoryConfig, resolve_memory_config +from codeclone.config.observability import ObservabilityConfig from codeclone.memory.jobs import compute_projection_stimulus from codeclone.memory.jobs import workflow as jobs_workflow +from codeclone.memory.jobs.spawn import SpawnWorkerResult from codeclone.memory.jobs.store import ( enqueue_projection_job, has_live_running_job, @@ -23,6 +25,16 @@ from codeclone.memory.models import MemoryProject from codeclone.memory.project import resolve_memory_db_path from codeclone.memory.schema import open_memory_db +from codeclone.observability import ( + bootstrap, + current_operation_context, + operation, + shutdown, +) +from codeclone.observability.store.schema import ( + observability_store_path, + open_observability_store, +) from codeclone.report.meta import current_report_timestamp_utc from .memory_fixtures import cli_memory_repo @@ -151,3 +163,58 @@ def test_enqueue_skips_spawn_when_worker_running( assert 
payload["spawned"] is False assert payload["spawn_skipped_reason"] == "worker_already_running" assert payload["status"] == "enqueued" + + +def test_enqueue_records_spawn_op_b_under_finish( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + monkeypatch.setattr(jobs_workflow, "is_ci_environment", lambda: False) + captured: dict[str, tuple[str, str] | None] = {} + + def _fake_spawn(*, root_path: Path) -> SpawnWorkerResult: + # The spawn handoff reads the active operation here; under op B it must + # see B (not the finish op A), so the worker links parent=B. + captured["ctx"] = current_operation_context() + return SpawnWorkerResult(spawned=True, reason=None, pid=4242) + + monkeypatch.setattr(jobs_workflow, "spawn_projection_jobs_worker", _fake_spawn) + with cli_memory_repo(tmp_path, with_draft=False) as (root, _project, _store): + config = resolve_memory_config(root) + bootstrap(ObservabilityConfig(enabled=True), root=root) + try: + with operation( + name="finish_controlled_change", + surface="mcp", + correlation_id="A-corr", + ) as finish_op: + finish_op_id = finish_op.operation_id + payload = execute_enqueue_projection_rebuild( + root_path=root, + config=config, + trigger="mcp_finish", + force=True, + spawn_worker=True, + ) + finally: + shutdown() + + assert payload["spawned"] is True + ctx = captured["ctx"] + assert ctx is not None + spawn_op_id, spawn_corr = ctx + assert spawn_corr == "A-corr" # B inherits A's correlation + assert spawn_op_id != finish_op_id # B is its own operation, not A + + obs = open_observability_store(observability_store_path(root)) + try: + row = obs.execute( + "SELECT operation_id, parent_operation_id, correlation_id " + "FROM platform_operations WHERE name='memory.projection.spawn'" + ).fetchone() + finally: + obs.close() + # Op B persisted, parented to the finish op (A) with A's correlation. 
+ assert row is not None + assert row[0] == spawn_op_id + assert row[1] == finish_op_id + assert row[2] == "A-corr" From 14d087d05a97ece1a001dcc5e6c8c640e6483202 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Wed, 10 Jun 2026 23:36:55 +0500 Subject: [PATCH 228/318] feat(mcp): add stage spans to analyze_repository Wrap the bootstrap/discover/process/analyze phases of the MCP analyze_repository handler in pipeline.* spans so mcp.analyze_repository carries the same stage timing as cli.analyze. This path calls the stages directly and bypasses run_analysis_stages, so the spans are inline; they attach to the active operation opened by the server registrar and are inert when observability is disabled. --- codeclone/surfaces/mcp/session.py | 37 ++++++++++++++++--------- tests/test_observability_correlation.py | 31 +++++++++++++++++++++ 2 files changed, 55 insertions(+), 13 deletions(-) diff --git a/codeclone/surfaces/mcp/session.py b/codeclone/surfaces/mcp/session.py index 5e7cd059..2dd2528d 100644 --- a/codeclone/surfaces/mcp/session.py +++ b/codeclone/surfaces/mcp/session.py @@ -15,6 +15,7 @@ from ...audit.runtime import open_audit_writer_for_root from ...cache.store import resolve_cache_status from ...memory.ide_governance import IdeGovernanceSessionState +from ...observability import span from ...report.meta import build_report_meta as _build_report_meta from ...report.meta import current_report_timestamp_utc as _current_report_timestamp_utc from . 
import _session_helpers as _helpers @@ -251,19 +252,29 @@ def analyze_repository(self, request: MCPAnalysisRequest) -> dict[str, object]: ) console = _BufferConsole() - boot = bootstrap( - args=args, - root=root_path, - output_paths=OutputPaths(json=_REPORT_DUMMY_PATH), - cache_path=cache_path, - ) - discovery_result = discover(boot=boot, cache=cache) - processing_result = process(boot=boot, discovery=discovery_result, cache=cache) - analysis_result = analyze( - boot=boot, - discovery=discovery_result, - processing=processing_result, - ) + # Stage spans so mcp.analyze_repository carries the same discover/process/ + # analyze timing as cli.analyze (this path bypasses run_analysis_stages, + # spec §6.1). Spans attach to the active operation from the MCP registrar; + # inert when observability is disabled or no operation is open. + with span(name="pipeline.bootstrap"): + boot = bootstrap( + args=args, + root=root_path, + output_paths=OutputPaths(json=_REPORT_DUMMY_PATH), + cache_path=cache_path, + ) + with span(name="pipeline.discover"): + discovery_result = discover(boot=boot, cache=cache) + with span(name="pipeline.process"): + processing_result = process( + boot=boot, discovery=discovery_result, cache=cache + ) + with span(name="pipeline.analyze"): + analysis_result = analyze( + boot=boot, + discovery=discovery_result, + processing=processing_result, + ) clone_baseline_state = resolve_clone_baseline_state( baseline_path=baseline_path, diff --git a/tests/test_observability_correlation.py b/tests/test_observability_correlation.py index fcba7e0a..ab202106 100644 --- a/tests/test_observability_correlation.py +++ b/tests/test_observability_correlation.py @@ -118,3 +118,34 @@ def _fake_popen(argv: object, **kwargs: object) -> object: # Observability disabled -> no active operation -> inherit parent env. 
spawn.spawn_projection_jobs_worker(root_path=tmp_path) assert captured["env"] is None + + +def test_mcp_analyze_repository_emits_pipeline_spans(tmp_path: Path) -> None: + from codeclone.surfaces.mcp.service import CodeCloneMCPService + from codeclone.surfaces.mcp.session import MCPAnalysisRequest + + (tmp_path / "module.py").write_text( + "def add(a, b):\n return a + b\n", encoding="utf-8" + ) + service = CodeCloneMCPService(history_limit=4) + bootstrap(ObservabilityConfig(enabled=True), root=tmp_path) + try: + # The registrar opens this op around each MCP tool; emulate it so the + # session's stage spans have an operation to attach to. + with operation(name="mcp.analyze_repository", surface="mcp") as op: + op_id = op.operation_id + service.analyze_repository( + MCPAnalysisRequest(root=str(tmp_path), respect_pyproject=False) + ) + finally: + shutdown() + + conn = open_observability_store(observability_store_path(tmp_path)) + try: + rows = conn.execute( + "SELECT name FROM platform_spans WHERE operation_id=?", (op_id,) + ).fetchall() + finally: + conn.close() + names = {row[0] for row in rows} + assert {"pipeline.discover", "pipeline.process", "pipeline.analyze"} <= names From cae7ac34099a01e47102ccda62c73a0162e131f3 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Wed, 10 Jun 2026 23:39:59 +0500 Subject: [PATCH 229/318] chore(deps): bump transitive lockfile pins uv.lock re-resolution only: cryptography 48.0.0->48.0.1, filelock 3.29.1->3.29.3, readme-renderer 44.0->45.0. Transitive/dev pins; no direct dependency or package-set change.
--- uv.lock | 114 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 57 insertions(+), 57 deletions(-) diff --git a/uv.lock b/uv.lock index 9e0f2db9..f3e78ea8 100644 --- a/uv.lock +++ b/uv.lock @@ -548,62 +548,62 @@ toml = [ [[package]] name = "cryptography" -version = "48.0.0" +version = "48.0.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "cffi", marker = "platform_python_implementation != 'PyPy'" }, { name = "typing-extensions", marker = "python_full_version < '3.11'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/9f/a9/db8f313fdcd85d767d4973515e1db101f9c71f95fced83233de224673757/cryptography-48.0.0.tar.gz", hash = "sha256:5c3932f4436d1cccb036cb0eaef46e6e2db91035166f1ad6505c3c9d5a635920", size = 832984, upload-time = "2026-05-04T22:59:38.133Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/df/3d/01f6dd9190170a5a241e0e98c2d04be3664a9e6f5b9b872cde63aff1c3dd/cryptography-48.0.0-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:0c558d2cdffd8f4bbb30fc7134c74d2ca9a476f830bb053074498fbc86f41ed6", size = 8001587, upload-time = "2026-05-04T22:57:36.803Z" }, - { url = "https://files.pythonhosted.org/packages/b2/6e/e90527eef33f309beb811cf7c982c3aeffcce8e3edb178baa4ca3ae4a6fa/cryptography-48.0.0-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f5333311663ea94f75dd408665686aaf426563556bb5283554a3539177e03b8c", size = 4690433, upload-time = "2026-05-04T22:57:40.373Z" }, - { url = "https://files.pythonhosted.org/packages/90/04/673510ed51ddff56575f306cf1617d80411ee76831ccd3097599140efdfe/cryptography-48.0.0-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7995ef305d7165c3f11ae07f2517e5a4f1d5c18da1376a0a9ed496336b69e5f3", size = 4710620, upload-time = "2026-05-04T22:57:42.935Z" }, - { url = 
"https://files.pythonhosted.org/packages/14/d5/e9c4ef932c8d800490c34d8bd589d64a31d5890e27ec9e9ad532be893294/cryptography-48.0.0-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:40ba1f85eaa6959837b1d51c9767e230e14612eea4ef110ee8854ada22da1bf5", size = 4696283, upload-time = "2026-05-04T22:57:45.294Z" }, - { url = "https://files.pythonhosted.org/packages/0c/29/174b9dfb60b12d59ecfc6cfa04bc88c21b42a54f01b8aae09bb6e51e4c7f/cryptography-48.0.0-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:369a6348999f94bbd53435c894377b20ab95f25a9065c283570e70150d8abc3c", size = 5296573, upload-time = "2026-05-04T22:57:47.933Z" }, - { url = "https://files.pythonhosted.org/packages/95/38/0d29a6fd7d0d1373f0c0c88a04ba20e359b257753ac497564cd660fc1d55/cryptography-48.0.0-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:a0e692c683f4df67815a2d258b324e66f4738bd7a96a218c826dce4f4bd05d8f", size = 4743677, upload-time = "2026-05-04T22:57:50.067Z" }, - { url = "https://files.pythonhosted.org/packages/30/be/eef653013d5c63b6a490529e0316f9ac14a37602965d4903efed1399f32b/cryptography-48.0.0-cp311-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:18349bbc56f4743c8b12dc32e2bccb2cf83ee8b69a3bba74ef8ae857e26b3d25", size = 4330808, upload-time = "2026-05-04T22:57:52.301Z" }, - { url = "https://files.pythonhosted.org/packages/84/9e/500463e87abb7a0a0f9f256ec21123ecde0a7b5541a15e840ea54551fd81/cryptography-48.0.0-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:7e8eac43dfca5c4cccc6dad9a80504436fca53bb9bc3100a2386d730fbe6b602", size = 4695941, upload-time = "2026-05-04T22:57:54.603Z" }, - { url = "https://files.pythonhosted.org/packages/e3/dc/7303087450c2ec9e7fbb750e17c2abfbc658f23cbd0e54009509b7cc4091/cryptography-48.0.0-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:9ccdac7d40688ecb5a3b4a604b8a88c8002e3442d6c60aead1db2a89a041560c", size = 5252579, upload-time = "2026-05-04T22:57:57.207Z" }, - { url = 
"https://files.pythonhosted.org/packages/d0/c0/7101d3b7215edcdc90c45da544961fd8ed2d6448f77577460fa75a8443f7/cryptography-48.0.0-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:bd72e68b06bb1e96913f97dd4901119bc17f39d4586a5adf2d3e47bc2b9d58b5", size = 4743326, upload-time = "2026-05-04T22:57:59.535Z" }, - { url = "https://files.pythonhosted.org/packages/ac/d8/5b833bad13016f562ab9d063d68199a4bd121d18458e439515601d3357ec/cryptography-48.0.0-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:59baa2cb386c4f0b9905bd6eb4c2a79a69a128408fd31d32ca4d7102d4156321", size = 4826672, upload-time = "2026-05-04T22:58:01.996Z" }, - { url = "https://files.pythonhosted.org/packages/98/e1/7074eb8bf3c135558c73fc2bcf0f5633f912e6fb87e868a55c454080ef09/cryptography-48.0.0-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:9249e3cd978541d665967ac2cb2787fd6a62bddf1e75b3e347a594d7dacf4f74", size = 4972574, upload-time = "2026-05-04T22:58:03.968Z" }, - { url = "https://files.pythonhosted.org/packages/04/70/e5a1b41d325f797f39427aa44ef8baf0be500065ab6d8e10369d850d4a4f/cryptography-48.0.0-cp311-abi3-win32.whl", hash = "sha256:9c459db21422be75e2809370b829a87eb37f74cd785fc4aa9ea1e5f43b47cda4", size = 3294868, upload-time = "2026-05-04T22:58:06.467Z" }, - { url = "https://files.pythonhosted.org/packages/f4/ac/8ac51b4a5fc5932eb7ee5c517ba7dc8cd834f0048962b6b352f00f41ebf9/cryptography-48.0.0-cp311-abi3-win_amd64.whl", hash = "sha256:5b012212e08b8dd5edc78ef54da83dd9892fd9105323b3993eff6bea65dc21d7", size = 3817107, upload-time = "2026-05-04T22:58:08.845Z" }, - { url = "https://files.pythonhosted.org/packages/6b/84/70e3feea9feea87fd7cbe77efb2712ae1e3e6edf10749dc6e95f4e60e455/cryptography-48.0.0-cp314-cp314t-macosx_10_9_universal2.whl", hash = "sha256:3cb07a3ed6431663cd321ea8a000a1314c74211f823e4177fefa2255e057d1ec", size = 7986556, upload-time = "2026-05-04T22:58:11.172Z" }, - { url = 
"https://files.pythonhosted.org/packages/89/6e/18e07a618bb5442ba10cf4df16e99c071365528aa570dfcb8c02e25a303b/cryptography-48.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8c7378637d7d88016fa6791c159f698b3d3eed28ebf844ac36b9dc04a14dae18", size = 4684776, upload-time = "2026-05-04T22:58:13.712Z" }, - { url = "https://files.pythonhosted.org/packages/be/6a/4ea3b4c6c6759794d5ee2103c304a5076dc4b19ae1f9fe47dba439e159e9/cryptography-48.0.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:cc90c0b39b2e3c65ef52c804b72e3c58f8a04ab2a1871272798e5f9572c17d20", size = 4698121, upload-time = "2026-05-04T22:58:16.448Z" }, - { url = "https://files.pythonhosted.org/packages/2f/59/6ff6ad6cae03bb887da2a5860b2c9805f8dac969ef01ce563336c49bd1d1/cryptography-48.0.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:76341972e1eff8b4bea859f09c0d3e64b96ce931b084f9b9b7db8ef364c30eff", size = 4690042, upload-time = "2026-05-04T22:58:18.544Z" }, - { url = "https://files.pythonhosted.org/packages/ca/b4/fc334ed8cfd705aca282fe4d8f5ae64a8e0f74932e9feecb344610cf6e4d/cryptography-48.0.0-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:55b7718303bf06a5753dcdccf2f3945cf18ad7bffde41b61226e4db31ab89a9c", size = 5282526, upload-time = "2026-05-04T22:58:20.75Z" }, - { url = "https://files.pythonhosted.org/packages/11/08/9f8c5386cc4cd90d8255c7cdd0f5baf459a08502a09de30dc51f553d38dc/cryptography-48.0.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:a64697c641c7b1b2178e573cbc31c7c6684cd56883a478d75143dbb7118036db", size = 4733116, upload-time = "2026-05-04T22:58:23.627Z" }, - { url = "https://files.pythonhosted.org/packages/b8/77/99307d7574045699f8805aa500fa0fb83422d115b5400a064ddd306d7750/cryptography-48.0.0-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:561215ea3879cb1cbbf272867e2efda62476f240fb58c64de6b393ae19246741", size = 4316030, upload-time = "2026-05-04T22:58:25.581Z" }, - { url = 
"https://files.pythonhosted.org/packages/fd/36/a608b98337af3cb2aff4818e406649d30572b7031918b04c87d979495348/cryptography-48.0.0-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:ad64688338ed4bc1a6618076ba75fd7194a5f1797ac60b47afe926285adb3166", size = 4689640, upload-time = "2026-05-04T22:58:27.747Z" }, - { url = "https://files.pythonhosted.org/packages/dd/a6/825010a291b4438aecc1f568bc428189fc1175515223632477c07dc0a6df/cryptography-48.0.0-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:906cbf0670286c6e0044156bc7d4af9cbb0ef6db9f73e52c3ec56ba6bdde5336", size = 5237657, upload-time = "2026-05-04T22:58:29.848Z" }, - { url = "https://files.pythonhosted.org/packages/b9/09/4e76a09b4caa29aad535ddc806f5d4c5d01885bd978bd984fbc6ca032cae/cryptography-48.0.0-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:ea8990436d914540a40ab24b6a77c0969695ed52f4a4874c5137ccf7045a7057", size = 4732362, upload-time = "2026-05-04T22:58:32.009Z" }, - { url = "https://files.pythonhosted.org/packages/18/78/444fa04a77d0cb95f417dda20d450e13c56ba8e5220fc892a1658f44f882/cryptography-48.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:c18684a7f0cc9a3cb60328f496b8e3372def7c5d2df39ac267878b05565aaaae", size = 4819580, upload-time = "2026-05-04T22:58:34.254Z" }, - { url = "https://files.pythonhosted.org/packages/38/85/ea67067c70a1fd4be2c63d35eeed82658023021affccc7b17705f8527dd2/cryptography-48.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:9be5aafa5736574f8f15f262adc81b2a9869e2cfe9014d52a44633905b40d52c", size = 4963283, upload-time = "2026-05-04T22:58:36.376Z" }, - { url = "https://files.pythonhosted.org/packages/75/54/cc6d0f3deac3e81c7f847e8a189a12b6cdd65059b43dad25d4316abd849a/cryptography-48.0.0-cp314-cp314t-win32.whl", hash = "sha256:c17dfe85494deaeddc5ce251aebd1d60bbe6afc8b62071bb0b469431a000124f", size = 3270954, upload-time = "2026-05-04T22:58:38.791Z" }, - { url = 
"https://files.pythonhosted.org/packages/49/67/cc947e288c0758a4e5473d1dcb743037ab7785541265a969240b8885441a/cryptography-48.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:27241b1dc9962e056062a8eef1991d02c3a24569c95975bd2322a8a52c6e5e12", size = 3797313, upload-time = "2026-05-04T22:58:40.746Z" }, - { url = "https://files.pythonhosted.org/packages/f2/63/61d4a4e1c6b6bab6ce1e213cd36a24c415d90e76d78c5eb8577c5541d2e8/cryptography-48.0.0-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:58d00498e8933e4a194f3076aee1b4a97dfec1a6da444535755822fe5d8b0b86", size = 7983482, upload-time = "2026-05-04T22:58:43.769Z" }, - { url = "https://files.pythonhosted.org/packages/d5/ac/f5b5995b87770c693e2596559ffafe195b4033a57f14a82268a2842953f3/cryptography-48.0.0-cp39-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:614d0949f4790582d2cc25553abd09dd723025f0c0e7c67376a1d77196743d6e", size = 4683266, upload-time = "2026-05-04T22:58:46.064Z" }, - { url = "https://files.pythonhosted.org/packages/ec/c6/8b14f67e18338fbc4adb76f66c001f5c3610b3e2d1837f268f47a347dbbb/cryptography-48.0.0-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7ce4bfae76319a532a2dc68f82cc32f5676ee792a983187dac07183690e5c66f", size = 4696228, upload-time = "2026-05-04T22:58:48.22Z" }, - { url = "https://files.pythonhosted.org/packages/ea/73/f808fbae9514bd91b47875b003f13e284c8c6bdfd904b7944e803937eec1/cryptography-48.0.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:2eb992bbd4661238c5a397594c83f5b4dc2bc5b848c365c8f991b6780efcc5c7", size = 4689097, upload-time = "2026-05-04T22:58:50.9Z" }, - { url = "https://files.pythonhosted.org/packages/93/01/d86632d7d28db8ae83221995752eeb6639ffb374c2d22955648cf8d52797/cryptography-48.0.0-cp39-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:22a5cb272895dce158b2cacdfdc3debd299019659f42947dbdac6f32d68fe832", size = 5283582, upload-time = "2026-05-04T22:58:53.017Z" }, - { url = 
"https://files.pythonhosted.org/packages/02/e1/50edc7a50334807cc4791fc4a0ce7468b4a1416d9138eab358bfc9a3d70b/cryptography-48.0.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:2b4d59804e8408e2fea7d1fbaf218e5ec984325221db76e6a241a9abd6cdd95c", size = 4730479, upload-time = "2026-05-04T22:58:55.611Z" }, - { url = "https://files.pythonhosted.org/packages/6f/af/99a582b1b1641ff5911ac559beb45097cf79efd4ead4657f578ef1af2d47/cryptography-48.0.0-cp39-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:984a20b0f62a26f48a3396c72e4bc34c66e356d356bf370053066b3b6d54634a", size = 4326481, upload-time = "2026-05-04T22:58:57.607Z" }, - { url = "https://files.pythonhosted.org/packages/90/ee/89aa26a06ef0a7d7611788ffd571a7c50e368cc6a4d5eef8b4884e866edb/cryptography-48.0.0-cp39-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:5a5ed8fde7a1d09376ca0b40e68cd59c69fe23b1f9768bd5824f54681626032a", size = 4688713, upload-time = "2026-05-04T22:59:00.077Z" }, - { url = "https://files.pythonhosted.org/packages/70/ba/bcb1b0bb7a33d4c7c0c4d4c7874b4a62ae4f56113a5f4baefa362dfb1f0f/cryptography-48.0.0-cp39-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:8cd666227ef7af430aa5914a9910e0ddd703e75f039cef0825cd0da71b6b711a", size = 5238165, upload-time = "2026-05-04T22:59:02.317Z" }, - { url = "https://files.pythonhosted.org/packages/c9/70/ca4003b1ce5ca3dc3186ada51908c8a9b9ff7d5cab83cc0d43ee14ec144f/cryptography-48.0.0-cp39-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:9071196d81abc88b3516ac8cdfad32e2b66dd4a5393a8e68a961e9161ddc6239", size = 4729947, upload-time = "2026-05-04T22:59:05.255Z" }, - { url = "https://files.pythonhosted.org/packages/44/a0/4ec7cf774207905aef1a8d11c3750d5a1db805eb380ee4e16df317870128/cryptography-48.0.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:1e2d54c8be6152856a36f0882ab231e70f8ec7f14e93cf87db8a2ed056bf160c", size = 4822059, upload-time = "2026-05-04T22:59:07.802Z" }, - { url = 
"https://files.pythonhosted.org/packages/1e/75/a2e55f99c16fcac7b5d6c1eb19ad8e00799854d6be5ca845f9259eae1681/cryptography-48.0.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a5da777e32ffed6f85a7b2b3f7c5cbc88c146bfcd0a1d7baf5fcc6c52ee35dd4", size = 4960575, upload-time = "2026-05-04T22:59:09.851Z" }, - { url = "https://files.pythonhosted.org/packages/b8/23/6e6f32143ab5d8b36ca848a502c4bcd477ae75b9e1677e3530d669062578/cryptography-48.0.0-cp39-abi3-win32.whl", hash = "sha256:77a2ccbbe917f6710e05ba9adaa25fb5075620bf3ea6fb751997875aff4ae4bd", size = 3279117, upload-time = "2026-05-04T22:59:12.019Z" }, - { url = "https://files.pythonhosted.org/packages/9d/9a/0fea98a70cf1749d41d738836f6349d97945f7c89433a259a6c2642eefeb/cryptography-48.0.0-cp39-abi3-win_amd64.whl", hash = "sha256:16cd65b9330583e4619939b3a3843eec1e6e789744bb01e7c7e2e62e33c239c8", size = 3792100, upload-time = "2026-05-04T22:59:14.884Z" }, - { url = "https://files.pythonhosted.org/packages/be/d2/024b5e06be9d44cb021fb0e1a03d34d63989cf56a0fe62f3dfbab695b9b4/cryptography-48.0.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:84cf79f0dc8b36ac5da873481716e87aef31fcfa0444f9e1d8b4b2cece142855", size = 3950391, upload-time = "2026-05-04T22:59:17.415Z" }, - { url = "https://files.pythonhosted.org/packages/bc/17/3861e17c56fa0fd37491a14a8673fdb77c57fc5693cafe745ea8b06dba75/cryptography-48.0.0-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:fdfef35d751d510fcef5252703621574364fec16418c4a1e5e1055248401054b", size = 4637126, upload-time = "2026-05-04T22:59:20.197Z" }, - { url = "https://files.pythonhosted.org/packages/f0/0a/7e226dbff530f21480727eb764973a7bff2b912f8e15cd4f129e71b56d1d/cryptography-48.0.0-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:0890f502ddf7d9c6426129c3f49f5c0a39278ed7cd6322c8755ffca6ee675a13", size = 4667270, upload-time = "2026-05-04T22:59:22.647Z" }, - { url = 
"https://files.pythonhosted.org/packages/3b/f2/5a72274ca9f1b2a8b44a662ee0bf1b435909deb473d6f97bcd035bcdbc71/cryptography-48.0.0-pp311-pypy311_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:ecde28a596bead48b0cfd2a1b4416c3d43074c2d785e3a398d7ec1fc4d0f7fbb", size = 4636797, upload-time = "2026-05-04T22:59:24.912Z" }, - { url = "https://files.pythonhosted.org/packages/b4/e1/48cedb2fe63626e91ded1edad159e2a4fb8b6906c4425eb7749673077ce7/cryptography-48.0.0-pp311-pypy311_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:4defde8685ae324a9eb9d818717e93b4638ef67070ac9bc15b8ca85f63048355", size = 4666800, upload-time = "2026-05-04T22:59:27.474Z" }, - { url = "https://files.pythonhosted.org/packages/a2/ca/7e8365deec19afb2b2c7be7c1c0aa8f99633b54e90c570999acda93260fc/cryptography-48.0.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:db63bf618e5dea46c07de12e900fe1cdd2541e6dc9dbae772a70b7d4d4765f6a", size = 3739536, upload-time = "2026-05-04T22:59:29.61Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/12/45/870e7f4bef50e5f53b9f51d4428aee5290eedf58ba443f16b1ebb7ab8e66/cryptography-48.0.1.tar.gz", hash = "sha256:266f4ee051abb2f725b74ef8072b521ce1feacf685a3364fa6a6b45548db791a", size = 832989, upload-time = "2026-06-09T22:32:31.8Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1b/bc/ee4137cbbe105652c0ee4252792b78fc8e7afa4b8e61d9d5dc05a7f45731/cryptography-48.0.1-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:3e4a1a3232eef2e6c732827d5722db29a0cc8b27af2a4d865b094cf954be9ca1", size = 8008324, upload-time = "2026-06-09T22:31:00.702Z" }, + { url = "https://files.pythonhosted.org/packages/d5/85/6379d42181bfc713094f081360fc5784d6c816b599d45e7f082502d173ce/cryptography-48.0.1-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:32143b24adb918f078134e1e230f1eb8cc04886b92c28b5f0041aaf3e5699225", size = 4696243, upload-time = "2026-06-09T22:32:33.446Z" }, + { url = 
"https://files.pythonhosted.org/packages/9c/87/c85d147b53323c7eb4d850920c8901377323c2a0ff8d79c262d4fee89aa2/cryptography-48.0.1-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f0d27a5696721ef7a672b8c810f6aded391058e0b9486e63e6d93baf765da691", size = 4713235, upload-time = "2026-06-09T22:31:40.141Z" }, + { url = "https://files.pythonhosted.org/packages/79/58/67cbf8cf1ee7c54b439ca07bbecf8362c07afc11a3724fea70f745784add/cryptography-48.0.1-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:eb86ce1af36fe65041b6db9a8bb064ee621a7e5fded0f80d475ec243477cd242", size = 4702323, upload-time = "2026-06-09T22:31:42.191Z" }, + { url = "https://files.pythonhosted.org/packages/89/c6/24266ac10c47f6cd2a865f4446062b466da1d1f10b27189eac00e61bf0c9/cryptography-48.0.1-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:b024e784ad6c077ee0147b35ea9cbfc1e34e1fd4c1dcca214c2794d73a12df08", size = 5300085, upload-time = "2026-06-09T22:31:58.703Z" }, + { url = "https://files.pythonhosted.org/packages/d2/bb/cc4b78784f97efc8c5874c2a9743708d172be6663024b34a0467885ae0c8/cryptography-48.0.1-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3752f2dbc8f07a30aad2932c986cea495b03bb554887828225da104f732852b6", size = 4746137, upload-time = "2026-06-09T22:31:31.01Z" }, + { url = "https://files.pythonhosted.org/packages/1f/52/0c44de3f5267f8fbe8e835138017522a333436166e406f0db9b9e6e3033f/cryptography-48.0.1-cp311-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:bd81490cd5801d755cf97bb68ac191f14b708470b1c7cf4580f669b9c9264cd8", size = 4333867, upload-time = "2026-06-09T22:32:28.096Z" }, + { url = "https://files.pythonhosted.org/packages/9a/2e/772d7adbfa931537bc401640b7cac9976bff689bda187833e5d63b428e49/cryptography-48.0.1-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:66fd0771e7b9c6dcd44cf1120690d2338d16d72795cf40cae2786a39eba65429", size = 4701805, upload-time = "2026-06-09T22:31:38.284Z" }, + { url = 
"https://files.pythonhosted.org/packages/f8/a3/b06844f303873493c963caf581c04df31c7035e0c1b0f02c4814d319ec80/cryptography-48.0.1-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:3fd2ca57062b241c856670b073487d2e86c4637937ca5601e48f97bf8e11fc8f", size = 5258461, upload-time = "2026-06-09T22:31:04.187Z" }, + { url = "https://files.pythonhosted.org/packages/9f/13/8b765e2e12b07c74941caadb9d1c8fdc006c4dfbf2b8f2d610519758954d/cryptography-48.0.1-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:0ee6ea481db1ab889cba043ec1eda17bb9c1ea79db6722f779c3667f9f70322f", size = 4745488, upload-time = "2026-06-09T22:32:30.07Z" }, + { url = "https://files.pythonhosted.org/packages/2e/aa/48972bce55049b32a94f4907eda4d75fa385aad8a39506cc2fc72196ecf0/cryptography-48.0.1-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:f2ceef93cb096aa3c4cc4b5c94ca6131f9196d28c64d6111533402a9b2054d41", size = 4830256, upload-time = "2026-06-09T22:31:43.868Z" }, + { url = "https://files.pythonhosted.org/packages/47/a2/e5079a032fb85cf6005046ca92bbd78b0c82dad2b5751ab8c311659da06f/cryptography-48.0.1-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:9bd3f92d76217892b15df84ca256c2c113d386fdda7a7d8691aeeced976507c6", size = 4979117, upload-time = "2026-06-09T22:31:05.845Z" }, + { url = "https://files.pythonhosted.org/packages/b7/a0/8f50cae9c74e718ed769d63ed5c74bd0ea830c9550a74629cebd1b9c7bc7/cryptography-48.0.1-cp311-abi3-win32.whl", hash = "sha256:b9a32b876490d66c8bcc9963ef220199569748434ab01a9d6aaeabf88e7f5158", size = 3304154, upload-time = "2026-06-09T22:32:16.845Z" }, + { url = "https://files.pythonhosted.org/packages/c5/69/0572c77dbace6fef72f33755bd52ea399c71367250d366237f8691826b9e/cryptography-48.0.1-cp311-abi3-win_amd64.whl", hash = "sha256:39489bfca54c7a1f6b297efcd8bc608ab92d16c4ca631b0cad4da46724588b24", size = 3817138, upload-time = "2026-06-09T22:32:00.388Z" }, + { url = 
"https://files.pythonhosted.org/packages/42/06/3e768b4c3bc78201583fa35a0e18f640dd782ff41afba88f8545481a8874/cryptography-48.0.1-cp314-cp314t-macosx_10_9_universal2.whl", hash = "sha256:f817adc181390bd54f2f700107a7419040fb7c1bdf2fc26f36551a06a68c3345", size = 7989830, upload-time = "2026-06-09T22:31:07.8Z" }, + { url = "https://files.pythonhosted.org/packages/8a/13/6476736484b94041110c8340a3eb63962fea4975baea8cb4a512adb44d4d/cryptography-48.0.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d5d30989c6917b478b5817902e85fddaea2261efa8648383d965381ccb9e1ac4", size = 4689201, upload-time = "2026-06-09T22:31:09.745Z" }, + { url = "https://files.pythonhosted.org/packages/79/62/65a87f34d2a431546e2509b85d55e8c90df86d668f6731da64d538512ac2/cryptography-48.0.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:df637c05205ea7c1d7fbcbe54bbfea648a52951155f997af13d895d0ecc96991", size = 4702822, upload-time = "2026-06-09T22:32:24.409Z" }, + { url = "https://files.pythonhosted.org/packages/7f/59/810b5204b0a9b10f4b6bc06bd551a8b609803cd931806bc3b71884b225e5/cryptography-48.0.1-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:869c3b8a53bfe27147832df48b32adadf558249d50e76cb3769d40e986b13265", size = 4694875, upload-time = "2026-06-09T22:32:08.737Z" }, + { url = "https://files.pythonhosted.org/packages/24/dc/d8ca05ffea724eec6d232ea6f18e74c269eb6bdfdcc9bfba689790d1325f/cryptography-48.0.1-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:e361afba8918070d376df76f408a4f67fec0ee9cff81a99e48fe9a233ef59e17", size = 5290385, upload-time = "2026-06-09T22:31:15.212Z" }, + { url = "https://files.pythonhosted.org/packages/03/8c/3be6cb4da181f5bb6c19cf560c2359d60644a6b5fc5b57854e528f47b296/cryptography-48.0.1-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:d069066deead00ac7f090be101be875a06855908f7ec004c27b8fefb4acfb411", size = 4737082, upload-time = "2026-06-09T22:32:22.66Z" }, + { url = 
"https://files.pythonhosted.org/packages/aa/f6/d5f60a5a1434dbfd949e227fd0065d194c7e6b6ac526b17f5c06152b8231/cryptography-48.0.1-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:09f73a725d582cef64b91281a322cd798d14a33b2b6f2b7ad9531dc336d84c02", size = 4325328, upload-time = "2026-06-09T22:32:10.777Z" }, + { url = "https://files.pythonhosted.org/packages/17/b7/ba75dd947a14b6ad907b01ae8f6b5b348cdd1b48142f0063dee9e20c1d9d/cryptography-48.0.1-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:15254441469dd6bf027039453288e2072124f8b6603563f5d759e1c9b69273fa", size = 4694530, upload-time = "2026-06-09T22:31:53.105Z" }, + { url = "https://files.pythonhosted.org/packages/62/29/50d6b9e8aff12d8b67afaeb3569335e32dc83a5723e3bbded24fdac9f809/cryptography-48.0.1-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:8ace4507d1e6533c125f4fac754f8bb8b6a74c08e92179dabd7e16571a3efbf3", size = 5245046, upload-time = "2026-06-09T22:31:25.774Z" }, + { url = "https://files.pythonhosted.org/packages/9f/04/618f4115cfc0add0838c82507aa18a346089428da8653ad38b3ff36f5cb3/cryptography-48.0.1-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:b4e391975f038e66432328639620a4aff2d307513b004f1ca06d6225bced815c", size = 4736660, upload-time = "2026-06-09T22:32:12.676Z" }, + { url = "https://files.pythonhosted.org/packages/24/9c/06e062462a0de28a3b3911322eded4c16deb9f441b1b7575d3dc59488ab5/cryptography-48.0.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:42fcd8e26fe555d9b3577a135f5091fefa0aa4e99129c23fb56787a1bd4ada72", size = 4822229, upload-time = "2026-06-09T22:31:17.062Z" }, + { url = "https://files.pythonhosted.org/packages/f4/be/0561971eaaee4b8a0e7d5113c536921063ab91aaf23278ac374eaf881e11/cryptography-48.0.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:c1400da5e32a43253392277eac7490a60e497d810a63dd5608d71bbd7af507c9", size = 4966364, upload-time = "2026-06-09T22:31:32.842Z" }, + { url = 
"https://files.pythonhosted.org/packages/a4/27/728c77876f12b000820b69ae490f3c4083775e79e07827e9e60be07ad209/cryptography-48.0.1-cp314-cp314t-win32.whl", hash = "sha256:0df56b056bc17c1b7d6821dfa65216e62bd232d8ab05eb3db44e71d235651471", size = 3278498, upload-time = "2026-06-09T22:31:29.154Z" }, + { url = "https://files.pythonhosted.org/packages/06/e3/79a612c6d7b1e6ee0edd43633d53035bec2cfb78c82b76f7864f39e36f34/cryptography-48.0.1-cp314-cp314t-win_amd64.whl", hash = "sha256:9de21387aa95e2a895823d0745b430bed4f33503ba9ab5e0b5311f33e37d66d2", size = 3798790, upload-time = "2026-06-09T22:31:56.697Z" }, + { url = "https://files.pythonhosted.org/packages/ca/6c/00fa2a95997164c8b2072ce327c23d4ab20809ccc323ea5fab91e53a4bba/cryptography-48.0.1-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:4fdc69f8e4316bcf0c8c8ec1f26f285d12e8142d88d96c876a59a03be3f6ae67", size = 7987408, upload-time = "2026-06-09T22:32:20.777Z" }, + { url = "https://files.pythonhosted.org/packages/b0/d9/45f309a7e4e5f3f8f121d6d3be9e94024a7726ec598d6e08ae04edb2f04d/cryptography-48.0.1-cp39-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:48fe40804d4caa2288f24e70ca8c64c42dd826da0ad7e4f1b41b2128d679e6c8", size = 4690196, upload-time = "2026-06-09T22:31:54.74Z" }, + { url = "https://files.pythonhosted.org/packages/5f/9f/a1bc8bcc798811b8527eb374bbccf30a3f3e806829d967118222bf1125eb/cryptography-48.0.1-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:86be3b1b0b6bf09482fb50a979c508d2950ed95f5621ec77f4e385962006b83a", size = 4696782, upload-time = "2026-06-09T22:31:45.615Z" }, + { url = "https://files.pythonhosted.org/packages/66/c2/81a4fb4e4373c500bb526bc337ac5719dd31dd15b970b84a238168c6aa08/cryptography-48.0.1-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:4ab0a343c807bbcd90c971cd1ecf072937cd01847a9e002bef88fb47ac6be577", size = 4696618, upload-time = "2026-06-09T22:31:11.564Z" }, + { url = 
"https://files.pythonhosted.org/packages/e5/0b/aa68b221dde92d09cb29a024ede17550ee21e77a404e59fc093c82bb51e1/cryptography-48.0.1-cp39-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:9621de99d2da096006b629979efd8ae7eb2d8b822488d0c89ee4000c306c59b1", size = 5289970, upload-time = "2026-06-09T22:31:20.368Z" }, + { url = "https://files.pythonhosted.org/packages/78/13/fba657f958d2af66ea959a4ba01212632089249d34af1ae48054136344d7/cryptography-48.0.1-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:88c852a0ae366e262e5a1744b685e6a433dc8788dd2a277e418bf4904203609d", size = 4731873, upload-time = "2026-06-09T22:31:22.253Z" }, + { url = "https://files.pythonhosted.org/packages/4c/4c/9a964756d24a26b3e34dfcb16f961b89838786e6700b635b0d1e3adff4b6/cryptography-48.0.1-cp39-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:43c5835e2cb98c8733d86f57d6fc879b613f5c3478607281c3e36daffc6dd8a6", size = 4330804, upload-time = "2026-06-09T22:31:36.56Z" }, + { url = "https://files.pythonhosted.org/packages/4b/0f/a10f3a6eb12950a10e3a874070283aa2dd5875b2bfd15fad8a3e17b3f13e/cryptography-48.0.1-cp39-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:fe0180af5bf9236518a087e35bf2d9a347d5f5f51e63c579d683ddff424e3d46", size = 4696217, upload-time = "2026-06-09T22:31:13.351Z" }, + { url = "https://files.pythonhosted.org/packages/f3/6f/5cd12f951165ea73ef85266775d97e4c763b2474ccfd816dd69d3a18d6f8/cryptography-48.0.1-cp39-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:b7a2d1a937a738a881737cec135a38bb61470589b17515b9f73f571d0ae10401", size = 5245252, upload-time = "2026-06-09T22:32:02.193Z" }, + { url = "https://files.pythonhosted.org/packages/68/ab/8aaa12e4516ec4464033ab79b6f3b592bd5a92102467c4ace8a0d970203f/cryptography-48.0.1-cp39-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:b74ca3b8e5ecdd833bf6a002ca41b4793bb27fb8f1c06ffaf2643c9e9140e31b", size = 4731388, upload-time = "2026-06-09T22:32:04.019Z" }, + { url = 
"https://files.pythonhosted.org/packages/1b/24/50027ea4dca85ec1f40688f3c24fb32ccacd520583c9592c3cc95628e6fb/cryptography-48.0.1-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:2c37f2461406063b417837f5f3daab668652acd82423efcd7f0a9f04be972de1", size = 4824186, upload-time = "2026-06-09T22:32:18.707Z" }, + { url = "https://files.pythonhosted.org/packages/52/41/04cb5eb17085ade6f50cc611fb657df6a0f5885350de8764ece89c050197/cryptography-48.0.1-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:86fe77abb1bd87afb251d4d02ada7ecf53a32cee9b67d976abb2e45a13297475", size = 4964539, upload-time = "2026-06-09T22:31:18.793Z" }, + { url = "https://files.pythonhosted.org/packages/36/bf/ed70785c496e89d7e73b7cda2d21f2447fd6d4e821714b8d04ff217fed92/cryptography-48.0.1-cp39-abi3-win32.whl", hash = "sha256:6b2c0c3e6ccf3ade7750f836ef3ee36eea250cc467d45c256895573ac08cc6f1", size = 3282307, upload-time = "2026-06-09T22:30:53.162Z" }, + { url = "https://files.pythonhosted.org/packages/b3/ff/371ea7d252656ee1eb6d83eeeef3d1d0c6baf1d6497687d081ea03814670/cryptography-48.0.1-cp39-abi3-win_amd64.whl", hash = "sha256:9a49ca6c81417f6a5edb50375a60cccdd70fa0a91a5211829dbea74eba94d2ac", size = 3793408, upload-time = "2026-06-09T22:32:15.191Z" }, + { url = "https://files.pythonhosted.org/packages/a9/d3/eb4e394e587341fdad09a09101fa76478ead3a78b0ad63e55c22f0d75c02/cryptography-48.0.1-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:08a597acce1ff37f347400087776599e2348a3a8bc53b44120e463cd274efe4a", size = 3951747, upload-time = "2026-06-09T22:31:23.871Z" }, + { url = "https://files.pythonhosted.org/packages/e0/4a/3f43451b4f858bfceaaaffc649e6e787e8d4fb332a1d443af39ab02cc8f1/cryptography-48.0.1-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:735824ec41b7f74a7c45fb1591349333e4c696cb6c044e5f46356e560143e4cd", size = 4641226, upload-time = "2026-06-09T22:31:02.532Z" }, + { url = 
"https://files.pythonhosted.org/packages/73/4e/855584c2c23b09e4ce2d3b9c30e983e679cd60b068c513c6bbdb91e11782/cryptography-48.0.1-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:92a46e1d638daa264ba2971c0b0489c9409787943efae4d60ffda3d091ef832c", size = 4668958, upload-time = "2026-06-09T22:32:06.213Z" }, + { url = "https://files.pythonhosted.org/packages/42/3b/d35750e41d803d1e516fd6d6011f065424924da7af1748cef4cc9cb3ede1/cryptography-48.0.1-pp311-pypy311_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:7e234ac052af99f2700826a5c29ea99d9c1b1f80341cde62d11c8154dc8e0bd9", size = 4640793, upload-time = "2026-06-09T22:32:26.331Z" }, + { url = "https://files.pythonhosted.org/packages/ca/aa/cdb7181fe865285e87e96825aaab239400f1de0c3bfba9bd9769b79f1a92/cryptography-48.0.1-pp311-pypy311_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:33842cf0888951cef5bc7ac724ab844a42044c1727b967b7f8997289a0464f92", size = 4668505, upload-time = "2026-06-09T22:31:27.534Z" }, + { url = "https://files.pythonhosted.org/packages/5d/8c/ce3823c06c2804f194f9e64f0d67fa3f4094a39f2bb1a990cd03603af8fc/cryptography-48.0.1-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:6184ca7b174f28d7c703f1290d4b297217c45355f77a98f67e9b7f14549ac54a", size = 3742204, upload-time = "2026-06-09T22:31:34.773Z" }, ] [[package]] @@ -682,11 +682,11 @@ wheels = [ [[package]] name = "filelock" -version = "3.29.1" +version = "3.29.3" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/1f/f9/f38573ed5844586db374d085911740a501ccfa373b455fc9413f09f85237/filelock-3.29.1.tar.gz", hash = "sha256:d97e6b1b9757569626c58caa07dc4beb1613f4a2938b1e8cc81afca398906c9e", size = 59335, upload-time = "2026-06-03T15:19:04.053Z" } +sdist = { url = "https://files.pythonhosted.org/packages/91/f5/3557bf28e0f1943e4849154c821533706e6dea010f96fb6aa0b6949037d1/filelock-3.29.3.tar.gz", hash = "sha256:7fc1b3f39cf172fd8203812043c57b8a65aef9969f38b6704f628b881f761a84", size = 61956, 
upload-time = "2026-06-10T17:37:11.832Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/4c/a0/614c5fe402fd88951df45f4dda2fa3b4e17a99ecd92340771929169b3b95/filelock-3.29.1-py3-none-any.whl", hash = "sha256:85199dfd706869641b72b2e8955d5416a4b2b7dc4b0e8e6d97b4cc1299a6983b", size = 40750, upload-time = "2026-06-03T15:19:02.959Z" }, + { url = "https://files.pythonhosted.org/packages/81/8f/b61d427c4f49a8bdadc93f4e7e74df8a6df6f77ee6e26bf0df53d3925363/filelock-3.29.3-py3-none-any.whl", hash = "sha256:e58333029cc9b925f39aad59b1d8f0a1ad836af4e60d7217f4a4dba87461261d", size = 42324, upload-time = "2026-06-10T17:37:10.37Z" }, ] [[package]] @@ -2407,16 +2407,16 @@ wheels = [ [[package]] name = "readme-renderer" -version = "44.0" +version = "45.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "docutils" }, { name = "nh3" }, { name = "pygments" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/5a/a9/104ec9234c8448c4379768221ea6df01260cd6c2ce13182d4eac531c8342/readme_renderer-44.0.tar.gz", hash = "sha256:8712034eabbfa6805cacf1402b4eeb2a73028f72d1166d6f5cb7f9c047c5d1e1", size = 32056, upload-time = "2024-07-08T15:00:57.805Z" } +sdist = { url = "https://files.pythonhosted.org/packages/02/51/d3a6ea424652c60f05600d8c2e01a55c913755e7cdad64afabbd1aa16f44/readme_renderer-45.0.tar.gz", hash = "sha256:030a8fac74904f8fba11ad1bb6964e3f76e896dc7e5e71f16af190c9056696d1", size = 36172, upload-time = "2026-06-09T21:05:17.37Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e1/67/921ec3024056483db83953ae8e48079ad62b92db7880013ca77632921dd0/readme_renderer-44.0-py3-none-any.whl", hash = "sha256:2fbca89b81a08526aadf1357a8c2ae889ec05fb03f5da67f9769c9a592166151", size = 13310, upload-time = "2024-07-08T15:00:56.577Z" }, + { url = "https://files.pythonhosted.org/packages/97/1b/295bf2fa3e740131778065e5ffa2c481f0e7210182d408e9a2c244ff5b0c/readme_renderer-45.0-py3-none-any.whl", hash = 
"sha256:3385ed220117104a2bceb4a9dac8c5fdf6d1f96890d7ea2a9c7174fd5c84091f", size = 14134, upload-time = "2026-06-09T21:05:15.85Z" }, ] [[package]] From 66aa8abec1e02b34c2f07e2ba9eb3fbaac687f15 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Wed, 10 Jun 2026 23:51:57 +0500 Subject: [PATCH 230/318] feat(html): name the top memory consumer in the runtime summary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Turn the Peak RSS metric into a conclusion: the reader records AggregatesView.peak_memory_span (the span with the largest rss_delta) and the cockpit highlight names it — "Top memory consumer: in · %" — so the peak points at who took the memory instead of leaving the reader to do the math. --- codeclone/observability/render_html.py | 16 ++++++++++++++- codeclone/observability/store/reader.py | 5 +++++ codeclone/observability/views.py | 1 + tests/test_observability_reader.py | 5 +++++ tests/test_observability_render.py | 27 +++++++++++++++++++++++++ 5 files changed, 53 insertions(+), 1 deletion(-) diff --git a/codeclone/observability/render_html.py b/codeclone/observability/render_html.py index 49370d87..36ce6d66 100644 --- a/codeclone/observability/render_html.py +++ b/codeclone/observability/render_html.py @@ -334,7 +334,21 @@ def _highlights(agg: AggregatesView) -> str: _ms(span.duration_ms), ) ) - if agg.max_rss_delta_mb is not None: + if agg.peak_memory_span is not None and agg.max_rss_delta_mb: + # Name who took the memory, not just how much — the metric becomes a + # conclusion ("X grew the RSS", with its share of the peak). 
+ peak = agg.peak_memory_span + share = round((peak.rss_delta_mb or 0.0) / agg.max_rss_delta_mb * 100) + rows.append( + _lead_row( + "Top memory consumer", + f"{_surface_badge(peak.surface)}" + f'{_esc(peak.name)}' + f'in {_esc(peak.operation_name)}', + f"{_mb(peak.rss_delta_mb)} · {share}%", + ) + ) + elif agg.max_rss_delta_mb is not None: rows.append( _lead_row( "Peak memory Δ", diff --git a/codeclone/observability/store/reader.py b/codeclone/observability/store/reader.py index 7accf597..7ba4e3d9 100644 --- a/codeclone/observability/store/reader.py +++ b/codeclone/observability/store/reader.py @@ -287,6 +287,10 @@ def _aggregates( key=lambda s: (-s.duration_ms, s.operation_id, s.span_id), ) semantic_costs = tuple(s for s in span_costs if s.surface == "memory") + memory_ranked = sorted( + (s for s in span_costs if s.rss_delta_mb is not None), + key=lambda s: (-(s.rss_delta_mb or 0.0), s.operation_id, s.span_id), + ) return AggregatesView( operation_count=len(flat), slowest=slowest, @@ -297,6 +301,7 @@ def _aggregates( mcp_tools=_mcp_tool_aggregates(flat), slowest_span=span_costs[0] if span_costs else None, semantic_costs=semantic_costs[:_SEMANTIC_COST_LIMIT], + peak_memory_span=memory_ranked[0] if memory_ranked else None, ) diff --git a/codeclone/observability/views.py b/codeclone/observability/views.py index 1f7daf9b..56b75226 100644 --- a/codeclone/observability/views.py +++ b/codeclone/observability/views.py @@ -96,6 +96,7 @@ class AggregatesView: mcp_tools: tuple[McpToolAggregate, ...] = () slowest_span: SpanCostView | None = None semantic_costs: tuple[SpanCostView, ...] 
= () + peak_memory_span: SpanCostView | None = None @dataclass(frozen=True, slots=True) diff --git a/tests/test_observability_reader.py b/tests/test_observability_reader.py index 259f20fd..6012d871 100644 --- a/tests/test_observability_reader.py +++ b/tests/test_observability_reader.py @@ -138,6 +138,11 @@ def test_build_trace_view_tree_and_aggregates(tmp_path: Path) -> None: assert job_row.offset_ms == 1000.0 assert rows[("memory.semantic.reindex", "span")].depth == 2 + # Top memory consumer: the reindex span carries the largest rss delta. + assert agg.peak_memory_span is not None + assert agg.peak_memory_span.name == "memory.semantic.reindex" + assert agg.peak_memory_span.rss_delta_mb == 6144.0 + def test_build_trace_view_focus_by_operation_id(tmp_path: Path) -> None: _seed(tmp_path) diff --git a/tests/test_observability_render.py b/tests/test_observability_render.py index 93aaa0ba..afb2d609 100644 --- a/tests/test_observability_render.py +++ b/tests/test_observability_render.py @@ -186,6 +186,33 @@ def test_render_cockpit_sections() -> None: assert "469" in html +def test_render_peak_memory_contributor() -> None: + consumer = SpanCostView( + span_id="s", + name="memory.semantic.reindex", + surface="memory", + operation_id="W", + operation_name="memory.projection.job", + duration_ms=1700.0, + rss_delta_mb=480.0, + ) + trace = TraceView( + schema_version="1.0", + window_started_at_utc="t", + window_ended_at_utc="t", + aggregates=AggregatesView( + operation_count=1, + max_rss_delta_mb=600.0, + peak_memory_span=consumer, + ), + ) + html = render_trace_html(trace) + # The peak-memory highlight names the consumer + its share, not a bare number. 
+ assert "Top memory consumer" in html + assert "memory.semantic.reindex" in html + assert "80%" in html # 480 / 600 = 80% + + def test_render_waterfall_timeline() -> None: group = WaterfallGroup( correlation_id="corr1234abcd", From 06edcc47493e601ebb837e7de518759430c4218a Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Thu, 11 Jun 2026 12:20:39 +0500 Subject: [PATCH 231/318] feat(core): count SQL queries per observability span (29.DB) First slice of DB observability (performance-truth, not audit-truth): observability.runtime.record_db_query is a sqlite set_trace_callback that attributes each statement to the active span as a db_queries counter (plus db_writes for insert/update/delete/replace). instrument_db_connection registers it only when observability is enabled, so disabled processes pay zero per-query trace overhead; it is hooked into open_memory_db. The counter flows into the existing span counters, so "semantic.reindex is expensive" can now be read as "expensive due to N SQL reads". Resolves the add_counter forward-declared dead-code. Timing/rows/slow and the scattered raw-connect sites are later slices. --- codeclone/memory/schema.py | 8 ++++- codeclone/observability/__init__.py | 4 +++ codeclone/observability/runtime.py | 41 ++++++++++++++++++--- tests/test_observability_correlation.py | 47 +++++++++++++++++++++++++ 4 files changed, 95 insertions(+), 5 deletions(-) diff --git a/codeclone/memory/schema.py b/codeclone/memory/schema.py index 5963288a..8015d79f 100644 --- a/codeclone/memory/schema.py +++ b/codeclone/memory/schema.py @@ -233,12 +233,18 @@ def open_memory_db(path: Path) -> sqlite3.Connection: # synchronous=FULL: every commit survives unclean process exit. # Memory records are few, each governance-governed and valuable. 
- return open_sqlite_db( + conn = open_sqlite_db( path, ensure_schema=ensure_schema, foreign_keys=True, synchronous="FULL", ) + # Performance telemetry only: count SQL per active observability span so the + # cockpit can attribute span cost to DB work. No-op when disabled. + from ..observability import instrument_db_connection + + instrument_db_connection(conn) + return conn def ensure_schema(conn: sqlite3.Connection) -> None: diff --git a/codeclone/observability/__init__.py b/codeclone/observability/__init__.py index 806f8daa..c1dab3ce 100644 --- a/codeclone/observability/__init__.py +++ b/codeclone/observability/__init__.py @@ -20,9 +20,11 @@ bind_root, bootstrap, current_operation_context, + instrument_db_connection, is_observability_enabled, operation, payload_capture_enabled, + record_db_query, shutdown, span, ) @@ -33,9 +35,11 @@ "bind_root", "bootstrap", "current_operation_context", + "instrument_db_connection", "is_observability_enabled", "operation", "payload_capture_enabled", + "record_db_query", "shutdown", "span", ] diff --git a/codeclone/observability/runtime.py b/codeclone/observability/runtime.py index 0694a77d..cc384167 100644 --- a/codeclone/observability/runtime.py +++ b/codeclone/observability/runtime.py @@ -15,6 +15,7 @@ from __future__ import annotations +import sqlite3 import time import uuid from collections.abc import Iterator @@ -139,10 +140,9 @@ def __init__( self._status = "ok" self._counters: dict[str, int] = {} - # set_counter is wired by the 29.10 worker instrumentation. add_counter and - # set_reason_kind stay forward-declared until a caller needs them - # (loop-accumulation counters; post-hoc reason classification, e.g. semantic). - # codeclone: ignore[dead-code] + # set_counter is wired by the 29.10 worker instrumentation; add_counter by + # the 29.DB query-trace hook (record_db_query). set_reason_kind stays + # forward-declared until a caller needs post-hoc reason classification. 
def add_counter(self, key: str, value: int = 1) -> None: self._counters[key] = self._counters.get(key, 0) + value @@ -407,15 +407,48 @@ def span( ) +_DB_WRITE_KINDS = frozenset({"insert", "update", "delete", "replace"}) + + +def _classify_sql(sql: str) -> str: + stripped = sql.lstrip() + if not stripped: + return "" + return stripped.split(None, 1)[0].lower() + + +def record_db_query(sql: str) -> None: + """Trace-callback sink: attribute one SQL statement to the active span as a + ``db_queries`` counter (plus ``db_writes`` for mutations). No-op outside a + span. Performance telemetry only — never audit or contract truth. + """ + span_handle = _CURRENT_SPAN.get() + if span_handle is None: + return + span_handle.add_counter("db_queries", 1) + if _classify_sql(sql) in _DB_WRITE_KINDS: + span_handle.add_counter("db_writes", 1) + + +def instrument_db_connection(conn: sqlite3.Connection) -> None: + """Attach the per-span DB-query counter to ``conn``. No-op (and no per-query + trace overhead) when observability is disabled for this process. 
+ """ + if _ENABLED: + conn.set_trace_callback(record_db_query) + + __all__ = [ "OperationHandle", "SpanHandle", "bind_root", "bootstrap", "current_operation_context", + "instrument_db_connection", "is_observability_enabled", "operation", "payload_capture_enabled", + "record_db_query", "shutdown", "span", ] diff --git a/tests/test_observability_correlation.py b/tests/test_observability_correlation.py index ab202106..7124bec7 100644 --- a/tests/test_observability_correlation.py +++ b/tests/test_observability_correlation.py @@ -6,6 +6,8 @@ from __future__ import annotations +import json +import sqlite3 import subprocess from collections.abc import Iterator from pathlib import Path @@ -19,8 +21,10 @@ from codeclone.observability import ( bootstrap, current_operation_context, + instrument_db_connection, operation, shutdown, + span, ) from codeclone.observability.store.schema import ( observability_store_path, @@ -149,3 +153,46 @@ def test_mcp_analyze_repository_emits_pipeline_spans(tmp_path: Path) -> None: conn.close() names = {row[0] for row in rows} assert {"pipeline.discover", "pipeline.process", "pipeline.analyze"} <= names + + +def test_db_query_counter_attaches_to_active_span(tmp_path: Path) -> None: + bootstrap(ObservabilityConfig(enabled=True), root=tmp_path) + try: + with ( + operation(name="memory.projection.job", surface="memory"), + span(name="memory.semantic.reindex"), + ): + conn = sqlite3.connect(":memory:") + instrument_db_connection(conn) + conn.execute("CREATE TABLE t (a INTEGER)") + conn.execute("INSERT INTO t VALUES (1)") + conn.execute("INSERT INTO t VALUES (2)") + conn.execute("SELECT * FROM t").fetchall() + conn.close() + finally: + shutdown() + + obs = open_observability_store(observability_store_path(tmp_path)) + try: + row = obs.execute( + "SELECT counters_json FROM platform_spans " + "WHERE name='memory.semantic.reindex'" + ).fetchone() + finally: + obs.close() + counters = json.loads(row[0]) if row and row[0] else {} + # create + 2 inserts + 
select all land on the active span; only the inserts + # are writes. + assert counters.get("db_queries", 0) >= 4 + assert counters.get("db_writes", 0) == 2 + + +def test_instrument_db_connection_is_inert_when_disabled() -> None: + # Disabled process: no trace callback, no counting, no error, zero overhead. + conn = sqlite3.connect(":memory:") + try: + instrument_db_connection(conn) + conn.execute("CREATE TABLE t (a)") + assert current_operation_context() is None + finally: + conn.close() From 710e87cbf62913fb3be7032228bf615be0ce8d51 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Thu, 11 Jun 2026 12:29:58 +0500 Subject: [PATCH 232/318] feat(html): add DB cost table to the cockpit Aggregate the span-level db_queries/db_writes counters into a DB COST section (DbCostRow on AggregatesView.db_costs): per span class, the number of spans, total queries, total writes, queries-per-call (an N+1 signal) and the worst single instance. On real data this immediately shows memory.semantic.reindex is SQL-read-bound (1306 queries, 0 writes), turning "reindex is slow" into "reindex runs N queries". Op-level DB for spanless MCP operations is a later collection slice. 
--- codeclone/observability/render_html.py | 34 +++++++++++++++ codeclone/observability/store/reader.py | 24 +++++++++++ codeclone/observability/views.py | 18 ++++++++ tests/test_observability_reader.py | 56 +++++++++++++++++++++++++ tests/test_observability_render.py | 27 ++++++++++++ 5 files changed, 159 insertions(+) diff --git a/codeclone/observability/render_html.py b/codeclone/observability/render_html.py index 36ce6d66..32f67f52 100644 --- a/codeclone/observability/render_html.py +++ b/codeclone/observability/render_html.py @@ -25,6 +25,7 @@ from .views import ( AggregatesView, + DbCostRow, McpToolAggregate, OperationView, SpanCostView, @@ -571,6 +572,38 @@ def _waterfall(trace: TraceView) -> str: ) +def _db_row(row: DbCostRow) -> str: + per_call = round(row.total_queries / row.span_count) if row.span_count else 0 + return ( + f'{_esc(row.span_name)}' + f'{row.span_count}' + f'{row.total_queries}' + f'{row.total_writes}' + f'{per_call}' + f'{row.max_queries}' + ) + + +def _db_cost(agg: AggregatesView) -> str: + if not agg.db_costs: + return "" + rows = "".join(_db_row(row) for row in agg.db_costs) + headers = ( + ("Span", False), + ("Spans", True), + ("Queries", True), + ("Writes", True), + ("Q / call", True), + ("Max", True), + ) + return _section( + "DB cost", + _table(headers, rows), + subtitle="SQLite work per span (performance-truth) — a high Q/call is " + "N+1-shaped: many reads for little produced.", + ) + + def render_trace_html(trace: TraceView) -> str: """Render a ``TraceView`` as a self-contained, branded diagnosis cockpit.""" foot = f"CodeClone · platform observability · schema {_esc(trace.schema_version)}" @@ -584,6 +617,7 @@ def render_trace_html(trace: TraceView) -> str: + _waterfall(trace) + _chain(trace) + _semantic(trace.aggregates) + + _db_cost(trace.aggregates) + _mcp(trace.aggregates.mcp_tools) + f'

    {foot}

    ' + "
    " diff --git a/codeclone/observability/store/reader.py b/codeclone/observability/store/reader.py index 7ba4e3d9..8e4b12b2 100644 --- a/codeclone/observability/store/reader.py +++ b/codeclone/observability/store/reader.py @@ -23,6 +23,7 @@ from ...contracts import PLATFORM_OBSERVABILITY_SCHEMA_VERSION from ..views import ( AggregatesView, + DbCostRow, McpToolAggregate, OperationView, SpanCostView, @@ -259,6 +260,28 @@ def _mcp_tool_aggregates(flat: list[OperationView]) -> tuple[McpToolAggregate, . return tuple(aggregates) +def _db_costs(flat: list[OperationView]) -> tuple[DbCostRow, ...]: + grouped: dict[str, list[SpanView]] = defaultdict(list) + surface_of: dict[str, str] = {} + for op in flat: + for span in op.spans: + if "db_queries" in span.counters: + grouped[span.name].append(span) + surface_of.setdefault(span.name, op.surface) + rows = [ + DbCostRow( + span_name=name, + surface=surface_of[name], + span_count=len(spans), + total_queries=sum(s.counters.get("db_queries", 0) for s in spans), + total_writes=sum(s.counters.get("db_writes", 0) for s in spans), + max_queries=max(s.counters.get("db_queries", 0) for s in spans), + ) + for name, spans in grouped.items() + ] + return tuple(sorted(rows, key=lambda r: (-r.total_queries, r.span_name))) + + def _aggregates( flat: list[OperationView], spans_by_op: dict[str, tuple[SpanView, ...]] ) -> AggregatesView: @@ -302,6 +325,7 @@ def _aggregates( slowest_span=span_costs[0] if span_costs else None, semantic_costs=semantic_costs[:_SEMANTIC_COST_LIMIT], peak_memory_span=memory_ranked[0] if memory_ranked else None, + db_costs=_db_costs(flat), ) diff --git a/codeclone/observability/views.py b/codeclone/observability/views.py index 56b75226..6a65359c 100644 --- a/codeclone/observability/views.py +++ b/codeclone/observability/views.py @@ -85,6 +85,22 @@ class McpToolAggregate: p95_response_tokens: int = 0 +@dataclass(frozen=True, slots=True) +class DbCostRow: + """SQLite work attributed to a span class (performance-truth, 
not audit). + + Aggregated from span db_queries/db_writes counters; ``max_queries`` is the + worst single instance and ``queries`` ÷ a per-row productive count exposes + N+1-shaped access (many reads, little produced).""" + + span_name: str + surface: str + span_count: int + total_queries: int + total_writes: int + max_queries: int + + @dataclass(frozen=True, slots=True) class AggregatesView: operation_count: int @@ -97,6 +113,7 @@ class AggregatesView: slowest_span: SpanCostView | None = None semantic_costs: tuple[SpanCostView, ...] = () peak_memory_span: SpanCostView | None = None + db_costs: tuple[DbCostRow, ...] = () @dataclass(frozen=True, slots=True) @@ -142,6 +159,7 @@ class TraceView: __all__ = [ "AggregatesView", + "DbCostRow", "McpToolAggregate", "OperationView", "SpanCostView", diff --git a/tests/test_observability_reader.py b/tests/test_observability_reader.py index 6012d871..b91dc75b 100644 --- a/tests/test_observability_reader.py +++ b/tests/test_observability_reader.py @@ -221,3 +221,59 @@ def test_no_op_span_and_mcp_payload_percentiles(tmp_path: Path) -> None: assert costly.no_op is True assert costly.produced == 0 assert costly.skipped == 1423 + + +def test_db_costs_aggregate_per_span(tmp_path: Path) -> None: + conn = open_observability_store(observability_store_path(tmp_path)) + try: + write_operation( + conn, + OperationRecord( + operation_id="W", + correlation_id="W", + surface="memory", + name="memory.projection.job", + started_at_utc="2026-06-09T00:00:01Z", + duration_ms=900.0, + status="ok", + spans=( + SpanRecord( + span_id="s1", + operation_id="W", + name="memory.semantic.reindex", + started_at_utc="2026-06-09T00:00:01Z", + duration_ms=800.0, + status="ok", + counters={"db_queries": 1306, "embedded": 9}, + ), + SpanRecord( + span_id="s2", + operation_id="W", + name="memory.experience.distill", + started_at_utc="2026-06-09T00:00:02Z", + duration_ms=20.0, + status="ok", + counters={"db_queries": 1875, "db_writes": 768}, + ), + ), + ), + ) + 
finally: + conn.close() + + read = open_observability_store_readonly(tmp_path) + assert read is not None + try: + trace = build_trace_view(read, correlation_id="W") + finally: + read.close() + + costs = trace.aggregates.db_costs + # Sorted by total queries desc: distill (1875) before reindex (1306). + assert costs[0].span_name == "memory.experience.distill" + assert costs[0].total_queries == 1875 + assert costs[0].total_writes == 768 + by_name = {row.span_name: row for row in costs} + assert by_name["memory.semantic.reindex"].total_queries == 1306 + assert by_name["memory.semantic.reindex"].total_writes == 0 + assert by_name["memory.semantic.reindex"].max_queries == 1306 diff --git a/tests/test_observability_render.py b/tests/test_observability_render.py index afb2d609..c687e5fc 100644 --- a/tests/test_observability_render.py +++ b/tests/test_observability_render.py @@ -21,6 +21,7 @@ from codeclone.observability.store.writer import write_operation from codeclone.observability.views import ( AggregatesView, + DbCostRow, McpToolAggregate, OperationView, SpanCostView, @@ -213,6 +214,32 @@ def test_render_peak_memory_contributor() -> None: assert "80%" in html # 480 / 600 = 80% +def test_render_db_cost() -> None: + trace = TraceView( + schema_version="1.0", + window_started_at_utc="t", + window_ended_at_utc="t", + aggregates=AggregatesView( + operation_count=1, + db_costs=( + DbCostRow( + span_name="memory.semantic.reindex", + surface="memory", + span_count=2, + total_queries=1306, + total_writes=0, + max_queries=1000, + ), + ), + ), + ) + html = render_trace_html(trace) + assert "DB cost" in html + assert "memory.semantic.reindex" in html + assert "1306" in html + assert "653" in html # 1306 / 2 queries per call + + def test_render_waterfall_timeline() -> None: group = WaterfallGroup( correlation_id="corr1234abcd", From 6281a36af2b540c51b2fc70686c771e0c8899d61 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Thu, 11 Jun 2026 12:37:21 +0500 Subject: [PATCH 233/318] 
feat(html): add Agent context view (token/context pressure) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Aggregate the already-collected MCP token economics into an Agent context cockpit section (AgentView on AggregatesView.agent): total context pressure (response tokens pushed back into the agent), total sent, call count, and a per-tool consumer ranking with each tool's share of context. On real data it surfaces get_relevant_memory as the dominant consumer (≈59% of context), answering "which tool eats the agent's context" — the per-call detail stays in the MCP tool matrix below it. --- codeclone/observability/render_html.py | 41 +++++++++++++++++++++++++ codeclone/observability/store/reader.py | 28 +++++++++++++++++ codeclone/observability/views.py | 25 +++++++++++++++ tests/test_observability_reader.py | 8 +++++ tests/test_observability_render.py | 37 ++++++++++++++++++++++ 5 files changed, 139 insertions(+) diff --git a/codeclone/observability/render_html.py b/codeclone/observability/render_html.py index 32f67f52..18331b25 100644 --- a/codeclone/observability/render_html.py +++ b/codeclone/observability/render_html.py @@ -24,6 +24,7 @@ from html import escape from .views import ( + AgentTokenRow, AggregatesView, DbCostRow, McpToolAggregate, @@ -572,6 +573,45 @@ def _waterfall(trace: TraceView) -> str: ) +def _agent_row(row: AgentTokenRow, total_response: int) -> str: + share = round(row.response_tokens / total_response * 100) if total_response else 0 + return ( + f'{_esc(row.name)}' + f'{row.calls}' + f'{_tokens(row.request_tokens)}' + f'{_tokens(row.response_tokens)}' + f'{share}%' + ) + + +def _agent(agg: AggregatesView) -> str: + view = agg.agent + if view is None: + return "" + cards = ( + '
    ' + + _stat(_tokens(view.response_tokens), "context pressure (tok)", "accent") + + _stat(_tokens(view.request_tokens), "sent (tok)") + + _stat(str(view.mcp_calls), "mcp calls") + + _stat(str(len(view.consumers)), "tools") + + "
    " + ) + rows = "".join(_agent_row(row, view.response_tokens) for row in view.consumers) + headers = ( + ("Tool", False), + ("Calls", True), + ("↑ tok", True), + ("↓ tok", True), + ("Context %", True), + ) + return _section( + "Agent context", + cards + _table(headers, rows), + subtitle="Tokens MCP tools push back into the agent's context — the real " + "per-call cost for an LLM. The top row is your biggest context consumer.", + ) + + def _db_row(row: DbCostRow) -> str: per_call = round(row.total_queries / row.span_count) if row.span_count else 0 return ( @@ -618,6 +658,7 @@ def render_trace_html(trace: TraceView) -> str: + _chain(trace) + _semantic(trace.aggregates) + _db_cost(trace.aggregates) + + _agent(trace.aggregates) + _mcp(trace.aggregates.mcp_tools) + f'

    {foot}

    ' + "" diff --git a/codeclone/observability/store/reader.py b/codeclone/observability/store/reader.py index 8e4b12b2..c3c38124 100644 --- a/codeclone/observability/store/reader.py +++ b/codeclone/observability/store/reader.py @@ -22,6 +22,8 @@ from ...contracts import PLATFORM_OBSERVABILITY_SCHEMA_VERSION from ..views import ( + AgentTokenRow, + AgentView, AggregatesView, DbCostRow, McpToolAggregate, @@ -260,6 +262,31 @@ def _mcp_tool_aggregates(flat: list[OperationView]) -> tuple[McpToolAggregate, . return tuple(aggregates) +def _agent_view(flat: list[OperationView]) -> AgentView | None: + mcp_ops = [op for op in flat if op.surface == "mcp"] + if not mcp_ops: + return None + grouped: dict[str, list[OperationView]] = defaultdict(list) + for op in mcp_ops: + grouped[op.name].append(op) + rows = [ + AgentTokenRow( + name=name, + calls=len(ops), + request_tokens=sum(op.request_tokens or 0 for op in ops), + response_tokens=sum(op.response_tokens or 0 for op in ops), + ) + for name, ops in grouped.items() + ] + rows.sort(key=lambda r: (-r.response_tokens, r.name)) + return AgentView( + mcp_calls=len(mcp_ops), + request_tokens=sum(row.request_tokens for row in rows), + response_tokens=sum(row.response_tokens for row in rows), + consumers=tuple(rows), + ) + + def _db_costs(flat: list[OperationView]) -> tuple[DbCostRow, ...]: grouped: dict[str, list[SpanView]] = defaultdict(list) surface_of: dict[str, str] = {} @@ -326,6 +353,7 @@ def _aggregates( semantic_costs=semantic_costs[:_SEMANTIC_COST_LIMIT], peak_memory_span=memory_ranked[0] if memory_ranked else None, db_costs=_db_costs(flat), + agent=_agent_view(flat), ) diff --git a/codeclone/observability/views.py b/codeclone/observability/views.py index 6a65359c..05af3d48 100644 --- a/codeclone/observability/views.py +++ b/codeclone/observability/views.py @@ -101,6 +101,28 @@ class DbCostRow: max_queries: int +@dataclass(frozen=True, slots=True) +class AgentTokenRow: + """One MCP tool's cumulative token economics across 
the window.""" + + name: str + calls: int + request_tokens: int + response_tokens: int + + +@dataclass(frozen=True, slots=True) +class AgentView: + """Agentic context economics: how many tokens MCP tools pushed back into + the agent's context (``response_tokens`` = context pressure), ranked by the + biggest consumer. Built only when MCP operations are present.""" + + mcp_calls: int = 0 + request_tokens: int = 0 + response_tokens: int = 0 + consumers: tuple[AgentTokenRow, ...] = () + + @dataclass(frozen=True, slots=True) class AggregatesView: operation_count: int @@ -114,6 +136,7 @@ class AggregatesView: semantic_costs: tuple[SpanCostView, ...] = () peak_memory_span: SpanCostView | None = None db_costs: tuple[DbCostRow, ...] = () + agent: AgentView | None = None @dataclass(frozen=True, slots=True) @@ -158,6 +181,8 @@ class TraceView: __all__ = [ + "AgentTokenRow", + "AgentView", "AggregatesView", "DbCostRow", "McpToolAggregate", diff --git a/tests/test_observability_reader.py b/tests/test_observability_reader.py index b91dc75b..ec7cf899 100644 --- a/tests/test_observability_reader.py +++ b/tests/test_observability_reader.py @@ -222,6 +222,14 @@ def test_no_op_span_and_mcp_payload_percentiles(tmp_path: Path) -> None: assert costly.produced == 0 assert costly.skipped == 1423 + # Agent context: the one MCP op contributes its response tokens. 
+ agent = trace.aggregates.agent + assert agent is not None + assert agent.mcp_calls == 1 + assert agent.response_tokens == 469 + assert agent.consumers[0].name == "finish_controlled_change" + assert agent.consumers[0].response_tokens == 469 + def test_db_costs_aggregate_per_span(tmp_path: Path) -> None: conn = open_observability_store(observability_store_path(tmp_path)) diff --git a/tests/test_observability_render.py b/tests/test_observability_render.py index c687e5fc..413ed317 100644 --- a/tests/test_observability_render.py +++ b/tests/test_observability_render.py @@ -20,6 +20,8 @@ ) from codeclone.observability.store.writer import write_operation from codeclone.observability.views import ( + AgentTokenRow, + AgentView, AggregatesView, DbCostRow, McpToolAggregate, @@ -214,6 +216,41 @@ def test_render_peak_memory_contributor() -> None: assert "80%" in html # 480 / 600 = 80% +def test_render_agent_context() -> None: + trace = TraceView( + schema_version="1.0", + window_started_at_utc="t", + window_ended_at_utc="t", + aggregates=AggregatesView( + operation_count=2, + agent=AgentView( + mcp_calls=5, + request_tokens=300, + response_tokens=1000, + consumers=( + AgentTokenRow( + name="get_relevant_memory", + calls=4, + request_tokens=200, + response_tokens=800, + ), + AgentTokenRow( + name="finish_controlled_change", + calls=1, + request_tokens=100, + response_tokens=200, + ), + ), + ), + ), + ) + html = render_trace_html(trace) + assert "Agent context" in html + assert "context pressure" in html + assert "get_relevant_memory" in html + assert "80%" in html # 800 / 1000 context share for the top consumer + + def test_render_db_cost() -> None: trace = TraceView( schema_version="1.0", From b693f0e4c0a9e6436a45432e0f6a1e5529017892 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Thu, 11 Jun 2026 12:48:21 +0500 Subject: [PATCH 234/318] feat(html): add Waste view (no-op + high-payload fix candidates) Consolidate already-collected waste signals into one ranked Waste 
cockpit section (WasteItem on AggregatesView.waste), placed under the runtime summary as the executive "what to fix" digest: costly no-op rebuild spans (produced nothing yet spent time/memory) and payload-heavy MCP tools (p95 response over a threshold), sorted by severity. On real data it flags get_relevant_memory as a high-payload candidate. Duplicate-work via dedupe_key is deferred (the field is not yet populated). The seed-based reader tests now share a _read_trace helper so the open->build->close boilerplate no longer fingerprints as a block clone. --- codeclone/observability/render_html.py | 24 +++++++ codeclone/observability/store/reader.py | 51 +++++++++++++- codeclone/observability/views.py | 15 +++++ tests/test_observability_reader.py | 88 ++++++++++++++++++++----- tests/test_observability_render.py | 34 ++++++++++ 5 files changed, 193 insertions(+), 19 deletions(-) diff --git a/codeclone/observability/render_html.py b/codeclone/observability/render_html.py index 18331b25..c399e26d 100644 --- a/codeclone/observability/render_html.py +++ b/codeclone/observability/render_html.py @@ -32,6 +32,7 @@ SpanCostView, SpanView, TraceView, + WasteItem, WaterfallGroup, WaterfallRow, ) @@ -380,6 +381,28 @@ def _summary(trace: TraceView) -> str: return _section("Runtime summary", cards + _highlights(agg)) +def _waste_row(item: WasteItem) -> str: + return ( + '' + f'{_esc(item.kind)}' + f'{_surface_badge(item.surface)} {_esc(item.subject)}' + f'{_esc(item.detail)}' + ) + + +def _waste_section(agg: AggregatesView) -> str: + if not agg.waste: + return "" + rows = "".join(_waste_row(item) for item in agg.waste) + headers = (("Kind", False), ("What", False), ("Cost", False)) + return _section( + "Waste", + _table(headers, rows), + subtitle="Resources spent without payoff — no-op rebuilds and " + "payload-heavy calls. 
Ranked fix candidates.", + ) + + def _op_lineage(op: OperationView) -> list[OperationView]: flat = [op] for child in op.children: @@ -654,6 +677,7 @@ def render_trace_html(trace: TraceView) -> str: f'
    ' + _header(trace) + _summary(trace) + + _waste_section(trace.aggregates) + _waterfall(trace) + _chain(trace) + _semantic(trace.aggregates) diff --git a/codeclone/observability/store/reader.py b/codeclone/observability/store/reader.py index c3c38124..cd47e50f 100644 --- a/codeclone/observability/store/reader.py +++ b/codeclone/observability/store/reader.py @@ -31,6 +31,7 @@ SpanCostView, SpanView, TraceView, + WasteItem, WaterfallGroup, WaterfallRow, ) @@ -43,6 +44,12 @@ _PRODUCTIVE_COUNTER_KEYS = ("embedded", "workflows_seen", "experiences_distilled") _SEMANTIC_COST_LIMIT = 8 +# Waste thresholds: a no-op span is only worth flagging once it has spent time; +# an MCP response is "heavy" past these payload sizes. +_WASTE_NOOP_MS = 50.0 +_HIGH_PAYLOAD_BYTES = 16 * 1024 +_HIGH_PAYLOAD_TOKENS = 4000 + def open_observability_store_readonly(root: Path) -> sqlite3.Connection | None: """Open the store read-only, or None when it does not exist yet.""" @@ -262,6 +269,46 @@ def _mcp_tool_aggregates(flat: list[OperationView]) -> tuple[McpToolAggregate, . 
return tuple(aggregates) +def _waste( + semantic_costs: tuple[SpanCostView, ...], + mcp_tools: tuple[McpToolAggregate, ...], +) -> tuple[WasteItem, ...]: + items: list[WasteItem] = [] + for span in semantic_costs: + if span.no_op and span.duration_ms >= _WASTE_NOOP_MS: + rss = ( + f", +{span.rss_delta_mb:.0f} MB" + if span.rss_delta_mb and span.rss_delta_mb >= 1 + else "" + ) + items.append( + WasteItem( + kind="no-op", + subject=span.name, + surface=span.surface, + detail=f"ran {span.duration_ms:.0f}ms{rss}, skipped {span.skipped}", + severity=span.duration_ms, + ) + ) + items.extend( + WasteItem( + kind="high payload", + subject=tool.name, + surface="mcp", + detail=( + f"p95 {tool.p95_response_bytes / 1024:.0f} KB resp · " + f"{tool.p95_response_tokens} tok" + ), + severity=float(tool.p95_response_bytes), + ) + for tool in mcp_tools + if tool.p95_response_bytes >= _HIGH_PAYLOAD_BYTES + or tool.p95_response_tokens >= _HIGH_PAYLOAD_TOKENS + ) + items.sort(key=lambda w: (-w.severity, w.kind, w.subject)) + return tuple(items) + + def _agent_view(flat: list[OperationView]) -> AgentView | None: mcp_ops = [op for op in flat if op.surface == "mcp"] if not mcp_ops: @@ -341,6 +388,7 @@ def _aggregates( (s for s in span_costs if s.rss_delta_mb is not None), key=lambda s: (-(s.rss_delta_mb or 0.0), s.operation_id, s.span_id), ) + mcp_tools = _mcp_tool_aggregates(flat) return AggregatesView( operation_count=len(flat), slowest=slowest, @@ -348,12 +396,13 @@ def _aggregates( max_rss_delta_mb=max(rss) if rss else None, anomaly_count=0, unknown_expensive_rebuild_count=unknown, - mcp_tools=_mcp_tool_aggregates(flat), + mcp_tools=mcp_tools, slowest_span=span_costs[0] if span_costs else None, semantic_costs=semantic_costs[:_SEMANTIC_COST_LIMIT], peak_memory_span=memory_ranked[0] if memory_ranked else None, db_costs=_db_costs(flat), agent=_agent_view(flat), + waste=_waste(semantic_costs, mcp_tools), ) diff --git a/codeclone/observability/views.py b/codeclone/observability/views.py 
index 05af3d48..c3073c00 100644 --- a/codeclone/observability/views.py +++ b/codeclone/observability/views.py @@ -123,6 +123,19 @@ class AgentView: consumers: tuple[AgentTokenRow, ...] = () +@dataclass(frozen=True, slots=True) +class WasteItem: + """One ranked "fix candidate": resources spent without payoff — a no-op + rebuild that ran but produced nothing, or a payload-heavy call. ``severity`` + is the descending sort key (magnitude of the wasted cost).""" + + kind: str + subject: str + surface: str + detail: str + severity: float = 0.0 + + @dataclass(frozen=True, slots=True) class AggregatesView: operation_count: int @@ -137,6 +150,7 @@ class AggregatesView: peak_memory_span: SpanCostView | None = None db_costs: tuple[DbCostRow, ...] = () agent: AgentView | None = None + waste: tuple[WasteItem, ...] = () @dataclass(frozen=True, slots=True) @@ -190,6 +204,7 @@ class TraceView: "SpanCostView", "SpanView", "TraceView", + "WasteItem", "WaterfallGroup", "WaterfallRow", ] diff --git a/tests/test_observability_reader.py b/tests/test_observability_reader.py index ec7cf899..8218887e 100644 --- a/tests/test_observability_reader.py +++ b/tests/test_observability_reader.py @@ -23,6 +23,7 @@ open_observability_store, ) from codeclone.observability.store.writer import write_operation +from codeclone.observability.views import TraceView def _seed(tmp_path: Path) -> None: @@ -80,18 +81,22 @@ def _seed(tmp_path: Path) -> None: conn.close() +def _read_trace(tmp_path: Path, *, correlation_id: str) -> TraceView: + read = open_observability_store_readonly(tmp_path) + assert read is not None + try: + return build_trace_view(read, correlation_id=correlation_id) + finally: + read.close() + + def test_open_readonly_missing_store_returns_none(tmp_path: Path) -> None: assert open_observability_store_readonly(tmp_path) is None def test_build_trace_view_tree_and_aggregates(tmp_path: Path) -> None: _seed(tmp_path) - read = open_observability_store_readonly(tmp_path) - assert read is not None 
- try: - trace = build_trace_view(read, correlation_id="A") - finally: - read.close() + trace = _read_trace(tmp_path, correlation_id="A") assert trace.schema_version == PLATFORM_OBSERVABILITY_SCHEMA_VERSION assert len(trace.operation_tree) == 1 @@ -203,12 +208,7 @@ def test_no_op_span_and_mcp_payload_percentiles(tmp_path: Path) -> None: finally: conn.close() - read = open_observability_store_readonly(tmp_path) - assert read is not None - try: - trace = build_trace_view(read, correlation_id="M") - finally: - read.close() + trace = _read_trace(tmp_path, correlation_id="M") tool = trace.aggregates.mcp_tools[0] assert tool.p95_request_bytes == 51 @@ -269,12 +269,7 @@ def test_db_costs_aggregate_per_span(tmp_path: Path) -> None: finally: conn.close() - read = open_observability_store_readonly(tmp_path) - assert read is not None - try: - trace = build_trace_view(read, correlation_id="W") - finally: - read.close() + trace = _read_trace(tmp_path, correlation_id="W") costs = trace.aggregates.db_costs # Sorted by total queries desc: distill (1875) before reindex (1306). 
@@ -285,3 +280,60 @@ def test_db_costs_aggregate_per_span(tmp_path: Path) -> None: assert by_name["memory.semantic.reindex"].total_queries == 1306 assert by_name["memory.semantic.reindex"].total_writes == 0 assert by_name["memory.semantic.reindex"].max_queries == 1306 + + +def test_waste_ranks_no_op_and_high_payload(tmp_path: Path) -> None: + conn = open_observability_store(observability_store_path(tmp_path)) + try: + write_operation( + conn, + OperationRecord( + operation_id="M", + correlation_id="M", + surface="mcp", + name="get_relevant_memory", + started_at_utc="2026-06-09T00:00:00Z", + duration_ms=200.0, + status="ok", + response_bytes=20480, + response_tokens=11000, + ), + ) + write_operation( + conn, + OperationRecord( + operation_id="W", + correlation_id="M", + surface="memory", + name="memory.projection.job", + started_at_utc="2026-06-09T00:00:01Z", + duration_ms=850.0, + status="ok", + parent_operation_id="M", + spans=( + SpanRecord( + span_id="s", + operation_id="W", + name="memory.semantic.reindex", + started_at_utc="2026-06-09T00:00:01Z", + duration_ms=800.0, + status="ok", + counters={"embedded": 0, "skipped_unchanged": 826}, + ), + ), + ), + ) + finally: + conn.close() + + trace = _read_trace(tmp_path, correlation_id="M") + + waste = trace.aggregates.waste + assert {w.kind for w in waste} == {"no-op", "high payload"} + noop = next(w for w in waste if w.kind == "no-op") + assert noop.subject == "memory.semantic.reindex" + assert "skipped 826" in noop.detail + high = next(w for w in waste if w.kind == "high payload") + assert high.subject == "get_relevant_memory" + # High payload (20 KB) outranks the no-op span (800 ms) by severity. 
+ assert waste[0].kind == "high payload" diff --git a/tests/test_observability_render.py b/tests/test_observability_render.py index 413ed317..e2bc3293 100644 --- a/tests/test_observability_render.py +++ b/tests/test_observability_render.py @@ -29,6 +29,7 @@ SpanCostView, SpanView, TraceView, + WasteItem, WaterfallGroup, WaterfallRow, ) @@ -251,6 +252,39 @@ def test_render_agent_context() -> None: assert "80%" in html # 800 / 1000 context share for the top consumer +def test_render_waste_section() -> None: + trace = TraceView( + schema_version="1.0", + window_started_at_utc="t", + window_ended_at_utc="t", + aggregates=AggregatesView( + operation_count=1, + waste=( + WasteItem( + kind="high payload", + subject="get_relevant_memory", + surface="mcp", + detail="p95 20 KB resp · 11000 tok", + severity=20480.0, + ), + WasteItem( + kind="no-op", + subject="memory.semantic.reindex", + surface="memory", + detail="ran 800ms, skipped 826", + severity=800.0, + ), + ), + ), + ) + html = render_trace_html(trace) + assert "Waste" in html + assert "no-op" in html + assert "high payload" in html + assert "skipped 826" in html + assert "get_relevant_memory" in html + + def test_render_db_cost() -> None: trace = TraceView( schema_version="1.0", From 022a4485aaa04d76a62af98464241225c251bcb8 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Thu, 11 Jun 2026 13:08:46 +0500 Subject: [PATCH 235/318] feat(html): surface CPU cost and a pipeline-by-subsystem roll-up Complete the Cost view with the last collected axis. OperationView now carries cpu_user_ms/cpu_system_ms; AggregatesView.heaviest_cpu drives a "Heaviest CPU" summary highlight that prints the CPU-to-wall ratio, so a parallel op (CPU > wall) reads differently from an I/O-bound one. A new Pipeline section rolls operations up by subsystem (memory / analysis / controller / mcp query / cli) with op count, total wall and total CPU, showing where the run's time and compute actually go. 
On real data the memory subsystem is the CPU hog (6.5s CPU on 5.5s wall, parallel) while analysis is wall-bound. --- codeclone/observability/render_html.py | 35 +++++++++++++++++ codeclone/observability/store/reader.py | 51 +++++++++++++++++++++++++ codeclone/observability/views.py | 16 ++++++++ tests/test_observability_reader.py | 44 +++++++++++++++++++++ tests/test_observability_render.py | 34 +++++++++++++++++ 5 files changed, 180 insertions(+) diff --git a/codeclone/observability/render_html.py b/codeclone/observability/render_html.py index c399e26d..2612b6da 100644 --- a/codeclone/observability/render_html.py +++ b/codeclone/observability/render_html.py @@ -29,6 +29,7 @@ DbCostRow, McpToolAggregate, OperationView, + PipelineGroup, SpanCostView, SpanView, TraceView, @@ -359,6 +360,18 @@ def _highlights(agg: AggregatesView) -> str: _mb(agg.max_rss_delta_mb), ) ) + if agg.heaviest_cpu is not None: + op = agg.heaviest_cpu + cpu_ms = (op.cpu_user_ms or 0.0) + (op.cpu_system_ms or 0.0) + ratio = cpu_ms / op.duration_ms if op.duration_ms else 0.0 + rows.append( + _lead_row( + "Heaviest CPU", + f"{_surface_badge(op.surface)}" + f'{_esc(op.name)}', + f"{_ms(cpu_ms)} · {ratio:.1f}x wall", + ) + ) return f'
    {"".join(rows)}
    ' if rows else "" @@ -667,6 +680,27 @@ def _db_cost(agg: AggregatesView) -> str: ) +def _pipeline_row(group: PipelineGroup) -> str: + return ( + f'{_esc(group.name)}' + f'{group.op_count}' + f'{_ms(group.duration_ms)}' + f'{_ms(group.cpu_ms)}' + ) + + +def _pipeline_section(agg: AggregatesView) -> str: + if not agg.pipeline: + return "" + rows = "".join(_pipeline_row(group) for group in agg.pipeline) + headers = (("Subsystem", False), ("Ops", True), ("Wall", True), ("CPU", True)) + return _section( + "Pipeline", + _table(headers, rows), + subtitle="Where the run spends wall time and CPU, grouped by subsystem.", + ) + + def render_trace_html(trace: TraceView) -> str: """Render a ``TraceView`` as a self-contained, branded diagnosis cockpit.""" foot = f"CodeClone · platform observability · schema {_esc(trace.schema_version)}" @@ -684,6 +718,7 @@ def render_trace_html(trace: TraceView) -> str: + _db_cost(trace.aggregates) + _agent(trace.aggregates) + _mcp(trace.aggregates.mcp_tools) + + _pipeline_section(trace.aggregates) + f'

    {foot}

    ' + "
    " ) diff --git a/codeclone/observability/store/reader.py b/codeclone/observability/store/reader.py index cd47e50f..c8fc59ed 100644 --- a/codeclone/observability/store/reader.py +++ b/codeclone/observability/store/reader.py @@ -28,6 +28,7 @@ DbCostRow, McpToolAggregate, OperationView, + PipelineGroup, SpanCostView, SpanView, TraceView, @@ -133,6 +134,8 @@ def _operation_view( rss_delta_mb=row["rss_delta_mb"], spans=spans, children=children, + cpu_user_ms=row["cpu_user_ms"], + cpu_system_ms=row["cpu_system_ms"], ) @@ -309,6 +312,50 @@ def _waste( return tuple(items) +_CONTROLLER_TOOLS = frozenset( + { + "mcp.start_controlled_change", + "mcp.finish_controlled_change", + "mcp.manage_change_intent", + "mcp.check_patch_contract", + "mcp.create_review_receipt", + "mcp.validate_review_claims", + } +) + + +def _cpu_ms(op: OperationView) -> float: + return (op.cpu_user_ms or 0.0) + (op.cpu_system_ms or 0.0) + + +def _subsystem(op: OperationView) -> str: + if op.surface == "memory": + return "memory" + if "analyze" in op.name: + return "analysis" + if op.name in _CONTROLLER_TOOLS: + return "controller" + if op.surface == "mcp": + return "mcp query" + return op.surface or "other" + + +def _pipeline(flat: list[OperationView]) -> tuple[PipelineGroup, ...]: + grouped: dict[str, list[OperationView]] = defaultdict(list) + for op in flat: + grouped[_subsystem(op)].append(op) + rows = [ + PipelineGroup( + name=name, + op_count=len(ops), + duration_ms=sum(op.duration_ms for op in ops), + cpu_ms=sum(_cpu_ms(op) for op in ops), + ) + for name, ops in grouped.items() + ] + return tuple(sorted(rows, key=lambda g: (-g.duration_ms, g.name))) + + def _agent_view(flat: list[OperationView]) -> AgentView | None: mcp_ops = [op for op in flat if op.surface == "mcp"] if not mcp_ops: @@ -389,6 +436,8 @@ def _aggregates( key=lambda s: (-(s.rss_delta_mb or 0.0), s.operation_id, s.span_id), ) mcp_tools = _mcp_tool_aggregates(flat) + cpu_ranked = sorted(flat, key=lambda v: (-_cpu_ms(v), 
v.operation_id)) + heaviest_cpu = cpu_ranked[0] if cpu_ranked and _cpu_ms(cpu_ranked[0]) > 0 else None return AggregatesView( operation_count=len(flat), slowest=slowest, @@ -403,6 +452,8 @@ def _aggregates( db_costs=_db_costs(flat), agent=_agent_view(flat), waste=_waste(semantic_costs, mcp_tools), + heaviest_cpu=heaviest_cpu, + pipeline=_pipeline(flat), ) diff --git a/codeclone/observability/views.py b/codeclone/observability/views.py index c3073c00..fd8d4281 100644 --- a/codeclone/observability/views.py +++ b/codeclone/observability/views.py @@ -72,6 +72,8 @@ class OperationView: rss_delta_mb: float | None = None spans: tuple[SpanView, ...] = () children: tuple[OperationView, ...] = () + cpu_user_ms: float | None = None + cpu_system_ms: float | None = None @dataclass(frozen=True, slots=True) @@ -136,6 +138,17 @@ class WasteItem: severity: float = 0.0 +@dataclass(frozen=True, slots=True) +class PipelineGroup: + """Operations rolled up by subsystem (memory / analysis / controller / …), + showing where the run spends wall time and CPU.""" + + name: str + op_count: int + duration_ms: float + cpu_ms: float + + @dataclass(frozen=True, slots=True) class AggregatesView: operation_count: int @@ -151,6 +164,8 @@ class AggregatesView: db_costs: tuple[DbCostRow, ...] = () agent: AgentView | None = None waste: tuple[WasteItem, ...] = () + heaviest_cpu: OperationView | None = None + pipeline: tuple[PipelineGroup, ...] 
= () @dataclass(frozen=True, slots=True) @@ -201,6 +216,7 @@ class TraceView: "DbCostRow", "McpToolAggregate", "OperationView", + "PipelineGroup", "SpanCostView", "SpanView", "TraceView", diff --git a/tests/test_observability_reader.py b/tests/test_observability_reader.py index 8218887e..7b72ae3d 100644 --- a/tests/test_observability_reader.py +++ b/tests/test_observability_reader.py @@ -337,3 +337,47 @@ def test_waste_ranks_no_op_and_high_payload(tmp_path: Path) -> None: assert high.subject == "get_relevant_memory" # High payload (20 KB) outranks the no-op span (800 ms) by severity. assert waste[0].kind == "high payload" + + +def test_cpu_and_pipeline_rollup(tmp_path: Path) -> None: + conn = open_observability_store(observability_store_path(tmp_path)) + try: + write_operation( + conn, + OperationRecord( + operation_id="W", + correlation_id="W", + surface="memory", + name="memory.projection.job", + started_at_utc="2026-06-09T00:00:00Z", + duration_ms=1000.0, + status="ok", + profile=ProfileSample(cpu_user_ms=1800.0, cpu_system_ms=200.0), + ), + ) + write_operation( + conn, + OperationRecord( + operation_id="A", + correlation_id="W", + surface="mcp", + name="mcp.analyze_repository", + started_at_utc="2026-06-09T00:00:01Z", + duration_ms=500.0, + status="ok", + parent_operation_id="W", + profile=ProfileSample(cpu_user_ms=100.0, cpu_system_ms=50.0), + ), + ) + finally: + conn.close() + + agg = _read_trace(tmp_path, correlation_id="W").aggregates + # Heaviest CPU: the memory job spent 2000ms CPU on 1000ms wall (parallel). + assert agg.heaviest_cpu is not None + assert agg.heaviest_cpu.name == "memory.projection.job" + assert agg.heaviest_cpu.cpu_user_ms == 1800.0 + # Pipeline rolls ops up by subsystem. 
+ pipe = {group.name: group for group in agg.pipeline} + assert pipe["memory"].cpu_ms == 2000.0 + assert pipe["analysis"].op_count == 1 diff --git a/tests/test_observability_render.py b/tests/test_observability_render.py index e2bc3293..8073d5b0 100644 --- a/tests/test_observability_render.py +++ b/tests/test_observability_render.py @@ -26,6 +26,7 @@ DbCostRow, McpToolAggregate, OperationView, + PipelineGroup, SpanCostView, SpanView, TraceView, @@ -217,6 +218,39 @@ def test_render_peak_memory_contributor() -> None: assert "80%" in html # 480 / 600 = 80% +def test_render_cpu_highlight_and_pipeline() -> None: + op = OperationView( + operation_id="W", + correlation_id="W", + surface="memory", + name="memory.projection.job", + started_at_utc="t", + duration_ms=1000.0, + status="ok", + cpu_user_ms=1800.0, + cpu_system_ms=200.0, + ) + trace = TraceView( + schema_version="1.0", + window_started_at_utc="t", + window_ended_at_utc="t", + aggregates=AggregatesView( + operation_count=1, + heaviest_cpu=op, + pipeline=( + PipelineGroup( + name="memory", op_count=2, duration_ms=2500.0, cpu_ms=3000.0 + ), + ), + ), + ) + html = render_trace_html(trace) + assert "Heaviest CPU" in html + assert "2.0x wall" in html # 2000ms CPU / 1000ms wall + assert "Pipeline" in html + assert "memory" in html + + def test_render_agent_context() -> None: trace = TraceView( schema_version="1.0", From 1f4af260bcc616888c83d68611010f40add418fd Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Thu, 11 Jun 2026 13:18:45 +0500 Subject: [PATCH 236/318] feat(core): attribute worker cold-start as a bootstrap span Make the spawn->job handoff honest. observability.runtime.record_elapsed_span emits a span with explicit started_at + duration for work that finished before instrumentation could wrap it; profile.worker_bootstrap_sample reads the process create_time (psutil) and the elapsed time to now. 
A spawned projection worker (one carrying the env handoff) now emits a memory.projection.worker_bootstrap span positioned at its process creation, so the previously empty spawn->job gap in the waterfall is labelled with the real cold-start (process spawn + interpreter + imports + claim). In-process runs skip it (they share the caller's process); inert when disabled or psutil is absent. --- codeclone/memory/jobs/worker.py | 31 ++++++++++++++++++++++++- codeclone/observability/__init__.py | 2 ++ codeclone/observability/profile.py | 24 ++++++++++++++++++- codeclone/observability/runtime.py | 28 ++++++++++++++++++++++ tests/test_observability_correlation.py | 8 +++++++ 5 files changed, 91 insertions(+), 2 deletions(-) diff --git a/codeclone/memory/jobs/worker.py b/codeclone/memory/jobs/worker.py index a4d07cce..a4b52f9f 100644 --- a/codeclone/memory/jobs/worker.py +++ b/codeclone/memory/jobs/worker.py @@ -14,7 +14,13 @@ from typing import TYPE_CHECKING from ...config.memory import MemoryConfig -from ...observability import operation, span +from ...observability import ( + is_observability_enabled, + operation, + record_elapsed_span, + span, +) +from ...observability.profile import worker_bootstrap_sample from ..experience.distillation_workflow import execute_experience_distillation from ..models import MemoryProject from ..semantic.rebuild_workflow import execute_semantic_index_rebuild @@ -114,6 +120,25 @@ def _correlation_handoff() -> tuple[str | None, str | None]: ) +def _emit_worker_bootstrap_span() -> None: + """Record the worker cold-start (process spawn -> first job instrumentation) + as a ``memory.projection.worker_bootstrap`` span, positioned at the process + creation time so the spawn->job gap in the waterfall is labelled rather than + left as empty space. No-op when disabled or psutil is unavailable. 
+ """ + if not is_observability_enabled(): + return + sample = worker_bootstrap_sample() + if sample is None: + return + started_at_utc, elapsed_ms = sample + record_elapsed_span( + "memory.projection.worker_bootstrap", + started_at_utc=started_at_utc, + duration_ms=elapsed_ms, + ) + + def run_projection_job( conn: sqlite3.Connection, *, @@ -130,6 +155,10 @@ def run_projection_job( correlation_id=correlation_id, parent_operation_id=parent_operation_id, ): + # Only a spawned worker (one that carries the env handoff) has a real + # cold-start to attribute; an in-process run shares the caller's process. + if parent_operation_id is not None: + _emit_worker_bootstrap_span() watermark = _trajectory_incremental_watermark(conn, project_id=project.id) with span( name="memory.trajectory.rebuild", diff --git a/codeclone/observability/__init__.py b/codeclone/observability/__init__.py index c1dab3ce..18a89f2e 100644 --- a/codeclone/observability/__init__.py +++ b/codeclone/observability/__init__.py @@ -25,6 +25,7 @@ operation, payload_capture_enabled, record_db_query, + record_elapsed_span, shutdown, span, ) @@ -40,6 +41,7 @@ "operation", "payload_capture_enabled", "record_db_query", + "record_elapsed_span", "shutdown", "span", ] diff --git a/codeclone/observability/profile.py b/codeclone/observability/profile.py index a0111222..12f5e4de 100644 --- a/codeclone/observability/profile.py +++ b/codeclone/observability/profile.py @@ -14,11 +14,33 @@ from __future__ import annotations +import time +from datetime import datetime, timezone + from .models import ProfileSample _BYTES_PER_MB = 1024 * 1024 +def worker_bootstrap_sample() -> tuple[str, float] | None: + """Process cold-start as ``(creation_timestamp_iso, ms_elapsed_to_now)``. + + The elapsed time spans process spawn, interpreter startup, imports and setup + up to this call — the part of the spawn->job handoff a worker cannot wrap + with a normal span. Returns ``None`` when psutil is unavailable. 
+ """ + try: + import psutil + except ImportError: + return None + created = psutil.Process().create_time() # epoch seconds + elapsed_ms = max(0.0, (time.time() - created) * 1000.0) + created_iso = datetime.fromtimestamp(created, tz=timezone.utc).strftime( + "%Y-%m-%dT%H:%M:%S.%fZ" + ) + return created_iso, elapsed_ms + + def capture_rss_cpu() -> tuple[int, float, float] | None: """Snapshot ``(rss_bytes, cpu_user_s, cpu_system_s)`` for this process. @@ -66,4 +88,4 @@ def build_profile_sample( ) -__all__ = ["build_profile_sample", "capture_rss_cpu"] +__all__ = ["build_profile_sample", "capture_rss_cpu", "worker_bootstrap_sample"] diff --git a/codeclone/observability/runtime.py b/codeclone/observability/runtime.py index cc384167..9fa8a83f 100644 --- a/codeclone/observability/runtime.py +++ b/codeclone/observability/runtime.py @@ -407,6 +407,33 @@ def span( ) +def record_elapsed_span( + name: str, + *, + started_at_utc: str, + duration_ms: float, + reason_kind: ReasonKind | None = None, +) -> None: + """Attach a span with explicit timing to the active operation, for work that + finished before instrumentation could wrap it (e.g. a worker's cold-start). + No-op when disabled or outside an operation. 
+ """ + parent_op = _CURRENT_OP.get() + if not _ENABLED or parent_op is None: + return + handle = SpanHandle( + span_id=_new_id(), + operation_id=parent_op.operation_id, + name=name, + started_at_utc=started_at_utc, + parent_span_id=None, + reason_kind=reason_kind, + reason=None, + dedupe_key=None, + ) + parent_op._spans.append(handle._to_record(duration_ms=duration_ms)) + + _DB_WRITE_KINDS = frozenset({"insert", "update", "delete", "replace"}) @@ -449,6 +476,7 @@ def instrument_db_connection(conn: sqlite3.Connection) -> None: "operation", "payload_capture_enabled", "record_db_query", + "record_elapsed_span", "shutdown", "span", ] diff --git a/tests/test_observability_correlation.py b/tests/test_observability_correlation.py index 7124bec7..bccb6c7a 100644 --- a/tests/test_observability_correlation.py +++ b/tests/test_observability_correlation.py @@ -77,9 +77,17 @@ def test_run_projection_job_links_under_finish( row = conn.execute( "SELECT name, correlation_id, parent_operation_id FROM platform_operations" ).fetchone() + boot = conn.execute( + "SELECT name, duration_ms FROM platform_spans " + "WHERE name='memory.projection.worker_bootstrap'" + ).fetchone() finally: conn.close() assert row == ("memory.projection.job", "A-corr", "A-op") + # The spawned worker (env handoff present) attributes its cold-start as a + # span, so the spawn->job gap is labelled in the waterfall. 
+ assert boot is not None + assert boot[1] >= 0.0 def test_spawn_injects_correlation_env( From 44e4bdff037d447e5c4b6477bbd381c2f781084d Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Thu, 11 Jun 2026 14:04:51 +0500 Subject: [PATCH 237/318] fix(memory): honor compact retrieval detail --- codeclone/memory/retrieval/service.py | 35 +++++- codeclone/memory/trajectory/retrieval.py | 136 ++++++++++++++++++++-- tests/test_memory_experience_retrieval.py | 50 ++++++++ tests/test_memory_trajectory_retrieval.py | 64 ++++++++++ 4 files changed, 273 insertions(+), 12 deletions(-) diff --git a/codeclone/memory/retrieval/service.py b/codeclone/memory/retrieval/service.py index cbc91468..efa22e15 100644 --- a/codeclone/memory/retrieval/service.py +++ b/codeclone/memory/retrieval/service.py @@ -231,8 +231,18 @@ def _scope_families(scope_paths: Sequence[str]) -> frozenset[str]: ) -def _serialize_experience(experience: Experience) -> dict[str, object]: - return { +def _serialize_experience( + experience: Experience, + *, + detail_level: MemoryDetailLevel, +) -> dict[str, object]: + statement_length = len(experience.statement) + statement = ( + experience.statement + if detail_level == "full" + else _statement_preview(experience.statement) + ) + payload: dict[str, object] = { "id": experience.id, "subject_family": experience.subject_family, "signal": experience.signal, @@ -240,14 +250,23 @@ def _serialize_experience(experience: Experience) -> dict[str, object]: "support": experience.support, "information_value": experience.information_value, "status": experience.status, - "statement": experience.statement, + "statement": statement, "agent_facets": [ {"agent_family": facet.facet_value, "count": facet.count} for facet in experience.facets if facet.facet_kind == "agent_family" ], - "evidence_trajectory_ids": [item.trajectory_id for item in experience.evidence], } + if detail_level == "full": + payload["evidence_trajectory_ids"] = [ + item.trajectory_id for item in 
experience.evidence + ] + else: + payload["statement_length"] = statement_length + payload["evidence_count"] = len(experience.evidence) + if statement_length > len(statement): + payload["statement_truncated"] = True + return payload def _relevant_experiences( @@ -256,6 +275,7 @@ def _relevant_experiences( project_id: str, families: frozenset[str], max_results: int, + detail_level: MemoryDetailLevel, ) -> list[dict[str, object]]: """Advisory experiences whose subject_family matches the scope, highest support first. Dormant experiences are kept but never surfaced.""" @@ -273,7 +293,10 @@ def _relevant_experiences( experience.id, ) ) - return [_serialize_experience(experience) for experience in matched[:max_results]] + return [ + _serialize_experience(experience, detail_level=detail_level) + for experience in matched[:max_results] + ] def _serialize_subject(subject: MemorySubject) -> dict[str, object]: @@ -536,6 +559,7 @@ def get_relevant_memory( max_results=min(max_records, DEFAULT_TRAJECTORY_PREVIEW_LIMIT), include_routine=include_routine, patch_trails=patch_trails, + detail_level=normalized_detail, ) patch_trail_summary = None if trajectories_payload: @@ -547,6 +571,7 @@ def get_relevant_memory( project_id=project_id, families=_scope_families(normalized_scope), max_results=min(max_records, DEFAULT_EXPERIENCE_PREVIEW_LIMIT), + detail_level=normalized_detail, ) coverage: dict[str, object] if normalized_scope: diff --git a/codeclone/memory/trajectory/retrieval.py b/codeclone/memory/trajectory/retrieval.py index b25e6bc4..56b45735 100644 --- a/codeclone/memory/trajectory/retrieval.py +++ b/codeclone/memory/trajectory/retrieval.py @@ -8,6 +8,7 @@ from collections.abc import Iterable, Mapping, Sequence from dataclasses import dataclass +from typing import Literal from ..paths import normalize_memory_scope_path, repo_path_to_module_key from ..search_index import SearchMatchMode, tokenize_query @@ -22,7 +23,9 @@ DEFAULT_TRAJECTORY_PREVIEW_LIMIT = 5 
DEFAULT_TRAJECTORY_STEP_LIMIT = 12 +COMPACT_TRAJECTORY_SUBJECT_LIMIT = 8 TRAJECTORY_PREVIEW_CHARS = 220 +TrajectoryDetailLevel = Literal["compact", "full"] def trajectory_excluded_from_default_retrieval( @@ -101,7 +104,15 @@ def serialize_trajectory_preview( *, relevance_score: float | None = None, patch_trail_payload: Mapping[str, object] | None = None, + detail_level: TrajectoryDetailLevel = "full", + preferred_subjects: frozenset[tuple[str, str]] = frozenset(), ) -> dict[str, object]: + subjects = trajectory.subjects + serialized_subjects, matched_subject_count = _preview_subjects( + subjects, + detail_level=detail_level, + preferred_subjects=preferred_subjects, + ) payload: dict[str, object] = { "type": "trajectory", "trajectory_id": trajectory.id, @@ -112,7 +123,7 @@ def serialize_trajectory_preview( "summary": _preview_text(trajectory.summary), "labels": list(trajectory.labels), "agent_label": trajectory_agent_label(trajectory), - "subjects": [_serialize_subject(subject) for subject in trajectory.subjects], + "subjects": serialized_subjects, "evidence_count": len(trajectory.evidence), "event_count": trajectory.event_count, "step_count": trajectory.step_count, @@ -125,20 +136,71 @@ def serialize_trajectory_preview( summary = serialize_patch_trail_summary(patch_trail_payload) if summary is not None: payload["patch_trail_summary"] = summary - contract = compute_trajectory_quality_contract( - trajectory, + _add_quality_fields( + payload, + trajectory=trajectory, patch_trail_payload=patch_trail_payload, + detail_level=detail_level, + subject_count=len(subjects), + matched_subject_count=matched_subject_count, + serialized_subject_count=len(serialized_subjects), ) - payload["quality_contract"] = serialize_trajectory_quality_contract( - contract, - trajectory=trajectory, + return payload + + +def _preview_subjects( + subjects: Sequence[object], + *, + detail_level: TrajectoryDetailLevel, + preferred_subjects: frozenset[tuple[str, str]], +) -> tuple[list[dict[str, 
object]], int]: + matched_subject_count = sum( + ( + str(getattr(subject, "subject_kind", "")), + str(getattr(subject, "subject_key", "")), + ) + in preferred_subjects + for subject in subjects + ) + selected = ( + _compact_trajectory_subjects( + subjects, + preferred_subjects=preferred_subjects, + ) + if detail_level == "compact" + else tuple(subjects) + ) + return [_serialize_subject(subject) for subject in selected], matched_subject_count + + +def _add_quality_fields( + payload: dict[str, object], + *, + trajectory: Trajectory, + patch_trail_payload: Mapping[str, object] | None, + detail_level: TrajectoryDetailLevel, + subject_count: int, + matched_subject_count: int, + serialized_subject_count: int, +) -> None: + contract = compute_trajectory_quality_contract( + trajectory, patch_trail_payload=patch_trail_payload, ) + if detail_level == "full": + payload["quality_contract"] = serialize_trajectory_quality_contract( + contract, + trajectory=trajectory, + patch_trail_payload=patch_trail_payload, + ) + else: + payload["subject_count"] = subject_count + payload["matched_subject_count"] = matched_subject_count + payload["subjects_truncated"] = serialized_subject_count < subject_count payload["complexity_score"] = contract.complexity_score payload["scope_accuracy"] = contract.scope_accuracy payload["duration_seconds"] = contract.duration_seconds payload["anomaly_count"] = contract.anomaly_count - return payload def serialize_patch_trail_summary( @@ -224,8 +286,13 @@ def rank_trajectories_for_scope( max_results: int = DEFAULT_TRAJECTORY_PREVIEW_LIMIT, include_routine: bool = False, patch_trails: Mapping[str, Mapping[str, object]] | None = None, + detail_level: TrajectoryDetailLevel = "full", ) -> tuple[list[dict[str, object]], bool]: normalized_scope = tuple(normalize_memory_scope_path(path) for path in scope_paths) + preferred_subjects = _preferred_subjects( + scope_paths=normalized_scope, + symbols=symbols, + ) visible = filter_trajectories_for_default_retrieval( 
trajectories, include_routine=include_routine, @@ -241,6 +308,8 @@ def rank_trajectories_for_scope( scored, max_results=max_results, patch_trails=patch_trails or {}, + detail_level=detail_level, + preferred_subjects=preferred_subjects, ) @@ -391,6 +460,8 @@ def _preview_results( *, max_results: int, patch_trails: Mapping[str, Mapping[str, object]] | None = None, + detail_level: TrajectoryDetailLevel = "full", + preferred_subjects: frozenset[tuple[str, str]] = frozenset(), ) -> tuple[list[dict[str, object]], bool]: limit = max(1, int(max_results)) truncated = len(results) > limit @@ -401,11 +472,61 @@ def _preview_results( item.trajectory, relevance_score=item.relevance_score, patch_trail_payload=trails.get(item.trajectory.id), + detail_level=detail_level, + preferred_subjects=preferred_subjects, ) for item in selected ], truncated +def _preferred_subjects( + *, + scope_paths: Sequence[str], + symbols: Sequence[str], +) -> frozenset[tuple[str, str]]: + subject_keys = trajectory_subject_keys( + scope_paths=scope_paths, + symbols=symbols, + ) + return frozenset((kind, key) for kind, keys in subject_keys.items() for key in keys) + + +def _compact_trajectory_subjects( + subjects: Sequence[object], + *, + preferred_subjects: frozenset[tuple[str, str]], +) -> tuple[object, ...]: + indexed = tuple(enumerate(subjects)) + ranked = sorted( + indexed, + key=lambda item: _compact_subject_sort_key( + item[1], + index=item[0], + preferred_subjects=preferred_subjects, + ), + ) + return tuple( + subject for _index, subject in ranked[:COMPACT_TRAJECTORY_SUBJECT_LIMIT] + ) + + +def _compact_subject_sort_key( + subject: object, + *, + index: int, + preferred_subjects: frozenset[tuple[str, str]], +) -> tuple[int, int, int, int]: + kind = str(getattr(subject, "subject_kind", "")) + key = str(getattr(subject, "subject_key", "")) + relation = str(getattr(subject, "relation", "")) + return ( + 0 if (kind, key) in preferred_subjects else 1, + {"touched": 0, "untouched": 1, "about": 
2}.get(relation, 3), + {"path": 0, "symbol": 1, "module": 2}.get(kind, 3), + index, + ) + + def _patch_trail_untouched_overlap( *, scope_paths: Sequence[str], @@ -483,6 +604,7 @@ def trajectory_semantic_text_parts(trajectory: Trajectory) -> Iterable[str]: __all__ = [ + "COMPACT_TRAJECTORY_SUBJECT_LIMIT", "DEFAULT_TRAJECTORY_PREVIEW_LIMIT", "DEFAULT_TRAJECTORY_STEP_LIMIT", "TrajectorySearchResult", diff --git a/tests/test_memory_experience_retrieval.py b/tests/test_memory_experience_retrieval.py index 633abe5b..626efe29 100644 --- a/tests/test_memory_experience_retrieval.py +++ b/tests/test_memory_experience_retrieval.py @@ -45,6 +45,7 @@ def test_relevant_memory_surfaces_experiences(tmp_path: Path) -> None: project_id=project.id, scope_paths=(f"{_FAMILY}/store.py",), scope_resolved_from="explicit", + detail_level="full", ) experiences = result["experiences"] @@ -65,6 +66,55 @@ def test_relevant_memory_surfaces_experiences(tmp_path: Path) -> None: assert policy["experiences_do_not_authorize_edits"] is True +def test_relevant_memory_compacts_experience_statement_and_evidence( + tmp_path: Path, +) -> None: + with memory_store(tmp_path) as (root, project, store, _db): + _seed_and_distill(root, project, store) + experience = store.list_experiences(project_id=project.id)[0] + store.replace_experiences( + project_id=project.id, + experiences=[replace(experience, statement="x" * 300)], + ) + + compact = get_relevant_memory( + store, + project_id=project.id, + scope_paths=(f"{_FAMILY}/store.py",), + scope_resolved_from="explicit", + detail_level="compact", + ) + full = get_relevant_memory( + store, + project_id=project.id, + scope_paths=(f"{_FAMILY}/store.py",), + scope_resolved_from="explicit", + detail_level="full", + ) + + by_detail: dict[str, dict[str, object]] = {} + for detail, payload in (("compact", compact), ("full", full)): + experiences = payload["experiences"] + assert isinstance(experiences, list) + experience_payload = experiences[0] + assert 
isinstance(experience_payload, dict) + by_detail[detail] = experience_payload + compact_experience = by_detail["compact"] + full_experience = by_detail["full"] + assert compact_experience["statement_length"] == 300 + assert compact_experience["statement_truncated"] is True + compact_statement = compact_experience["statement"] + assert isinstance(compact_statement, str) + assert len(compact_statement) < 300 + assert compact_experience["evidence_count"] == 5 + assert "evidence_trajectory_ids" not in compact_experience + assert full_experience["statement"] == "x" * 300 + full_evidence = full_experience["evidence_trajectory_ids"] + assert isinstance(full_evidence, list) + assert len(full_evidence) == 5 + assert "evidence_count" not in full_experience + + def test_experiences_are_typed_separate_from_records(tmp_path: Path) -> None: with memory_store(tmp_path) as (root, project, store, _db): _seed_and_distill(root, project, store) diff --git a/tests/test_memory_trajectory_retrieval.py b/tests/test_memory_trajectory_retrieval.py index 98a5e941..980203ca 100644 --- a/tests/test_memory_trajectory_retrieval.py +++ b/tests/test_memory_trajectory_retrieval.py @@ -6,6 +6,8 @@ from __future__ import annotations +import json +from dataclasses import replace from pathlib import Path import pytest @@ -14,6 +16,11 @@ from codeclone.memory.exceptions import MemoryContractError from codeclone.memory.models import MemoryEvidence, generate_memory_id from codeclone.memory.retrieval import get_relevant_memory, query_engineering_memory +from codeclone.memory.trajectory.models import TrajectorySubject +from codeclone.memory.trajectory.retrieval import ( + rank_trajectories_for_scope, + serialize_trajectory_preview, +) from codeclone.report.meta import current_report_timestamp_utc from .memory_fixtures import ( @@ -57,6 +64,63 @@ def test_get_relevant_memory_returns_scoped_trajectories(tmp_path: Path) -> None assert trajectories[0]["type"] == "trajectory" assert 
trajectories[0]["trajectory_id"].startswith("traj-") assert trajectories[0]["relevance_score"] > 1.0 + assert "quality_contract" not in trajectories[0] + assert isinstance(trajectories[0]["subjects_truncated"], bool) + + +def test_compact_trajectory_preview_preserves_scope_subjects_and_slims_payload( + tmp_path: Path, +) -> None: + with memory_store(tmp_path) as (root, project, store, _db_path): + audit_db = tmp_path / "audit.sqlite3" + seed_trajectory_audit_workflow(root=root, audit_db=audit_db) + projection = store.rebuild_trajectories_from_audit( + project=project, + root_path=root, + audit_db_path=audit_db, + ) + trajectory = replace( + projection.trajectories[0], + subjects=( + *( + TrajectorySubject("path", f"pkg/noise_{index}.py", "about") + for index in range(12) + ), + TrajectorySubject("path", "pkg/service.py", "about"), + TrajectorySubject("path", "pkg/service.py", "touched"), + ), + ) + + compact_results, _truncated = rank_trajectories_for_scope( + (trajectory,), + scope_paths=("pkg/service.py",), + symbols=(), + detail_level="compact", + ) + + compact = compact_results[0] + full = serialize_trajectory_preview(trajectory, detail_level="full") + assert full == serialize_trajectory_preview(trajectory) + assert "quality_contract" not in compact + assert "quality_contract" in full + assert compact["subject_count"] == 14 + assert compact["matched_subject_count"] == 2 + assert compact["subjects_truncated"] is True + compact_subjects = compact["subjects"] + assert isinstance(compact_subjects, list) + assert len(compact_subjects) == 8 + assert [ + subject["subject_key"] + for subject in compact_subjects + if isinstance(subject, dict) + ][:2] == ["pkg/service.py", "pkg/service.py"] + full_subjects = full["subjects"] + assert isinstance(full_subjects, list) + assert len(full_subjects) == 14 + assert "subjects_truncated" not in full + compact_size = len(json.dumps(compact, sort_keys=True, separators=(",", ":"))) + full_size = len(json.dumps(full, sort_keys=True, 
separators=(",", ":"))) + assert compact_size < full_size * 0.6 def test_get_relevant_memory_returns_patch_trail_summary(tmp_path: Path) -> None: From 004ad10da0ec949bb4a2726c5d0e8f334d10e3fd Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Thu, 11 Jun 2026 14:27:00 +0500 Subject: [PATCH 238/318] fix(memory): refine retrieval evidence context --- .../memory/retrieval/context_coverage.py | 168 ++++++++++++++++++ codeclone/memory/retrieval/ranking.py | 21 ++- codeclone/memory/retrieval/service.py | 52 ++++-- tests/test_memory_experience_retrieval.py | 14 ++ tests/test_memory_retrieval.py | 100 +++++++++++ tests/test_memory_trajectory_retrieval.py | 55 ++++++ 6 files changed, 397 insertions(+), 13 deletions(-) create mode 100644 codeclone/memory/retrieval/context_coverage.py diff --git a/codeclone/memory/retrieval/context_coverage.py b/codeclone/memory/retrieval/context_coverage.py new file mode 100644 index 00000000..bc2df9ba --- /dev/null +++ b/codeclone/memory/retrieval/context_coverage.py @@ -0,0 +1,168 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Mapping, Sequence + +from ..experience.models import Experience +from ..paths import normalize_memory_scope_path, repo_path_to_module_key +from ..trajectory.models import Trajectory + + +def _percent(covered: int, total: int) -> int | None: + return round(covered * 100 / total) if total else None + + +def _trajectory_coverage( + *, + scope_paths: Sequence[str], + trajectories: Sequence[Trajectory], +) -> tuple[dict[str, object], frozenset[str]]: + normalized = tuple(normalize_memory_scope_path(path) for path in scope_paths) + matched_paths: set[str] = set() + agent_labels: set[str] = set() + for trajectory in trajectories: + subject_pairs = { + (subject.subject_kind, subject.subject_key) + for subject in trajectory.subjects + } + for path in normalized: + if ("path", path) in subject_pairs or ( + "module", + repo_path_to_module_key(path), + ) in subject_pairs: + matched_paths.add(path) + agent_labels.update( + key.strip() + for kind, key in subject_pairs + if kind == "agent" and key.strip() + ) + total = len(normalized) + return ( + { + "scope_paths_with_trajectories": len(matched_paths), + "scope_paths_total": total, + "coverage_percent": _percent(len(matched_paths), total), + }, + frozenset(agent_labels), + ) + + +def _experience_coverage( + *, + scope_families: frozenset[str], + experiences: Sequence[Experience], +) -> tuple[dict[str, object], frozenset[str]]: + matched_families = { + experience.subject_family + for experience in experiences + if experience.subject_family in scope_families + } + agent_families = { + facet.facet_value + for experience in experiences + for facet in experience.facets + if facet.facet_kind == "agent_family" and facet.facet_value + } + total = len(scope_families) + return ( + { + "scope_families_with_experiences": len(matched_families), + "scope_families_total": total, + "coverage_percent": 
_percent(len(matched_families), total), + }, + frozenset(agent_families), + ) + + +def _count(coverage: Mapping[str, object], key: str) -> int: + value = coverage.get(key) + return value if isinstance(value, int) and not isinstance(value, bool) else 0 + + +def _observation_confidence( + *, + record_coverage: Mapping[str, object], + trajectory_coverage: Mapping[str, object], + experience_coverage: Mapping[str, object], +) -> dict[str, object]: + basis = [ + lane + for lane, coverage, key in ( + ("records", record_coverage, "scope_paths_with_memory"), + ( + "trajectories", + trajectory_coverage, + "scope_paths_with_trajectories", + ), + ( + "experiences", + experience_coverage, + "scope_families_with_experiences", + ), + ) + if _count(coverage, key) > 0 + ] + observed_paths = max( + _count(record_coverage, "scope_paths_with_memory"), + _count(trajectory_coverage, "scope_paths_with_trajectories"), + ) + path_total = _count(record_coverage, "scope_paths_total") + level = "unknown" + if basis: + level = ( + "supported" + if observed_paths >= path_total and len(basis) >= 2 + else "partial" + ) + return { + "level": level, + "basis": basis, + "note": ( + "Evidence availability only; not correctness, approval, or edit " + "authorization." 
+ ), + } + + +def build_context_coverage( + *, + record_coverage: Mapping[str, object], + scope_paths: Sequence[str], + scope_families: frozenset[str], + trajectories: Sequence[Trajectory], + experiences: Sequence[Experience], +) -> dict[str, object]: + trajectory_coverage, trajectory_agents = _trajectory_coverage( + scope_paths=scope_paths, + trajectories=trajectories, + ) + experience_coverage, experience_agents = _experience_coverage( + scope_families=scope_families, + experiences=experiences, + ) + record_payload = dict(record_coverage) + return { + **record_payload, + "record_coverage": record_payload, + "trajectory_coverage": trajectory_coverage, + "experience_coverage": experience_coverage, + "agent_diversity": { + "trajectory_agent_labels": sorted(trajectory_agents), + "trajectory_agent_label_count": len(trajectory_agents), + "experience_agent_families": sorted(experience_agents), + "experience_agent_family_count": len(experience_agents), + }, + "observation_confidence": _observation_confidence( + record_coverage=record_payload, + trajectory_coverage=trajectory_coverage, + experience_coverage=experience_coverage, + ), + } + + +__all__ = ["build_context_coverage"] diff --git a/codeclone/memory/retrieval/ranking.py b/codeclone/memory/retrieval/ranking.py index f68c5bd9..859ce938 100644 --- a/codeclone/memory/retrieval/ranking.py +++ b/codeclone/memory/retrieval/ranking.py @@ -32,6 +32,10 @@ # never dominates an exact subject match (1.0) or a scope match. It is applied # only after the scoped short-circuit, so it cannot inject out-of-scope records. _SEMANTIC_WEIGHT = 0.3 +# Git churn is useful review context, but it is not a durable architectural +# assertion. Keep exact-scope hotspots visible without letting their type and +# ingest boosts outrank richer memory by default. 
+_CHANGE_HOTSPOT_PENALTY = 0.35 @dataclass(frozen=True, slots=True) @@ -56,6 +60,20 @@ def from_scope( ) +def is_git_change_hotspot(record: MemoryRecord) -> bool: + payload = record.payload + return ( + record.type == "risk_note" + and record.ingest_source == "git" + and isinstance(payload, dict) + and payload.get("risk_kind") == "change_hotspot" + ) + + +def _context_signal_adjustment(record: MemoryRecord) -> float: + return -_CHANGE_HOTSPOT_PENALTY if is_git_change_hotspot(record) else 0.0 + + def relevance_score( *, record: MemoryRecord, @@ -101,8 +119,9 @@ def relevance_score( score += 0.05 if record.status == "stale": score -= 0.5 + score += _context_signal_adjustment(record) score += semantic_proximity * _SEMANTIC_WEIGHT return round(score, 4) -__all__ = ["RankingContext", "relevance_score"] +__all__ = ["RankingContext", "is_git_change_hotspot", "relevance_score"] diff --git a/codeclone/memory/retrieval/service.py b/codeclone/memory/retrieval/service.py index efa22e15..3bbc446b 100644 --- a/codeclone/memory/retrieval/service.py +++ b/codeclone/memory/retrieval/service.py @@ -34,13 +34,15 @@ ) from ..trajectory.retrieval import ( DEFAULT_TRAJECTORY_PREVIEW_LIMIT, + filter_trajectories_for_default_retrieval, rank_trajectories_for_query, rank_trajectories_for_scope, serialize_trajectory_detail, trajectory_status_payload, trajectory_subject_keys, ) -from .ranking import RankingContext, relevance_score +from .context_coverage import build_context_coverage +from .ranking import RankingContext, is_git_change_hotspot, relevance_score from .semantic import audit_event_row if TYPE_CHECKING: @@ -269,16 +271,13 @@ def _serialize_experience( return payload -def _relevant_experiences( +def _matching_experiences( store: SqliteEngineeringMemoryStore, *, project_id: str, families: frozenset[str], - max_results: int, - detail_level: MemoryDetailLevel, -) -> list[dict[str, object]]: - """Advisory experiences whose subject_family matches the scope, highest - support first. 
Dormant experiences are kept but never surfaced.""" +) -> list[Experience]: + """Active advisory experiences matching the requested scope families.""" if not families: return [] matched = [ @@ -293,9 +292,18 @@ def _relevant_experiences( experience.id, ) ) + return matched + + +def _serialize_relevant_experiences( + experiences: Sequence[Experience], + *, + max_results: int, + detail_level: MemoryDetailLevel, +) -> list[dict[str, object]]: return [ _serialize_experience(experience, detail_level=detail_level) - for experience in matched[:max_results] + for experience in experiences[:max_results] ] @@ -319,6 +327,12 @@ def _serialize_evidence(evidence: MemoryEvidence) -> dict[str, object]: } +def _retrieval_lane_payload(record: MemoryRecord) -> dict[str, object]: + return ( + {"retrieval_lane": "hotspot_context"} if is_git_change_hotspot(record) else {} + ) + + def _serialize_record_summary( *, record: MemoryRecord, @@ -352,6 +366,7 @@ def _serialize_record_summary( payload["stale_reason"] = record.stale_reason if record.status == "draft": payload["draft_unverified"] = True + payload.update(_retrieval_lane_payload(record)) if relevance_score is not None: payload["relevance_score"] = relevance_score return payload @@ -484,6 +499,7 @@ def _coverage_summary( "scope_paths_with_memory": with_memory, "scope_paths_total": total, "coverage_percent": percent, + "coverage_kind": "record_subject_coverage", } @@ -566,19 +582,31 @@ def get_relevant_memory( first_summary = trajectories_payload[0].get("patch_trail_summary") if isinstance(first_summary, dict): patch_trail_summary = first_summary - experiences_payload = _relevant_experiences( + matching_experiences = _matching_experiences( store, project_id=project_id, families=_scope_families(normalized_scope), + ) + experiences_payload = _serialize_relevant_experiences( + matching_experiences, max_results=min(max_records, DEFAULT_EXPERIENCE_PREVIEW_LIMIT), detail_level=normalized_detail, ) coverage: dict[str, object] if 
normalized_scope: - coverage = _coverage_summary( - store, - project_id=project_id, + coverage = build_context_coverage( + record_coverage=_coverage_summary( + store, + project_id=project_id, + scope_paths=normalized_scope, + ), scope_paths=normalized_scope, + scope_families=_scope_families(normalized_scope), + trajectories=filter_trajectories_for_default_retrieval( + trajectory_candidates, + include_routine=include_routine, + ), + experiences=matching_experiences, ) else: coverage = { diff --git a/tests/test_memory_experience_retrieval.py b/tests/test_memory_experience_retrieval.py index 626efe29..da3f9994 100644 --- a/tests/test_memory_experience_retrieval.py +++ b/tests/test_memory_experience_retrieval.py @@ -60,6 +60,20 @@ def test_relevant_memory_surfaces_experiences(tmp_path: Path) -> None: "cursor-vscode", } assert len(experience["evidence_trajectory_ids"]) >= 1 + coverage = result["coverage"] + assert isinstance(coverage, dict) + assert coverage["experience_coverage"] == { + "scope_families_with_experiences": 1, + "scope_families_total": 1, + "coverage_percent": 100, + } + agent_diversity = coverage["agent_diversity"] + assert isinstance(agent_diversity, dict) + assert agent_diversity["experience_agent_families"] == [ + "claude-code", + "cursor-vscode", + ] + assert agent_diversity["experience_agent_family_count"] == 2 # Advisory contract, exactly like trajectories. 
policy = result["retrieval_policy"] assert isinstance(policy, dict) diff --git a/tests/test_memory_retrieval.py b/tests/test_memory_retrieval.py index 77649894..8c371101 100644 --- a/tests/test_memory_retrieval.py +++ b/tests/test_memory_retrieval.py @@ -6,16 +6,19 @@ from __future__ import annotations +from dataclasses import replace from pathlib import Path from codeclone.memory.governance import record_candidate from codeclone.memory.identity import make_identity_key from codeclone.memory.models import MemoryRecord, MemorySubject, generate_memory_id from codeclone.memory.retrieval import get_relevant_memory, query_engineering_memory +from codeclone.memory.retrieval import service as retrieval_service from codeclone.memory.retrieval.ranking import RankingContext, relevance_score from codeclone.report.meta import current_report_timestamp_utc from .memory_fixtures import ( + make_module_record, memory_store, seed_module_role, seed_path_subject_record, @@ -169,6 +172,60 @@ def test_relevance_score_filters_global_contract_notes_for_scope() -> None: assert score == 0.0 +def test_relevance_score_keeps_git_hotspot_below_durable_scope_context() -> None: + module_record = make_module_record("proj-1", "pkg.service") + hotspot_record = replace( + module_record, + id="mem-hotspot", + identity_key="hotspot", + type="risk_note", + confidence="verified", + ingest_source="git", + statement="pkg/service.py changed 12 times in the last 90 days.", + payload={ + "risk_kind": "change_hotspot", + "change_count": 12, + "period_days": 90, + }, + ) + subjects = [ + MemorySubject( + id="subj-service", + memory_id=module_record.id, + subject_kind="path", + subject_key="pkg/service.py", + relation="about", + ) + ] + context = RankingContext.from_scope( + scope_paths=("pkg/service.py",), + symbols=(), + blast_dependents=(), + ) + + module_score = relevance_score( + record=module_record, + subjects=subjects, + context=context, + evidence_count=7, + ) + hotspot_score = relevance_score( + 
record=hotspot_record, + subjects=subjects, + context=context, + evidence_count=7, + ) + summary = retrieval_service._serialize_record_summary( + record=hotspot_record, + subjects=subjects, + evidence_count=7, + relevance_score=hotspot_score, + ) + + assert 0.0 < hotspot_score < module_score + assert summary["retrieval_lane"] == "hotspot_context" + + def test_get_relevant_memory_ranks_module_role_for_scoped_path(tmp_path: Path) -> None: with memory_store(tmp_path) as (_root, project, store, _db_path): seed_module_role( @@ -290,6 +347,49 @@ def test_get_relevant_memory_ranks_scope_records(tmp_path: Path) -> None: assert isinstance(records, list) assert records assert records[0]["statement"] == "sqlite store module" + coverage = result["coverage"] + assert isinstance(coverage, dict) + assert coverage["coverage_kind"] == "record_subject_coverage" + assert coverage["observation_confidence"] == { + "level": "partial", + "basis": ["records"], + "note": ( + "Evidence availability only; not correctness, approval, or edit " + "authorization." 
+ ), + } + + +def test_get_relevant_memory_reports_unknown_observation_coverage( + tmp_path: Path, +) -> None: + with memory_store(tmp_path) as (_root, project, store, _db_path): + result = get_relevant_memory( + store, + project_id=project.id, + scope_paths=("pkg/unknown.py",), + scope_resolved_from="explicit", + ) + + coverage = result["coverage"] + assert isinstance(coverage, dict) + assert coverage["record_coverage"] == { + "scope_paths_with_memory": 0, + "scope_paths_total": 1, + "coverage_percent": 0, + "coverage_kind": "record_subject_coverage", + } + assert coverage["trajectory_coverage"] == { + "scope_paths_with_trajectories": 0, + "scope_paths_total": 1, + "coverage_percent": 0, + } + assert coverage["experience_coverage"] == { + "scope_families_with_experiences": 0, + "scope_families_total": 1, + "coverage_percent": 0, + } + assert coverage["observation_confidence"]["level"] == "unknown" def test_query_engineering_memory_search_and_status(tmp_path: Path) -> None: diff --git a/tests/test_memory_trajectory_retrieval.py b/tests/test_memory_trajectory_retrieval.py index 980203ca..a04c9be2 100644 --- a/tests/test_memory_trajectory_retrieval.py +++ b/tests/test_memory_trajectory_retrieval.py @@ -16,6 +16,7 @@ from codeclone.memory.exceptions import MemoryContractError from codeclone.memory.models import MemoryEvidence, generate_memory_id from codeclone.memory.retrieval import get_relevant_memory, query_engineering_memory +from codeclone.memory.retrieval.context_coverage import build_context_coverage from codeclone.memory.trajectory.models import TrajectorySubject from codeclone.memory.trajectory.retrieval import ( rank_trajectories_for_scope, @@ -66,6 +67,20 @@ def test_get_relevant_memory_returns_scoped_trajectories(tmp_path: Path) -> None assert trajectories[0]["relevance_score"] > 1.0 assert "quality_contract" not in trajectories[0] assert isinstance(trajectories[0]["subjects_truncated"], bool) + coverage = result["coverage"] + assert isinstance(coverage, 
dict) + assert coverage["trajectory_coverage"] == { + "scope_paths_with_trajectories": 1, + "scope_paths_total": 1, + "coverage_percent": 100, + } + assert coverage["agent_diversity"] == { + "trajectory_agent_labels": ["test-agent"], + "trajectory_agent_label_count": 1, + "experience_agent_families": [], + "experience_agent_family_count": 0, + } + assert coverage["observation_confidence"]["level"] == "supported" def test_compact_trajectory_preview_preserves_scope_subjects_and_slims_payload( @@ -123,6 +138,46 @@ def test_compact_trajectory_preview_preserves_scope_subjects_and_slims_payload( assert compact_size < full_size * 0.6 +def test_context_coverage_matches_trajectory_module_subject(tmp_path: Path) -> None: + with memory_store(tmp_path) as (root, project, store, _db_path): + audit_db = tmp_path / "audit.sqlite3" + seed_trajectory_audit_workflow(root=root, audit_db=audit_db) + projection = store.rebuild_trajectories_from_audit( + project=project, + root_path=root, + audit_db_path=audit_db, + ) + trajectory = replace( + projection.trajectories[0], + subjects=( + TrajectorySubject("module", "pkg.service", "about"), + TrajectorySubject("agent", "test-agent/1", "actor"), + ), + ) + + coverage = build_context_coverage( + record_coverage={ + "scope_paths_with_memory": 0, + "scope_paths_total": 1, + "coverage_percent": 0, + "coverage_kind": "record_subject_coverage", + }, + scope_paths=("pkg/service.py", "pkg/other.py"), + scope_families=frozenset({"pkg"}), + trajectories=(trajectory,), + experiences=(), + ) + + assert coverage["trajectory_coverage"] == { + "scope_paths_with_trajectories": 1, + "scope_paths_total": 2, + "coverage_percent": 50, + } + observation_confidence = coverage["observation_confidence"] + assert isinstance(observation_confidence, dict) + assert observation_confidence["level"] == "partial" + + def test_get_relevant_memory_returns_patch_trail_summary(tmp_path: Path) -> None: with memory_store(tmp_path) as (root, project, store, _db_path): audit_db = 
tmp_path / "audit.sqlite3" From 5b5ea059ea2487eb168c39ba0810d5f73581cfa3 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Thu, 11 Jun 2026 14:53:35 +0500 Subject: [PATCH 239/318] fix(memory): prioritize reliable retrieval context --- .../memory/retrieval/context_coverage.py | 18 ++-- codeclone/memory/retrieval/ranking.py | 24 ++++- codeclone/memory/retrieval/service.py | 7 +- codeclone/memory/trajectory/retrieval.py | 37 ++++++- tests/test_memory_experience_retrieval.py | 1 + tests/test_memory_retrieval.py | 97 +++++++++++++------ tests/test_memory_trajectory_retrieval.py | 32 ++++++ 7 files changed, 172 insertions(+), 44 deletions(-) diff --git a/codeclone/memory/retrieval/context_coverage.py b/codeclone/memory/retrieval/context_coverage.py index bc2df9ba..6730418a 100644 --- a/codeclone/memory/retrieval/context_coverage.py +++ b/codeclone/memory/retrieval/context_coverage.py @@ -107,18 +107,20 @@ def _observation_confidence( ) if _count(coverage, key) > 0 ] - observed_paths = max( - _count(record_coverage, "scope_paths_with_memory"), - _count(trajectory_coverage, "scope_paths_with_trajectories"), - ) path_total = _count(record_coverage, "scope_paths_total") + record_paths = _count(record_coverage, "scope_paths_with_memory") + trajectory_paths = _count( + trajectory_coverage, + "scope_paths_with_trajectories", + ) level = "unknown" if basis: - level = ( - "supported" - if observed_paths >= path_total and len(basis) >= 2 - else "partial" + complete_path_evidence = ( + path_total > 0 + and record_paths >= path_total + and trajectory_paths >= path_total ) + level = "supported" if complete_path_evidence else "partial" return { "level": level, "basis": basis, diff --git a/codeclone/memory/retrieval/ranking.py b/codeclone/memory/retrieval/ranking.py index 859ce938..78ca36ed 100644 --- a/codeclone/memory/retrieval/ranking.py +++ b/codeclone/memory/retrieval/ranking.py @@ -36,6 +36,9 @@ # assertion. 
Keep exact-scope hotspots visible without letting their type and # ingest boosts outrank richer memory by default. _CHANGE_HOTSPOT_PENALTY = 0.35 +# Finish-hook module-role drafts are workflow reminders, not durable module +# descriptions. Keep them visible while placing substantive memory first. +_WORKFLOW_CONTEXT_PENALTY = 0.65 @dataclass(frozen=True, slots=True) @@ -70,8 +73,25 @@ def is_git_change_hotspot(record: MemoryRecord) -> bool: ) +def retrieval_lane(record: MemoryRecord) -> str | None: + if is_git_change_hotspot(record): + return "hotspot_context" + if ( + record.type == "module_role" + and record.status == "draft" + and record.created_by == "finish_hook" + ): + return "workflow_context" + return None + + def _context_signal_adjustment(record: MemoryRecord) -> float: - return -_CHANGE_HOTSPOT_PENALTY if is_git_change_hotspot(record) else 0.0 + lane = retrieval_lane(record) + if lane == "hotspot_context": + return -_CHANGE_HOTSPOT_PENALTY + if lane == "workflow_context": + return -_WORKFLOW_CONTEXT_PENALTY + return 0.0 def relevance_score( @@ -124,4 +144,4 @@ def relevance_score( return round(score, 4) -__all__ = ["RankingContext", "is_git_change_hotspot", "relevance_score"] +__all__ = ["RankingContext", "relevance_score", "retrieval_lane"] diff --git a/codeclone/memory/retrieval/service.py b/codeclone/memory/retrieval/service.py index 3bbc446b..90731830 100644 --- a/codeclone/memory/retrieval/service.py +++ b/codeclone/memory/retrieval/service.py @@ -42,7 +42,7 @@ trajectory_subject_keys, ) from .context_coverage import build_context_coverage -from .ranking import RankingContext, is_git_change_hotspot, relevance_score +from .ranking import RankingContext, relevance_score, retrieval_lane from .semantic import audit_event_row if TYPE_CHECKING: @@ -328,9 +328,8 @@ def _serialize_evidence(evidence: MemoryEvidence) -> dict[str, object]: def _retrieval_lane_payload(record: MemoryRecord) -> dict[str, object]: - return ( - {"retrieval_lane": "hotspot_context"} 
if is_git_change_hotspot(record) else {} - ) + lane = retrieval_lane(record) + return {"retrieval_lane": lane} if lane is not None else {} def _serialize_record_summary( diff --git a/codeclone/memory/trajectory/retrieval.py b/codeclone/memory/trajectory/retrieval.py index 56b45735..7dbfb728 100644 --- a/codeclone/memory/trajectory/retrieval.py +++ b/codeclone/memory/trajectory/retrieval.py @@ -310,6 +310,7 @@ def rank_trajectories_for_scope( patch_trails=patch_trails or {}, detail_level=detail_level, preferred_subjects=preferred_subjects, + diversify=True, ) @@ -462,10 +463,15 @@ def _preview_results( patch_trails: Mapping[str, Mapping[str, object]] | None = None, detail_level: TrajectoryDetailLevel = "full", preferred_subjects: frozenset[tuple[str, str]] = frozenset(), + diversify: bool = False, ) -> tuple[list[dict[str, object]], bool]: limit = max(1, int(max_results)) truncated = len(results) > limit - selected = results[:limit] + selected = ( + _select_diverse_scope_results(results, limit=limit) + if diversify + else results[:limit] + ) trails = patch_trails or {} return [ serialize_trajectory_preview( @@ -479,6 +485,35 @@ def _preview_results( ], truncated +def _select_diverse_scope_results( + results: Sequence[TrajectorySearchResult], + *, + limit: int, +) -> Sequence[TrajectorySearchResult]: + selected: list[TrajectorySearchResult] = [] + selected_ids: set[str] = set() + seen_examples: set[tuple[str, str, str]] = set() + for item in results: + trajectory = item.trajectory + example_key = ( + trajectory.outcome, + trajectory.quality_tier, + trajectory_agent_label(trajectory) or "", + ) + if example_key not in seen_examples: + selected.append(item) + selected_ids.add(trajectory.id) + seen_examples.add(example_key) + if len(selected) >= limit: + return selected + for item in results: + if item.trajectory.id not in selected_ids: + selected.append(item) + if len(selected) >= limit: + break + return selected + + def _preferred_subjects( *, scope_paths: 
Sequence[str], diff --git a/tests/test_memory_experience_retrieval.py b/tests/test_memory_experience_retrieval.py index da3f9994..e52b9785 100644 --- a/tests/test_memory_experience_retrieval.py +++ b/tests/test_memory_experience_retrieval.py @@ -74,6 +74,7 @@ def test_relevant_memory_surfaces_experiences(tmp_path: Path) -> None: "cursor-vscode", ] assert agent_diversity["experience_agent_family_count"] == 2 + assert coverage["observation_confidence"]["level"] == "partial" # Advisory contract, exactly like trajectories. policy = result["retrieval_policy"] assert isinstance(policy, dict) diff --git a/tests/test_memory_retrieval.py b/tests/test_memory_retrieval.py index 8c371101..f26736df 100644 --- a/tests/test_memory_retrieval.py +++ b/tests/test_memory_retrieval.py @@ -25,6 +25,40 @@ ) +def _score_scoped_record( + record: MemoryRecord, + *, + path: str = "pkg/service.py", + evidence_count: int = 0, +) -> tuple[float, dict[str, object]]: + subjects = [ + MemorySubject( + id=f"subj-{record.id}", + memory_id=record.id, + subject_kind="path", + subject_key=path, + relation="about", + ) + ] + score = relevance_score( + record=record, + subjects=subjects, + context=RankingContext.from_scope( + scope_paths=(path,), + symbols=(), + blast_dependents=(), + ), + evidence_count=evidence_count, + ) + summary = retrieval_service._serialize_record_summary( + record=record, + subjects=subjects, + evidence_count=evidence_count, + relevance_score=score, + ) + return score, summary + + def test_relevance_score_prefers_scope_path_match() -> None: now = current_report_timestamp_utc() record = MemoryRecord( @@ -188,44 +222,49 @@ def test_relevance_score_keeps_git_hotspot_below_durable_scope_context() -> None "period_days": 90, }, ) - subjects = [ - MemorySubject( - id="subj-service", - memory_id=module_record.id, - subject_kind="path", - subject_key="pkg/service.py", - relation="about", - ) - ] - context = RankingContext.from_scope( - scope_paths=("pkg/service.py",), - symbols=(), - 
blast_dependents=(), - ) - - module_score = relevance_score( - record=module_record, - subjects=subjects, - context=context, + module_score, _module_summary = _score_scoped_record( + module_record, evidence_count=7, ) - hotspot_score = relevance_score( - record=hotspot_record, - subjects=subjects, - context=context, + hotspot_score, summary = _score_scoped_record( + hotspot_record, evidence_count=7, ) - summary = retrieval_service._serialize_record_summary( - record=hotspot_record, - subjects=subjects, - evidence_count=7, - relevance_score=hotspot_score, - ) assert 0.0 < hotspot_score < module_score assert summary["retrieval_lane"] == "hotspot_context" +def test_finish_hook_module_role_is_bounded_workflow_context() -> None: + module_record = make_module_record("proj-1", "pkg.service") + substantive_record = replace( + module_record, + id="mem-rationale", + identity_key="rationale", + type="change_rationale", + status="draft", + origin="agent", + ingest_source="agent", + statement="Keep retrieval provenance separate from durable assertions.", + created_by="agent", + ) + workflow_record = replace( + module_record, + id="mem-workflow", + identity_key="workflow", + status="draft", + origin="agent", + ingest_source="agent", + statement="Patch touched scope includes pkg/service.py.", + created_by="finish_hook", + ) + substantive_score, _substantive_summary = _score_scoped_record(substantive_record) + workflow_score, summary = _score_scoped_record(workflow_record) + + assert 0.0 < workflow_score < substantive_score + assert summary["retrieval_lane"] == "workflow_context" + + def test_get_relevant_memory_ranks_module_role_for_scoped_path(tmp_path: Path) -> None: with memory_store(tmp_path) as (_root, project, store, _db_path): seed_module_role( diff --git a/tests/test_memory_trajectory_retrieval.py b/tests/test_memory_trajectory_retrieval.py index a04c9be2..e87e5ce9 100644 --- a/tests/test_memory_trajectory_retrieval.py +++ b/tests/test_memory_trajectory_retrieval.py @@ 
-138,6 +138,38 @@ def test_compact_trajectory_preview_preserves_scope_subjects_and_slims_payload( assert compact_size < full_size * 0.6 +def test_scoped_trajectory_preview_prefers_diverse_examples(tmp_path: Path) -> None: + with memory_store(tmp_path) as (root, project, store, _db_path): + audit_db = tmp_path / "audit.sqlite3" + seed_trajectory_audit_workflow(root=root, audit_db=audit_db) + projection = store.rebuild_trajectories_from_audit( + project=project, + root_path=root, + audit_db_path=audit_db, + ) + base = projection.trajectories[0] + duplicate = replace(base, id="traj-duplicate") + partial = replace( + base, + id="traj-partial", + outcome="partial", + quality_tier="corrected", + ) + + results, truncated = rank_trajectories_for_scope( + (base, duplicate, partial), + scope_paths=("pkg/service.py",), + symbols=(), + max_results=2, + detail_level="compact", + ) + + assert truncated is True + trajectory_ids = [item["trajectory_id"] for item in results] + assert trajectory_ids[0] == "traj-partial" + assert trajectory_ids[1] in {base.id, "traj-duplicate"} + + def test_context_coverage_matches_trajectory_module_subject(tmp_path: Path) -> None: with memory_store(tmp_path) as (root, project, store, _db_path): audit_db = tmp_path / "audit.sqlite3" From e59e6726b35630e391e61416fd6e20dd37758d15 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Thu, 11 Jun 2026 19:27:45 +0500 Subject: [PATCH 240/318] test(memory): add coverage tests for memory and observability surfaces --- tests/test_audit_events_coverage.py | 209 +++ tests/test_cli_memory_surface_coverage.py | 317 ++++ tests/test_memory_coverage_gaps.py | 1528 +++++++++++++++++ ...test_memory_retrieval_semantic_coverage.py | 120 ++ tests/test_memory_trajectory_coverage.py | 183 +- tests/test_memory_trajectory_retrieval.py | 67 + tests/test_observability_profile.py | 46 +- tests/test_observability_reason_kind.py | 22 + 8 files changed, 2490 insertions(+), 2 deletions(-) create mode 100644 
tests/test_audit_events_coverage.py create mode 100644 tests/test_cli_memory_surface_coverage.py create mode 100644 tests/test_memory_retrieval_semantic_coverage.py create mode 100644 tests/test_observability_reason_kind.py diff --git a/tests/test_audit_events_coverage.py b/tests/test_audit_events_coverage.py new file mode 100644 index 00000000..f67d3ea1 --- /dev/null +++ b/tests/test_audit_events_coverage.py @@ -0,0 +1,209 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import json + +from codeclone.audit.events import ( + EVENT_ANALYSIS_COMPLETED, + EVENT_INTENT_CLEARED, + EVENT_INTENT_QUEUE_BLOCKED, + EVENT_PATCH_TRAIL_COMPUTED, + EVENT_RECEIPT_CREATED, + EVENT_WORKSPACE_GC, + AuditEvent, + compact_payload_for_event, + event_core_for_event, + event_summary, + projection_supplement_facts_from_payload, +) + + +def _facts(core: dict[str, object]) -> dict[str, object]: + value = core["facts"] + assert isinstance(value, dict) + return value + + +def _event(event_type: str, **payload: object) -> AuditEvent: + return AuditEvent( + event_type=event_type, + severity="info", + repo_root_digest="digest", + agent_pid=1, + agent_label="agent", + status="ok", + payload=payload, + ) + + +def test_compact_payload_for_patch_trail_and_receipt() -> None: + patch_trail = compact_payload_for_event( + event_type=EVENT_PATCH_TRAIL_COMPUTED, + payload={ + "patch_trail_digest": "abc", + "scope_check_status": "clean", + "verification_status": "accepted", + "declared_files": ["a.py"], + "changed_files": ["a.py"], + "untouched_in_declared": [], + "unexpected_files": [], + "forbidden_touched": [], + "truncation": {"declared_files": True}, + }, + ) + assert patch_trail["declared"] == 1 + assert patch_trail["truncation"] is 
True + + receipt = compact_payload_for_event( + event_type=EVENT_RECEIPT_CREATED, + payload={ + "format": "markdown", + "receipt": {"verdict": "clean", "human_decision_points": [{"id": "x"}]}, + }, + ) + assert receipt["verdict"] == "clean" + assert receipt["human_decisions"] == 1 + + +def test_event_core_for_workspace_and_queue_events() -> None: + queue = event_core_for_event( + _event( + EVENT_INTENT_QUEUE_BLOCKED, + intent_id="intent-q", + blocking_count=2, + ) + ) + assert _facts(queue)["blocking_count"] == 2 + + cleared = event_core_for_event( + _event(EVENT_INTENT_CLEARED, cleared=1, workspace_cleared=True) + ) + assert _facts(cleared)["cleared"] == 1 + assert _facts(cleared)["workspace_cleared"] is True + + gc = event_core_for_event( + _event(EVENT_WORKSPACE_GC, removed=3, stale_count=1, orphaned_count=2) + ) + assert _facts(gc)["removed"] == 3 + assert _facts(gc)["orphaned_count"] == 2 + + +def test_event_core_for_receipt_and_check_paths() -> None: + receipt_core = event_core_for_event( + _event( + EVENT_RECEIPT_CREATED, + format="json", + receipt={"verdict": "needs_attention", "human_decision_points": [1, 2]}, + ) + ) + facts = _facts(receipt_core) + assert facts["format"] == "json" + assert facts["human_decisions"] == 2 + + check_core = event_core_for_event( + _event( + "intent.checked", + status="clean", + declared_scope=["pkg/a.py", "pkg/b.py"], + actual_changed_files=["pkg/a.py"], + unexpected_files=["extra.py"], + forbidden_touched=["codeclone.baseline.json"], + ) + ) + check_facts = _facts(check_core) + assert check_facts["unexpected_files_list"] == ["extra.py"] + assert check_facts["forbidden_touched_list"] == ["codeclone.baseline.json"] + + +def test_analysis_completed_summary_and_projection_supplement() -> None: + summary = event_summary( + EVENT_ANALYSIS_COMPLETED, + {"source": "mcp", "health": {"score": 91}}, + ) + assert summary == "analysis completed (mcp): health=91" + + payload = json.dumps( + { + "scope": {"allowed_files": ["pkg/a.py"]}, + 
"intent_description": "test", + } + ) + supplement = projection_supplement_facts_from_payload( + "intent.declared", + payload, + ) + assert supplement.get("scope_paths") == ["pkg/a.py"] + assert projection_supplement_facts_from_payload("intent.declared", "{bad") == {} + assert projection_supplement_facts_from_payload("intent.declared", None) == {} + + intent_cleared = event_core_for_event( + _event("intent.cleared", cleared=2, workspace_cleared=False) + ) + assert _facts(intent_cleared)["cleared"] == 2 + + workspace_conflict = compact_payload_for_event( + event_type="workspace.conflict_detected", + payload={"concurrent_intents": [{"id": "a"}]}, + ) + assert workspace_conflict["concurrent_intents"] == 1 + + analysis = compact_payload_for_event( + event_type=EVENT_ANALYSIS_COMPLETED, + payload={ + "source": "cli", + "mode": "full", + "health": {"score": 88, "grade": "B"}, + "findings": {"total": 3, "new": 1}, + "inventory": {"files": 10}, + }, + ) + assert analysis["health_score"] == 88 + assert analysis["findings_total"] == 3 + + cleared_core = event_core_for_event( + _event(EVENT_INTENT_CLEARED, cleared=1, workspace_cleared=True) + ) + assert _facts(cleared_core)["workspace_cleared"] is True + + patch_trail_core = event_core_for_event( + _event( + EVENT_PATCH_TRAIL_COMPUTED, + counts={"declared": 2, "changed": 1}, + scope_check_status="clean", + verification_status="accepted", + patch_trail_digest="digest", + truncation={}, + ) + ) + assert _facts(patch_trail_core)["declared"] == 2 + + many_citations = [ + {"kind": "finding", "cited_id": f"f-{index}", "valid": True} + for index in range(40) + ] + claim_payload = event_core_for_event( + _event( + "claim_validation.completed", + valid=True, + citations_found=40, + validated_citations=[ + *many_citations, + {"kind": "", "cited_id": "", "valid": False}, + ], + ) + ) + claim_facts = claim_payload["facts"] + assert isinstance(claim_facts, dict) + assert claim_facts.get("citations_truncated") is True + assert 
len(claim_facts.get("citations", [])) == 32 + + supplement = projection_supplement_facts_from_payload( + "intent.declared", + json.dumps(["not", "mapping"]), + ) + assert supplement == {} diff --git a/tests/test_cli_memory_surface_coverage.py b/tests/test_cli_memory_surface_coverage.py new file mode 100644 index 00000000..0ceeece7 --- /dev/null +++ b/tests/test_cli_memory_surface_coverage.py @@ -0,0 +1,317 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from pathlib import Path + +import pytest + +from codeclone.audit.validation import DEFAULT_AUDIT_PATH, resolve_audit_path +from codeclone.contracts import ExitCode +from codeclone.memory.exceptions import MemoryContractError +from codeclone.surfaces.cli.memory import memory_main + +from .memory_fixtures import cli_memory_repo +from .test_cli_memory_trajectory import _seed_cli_audit + + +def _trajectory_repo(tmp_path: Path) -> tuple[Path, str]: + with cli_memory_repo(tmp_path, with_draft=False) as (root, project, store): + _seed_cli_audit(root) + store.rebuild_trajectories_from_audit( + project=project, + root_path=root, + audit_db_path=resolve_audit_path(root_path=root, value=DEFAULT_AUDIT_PATH), + ) + trajectory_id = store.list_trajectories(project_id=project.id, limit=1)[0].id + store.close() + return root, trajectory_id + + +def test_trajectory_cli_agents_anomalies_dashboard_text_and_json( + tmp_path: Path, +) -> None: + root, _trajectory_id = _trajectory_repo(tmp_path) + root_arg = str(root.resolve()) + for action in ("agents", "anomalies", "dashboard"): + assert memory_main(["trajectory", action, "--root", root_arg]) == int( + ExitCode.SUCCESS + ) + assert memory_main(["trajectory", action, "--root", root_arg, "--json"]) == int( + 
ExitCode.SUCCESS + ) + assert memory_main( + [ + "trajectory", + "agents", + "--root", + root_arg, + "--include-routine", + ] + ) == int(ExitCode.SUCCESS) + + +@pytest.mark.parametrize( + "argv", + [ + ["trajectory", "rebuild"], + ["trajectory", "agents"], + ["trajectory", "anomalies"], + ["trajectory", "dashboard"], + ["trajectory", "show", "traj-missing"], + [ + "trajectory", + "export", + "--profile", + "agent-memory-retrieval-v1", + "--out", + "out.jsonl", + ], + ], +) +def test_trajectory_cli_missing_db_reports_error( + tmp_path: Path, + argv: list[str], +) -> None: + missing = tmp_path / "missing" + missing.mkdir() + assert memory_main([*argv, "--root", str(missing)]) == int(ExitCode.CONTRACT_ERROR) + + +def test_trajectory_cli_rebuild_disabled_and_missing_db(tmp_path: Path) -> None: + disabled_root = tmp_path / "disabled" + disabled_root.mkdir() + (disabled_root / "pyproject.toml").write_text( + "[tool.codeclone.memory]\ntrajectories_enabled = false\n", + encoding="utf-8", + ) + with cli_memory_repo(disabled_root, with_draft=False) as (root, _project, store): + store.close() + code = memory_main(["trajectory", "rebuild", "--root", str(root.resolve())]) + assert code == int(ExitCode.CONTRACT_ERROR) + + missing = tmp_path / "missing" + missing.mkdir() + assert memory_main(["trajectory", "list", "--root", str(missing)]) == int( + ExitCode.CONTRACT_ERROR + ) + assert memory_main( + ["trajectory", "search", "exercise", "--root", str(missing)] + ) == int(ExitCode.CONTRACT_ERROR) + + +def test_trajectory_cli_show_missing_and_export_json(tmp_path: Path) -> None: + root, _trajectory_id = _trajectory_repo(tmp_path) + root_arg = str(root.resolve()) + assert memory_main( + ["trajectory", "show", "traj-missing", "--root", root_arg] + ) == int(ExitCode.CONTRACT_ERROR) + out_path = root / "exports" / "out.jsonl" + assert memory_main( + [ + "trajectory", + "export", + "--root", + root_arg, + "--profile", + "agent-memory-retrieval-v1", + "--out", + str(out_path), + 
"--force", + "--json", + ] + ) == int(ExitCode.SUCCESS) + + +@pytest.mark.parametrize( + "action, patch_return", + [ + ("agents", {"payload": "not-a-dict"}), + ("anomalies", {"payload": None}), + ("dashboard", {}), + ], +) +def test_trajectory_cli_payload_guards( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + action: str, + patch_return: dict[str, object], +) -> None: + root, _ = _trajectory_repo(tmp_path) + root_arg = str(root.resolve()) + monkeypatch.setattr( + "codeclone.surfaces.cli.memory.query_engineering_memory", + lambda *_args, **_kwargs: patch_return, + ) + code = memory_main(["trajectory", action, "--root", root_arg]) + assert code == int(ExitCode.INTERNAL_ERROR) + + +def test_trajectory_cli_rebuild_failure( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + with cli_memory_repo(tmp_path, with_draft=False) as (root, _project, store): + _seed_cli_audit(root) + store.close() + root_arg = str(root.resolve()) + + class _BrokenStore: + def rebuild_trajectories_from_audit(self, **_kwargs: object) -> None: + raise RuntimeError("boom") + + def close(self) -> None: + return None + + monkeypatch.setattr( + "codeclone.surfaces.cli.memory.SqliteEngineeringMemoryStore", + lambda _path: _BrokenStore(), + ) + code = memory_main(["trajectory", "rebuild", "--root", root_arg]) + assert code == int(ExitCode.CONTRACT_ERROR) + + +def test_trajectory_export_contract_error( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + root, _ = _trajectory_repo(tmp_path) + root_arg = str(root.resolve()) + + def _raise(*_args: object, **_kwargs: object) -> None: + raise MemoryContractError("export blocked") + + monkeypatch.setattr( + "codeclone.surfaces.cli.memory.export_trajectories_jsonl", + _raise, + ) + code = memory_main( + [ + "trajectory", + "export", + "--root", + root_arg, + "--profile", + "agent-memory-retrieval-v1", + "--out", + "out.jsonl", + "--force", + ] + ) + assert code == int(ExitCode.CONTRACT_ERROR) + + +def 
test_jobs_list_and_run_once(tmp_path: Path) -> None: + with cli_memory_repo(tmp_path, with_draft=False) as (root, _project, _store): + root_arg = str(root.resolve()) + assert memory_main(["jobs", "list", "--root", root_arg]) == int( + ExitCode.SUCCESS + ) + assert memory_main( + ["jobs", "list", "--root", root_arg, "--json", "--limit", "3"] + ) == int(ExitCode.SUCCESS) + assert memory_main(["jobs", "run-once", "--root", root_arg]) == int( + ExitCode.SUCCESS + ) + + +def test_jobs_contract_error_renders_json( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + with cli_memory_repo(tmp_path, with_draft=False) as (root, _project, _store): + root_arg = str(root.resolve()) + + def _raise(*_args: object, **_kwargs: object) -> dict[str, object]: + raise MemoryContractError("jobs blocked") + + monkeypatch.setattr( + "codeclone.surfaces.cli.memory.execute_run_projection_jobs_once", + _raise, + ) + code = memory_main(["jobs", "run-once", "--root", root_arg]) + assert code == int(ExitCode.CONTRACT_ERROR) + + +def test_jobs_list_contract_error( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + with cli_memory_repo(tmp_path, with_draft=False) as (root, _project, _store): + root_arg = str(root.resolve()) + monkeypatch.setattr( + "codeclone.surfaces.cli.memory.execute_projection_rebuild_status", + lambda **_kwargs: (_ for _ in ()).throw( + MemoryContractError("jobs list blocked") + ), + ) + code = memory_main(["jobs", "list", "--root", root_arg]) + assert code == int(ExitCode.CONTRACT_ERROR) + + +def test_jobs_list_with_populated_queue( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + with cli_memory_repo(tmp_path, with_draft=False) as (root, _project, _store): + root_arg = str(root.resolve()) + monkeypatch.setattr( + "codeclone.surfaces.cli.memory.execute_projection_rebuild_status", + lambda **_kwargs: { + "status": "ok", + "jobs": [ + { + "id": "job-1", + "status": "completed", + "trigger": "cli", + "requested_at_utc": 
"2026-01-01T00:00:00Z", + } + ], + }, + ) + code = memory_main(["jobs", "list", "--root", root_arg]) + assert code == int(ExitCode.SUCCESS) + + +def test_search_semantic_advisory_when_provider_unavailable( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + with cli_memory_repo(tmp_path) as (root, _project, _store): + root_arg = str(root.resolve()) + monkeypatch.setattr( + "codeclone.surfaces.cli.memory.resolve_embedding_provider", + lambda _cfg: (_ for _ in ()).throw( + __import__( + "codeclone.memory.exceptions", + fromlist=["MemorySemanticUnavailableError"], + ).MemorySemanticUnavailableError("no provider") + ), + ) + monkeypatch.setattr( + "codeclone.surfaces.cli.memory.query_engineering_memory", + lambda *_args, **_kwargs: { + "payload": {"records": []}, + "semantic": {"used": False, "reason": "provider missing"}, + }, + ) + code = memory_main(["search", "fixture", "--root", root_arg, "--semantic"]) + assert code == int(ExitCode.SUCCESS) + + +def test_dashboard_json_payload_is_valid( + tmp_path: Path, + capsys: pytest.CaptureFixture[str], +) -> None: + root, _ = _trajectory_repo(tmp_path) + code = memory_main( + ["trajectory", "dashboard", "--root", str(root.resolve()), "--json"] + ) + assert code == int(ExitCode.SUCCESS) + output = capsys.readouterr().out + assert '"trajectory_count"' in output + assert '"agents"' in output diff --git a/tests/test_memory_coverage_gaps.py b/tests/test_memory_coverage_gaps.py index b092522a..e12d013e 100644 --- a/tests/test_memory_coverage_gaps.py +++ b/tests/test_memory_coverage_gaps.py @@ -14,6 +14,7 @@ import pytest +from codeclone.config.memory import IngestConfig, resolve_memory_config from codeclone.config.memory_defaults import ( DEFAULT_MEMORY_SOFT_STATEMENT_CHARS, DEFAULT_MEMORY_TARGET_STATEMENT_CHARS, @@ -24,32 +25,59 @@ from codeclone.memory.ingest.runner import _registry_paths, build_init_batch from codeclone.memory.models import ( MemoryEvidence, + MemoryProject, MemoryRecord, MemorySubject, 
RecordBatch, + generate_memory_id, parse_payload_json, ) from codeclone.memory.project import ( + GitProvenance, analysis_fingerprint_from_report, read_git_provenance, + resolve_memory_db_path, resolve_project_identity, ) from codeclone.memory.retrieval import get_relevant_memory from codeclone.memory.retrieval import service as retrieval_service from codeclone.memory.retrieval.ranking import RankingContext, relevance_score from codeclone.memory.retrieval.semantic import audit_event_row +from codeclone.memory.schema import open_memory_db from codeclone.memory.search_index import build_search_text from codeclone.memory.semantic.models import SemanticHit from codeclone.memory.semantic.sources import AuditIndexSource, MemoryIndexSource from codeclone.memory.staleness import apply_refresh_staleness, apply_scope_staleness from codeclone.report.meta import current_report_timestamp_utc from tests.memory_fixtures import ( + cli_memory_repo, git_repo_with_cached_report, make_module_record, memory_store, + seed_trajectory_audit_workflow, ) +def _tool_count_contradiction_repo( + tmp_path: Path, + *, + tools_json: str, + doc_name: str, + doc_text: str, +) -> tuple[Path, MemoryProject, GitProvenance, IngestConfig]: + root = tmp_path / "repo" + root.mkdir() + (root / "tools.json").write_text(tools_json, encoding="utf-8") + (root / doc_name).write_text(doc_text, encoding="utf-8") + project = resolve_project_identity(root) + git = read_git_provenance(root) + ingest = IngestConfig( + mcp_tool_schema_snapshot_path="tools.json", + mcp_tool_count_doc_paths=(doc_name,), + ) + return root, project, git, ingest + + def test_resolve_doc_anchor_path_normalizes_registry_and_filesystem( tmp_path: Path, ) -> None: @@ -577,6 +605,1506 @@ def test_registry_paths_rejects_non_mapping_inventory() -> None: assert _registry_paths({"inventory": {"file_registry": "bad"}}) == frozenset() +def test_project_trajectory_edge_outcomes_and_labels() -> None: + from codeclone.memory.trajectory.projector import 
( + TrajectoryProjectionError, + project_trajectory, + ) + + from .test_memory_trajectory_projector import _record + + with pytest.raises(TrajectoryProjectionError, match="requires events"): + project_trajectory( + project_id="proj", + repo_root_digest="digest", + workflow_id="intent:intent-a-001", + records=(), + ) + + blocked = project_trajectory( + project_id="proj", + repo_root_digest="digest", + workflow_id="intent:intent-a-001", + records=( + _record(1, "intent.declared", status="active"), + _record(2, "intent.queue_blocked", status="blocked"), + ), + ) + assert blocked.outcome == "blocked" + + conflict = project_trajectory( + project_id="proj", + repo_root_digest="digest", + workflow_id="intent:intent-a-001", + records=( + _record(1, "intent.declared", status="active"), + _record(2, "workspace.conflict_detected", status="blocked"), + ), + ) + assert conflict.outcome == "blocked" + assert "foreign_conflict_seen" in conflict.labels + + external = project_trajectory( + project_id="proj", + repo_root_digest="digest", + workflow_id="intent:intent-a-001", + records=( + _record(1, "intent.declared", status="active"), + _record( + 2, + "patch_contract.verified", + status="accepted_with_external_changes", + ), + ), + ) + assert external.outcome == "accepted_with_external_changes" + assert "external_changes_accepted" in external.labels + + expanded = project_trajectory( + project_id="proj", + repo_root_digest="digest", + workflow_id="intent:intent-a-001", + records=( + _record(1, "intent.declared", status="active"), + _record(2, "intent.expanded", status="expanded"), + _record(3, "patch_contract.verified", status="accepted"), + ), + ) + assert "scope_expanded" in expanded.labels + + queued = project_trajectory( + project_id="proj", + repo_root_digest="digest", + workflow_id="intent:intent-a-001", + records=( + _record(1, "intent.declared", status="active"), + _record(2, "intent.queued", status="queued"), + _record(3, "intent.promoted", status="active"), + _record(4, 
"patch_contract.verified", status="accepted"), + ), + ) + assert "queue_used" in queued.labels + assert "recovered" in queued.labels + + claim_failed = project_trajectory( + project_id="proj", + repo_root_digest="digest", + workflow_id="intent:intent-a-001", + records=( + _record(1, "intent.declared", status="active"), + _record(2, "claim_validation.violated", status="violated"), + ), + ) + assert "claim_guard_failed" in claim_failed.labels + + broken = _record(1, "intent.declared", status="active") + missing_seq = replace(broken, audit_sequence=None) + with pytest.raises(TrajectoryProjectionError, match="audit_sequence"): + project_trajectory( + project_id="proj", + repo_root_digest="digest", + workflow_id="intent:intent-a-001", + records=(missing_seq,), + ) + + wrong_workflow = replace(broken, workflow_id="intent:other") + with pytest.raises(TrajectoryProjectionError, match="mixed workflow"): + project_trajectory( + project_id="proj", + repo_root_digest="digest", + workflow_id="intent:intent-a-001", + records=(wrong_workflow,), + ) + + +def test_retrieval_service_semantic_helpers_and_scope_family() -> None: + from codeclone.memory.embedding import DeterministicHashEmbeddingProvider + from codeclone.memory.retrieval import service as retrieval_service + from codeclone.memory.semantic.models import SemanticHit, SemanticIndexStatus + + class _Index: + def search(self, vector: object, *, k: int) -> list[SemanticHit]: + return [ + SemanticHit(source_id="mem-1", source="memory", score=0.9), + SemanticHit(source_id="evt-1", source="audit", score=0.8), + SemanticHit(source_id="traj-1", source="trajectory", score=0.7), + ] + + def status(self) -> SemanticIndexStatus: + return SemanticIndexStatus(available=True, indexed_count=3) + + proximity, audit_hits, trajectory_hits = retrieval_service._semantic_hits( + index=_Index(), + provider=DeterministicHashEmbeddingProvider(dimension=8), + query="recover", + k=5, + ) + assert "mem-1" in proximity + assert len(audit_hits) == 1 + 
assert len(trajectory_hits) == 1 + assert retrieval_service._scope_family("") is None + assert retrieval_service._scope_family("pkg/mod.py") == "pkg" + + assert retrieval_service._scope_family("../escape") is None + + +def test_trajectory_anomalies_projector_and_export_helpers() -> None: + from codeclone.memory.trajectory.anomalies import ( + anomaly_summary, + detect_trajectory_anomalies, + serialize_anomaly, + ) + from codeclone.memory.trajectory.export_context import extract_trajectory_citations + from codeclone.memory.trajectory.patch_trail import compute_patch_trail + from codeclone.memory.trajectory.projector import ( + TrajectoryProjectionError, + project_trajectory, + ) + from codeclone.memory.trajectory.retrieval import ( + serialize_patch_trail_summary, + serialize_trajectory_preview, + ) + + from .test_memory_trajectory_coverage import _patch_trail_inputs + from .test_memory_trajectory_projector import _record + + blocked = project_trajectory( + project_id="proj", + repo_root_digest="digest", + workflow_id="intent:intent-a-001", + records=( + _record(1, "intent.declared", status="active"), + _record(2, "intent.queue_blocked", status="blocked"), + ), + ) + blocked_anomalies = detect_trajectory_anomalies(blocked) + assert any(item.kind == "outcome_blocked" for item in blocked_anomalies) + + abandoned = project_trajectory( + project_id="proj", + repo_root_digest="digest", + workflow_id="intent:intent-a-001", + records=( + _record(1, "intent.declared", status="active"), + _record(2, "intent.expired", status="expired"), + ), + ) + assert any( + item.kind == "outcome_abandoned" + for item in detect_trajectory_anomalies(abandoned) + ) + + hook = project_trajectory( + project_id="proj", + repo_root_digest="digest", + workflow_id="intent:intent-a-001", + records=( + replace( + _record(1, "intent.declared", status="active"), + surface="hook", + severity="warn", + ), + _record(2, "patch_contract.verified", status="accepted"), + ), + ) + assert "hook_blocked" in 
hook.labels + + memory_tool = project_trajectory( + project_id="proj", + repo_root_digest="digest", + workflow_id="intent:intent-a-001", + records=( + replace( + _record(1, "intent.declared", status="active"), + tool_name="manage_engineering_memory", + ), + _record(2, "patch_contract.verified", status="accepted"), + ), + ) + assert "memory_used" in memory_tool.labels + + missing_core = replace( + _record(1, "intent.declared"), event_core_json="", event_core_sha256="" + ) + with pytest.raises(TrajectoryProjectionError, match="missing event core"): + project_trajectory( + project_id="proj", + repo_root_digest="digest", + workflow_id="intent:intent-a-001", + records=(missing_core,), + ) + + with_citations = project_trajectory( + project_id="proj", + repo_root_digest="digest", + workflow_id="intent:intent-a-001", + records=( + _record(1, "intent.declared", status="active"), + _record( + 2, + "claim_validation.completed", + status="accepted", + citations=[ + {"kind": "finding", "cited_id": "finding-1", "valid": True}, + {"kind": "", "cited_id": "", "valid": False}, + ], + ), + _record(3, "patch_contract.verified", status="accepted"), + ), + ) + extracted = extract_trajectory_citations(with_citations) + assert extracted + assert extracted[0]["kind"] == "finding" + + trail = compute_patch_trail(_patch_trail_inputs()) + violated_trail = replace( + trail, + scope_check_status="violated", + verification_status="not_reached", + ) + partial = project_trajectory( + project_id="proj", + repo_root_digest="digest", + workflow_id="intent:intent-a-001", + records=( + _record(1, "intent.declared", status="active"), + _record(2, "intent.queue_blocked", status="blocked"), + ), + ) + trail_anomalies = detect_trajectory_anomalies( + partial, + patch_trail_payload=violated_trail.to_payload(detail_level="summary"), + ) + assert any(item.kind == "scope_violation" for item in trail_anomalies) + summary = anomaly_summary([(partial, trail_anomalies)]) + error_count = summary["error_count"] + 
assert isinstance(error_count, int) + assert error_count >= 1 + assert serialize_anomaly(trail_anomalies[0])["kind"] + + preview = serialize_trajectory_preview( + replace(with_citations, summary="x" * 500), + detail_level="compact", + ) + assert len(str(preview["summary"])) < 500 + patch_summary = serialize_patch_trail_summary( + violated_trail.to_payload(detail_level="full") + ) + assert patch_summary is not None + assert patch_summary["scope_check_status"] == "violated" + + +def test_hydrate_trajectory_hits_skips_foreign_project(tmp_path: Path) -> None: + from codeclone.memory.retrieval import service as retrieval_service + from codeclone.memory.semantic.models import SemanticHit + + with memory_store(tmp_path) as (root, project, store, _db_path): + audit_db = tmp_path / "audit.sqlite3" + seed_trajectory_audit_workflow(root=root, audit_db=audit_db) + trajectory = store.rebuild_trajectories_from_audit( + project=project, + root_path=root, + audit_db_path=audit_db, + ).trajectories[0] + hits = retrieval_service._hydrate_trajectory_hits( + store, + project_id=project.id, + hits=[SemanticHit(source_id=trajectory.id, source="trajectory", score=0.5)], + ) + assert hits + assert hits[0]["semantic_score"] == 0.5 + missing = retrieval_service._hydrate_trajectory_hits( + store, + project_id="other-project", + hits=[SemanticHit(source_id=trajectory.id, source="trajectory", score=0.5)], + ) + assert missing == [] + + +def test_audit_reader_missing_db_and_connect_errors( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + from codeclone.audit.reader import ( + count_audit_event_core_gaps, + list_workflow_ids_with_events_after, + read_audit_event_core_records, + read_audit_summary, + ) + from codeclone.audit.schema import ensure_schema + from codeclone.audit.validation import AuditReadError + + missing = tmp_path / "missing.sqlite3" + with pytest.raises(AuditReadError, match="no audit data"): + read_audit_event_core_records(db_path=missing, 
repo_root_digest="digest") + assert ( + list_workflow_ids_with_events_after( + db_path=missing, + repo_root_digest="digest", + after_id=0, + ) + == () + ) + assert count_audit_event_core_gaps(db_path=missing, repo_root_digest="digest") == 0 + + audit_db = tmp_path / "audit.sqlite3" + conn = sqlite3.connect(audit_db) + try: + ensure_schema(conn) + conn.commit() + finally: + conn.close() + + real_connect = sqlite3.connect + + def _fail_connect(database: str, *args: Any, **kwargs: Any) -> sqlite3.Connection: + if database == str(audit_db): + raise sqlite3.Error("connect failed") + return cast(sqlite3.Connection, real_connect(database, *args, **kwargs)) + + monkeypatch.setattr(sqlite3, "connect", _fail_connect) + with pytest.raises(AuditReadError, match="cannot open audit database"): + list_workflow_ids_with_events_after( + db_path=audit_db, + repo_root_digest="digest", + after_id=0, + ) + with pytest.raises(AuditReadError, match="cannot open audit database"): + count_audit_event_core_gaps(db_path=audit_db, repo_root_digest="digest") + + monkeypatch.setattr(sqlite3, "connect", real_connect) + + class _BrokenConn: + def execute(self, *_args: object, **_kwargs: object) -> None: + raise sqlite3.Error("query failed") + + def close(self) -> None: + return None + + monkeypatch.setattr(sqlite3, "connect", lambda *_a, **_k: _BrokenConn()) + with pytest.raises(AuditReadError, match="cannot read audit database"): + read_audit_summary(db_path=audit_db, limit=5) + + +def test_projection_job_pid_alive_and_reclaim_paths( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + from codeclone.memory.jobs.store import _pid_alive, _reclaim_stale_running_jobs + + assert _pid_alive(None) is False + assert _pid_alive("not-a-pid@host") is False + assert _pid_alive("0@host") is False + assert _pid_alive(f"{__import__('os').getpid()}@host") is True + + with cli_memory_repo(tmp_path, with_draft=False) as (root, project, _store): + config = resolve_memory_config(root) + db_path = 
resolve_memory_db_path(root, config) + conn = open_memory_db(db_path) + try: + now = current_report_timestamp_utc() + conn.execute( + "INSERT INTO memory_projection_jobs(" + "id, project_id, job_kind, status, trigger, requested_at_utc, " + "started_at_utc, claimed_by, attempt, stimulus_json" + ") VALUES (?, ?, 'projection_bundle', 'running', 'cli', ?, ?, ?, 1, ?)", + ( + "job-stale", + project.id, + now, + "not-a-timestamp", + "999999@dead", + "{}", + ), + ) + conn.commit() + _reclaim_stale_running_jobs( + conn, + project_id=project.id, + running_timeout_seconds=1, + ) + row = conn.execute( + "SELECT status FROM memory_projection_jobs WHERE id=?", + ("job-stale",), + ).fetchone() + finally: + conn.close() + assert row is not None + assert str(row[0]) == "failed" + + +def test_parse_contract_constants_and_patch_trail_projector_edges( + tmp_path: Path, +) -> None: + from codeclone.memory.ingest.extractors import _parse_contract_constants + from codeclone.memory.trajectory.patch_trail_projector import ( + project_patch_trail_from_audit, + ) + + from .test_memory_trajectory_projector import _record + + broken = tmp_path / "broken.py" + broken.write_text("def (\n", encoding="utf-8") + assert _parse_contract_constants(broken) == {} + + constants = tmp_path / "constants.py" + constants.write_text( + "CACHE_VERSION = 2\nIGNORED = 1\n", + encoding="utf-8", + ) + parsed = _parse_contract_constants(constants) + assert parsed.get("CACHE_VERSION") == "2" + + assert project_patch_trail_from_audit(records=(), repo_root_digest="digest") is None + non_intent = replace( + _record(1, "intent.declared", status="active", scope_paths=["pkg/a.py"]), + workflow_id="analysis:run-1", + ) + assert ( + project_patch_trail_from_audit( + records=(non_intent,), + repo_root_digest="digest", + ) + is None + ) + + +def test_extract_contradictions_handles_broken_snapshot_and_docs( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + from codeclone.memory.ingest.extractors import 
extract_contradictions + + root, project, git, ingest = _tool_count_contradiction_repo( + tmp_path, + tools_json="{bad", + doc_name="docs.md", + doc_text="The server exposes 3 MCP tools for agents.", + ) + docs = root / "docs.md" + broken = extract_contradictions( + project=project, + root_path=root, + git=git, + report_digest="digest", + analysis_fingerprint="fp", + ingest=ingest, + ) + assert broken.records == [] + + (root / "tools.json").write_text( + '{"tools": {"a": {}, "b": {}}}', + encoding="utf-8", + ) + + original_read_text = Path.read_text + + def _raise_oserror(self: Path, *args: object, **kwargs: object) -> str: + if self == docs: + raise OSError("unreadable") + return original_read_text(self, *args, **kwargs) # type: ignore[arg-type] + + monkeypatch.setattr(Path, "read_text", _raise_oserror) + skipped = extract_contradictions( + project=project, + root_path=root, + git=git, + report_digest="digest", + analysis_fingerprint="fp", + ingest=ingest, + ) + assert skipped.records == [] + + +def test_export_context_observability_and_audit_validation_edges( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + from codeclone.audit.events import ( + EVENT_INTENT_CHECKED, + EVENT_INTENT_DECLARED, + EVENT_PATCH_VERIFIED, + ) + from codeclone.audit.reader import read_audit_event_core_records + from codeclone.audit.validation import ( + AuditReadError, + AuditValidationError, + EventRow, + validate_event_row, + ) + from codeclone.contracts import PLATFORM_OBSERVABILITY_SCHEMA_VERSION + from codeclone.memory.trajectory.export_context import ( + _effective_scope_paths, + _load_event_core, + _prefer_trajectory_projection, + _preview_text, + build_export_context, + extract_trajectory_citations, + select_canonical_trajectories, + ) + from codeclone.memory.trajectory.patch_trail_projector import ( + project_patch_trail_from_audit, + ) + from codeclone.memory.trajectory.projector import TrajectoryProjectionError + from codeclone.observability.models import 
OperationRecord + from codeclone.observability.store.reader import ( + build_trace_view, + open_observability_store_readonly, + ) + from codeclone.observability.store.schema import ( + observability_store_path, + open_observability_store, + ) + from codeclone.observability.store.writer import write_operation + from codeclone.report.meta import current_report_timestamp_utc + + from .memory_fixtures import memory_store, seed_trajectory_audit_workflow + from .test_memory_trajectory_projector import _core, _record + + assert _load_event_core("{not-json") == {} + assert _load_event_core('["list"]') == {} + assert _preview_text("x" * 500).endswith("...") + + with memory_store(tmp_path) as (root, project, store, _db_path): + audit_db = tmp_path / "audit.sqlite3" + seed_trajectory_audit_workflow(root=root, audit_db=audit_db) + projection = store.rebuild_trajectories_from_audit( + project=project, + root_path=root, + audit_db_path=audit_db, + ) + trajectory = projection.trajectories[0] + dup_subject = replace( + trajectory, + subjects=( + *trajectory.subjects, + trajectory.subjects[0], + ), + ) + assert extract_trajectory_citations(dup_subject) + assert ( + _effective_scope_paths( + trajectory, + scope_paths=(), + patch_trail_payload=None, + ) + == () + ) + no_precedents = build_export_context( + store._conn, + project_id=project.id, + trajectory=trajectory, + scope_paths=(), + patch_trail_payload=None, + canonical_by_workflow={trajectory.workflow_id: trajectory}, + ) + no_precedents_context = no_precedents["context"] + assert isinstance(no_precedents_context, dict) + assert no_precedents_context["trajectory_precedents"] == [] + + for index in range(8): + note = MemoryRecord( + id=generate_memory_id(), + project_id=project.id, + identity_key=f"risk_note:test:{index}", + type="risk_note", + status="active", + confidence="supported", + origin="system", + ingest_source="analysis", + statement=f"linked precedent {index}", + summary=None, + payload={}, + 
created_at_utc=current_report_timestamp_utc(), + updated_at_utc=current_report_timestamp_utc(), + last_verified_at_utc=current_report_timestamp_utc(), + expires_at_utc=None, + created_by="test", + verified_by=None, + approved_by=None, + approved_at_utc=None, + report_digest=None, + code_fingerprint=None, + stale_reason=None, + created_on_branch=None, + created_at_commit=None, + verified_on_branch=None, + verified_at_commit=None, + ) + store.write_record(note) + store.write_evidence( + MemoryEvidence( + id=generate_memory_id(prefix="evid"), + memory_id=note.id, + evidence_kind="trajectory", + ref=trajectory.id, + locator=None, + quote=None, + digest=trajectory.trajectory_digest, + created_at_utc=current_report_timestamp_utc(), + ) + ) + store.commit() + capped = build_export_context( + store._conn, + project_id=project.id, + trajectory=trajectory, + scope_paths=("pkg/service.py",), + patch_trail_payload=None, + canonical_by_workflow={trajectory.workflow_id: trajectory}, + ) + capped_context = capped["context"] + assert isinstance(capped_context, dict) + assert len(capped_context["memory_precedents"]) == 8 + + older = replace( + trajectory, + id="traj-older-export", + finished_at_utc="2020-01-01T00:00:00Z", + started_at_utc="2020-01-01T00:00:00Z", + ) + newer_same_version = replace( + trajectory, + id="traj-newer-export", + finished_at_utc="2026-06-01T00:00:00Z", + ) + canonical = select_canonical_trajectories([older, newer_same_version]) + assert len(canonical) == 1 + assert canonical[0].id == "traj-newer-export" + assert _prefer_trajectory_projection(newer_same_version, older) is True + tie_a = replace( + newer_same_version, + finished_at_utc=trajectory.finished_at_utc, + id="traj-a", + ) + tie_b = replace( + newer_same_version, + finished_at_utc=trajectory.finished_at_utc, + id="traj-b", + ) + assert _prefer_trajectory_projection(tie_b, tie_a) is True + + core_json, core_sha = _core( + EVENT_INTENT_CHECKED, + status="partial", + declared_scope_paths=["pkg/a.py"], 
+ changed_files=["pkg/a.py"], + ) + partial_check = replace( + _record( + 2, + EVENT_INTENT_CHECKED, + declared_scope_paths=["pkg/a.py"], + changed_files=["pkg/a.py"], + ), + status=None, + event_core_json=core_json, + event_core_sha256=core_sha, + ) + declared_only = _record(1, EVENT_INTENT_DECLARED, status="active") + trail = project_patch_trail_from_audit( + records=( + declared_only, + partial_check, + _record(3, "receipt.created"), + _record(4, EVENT_PATCH_VERIFIED), + ), + repo_root_digest="digest", + ) + assert trail is None or trail.scope_check_status == "partial" + + bad_digest = replace( + _record(1, EVENT_INTENT_DECLARED, status="active", scope_paths=["pkg/a.py"]), + event_core_sha256="0" * 64, + ) + with pytest.raises(TrajectoryProjectionError, match="digest mismatch"): + project_patch_trail_from_audit( + records=(bad_digest,), + repo_root_digest="digest", + ) + + audit_db = tmp_path / "broken-audit.sqlite3" + audit_db.write_text("not sqlite", encoding="utf-8") + real_connect = sqlite3.connect + + def _fail_event_core_connect( + database: str, *args: Any, **kwargs: Any + ) -> sqlite3.Connection: + if database == str(audit_db): + raise sqlite3.Error("connect failed") + return cast(sqlite3.Connection, real_connect(database, *args, **kwargs)) + + monkeypatch.setattr(sqlite3, "connect", _fail_event_core_connect) + with pytest.raises(AuditReadError, match="cannot open audit database"): + read_audit_event_core_records(db_path=audit_db, repo_root_digest="digest") + + row = EventRow( + event_id="evt_surface", + event_type="analysis.completed", + severity="info", + created_at_utc="2026-05-25T00:00:00Z", + repo_root_digest="a" * 16, + run_id="run1234567890abcdef", + intent_id=None, + report_digest="b" * 64, + agent_label="agent", + agent_pid=1, + agent_start_epoch=None, + status="full", + payload_json="{}", + surface="bogus", + ) + with pytest.raises(AuditValidationError, match="invalid surface"): + validate_event_row(row) + + row_sha_only = EventRow( + 
event_id="evt_core", + event_type="analysis.completed", + severity="info", + created_at_utc="2026-05-25T00:00:00Z", + repo_root_digest="a" * 16, + run_id="run1234567890abcdef", + intent_id=None, + report_digest="b" * 64, + agent_label="agent", + agent_pid=1, + agent_start_epoch=None, + status="full", + payload_json="{}", + event_core_json=None, + event_core_sha256="c" * 64, + ) + with pytest.raises(AuditValidationError, match="event_core_sha256 requires"): + validate_event_row(row_sha_only) + + conn = open_observability_store(observability_store_path(tmp_path / "obs")) + try: + write_operation( + conn, + OperationRecord( + operation_id="missing-op", + correlation_id="missing-op", + surface="mcp", + name="mcp.check_patch_contract", + started_at_utc="not-a-timestamp", + duration_ms=10.0, + status="ok", + session_id="sess-1", + ), + ) + write_operation( + conn, + OperationRecord( + operation_id="analyze-op", + correlation_id="analyze-op", + surface="mcp", + name="mcp.analyze_repository", + started_at_utc="2026-06-09T00:00:01Z", + duration_ms=20.0, + status="ok", + session_id="sess-1", + ), + ) + write_operation( + conn, + OperationRecord( + operation_id="query-op", + correlation_id="query-op", + surface="mcp", + name="mcp.get_finding", + started_at_utc="2026-06-09T00:00:02Z", + duration_ms=15.0, + status="ok", + ), + ) + finally: + conn.close() + + read_conn = open_observability_store_readonly(tmp_path / "obs") + assert read_conn is not None + try: + missing = build_trace_view(read_conn, operation_id="does-not-exist") + assert missing.operation_tree == () + by_session = build_trace_view(read_conn, session_id="sess-1") + assert by_session.aggregates.operation_count == 2 + recent = build_trace_view(read_conn, last=1) + assert recent.schema_version == PLATFORM_OBSERVABILITY_SCHEMA_VERSION + pipe = {group.name for group in recent.aggregates.pipeline} + assert {"controller", "analysis", "mcp query"} & pipe + assert recent.waterfall + finally: + read_conn.close() + + +def 
test_patch_trail_projector_additional_audit_branches() -> None: + from codeclone.audit.events import EVENT_INTENT_DECLARED + from codeclone.memory.trajectory.patch_trail_projector import ( + _apply_audit_record, + _WorkflowAuditState, + project_patch_trail_from_audit, + ) + + from .test_memory_trajectory_projector import _record + + state = _WorkflowAuditState() + _apply_audit_record( + state, + replace( + _record(1, EVENT_INTENT_DECLARED, status="active"), audit_sequence=None + ), + ) + assert state.declared_files == () + + assert ( + project_patch_trail_from_audit( + records=(_record(1, EVENT_INTENT_DECLARED, status="active"),), + repo_root_digest="digest", + ) + is None + ) + + +def test_staleness_audit_validation_and_events_edges( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + from codeclone.audit.events import ( + EVENT_INTENT_CHECKED, + EVENT_WORKSPACE_CONFLICT, + event_core_for_event, + event_summary, + ) + from codeclone.audit.reader import ( + count_audit_event_core_gaps, + list_workflow_ids_with_events_after, + read_audit_event_core_records, + ) + from codeclone.audit.validation import ( + AuditReadError, + AuditValidationError, + EventRow, + validate_event_row, + ) + from codeclone.memory.staleness import ( + _batch_evidence_index, + apply_refresh_staleness, + ) + + from .memory_fixtures import memory_store + from .test_audit_events_coverage import _event, _facts + + orphan_evidence = MemoryEvidence( + id=generate_memory_id(prefix="evid"), + memory_id="missing-record", + evidence_kind="report", + ref="digest", + locator=None, + quote=None, + digest="abc", + created_at_utc="2026-01-01T00:00:00Z", + ) + assert _batch_evidence_index(RecordBatch(evidence=[orphan_evidence])) == {} + + conflict_core = event_core_for_event( + _event(EVENT_WORKSPACE_CONFLICT, concurrent_intents=[{"intent_id": "a"}]) + ) + assert _facts(conflict_core)["concurrent_intents"] == 1 + + many_declared = [f"pkg/file_{index}.py" for index in range(60)] + check_payload 
= event_core_for_event( + _event( + EVENT_INTENT_CHECKED, + declared_scope=[*many_declared, 123, "../escape", "/abs"], + actual_changed_files=["pkg/file_0.py"], + status="clean", + ) + ) + check_facts = check_payload["facts"] + assert isinstance(check_facts, dict) + assert check_facts.get("paths_truncated") is True + assert check_facts.get("untouched_in_declared") + + summary = event_summary( + "analysis.completed", + {"source": "mcp", "health": {"score": "high"}}, + ) + assert summary == "analysis completed (mcp)" + + base_row = EventRow( + event_id="evt_val", + event_type="analysis.completed", + severity="info", + created_at_utc="2026-05-25T00:00:00Z", + repo_root_digest="a" * 16, + run_id="run1234567890abcdef", + intent_id=None, + report_digest="b" * 64, + agent_label="agent", + agent_pid=1, + agent_start_epoch=None, + status="full", + payload_json="{}", + ) + with pytest.raises(AuditValidationError, match="event_core_json must be JSON"): + validate_event_row( + replace( + base_row, + event_core_json="{bad", + event_core_sha256="c" * 64, + ) + ) + with pytest.raises(AuditValidationError, match="must be a JSON object"): + validate_event_row( + replace( + base_row, + event_core_json='["list"]', + event_core_sha256="d" * 64, + ) + ) + import hashlib + import json + + bad_version = json.dumps( + { + "core_schema_version": "0", + "event_family": "analysis", + "event_type": "analysis.completed", + "facts": {}, + "status": "", + "truncated": False, + }, + sort_keys=True, + separators=(",", ":"), + ) + with pytest.raises(AuditValidationError, match="unsupported core_schema_version"): + validate_event_row( + replace( + base_row, + event_core_json=bad_version, + event_core_sha256=hashlib.sha256(bad_version.encode()).hexdigest(), + ) + ) + good_core = json.dumps( + { + "core_schema_version": "2", + "event_family": "analysis", + "event_type": "analysis.completed", + "facts": {}, + "status": "", + "truncated": False, + }, + sort_keys=True, + separators=(",", ":"), + ) + with 
pytest.raises(AuditValidationError, match="does not match event_core_json"): + validate_event_row( + replace( + base_row, + event_core_json=good_core, + event_core_sha256="f" * 64, + ) + ) + with pytest.raises(AuditValidationError, match="must be lowercase sha256 hex"): + validate_event_row( + replace( + base_row, + event_core_json=good_core, + event_core_sha256="G" * 64, + ) + ) + + report: dict[str, object] = {"inventory": {"file_registry": {"items": []}}} + memory_root = tmp_path / "memory" + memory_root.mkdir() + with memory_store(memory_root) as (root, project, store, _db_path): + draft = MemoryRecord( + id=generate_memory_id(), + project_id=project.id, + identity_key="draft:note:1", + type="risk_note", + status="draft", + confidence="inferred", + origin="agent", + ingest_source="agent", + statement="draft only", + summary=None, + payload={}, + created_at_utc="2026-01-01T00:00:00Z", + updated_at_utc="2026-01-01T00:00:00Z", + last_verified_at_utc=None, + expires_at_utc=None, + created_by="agent", + verified_by=None, + approved_by=None, + approved_at_utc=None, + report_digest=None, + code_fingerprint=None, + stale_reason=None, + created_on_branch=None, + created_at_commit=None, + verified_on_branch=None, + verified_at_commit=None, + ) + store.write_record(draft) + stale = replace( + draft, id=generate_memory_id(), status="stale", identity_key="stale:1" + ) + store.write_record(stale) + store.commit() + draft_result = apply_refresh_staleness( + store, + project_id=project.id, + batch=RecordBatch(records=[]), + report_document=report, + root_path=root, + ) + assert draft_result.records_marked_stale == 0 + + audit_db = tmp_path / "audit-read.sqlite3" + conn = sqlite3.connect(audit_db) + try: + from codeclone.audit.schema import ensure_schema + + ensure_schema(conn) + conn.commit() + finally: + conn.close() + + real_connect = sqlite3.connect + + class _BrokenConn: + def execute(self, *_args: object, **_kwargs: object) -> None: + raise sqlite3.Error("query failed") + 
+ def close(self) -> None: + return None + + monkeypatch.setattr(sqlite3, "connect", lambda *_a, **_k: _BrokenConn()) + with pytest.raises(AuditReadError, match="cannot read audit database"): + read_audit_event_core_records(db_path=audit_db, repo_root_digest="digest") + with pytest.raises(AuditReadError, match="cannot read audit database"): + list_workflow_ids_with_events_after( + db_path=audit_db, + repo_root_digest="digest", + after_id=0, + ) + with pytest.raises(AuditReadError, match="cannot read audit database"): + count_audit_event_core_gaps(db_path=audit_db, repo_root_digest="digest") + monkeypatch.setattr(sqlite3, "connect", real_connect) + + +def test_trajectory_projector_and_retrieval_residual_edges(tmp_path: Path) -> None: + import hashlib + + from codeclone.audit.events import EVENT_INTENT_DECLARED + from codeclone.memory.trajectory.projector import ( + TrajectoryProjectionError, + project_trajectory, + ) + from codeclone.memory.trajectory.retrieval import ( + rank_trajectories_for_query, + serialize_patch_trail_summary, + ) + + from .test_memory_trajectory_projector import _record + + list_core = '["not","object"]' + list_sha = hashlib.sha256(list_core.encode("utf-8")).hexdigest() + bad_core = replace( + _record(1, EVENT_INTENT_DECLARED, status="active"), + event_core_json=list_core, + event_core_sha256=list_sha, + ) + with pytest.raises(TrajectoryProjectionError, match="JSON object"): + project_trajectory( + project_id="proj", + repo_root_digest="digest", + workflow_id="intent:intent-a-001", + records=[bad_core], + ) + + missing_sequence = replace( + _record(1, EVENT_INTENT_DECLARED, status="active"), + audit_sequence=None, + ) + with pytest.raises(TrajectoryProjectionError, match="audit_sequence"): + project_trajectory( + project_id="proj", + repo_root_digest="digest", + workflow_id="intent:intent-a-001", + records=[missing_sequence], + ) + + assert serialize_patch_trail_summary(None) is None + assert serialize_patch_trail_summary({"not": "trail"}) is 
None + + empty_hits, truncated = rank_trajectories_for_query( + [], + query="", + max_results=5, + match_mode="any", + ) + assert empty_hits == [] + assert truncated is False + + missing_core = replace( + _record(1, EVENT_INTENT_DECLARED, status="active"), + event_core_json=None, + event_core_sha256=None, + ) + with pytest.raises(TrajectoryProjectionError, match="missing event core"): + project_trajectory( + project_id="proj", + repo_root_digest="digest", + workflow_id="intent:intent-a-001", + records=[missing_core], + ) + + from codeclone.audit.events import EVENT_PATCH_VERIFIED + + abused = project_trajectory( + project_id="proj", + repo_root_digest="digest", + workflow_id="intent:intent-a-001", + records=( + _record( + 1, EVENT_INTENT_DECLARED, status="active", scope_paths=["pkg/a.py"] + ), + _record(2, EVENT_PATCH_VERIFIED, status="accepted", baseline_abuse=True), + ), + ) + assert abused.outcome == "violated" + + from codeclone.config.memory import IngestConfig, resolve_memory_config + + ingest = IngestConfig.model_validate( + { + "contract_constants_paths": "codeclone/contracts/__init__.py", + "mcp_tool_count_doc_paths": ["docs/book/25-mcp-interface/index.md"], + "mcp_tool_schema_snapshot_path": "", + } + ) + assert ingest.mcp_tool_schema_snapshot_path is None + assert ingest.contract_constants_paths == ("codeclone/contracts/__init__.py",) + + root = tmp_path / "cfg-root" + root.mkdir() + outside_db = tmp_path / "outside.sqlite3" + outside_db.write_text("", encoding="utf-8") + (root / "pyproject.toml").write_text( + f'[tool.codeclone.memory]\ndb_path = "{outside_db}"\n', + encoding="utf-8", + ) + with pytest.raises(ValueError, match="must stay under the repository root"): + resolve_memory_config(root) + + +def test_trajectory_quality_timestamp_and_band_edges() -> None: + from codeclone.audit.events import EVENT_INTENT_DECLARED, EVENT_PATCH_VERIFIED + from codeclone.memory.trajectory.projector import project_trajectory + from 
codeclone.memory.trajectory.quality import ( + _complexity_band_label, + _parse_utc_timestamp, + compute_trajectory_duration_seconds, + ) + + from .test_memory_trajectory_projector import _record + + trajectory = project_trajectory( + project_id="proj", + repo_root_digest="digest", + workflow_id="intent:intent-a-001", + records=( + _record( + 1, EVENT_INTENT_DECLARED, status="active", scope_paths=["pkg/a.py"] + ), + _record(2, EVENT_PATCH_VERIFIED, status="accepted"), + ), + ) + broken_times = replace( + trajectory, + started_at_utc="not-a-timestamp", + finished_at_utc="also-bad", + ) + assert compute_trajectory_duration_seconds(broken_times) == 0 + assert _parse_utc_timestamp("") is None + assert _parse_utc_timestamp("bad-ts") is None + assert _parse_utc_timestamp("2026-01-01T00:00:00Z") is not None + assert _complexity_band_label(70) == ("high", "High") + assert _complexity_band_label(35) == ("moderate", "Moderate") + assert _complexity_band_label(10) == ("low", "Low") + naive = _parse_utc_timestamp("2026-01-01T00:00:00") + assert naive is not None + assert naive.tzinfo is not None + + +def test_project_trajectory_external_changes_outcome() -> None: + from codeclone.audit.events import EVENT_INTENT_DECLARED, EVENT_PATCH_VERIFIED + from codeclone.memory.trajectory.projector import project_trajectory + + from .test_memory_trajectory_projector import _record + + trajectory = project_trajectory( + project_id="proj", + repo_root_digest="digest", + workflow_id="intent:intent-a-001", + records=( + _record( + 1, EVENT_INTENT_DECLARED, status="active", scope_paths=["pkg/a.py"] + ), + _record( + 2, + EVENT_PATCH_VERIFIED, + status="accepted_with_external_changes", + ), + ), + ) + assert trajectory.outcome == "accepted_with_external_changes" + + +def test_extract_contradictions_records_tool_count_mismatch(tmp_path: Path) -> None: + from codeclone.memory.ingest.extractors import extract_contradictions + + root, project, git, ingest = _tool_count_contradiction_repo( + 
tmp_path, + tools_json='{"tools": {"a": {}, "b": {}}}', + doc_name="docs.md", + doc_text="The bundle exposes 3 MCP tools.", + ) + batch = extract_contradictions( + project=project, + root_path=root, + git=git, + report_digest="digest", + analysis_fingerprint="fp", + ingest=ingest, + ) + assert len(batch.records) == 1 + assert batch.records[0].type == "contradiction_note" + + (root / "docs-match.md").write_text( + "The bundle exposes 2 MCP tools.", encoding="utf-8" + ) + matching = extract_contradictions( + project=project, + root_path=root, + git=git, + report_digest="digest", + analysis_fingerprint="fp", + ingest=IngestConfig( + mcp_tool_schema_snapshot_path="tools.json", + mcp_tool_count_doc_paths=("docs-match.md",), + ), + ) + assert matching.records == [] + + +def test_project_trajectory_agent_fallback_and_noncanonical_digest() -> None: + import hashlib + import json + + from codeclone.audit.events import EVENT_INTENT_CHECKED, EVENT_INTENT_DECLARED + from codeclone.memory.trajectory.projector import project_trajectory + + from .test_memory_trajectory_projector import _core, _record + + checked = project_trajectory( + project_id="proj", + repo_root_digest="digest", + workflow_id="intent:intent-a-001", + records=( + replace( + _record( + 1, EVENT_INTENT_DECLARED, status="active", scope_paths=["pkg/a.py"] + ), + agent_label=" ", + ), + replace( + _record( + 2, + EVENT_INTENT_CHECKED, + status="clean", + declared_scope_paths=["pkg/a.py"], + changed_files=["pkg/a.py"], + ), + agent_label="backup-agent", + ), + ), + ) + agent_subjects = { + (subject.subject_kind, subject.subject_key) + for subject in checked.subjects + if subject.subject_kind == "agent" + } + assert agent_subjects == {("agent", "backup-agent")} + + bad_facts_core, _bad_facts_sha = _core( + EVENT_INTENT_DECLARED, + status="active", + scope_paths=["pkg/a.py"], + ) + broken_facts = json.loads(bad_facts_core) + broken_facts["facts"] = "not-a-mapping" + broken_text = json.dumps(broken_facts, 
sort_keys=True, separators=(",", ":")) + broken_sha = hashlib.sha256(broken_text.encode("utf-8")).hexdigest() + short_digest = project_trajectory( + project_id="proj", + repo_root_digest="digest", + workflow_id="intent:intent-a-001", + records=( + replace( + _record( + 1, EVENT_INTENT_DECLARED, status="active", scope_paths=["pkg/a.py"] + ), + event_core_json=broken_text, + event_core_sha256=broken_sha, + report_digest="short-digest", + ), + ), + ) + assert short_digest.report_digest == "short-digest" + + +def test_serialize_patch_trail_summary_from_computed_trail() -> None: + from codeclone.memory.trajectory.patch_trail import compute_patch_trail + from codeclone.memory.trajectory.retrieval import serialize_patch_trail_summary + + from .test_memory_trajectory_coverage import _patch_trail_inputs + + trail = compute_patch_trail(_patch_trail_inputs()) + summary = serialize_patch_trail_summary( + trail.to_payload(detail_level="summary"), + ) + assert summary is not None + assert summary["verification_status"] == "accepted" + + +def test_refresh_stale_primary_reason_skips_stale_records(tmp_path: Path) -> None: + from codeclone.memory.staleness import _refresh_stale_primary_reason + + with memory_store(tmp_path) as (_root, project, store, _db_path): + stale = MemoryRecord( + id=generate_memory_id(), + project_id=project.id, + identity_key="risk_note:stale:1", + type="risk_note", + status="stale", + confidence="supported", + origin="system", + ingest_source="analysis", + statement="already stale", + summary=None, + payload={}, + created_at_utc="2026-01-01T00:00:00Z", + updated_at_utc="2026-01-01T00:00:00Z", + last_verified_at_utc=None, + expires_at_utc=None, + created_by="test", + verified_by=None, + approved_by=None, + approved_at_utc=None, + report_digest="digest-a", + code_fingerprint=None, + stale_reason="missing_from_refresh", + created_on_branch=None, + created_at_commit=None, + verified_on_branch=None, + verified_at_commit=None, + ) + store.write_record(stale) + 
store.commit() + assert ( + _refresh_stale_primary_reason( + store, + stale, + batch_identity_keys=frozenset(), + batch_by_identity={}, + batch_evidence={}, + report_digest="digest-b", + ) + is None + ) + + +def test_experience_distillation_opens_store_when_not_passed( + tmp_path: Path, +) -> None: + from codeclone.memory.exceptions import MemoryContractError + from codeclone.memory.experience.distillation_workflow import ( + execute_experience_distillation, + ) + + empty_root = tmp_path / "empty-repo" + empty_root.mkdir() + missing_config = resolve_memory_config(empty_root) + with pytest.raises(MemoryContractError, match="not found"): + execute_experience_distillation( + root_path=empty_root, + config=missing_config, + ) + + with memory_store(tmp_path) as (root, project, _store, db_path): + config = replace(resolve_memory_config(root), db_path=db_path) + payload = execute_experience_distillation( + root_path=root, + config=config, + project=project, + ) + assert payload["status"] == "ok" + assert payload["experiences_distilled"] == 0 + + +def test_observability_span_error_and_sql_classification(tmp_path: Path) -> None: + from codeclone.config.observability import ObservabilityConfig + from codeclone.observability import ( + bootstrap, + operation, + record_elapsed_span, + shutdown, + span, + ) + from codeclone.observability.runtime import _classify_sql + from codeclone.observability.store.schema import ( + observability_store_path, + open_observability_store, + ) + + assert _classify_sql(" ") == "" + + bootstrap(ObservabilityConfig(enabled=True), root=tmp_path) + with operation(name="job", surface="cli"): + record_elapsed_span( + "cold-start", + started_at_utc="2026-01-01T00:00:00Z", + duration_ms=12.5, + ) + with pytest.raises(RuntimeError, match="boom"), span(name="failing-stage"): + raise RuntimeError("boom") + shutdown() + + conn = open_observability_store(observability_store_path(tmp_path)) + try: + span_row = conn.execute( + "SELECT status FROM 
platform_spans WHERE name=?", + ("failing-stage",), + ).fetchone() + elapsed_row = conn.execute( + "SELECT name FROM platform_spans WHERE name=?", + ("cold-start",), + ).fetchone() + finally: + conn.close() + assert span_row is not None + assert str(span_row[0]) == "error" + assert elapsed_row is not None + + +def test_ingest_config_validator_passthrough_non_dict() -> None: + from codeclone.config.memory import IngestConfig + + assert IngestConfig._normalize_path_lists.__func__(IngestConfig, 42) == 42 + + def test_build_init_batch_rejects_invalid_project_and_git( tmp_path: Path, ) -> None: diff --git a/tests/test_memory_retrieval_semantic_coverage.py b/tests/test_memory_retrieval_semantic_coverage.py new file mode 100644 index 00000000..6d743be2 --- /dev/null +++ b/tests/test_memory_retrieval_semantic_coverage.py @@ -0,0 +1,120 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Sequence +from dataclasses import replace +from pathlib import Path + +from codeclone.memory.embedding import DeterministicHashEmbeddingProvider +from codeclone.memory.models import MemoryRecord, MemorySubject +from codeclone.memory.retrieval.semantic import semantic_search +from codeclone.memory.semantic.models import SemanticHit, SemanticIndexStatus +from codeclone.memory.trajectory.models import Trajectory, TrajectorySubject + +from .memory_fixtures import memory_store, seed_trajectory_audit_workflow + +_PROVIDER = DeterministicHashEmbeddingProvider(dimension=8) + + +class _FakeIndex: + def __init__(self, hits: list[SemanticHit]) -> None: + self._hits = hits + + def search(self, vector: Sequence[float], *, k: int) -> list[SemanticHit]: + return self._hits[:k] + + def status(self) -> SemanticIndexStatus: + return SemanticIndexStatus(available=True, indexed_count=len(self._hits)) + + +class _StoreWithoutTrajectory: + def find_record(self, record_id: str) -> MemoryRecord | None: + return None + + def list_subjects_for_memory(self, memory_id: str) -> list[MemorySubject]: + return [] + + +def test_semantic_search_hydrates_trajectory_hit(tmp_path: Path) -> None: + with memory_store(tmp_path) as (root, project, store, _db_path): + audit_db = tmp_path / "audit.sqlite3" + seed_trajectory_audit_workflow(root=root, audit_db=audit_db) + trajectory = store.rebuild_trajectories_from_audit( + project=project, + root_path=root, + audit_db_path=audit_db, + ).trajectories[0] + trajectory = replace( + trajectory, + subjects=( + TrajectorySubject( + subject_kind="path", + subject_key="pkg/service.py", + relation="about", + ), + ), + ) + index = _FakeIndex( + [SemanticHit(source_id=trajectory.id, source="trajectory", score=0.75)] + ) + + class _TrajectoryStore: + def find_record(self, record_id: str) -> MemoryRecord | None: + return None + + 
def list_subjects_for_memory(self, memory_id: str) -> list[MemorySubject]: + return [] + + def find_trajectory(self, trajectory_id: str) -> Trajectory | None: + return trajectory if trajectory_id == trajectory.id else None + + results = semantic_search( + index=index, + provider=_PROVIDER, + store=_TrajectoryStore(), + audit_db_path=Path("missing.sqlite3"), + query="service workflow", + limit=5, + preview_chars=80, + ) + assert len(results) == 1 + assert results[0].source == "trajectory" + assert results[0].subject_path == "pkg/service.py" + assert results[0].preview + + +def test_semantic_search_skips_trajectory_without_store_method() -> None: + index = _FakeIndex( + [SemanticHit(source_id="traj-1", source="trajectory", score=0.5)] + ) + results = semantic_search( + index=index, + provider=_PROVIDER, + store=_StoreWithoutTrajectory(), + audit_db_path=Path("missing.sqlite3"), + query="workflow", + limit=5, + preview_chars=40, + ) + assert results == [] + + +def test_semantic_search_skips_unknown_trajectory_source_without_store() -> None: + index = _FakeIndex( + [SemanticHit(source_id="traj-1", source="trajectory", score=0.5)] + ) + results = semantic_search( + index=index, + provider=_PROVIDER, + store=None, + audit_db_path=Path("missing.sqlite3"), + query="workflow", + limit=5, + preview_chars=40, + ) + assert results == [] diff --git a/tests/test_memory_trajectory_coverage.py b/tests/test_memory_trajectory_coverage.py index 3b403fd6..b23b386c 100644 --- a/tests/test_memory_trajectory_coverage.py +++ b/tests/test_memory_trajectory_coverage.py @@ -16,6 +16,8 @@ from codeclone.memory.exceptions import MemoryContractError from codeclone.memory.trajectory.cli_render import ( render_projection_run, + render_trajectory_agents, + render_trajectory_anomalies, render_trajectory_detail, render_trajectory_list, render_trajectory_search_results, @@ -31,8 +33,11 @@ from codeclone.memory.trajectory.export_context import ( build_export_context, build_export_record, + 
extract_trajectory_citations, projection_version_rank, + resolve_export_scope_paths, select_canonical_trajectories, + trajectory_path_subjects, ) from codeclone.memory.trajectory.models import ( TRAJECTORY_PROJECTION_VERSION_V1, @@ -54,7 +59,17 @@ trajectory_subject_keys, ) -from .memory_fixtures import memory_store, seed_trajectory_audit_workflow +from .memory_fixtures import ( + memory_store, + seed_path_subject_record, + seed_trajectory_audit_workflow, +) + + +def _export_context(payload: dict[str, object]) -> dict[str, object]: + context = payload["context"] + assert isinstance(context, dict) + return context class _CapturePrinter: @@ -304,3 +319,169 @@ def test_trajectory_retrieval_helpers_handle_empty_query_and_subjects( ) assert "path" in keys assert keys["symbol"] == ("pkg.service",) + + +def test_cli_render_agents_and_anomalies_cover_populated_payloads() -> None: + printer = _CapturePrinter() + render_trajectory_agents( + console=printer, + payload={ + "agent_count": 1, + "trajectory_count": 2, + "unlabeled_trajectory_count": 0, + "agents": [ + { + "agent_label": "test-agent", + "trajectory_count": 2, + "intent_count": 1, + "failed_outcome_count": 0, + "anomaly_count": 1, + } + ], + }, + ) + render_trajectory_agents(console=printer, payload={"agents": []}) + render_trajectory_anomalies( + console=printer, + payload={ + "summary": { + "trajectories_with_anomalies": 1, + "anomaly_count": 2, + "error_count": 1, + "warn_count": 1, + }, + "trajectories": [ + { + "trajectory_id": "traj-1", + "agent_label": "test-agent", + "outcome": "accepted", + "quality_tier": "verified", + "anomalies": [ + { + "severity": "warn", + "kind": "scope_expanded", + "message": "expanded related files", + } + ], + } + ], + }, + ) + render_trajectory_anomalies(console=printer, payload={"trajectories": []}) + assert any("Agents:" in line for line in printer.lines) + assert any("Anomaly summary:" in line for line in printer.lines) + + +def 
test_export_context_record_citations_and_scope_paths(tmp_path: Path) -> None: + with memory_store(tmp_path) as (root, project, store, _db_path): + audit_db = tmp_path / "audit.sqlite3" + seed_trajectory_audit_workflow(root=root, audit_db=audit_db) + projection = store.rebuild_trajectories_from_audit( + project=project, + root_path=root, + audit_db_path=audit_db, + ) + trajectory = projection.trajectories[0] + profile = resolve_export_profile("agent-memory-retrieval-v1") + record = build_export_record( + trajectory=trajectory, + profile=profile, + project=project, + include_payloads=True, + enrichment={"context": "not-a-dict", "citations": "not-a-list"}, + scope_paths=("pkg/service.py",), + ) + assert isinstance(record["context"], dict) + assert record["citations"] == [] + assert "steps" in record + citations = extract_trajectory_citations(trajectory) + assert isinstance(citations, list) + scope = resolve_export_scope_paths(trajectory, patch_trail_payload=None) + assert scope + conn = store._conn + context_payload = build_export_context( + conn, + project_id=project.id, + trajectory=trajectory, + scope_paths=scope, + patch_trail_payload=None, + canonical_by_workflow={trajectory.workflow_id: trajectory}, + ) + assert _export_context(context_payload)["memory_precedents"] is not None + + +def test_export_scope_paths_and_precedents_from_patch_trail(tmp_path: Path) -> None: + with memory_store(tmp_path) as (root, project, store, _db_path): + audit_db = tmp_path / "audit.sqlite3" + seed_trajectory_audit_workflow(root=root, audit_db=audit_db) + projection = store.rebuild_trajectories_from_audit( + project=project, + root_path=root, + audit_db_path=audit_db, + ) + trajectory = projection.trajectories[0] + trail = compute_patch_trail(_patch_trail_inputs()) + scope = resolve_export_scope_paths( + replace(trajectory, subjects=()), + patch_trail_payload=trail.to_payload(detail_level="full"), + ) + assert scope + assert "pkg/service.py" in trajectory_path_subjects( + trajectory, 
relations={"about"} + ) + older = replace( + trajectory, + id="traj-older", + workflow_id="intent:intent-older", + finished_at_utc="2020-01-01T00:00:00Z", + started_at_utc="2020-01-01T00:00:00Z", + ) + context_payload = build_export_context( + store._conn, + project_id=project.id, + trajectory=trajectory, + scope_paths=("pkg/service.py",), + patch_trail_payload=trail.to_payload(detail_level="summary"), + canonical_by_workflow={ + trajectory.workflow_id: trajectory, + older.workflow_id: older, + }, + ) + export_ctx = _export_context(context_payload) + trajectory_precedents = export_ctx["trajectory_precedents"] + assert isinstance(trajectory_precedents, list) + citations = extract_trajectory_citations(trajectory) + assert isinstance(citations, list) + profile = resolve_export_profile("agent-change-control-v1") + enriched = build_export_record( + trajectory=trajectory, + profile=profile, + project=project, + include_payloads=False, + enrichment={ + "patch_trail_summary": {"declared": 1, "changed": 1}, + "context": {"memory_precedents": [], "trajectory_precedents": []}, + "citations": [{"kind": "finding", "cited_id": "f-1"}], + }, + scope_paths=("pkg/service.py",), + ) + assert enriched["patch_trail_summary"] == {"declared": 1, "changed": 1} + assert enriched["citations"] == [{"kind": "finding", "cited_id": "f-1"}] + + seed_path_subject_record( + store, + project_id=project.id, + path="pkg/other.py", + statement="overlap memory for export precedents", + ) + overlap_context = build_export_context( + store._conn, + project_id=project.id, + trajectory=trajectory, + scope_paths=("pkg/service.py", "pkg/other.py"), + patch_trail_payload=None, + canonical_by_workflow={trajectory.workflow_id: trajectory}, + ) + memory_precedents = _export_context(overlap_context)["memory_precedents"] + assert isinstance(memory_precedents, list) + assert memory_precedents diff --git a/tests/test_memory_trajectory_retrieval.py b/tests/test_memory_trajectory_retrieval.py index e87e5ce9..c00fd92a 
100644 --- a/tests/test_memory_trajectory_retrieval.py +++ b/tests/test_memory_trajectory_retrieval.py @@ -19,8 +19,10 @@ from codeclone.memory.retrieval.context_coverage import build_context_coverage from codeclone.memory.trajectory.models import TrajectorySubject from codeclone.memory.trajectory.retrieval import ( + rank_trajectories_for_query, rank_trajectories_for_scope, serialize_trajectory_preview, + trajectory_status_payload, ) from codeclone.report.meta import current_report_timestamp_utc @@ -466,3 +468,68 @@ def test_trajectory_search_excludes_run_only_routine_by_default(tmp_path: Path) assert isinstance(include_payload, dict) assert default_payload.get("trajectory_count") == 0 assert (include_payload.get("trajectory_count") or 0) >= 1 + + +def test_trajectory_status_payload_without_latest_run() -> None: + payload = trajectory_status_payload(count=3, latest_run=None) + assert payload["trajectory_count"] == 3 + assert payload["latest_projection"] is None + + +def test_rank_trajectories_for_query_returns_matches(tmp_path: Path) -> None: + with memory_store(tmp_path) as (root, project, store, _db_path): + audit_db = tmp_path / "audit.sqlite3" + seed_trajectory_audit_workflow(root=root, audit_db=audit_db) + trajectory = store.rebuild_trajectories_from_audit( + project=project, + root_path=root, + audit_db_path=audit_db, + ).trajectories[0] + + empty, truncated = rank_trajectories_for_query( + [trajectory], + query="", + max_results=5, + match_mode="any", + ) + assert empty == [] + assert truncated is False + + hits, truncated_hits = rank_trajectories_for_query( + [trajectory], + query="recover", + max_results=5, + match_mode="any", + ) + assert hits + assert isinstance(truncated_hits, bool) + + +def test_rank_trajectories_for_scope_with_patch_trail_and_long_summary( + tmp_path: Path, +) -> None: + from codeclone.memory.trajectory.patch_trail import compute_patch_trail + + from .test_memory_trajectory_coverage import _patch_trail_inputs + + with 
memory_store(tmp_path) as (root, project, store, _db_path): + audit_db = tmp_path / "audit.sqlite3" + seed_trajectory_audit_workflow(root=root, audit_db=audit_db) + trajectory = store.rebuild_trajectories_from_audit( + project=project, + root_path=root, + audit_db_path=audit_db, + ).trajectories[0] + trajectory = replace(trajectory, summary="z" * 400) + trail = compute_patch_trail(_patch_trail_inputs()) + previews, truncated = rank_trajectories_for_scope( + [trajectory], + scope_paths=("pkg/service.py",), + symbols=("pkg.service",), + max_results=5, + patch_trails={trajectory.id: trail.to_payload(detail_level="summary")}, + detail_level="compact", + ) + assert previews + assert isinstance(truncated, bool) + assert "…" in str(previews[0]["summary"]) diff --git a/tests/test_observability_profile.py b/tests/test_observability_profile.py index 8fd1dcf3..12b0cda2 100644 --- a/tests/test_observability_profile.py +++ b/tests/test_observability_profile.py @@ -16,7 +16,11 @@ from codeclone.config.observability import ObservabilityConfig from codeclone.observability import bootstrap, operation, shutdown, span -from codeclone.observability.profile import build_profile_sample +from codeclone.observability.profile import ( + build_profile_sample, + capture_rss_cpu, + worker_bootstrap_sample, +) from codeclone.observability.store.schema import ( observability_store_path, open_observability_store, @@ -78,3 +82,43 @@ def test_profile_false_leaves_columns_null(tmp_path: Path) -> None: conn.close() assert op_row == (None, None, None) assert span_row[0] is None + + +def test_worker_bootstrap_sample_and_capture_rss_cpu_return_values() -> None: + bootstrap = worker_bootstrap_sample() + assert bootstrap is not None + created_iso, elapsed_ms = bootstrap + assert created_iso.endswith("Z") + assert elapsed_ms >= 0.0 + + snapshot = capture_rss_cpu() + assert snapshot is not None + rss, user_cpu, system_cpu = snapshot + assert rss > 0 + assert user_cpu >= 0.0 + assert system_cpu >= 0.0 + + 
+def test_profile_helpers_return_none_without_psutil( + monkeypatch: pytest.MonkeyPatch, +) -> None: + import builtins + from collections.abc import Mapping, Sequence + + real_import = builtins.__import__ + + def _import( + name: str, + globals: Mapping[str, object] | None = None, + locals: Mapping[str, object] | None = None, + fromlist: Sequence[str] = (), + level: int = 0, + ) -> object: + if name == "psutil": + raise ImportError("psutil unavailable in test") + return real_import(name, globals, locals, fromlist, level) + + monkeypatch.setattr(builtins, "__import__", _import) + assert worker_bootstrap_sample() is None + assert capture_rss_cpu() is None + assert build_profile_sample((0, 0.0, 0.0)) is None diff --git a/tests/test_observability_reason_kind.py b/tests/test_observability_reason_kind.py new file mode 100644 index 00000000..2f2cddab --- /dev/null +++ b/tests/test_observability_reason_kind.py @@ -0,0 +1,22 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import pytest + +from codeclone.observability.reason_kind import REASON_KINDS, validate_reason_kind + + +def test_validate_reason_kind_accepts_known_and_none() -> None: + assert validate_reason_kind(None) is None + for kind in sorted(REASON_KINDS): + assert validate_reason_kind(kind) == kind + + +def test_validate_reason_kind_rejects_unknown() -> None: + with pytest.raises(ValueError, match="unknown reason_kind"): + validate_reason_kind("not-a-kind") From 3129c0528595d25f537d30c5703ec21160c9285a Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Thu, 11 Jun 2026 23:27:58 +0500 Subject: [PATCH 241/318] feat(core): centralize DB query instrumentation via read-only openers Route raw sqlite3.connect bypasses (audit reader, intent gate, semantic sources, staleness) through instrumented domain openers so the per-span db_queries/db_writes counters cover read paths, not just writes. Add open_sqlite_db_readonly plus read-only audit/intent openers (mode=ro URI + query_only pragma) that still attach the trace counter. The projection worker now runs on a single SqliteEngineeringMemoryStore session instead of reopening a connection per step. 
--- codeclone/audit/reader.py | 92 ++++++---- codeclone/audit/schema.py | 55 +++++- codeclone/memory/jobs/staleness.py | 3 +- codeclone/memory/jobs/worker.py | 12 +- codeclone/memory/jobs/workflow.py | 27 +-- codeclone/memory/retrieval/semantic.py | 10 +- codeclone/memory/semantic/sources.py | 8 +- codeclone/surfaces/cli/workflow.py | 8 +- .../surfaces/mcp/_workspace_intent_schema.py | 50 +++++- codeclone/utils/sqlite_store.py | 25 +++ codeclone/workspace_intent/gate.py | 12 +- tests/conftest.py | 8 + tests/test_audit_reader.py | 12 +- tests/test_memory_experience_distillation.py | 2 +- tests/test_memory_jobs_coverage.py | 112 ++++++------- tests/test_observability_correlation.py | 3 +- tests/test_observability_worker_chain.py | 17 +- tests/test_sqlite_readonly_openers.py | 157 ++++++++++++++++++ tests/test_workspace_intent_gate.py | 13 +- 19 files changed, 483 insertions(+), 143 deletions(-) create mode 100644 tests/test_sqlite_readonly_openers.py diff --git a/codeclone/audit/reader.py b/codeclone/audit/reader.py index 00d066a5..ca186ab3 100644 --- a/codeclone/audit/reader.py +++ b/codeclone/audit/reader.py @@ -19,7 +19,7 @@ EVENT_ANALYSIS_COMPLETED, repo_root_digest, ) -from .schema import ensure_schema, get_meta +from .schema import get_meta, open_audit_db_readonly from .validation import AuditReadError, AuditSchemaError @@ -142,11 +142,10 @@ def read_latest_analysis_run( return None digest = repo_root_digest(repo_root.resolve()) try: - conn = sqlite3.connect(str(db_path)) - except sqlite3.Error as exc: + conn = open_audit_db_readonly(db_path) + except (sqlite3.Error, AuditSchemaError, OSError) as exc: raise AuditReadError(f"cannot open audit database: {exc}") from exc try: - ensure_schema(conn) row = conn.execute( "SELECT run_id, created_at_utc, payload_json " "FROM controller_events " @@ -190,11 +189,10 @@ def read_audit_summary(*, db_path: Path, limit: int = 50) -> AuditSummary: if not db_path.is_file(): raise AuditReadError("no audit data") try: - conn = 
sqlite3.connect(str(db_path)) - except sqlite3.Error as exc: + conn = open_audit_db_readonly(db_path) + except (sqlite3.Error, AuditSchemaError, OSError) as exc: raise AuditReadError(f"cannot open audit database: {exc}") from exc try: - ensure_schema(conn) retention_days = _int_meta(conn, "retention_days") total = _count(conn, "SELECT COUNT(*) FROM controller_events") intent_events = _count( @@ -223,13 +221,13 @@ def read_audit_summary(*, db_path: Path, limit: int = 50) -> AuditSummary: ) oldest = _text_scalar(conn, "SELECT MIN(created_at_utc) FROM controller_events") latest = _text_scalar(conn, "SELECT MAX(created_at_utc) FROM controller_events") - token_cols = _has_token_columns(conn) + event_columns = _event_columns(conn) + token_cols = _has_token_columns(event_columns) + select_prefix = _audit_record_select_prefix(event_columns) if token_cols: rows = conn.execute( - "SELECT id, event_id, event_type, severity, created_at_utc, run_id, " - "intent_id, report_digest, workflow_id, surface, tool_name, " - "event_core_json, event_core_sha256, payload_sha256, " - "status, agent_label, summary, " + f"SELECT {select_prefix}, " + f"{_column_or_null(event_columns, 'summary')}, " "estimated_tokens, token_encoding, payload_characters " "FROM controller_events " "ORDER BY created_at_utc DESC, id DESC " @@ -239,10 +237,7 @@ def read_audit_summary(*, db_path: Path, limit: int = 50) -> AuditSummary: token_summary = _token_summary(conn) else: rows = conn.execute( - "SELECT id, event_id, event_type, severity, created_at_utc, run_id, " - "intent_id, report_digest, workflow_id, surface, tool_name, " - "event_core_json, event_core_sha256, payload_sha256, " - "status, agent_label " + f"SELECT {select_prefix} " "FROM controller_events " "ORDER BY created_at_utc DESC, id DESC " "LIMIT ?", @@ -285,11 +280,10 @@ def read_audit_event_core_records( if not db_path.is_file(): raise AuditReadError("no audit data") try: - conn = sqlite3.connect(str(db_path)) - except sqlite3.Error as exc: + 
conn = open_audit_db_readonly(db_path) + except (sqlite3.Error, AuditSchemaError, OSError) as exc: raise AuditReadError(f"cannot open audit database: {exc}") from exc try: - ensure_schema(conn) where = [ "repo_root_digest = ?", "workflow_id IS NOT NULL", @@ -335,11 +329,10 @@ def list_workflow_ids_with_events_after( if not db_path.is_file(): return () try: - conn = sqlite3.connect(str(db_path)) - except sqlite3.Error as exc: + conn = open_audit_db_readonly(db_path) + except (sqlite3.Error, AuditSchemaError, OSError) as exc: raise AuditReadError(f"cannot open audit database: {exc}") from exc try: - ensure_schema(conn) rows = conn.execute( "SELECT DISTINCT workflow_id FROM controller_events " "WHERE repo_root_digest = ? AND id > ? " @@ -365,11 +358,10 @@ def count_audit_event_core_gaps( if not db_path.is_file(): return 0 try: - conn = sqlite3.connect(str(db_path)) - except sqlite3.Error as exc: + conn = open_audit_db_readonly(db_path) + except (sqlite3.Error, AuditSchemaError, OSError) as exc: raise AuditReadError(f"cannot open audit database: {exc}") from exc try: - ensure_schema(conn) row = conn.execute( "SELECT COUNT(*) FROM controller_events " "WHERE repo_root_digest = ? 
" @@ -410,13 +402,51 @@ def _record_from_row(row: tuple[object, ...]) -> AuditRecord: ) -def _has_token_columns(conn: sqlite3.Connection) -> bool: - """Check whether the controller_events table has token columns.""" - columns = { - row[1] +def _event_columns(conn: sqlite3.Connection) -> frozenset[str]: + return frozenset( + str(row[1]) for row in conn.execute("PRAGMA table_info(controller_events)").fetchall() - } - return "estimated_tokens" in columns + if len(row) > 1 + ) + + +def _has_token_columns(columns: frozenset[str]) -> bool: + """Return whether the complete token-accounting projection is readable.""" + + return { + "estimated_tokens", + "token_encoding", + "payload_characters", + }.issubset(columns) + + +def _column_or_null(columns: frozenset[str], name: str) -> str: + return name if name in columns else f"NULL AS {name}" + + +def _audit_record_select_prefix(columns: frozenset[str]) -> str: + required = ( + "id", + "event_id", + "event_type", + "severity", + "created_at_utc", + "run_id", + "intent_id", + "report_digest", + ) + optional = ( + "workflow_id", + "surface", + "tool_name", + "event_core_json", + "event_core_sha256", + "payload_sha256", + ) + suffix = ("status", "agent_label") + return ", ".join( + (*required, *(_column_or_null(columns, name) for name in optional), *suffix) + ) def _token_summary( diff --git a/codeclone/audit/schema.py b/codeclone/audit/schema.py index 02af095a..867031ac 100644 --- a/codeclone/audit/schema.py +++ b/codeclone/audit/schema.py @@ -15,6 +15,7 @@ get_meta_value, initialize_schema_v1, open_sqlite_db, + open_sqlite_db_readonly, ) from .validation import AUDIT_SCHEMA_VERSION, AuditSchemaError @@ -97,9 +98,41 @@ ("agent_start_epoch", "INTEGER"), ) +_READABLE_EVENT_COLUMNS = frozenset( + { + "id", + "event_id", + "event_type", + "severity", + "created_at_utc", + "repo_root_digest", + "run_id", + "intent_id", + "report_digest", + "agent_label", + "agent_pid", + "status", + "payload_json", + } +) + def open_audit_db(path: 
Path) -> sqlite3.Connection: - return open_sqlite_db(path, ensure_schema=ensure_schema) + conn = open_sqlite_db(path, ensure_schema=ensure_schema) + from ..observability import instrument_db_connection + + instrument_db_connection(conn) + return conn + + +def open_audit_db_readonly(path: Path) -> sqlite3.Connection: + """Open a structurally readable audit database without mutating it.""" + + conn = open_sqlite_db_readonly(path, validate_schema=_validate_readonly_schema) + from ..observability import instrument_db_connection + + instrument_db_connection(conn) + return conn def ensure_schema(conn: sqlite3.Connection) -> None: @@ -157,6 +190,25 @@ def _ensure_event_columns(conn: sqlite3.Connection) -> None: conn.commit() +def _validate_readonly_schema(conn: sqlite3.Connection) -> None: + current = get_meta(conn, "schema_version") + if current is not None and current not in _MIGRATABLE_VERSIONS: + raise AuditSchemaError(f"Unsupported audit schema version: {current}") + missing = sorted(_READABLE_EVENT_COLUMNS - _event_columns(conn)) + if missing: + raise AuditSchemaError( + "Audit database is missing required columns: " + ", ".join(missing) + ) + + +def _event_columns(conn: sqlite3.Connection) -> frozenset[str]: + return frozenset( + str(row[1]) + for row in conn.execute("PRAGMA table_info(controller_events)").fetchall() + if len(row) > 1 + ) + + def _set_meta(conn: sqlite3.Connection, key: str, value: str) -> None: conn.execute( f"INSERT OR REPLACE INTO {_AUDIT_META_TABLE}(key, value) VALUES (?, ?)", @@ -174,4 +226,5 @@ def get_meta(conn: sqlite3.Connection, key: str) -> str | None: "ensure_schema", "get_meta", "open_audit_db", + "open_audit_db_readonly", ] diff --git a/codeclone/memory/jobs/staleness.py b/codeclone/memory/jobs/staleness.py index 2ddb408a..db99235a 100644 --- a/codeclone/memory/jobs/staleness.py +++ b/codeclone/memory/jobs/staleness.py @@ -15,6 +15,7 @@ from ...audit.events import repo_root_digest from ...audit.reader import 
count_audit_event_core_gaps +from ...audit.schema import open_audit_db_readonly from ...audit.validation import DEFAULT_AUDIT_PATH, resolve_audit_path from ...config.memory import MemoryConfig from ..models import MemoryProject @@ -33,7 +34,7 @@ def _audit_event_core_fingerprint( "event_core_max_id": 0, "legacy_event_count": 0, } - conn = sqlite3.connect(str(audit_db_path)) + conn = open_audit_db_readonly(audit_db_path) try: row = conn.execute( "SELECT COUNT(*), COALESCE(MAX(id), 0) FROM controller_events " diff --git a/codeclone/memory/jobs/worker.py b/codeclone/memory/jobs/worker.py index a4b52f9f..f17825f3 100644 --- a/codeclone/memory/jobs/worker.py +++ b/codeclone/memory/jobs/worker.py @@ -24,6 +24,7 @@ from ..experience.distillation_workflow import execute_experience_distillation from ..models import MemoryProject from ..semantic.rebuild_workflow import execute_semantic_index_rebuild +from ..sqlite_store import SqliteEngineeringMemoryStore from ..trajectory.rebuild_workflow import execute_trajectory_rebuild from .models import ProjectionJobStatus from .store import claim_next_projection_job as _claim_next @@ -140,7 +141,7 @@ def _emit_worker_bootstrap_span() -> None: def run_projection_job( - conn: sqlite3.Connection, + store: SqliteEngineeringMemoryStore, *, job_id: str, root_path: Path, @@ -148,6 +149,7 @@ def run_projection_job( project: MemoryProject, stimulus: Mapping[str, object], ) -> tuple[ProjectionJobStatus, dict[str, object], str | None]: + conn = store.connection correlation_id, parent_operation_id = _correlation_handoff() with operation( name="memory.projection.job", @@ -169,6 +171,7 @@ def run_projection_job( trajectory_payload = execute_trajectory_rebuild( root_path=root_path, config=config, + store=store, project=project, incremental_after_event_core_id=watermark, ) @@ -179,6 +182,7 @@ def run_projection_job( semantic_payload = execute_semantic_index_rebuild( root_path=root_path, config=config, + store=store, project=project, ) 
semantic_span.set_counter( @@ -194,6 +198,7 @@ def run_projection_job( experience_payload = execute_experience_distillation( root_path=root_path, config=config, + store=store, project=project, ) experience_span.set_counter( @@ -221,13 +226,14 @@ def run_projection_job( def run_projection_jobs_once( - conn: sqlite3.Connection, + store: SqliteEngineeringMemoryStore, *, root_path: Path, config: MemoryConfig, project: MemoryProject, running_timeout_seconds: int, ) -> ProjectionWorkerResult: + conn = store.connection claimed = _claim_next( conn, project_id=project.id, @@ -247,7 +253,7 @@ def run_projection_jobs_once( stimulus = parse_stimulus_json(claimed.stimulus_json) try: final_status, result, error = run_projection_job( - conn, + store, job_id=claimed.id, root_path=root_path, config=config, diff --git a/codeclone/memory/jobs/workflow.py b/codeclone/memory/jobs/workflow.py index 497eb512..0d2d9304 100644 --- a/codeclone/memory/jobs/workflow.py +++ b/codeclone/memory/jobs/workflow.py @@ -6,7 +6,6 @@ from __future__ import annotations -import sqlite3 from pathlib import Path from typing import Literal @@ -23,7 +22,7 @@ from ..exceptions import MemoryContractError from ..models import MemoryProject from ..project import resolve_memory_db_path, resolve_project_identity -from ..schema import open_memory_db +from ..sqlite_store import SqliteEngineeringMemoryStore from .models import ProjectionJobRecord from .spawn import spawn_projection_jobs_worker from .staleness import ( @@ -42,10 +41,10 @@ ProjectionRebuildPolicy = Literal["off", "enqueue_when_stale"] -def _require_memory_db_session( +def _require_memory_store_session( root_path: Path, config: MemoryConfig | None = None, -) -> tuple[Path, MemoryConfig, MemoryProject, sqlite3.Connection]: +) -> tuple[Path, MemoryConfig, MemoryProject, SqliteEngineeringMemoryStore]: resolved_root = root_path.resolve() resolved_config = config or resolve_memory_config(resolved_root) db_path = resolve_memory_db_path(resolved_root, 
resolved_config) @@ -55,8 +54,8 @@ def _require_memory_db_session( "Run memory init or refresh_from_run first." ) project = resolve_project_identity(resolved_root) - conn = open_memory_db(db_path) - return resolved_root, resolved_config, project, conn + store = SqliteEngineeringMemoryStore(db_path) + return resolved_root, resolved_config, project, store def execute_projection_rebuild_status( @@ -65,10 +64,11 @@ def execute_projection_rebuild_status( config: MemoryConfig | None = None, limit: int = 10, ) -> dict[str, object]: - resolved_root, resolved_config, project, conn = _require_memory_db_session( + resolved_root, resolved_config, project, store = _require_memory_store_session( root_path, config=config, ) + conn = store.connection try: current = compute_projection_stimulus( conn=conn, @@ -80,7 +80,7 @@ def execute_projection_rebuild_status( active = pending_projection_job(conn, project_id=project.id) jobs = list_projection_jobs(conn, project_id=project.id, limit=limit) finally: - conn.close() + store.close() return { "action": "projection_rebuild_status", "policy": resolved_config.projection_rebuild_policy, @@ -118,10 +118,11 @@ def execute_enqueue_projection_rebuild( "job_id": None, "spawned": False, } - resolved_root, resolved_config, project, conn = _require_memory_db_session( + resolved_root, resolved_config, project, store = _require_memory_store_session( root_path, config=resolved_config, ) + conn = store.connection try: stimulus = compute_projection_stimulus( conn=conn, @@ -156,7 +157,7 @@ def execute_enqueue_projection_rebuild( ), ) finally: - conn.close() + store.close() base_should_spawn = ( resolved_config.projection_rebuild_spawn_worker if spawn_worker is None @@ -203,7 +204,7 @@ def execute_run_projection_jobs_once( root_path: Path, config: MemoryConfig | None = None, ) -> dict[str, object]: - resolved_root, resolved_config, project, conn = _require_memory_db_session( + resolved_root, resolved_config, project, store = _require_memory_store_session( 
root_path, config=config, ) @@ -215,7 +216,7 @@ def execute_run_projection_jobs_once( bootstrap(resolve_observability_config(), root=resolved_root) try: worker_result = run_projection_jobs_once( - conn, + store, root_path=resolved_root, config=resolved_config, project=project, @@ -224,7 +225,7 @@ def execute_run_projection_jobs_once( ), ) finally: - conn.close() + store.close() if owns_observability: shutdown() return { diff --git a/codeclone/memory/retrieval/semantic.py b/codeclone/memory/retrieval/semantic.py index 02c7084a..a1a744b3 100644 --- a/codeclone/memory/retrieval/semantic.py +++ b/codeclone/memory/retrieval/semantic.py @@ -11,6 +11,8 @@ from pathlib import Path from typing import TYPE_CHECKING, Protocol, cast +from ...audit.schema import open_audit_db_readonly +from ...audit.validation import AuditSchemaError from ..embedding import embed_query from ..semantic.models import SemanticSearchResult @@ -103,7 +105,7 @@ def _hydrate_audit( def _hydrate_trajectory( - hit: SemanticHit, store: _RecordStore, preview_chars: int + hit: SemanticHit, store: object, preview_chars: int ) -> SemanticSearchResult | None: find_trajectory = getattr(store, "find_trajectory", None) if not callable(find_trajectory): @@ -130,8 +132,8 @@ def audit_event_row( if not audit_db_path.is_file(): return None try: - conn = sqlite3.connect(str(audit_db_path)) - except sqlite3.Error: + conn = open_audit_db_readonly(audit_db_path) + except (sqlite3.Error, AuditSchemaError, OSError): return None try: row = conn.execute( @@ -139,7 +141,7 @@ def audit_event_row( "WHERE event_id = ?", (event_id,), ).fetchone() - except sqlite3.Error: + except (sqlite3.Error, AuditSchemaError): return None finally: conn.close() diff --git a/codeclone/memory/semantic/sources.py b/codeclone/memory/semantic/sources.py index e25f7728..379598db 100644 --- a/codeclone/memory/semantic/sources.py +++ b/codeclone/memory/semantic/sources.py @@ -11,6 +11,8 @@ from pathlib import Path from typing import Protocol +from 
...audit.schema import open_audit_db_readonly +from ...audit.validation import AuditSchemaError from ..models import MemoryQuery, MemoryRecord, MemorySubject from ..trajectory.models import Trajectory, TrajectoryListItem from .models import SemanticProjection @@ -171,8 +173,8 @@ def _read_projections(self) -> Iterator[SemanticProjection]: event_types = tuple(sorted(INDEXED_AUDIT_EVENTS)) placeholders = ", ".join("?" for _ in event_types) try: - conn = sqlite3.connect(str(self._db_path)) - except sqlite3.Error: + conn = open_audit_db_readonly(self._db_path) + except (sqlite3.Error, AuditSchemaError, OSError): return try: rows = conn.execute( @@ -182,7 +184,7 @@ def _read_projections(self) -> Iterator[SemanticProjection]: "ORDER BY created_at_utc ASC, id ASC", event_types, ).fetchall() - except sqlite3.Error: + except (sqlite3.Error, AuditSchemaError): return finally: conn.close() diff --git a/codeclone/surfaces/cli/workflow.py b/codeclone/surfaces/cli/workflow.py index 2ad54fc8..920fc245 100644 --- a/codeclone/surfaces/cli/workflow.py +++ b/codeclone/surfaces/cli/workflow.py @@ -9,6 +9,7 @@ import sys import time from pathlib import Path +from typing import Protocol from ... import __version__ from ... 
import ui_messages as ui @@ -52,6 +53,11 @@ _CLI_SESSION_START_EPOCH = int(time.time()) + +class _AuditEnabledArgs(Protocol): + audit_enabled: bool + + __all__ = [ "LEGACY_CACHE_PATH", "Baseline", @@ -756,7 +762,7 @@ def _main_impl() -> None: def _emit_cli_analysis_completed_if_enabled( *, - args: CLIArgsLike, + args: _AuditEnabledArgs, root_path: Path, report_document: object, new_func_count: int, diff --git a/codeclone/surfaces/mcp/_workspace_intent_schema.py b/codeclone/surfaces/mcp/_workspace_intent_schema.py index f1c87cd8..d4f93efb 100644 --- a/codeclone/surfaces/mcp/_workspace_intent_schema.py +++ b/codeclone/surfaces/mcp/_workspace_intent_schema.py @@ -15,6 +15,7 @@ get_meta_value, initialize_schema_v1, open_sqlite_db, + open_sqlite_db_readonly, ) INTENT_REGISTRY_SCHEMA_VERSION = "2" @@ -50,13 +51,39 @@ "ON workspace_intents(closed_at_utc)", ) +_REQUIRED_INTENT_COLUMNS = frozenset( + { + "agent_pid", + "agent_start_epoch", + "intent_id", + "declared_at_utc", + "payload_json", + "closed_at_utc", + "updated_at_utc", + } +) + class IntentRegistrySchemaError(RuntimeError): """Raised for unsupported or corrupt intent registry database schemas.""" def open_intent_registry_db(path: Path) -> sqlite3.Connection: - return open_sqlite_db(path, ensure_schema=ensure_schema) + conn = open_sqlite_db(path, ensure_schema=ensure_schema) + from ...observability import instrument_db_connection + + instrument_db_connection(conn) + return conn + + +def open_intent_registry_db_readonly(path: Path) -> sqlite3.Connection: + """Open a current registry without creating or migrating coordination state.""" + + conn = open_sqlite_db_readonly(path, validate_schema=_validate_readonly_schema) + from ...observability import instrument_db_connection + + instrument_db_connection(conn) + return conn def ensure_schema(conn: sqlite3.Connection) -> None: @@ -114,6 +141,26 @@ def _migrate_v1_to_v2(conn: sqlite3.Connection) -> None: conn.commit() +def _validate_readonly_schema(conn: 
sqlite3.Connection) -> None: + current = get_meta(conn, "schema_version") + if current != INTENT_REGISTRY_SCHEMA_VERSION: + rendered = current if current is not None else "missing" + raise IntentRegistrySchemaError( + "Intent registry requires writable schema initialization or migration: " + f"found {rendered}, expected {INTENT_REGISTRY_SCHEMA_VERSION}" + ) + columns = { + str(row[1]) + for row in conn.execute("PRAGMA table_info(workspace_intents)").fetchall() + if len(row) > 1 + } + missing = sorted(_REQUIRED_INTENT_COLUMNS - columns) + if missing: + raise IntentRegistrySchemaError( + "Intent registry is missing required columns: " + ", ".join(missing) + ) + + def get_meta(conn: sqlite3.Connection, key: str) -> str | None: return get_meta_value(conn, meta_table=_INTENT_META_TABLE, key=key) @@ -125,4 +172,5 @@ def get_meta(conn: sqlite3.Connection, key: str) -> str | None: "ensure_schema", "get_meta", "open_intent_registry_db", + "open_intent_registry_db_readonly", ] diff --git a/codeclone/utils/sqlite_store.py b/codeclone/utils/sqlite_store.py index f017251d..9492c83c 100644 --- a/codeclone/utils/sqlite_store.py +++ b/codeclone/utils/sqlite_store.py @@ -9,6 +9,7 @@ import sqlite3 from collections.abc import Callable, Mapping, Sequence from pathlib import Path +from urllib.parse import quote _SQLITE_PRAGMAS = ( "PRAGMA journal_mode=WAL", @@ -66,6 +67,29 @@ def open_sqlite_db( return conn +def open_sqlite_db_readonly( + path: Path, + *, + validate_schema: Callable[[sqlite3.Connection], None], +) -> sqlite3.Connection: + """Open an existing SQLite database without allowing writes or creation.""" + + resolved = path.resolve(strict=True) + uri = f"file:{quote(str(resolved), safe='/')}?mode=ro" + conn = sqlite3.connect( + uri, + uri=True, + ) + try: + conn.execute("PRAGMA query_only=ON") + conn.execute("PRAGMA busy_timeout=5000") + validate_schema(conn) + except Exception: + conn.close() + raise + return conn + + def get_meta_value( conn: sqlite3.Connection, *, @@ 
-108,4 +132,5 @@ def initialize_schema_v1( "get_meta_value", "initialize_schema_v1", "open_sqlite_db", + "open_sqlite_db_readonly", ] diff --git a/codeclone/workspace_intent/gate.py b/codeclone/workspace_intent/gate.py index e0279345..0115c7ac 100644 --- a/codeclone/workspace_intent/gate.py +++ b/codeclone/workspace_intent/gate.py @@ -21,7 +21,6 @@ from datetime import datetime from pathlib import Path from typing import Literal -from urllib.parse import quote from codeclone.config.intent_registry import ( IntentRegistryConfig, @@ -45,6 +44,10 @@ record_sort_key, registry_files, ) +from codeclone.surfaces.mcp._workspace_intent_schema import ( + IntentRegistrySchemaError, + open_intent_registry_db_readonly, +) GateReason = Literal[ "active_intent", @@ -107,7 +110,7 @@ def evaluate_workspace_edit_gate(root: Path | str) -> WorkspaceEditGateDecision: try: records = _load_registry_records_read_only(root_path, config) - except (OSError, sqlite3.Error, ValueError) as exc: + except (OSError, sqlite3.Error, IntentRegistrySchemaError, ValueError) as exc: return WorkspaceEditGateDecision( allowed=False, reason="registry_error", @@ -240,7 +243,7 @@ def _list_unclosed_workspace_intents_filtered( try: records = _load_registry_records_read_only(root_path, config) - except (OSError, sqlite3.Error, ValueError) as exc: + except (OSError, sqlite3.Error, IntentRegistrySchemaError, ValueError) as exc: raise WorkspaceIntentRegistryUnavailable(str(exc)) from exc current_time = utc_now() @@ -373,8 +376,7 @@ def _load_file_records(root: Path) -> tuple[WorkspaceIntentRecord, ...]: def _load_sqlite_records(db_path: Path) -> tuple[WorkspaceIntentRecord, ...]: if not db_path.is_file(): return () - uri = f"file:{quote(str(db_path), safe='/')}?mode=ro" - conn = sqlite3.connect(uri, uri=True) + conn = open_intent_registry_db_readonly(db_path) try: rows = conn.execute( """ diff --git a/tests/conftest.py b/tests/conftest.py index efe40d9c..1e23d097 100644 --- a/tests/conftest.py +++ 
b/tests/conftest.py @@ -28,6 +28,14 @@ def _clear_workspace_intent_store_cache() -> Generator[None, None, None]: clear_workspace_intent_store_cache() +@pytest.fixture(autouse=True) +def _reset_observability_runtime() -> Generator[None, None, None]: + yield + from codeclone.observability.runtime import shutdown + + shutdown() + + @pytest.fixture def report_meta_factory() -> ReportMetaFactory: def _make(**overrides: object) -> dict[str, object]: diff --git a/tests/test_audit_reader.py b/tests/test_audit_reader.py index 9851ded1..1743d3b1 100644 --- a/tests/test_audit_reader.py +++ b/tests/test_audit_reader.py @@ -73,10 +73,18 @@ def test_read_latest_analysis_run_connect_error(tmp_path: Path) -> None: def test_read_latest_analysis_run_read_error(tmp_path: Path) -> None: db_path = _write_cli_analysis_event(tmp_path) + + class _FailingConnection: + def execute(self, *_args: object, **_kwargs: object) -> object: + raise sqlite3.Error("query failed") + + def close(self) -> None: + return None + with ( patch( - "codeclone.audit.reader.ensure_schema", - side_effect=sqlite3.Error("query failed"), + "codeclone.audit.reader.open_audit_db_readonly", + return_value=_FailingConnection(), ), pytest.raises(AuditReadError, match="cannot read audit database"), ): diff --git a/tests/test_memory_experience_distillation.py b/tests/test_memory_experience_distillation.py index 43abc862..0b57696b 100644 --- a/tests/test_memory_experience_distillation.py +++ b/tests/test_memory_experience_distillation.py @@ -107,7 +107,7 @@ def test_projection_job_includes_experience_step(tmp_path: Path) -> None: config = replace(resolve_memory_config(root), trajectories_enabled=False) _final_status, result, _error = run_projection_job( - store.connection, + store, job_id="job-1", root_path=root, config=config, diff --git a/tests/test_memory_jobs_coverage.py b/tests/test_memory_jobs_coverage.py index 5b9cbcfc..b5019b8d 100644 --- a/tests/test_memory_jobs_coverage.py +++ b/tests/test_memory_jobs_coverage.py 
@@ -267,7 +267,7 @@ def test_store_list_and_latest_done_projection_job(tmp_path: Path) -> None: def test_run_projection_job_failed_and_skipped() -> None: project = MagicMock() config = MagicMock() - conn = MagicMock() + store = MagicMock() with ( patch( "codeclone.memory.jobs.worker.execute_trajectory_rebuild", @@ -283,7 +283,7 @@ def test_run_projection_job_failed_and_skipped() -> None: ), ): status, _payload, reason = run_projection_job( - conn, + store, job_id="job-1", root_path=Path("/tmp"), config=config, @@ -308,7 +308,7 @@ def test_run_projection_job_failed_and_skipped() -> None: ), ): status, _payload, reason = run_projection_job( - conn, + store, job_id="job-2", root_path=Path("/tmp"), config=config, @@ -320,36 +320,31 @@ def test_run_projection_job_failed_and_skipped() -> None: def test_run_projection_jobs_once_handles_worker_exception(tmp_path: Path) -> None: - with cli_memory_repo(tmp_path, with_draft=False) as (root, project, _store): + with cli_memory_repo(tmp_path, with_draft=False) as (root, project, store): config = resolve_memory_config(root) - db_path = resolve_memory_db_path(root, config) - conn = open_memory_db(db_path) - try: - stimulus = compute_projection_stimulus( - conn=conn, - project=project, + stimulus = compute_projection_stimulus( + conn=store.connection, + project=project, + root_path=root, + config=config, + ) + enqueue_projection_job( + store.connection, + project=project, + trigger="cli", + stimulus=stimulus, + ) + with patch( + "codeclone.memory.jobs.worker.run_projection_job", + side_effect=RuntimeError("boom"), + ): + result = run_projection_jobs_once( + store, root_path=root, config=config, - ) - enqueue_projection_job( - conn, project=project, - trigger="cli", - stimulus=stimulus, + running_timeout_seconds=60, ) - with patch( - "codeclone.memory.jobs.worker.run_projection_job", - side_effect=RuntimeError("boom"), - ): - result = run_projection_jobs_once( - conn, - root_path=root, - config=config, - project=project, - 
running_timeout_seconds=60, - ) - finally: - conn.close() assert result.status == "failed" assert result.reason == "boom" @@ -431,44 +426,39 @@ def test_is_ci_environment_detects_common_keys() -> None: def test_run_projection_jobs_once_completes_pending_job(tmp_path: Path) -> None: - with cli_memory_repo(tmp_path, with_draft=False) as (root, project, _store): + with cli_memory_repo(tmp_path, with_draft=False) as (root, project, store): config = resolve_memory_config(root) - db_path = resolve_memory_db_path(root, config) - conn = open_memory_db(db_path) - try: - stimulus = compute_projection_stimulus( - conn=conn, - project=project, + stimulus = compute_projection_stimulus( + conn=store.connection, + project=project, + root_path=root, + config=config, + ) + enqueue_projection_job( + store.connection, + project=project, + trigger="cli", + stimulus=stimulus, + ) + with patch( + "codeclone.memory.jobs.worker.run_projection_job", + return_value=( + "done", + { + "trajectory": {"status": "ok"}, + "semantic": {"status": "skipped"}, + "applied_stimulus": stimulus, + }, + None, + ), + ): + result = run_projection_jobs_once( + store, root_path=root, config=config, - ) - enqueue_projection_job( - conn, project=project, - trigger="cli", - stimulus=stimulus, + running_timeout_seconds=60, ) - with patch( - "codeclone.memory.jobs.worker.run_projection_job", - return_value=( - "done", - { - "trajectory": {"status": "ok"}, - "semantic": {"status": "skipped"}, - "applied_stimulus": stimulus, - }, - None, - ), - ): - result = run_projection_jobs_once( - conn, - root_path=root, - config=config, - project=project, - running_timeout_seconds=60, - ) - finally: - conn.close() assert result.status == "done" assert result.job_id is not None diff --git a/tests/test_observability_correlation.py b/tests/test_observability_correlation.py index bccb6c7a..05b41211 100644 --- a/tests/test_observability_correlation.py +++ b/tests/test_observability_correlation.py @@ -62,8 +62,9 @@ def 
test_run_projection_job_links_under_finish( ) bootstrap(ObservabilityConfig(enabled=True), root=tmp_path) + store = MagicMock() worker.run_projection_job( - MagicMock(), + store, job_id="j1", root_path=tmp_path, config=MagicMock(), diff --git a/tests/test_observability_worker_chain.py b/tests/test_observability_worker_chain.py index ef38a172..7a45df14 100644 --- a/tests/test_observability_worker_chain.py +++ b/tests/test_observability_worker_chain.py @@ -17,8 +17,6 @@ from codeclone.config.observability import ObservabilityConfig from codeclone.memory.jobs import worker as worker_module from codeclone.memory.jobs.worker import run_projection_job -from codeclone.memory.project import resolve_memory_db_path -from codeclone.memory.schema import open_memory_db from codeclone.observability import bootstrap, shutdown from codeclone.observability.store.schema import ( observability_store_path, @@ -35,9 +33,8 @@ def _reset_runtime() -> Iterator[None]: def test_run_projection_job_emits_operation_and_spans(tmp_path: Path) -> None: - with cli_memory_repo(tmp_path, with_draft=False) as (root, project, _store): + with cli_memory_repo(tmp_path, with_draft=False) as (root, project, store): config = resolve_memory_config(root) - conn = open_memory_db(resolve_memory_db_path(root, config)) bootstrap(ObservabilityConfig(enabled=True), root=root) try: with ( @@ -45,7 +42,7 @@ def test_run_projection_job_emits_operation_and_spans(tmp_path: Path) -> None: worker_module, "execute_trajectory_rebuild", return_value={"status": "ok", "mode": "full", "workflows_seen": 7}, - ), + ) as trajectory_rebuild, patch.object( worker_module, "execute_semantic_index_rebuild", @@ -54,15 +51,15 @@ def test_run_projection_job_emits_operation_and_spans(tmp_path: Path) -> None: "embedded": 1423, "skipped_unchanged": 11, }, - ), + ) as semantic_rebuild, patch.object( worker_module, "execute_experience_distillation", return_value={"status": "ok", "experiences_distilled": 3}, - ), + ) as 
experience_distillation, ): status, _result, _reason = run_projection_job( - conn, + store, job_id="job-1", root_path=root, config=config, @@ -70,10 +67,12 @@ def test_run_projection_job_emits_operation_and_spans(tmp_path: Path) -> None: stimulus={}, ) finally: - conn.close() shutdown() assert status == "done" + assert trajectory_rebuild.call_args.kwargs["store"] is store + assert semantic_rebuild.call_args.kwargs["store"] is store + assert experience_distillation.call_args.kwargs["store"] is store obs = open_observability_store(observability_store_path(root)) try: diff --git a/tests/test_sqlite_readonly_openers.py b/tests/test_sqlite_readonly_openers.py new file mode 100644 index 00000000..05bb207c --- /dev/null +++ b/tests/test_sqlite_readonly_openers.py @@ -0,0 +1,157 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import sqlite3 +from pathlib import Path + +import pytest + +from codeclone.audit.schema import ( + open_audit_db, + open_audit_db_readonly, +) +from codeclone.audit.validation import AuditSchemaError +from codeclone.surfaces.mcp._workspace_intent_schema import ( + IntentRegistrySchemaError, + open_intent_registry_db, + open_intent_registry_db_readonly, +) +from codeclone.utils.sqlite_store import open_sqlite_db_readonly + + +def test_generic_readonly_opener_does_not_create_missing_database( + tmp_path: Path, +) -> None: + db_path = tmp_path / "missing.sqlite3" + + with pytest.raises(FileNotFoundError): + open_sqlite_db_readonly(db_path, validate_schema=lambda _conn: None) + + assert not db_path.exists() + + +def test_audit_readonly_opener_rejects_writes(tmp_path: Path) -> None: + db_path = tmp_path / "audit.sqlite3" + writable = open_audit_db(db_path) + writable.close() + + conn = 
open_audit_db_readonly(db_path) + try: + assert conn.execute("SELECT COUNT(*) FROM controller_events").fetchone() == (0,) + with pytest.raises(sqlite3.OperationalError, match="readonly"): + conn.execute("DELETE FROM controller_events") + finally: + conn.close() + + +def test_audit_readonly_opener_accepts_migratable_schema_without_migration( + tmp_path: Path, +) -> None: + db_path = tmp_path / "audit.sqlite3" + writable = open_audit_db(db_path) + try: + writable.execute("UPDATE audit_meta SET value='3' WHERE key='schema_version'") + writable.commit() + finally: + writable.close() + + readonly = open_audit_db_readonly(db_path) + readonly.close() + + raw = sqlite3.connect(db_path) + try: + assert raw.execute( + "SELECT value FROM audit_meta WHERE key='schema_version'" + ).fetchone() == ("3",) + finally: + raw.close() + + +def test_audit_readonly_opener_rejects_unsupported_schema(tmp_path: Path) -> None: + db_path = tmp_path / "audit.sqlite3" + writable = open_audit_db(db_path) + try: + writable.execute("UPDATE audit_meta SET value='999' WHERE key='schema_version'") + writable.commit() + finally: + writable.close() + + with pytest.raises(AuditSchemaError, match="Unsupported audit schema"): + open_audit_db_readonly(db_path) + + +def test_intent_readonly_opener_rejects_stale_schema_without_migration( + tmp_path: Path, +) -> None: + db_path = tmp_path / "intents.sqlite3" + writable = open_intent_registry_db(db_path) + try: + writable.execute( + "UPDATE intent_registry_meta SET value='1' WHERE key='schema_version'" + ) + writable.commit() + finally: + writable.close() + + with pytest.raises(IntentRegistrySchemaError, match="requires writable"): + open_intent_registry_db_readonly(db_path) + + raw = sqlite3.connect(db_path) + try: + assert raw.execute( + "SELECT value FROM intent_registry_meta WHERE key='schema_version'" + ).fetchone() == ("1",) + finally: + raw.close() + + +@pytest.mark.parametrize( + "opener", + [open_audit_db, open_audit_db_readonly, 
open_intent_registry_db], +) +def test_domain_openers_attach_observability( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + opener: object, +) -> None: + audit_path = tmp_path / "audit.sqlite3" + intent_path = tmp_path / "intents.sqlite3" + open_audit_db(audit_path).close() + calls: list[sqlite3.Connection] = [] + monkeypatch.setattr( + "codeclone.observability.instrument_db_connection", + calls.append, + ) + + selected = opener + assert callable(selected) + path = intent_path if selected is open_intent_registry_db else audit_path + conn = selected(path) + try: + assert calls == [conn] + finally: + conn.close() + + +def test_intent_readonly_opener_attaches_observability( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + db_path = tmp_path / "intents.sqlite3" + open_intent_registry_db(db_path).close() + calls: list[sqlite3.Connection] = [] + monkeypatch.setattr( + "codeclone.observability.instrument_db_connection", + calls.append, + ) + + conn = open_intent_registry_db_readonly(db_path) + try: + assert calls == [conn] + finally: + conn.close() diff --git a/tests/test_workspace_intent_gate.py b/tests/test_workspace_intent_gate.py index 819d36f5..2f864444 100644 --- a/tests/test_workspace_intent_gate.py +++ b/tests/test_workspace_intent_gate.py @@ -219,17 +219,18 @@ def close(self) -> None: fake = _FakeConnection() - def _connect(database: str, *, uri: bool) -> _FakeConnection: - seen["uri"] = database - seen["uri_flag"] = uri + def _open_readonly(database: Path) -> _FakeConnection: + seen["database"] = database return fake - monkeypatch.setattr("codeclone.workspace_intent.gate.sqlite3.connect", _connect) + monkeypatch.setattr( + "codeclone.workspace_intent.gate.open_intent_registry_db_readonly", + _open_readonly, + ) assert_gate_denied(tmp_path, reason="no_active_intent") assert fake.closed is True - assert seen["uri_flag"] is True - assert str(seen["uri"]).endswith("?mode=ro") + assert seen["database"] == db_path assert "SELECT payload_json" 
in str(seen["sql"]) From a571a5d056132e9c03fc81f722d5300c68f954d4 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Thu, 11 Jun 2026 23:27:58 +0500 Subject: [PATCH 242/318] perf(memory): batch retrieval subject/evidence/trail loads Replace per-memory N+1 lookups in relevance ranking with batched IN-queries (list_subjects_for_memories, count_evidence_for_memories, load_trajectory_patch_trails), chunked at the SQLite variable limit. Wire the batch loaders into the retrieval service and trajectory hydration, and honor compact detail_level so list responses stay bounded. --- codeclone/memory/retrieval/service.py | 198 +++++++++++++++--- codeclone/memory/sqlite_store.py | 69 ++++++ codeclone/memory/trajectory/analytics.py | 9 +- codeclone/memory/trajectory/retrieval.py | 7 +- codeclone/memory/trajectory/store.py | 29 +++ tests/test_memory_compact_contract.py | 137 ++++++++++++ tests/test_memory_coverage_gaps.py | 40 ++-- tests/test_memory_experience_retrieval.py | 10 + tests/test_memory_retrieval_batching.py | 131 ++++++++++++ .../test_memory_retrieval_service_coverage.py | 62 +++++- tests/test_memory_trajectory_retrieval.py | 15 +- 11 files changed, 650 insertions(+), 57 deletions(-) create mode 100644 tests/test_memory_compact_contract.py create mode 100644 tests/test_memory_retrieval_batching.py diff --git a/codeclone/memory/retrieval/service.py b/codeclone/memory/retrieval/service.py index 90731830..d217d747 100644 --- a/codeclone/memory/retrieval/service.py +++ b/codeclone/memory/retrieval/service.py @@ -23,6 +23,7 @@ normalize_memory_scope_paths, normalize_repo_path, repo_path_to_module_key, + subject_matches_scope, ) from ..search_index import SearchMatchMode from ..sqlite_store import SqliteEngineeringMemoryStore @@ -38,6 +39,7 @@ rank_trajectories_for_query, rank_trajectories_for_scope, serialize_trajectory_detail, + serialize_trajectory_preview, trajectory_status_payload, trajectory_subject_keys, ) @@ -212,6 +214,15 @@ def _retrieval_policy(*, 
include_drafts: bool) -> dict[str, object]: DEFAULT_EXPERIENCE_PREVIEW_LIMIT = 10 +COMPACT_MEMORY_SUBJECT_LIMIT = 6 + +_MEMORY_SUBJECT_KIND_ORDER = { + "path": 0, + "module": 1, + "symbol": 2, + "intent": 3, + "workflow": 4, +} def _scope_family(path: str) -> str | None: @@ -253,21 +264,55 @@ def _serialize_experience( "information_value": experience.information_value, "status": experience.status, "statement": statement, - "agent_facets": [ - {"agent_family": facet.facet_value, "count": facet.count} + } + agent_facet_items = sorted( + ( + (facet.facet_value, facet.count) for facet in experience.facets if facet.facet_kind == "agent_family" - ], - } + ), + key=lambda item: (-item[1], item[0]), + ) + agent_facets: list[dict[str, object]] = [ + {"agent_family": family, "count": count} for family, count in agent_facet_items + ] + payload.update( + _experience_detail_payload( + experience, + detail_level=detail_level, + statement_length=statement_length, + statement=statement, + agent_facets=agent_facets, + ) + ) + return payload + + +def _experience_detail_payload( + experience: Experience, + *, + detail_level: MemoryDetailLevel, + statement_length: int, + statement: str, + agent_facets: list[dict[str, object]], +) -> dict[str, object]: if detail_level == "full": - payload["evidence_trajectory_ids"] = [ - item.trajectory_id for item in experience.evidence - ] - else: - payload["statement_length"] = statement_length - payload["evidence_count"] = len(experience.evidence) - if statement_length > len(statement): - payload["statement_truncated"] = True + return { + "agent_facets": agent_facets, + "evidence_trajectory_ids": [ + item.trajectory_id for item in experience.evidence + ], + } + payload: dict[str, object] = { + "statement_length": statement_length, + "evidence_count": len(experience.evidence), + "agent_family_count": len(agent_facets), + "multi_agent": len(agent_facets) > 1, + } + if agent_facets: + payload["dominant_agent_facet"] = agent_facets[0] + if 
statement_length > len(statement): + payload["statement_truncated"] = True return payload @@ -315,6 +360,50 @@ def _serialize_subject(subject: MemorySubject) -> dict[str, object]: } +def _memory_subject_priority( + subject: MemorySubject, + *, + context: RankingContext | None, +) -> tuple[object, ...]: + key = subject.subject_key.replace("\\", "/").strip("/") + context_group = 3 + context_score = 0.0 + if context is not None: + if key in context.symbols: + context_group = 0 + context_score = 1.0 + else: + scope_score = subject_matches_scope(key, scope_paths=context.scope_paths) + if scope_score > 0.0: + context_group = 1 + context_score = scope_score + elif key in context.blast_dependents: + context_group = 2 + context_score = 0.7 + return ( + context_group, + -context_score, + _MEMORY_SUBJECT_KIND_ORDER.get(subject.subject_kind, 99), + key, + subject.relation, + subject.id, + ) + + +def _preview_memory_subjects( + subjects: Sequence[MemorySubject], + *, + detail_level: MemoryDetailLevel, + context: RankingContext | None, +) -> list[MemorySubject]: + if detail_level == "full": + return list(subjects) + return sorted( + subjects, + key=lambda subject: _memory_subject_priority(subject, context=context), + )[:COMPACT_MEMORY_SUBJECT_LIMIT] + + def _serialize_evidence(evidence: MemoryEvidence) -> dict[str, object]: return { "id": evidence.id, @@ -339,12 +428,18 @@ def _serialize_record_summary( evidence_count: int, relevance_score: float | None = None, detail_level: MemoryDetailLevel = "compact", + context: RankingContext | None = None, ) -> dict[str, object]: statement_length = len(record.statement) if detail_level == "full": statement_value: str = record.statement else: statement_value = _statement_preview(record.statement) + serialized_subjects = _preview_memory_subjects( + subjects, + detail_level=detail_level, + context=context, + ) payload: dict[str, object] = { "id": record.id, "type": record.type, @@ -353,14 +448,17 @@ def _serialize_record_summary( "approved": 
record.approved_by is not None, "statement": statement_value, "statement_length": statement_length, - "subjects": [_serialize_subject(item) for item in subjects], + "subjects": [_serialize_subject(item) for item in serialized_subjects], "evidence_count": evidence_count, "stale": record.status == "stale", } if detail_level == "full": payload["payload"] = record.payload - elif detail_level == "compact" and statement_length > len(statement_value): - payload["statement_truncated"] = True + else: + payload["subject_count"] = len(subjects) + payload["subjects_truncated"] = len(serialized_subjects) < len(subjects) + if statement_length > len(statement_value): + payload["statement_truncated"] = True if record.stale_reason: payload["stale_reason"] = record.stale_reason if record.status == "draft": @@ -441,10 +539,13 @@ def _rank_records( proximity: Mapping[str, float] | None = None, ) -> tuple[list[dict[str, object]], bool]: proximity_map = proximity or {} + candidate_ids = tuple(record.id for record in candidates) + subjects_by_id = store.list_subjects_for_memories(candidate_ids) + evidence_counts = store.count_evidence_for_memories(candidate_ids) base: list[tuple[float, MemoryRecord, list[MemorySubject], int]] = [] for record in candidates: - subjects = store.list_subjects_for_memory(record.id) - evidence_count = store.count_evidence_for_memory(record.id) + subjects = subjects_by_id[record.id] + evidence_count = evidence_counts[record.id] score = relevance_score( record=record, subjects=subjects, @@ -468,6 +569,7 @@ def _rank_records( evidence_count=evidence_count, relevance_score=adjusted, detail_level=detail_level, + context=context, ) if record_relations is not None: summary["relations"] = record_relations @@ -576,11 +678,6 @@ def get_relevant_memory( patch_trails=patch_trails, detail_level=normalized_detail, ) - patch_trail_summary = None - if trajectories_payload: - first_summary = trajectories_payload[0].get("patch_trail_summary") - if isinstance(first_summary, 
dict): - patch_trail_summary = first_summary matching_experiences = _matching_experiences( store, project_id=project_id, @@ -614,13 +711,12 @@ def get_relevant_memory( "coverage_percent": None, "coverage_note": "symbol_scoped_retrieval", } - return { + payload: dict[str, object] = { "project_id": project_id, "scope_resolved_from": scope_resolved_from, "records": records_payload, "trajectories": trajectories_payload, "experiences": experiences_payload, - "patch_trail_summary": patch_trail_summary, "record_count": len(records_payload), "trajectory_count": len(trajectories_payload), "experience_count": len(experiences_payload), @@ -630,6 +726,26 @@ def get_relevant_memory( "detail_level": normalized_detail, "retrieval_policy": _retrieval_policy(include_drafts=effective_include_drafts), } + payload.update( + _root_patch_trail_payload( + detail_level=normalized_detail, + trajectories=trajectories_payload, + ) + ) + return payload + + +def _root_patch_trail_payload( + *, + detail_level: MemoryDetailLevel, + trajectories: Sequence[Mapping[str, object]], +) -> dict[str, object]: + if detail_level != "full" or not trajectories: + return {} + first_summary = trajectories[0].get("patch_trail_summary") + if not isinstance(first_summary, dict): + return {} + return {"patch_trail_summary": first_summary} def _load_patch_trails_for_trajectories( @@ -637,12 +753,7 @@ def _load_patch_trails_for_trajectories( *, trajectory_ids: Sequence[str], ) -> dict[str, dict[str, object]]: - trails: dict[str, dict[str, object]] = {} - for trajectory_id in trajectory_ids: - loaded = store.load_trajectory_patch_trail(trajectory_id) - if loaded is not None: - trails[trajectory_id] = loaded - return trails + return store.load_trajectory_patch_trails(trajectory_ids) def _parse_filters( @@ -875,6 +986,7 @@ def _handle_trajectory_search_mode( max_results: int, match_mode: SearchMatchMode, include_routine: bool = False, + detail_level: MemoryDetailLevel = "compact", ) -> dict[str, object]: statement = 
_require_query_field(query, mode=mode, field="query") candidates = store.search_trajectories( @@ -889,11 +1001,12 @@ def _handle_trajectory_search_mode( max_results=max_results, match_mode=match_mode, include_routine=include_routine, + detail_level=detail_level, ) return { "mode": mode, "status": "ok", - "detail_level": "compact", + "detail_level": detail_level, "payload": { "trajectories": trajectories, "trajectory_count": len(trajectories), @@ -910,16 +1023,18 @@ def _handle_trajectory_anomalies_mode( project_id: str, max_results: int, include_routine: bool = False, + detail_level: MemoryDetailLevel = "compact", ) -> dict[str, object]: return { "mode": mode, "status": "ok", - "detail_level": "compact", + "detail_level": detail_level, "payload": build_trajectory_anomalies_payload( store, project_id=project_id, max_results=max_results, include_routine=include_routine, + detail_level=detail_level, ), } @@ -950,16 +1065,18 @@ def _handle_trajectory_dashboard_mode( project_id: str, max_results: int, include_routine: bool = False, + detail_level: MemoryDetailLevel = "compact", ) -> dict[str, object]: return { "mode": mode, "status": "ok", - "detail_level": "compact", + "detail_level": detail_level, "payload": build_trajectory_dashboard_payload( store, project_id=project_id, max_results=max_results, include_routine=include_routine, + detail_level=detail_level, ), } @@ -1221,13 +1338,21 @@ def _hydrate_trajectory_hits( *, project_id: str, hits: Sequence[SemanticHit], + detail_level: MemoryDetailLevel, ) -> list[dict[str, object]]: trajectories: list[dict[str, object]] = [] for hit in hits: trajectory = store.find_trajectory(hit.source_id) if trajectory is None or trajectory.project_id != project_id: continue - payload = serialize_trajectory_detail(trajectory, max_steps=4) + payload = ( + serialize_trajectory_detail(trajectory, max_steps=4) + if detail_level == "full" + else serialize_trajectory_preview( + trajectory, + detail_level="compact", + ) + ) 
payload["semantic_score"] = hit.score trajectories.append(payload) return trajectories @@ -1306,6 +1431,7 @@ def _handle_semantic_search_mode( store, project_id=project_id, hits=trajectory_hits, + detail_level=detail_level, ) semantic_block = _semantic_status_block( status, @@ -1473,6 +1599,7 @@ def query_engineering_memory( project_id=project_id, max_results=max_results, include_routine=include_routine, + detail_level=normalized_detail, ) if mode == "trajectory_agents": return _handle_trajectory_agents_mode( @@ -1488,6 +1615,7 @@ def query_engineering_memory( project_id=project_id, max_results=max_results, include_routine=include_routine, + detail_level=normalized_detail, ) if mode == "trajectory_search": return _handle_trajectory_search_mode( @@ -1498,6 +1626,7 @@ def query_engineering_memory( max_results=max_results, match_mode=match_mode, include_routine=include_routine, + detail_level=normalized_detail, ) statuses = _search_statuses_for_mode( mode, @@ -1566,6 +1695,7 @@ def query_engineering_memory( __all__ = [ + "COMPACT_MEMORY_SUBJECT_LIMIT", "QUERY_MODES", "MemoryDetailLevel", "QueryMode", diff --git a/codeclone/memory/sqlite_store.py b/codeclone/memory/sqlite_store.py index b25e7a90..5b9a240b 100644 --- a/codeclone/memory/sqlite_store.py +++ b/codeclone/memory/sqlite_store.py @@ -13,6 +13,7 @@ from typing import cast from ..report.meta import current_report_timestamp_utc +from ..utils.iterutils import chunked from .enums import LinkRelation from .experience.models import Experience from .locks import memory_init_lock @@ -46,6 +47,8 @@ TrajectoryProjectionRun, ) +_SQLITE_IN_QUERY_BATCH = 500 + class SqliteEngineeringMemoryStore: def __init__(self, db_path: Path) -> None: @@ -203,6 +206,17 @@ def load_trajectory_patch_trail( return load_trajectory_patch_trail(self._conn, trajectory_id=trajectory_id) + def load_trajectory_patch_trails( + self, + trajectory_ids: Sequence[str], + ) -> dict[str, dict[str, object]]: + from .trajectory.store import 
load_trajectory_patch_trails + + return load_trajectory_patch_trails( + self._conn, + trajectory_ids=trajectory_ids, + ) + def list_canonical_trajectories_for_export( self, *, @@ -447,6 +461,39 @@ def list_subjects_for_memory(self, memory_id: str) -> list[MemorySubject]: for row in rows ] + def list_subjects_for_memories( + self, + memory_ids: Sequence[str], + ) -> dict[str, list[MemorySubject]]: + normalized_ids = tuple(sorted(set(memory_ids))) + grouped: dict[str, list[MemorySubject]] = { + memory_id: [] for memory_id in normalized_ids + } + for batch in chunked(normalized_ids, _SQLITE_IN_QUERY_BATCH): + placeholders = ", ".join("?" for _ in batch) + rows = self._conn.execute( + f""" + SELECT MIN(id) AS id, memory_id, subject_kind, subject_key, relation + FROM memory_subjects + WHERE memory_id IN ({placeholders}) + GROUP BY memory_id, subject_kind, subject_key, relation + ORDER BY memory_id ASC, subject_kind ASC, subject_key ASC, id ASC + """, + batch, + ).fetchall() + for row in rows: + memory_id = str(row["memory_id"]) + grouped[memory_id].append( + MemorySubject( + id=str(row["id"]), + memory_id=memory_id, + subject_kind=str(row["subject_kind"]), # type: ignore[arg-type] + subject_key=str(row["subject_key"]), + relation=str(row["relation"]), # type: ignore[arg-type] + ) + ) + return grouped + def list_evidence_for_memory(self, memory_id: str) -> list[MemoryEvidence]: rows = self._conn.execute( """ @@ -479,6 +526,28 @@ def count_evidence_for_memory(self, memory_id: str) -> int: ).fetchone() return int(row[0]) if row is not None else 0 + def count_evidence_for_memories( + self, + memory_ids: Sequence[str], + ) -> dict[str, int]: + normalized_ids = tuple(sorted(set(memory_ids))) + counts = dict.fromkeys(normalized_ids, 0) + for batch in chunked(normalized_ids, _SQLITE_IN_QUERY_BATCH): + placeholders = ", ".join("?" 
for _ in batch) + rows = self._conn.execute( + f""" + SELECT memory_id, COUNT(*) AS evidence_count + FROM memory_evidence + WHERE memory_id IN ({placeholders}) + GROUP BY memory_id + ORDER BY memory_id ASC + """, + batch, + ).fetchall() + for row in rows: + counts[str(row["memory_id"])] = int(row["evidence_count"]) + return counts + def search_records( self, *, diff --git a/codeclone/memory/trajectory/analytics.py b/codeclone/memory/trajectory/analytics.py index 9cc42f19..6a20ef81 100644 --- a/codeclone/memory/trajectory/analytics.py +++ b/codeclone/memory/trajectory/analytics.py @@ -18,6 +18,7 @@ ) from .models import Trajectory from .retrieval import ( + TrajectoryDetailLevel, filter_trajectories_for_default_retrieval, serialize_trajectory_preview, trajectory_list_item_to_preview, @@ -94,6 +95,7 @@ def build_trajectory_anomalies_payload( max_results: int = DEFAULT_ANOMALY_PREVIEW_LIMIT, limit: int = DEFAULT_ANALYTICS_LIMIT, include_routine: bool = False, + detail_level: TrajectoryDetailLevel = "full", ) -> dict[str, object]: trajectories = _load_trajectories( store, @@ -123,7 +125,10 @@ def build_trajectory_anomalies_payload( selected = hits[: max(1, int(max_results))] payload_items: list[dict[str, object]] = [] for trajectory, anomalies in selected: - preview = serialize_trajectory_preview(trajectory) + preview = serialize_trajectory_preview( + trajectory, + detail_level=detail_level, + ) preview["agent_label"] = trajectory_agent_label(trajectory) preview["anomalies"] = [serialize_anomaly(item) for item in anomalies] payload_items.append(preview) @@ -141,6 +146,7 @@ def build_trajectory_dashboard_payload( project_id: str, max_results: int = DEFAULT_ANOMALY_PREVIEW_LIMIT, include_routine: bool = False, + detail_level: TrajectoryDetailLevel = "full", ) -> dict[str, object]: status = trajectory_status_payload( count=store.count_trajectories(project_id=project_id), @@ -156,6 +162,7 @@ def build_trajectory_dashboard_payload( project_id=project_id, 
max_results=max_results, include_routine=include_routine, + detail_level=detail_level, ) recent_items = store.list_trajectories( project_id=project_id, diff --git a/codeclone/memory/trajectory/retrieval.py b/codeclone/memory/trajectory/retrieval.py index 7dbfb728..4ab585ca 100644 --- a/codeclone/memory/trajectory/retrieval.py +++ b/codeclone/memory/trajectory/retrieval.py @@ -321,6 +321,7 @@ def rank_trajectories_for_query( max_results: int, match_mode: SearchMatchMode, include_routine: bool = False, + detail_level: TrajectoryDetailLevel = "full", ) -> tuple[list[dict[str, object]], bool]: tokens = tokenize_query(query) if not tokens: @@ -336,7 +337,11 @@ def rank_trajectories_for_query( query_tokens=tokens, match_mode=match_mode, ) - return _preview_results(scored, max_results=max_results) + return _preview_results( + scored, + max_results=max_results, + detail_level=detail_level, + ) def filter_trajectories_for_query( diff --git a/codeclone/memory/trajectory/store.py b/codeclone/memory/trajectory/store.py index 8373190c..a65b46bd 100644 --- a/codeclone/memory/trajectory/store.py +++ b/codeclone/memory/trajectory/store.py @@ -23,6 +23,7 @@ read_audit_event_core_records, ) from ...report.meta import current_report_timestamp_utc +from ...utils.iterutils import chunked from ...utils.json_io import json_text from ..models import MemoryProject from ..search_index import SearchMatchMode, tokenize_query @@ -40,6 +41,8 @@ from .projector import project_trajectory from .quality import apply_trajectory_quality_score +_SQLITE_IN_QUERY_BATCH = 500 + def _project_and_upsert_workflow( conn: sqlite3.Connection, @@ -882,6 +885,31 @@ def load_trajectory_patch_trail( return loaded if isinstance(loaded, dict) else None +def load_trajectory_patch_trails( + conn: sqlite3.Connection, + *, + trajectory_ids: Sequence[str], +) -> dict[str, dict[str, object]]: + loaded_by_id: dict[str, dict[str, object]] = {} + normalized_ids = tuple(sorted(set(trajectory_ids))) + for batch in 
chunked(normalized_ids, _SQLITE_IN_QUERY_BATCH): + placeholders = ", ".join("?" for _ in batch) + rows = conn.execute( + f""" + SELECT trajectory_id, patch_trail_json + FROM memory_trajectory_patch_trails + WHERE trajectory_id IN ({placeholders}) + ORDER BY trajectory_id ASC + """, + batch, + ).fetchall() + for row in rows: + loaded = orjson.loads(str(row["patch_trail_json"])) + if isinstance(loaded, dict): + loaded_by_id[str(row["trajectory_id"])] = loaded + return loaded_by_id + + __all__ = [ "count_trajectories", "find_trajectory", @@ -889,6 +917,7 @@ def load_trajectory_patch_trail( "list_trajectories", "list_trajectories_for_subjects", "load_trajectory_patch_trail", + "load_trajectory_patch_trails", "rebuild_trajectories_from_audit", "rebuild_trajectories_incremental", "search_trajectories", diff --git a/tests/test_memory_compact_contract.py b/tests/test_memory_compact_contract.py new file mode 100644 index 00000000..dcc0abdd --- /dev/null +++ b/tests/test_memory_compact_contract.py @@ -0,0 +1,137 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from pathlib import Path + +import pytest + +from codeclone.memory.retrieval import query_engineering_memory +from codeclone.memory.retrieval import service as retrieval_service +from codeclone.memory.semantic.models import SemanticHit +from codeclone.memory.trajectory.retrieval import ( + COMPACT_TRAJECTORY_SUBJECT_LIMIT, +) + +from .memory_fixtures import memory_store, seed_trajectory_audit_workflow + + +def _assert_compact_trajectory(payload: object) -> None: + assert isinstance(payload, dict) + assert "quality_contract" not in payload + assert "steps" not in payload + assert "evidence" not in payload + subjects = payload.get("subjects") + assert isinstance(subjects, list) + assert len(subjects) <= COMPACT_TRAJECTORY_SUBJECT_LIMIT + assert isinstance(payload.get("subject_count"), int) + assert isinstance(payload.get("subjects_truncated"), bool) + + +@pytest.mark.parametrize( + "mode", + ("trajectory_search", "trajectory_anomalies", "trajectory_dashboard"), +) +def test_compact_trajectory_modes_honor_declared_detail_level( + tmp_path: Path, + mode: str, +) -> None: + with memory_store(tmp_path) as (root, project, store, db_path): + audit_db = tmp_path / "audit.sqlite3" + seed_trajectory_audit_workflow(root=root, audit_db=audit_db) + store.rebuild_trajectories_from_audit( + project=project, + root_path=root, + audit_db_path=audit_db, + ) + result = query_engineering_memory( + store, + project_id=project.id, + root_path=root, + backend="sqlite", + db_path=db_path, + mode=mode, + query="recover service" if mode == "trajectory_search" else None, + detail_level="compact", + ) + + assert result["detail_level"] == "compact" + payload = result["payload"] + assert isinstance(payload, dict) + if mode == "trajectory_dashboard": + anomalies = payload["anomalies"] + assert isinstance(anomalies, dict) + trajectories = anomalies["trajectories"] + else: + 
trajectories = payload["trajectories"] + assert isinstance(trajectories, list) + assert trajectories + _assert_compact_trajectory(trajectories[0]) + + +def test_trajectory_search_full_keeps_quality_contract(tmp_path: Path) -> None: + with memory_store(tmp_path) as (root, project, store, db_path): + audit_db = tmp_path / "audit.sqlite3" + seed_trajectory_audit_workflow(root=root, audit_db=audit_db) + store.rebuild_trajectories_from_audit( + project=project, + root_path=root, + audit_db_path=audit_db, + ) + result = query_engineering_memory( + store, + project_id=project.id, + root_path=root, + backend="sqlite", + db_path=db_path, + mode="trajectory_search", + query="recover service", + detail_level="full", + ) + + assert result["detail_level"] == "full" + payload = result["payload"] + assert isinstance(payload, dict) + trajectories = payload["trajectories"] + assert isinstance(trajectories, list) + assert trajectories + assert "quality_contract" in trajectories[0] + + +def test_semantic_trajectory_hydration_respects_detail_level(tmp_path: Path) -> None: + with memory_store(tmp_path) as (root, project, store, _db_path): + audit_db = tmp_path / "audit.sqlite3" + seed_trajectory_audit_workflow(root=root, audit_db=audit_db) + trajectory = store.rebuild_trajectories_from_audit( + project=project, + root_path=root, + audit_db_path=audit_db, + ).trajectories[0] + hits = [ + SemanticHit( + source_id=trajectory.id, + source="trajectory", + score=0.75, + ) + ] + compact = retrieval_service._hydrate_trajectory_hits( + store, + project_id=project.id, + hits=hits, + detail_level="compact", + ) + full = retrieval_service._hydrate_trajectory_hits( + store, + project_id=project.id, + hits=hits, + detail_level="full", + ) + + _assert_compact_trajectory(compact[0]) + assert compact[0]["semantic_score"] == 0.75 + assert "quality_contract" in full[0] + assert "steps" in full[0] diff --git a/tests/test_memory_coverage_gaps.py b/tests/test_memory_coverage_gaps.py index e12d013e..f1c773eb 
100644 --- a/tests/test_memory_coverage_gaps.py +++ b/tests/test_memory_coverage_gaps.py @@ -910,6 +910,7 @@ def test_hydrate_trajectory_hits_skips_foreign_project(tmp_path: Path) -> None: store, project_id=project.id, hits=[SemanticHit(source_id=trajectory.id, source="trajectory", score=0.5)], + detail_level="compact", ) assert hits assert hits[0]["semantic_score"] == 0.5 @@ -917,6 +918,7 @@ def test_hydrate_trajectory_hits_skips_foreign_project(tmp_path: Path) -> None: store, project_id="other-project", hits=[SemanticHit(source_id=trajectory.id, source="trajectory", score=0.5)], + detail_level="compact", ) assert missing == [] @@ -955,14 +957,13 @@ def test_audit_reader_missing_db_and_connect_errors( finally: conn.close() - real_connect = sqlite3.connect + def _fail_open(_path: Path) -> sqlite3.Connection: + raise sqlite3.Error("connect failed") - def _fail_connect(database: str, *args: Any, **kwargs: Any) -> sqlite3.Connection: - if database == str(audit_db): - raise sqlite3.Error("connect failed") - return cast(sqlite3.Connection, real_connect(database, *args, **kwargs)) - - monkeypatch.setattr(sqlite3, "connect", _fail_connect) + monkeypatch.setattr( + "codeclone.audit.reader.open_audit_db_readonly", + _fail_open, + ) with pytest.raises(AuditReadError, match="cannot open audit database"): list_workflow_ids_with_events_after( db_path=audit_db, @@ -972,8 +973,6 @@ def _fail_connect(database: str, *args: Any, **kwargs: Any) -> sqlite3.Connectio with pytest.raises(AuditReadError, match="cannot open audit database"): count_audit_event_core_gaps(db_path=audit_db, repo_root_digest="digest") - monkeypatch.setattr(sqlite3, "connect", real_connect) - class _BrokenConn: def execute(self, *_args: object, **_kwargs: object) -> None: raise sqlite3.Error("query failed") @@ -981,7 +980,10 @@ def execute(self, *_args: object, **_kwargs: object) -> None: def close(self) -> None: return None - monkeypatch.setattr(sqlite3, "connect", lambda *_a, **_k: _BrokenConn()) + 
monkeypatch.setattr( + "codeclone.audit.reader.open_audit_db_readonly", + lambda *_a, **_k: _BrokenConn(), + ) with pytest.raises(AuditReadError, match="cannot read audit database"): read_audit_summary(db_path=audit_db, limit=5) @@ -1670,8 +1672,6 @@ def test_staleness_audit_validation_and_events_edges( finally: conn.close() - real_connect = sqlite3.connect - class _BrokenConn: def execute(self, *_args: object, **_kwargs: object) -> None: raise sqlite3.Error("query failed") @@ -1679,18 +1679,24 @@ def execute(self, *_args: object, **_kwargs: object) -> None: def close(self) -> None: return None - monkeypatch.setattr(sqlite3, "connect", lambda *_a, **_k: _BrokenConn()) - with pytest.raises(AuditReadError, match="cannot read audit database"): + def _broken_open(_path: Path) -> _BrokenConn: + return _BrokenConn() + + monkeypatch.setattr( + "codeclone.audit.reader.open_audit_db_readonly", + _broken_open, + ) + audit_db_error = "cannot .* audit database" + with pytest.raises(AuditReadError, match=audit_db_error): read_audit_event_core_records(db_path=audit_db, repo_root_digest="digest") - with pytest.raises(AuditReadError, match="cannot read audit database"): + with pytest.raises(AuditReadError, match=audit_db_error): list_workflow_ids_with_events_after( db_path=audit_db, repo_root_digest="digest", after_id=0, ) - with pytest.raises(AuditReadError, match="cannot read audit database"): + with pytest.raises(AuditReadError, match=audit_db_error): count_audit_event_core_gaps(db_path=audit_db, repo_root_digest="digest") - monkeypatch.setattr(sqlite3, "connect", real_connect) def test_trajectory_projector_and_retrieval_residual_edges(tmp_path: Path) -> None: diff --git a/tests/test_memory_experience_retrieval.py b/tests/test_memory_experience_retrieval.py index e52b9785..aa1ad734 100644 --- a/tests/test_memory_experience_retrieval.py +++ b/tests/test_memory_experience_retrieval.py @@ -123,7 +123,17 @@ def test_relevant_memory_compacts_experience_statement_and_evidence( assert 
len(compact_statement) < 300 assert compact_experience["evidence_count"] == 5 assert "evidence_trajectory_ids" not in compact_experience + assert "agent_facets" not in compact_experience + assert compact_experience["agent_family_count"] == 2 + assert compact_experience["multi_agent"] is True + assert compact_experience["dominant_agent_facet"] == { + "agent_family": "claude-code", + "count": 3, + } assert full_experience["statement"] == "x" * 300 + full_facets = full_experience["agent_facets"] + assert isinstance(full_facets, list) + assert len(full_facets) == 2 full_evidence = full_experience["evidence_trajectory_ids"] assert isinstance(full_evidence, list) assert len(full_evidence) == 5 diff --git a/tests/test_memory_retrieval_batching.py b/tests/test_memory_retrieval_batching.py new file mode 100644 index 00000000..0d1467ba --- /dev/null +++ b/tests/test_memory_retrieval_batching.py @@ -0,0 +1,131 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from pathlib import Path + +from codeclone.memory.models import MemoryEvidence, MemorySubject, generate_memory_id +from codeclone.memory.retrieval import service as retrieval_service +from codeclone.memory.retrieval.ranking import RankingContext +from codeclone.report.meta import current_report_timestamp_utc + +from .memory_fixtures import ( + memory_store, + seed_module_role, + seed_trajectory_audit_workflow, +) + + +def test_store_batch_loaders_preserve_empty_and_populated_results( + tmp_path: Path, +) -> None: + with memory_store(tmp_path) as (_root, project, store, _db_path): + first = seed_module_role( + store, + project_id=project.id, + file_path="pkg/first.py", + ) + second = seed_module_role( + store, + project_id=project.id, + file_path="pkg/second.py", + ) + store.write_evidence( + MemoryEvidence( + id=generate_memory_id(prefix="evid"), + memory_id=first.id, + evidence_kind="report", + ref="report-a", + locator=None, + quote=None, + digest=None, + created_at_utc=current_report_timestamp_utc(), + ) + ) + missing_id = "mem-missing" + + subjects = store.list_subjects_for_memories( + (second.id, first.id, first.id, missing_id) + ) + evidence = store.count_evidence_for_memories( + (second.id, first.id, first.id, missing_id) + ) + + assert list(subjects) == sorted({first.id, second.id, missing_id}) + assert all(isinstance(item, MemorySubject) for item in subjects[first.id]) + assert subjects[missing_id] == [] + assert evidence[first.id] == 1 + assert evidence[second.id] == 0 + assert evidence[missing_id] == 0 + + +def test_rank_records_uses_bounded_batch_queries(tmp_path: Path) -> None: + with memory_store(tmp_path) as (_root, project, store, _db_path): + records = [ + seed_module_role( + store, + project_id=project.id, + file_path=f"pkg/mod_{index}.py", + ) + for index in range(20) + ] + statements: list[str] = [] + 
store.connection.set_trace_callback(statements.append) + try: + payload, truncated = retrieval_service._rank_records( + store, + project_id=project.id, + candidates=records, + context=RankingContext.from_scope( + scope_paths=(), + symbols=(), + blast_dependents=(), + ), + max_records=20, + detail_level="compact", + ) + finally: + store.connection.set_trace_callback(None) + + subject_queries = [ + statement for statement in statements if "FROM memory_subjects" in statement + ] + evidence_queries = [ + statement for statement in statements if "FROM memory_evidence" in statement + ] + assert len(payload) == 20 + assert truncated is False + assert len(subject_queries) == 1 + assert len(evidence_queries) == 1 + + +def test_patch_trails_load_in_one_batch(tmp_path: Path) -> None: + with memory_store(tmp_path) as (root, project, store, _db_path): + audit_db = tmp_path / "audit.sqlite3" + seed_trajectory_audit_workflow(root=root, audit_db=audit_db) + trajectory = store.rebuild_trajectories_from_audit( + project=project, + root_path=root, + audit_db_path=audit_db, + ).trajectories[0] + statements: list[str] = [] + store.connection.set_trace_callback(statements.append) + try: + trails = retrieval_service._load_patch_trails_for_trajectories( + store, + trajectory_ids=(trajectory.id, trajectory.id, "traj-missing"), + ) + finally: + store.connection.set_trace_callback(None) + + patch_trail_queries = [ + statement + for statement in statements + if "FROM memory_trajectory_patch_trails" in statement + ] + assert set(trails) == {trajectory.id} + assert len(patch_trail_queries) == 1 diff --git a/tests/test_memory_retrieval_service_coverage.py b/tests/test_memory_retrieval_service_coverage.py index 16afffa3..713a7749 100644 --- a/tests/test_memory_retrieval_service_coverage.py +++ b/tests/test_memory_retrieval_service_coverage.py @@ -12,8 +12,9 @@ import pytest from codeclone.memory.exceptions import MemoryContractError -from codeclone.memory.models import MemoryEvidence, 
MemoryRecord +from codeclone.memory.models import MemoryEvidence, MemoryRecord, MemorySubject from codeclone.memory.retrieval import service as retrieval_service +from codeclone.memory.retrieval.ranking import RankingContext from codeclone.report.meta import current_report_timestamp_utc @@ -198,3 +199,62 @@ def test_normalize_detail_level_and_compact_serialization() -> None: assert "payload" not in compact assert full["statement"] == "x" * 200 assert full["payload"] is None + + +def test_compact_record_subjects_are_bounded_and_scope_relevant() -> None: + record = _record() + subjects = [ + MemorySubject( + id=f"subject-{index}", + memory_id=record.id, + subject_kind="path", + subject_key=f"noise/path_{index}.py", + relation="about", + ) + for index in range(10) + ] + subjects.append( + MemorySubject( + id="subject-relevant", + memory_id=record.id, + subject_kind="path", + subject_key="pkg/service.py", + relation="about", + ) + ) + context = RankingContext.from_scope( + scope_paths=("pkg/service.py",), + symbols=(), + blast_dependents=(), + ) + + compact = retrieval_service._serialize_record_summary( + record=record, + subjects=subjects, + evidence_count=0, + detail_level="compact", + context=context, + ) + full = retrieval_service._serialize_record_summary( + record=record, + subjects=subjects, + evidence_count=0, + detail_level="full", + context=context, + ) + + assert { + "subject_count": compact["subject_count"], + "subjects_truncated": compact["subjects_truncated"], + } == { + "subject_count": 11, + "subjects_truncated": True, + } + compact_subjects = compact["subjects"] + assert isinstance(compact_subjects, list) + assert len(compact_subjects) == retrieval_service.COMPACT_MEMORY_SUBJECT_LIMIT + assert compact_subjects[0]["subject_key"] == "pkg/service.py" + full_subjects = full.get("subjects") + assert isinstance(full_subjects, list) + assert len(full_subjects) == 11 + assert {"subject_count", "subjects_truncated"}.isdisjoint(full) diff --git 
a/tests/test_memory_trajectory_retrieval.py b/tests/test_memory_trajectory_retrieval.py index c00fd92a..91840f45 100644 --- a/tests/test_memory_trajectory_retrieval.py +++ b/tests/test_memory_trajectory_retrieval.py @@ -305,19 +305,28 @@ def test_get_relevant_memory_returns_patch_trail_summary(tmp_path: Path) -> None audit_db_path=audit_db, ) - result = get_relevant_memory( + compact = get_relevant_memory( + store, + project_id=project.id, + scope_paths=("pkg/helper.py",), + scope_resolved_from="explicit", + max_records=5, + ) + full = get_relevant_memory( store, project_id=project.id, scope_paths=("pkg/helper.py",), scope_resolved_from="explicit", max_records=5, + detail_level="full", ) - trajectories = result["trajectories"] + trajectories = compact["trajectories"] assert isinstance(trajectories, list) assert trajectories assert trajectories[0].get("patch_trail_summary") is not None - summary = result.get("patch_trail_summary") + assert "patch_trail_summary" not in compact + summary = full.get("patch_trail_summary") assert isinstance(summary, dict) assert summary.get("counts", {}).get("untouched_in_declared") == 1 From 7b1b393a8423eba07d60796cdcb34c3f8f5d3dfa Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Thu, 11 Jun 2026 23:27:58 +0500 Subject: [PATCH 243/318] docs(memory): document records/experiences/trajectories evidence lanes Explain the three evidence lanes and compact bounds (subject_count / subjects_truncated, mode=get / detail_level=full for full subjects, agent facets, trajectory contracts, and root Patch Trail drill-down) across CLAUDE.md, the engineering-memory book, the MCP tool docs, both skill manifests, and the engineering_memory help topic. 
--- CLAUDE.md | 6 +++++- codeclone/surfaces/mcp/messages/help_topics.py | 15 ++++++++++++--- docs/book/13-engineering-memory/mcp-surface.md | 14 ++++++++++++++ .../trajectory-and-patch-trail.md | 7 +++---- .../tools/atomic-change-control.md | 4 ++-- .../skills/codeclone-engineering-memory/SKILL.md | 11 +++++++++-- .../skills/codeclone-engineering-memory/SKILL.md | 11 +++++++++-- tests/test_mcp_service.py | 4 ++++ 8 files changed, 58 insertions(+), 14 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 995ddd5b..945f305a 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -122,7 +122,11 @@ After `start_controlled_change` returns `edit_allowed: true`: **Scope and token hygiene:** never use project root as memory scope. Compress `record_candidate` statements to one durable fact (target ≤300 chars; `validate_claims` warns above 500; hard limit 1000). List responses default to -compact previews — use `mode=get` or `detail_level=full` for complete text. +compact previews. Treat `records[]`, `experiences[]`, and `trajectories[]` as +separate evidence lanes; `subject_count` / `subjects_truncated` means more +subjects exist, not that evidence disappeared. Use `mode=get` or +`detail_level=full` for complete subjects, agent facets, trajectory contracts, +steps, evidence ids, payloads, and root Patch Trail drill-down. ### Before `finish`: incident / complexity memory (MANDATORY) diff --git a/codeclone/surfaces/mcp/messages/help_topics.py b/codeclone/surfaces/mcp/messages/help_topics.py index ae6dec47..78d0c469 100644 --- a/codeclone/surfaces/mcp/messages/help_topics.py +++ b/codeclone/surfaces/mcp/messages/help_topics.py @@ -546,8 +546,16 @@ class MCPHelpTopicSpec: "trajectory_dashboard after rebuild_trajectories." ), ( - "Scoped response may include trajectories[] and " - "patch_trail_summary — forensics only, not edit authorization." 
+ "Scoped response lanes: records[]=durable assertions, " + "experiences[]=advisory patterns, trajectories[]=bounded examples, " + "coverage=availability/trust context." + ), + ( + "compact (default): record/trajectory subjects are bounded with " + "subject_count+subjects_truncated; experiences expose multi_agent " + "+ dominant_agent_facet; no quality_contract, steps, evidence ids, " + "payload, or duplicated root patch_trail_summary. Use full/get " + "for drill-down." ), ( "Semantic (off by default): enable sidecar, rebuild_semantic_index, " @@ -576,7 +584,8 @@ class MCPHelpTopicSpec: doc_links=(ENGINEERING_MEMORY_DOC_LINK, MCP_INTERFACE_DOC_LINK), warnings=( "Draft, inferred, and stale records are not established policy.", - "trajectories[] and patch_trail_summary do not override findings.", + "trajectories[] and Patch Trail context do not override findings.", + "Truncation metadata means more evidence exists; it is not evidence loss.", ), anti_patterns=( "Using memory to justify do_not_touch edits or scope expansion.", diff --git a/docs/book/13-engineering-memory/mcp-surface.md b/docs/book/13-engineering-memory/mcp-surface.md index 8820d3f1..40b66169 100644 --- a/docs/book/13-engineering-memory/mcp-surface.md +++ b/docs/book/13-engineering-memory/mcp-surface.md @@ -52,6 +52,20 @@ List modes (`search`, `stale`, `drafts`, scoped `get_relevant_memory`) default to **compact** payloads: statement preview, `statement_length`, no `payload`. Use `mode=get` or `detail_level=full` for complete statements and payload. 
+Scoped retrieval keeps four typed lanes: + +| Lane | Meaning | `compact` | `full` | +|------------------|----------------------------------------------|---------------------------------------------------------------|---------------------------------------------| +| `records[]` | Durable asserted/project memory | Preview; relevance-first bounded `subjects`; count/truncation | Full statement, subjects, record payload | +| `experiences[]` | Advisory patterns distilled from trajectories | Preview; agent-family count, multi-agent flag, dominant facet | Full agent facets and trajectory evidence ids | +| `trajectories[]` | Prior workflow examples/evidence | Bounded preview; no steps or `quality_contract` | Full contract/subjects; use `trajectory_get` for steps | +| `coverage` | Availability of record/trajectory/experience context | Same factual coverage metadata | Same factual coverage metadata | + +`subject_count` and `subjects_truncated=true` mean more linked subjects exist; +they do not downgrade or discard the record. Each compact trajectory retains +its own `patch_trail_summary`. The duplicate top-level `patch_trail_summary` is +full-only. + **Filters** (`filters` object): | Key | Values | Notes | diff --git a/docs/book/13-engineering-memory/trajectory-and-patch-trail.md b/docs/book/13-engineering-memory/trajectory-and-patch-trail.md index bbc74a60..8cb222e2 100644 --- a/docs/book/13-engineering-memory/trajectory-and-patch-trail.md +++ b/docs/book/13-engineering-memory/trajectory-and-patch-trail.md @@ -153,8 +153,9 @@ flowchart LR subjects match (declare `scope_paths`, check `changed_files`, or `untouched_in_declared`). When a stored Patch Trail exists for a matched trajectory, each preview includes **`patch_trail_summary`** (counts, digest, -verification status). The top-ranked trajectory also surfaces -**`patch_trail_summary`** at the response root for quick scope context. +verification status). 
With `detail_level=full`, the top-ranked trajectory also +surfaces **`patch_trail_summary`** at the response root. Compact retrieval omits +that root duplicate; the summary remains on the trajectory preview. `query_engineering_memory(mode=trajectory_get)` returns **`patch_trail`** on the trajectory payload when persisted for that workflow. @@ -196,5 +197,3 @@ Refs: - `codeclone/memory/trajectory/rebuild_workflow.py:execute_trajectory_rebuild` - `codeclone/memory/trajectory/export.py:export_trajectories_jsonl` - `tests/test_memory_trajectory_*.py`, `tests/test_audit_event_core_v2.py` - ---- diff --git a/docs/book/25-mcp-interface/tools/atomic-change-control.md b/docs/book/25-mcp-interface/tools/atomic-change-control.md index 3310e326..c77587f2 100644 --- a/docs/book/25-mcp-interface/tools/atomic-change-control.md +++ b/docs/book/25-mcp-interface/tools/atomic-change-control.md @@ -4,8 +4,8 @@ |-----------------------------|--------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | `manage_change_intent` | `action`, `root`, `run_id`, `intent_id`, `scope`, `on_conflict`, `ttl_seconds`, `lease_seconds`, `changed_files` or `diff_ref` | Intent lifecycle: declare, get, check, clear, renew, promote, list_workspace, gc_workspace, recover, reset_workspace. 
Use for queue/promote/recover operations alongside workflow tools | | `get_blast_radius` | `run_id`, `files`, `depth`, `include` | Pre-change risk boundary: full transitive graph, custom include filters | -| `get_relevant_memory` | `root`, `scope`, `intent_id`, `symbols`, `max_records`, `include_stale`, `include_drafts` | Ranked engineering memory for declared edit scope. Auto-bootstraps store when `mcp_sync_policy=bootstrap_if_missing` (default). See [Engineering Memory](../../13-engineering-memory/index.md) | -| `query_engineering_memory` | `root`, `mode`, …, optional `semantic` (search only) | Mode router: search, get, for_path, for_symbol, stale, drafts, coverage, status, trajectory_status, trajectory_search, trajectory_get. `filters` supports `types`, `statuses`, `confidences`, and `match_mode` (`any`\|`all`) for search. `semantic=true` blends LanceDB proximity when `[tool.codeclone.memory.semantic] enabled` and index built (default off). See [Engineering Memory](../../13-engineering-memory/index.md) | +| `get_relevant_memory` | `root`, `scope`, `intent_id`, `symbols`, `max_records`, `include_stale`, `include_drafts`, `detail_level` | Ranked engineering memory for declared edit scope. Compact by default: bounded record/trajectory subjects plus typed `records`, `experiences`, `trajectories`, and `coverage` lanes. Auto-bootstraps store when `mcp_sync_policy=bootstrap_if_missing` (default). See [Engineering Memory](../../13-engineering-memory/index.md) | +| `query_engineering_memory` | `root`, `mode`, …, optional `semantic` (search only), `detail_level` | Mode router: search, get, for_path, for_symbol, stale, drafts, coverage, status, trajectory_status, trajectory_search, trajectory_get. List/search modes default compact; `get`, `trajectory_get`, or `detail_level=full` are explicit drill-down. `filters` supports `types`, `statuses`, `confidences`, and `match_mode` (`any`\|`all`) for search. 
`semantic=true` blends LanceDB proximity when `[tool.codeclone.memory.semantic] enabled` and index built (default off). See [Engineering Memory](../../13-engineering-memory/index.md) | | `manage_engineering_memory` | `root`, `action`, … | Agent-side: `refresh_from_run`, `rebuild_semantic_index`, `record_candidate`, `validate_claims`, `propose_from_receipt`, `rebuild_trajectories`, `enqueue_projection_rebuild`, `projection_rebuild_status`, `run_projection_jobs_once`. Human approve/reject/archive: VS Code Memory view **or** `codeclone memory approve` (not MCP agents). See [Engineering Memory](../../13-engineering-memory/index.md) | | `check_patch_contract` | `mode`, `run_id`, `before_run_id`, `after_run_id`, `intent_id`, `strictness`, `changed_files` or `diff_ref` | Manual budget query or step-by-step verification | | `create_review_receipt` | `run_id`, `intent_id`, `format`, `include_blast_radius`, `include_patch_contract` | Manual receipt generation | diff --git a/plugins/codeclone/skills/codeclone-engineering-memory/SKILL.md b/plugins/codeclone/skills/codeclone-engineering-memory/SKILL.md index ea9e91dc..53551f1c 100644 --- a/plugins/codeclone/skills/codeclone-engineering-memory/SKILL.md +++ b/plugins/codeclone/skills/codeclone-engineering-memory/SKILL.md @@ -93,8 +93,15 @@ semantic-quality embeddings — do not present hits as LLM recall. 
- **Never** use project root as memory scope (`"."`, `""`, unscoped retrieval) - Compress observations before `record_candidate`: one durable fact, target ≤300 chars; rewrite if >500; hard reject >1000 -- List responses are compact by default — use `mode=get` or `detail_level=full` - for complete statements +- Read compact lanes separately: `records[]` are durable assertions, + `experiences[]` are advisory patterns, `trajectories[]` are bounded examples, + and `coverage` describes evidence availability +- Compact is default: subject lists are bounded with + `subject_count`/`subjects_truncated`; experience diversity uses + `multi_agent`/`dominant_agent_facet`; trajectory contracts, steps, evidence + ids, payloads, and the duplicated root Patch Trail are omitted +- Use `mode=get`, `trajectory_get`, or `detail_level=full` for complete + statements, subjects, agent facets, contracts, evidence, and payloads ## When NOT to use memory diff --git a/plugins/cursor-codeclone/skills/codeclone-engineering-memory/SKILL.md b/plugins/cursor-codeclone/skills/codeclone-engineering-memory/SKILL.md index ea9e91dc..53551f1c 100644 --- a/plugins/cursor-codeclone/skills/codeclone-engineering-memory/SKILL.md +++ b/plugins/cursor-codeclone/skills/codeclone-engineering-memory/SKILL.md @@ -93,8 +93,15 @@ semantic-quality embeddings — do not present hits as LLM recall. 
- **Never** use project root as memory scope (`"."`, `""`, unscoped retrieval) - Compress observations before `record_candidate`: one durable fact, target ≤300 chars; rewrite if >500; hard reject >1000 -- List responses are compact by default — use `mode=get` or `detail_level=full` - for complete statements +- Read compact lanes separately: `records[]` are durable assertions, + `experiences[]` are advisory patterns, `trajectories[]` are bounded examples, + and `coverage` describes evidence availability +- Compact is default: subject lists are bounded with + `subject_count`/`subjects_truncated`; experience diversity uses + `multi_agent`/`dominant_agent_facet`; trajectory contracts, steps, evidence + ids, payloads, and the duplicated root Patch Trail are omitted +- Use `mode=get`, `trajectory_get`, or `detail_level=full` for complete + statements, subjects, agent facets, contracts, evidence, and payloads ## When NOT to use memory diff --git a/tests/test_mcp_service.py b/tests/test_mcp_service.py index fa14dae5..9edb4bee 100644 --- a/tests/test_mcp_service.py +++ b/tests/test_mcp_service.py @@ -1351,6 +1351,10 @@ def test_mcp_service_help_validates_topic_and_detail() -> None: "list[str]", memory_help["recommended_tools"], ) + memory_points = str(memory_help["key_points"]) + assert "subject_count+subjects_truncated" in memory_points + assert "dominant_agent_facet" in memory_points + assert "duplicated root patch_trail_summary" in memory_points def _memory_sync_service_with_run( From 7e17462cb2c2cf88d5c43ff154bd195e6c1642b1 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Thu, 11 Jun 2026 23:27:58 +0500 Subject: [PATCH 244/318] test: add CI coverage-uplift tests --- tests/test_coverage_ci_uplift.py | 1026 ++++++++++++++++++++++++++++++ 1 file changed, 1026 insertions(+) create mode 100644 tests/test_coverage_ci_uplift.py diff --git a/tests/test_coverage_ci_uplift.py b/tests/test_coverage_ci_uplift.py new file mode 100644 index 00000000..674f2305 --- /dev/null +++ 
b/tests/test_coverage_ci_uplift.py @@ -0,0 +1,1026 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import json +import os +from dataclasses import replace +from pathlib import Path +from typing import cast + +import pytest + +from codeclone.audit.analysis_completed import _sequence +from codeclone.audit.events import AuditEvent, event_core_for_event +from codeclone.audit.writer import _event_core_json +from codeclone.cache.integrity import read_json_document +from codeclone.config.intent_registry import IntentRegistryConfigError +from codeclone.config.memory import IngestConfig +from codeclone.contracts import ExitCode +from codeclone.memory.experience.store import _facet_kind, _status +from codeclone.memory.ingest.paths import ( + resolve_contract_constants_paths, + resolve_document_link_paths, + resolve_mcp_tool_contradiction_sources, + resolve_mcp_tool_schema_snapshot_path, +) +from codeclone.memory.trajectory.agents import ( + aggregate_agent_rows, + trajectory_agent_label, +) +from codeclone.memory.trajectory.cli_render import ( + render_projection_run, + render_trajectory_agents, + render_trajectory_anomalies, + render_trajectory_detail, + render_trajectory_list, + render_trajectory_search_results, + render_trajectory_status, +) +from codeclone.memory.trajectory.models import ( + Trajectory, + TrajectoryListItem, + TrajectoryOutcome, + TrajectoryProjectionRun, + TrajectoryStep, + TrajectorySubject, +) +from codeclone.surfaces.cli.observability import observability_main +from codeclone.surfaces.mcp.payloads import measure_payload +from codeclone.workspace_intent.gate import ( + HOOK_AUTHORIZE_FOREIGN_ENV, + WorkspaceIntentRegistryUnavailable, + _hook_authorizes_foreign_active, + 
_include_record_in_hook_cleanup, + list_unclosed_workspace_intents_for_hook_cleanup, +) +from tests.test_workspace_intents import _record +from tests.workspace_intent_gate_helpers import write_workspace_record + + +class _CapturePrinter: + def __init__(self) -> None: + self.lines: list[str] = [] + + def print(self, *objects: object, **kwargs: object) -> None: + self.lines.append(" ".join(str(item) for item in objects)) + + +def _projection_run(*, legacy: int = 0) -> TrajectoryProjectionRun: + return TrajectoryProjectionRun( + id="run-1", + project_id="proj", + repo_root_digest="digest", + projection_version="2", + started_at_utc="2026-01-01T00:00:00Z", + finished_at_utc="2026-01-01T00:01:00Z", + status="ok", + workflows_seen=2, + trajectories_created=1, + trajectories_updated=0, + trajectories_unchanged=1, + legacy_event_count=legacy, + message=None, + ) + + +def _trajectory(*, outcome: str = "accepted", agent: bool = True) -> Trajectory: + subjects = ( + ( + TrajectorySubject( + subject_kind="agent", + subject_key="cursor-vscode/1.0.0", + relation="actor", + ), + ) + if agent + else () + ) + return Trajectory( + id="traj-1", + project_id="proj", + repo_root_digest="digest", + workflow_id="intent:intent-a-001", + intent_id="intent-a", + primary_run_id="run1234567890abcdef", + first_run_id="run1234567890abcdef", + last_run_id="run1234567890abcdef", + report_digest="a" * 64, + outcome=cast(TrajectoryOutcome, outcome), + quality_tier="verified", + quality_score=90, + labels=(), + summary="workflow summary", + trajectory_digest="b" * 64, + source_event_stream_digest="c" * 64, + projection_version="2", + event_count=2, + step_count=2, + incident_count=1, + started_at_utc="2026-01-01T00:00:00Z", + finished_at_utc="2026-01-01T00:01:00Z", + projected_at_utc="2026-01-01T00:01:00Z", + updated_at_utc="2026-01-01T00:01:00Z", + steps=( + TrajectoryStep( + step_index=0, + audit_sequence=1, + event_id="evt-1", + event_type="intent.declared", + status="active", + 
run_id="run1234567890abcdef", + report_digest=None, + event_core_sha256="d" * 64, + event_core_json="{}", + summary="declared", + created_at_utc="2026-01-01T00:00:00Z", + ), + ), + subjects=subjects, + evidence=(), + ) + + +def test_observability_cli_help_and_stdout_trace( + tmp_path: Path, + capsys: pytest.CaptureFixture[str], +) -> None: + assert observability_main([]) == int(ExitCode.CONTRACT_ERROR) + assert "trace" in capsys.readouterr().out + + from codeclone.config.observability import ObservabilityConfig + from codeclone.observability import bootstrap, operation, shutdown + from codeclone.observability.models import OperationRecord + from codeclone.observability.store.schema import ( + observability_store_path, + open_observability_store, + ) + from codeclone.observability.store.writer import write_operation + + bootstrap(ObservabilityConfig(enabled=True), root=tmp_path) + try: + with operation(name="cli.analyze", surface="cli"): + pass + finally: + shutdown() + conn = open_observability_store(observability_store_path(tmp_path)) + try: + write_operation( + conn, + OperationRecord( + operation_id="op-1", + correlation_id="corr", + surface="cli", + name="cli.analyze", + started_at_utc="2026-01-01T00:00:00Z", + duration_ms=1.0, + status="ok", + spans=(), + ), + ) + finally: + conn.close() + + code = observability_main(["trace", "--root", str(tmp_path)]) + out = capsys.readouterr().out + assert code == int(ExitCode.SUCCESS) + assert '"operation_tree"' in out + + +def test_measure_payload_handles_unserializable_values() -> None: + class _Bad: + def __str__(self) -> str: + raise TypeError("nope") + + bytes_size, tokens = measure_payload({"bad": _Bad()}) + assert bytes_size == 0 + assert tokens == 0 + + +def test_cache_integrity_read_json_document_forwards_max_bytes(tmp_path: Path) -> None: + path = tmp_path / "doc.json" + path.write_text('{"ok": true}', encoding="utf-8") + assert read_json_document(path, max_bytes=64) == {"ok": True} + + +def 
test_analysis_completed_sequence_helper() -> None: + assert _sequence("not-a-list") == () + assert _sequence([1, 2]) == (1, 2) + assert _sequence(42) == () + + +def test_event_core_json_fallback_on_canonical_failure( + monkeypatch: pytest.MonkeyPatch, +) -> None: + calls = {"count": 0} + + def _canonical_or_fallback(payload: object) -> str: + calls["count"] += 1 + if calls["count"] == 1: + raise TypeError("cannot serialize") + return json.dumps(payload, sort_keys=True, separators=(",", ":")) + + monkeypatch.setattr( + "codeclone.audit.writer._canonical_json", + _canonical_or_fallback, + ) + event = AuditEvent( + event_type="intent.declared", + severity="info", + repo_root_digest="digest", + agent_pid=1, + agent_label="agent", + status="active", + payload={}, + ) + payload = json.loads(_event_core_json(event)) + assert payload["truncated"] is True + assert payload["event_type"] == "intent.declared" + assert event_core_for_event(event)["event_type"] == "intent.declared" + + +def test_workspace_hook_cleanup_resolves_env_pid( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr( + "codeclone.surfaces.mcp._workspace_intent_pid.is_agent_pid_alive", + lambda _pid: True, + ) + own_pid = os.getpid() + own = replace( + _record(intent_id="intent-own-env-001", status="active"), + agent_pid=own_pid, + agent_start_epoch=42, + ) + write_workspace_record(tmp_path, own) + monkeypatch.setenv("CODECLONE_HOOK_OWN_AGENT_PID", str(own_pid)) + monkeypatch.setenv("CODECLONE_HOOK_OWN_AGENT_START_EPOCH", "42") + + unclosed = list_unclosed_workspace_intents_for_hook_cleanup(tmp_path) + + assert len(unclosed) == 1 + assert unclosed[0].intent_id == "intent-own-env-001" + + +def test_workspace_hook_cleanup_registry_unavailable( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + def _boom(_root: Path) -> object: + raise ValueError("broken registry") + + monkeypatch.setattr( + "codeclone.workspace_intent.gate.resolve_intent_registry_config", + 
_boom, + ) + with pytest.raises(WorkspaceIntentRegistryUnavailable, match="broken registry"): + list_unclosed_workspace_intents_for_hook_cleanup(tmp_path) + + +def test_workspace_hook_include_record_edges() -> None: + from codeclone.surfaces.mcp._workspace_intent_lifecycle import utc_now + + recoverable = replace( + _record(intent_id="intent-rec-001", status="active"), + agent_pid=os.getpid() + 5000, + agent_label="cursor-vscode/dead", + ) + now = utc_now() + assert ( + _include_record_in_hook_cleanup( + recoverable, + own_pid=os.getpid(), + own_start_epoch=1, + recoverable_agent_label_prefix=None, + include_foreign=False, + now=now, + ) + is False + ) + assert ( + _include_record_in_hook_cleanup( + recoverable, + own_pid=os.getpid(), + own_start_epoch=1, + recoverable_agent_label_prefix="cursor-vscode/", + include_foreign=False, + now=now, + ) + is True + ) + + +def test_hook_authorizes_foreign_active_env_values( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv(HOOK_AUTHORIZE_FOREIGN_ENV, raising=False) + assert _hook_authorizes_foreign_active() is True + monkeypatch.setenv(HOOK_AUTHORIZE_FOREIGN_ENV, "maybe") + assert _hook_authorizes_foreign_active() is False + monkeypatch.setenv(HOOK_AUTHORIZE_FOREIGN_ENV, "off") + assert _hook_authorizes_foreign_active() is False + + +def test_experience_store_private_validators() -> None: + with pytest.raises(ValueError, match="unknown experience facet kind"): + _facet_kind("not-a-facet") + with pytest.raises(ValueError, match="unknown experience status"): + _status("archived") + + +def test_trajectory_agents_aggregate_covers_failed_and_anomalies() -> None: + assert trajectory_agent_label(_trajectory(agent=False)) is None + violated = _trajectory(outcome="violated") + rows = aggregate_agent_rows( + (violated,), + anomaly_by_id={"traj-1": ()}, + ) + assert rows[0].failed_outcome_count == 1 + assert rows[0].anomaly_count == 0 + assert rows[0].intent_count == 1 + + +def 
test_trajectory_cli_render_populated_and_empty_paths() -> None: + printer = _CapturePrinter() + render_trajectory_status( + console=printer, + enabled=True, + count=1, + latest_run=_projection_run(legacy=3), + ) + assert any("legacy events" in line for line in printer.lines) + + printer = _CapturePrinter() + render_projection_run(console=printer, run=_projection_run(legacy=2)) + assert any("legacy audit events" in line for line in printer.lines) + + printer = _CapturePrinter() + render_trajectory_list(console=printer, items=[]) + assert printer.lines == ["No trajectories found."] + + item = TrajectoryListItem( + id="traj-1", + workflow_id="intent:a", + outcome="accepted", + quality_tier="verified", + quality_score=90, + event_count=2, + started_at_utc="2026-01-01T00:00:00Z", + finished_at_utc="2026-01-01T00:01:00Z", + summary="summary", + ) + printer = _CapturePrinter() + render_trajectory_list(console=printer, items=[item]) + assert any("traj-1" in line for line in printer.lines) + + printer = _CapturePrinter() + render_trajectory_search_results( + console=printer, + query="recover", + trajectories=[], + ) + assert any("No matching trajectories" in line for line in printer.lines) + + printer = _CapturePrinter() + render_trajectory_agents(console=printer, payload={"agents": []}) + assert any("No agent-labeled" in line for line in printer.lines) + + printer = _CapturePrinter() + render_trajectory_agents( + console=printer, + payload={ + "agent_count": 1, + "trajectory_count": 1, + "unlabeled_trajectory_count": 0, + "agents": [ + "not-a-mapping", + {"agent_label": "agent", "trajectory_count": 1}, + ], + }, + ) + assert any("agent" in line for line in printer.lines) + + printer = _CapturePrinter() + render_trajectory_anomalies( + console=printer, + payload={ + "summary": { + "trajectories_with_anomalies": 1, + "anomaly_count": 1, + "error_count": 1, + "warn_count": 0, + }, + "trajectories": [ + "skip", + { + "trajectory_id": "traj-1", + "agent_label": "agent", + 
"outcome": "violated", + "quality_tier": "incident", + "anomalies": [ + "skip", + { + "severity": "error", + "kind": "scope_violation", + "message": "bad scope", + }, + ], + }, + ], + }, + ) + assert any("scope_violation" in line for line in printer.lines) + + printer = _CapturePrinter() + trajectory = _trajectory() + render_trajectory_detail(console=printer, trajectory=trajectory) + joined = "\n".join(printer.lines) + assert "workflow summary" in joined + assert "labels:" not in joined + + +def test_ingest_path_resolvers_skip_missing_and_escape(tmp_path: Path) -> None: + root = tmp_path / "repo" + root.mkdir() + ingest = IngestConfig( + contract_constants_paths=("missing/contracts.py",), + document_link_paths=("../escape.md",), + mcp_tool_schema_snapshot_path="missing-tools.json", + mcp_tool_count_doc_paths=("missing-doc.md",), + ) + assert ( + resolve_contract_constants_paths( + root_path=root, + registry_paths=frozenset(), + ingest=ingest, + ) + == () + ) + assert ( + resolve_document_link_paths( + root_path=root, + registry_paths=frozenset({"docs/book/01.md"}), + ingest=ingest, + ) + == () + ) + assert resolve_mcp_tool_schema_snapshot_path(root_path=root, ingest=ingest) is None + assert resolve_mcp_tool_contradiction_sources(root_path=root, ingest=ingest) is None + + +def test_intent_registry_path_outside_repo_raises(tmp_path: Path) -> None: + from codeclone.config.intent_registry import resolve_intent_registry_db_path + + root = tmp_path / "repo" + root.mkdir() + outside = (tmp_path / "outside" / "intents.sqlite3").resolve() + with pytest.raises(IntentRegistryConfigError, match="relative to the repository"): + resolve_intent_registry_db_path( + root_path=root, + value=str(outside), + ) + + +def test_core_worker_signature_cache_handles_broken_callable() -> None: + from codeclone.core import worker as core_worker + + core_worker._supported_process_file_kwarg_names.cache_clear() + + def _broken(*_args: object, **_kwargs: object) -> object: + return None + + 
assert core_worker._supported_process_file_kwarg_names(_broken) is None + core_worker._supported_process_file_kwarg_names.cache_clear() + + +def test_measure_payload_estimate_failure_uses_char_fallback( + monkeypatch: pytest.MonkeyPatch, +) -> None: + def _boom(_payload: object) -> object: + raise TypeError("estimate failed") + + monkeypatch.setattr( + "codeclone.surfaces.mcp.payloads.estimate_payload", + _boom, + ) + byte_size, tokens = measure_payload({"ok": True}) + assert byte_size > 0 + assert tokens > 0 + + +def test_observability_cli_missing_store_and_file_outputs( + tmp_path: Path, + capsys: pytest.CaptureFixture[str], +) -> None: + empty_root = tmp_path / "empty" + empty_root.mkdir() + code = observability_main(["trace", "--root", str(empty_root)]) + assert code == int(ExitCode.SUCCESS) + assert "No observability store" in capsys.readouterr().out + + from codeclone.observability.models import OperationRecord + from codeclone.observability.store.schema import ( + observability_store_path, + open_observability_store, + ) + from codeclone.observability.store.writer import write_operation + + repo = tmp_path / "repo" + repo.mkdir() + conn = open_observability_store(observability_store_path(repo)) + try: + write_operation( + conn, + OperationRecord( + operation_id="op-cli", + correlation_id="op-cli", + surface="cli", + name="cli.analyze", + started_at_utc="2026-01-01T00:00:00Z", + duration_ms=1.0, + status="ok", + spans=(), + ), + ) + finally: + conn.close() + + json_path = tmp_path / "trace.json" + html_path = tmp_path / "trace.html" + code = observability_main( + [ + "trace", + "--root", + str(repo), + "--json", + str(json_path), + "--html", + str(html_path), + ] + ) + out = capsys.readouterr().out + assert code == int(ExitCode.SUCCESS) + assert json_path.is_file() + assert html_path.is_file() + assert f"Wrote {json_path}" in out + assert f"Wrote {html_path}" in out + + +def test_render_html_format_helpers_and_semantic_row() -> None: + from 
codeclone.observability.render_html import _bytes, _mb, _semantic_row, _tokens + from codeclone.observability.views import SpanCostView + + assert _mb(None) == "—" + assert "GB" in _mb(2048.0) + assert "MB" in _mb(512.0) + assert _bytes(None) == "—" + assert "MB" in _bytes(1024 * 1024) + assert "KB" in _bytes(2048) + assert _bytes(12).endswith(" B") + assert _tokens(None) == "—" + assert _tokens(0) == "—" + assert _tokens(1500).endswith("k") + + costly = SpanCostView( + span_id="s1", + name="memory.semantic.reindex", + surface="memory", + operation_id="op", + operation_name="memory.projection.job", + duration_ms=6000.0, + no_op=True, + reason_kind="schema_version_changed", + ) + costly_html = _semantic_row(costly) + assert "no-op · costly" in costly_html + assert "schema_version_changed" in costly_html + + noop = replace(costly, duration_ms=10.0) + assert "no-op" in _semantic_row(noop) + assert "costly" not in _semantic_row(noop) + + productive = replace(noop, no_op=False, reason_kind=None) + assert "productive" in _semantic_row(productive) + + +def test_observability_reader_epoch_ms_and_empty_correlation_filter( + tmp_path: Path, +) -> None: + from codeclone.observability.store.reader import _by_correlations, _epoch_ms + from codeclone.observability.store.schema import ( + observability_store_path, + open_observability_store, + ) + + assert _epoch_ms("") == 0.0 + assert _epoch_ms("not-a-date") == 0.0 + assert _epoch_ms("2026-01-01T00:00:00Z") > 0.0 + + conn = open_observability_store(observability_store_path(tmp_path)) + try: + assert _by_correlations(conn, []) == [] + finally: + conn.close() + + +def test_pyproject_loader_symlink_and_invalid_ingest_table(tmp_path: Path) -> None: + from codeclone.config.pyproject_loader import ( + ConfigValidationError, + _validate_nested_ingest_table, + load_pyproject_config, + open_repo_config, + ) + + broken = tmp_path / "pyproject.toml" + broken.symlink_to(tmp_path / "missing.toml") + with pytest.raises(ConfigValidationError, 
match="must not be a symlink"): + load_pyproject_config(tmp_path) + + real = tmp_path / "real.toml" + real.write_text("[tool.codeclone]\n", encoding="utf-8") + broken.unlink() + link = tmp_path / "pyproject.toml" + link.symlink_to(real) + with pytest.raises(ConfigValidationError, match="must not be a symlink"): + open_repo_config(tmp_path) + + with pytest.raises(ConfigValidationError, match="must be object"): + _validate_nested_ingest_table( + ingest_obj="not-a-table", + config_path=tmp_path / "pyproject.toml", + ) + + +def test_resolve_semantic_index_writer_disabled_returns_none() -> None: + from codeclone.config.memory import SemanticConfig + from codeclone.memory.semantic import resolve_semantic_index_writer + + assert resolve_semantic_index_writer(SemanticConfig(enabled=False)) is None + + +def test_semantic_retrieval_hydrate_trajectory_edges() -> None: + from codeclone.memory.retrieval.semantic import _hydrate_trajectory + from codeclone.memory.semantic.models import SemanticHit + + hit = SemanticHit(source_id="traj-1", source="trajectory", score=0.4) + + class _StoreWithoutTrajectoryApi: + pass + + assert _hydrate_trajectory(hit, _StoreWithoutTrajectoryApi(), 80) is None + + class _StoreMissingTrajectory: + def find_trajectory(self, _trajectory_id: str) -> None: + return None + + assert _hydrate_trajectory(hit, _StoreMissingTrajectory(), 80) is None + + +def test_execute_trajectory_rebuild_incremental_mode(tmp_path: Path) -> None: + from codeclone.config.memory import resolve_memory_config + from codeclone.memory.trajectory.rebuild_workflow import execute_trajectory_rebuild + + from .memory_fixtures import memory_store, seed_trajectory_audit_workflow + + with memory_store(tmp_path) as (root, project, store, _db_path): + audit_db = root / ".codeclone" / "db" / "audit.sqlite3" + seed_trajectory_audit_workflow(root=root, audit_db=audit_db) + config = resolve_memory_config(root) + full = execute_trajectory_rebuild( + root_path=root, + config=config, + 
store=store, + project=project, + ) + assert full["status"] == "ok" + assert full["mode"] == "full" + incremental = execute_trajectory_rebuild( + root_path=root, + config=config, + store=store, + project=project, + incremental_after_event_core_id=1, + ) + assert incremental["status"] == "ok" + assert incremental["mode"] == "incremental" + + +def test_memory_state_path_validation_errors(tmp_path: Path) -> None: + from codeclone.config.memory import _resolve_memory_state_path + + root = tmp_path / "repo" + root.mkdir() + with pytest.raises(TypeError, match="must resolve to a string path"): + _resolve_memory_state_path( + key="memory.semantic.index_path", + value=123, + root_path=root, + ) + with pytest.raises(ValueError, match="must stay under the repository root"): + _resolve_memory_state_path( + key="memory.semantic.index_path", + value="../outside.lance", + root_path=root, + ) + + +def test_hook_authorizes_foreign_active_truthy_env( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setenv(HOOK_AUTHORIZE_FOREIGN_ENV, "yes") + assert _hook_authorizes_foreign_active() is True + + +def test_hydrate_trajectory_hits_detail_levels(tmp_path: Path) -> None: + from codeclone.memory.retrieval import service as retrieval_service + from codeclone.memory.semantic.models import SemanticHit + + from .memory_fixtures import memory_store, seed_trajectory_audit_workflow + + with memory_store(tmp_path) as (root, project, store, _db_path): + audit_db = tmp_path / "audit.sqlite3" + seed_trajectory_audit_workflow(root=root, audit_db=audit_db) + trajectory = store.rebuild_trajectories_from_audit( + project=project, + root_path=root, + audit_db_path=audit_db, + ).trajectories[0] + hit = SemanticHit(source_id=trajectory.id, source="trajectory", score=0.5) + compact = retrieval_service._hydrate_trajectory_hits( + store, + project_id=project.id, + hits=[hit], + detail_level="compact", + ) + full = retrieval_service._hydrate_trajectory_hits( + store, + project_id=project.id, + 
hits=[hit], + detail_level="full", + ) + assert compact and full + assert compact[0]["semantic_score"] == 0.5 + assert full[0]["semantic_score"] == 0.5 + assert "steps" in full[0] + + +def test_mcp_payload_paginate_and_finding_resolution() -> None: + from codeclone.surfaces.mcp.payloads import ( + PageWindow, + paginate, + resolve_finding_id, + short_id, + ) + + window = paginate([1, 2, 3, 4], offset=1, limit=2, max_limit=10) + assert isinstance(window, PageWindow) + assert window.items == [2, 3] + assert window.next_offset == 3 + + tail = paginate([9], offset=0, limit=5, max_limit=10) + assert tail.next_offset is None + + canonical = {"finding-abcdef12": "short"} + assert ( + resolve_finding_id( + canonical_to_short=canonical, + short_to_canonical={"short": "finding-abcdef12"}, + finding_id="finding-abcdef12", + ) + == "finding-abcdef12" + ) + assert ( + resolve_finding_id( + canonical_to_short=canonical, + short_to_canonical={"short": "finding-abcdef12"}, + finding_id="short", + ) + == "finding-abcdef12" + ) + assert ( + resolve_finding_id( + canonical_to_short=canonical, + short_to_canonical={}, + finding_id="missing", + ) + is None + ) + assert short_id("finding-abcdef12", length=8) == "finding-" + + +def test_workspace_hook_cleanup_sqlite_load_error( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + class _Config: + backend = "sqlite" + storage_path = Path(".codeclone/db/intents.sqlite3") + + monkeypatch.setattr( + "codeclone.workspace_intent.gate.resolve_intent_registry_config", + lambda _root: _Config(), + ) + + def _load_fail(*_args: object, **_kwargs: object) -> object: + raise OSError("cannot read sqlite") + + monkeypatch.setattr( + "codeclone.workspace_intent.gate._load_registry_records_read_only", + _load_fail, + ) + with pytest.raises(WorkspaceIntentRegistryUnavailable, match="cannot read sqlite"): + list_unclosed_workspace_intents_for_hook_cleanup(tmp_path) + + +def test_workspace_ownership_authorizes_foreign_active( + monkeypatch: 
pytest.MonkeyPatch, +) -> None: + from codeclone.surfaces.mcp import _workspace_intents as workspace_intents + from codeclone.workspace_intent import gate as gate_mod + + monkeypatch.setattr(gate_mod, "_hook_authorizes_foreign_active", lambda: True) + assert ( + gate_mod._ownership_authorizes_hook( + workspace_intents.IntentOwnership.FOREIGN_ACTIVE, + liveness=workspace_intents.PidLiveness.ALIVE, + ) + is True + ) + monkeypatch.setattr(gate_mod, "_hook_authorizes_foreign_active", lambda: False) + assert ( + gate_mod._ownership_authorizes_hook( + workspace_intents.IntentOwnership.FOREIGN_ACTIVE, + liveness=workspace_intents.PidLiveness.ALIVE, + ) + is False + ) + + +def test_agent_pid_liveness_honors_monkeypatched_boolean_probe( + monkeypatch: pytest.MonkeyPatch, +) -> None: + from codeclone.surfaces.mcp import _workspace_intent_pid as pid_mod + from codeclone.surfaces.mcp._workspace_intent_lifecycle import PidLiveness + + monkeypatch.setattr(pid_mod, "is_agent_pid_alive", lambda _pid: False) + assert pid_mod.agent_pid_liveness(123) is PidLiveness.DEAD + + +def test_record_elapsed_span_noop_without_active_operation(tmp_path: Path) -> None: + from codeclone.config.observability import ObservabilityConfig + from codeclone.observability import bootstrap, record_elapsed_span, shutdown + + bootstrap(ObservabilityConfig(enabled=True), root=tmp_path) + try: + record_elapsed_span( + "orphan-span", + started_at_utc="2026-01-01T00:00:00Z", + duration_ms=1.0, + ) + finally: + shutdown() + + +def test_staleness_anchor_drift_status_edges(tmp_path: Path) -> None: + from codeclone.memory.models import MemorySubject, generate_memory_id + from codeclone.memory.staleness import _evaluate_anchor_drift_status + + from .memory_fixtures import make_module_record, memory_store + + with memory_store(tmp_path) as (root, project, store, _db_path): + record = replace( + make_module_record(project.id, "pkg.mod"), + created_at_commit="abc123", + code_fingerprint="fp-1", + status="active", + ) + 
store.upsert_record(record) + subject = MemorySubject( + id=generate_memory_id(prefix="subj"), + memory_id=record.id, + subject_kind="path", + subject_key="pkg/missing.py", + relation="about", + ) + store.write_subject(subject) + assert ( + _evaluate_anchor_drift_status( + record, + anchor_subject=subject, + root_path=root, + ) + == "historical" + ) + historical = replace(record, status="historical") + assert ( + _evaluate_anchor_drift_status( + historical, + anchor_subject=subject, + root_path=root, + ) + is None + ) + stale_record = replace( + record, status="stale", stale_reason="subject_fingerprint_drift" + ) + assert ( + _evaluate_anchor_drift_status( + stale_record, + anchor_subject=subject, + root_path=root, + ) + == "historical" + ) + + +def test_instance_methods_decorator_and_base_name_fallbacks() -> None: + import ast + + import codeclone.findings.design.instance_methods as instance_methods_mod + + assert instance_methods_mod._simple_decorator_name(ast.Constant(value=1)) == "" + assert instance_methods_mod._simple_base_name(ast.Constant(value=1)) == "" + + +def test_workflow_audit_emit_and_digest_helpers( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + import sys + + from codeclone.surfaces.cli import workflow as cli_workflow + + class _Args: + audit_enabled = True + + cli_workflow._emit_cli_analysis_completed_if_enabled( + args=_Args(), + root_path=tmp_path, + report_document="not-a-dict", + new_func_count=0, + new_block_count=0, + ) + cli_workflow._emit_cli_analysis_completed_if_enabled( + args=_Args(), + root_path=tmp_path, + report_document={"integrity": {"digest": {"value": ""}}}, + new_func_count=0, + new_block_count=0, + ) + + def _boom(**_kwargs: object) -> None: + raise RuntimeError("audit unavailable") + + monkeypatch.setattr( + "codeclone.audit.analysis_completed.emit_analysis_completed_from_report", + _boom, + ) + cli_workflow._emit_cli_analysis_completed_if_enabled( + args=_Args(), + root_path=tmp_path, + 
report_document={"integrity": {"digest": {"value": "a" * 64}}}, + new_func_count=1, + new_block_count=0, + ) + + assert cli_workflow._report_digest_from_document({}) == "" + assert ( + cli_workflow._report_digest_from_document( + {"integrity": {"digest": "not-a-mapping"}} + ) + == "" + ) + + monkeypatch.setattr(sys, "argv", ["codeclone", "observability"]) + with pytest.raises(SystemExit): + cli_workflow.main() + monkeypatch.setattr(sys, "argv", ["codeclone", "memory", "--help"]) + with pytest.raises(SystemExit): + cli_workflow.main() + + +def test_observability_profile_open_fds_degrades_gracefully( + monkeypatch: pytest.MonkeyPatch, +) -> None: + import sys + from unittest.mock import MagicMock + + from codeclone.observability.profile import build_profile_sample + + process = MagicMock() + process.memory_info.return_value = MagicMock(rss=1024 * 1024) + process.cpu_times.return_value = MagicMock(user=0.1, system=0.2) + process.num_fds.side_effect = OSError("unsupported") + process.num_threads.return_value = 3 + mock_psutil = MagicMock() + mock_psutil.Process.return_value = process + monkeypatch.setitem(sys.modules, "psutil", mock_psutil) + + sample = build_profile_sample((512 * 1024, 0.0, 0.0)) + assert sample is not None + assert sample.open_fds is None From d4c202405e489b2d3f5e6038b6927bf332cc4717 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Thu, 11 Jun 2026 23:54:35 +0500 Subject: [PATCH 245/318] feat(core): add literal-free SQL fingerprint normalizer (29.DB Track B) Pure helper: reduce a SQL statement to its normalized shape (string/number literals -> ?, collapsed IN/VALUES placeholder lists), a table hint, and a kind. Idempotent on its own output, so a persisted fingerprint re-parses for table_hint/kind without storing them. Not wired into the trace sink yet. 
--- codeclone/observability/db_fingerprint.py | 77 ++++++++++++++++++ tests/test_observability_db_fingerprint.py | 90 ++++++++++++++++++++++ 2 files changed, 167 insertions(+) create mode 100644 codeclone/observability/db_fingerprint.py create mode 100644 tests/test_observability_db_fingerprint.py diff --git a/codeclone/observability/db_fingerprint.py b/codeclone/observability/db_fingerprint.py new file mode 100644 index 00000000..de3109f8 --- /dev/null +++ b/codeclone/observability/db_fingerprint.py @@ -0,0 +1,77 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""SQL statement fingerprinting for DB observability (Phase 29.DB, Track B). + +Performance-truth only: reduce a SQL statement to its normalized *shape* so the +cockpit can turn "1892 queries" into "1200x SELECT evidence by trajectory_id". +The fingerprint is literal-free by construction — every string/number value is +replaced with ``?`` — so it is safe to persist without leaking row data. +""" + +from __future__ import annotations + +import re +from dataclasses import dataclass + +# Bound the persisted shape; pathological statements must not bloat the column. +_MAX_FINGERPRINT_CHARS = 200 + +_WHITESPACE_RE = re.compile(r"\s+") +# Single-quoted string literal with doubled-quote ('') escapes; unrolled so it +# stays linear-time (no nested quantifier to backtrack on). +_STRING_RE = re.compile(r"'[^']*(?:''[^']*)*'") +_HEX_RE = re.compile(r"\b0x[0-9a-f]+\b") +_NUMBER_RE = re.compile(r"\b\d+(?:\.\d+)?\b") +# ``( ?, ?, ? )`` / ``( ? )`` -> ``(?)`` so IN/VALUES arity does not fan out +# distinct shapes for the same statement. +_PLACEHOLDER_LIST_RE = re.compile(r"\(\s*\?(?:\s*,\s*\?)*\s*\)") +# First identifier after a table-introducing keyword. 
+_TABLE_HINT_RE = re.compile(r"\b(?:from|into|update|join)\s+([a-z_][a-z0-9_$]*)") + +_KINDS = frozenset({"select", "insert", "update", "delete"}) + + +@dataclass(frozen=True, slots=True) +class SqlFingerprint: + """Normalized shape of one SQL statement (literal-free).""" + + fingerprint: str + table_hint: str | None + kind: str # select | insert | update | delete | other + + +def _normalize(sql: str) -> str: + normalized = _WHITESPACE_RE.sub(" ", sql.strip().lower()) + normalized = _STRING_RE.sub("?", normalized) + normalized = _HEX_RE.sub("?", normalized) + normalized = _NUMBER_RE.sub("?", normalized) + normalized = _PLACEHOLDER_LIST_RE.sub("(?)", normalized) + return normalized.strip() + + +def fingerprint_sql(sql: str) -> SqlFingerprint: + """Reduce a SQL statement to its literal-free shape, table hint, and kind. + + Idempotent on its own output: fingerprinting an already-normalized statement + returns the same shape, so a persisted fingerprint can be re-parsed for its + table hint and kind without storing them separately. + """ + normalized = _normalize(sql) + if not normalized: + return SqlFingerprint(fingerprint="", table_hint=None, kind="other") + head = normalized.split(" ", 1)[0] + kind = head if head in _KINDS else "other" + table_match = _TABLE_HINT_RE.search(normalized) + table_hint = table_match.group(1) if table_match else None + return SqlFingerprint( + fingerprint=normalized[:_MAX_FINGERPRINT_CHARS], + table_hint=table_hint, + kind=kind, + ) + + +__all__ = ["SqlFingerprint", "fingerprint_sql"] diff --git a/tests/test_observability_db_fingerprint.py b/tests/test_observability_db_fingerprint.py new file mode 100644 index 00000000..970282ed --- /dev/null +++ b/tests/test_observability_db_fingerprint.py @@ -0,0 +1,90 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import pytest + +from codeclone.observability.db_fingerprint import SqlFingerprint, fingerprint_sql + + +@pytest.mark.parametrize( + ("sql", "expected_fp", "table_hint", "kind"), + [ + ( + "SELECT * FROM memory_evidence WHERE memory_id = 'abc'", + "select * from memory_evidence where memory_id = ?", + "memory_evidence", + "select", + ), + ( + "select *\n from memory_records where id = 42", + "select * from memory_records where id = ?", + "memory_records", + "select", + ), + ( + "INSERT INTO memory_subjects (a, b) VALUES (?, ?, ?)", + "insert into memory_subjects (a, b) values (?)", + "memory_subjects", + "insert", + ), + ( + "UPDATE platform_spans SET counters_json = '{}' WHERE span_id = 'x'", + "update platform_spans set counters_json = ? where span_id = ?", + "platform_spans", + "update", + ), + ( + "DELETE FROM memory_links WHERE id IN (1, 2, 3)", + "delete from memory_links where id in (?)", + "memory_links", + "delete", + ), + ( + "SELECT e.* FROM memory_evidence e " + "JOIN memory_records r ON r.id = e.memory_id", + "select e.* from memory_evidence e " + "join memory_records r on r.id = e.memory_id", + "memory_evidence", + "select", + ), + ("PRAGMA query_only = ON", "pragma query_only = on", None, "other"), + ], +) +def test_fingerprint_sql_shapes( + sql: str, expected_fp: str, table_hint: str | None, kind: str +) -> None: + assert fingerprint_sql(sql) == SqlFingerprint( + fingerprint=expected_fp, table_hint=table_hint, kind=kind + ) + + +def test_fingerprint_strips_numbers_and_hex() -> None: + fp = fingerprint_sql("select * from t where a = 3.14 and b = 0xFF") + assert fp.fingerprint == "select * from t where a = ? and b = ?" 
+ assert fp.table_hint == "t" + + +def test_fingerprint_is_idempotent_on_its_own_output() -> None: + once = fingerprint_sql("SELECT * FROM memory_evidence WHERE memory_id IN (10, 20)") + twice = fingerprint_sql(once.fingerprint) + assert twice == once + assert once.table_hint == "memory_evidence" + + +def test_fingerprint_empty_sql() -> None: + assert fingerprint_sql(" \n ") == SqlFingerprint( + fingerprint="", table_hint=None, kind="other" + ) + + +def test_fingerprint_caps_length() -> None: + long_sql = "select " + ", ".join(f"col{i}" for i in range(200)) + " from big_table" + fp = fingerprint_sql(long_sql) + assert len(fp.fingerprint) <= 200 + assert fp.kind == "select" + assert fp.table_hint == "big_table" From 8769e1279908e95ea537dd20a32005fd13704863 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Fri, 12 Jun 2026 00:01:00 +0500 Subject: [PATCH 246/318] feat(core): capture per-span SQL query fingerprints (29.DB Track B) record_db_query now also derives the literal-free shape of each statement, and SpanHandle flushes the top-8 shapes into a new platform_spans.db_fingerprints column. The cockpit can then decompose a span's query total into named shapes ('1200x SELECT evidence by memory_id') instead of a bare count. An additive ALTER-COLUMN migration keeps older stores readable; db_writes semantics (via _classify_sql, incl. REPLACE) are unchanged. 
--- codeclone/observability/models.py | 2 ++ codeclone/observability/runtime.py | 23 +++++++++++++++++ codeclone/observability/store/schema.py | 16 ++++++++++++ codeclone/observability/store/writer.py | 12 ++++++--- tests/test_observability_correlation.py | 33 +++++++++++++++++++++++++ 5 files changed, 83 insertions(+), 3 deletions(-) diff --git a/codeclone/observability/models.py b/codeclone/observability/models.py index b8c124a1..92e433ac 100644 --- a/codeclone/observability/models.py +++ b/codeclone/observability/models.py @@ -44,6 +44,8 @@ class SpanRecord: reason: str | None = None dedupe_key: str | None = None counters: Mapping[str, int] = field(default_factory=dict) + # Top-N literal-free SQL shapes seen on this span -> occurrence count. + db_fingerprints: Mapping[str, int] = field(default_factory=dict) profile: ProfileSample | None = None diff --git a/codeclone/observability/runtime.py b/codeclone/observability/runtime.py index 9fa8a83f..5fddb4d4 100644 --- a/codeclone/observability/runtime.py +++ b/codeclone/observability/runtime.py @@ -25,9 +25,14 @@ from pathlib import Path from ..config.observability import ObservabilityConfig, resolve_observability_config +from .db_fingerprint import fingerprint_sql from .models import OperationRecord, ProfileSample, SpanRecord from .reason_kind import ReasonKind +# Bound how many distinct SQL shapes a span persists; the diagnostic value is in +# the few high-count statements, not the long tail. +_DB_FINGERPRINT_TOP_N = 8 + _ENABLED: bool = False _RUNTIME: _ActiveRuntime | None = None _CURRENT_OP: ContextVar[OperationHandle | None] = ContextVar("_obs_op", default=None) @@ -139,6 +144,7 @@ def __init__( self._dedupe_key = dedupe_key self._status = "ok" self._counters: dict[str, int] = {} + self._db_fingerprints: dict[str, int] = {} # set_counter is wired by the 29.10 worker instrumentation; add_counter by # the 29.DB query-trace hook (record_db_query). 
set_reason_kind stays @@ -149,6 +155,19 @@ def add_counter(self, key: str, value: int = 1) -> None: def set_counter(self, key: str, value: int) -> None: self._counters[key] = value + # Wired by the 29.DB query-trace hook (record_db_query): accumulate the + # normalized SQL shape so _to_record can flush the top-N per span. + def add_db_fingerprint(self, fingerprint: str) -> None: + self._db_fingerprints[fingerprint] = ( + self._db_fingerprints.get(fingerprint, 0) + 1 + ) + + def _top_db_fingerprints(self) -> dict[str, int]: + if not self._db_fingerprints: + return {} + ranked = sorted(self._db_fingerprints.items(), key=lambda kv: (-kv[1], kv[0])) + return dict(ranked[:_DB_FINGERPRINT_TOP_N]) + # codeclone: ignore[dead-code] def set_reason_kind(self, reason_kind: ReasonKind) -> None: self._reason_kind = reason_kind @@ -168,6 +187,7 @@ def _to_record( reason=self._reason, dedupe_key=self._dedupe_key, counters=dict(self._counters), + db_fingerprints=self._top_db_fingerprints(), profile=profile, ) @@ -455,6 +475,9 @@ def record_db_query(sql: str) -> None: span_handle.add_counter("db_queries", 1) if _classify_sql(sql) in _DB_WRITE_KINDS: span_handle.add_counter("db_writes", 1) + fingerprint = fingerprint_sql(sql).fingerprint + if fingerprint: + span_handle.add_db_fingerprint(fingerprint) def instrument_db_connection(conn: sqlite3.Connection) -> None: diff --git a/codeclone/observability/store/schema.py b/codeclone/observability/store/schema.py index 3bbf0f6e..c7c68666 100644 --- a/codeclone/observability/store/schema.py +++ b/codeclone/observability/store/schema.py @@ -62,6 +62,7 @@ reason TEXT, dedupe_key TEXT, counters_json TEXT, + db_fingerprints TEXT, rss_mb REAL, rss_delta_mb REAL, cpu_user_ms REAL, @@ -85,8 +86,23 @@ def observability_store_path(root: Path) -> Path: return root.resolve() / _OBSERVABILITY_DB_RELATIVE +def _ensure_span_columns(conn: sqlite3.Connection) -> None: + """Additive migration for stores created before a span column existed. 
+ + ``CREATE TABLE IF NOT EXISTS`` never alters an existing table, so a store + written by an older build keeps its old shape. This backfills the column + with ``ALTER TABLE ... ADD COLUMN`` (a no-op on fresh stores, which already + have it from ``_SCHEMA``) so writes/reads stay forward-compatible without a + destructive rebuild of disposable telemetry. + """ + existing = {row[1] for row in conn.execute("PRAGMA table_info(platform_spans)")} + if "db_fingerprints" not in existing: + conn.execute("ALTER TABLE platform_spans ADD COLUMN db_fingerprints TEXT") + + def create_observability_schema(conn: sqlite3.Connection) -> None: conn.executescript(_SCHEMA) + _ensure_span_columns(conn) conn.execute( "INSERT OR REPLACE INTO platform_meta(key, value) VALUES('schema_version', ?)", (PLATFORM_OBSERVABILITY_SCHEMA_VERSION,), diff --git a/codeclone/observability/store/writer.py b/codeclone/observability/store/writer.py index 33988466..94af19a3 100644 --- a/codeclone/observability/store/writer.py +++ b/codeclone/observability/store/writer.py @@ -32,9 +32,9 @@ _SPAN_SQL = ( "INSERT OR REPLACE INTO platform_spans(" "span_id, operation_id, parent_span_id, name, started_at_utc, duration_ms, " - "status, reason_kind, reason, dedupe_key, counters_json, rss_mb, " - "rss_delta_mb, cpu_user_ms, cpu_system_ms, open_fds, thread_count) " - "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)" + "status, reason_kind, reason, dedupe_key, counters_json, db_fingerprints, " + "rss_mb, rss_delta_mb, cpu_user_ms, cpu_system_ms, open_fds, thread_count) " + "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)" ) @@ -78,6 +78,11 @@ def _span_row(span: SpanRecord) -> tuple[object, ...]: counters_json = ( json_text(dict(span.counters), sort_keys=True) if span.counters else None ) + db_fingerprints_json = ( + json_text(dict(span.db_fingerprints), sort_keys=True) + if span.db_fingerprints + else None + ) return ( span.span_id, span.operation_id, @@ -90,6 +95,7 @@ def _span_row(span: 
SpanRecord) -> tuple[object, ...]: span.reason, span.dedupe_key, counters_json, + db_fingerprints_json, *_profile_cols(span.profile), ) diff --git a/tests/test_observability_correlation.py b/tests/test_observability_correlation.py index 05b41211..ee3740c3 100644 --- a/tests/test_observability_correlation.py +++ b/tests/test_observability_correlation.py @@ -196,6 +196,39 @@ def test_db_query_counter_attaches_to_active_span(tmp_path: Path) -> None: assert counters.get("db_writes", 0) == 2 +def test_db_fingerprints_capture_query_shapes_per_span(tmp_path: Path) -> None: + bootstrap(ObservabilityConfig(enabled=True), root=tmp_path) + try: + with ( + operation(name="memory.projection.job", surface="memory"), + span(name="memory.experience.distill"), + ): + conn = sqlite3.connect(":memory:") + instrument_db_connection(conn) + conn.execute("CREATE TABLE memory_evidence (memory_id INTEGER)") + # The N+1 the fingerprints are meant to name: the same SELECT shape, + # different literals, run repeatedly. + for memory_id in range(3): + conn.execute( + "SELECT * FROM memory_evidence WHERE memory_id = ?", (memory_id,) + ).fetchall() + conn.close() + finally: + shutdown() + + obs = open_observability_store(observability_store_path(tmp_path)) + try: + row = obs.execute( + "SELECT db_fingerprints FROM platform_spans " + "WHERE name='memory.experience.distill'" + ).fetchone() + finally: + obs.close() + shapes = json.loads(row[0]) if row and row[0] else {} + # Three differently-valued selects collapse to one literal-free shape. + assert shapes.get("select * from memory_evidence where memory_id = ?") == 3 + + def test_instrument_db_connection_is_inert_when_disabled() -> None: # Disabled process: no trace callback, no counting, no error, zero overhead. 
conn = sqlite3.connect(":memory:") From cf081cc665dc0082906ec1b783d2908f88e781c3 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Fri, 12 Jun 2026 00:22:52 +0500 Subject: [PATCH 247/318] feat(html): surface DB query shapes in the cockpit (29.DB Track B.3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Aggregate the persisted per-span db_fingerprints into a DbFingerprintRow read-model (reader._db_fingerprints; table_hint re-derived from the stored shape) and render a 'DB query shapes' section in the HTML cockpit. A span's query total now decomposes into named statement shapes ranked by count, so an N+1 reads as '1200x SELECT evidence by memory_id' — concrete to batch and fix. --- codeclone/observability/render_html.py | 30 ++++++++++++++++ codeclone/observability/store/reader.py | 32 +++++++++++++++++ codeclone/observability/views.py | 18 ++++++++++ tests/test_observability_reader.py | 48 ++++++++++++++++++++++++- tests/test_observability_render.py | 26 ++++++++++++++ 5 files changed, 153 insertions(+), 1 deletion(-) diff --git a/codeclone/observability/render_html.py b/codeclone/observability/render_html.py index 2612b6da..f0abfabc 100644 --- a/codeclone/observability/render_html.py +++ b/codeclone/observability/render_html.py @@ -27,6 +27,7 @@ AgentTokenRow, AggregatesView, DbCostRow, + DbFingerprintRow, McpToolAggregate, OperationView, PipelineGroup, @@ -680,6 +681,34 @@ def _db_cost(agg: AggregatesView) -> str: ) +def _db_fingerprint_row(row: DbFingerprintRow) -> str: + table = _esc(row.table_hint) if row.table_hint else "—" + return ( + f'{_esc(row.span_name)}' + f'{_esc(row.fingerprint)}' + f'{table}' + f'{row.count}' + ) + + +def _db_fingerprints(agg: AggregatesView) -> str: + if not agg.db_fingerprints: + return "" + rows = "".join(_db_fingerprint_row(row) for row in agg.db_fingerprints) + headers = ( + ("Span", False), + ("Query shape", False), + ("Table", False), + ("Count", True), + ) + return _section( + "DB 
query shapes", + _table(headers, rows), + subtitle="The literal-free statement shapes behind the query counts — the " + "high-count rows name the N+1 to fix (batch these reads).", + ) + + def _pipeline_row(group: PipelineGroup) -> str: return ( f'{_esc(group.name)}' @@ -716,6 +745,7 @@ def render_trace_html(trace: TraceView) -> str: + _chain(trace) + _semantic(trace.aggregates) + _db_cost(trace.aggregates) + + _db_fingerprints(trace.aggregates) + _agent(trace.aggregates) + _mcp(trace.aggregates.mcp_tools) + _pipeline_section(trace.aggregates) diff --git a/codeclone/observability/store/reader.py b/codeclone/observability/store/reader.py index c8fc59ed..1f5c55e4 100644 --- a/codeclone/observability/store/reader.py +++ b/codeclone/observability/store/reader.py @@ -21,11 +21,13 @@ import orjson from ...contracts import PLATFORM_OBSERVABILITY_SCHEMA_VERSION +from ..db_fingerprint import fingerprint_sql from ..views import ( AgentTokenRow, AgentView, AggregatesView, DbCostRow, + DbFingerprintRow, McpToolAggregate, OperationView, PipelineGroup, @@ -44,6 +46,7 @@ # they are all present-and-zero the span ran but touched nothing (a no-op). _PRODUCTIVE_COUNTER_KEYS = ("embedded", "workflows_seen", "experiences_distilled") _SEMANTIC_COST_LIMIT = 8 +_DB_FINGERPRINT_ROW_LIMIT = 15 # Waste thresholds: a no-op span is only worth flagging once it has spent time; # an MCP response is "heavy" past these payload sizes. @@ -80,6 +83,9 @@ def _parse_counters(raw: object) -> dict[str, int]: def _span_view(row: sqlite3.Row) -> SpanView: + # sqlite3.Row membership (`x in row`) tests values, so probe column names via + # keys() to stay tolerant of stores written before db_fingerprints existed. 
+ columns = row.keys() return SpanView( span_id=str(row["span_id"]), name=str(row["name"]), @@ -92,6 +98,9 @@ def _span_view(row: sqlite3.Row) -> SpanView: counters=_parse_counters(row["counters_json"]), rss_delta_mb=row["rss_delta_mb"], started_at_utc=str(row["started_at_utc"]), + db_fingerprints=_parse_counters( + row["db_fingerprints"] if "db_fingerprints" in columns else None + ), ) @@ -403,6 +412,28 @@ def _db_costs(flat: list[OperationView]) -> tuple[DbCostRow, ...]: return tuple(sorted(rows, key=lambda r: (-r.total_queries, r.span_name))) +def _db_fingerprints(flat: list[OperationView]) -> tuple[DbFingerprintRow, ...]: + grouped: dict[tuple[str, str], int] = defaultdict(int) + surface_of: dict[str, str] = {} + for op in flat: + for span in op.spans: + for fingerprint, count in span.db_fingerprints.items(): + grouped[(span.name, fingerprint)] += count + surface_of.setdefault(span.name, op.surface) + rows = [ + DbFingerprintRow( + span_name=span_name, + surface=surface_of[span_name], + fingerprint=fingerprint, + table_hint=fingerprint_sql(fingerprint).table_hint, + count=count, + ) + for (span_name, fingerprint), count in grouped.items() + ] + rows.sort(key=lambda r: (-r.count, r.span_name, r.fingerprint)) + return tuple(rows[:_DB_FINGERPRINT_ROW_LIMIT]) + + def _aggregates( flat: list[OperationView], spans_by_op: dict[str, tuple[SpanView, ...]] ) -> AggregatesView: @@ -454,6 +485,7 @@ def _aggregates( waste=_waste(semantic_costs, mcp_tools), heaviest_cpu=heaviest_cpu, pipeline=_pipeline(flat), + db_fingerprints=_db_fingerprints(flat), ) diff --git a/codeclone/observability/views.py b/codeclone/observability/views.py index fd8d4281..259ac351 100644 --- a/codeclone/observability/views.py +++ b/codeclone/observability/views.py @@ -29,6 +29,8 @@ class SpanView: counters: Mapping[str, int] = field(default_factory=dict) rss_delta_mb: float | None = None started_at_utc: str = "" + # Top-N literal-free SQL shapes seen on this span -> occurrence count. 
+ db_fingerprints: Mapping[str, int] = field(default_factory=dict) @dataclass(frozen=True, slots=True) @@ -103,6 +105,20 @@ class DbCostRow: max_queries: int +@dataclass(frozen=True, slots=True) +class DbFingerprintRow: + """One literal-free SQL shape attributed to a span class, with how often it + ran — the decomposition of a span's ``db_queries`` total into named + statements, so an N+1 reads as "1200x SELECT evidence by memory_id" instead + of a bare count. ``table_hint`` is re-derived from the stored shape.""" + + span_name: str + surface: str + fingerprint: str + table_hint: str | None + count: int + + @dataclass(frozen=True, slots=True) class AgentTokenRow: """One MCP tool's cumulative token economics across the window.""" @@ -166,6 +182,7 @@ class AggregatesView: waste: tuple[WasteItem, ...] = () heaviest_cpu: OperationView | None = None pipeline: tuple[PipelineGroup, ...] = () + db_fingerprints: tuple[DbFingerprintRow, ...] = () @dataclass(frozen=True, slots=True) @@ -214,6 +231,7 @@ class TraceView: "AgentView", "AggregatesView", "DbCostRow", + "DbFingerprintRow", "McpToolAggregate", "OperationView", "PipelineGroup", diff --git a/tests/test_observability_reader.py b/tests/test_observability_reader.py index 7b72ae3d..baf70016 100644 --- a/tests/test_observability_reader.py +++ b/tests/test_observability_reader.py @@ -23,7 +23,7 @@ open_observability_store, ) from codeclone.observability.store.writer import write_operation -from codeclone.observability.views import TraceView +from codeclone.observability.views import DbFingerprintRow, TraceView def _seed(tmp_path: Path) -> None: @@ -282,6 +282,52 @@ def test_db_costs_aggregate_per_span(tmp_path: Path) -> None: assert by_name["memory.semantic.reindex"].max_queries == 1306 +def test_db_fingerprints_aggregate_per_shape(tmp_path: Path) -> None: + conn = open_observability_store(observability_store_path(tmp_path)) + try: + write_operation( + conn, + OperationRecord( + operation_id="F", + correlation_id="F", + 
surface="memory", + name="memory.projection.job", + started_at_utc="2026-06-09T00:00:01Z", + duration_ms=500.0, + status="ok", + spans=( + SpanRecord( + span_id="f1", + operation_id="F", + name="memory.experience.distill", + started_at_utc="2026-06-09T00:00:01Z", + duration_ms=400.0, + status="ok", + db_fingerprints={ + "select * from memory_evidence where memory_id = ?": 1200, + "select * from memory_subjects where id = ?": 500, + }, + ), + ), + ), + ) + finally: + conn.close() + + shapes = _read_trace(tmp_path, correlation_id="F").aggregates.db_fingerprints + + # Ranked by count desc; table_hint is re-derived from the stored shape. Assert + # the whole row at once (a flat run of per-field asserts clones other tests). + assert shapes[0] == DbFingerprintRow( + span_name="memory.experience.distill", + surface="memory", + fingerprint="select * from memory_evidence where memory_id = ?", + table_hint="memory_evidence", + count=1200, + ) + assert shapes[1].table_hint == "memory_subjects" + + def test_waste_ranks_no_op_and_high_payload(tmp_path: Path) -> None: conn = open_observability_store(observability_store_path(tmp_path)) try: diff --git a/tests/test_observability_render.py b/tests/test_observability_render.py index 8073d5b0..39c4b95d 100644 --- a/tests/test_observability_render.py +++ b/tests/test_observability_render.py @@ -24,6 +24,7 @@ AgentView, AggregatesView, DbCostRow, + DbFingerprintRow, McpToolAggregate, OperationView, PipelineGroup, @@ -97,6 +98,31 @@ def test_render_trace_html_is_branded() -> None: assert "finish_controlled_change" in html +def test_render_trace_html_shows_db_query_shapes() -> None: + agg = AggregatesView( + operation_count=1, + db_fingerprints=( + DbFingerprintRow( + span_name="memory.experience.distill", + surface="memory", + fingerprint="select * from memory_evidence where memory_id = ?", + table_hint="memory_evidence", + count=1200, + ), + ), + ) + trace = TraceView( + schema_version="1.0", + 
window_started_at_utc="2026-06-10T04:00:00Z", + window_ended_at_utc="2026-06-10T04:00:01Z", + aggregates=agg, + ) + html = render_trace_html(trace) + assert "DB query shapes" in html + assert "select * from memory_evidence where memory_id = ?" in html + assert "memory_evidence" in html + + def _cockpit_trace() -> TraceView: reindex = SpanView( span_id="sx", From edbb1cd9ee9299039b81cf7e4a706356d8d55b94 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Fri, 12 Jun 2026 13:02:44 +0500 Subject: [PATCH 248/318] feat(html): interpret DB query shapes in the cockpit (29.DB) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit describe_fingerprint() parses a normalized fingerprint into kind/table/ where_columns and a one-line predicate summary ('count by repo_root_digest, workflow_id'). The DB query shapes table is now SPAN | TABLE | KIND | COUNT | SHAPE — the interpreted predicate is the headline, the raw statement a bounded secondary line — instead of an unbounded raw-SQL column that overflowed. TOTAL/MAX timing columns remain pending B.2. --- codeclone/observability/db_fingerprint.py | 81 +++++++++++++++++++++- codeclone/observability/render_html.py | 20 ++++-- codeclone/observability/store/reader.py | 25 ++++--- codeclone/observability/views.py | 3 + tests/test_observability_db_fingerprint.py | 75 +++++++++++++++++++- tests/test_observability_reader.py | 2 + tests/test_observability_render.py | 14 +++- 7 files changed, 201 insertions(+), 19 deletions(-) diff --git a/codeclone/observability/db_fingerprint.py b/codeclone/observability/db_fingerprint.py index de3109f8..b9ce496e 100644 --- a/codeclone/observability/db_fingerprint.py +++ b/codeclone/observability/db_fingerprint.py @@ -34,6 +34,16 @@ _KINDS = frozenset({"select", "insert", "update", "delete"}) +# Everything after the first WHERE — the predicate columns live here. 
+_WHERE_RE = re.compile(r"\bwhere\b(.*)") +# An identifier immediately left of a comparison operator — a filter column. +_WHERE_COLUMN_RE = re.compile( + r"([a-z_][a-z0-9_$.]*)\s*(?:<=|>=|!=|<>|=|<|>|\bin\b|\bis\b|\blike\b)" +) +# The projection list between SELECT and FROM (count(*) / distinct x / columns). +_PROJECTION_RE = re.compile(r"^select\s+(.*?)\s+from\b") +_MAX_WHERE_COLUMNS = 4 + @dataclass(frozen=True, slots=True) class SqlFingerprint: @@ -44,6 +54,21 @@ class SqlFingerprint: kind: str # select | insert | update | delete | other +@dataclass(frozen=True, slots=True) +class SqlShape: + """Human-facing interpretation of a fingerprint for the cockpit. + + ``summary`` reads like "count by repo_root_digest, workflow_id" or + "by memory_id" — the predicate, not the raw SQL, so a query count decodes + into *what it filters on*. + """ + + kind: str + table: str | None + where_columns: tuple[str, ...] + summary: str + + def _normalize(sql: str) -> str: normalized = _WHITESPACE_RE.sub(" ", sql.strip().lower()) normalized = _STRING_RE.sub("?", normalized) @@ -74,4 +99,58 @@ def fingerprint_sql(sql: str) -> SqlFingerprint: ) -__all__ = ["SqlFingerprint", "fingerprint_sql"] +def _where_columns(normalized: str) -> tuple[str, ...]: + match = _WHERE_RE.search(normalized) + if not match: + return () + seen: list[str] = [] + for raw in _WHERE_COLUMN_RE.findall(match.group(1)): + # Strip a table/alias prefix (t.id -> id); keep first-seen order. 
+ column = raw.split(".")[-1] + if column not in seen: + seen.append(column) + return tuple(seen) + + +def _projection(normalized: str) -> str | None: + match = _PROJECTION_RE.match(normalized) + if not match: + return None + columns = match.group(1).strip() + if columns.startswith("count("): + return "count" + if columns.startswith("distinct "): + target = columns[len("distinct ") :].split(",", 1)[0].strip() + return f"distinct {target}" + return None + + +def _summarize(kind: str, normalized: str, where_columns: tuple[str, ...]) -> str: + shown = ", ".join(where_columns[:_MAX_WHERE_COLUMNS]) + if len(where_columns) > _MAX_WHERE_COLUMNS: + shown += ", …" + head = _projection(normalized) or "" + if shown and head: + return f"{head} by {shown}" + if shown: + return f"by {shown}" + if head: + return head + return "all rows" if kind == "select" else "" + + +def describe_fingerprint(fingerprint: str) -> SqlShape: + """Interpret a (normalized or raw) statement into a cockpit-facing shape: + its kind, table, predicate columns, and a one-line ``summary``. 
+ """ + fp = fingerprint_sql(fingerprint) + where_columns = _where_columns(fp.fingerprint) + return SqlShape( + kind=fp.kind, + table=fp.table_hint, + where_columns=where_columns, + summary=_summarize(fp.kind, fp.fingerprint, where_columns), + ) + + +__all__ = ["SqlFingerprint", "SqlShape", "describe_fingerprint", "fingerprint_sql"] diff --git a/codeclone/observability/render_html.py b/codeclone/observability/render_html.py index f0abfabc..a5214ba6 100644 --- a/codeclone/observability/render_html.py +++ b/codeclone/observability/render_html.py @@ -179,6 +179,9 @@ white-space:nowrap} td.t{font-family:var(--font)} th.r,td.r{text-align:right} +.shape{font-family:var(--font);font-size:12.5px} +.sqlraw{font-family:var(--mono);font-size:11px;color:var(--mute);max-width:440px; +overflow:hidden;text-overflow:ellipsis;white-space:nowrap;margin-top:3px} tr.flag td{background:var(--warn-soft)} .muted{color:var(--mute)} .empty{padding:30px;text-align:center;color:var(--mute);font-size:13px} @@ -683,11 +686,15 @@ def _db_cost(agg: AggregatesView) -> str: def _db_fingerprint_row(row: DbFingerprintRow) -> str: table = _esc(row.table_hint) if row.table_hint else "—" + shape = _esc(row.summary) if row.summary else "—" + raw = _esc(row.fingerprint) return ( f'{_esc(row.span_name)}' - f'{_esc(row.fingerprint)}' - f'{table}' - f'{row.count}' + f"{table}" + f'{_esc(row.kind.upper())}' + f'{row.count}' + f'
    {shape}
    ' + f'
    {raw}
    ' ) @@ -697,15 +704,16 @@ def _db_fingerprints(agg: AggregatesView) -> str: rows = "".join(_db_fingerprint_row(row) for row in agg.db_fingerprints) headers = ( ("Span", False), - ("Query shape", False), ("Table", False), + ("Kind", False), ("Count", True), + ("Shape", False), ) return _section( "DB query shapes", _table(headers, rows), - subtitle="The literal-free statement shapes behind the query counts — the " - "high-count rows name the N+1 to fix (batch these reads).", + subtitle="Each query count decoded into what it filters on — the high-count " + "rows name the N+1 to batch. Raw shape is the second line.", ) diff --git a/codeclone/observability/store/reader.py b/codeclone/observability/store/reader.py index 1f5c55e4..48628932 100644 --- a/codeclone/observability/store/reader.py +++ b/codeclone/observability/store/reader.py @@ -21,7 +21,7 @@ import orjson from ...contracts import PLATFORM_OBSERVABILITY_SCHEMA_VERSION -from ..db_fingerprint import fingerprint_sql +from ..db_fingerprint import describe_fingerprint from ..views import ( AgentTokenRow, AgentView, @@ -412,6 +412,21 @@ def _db_costs(flat: list[OperationView]) -> tuple[DbCostRow, ...]: return tuple(sorted(rows, key=lambda r: (-r.total_queries, r.span_name))) +def _fingerprint_row( + span_name: str, surface: str, fingerprint: str, count: int +) -> DbFingerprintRow: + shape = describe_fingerprint(fingerprint) + return DbFingerprintRow( + span_name=span_name, + surface=surface, + fingerprint=fingerprint, + table_hint=shape.table, + count=count, + kind=shape.kind, + summary=shape.summary, + ) + + def _db_fingerprints(flat: list[OperationView]) -> tuple[DbFingerprintRow, ...]: grouped: dict[tuple[str, str], int] = defaultdict(int) surface_of: dict[str, str] = {} @@ -421,13 +436,7 @@ def _db_fingerprints(flat: list[OperationView]) -> tuple[DbFingerprintRow, ...]: grouped[(span.name, fingerprint)] += count surface_of.setdefault(span.name, op.surface) rows = [ - DbFingerprintRow( - span_name=span_name, 
- surface=surface_of[span_name], - fingerprint=fingerprint, - table_hint=fingerprint_sql(fingerprint).table_hint, - count=count, - ) + _fingerprint_row(span_name, surface_of[span_name], fingerprint, count) for (span_name, fingerprint), count in grouped.items() ] rows.sort(key=lambda r: (-r.count, r.span_name, r.fingerprint)) diff --git a/codeclone/observability/views.py b/codeclone/observability/views.py index 259ac351..769a4052 100644 --- a/codeclone/observability/views.py +++ b/codeclone/observability/views.py @@ -117,6 +117,9 @@ class DbFingerprintRow: fingerprint: str table_hint: str | None count: int + kind: str = "other" + # Human predicate summary, e.g. "count by repo_root_digest, workflow_id". + summary: str = "" @dataclass(frozen=True, slots=True) diff --git a/tests/test_observability_db_fingerprint.py b/tests/test_observability_db_fingerprint.py index 970282ed..a8ad56be 100644 --- a/tests/test_observability_db_fingerprint.py +++ b/tests/test_observability_db_fingerprint.py @@ -8,7 +8,12 @@ import pytest -from codeclone.observability.db_fingerprint import SqlFingerprint, fingerprint_sql +from codeclone.observability.db_fingerprint import ( + SqlFingerprint, + SqlShape, + describe_fingerprint, + fingerprint_sql, +) @pytest.mark.parametrize( @@ -88,3 +93,71 @@ def test_fingerprint_caps_length() -> None: assert len(fp.fingerprint) <= 200 assert fp.kind == "select" assert fp.table_hint == "big_table" + + +@pytest.mark.parametrize( + ("sql", "kind", "table", "where_columns", "summary"), + [ + ( + "select count(*) from controller_events " + "where repo_root_digest = ? and (workflow_id is null or workflow_id = ?)", + "select", + "controller_events", + ("repo_root_digest", "workflow_id"), + "count by repo_root_digest, workflow_id", + ), + ( + "select distinct workflow_id from controller_events " + "where repo_root_digest = ? and id > ? 
and workflow_id is not null", + "select", + "controller_events", + ("repo_root_digest", "id", "workflow_id"), + "distinct workflow_id by repo_root_digest, id, workflow_id", + ), + ( + "SELECT * FROM memory_evidence WHERE memory_id = 7", + "select", + "memory_evidence", + ("memory_id",), + "by memory_id", + ), + ( + "SELECT * FROM controller_events", + "select", + "controller_events", + (), + "all rows", + ), + ( + "UPDATE memory_records SET status = ? WHERE id = ?", + "update", + "memory_records", + ("id",), + "by id", + ), + ], +) +def test_describe_fingerprint( + sql: str, + kind: str, + table: str, + where_columns: tuple[str, ...], + summary: str, +) -> None: + assert describe_fingerprint(sql) == SqlShape( + kind=kind, table=table, where_columns=where_columns, summary=summary + ) + + +def test_describe_fingerprint_caps_where_columns_in_summary() -> None: + shape = describe_fingerprint( + "select * from t where a = ? and b = ? and c = ? and d = ? and e = ?" + ) + assert shape.where_columns == ("a", "b", "c", "d", "e") + assert shape.summary == "by a, b, c, d, …" + + +def test_describe_fingerprint_insert_has_no_predicate_summary() -> None: + shape = describe_fingerprint("INSERT INTO memory_records (id) VALUES (?)") + assert shape.kind == "insert" + assert shape.summary == "" diff --git a/tests/test_observability_reader.py b/tests/test_observability_reader.py index baf70016..9cda6425 100644 --- a/tests/test_observability_reader.py +++ b/tests/test_observability_reader.py @@ -324,6 +324,8 @@ def test_db_fingerprints_aggregate_per_shape(tmp_path: Path) -> None: fingerprint="select * from memory_evidence where memory_id = ?", table_hint="memory_evidence", count=1200, + kind="select", + summary="by memory_id", ) assert shapes[1].table_hint == "memory_subjects" diff --git a/tests/test_observability_render.py b/tests/test_observability_render.py index 39c4b95d..c6b78537 100644 --- a/tests/test_observability_render.py +++ b/tests/test_observability_render.py @@ -108,6 
+108,8 @@ def test_render_trace_html_shows_db_query_shapes() -> None: fingerprint="select * from memory_evidence where memory_id = ?", table_hint="memory_evidence", count=1200, + kind="select", + summary="by memory_id", ), ), ) @@ -118,9 +120,15 @@ def test_render_trace_html_shows_db_query_shapes() -> None: aggregates=agg, ) html = render_trace_html(trace) - assert "DB query shapes" in html - assert "select * from memory_evidence where memory_id = ?" in html - assert "memory_evidence" in html + # Interpreted columns (shape/kind/table) plus the raw shape as a secondary line. + for needle in ( + "DB query shapes", + "by memory_id", + ">SELECT<", + "memory_evidence", + "select * from memory_evidence where memory_id = ?", + ): + assert needle in html def _cockpit_trace() -> TraceView: From 5fc5027fca0fc1a172f75fd963db08f6aad32004 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Fri, 12 Jun 2026 13:07:37 +0500 Subject: [PATCH 249/318] fix(html): align correlated-chains metric columns The chains rows used a variable last grid column, so per-row meta (reason chip / payload / rss) stole width from the flexible name column and floated the bars and durations around as nesting changed. Give .oprow/.spanrow a fixed 5-column grid [name | bar | dur | mem | extra] with rss in its own mem cell and payload/reason-chip in extra, so every metric column is right- anchored and lines up across depths. Drop the now-orphan _rss_badge + .rss/ .meta CSS. 
--- codeclone/observability/render_html.py | 27 +++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/codeclone/observability/render_html.py b/codeclone/observability/render_html.py index a5214ba6..46d98f57 100644 --- a/codeclone/observability/render_html.py +++ b/codeclone/observability/render_html.py @@ -128,9 +128,10 @@ .bar{display:block;width:100%;height:7px} .dur{font-family:var(--mono);font-size:12.5px;text-align:right;white-space:nowrap; color:var(--dim)} -.rss{font-family:var(--mono);font-size:11.5px;color:var(--warn);white-space:nowrap; -font-weight:550} -.meta{display:flex;align-items:center;justify-content:flex-end;gap:8px;min-width:0} +.mem{font-family:var(--mono);font-size:11.5px;color:var(--warn);font-weight:550; +text-align:right;white-space:nowrap;overflow:hidden} +.extra{display:flex;align-items:center;justify-content:flex-end;gap:6px; +min-width:0;overflow:hidden} .pay{font-family:var(--mono);font-size:11px;color:var(--mute);white-space:nowrap} .chain{padding:6px 16px 12px} .group{padding:13px 0;border-top:1px solid var(--border)} @@ -140,8 +141,8 @@ .crumb .cname{font-family:var(--mono);font-size:12px;color:var(--text)} .crumb .arrow{color:var(--mute);font-size:13px} .oprow,.spanrow{display:grid; -grid-template-columns:minmax(0,1fr) 160px 64px minmax(92px,auto); -align-items:center;column-gap:14px;row-gap:2px;padding:5px 0} +grid-template-columns:minmax(0,1fr) 140px 56px 70px 120px; +align-items:center;column-gap:13px;row-gap:2px;padding:5px 0} .lead-cell{display:flex;align-items:center;gap:9px;min-width:0} .opname{font-family:var(--mono);font-size:13px;font-weight:550;overflow:hidden; text-overflow:ellipsis;white-space:nowrap} @@ -257,11 +258,6 @@ def _rss_text(value: float | None) -> str: return "" if value is None or value < 0.05 else f"Δ{_mb(value)}" -def _rss_badge(value: float | None) -> str: - text = _rss_text(value) - return f'{text}' if text else "" - - def _payload(op: OperationView) -> str: parts = 
[] if op.request_bytes is not None: @@ -439,25 +435,30 @@ def _breadcrumb(lineage: list[OperationView]) -> str: def _op_row(op: OperationView, group_max: float) -> str: + # Fixed metric columns: name | bar | dur | mem | extra. Splitting rss (mem) + # and payload (extra) into their own cells keeps every column right-anchored + # so bars and durations line up across nesting depths. return ( '
    ' f'{_surface_badge(op.surface)}{_esc(op.name)}' f"{_bar(op.duration_ms, group_max)}" f'{_ms(op.duration_ms)}' - f'{_rss_badge(op.rss_delta_mb)}{_payload(op)}
    ' + f'{_rss_text(op.rss_delta_mb)}' + f'{_payload(op)}' ) def _span_row(span: SpanView, op_duration: float) -> str: color = "var(--warn)" if span.reason_kind == "unknown" else "var(--accent)" - meta = f"{_reason_chip(span.reason_kind)}{_rss_badge(span.rss_delta_mb)}" return ( '
    ' f'' f'{_esc(span.name)}' f"{_bar(span.duration_ms, op_duration, color=color)}" f'{_ms(span.duration_ms)}' - f'{meta}{_counters(span.counters)}
    ' + f'{_rss_text(span.rss_delta_mb)}' + f'{_reason_chip(span.reason_kind)}' + f"{_counters(span.counters)}" ) From a9325be937a8a989143b30da4576f69abecc404a Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Fri, 12 Jun 2026 13:50:39 +0500 Subject: [PATCH 250/318] feat(core): add query_platform_observability slicer core (29 RFC) A read-only, sectioned diagnostics slicer over build_trace_view/AggregatesView (RFC specs/rfc-29-observability-query-tool). Nine bounded aggregate sections (summary, slow_operations, memory_pipeline_cost, db_cost, agent_context, mcp_tool_matrix, correlated_chains, costly_noops, pipeline) behind a dev-only guard envelope, with top_diagnostics + recommended_next_sections routing, limit clamp [1,50]->10, full->normal downgrade (requested_detail_level echoed), disabled-vs-no_store split, ignored_parameters echo. Slicer, not export API: numeric metrics only, never raw SQL/payload, never the full trace. MCP registration lands in a follow-up. --- codeclone/observability/query.py | 423 ++++++++++++++++++++++++++++++ tests/test_observability_query.py | 245 +++++++++++++++++ 2 files changed, 668 insertions(+) create mode 100644 codeclone/observability/query.py create mode 100644 tests/test_observability_query.py diff --git a/codeclone/observability/query.py b/codeclone/observability/query.py new file mode 100644 index 00000000..23916eae --- /dev/null +++ b/codeclone/observability/query.py @@ -0,0 +1,423 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""``query_platform_observability`` — a sectioned, read-only diagnostics slicer +over the Phase 29 runtime telemetry (RFC specs/rfc-29-observability-query-tool). 
+ +A **slicer, not a trace export API**: each call returns one bounded *section* +projected from the already-computed ``AggregatesView``; no response embeds the +full trace. Dev-only telemetry about the runtime of *CodeClone itself* — it is +NOT a repository-quality signal and MUST NOT affect reports, gates, baselines, +memory facts, or edit authorization. Numeric metrics only: no raw SQL, no raw +payload bodies, no prompts. +""" + +from __future__ import annotations + +from collections.abc import Callable +from pathlib import Path + +from ..config.observability import resolve_observability_config +from .store.reader import build_trace_view, open_observability_store_readonly +from .views import AggregatesView, OperationView, TraceView + +_DETAIL_LEVELS = ("compact", "normal", "full") +_LIMIT_MIN = 1 +_LIMIT_MAX = 50 +_LIMIT_DEFAULT = 10 +_COMPACT_ROWS = 5 +_CHAIN_CHILD_CAP = 12 +_MAX_DIAGNOSTICS = 3 + +# Heuristic thresholds — telemetry hints, NOT report findings. +_DB_CHATTY_QPC = 200 +_CONTEXT_HEAVY_PCT = 25 +_MEMORY_HEAVY_MB = 200.0 +_CONTEXT_PRESSURE_TOKENS = 8000 + +_AGGREGATE_SECTIONS = ( + "summary", + "slow_operations", + "memory_pipeline_cost", + "db_cost", + "agent_context", + "mcp_tool_matrix", + "correlated_chains", + "costly_noops", + "pipeline", +) + + +def _round1(value: float | None) -> float | None: + return round(value, 1) if value is not None else None + + +def _db_per_call(total_queries: int, span_count: int) -> int: + return round(total_queries / span_count) if span_count else 0 + + +def _envelope(section: str, detail_level: str, window: str) -> dict[str, object]: + return { + "surface": "platform_observability", + "audience": "codeclone_development", + "user_facing": False, + "affects_analysis_truth": False, + "affects_edit_permission": False, + "section": section, + "detail_level": detail_level, + "window": window, + } + + +def _resolve_detail(detail_level: str, warnings: list[str]) -> str: + if detail_level not in _DETAIL_LEVELS: + 
warnings.append(f"unknown detail_level {detail_level!r}; using compact") + return "compact" + if detail_level == "full": + # No aggregate section supports full; only operation_detail/span_detail + # (a future phase) do. Downgrade rather than error so an agent never + # stalls mid-diagnosis. + warnings.append( + "full detail is only available for operation_detail/span_detail; " + "downgraded to normal" + ) + return "normal" + return detail_level + + +def _clamp_limit(limit: int, warnings: list[str]) -> int: + if not isinstance(limit, int) or isinstance(limit, bool) or limit < _LIMIT_MIN: + warnings.append(f"limit {limit!r} invalid; using {_LIMIT_DEFAULT}") + return _LIMIT_DEFAULT + if limit > _LIMIT_MAX: + warnings.append(f"limit {limit} clamped to {_LIMIT_MAX}") + return _LIMIT_MAX + return limit + + +def _ignored_parameters(operation_id: str | None, span_id: str | None) -> list[str]: + # P1 has only aggregate sections; the by-id selectors are not consumed yet. + ignored = [] + if operation_id is not None: + ignored.append("operation_id") + if span_id is not None: + ignored.append("span_id") + return ignored + + +def _absent_status() -> str: + # Two distinct diagnoses: observability is configured off ("disabled") vs. + # it could collect but no store exists for this root yet ("no_store"). 
+ return "no_store" if resolve_observability_config().enabled else "disabled" + + +def _build_trace(conn: object, window: str) -> TraceView: + if window == "latest": + return build_trace_view(conn) # type: ignore[arg-type] + return build_trace_view(conn, correlation_id=window) # type: ignore[arg-type] + + +def _slow_operations(agg: AggregatesView, cap: int) -> list[dict[str, object]]: + return [ + { + "operation": op.name, + "surface": op.surface, + "duration_ms": round(op.duration_ms, 1), + "rss_delta_mb": _round1(op.rss_delta_mb), + } + for op in agg.slowest[:cap] + ] + + +def _memory_pipeline_cost(agg: AggregatesView, cap: int) -> list[dict[str, object]]: + return [ + { + "span": s.name, + "operation": s.operation_name, + "duration_ms": round(s.duration_ms, 1), + "produced": s.produced, + "skipped": s.skipped, + "no_op": s.no_op, + } + for s in agg.semantic_costs[:cap] + ] + + +def _db_cost(agg: AggregatesView, cap: int) -> list[dict[str, object]]: + rows: list[dict[str, object]] = [] + for r in agg.db_costs[:cap]: + per_call = _db_per_call(r.total_queries, r.span_count) + rows.append( + { + "span": r.span_name, + "calls": r.span_count, + "queries": r.total_queries, + "writes": r.total_writes, + "queries_per_call": per_call, + "verdict": "query_chatty" if per_call >= _DB_CHATTY_QPC else "ok", + } + ) + return rows + + +def _mcp_tool_matrix(agg: AggregatesView, cap: int) -> list[dict[str, object]]: + return [ + { + "tool": t.name, + "calls": t.count, + "p50_ms": round(t.p50_duration_ms, 1), + "p95_ms": round(t.p95_duration_ms, 1), + "p95_request_bytes": t.p95_request_bytes, + "p95_response_bytes": t.p95_response_bytes, + "p95_response_tokens": t.p95_response_tokens, + } + for t in agg.mcp_tools[:cap] + ] + + +def _costly_noops(agg: AggregatesView, cap: int) -> list[dict[str, object]]: + noops = [s for s in agg.semantic_costs if s.no_op] + return [ + { + "span": s.name, + "operation": s.operation_name, + "duration_ms": round(s.duration_ms, 1), + "rss_delta_mb": 
_round1(s.rss_delta_mb), + } + for s in noops[:cap] + ] + + +def _pipeline(agg: AggregatesView, cap: int) -> list[dict[str, object]]: + return [ + { + "subsystem": g.name, + "operations": g.op_count, + "duration_ms": round(g.duration_ms, 1), + "cpu_ms": round(g.cpu_ms, 1), + } + for g in agg.pipeline[:cap] + ] + + +def _agent_context_body(agg: AggregatesView, cap: int) -> dict[str, object]: + agent = agg.agent + if agent is None: + return {"total_response_tokens": 0, "rows": []} + total = agent.response_tokens + rows = [ + { + "tool": c.name, + "calls": c.calls, + "response_tokens": c.response_tokens, + "context_percent": round(100 * c.response_tokens / total) if total else 0, + "verdict": ( + "context_heavy" + if total and 100 * c.response_tokens / total >= _CONTEXT_HEAVY_PCT + else "ok" + ), + } + for c in agent.consumers[:cap] + ] + return {"total_response_tokens": total, "rows": rows} + + +def _chain_descendant_names(op: OperationView) -> list[str]: + names: list[str] = [] + for child in op.children: + names.append(child.name) + names.extend(span.name for span in child.spans) + names.extend(_chain_descendant_names(child)) + return names + + +def _chain_peak_rss(op: OperationView) -> float | None: + values = [op.rss_delta_mb] if op.rss_delta_mb is not None else [] + values.extend(s.rss_delta_mb for s in op.spans if s.rss_delta_mb is not None) + for child in op.children: + child_peak = _chain_peak_rss(child) + if child_peak is not None: + values.append(child_peak) + return max(values) if values else None + + +def _correlated_chains(trace: TraceView, cap: int) -> list[dict[str, object]]: + return [ + { + "root": root.name, + "children": _chain_descendant_names(root)[:_CHAIN_CHILD_CAP], + "duration_ms": round(root.duration_ms, 1), + "peak_rss_delta_mb": _round1(_chain_peak_rss(root)), + } + for root in trace.operation_tree[:cap] + ] + + +def _memory_diagnostic(agg: AggregatesView) -> dict[str, object] | None: + span = agg.peak_memory_span + if ( + span is None + or 
span.rss_delta_mb is None + or span.rss_delta_mb < _MEMORY_HEAVY_MB + ): + return None + return { + "kind": "memory", + "message": ( + f"{span.name} used {round(span.rss_delta_mb)} MB " + f"(produced {span.produced})." + ), + } + + +def _db_diagnostic(agg: AggregatesView) -> dict[str, object] | None: + if not agg.db_costs: + return None + top = agg.db_costs[0] + per_call = _db_per_call(top.total_queries, top.span_count) + if per_call < _DB_CHATTY_QPC: + return None + return { + "kind": "db", + "message": f"{top.span_name} executed {per_call} queries per call.", + } + + +def _context_diagnostic(agg: AggregatesView) -> dict[str, object] | None: + agent = agg.agent + if agent is None or not agent.consumers or not agent.response_tokens: + return None + lead = agent.consumers[0] + pct = round(100 * lead.response_tokens / agent.response_tokens) + if pct < _CONTEXT_HEAVY_PCT: + return None + return { + "kind": "context", + "message": f"{lead.name} consumed {pct}% of returned tokens.", + } + + +def _top_diagnostics(agg: AggregatesView) -> list[dict[str, object]]: + candidates = ( + _memory_diagnostic(agg), + _db_diagnostic(agg), + _context_diagnostic(agg), + ) + return [d for d in candidates if d is not None][:_MAX_DIAGNOSTICS] + + +def _summary_body(trace: TraceView) -> dict[str, object]: + agg = trace.aggregates + return { + "operations": agg.operation_count, + "peak_rss_delta_mb": _round1(agg.max_rss_delta_mb), + "context_pressure_tokens": agg.agent.response_tokens if agg.agent else 0, + "costly_noops": sum(1 for s in agg.semantic_costs if s.no_op), + "top_diagnostics": _top_diagnostics(agg), + } + + +def _recommended_next_sections( + section: str, agg: AggregatesView +) -> list[dict[str, object]]: + if section != "summary": + return [] + recs: list[dict[str, object]] = [] + if agg.db_costs: + top = agg.db_costs[0] + if _db_per_call(top.total_queries, top.span_count) >= _DB_CHATTY_QPC: + recs.append( + { + "section": "db_cost", + "reason": f"high query count in 
{top.span_name}.", + } + ) + if agg.agent and agg.agent.response_tokens >= _CONTEXT_PRESSURE_TOKENS: + recs.append( + {"section": "agent_context", "reason": "high context-token pressure."} + ) + if any(s.no_op for s in agg.semantic_costs): + recs.append( + {"section": "costly_noops", "reason": "a span ran but produced nothing."} + ) + return recs + + +_ROW_SECTIONS: dict[str, Callable[[AggregatesView, int], list[dict[str, object]]]] = { + "slow_operations": _slow_operations, + "memory_pipeline_cost": _memory_pipeline_cost, + "db_cost": _db_cost, + "mcp_tool_matrix": _mcp_tool_matrix, + "costly_noops": _costly_noops, + "pipeline": _pipeline, +} + + +def query_platform_observability( + *, + root: str | Path, + section: str, + detail_level: str = "compact", + limit: int = _LIMIT_DEFAULT, + window: str = "latest", + operation_id: str | None = None, + span_id: str | None = None, +) -> dict[str, object]: + """Return one bounded telemetry section. Read-only; never raises on missing + data — an absent store yields an inert ``disabled``/``no_store`` envelope. 
+ """ + warnings: list[str] = [] + detail = _resolve_detail(detail_level, warnings) + clamped = _clamp_limit(limit, warnings) + row_cap = clamped if detail == "normal" else min(clamped, _COMPACT_ROWS) + + response = _envelope(section, detail, window) + if detail != detail_level: + response["requested_detail_level"] = detail_level + ignored = _ignored_parameters(operation_id, span_id) + if ignored: + response["ignored_parameters"] = ignored + + if section not in _AGGREGATE_SECTIONS: + response["status"] = "invalid_section" + response["error"] = f"unknown section {section!r}" + response["available_sections"] = list(_AGGREGATE_SECTIONS) + response["rows"] = [] + return _finalize(response, warnings) + + conn = open_observability_store_readonly(Path(root)) + if conn is None: + response["status"] = _absent_status() + response["rows"] = [] + return _finalize(response, warnings) + try: + trace = _build_trace(conn, window) + finally: + conn.close() + + agg = trace.aggregates + if section == "summary": + response.update(_summary_body(trace)) + elif section == "agent_context": + response.update(_agent_context_body(agg, row_cap)) + elif section == "correlated_chains": + response["rows"] = _correlated_chains(trace, row_cap) + else: + response["rows"] = _ROW_SECTIONS[section](agg, row_cap) + + recommended = _recommended_next_sections(section, agg) + if recommended: + response["recommended_next_sections"] = recommended + return _finalize(response, warnings) + + +def _finalize(response: dict[str, object], warnings: list[str]) -> dict[str, object]: + if warnings: + response["warnings"] = warnings + return response + + +__all__ = ["query_platform_observability"] diff --git a/tests/test_observability_query.py b/tests/test_observability_query.py new file mode 100644 index 00000000..20d85c65 --- /dev/null +++ b/tests/test_observability_query.py @@ -0,0 +1,245 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from pathlib import Path +from typing import cast + +import pytest + +from codeclone.config.observability import ObservabilityConfig +from codeclone.observability import query as query_mod +from codeclone.observability.models import OperationRecord, ProfileSample, SpanRecord +from codeclone.observability.query import query_platform_observability +from codeclone.observability.store.schema import ( + observability_store_path, + open_observability_store, +) +from codeclone.observability.store.writer import write_operation + + +def _rows(value: object) -> list[dict[str, object]]: + return cast("list[dict[str, object]]", value) + + +def _texts(value: object) -> list[str]: + return cast("list[str]", value) + + +def _seed(tmp_path: Path) -> None: + conn = open_observability_store(observability_store_path(tmp_path)) + try: + write_operation( + conn, + OperationRecord( + operation_id="F", + correlation_id="A", + surface="mcp", + name="mcp.finish_controlled_change", + started_at_utc="2026-06-12T00:00:00Z", + duration_ms=975.0, + status="ok", + response_bytes=8800, + response_tokens=2200, + ), + ) + write_operation( + conn, + OperationRecord( + operation_id="S", + correlation_id="A", + surface="memory", + name="memory.projection.spawn", + parent_operation_id="F", + started_at_utc="2026-06-12T00:00:00Z", + duration_ms=3.0, + status="ok", + ), + ) + write_operation( + conn, + OperationRecord( + operation_id="J", + correlation_id="A", + surface="memory", + name="memory.projection.job", + parent_operation_id="S", + started_at_utc="2026-06-12T00:00:01Z", + duration_ms=2170.0, + status="ok", + spans=( + SpanRecord( + span_id="r", + operation_id="J", + name="memory.semantic.reindex", + started_at_utc="2026-06-12T00:00:01Z", + duration_ms=2120.0, + status="ok", 
+ counters={"db_queries": 1370, "embedded": 0}, + profile=ProfileSample(rss_delta_mb=440.0), + ), + SpanRecord( + span_id="d", + operation_id="J", + name="memory.experience.distill", + started_at_utc="2026-06-12T00:00:03Z", + duration_ms=33.0, + status="ok", + counters={ + "db_queries": 1892, + "db_writes": 773, + "experiences_distilled": 47, + }, + ), + ), + ), + ) + write_operation( + conn, + OperationRecord( + operation_id="G", + correlation_id="B", + surface="mcp", + name="mcp.get_relevant_memory", + started_at_utc="2026-06-12T00:00:05Z", + duration_ms=277.0, + status="ok", + response_bytes=18900, + response_tokens=8900, + request_tokens=356, + ), + ) + finally: + conn.close() + + +def test_summary_returns_envelope_diagnostics_and_routing(tmp_path: Path) -> None: + _seed(tmp_path) + out = query_platform_observability(root=tmp_path, section="summary") + assert out["surface"] == "platform_observability" + assert out["user_facing"] is False + assert out["operations"] == 4 + assert out["costly_noops"] == 1 + kinds = {d["kind"] for d in _rows(out["top_diagnostics"])} + assert {"memory", "db", "context"} <= kinds + routed = {r["section"] for r in _rows(out["recommended_next_sections"])} + assert {"db_cost", "agent_context", "costly_noops"} <= routed + + +def test_summary_does_not_embed_raw_trace(tmp_path: Path) -> None: + _seed(tmp_path) + out = query_platform_observability(root=tmp_path, section="summary") + for forbidden in ( + "operation_tree", + "spans", + "rows", + "trace", + "correlated_operations", + ): + assert forbidden not in out + + +def test_full_downgrades_to_normal_with_warning(tmp_path: Path) -> None: + _seed(tmp_path) + out = query_platform_observability( + root=tmp_path, section="db_cost", detail_level="full" + ) + assert out["detail_level"] == "normal" + assert out["requested_detail_level"] == "full" + assert any("downgraded to normal" in w for w in _texts(out["warnings"])) + + +def test_detail_selectors_ignored_and_echoed(tmp_path: Path) -> None: + 
_seed(tmp_path) + out = query_platform_observability( + root=tmp_path, section="db_cost", operation_id="F", span_id="r" + ) + assert out["ignored_parameters"] == ["operation_id", "span_id"] + + +def test_absent_store_is_inert_not_error(tmp_path: Path) -> None: + out = query_platform_observability(root=tmp_path, section="summary") + assert out["status"] in {"disabled", "no_store"} + assert out["rows"] == [] + assert out["user_facing"] is False + + +def test_disabled_vs_no_store_split( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + monkeypatch.setattr( + query_mod, "resolve_observability_config", lambda: ObservabilityConfig(True) + ) + assert ( + query_platform_observability(root=tmp_path, section="db_cost")["status"] + == "no_store" + ) + monkeypatch.setattr( + query_mod, "resolve_observability_config", lambda: ObservabilityConfig(False) + ) + assert ( + query_platform_observability(root=tmp_path, section="db_cost")["status"] + == "disabled" + ) + + +def test_limit_is_clamped_and_floored(tmp_path: Path) -> None: + _seed(tmp_path) + big = query_platform_observability( + root=tmp_path, section="db_cost", detail_level="normal", limit=10000 + ) + assert any("clamped to 50" in w for w in _texts(big["warnings"])) + bad = query_platform_observability(root=tmp_path, section="db_cost", limit=0) + assert any("invalid" in w for w in _texts(bad["warnings"])) + + +def test_aggregate_section_is_projection_without_raw_trace(tmp_path: Path) -> None: + _seed(tmp_path) + out = query_platform_observability( + root=tmp_path, section="db_cost", detail_level="normal" + ) + forbidden = {"operation_tree", "spans", "trace", "fingerprint", "sql", "payload"} + assert not (forbidden & set(out)) + for row in _rows(out["rows"]): + assert not (forbidden & set(row)) + assert isinstance(row["queries"], int) + distill = next( + r for r in _rows(out["rows"]) if r["span"] == "memory.experience.distill" + ) + assert distill["queries_per_call"] == 1892 + assert distill["verdict"] == 
"query_chatty" + + +def test_unknown_section_returns_validation_envelope(tmp_path: Path) -> None: + _seed(tmp_path) + out = query_platform_observability(root=tmp_path, section="bogus") + assert out["status"] == "invalid_section" + assert out["section"] == "bogus" + assert "summary" in _texts(out["available_sections"]) + assert out["rows"] == [] + + +def test_correlated_chains_flattens_root_and_children(tmp_path: Path) -> None: + _seed(tmp_path) + out = query_platform_observability(root=tmp_path, section="correlated_chains") + chain = next( + r for r in _rows(out["rows"]) if r["root"] == "mcp.finish_controlled_change" + ) + assert "memory.projection.job" in _texts(chain["children"]) + assert "memory.semantic.reindex" in _texts(chain["children"]) + assert chain["peak_rss_delta_mb"] == 440.0 + + +def test_agent_context_ranks_token_consumers(tmp_path: Path) -> None: + _seed(tmp_path) + out = query_platform_observability( + root=tmp_path, section="agent_context", detail_level="normal" + ) + assert out["total_response_tokens"] == 11100 + top = _rows(out["rows"])[0] + assert top["tool"] == "mcp.get_relevant_memory" + assert top["verdict"] == "context_heavy" From 802e6218e47ee83d864a34117445c083c9f2f682 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Fri, 12 Jun 2026 14:04:45 +0500 Subject: [PATCH 251/318] feat(mcp): register query_platform_observability tool (29 RFC) Expose the dev-only observability slicer over MCP: title + positioning/anti- inference description (messages), Annotated param types, a read-only MCPSession handler delegating to observability.query, a service delegate, and the server @tool registration. Regenerate the additive tool-schema and public-API-surface contract snapshots, and extend the expected tool-name / readOnlyHint sets. Read-only, idempotent, structured output; numeric metrics only. 
--- .../surfaces/mcp/_session_state_mixin.py | 28 ++++++++ codeclone/surfaces/mcp/messages/params.py | 36 ++++++++++ codeclone/surfaces/mcp/messages/tools.py | 18 +++++ codeclone/surfaces/mcp/server.py | 31 +++++++++ codeclone/surfaces/mcp/service.py | 5 ++ .../contract_snapshots/mcp_tool_schemas.json | 67 +++++++++++++++++++ .../public_api_surface.json | 4 ++ tests/test_mcp_server.py | 5 ++ tests/test_mcp_service.py | 11 +++ 9 files changed, 205 insertions(+) diff --git a/codeclone/surfaces/mcp/_session_state_mixin.py b/codeclone/surfaces/mcp/_session_state_mixin.py index 213a911e..cf94d9d8 100644 --- a/codeclone/surfaces/mcp/_session_state_mixin.py +++ b/codeclone/surfaces/mcp/_session_state_mixin.py @@ -1086,6 +1086,34 @@ def get_help( payload["warnings"] = list(spec.warnings) return payload + def query_platform_observability( + self, + *, + root: str, + section: str, + detail_level: str = "compact", + limit: int = 10, + window: str = "latest", + operation_id: str | None = None, + span_id: str | None = None, + ) -> dict[str, object]: + # Dev-only telemetry slicer; read-only, never touches analysis/memory/ + # audit state. Local import keeps the MCP session import-light and avoids + # shadowing this method name. 
+ from ...observability.query import ( + query_platform_observability as _query_observability, + ) + + return _query_observability( + root=root, + section=section, + detail_level=detail_level, + limit=limit, + window=window, + operation_id=operation_id, + span_id=span_id, + ) + def generate_pr_summary( self, *, diff --git a/codeclone/surfaces/mcp/messages/params.py b/codeclone/surfaces/mcp/messages/params.py index c21e886d..34a55b0f 100644 --- a/codeclone/surfaces/mcp/messages/params.py +++ b/codeclone/surfaces/mcp/messages/params.py @@ -546,3 +546,39 @@ ), ), ] + +ObservabilitySectionParam = Annotated[ + str, + Field( + description=( + "Telemetry section to project: summary | slow_operations | " + "memory_pipeline_cost | db_cost | agent_context | mcp_tool_matrix | " + "correlated_chains | costly_noops | pipeline." + ), + ), +] +ObservabilityDetailParam = Annotated[ + str, + Field( + description=( + "compact (bounded top rows) or normal (rows up to limit); full " + "downgrades to normal for aggregate sections." + ), + ), +] +ObservabilityLimitParam = Annotated[ + int, + Field(description="Row cap per section; clamped to [1, 50], else 10."), +] +ObservabilityWindowParam = Annotated[ + str, + Field(description="'latest' for the recent window, or a correlation_id."), +] +ObservabilityOperationIdParam = Annotated[ + str | None, + Field(description="Reserved for detail sections; echoed in ignored_parameters."), +] +ObservabilitySpanIdParam = Annotated[ + str | None, + Field(description="Reserved for detail sections; echoed in ignored_parameters."), +] diff --git a/codeclone/surfaces/mcp/messages/tools.py b/codeclone/surfaces/mcp/messages/tools.py index 91e607a4..e292941a 100644 --- a/codeclone/surfaces/mcp/messages/tools.py +++ b/codeclone/surfaces/mcp/messages/tools.py @@ -111,6 +111,23 @@ "verification_profiles." ) +QUERY_PLATFORM_OBSERVABILITY: Final = ( + "Read-only sectioned diagnostics over CodeClone's own runtime telemetry " + "(Phase 29). 
Observability is for CodeClone self-development and " + "diagnostics. It is NOT part of user-facing CodeClone analysis. It MUST " + "NOT affect reports, gates, baselines, memory facts, or edit " + "authorization. A slicer, not a trace export API: each call returns one " + "bounded section, never the full trace, numeric metrics only (no raw SQL " + "or payloads). Anti-inference guard: this describes the runtime of " + "CodeClone itself, not the user repository — high DB queries != repository " + "bad; high MCP payload != code quality low; hot semantic reindex != unsafe " + "change. Sections: summary, slow_operations, memory_pipeline_cost, " + "db_cost, agent_context, mcp_tool_matrix, correlated_chains, costly_noops, " + "pipeline. detail_level compact|normal (full downgrades to normal for " + "aggregate sections). Intended for CodeClone maintainers and development " + "agents; do not use it to make user-facing quality claims about a repo." +) + EVALUATE_GATES: Final = ( "Evaluate CodeClone gate conditions against an existing MCP run without " "modifying baselines or exiting the process." 
@@ -275,6 +292,7 @@ TITLE_CREATE_REVIEW_RECEIPT: Final = "Create Review Receipt" TITLE_VALIDATE_REVIEW_CLAIMS: Final = "Validate Review Claims" TITLE_HELP: Final = "Help" +TITLE_QUERY_PLATFORM_OBSERVABILITY: Final = "Query Platform Observability" TITLE_EVALUATE_GATES: Final = "Evaluate Gates" TITLE_GET_REPORT_SECTION: Final = "Get Report Section" TITLE_LIST_FINDINGS: Final = "List Findings" diff --git a/codeclone/surfaces/mcp/server.py b/codeclone/surfaces/mcp/server.py index d98934e0..77b43093 100644 --- a/codeclone/surfaces/mcp/server.py +++ b/codeclone/surfaces/mcp/server.py @@ -118,6 +118,12 @@ MinComplexityParam, MinSeverityParam, NoveltyParam, + ObservabilityDetailParam, + ObservabilityLimitParam, + ObservabilityOperationIdParam, + ObservabilitySectionParam, + ObservabilitySpanIdParam, + ObservabilityWindowParam, OffsetParam, OnConflictParam, OptionalIntentIdParam, @@ -803,6 +809,31 @@ def get_report_section( limit=limit, ) + @tool( + title=mcp_tools.TITLE_QUERY_PLATFORM_OBSERVABILITY, + description=mcp_tools.QUERY_PLATFORM_OBSERVABILITY, + annotations=read_only_tool, + structured_output=True, + ) + def query_platform_observability( + root: RootParam, + section: ObservabilitySectionParam, + detail_level: ObservabilityDetailParam = "compact", + limit: ObservabilityLimitParam = 10, + window: ObservabilityWindowParam = "latest", + operation_id: ObservabilityOperationIdParam = None, + span_id: ObservabilitySpanIdParam = None, + ) -> dict[str, object]: + return service.query_platform_observability( + root=root, + section=section, + detail_level=detail_level, + limit=limit, + window=window, + operation_id=operation_id, + span_id=span_id, + ) + @tool( title=mcp_tools.TITLE_LIST_FINDINGS, description=mcp_tools.LIST_FINDINGS, diff --git a/codeclone/surfaces/mcp/service.py b/codeclone/surfaces/mcp/service.py index 2a224d54..5421bd92 100644 --- a/codeclone/surfaces/mcp/service.py +++ b/codeclone/surfaces/mcp/service.py @@ -53,6 +53,11 @@ def get_production_triage( def 
get_help(self: _RunDictService, **params: object) -> dict[str, object]: return self._run_dict("get_help", **params) + def query_platform_observability( + self: _RunDictService, **params: object + ) -> dict[str, object]: + return self._run_dict("query_platform_observability", **params) + def get_blast_radius( self: _RunDictService, **params: object, diff --git a/tests/fixtures/contract_snapshots/mcp_tool_schemas.json b/tests/fixtures/contract_snapshots/mcp_tool_schemas.json index 7d7815be..36c0e8d1 100644 --- a/tests/fixtures/contract_snapshots/mcp_tool_schemas.json +++ b/tests/fixtures/contract_snapshots/mcp_tool_schemas.json @@ -2660,6 +2660,73 @@ "type": "object" } }, + { + "name": "query_platform_observability", + "input_schema": { + "properties": { + "root": { + "description": "Absolute repository root path.", + "title": "Root", + "type": "string" + }, + "section": { + "description": "Telemetry section to project: summary | slow_operations | memory_pipeline_cost | db_cost | agent_context | mcp_tool_matrix | correlated_chains | costly_noops | pipeline.", + "title": "Section", + "type": "string" + }, + "detail_level": { + "default": "compact", + "description": "compact (bounded top rows) or normal (rows up to limit); full downgrades to normal for aggregate sections.", + "title": "Detail Level", + "type": "string" + }, + "limit": { + "default": 10, + "description": "Row cap per section; clamped to [1, 50], else 10.", + "title": "Limit", + "type": "integer" + }, + "window": { + "default": "latest", + "description": "'latest' for the recent window, or a correlation_id.", + "title": "Window", + "type": "string" + }, + "operation_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Reserved for detail sections; echoed in ignored_parameters.", + "title": "Operation Id" + }, + "span_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Reserved for 
detail sections; echoed in ignored_parameters.", + "title": "Span Id" + } + }, + "required": [ + "root", + "section" + ], + "title": "query_platform_observabilityArguments", + "type": "object" + } + }, { "name": "start_controlled_change", "input_schema": { diff --git a/tests/fixtures/contract_snapshots/public_api_surface.json b/tests/fixtures/contract_snapshots/public_api_surface.json index 583039bd..24ee900b 100644 --- a/tests/fixtures/contract_snapshots/public_api_surface.json +++ b/tests/fixtures/contract_snapshots/public_api_surface.json @@ -139,6 +139,10 @@ "name": "query_engineering_memory", "signature": "(self, *, root: 'str', mode: 'str', record_id: 'str | None' = None, path: 'str | None' = None, symbol: 'str | None' = None, query: 'str | None' = None, scope: 'Sequence[str] | None' = None, filters: 'Mapping[str, object] | None' = None, max_results: 'int' = 20, include_stale: 'bool' = False, include_drafts: 'bool' = False) -> 'dict[str, object]'" }, + { + "name": "query_platform_observability", + "signature": "(self: '_RunDictService', **params: 'object') -> 'dict[str, object]'" + }, { "name": "read_resource", "signature": "(self, uri: 'str') -> 'str'" diff --git a/tests/test_mcp_server.py b/tests/test_mcp_server.py index 7890450c..4452e130 100644 --- a/tests/test_mcp_server.py +++ b/tests/test_mcp_server.py @@ -142,6 +142,7 @@ def test_mcp_server_exposes_expected_read_only_tools() -> None: "analyze_changed_paths", "clear_session_runs", "help", + "query_platform_observability", "get_run_summary", "get_production_triage", "get_blast_radius", @@ -192,6 +193,7 @@ def test_mcp_server_exposes_expected_read_only_tools() -> None: "validate_review_claims", "evaluate_gates", "help", + "query_platform_observability", "get_report_section", "list_findings", "get_finding", @@ -242,6 +244,9 @@ def test_mcp_server_exposes_expected_read_only_tools() -> None: assert "change_control, trust_boundaries, engineering_memory" in str( tools["help"].description ) + obs_description = 
str(tools["query_platform_observability"].description) + assert "slicer, not a trace export API" in obs_description + assert "high DB queries != repository" in obs_description assert init_options.server_version == CODECLONE_VERSION assert "Prefer list_hotspots or focused check_* tools" in str( tools["list_findings"].description diff --git a/tests/test_mcp_service.py b/tests/test_mcp_service.py index 9edb4bee..7e8e5b3a 100644 --- a/tests/test_mcp_service.py +++ b/tests/test_mcp_service.py @@ -9650,3 +9650,14 @@ def test_mcp_validate_review_claims_warns_on_health_regression_overclaim( assert any( item.get("pattern") == "health_regression_overclaim" for item in violations ) + + +def test_query_platform_observability_wires_dev_envelope(tmp_path: Path) -> None: + service = CodeCloneMCPService(history_limit=4) + out = service.query_platform_observability(root=str(tmp_path), section="summary") + assert out["surface"] == "platform_observability" + assert out["user_facing"] is False + assert out["affects_edit_permission"] is False + # No store under a fresh root -> inert envelope, never an error. + assert out["status"] in {"disabled", "no_store"} + assert out["rows"] == [] From 3f88987f66f2ecadfc1a5e4b54227e2b343c04c4 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Fri, 12 Jun 2026 14:16:33 +0500 Subject: [PATCH 252/318] docs(mcp): add observability help topic for the new tool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit help(topic="observability") now surfaces query_platform_observability — its dev-only / anti-inference framing, the nine sections, detail_level and limit bounds, and disabled/no_store behavior. Registered in _VALID_HELP_TOPICS and listed in the help tool description. 
--- codeclone/surfaces/mcp/_session_shared.py | 1 + .../surfaces/mcp/messages/help_topics.py | 44 +++++++++++++++++++ codeclone/surfaces/mcp/messages/tools.py | 2 +- tests/test_mcp_service.py | 9 ++++ 4 files changed, 55 insertions(+), 1 deletion(-) diff --git a/codeclone/surfaces/mcp/_session_shared.py b/codeclone/surfaces/mcp/_session_shared.py index 7f04bd88..4dd6e1e7 100644 --- a/codeclone/surfaces/mcp/_session_shared.py +++ b/codeclone/surfaces/mcp/_session_shared.py @@ -246,6 +246,7 @@ "changed_scope", "change_control", "trust_boundaries", + "observability", "engineering_memory", "verification_profiles", } diff --git a/codeclone/surfaces/mcp/messages/help_topics.py b/codeclone/surfaces/mcp/messages/help_topics.py index 78d0c469..e73509f1 100644 --- a/codeclone/surfaces/mcp/messages/help_topics.py +++ b/codeclone/surfaces/mcp/messages/help_topics.py @@ -525,6 +525,50 @@ class MCPHelpTopicSpec: "as privileged.", ), ), + "observability": MCPHelpTopicSpec( + summary=( + "query_platform_observability: read-only, dev-only diagnostics over " + "CodeClone's OWN runtime telemetry (Phase 29). A sectioned slicer, " + "not a trace export API — for building CodeClone itself, never a " + "user-facing repository signal." + ), + key_points=( + "Dev-only: never affects reports, gates, baselines, memory facts, " + "or edit authorization; numeric metrics only, no raw SQL/payloads.", + ( + "Sections: summary | slow_operations | memory_pipeline_cost | " + "db_cost | agent_context | mcp_tool_matrix | correlated_chains " + "| costly_noops | pipeline. Start at summary, then follow " + "recommended_next_sections." + ), + ( + "detail_level compact|normal; full is reserved for future " + "by-id detail sections and downgrades to normal here. limit " + "clamps to [1, 50]." + ), + ( + "Anti-inference: this is CodeClone's runtime, not the user " + "repo. High DB queries != repository bad; high MCP payload != " + "code quality low; hot semantic reindex != unsafe change." 
+ ), + ( + "Absent/disabled telemetry returns an inert " + "status=disabled|no_store envelope, never an error; the " + "branded HTML cockpit stays the humans' everything-view." + ), + ), + recommended_tools=("query_platform_observability",), + doc_links=(MCP_INTERFACE_DOC_LINK,), + warnings=( + "Sections return status=disabled unless " + "CODECLONE_OBSERVABILITY_ENABLED was set for the producing process.", + ), + anti_patterns=( + "Using runtime telemetry to make user-facing quality claims about " + "a repository.", + "Reading db_queries or payload sizes as a code-quality verdict.", + ), + ), "engineering_memory": MCPHelpTopicSpec( summary=( "Ranked scope context before edits, FTS search, optional semantic " diff --git a/codeclone/surfaces/mcp/messages/tools.py b/codeclone/surfaces/mcp/messages/tools.py index e292941a..025ebff5 100644 --- a/codeclone/surfaces/mcp/messages/tools.py +++ b/codeclone/surfaces/mcp/messages/tools.py @@ -108,7 +108,7 @@ "anti_patterns; normal adds warnings. Topics: workflow, analysis_profile, " "suppressions, baseline, coverage, latest_runs, review_state, " "changed_scope, change_control, trust_boundaries, engineering_memory, " - "verification_profiles." + "verification_profiles, observability." 
) QUERY_PLATFORM_OBSERVABILITY: Final = ( diff --git a/tests/test_mcp_service.py b/tests/test_mcp_service.py index 7e8e5b3a..b782524a 100644 --- a/tests/test_mcp_service.py +++ b/tests/test_mcp_service.py @@ -1310,6 +1310,15 @@ def test_help_topic_specs_match_valid_help_topics() -> None: assert set(HELP_TOPIC_SPECS) == set(_VALID_HELP_TOPICS) +def test_help_observability_topic_surfaces_query_tool() -> None: + service = CodeCloneMCPService(history_limit=4) + payload = service.get_help(topic="observability", detail="normal") + assert payload["topic"] == "observability" + assert "query_platform_observability" in str(payload["recommended_tools"]) + text = str(payload["key_points"]).lower() + assert "anti-inference" in text and "dev-only" in text + + def test_mcp_service_help_validates_topic_and_detail() -> None: service = CodeCloneMCPService(history_limit=4) From 056d05f30a19ef611310320a5190cc722506ee70 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Fri, 12 Jun 2026 18:20:21 +0500 Subject: [PATCH 253/318] refactor(memory): dedup get_relevant_memory payload (Track A) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit One signal, one place: drop fields that duplicated semantic state already carried by status or by a nested block — record `stale` (==status), `draft_unverified` (==status=="draft"), the flat top-level coverage keys that mirrored `record_coverage`, and the root `patch_trail_summary` copy of trajectories[0]. The draft guardrail now lives once in `retrieval_policy` (status_is_authoritative, draft_records_are_unverified). Evidence-bearing metadata (subject_count, *_truncated, evidence_count, relations, stale_reason) is kept. 
Co-Authored-By: Claude Opus 4.8 --- .../memory/retrieval/context_coverage.py | 1 - codeclone/memory/retrieval/service.py | 34 +++++-------------- tests/test_memory_retrieval.py | 2 +- tests/test_memory_trajectory_retrieval.py | 8 +++-- 4 files changed, 15 insertions(+), 30 deletions(-) diff --git a/codeclone/memory/retrieval/context_coverage.py b/codeclone/memory/retrieval/context_coverage.py index 6730418a..fa8e6a7d 100644 --- a/codeclone/memory/retrieval/context_coverage.py +++ b/codeclone/memory/retrieval/context_coverage.py @@ -149,7 +149,6 @@ def build_context_coverage( ) record_payload = dict(record_coverage) return { - **record_payload, "record_coverage": record_payload, "trajectory_coverage": trajectory_coverage, "experience_coverage": experience_coverage, diff --git a/codeclone/memory/retrieval/service.py b/codeclone/memory/retrieval/service.py index d217d747..2f082b75 100644 --- a/codeclone/memory/retrieval/service.py +++ b/codeclone/memory/retrieval/service.py @@ -210,6 +210,10 @@ def _retrieval_policy(*, include_drafts: bool) -> dict[str, object]: "memory_does_not_override_findings": True, "trajectories_do_not_authorize_edits": True, "experiences_do_not_authorize_edits": True, + # Per-record guardrail lives here once, not duplicated on every record: + # status is the single source of truth (status=="draft" => unverified). 
+ "status_is_authoritative": True, + "draft_records_are_unverified": True, } @@ -450,7 +454,6 @@ def _serialize_record_summary( "statement_length": statement_length, "subjects": [_serialize_subject(item) for item in serialized_subjects], "evidence_count": evidence_count, - "stale": record.status == "stale", } if detail_level == "full": payload["payload"] = record.payload @@ -461,8 +464,6 @@ def _serialize_record_summary( payload["statement_truncated"] = True if record.stale_reason: payload["stale_reason"] = record.stale_reason - if record.status == "draft": - payload["draft_unverified"] = True payload.update(_retrieval_lane_payload(record)) if relevance_score is not None: payload["relevance_score"] = relevance_score @@ -706,9 +707,11 @@ def get_relevant_memory( ) else: coverage = { - "scope_paths_with_memory": 0, - "scope_paths_total": 0, - "coverage_percent": None, + "record_coverage": { + "scope_paths_with_memory": 0, + "scope_paths_total": 0, + "coverage_percent": None, + }, "coverage_note": "symbol_scoped_retrieval", } payload: dict[str, object] = { @@ -726,28 +729,9 @@ def get_relevant_memory( "detail_level": normalized_detail, "retrieval_policy": _retrieval_policy(include_drafts=effective_include_drafts), } - payload.update( - _root_patch_trail_payload( - detail_level=normalized_detail, - trajectories=trajectories_payload, - ) - ) return payload -def _root_patch_trail_payload( - *, - detail_level: MemoryDetailLevel, - trajectories: Sequence[Mapping[str, object]], -) -> dict[str, object]: - if detail_level != "full" or not trajectories: - return {} - first_summary = trajectories[0].get("patch_trail_summary") - if not isinstance(first_summary, dict): - return {} - return {"patch_trail_summary": first_summary} - - def _load_patch_trails_for_trajectories( store: SqliteEngineeringMemoryStore, *, diff --git a/tests/test_memory_retrieval.py b/tests/test_memory_retrieval.py index f26736df..a6a35407 100644 --- a/tests/test_memory_retrieval.py +++ 
b/tests/test_memory_retrieval.py @@ -388,7 +388,7 @@ def test_get_relevant_memory_ranks_scope_records(tmp_path: Path) -> None: assert records[0]["statement"] == "sqlite store module" coverage = result["coverage"] assert isinstance(coverage, dict) - assert coverage["coverage_kind"] == "record_subject_coverage" + assert coverage["record_coverage"]["coverage_kind"] == "record_subject_coverage" assert coverage["observation_confidence"] == { "level": "partial", "basis": ["records"], diff --git a/tests/test_memory_trajectory_retrieval.py b/tests/test_memory_trajectory_retrieval.py index 91840f45..cf74640d 100644 --- a/tests/test_memory_trajectory_retrieval.py +++ b/tests/test_memory_trajectory_retrieval.py @@ -324,11 +324,13 @@ def test_get_relevant_memory_returns_patch_trail_summary(tmp_path: Path) -> None trajectories = compact["trajectories"] assert isinstance(trajectories, list) assert trajectories - assert trajectories[0].get("patch_trail_summary") is not None - assert "patch_trail_summary" not in compact - summary = full.get("patch_trail_summary") + summary = trajectories[0].get("patch_trail_summary") assert isinstance(summary, dict) assert summary.get("counts", {}).get("untouched_in_declared") == 1 + # patch_trail_summary lives on each trajectory; it is no longer duplicated at + # the payload root in any detail level. 
+ assert "patch_trail_summary" not in compact + assert "patch_trail_summary" not in full def test_query_engineering_memory_trajectory_modes(tmp_path: Path) -> None: From e5138f7484628edb0254128ca7c9739e2fe22ef6 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Fri, 12 Jun 2026 18:24:21 +0500 Subject: [PATCH 254/318] docs(memory): document lane semantics + fix stale patch_trail refs (Track D) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Engineering Memory retrieval returns independent signal lanes — records = asserted knowledge, trajectories = episodic evidence, experiences = advisory patterns, coverage = visibility metadata. Document the invariant in CLAUDE.md and the engineering_memory help topic: scores are lane-local (never compare relevance_score across lanes), for_path and plain (non-semantic) search are unranked. Correct the now-stale 'root Patch Trail' references — Track A removed the duplicated root patch_trail_summary; it rides each trajectory. Co-Authored-By: Claude Opus 4.8 --- CLAUDE.md | 13 +++++++++---- codeclone/surfaces/mcp/messages/help_topics.py | 8 +++++--- tests/test_mcp_service.py | 3 ++- 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 945f305a..be0e8b72 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -123,10 +123,15 @@ After `start_controlled_change` returns `edit_allowed: true`: `record_candidate` statements to one durable fact (target ≤300 chars; `validate_claims` warns above 500; hard limit 1000). List responses default to compact previews. Treat `records[]`, `experiences[]`, and `trajectories[]` as -separate evidence lanes; `subject_count` / `subjects_truncated` means more -subjects exist, not that evidence disappeared. Use `mode=get` or -`detail_level=full` for complete subjects, agent facets, trajectory contracts, -steps, evidence ids, payloads, and root Patch Trail drill-down. 
+separate evidence lanes: records = asserted knowledge, trajectories = episodic +workflow evidence, experiences = advisory patterns, `coverage` = visibility +metadata. **Scores are lane-local — never compare `relevance_score` across +lanes; `for_path` and plain (non-semantic) search are unranked.** +`subject_count` / `subjects_truncated` means more subjects exist, not that +evidence disappeared. Use `mode=get` or `detail_level=full` for complete +subjects, agent facets, trajectory contracts, steps, evidence ids, and +payloads; `patch_trail_summary` rides each trajectory (never duplicated at the +payload root). ### Before `finish`: incident / complexity memory (MANDATORY) diff --git a/codeclone/surfaces/mcp/messages/help_topics.py b/codeclone/surfaces/mcp/messages/help_topics.py index e73509f1..49aeea78 100644 --- a/codeclone/surfaces/mcp/messages/help_topics.py +++ b/codeclone/surfaces/mcp/messages/help_topics.py @@ -592,14 +592,16 @@ class MCPHelpTopicSpec: ( "Scoped response lanes: records[]=durable assertions, " "experiences[]=advisory patterns, trajectories[]=bounded examples, " - "coverage=availability/trust context." + "coverage=availability/trust context. Scores are lane-local: " + "never compare relevance_score across lanes; for_path and plain " + "(non-semantic) search are unranked." ), ( "compact (default): record/trajectory subjects are bounded with " "subject_count+subjects_truncated; experiences expose multi_agent " "+ dominant_agent_facet; no quality_contract, steps, evidence ids, " - "payload, or duplicated root patch_trail_summary. Use full/get " - "for drill-down." + "or payload. patch_trail_summary rides each trajectory, never " + "duplicated at the payload root. Use full/get for drill-down." 
), ( "Semantic (off by default): enable sidecar, rebuild_semantic_index, " diff --git a/tests/test_mcp_service.py b/tests/test_mcp_service.py index b782524a..7d70d5be 100644 --- a/tests/test_mcp_service.py +++ b/tests/test_mcp_service.py @@ -1363,7 +1363,8 @@ def test_mcp_service_help_validates_topic_and_detail() -> None: memory_points = str(memory_help["key_points"]) assert "subject_count+subjects_truncated" in memory_points assert "dominant_agent_facet" in memory_points - assert "duplicated root patch_trail_summary" in memory_points + assert "never duplicated at the payload root" in memory_points + assert "Scores are lane-local" in memory_points def _memory_sync_service_with_run( From 20a0777708e99244cb1da514651238ec285fe3ca Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Fri, 12 Jun 2026 18:31:33 +0500 Subject: [PATCH 255/318] refactor(memory): slim compact trajectory + coverage payloads (Track B) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compact diet (decision-dense, not dedup): compact trajectory previews drop the contract-component numbers complexity_score / scope_accuracy / duration_seconds (full/detail only), keeping the headline quality_score + anomaly_count + tier + outcome + patch_trail_summary + summary; labels are sorted and capped at 8. coverage.agent_diversity is now detail-only (build_context_coverage takes detail_level) — it is analytics, not a pre-edit signal. 
Co-Authored-By: Claude Opus 4.8 --- .../memory/retrieval/context_coverage.py | 18 +++++++++------- codeclone/memory/retrieval/service.py | 1 + codeclone/memory/trajectory/retrieval.py | 21 +++++++++++++++---- tests/test_memory_trajectory_retrieval.py | 8 ++----- 4 files changed, 31 insertions(+), 17 deletions(-) diff --git a/codeclone/memory/retrieval/context_coverage.py b/codeclone/memory/retrieval/context_coverage.py index fa8e6a7d..692691ca 100644 --- a/codeclone/memory/retrieval/context_coverage.py +++ b/codeclone/memory/retrieval/context_coverage.py @@ -138,6 +138,7 @@ def build_context_coverage( scope_families: frozenset[str], trajectories: Sequence[Trajectory], experiences: Sequence[Experience], + detail_level: str = "compact", ) -> dict[str, object]: trajectory_coverage, trajectory_agents = _trajectory_coverage( scope_paths=scope_paths, @@ -148,22 +149,25 @@ def build_context_coverage( experiences=experiences, ) record_payload = dict(record_coverage) - return { + coverage: dict[str, object] = { "record_coverage": record_payload, "trajectory_coverage": trajectory_coverage, "experience_coverage": experience_coverage, - "agent_diversity": { - "trajectory_agent_labels": sorted(trajectory_agents), - "trajectory_agent_label_count": len(trajectory_agents), - "experience_agent_families": sorted(experience_agents), - "experience_agent_family_count": len(experience_agents), - }, "observation_confidence": _observation_confidence( record_coverage=record_payload, trajectory_coverage=trajectory_coverage, experience_coverage=experience_coverage, ), } + # agent_diversity is analytics, not pre-edit signal — full/detail only. 
+ if detail_level != "compact": + coverage["agent_diversity"] = { + "trajectory_agent_labels": sorted(trajectory_agents), + "trajectory_agent_label_count": len(trajectory_agents), + "experience_agent_families": sorted(experience_agents), + "experience_agent_family_count": len(experience_agents), + } + return coverage __all__ = ["build_context_coverage"] diff --git a/codeclone/memory/retrieval/service.py b/codeclone/memory/retrieval/service.py index 2f082b75..d0d4132e 100644 --- a/codeclone/memory/retrieval/service.py +++ b/codeclone/memory/retrieval/service.py @@ -704,6 +704,7 @@ def get_relevant_memory( include_routine=include_routine, ), experiences=matching_experiences, + detail_level=normalized_detail, ) else: coverage = { diff --git a/codeclone/memory/trajectory/retrieval.py b/codeclone/memory/trajectory/retrieval.py index 4ab585ca..f0533603 100644 --- a/codeclone/memory/trajectory/retrieval.py +++ b/codeclone/memory/trajectory/retrieval.py @@ -24,10 +24,21 @@ DEFAULT_TRAJECTORY_PREVIEW_LIMIT = 5 DEFAULT_TRAJECTORY_STEP_LIMIT = 12 COMPACT_TRAJECTORY_SUBJECT_LIMIT = 8 +COMPACT_TRAJECTORY_LABEL_LIMIT = 8 TRAJECTORY_PREVIEW_CHARS = 220 TrajectoryDetailLevel = Literal["compact", "full"] +def _preview_labels( + labels: Sequence[str], *, detail_level: TrajectoryDetailLevel +) -> list[str]: + # Full keeps the raw label list; compact sorts (deterministic) and caps so a + # trajectory that accumulated many labels cannot bloat the preview. 
+ if detail_level == "full": + return list(labels) + return sorted(labels)[:COMPACT_TRAJECTORY_LABEL_LIMIT] + + def trajectory_excluded_from_default_retrieval( trajectory: Trajectory, *, @@ -121,7 +132,7 @@ def serialize_trajectory_preview( "quality_tier": trajectory.quality_tier, "quality_score": trajectory.quality_score, "summary": _preview_text(trajectory.summary), - "labels": list(trajectory.labels), + "labels": _preview_labels(trajectory.labels, detail_level=detail_level), "agent_label": trajectory_agent_label(trajectory), "subjects": serialized_subjects, "evidence_count": len(trajectory.evidence), @@ -193,13 +204,15 @@ def _add_quality_fields( trajectory=trajectory, patch_trail_payload=patch_trail_payload, ) + # Contract-component numbers are interpretable only alongside the full + # breakdown; compact keeps quality_score + anomaly_count as the headline. + payload["complexity_score"] = contract.complexity_score + payload["scope_accuracy"] = contract.scope_accuracy + payload["duration_seconds"] = contract.duration_seconds else: payload["subject_count"] = subject_count payload["matched_subject_count"] = matched_subject_count payload["subjects_truncated"] = serialized_subject_count < subject_count - payload["complexity_score"] = contract.complexity_score - payload["scope_accuracy"] = contract.scope_accuracy - payload["duration_seconds"] = contract.duration_seconds payload["anomaly_count"] = contract.anomaly_count diff --git a/tests/test_memory_trajectory_retrieval.py b/tests/test_memory_trajectory_retrieval.py index cf74640d..0ad2f890 100644 --- a/tests/test_memory_trajectory_retrieval.py +++ b/tests/test_memory_trajectory_retrieval.py @@ -76,12 +76,8 @@ def test_get_relevant_memory_returns_scoped_trajectories(tmp_path: Path) -> None "scope_paths_total": 1, "coverage_percent": 100, } - assert coverage["agent_diversity"] == { - "trajectory_agent_labels": ["test-agent"], - "trajectory_agent_label_count": 1, - "experience_agent_families": [], - 
"experience_agent_family_count": 0, - } + # agent_diversity is detail-only now; compact coverage omits it. + assert "agent_diversity" not in coverage assert coverage["observation_confidence"]["level"] == "supported" From 9f9d6fa51458c03f1e3364db0724fa5b3b2ba99e Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Fri, 12 Jun 2026 18:50:39 +0500 Subject: [PATCH 256/318] fix(memory): bootstrap observability before opening worker store (29.DB) --- codeclone/memory/jobs/workflow.py | 37 ++++++++++++++---------- tests/test_observability_worker_chain.py | 28 ++++++++++++++++-- 2 files changed, 47 insertions(+), 18 deletions(-) diff --git a/codeclone/memory/jobs/workflow.py b/codeclone/memory/jobs/workflow.py index 0d2d9304..ffe7fc10 100644 --- a/codeclone/memory/jobs/workflow.py +++ b/codeclone/memory/jobs/workflow.py @@ -204,28 +204,33 @@ def execute_run_projection_jobs_once( root_path: Path, config: MemoryConfig | None = None, ) -> dict[str, object]: - resolved_root, resolved_config, project, store = _require_memory_store_session( - root_path, - config=config, - ) - # Worker-process bootstrap: freeze the env-resolved observability decision - # once at the worker entry (spec §4.1). owns_observability guards against a - # caller that already bootstrapped (e.g. an MCP session) being shut down here. + # Bootstrap observability BEFORE opening the store: open_memory_db attaches + # the per-span DB-query counter only while observability is enabled, so a + # store opened pre-bootstrap stays uninstrumented and the worker's whole + # query stream is invisible to the cockpit. owns_observability guards against + # a caller that already bootstrapped (e.g. an MCP session). 
+ resolved_root = root_path.resolve() owns_observability = not is_observability_enabled() if owns_observability: bootstrap(resolve_observability_config(), root=resolved_root) try: - worker_result = run_projection_jobs_once( - store, - root_path=resolved_root, - config=resolved_config, - project=project, - running_timeout_seconds=( - resolved_config.projection_rebuild_running_timeout_seconds - ), + resolved_root, resolved_config, project, store = _require_memory_store_session( + resolved_root, + config=config, ) + try: + worker_result = run_projection_jobs_once( + store, + root_path=resolved_root, + config=resolved_config, + project=project, + running_timeout_seconds=( + resolved_config.projection_rebuild_running_timeout_seconds + ), + ) + finally: + store.close() finally: - store.close() if owns_observability: shutdown() return { diff --git a/tests/test_observability_worker_chain.py b/tests/test_observability_worker_chain.py index 7a45df14..3d4766ec 100644 --- a/tests/test_observability_worker_chain.py +++ b/tests/test_observability_worker_chain.py @@ -13,11 +13,12 @@ import orjson import pytest -from codeclone.config.memory import resolve_memory_config +from codeclone.config.memory import MemoryConfig, resolve_memory_config from codeclone.config.observability import ObservabilityConfig from codeclone.memory.jobs import worker as worker_module +from codeclone.memory.jobs import workflow as workflow_module from codeclone.memory.jobs.worker import run_projection_job -from codeclone.observability import bootstrap, shutdown +from codeclone.observability import bootstrap, is_observability_enabled, shutdown from codeclone.observability.store.schema import ( observability_store_path, open_observability_store, @@ -110,3 +111,26 @@ def test_run_projection_job_emits_operation_and_spans(tmp_path: Path) -> None: assert orjson.loads(by_name["memory.experience.distill"][2]) == { "experiences_distilled": 3 } + + +def test_worker_bootstraps_observability_before_opening_store( + 
tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + monkeypatch.setattr( + workflow_module, + "resolve_observability_config", + lambda: ObservabilityConfig(enabled=True), + ) + with cli_memory_repo(tmp_path, with_draft=False) as (root, _project, _store): + enabled_at_open: list[bool] = [] + real_session = workflow_module._require_memory_store_session + + def _spy(root_path: Path, config: MemoryConfig | None = None) -> object: + # open_memory_db only instruments while observability is enabled. + enabled_at_open.append(is_observability_enabled()) + return real_session(root_path, config=config) + + monkeypatch.setattr(workflow_module, "_require_memory_store_session", _spy) + workflow_module.execute_run_projection_jobs_once(root_path=root) + + assert enabled_at_open == [True] From 512d24fdcc08f1c5a46cbabf715c4f4a026d2a3e Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Fri, 12 Jun 2026 19:12:48 +0500 Subject: [PATCH 257/318] fix(memory): apply query filters to semantic candidates (#1) --- codeclone/memory/retrieval/service.py | 34 ++++++++++++++++++++- tests/test_semantic_search_service.py | 44 +++++++++++++++++++++++++-- 2 files changed, 75 insertions(+), 3 deletions(-) diff --git a/codeclone/memory/retrieval/service.py b/codeclone/memory/retrieval/service.py index d0d4132e..2058835b 100644 --- a/codeclone/memory/retrieval/service.py +++ b/codeclone/memory/retrieval/service.py @@ -1343,12 +1343,32 @@ def _hydrate_trajectory_hits( return trajectories +def _record_passes_filters( + record: MemoryRecord, + *, + types: tuple[MemoryRecordType, ...], + statuses: tuple[MemoryStatus, ...], + confidences: tuple[MemoryConfidence, ...], +) -> bool: + # Mirror the types/statuses/confidences predicate the FTS branch applies in + # SQL (store.search_records), so semantic candidates cannot bypass the + # public query filter contract. An empty category tuple means "no filter". 
+ return ( + (not types or record.type in types) + and (not statuses or record.status in statuses) + and (not confidences or record.confidence in confidences) + ) + + def _semantic_search_candidates( store: SqliteEngineeringMemoryStore, *, project_id: str, fts_records: Sequence[MemoryRecord], proximity: Mapping[str, float], + filter_types: tuple[MemoryRecordType, ...], + statuses: tuple[MemoryStatus, ...], + filter_confidences: tuple[MemoryConfidence, ...], ) -> list[MemoryRecord]: seen = {record.id for record in fts_records} candidates = list(fts_records) @@ -1356,7 +1376,16 @@ def _semantic_search_candidates( if record_id in seen: continue record = store.find_record(record_id) - if record is not None and record.project_id == project_id: + if ( + record is not None + and record.project_id == project_id + and _record_passes_filters( + record, + types=filter_types, + statuses=statuses, + confidences=filter_confidences, + ) + ): candidates.append(record) seen.add(record_id) return candidates @@ -1410,6 +1439,9 @@ def _handle_semantic_search_mode( project_id=project_id, fts_records=fts_records, proximity=proximity, + filter_types=filter_types, + statuses=statuses, + filter_confidences=filter_confidences, ) audit_events = _hydrate_audit_events(audit_db_path, audit_hits) trajectories = _hydrate_trajectory_hits( diff --git a/tests/test_semantic_search_service.py b/tests/test_semantic_search_service.py index c38cc0ee..78e425c9 100644 --- a/tests/test_semantic_search_service.py +++ b/tests/test_semantic_search_service.py @@ -5,13 +5,18 @@ # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations -from collections.abc import Sequence +from collections.abc import Mapping, Sequence from pathlib import Path from codeclone.memory.retrieval import query_engineering_memory from codeclone.memory.semantic.models import SemanticHit, SemanticIndexStatus from codeclone.memory.sqlite_store import SqliteEngineeringMemoryStore -from tests.memory_fixtures import 
insert_audit_event, memory_store, seed_module_role +from tests.memory_fixtures import ( + insert_audit_event, + memory_store, + seed_document_link, + seed_module_role, +) class _FakeProvider: @@ -55,6 +60,7 @@ def _search( query: str, index: _FakeIndex, audit: Path | None = None, + filters: Mapping[str, object] | None = None, ) -> dict[str, object]: return query_engineering_memory( store, @@ -69,6 +75,7 @@ def _search( embedding_provider=_FakeProvider(), provider_label="diagnostic", audit_db_path=audit, + filters=filters, ) @@ -117,6 +124,39 @@ def test_hybrid_merges_semantic_only_record(tmp_path: Path) -> None: assert semantic_only.id in ids +def test_semantic_only_record_respects_type_filter(tmp_path: Path) -> None: + with memory_store(tmp_path) as (root, project, store, db_path): + kept = seed_document_link( + store, + project_id=project.id, + doc_file="codeclone/a.py", + ref_path="codeclone/a.py", + statement="alpha beta gamma", + ) + filtered = seed_module_role( + store, + project_id=project.id, + file_path="codeclone/b.py", + statement="delta epsilon zeta", + ) + index = _FakeIndex( + [SemanticHit(source_id=filtered.id, source="memory", score=0.9)] + ) + result = _search( + store, + root=root, + project_id=project.id, + db_path=db_path, + query="alpha", + index=index, + filters={"types": ["document_link"]}, + ) + ids = _record_ids(result) + # FTS hit kept; the semantic-only module_role no longer bypasses the filter. 
+ assert kept.id in ids + assert filtered.id not in ids + + def test_unavailable_index_falls_back_to_fts(tmp_path: Path) -> None: with memory_store(tmp_path) as (root, project, store, db_path): fts = seed_module_role( From f27404a7f92e359da5d70c9c06a3055883f4af71 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Fri, 12 Jun 2026 19:13:43 +0500 Subject: [PATCH 258/318] fix(integrations): sync MCP surfaces with current contract --- CHANGELOG.md | 34 +++++++++++---- docs/README-pypi.md | 2 +- docs/book/02-architecture-map.md | 5 ++- docs/book/10-config-and-defaults.md | 2 +- .../patch-trail.md | 5 ++- .../book/13-engineering-memory/cli-surface.md | 4 +- docs/book/13-engineering-memory/index.md | 6 ++- .../book/13-engineering-memory/mcp-surface.md | 8 +++- .../13-engineering-memory/projection-jobs.md | 8 ++-- .../trajectory-and-patch-trail.md | 43 ++++++++++--------- docs/book/24-compatibility-and-versioning.md | 15 +++++-- .../25-mcp-interface/determinism-and-tests.md | 2 +- docs/book/25-mcp-interface/index.md | 6 +-- .../tools/atomic-change-control.md | 4 +- .../tools/session-and-memory.md | 11 +++++ docs/book/appendix/b-schema-layouts.md | 17 ++++++-- .../integrations/claude-desktop-bundle.md | 2 +- docs/book/integrations/codex-plugin.md | 6 +-- docs/getting-started.md | 4 +- .../integrations/claude-desktop/setup.md | 3 +- .../integrations/cursor/install-and-skills.md | 3 +- docs/guide/mcp/architecture.md | 7 ++- extensions/claude-desktop-codeclone/README.md | 3 +- .../claude-desktop-codeclone/manifest.json | 14 +++--- plugins/codeclone/.codex-plugin/plugin.json | 2 +- plugins/codeclone/README.md | 5 ++- .../codeclone-engineering-memory/SKILL.md | 4 +- plugins/cursor-codeclone/CHANGELOG.md | 4 +- plugins/cursor-codeclone/README.md | 7 +-- .../codeclone-engineering-memory/SKILL.md | 4 +- 30 files changed, 156 insertions(+), 84 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bb9d0e99..cf1b3d90 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,8 +3,8 @@ 
## [2.1.0a1] - Unreleased `2.1.0a1` opens the v2.1 alpha line with the structural change controller, -Engineering Memory, semantic retrieval, and a fully reorganized documentation -site. +Engineering Memory with trajectory and experience layers, semantic retrieval, +Platform Observability, and a fully reorganized documentation site. ### Added @@ -13,7 +13,7 @@ site. to 3-4. Blast radius projection (`get_blast_radius`), patch contract verification with profile-aware depth (`check_patch_contract`), citation-based claim guard (`validate_review_claims`), and deterministic review receipts - (`create_review_receipt`). 31 MCP tools total. + (`create_review_receipt`). 32 default agent-visible MCP tools. - **Change intent lifecycle.** `manage_change_intent`: declare, check, clear, queue, promote, recover. Renewable ownership leases with own/recoverable/foreign-active classification. Optional SQLite backend with @@ -28,22 +28,31 @@ site. `codeclone memory approve`). Scope coverage metrics and `finish_controlled_change(propose_memory=true)` for draft candidates on accepted patches. -- **Trajectory memory** Deterministic audit-derived workflow +- **Trajectory memory.** Deterministic audit-derived workflow timelines in Engineering Memory SQLite (`memory trajectory rebuild`), scoped MCP/CLI retrieval (`trajectories[]`, `trajectory_*` query modes), optional semantic source, and disabled-by-default local JSONL export profiles. + The `trajectory-v3` projection adds trajectory passports, contract-quality + and complexity scoring, anomaly detection, agent profiles, and dashboard + views. +- **Experience Layer.** Deterministic `experience-v1` advisory patterns are + distilled from verified trajectories and surfaced through a separate + `experiences[]` retrieval lane with evidence and agent-diversity facets. 
+ Projection jobs can distill experiences automatically, while + `promote_experience` converts a selected pattern into a human-governed + Engineering Memory draft rather than treating it as authority. - **Semantic retrieval.** Opt-in `[tool.codeclone.memory.semantic]` with LanceDB sidecar. Local `fastembed` provider (`BAAI/bge-small-en-v1.5`) via `codeclone[semantic-local]`; `api` provider reserved for Team+; `local_model` reserved for Enterprise. CLI `codeclone memory semantic status|rebuild|search`, MCP `query_engineering_memory(mode=search, semantic=true)`. -- **Patch Trail** Deterministic scope narrative at +- **Patch Trail.** Deterministic scope narrative at `finish_controlled_change`: declared/changed/untouched-in-declared, boundary-held paths, verification outcome, and audit anchors - (`patch_trail.computed`). Engineering Memory schema **1.4** adds - `memory_trajectory_patch_trails`; trajectory projection version - **`trajectory-v2`**. Rebuild persists Patch Trail from audit; scoped retrieval - exposes `patch_trail_summary`. MCP finish accepts optional `patch_trail_detail`. + (`patch_trail.computed`). Rebuild persists Patch Trail from audit into + Engineering Memory schema **1.6** and the current **`trajectory-v3`** + projection; scoped retrieval exposes `patch_trail_summary`. MCP finish accepts + optional `patch_trail_detail`. - **Trajectory export enrichment (schema 2).** JSONL export rows now populate `memory_precedents`, `trajectory_precedents`, `citations`, and `patch_trail_summary`; export deduplicates superseded projection versions; @@ -56,6 +65,13 @@ site. governance channel (`--ide-governance-channel`) with session HMAC attestation. Workspace session stats and controller audit trail webviews (IDE-only MCP tools; shared payloads in `codeclone/controller_insights/`). 
+- **Platform Observability.** Opt-in, development-only operation/span telemetry + correlates CLI, MCP, analysis, and projection-worker execution without + affecting canonical reports, gates, baselines, memory facts, or edit + authorization. The local SQLite trace captures RSS/CPU, MCP payload sizes, + database query shapes, pipeline costs, agent context pressure, and avoidable + work. CLI JSON/HTML trace views provide a diagnostic cockpit and waterfall; + MCP exposes the bounded `query_platform_observability` slicer. - **Cursor plugin** (`plugins/cursor-codeclone/`): six skills, three rules (including always-on `change-control-gate`), fail-closed `preToolUse` hook via `codeclone.workspace_intent`, project hook installer with diff --git a/docs/README-pypi.md b/docs/README-pypi.md index ac3ea79b..322b8811 100644 --- a/docs/README-pypi.md +++ b/docs/README-pypi.md @@ -46,7 +46,7 @@ codeclone . --ci # CI mode - **Quality metrics** — complexity, coupling, cohesion, dead code, health score - **Baseline governance** — separates legacy debt from new regressions; CI fails only on what changed - **Change controller** — intent declaration, blast radius, patch contract, review receipt for AI agents -- **MCP server** — 31-tool read-only interface for IDE and agent clients +- **MCP server** — 32-tool default interface for IDE and agent clients - **Reports** — HTML, JSON, Markdown, SARIF, text from one canonical payload ## MCP Server diff --git a/docs/book/02-architecture-map.md b/docs/book/02-architecture-map.md index b4f53397..8626926a 100644 --- a/docs/book/02-architecture-map.md +++ b/docs/book/02-architecture-map.md @@ -41,8 +41,9 @@ Main ownership layers: | Canonical report | `codeclone/report/document/*`, `codeclone/report/gates/*`, `codeclone/report/*.py` | Canonical report payload, derived projections, explainability, suggestions, gate reasons | | Deterministic renderers | `codeclone/report/renderers/*` | Text/Markdown/SARIF/JSON projections over the canonical report | 
| HTML render layer | `codeclone/report/html/*` | Render-only HTML view over canonical report/meta facts | -| MCP surface | `codeclone/surfaces/mcp/*`, `codeclone/surfaces/mcp/messages/*` | Read-only MCP tools/resources, change-control projections (intent, blast radius, patch budget/verify, claim validation), engineering memory retrieval/governance, and centralized agent-facing copy | -| Engineering Memory | `codeclone/memory/*`, `codeclone/config/memory*.py` | Local SQLite store, init ingest, scoped retrieval, optional LanceDB semantic sidecar (`memory/semantic/*`, `memory/embedding/*`), trajectory projection + Patch Trail (`memory/trajectory/*`), coalesced rebuild jobs (`memory/jobs/*`), staleness, governance, CLI/MCP surfaces over deterministic report/git/doc/audit facts | +| MCP surface | `codeclone/surfaces/mcp/*`, `codeclone/surfaces/mcp/messages/*` | Read-only MCP tools/resources, change-control projections, Engineering Memory retrieval/governance, dev-only Platform Observability slices, and centralized agent-facing copy | +| Engineering Memory | `codeclone/memory/*`, `codeclone/config/memory*.py` | Local SQLite store, scoped retrieval, semantic sidecar, trajectory + Patch Trail projection, Experience distillation, coalesced rebuild jobs, staleness, governance, and CLI/MCP surfaces over deterministic report/git/doc/audit facts | +| Platform Observability | `codeclone/observability/*` | Opt-in operation/span telemetry, local SQLite store, bounded MCP slicer, and CLI JSON/HTML diagnostics; never analysis truth or a gate input | | Controller insights | `codeclone/controller_insights/*` | Shared session-stats and audit-trail payloads for CLI `--session-stats` / `--audit` and IDE-only MCP `get_workspace_session_stats` / `get_controller_audit_trail` | | Audit trail | `codeclone/audit/*` | Optional controller event and MCP payload footprint recording under `.codeclone/db/` when enabled | | Client surfaces | `extensions/vscode-codeclone/*`, 
`extensions/claude-desktop-codeclone/*`, `plugins/codeclone/*`, `plugins/cursor-codeclone/*` | Native clients/install surfaces over `codeclone-mcp` | diff --git a/docs/book/10-config-and-defaults.md b/docs/book/10-config-and-defaults.md index bc377e80..4863d540 100644 --- a/docs/book/10-config-and-defaults.md +++ b/docs/book/10-config-and-defaults.md @@ -344,7 +344,7 @@ listed field only. Paths resolve under the repository root like pyproject paths. | Variable | Values | Overrides | Effect | |--------------------------------------------------|-------------------------------------------------|----------------------------------------|--------------------------------------------------------------------------------| | `CODECLONE_MEMORY_DB_PATH` | repo-relative or absolute path under root | `memory.db_path` | SQLite Engineering Memory store location | -| `CODECLONE_PROJECTION_REBUILD_POLICY` | `off`, `enqueue_when_stale` | `memory.projection_rebuild_policy` | When accepted MCP finish may enqueue async trajectory/semantic projection jobs | +| `CODECLONE_PROJECTION_REBUILD_POLICY` | `off`, `enqueue_when_stale` | `memory.projection_rebuild_policy` | When accepted MCP finish may enqueue async trajectory/Experience/semantic projection jobs | | `CODECLONE_MEMORY_SEMANTIC_ENABLED` | `true` / `false` | `memory.semantic.enabled` | Turn semantic index sidecar on or off | | `CODECLONE_MEMORY_SEMANTIC_EMBEDDING_PROVIDER` | `diagnostic`, `fastembed`, `local_model`, `api` | `memory.semantic.embedding_provider` | Embedding backend for semantic rebuild/search | | `CODECLONE_MEMORY_SEMANTIC_EMBEDDING_MODEL` | model name string | `memory.semantic.embedding_model` | Provider model id (for example FastEmbed model name) | diff --git a/docs/book/12-structural-change-controller/patch-trail.md b/docs/book/12-structural-change-controller/patch-trail.md index 07a90311..86716da5 100644 --- a/docs/book/12-structural-change-controller/patch-trail.md +++ 
b/docs/book/12-structural-change-controller/patch-trail.md @@ -42,8 +42,9 @@ counts, verification status) for trajectory projection. Requires `audit_enabled= **Persistence:** manual or job-driven trajectory rebuild projects Patch Trail into `memory_trajectory_patch_trails` and bumps trajectory projection to -`trajectory-v2` (digest includes `patch_trail_digest`). Scoped retrieval surfaces -`patch_trail_summary` / full `patch_trail` — see +`trajectory-v2` or later (digest includes `patch_trail_digest`). The active +`trajectory-v3` projection also carries deterministic quality scoring and agent +subjects. Scoped retrieval surfaces `patch_trail_summary` / full `patch_trail` — see [Engineering Memory — Trajectory memory](../13-engineering-memory/trajectory-and-patch-trail.md). Refs: `codeclone/memory/trajectory/patch_trail.py`, `codeclone/audit/events.py`, diff --git a/docs/book/13-engineering-memory/cli-surface.md b/docs/book/13-engineering-memory/cli-surface.md index 5c7b1ef0..19e07dde 100644 --- a/docs/book/13-engineering-memory/cli-surface.md +++ b/docs/book/13-engineering-memory/cli-surface.md @@ -18,8 +18,8 @@ All commands live under `codeclone memory` and accept `--root` (default `.`). | `approve RECORD_ID [--verified-by NAME]` | Promote draft → active | | `reject RECORD_ID [--reason TEXT]` | Reject draft | | `archive RECORD_ID [--reason TEXT]` | Archive record | -| `trajectory status\|rebuild\|list\|search\|show\|export` | Trajectory projection read/rebuild/export | -| `jobs status\|enqueue\|run-once\|list` | Projection rebuild job queue (semantic + traj.) 
| +| `trajectory status\|rebuild\|list\|search\|show\|agents\|anomalies\|dashboard\|export` | Trajectory projection, passport analytics, and export | +| `jobs status\|enqueue\|run-once\|list` | Trajectory + Experience + semantic projection queue | Human governance (`approve`, `reject`, `archive`) is available through the **CodeClone VS Code Memory** view (IDE governance channel) and the diff --git a/docs/book/13-engineering-memory/index.md b/docs/book/13-engineering-memory/index.md index a0892b04..0cb9c3e7 100644 --- a/docs/book/13-engineering-memory/index.md +++ b/docs/book/13-engineering-memory/index.md @@ -36,9 +36,12 @@ controlled edits. | 24 | Scoped trajectory retrieval + memory evidence | MCP `get_relevant_memory.trajectories[]`; `query_engineering_memory(mode=trajectory_*)` | | 25 | Disabled-by-default local JSONL export profiles | CLI `memory trajectory export --profile ... --out ...` | | 26 | Patch Trail persistence + scoped retrieval | `memory_trajectory_patch_trails`; `patch_trail_summary` on scoped retrieval | +| 28 | Incremental projection jobs | Watermarked trajectory rebuild, semantic hash-skip, coalesced worker | +| RFC | Trajectory quality and passport analytics | Quality/complexity contract, anomalies, agents, dashboard | +| RFC | Experience Layer | Distillation job, scoped `experiences[]`, `promote_experience` draft bridge | Schema version constant: `ENGINEERING_MEMORY_SCHEMA_VERSION` in -`codeclone/contracts/__init__.py` (currently **`1.4`**). +`codeclone/contracts/__init__.py` (currently **`1.6`**). Semantic index format (separate contract): `SEMANTIC_INDEX_FORMAT_VERSION` (currently **`1`**) in the same module. 
The vector sidecar is independent of @@ -97,6 +100,7 @@ Module ownership: | `codeclone/memory/staleness.py` | Refresh-time and scope-time staleness | | `codeclone/memory/jobs/store.py` | Coalesced projection rebuild jobs (schema 1.3+) | | `codeclone/memory/trajectory/*` | Audit → trajectory projection, Patch Trail, export | +| `codeclone/memory/experience/*` | Deterministic Experience distillation + persistence | | `codeclone/config/memory*.py` | `[tool.codeclone.memory]` resolution | | `codeclone/surfaces/cli/memory*.py` | Human CLI + Rich rendering | | `codeclone/surfaces/mcp/_session_memory_mixin.py` | MCP memory tools + finish hook | diff --git a/docs/book/13-engineering-memory/mcp-surface.md b/docs/book/13-engineering-memory/mcp-surface.md index 40b66169..58081c78 100644 --- a/docs/book/13-engineering-memory/mcp-surface.md +++ b/docs/book/13-engineering-memory/mcp-surface.md @@ -47,6 +47,9 @@ Mode router for inspection and search. | `trajectory_status` | — | Trajectory projection run metadata | | `trajectory_search` | `query`; optional `intent_id` | Search stored trajectories | | `trajectory_get` | `record_id` (trajectory id) | One trajectory + steps (compact default) | +| `trajectory_anomalies` | optional `filters.include_routine` | Detected trajectory contract anomalies | +| `trajectory_agents` | optional `filters.include_routine` | Aggregate quality/outcomes by agent family | +| `trajectory_dashboard` | optional `filters.include_routine` | Combined status, agent, and anomaly view | List modes (`search`, `stale`, `drafts`, scoped `get_relevant_memory`) default to **compact** payloads: statement preview, `statement_length`, no `payload`. @@ -86,10 +89,11 @@ CLI equivalent: `codeclone memory search QUERY --match any|all`. 
| `refresh_from_run` | optional `run_id` (defaults to latest MCP run) | Force ingest from MCP run report | | `rebuild_semantic_index` | (none) | Rebuild LanceDB sidecar when `memory.semantic.enabled` | | `rebuild_trajectories` | (none) | Rebuild trajectory projections from audit event core | -| `enqueue_projection_rebuild` | optional `force` | Queue semantic + trajectory rebuild job | +| `enqueue_projection_rebuild` | (none) | Queue trajectory + Experience + semantic projection job | | `projection_rebuild_status` | (none) | Latest projection job status | | `run_projection_jobs_once` | (none) | Run one queued projection job inline | | `record_candidate` | `record_type`, `statement`, **`subject_path`** | Creates **draft** record | +| `promote_experience` | `experience_id` | Convert advisory Experience into human-reviewable draft | | `validate_claims` | `text` | Memory-layer claim guard (warnings/errors) | | `propose_from_receipt` | optional `text`, `intent_id` | Draft proposals from finish-like payload (atomic fallback) | @@ -113,7 +117,7 @@ On **accepted** or **accepted_with_external_changes** finish: - returns `memory_candidates`, `memory_staleness`, `memory_coverage_delta` - when `memory.projection_rebuild_policy` is not `off` and the environment is not CI, may enqueue a projection rebuild job (`projection_rebuild` in the - finish payload — semantic + trajectory sidecars) + finish payload — trajectory, Experience, and semantic projections) This is the preferred post-edit memory update path when using the workflow tools. 
diff --git a/docs/book/13-engineering-memory/projection-jobs.md b/docs/book/13-engineering-memory/projection-jobs.md index 60d51ffd..f856e685 100644 --- a/docs/book/13-engineering-memory/projection-jobs.md +++ b/docs/book/13-engineering-memory/projection-jobs.md @@ -1,7 +1,9 @@ -### Projection rebuild jobs (schema 1.3) +### Projection rebuild jobs (schema 1.3+) -Trajectory + semantic projections can be rebuilt asynchronously via a -coalesced job row in Engineering Memory SQLite (`memory_projection_jobs`). +Trajectory, Experience, and semantic projections can be rebuilt asynchronously +via a coalesced job row in Engineering Memory SQLite +(`memory_projection_jobs`). The worker rebuilds trajectories first, distills +Experiences from the resulting corpus, then refreshes the semantic sidecar. Default policy is **`off`**; opt in with: ```toml diff --git a/docs/book/13-engineering-memory/trajectory-and-patch-trail.md b/docs/book/13-engineering-memory/trajectory-and-patch-trail.md index 8cb222e2..5e40a8ab 100644 --- a/docs/book/13-engineering-memory/trajectory-and-patch-trail.md +++ b/docs/book/13-engineering-memory/trajectory-and-patch-trail.md @@ -1,4 +1,4 @@ -## Trajectory memory (Phases 22–26) {#trajectory-memory-phases-2226} +## Trajectory memory {#trajectory-memory} Trajectory memory is a **deterministic process narrative** derived from the audit event core. 
It complements governed memory cards: cards hold durable repository @@ -40,8 +40,9 @@ flowchart TB end subgraph Project["Trajectory rebuild"] - PRJ[projector trajectory-v2] + PRJ[projector trajectory-v3] PTP[patch_trail_projector] + QLT[quality + anomaly analytics] SUP[supersede stale rows] end @@ -54,17 +55,20 @@ flowchart TB subgraph Read["Read surfaces"] GR[get_relevant_memory.trajectories] QEM[query_engineering_memory trajectory_*] + DASH[dashboard + agents + anomalies] EXP[JSONL export v2] end AUDE --> EC EC --> PRJ EC --> PTP + PRJ --> QLT PRJ --> TRJ PTP --> PTR PRJ --> SUP TRJ --> GR TRJ --> QEM + QLT --> DASH PTR --> GR PTR --> QEM TRJ --> EXP @@ -79,7 +83,9 @@ Module ownership: | `codeclone/audit/events.py` | Bounded `event_core_json`; `patch_trail.computed` compaction | | `codeclone/memory/trajectory/patch_trail.py` | Finish-time Patch Trail compute (`PATCH_TRAIL_SCHEMA_VERSION`) | | `codeclone/memory/trajectory/patch_trail_projector.py` | Rebuild Patch Trail from audit event cores | -| `codeclone/memory/trajectory/projector.py` | Deterministic trajectory projection (`trajectory-v2`) | +| `codeclone/memory/trajectory/projector.py` | Deterministic trajectory projection (`trajectory-v3`) | +| `codeclone/memory/trajectory/quality.py` | Contract-quality and separate complexity scoring | +| `codeclone/memory/trajectory/analytics.py` | Dashboard, anomaly, and per-agent aggregates | | `codeclone/memory/trajectory/store.py` | SQLite persistence, supersede, rebuild orchestration | | `codeclone/memory/trajectory/retrieval.py` | Scoped ranking + `patch_trail_summary` | | `codeclone/memory/trajectory/export_context.py` | Export v2 context: precedents, citations, scope paths | @@ -130,23 +136,13 @@ includes: | `citations` | Claim-validation event cores + report digests | | `scope.paths` | Resolved from Patch Trail / declare / check event cores | | `patch_trail_summary` | When persisted in `memory_trajectory_patch_trails` | -| `projection_version` | `trajectory-v1` or 
`trajectory-v2` (v2 includes `patch_trail_digest`) | +| `projection_version` | `trajectory-v1`, `trajectory-v2`, or active `trajectory-v3`; v2 adds Patch Trail digest and v3 adds quality score + agent subject | Rebuild supersedes older projection rows for the same workflow (one canonical trajectory per `workflow_id` in export). Legacy audit rows without path facts in frozen event core are supplemented deterministically from stored audit payloads during projection. Changing profile shape requires a profile version bump. -```mermaid -flowchart LR - CAN[Canonical trajectories] --> CTX[export_context.build_export_context] - CAN --> REC[build_export_record] - CTX --> REC - MEM[(memory_records FTS)] --> CTX - PTR[(patch_trails)] --> CTX - REC --> JSONL[JSONL file cap-enforced] -``` - ### MCP retrieval `get_relevant_memory` adds **`trajectories[]`** beside **`records[]`** when path @@ -158,13 +154,17 @@ surfaces **`patch_trail_summary`** at the response root. Compact retrieval omits that root duplicate; the summary remains on the trajectory preview. `query_engineering_memory(mode=trajectory_get)` returns **`patch_trail`** on the -trajectory payload when persisted for that workflow. +trajectory payload when persisted for that workflow. Full detail also includes +the explainable **`quality_contract`**; compact payloads retain headline +`quality_score`, `complexity_score`, and anomaly count. Trajectory rebuild (`memory trajectory rebuild` / MCP `manage_engineering_memory(action=rebuild_trajectories)`) synthesizes Patch Trail from audit event cores (`intent.declared`, `intent.checked`, verify events) and stores it in **`memory_trajectory_patch_trails`**. Trajectory digest -(`trajectory-v2`) incorporates **`patch_trail_digest`** when present. +(`trajectory-v2` and later) incorporates **`patch_trail_digest`** when present. +The active **`trajectory-v3`** digest additionally incorporates the persisted +quality score and records the primary agent as an `agent` subject. 
Scoped ranking adds a small boost when query scope paths intersect **`untouched_in_declared`** paths from the stored Patch Trail. @@ -176,6 +176,9 @@ Scoped ranking adds a small boost when query scope paths intersect | `trajectory_status` | project | Projection run manifest | | `trajectory_search` | query text | Requires `query`; excludes `run:*` routine by default | | `trajectory_get` | trajectory id | `record_id` = trajectory id | +| `trajectory_anomalies` | project | Contract anomalies, optionally including routine runs | +| `trajectory_agents` | project | Outcome and quality aggregates by agent family | +| `trajectory_dashboard` | project | Combined status, agent, and anomaly payload | Filter: `filters.include_routine=true` on `trajectory_search` includes single-event `run:*` analysis workflows. @@ -192,8 +195,6 @@ Community CodeClone writes **local JSONL only** — no remote API, upload, or training pipeline. Corporate policy packs, signing, approval workflows, and dataset registry are out of scope unless explicitly requested. -Refs: - -- `codeclone/memory/trajectory/rebuild_workflow.py:execute_trajectory_rebuild` -- `codeclone/memory/trajectory/export.py:export_trajectories_jsonl` -- `tests/test_memory_trajectory_*.py`, `tests/test_audit_event_core_v2.py` +Refs: `codeclone/memory/trajectory/rebuild_workflow.py`, +`codeclone/memory/trajectory/export.py`, `tests/test_memory_trajectory_*.py`, +`tests/test_audit_event_core_v2.py`. 
diff --git a/docs/book/24-compatibility-and-versioning.md b/docs/book/24-compatibility-and-versioning.md index 998bee42..9ad214ea 100644 --- a/docs/book/24-compatibility-and-versioning.md +++ b/docs/book/24-compatibility-and-versioning.md @@ -33,10 +33,14 @@ Current contract versions: - `CACHE_VERSION = "2.8"` - `REPORT_SCHEMA_VERSION = "2.11"` - `METRICS_BASELINE_SCHEMA_VERSION = "1.2"` -- `ENGINEERING_MEMORY_SCHEMA_VERSION = "1.4"` +- `ENGINEERING_MEMORY_SCHEMA_VERSION = "1.6"` - `PATCH_TRAIL_SCHEMA_VERSION = "1"` (finish-time Patch Trail JSON; audit + SQLite sidecar) - `TRAJECTORY_EXPORT_SCHEMA_VERSION = "2"` (JSONL export rows; `codeclone/memory/trajectory/profiles.py`) +- `TRAJECTORY_PROJECTION_VERSION = "trajectory-v3"` (derived trajectory rows) +- `TRAJECTORY_QUALITY_SCORE_VERSION = "2"` (quality contract formula) +- `EXPERIENCE_DISTILLATION_VERSION = "experience-v1"` (derived Experience rows) - `SEMANTIC_INDEX_FORMAT_VERSION = "1"` (LanceDB sidecar; separate from SQLite memory schema) +- `PLATFORM_OBSERVABILITY_SCHEMA_VERSION = "1.0"` (dev-only telemetry SQLite) Refs: @@ -52,14 +56,19 @@ Version bump rules: - bump **report schema** for canonical report document shape/meaning changes - bump **metrics-baseline schema** only for standalone metrics-baseline payload changes - bump **engineering memory schema** for SQLite DDL / governed record-shape changes - (`codeclone/memory/schema_migrate.py`) — e.g. 
**`1.4`** adds - `memory_trajectory_patch_trails` + (`codeclone/memory/schema_migrate.py`) — **`1.4`** added Patch Trail + persistence, **`1.5`** quality scoring, and **`1.6`** Experience tables - bump **patch trail schema** (`PATCH_TRAIL_SCHEMA_VERSION`) when finish-time Patch Trail JSON shape changes incompatibly - bump **trajectory export schema** (`TRAJECTORY_EXPORT_SCHEMA_VERSION`) when JSONL row shape changes incompatibly +- bump **trajectory projection**, **quality score**, or **Experience + distillation** versions when their derived identity/formula changes; rebuild + derived rows rather than migrating source evidence - bump **semantic index format** when LanceDB projection or stored row fields change incompatibly — forces index rebuild, not SQLite migration (see [13-engineering-memory/index.md](13-engineering-memory/index.md)) +- bump **Platform Observability schema** only for incompatible telemetry-store + changes; it remains separate from reports, gates, baselines, and memory facts Operational compatibility rules: diff --git a/docs/book/25-mcp-interface/determinism-and-tests.md b/docs/book/25-mcp-interface/determinism-and-tests.md index 9769c01d..d7902ddd 100644 --- a/docs/book/25-mcp-interface/determinism-and-tests.md +++ b/docs/book/25-mcp-interface/determinism-and-tests.md @@ -5,7 +5,7 @@ | Default transport | Local `stdio` | | Remote exposure | Explicit `--allow-remote` required for non-loopback | | Lazy loading | Base installs and CI do not require MCP packages | -| Read-only | Never mutates source, baseline, cache, or canonical report artifacts; may write ephemeral workspace intent registry (`.cache/codeclone/intents/`), optional audit DB, Engineering Memory **draft** rows, and projection job metadata under `.codeclone/` when enabled | +| Read-only | Never mutates source, baseline, cache, or canonical report artifacts; may write the ephemeral workspace intent registry under `.codeclone/`, optional audit/observability DBs, Engineering Memory 
**draft** rows, and projection job metadata when enabled | --- diff --git a/docs/book/25-mcp-interface/index.md b/docs/book/25-mcp-interface/index.md index 684e9369..aa57a6e6 100644 --- a/docs/book/25-mcp-interface/index.md +++ b/docs/book/25-mcp-interface/index.md @@ -104,13 +104,11 @@ Current server characteristics: ## Tools -## Tools - -Current tool set: **31 tools** for agent clients, organized by workflow phase. +Current tool set: **32 tools** for agent clients, organized by workflow phase. When the MCP server starts with `--ide-governance-channel` (CodeClone VS Code extension), two additional read-only tools register: -`get_workspace_session_stats` and `get_controller_audit_trail` (**33 tools** +`get_workspace_session_stats` and `get_controller_audit_trail` (**34 tools** total). They are not listed in generic agent tool catalogs; payloads mirror CLI `--session-stats` and `--audit` via `codeclone/controller_insights/`. diff --git a/docs/book/25-mcp-interface/tools/atomic-change-control.md b/docs/book/25-mcp-interface/tools/atomic-change-control.md index c77587f2..8e0f0a3b 100644 --- a/docs/book/25-mcp-interface/tools/atomic-change-control.md +++ b/docs/book/25-mcp-interface/tools/atomic-change-control.md @@ -5,8 +5,8 @@ | `manage_change_intent` | `action`, `root`, `run_id`, `intent_id`, `scope`, `on_conflict`, `ttl_seconds`, `lease_seconds`, `changed_files` or `diff_ref` | Intent lifecycle: declare, get, check, clear, renew, promote, list_workspace, gc_workspace, recover, reset_workspace. Use for queue/promote/recover operations alongside workflow tools | | `get_blast_radius` | `run_id`, `files`, `depth`, `include` | Pre-change risk boundary: full transitive graph, custom include filters | | `get_relevant_memory` | `root`, `scope`, `intent_id`, `symbols`, `max_records`, `include_stale`, `include_drafts`, `detail_level` | Ranked engineering memory for declared edit scope. 
Compact by default: bounded record/trajectory subjects plus typed `records`, `experiences`, `trajectories`, and `coverage` lanes. Auto-bootstraps store when `mcp_sync_policy=bootstrap_if_missing` (default). See [Engineering Memory](../../13-engineering-memory/index.md) | -| `query_engineering_memory` | `root`, `mode`, …, optional `semantic` (search only), `detail_level` | Mode router: search, get, for_path, for_symbol, stale, drafts, coverage, status, trajectory_status, trajectory_search, trajectory_get. List/search modes default compact; `get`, `trajectory_get`, or `detail_level=full` are explicit drill-down. `filters` supports `types`, `statuses`, `confidences`, and `match_mode` (`any`\|`all`) for search. `semantic=true` blends LanceDB proximity when `[tool.codeclone.memory.semantic] enabled` and index built (default off). See [Engineering Memory](../../13-engineering-memory/index.md) | -| `manage_engineering_memory` | `root`, `action`, … | Agent-side: `refresh_from_run`, `rebuild_semantic_index`, `record_candidate`, `validate_claims`, `propose_from_receipt`, `rebuild_trajectories`, `enqueue_projection_rebuild`, `projection_rebuild_status`, `run_projection_jobs_once`. Human approve/reject/archive: VS Code Memory view **or** `codeclone memory approve` (not MCP agents). See [Engineering Memory](../../13-engineering-memory/index.md) | +| `query_engineering_memory` | `root`, `mode`, …, optional `semantic` (search only), `detail_level` | Mode router: search, get, for_path, for_symbol, stale, drafts, coverage, status, trajectory_status, trajectory_search, trajectory_get, trajectory_anomalies, trajectory_agents, trajectory_dashboard. List/search modes default compact; `get`, `trajectory_get`, or `detail_level=full` are explicit drill-down. `filters` supports `types`, `statuses`, `confidences`, and `match_mode` (`any`\|`all`) for search. `semantic=true` blends LanceDB proximity when `[tool.codeclone.memory.semantic] enabled` and index built (default off). 
See [Engineering Memory](../../13-engineering-memory/index.md) | +| `manage_engineering_memory` | `root`, `action`, … | Agent-side: `refresh_from_run`, `record_candidate`, `promote_experience`, `validate_claims`, `propose_from_receipt`, `rebuild_semantic_index`, `rebuild_trajectories`, `enqueue_projection_rebuild`, `projection_rebuild_status`, `run_projection_jobs_once`. `promote_experience` creates a human-reviewable draft; human approve/reject/archive remains VS Code/CLI only. See [Engineering Memory](../../13-engineering-memory/index.md) | | `check_patch_contract` | `mode`, `run_id`, `before_run_id`, `after_run_id`, `intent_id`, `strictness`, `changed_files` or `diff_ref` | Manual budget query or step-by-step verification | | `create_review_receipt` | `run_id`, `intent_id`, `format`, `include_blast_radius`, `include_patch_contract` | Manual receipt generation | | `validate_review_claims` | `text`, `run_id`, `require_citations`, `patch_health_delta` | Standalone citation-based overclaim detection; pass `patch_health_delta` from verify when using the atomic workflow | diff --git a/docs/book/25-mcp-interface/tools/session-and-memory.md b/docs/book/25-mcp-interface/tools/session-and-memory.md index 3a0b6fb7..ecdc9c83 100644 --- a/docs/book/25-mcp-interface/tools/session-and-memory.md +++ b/docs/book/25-mcp-interface/tools/session-and-memory.md @@ -5,3 +5,14 @@ | `mark_finding_reviewed` | `finding_id`, `run_id`, `note` | Session-local review marker (in-memory) | | `list_reviewed_findings` | `run_id` | List reviewed markers for a run | | `clear_session_runs` | — | Reset in-memory runs, session review markers, and workspace intent registry state for the MCP process | + +### Platform observability + +| Tool | Key parameters | Purpose | +|----------------------------------|-------------------------------------------------|-------------------------------------------------------------------------| +| `query_platform_observability` | `root`, `section`, `window`, 
`detail_level`, `limit` | Bounded, read-only slices of CodeClone's own runtime telemetry | + +This tool is **development-only**. It reports numeric operation/span, +database-cost, payload, agent-context, and pipeline diagnostics for CodeClone +itself. It never contributes repository findings, gates, baselines, memory +facts, or edit authorization, and it does not expose raw SQL or payload bodies. diff --git a/docs/book/appendix/b-schema-layouts.md b/docs/book/appendix/b-schema-layouts.md index fffdf3bf..f4e47da9 100644 --- a/docs/book/appendix/b-schema-layouts.md +++ b/docs/book/appendix/b-schema-layouts.md @@ -805,7 +805,7 @@ DESIGN FINDINGS INTEGRITY ``` -## Engineering Memory schema (`1.4`) +## Engineering Memory schema (`1.6`) SQLite database at `.codeclone/memory/engineering_memory.sqlite3` (default). Schema version stored in `memory_meta.schema_version`. @@ -820,19 +820,28 @@ Core tables: | `memory_fts` | FTS5 search index (schema 1.1+) | | `memory_revisions` | Governance audit trail | | `memory_ingestion_runs` | Init/refresh run metadata | -| `memory_projection_jobs` | Coalesced trajectory/semantic rebuild jobs (schema 1.3+) | +| `memory_projection_jobs` | Coalesced trajectory/Experience/semantic jobs (schema 1.3+) | -Trajectory tables (schema **`1.2`**+ trajectory DDL, active projection **`trajectory-v2`**): +Trajectory tables (schema **`1.2`**+ trajectory DDL, active projection +**`trajectory-v3`**): | Table | Role | |-------------------------------------|-----------------------------------------------------------------------| -| `memory_trajectories` | One row per `(project_id, workflow_id, projection_version)` | +| `memory_trajectories` | One row per `(project_id, workflow_id, projection_version)` with quality score | | `memory_trajectory_steps` | Ordered audit steps with frozen `event_core_json` | | `memory_trajectory_subjects` | Path/module subjects linked to a trajectory | | `memory_trajectory_evidence` | Report/run/audit evidence refs | | 
`memory_trajectory_patch_trails` | Patch Trail JSON + digest per trajectory (schema **`1.4`**, Phase 26) | | `memory_trajectory_projection_runs` | Rebuild run manifest | +Experience tables (schema **`1.6`**, derived from trajectory evidence): + +| Table | Role | +|------------------------------|------------------------------------------------------------| +| `memory_experiences` | Advisory distilled patterns (`experience-v1`) | +| `memory_experience_facets` | Agent/profile/intent diversity facets | +| `memory_experience_evidence` | Contributing trajectory ids and outcomes | + Patch Trail JSON uses `PATCH_TRAIL_SCHEMA_VERSION` (currently **`1`**) in `codeclone/contracts/__init__.py`. Trajectory JSONL export rows use `TRAJECTORY_EXPORT_SCHEMA_VERSION` (**`2`**) in diff --git a/docs/book/integrations/claude-desktop-bundle.md b/docs/book/integrations/claude-desktop-bundle.md index 3c2c500e..f1285673 100644 --- a/docs/book/integrations/claude-desktop-bundle.md +++ b/docs/book/integrations/claude-desktop-bundle.md @@ -32,7 +32,7 @@ The wrapper prefers a workspace-local `.venv`, then a Poetry environment, then user-local install paths, then `PATH`. The bundle does **not** pass `--ide-governance-channel`. Agents see the standard -**31** MCP tools. VS Code session stats, audit trail webviews, and IDE Memory +**32** default MCP tools. VS Code session stats, audit trail webviews, and IDE Memory governance (`prepare_governance` / `commit_governance`) require the VS Code extension launcher. diff --git a/docs/book/integrations/codex-plugin.md b/docs/book/integrations/codex-plugin.md index 9f7a9de5..74219ddc 100644 --- a/docs/book/integrations/codex-plugin.md +++ b/docs/book/integrations/codex-plugin.md @@ -22,9 +22,9 @@ skills. New canonical MCP surfaces from the local `codeclone-mcp` version flow through directly, including Coverage Join facts and the optional `coverage` help topic when supported. 
The plugin does not mutate `~/.codex/config.toml` or install a second server binary. The bundled launcher does not filter MCP tools; -agents receive the **31-tool** agent surface from the resolved `codeclone-mcp` -server (no `--ide-governance-channel` — IDE-only session/audit tools are VS Code -only). +agents receive the full default agent surface from the resolved +`codeclone-mcp` server (no `--ide-governance-channel` — IDE-only session/audit +tools are VS Code only). `.agents/plugins/marketplace.json` is the monorepo-local source entry used for development and packaging into `orenlab/codeclone-codex`; it is not the public diff --git a/docs/getting-started.md b/docs/getting-started.md index a9d1a502..70b7ac95 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -153,8 +153,8 @@ See [Exit codes](book/09-exit-codes.md). ## MCP Setup -The MCP server exposes **31 tools** for agent clients over the same canonical -pipeline (33 when VS Code starts the server with `--ide-governance-channel` for +The MCP server exposes **32 tools** for agent clients over the same canonical +pipeline (34 when VS Code starts the server with `--ide-governance-channel` for session stats and audit insights). ### Start the server diff --git a/docs/guide/integrations/claude-desktop/setup.md b/docs/guide/integrations/claude-desktop/setup.md index 207e501d..04dd61fe 100644 --- a/docs/guide/integrations/claude-desktop/setup.md +++ b/docs/guide/integrations/claude-desktop/setup.md @@ -60,7 +60,8 @@ to an absolute path to `codeclone-mcp`. The MCP server never mutates repository source, baselines, analysis cache, or canonical reports. It may write ephemeral coordination state under -`.cache/codeclone/intents/`, optional audit records when enabled, and Engineering +`.codeclone/intents/` (file backend) or `.codeclone/db/intents.sqlite3` +(SQLite backend), optional audit records when enabled, and Engineering Memory **draft** rows through agent tools. 
Human approve/reject stays in VS Code Memory or `codeclone memory approve`. diff --git a/docs/guide/integrations/cursor/install-and-skills.md b/docs/guide/integrations/cursor/install-and-skills.md index 06196968..e5d2b927 100644 --- a/docs/guide/integrations/cursor/install-and-skills.md +++ b/docs/guide/integrations/cursor/install-and-skills.md @@ -150,7 +150,8 @@ does not treat report-only signals as CI failures or vulnerability claims. ## Runtime model Additive: local MCP via `launch_mcp.py`, six skills, three rules (two -`alwaysApply` + one Python glob), optional hooks. **31** MCP tools for agents — launcher does **not** +`alwaysApply` + one Python glob), optional hooks. The full default agent MCP +surface is passed through — the launcher does **not** pass `--ide-governance-channel` (VS Code adds +2 IDE-only tools and Memory governance). New server tools from upgraded `codeclone-mcp` pass through unfiltered. diff --git a/docs/guide/mcp/architecture.md b/docs/guide/mcp/architecture.md index 36dd3cf7..ce0deb55 100644 --- a/docs/guide/mcp/architecture.md +++ b/docs/guide/mcp/architecture.md @@ -36,6 +36,7 @@ graph TD WIR["Workspace Intent Registry
    .codeclone/intents/ or intents.sqlite3"] MEM["Engineering Memory SQLite
    .codeclone/memory/"] AUD["Audit trail (optional)
    .codeclone/db/"] + OBS["Platform Observability (dev-only)
    .codeclone/db/"] end MCPSession -->|" coordination + drafts "| Disk @@ -52,7 +53,9 @@ graph TD **Read-only contract (analysis truth):** MCP never mutates source files, baselines, analysis cache, or canonical report artifacts. It **may** write ephemeral workspace intent records, Engineering Memory **drafts** (human approve -required for promotion), and optional audit evidence when enabled. +required for promotion), optional audit evidence, and opt-in development +telemetry when enabled. Platform Observability remains separate from repository +findings, reports, gates, baselines, and memory facts. ## Mixin chain @@ -62,7 +65,7 @@ sit outermost. ```mermaid graph BT - STM["_MCPSessionStateMixin
    runs, markers, gates; embeds finding/report stack"] + STM["_MCPSessionStateMixin
    runs, markers, gates, observability query"] INS["_MCPSessionInsightsMixin
    session stats, audit queries"] BR["_MCPSessionBlastRadiusMixin"] MM["_MCPSessionMemoryMixin"] diff --git a/extensions/claude-desktop-codeclone/README.md b/extensions/claude-desktop-codeclone/README.md index 01142077..130466bf 100644 --- a/extensions/claude-desktop-codeclone/README.md +++ b/extensions/claude-desktop-codeclone/README.md @@ -3,7 +3,8 @@ **Structural Change Controller for AI-assisted Python development** — local MCP bundle wrapper for `codeclone-mcp`. Installs as a `.mcpb` package instead of manual JSON editing. -Same canonical 31-tool MCP surface used by CLI, VS Code, Codex, and Claude Code. +Same canonical default agent MCP surface used by CLI, VS Code, Codex, and +Claude Code. Repository read-only (source, baselines, cache, canonical reports); local stdio only. The bundle proxies the full MCP server, including change-control and session tools — ephemeral coordination under `.codeclone/intents/` and diff --git a/extensions/claude-desktop-codeclone/manifest.json b/extensions/claude-desktop-codeclone/manifest.json index 98364722..f57a9327 100644 --- a/extensions/claude-desktop-codeclone/manifest.json +++ b/extensions/claude-desktop-codeclone/manifest.json @@ -4,7 +4,7 @@ "display_name": "CodeClone", "version": "2.1.0", "description": "Structural Change Controller for AI-assisted Python development — local MCP bundle for Claude Desktop.", - "long_description": "CodeClone for Claude Desktop wraps the local codeclone-mcp launcher as an MCP bundle. Structural change controller for AI-assisted Python development — same canonical 31-tool MCP surface used by the CLI, HTML report, VS Code extension, and Codex plugin. Repository read-only (source, baselines, cache, reports), baseline-aware, local stdio only. Includes intent declaration, blast radius, patch contract, review receipt, engineering memory, and claim validation via validate_review_claims and finish_controlled_change. 
Ephemeral coordination may be stored under .codeclone/intents/ (file backend) or .codeclone/db/ (SQLite backend).", + "long_description": "CodeClone for Claude Desktop wraps the local codeclone-mcp launcher as an MCP bundle. Structural change controller for AI-assisted Python development — same canonical default agent MCP surface used by the CLI, HTML report, VS Code extension, and Codex plugin. Repository read-only (source, baselines, cache, reports), baseline-aware, local stdio only. Includes intent declaration, blast radius, patch contract, review receipt, engineering memory, platform observability, and claim validation via validate_review_claims and finish_controlled_change. Ephemeral coordination may be stored under .codeclone/intents/ (file backend) or .codeclone/db/ (SQLite backend).", "author": { "name": "Den Rozhnovskiy", "email": "pytelemonbot@mail.ru", @@ -54,19 +54,23 @@ }, { "name": "get_relevant_memory", - "description": "Ranked, evidence-linked engineering memory for the declared edit scope. Read-only." + "description": "Ranked, evidence-linked records, experiences, and trajectories for the declared edit scope. Read-only." }, { "name": "query_engineering_memory", - "description": "Mode-based engineering memory inspection: search, get, for_path, for_symbol, stale, drafts, coverage, status. Read-only." + "description": "Engineering Memory router: record search/status, trajectory detail/search/status, anomalies, agents, and dashboard views. Read-only." }, { "name": "manage_engineering_memory", - "description": "Engineering memory governance router: refresh_from_run, rebuild_semantic_index, record_candidate (draft), validate_claims, propose_from_receipt. Human approve/reject use the VS Code Memory view." + "description": "Engineering Memory actions: refresh, draft recording, experience promotion, claim validation, semantic/trajectory rebuilds, and projection jobs. Human approve/reject/archive require the VS Code Memory view or the codeclone memory CLI."
+ }, + { + "name": "query_platform_observability", + "description": "Read-only, development-only diagnostics over CodeClone runtime telemetry; never a repository quality signal." }, { "name": "manage_change_intent", - "description": "Change intent lifecycle: declare scope, get status, check diff, clear intent." + "description": "Change intent lifecycle: list, declare, check, renew, queue/promote, recover, clear, and workspace cleanup." }, { "name": "start_controlled_change", diff --git a/plugins/codeclone/.codex-plugin/plugin.json b/plugins/codeclone/.codex-plugin/plugin.json index 631dcaf8..51a738c7 100644 --- a/plugins/codeclone/.codex-plugin/plugin.json +++ b/plugins/codeclone/.codex-plugin/plugin.json @@ -25,7 +25,7 @@ "interface": { "displayName": "CodeClone", "shortDescription": "Structural Change Controller for AI-assisted Python development.", - "longDescription": "CodeClone for Codex — structural change controller for AI-assisted Python development. Clone detection, quality metrics, baseline-aware governance, engineering memory, and intent-first change control over the canonical codeclone-mcp server. Repository read-only (source, baselines, cache, reports); full 31-tool MCP passthrough. Ships review, hotspot, change-control, and engineering-memory skills for conservative-first structural review and scoped repository edits.", + "longDescription": "CodeClone for Codex — structural change controller for AI-assisted Python development. Clone detection, quality metrics, baseline-aware governance, engineering memory, and intent-first change control over the canonical codeclone-mcp server. Repository read-only (source, baselines, cache, reports); full default agent MCP passthrough. 
Ships review, hotspot, change-control, and engineering-memory skills for conservative-first structural review and scoped repository edits.", "developerName": "OrenLab", "category": "Developer Tools", "capabilities": [ diff --git a/plugins/codeclone/README.md b/plugins/codeclone/README.md index b720434c..2ec8569c 100644 --- a/plugins/codeclone/README.md +++ b/plugins/codeclone/README.md @@ -5,8 +5,9 @@ development** — over `codeclone-mcp`. Same canonical MCP surface used by CLI, VS Code, Claude Desktop, and Claude Code. Repository read-only (source, baselines, cache, canonical reports); local stdio -only. The bundled launcher exposes the full 31-tool MCP server, including -change-control and session tools — ephemeral coordination under +only. The bundled launcher exposes the full default agent MCP surface, including +change-control, Engineering Memory, Platform Observability, and session tools — +ephemeral coordination under `.codeclone/intents/` and optional audit records when enabled. Current-run metric surfaces from the local `codeclone-mcp` version flow through directly, including `Coverage Join` facts and the optional `coverage` help topic. diff --git a/plugins/codeclone/skills/codeclone-engineering-memory/SKILL.md b/plugins/codeclone/skills/codeclone-engineering-memory/SKILL.md index 53551f1c..c9f26439 100644 --- a/plugins/codeclone/skills/codeclone-engineering-memory/SKILL.md +++ b/plugins/codeclone/skills/codeclone-engineering-memory/SKILL.md @@ -37,6 +37,7 @@ Do not invent memory from local files or report dumps. | Store health | `query_engineering_memory` | `mode=status` | | Stale inventory | `query_engineering_memory` | `mode=stale` | | Trajectory forensics | `query_engineering_memory` | `mode=trajectory_get\|trajectory_search\|trajectory_status` | +| Trajectory analytics | `query_engineering_memory` | `mode=trajectory_anomalies\|trajectory_agents\|trajectory_dashboard` | Defaults exclude **stale**. 
Keyword `search` excludes drafts unless `include_drafts=true`; scoped `get_relevant_memory` and `for_path` / @@ -74,7 +75,8 @@ semantic-quality embeddings — do not present hits as LLM recall. | Refresh system facts from run | `manage_engineering_memory(action=refresh_from_run, run_id?)` | Force ingest | | Rebuild semantic LanceDB sidecar | `manage_engineering_memory(action=rebuild_semantic_index)` | After semantic enabled + extras | | Rebuild trajectories | `manage_engineering_memory(action=rebuild_trajectories)` | After audit-enabled workflows | -| Projection jobs | `enqueue_projection_rebuild` / `projection_rebuild_status` / `run_projection_jobs_once` | When policy enabled | +| Promote an Experience | `manage_engineering_memory(action=promote_experience, experience_id=…)` | Creates a human-reviewable draft | +| Projection jobs | `manage_engineering_memory(action=enqueue_projection_rebuild)` / `action=projection_rebuild_status` / `action=run_projection_jobs_once` | When policy enabled | | Atomic fallback | `manage_engineering_memory(action=propose_from_receipt, text=…, intent_id?)` | When finish hook unavailable | ### Write rules diff --git a/plugins/cursor-codeclone/CHANGELOG.md b/plugins/cursor-codeclone/CHANGELOG.md index 49bb0ed7..265c8c7d 100644 --- a/plugins/cursor-codeclone/CHANGELOG.md +++ b/plugins/cursor-codeclone/CHANGELOG.md @@ -13,7 +13,7 @@ - **Three hooks** via `hooks/run_hook.py`: fail-closed `preToolUse` intent gate (`codeclone.workspace_intent`), `postToolUse` Python edit reminder (`additional_context`), `stop` unclosed-intent advisory (`followup_message`) -- **MCP:** `mcp.json` runs `python3 ./scripts/launch_mcp.py` (full 31-tool - passthrough; no `--ide-governance-channel`) +- **MCP:** `mcp.json` runs `python3 ./scripts/launch_mcp.py` (full default + agent-tool passthrough; no `--ide-governance-channel`) - **Installer:** `scripts/install-project-hooks.py` → `.cursor/hooks.json` and `.cursor/codeclone-hooks.json` (`enforce_scope` `python` | 
`repo`) diff --git a/plugins/cursor-codeclone/README.md b/plugins/cursor-codeclone/README.md index fe194cad..d06080fe 100644 --- a/plugins/cursor-codeclone/README.md +++ b/plugins/cursor-codeclone/README.md @@ -118,7 +118,7 @@ Reload Cursor or reopen the workspace after installing. Project hooks require a The plugin bundles a stdio-based `codeclone-mcp` server configuration via `python3 ./scripts/launch_mcp.py` (workspace `.venv` → Poetry env → `PATH`). -The server exposes all **31** MCP tools for agents (full passthrough; no +The server exposes the full default agent MCP surface (no `--ide-governance-channel`). Skills and rules steer agents toward the documented workflow; the plugin does not filter tools at the transport layer. IDE-only `get_workspace_session_stats` / `get_controller_audit_trail` require the VS Code @@ -148,8 +148,9 @@ ln -s /path/to/codeclone/plugins/cursor-codeclone ~/.cursor/plugins/local/codecl - **No second truth model** — health, findings, and drift come exclusively from `codeclone-mcp` and canonical report semantics. - **Repository read-only** — the plugin never edits source files, baselines, - caches, or report artifacts. Agents reach the full MCP server (31 tools), - including change-control and session tools, via the bundled stdio launcher. + caches, or report artifacts. Agents reach the full default MCP server, + including change-control, Engineering Memory, Platform Observability, and + session tools, via the bundled stdio launcher. - **Intent-first edits** — the change control skill enforces the full declare / blast-radius / edit / verify / clear cycle. 
- **Deterministic, not opinionated** — the agent reports what CodeClone finds, diff --git a/plugins/cursor-codeclone/skills/codeclone-engineering-memory/SKILL.md b/plugins/cursor-codeclone/skills/codeclone-engineering-memory/SKILL.md index 53551f1c..c9f26439 100644 --- a/plugins/cursor-codeclone/skills/codeclone-engineering-memory/SKILL.md +++ b/plugins/cursor-codeclone/skills/codeclone-engineering-memory/SKILL.md @@ -37,6 +37,7 @@ Do not invent memory from local files or report dumps. | Store health | `query_engineering_memory` | `mode=status` | | Stale inventory | `query_engineering_memory` | `mode=stale` | | Trajectory forensics | `query_engineering_memory` | `mode=trajectory_get\|trajectory_search\|trajectory_status` | +| Trajectory analytics | `query_engineering_memory` | `mode=trajectory_anomalies\|trajectory_agents\|trajectory_dashboard` | Defaults exclude **stale**. Keyword `search` excludes drafts unless `include_drafts=true`; scoped `get_relevant_memory` and `for_path` / @@ -74,7 +75,8 @@ semantic-quality embeddings — do not present hits as LLM recall. 
| Refresh system facts from run | `manage_engineering_memory(action=refresh_from_run, run_id?)` | Force ingest | | Rebuild semantic LanceDB sidecar | `manage_engineering_memory(action=rebuild_semantic_index)` | After semantic enabled + extras | | Rebuild trajectories | `manage_engineering_memory(action=rebuild_trajectories)` | After audit-enabled workflows | -| Projection jobs | `enqueue_projection_rebuild` / `projection_rebuild_status` / `run_projection_jobs_once` | When policy enabled | +| Promote an Experience | `manage_engineering_memory(action=promote_experience, experience_id=…)` | Creates a human-reviewable draft | +| Projection jobs | `manage_engineering_memory(action=enqueue_projection_rebuild)` / `action=projection_rebuild_status` / `action=run_projection_jobs_once` | When policy enabled | | Atomic fallback | `manage_engineering_memory(action=propose_from_receipt, text=…, intent_id?)` | When finish hook unavailable | ### Write rules From 29d07e9fa81d93d3c1850e40d3b1fa23c4b363c3 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Fri, 12 Jun 2026 20:04:09 +0500 Subject: [PATCH 259/318] feat(core): per-source vector top-K budgets in semantic retrieval --- codeclone/memory/retrieval/service.py | 16 ++++--- codeclone/memory/semantic/__init__.py | 16 +++++-- codeclone/memory/semantic/lancedb_backend.py | 9 +++- tests/test_cli_memory_semantic.py | 11 ++++- tests/test_mcp_memory_semantic.py | 6 ++- tests/test_memory_coverage_gaps.py | 9 +++- ...test_memory_retrieval_semantic_coverage.py | 11 ++++- tests/test_semantic_rebuild.py | 11 ++++- tests/test_semantic_rebuild_incremental.py | 4 +- tests/test_semantic_retrieval.py | 11 ++++- tests/test_semantic_search_service.py | 42 ++++++++++++++++++- 11 files changed, 117 insertions(+), 29 deletions(-) diff --git a/codeclone/memory/retrieval/service.py b/codeclone/memory/retrieval/service.py index 2058835b..a29dfb31 100644 --- a/codeclone/memory/retrieval/service.py +++ b/codeclone/memory/retrieval/service.py @@ 
-1281,17 +1281,15 @@ def _semantic_hits( query: str, k: int, ) -> tuple[dict[str, float], list[SemanticHit], list[SemanticHit]]: + # Search each lane with its own top-k budget so a dense source (e.g. audit) + # cannot crowd memory hits out of one shared top-k (#3). The index applies + # the source filter, so results arrive already lane-scoped. vector = embed_query(provider, query) proximity: dict[str, float] = {} - audit_hits: list[SemanticHit] = [] - trajectory_hits: list[SemanticHit] = [] - for hit in index.search(vector, k=k): - if hit.source == "memory": - proximity.setdefault(hit.source_id, hit.score) - elif hit.source == "audit": - audit_hits.append(hit) - elif hit.source == "trajectory": - trajectory_hits.append(hit) + for hit in index.search(vector, k=k, source="memory"): + proximity.setdefault(hit.source_id, hit.score) + audit_hits = list(index.search(vector, k=k, source="audit")) + trajectory_hits = list(index.search(vector, k=k, source="trajectory")) return proximity, audit_hits, trajectory_hits diff --git a/codeclone/memory/semantic/__init__.py b/codeclone/memory/semantic/__init__.py index 02d9b6dc..6cdc707a 100644 --- a/codeclone/memory/semantic/__init__.py +++ b/codeclone/memory/semantic/__init__.py @@ -54,7 +54,13 @@ class SemanticIndex(Protocol): module level. """ - def search(self, vector: Sequence[float], *, k: int) -> list[SemanticHit]: ... + def search( + self, vector: Sequence[float], *, k: int, source: SemanticSource | None = None + ) -> list[SemanticHit]: + """Top-k nearest hits. ``source`` restricts the search to one lane + (memory/audit/trajectory) so each lane gets its own budget; None + searches every lane in a single shared top-k.""" + ... def status(self) -> SemanticIndexStatus: ... 
@@ -80,7 +86,9 @@ def row_fingerprints(self, ids: Sequence[str]) -> dict[str, SemanticRowFingerpri class NullSemanticIndex: """Disabled index: every read is empty.""" - def search(self, vector: Sequence[float], *, k: int) -> list[SemanticHit]: + def search( + self, vector: Sequence[float], *, k: int, source: SemanticSource | None = None + ) -> list[SemanticHit]: return [] def status(self) -> SemanticIndexStatus: @@ -97,7 +105,9 @@ class UnavailableSemanticIndex: def __init__(self, *, reason: str) -> None: self._reason = reason - def search(self, vector: Sequence[float], *, k: int) -> list[SemanticHit]: + def search( + self, vector: Sequence[float], *, k: int, source: SemanticSource | None = None + ) -> list[SemanticHit]: return [] def status(self) -> SemanticIndexStatus: diff --git a/codeclone/memory/semantic/lancedb_backend.py b/codeclone/memory/semantic/lancedb_backend.py index 0429a7b9..31316a98 100644 --- a/codeclone/memory/semantic/lancedb_backend.py +++ b/codeclone/memory/semantic/lancedb_backend.py @@ -185,10 +185,15 @@ def _schema_matches(self, table: _LanceTable) -> bool: return False return getattr(vector_type, "list_size", None) == self._dimension - def search(self, vector: Sequence[float], *, k: int) -> list[SemanticHit]: + def search( + self, vector: Sequence[float], *, k: int, source: SemanticSource | None = None + ) -> list[SemanticHit]: if self._table is None: return [] - rows = self._table.search(list(vector)).limit(k).to_list() + query = self._table.search(list(vector)) + if source is not None: + query = query.where(f"source = {_sql_quote(source)}") + rows = query.limit(k).to_list() hits: list[SemanticHit] = [] for row in rows: distance = row.get("_distance", 0) diff --git a/tests/test_cli_memory_semantic.py b/tests/test_cli_memory_semantic.py index 1687f0cc..a8846aba 100644 --- a/tests/test_cli_memory_semantic.py +++ b/tests/test_cli_memory_semantic.py @@ -34,10 +34,17 @@ class _FakeSemanticIndex: def __init__(self) -> None: self.rows: 
list[SemanticRow] = [] - def search(self, vector: Sequence[float], *, k: int) -> list[SemanticHit]: + def search( + self, vector: Sequence[float], *, k: int, source: str | None = None + ) -> list[SemanticHit]: + rows = ( + self.rows + if source is None + else [row for row in self.rows if row.source == source] + ) return [ SemanticHit(source_id=row.id, source=row.source, score=1.0 - index * 0.01) - for index, row in enumerate(self.rows[:k]) + for index, row in enumerate(rows[:k]) ] def status(self) -> SemanticIndexStatus: diff --git a/tests/test_mcp_memory_semantic.py b/tests/test_mcp_memory_semantic.py index 329a10e7..a5bb75b8 100644 --- a/tests/test_mcp_memory_semantic.py +++ b/tests/test_mcp_memory_semantic.py @@ -87,8 +87,10 @@ def test_mcp_query_semantic_closes_read_index( class _ClosableIndex: closed = False - def search(self, vector: Sequence[float], *, k: int) -> list[SemanticHit]: - del vector, k + def search( + self, vector: Sequence[float], *, k: int, source: str | None = None + ) -> list[SemanticHit]: + del vector, k, source return [] def status(self) -> SemanticIndexStatus: diff --git a/tests/test_memory_coverage_gaps.py b/tests/test_memory_coverage_gaps.py index f1c773eb..5a0a4baa 100644 --- a/tests/test_memory_coverage_gaps.py +++ b/tests/test_memory_coverage_gaps.py @@ -723,12 +723,17 @@ def test_retrieval_service_semantic_helpers_and_scope_family() -> None: from codeclone.memory.semantic.models import SemanticHit, SemanticIndexStatus class _Index: - def search(self, vector: object, *, k: int) -> list[SemanticHit]: - return [ + def search( + self, vector: object, *, k: int, source: str | None = None + ) -> list[SemanticHit]: + hits = [ SemanticHit(source_id="mem-1", source="memory", score=0.9), SemanticHit(source_id="evt-1", source="audit", score=0.8), SemanticHit(source_id="traj-1", source="trajectory", score=0.7), ] + if source is not None: + hits = [hit for hit in hits if hit.source == source] + return hits[:k] def status(self) -> 
SemanticIndexStatus: return SemanticIndexStatus(available=True, indexed_count=3) diff --git a/tests/test_memory_retrieval_semantic_coverage.py b/tests/test_memory_retrieval_semantic_coverage.py index 6d743be2..52de7913 100644 --- a/tests/test_memory_retrieval_semantic_coverage.py +++ b/tests/test_memory_retrieval_semantic_coverage.py @@ -25,8 +25,15 @@ class _FakeIndex: def __init__(self, hits: list[SemanticHit]) -> None: self._hits = hits - def search(self, vector: Sequence[float], *, k: int) -> list[SemanticHit]: - return self._hits[:k] + def search( + self, vector: Sequence[float], *, k: int, source: str | None = None + ) -> list[SemanticHit]: + hits = ( + self._hits + if source is None + else [hit for hit in self._hits if hit.source == source] + ) + return hits[:k] def status(self) -> SemanticIndexStatus: return SemanticIndexStatus(available=True, indexed_count=len(self._hits)) diff --git a/tests/test_semantic_rebuild.py b/tests/test_semantic_rebuild.py index 637e1cef..f222e119 100644 --- a/tests/test_semantic_rebuild.py +++ b/tests/test_semantic_rebuild.py @@ -23,10 +23,17 @@ class _FakeWriter: def __init__(self) -> None: self.rows: list[SemanticRow] = [] - def search(self, vector: Sequence[float], *, k: int) -> list[SemanticHit]: + def search( + self, vector: Sequence[float], *, k: int, source: str | None = None + ) -> list[SemanticHit]: + rows = ( + self.rows + if source is None + else [row for row in self.rows if row.source == source] + ) return [ SemanticHit(source_id=row.id, source=row.source, score=0.0) - for row in self.rows[:k] + for row in rows[:k] ] def status(self) -> SemanticIndexStatus: diff --git a/tests/test_semantic_rebuild_incremental.py b/tests/test_semantic_rebuild_incremental.py index 5b49a652..3b39458d 100644 --- a/tests/test_semantic_rebuild_incremental.py +++ b/tests/test_semantic_rebuild_incremental.py @@ -27,7 +27,9 @@ class _InMemoryWriter: def __init__(self) -> None: self.rows: dict[str, SemanticRow] = {} - def search(self, vector: 
Sequence[float], *, k: int) -> list[SemanticHit]: + def search( + self, vector: Sequence[float], *, k: int, source: str | None = None + ) -> list[SemanticHit]: return [] def status(self) -> SemanticIndexStatus: diff --git a/tests/test_semantic_retrieval.py b/tests/test_semantic_retrieval.py index 630accfb..95232a2d 100644 --- a/tests/test_semantic_retrieval.py +++ b/tests/test_semantic_retrieval.py @@ -27,8 +27,15 @@ class _FakeIndex: def __init__(self, hits: list[SemanticHit]) -> None: self._hits = hits - def search(self, vector: Sequence[float], *, k: int) -> list[SemanticHit]: - return self._hits[:k] + def search( + self, vector: Sequence[float], *, k: int, source: str | None = None + ) -> list[SemanticHit]: + hits = ( + self._hits + if source is None + else [hit for hit in self._hits if hit.source == source] + ) + return hits[:k] def status(self) -> SemanticIndexStatus: return SemanticIndexStatus(available=True, indexed_count=len(self._hits)) diff --git a/tests/test_semantic_search_service.py b/tests/test_semantic_search_service.py index 78e425c9..53415a3b 100644 --- a/tests/test_semantic_search_service.py +++ b/tests/test_semantic_search_service.py @@ -39,8 +39,15 @@ def __init__( self._available = available self._reason = reason - def search(self, vector: Sequence[float], *, k: int) -> list[SemanticHit]: - return self._hits[:k] + def search( + self, vector: Sequence[float], *, k: int, source: str | None = None + ) -> list[SemanticHit]: + hits = ( + self._hits + if source is None + else [hit for hit in self._hits if hit.source == source] + ) + return hits[:k] def status(self) -> SemanticIndexStatus: return SemanticIndexStatus( @@ -157,6 +164,37 @@ def test_semantic_only_record_respects_type_filter(tmp_path: Path) -> None: assert filtered.id not in ids +def test_semantic_hits_searches_each_source_with_its_own_budget() -> None: + from codeclone.memory.retrieval import service as retrieval_service + + captured: list[tuple[str | None, int]] = [] + + class 
_RecordingIndex: + def search( + self, vector: Sequence[float], *, k: int, source: str | None = None + ) -> list[SemanticHit]: + captured.append((source, k)) + return [] + + def status(self) -> SemanticIndexStatus: + return SemanticIndexStatus(available=True) + + proximity, audit_hits, trajectory_hits = retrieval_service._semantic_hits( + index=_RecordingIndex(), + provider=_FakeProvider(), + query="alpha", + k=7, + ) + + # Each lane is searched independently with the full k budget, so a dense + # source (e.g. audit) cannot crowd another lane out of one shared top-k. + assert len(captured) == 3 + assert dict(captured) == {"memory": 7, "audit": 7, "trajectory": 7} + assert proximity == {} + assert audit_hits == [] + assert trajectory_hits == [] + + def test_unavailable_index_falls_back_to_fts(tmp_path: Path) -> None: with memory_store(tmp_path) as (root, project, store, db_path): fts = seed_module_role( From b7378d4474236b91576b257b732f95c949a8b905 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Fri, 12 Jun 2026 20:45:01 +0500 Subject: [PATCH 260/318] docs: align documentation with current platform features --- CHANGELOG.md | 11 +- README.md | 24 +++- codeclone/surfaces/cli/memory.py | 6 +- codeclone/surfaces/mcp/messages/params.py | 3 +- docs/README-pypi.md | 12 ++ docs/book/02-architecture-map.md | 2 + docs/book/10-config-and-defaults.md | 24 +++- docs/book/11-cli.md | 16 ++- .../book/13-engineering-memory/cli-surface.md | 2 +- .../13-engineering-memory/experience-layer.md | 99 +++++++++++++ docs/book/13-engineering-memory/index.md | 15 +- .../book/13-engineering-memory/mcp-surface.md | 15 +- .../13-engineering-memory/projection-jobs.md | 41 +++++- .../trajectory-and-patch-trail.md | 14 +- .../trajectory-quality-and-passport.md | 121 ++++++++++++++++ docs/book/21-security-model.md | 24 +++- docs/book/23-testing-as-spec.md | 9 ++ docs/book/24-compatibility-and-versioning.md | 2 + .../25-mcp-interface/determinism-and-tests.md | 10 ++ 
docs/book/25-mcp-interface/index.md | 4 + .../tools/platform-observability.md | 60 ++++++++ .../tools/session-and-memory.md | 2 + docs/book/26-platform-observability.md | 132 ++++++++++++++++++ docs/book/README.md | 2 + docs/book/appendix/b-schema-layouts.md | 25 +++- docs/book/integrations/vs-code-extension.md | 5 + docs/getting-started.md | 3 + docs/guide/README.md | 2 + docs/guide/integrations/vscode/setup.md | 7 + docs/guide/mcp/README.md | 4 +- docs/guide/mcp/architecture.md | 2 +- docs/guide/mcp/workflows/memory-recipes.md | 11 +- docs/guide/memory/overview.md | 5 +- .../memory/trajectories-and-experiences.md | 76 ++++++++++ docs/guide/observability/diagnostics.md | 89 ++++++++++++ docs/index.md | 2 + docs/plans-and-retention.md | 13 ++ docs/privacy-policy.md | 10 ++ extensions/vscode-codeclone/README.md | 10 ++ .../contract_snapshots/mcp_tool_schemas.json | 2 +- tests/test_docs_ia_contract.py | 59 ++++++++ zensical.toml | 8 ++ 42 files changed, 941 insertions(+), 42 deletions(-) create mode 100644 docs/book/13-engineering-memory/experience-layer.md create mode 100644 docs/book/13-engineering-memory/trajectory-quality-and-passport.md create mode 100644 docs/book/25-mcp-interface/tools/platform-observability.md create mode 100644 docs/book/26-platform-observability.md create mode 100644 docs/guide/memory/trajectories-and-experiences.md create mode 100644 docs/guide/observability/diagnostics.md diff --git a/CHANGELOG.md b/CHANGELOG.md index cf1b3d90..092aacfe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,7 +36,7 @@ Platform Observability, and a fully reorganized documentation site. and complexity scoring, anomaly detection, agent profiles, and dashboard views. 
- **Experience Layer.** Deterministic `experience-v1` advisory patterns are - distilled from verified trajectories and surfaced through a separate + distilled from canonical trajectories across all outcomes and surfaced through a separate `experiences[]` retrieval lane with evidence and agent-diversity facets. Projection jobs can distill experiences automatically, while `promote_experience` converts a selected pattern into a human-governed @@ -64,7 +64,9 @@ Platform Observability, and a fully reorganized documentation site. UX, QuickPick search, memory-for-active-file, search results webview. IDE governance channel (`--ide-governance-channel`) with session HMAC attestation. Workspace session stats and controller audit trail webviews (IDE-only MCP - tools; shared payloads in `codeclone/controller_insights/`). + tools; shared payloads in `codeclone/controller_insights/`). Trajectory + dashboard/detail views expose quality passports, anomalies, agent aggregates, + Patch Trail evidence, and a copyable dashboard brief. - **Platform Observability.** Opt-in, development-only operation/span telemetry correlates CLI, MCP, analysis, and projection-worker execution without affecting canonical reports, gates, baselines, memory facts, or edit @@ -79,11 +81,12 @@ Platform Observability, and a fully reorganized documentation site. agent definition. - **CLI controller query modes:** `--blast-radius`, `--patch-verify`, `--session-stats`, `--audit`. -- **Documentation reorganization.** Book chapters renumbered 00-25 in thematic +- **Documentation reorganization.** Book chapters organized 00-26 in thematic groups. Four integration guide+contract splits merged into single pages (VS Code, Claude Desktop, Codex, Cursor). Six-tab nav (Home, Get started, Guides, Reference, Legal & plans, Maintainers). Doc-scope ownership comments - on all 45 documentation files. + across guide and contract leaves. 
Dedicated chapters cover trajectory quality, + the Experience Layer, and Platform Observability with cross-linked diagrams. - **Edition-specific feature tiers** (plans-and-retention): Engineering Memory limits and retention, semantic provider editions (fastembed/api/local_model), audit trail retention, and workspace intent registry limits per Open Source / diff --git a/README.md b/README.md index b0d33edc..13ef143d 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ reviewers, CI pipelines, and pre-merge gates. - **Change control before the diff** — declare intent, inspect blast radius, bound the edit, verify the patch contract, validate review claims, leave an auditable receipt. - **Engineering Memory** — typed, evidence-linked project facts (contracts, risks, decisions, prior changes); - durable agent context, human-governed promotion, never LLM-as-truth. + durable agent context, trajectory passports, recurring Experiences, human-governed promotion, never LLM-as-truth. - **One canonical report, many surfaces** — duplication, structural drift, dead code, complexity / coupling / cohesion, health — the same deterministic facts everywhere, no second engine. - **Baseline-aware CI** — gates fail only on what got *worse*; accepted legacy debt stays separate from real @@ -208,13 +208,32 @@ and governed drafts. After `start_controlled_change`, agents read ranked scope c memory is **human-governed** — agent drafts never become truth automatically. The store auto-bootstraps from the latest MCP run (`mcp_sync_policy=bootstrap_if_missing`); `codeclone memory init` remains for CI/offline. +Audit-derived trajectories preserve workflow and Patch Trail evidence. Quality +passports, anomaly and agent dashboards, and deterministic Experiences make +recurring project patterns visible without promoting them to authority. + ```bash codeclone memory init --root . 
codeclone memory search "baseline schema" --match all codeclone memory approve mem-… # human-only governance ``` -[Engineering Memory docs](https://orenlab.github.io/codeclone/book/13-engineering-memory/) +[Engineering Memory docs](https://orenlab.github.io/codeclone/book/13-engineering-memory/) · +[Trajectories and Experiences](https://orenlab.github.io/codeclone/guide/memory/trajectories-and-experiences/) + +### Platform Observability + +Opt-in local diagnostics trace CodeClone's own CLI, MCP, database, analysis, +and projection-worker costs. The observer is disabled by default, stores no raw +payload bodies, and never influences findings, gates, baselines, memory facts, +or edit authorization. + +```bash +CODECLONE_OBSERVABILITY_ENABLED=1 codeclone . +codeclone observability trace --root . --html /tmp/codeclone-observer.html +``` + +[Platform Observability](https://orenlab.github.io/codeclone/book/26-platform-observability/) ### Native agent and IDE clients @@ -341,6 +360,7 @@ Full docs and contract book: [orenlab.github.io/codeclone](https://orenlab.githu [MCP guide](https://orenlab.github.io/codeclone/guide/mcp/) · [Structural Change Controller](https://orenlab.github.io/codeclone/book/12-structural-change-controller/) · [Engineering Memory](https://orenlab.github.io/codeclone/book/13-engineering-memory/) · +[Platform Observability](https://orenlab.github.io/codeclone/book/26-platform-observability/) · [CLI](https://orenlab.github.io/codeclone/book/11-cli/) · [Benchmarking](https://orenlab.github.io/codeclone/book/20-benchmarking/) diff --git a/codeclone/surfaces/cli/memory.py b/codeclone/surfaces/cli/memory.py index 0b4bacc3..2425c84d 100644 --- a/codeclone/surfaces/cli/memory.py +++ b/codeclone/surfaces/cli/memory.py @@ -264,7 +264,11 @@ def _add_root(sub: argparse.ArgumentParser) -> None: trajectory_parser = subparsers.add_parser( "trajectory", - help="Trajectory projection storage (status / rebuild / list / show).", + help=( + "Trajectory projections and 
analytics " + "(status / rebuild / list / search / show / agents / " + "anomalies / dashboard / export)." + ), ) trajectory_sub = trajectory_parser.add_subparsers( dest="trajectory_action", diff --git a/codeclone/surfaces/mcp/messages/params.py b/codeclone/surfaces/mcp/messages/params.py index 34a55b0f..4297ee7b 100644 --- a/codeclone/surfaces/mcp/messages/params.py +++ b/codeclone/surfaces/mcp/messages/params.py @@ -164,7 +164,8 @@ description=( "workflow, analysis_profile, suppressions, baseline, coverage, " "latest_runs, review_state, changed_scope, change_control, " - "trust_boundaries, engineering_memory, verification_profiles" + "trust_boundaries, engineering_memory, verification_profiles, " + "observability" ) ), ] diff --git a/docs/README-pypi.md b/docs/README-pypi.md index 322b8811..1430a370 100644 --- a/docs/README-pypi.md +++ b/docs/README-pypi.md @@ -46,7 +46,9 @@ codeclone . --ci # CI mode - **Quality metrics** — complexity, coupling, cohesion, dead code, health score - **Baseline governance** — separates legacy debt from new regressions; CI fails only on what changed - **Change controller** — intent declaration, blast radius, patch contract, review receipt for AI agents +- **Engineering Memory** — governed records, trajectory passports, and advisory Experiences - **MCP server** — 32-tool default interface for IDE and agent clients +- **Platform Observability** — opt-in local diagnostics for CodeClone's own runtime - **Reports** — HTML, JSON, Markdown, SARIF, text from one canonical payload ## MCP Server @@ -58,9 +60,19 @@ codeclone-mcp --transport stdio Native clients: VS Code extension, Claude Desktop bundle, Codex plugin. +Engineering Memory and runtime diagnostics: + +```bash +codeclone memory trajectory dashboard --root . +CODECLONE_OBSERVABILITY_ENABLED=1 codeclone . +codeclone observability trace --root . 
--html /tmp/codeclone-observer.html +``` + ## Links - Documentation: +- Engineering Memory: +- Platform Observability: - Source: - Issues: diff --git a/docs/book/02-architecture-map.md b/docs/book/02-architecture-map.md index 8626926a..8a8a3713 100644 --- a/docs/book/02-architecture-map.md +++ b/docs/book/02-architecture-map.md @@ -145,6 +145,8 @@ Refs: | Cache trust and fail-open behavior | [08-cache.md](08-cache.md) | | Report schema and provenance | [05-report.md](05-report.md), [06-html-render.md](06-html-render.md) | | MCP agent surface | [25-mcp-interface/index.md](25-mcp-interface/index.md), [14-claim-guard.md](14-claim-guard.md) | +| Engineering Memory evidence layers | [13-engineering-memory/index.md](13-engineering-memory/index.md), [13-engineering-memory/trajectory-quality-and-passport.md](13-engineering-memory/trajectory-quality-and-passport.md), [13-engineering-memory/experience-layer.md](13-engineering-memory/experience-layer.md) | +| Platform runtime diagnostics | [26-platform-observability.md](26-platform-observability.md) | | Health score model | [15-health-score.md](15-health-score.md) | | Metrics gates and metrics baseline | [16-metrics-and-quality-gates.md](16-metrics-and-quality-gates.md) | | Dead-code liveness policy | [17-dead-code-contract.md](17-dead-code-contract.md) | diff --git a/docs/book/10-config-and-defaults.md b/docs/book/10-config-and-defaults.md index 4863d540..d4976b5b 100644 --- a/docs/book/10-config-and-defaults.md +++ b/docs/book/10-config-and-defaults.md @@ -325,6 +325,7 @@ instead of duplicating tables. 
| Workspace intent registry | `resolve_intent_registry_config` | Documented env > `[tool.codeclone]` registry keys > defaults | | MCP workspace intent TTL / lease | `resolved_ttl_seconds`, `resolved_lease_seconds` | Explicit MCP tool parameter > env > built-in default | | Finish hygiene strict mode | `_strict_finish_enabled` | Env only (no pyproject key) | +| Platform Observability | `resolve_observability_config` | Env only; disabled by default, no pyproject table | | Cursor / IDE hooks | hook helpers | Env > repo config file (where noted) > built-in default | There is no generic `CODECLONE_MEMORY__*` nested env convention. Each variable @@ -336,6 +337,27 @@ name is flat and listed below. |-------------------|-------------|--------------------------------------------------------------------------------------------------------------------------------------| | `CODECLONE_DEBUG` | `1` enables | Turns on CLI debug diagnostics (`codeclone/surfaces/cli/console.py`). Independent of analysis, gating, and `[tool.codeclone] debug`. | +### Platform Observability + +Platform Observability is environment-only and disabled by default. It has no +`[tool.codeclone.observability]` table. See +[Platform Observability](26-platform-observability.md) for the data and trust +contracts. + +| Variable | Values | Effect | +|---|---|---| +| `CODECLONE_OBSERVABILITY_ENABLED` | truthy / falsy | Enable local operation/span instrumentation. | +| `CODECLONE_OBSERVABILITY_FORCE` | truthy / falsy | Lift the CI collection guard; does not enable collection by itself. | +| `CODECLONE_OBSERVABILITY_PROFILE` | truthy / falsy | Capture process metrics; requires `codeclone[perf]`. | +| `CODECLONE_OBSERVABILITY_PERSIST` | truthy / falsy | Persist completed operations; default true when enabled. | +| `CODECLONE_OBSERVABILITY_CAPTURE_PAYLOAD_SIZES` | truthy / falsy | Capture bounded size/token estimates; default true. 
| +| `CODECLONE_OBSERVABILITY_PAYLOAD_SNAPSHOT` | reserved | Rejected; raw payload snapshots are unsupported. | +| `CODECLONE_OBSERVABILITY_CORRELATION_ID` | internal ID | Worker handoff for cross-process correlation; set by CodeClone. | +| `CODECLONE_OBSERVABILITY_PARENT_OPERATION_ID` | internal ID | Worker handoff for the parent operation; set by CodeClone. | + +The internal correlation variables are launcher/worker protocol, not operator +tuning knobs. + ### Engineering Memory Overrides `[tool.codeclone.memory]` and `[tool.codeclone.memory.semantic]` for the @@ -344,7 +366,7 @@ listed field only. Paths resolve under the repository root like pyproject paths. | Variable | Values | Overrides | Effect | |--------------------------------------------------|-------------------------------------------------|----------------------------------------|--------------------------------------------------------------------------------| | `CODECLONE_MEMORY_DB_PATH` | repo-relative or absolute path under root | `memory.db_path` | SQLite Engineering Memory store location | -| `CODECLONE_PROJECTION_REBUILD_POLICY` | `off`, `enqueue_when_stale` | `memory.projection_rebuild_policy` | When accepted MCP finish may enqueue async trajectory/Experience/semantic projection jobs | +| `CODECLONE_PROJECTION_REBUILD_POLICY` | `off`, `enqueue_when_stale` | `memory.projection_rebuild_policy` | When accepted MCP finish may enqueue async trajectory/semantic/Experience projection jobs | | `CODECLONE_MEMORY_SEMANTIC_ENABLED` | `true` / `false` | `memory.semantic.enabled` | Turn semantic index sidecar on or off | | `CODECLONE_MEMORY_SEMANTIC_EMBEDDING_PROVIDER` | `diagnostic`, `fastembed`, `local_model`, `api` | `memory.semantic.embedding_provider` | Embedding backend for semantic rebuild/search | | `CODECLONE_MEMORY_SEMANTIC_EMBEDDING_MODEL` | model name string | `memory.semantic.embedding_model` | Provider model id (for example FastEmbed model name) | diff --git a/docs/book/11-cli.md 
b/docs/book/11-cli.md index 91cf97bd..b618f16a 100644 --- a/docs/book/11-cli.md +++ b/docs/book/11-cli.md @@ -36,6 +36,7 @@ CLI modes: - baseline update mode (`--update-baseline`, `--update-metrics-baseline`) - controller query mode (`--blast-radius`, `--patch-verify`) - workspace query modes (`--session-stats`, `--audit`, `--audit-json`) +- development diagnostics mode (`codeclone observability trace`) Summary metrics include: @@ -118,14 +119,23 @@ Refs: alone can still run the deterministic diagnostic provider. - `review-candidates`, `approve`, `reject`, `archive` — human governance for draft records (CLI and VS Code Memory; not MCP agent tools). - - `trajectory status|rebuild|list|search|show|export` — audit-derived process - narratives and Patch Trail export (requires audit + rebuild). + - `trajectory status|rebuild|list|search|show|agents|anomalies|dashboard|export` + — audit-derived narratives, quality passports, analytics, and local + Patch Trail export (requires audit + rebuild). - `jobs status|enqueue|run-once|list` — projection rebuild queue (semantic + - trajectory sidecars). + trajectory + Experience projections). - `search` accepts `--match any|all` for FTS token matching (default `any`) and `--semantic` to blend vector proximity when the index is available. - Requires a prior normal analysis run or cached report for `init`. - Full contract: [Engineering Memory](13-engineering-memory/index.md). +- Platform Observability commands are terminal-only, read-only diagnostics of + CodeClone's own runtime: + - `codeclone observability trace --root .` prints JSON. + - `--last`, `--operation`, and `--correlation` select a bounded trace. + - `--json PATH` and `--html PATH` write machine-readable or self-contained + cockpit views. + - A missing local store is an informational success state. + - Full contract: [Platform Observability](26-platform-observability.md). 
- Controller and workspace query flags are mutually exclusive where enforced: - `--blast-radius` and `--patch-verify` cannot be combined. - `--strictness {ci,strict,relaxed}` is valid only with `--patch-verify`. diff --git a/docs/book/13-engineering-memory/cli-surface.md b/docs/book/13-engineering-memory/cli-surface.md index 19e07dde..8c2830ad 100644 --- a/docs/book/13-engineering-memory/cli-surface.md +++ b/docs/book/13-engineering-memory/cli-surface.md @@ -19,7 +19,7 @@ All commands live under `codeclone memory` and accept `--root` (default `.`). | `reject RECORD_ID [--reason TEXT]` | Reject draft | | `archive RECORD_ID [--reason TEXT]` | Archive record | | `trajectory status\|rebuild\|list\|search\|show\|agents\|anomalies\|dashboard\|export` | Trajectory projection, passport analytics, and export | -| `jobs status\|enqueue\|run-once\|list` | Trajectory + Experience + semantic projection queue | +| `jobs status\|enqueue\|run-once\|list` | Trajectory + semantic + Experience projection queue | Human governance (`approve`, `reject`, `archive`) is available through the **CodeClone VS Code Memory** view (IDE governance channel) and the diff --git a/docs/book/13-engineering-memory/experience-layer.md b/docs/book/13-engineering-memory/experience-layer.md new file mode 100644 index 00000000..27b32aee --- /dev/null +++ b/docs/book/13-engineering-memory/experience-layer.md @@ -0,0 +1,99 @@ +# Experience Layer + + + +Experiences are the third Engineering Memory knowledge tier: + +1. memory records describe what the project knows; +2. trajectories describe what happened during agent work; +3. Experiences summarize recurring, evidence-linked patterns across + trajectories. + +Experiences are advisory. They do not authorize edits, override findings, or +replace the human-governed memory record lifecycle. + +## Distillation pipeline + +```mermaid +flowchart LR + A["Canonical trajectories
    all outcomes"] --> B["Extract subject families,
    signals, outcome classes"] + B --> C["Group by PatternKey"] + C --> D["Support and information-value gates"] + D --> E["Active Experiences"] + E --> F["Scoped retrieval"] + E --> G["Optional promotion"] + G --> H["Human-reviewable draft
    memory record"] +``` + +The current distillation version is `experience-v1`. Every canonical +trajectory may contribute, including partial, blocked, and incident-bearing +work. Distillation is not limited to verified or successful changes. + +## Pattern identity + +An Experience key contains: + +- `subject_family`: a deterministic directory family derived from touched + paths, with at most eight families per trajectory; +- `signal`: a non-ubiquitous label or a derived signal; +- `outcome_class`: `:`. + +Derived signals include: + +- `verification_incomplete` for partial or blocked work without a verified + finish; +- `incident_present` when a trajectory contains incidents. + +Agent and tool identity are deliberately excluded from `PatternKey`. They are +evidence facets, not pattern identity, so equivalent project behavior can +coalesce across agents. + +## Admission and scoring + +A candidate requires: + +- support from at least five trajectories; +- information value of at least `50`; +- no more than twenty retained evidence trajectory IDs. + +Information value is deterministic: + +- `+60` when evidence spans at least two agent families; +- `+25` for a structural signal; +- capped at `100`. + +A single-agent pattern therefore does not pass the current information-value +threshold by itself. + +The Experience ID and digest exclude timestamps. They include the pattern key, +sorted member trajectory IDs, and the distillation version. This keeps +replace-all rebuilds reproducible. + +## Storage and retrieval + +Distillation replaces the project's Experience projection atomically in +deterministic order. Current records are `active`. The domain model reserves a +`dormant` state, but dormant lifecycle management is not implemented. 
+ +Scoped retrieval: + +- returns active Experiences only; +- exact-matches the requested directory `subject_family`; +- sorts by support descending, information value descending, then ID; +- returns compact evidence counts and agent-family summaries by default; +- adds agent facets and evidence trajectory IDs at full detail. + +The current distiller emits `agent_family` facets. Other facet kinds are +reserved by the domain types but are not currently populated. + +## Promotion boundary + +Promotion is explicit and idempotent. It creates a human-reviewable draft +memory candidate with the Experience statement, subject family, and trajectory +evidence. It obeys the project's draft capacity and does not silently approve +the result. + +Only human governance channels (the CLI and the VS Code Memory view) can approve, reject, or archive memory records; MCP agent tools cannot. +See [Trust and lifecycle](trust-and-lifecycle.md), +[MCP surface](mcp-surface.md), and the +[trajectories and Experiences guide](../../guide/memory/trajectories-and-experiences.md). diff --git a/docs/book/13-engineering-memory/index.md b/docs/book/13-engineering-memory/index.md index 0cb9c3e7..067a84fd 100644 --- a/docs/book/13-engineering-memory/index.md +++ b/docs/book/13-engineering-memory/index.md @@ -37,8 +37,8 @@ controlled edits. | 25 | Disabled-by-default local JSONL export profiles | CLI `memory trajectory export --profile ... 
--out ...` | | 26 | Patch Trail persistence + scoped retrieval | `memory_trajectory_patch_trails`; `patch_trail_summary` on scoped retrieval | | 28 | Incremental projection jobs | Watermarked trajectory rebuild, semantic hash-skip, coalesced worker | -| RFC | Trajectory quality and passport analytics | Quality/complexity contract, anomalies, agents, dashboard | -| RFC | Experience Layer | Distillation job, scoped `experiences[]`, `promote_experience` draft bridge | +| Live | Trajectory quality and passport analytics | Quality/complexity contract, anomalies, agents, dashboard | +| Live | Experience Layer | Distillation job, scoped `experiences[]`, `promote_experience` draft bridge | Schema version constant: `ENGINEERING_MEMORY_SCHEMA_VERSION` in `codeclone/contracts/__init__.py` (currently **`1.6`**). @@ -65,6 +65,8 @@ graph TB SUB[memory_subjects] EV[memory_evidence] FTS[memory_fts FTS5] + TRAJ[trajectory projection] + EXP[Experience projection] end subgraph Surfaces["Read / write surfaces"] @@ -78,6 +80,7 @@ graph TB CT -->|init / refresh ingest| MemoryStore GIT -->|init / refresh ingest| MemoryStore RC -->|propose_from_receipt / finish hook| MemoryStore + RC --> TRAJ --> EXP MemoryStore --> CLI MemoryStore --> MCP_R MCP_W -->|draft only| MemoryStore @@ -111,6 +114,14 @@ Refs: - `codeclone/memory/retrieval/service.py:query_engineering_memory` - `codeclone/surfaces/mcp/_session_memory_mixin.py` +Normative detail: + +- [Trajectory and Patch Trail](trajectory-and-patch-trail.md) +- [Trajectory quality and passport](trajectory-quality-and-passport.md) +- [Experience Layer](experience-layer.md) +- [Projection jobs](projection-jobs.md) +- [Practical trajectory and Experience guide](../../guide/memory/trajectories-and-experiences.md) + --- ## Regressions and UX fixes (2.1.0a1) diff --git a/docs/book/13-engineering-memory/mcp-surface.md b/docs/book/13-engineering-memory/mcp-surface.md index 58081c78..ad08a6ee 100644 --- a/docs/book/13-engineering-memory/mcp-surface.md +++ 
b/docs/book/13-engineering-memory/mcp-surface.md @@ -45,15 +45,16 @@ Mode router for inspection and search. | `status` | — | Store status (like CLI `status`) | | `drafts` | optional `limit` | Draft inbox (compact by default) | | `trajectory_status` | — | Trajectory projection run metadata | -| `trajectory_search` | `query`; optional `intent_id` | Search stored trajectories | -| `trajectory_get` | `record_id` (trajectory id) | One trajectory + steps (compact default) | +| `trajectory_search` | `query`; optional `filters.include_routine` | Search stored trajectories | +| `trajectory_get` | `record_id` (trajectory id) | One trajectory + steps (always full) | | `trajectory_anomalies` | optional `filters.include_routine` | Detected trajectory contract anomalies | -| `trajectory_agents` | optional `filters.include_routine` | Aggregate quality/outcomes by agent family | +| `trajectory_agents` | optional `filters.include_routine` | Aggregate quality/outcomes by exact agent label | | `trajectory_dashboard` | optional `filters.include_routine` | Combined status, agent, and anomaly view | List modes (`search`, `stale`, `drafts`, scoped `get_relevant_memory`) default to **compact** payloads: statement preview, `statement_length`, no `payload`. Use `mode=get` or `detail_level=full` for complete statements and payload. +`trajectory_get` is also always full regardless of requested detail level. Scoped retrieval keeps four typed lanes: @@ -89,7 +90,7 @@ CLI equivalent: `codeclone memory search QUERY --match any|all`. 
| `refresh_from_run` | optional `run_id` (defaults to latest MCP run) | Force ingest from MCP run report | | `rebuild_semantic_index` | (none) | Rebuild LanceDB sidecar when `memory.semantic.enabled` | | `rebuild_trajectories` | (none) | Rebuild trajectory projections from audit event core | -| `enqueue_projection_rebuild` | (none) | Queue trajectory + Experience + semantic projection job | +| `enqueue_projection_rebuild` | (none) | Queue trajectory + semantic + Experience projection job | | `projection_rebuild_status` | (none) | Latest projection job status | | `run_projection_jobs_once` | (none) | Run one queued projection job inline | | `record_candidate` | `record_type`, `statement`, **`subject_path`** | Creates **draft** record | @@ -117,7 +118,7 @@ On **accepted** or **accepted_with_external_changes** finish: - returns `memory_candidates`, `memory_staleness`, `memory_coverage_delta` - when `memory.projection_rebuild_policy` is not `off` and the environment is not CI, may enqueue a projection rebuild job (`projection_rebuild` in the - finish payload — trajectory, Experience, and semantic projections) + finish payload — trajectory, semantic, and Experience projections) This is the preferred post-edit memory update path when using the workflow tools. @@ -126,6 +127,10 @@ tools. `help(topic="engineering_memory")` — compact agent playbook summary. +Trajectory analytics and Experience semantics are specified in +[Trajectory quality and passport](trajectory-quality-and-passport.md) and +[Experience Layer](experience-layer.md). 
+ Refs: - `codeclone/surfaces/mcp/server.py` diff --git a/docs/book/13-engineering-memory/projection-jobs.md b/docs/book/13-engineering-memory/projection-jobs.md index f856e685..ff0f454b 100644 --- a/docs/book/13-engineering-memory/projection-jobs.md +++ b/docs/book/13-engineering-memory/projection-jobs.md @@ -1,9 +1,10 @@ ### Projection rebuild jobs (schema 1.3+) -Trajectory, Experience, and semantic projections can be rebuilt asynchronously +Trajectory, semantic, and Experience projections can be rebuilt asynchronously via a coalesced job row in Engineering Memory SQLite -(`memory_projection_jobs`). The worker rebuilds trajectories first, distills -Experiences from the resulting corpus, then refreshes the semantic sidecar. +(`memory_projection_jobs`). The worker rebuilds trajectories first, refreshes +the semantic sidecar, then distills Experiences from the resulting trajectory +corpus. Default policy is **`off`**; opt in with: ```toml @@ -23,3 +24,37 @@ projection_rebuild_policy = "enqueue_when_stale" # off | enqueue_when_stale Jobs never run in CI environments (`CI`, `GITHUB_ACTIONS`, …). Sync rebuild escape hatches remain: `rebuild_trajectories` / `rebuild_semantic_index`. + +## Queue and worker contract + +```mermaid +flowchart LR + A["Accepted finish"] --> B["Compute projection stimulus"] + B --> C{"Stale?"} + C -- "no" --> D["No enqueue"] + C -- "yes" --> E["Coalesce pending job"] + E --> F["Detached or inline worker"] + F --> G["Trajectory projection"] + G --> H["Semantic sidecar"] + H --> I["Experience distillation"] + I --> J["Persist result / watermark"] +``` + +The stimulus includes repository digest, projection version and enablement, +audit event-core counts/watermarks, and active memory-record counts. Pending +work for the same project is coalesced instead of duplicated. + +The job store claims work with an immediate SQLite transaction and permits one +running job per project. 
Dead-worker and timeout states are reclaimed as +failed before new work is claimed. Trajectory rebuild is incremental when its +stored projection version and audit watermark are compatible; otherwise it +falls back to a full rebuild. Semantic projection may hash-skip unchanged +sources. + +Job states are `pending`, `running`, `done`, `failed`, and `skipped`. +`run-once` returns `nothing_to_do` when the queue is empty. Worker results and +bounded errors remain job metadata; they do not alter canonical analysis. + +Platform Observability can correlate accepted finish, worker spawn, and worker +execution without changing the queue contract. See +[Platform Observability](../26-platform-observability.md). diff --git a/docs/book/13-engineering-memory/trajectory-and-patch-trail.md b/docs/book/13-engineering-memory/trajectory-and-patch-trail.md index 5e40a8ab..2dda05e7 100644 --- a/docs/book/13-engineering-memory/trajectory-and-patch-trail.md +++ b/docs/book/13-engineering-memory/trajectory-and-patch-trail.md @@ -153,10 +153,9 @@ verification status). With `detail_level=full`, the top-ranked trajectory also surfaces **`patch_trail_summary`** at the response root. Compact retrieval omits that root duplicate; the summary remains on the trajectory preview. -`query_engineering_memory(mode=trajectory_get)` returns **`patch_trail`** on the -trajectory payload when persisted for that workflow. Full detail also includes -the explainable **`quality_contract`**; compact payloads retain headline -`quality_score`, `complexity_score`, and anomaly count. +`query_engineering_memory(mode=trajectory_get)` returns **`patch_trail`** when +persisted and always uses full detail, including **`quality_contract`**. +List/search previews retain headline quality, complexity, and anomaly counts. 
Trajectory rebuild (`memory trajectory rebuild` / MCP `manage_engineering_memory(action=rebuild_trajectories)`) synthesizes Patch Trail @@ -177,7 +176,7 @@ Scoped ranking adds a small boost when query scope paths intersect | `trajectory_search` | query text | Requires `query`; excludes `run:*` routine by default | | `trajectory_get` | trajectory id | `record_id` = trajectory id | | `trajectory_anomalies` | project | Contract anomalies, optionally including routine runs | -| `trajectory_agents` | project | Outcome and quality aggregates by agent family | +| `trajectory_agents` | project | Outcome and quality aggregates by exact agent label | | `trajectory_dashboard` | project | Combined status, agent, and anomaly payload | Filter: `filters.include_routine=true` on `trajectory_search` includes single-event @@ -186,8 +185,9 @@ Filter: `filters.include_routine=true` on `trajectory_search` includes single-ev Evidence kind **`trajectory`** links memory records to trajectories; human approve still required for agent drafts. -Label taxonomy and **`step_label`** display names: -[Trajectory labels](trajectory-labels.md). +See [Trajectory labels](trajectory-labels.md) for labels and +[Trajectory quality and passport](trajectory-quality-and-passport.md) for +scoring, anomalies, dashboards, and IDE passport semantics. ### Enterprise boundary (export) diff --git a/docs/book/13-engineering-memory/trajectory-quality-and-passport.md b/docs/book/13-engineering-memory/trajectory-quality-and-passport.md new file mode 100644 index 00000000..8d2aaade --- /dev/null +++ b/docs/book/13-engineering-memory/trajectory-quality-and-passport.md @@ -0,0 +1,121 @@ +# Trajectory Quality and Passport + + + +Trajectory projection version `3` adds explainable quality, complexity, +anomaly, agent, and dashboard views. These are derived diagnostics over +canonical audit evidence, not analysis findings or edit permissions. 
+ +For the event-to-trajectory projection itself, see +[Trajectory and patch trail](trajectory-and-patch-trail.md). + +## Passport model + +```mermaid +flowchart TD + A["Canonical trajectory"] --> B["Outcome"] + A --> C["Verification"] + A --> D["Scope"] + A --> E["Incidents"] + A --> F["Anomalies"] + A --> G["Receipt"] + B --> H["Quality score = minimum component"] + C --> H + D --> H + E --> H + F --> H + G --> H + A --> I["Complexity score
    separate, non-grade"] + H --> J["Trajectory passport"] + I --> J +``` + +The passport keeps quality and complexity separate: + +- quality answers how well the workflow satisfied its contract; +- complexity describes how much declared scope, event activity, and workflow + structure the trajectory contained. + +High complexity is not a defect and does not reduce quality by itself. + +## Quality score + +Quality score version `2` is the minimum of six components: + +| Component | Scoring | +|---|---| +| Outcome | accepted `100`, accepted external `85`, partial `55`, abandoned `40`, blocked `30`, violated `20` | +| Verification | accepted `100`, accepted external `85`, unverified `50`, violated/blocked `0`, not reached `40` | +| Scope | clean `100`, expanded `85`, partial `70`, violated `0` | +| Incidents | `max(0, 100 - 10 × incident_count)` | +| Anomalies | starts at `100`; error costs `12`, warning costs `5` | +| Receipt | change-control trajectory with receipt `100`, without `85`; non-change workflow `100` | + +When patch-trail verification is unavailable, the verification component falls +back to quality tier: verified `100`, corrected `90`, routine `85`, partial +`60`, incident `45`. + +The minimum-component rule makes the limiting evidence visible instead of +averaging a contract failure away. + +## Complexity score + +Complexity is: + +```text +min(100, + min(40, declared_scope_count * 2) + + min(30, event_count * 3) + + min(20, workflow_step_count * 2)) +``` + +Bands are `low < 35`, `moderate 35..69`, and `high >= 70`. + +## Anomalies + +The projection can emit: + +- outcome anomalies: violated, blocked, or abandoned; +- quality incidents and elevated incident count; +- incident labels such as baseline abuse, claim-guard failure, foreign + conflict, hook failure, or recovered state; +- incomplete change cycles or missing intent cleanup; +- scope violations; +- verification gaps. + +Anomalies are deterministic review cues. They are not repository findings. 
+ +## Analytics surfaces + +Agent analytics group by the exact canonical `agent_label`, not an inferred +agent family. The dashboard combines projection status, agent aggregates, +anomalies, and recent trajectories. + +Routine `run:*` workflows and trajectories with quality tier `routine` are +excluded by default. Callers can opt in with `include_routine=true`. + +Available CLI commands: + +```bash +codeclone memory trajectory status --root . +codeclone memory trajectory rebuild --root . +codeclone memory trajectory list --root . +codeclone memory trajectory search QUERY --root . +codeclone memory trajectory show TRAJECTORY_ID --root . +codeclone memory trajectory agents --root . +codeclone memory trajectory anomalies --root . +codeclone memory trajectory dashboard --root . +codeclone memory trajectory export --root . \ + --profile agent-change-control-v1 \ + --out trajectories.jsonl +``` + +MCP modes are `trajectory_status`, `trajectory_search`, `trajectory_get`, +`trajectory_anomalies`, `trajectory_agents`, and `trajectory_dashboard`. +`trajectory_get` uses `record_id` as the trajectory ID and always returns full +detail. + +The VS Code extension exposes a dashboard, detail view, copyable dashboard +brief, and passport sections for quality, complexity, duration, events, steps, +incidents, evidence, patch trail, contract gates, and score calculations. See +[VS Code integration](../integrations/vs-code-extension.md). diff --git a/docs/book/21-security-model.md b/docs/book/21-security-model.md index 362490d4..fa51fed1 100644 --- a/docs/book/21-security-model.md +++ b/docs/book/21-security-model.md @@ -36,11 +36,15 @@ Security-relevant input classes: - HTML escapes text and attribute contexts before embedding. - MCP is read-only with respect to source files, baselines, analysis cache (`cache.json`), and canonical report artifacts. 
-- Allowed repo-local writes are limited to ephemeral controller coordination - (workspace intent registry: file backend under `.codeclone/intents/`, - or SQLite under `.codeclone/db/intents.sqlite3` when configured) and - optional audit trail (`.codeclone/db/audit.sqlite3` when - `audit_enabled=true`). +- Allowed repo-local writes are explicit and isolated: ephemeral controller + coordination (file backend under `.codeclone/intents/` or SQLite under + `.codeclone/db/intents.sqlite3`), optional controller audit + (`.codeclone/db/audit.sqlite3`), Engineering Memory/projection state under + `.codeclone/memory/`, and opt-in Platform Observability + (`.codeclone/db/platform_observability.sqlite3`). +- Platform Observability stores bounded metadata and literal-free SQL + fingerprints, never raw payload bodies, and cannot affect analysis truth, + gates, baselines, memory facts, or edit authorization. - Session-local review markers and in-memory run history do not survive process restart. - Five session/coordination tools are marked `destructiveHint` in MCP metadata @@ -143,6 +147,16 @@ Refs: - `tests/test_mcp_http_auth.py` - `tests/test_mcp_server.py::test_mcp_server_main_rejects_non_loopback_host_without_opt_in` +### Platform Observability + +The observer is an optional local diagnostics boundary. Its CLI and MCP readers +open the telemetry store read-only; the instrumentation writer commits one +completed operation and its spans atomically. No network exporter is provided. + +The MCP slicer is bounded and declares that its output is CodeClone-development +telemetry, not repository quality evidence. See +[26-platform-observability.md](26-platform-observability.md). 
+ Refs: - `codeclone/analysis/parser.py:_parse_with_limits` diff --git a/docs/book/23-testing-as-spec.md b/docs/book/23-testing-as-spec.md index 8c28e1b9..887fbc92 100644 --- a/docs/book/23-testing-as-spec.md +++ b/docs/book/23-testing-as-spec.md @@ -25,6 +25,10 @@ Contract tests are concentrated in: - `tests/test_detector_golden.py` - `tests/test_golden_v2.py` - `tests/test_memory_*.py`, `tests/test_semantic_*.py`, `tests/test_mcp_memory_management.py` +- `tests/test_memory_trajectory_*.py`, `tests/test_memory_experience_*.py` +- `tests/test_memory_projection_jobs*.py` +- `tests/test_observability_*.py` +- `tests/test_docs_ia_contract.py`, `tests/test_docs_build_contract.py` - `tests/test_architecture.py` ## Test taxonomy @@ -61,6 +65,11 @@ The following matrix is treated as executable contract: | Scanner traversal safety | `tests/test_scanner_extra.py`, `tests/test_security.py` | | Engineering Memory SQLite schema, governance, retrieval | `tests/test_memory_schema.py`, `tests/test_memory_store.py`, `tests/test_memory_governance.py`, `tests/test_memory_retrieval.py`, `tests/test_memory_mcp_sync.py` | | Semantic index projection, rebuild, LanceDB backend | `tests/test_semantic_projection.py`, `tests/test_semantic_rebuild.py`, `tests/test_semantic_lancedb_backend.py`, `tests/test_semantic_embedding.py` | +| Trajectory projection, quality passport, anomalies, retrieval | `tests/test_memory_trajectory_projector.py`, `tests/test_memory_trajectory_quality.py`, `tests/test_memory_trajectory_anomalies.py`, `tests/test_memory_trajectory_retrieval.py` | +| Experience distillation, evidence diversity, scoped retrieval, promotion | `tests/test_memory_experience_distillation.py`, `tests/test_memory_experience_retrieval.py`, `tests/test_memory_experience_promotion.py` | +| Projection queue coalescing, watermarks, worker lifecycle | `tests/test_memory_projection_jobs.py`, `tests/test_memory_projection_jobs_schema.py`, `tests/test_projection_spawn_guard.py` | +| Platform 
Observability config, correlation, persistence, query, rendering, MCP | `tests/test_observability_config.py`, `tests/test_observability_correlation.py`, `tests/test_observability_store.py`, `tests/test_observability_query.py`, `tests/test_observability_render.py`, `tests/test_observability_mcp_registrar.py` | +| Documentation IA, line budgets, strict site build | `tests/test_docs_ia_contract.py`, `tests/test_docs_build_contract.py` | | Layer dependency direction | `tests/test_architecture.py` | ## Invariants (MUST) diff --git a/docs/book/24-compatibility-and-versioning.md b/docs/book/24-compatibility-and-versioning.md index 9ad214ea..c5713f95 100644 --- a/docs/book/24-compatibility-and-versioning.md +++ b/docs/book/24-compatibility-and-versioning.md @@ -69,6 +69,7 @@ Version bump rules: incompatibly — forces index rebuild, not SQLite migration (see [13-engineering-memory/index.md](13-engineering-memory/index.md)) - bump **Platform Observability schema** only for incompatible telemetry-store changes; it remains separate from reports, gates, baselines, and memory facts + (see [26-platform-observability.md](26-platform-observability.md)) Operational compatibility rules: @@ -124,6 +125,7 @@ Refs: | Metrics-baseline schema bump | Dedicated metrics-baseline files must be regenerated | | Engineering Memory schema bump | Older DBs migrate or re-init per `schema_migrate.py` | | Semantic index format bump | LanceDB sidecar invalidated; run `memory semantic rebuild` | +| Platform Observability schema bump | Local diagnostic store reader/writer must migrate together | ## Determinism / canonicalization diff --git a/docs/book/25-mcp-interface/determinism-and-tests.md b/docs/book/25-mcp-interface/determinism-and-tests.md index d7902ddd..ba5d10a3 100644 --- a/docs/book/25-mcp-interface/determinism-and-tests.md +++ b/docs/book/25-mcp-interface/determinism-and-tests.md @@ -1,3 +1,10 @@ + +# MCP Security, Determinism, and Tests + +Tool inventory and payload contracts: +[MCP 
interface](index.md). Platform diagnostics: +[Platform Observability tool](tools/platform-observability.md). + ## Security model | Property | Guarantee | @@ -28,6 +35,8 @@ - `tests/test_mcp_service.py` - `tests/test_mcp_server.py` - `tests/test_mcp_tool_schema_snapshot.py` +- `tests/test_observability_mcp_registrar.py` +- `tests/test_observability_query.py` --- @@ -40,3 +49,4 @@ - [11-cli.md](../11-cli.md) — CLI reference - [05-report.md](../05-report.md) — canonical report schema - [MCP deep dive](../../guide/mcp/README.md) — architecture, client setup, workflows, and prompt patterns +- [Platform Observability](../26-platform-observability.md) — observer storage, privacy, and anti-inference contract diff --git a/docs/book/25-mcp-interface/index.md b/docs/book/25-mcp-interface/index.md index aa57a6e6..4b462d61 100644 --- a/docs/book/25-mcp-interface/index.md +++ b/docs/book/25-mcp-interface/index.md @@ -127,3 +127,7 @@ graph LR The surface is intentionally triage-first: analyze → summarize/triage → drill into one finding or one hotspot family. + +Tool families and exact parameters are split under +[Tools](tools/analysis.md), including the +[Platform Observability slicer](tools/platform-observability.md). diff --git a/docs/book/25-mcp-interface/tools/platform-observability.md b/docs/book/25-mcp-interface/tools/platform-observability.md new file mode 100644 index 00000000..19885ccc --- /dev/null +++ b/docs/book/25-mcp-interface/tools/platform-observability.md @@ -0,0 +1,60 @@ +# Platform Observability Tool + + + +`query_platform_observability` projects bounded diagnostics from CodeClone's +local observer store. It is intended for CodeClone maintainers and development +agents, not for evaluating the analyzed repository. + +See [Platform Observability](../../26-platform-observability.md) for storage, +privacy, configuration, and trust boundaries. + +## Parameters + +| Parameter | Contract | +|---|---| +| `root` | Absolute repository root. 
| +| `section` | One supported diagnostics section. | +| `detail_level` | `compact`, `normal`, or `full`; `full` currently downgrades to `normal`. | +| `limit` | Row cap, clamped to `1..50`. | +| `window` | `latest` or a correlation ID. | +| `operation_id` | Reserved; reported in `ignored_parameters`. | +| `span_id` | Reserved; reported in `ignored_parameters`. | + +Supported sections: + +- `summary` +- `slow_operations` +- `memory_pipeline_cost` +- `db_cost` +- `agent_context` +- `mcp_tool_matrix` +- `correlated_chains` +- `costly_noops` +- `pipeline` + +Each call returns one section only. Compact detail is bounded to five rows; +normal detail is bounded by `limit`. + +## Inert states + +When observability is disabled, the tool returns a disabled status. When no +local store exists, it returns a no-store status. Neither state changes +analysis behavior. + +An invalid section returns the available section names. Reserved parameters +are echoed as ignored instead of changing the projection. + +## Interpretation boundary + +The envelope states that: + +- the audience is CodeClone development; +- the data is not user-facing repository quality evidence; +- it does not affect reports, gates, baselines, memory facts, or edit + authorization; +- reported heuristics are diagnostic hints, not findings. + +This anti-inference boundary is part of the tool contract. See +[Determinism and tests](../determinism-and-tests.md) and the +[diagnostics guide](../../../guide/observability/diagnostics.md). diff --git a/docs/book/25-mcp-interface/tools/session-and-memory.md b/docs/book/25-mcp-interface/tools/session-and-memory.md index ecdc9c83..53a4ce37 100644 --- a/docs/book/25-mcp-interface/tools/session-and-memory.md +++ b/docs/book/25-mcp-interface/tools/session-and-memory.md @@ -16,3 +16,5 @@ This tool is **development-only**. It reports numeric operation/span, database-cost, payload, agent-context, and pipeline diagnostics for CodeClone itself. 
It never contributes repository findings, gates, baselines, memory facts, or edit authorization, and it does not expose raw SQL or payload bodies. +See the dedicated +[Platform Observability tool contract](platform-observability.md). diff --git a/docs/book/26-platform-observability.md b/docs/book/26-platform-observability.md new file mode 100644 index 00000000..1e32b6e9 --- /dev/null +++ b/docs/book/26-platform-observability.md @@ -0,0 +1,132 @@ +# 26. Platform Observability + + + +Platform Observability is a local diagnostics surface for CodeClone development. +It explains the cost and shape of CodeClone's own execution. It does **not** +describe repository quality and must never affect analysis truth, gates, +baselines, cache compatibility, findings, or edit authorization. + +For practical commands, see the +[observability diagnostics guide](../guide/observability/diagnostics.md). For +the bounded MCP projection, see +[query_platform_observability](25-mcp-interface/tools/platform-observability.md). + +## Trust boundary + +```mermaid +flowchart LR + A["CLI / MCP / projection worker"] --> B["Operation and span instrumentation"] + B --> C["Local SQLite store
    .codeclone/db/platform_observability.sqlite3"] + C --> D["CLI JSON / self-contained HTML"] + C --> E["Bounded MCP diagnostics"] + D --> F["Human diagnosis"] + E --> F + B -. "must not influence" .-> G["Analysis, findings, gates,
    baseline, cache, permissions"] +``` + +The observer: + +- is disabled by default; +- stores data locally only; +- records metadata, counters, durations, bounded payload sizes, and normalized + literal-free SQL fingerprints; +- never records prompt or MCP payload bodies; +- exposes telemetry hints, not findings or vulnerabilities; +- remains inert when disabled or when no store exists. + +## Enabling instrumentation + +Configuration is environment-only. There is no `[tool.codeclone]` +observability table. + +| Variable | Meaning | +|---|---| +| `CODECLONE_OBSERVABILITY_ENABLED=1` | Enable instrumentation. | +| `CODECLONE_OBSERVABILITY_FORCE=1` | Permit observation in CI; it does not enable instrumentation by itself. | +| `CODECLONE_OBSERVABILITY_PROFILE=1` | Capture optional process metrics; requires `codeclone[perf]`. | +| `CODECLONE_OBSERVABILITY_PERSIST=0` | Instrument without persisting completed operations. | +| `CODECLONE_OBSERVABILITY_CAPTURE_PAYLOAD_SIZES=0` | Disable request/response size and token estimates. | +| `CODECLONE_OBSERVABILITY_PAYLOAD_SNAPSHOT=1` | Reserved and rejected: raw payload snapshots are not supported. | + +An explicit `CODECLONE_OBSERVABILITY_ENABLED=1` is sufficient in CI. +`CODECLONE_OBSERVABILITY_FORCE` never enables observation by itself and is +reserved as an explicit CI-gate override. + +Configuration fields for retention and row caps are reserved in the internal +model but are not automatic pruning guarantees in the current release. + +## Data model + +The local schema version is `1.0`. A completed operation and its spans are +written in one transaction. + +An operation records stable identifiers, parent/correlation IDs, surface, +operation name, timestamps, duration, status, bounded error classification, +session and root digests, request/response sizes, token estimates, and optional +process metrics. 
+ +A span records its parent, duration, reason kind, deduplication state, numeric +counters, optional process metrics, and at most eight normalized SQL +fingerprints. SQL literals are removed before persistence. + +Reindex reasons are classified as: + +- `content_changed` +- `schema_version_changed` +- `model_changed` +- `manual_rebuild` +- `first_index` +- `unknown` + +## CLI projection + +```bash +codeclone observability trace --root . +codeclone observability trace --root . --last 50 --html /tmp/codeclone-observer.html +codeclone observability trace --root . --operation OPERATION_ID --json /tmp/trace.json +codeclone observability trace --root . --correlation CORRELATION_ID +``` + +Without `--json` or `--html`, the command writes JSON to stdout. A missing +store is an informational empty state and exits successfully. + +The HTML cockpit is self-contained and includes operation chains, a span +waterfall, pipeline and Engineering Memory costs, MCP tool aggregates, database +costs, normalized SQL fingerprints, agent context, and costly no-op signals. +It has no external assets or JavaScript dependency. + +## MCP projection + +`query_platform_observability` returns one bounded section per call: + +- `summary` +- `slow_operations` +- `memory_pipeline_cost` +- `db_cost` +- `agent_context` +- `mcp_tool_matrix` +- `correlated_chains` +- `costly_noops` +- `pipeline` + +`detail_level=compact` returns at most five rows. `normal` honors `limit`, +clamped to `1..50`; `full` currently downgrades to `normal`. `window` accepts +`latest` or a correlation ID. `operation_id` and `span_id` are reserved and +reported as ignored parameters. + +The response explicitly declares a CodeClone-development audience and states +that it is not user-facing quality evidence. See +[MCP determinism and tests](25-mcp-interface/determinism-and-tests.md) for the +bounded-projection contract. + +## Privacy and lifecycle + +The SQLite database is optional local diagnostic state. 
It is outside the +canonical report, baseline, and analysis cache contracts. Deleting it only +removes diagnostics; it does not alter analysis results. + +There is no network exporter. Automatic retention pruning is not currently +enforced, so operators who enable persistence own local database lifecycle. +See [Security model](21-security-model.md) and +[Plans and retention](../plans-and-retention.md). diff --git a/docs/book/README.md b/docs/book/README.md index 1d418e6e..b6a8268a 100644 --- a/docs/book/README.md +++ b/docs/book/README.md @@ -68,12 +68,14 @@ If a statement is not enforced by code/tests, it is explicitly marked as non-con - [22-determinism.md](22-determinism.md) — determinism policy - [23-testing-as-spec.md](23-testing-as-spec.md) — tests as specification - [24-compatibility-and-versioning.md](24-compatibility-and-versioning.md) — compatibility and versioning rules +- [26-platform-observability.md](26-platform-observability.md) — local diagnostics for CodeClone's own runtime ### MCP interface - [25-mcp-interface/index.md](25-mcp-interface/index.md) — MCP interface contract - [25-mcp-interface/tools/workflow.md](25-mcp-interface/tools/workflow.md) — workflow tools - [25-mcp-interface/resources.md](25-mcp-interface/resources.md) — resource URIs +- [25-mcp-interface/tools/platform-observability.md](25-mcp-interface/tools/platform-observability.md) — bounded diagnostics tool ### Appendix diff --git a/docs/book/appendix/b-schema-layouts.md b/docs/book/appendix/b-schema-layouts.md index f4e47da9..7f81dec7 100644 --- a/docs/book/appendix/b-schema-layouts.md +++ b/docs/book/appendix/b-schema-layouts.md @@ -820,7 +820,7 @@ Core tables: | `memory_fts` | FTS5 search index (schema 1.1+) | | `memory_revisions` | Governance audit trail | | `memory_ingestion_runs` | Init/refresh run metadata | -| `memory_projection_jobs` | Coalesced trajectory/Experience/semantic jobs (schema 1.3+) | +| `memory_projection_jobs` | Coalesced trajectory/semantic/Experience jobs 
(schema 1.3+) | Trajectory tables (schema **`1.2`**+ trajectory DDL, active projection **`trajectory-v3`**): @@ -839,7 +839,7 @@ Experience tables (schema **`1.6`**, derived from trajectory evidence): | Table | Role | |------------------------------|------------------------------------------------------------| | `memory_experiences` | Advisory distilled patterns (`experience-v1`) | -| `memory_experience_facets` | Agent/profile/intent diversity facets | +| `memory_experience_facets` | Agent-family facets today; profile/intent kinds are reserved | | `memory_experience_evidence` | Contributing trajectory ids and outcomes | Patch Trail JSON uses `PATCH_TRAIL_SCHEMA_VERSION` (currently **`1`**) in @@ -866,6 +866,26 @@ Format version constant: `SEMANTIC_INDEX_FORMAT_VERSION` in - Row/projection semantics: [Engineering Memory](../13-engineering-memory/index.md); bump rules: [24-compatibility-and-versioning.md](../24-compatibility-and-versioning.md). +## Platform Observability schema (`1.0`) + +Optional local SQLite database at +`.codeclone/db/platform_observability.sqlite3`. It is disposable development +telemetry, not report, baseline, cache, audit, or Engineering Memory truth. + +| Table | Role | +|---|---| +| `platform_meta` | Schema version metadata. | +| `platform_operations` | Surface-level operation identity, correlation, duration, status, bounded payload sizes, and optional process metrics. | +| `platform_spans` | Ordered subsystem timing, reason/dedupe metadata, counters, normalized SQL fingerprints, and optional process metrics. | + +Operation and span rows are persisted together in one transaction. Profile +columns are nullable and populated only when profiling is enabled with +`codeclone[perf]`. `db_fingerprints` is additively migrated for older local +stores. + +See [Platform Observability](../26-platform-observability.md) for configuration, +privacy, query, and anti-inference rules. 
+ ## Refs - `codeclone/baseline/clone_baseline.py` @@ -874,6 +894,7 @@ Format version constant: `SEMANTIC_INDEX_FORMAT_VERSION` in - `codeclone/memory/schema_trajectory.py` - `codeclone/memory/schema_migrate.py` - `codeclone/memory/semantic/models.py` +- `codeclone/observability/store/schema.py` - `codeclone/contracts/__init__.py` (`SEMANTIC_INDEX_FORMAT_VERSION`) - `codeclone/report/document/builder.py` - `codeclone/report/renderers/text.py` diff --git a/docs/book/integrations/vs-code-extension.md b/docs/book/integrations/vs-code-extension.md index ceb6eb27..cd85b038 100644 --- a/docs/book/integrations/vs-code-extension.md +++ b/docs/book/integrations/vs-code-extension.md @@ -117,6 +117,9 @@ canonical report truth. and is not older than the current run. - **Session-local state**: reviewed markers shape review UX but never leak into repository truth. +- **Trajectory evidence**: dashboard/detail commands render MCP trajectory + status, anomalies, exact agent-label aggregates, quality passports, and + Patch Trail evidence without inventing IDE-local scoring. - **First-run clarity**: onboarding leads to `Analyze Workspace`, not transport setup. - **Restricted Mode honesty**: explain requirements without pretending @@ -145,3 +148,5 @@ CodeClone CLI, canonical report JSON, and CodeClone MCP. For the underlying interface contract, see [MCP usage guide](../../guide/mcp/README.md) and [MCP interface contract](../25-mcp-interface/index.md). +Trajectory scoring is defined by +[Trajectory quality and passport](../13-engineering-memory/trajectory-quality-and-passport.md). diff --git a/docs/getting-started.md b/docs/getting-started.md index 70b7ac95..f11bcb14 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -253,4 +253,7 @@ See [Config and defaults](book/10-config-and-defaults.md). 
- [Architecture narrative](guide/explanation/how-it-works.md) — how the pipeline works - [Baseline contract](book/07-baseline.md) — trust model and schema - [MCP interface contract](book/25-mcp-interface/index.md) — tool surface and guarantees +- [Engineering Memory recipes](guide/mcp/workflows/memory-recipes.md) — scoped context and governed drafts +- [Trajectories and Experiences](guide/memory/trajectories-and-experiences.md) — workflow evidence and recurring patterns +- [Platform Observability](guide/observability/diagnostics.md) — diagnose CodeClone's own runtime - [Report contract](book/05-report.md) — canonical JSON schema diff --git a/docs/guide/README.md b/docs/guide/README.md index dcc08245..c883fef3 100644 --- a/docs/guide/README.md +++ b/docs/guide/README.md @@ -18,6 +18,8 @@ enums, payload semantics), use the [Contracts book](../book/README.md). | Connect an AI agent via MCP | [MCP overview](mcp/README.md) | | Govern agent edits | [Change control overview](change-control/overview.md) | | Scope context before edits | [Engineering Memory overview](memory/overview.md) | +| Inspect trajectory history and patterns | [Trajectories and Experiences](memory/trajectories-and-experiences.md) | +| Diagnose CodeClone runtime cost | [Platform Observability](observability/diagnostics.md) | ## MCP workflows diff --git a/docs/guide/integrations/vscode/setup.md b/docs/guide/integrations/vscode/setup.md index f50a971f..29e97307 100644 --- a/docs/guide/integrations/vscode/setup.md +++ b/docs/guide/integrations/vscode/setup.md @@ -138,6 +138,11 @@ These commands require workspace trust and an active MCP connection. - **Open Memory Search Panel** / **Refresh Memory Search** — results webview. - **Configure Memory Search** — workspace wizard for semantic, drafts, stale, and result limit (see **Engineering Memory search** settings below). +- **Show Trajectory Dashboard** — projection health, quality/outcome aggregates, + anomalies, and recent trajectories. 
+- **Show Trajectory Detail** — full passport with quality/complexity + calculations, Patch Trail, contract gates, incidents, steps, and evidence. +- **Copy Trajectory Dashboard Brief** — Markdown summary for review notes. Server-side semantic still requires `[tool.codeclone.memory.semantic] enabled`, the semantic sidecar, and a successful rebuild (`manage_engineering_memory` @@ -147,6 +152,8 @@ rebuild` for CLI/CI). Install semantic-quality recall; `codeclone[semantic-lancedb]` alone can run only the deterministic diagnostic provider. See [Engineering Memory](../../../book/13-engineering-memory/index.md). +Trajectory semantics: +[Trajectory quality and passport](../../../book/13-engineering-memory/trajectory-quality-and-passport.md). ## Open Triage diff --git a/docs/guide/mcp/README.md b/docs/guide/mcp/README.md index 18333e30..ae4493e6 100644 --- a/docs/guide/mcp/README.md +++ b/docs/guide/mcp/README.md @@ -6,7 +6,8 @@ Use CodeClone through `codeclone-mcp` — same pipeline and report as the CLI. **Analysis truth is read-only:** MCP never mutates source, baselines, analysis cache, or canonical reports. It **may** write session-local coordination (workspace intents), Engineering Memory **drafts**, and optional audit rows when -enabled. +enabled. Opt-in Platform Observability writes separate local development +telemetry and never becomes repository truth. Install: [Getting started — MCP extra](../../getting-started.md#install). @@ -41,3 +42,4 @@ Install: [Getting started — MCP extra](../../getting-started.md#install). 
| Payload field cheat sheet | [Payload cheatsheet](payload-cheatsheet.md) | | Change control contract | [Structural Change Controller](../../book/12-structural-change-controller/index.md) | | Engineering Memory contract | [Engineering Memory](../../book/13-engineering-memory/index.md) | +| Runtime diagnostics | [Platform Observability](../observability/diagnostics.md) | diff --git a/docs/guide/mcp/architecture.md b/docs/guide/mcp/architecture.md index ce0deb55..30339dae 100644 --- a/docs/guide/mcp/architecture.md +++ b/docs/guide/mcp/architecture.md @@ -36,7 +36,7 @@ graph TD WIR["Workspace Intent Registry
    .codeclone/intents/ or intents.sqlite3"] MEM["Engineering Memory SQLite
    .codeclone/memory/"] AUD["Audit trail (optional)
    .codeclone/db/"] - OBS["Platform Observability (dev-only)
    .codeclone/db/"] + OBS["Platform Observability (dev-only)
    platform_observability.sqlite3"] end MCPSession -->|" coordination + drafts "| Disk diff --git a/docs/guide/mcp/workflows/memory-recipes.md b/docs/guide/mcp/workflows/memory-recipes.md index 69780ee3..2c9eb874 100644 --- a/docs/guide/mcp/workflows/memory-recipes.md +++ b/docs/guide/mcp/workflows/memory-recipes.md @@ -63,7 +63,8 @@ enqueue projection rebuild when configured. | Keyword search | `query_engineering_memory(mode=search, query=..., root=, filters={match_mode: any\|all})` | | Semantic blend | same + `semantic=true` when semantic index is built | | One path | `query_engineering_memory(mode=for_path, path=..., root=)` | -| Trajectory preview | `query_engineering_memory(mode=trajectory_get, intent_id=..., root=)` | +| Trajectory detail | `query_engineering_memory(mode=trajectory_get, record_id=, root=)` | +| Trajectory dashboard | `query_engineering_memory(mode=trajectory_dashboard, root=)` | | Playbook | `help(topic=engineering_memory)` | ## 6. Semantic index maintenance @@ -76,4 +77,12 @@ manage_engineering_memory(action=rebuild_semantic_index, root=) Contract: [Semantic search](../../../book/13-engineering-memory/search-semantic.md). +## 7. Trajectory and Experience evidence + +Scoped `get_relevant_memory` keeps governed records, trajectory precedents, and +advisory Experiences in separate response lanes. Inspect the workflow in +[Trajectories and Experiences](../../memory/trajectories-and-experiences.md); +use `promote_experience` only when a recurring pattern deserves human review as +a draft memory record. + --- diff --git a/docs/guide/memory/overview.md b/docs/guide/memory/overview.md index def91c0c..e5267173 100644 --- a/docs/guide/memory/overview.md +++ b/docs/guide/memory/overview.md @@ -8,7 +8,10 @@ control with scoped context before edits. 
|------|------| | Bootstrap / sync | [MCP memory recipes](../mcp/workflows/memory-recipes.md) | | MCP contract | [Engineering Memory](../../book/13-engineering-memory/index.md) | -| Trajectory / jobs | [Trajectory](../../book/13-engineering-memory/trajectory-and-patch-trail.md) | +| Trajectories / Experiences | [Practical guide](trajectories-and-experiences.md) | +| Trajectory contract | [Projection and Patch Trail](../../book/13-engineering-memory/trajectory-and-patch-trail.md) | +| Quality passport | [Quality and analytics](../../book/13-engineering-memory/trajectory-quality-and-passport.md) | +| Experience contract | [Experience Layer](../../book/13-engineering-memory/experience-layer.md) | Human **approve** of drafts: VS Code Memory view **or** `codeclone memory approve` (not MCP agent tools). diff --git a/docs/guide/memory/trajectories-and-experiences.md b/docs/guide/memory/trajectories-and-experiences.md new file mode 100644 index 00000000..90e4fb00 --- /dev/null +++ b/docs/guide/memory/trajectories-and-experiences.md @@ -0,0 +1,76 @@ +# Work with Trajectories and Experiences + + + +Engineering Memory exposes two evidence layers beyond curated records: + +- trajectories reconstruct what happened during agent work; +- Experiences distill recurring patterns across those trajectories. + +Neither layer grants permission to edit. Use them to prepare and review work, +then use change control for authorization. + +```mermaid +flowchart LR + A["Audit evidence"] --> B["Trajectories"] + B --> C["Quality passport and anomalies"] + B --> D["Experience distillation"] + D --> E["Scoped advisory patterns"] + E --> F["Optional draft promotion"] + F --> G["Human governance"] +``` + +## Inspect trajectory health + +```bash +codeclone memory trajectory status --root . +codeclone memory trajectory dashboard --root . +codeclone memory trajectory anomalies --root . +codeclone memory trajectory agents --root . +``` + +Routine run projections are hidden by default. 
Add `--include-routine` when +you are diagnosing those workflows too. + +Search and inspect one trajectory: + +```bash +codeclone memory trajectory search "verification" --root . +codeclone memory trajectory show TRAJECTORY_ID --root . +``` + +The detail view explains the quality score, complexity band, incidents, +anomalies, evidence, and patch-trail verification. + +## Rebuild projections + +```bash +codeclone memory trajectory rebuild --root . +codeclone memory jobs run-once --root . +``` + +The background projection job refreshes trajectory, semantic, and Experience +projections in that execution order. See +[Projection jobs](../../book/13-engineering-memory/projection-jobs.md). + +## Retrieve Experiences + +Experiences are returned automatically by scoped memory retrieval when their +directory family matches the requested scope. They are kept separate from +memory records and trajectory precedents so callers cannot confuse advisory +patterns with governed facts. + +Through MCP, call `get_relevant_memory` with `scope` or an active `intent_id`. +The response may include: + +- `records`: governed memory records; +- `trajectories`: relevant precedents; +- `experiences`: recurring project patterns. + +To inspect a known Experience in full, use the Engineering Memory query +surface. To turn it into a reviewable draft, use +`manage_engineering_memory(action="promote_experience", experience_id="...")`. +Promotion is idempotent and does not approve the draft. + +The normative contracts are [Trajectory quality and passport](../../book/13-engineering-memory/trajectory-quality-and-passport.md) +and [Experience layer](../../book/13-engineering-memory/experience-layer.md). 
diff --git a/docs/guide/observability/diagnostics.md b/docs/guide/observability/diagnostics.md new file mode 100644 index 00000000..7a9c128b --- /dev/null +++ b/docs/guide/observability/diagnostics.md @@ -0,0 +1,89 @@ +# Diagnose CodeClone with Platform Observability + + + +Platform Observability is for diagnosing CodeClone itself: slow MCP calls, +projection work, database query cost, redundant work, and correlated +CLI/MCP/worker activity. It is not a repository quality report. + +The normative contract is +[Platform Observability](../../book/26-platform-observability.md). + +## Enable it locally + +```bash +export CODECLONE_OBSERVABILITY_ENABLED=1 +``` + +Run the CodeClone workflow you want to inspect, then query the local store: + +```bash +codeclone observability trace --root . +``` + +For optional process metrics: + +```bash +uv pip install "codeclone[perf]" +export CODECLONE_OBSERVABILITY_PROFILE=1 +``` + +In CI, observation remains off unless it is explicitly enabled: + +```bash +export CODECLONE_OBSERVABILITY_ENABLED=1 +``` + +`CODECLONE_OBSERVABILITY_FORCE=1` is an explicit CI-gate override but never +enables collection by itself. + +## Render the cockpit + +```bash +codeclone observability trace \ + --root . \ + --last 50 \ + --html /tmp/codeclone-observer.html +``` + +The self-contained page visualizes: + +```mermaid +flowchart LR + A["Operation chains"] --> B["Span waterfall"] + B --> C["Pipeline and memory costs"] + C --> D["MCP and DB aggregates"] + D --> E["SQL fingerprints and no-op hints"] +``` + +Use `--operation` to isolate one operation or `--correlation` to follow a +workflow across process boundaries. Use `--json` for a machine-readable export. 
+ +## Query through MCP + +Start broad: + +```json +{ + "root": "/absolute/repository", + "section": "summary", + "window": "latest", + "detail_level": "compact" +} +``` + +Then select one bounded section such as `slow_operations`, `db_cost`, +`memory_pipeline_cost`, `mcp_tool_matrix`, or `correlated_chains`. + +Do not infer repository quality from these numbers. High database activity +means CodeClone executed database work; it does not mean the analyzed project +has a database problem. See +[MCP observability tool](../../book/25-mcp-interface/tools/platform-observability.md). + +## Local data lifecycle + +The store is `.codeclone/db/platform_observability.sqlite3`. CodeClone does not +send it to a remote telemetry service. Automatic pruning is not currently +enforced, so remove the file when you no longer need the diagnostics. + +Raw prompts, payload bodies, and SQL literals are not stored. diff --git a/docs/index.md b/docs/index.md index 0c7770df..d4dd7c3d 100644 --- a/docs/index.md +++ b/docs/index.md @@ -52,7 +52,9 @@ patch against the declared boundary, and generates an auditable review receipt. | MCP usage (workflows, setup) | [MCP guide](guide/mcp/README.md) | | Change controller workflow | [Structural Change Controller](book/12-structural-change-controller/index.md) | | Engineering Memory (scope context) | [Engineering Memory](book/13-engineering-memory/index.md) | +| Trajectories and recurring patterns | [Trajectories and Experiences](guide/memory/trajectories-and-experiences.md) | | MCP interface contract | [MCP interface](book/25-mcp-interface/index.md) | +| Diagnose CodeClone runtime | [Platform Observability](guide/observability/diagnostics.md) | ## IDE and Agent Clients diff --git a/docs/plans-and-retention.md b/docs/plans-and-retention.md index d20adb2c..4e7af1d5 100644 --- a/docs/plans-and-retention.md +++ b/docs/plans-and-retention.md @@ -28,6 +28,7 @@ Team or Enterprise options. 
| **Intent registry backend** | SQLite (local) | SQLite or managed | PostgreSQL (managed or self-hosted) | | **Audit trail retention** | 30 days | 90 days | configurable | | **Audit payloads** | compact | compact or full | compact or full | +| **Platform Observability** | local, opt-in | local, opt-in | local, opt-in | | **Support** | community | priority onboarding + premium | dedicated + SLA | --- @@ -104,6 +105,18 @@ transitions, workspace coordination) in a local SQLite database when Full payloads include complete tool request/response metadata; compact payloads include event type, timestamps, and identifiers only. +## Platform Observability + +Platform Observability is a development diagnostic store, not controller audit +retention and not repository quality history. It is disabled by default and +local in every edition. Automatic retention pruning is not currently enforced; +operators own the lifecycle of +`.codeclone/db/platform_observability.sqlite3`. + +The observer stores no raw MCP/prompt bodies and never contributes findings, +gates, baselines, memory facts, or edit authorization. See +[Platform Observability](book/26-platform-observability.md). 
+ ## Why longer retention matters The SQLite intent registry and audit trail are **auditable coordination trails**: diff --git a/docs/privacy-policy.md b/docs/privacy-policy.md index a45167d7..8c76f1e2 100644 --- a/docs/privacy-policy.md +++ b/docs/privacy-policy.md @@ -17,8 +17,18 @@ For the CLI, MCP server, VS Code extension, and Claude Desktop bundle: - CodeClone does not send repository contents to an external CodeClone backend - CodeClone reads local repository files, local git state, baselines, and cache only to perform the requested structural analysis +- Engineering Memory, trajectory/Experience projections, controller audit, and + Platform Observability are optional local SQLite state under `.codeclone/` +- Platform Observability records bounded metadata, counters, timings, and + literal-free SQL fingerprints; it does not store raw prompts or payload bodies - the Claude Desktop bundle is only a local wrapper around `codeclone-mcp` +CodeClone does not provide a remote telemetry exporter. Automatic pruning of +the Platform Observability database is not currently enforced; users who enable +persistence control that local file's lifecycle. See +[Platform Observability](book/26-platform-observability.md) and +[Plans and Retention](plans-and-retention.md). + ## Claude Desktop bundle specifics The bundle in `extensions/claude-desktop-codeclone/`: diff --git a/extensions/vscode-codeclone/README.md b/extensions/vscode-codeclone/README.md index 45373db7..e4f9a944 100644 --- a/extensions/vscode-codeclone/README.md +++ b/extensions/vscode-codeclone/README.md @@ -25,6 +25,8 @@ artifacts. 
impact for the active file; `Copy Blast Radius Brief` puts a Markdown summary on the clipboard - **Session & audit insights** — `Show Session Stats` and `Show Controller Audit Trail` mirror CLI `--session-stats` and `--audit` in read-only webviews (IDE-only MCP tools, not exposed to agents) +- **Trajectory passports** — dashboard and detail views expose quality, + complexity, anomalies, Patch Trail evidence, and agent aggregates - **Coverage Join** — integrates `coverage.xml` to surface untested hotspots when available - **Source-first navigation** — `Reveal Source` opens the exact location; `Next / Previous Hotspot` steps through active targets in the editor @@ -151,10 +153,18 @@ is separate so the tree stays focused on human review work: (mode=for_path) - **Open Memory Search Panel** — read-only results webview (CSP, no scripts, allowlisted `command:` links to open a record) +- **Show Trajectory Dashboard** — status, agent/outcome aggregates, anomalies, + and recent trajectories +- **Show Trajectory Detail** — quality passport, complexity factors, Patch + Trail, contract gates, incidents, steps, and evidence +- **Copy Trajectory Dashboard Brief** — Markdown summary for review notes Use **Configure Memory Search** to adjust semantic recall, drafts/stale filters, and result limits per workspace. +Trajectory views are read-only projections from +`query_engineering_memory`; they do not create IDE-local workflow truth. 
+ --- ## Settings diff --git a/tests/fixtures/contract_snapshots/mcp_tool_schemas.json b/tests/fixtures/contract_snapshots/mcp_tool_schemas.json index 36c0e8d1..e27ada70 100644 --- a/tests/fixtures/contract_snapshots/mcp_tool_schemas.json +++ b/tests/fixtures/contract_snapshots/mcp_tool_schemas.json @@ -1796,7 +1796,7 @@ "input_schema": { "properties": { "topic": { - "description": "workflow, analysis_profile, suppressions, baseline, coverage, latest_runs, review_state, changed_scope, change_control, trust_boundaries, engineering_memory, verification_profiles", + "description": "workflow, analysis_profile, suppressions, baseline, coverage, latest_runs, review_state, changed_scope, change_control, trust_boundaries, engineering_memory, verification_profiles, observability", "title": "Topic", "type": "string" }, diff --git a/tests/test_docs_ia_contract.py b/tests/test_docs_ia_contract.py index 0bea3546..5a88cae0 100644 --- a/tests/test_docs_ia_contract.py +++ b/tests/test_docs_ia_contract.py @@ -35,6 +35,23 @@ "book/25-mcp-interface/**/*.md", ) +_COMPLEX_SURFACE_PAGES = ( + "book/26-platform-observability.md", + "book/13-engineering-memory/experience-layer.md", + "book/13-engineering-memory/trajectory-quality-and-passport.md", + "guide/observability/diagnostics.md", + "guide/memory/trajectories-and-experiences.md", +) + +_REQUIRED_NAV_PAGES = ( + "book/26-platform-observability.md", + "book/13-engineering-memory/experience-layer.md", + "book/13-engineering-memory/trajectory-quality-and-passport.md", + "book/25-mcp-interface/tools/platform-observability.md", + "guide/observability/diagnostics.md", + "guide/memory/trajectories-and-experiences.md", +) + def _line_count(path: Path) -> int: return len(path.read_text(encoding="utf-8").splitlines()) @@ -85,3 +102,45 @@ def test_change_control_workflow_has_single_mermaid_diagram() -> None: text = path.read_text(encoding="utf-8") count = text.count("```mermaid") assert count == 1, f"expected one mermaid block, found {count}" + 
+ +def test_complex_surfaces_have_visual_contracts() -> None: + missing = [ + rel + for rel in _COMPLEX_SURFACE_PAGES + if "```mermaid" not in (_DOCS / rel).read_text(encoding="utf-8") + ] + assert missing == [], f"complex pages without Mermaid diagrams: {missing}" + + +def test_new_surfaces_are_reachable_from_navigation() -> None: + nav = (_REPO_ROOT / "zensical.toml").read_text(encoding="utf-8") + missing = [rel for rel in _REQUIRED_NAV_PAGES if rel not in nav] + assert missing == [], f"pages missing from navigation: {missing}" + + +def test_observability_and_memory_guides_cross_link_contracts() -> None: + pairs = ( + ( + "guide/observability/diagnostics.md", + "../../book/26-platform-observability.md", + ), + ( + "guide/memory/trajectories-and-experiences.md", + "../../book/13-engineering-memory/experience-layer.md", + ), + ( + "book/26-platform-observability.md", + "../guide/observability/diagnostics.md", + ), + ( + "book/13-engineering-memory/experience-layer.md", + "../../guide/memory/trajectories-and-experiences.md", + ), + ) + missing = [ + f"{rel} -> {target}" + for rel, target in pairs + if target not in (_DOCS / rel).read_text(encoding="utf-8") + ] + assert missing == [], f"missing required cross-links: {missing}" diff --git a/zensical.toml b/zensical.toml index 177dab17..d864d556 100644 --- a/zensical.toml +++ b/zensical.toml @@ -34,6 +34,10 @@ nav = [ ] }, { "Engineering Memory" = [ { "Overview" = "guide/memory/overview.md" }, + { "Trajectories & Experiences" = "guide/memory/trajectories-and-experiences.md" }, + ] }, + { "Diagnostics" = [ + { "Platform Observability" = "guide/observability/diagnostics.md" }, ] }, { "MCP" = [ { "Overview" = "guide/mcp/README.md" }, @@ -104,7 +108,9 @@ nav = [ { "FTS search" = "book/13-engineering-memory/search-fts.md" }, { "Semantic search" = "book/13-engineering-memory/search-semantic.md" }, { "Trajectory" = "book/13-engineering-memory/trajectory-and-patch-trail.md" }, + { "Trajectory quality & passport" = 
"book/13-engineering-memory/trajectory-quality-and-passport.md" }, { "Trajectory labels" = "book/13-engineering-memory/trajectory-labels.md" }, + { "Experience Layer" = "book/13-engineering-memory/experience-layer.md" }, { "Projection jobs" = "book/13-engineering-memory/projection-jobs.md" }, { "Scope & invariants" = "book/13-engineering-memory/scope-and-invariants.md" }, ] }, @@ -121,6 +127,7 @@ nav = [ { "Determinism" = "book/22-determinism.md" }, { "Testing as Spec" = "book/23-testing-as-spec.md" }, { "Compatibility and Versioning" = "book/24-compatibility-and-versioning.md" }, + { "Platform Observability" = "book/26-platform-observability.md" }, ] }, { "MCP interface" = [ { "Overview" = "book/25-mcp-interface/index.md" }, @@ -131,6 +138,7 @@ nav = [ { "Workflow" = "book/25-mcp-interface/tools/workflow.md" }, { "Atomic change control" = "book/25-mcp-interface/tools/atomic-change-control.md" }, { "Session & memory" = "book/25-mcp-interface/tools/session-and-memory.md" }, + { "Platform observability" = "book/25-mcp-interface/tools/platform-observability.md" }, { "IDE governance" = "book/25-mcp-interface/tools/ide-governance.md" }, ] }, { "Resources" = "book/25-mcp-interface/resources.md" }, From 832c60bffa346cde93ea0dc033511ea469c5be37 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Fri, 12 Jun 2026 22:13:45 +0500 Subject: [PATCH 261/318] test: expanding test coverage for new branches and reorganizing test modules --- .pre-commit-config.yaml | 2 +- tests/test_audit_analysis_completed.py | 8 + tests/test_audit_event_core_v2.py | 37 + tests/test_cache.py | 8 + tests/test_cli_audit.py | 57 + tests/test_cli_config.py | 30 + tests/test_cli_memory_trajectory.py | 115 ++ tests/test_core_branch_coverage.py | 12 + tests/test_coverage_ci_uplift.py | 1026 ----------------- tests/test_instance_independent_methods.py | 9 + tests/test_lancedb_backend_mocked.py | 30 + tests/test_mcp_service.py | 359 +++++- tests/test_memory_cli_branch_coverage.py | 119 ++ 
tests/test_memory_config_resolve_edges.py | 35 + tests/test_memory_experience_store.py | 9 + tests/test_memory_ingest_paths.py | 37 + tests/test_memory_jobs_coverage.py | 181 +++ ...test_memory_retrieval_semantic_coverage.py | 124 ++ .../test_memory_retrieval_service_coverage.py | 91 ++ tests/test_memory_staleness.py | 154 +++ tests/test_memory_trajectory_anomalies.py | 83 ++ tests/test_memory_trajectory_coverage.py | 47 + tests/test_memory_trajectory_export.py | 14 + .../test_memory_trajectory_export_context.py | 55 + tests/test_memory_trajectory_projector.py | 68 ++ tests/test_memory_trajectory_retrieval.py | 71 ++ tests/test_memory_trajectory_store.py | 118 ++ tests/test_observability_cli_pipeline.py | 92 ++ tests/test_observability_profile.py | 22 + tests/test_observability_query.py | 174 +++ tests/test_observability_reader.py | 18 + tests/test_observability_render.py | 38 + tests/test_observability_runtime.py | 50 + tests/test_semantic_sources.py | 66 ++ tests/test_trajectory_rebuild_incremental.py | 29 + tests/test_workspace_intent_gate.py | 123 ++ tests/test_workspace_intent_gate_errors.py | 49 + 37 files changed, 2509 insertions(+), 1051 deletions(-) delete mode 100644 tests/test_coverage_ci_uplift.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8b31c51a..6c17738f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -56,7 +56,7 @@ repos: - id: pytest name: Pytest - entry: uv run pytest -q --cov=codeclone --cov-report=term-missing --cov-fail-under=97 + entry: uv run pytest -q --cov=codeclone --cov-report=term-missing --cov-fail-under=99 language: system pass_filenames: false always_run: true diff --git a/tests/test_audit_analysis_completed.py b/tests/test_audit_analysis_completed.py index 391b366f..948cda17 100644 --- a/tests/test_audit_analysis_completed.py +++ b/tests/test_audit_analysis_completed.py @@ -303,3 +303,11 @@ def test_emit_analysis_completed_from_report_custom_agent_fields( ), ) assert row == (4242, 
1700000001, "custom-agent") + + +def test_sequence_normalizes_only_list_values() -> None: + from codeclone.audit.analysis_completed import _sequence + + assert _sequence("not-a-list") == () + assert _sequence([1, 2]) == (1, 2) + assert _sequence(42) == () diff --git a/tests/test_audit_event_core_v2.py b/tests/test_audit_event_core_v2.py index 9937c9ed..c632083d 100644 --- a/tests/test_audit_event_core_v2.py +++ b/tests/test_audit_event_core_v2.py @@ -8,6 +8,8 @@ from pathlib import Path +import pytest + from codeclone.audit.events import ( AUDIT_EVENT_CORE_VERSION, EVENT_INTENT_CHECKED, @@ -98,3 +100,38 @@ def test_patch_trail_event_core_uses_counts() -> None: assert isinstance(facts, dict) assert facts["untouched_in_declared"] == 1 assert facts["patch_trail_digest"] == "abc" + + +def test_event_core_json_falls_back_when_canonical_encoding_fails( + monkeypatch: pytest.MonkeyPatch, +) -> None: + import json + + from codeclone.audit.events import AuditEvent, event_core_for_event + from codeclone.audit.writer import _event_core_json + + calls = {"count": 0} + + def _canonical_or_fallback(payload: object) -> str: + calls["count"] += 1 + if calls["count"] == 1: + raise TypeError("cannot serialize") + return json.dumps(payload, sort_keys=True, separators=(",", ":")) + + monkeypatch.setattr( + "codeclone.audit.writer._canonical_json", + _canonical_or_fallback, + ) + event = AuditEvent( + event_type="intent.declared", + severity="info", + repo_root_digest="digest", + agent_pid=1, + agent_label="agent", + status="active", + payload={}, + ) + payload = json.loads(_event_core_json(event)) + assert payload["truncated"] is True + assert payload["event_type"] == "intent.declared" + assert event_core_for_event(event)["event_type"] == "intent.declared" diff --git a/tests/test_cache.py b/tests/test_cache.py index f3bedad5..481e7cad 100644 --- a/tests/test_cache.py +++ b/tests/test_cache.py @@ -2557,3 +2557,11 @@ def test_decode_wire_segment_rejects_missing_segment_signature() -> 
None: def test_decode_wire_dead_candidate_rejects_invalid_rows() -> None: assert _decode_wire_dead_candidate(object(), "pkg/mod.py") is None + + +def test_integrity_read_json_document_forwards_max_bytes(tmp_path: Path) -> None: + from codeclone.cache.integrity import read_json_document + + path = tmp_path / "doc.json" + path.write_text('{"ok": true}', encoding="utf-8") + assert read_json_document(path, max_bytes=64) == {"ok": True} diff --git a/tests/test_cli_audit.py b/tests/test_cli_audit.py index 5a57610b..161b3e47 100644 --- a/tests/test_cli_audit.py +++ b/tests/test_cli_audit.py @@ -1200,3 +1200,60 @@ def _boom(**kwargs: object) -> None: new_func_count=0, new_block_count=0, ) + + +def test_workflow_audit_emit_and_digest_helpers( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + import sys + + from codeclone.surfaces.cli import workflow as cli_workflow + + class _Args: + audit_enabled = True + + cli_workflow._emit_cli_analysis_completed_if_enabled( + args=_Args(), + root_path=tmp_path, + report_document="not-a-dict", + new_func_count=0, + new_block_count=0, + ) + cli_workflow._emit_cli_analysis_completed_if_enabled( + args=_Args(), + root_path=tmp_path, + report_document={"integrity": {"digest": {"value": ""}}}, + new_func_count=0, + new_block_count=0, + ) + + def _boom(**_kwargs: object) -> None: + raise RuntimeError("audit unavailable") + + monkeypatch.setattr( + "codeclone.audit.analysis_completed.emit_analysis_completed_from_report", + _boom, + ) + cli_workflow._emit_cli_analysis_completed_if_enabled( + args=_Args(), + root_path=tmp_path, + report_document={"integrity": {"digest": {"value": "a" * 64}}}, + new_func_count=1, + new_block_count=0, + ) + + assert cli_workflow._report_digest_from_document({}) == "" + assert ( + cli_workflow._report_digest_from_document( + {"integrity": {"digest": "not-a-mapping"}} + ) + == "" + ) + + monkeypatch.setattr(sys, "argv", ["codeclone", "observability"]) + with pytest.raises(SystemExit): + 
cli_workflow.main() + monkeypatch.setattr(sys, "argv", ["codeclone", "memory", "--help"]) + with pytest.raises(SystemExit): + cli_workflow.main() diff --git a/tests/test_cli_config.py b/tests/test_cli_config.py index 66a88b98..0d0e7a71 100644 --- a/tests/test_cli_config.py +++ b/tests/test_cli_config.py @@ -339,3 +339,33 @@ def load(file_obj: Any) -> dict[str, object]: SimpleNamespace(import_module=lambda _name: _FakeTomli), ) assert loader_mod._load_toml(toml_path) == {"tool": {}} + + +def test_pyproject_loader_rejects_symlinks_and_invalid_ingest_table( + tmp_path: Path, +) -> None: + from codeclone.config.pyproject_loader import ( + ConfigValidationError, + _validate_nested_ingest_table, + load_pyproject_config, + open_repo_config, + ) + + broken = tmp_path / "pyproject.toml" + broken.symlink_to(tmp_path / "missing.toml") + with pytest.raises(ConfigValidationError, match="must not be a symlink"): + load_pyproject_config(tmp_path) + + real = tmp_path / "real.toml" + real.write_text("[tool.codeclone]\n", encoding="utf-8") + broken.unlink() + link = tmp_path / "pyproject.toml" + link.symlink_to(real) + with pytest.raises(ConfigValidationError, match="must not be a symlink"): + open_repo_config(tmp_path) + + with pytest.raises(ConfigValidationError, match="must be object"): + _validate_nested_ingest_table( + ingest_obj="not-a-table", + config_path=tmp_path / "pyproject.toml", + ) diff --git a/tests/test_cli_memory_trajectory.py b/tests/test_cli_memory_trajectory.py index bacb1e6c..a7643209 100644 --- a/tests/test_cli_memory_trajectory.py +++ b/tests/test_cli_memory_trajectory.py @@ -129,3 +129,118 @@ def test_memory_trajectory_cli_export_and_missing_db(tmp_path: Path) -> None: assert memory_main(["trajectory", "status", "--root", str(missing_root)]) == int( ExitCode.CONTRACT_ERROR ) + + +def test_trajectory_renderers_handle_populated_and_empty_payloads( + tmp_path: Path, +) -> None: + from dataclasses import replace + + from codeclone.memory.trajectory.cli_render 
import ( + render_projection_run, + render_trajectory_agents, + render_trajectory_anomalies, + render_trajectory_detail, + render_trajectory_list, + render_trajectory_search_results, + render_trajectory_status, + ) + from codeclone.memory.trajectory.models import TrajectoryListItem + + from .memory_fixtures import memory_store, seed_trajectory_audit_workflow + + class _CapturePrinter: + def __init__(self) -> None: + self.lines: list[str] = [] + + def print(self, *objects: object, **_kwargs: object) -> None: + self.lines.append(" ".join(str(item) for item in objects)) + + with memory_store(tmp_path) as (root, project, store, _db_path): + audit_db = tmp_path / "audit.sqlite3" + seed_trajectory_audit_workflow(root=root, audit_db=audit_db) + projection = store.rebuild_trajectories_from_audit( + project=project, + root_path=root, + audit_db_path=audit_db, + ) + run = replace(projection.run, legacy_event_count=3) + trajectory = projection.trajectories[0] + + printer = _CapturePrinter() + render_trajectory_status( + console=printer, + enabled=True, + count=1, + latest_run=run, + ) + render_projection_run(console=printer, run=run) + render_trajectory_list(console=printer, items=[]) + assert "No trajectories found." 
in printer.lines + + item = TrajectoryListItem( + id=trajectory.id, + workflow_id=trajectory.workflow_id, + outcome=trajectory.outcome, + quality_tier=trajectory.quality_tier, + quality_score=trajectory.quality_score, + event_count=trajectory.event_count, + started_at_utc=trajectory.started_at_utc, + finished_at_utc=trajectory.finished_at_utc, + summary=trajectory.summary, + ) + render_trajectory_list(console=printer, items=[item]) + render_trajectory_search_results( + console=printer, + query="recover", + trajectories=[], + ) + render_trajectory_agents(console=printer, payload={"agents": []}) + render_trajectory_agents( + console=printer, + payload={ + "agent_count": 1, + "trajectory_count": 1, + "unlabeled_trajectory_count": 0, + "agents": [ + "not-a-mapping", + {"agent_label": "agent", "trajectory_count": 1}, + ], + }, + ) + render_trajectory_anomalies( + console=printer, + payload={ + "summary": { + "trajectories_with_anomalies": 1, + "anomaly_count": 1, + "error_count": 1, + "warn_count": 0, + }, + "trajectories": [ + "skip", + { + "trajectory_id": trajectory.id, + "agent_label": "agent", + "outcome": "violated", + "quality_tier": "incident", + "anomalies": [ + "skip", + { + "severity": "error", + "kind": "scope_violation", + "message": "bad scope", + }, + ], + }, + ], + }, + ) + render_trajectory_detail(console=printer, trajectory=trajectory) + + joined = "\n".join(printer.lines) + assert trajectory.id in joined + assert "No matching trajectories" in joined + assert "No agent-labeled" in joined + assert "scope_violation" in joined + assert trajectory.summary in joined diff --git a/tests/test_core_branch_coverage.py b/tests/test_core_branch_coverage.py index 5d3fccda..1bd6e0b7 100644 --- a/tests/test_core_branch_coverage.py +++ b/tests/test_core_branch_coverage.py @@ -1417,3 +1417,15 @@ def save(self) -> None: cli._run_analysis_stages(args=args, boot=boot, cache=cast(Cache, _BadCache())) cli.print_banner(root=None) + + +def 
test_worker_signature_cache_handles_uninspectable_callable() -> None: + from codeclone.core import worker as core_worker + + core_worker._supported_process_file_kwarg_names.cache_clear() + + def _broken(*_args: object, **_kwargs: object) -> object: + return None + + assert core_worker._supported_process_file_kwarg_names(_broken) is None + core_worker._supported_process_file_kwarg_names.cache_clear() diff --git a/tests/test_coverage_ci_uplift.py b/tests/test_coverage_ci_uplift.py deleted file mode 100644 index 674f2305..00000000 --- a/tests/test_coverage_ci_uplift.py +++ /dev/null @@ -1,1026 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. -# SPDX-License-Identifier: MPL-2.0 -# Copyright (c) 2026 Den Rozhnovskiy - -from __future__ import annotations - -import json -import os -from dataclasses import replace -from pathlib import Path -from typing import cast - -import pytest - -from codeclone.audit.analysis_completed import _sequence -from codeclone.audit.events import AuditEvent, event_core_for_event -from codeclone.audit.writer import _event_core_json -from codeclone.cache.integrity import read_json_document -from codeclone.config.intent_registry import IntentRegistryConfigError -from codeclone.config.memory import IngestConfig -from codeclone.contracts import ExitCode -from codeclone.memory.experience.store import _facet_kind, _status -from codeclone.memory.ingest.paths import ( - resolve_contract_constants_paths, - resolve_document_link_paths, - resolve_mcp_tool_contradiction_sources, - resolve_mcp_tool_schema_snapshot_path, -) -from codeclone.memory.trajectory.agents import ( - aggregate_agent_rows, - trajectory_agent_label, -) -from codeclone.memory.trajectory.cli_render import ( - render_projection_run, - render_trajectory_agents, - render_trajectory_anomalies, - render_trajectory_detail, - 
render_trajectory_list, - render_trajectory_search_results, - render_trajectory_status, -) -from codeclone.memory.trajectory.models import ( - Trajectory, - TrajectoryListItem, - TrajectoryOutcome, - TrajectoryProjectionRun, - TrajectoryStep, - TrajectorySubject, -) -from codeclone.surfaces.cli.observability import observability_main -from codeclone.surfaces.mcp.payloads import measure_payload -from codeclone.workspace_intent.gate import ( - HOOK_AUTHORIZE_FOREIGN_ENV, - WorkspaceIntentRegistryUnavailable, - _hook_authorizes_foreign_active, - _include_record_in_hook_cleanup, - list_unclosed_workspace_intents_for_hook_cleanup, -) -from tests.test_workspace_intents import _record -from tests.workspace_intent_gate_helpers import write_workspace_record - - -class _CapturePrinter: - def __init__(self) -> None: - self.lines: list[str] = [] - - def print(self, *objects: object, **kwargs: object) -> None: - self.lines.append(" ".join(str(item) for item in objects)) - - -def _projection_run(*, legacy: int = 0) -> TrajectoryProjectionRun: - return TrajectoryProjectionRun( - id="run-1", - project_id="proj", - repo_root_digest="digest", - projection_version="2", - started_at_utc="2026-01-01T00:00:00Z", - finished_at_utc="2026-01-01T00:01:00Z", - status="ok", - workflows_seen=2, - trajectories_created=1, - trajectories_updated=0, - trajectories_unchanged=1, - legacy_event_count=legacy, - message=None, - ) - - -def _trajectory(*, outcome: str = "accepted", agent: bool = True) -> Trajectory: - subjects = ( - ( - TrajectorySubject( - subject_kind="agent", - subject_key="cursor-vscode/1.0.0", - relation="actor", - ), - ) - if agent - else () - ) - return Trajectory( - id="traj-1", - project_id="proj", - repo_root_digest="digest", - workflow_id="intent:intent-a-001", - intent_id="intent-a", - primary_run_id="run1234567890abcdef", - first_run_id="run1234567890abcdef", - last_run_id="run1234567890abcdef", - report_digest="a" * 64, - outcome=cast(TrajectoryOutcome, outcome), - 
quality_tier="verified", - quality_score=90, - labels=(), - summary="workflow summary", - trajectory_digest="b" * 64, - source_event_stream_digest="c" * 64, - projection_version="2", - event_count=2, - step_count=2, - incident_count=1, - started_at_utc="2026-01-01T00:00:00Z", - finished_at_utc="2026-01-01T00:01:00Z", - projected_at_utc="2026-01-01T00:01:00Z", - updated_at_utc="2026-01-01T00:01:00Z", - steps=( - TrajectoryStep( - step_index=0, - audit_sequence=1, - event_id="evt-1", - event_type="intent.declared", - status="active", - run_id="run1234567890abcdef", - report_digest=None, - event_core_sha256="d" * 64, - event_core_json="{}", - summary="declared", - created_at_utc="2026-01-01T00:00:00Z", - ), - ), - subjects=subjects, - evidence=(), - ) - - -def test_observability_cli_help_and_stdout_trace( - tmp_path: Path, - capsys: pytest.CaptureFixture[str], -) -> None: - assert observability_main([]) == int(ExitCode.CONTRACT_ERROR) - assert "trace" in capsys.readouterr().out - - from codeclone.config.observability import ObservabilityConfig - from codeclone.observability import bootstrap, operation, shutdown - from codeclone.observability.models import OperationRecord - from codeclone.observability.store.schema import ( - observability_store_path, - open_observability_store, - ) - from codeclone.observability.store.writer import write_operation - - bootstrap(ObservabilityConfig(enabled=True), root=tmp_path) - try: - with operation(name="cli.analyze", surface="cli"): - pass - finally: - shutdown() - conn = open_observability_store(observability_store_path(tmp_path)) - try: - write_operation( - conn, - OperationRecord( - operation_id="op-1", - correlation_id="corr", - surface="cli", - name="cli.analyze", - started_at_utc="2026-01-01T00:00:00Z", - duration_ms=1.0, - status="ok", - spans=(), - ), - ) - finally: - conn.close() - - code = observability_main(["trace", "--root", str(tmp_path)]) - out = capsys.readouterr().out - assert code == int(ExitCode.SUCCESS) - assert 
'"operation_tree"' in out - - -def test_measure_payload_handles_unserializable_values() -> None: - class _Bad: - def __str__(self) -> str: - raise TypeError("nope") - - bytes_size, tokens = measure_payload({"bad": _Bad()}) - assert bytes_size == 0 - assert tokens == 0 - - -def test_cache_integrity_read_json_document_forwards_max_bytes(tmp_path: Path) -> None: - path = tmp_path / "doc.json" - path.write_text('{"ok": true}', encoding="utf-8") - assert read_json_document(path, max_bytes=64) == {"ok": True} - - -def test_analysis_completed_sequence_helper() -> None: - assert _sequence("not-a-list") == () - assert _sequence([1, 2]) == (1, 2) - assert _sequence(42) == () - - -def test_event_core_json_fallback_on_canonical_failure( - monkeypatch: pytest.MonkeyPatch, -) -> None: - calls = {"count": 0} - - def _canonical_or_fallback(payload: object) -> str: - calls["count"] += 1 - if calls["count"] == 1: - raise TypeError("cannot serialize") - return json.dumps(payload, sort_keys=True, separators=(",", ":")) - - monkeypatch.setattr( - "codeclone.audit.writer._canonical_json", - _canonical_or_fallback, - ) - event = AuditEvent( - event_type="intent.declared", - severity="info", - repo_root_digest="digest", - agent_pid=1, - agent_label="agent", - status="active", - payload={}, - ) - payload = json.loads(_event_core_json(event)) - assert payload["truncated"] is True - assert payload["event_type"] == "intent.declared" - assert event_core_for_event(event)["event_type"] == "intent.declared" - - -def test_workspace_hook_cleanup_resolves_env_pid( - tmp_path: Path, - monkeypatch: pytest.MonkeyPatch, -) -> None: - monkeypatch.setattr( - "codeclone.surfaces.mcp._workspace_intent_pid.is_agent_pid_alive", - lambda _pid: True, - ) - own_pid = os.getpid() - own = replace( - _record(intent_id="intent-own-env-001", status="active"), - agent_pid=own_pid, - agent_start_epoch=42, - ) - write_workspace_record(tmp_path, own) - monkeypatch.setenv("CODECLONE_HOOK_OWN_AGENT_PID", str(own_pid)) - 
monkeypatch.setenv("CODECLONE_HOOK_OWN_AGENT_START_EPOCH", "42") - - unclosed = list_unclosed_workspace_intents_for_hook_cleanup(tmp_path) - - assert len(unclosed) == 1 - assert unclosed[0].intent_id == "intent-own-env-001" - - -def test_workspace_hook_cleanup_registry_unavailable( - tmp_path: Path, - monkeypatch: pytest.MonkeyPatch, -) -> None: - def _boom(_root: Path) -> object: - raise ValueError("broken registry") - - monkeypatch.setattr( - "codeclone.workspace_intent.gate.resolve_intent_registry_config", - _boom, - ) - with pytest.raises(WorkspaceIntentRegistryUnavailable, match="broken registry"): - list_unclosed_workspace_intents_for_hook_cleanup(tmp_path) - - -def test_workspace_hook_include_record_edges() -> None: - from codeclone.surfaces.mcp._workspace_intent_lifecycle import utc_now - - recoverable = replace( - _record(intent_id="intent-rec-001", status="active"), - agent_pid=os.getpid() + 5000, - agent_label="cursor-vscode/dead", - ) - now = utc_now() - assert ( - _include_record_in_hook_cleanup( - recoverable, - own_pid=os.getpid(), - own_start_epoch=1, - recoverable_agent_label_prefix=None, - include_foreign=False, - now=now, - ) - is False - ) - assert ( - _include_record_in_hook_cleanup( - recoverable, - own_pid=os.getpid(), - own_start_epoch=1, - recoverable_agent_label_prefix="cursor-vscode/", - include_foreign=False, - now=now, - ) - is True - ) - - -def test_hook_authorizes_foreign_active_env_values( - monkeypatch: pytest.MonkeyPatch, -) -> None: - monkeypatch.delenv(HOOK_AUTHORIZE_FOREIGN_ENV, raising=False) - assert _hook_authorizes_foreign_active() is True - monkeypatch.setenv(HOOK_AUTHORIZE_FOREIGN_ENV, "maybe") - assert _hook_authorizes_foreign_active() is False - monkeypatch.setenv(HOOK_AUTHORIZE_FOREIGN_ENV, "off") - assert _hook_authorizes_foreign_active() is False - - -def test_experience_store_private_validators() -> None: - with pytest.raises(ValueError, match="unknown experience facet kind"): - _facet_kind("not-a-facet") - with 
pytest.raises(ValueError, match="unknown experience status"): - _status("archived") - - -def test_trajectory_agents_aggregate_covers_failed_and_anomalies() -> None: - assert trajectory_agent_label(_trajectory(agent=False)) is None - violated = _trajectory(outcome="violated") - rows = aggregate_agent_rows( - (violated,), - anomaly_by_id={"traj-1": ()}, - ) - assert rows[0].failed_outcome_count == 1 - assert rows[0].anomaly_count == 0 - assert rows[0].intent_count == 1 - - -def test_trajectory_cli_render_populated_and_empty_paths() -> None: - printer = _CapturePrinter() - render_trajectory_status( - console=printer, - enabled=True, - count=1, - latest_run=_projection_run(legacy=3), - ) - assert any("legacy events" in line for line in printer.lines) - - printer = _CapturePrinter() - render_projection_run(console=printer, run=_projection_run(legacy=2)) - assert any("legacy audit events" in line for line in printer.lines) - - printer = _CapturePrinter() - render_trajectory_list(console=printer, items=[]) - assert printer.lines == ["No trajectories found."] - - item = TrajectoryListItem( - id="traj-1", - workflow_id="intent:a", - outcome="accepted", - quality_tier="verified", - quality_score=90, - event_count=2, - started_at_utc="2026-01-01T00:00:00Z", - finished_at_utc="2026-01-01T00:01:00Z", - summary="summary", - ) - printer = _CapturePrinter() - render_trajectory_list(console=printer, items=[item]) - assert any("traj-1" in line for line in printer.lines) - - printer = _CapturePrinter() - render_trajectory_search_results( - console=printer, - query="recover", - trajectories=[], - ) - assert any("No matching trajectories" in line for line in printer.lines) - - printer = _CapturePrinter() - render_trajectory_agents(console=printer, payload={"agents": []}) - assert any("No agent-labeled" in line for line in printer.lines) - - printer = _CapturePrinter() - render_trajectory_agents( - console=printer, - payload={ - "agent_count": 1, - "trajectory_count": 1, - 
"unlabeled_trajectory_count": 0, - "agents": [ - "not-a-mapping", - {"agent_label": "agent", "trajectory_count": 1}, - ], - }, - ) - assert any("agent" in line for line in printer.lines) - - printer = _CapturePrinter() - render_trajectory_anomalies( - console=printer, - payload={ - "summary": { - "trajectories_with_anomalies": 1, - "anomaly_count": 1, - "error_count": 1, - "warn_count": 0, - }, - "trajectories": [ - "skip", - { - "trajectory_id": "traj-1", - "agent_label": "agent", - "outcome": "violated", - "quality_tier": "incident", - "anomalies": [ - "skip", - { - "severity": "error", - "kind": "scope_violation", - "message": "bad scope", - }, - ], - }, - ], - }, - ) - assert any("scope_violation" in line for line in printer.lines) - - printer = _CapturePrinter() - trajectory = _trajectory() - render_trajectory_detail(console=printer, trajectory=trajectory) - joined = "\n".join(printer.lines) - assert "workflow summary" in joined - assert "labels:" not in joined - - -def test_ingest_path_resolvers_skip_missing_and_escape(tmp_path: Path) -> None: - root = tmp_path / "repo" - root.mkdir() - ingest = IngestConfig( - contract_constants_paths=("missing/contracts.py",), - document_link_paths=("../escape.md",), - mcp_tool_schema_snapshot_path="missing-tools.json", - mcp_tool_count_doc_paths=("missing-doc.md",), - ) - assert ( - resolve_contract_constants_paths( - root_path=root, - registry_paths=frozenset(), - ingest=ingest, - ) - == () - ) - assert ( - resolve_document_link_paths( - root_path=root, - registry_paths=frozenset({"docs/book/01.md"}), - ingest=ingest, - ) - == () - ) - assert resolve_mcp_tool_schema_snapshot_path(root_path=root, ingest=ingest) is None - assert resolve_mcp_tool_contradiction_sources(root_path=root, ingest=ingest) is None - - -def test_intent_registry_path_outside_repo_raises(tmp_path: Path) -> None: - from codeclone.config.intent_registry import resolve_intent_registry_db_path - - root = tmp_path / "repo" - root.mkdir() - outside = 
(tmp_path / "outside" / "intents.sqlite3").resolve() - with pytest.raises(IntentRegistryConfigError, match="relative to the repository"): - resolve_intent_registry_db_path( - root_path=root, - value=str(outside), - ) - - -def test_core_worker_signature_cache_handles_broken_callable() -> None: - from codeclone.core import worker as core_worker - - core_worker._supported_process_file_kwarg_names.cache_clear() - - def _broken(*_args: object, **_kwargs: object) -> object: - return None - - assert core_worker._supported_process_file_kwarg_names(_broken) is None - core_worker._supported_process_file_kwarg_names.cache_clear() - - -def test_measure_payload_estimate_failure_uses_char_fallback( - monkeypatch: pytest.MonkeyPatch, -) -> None: - def _boom(_payload: object) -> object: - raise TypeError("estimate failed") - - monkeypatch.setattr( - "codeclone.surfaces.mcp.payloads.estimate_payload", - _boom, - ) - byte_size, tokens = measure_payload({"ok": True}) - assert byte_size > 0 - assert tokens > 0 - - -def test_observability_cli_missing_store_and_file_outputs( - tmp_path: Path, - capsys: pytest.CaptureFixture[str], -) -> None: - empty_root = tmp_path / "empty" - empty_root.mkdir() - code = observability_main(["trace", "--root", str(empty_root)]) - assert code == int(ExitCode.SUCCESS) - assert "No observability store" in capsys.readouterr().out - - from codeclone.observability.models import OperationRecord - from codeclone.observability.store.schema import ( - observability_store_path, - open_observability_store, - ) - from codeclone.observability.store.writer import write_operation - - repo = tmp_path / "repo" - repo.mkdir() - conn = open_observability_store(observability_store_path(repo)) - try: - write_operation( - conn, - OperationRecord( - operation_id="op-cli", - correlation_id="op-cli", - surface="cli", - name="cli.analyze", - started_at_utc="2026-01-01T00:00:00Z", - duration_ms=1.0, - status="ok", - spans=(), - ), - ) - finally: - conn.close() - - json_path = 
tmp_path / "trace.json" - html_path = tmp_path / "trace.html" - code = observability_main( - [ - "trace", - "--root", - str(repo), - "--json", - str(json_path), - "--html", - str(html_path), - ] - ) - out = capsys.readouterr().out - assert code == int(ExitCode.SUCCESS) - assert json_path.is_file() - assert html_path.is_file() - assert f"Wrote {json_path}" in out - assert f"Wrote {html_path}" in out - - -def test_render_html_format_helpers_and_semantic_row() -> None: - from codeclone.observability.render_html import _bytes, _mb, _semantic_row, _tokens - from codeclone.observability.views import SpanCostView - - assert _mb(None) == "—" - assert "GB" in _mb(2048.0) - assert "MB" in _mb(512.0) - assert _bytes(None) == "—" - assert "MB" in _bytes(1024 * 1024) - assert "KB" in _bytes(2048) - assert _bytes(12).endswith(" B") - assert _tokens(None) == "—" - assert _tokens(0) == "—" - assert _tokens(1500).endswith("k") - - costly = SpanCostView( - span_id="s1", - name="memory.semantic.reindex", - surface="memory", - operation_id="op", - operation_name="memory.projection.job", - duration_ms=6000.0, - no_op=True, - reason_kind="schema_version_changed", - ) - costly_html = _semantic_row(costly) - assert "no-op · costly" in costly_html - assert "schema_version_changed" in costly_html - - noop = replace(costly, duration_ms=10.0) - assert "no-op" in _semantic_row(noop) - assert "costly" not in _semantic_row(noop) - - productive = replace(noop, no_op=False, reason_kind=None) - assert "productive" in _semantic_row(productive) - - -def test_observability_reader_epoch_ms_and_empty_correlation_filter( - tmp_path: Path, -) -> None: - from codeclone.observability.store.reader import _by_correlations, _epoch_ms - from codeclone.observability.store.schema import ( - observability_store_path, - open_observability_store, - ) - - assert _epoch_ms("") == 0.0 - assert _epoch_ms("not-a-date") == 0.0 - assert _epoch_ms("2026-01-01T00:00:00Z") > 0.0 - - conn = 
open_observability_store(observability_store_path(tmp_path)) - try: - assert _by_correlations(conn, []) == [] - finally: - conn.close() - - -def test_pyproject_loader_symlink_and_invalid_ingest_table(tmp_path: Path) -> None: - from codeclone.config.pyproject_loader import ( - ConfigValidationError, - _validate_nested_ingest_table, - load_pyproject_config, - open_repo_config, - ) - - broken = tmp_path / "pyproject.toml" - broken.symlink_to(tmp_path / "missing.toml") - with pytest.raises(ConfigValidationError, match="must not be a symlink"): - load_pyproject_config(tmp_path) - - real = tmp_path / "real.toml" - real.write_text("[tool.codeclone]\n", encoding="utf-8") - broken.unlink() - link = tmp_path / "pyproject.toml" - link.symlink_to(real) - with pytest.raises(ConfigValidationError, match="must not be a symlink"): - open_repo_config(tmp_path) - - with pytest.raises(ConfigValidationError, match="must be object"): - _validate_nested_ingest_table( - ingest_obj="not-a-table", - config_path=tmp_path / "pyproject.toml", - ) - - -def test_resolve_semantic_index_writer_disabled_returns_none() -> None: - from codeclone.config.memory import SemanticConfig - from codeclone.memory.semantic import resolve_semantic_index_writer - - assert resolve_semantic_index_writer(SemanticConfig(enabled=False)) is None - - -def test_semantic_retrieval_hydrate_trajectory_edges() -> None: - from codeclone.memory.retrieval.semantic import _hydrate_trajectory - from codeclone.memory.semantic.models import SemanticHit - - hit = SemanticHit(source_id="traj-1", source="trajectory", score=0.4) - - class _StoreWithoutTrajectoryApi: - pass - - assert _hydrate_trajectory(hit, _StoreWithoutTrajectoryApi(), 80) is None - - class _StoreMissingTrajectory: - def find_trajectory(self, _trajectory_id: str) -> None: - return None - - assert _hydrate_trajectory(hit, _StoreMissingTrajectory(), 80) is None - - -def test_execute_trajectory_rebuild_incremental_mode(tmp_path: Path) -> None: - from 
codeclone.config.memory import resolve_memory_config - from codeclone.memory.trajectory.rebuild_workflow import execute_trajectory_rebuild - - from .memory_fixtures import memory_store, seed_trajectory_audit_workflow - - with memory_store(tmp_path) as (root, project, store, _db_path): - audit_db = root / ".codeclone" / "db" / "audit.sqlite3" - seed_trajectory_audit_workflow(root=root, audit_db=audit_db) - config = resolve_memory_config(root) - full = execute_trajectory_rebuild( - root_path=root, - config=config, - store=store, - project=project, - ) - assert full["status"] == "ok" - assert full["mode"] == "full" - incremental = execute_trajectory_rebuild( - root_path=root, - config=config, - store=store, - project=project, - incremental_after_event_core_id=1, - ) - assert incremental["status"] == "ok" - assert incremental["mode"] == "incremental" - - -def test_memory_state_path_validation_errors(tmp_path: Path) -> None: - from codeclone.config.memory import _resolve_memory_state_path - - root = tmp_path / "repo" - root.mkdir() - with pytest.raises(TypeError, match="must resolve to a string path"): - _resolve_memory_state_path( - key="memory.semantic.index_path", - value=123, - root_path=root, - ) - with pytest.raises(ValueError, match="must stay under the repository root"): - _resolve_memory_state_path( - key="memory.semantic.index_path", - value="../outside.lance", - root_path=root, - ) - - -def test_hook_authorizes_foreign_active_truthy_env( - monkeypatch: pytest.MonkeyPatch, -) -> None: - monkeypatch.setenv(HOOK_AUTHORIZE_FOREIGN_ENV, "yes") - assert _hook_authorizes_foreign_active() is True - - -def test_hydrate_trajectory_hits_detail_levels(tmp_path: Path) -> None: - from codeclone.memory.retrieval import service as retrieval_service - from codeclone.memory.semantic.models import SemanticHit - - from .memory_fixtures import memory_store, seed_trajectory_audit_workflow - - with memory_store(tmp_path) as (root, project, store, _db_path): - audit_db = tmp_path / 
"audit.sqlite3" - seed_trajectory_audit_workflow(root=root, audit_db=audit_db) - trajectory = store.rebuild_trajectories_from_audit( - project=project, - root_path=root, - audit_db_path=audit_db, - ).trajectories[0] - hit = SemanticHit(source_id=trajectory.id, source="trajectory", score=0.5) - compact = retrieval_service._hydrate_trajectory_hits( - store, - project_id=project.id, - hits=[hit], - detail_level="compact", - ) - full = retrieval_service._hydrate_trajectory_hits( - store, - project_id=project.id, - hits=[hit], - detail_level="full", - ) - assert compact and full - assert compact[0]["semantic_score"] == 0.5 - assert full[0]["semantic_score"] == 0.5 - assert "steps" in full[0] - - -def test_mcp_payload_paginate_and_finding_resolution() -> None: - from codeclone.surfaces.mcp.payloads import ( - PageWindow, - paginate, - resolve_finding_id, - short_id, - ) - - window = paginate([1, 2, 3, 4], offset=1, limit=2, max_limit=10) - assert isinstance(window, PageWindow) - assert window.items == [2, 3] - assert window.next_offset == 3 - - tail = paginate([9], offset=0, limit=5, max_limit=10) - assert tail.next_offset is None - - canonical = {"finding-abcdef12": "short"} - assert ( - resolve_finding_id( - canonical_to_short=canonical, - short_to_canonical={"short": "finding-abcdef12"}, - finding_id="finding-abcdef12", - ) - == "finding-abcdef12" - ) - assert ( - resolve_finding_id( - canonical_to_short=canonical, - short_to_canonical={"short": "finding-abcdef12"}, - finding_id="short", - ) - == "finding-abcdef12" - ) - assert ( - resolve_finding_id( - canonical_to_short=canonical, - short_to_canonical={}, - finding_id="missing", - ) - is None - ) - assert short_id("finding-abcdef12", length=8) == "finding-" - - -def test_workspace_hook_cleanup_sqlite_load_error( - tmp_path: Path, - monkeypatch: pytest.MonkeyPatch, -) -> None: - class _Config: - backend = "sqlite" - storage_path = Path(".codeclone/db/intents.sqlite3") - - monkeypatch.setattr( - 
"codeclone.workspace_intent.gate.resolve_intent_registry_config", - lambda _root: _Config(), - ) - - def _load_fail(*_args: object, **_kwargs: object) -> object: - raise OSError("cannot read sqlite") - - monkeypatch.setattr( - "codeclone.workspace_intent.gate._load_registry_records_read_only", - _load_fail, - ) - with pytest.raises(WorkspaceIntentRegistryUnavailable, match="cannot read sqlite"): - list_unclosed_workspace_intents_for_hook_cleanup(tmp_path) - - -def test_workspace_ownership_authorizes_foreign_active( - monkeypatch: pytest.MonkeyPatch, -) -> None: - from codeclone.surfaces.mcp import _workspace_intents as workspace_intents - from codeclone.workspace_intent import gate as gate_mod - - monkeypatch.setattr(gate_mod, "_hook_authorizes_foreign_active", lambda: True) - assert ( - gate_mod._ownership_authorizes_hook( - workspace_intents.IntentOwnership.FOREIGN_ACTIVE, - liveness=workspace_intents.PidLiveness.ALIVE, - ) - is True - ) - monkeypatch.setattr(gate_mod, "_hook_authorizes_foreign_active", lambda: False) - assert ( - gate_mod._ownership_authorizes_hook( - workspace_intents.IntentOwnership.FOREIGN_ACTIVE, - liveness=workspace_intents.PidLiveness.ALIVE, - ) - is False - ) - - -def test_agent_pid_liveness_honors_monkeypatched_boolean_probe( - monkeypatch: pytest.MonkeyPatch, -) -> None: - from codeclone.surfaces.mcp import _workspace_intent_pid as pid_mod - from codeclone.surfaces.mcp._workspace_intent_lifecycle import PidLiveness - - monkeypatch.setattr(pid_mod, "is_agent_pid_alive", lambda _pid: False) - assert pid_mod.agent_pid_liveness(123) is PidLiveness.DEAD - - -def test_record_elapsed_span_noop_without_active_operation(tmp_path: Path) -> None: - from codeclone.config.observability import ObservabilityConfig - from codeclone.observability import bootstrap, record_elapsed_span, shutdown - - bootstrap(ObservabilityConfig(enabled=True), root=tmp_path) - try: - record_elapsed_span( - "orphan-span", - started_at_utc="2026-01-01T00:00:00Z", - 
duration_ms=1.0, - ) - finally: - shutdown() - - -def test_staleness_anchor_drift_status_edges(tmp_path: Path) -> None: - from codeclone.memory.models import MemorySubject, generate_memory_id - from codeclone.memory.staleness import _evaluate_anchor_drift_status - - from .memory_fixtures import make_module_record, memory_store - - with memory_store(tmp_path) as (root, project, store, _db_path): - record = replace( - make_module_record(project.id, "pkg.mod"), - created_at_commit="abc123", - code_fingerprint="fp-1", - status="active", - ) - store.upsert_record(record) - subject = MemorySubject( - id=generate_memory_id(prefix="subj"), - memory_id=record.id, - subject_kind="path", - subject_key="pkg/missing.py", - relation="about", - ) - store.write_subject(subject) - assert ( - _evaluate_anchor_drift_status( - record, - anchor_subject=subject, - root_path=root, - ) - == "historical" - ) - historical = replace(record, status="historical") - assert ( - _evaluate_anchor_drift_status( - historical, - anchor_subject=subject, - root_path=root, - ) - is None - ) - stale_record = replace( - record, status="stale", stale_reason="subject_fingerprint_drift" - ) - assert ( - _evaluate_anchor_drift_status( - stale_record, - anchor_subject=subject, - root_path=root, - ) - == "historical" - ) - - -def test_instance_methods_decorator_and_base_name_fallbacks() -> None: - import ast - - import codeclone.findings.design.instance_methods as instance_methods_mod - - assert instance_methods_mod._simple_decorator_name(ast.Constant(value=1)) == "" - assert instance_methods_mod._simple_base_name(ast.Constant(value=1)) == "" - - -def test_workflow_audit_emit_and_digest_helpers( - tmp_path: Path, - monkeypatch: pytest.MonkeyPatch, -) -> None: - import sys - - from codeclone.surfaces.cli import workflow as cli_workflow - - class _Args: - audit_enabled = True - - cli_workflow._emit_cli_analysis_completed_if_enabled( - args=_Args(), - root_path=tmp_path, - report_document="not-a-dict", - 
new_func_count=0, - new_block_count=0, - ) - cli_workflow._emit_cli_analysis_completed_if_enabled( - args=_Args(), - root_path=tmp_path, - report_document={"integrity": {"digest": {"value": ""}}}, - new_func_count=0, - new_block_count=0, - ) - - def _boom(**_kwargs: object) -> None: - raise RuntimeError("audit unavailable") - - monkeypatch.setattr( - "codeclone.audit.analysis_completed.emit_analysis_completed_from_report", - _boom, - ) - cli_workflow._emit_cli_analysis_completed_if_enabled( - args=_Args(), - root_path=tmp_path, - report_document={"integrity": {"digest": {"value": "a" * 64}}}, - new_func_count=1, - new_block_count=0, - ) - - assert cli_workflow._report_digest_from_document({}) == "" - assert ( - cli_workflow._report_digest_from_document( - {"integrity": {"digest": "not-a-mapping"}} - ) - == "" - ) - - monkeypatch.setattr(sys, "argv", ["codeclone", "observability"]) - with pytest.raises(SystemExit): - cli_workflow.main() - monkeypatch.setattr(sys, "argv", ["codeclone", "memory", "--help"]) - with pytest.raises(SystemExit): - cli_workflow.main() - - -def test_observability_profile_open_fds_degrades_gracefully( - monkeypatch: pytest.MonkeyPatch, -) -> None: - import sys - from unittest.mock import MagicMock - - from codeclone.observability.profile import build_profile_sample - - process = MagicMock() - process.memory_info.return_value = MagicMock(rss=1024 * 1024) - process.cpu_times.return_value = MagicMock(user=0.1, system=0.2) - process.num_fds.side_effect = OSError("unsupported") - process.num_threads.return_value = 3 - mock_psutil = MagicMock() - mock_psutil.Process.return_value = process - monkeypatch.setitem(sys.modules, "psutil", mock_psutil) - - sample = build_profile_sample((512 * 1024, 0.0, 0.0)) - assert sample is not None - assert sample.open_fds is None diff --git a/tests/test_instance_independent_methods.py b/tests/test_instance_independent_methods.py index d9abba18..d2308884 100644 --- a/tests/test_instance_independent_methods.py +++ 
b/tests/test_instance_independent_methods.py @@ -364,3 +364,12 @@ def test_occurrences_are_deterministically_ordered() -> None: ) starts = [occ.start for occ in occurrences] assert starts == sorted(starts) + + +def test_decorator_and_base_name_helpers_ignore_non_names() -> None: + import ast + + import codeclone.findings.design.instance_methods as instance_methods_mod + + assert instance_methods_mod._simple_decorator_name(ast.Constant(value=1)) == "" + assert instance_methods_mod._simple_base_name(ast.Constant(value=1)) == "" diff --git a/tests/test_lancedb_backend_mocked.py b/tests/test_lancedb_backend_mocked.py index fe9a437a..853bdc9e 100644 --- a/tests/test_lancedb_backend_mocked.py +++ b/tests/test_lancedb_backend_mocked.py @@ -264,6 +264,36 @@ def test_lancedb_backend_mocked_close_releases_available_handles( assert table.closed is True assert fake_db.closed is True + + +def test_lancedb_helper_and_schema_failure_edges( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + from codeclone.memory.semantic import lancedb_backend + + assert lancedb_backend._as_float("1.25") == 1.25 + lancedb_backend._close_if_available(None) + lancedb_backend._close_if_available(object()) + + _install_fake_lancedb(monkeypatch) + index = LanceDbSemanticIndex(path=tmp_path / "idx.lance", dimension=4) + + class _BrokenSchema: + def field(self, _name: str) -> object: + raise AttributeError("missing") + + broken_table = type("BrokenTable", (), {"schema": _BrokenSchema()})() + assert index._schema_matches(broken_table) is False + + assert index._table is not None + index._table._rows = [ # type: ignore[attr-defined] + {"id": "audit-1", "source": "audit", "_distance": "0.5"} + ] + hits = index.search([0.0, 0.0, 0.0, 0.0], k=1, source="audit") + assert hits[0].source == "audit" + assert index._table._where == "source = 'audit'" # type: ignore[attr-defined] + index._table._rows = [] # type: ignore[attr-defined] assert index.search([0.0, 0.0, 0.0, 0.0], k=1) == [] diff --git 
a/tests/test_mcp_service.py b/tests/test_mcp_service.py index 7d70d5be..54618654 100644 --- a/tests/test_mcp_service.py +++ b/tests/test_mcp_service.py @@ -7410,6 +7410,27 @@ def test_mcp_service_record_lookup_helper_branches(tmp_path: Path) -> None: ) +def _assert_optional_summary_metric( + service: CodeCloneMCPService, + record: MCPRunRecord, + *, + key: str, +) -> dict[str, object]: + payload = service._summary_payload(record.summary, record=record) + metric_payload = cast("dict[str, object]", payload[key]) + service._runs.register(record) + assert key in service.get_production_triage(run_id=record.run_id) + empty_report_record = replace( + record, + report_document={"metrics": {"families": {}}}, + ) + assert key not in service._summary_payload( + empty_report_record.summary, + record=empty_report_record, + ) + return metric_payload + + def test_mcp_service_summary_and_gate_contract_for_coverage_join( tmp_path: Path, ) -> None: @@ -7463,8 +7484,11 @@ def test_mcp_service_summary_and_gate_contract_for_coverage_join( new_block=frozenset(), metrics_diff=None, ) - payload = service._summary_payload(record.summary, record=record) - assert cast(dict[str, object], payload["coverage_join"]) == { + assert _assert_optional_summary_metric( + service, + record, + key="coverage_join", + ) == { "status": "ok", "overall_permille": 700, "coverage_hotspots": 1, @@ -7472,14 +7496,6 @@ def test_mcp_service_summary_and_gate_contract_for_coverage_join( "hotspot_threshold_percent": 50, "source": "coverage.xml", } - empty_report_record = replace( - record, - report_document={"metrics": {"families": {}}}, - ) - assert "coverage_join" not in service._summary_payload( - empty_report_record.summary, - record=empty_report_record, - ) with pytest.raises(MCPServiceContractError, match="coverage_xml"): service._evaluate_gate_snapshot( record=record, @@ -7604,8 +7620,11 @@ def test_mcp_service_summary_payload_includes_security_surfaces( new_block=frozenset(), metrics_diff=None, ) - payload = 
service._summary_payload(record.summary, record=record) - assert cast(dict[str, object], payload["security_surfaces"]) == { + assert _assert_optional_summary_metric( + service, + record, + key="security_surfaces", + ) == { "items": 5, "categories": 3, "production": 4, @@ -7613,14 +7632,6 @@ def test_mcp_service_summary_payload_includes_security_surfaces( "report_only": True, "note": "report_only inventory; not a vulnerability scan", } - empty_report_record = replace( - record, - report_document={"metrics": {"families": {}}}, - ) - assert "security_surfaces" not in service._summary_payload( - empty_report_record.summary, - record=empty_report_record, - ) def test_mcp_service_short_id_and_comparison_helper_branches( @@ -8949,6 +8960,12 @@ def test_mcp_finish_controlled_change_external_health_and_memory_hook( "finish_propose_memory", lambda **_: {"memory_candidates": [{"id": "mem-1"}]}, ) + monkeypatch.setattr(docs_service, "_audit_emit", lambda **_: 42) + monkeypatch.setattr( + docs_service, + "maybe_auto_enqueue_projection_rebuild", + lambda **_: {"status": "enqueued"}, + ) finished = docs_service.finish_controlled_change( intent_id=docs_intent, changed_files=["README.md"], @@ -8967,6 +8984,12 @@ def test_mcp_finish_controlled_change_external_health_and_memory_hook( assert cast("list[dict[str, object]]", finished["memory_candidates"])[0]["id"] == ( "mem-1" ) + assert finished["projection_rebuild"] == {"status": "enqueued"} + patch_trail = cast("dict[str, object]", finished["patch_trail"]) + assert ( + cast("dict[str, object]", patch_trail["evidence"])["patch_trail_audit_sequence"] + == 42 + ) def test_mcp_finish_controlled_change_propose_memory_empty_hook( @@ -9016,10 +9039,6 @@ def test_mcp_finish_controlled_change_propose_memory_empty_hook( def test_mcp_intent_helper_edges_and_renew_paths(tmp_path: Path) -> None: - from codeclone.surfaces.mcp._workspace_hygiene import ( - ForeignDirtyOverlap, - WorkspaceHygieneResult, - ) from codeclone.surfaces.mcp.messages 
import intent as intent_msgs service = CodeCloneMCPService(history_limit=2) @@ -9109,6 +9128,179 @@ def test_mcp_intent_helper_edges_and_renew_paths(tmp_path: Path) -> None: == intent_msgs.RECOVERY_FOREIGN_STALE ) + +def test_mcp_intent_promotion_contract_errors_and_evicted_run( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + service = CodeCloneMCPService(history_limit=4) + _register_docs_patch_run(service, tmp_path) + + with pytest.raises(MCPServiceContractError, match="requires intent_id"): + service.manage_change_intent(action="promote") + with pytest.raises(MCPServiceContractError, match="Unknown change intent"): + service.manage_change_intent(action="promote", intent_id="missing") + + active = service.manage_change_intent( + action="declare", + run_id="workflow", + scope={"allowed_files": ["README.md"]}, + intent="active docs edit", + ) + with pytest.raises(MCPServiceContractError, match="not 'queued'"): + service.manage_change_intent( + action="promote", + intent_id=str(active["intent_id"]), + ) + + queued_root = tmp_path / "queued" + queued_root.mkdir() + queued_service, _foreign_id = _two_agent_service(queued_root, monkeypatch) + queued_id = _declare_queued_pkg_a(queued_service) + queued_service._runs.clear() + evicted = queued_service.manage_change_intent( + action="promote", + intent_id=queued_id, + ) + assert evicted["status"] == "unverified" + assert evicted["reason"] == "before_run_evicted" + + +def test_mcp_intent_queue_context_and_boundary_helper_edges() -> None: + service = CodeCloneMCPService(history_limit=2) + empty_scope = mcp_intent_mod.IntentScope( + allowed_files=(), + allowed_related=(), + forbidden=(), + ) + assert ( + service._queued_context_from_workspace( + scope=empty_scope, + workspace_existing=(), + ) + == [] + ) + + record = mcp_workspace_intents_mod.WorkspaceIntentRecord( + intent_id="q-other", + agent_pid=9999, + agent_start_epoch=1, + agent_label="other", + run_id="run-1", + 
declared_at_utc="2026-01-01T00:00:00Z", + expires_at_utc="2026-01-01T01:00:00Z", + ttl_seconds=3600, + report_digest="digest", + status="queued", + intent="queued", + scope={"allowed_files": ["pkg/other.py"]}, + scope_digest="digest", + blast_radius_summary={}, + lease_renewed_at_utc="2026-01-01T00:00:00Z", + lease_seconds=60, + ) + scope = mcp_intent_mod.IntentScope( + allowed_files=("pkg/a.py",), + allowed_related=(), + forbidden=(), + ) + assert ( + service._queued_context_from_workspace( + scope=scope, + workspace_existing=(record,), + ) + == [] + ) + assert mcp_session_intent_mod._blast_boundary_paths( + ({"path": "pkg/a.py"}, "pkg\\b.py", {"path": ""}), + limit=5, + ) == ("pkg/a.py", "pkg/b.py") + + +def test_mcp_redeclaring_same_run_replaces_previous_intent(tmp_path: Path) -> None: + service = CodeCloneMCPService(history_limit=4) + _register_docs_patch_run(service, tmp_path) + first = service.manage_change_intent( + action="declare", + run_id="workflow", + scope={"allowed_files": ["README.md"]}, + intent="first", + ) + second = service.manage_change_intent( + action="declare", + run_id="workflow", + scope={"allowed_files": ["docs/index.md"]}, + intent="second", + ) + assert first["intent_id"] != second["intent_id"] + assert ( + service.manage_change_intent(action="get")["intent_id"] == second["intent_id"] + ) + + +def test_mcp_run_store_pin_and_pruning_edges(tmp_path: Path) -> None: + store = mcp_shared_mod.CodeCloneMCPRunStore(history_limit=4) + with pytest.raises(MCPRunNotFoundError): + store.pin("missing") + + records = [ + _patch_contract_run_record( + tmp_path, + run_id=f"run-{index}", + digest=f"digest-{index}", + include_regression=False, + complexity=1, + ) + for index in range(3) + ] + for record in records: + store.register(record) + store.pin(records[0].run_id) + store._history_limit = 1 + store._latest_run_id = records[1].run_id + store._prune_unpinned_locked() + assert records[0].run_id in {item.run_id for item in store.records()} + assert 
records[1].run_id not in {item.run_id for item in store.records()} + + +def test_mcp_memory_projection_management_actions( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + import codeclone.memory.jobs as jobs + import codeclone.memory.trajectory.rebuild_workflow as rebuild_workflow + + service = CodeCloneMCPService(history_limit=2) + monkeypatch.setattr( + rebuild_workflow, + "execute_trajectory_rebuild", + lambda **_kwargs: {"status": "trajectory-ok"}, + ) + monkeypatch.setattr( + jobs, + "execute_run_projection_jobs_once", + lambda **_kwargs: {"status": "worker-ok"}, + ) + + rebuilt = service.manage_engineering_memory( + root=str(tmp_path), + action="rebuild_trajectories", + ) + worker = service.manage_engineering_memory( + root=str(tmp_path), + action="run_projection_jobs_once", + ) + assert rebuilt["status"] == "trajectory-ok" + assert worker["status"] == "worker-ok" + + +def test_mcp_intent_renew_and_workflow_helper_edges(tmp_path: Path) -> None: + from codeclone.surfaces.mcp._workspace_hygiene import ( + ForeignDirtyOverlap, + WorkspaceHygieneResult, + ) + + service = CodeCloneMCPService(history_limit=2) run_record = _blast_radius_run_record(tmp_path) service._runs.register(run_record) declared = service.manage_change_intent( @@ -9671,3 +9863,122 @@ def test_query_platform_observability_wires_dev_envelope(tmp_path: Path) -> None # No store under a fresh root -> inert envelope, never an error. 
assert out["status"] in {"disabled", "no_store"} assert out["rows"] == [] + + +def test_measure_payload_handles_unserializable_values() -> None: + from codeclone.surfaces.mcp.payloads import measure_payload + + class _Bad: + def __str__(self) -> str: + raise TypeError("nope") + + bytes_size, tokens = measure_payload({"bad": _Bad()}) + assert bytes_size == 0 + assert tokens == 0 + + +def test_measure_payload_estimate_failure_uses_char_fallback( + monkeypatch: pytest.MonkeyPatch, +) -> None: + from codeclone.surfaces.mcp.payloads import measure_payload + + def _boom(_payload: object) -> object: + raise TypeError("estimate failed") + + monkeypatch.setattr( + "codeclone.surfaces.mcp.payloads.estimate_payload", + _boom, + ) + byte_size, tokens = measure_payload({"ok": True}) + assert byte_size > 0 + assert tokens > 0 + + +def test_mcp_payload_paginate_and_finding_resolution() -> None: + from codeclone.surfaces.mcp.payloads import ( + PageWindow, + paginate, + resolve_finding_id, + short_id, + ) + + window = paginate([1, 2, 3, 4], offset=1, limit=2, max_limit=10) + assert isinstance(window, PageWindow) + assert window.items == [2, 3] + assert window.next_offset == 3 + + tail = paginate([9], offset=0, limit=5, max_limit=10) + assert tail.next_offset is None + + canonical = {"finding-abcdef12": "short"} + assert ( + resolve_finding_id( + canonical_to_short=canonical, + short_to_canonical={"short": "finding-abcdef12"}, + finding_id="finding-abcdef12", + ) + == "finding-abcdef12" + ) + assert ( + resolve_finding_id( + canonical_to_short=canonical, + short_to_canonical={"short": "finding-abcdef12"}, + finding_id="short", + ) + == "finding-abcdef12" + ) + assert ( + resolve_finding_id( + canonical_to_short=canonical, + short_to_canonical={}, + finding_id="missing", + ) + is None + ) + assert short_id("finding-abcdef12", length=8) == "finding-" + + +def test_mcp_state_optional_payload_and_pruning_edges(tmp_path: Path) -> None: + service = CodeCloneMCPService(history_limit=2) + 
with pytest.raises(MCPServiceContractError, match="Numeric analysis settings"): + service._build_args( + root_path=tmp_path, + request=MCPAnalysisRequest( + root=str(tmp_path), + respect_pyproject=False, + coverage_min=101, + ), + ) + + summary = _dummy_run_record(tmp_path, "summary-run").summary + payload = service._summary_payload(summary) + assert payload["run_id"] == "summary-" + assert "workspace_hygiene" not in payload + + stale = _dummy_run_record(tmp_path, "stale-run") + service._runs.register(stale) + declared = service.manage_change_intent( + action="declare", + run_id="stale-ru", + scope={"allowed_files": ["README.md"]}, + intent="stale state", + ) + intent_id = str(declared["intent_id"]) + service._review_state[stale.run_id] = OrderedDict([("finding", None)]) + service._last_gate_results[stale.run_id] = {"status": "pass"} + service._spread_max_cache[stale.run_id] = 1 + from codeclone.analysis.blast_radius import BlastRadiusResult + + service._blast_radius_cache[(stale.run_id, ("README.md",), "direct")] = cast( + "BlastRadiusResult", + {}, + ) + + service._runs.clear() + service._prune_session_state() + + assert stale.run_id not in service._review_state + assert stale.run_id not in service._last_gate_results + assert stale.run_id not in service._spread_max_cache + assert service._blast_radius_cache == {} + assert intent_id not in service._active_intents diff --git a/tests/test_memory_cli_branch_coverage.py b/tests/test_memory_cli_branch_coverage.py index a2a6fa30..eeb7e1da 100644 --- a/tests/test_memory_cli_branch_coverage.py +++ b/tests/test_memory_cli_branch_coverage.py @@ -6,11 +6,14 @@ from __future__ import annotations +import argparse from pathlib import Path from types import SimpleNamespace +from typing import cast import pytest +from codeclone.config.memory import MemoryConfig from codeclone.contracts import ExitCode from codeclone.memory.ingest import InitReport from codeclone.surfaces.cli.memory import _CLI_GOVERNANCE_BREAK_GLASS_FLAG, 
memory_main @@ -18,6 +21,14 @@ from .memory_fixtures import cli_memory_repo +class _MemoryCliConsole: + def __init__(self) -> None: + self.lines: list[str] = [] + + def print(self, *objects: object, **_kwargs: object) -> None: + self.lines.append(" ".join(str(value) for value in objects)) + + @pytest.mark.parametrize( "rejected_cache_reason, source, expected_substring", [ @@ -340,3 +351,111 @@ def print(self, message: str) -> None: code = memory_main(argv) assert code == int(ExitCode.CONTRACT_ERROR) assert any("database not found" in line for line in printed) + + +def test_memory_cli_trajectory_and_job_fallback_edges( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + from codeclone.surfaces.cli import memory + + console = _MemoryCliConsole() + closed: list[bool] = [] + monkeypatch.setattr( + memory, + "_open_store", + lambda _root: ( + SimpleNamespace(close=lambda: closed.append(True)), + SimpleNamespace(trajectories_enabled=False), + SimpleNamespace(id="project"), + ), + ) + assert memory._run_trajectory_rebuild( + console=console, + root_path=tmp_path, + ) == int(ExitCode.CONTRACT_ERROR) + assert closed == [True] + + monkeypatch.setattr( + memory, + "query_engineering_memory", + lambda *_args, **_kwargs: {"payload": {}}, + ) + monkeypatch.setattr( + memory, + "_open_store", + lambda _root: ( + SimpleNamespace(close=lambda: None), + SimpleNamespace( + trajectories_enabled=True, + backend="sqlite", + db_path=tmp_path / "memory.sqlite3", + ), + SimpleNamespace(id="project"), + ), + ) + args = argparse.Namespace(limit=5, json=False) + assert memory._run_trajectory_dashboard( + console=console, + root_path=tmp_path, + args=args, + ) == int(ExitCode.SUCCESS) + + monkeypatch.setattr( + memory, + "query_engineering_memory", + lambda *_args, **_kwargs: { + "payload": { + "status": {"trajectory_count": 0, "latest_projection": None}, + } + }, + ) + assert memory._run_trajectory_dashboard( + console=console, + root_path=tmp_path, + args=args, + ) == 
int(ExitCode.SUCCESS) + + assert memory._run_jobs_json( + console=console, + root_path=tmp_path, + action=lambda: {"status": "failed"}, + ) == int(ExitCode.CONTRACT_ERROR) + + monkeypatch.setattr( + memory, + "execute_projection_rebuild_status", + lambda **_kwargs: {"jobs": ["invalid-row"]}, + ) + assert memory._run_jobs_list( + console=console, + root_path=tmp_path, + args=argparse.Namespace(limit=5, json=False), + ) == int(ExitCode.SUCCESS) + + +def test_memory_cli_semantic_text_without_subject_path() -> None: + from codeclone.memory.embedding import DeterministicHashEmbeddingProvider + from codeclone.memory.semantic.models import SemanticSearchResult + from codeclone.surfaces.cli import memory + + console = _MemoryCliConsole() + result = SemanticSearchResult( + source="memory", + source_id="mem-1", + score=0.5, + kind="module_role", + preview="Module role", + ) + code = memory._render_semantic_text( + console=console, + query="module", + config=cast( + "MemoryConfig", + SimpleNamespace(semantic=SimpleNamespace(embedding_provider="diagnostic")), + ), + provider=DeterministicHashEmbeddingProvider(dimension=8), + results=[result], + ) + assert code == int(ExitCode.SUCCESS) + assert not any("subject:" in line for line in console.lines) diff --git a/tests/test_memory_config_resolve_edges.py b/tests/test_memory_config_resolve_edges.py index 0ceddc54..f5f1c690 100644 --- a/tests/test_memory_config_resolve_edges.py +++ b/tests/test_memory_config_resolve_edges.py @@ -61,3 +61,38 @@ def test_resolve_memory_config_rejects_non_string_db_path( } }, ) + + +def test_intent_registry_path_must_stay_under_repo(tmp_path: Path) -> None: + from codeclone.config.intent_registry import ( + IntentRegistryConfigError, + resolve_intent_registry_db_path, + ) + + root = tmp_path / "repo" + root.mkdir() + outside = (tmp_path / "outside" / "intents.sqlite3").resolve() + with pytest.raises(IntentRegistryConfigError, match="relative to the repository"): + resolve_intent_registry_db_path( + 
root_path=root, + value=str(outside), + ) + + +def test_memory_state_path_validation_errors(tmp_path: Path) -> None: + from codeclone.config.memory import _resolve_memory_state_path + + root = tmp_path / "repo" + root.mkdir() + with pytest.raises(TypeError, match="must resolve to a string path"): + _resolve_memory_state_path( + key="memory.semantic.index_path", + value=123, + root_path=root, + ) + with pytest.raises(ValueError, match="must stay under the repository root"): + _resolve_memory_state_path( + key="memory.semantic.index_path", + value="../outside.lance", + root_path=root, + ) diff --git a/tests/test_memory_experience_store.py b/tests/test_memory_experience_store.py index c7fb6ab6..03c2a4e9 100644 --- a/tests/test_memory_experience_store.py +++ b/tests/test_memory_experience_store.py @@ -172,3 +172,12 @@ def test_empty_replace_clears_project(conn: sqlite3.Connection) -> None: ) replace_experiences(conn, project_id=_PROJECT_ID, experiences=[]) assert count_experiences(conn, project_id=_PROJECT_ID) == 0 + + +def test_private_validators_reject_unknown_values() -> None: + from codeclone.memory.experience.store import _facet_kind, _status + + with pytest.raises(ValueError, match="unknown experience facet kind"): + _facet_kind("not-a-facet") + with pytest.raises(ValueError, match="unknown experience status"): + _status("archived") diff --git a/tests/test_memory_ingest_paths.py b/tests/test_memory_ingest_paths.py index 7fa9d999..aee453f1 100644 --- a/tests/test_memory_ingest_paths.py +++ b/tests/test_memory_ingest_paths.py @@ -122,3 +122,40 @@ def test_resolve_memory_config_rejects_unknown_ingest_key(tmp_path: Path) -> Non ) with pytest.raises(ValueError, match=r"Invalid tool\.codeclone\.memory\.ingest"): resolve_memory_config(tmp_path) + + +def test_resolvers_skip_missing_and_escaping_paths(tmp_path: Path) -> None: + from codeclone.config.memory import IngestConfig + from codeclone.memory.ingest.paths import ( + resolve_contract_constants_paths, + 
resolve_document_link_paths, + resolve_mcp_tool_contradiction_sources, + resolve_mcp_tool_schema_snapshot_path, + ) + + root = tmp_path / "repo" + root.mkdir() + ingest = IngestConfig( + contract_constants_paths=("missing/contracts.py",), + document_link_paths=("../escape.md",), + mcp_tool_schema_snapshot_path="missing-tools.json", + mcp_tool_count_doc_paths=("missing-doc.md",), + ) + assert ( + resolve_contract_constants_paths( + root_path=root, + registry_paths=frozenset(), + ingest=ingest, + ) + == () + ) + assert ( + resolve_document_link_paths( + root_path=root, + registry_paths=frozenset({"docs/book/01.md"}), + ingest=ingest, + ) + == () + ) + assert resolve_mcp_tool_schema_snapshot_path(root_path=root, ingest=ingest) is None + assert resolve_mcp_tool_contradiction_sources(root_path=root, ingest=ingest) is None diff --git a/tests/test_memory_jobs_coverage.py b/tests/test_memory_jobs_coverage.py index b5019b8d..43fadf2d 100644 --- a/tests/test_memory_jobs_coverage.py +++ b/tests/test_memory_jobs_coverage.py @@ -20,6 +20,7 @@ from codeclone.audit.validation import DEFAULT_AUDIT_PATH, resolve_audit_path from codeclone.config.memory import resolve_memory_config from codeclone.memory.exceptions import MemoryContractError +from codeclone.memory.jobs.models import ProjectionJobRecord from codeclone.memory.jobs.spawn import ( run_projection_jobs_worker_sync, spawn_projection_jobs_worker, @@ -495,3 +496,183 @@ def test_execute_projection_rebuild_status_requires_existing_db(tmp_path: Path) root.mkdir() with pytest.raises(MemoryContractError, match="database not found"): execute_projection_rebuild_status(root_path=root) + + +def test_worker_reason_classification_and_bootstrap_short_circuits( + monkeypatch: pytest.MonkeyPatch, +) -> None: + from codeclone.memory.jobs import worker + from codeclone.memory.trajectory.models import TRAJECTORY_PROJECTION_VERSION + + conn = sqlite3.connect(":memory:") + try: + assert ( + worker._trajectory_reason_kind( + conn, + 
project_id="p", + watermark=1, + ) + == "content_changed" + ) + + monkeypatch.setattr( + "codeclone.memory.jobs.staleness.last_applied_stimulus", + lambda _conn, *, project_id: None, + ) + assert ( + worker._trajectory_reason_kind( + conn, + project_id="p", + watermark=None, + ) + == "first_index" + ) + + monkeypatch.setattr( + "codeclone.memory.jobs.staleness.last_applied_stimulus", + lambda _conn, *, project_id: { + "trajectory_projection_version": f"{TRAJECTORY_PROJECTION_VERSION}-old" + }, + ) + assert ( + worker._trajectory_reason_kind( + conn, + project_id="p", + watermark=None, + ) + == "schema_version_changed" + ) + finally: + conn.close() + + monkeypatch.setattr(worker, "is_observability_enabled", lambda: False) + worker._emit_worker_bootstrap_span() + monkeypatch.setattr(worker, "is_observability_enabled", lambda: True) + monkeypatch.setattr(worker, "worker_bootstrap_sample", lambda: None) + worker._emit_worker_bootstrap_span() + + +def test_workflow_auto_enqueue_and_job_payload_edges( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + from codeclone.memory.jobs import workflow + + monkeypatch.setattr(workflow, "is_ci_environment", lambda: True) + assert workflow.maybe_auto_enqueue_projection_rebuild(root_path=tmp_path) is None + + monkeypatch.setattr(workflow, "is_ci_environment", lambda: False) + config = replace( + resolve_memory_config(tmp_path), + projection_rebuild_policy="enqueue_when_stale", + ) + monkeypatch.setattr(workflow, "resolve_memory_config", lambda _root: config) + monkeypatch.setattr( + workflow, + "execute_enqueue_projection_rebuild", + lambda **_kwargs: {"status": "skipped"}, + ) + assert workflow.maybe_auto_enqueue_projection_rebuild(root_path=tmp_path) is None + + record = ProjectionJobRecord( + id="job-1", + project_id="project", + job_kind="projection_bundle", + status="pending", + trigger="cli", + requested_at_utc="2026-01-01T00:00:00Z", + started_at_utc=None, + finished_at_utc=None, + claimed_by=None, + 
attempt=0, + stimulus_json="{}", + result_json=None, + error_message=None, + ) + payload = workflow._job_payload(record) + assert payload is not None + assert payload["id"] == "job-1" + + +def test_execute_worker_reuses_existing_observability_runtime( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + from codeclone.config.observability import ObservabilityConfig + from codeclone.memory.jobs import workflow + from codeclone.observability import bootstrap, shutdown + + with cli_memory_repo(tmp_path, with_draft=False) as (root, _project, _store): + bootstrap(ObservabilityConfig(enabled=True), root=root) + shutdown_mock = MagicMock() + monkeypatch.setattr(workflow, "shutdown", shutdown_mock) + payload = workflow.execute_run_projection_jobs_once(root_path=root) + assert payload["status"] == "nothing_to_do" + shutdown_mock.assert_not_called() + shutdown() + + +def test_store_reclaims_invalid_timestamp_and_blocks_parallel_claim( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + from codeclone.memory.jobs import store as job_store + + with cli_memory_repo(tmp_path, with_draft=False) as (root, project, _store): + config = resolve_memory_config(root) + conn = open_memory_db(resolve_memory_db_path(root, config)) + try: + conn.execute( + "INSERT INTO memory_projection_jobs(" + "id, project_id, job_kind, status, trigger, requested_at_utc, " + "started_at_utc, claimed_by, attempt, stimulus_json" + ") VALUES (?, ?, 'projection_bundle', 'running', 'cli', ?, ?, ?, 1, ?)", + ( + "job-invalid-time", + project.id, + "2026-01-01T00:00:00Z", + "not-a-timestamp", + worker_claim_token(), + "{}", + ), + ) + conn.commit() + monkeypatch.setattr(job_store, "_pid_alive", lambda _token: True) + job_store._reclaim_stale_running_jobs( + conn, + project_id=project.id, + running_timeout_seconds=60, + ) + status = conn.execute( + "SELECT status FROM memory_projection_jobs WHERE id=?", + ("job-invalid-time",), + ).fetchone() + assert status is not None + assert 
status[0] == "failed" + + conn.execute( + "INSERT INTO memory_projection_jobs(" + "id, project_id, job_kind, status, trigger, requested_at_utc, " + "started_at_utc, claimed_by, attempt, stimulus_json" + ") VALUES (?, ?, 'projection_bundle', 'running', 'cli', ?, ?, ?, 1, ?)", + ( + "job-live", + project.id, + "2026-01-01T00:00:00Z", + "2999-01-01T00:00:00Z", + worker_claim_token(), + "{}", + ), + ) + conn.commit() + assert ( + claim_next_projection_job( + conn, + project_id=project.id, + claimed_by=worker_claim_token(), + running_timeout_seconds=60, + ) + is None + ) + finally: + conn.close() diff --git a/tests/test_memory_retrieval_semantic_coverage.py b/tests/test_memory_retrieval_semantic_coverage.py index 52de7913..672e7970 100644 --- a/tests/test_memory_retrieval_semantic_coverage.py +++ b/tests/test_memory_retrieval_semantic_coverage.py @@ -10,6 +10,8 @@ from dataclasses import replace from pathlib import Path +import pytest + from codeclone.memory.embedding import DeterministicHashEmbeddingProvider from codeclone.memory.models import MemoryRecord, MemorySubject from codeclone.memory.retrieval.semantic import semantic_search @@ -125,3 +127,125 @@ def test_semantic_search_skips_unknown_trajectory_source_without_store() -> None preview_chars=40, ) assert results == [] + + +def test_resolve_semantic_index_writer_returns_none_when_disabled() -> None: + from codeclone.config.memory import SemanticConfig + from codeclone.memory.semantic import resolve_semantic_index_writer + + assert resolve_semantic_index_writer(SemanticConfig(enabled=False)) is None + + +def test_hydrate_trajectory_skips_store_without_api_or_missing_record() -> None: + from codeclone.memory.retrieval.semantic import _hydrate_trajectory + + hit = SemanticHit(source_id="traj-1", source="trajectory", score=0.4) + + class _StoreWithoutTrajectoryApi: + pass + + assert _hydrate_trajectory(hit, _StoreWithoutTrajectoryApi(), 80) is None + + class _StoreMissingTrajectory: + def find_trajectory(self, 
_trajectory_id: str) -> None: + return None + + assert _hydrate_trajectory(hit, _StoreMissingTrajectory(), 80) is None + + +def test_hydrate_trajectory_hits_supports_compact_and_full_details( + tmp_path: Path, +) -> None: + from codeclone.memory.retrieval import service as retrieval_service + + with memory_store(tmp_path) as (root, project, store, _db_path): + audit_db = tmp_path / "audit.sqlite3" + seed_trajectory_audit_workflow(root=root, audit_db=audit_db) + trajectory = store.rebuild_trajectories_from_audit( + project=project, + root_path=root, + audit_db_path=audit_db, + ).trajectories[0] + hit = SemanticHit(source_id=trajectory.id, source="trajectory", score=0.5) + compact = retrieval_service._hydrate_trajectory_hits( + store, + project_id=project.id, + hits=[hit], + detail_level="compact", + ) + full = retrieval_service._hydrate_trajectory_hits( + store, + project_id=project.id, + hits=[hit], + detail_level="full", + ) + + assert compact and full + assert compact[0]["semantic_score"] == 0.5 + assert full[0]["semantic_score"] == 0.5 + assert "steps" in full[0] + + +def test_audit_event_row_and_primary_path_failure_edges( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + import sqlite3 + + from codeclone.memory.retrieval import semantic + + db_path = tmp_path / "audit.sqlite3" + db_path.touch() + + monkeypatch.setattr( + semantic, + "open_audit_db_readonly", + lambda _path: (_ for _ in ()).throw(sqlite3.OperationalError("open failed")), + ) + assert semantic.audit_event_row(db_path, "event") is None + + class _BrokenConnection: + def execute(self, _sql: str, _params: object) -> object: + raise sqlite3.OperationalError("query failed") + + def close(self) -> None: + pass + + monkeypatch.setattr( + semantic, + "open_audit_db_readonly", + lambda _path: _BrokenConnection(), + ) + assert semantic.audit_event_row(db_path, "event") is None + + assert semantic._primary_path([]) is None + trajectory = Trajectory( + id="traj-empty", + project_id="proj", 
+ repo_root_digest="digest", + workflow_id="run:one", + intent_id=None, + primary_run_id=None, + first_run_id=None, + last_run_id=None, + report_digest=None, + outcome="partial", + quality_tier="partial", + quality_score=0, + labels=(), + summary="summary", + trajectory_digest="a" * 64, + source_event_stream_digest="b" * 64, + projection_version="trajectory-v2", + event_count=0, + step_count=0, + incident_count=0, + started_at_utc="2026-01-01T00:00:00Z", + finished_at_utc="2026-01-01T00:00:00Z", + projected_at_utc="2026-01-01T00:00:00Z", + updated_at_utc="2026-01-01T00:00:00Z", + steps=(), + subjects=(), + evidence=(), + ) + assert semantic._primary_trajectory_path(trajectory) is None diff --git a/tests/test_memory_retrieval_service_coverage.py b/tests/test_memory_retrieval_service_coverage.py index 713a7749..4ebeed97 100644 --- a/tests/test_memory_retrieval_service_coverage.py +++ b/tests/test_memory_retrieval_service_coverage.py @@ -8,13 +8,16 @@ from dataclasses import replace from types import SimpleNamespace +from typing import cast import pytest from codeclone.memory.exceptions import MemoryContractError +from codeclone.memory.experience.models import Experience from codeclone.memory.models import MemoryEvidence, MemoryRecord, MemorySubject from codeclone.memory.retrieval import service as retrieval_service from codeclone.memory.retrieval.ranking import RankingContext +from codeclone.memory.sqlite_store import SqliteEngineeringMemoryStore from codeclone.report.meta import current_report_timestamp_utc @@ -258,3 +261,91 @@ def test_compact_record_subjects_are_bounded_and_scope_relevant() -> None: assert isinstance(full_subjects, list) assert len(full_subjects) == 11 assert {"subject_count", "subjects_truncated"}.isdisjoint(full) + + +def test_visibility_experience_and_subject_priority_edges() -> None: + assert ( + retrieval_service._record_visible( + _record(status="historical"), + include_stale=False, + include_drafts=False, + ) + is True + ) + + compact = 
retrieval_service._experience_detail_payload( + cast( + "Experience", + SimpleNamespace(evidence=[SimpleNamespace(trajectory_id="traj-1")]), + ), + detail_level="compact", + statement_length=20, + statement="short", + agent_facets=[{"agent_family": "codex"}, {"agent_family": "claude"}], + ) + assert compact["dominant_agent_facet"] == {"agent_family": "codex"} + assert compact["statement_truncated"] is True + assert compact["multi_agent"] is True + + subject = MemorySubject( + id="subject", + memory_id="mem-1", + subject_kind="symbol", + subject_key="pkg.mod.run", + relation="about", + ) + symbol_context = RankingContext.from_scope( + scope_paths=(), + symbols=("pkg.mod.run",), + blast_dependents=(), + ) + blast_context = RankingContext.from_scope( + scope_paths=(), + symbols=(), + blast_dependents=("pkg/mod.py",), + ) + assert retrieval_service._memory_subject_priority( + subject, + context=symbol_context, + )[:2] == (0, -1.0) + assert retrieval_service._memory_subject_priority( + replace(subject, subject_kind="path", subject_key="pkg/mod.py"), + context=blast_context, + )[:2] == (2, -0.7) + + +def test_record_relations_filters_external_endpoints_and_trajectory_not_found() -> None: + from codeclone.memory.models import MemoryLink + + now = current_report_timestamp_utc() + links = [ + MemoryLink( + id="link-1", + project_id="proj", + from_memory_id="mem-1", + to_memory_id="external", + relation="contradicts", + created_by="test", + created_at_utc=now, + ) + ] + store = SimpleNamespace( + list_links_for_records=lambda **_kwargs: links, + find_trajectory=lambda _trajectory_id: None, + ) + relations = retrieval_service._record_relations( + cast("SqliteEngineeringMemoryStore", store), + project_id="proj", + record_ids=("mem-1",), + ) + assert relations == {"mem-1": {"contradicted_by": ["external"]}} + assert retrieval_service._handle_trajectory_get_mode( + cast("SqliteEngineeringMemoryStore", store), + mode="trajectory_get", + project_id="proj", + record_id="missing", + 
) == { + "mode": "trajectory_get", + "status": "not_found", + "payload": {"trajectory_id": "missing"}, + } diff --git a/tests/test_memory_staleness.py b/tests/test_memory_staleness.py index 02416f1c..753cada1 100644 --- a/tests/test_memory_staleness.py +++ b/tests/test_memory_staleness.py @@ -8,8 +8,12 @@ from dataclasses import replace from pathlib import Path +from typing import cast + +import pytest from codeclone.memory.models import MemorySubject, RecordBatch, generate_memory_id +from codeclone.memory.sqlite_store import SqliteEngineeringMemoryStore from codeclone.memory.staleness import ( apply_refresh_staleness, apply_scope_staleness, @@ -144,3 +148,153 @@ def test_inventory_paths_from_report_normalizes_and_skips_blanks() -> None: } ) assert paths == frozenset({"pkg/a.py", "pkg/b.py"}) + + +def test_anchor_drift_status_handles_missing_path_and_existing_stale_state( + tmp_path: Path, +) -> None: + from dataclasses import replace + + from codeclone.memory.models import MemorySubject, generate_memory_id + from codeclone.memory.staleness import _evaluate_anchor_drift_status + + from .memory_fixtures import make_module_record, memory_store + + with memory_store(tmp_path) as (root, project, store, _db_path): + record = replace( + make_module_record(project.id, "pkg.mod"), + created_at_commit="abc123", + code_fingerprint="fp-1", + status="active", + ) + store.upsert_record(record) + subject = MemorySubject( + id=generate_memory_id(prefix="subj"), + memory_id=record.id, + subject_kind="path", + subject_key="pkg/missing.py", + relation="about", + ) + store.write_subject(subject) + assert ( + _evaluate_anchor_drift_status( + record, + anchor_subject=subject, + root_path=root, + ) + == "historical" + ) + historical = replace(record, status="historical") + assert ( + _evaluate_anchor_drift_status( + historical, + anchor_subject=subject, + root_path=root, + ) + is None + ) + stale_record = replace( + record, status="stale", stale_reason="subject_fingerprint_drift" + ) + 
assert ( + _evaluate_anchor_drift_status( + stale_record, + anchor_subject=subject, + root_path=root, + ) + == "historical" + ) + + +def test_staleness_internal_noop_and_commit_edges( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + from types import SimpleNamespace + + from codeclone.memory import staleness + from codeclone.memory.models import MemoryEvidence + + with memory_store(tmp_path) as (root, project, store, _db_path): + record = make_module_record(project.id, "pkg.mod") + subject = MemorySubject( + id="subject", + memory_id=record.id, + subject_kind="path", + subject_key="pkg/mod.py", + relation="about", + ) + + monkeypatch.setattr( + staleness, + "_evaluate_anchor_drift_status", + lambda *_args, **_kwargs: record.status, + ) + outcome = staleness._apply_anchor_drift_for_record( + store, + record, + anchor_subject=subject, + root_path=root, + ) + assert outcome.handled is True + + evidence = MemoryEvidence( + id="evidence", + memory_id=record.id, + evidence_kind="report", + ref="report", + locator=None, + quote=None, + digest=None, + created_at_utc=record.created_at_utc, + ) + assert ( + staleness._evidence_stale_reasons( + record, + (evidence,), + {(record.identity_key, "report", "report"): "new"}, + ) + == [] + ) + historical = replace(record, status="historical") + assert ( + staleness._refresh_stale_primary_reason( + cast("SqliteEngineeringMemoryStore", SimpleNamespace()), + historical, + batch_identity_keys=frozenset(), + batch_by_identity={}, + batch_evidence={}, + report_digest=None, + ) + is None + ) + assert ( + staleness._refresh_staleness_for_record( + cast("SqliteEngineeringMemoryStore", SimpleNamespace()), + replace(record, status="draft"), + resolved_root=root, + batch_identity_keys=frozenset(), + batch_by_identity={}, + batch_evidence={}, + report_digest=None, + ) + == staleness._RefreshStalenessDelta() + ) + + commits: list[bool] = [] + fake_store = SimpleNamespace( + list_records_for_project=lambda *_args, **_kwargs: ( + 
replace(record, status="stale"), + record, + ), + list_subjects_for_memory=lambda _record_id: (), + commit=lambda: commits.append(True), + ) + result = staleness.apply_scope_staleness( + cast("SqliteEngineeringMemoryStore", fake_store), + project_id=project.id, + changed_paths=("pkg/mod.py",), + commit=True, + ) + assert result.records_marked_stale == 0 + assert commits == [True] diff --git a/tests/test_memory_trajectory_anomalies.py b/tests/test_memory_trajectory_anomalies.py index ca195084..fccb4d08 100644 --- a/tests/test_memory_trajectory_anomalies.py +++ b/tests/test_memory_trajectory_anomalies.py @@ -100,3 +100,86 @@ def test_query_engineering_memory_trajectory_dashboard_mode(tmp_path: Path) -> N assert "agents" in payload assert "anomalies" in payload assert "recent_trajectories" in payload + + +def test_agent_aggregation_counts_failed_outcomes_without_anomalies( + tmp_path: Path, +) -> None: + from dataclasses import replace + + from codeclone.memory.trajectory.agents import ( + aggregate_agent_rows, + trajectory_agent_label, + ) + + with memory_store(tmp_path) as (root, project, store, _db_path): + audit_db = tmp_path / "audit.sqlite3" + seed_trajectory_audit_workflow(root=root, audit_db=audit_db) + trajectory = store.rebuild_trajectories_from_audit( + project=project, + root_path=root, + audit_db_path=audit_db, + ).trajectories[0] + + assert trajectory_agent_label(replace(trajectory, subjects=())) is None + violated = replace(trajectory, outcome="violated") + rows = aggregate_agent_rows( + (violated,), + anomaly_by_id={trajectory.id: ()}, + ) + assert rows[0].failed_outcome_count == 1 + assert rows[0].anomaly_count == 0 + assert rows[0].intent_count == 1 + + +def test_anomalies_cover_elevated_incidents_and_incident_label_severity( + tmp_path: Path, +) -> None: + from dataclasses import replace + + from codeclone.memory.trajectory.anomalies import detect_trajectory_anomalies + + audit_db = tmp_path / "audit.sqlite3" + with memory_store(tmp_path) as (root, 
project, store, _db_path): + seed_trajectory_audit_workflow(root=root, audit_db=audit_db) + base = store.rebuild_trajectories_from_audit( + project=project, + root_path=root, + audit_db_path=audit_db, + ).trajectories[0] + + trajectory = replace( + base, + incident_count=2, + labels=("baseline_abuse_detected", "foreign_conflict_seen"), + ) + anomalies = detect_trajectory_anomalies(trajectory) + by_kind = {item.kind: item.severity for item in anomalies} + assert by_kind["elevated_incidents"] == "warn" + assert by_kind["label_baseline_abuse_detected"] == "error" + assert by_kind["label_foreign_conflict_seen"] == "warn" + + +def test_agent_aggregation_skips_empty_agent_subject( + tmp_path: Path, +) -> None: + from dataclasses import replace + + from codeclone.memory.trajectory.agents import aggregate_agent_rows + from codeclone.memory.trajectory.models import TrajectorySubject + + audit_db = tmp_path / "audit.sqlite3" + with memory_store(tmp_path) as (root, project, store, _db_path): + seed_trajectory_audit_workflow(root=root, audit_db=audit_db) + base = store.rebuild_trajectories_from_audit( + project=project, + root_path=root, + audit_db_path=audit_db, + ).trajectories[0] + + unlabeled = replace( + base, + subjects=(TrajectorySubject("agent", " ", "actor"),), + intent_id=None, + ) + assert aggregate_agent_rows((unlabeled,)) == () diff --git a/tests/test_memory_trajectory_coverage.py b/tests/test_memory_trajectory_coverage.py index b23b386c..bca940ce 100644 --- a/tests/test_memory_trajectory_coverage.py +++ b/tests/test_memory_trajectory_coverage.py @@ -178,6 +178,53 @@ def test_resolve_export_profile_and_eligibility(tmp_path: Path) -> None: assert trajectory_eligible_for_export(partial, profile=profile) is False +def test_export_eligibility_rejects_missing_digests_and_disallowed_partial( + tmp_path: Path, +) -> None: + profile = EXPORT_PROFILES["agent-change-control-v1"] + with memory_store(tmp_path) as (root, project, store, _db_path): + audit_db = tmp_path / 
"audit.sqlite3" + seed_trajectory_audit_workflow(root=root, audit_db=audit_db) + trajectory = store.rebuild_trajectories_from_audit( + project=project, + root_path=root, + audit_db_path=audit_db, + ).trajectories[0] + + assert ( + trajectory_eligible_for_export( + replace(trajectory, trajectory_digest=""), + profile=profile, + ) + is False + ) + assert ( + trajectory_eligible_for_export( + replace(trajectory, outcome="partial"), + profile=profile, + ) + is False + ) + + +def test_execute_trajectory_rebuild_owns_and_closes_store(tmp_path: Path) -> None: + from .memory_fixtures import cli_memory_repo + + with cli_memory_repo(tmp_path, with_draft=False) as ( + root, + _project, + store, + ): + audit_db = root / ".codeclone" / "db" / "audit.sqlite3" + seed_trajectory_audit_workflow(root=root, audit_db=audit_db) + config = resolve_memory_config(root) + store.close() + + payload = execute_trajectory_rebuild(root_path=root, config=config) + + assert payload["status"] == "ok" + + def test_cli_render_helpers_cover_empty_and_populated_states(tmp_path: Path) -> None: printer = _CapturePrinter() render_trajectory_status( diff --git a/tests/test_memory_trajectory_export.py b/tests/test_memory_trajectory_export.py index 5b46d1b6..4fc8fd51 100644 --- a/tests/test_memory_trajectory_export.py +++ b/tests/test_memory_trajectory_export.py @@ -97,3 +97,17 @@ def test_external_output_requires_explicit_opt_in(tmp_path: Path) -> None: raw_path="/tmp/codeclone-export.jsonl", allow_external_out=False, ) + + +def test_jsonl_accumulator_enforces_record_and_file_limits() -> None: + from codeclone.memory.trajectory.export import _JsonlExportAccumulator + + accumulator = _JsonlExportAccumulator() + assert accumulator.try_append("oversized", record_limit=2, file_limit=100) is False + assert accumulator.truncated_records == 1 + + assert accumulator.try_append("ok", record_limit=10, file_limit=2) is False + assert accumulator.records_written == 0 + + assert accumulator.try_append("ok", 
record_limit=10, file_limit=10) is True + assert accumulator.lines == ["ok"] diff --git a/tests/test_memory_trajectory_export_context.py b/tests/test_memory_trajectory_export_context.py index d8d376ff..d2c5f9e1 100644 --- a/tests/test_memory_trajectory_export_context.py +++ b/tests/test_memory_trajectory_export_context.py @@ -134,3 +134,58 @@ def test_export_record_includes_context_citations_and_patch_trail( assert payload["context"]["memory_precedents"][0]["memory_id"] == record.id assert "patch_trail_summary" in payload assert result.manifest["deduplicated_workflows"] == 1 + + +def test_export_context_helper_rejection_and_deduplication_edges( + tmp_path: Path, +) -> None: + from codeclone.memory.trajectory import export_context + + assert export_context.projection_version_rank("trajectory-vnext") == 0 + + with memory_store(tmp_path) as (root, project, store, _db_path): + audit_db = tmp_path / "audit.sqlite3" + seed_trajectory_audit_workflow(root=root, audit_db=audit_db) + current = store.rebuild_trajectories_from_audit( + project=project, + root_path=root, + audit_db_path=audit_db, + ).trajectories[0] + + legacy = replace( + current, + id="traj-legacy-later", + projection_version=TRAJECTORY_PROJECTION_VERSION_V1, + ) + assert select_canonical_trajectories([current, legacy]) == [current] + + citations: list[dict[str, object]] = [] + seen: set[tuple[str, str, int]] = set() + for _ in range(2): + export_context._append_trajectory_citation( + citations, + seen, + kind="finding", + cited_id="finding-1", + valid=True, + source_event_type="claim_validation.completed", + audit_sequence=1, + dedupe_sequence=1, + ) + assert len(citations) == 1 + + assert ( + export_context._trajectory_precedent_match( + replace( + current, + id="traj-prior", + workflow_id="intent:prior", + started_at_utc="2025-01-01T00:00:00Z", + finished_at_utc="2025-01-01T00:01:00Z", + subjects=(), + ), + trajectory=current, + scope_set={"pkg/missing.py"}, + ) + is None + ) diff --git 
a/tests/test_memory_trajectory_projector.py b/tests/test_memory_trajectory_projector.py index 74f35e0f..8f73b906 100644 --- a/tests/test_memory_trajectory_projector.py +++ b/tests/test_memory_trajectory_projector.py @@ -242,6 +242,40 @@ def test_project_trajectory_marks_incident_labels() -> None: assert "baseline_abuse_detected" in trajectory.labels +def test_projector_helpers_reject_incomplete_steps_and_cover_fallbacks() -> None: + from codeclone.memory.trajectory import projector + + record = _record(1, "analysis.completed") + with pytest.raises(TrajectoryProjectionError, match="missing audit_sequence"): + projector._step_from_record(0, replace(record, audit_sequence=None)) + with pytest.raises(TrajectoryProjectionError, match="missing event core"): + projector._step_from_record( + 0, + replace(record, event_core_json=None, event_core_sha256=None), + ) + + corrected = projector._quality_tier( + outcome="accepted", + records=(_record(1, "patch_contract.violated"),), + labels=(), + ) + assert corrected == "corrected" + assert projector._primary_agent_label((record,)) == "agent" + assert projector._primary_agent_label((replace(record, agent_label=""),)) is None + + subjects = projector._subjects( + workflow_id="run:one", + intent_id=None, + run_ids=(), + report_digests=(), + cores=(), + agent_label=None, + ) + assert {(item.subject_kind, item.subject_key) for item in subjects} == { + ("workflow", "run:one") + } + + def test_project_trajectory_rejects_event_core_digest_mismatch() -> None: record = _record(1, "intent.declared", status="active") broken = replace(record, event_core_sha256="0" * 64) @@ -253,3 +287,37 @@ def test_project_trajectory_rejects_event_core_digest_mismatch() -> None: workflow_id="intent:intent-a-001", records=(broken,), ) + + +def test_patch_trail_projector_helper_and_status_fallback_edges() -> None: + from codeclone.memory.trajectory import patch_trail_projector + + record = _record(1, "intent.checked", status="clean") + with 
pytest.raises(TrajectoryProjectionError, match="missing audit_sequence"): + patch_trail_projector._record_order_key(replace(record, audit_sequence=None)) + assert ( + patch_trail_projector._event_core( + replace(record, event_core_json=None, event_core_sha256=None) + ) + == {} + ) + assert patch_trail_projector._facts_paths({}, "changed_files") == () + assert ( + patch_trail_projector._facts_paths( + {"facts": {"changed_files": "pkg/a.py"}}, + "changed_files", + ) + == () + ) + + state = patch_trail_projector._WorkflowAuditState(scope_check_status="") + patch_trail_projector._apply_audit_record( + state, + replace(record, status=None), + ) + assert state.scope_check_status == "clean" + patch_trail_projector._apply_audit_record( + state, + _record(2, "receipt.created"), + ) + assert state.receipt_seq == 2 diff --git a/tests/test_memory_trajectory_retrieval.py b/tests/test_memory_trajectory_retrieval.py index 0ad2f890..b8c295f0 100644 --- a/tests/test_memory_trajectory_retrieval.py +++ b/tests/test_memory_trajectory_retrieval.py @@ -9,6 +9,7 @@ import json from dataclasses import replace from pathlib import Path +from typing import cast import pytest @@ -540,3 +541,73 @@ def test_rank_trajectories_for_scope_with_patch_trail_and_long_summary( assert previews assert isinstance(truncated, bool) assert "…" in str(previews[0]["summary"]) + + +def test_scope_and_query_ranking_cover_all_subject_kinds_and_rejections( + tmp_path: Path, +) -> None: + from codeclone.memory.trajectory.retrieval import ( + filter_trajectories_for_query, + serialize_trajectory_detail, + trajectory_semantic_text_parts, + ) + + with memory_store(tmp_path) as (root, project, store, _db_path): + audit_db = tmp_path / "audit.sqlite3" + seed_trajectory_audit_workflow(root=root, audit_db=audit_db) + base = store.rebuild_trajectories_from_audit( + project=project, + root_path=root, + audit_db_path=audit_db, + ).trajectories[0] + + scoped = replace( + base, + quality_tier="corrected", + 
labels=("change_control_workflow",), + subjects=( + TrajectorySubject("path", "pkg/mod.py", "touched"), + TrajectorySubject("path", "pkg/other.py", "untouched"), + TrajectorySubject("module", "pkg.mod", "about"), + TrajectorySubject("symbol", "pkg.mod.run", "about"), + ), + ) + irrelevant = replace( + base, + id="traj-irrelevant", + workflow_id="intent:intent-b-001", + summary="unrelated", + subjects=(), + ) + + assert filter_trajectories_for_query([scoped], query="", match_mode="any") == () + assert rank_trajectories_for_query( + [scoped], + query="missing token", + max_results=5, + match_mode="all", + ) == ([], False) + assert rank_trajectories_for_query( + [scoped], + query="missing", + max_results=5, + match_mode="any", + ) == ([], False) + + previews, truncated = rank_trajectories_for_scope( + [scoped, irrelevant], + scope_paths=("pkg/mod.py", "pkg/other.py"), + symbols=("pkg.mod.run",), + patch_trails={scoped.id: {}}, + max_results=5, + ) + assert truncated is False + assert [item["trajectory_id"] for item in previews] == [scoped.id] + assert cast("int", previews[0]["relevance_score"]) > 3 + + detail = serialize_trajectory_detail(scoped, patch_trail_payload={}) + assert "patch_trail" not in detail + semantic_parts = tuple(trajectory_semantic_text_parts(scoped)) + assert any(part.startswith("labels ") for part in semantic_parts) + assert any(part.startswith("paths ") for part in semantic_parts) + assert any(part.startswith("steps ") for part in semantic_parts) diff --git a/tests/test_memory_trajectory_store.py b/tests/test_memory_trajectory_store.py index e1dbf91e..6074e3e0 100644 --- a/tests/test_memory_trajectory_store.py +++ b/tests/test_memory_trajectory_store.py @@ -8,6 +8,8 @@ from pathlib import Path +import pytest + from codeclone.audit.events import AuditEvent, repo_root_digest from codeclone.audit.writer import SqliteAuditWriter @@ -116,3 +118,119 @@ def test_rebuild_supersedes_duplicate_workflow_projection_rows(tmp_path: Path) - assert 
store.count_trajectories(project_id=project.id) == 1 canonical = store.list_canonical_trajectories_for_export(project_id=project.id) assert len(canonical) == 1 + + +def test_store_empty_inputs_invalid_patch_trails_and_stale_projection_cleanup( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + from dataclasses import replace + + from codeclone.memory.trajectory import store as trajectory_store + from tests.memory_fixtures import seed_trajectory_audit_workflow + + with memory_store(tmp_path) as (root, project, store, _db_path): + conn = store._conn + assert ( + trajectory_store.list_trajectories_for_subjects( + conn, + project_id=project.id, + subjects={}, + ) + == [] + ) + assert ( + trajectory_store.search_trajectories( + conn, + project_id=project.id, + query="", + ) + == [] + ) + assert ( + trajectory_store.load_trajectory_patch_trail( + conn, + trajectory_id="missing", + ) + is None + ) + + audit_db = tmp_path / "audit.sqlite3" + seed_trajectory_audit_workflow(root=root, audit_db=audit_db) + base = store.rebuild_trajectories_from_audit( + project=project, + root_path=root, + audit_db_path=audit_db, + ).trajectories[0] + old = replace( + base, + id="traj-old", + workflow_id="intent:shared", + ) + current = replace( + old, + id="traj-current", + projection_version="3", + trajectory_digest="e" * 64, + ) + assert trajectory_store.upsert_trajectory(conn, old) == "created" + assert trajectory_store.upsert_trajectory(conn, current) == "created" + assert ( + trajectory_store.supersede_stale_projection_trajectories( + conn, + project_id=project.id, + workflow_id=current.workflow_id, + keep_trajectory_id=current.id, + keep_trajectory_digest=current.trajectory_digest, + ) + == 1 + ) + assert trajectory_store.find_trajectory(conn, old.id) is None + + trajectory_store.upsert_trajectory_patch_trail( + conn, + trajectory_id=current.id, + patch_trail_json="[]", + patch_trail_digest="f" * 64, + schema_version="1", + projected_at_utc=current.projected_at_utc, + ) 
+ assert ( + trajectory_store.load_trajectory_patch_trail( + conn, + trajectory_id=current.id, + ) + is None + ) + assert ( + trajectory_store.load_trajectory_patch_trails( + conn, + trajectory_ids=(current.id,), + ) + == {} + ) + + monkeypatch.setattr( + trajectory_store, + "list_workflow_ids_with_events_after", + lambda **_kwargs: ["intent:empty"], + ) + monkeypatch.setattr( + trajectory_store, + "read_audit_event_core_records", + lambda **_kwargs: [], + ) + monkeypatch.setattr( + trajectory_store, + "count_audit_event_core_gaps", + lambda **_kwargs: 0, + ) + result = trajectory_store.rebuild_trajectories_incremental( + conn=conn, + project=project, + root_path=root, + audit_db_path=tmp_path / "audit-empty.sqlite3", + after_event_core_id=10, + ) + assert result.run.workflows_seen == 1 + assert result.trajectories == () diff --git a/tests/test_observability_cli_pipeline.py b/tests/test_observability_cli_pipeline.py index bcba8add..0d1325cc 100644 --- a/tests/test_observability_cli_pipeline.py +++ b/tests/test_observability_cli_pipeline.py @@ -18,6 +18,7 @@ from codeclone.analysis.normalizer import NormalizationConfig from codeclone.cache.store import Cache from codeclone.config.observability import ObservabilityConfig +from codeclone.contracts import ExitCode from codeclone.core._types import ( AnalysisResult, BootstrapResult, @@ -26,10 +27,13 @@ ProcessingResult, ) from codeclone.observability import bootstrap, operation, shutdown +from codeclone.observability.models import OperationRecord from codeclone.observability.store.schema import ( observability_store_path, open_observability_store, ) +from codeclone.observability.store.writer import write_operation +from codeclone.surfaces.cli.observability import observability_main @pytest.fixture(autouse=True) @@ -158,3 +162,91 @@ def test_cli_pipeline_emits_stage_spans( "files_analyzed": 2, "failed_files": 0, } + + +def test_observability_cli_help_and_stdout_trace( + tmp_path: Path, + capsys: 
pytest.CaptureFixture[str], +) -> None: + assert observability_main([]) == int(ExitCode.CONTRACT_ERROR) + assert "trace" in capsys.readouterr().out + + bootstrap(ObservabilityConfig(enabled=True), root=tmp_path) + try: + with operation(name="cli.analyze", surface="cli"): + pass + finally: + shutdown() + conn = open_observability_store(observability_store_path(tmp_path)) + try: + write_operation( + conn, + OperationRecord( + operation_id="op-1", + correlation_id="corr", + surface="cli", + name="cli.analyze", + started_at_utc="2026-01-01T00:00:00Z", + duration_ms=1.0, + status="ok", + spans=(), + ), + ) + finally: + conn.close() + + code = observability_main(["trace", "--root", str(tmp_path)]) + out = capsys.readouterr().out + assert code == int(ExitCode.SUCCESS) + assert '"operation_tree"' in out + + +def test_observability_cli_missing_store_and_file_outputs( + tmp_path: Path, + capsys: pytest.CaptureFixture[str], +) -> None: + empty_root = tmp_path / "empty" + empty_root.mkdir() + code = observability_main(["trace", "--root", str(empty_root)]) + assert code == int(ExitCode.SUCCESS) + assert "No observability store" in capsys.readouterr().out + + repo = tmp_path / "repo" + repo.mkdir() + conn = open_observability_store(observability_store_path(repo)) + try: + write_operation( + conn, + OperationRecord( + operation_id="op-cli", + correlation_id="op-cli", + surface="cli", + name="cli.analyze", + started_at_utc="2026-01-01T00:00:00Z", + duration_ms=1.0, + status="ok", + spans=(), + ), + ) + finally: + conn.close() + + json_path = tmp_path / "trace.json" + html_path = tmp_path / "trace.html" + code = observability_main( + [ + "trace", + "--root", + str(repo), + "--json", + str(json_path), + "--html", + str(html_path), + ] + ) + out = capsys.readouterr().out + assert code == int(ExitCode.SUCCESS) + assert json_path.is_file() + assert html_path.is_file() + assert f"Wrote {json_path}" in out + assert f"Wrote {html_path}" in out diff --git 
a/tests/test_observability_profile.py b/tests/test_observability_profile.py index 12b0cda2..2229ba74 100644 --- a/tests/test_observability_profile.py +++ b/tests/test_observability_profile.py @@ -122,3 +122,25 @@ def _import( assert worker_bootstrap_sample() is None assert capture_rss_cpu() is None assert build_profile_sample((0, 0.0, 0.0)) is None + + +def test_profile_open_fds_degrades_gracefully( + monkeypatch: pytest.MonkeyPatch, +) -> None: + import sys + from unittest.mock import MagicMock + + from codeclone.observability.profile import build_profile_sample + + process = MagicMock() + process.memory_info.return_value = MagicMock(rss=1024 * 1024) + process.cpu_times.return_value = MagicMock(user=0.1, system=0.2) + process.num_fds.side_effect = OSError("unsupported") + process.num_threads.return_value = 3 + mock_psutil = MagicMock() + mock_psutil.Process.return_value = process + monkeypatch.setitem(sys.modules, "psutil", mock_psutil) + + sample = build_profile_sample((512 * 1024, 0.0, 0.0)) + assert sample is not None + assert sample.open_fds is None diff --git a/tests/test_observability_query.py b/tests/test_observability_query.py index 20d85c65..c8abfcbd 100644 --- a/tests/test_observability_query.py +++ b/tests/test_observability_query.py @@ -243,3 +243,177 @@ def test_agent_context_ranks_token_consumers(tmp_path: Path) -> None: top = _rows(out["rows"])[0] assert top["tool"] == "mcp.get_relevant_memory" assert top["verdict"] == "context_heavy" + + +def test_projection_helpers_and_diagnostic_edges( + monkeypatch: pytest.MonkeyPatch, +) -> None: + from dataclasses import replace + + from codeclone.observability.views import ( + AgentTokenRow, + AgentView, + AggregatesView, + DbCostRow, + McpToolAggregate, + OperationView, + PipelineGroup, + SpanCostView, + SpanView, + TraceView, + ) + + warnings: list[str] = [] + assert query_mod._resolve_detail("verbose", warnings) == "compact" + assert warnings + + sentinel = object() + calls: list[dict[str, object]] = [] + 
+ def _build_trace(_conn: object, **kwargs: object) -> object: + calls.append(kwargs) + return sentinel + + monkeypatch.setattr(query_mod, "build_trace_view", _build_trace) + assert query_mod._build_trace(object(), "corr-1") is sentinel + assert calls == [{"correlation_id": "corr-1"}] + + empty_child = OperationView( + operation_id="empty", + correlation_id="corr", + surface="memory", + name="empty", + started_at_utc="2026-01-01T00:00:00Z", + duration_ms=1.0, + status="ok", + ) + measured_child = replace(empty_child, operation_id="measured", rss_delta_mb=4.0) + root = OperationView( + operation_id="root", + correlation_id="corr", + surface="mcp", + name="query_engineering_memory", + started_at_utc="2026-01-01T00:00:00Z", + duration_ms=12.25, + status="ok", + rss_delta_mb=2.0, + spans=( + SpanView( + span_id="span", + name="memory.semantic.reindex", + duration_ms=9.0, + status="ok", + rss_delta_mb=3.0, + ), + ), + children=(empty_child, measured_child), + ) + semantic = SpanCostView( + span_id="semantic", + name="memory.semantic.reindex", + surface="memory", + operation_id="root", + operation_name=root.name, + duration_ms=10.5, + rss_delta_mb=250.0, + produced=0, + skipped=2, + no_op=True, + ) + aggregate = AggregatesView( + operation_count=1, + slowest=(root,), + mcp_tools=( + McpToolAggregate( + name="query_engineering_memory", + count=2, + p50_duration_ms=3.0, + p95_duration_ms=8.0, + p95_response_bytes=4096, + p95_request_bytes=512, + p95_response_tokens=9000, + ), + ), + semantic_costs=(semantic,), + peak_memory_span=semantic, + db_costs=( + DbCostRow( + span_name="memory.hydrate", + surface="memory", + span_count=2, + total_queries=500, + total_writes=1, + max_queries=300, + ), + ), + agent=AgentView( + mcp_calls=2, + response_tokens=9000, + consumers=( + AgentTokenRow( + name="query_engineering_memory", + calls=2, + request_tokens=10, + response_tokens=9000, + ), + ), + ), + pipeline=(PipelineGroup("memory", 1, 12.25, 4.0),), + ) + trace = TraceView( + 
schema_version="1", + window_started_at_utc="2026-01-01T00:00:00Z", + window_ended_at_utc="2026-01-01T00:00:01Z", + aggregates=aggregate, + operation_tree=(root,), + ) + + assert query_mod._slow_operations(aggregate, 1)[0]["operation"] == root.name + assert query_mod._memory_pipeline_cost(aggregate, 1)[0]["no_op"] is True + assert query_mod._mcp_tool_matrix(aggregate, 1)[0]["calls"] == 2 + assert query_mod._costly_noops(aggregate, 1)[0]["span"] == semantic.name + assert query_mod._pipeline(aggregate, 1)[0]["subsystem"] == "memory" + assert query_mod._correlated_chains(trace, 1)[0]["peak_rss_delta_mb"] == 4.0 + assert query_mod._agent_context_body(AggregatesView(0), 1) == { + "total_response_tokens": 0, + "rows": [], + } + assert query_mod._memory_diagnostic(AggregatesView(0)) is None + assert ( + query_mod._memory_diagnostic( + AggregatesView( + 1, + peak_memory_span=replace(semantic, rss_delta_mb=10.0), + ) + ) + is None + ) + assert query_mod._db_diagnostic(AggregatesView(0)) is None + assert ( + query_mod._db_diagnostic( + AggregatesView( + 1, + db_costs=(replace(aggregate.db_costs[0], total_queries=2),), + ) + ) + is None + ) + assert query_mod._context_diagnostic(AggregatesView(0)) is None + agent = aggregate.agent + assert agent is not None + assert ( + query_mod._context_diagnostic( + AggregatesView( + 1, + agent=replace( + agent, + response_tokens=100, + consumers=(replace(agent.consumers[0], response_tokens=10),), + ), + ) + ) + is None + ) + assert query_mod._top_diagnostics(aggregate) + assert query_mod._recommended_next_sections("db_cost", aggregate) == [] + assert len(query_mod._recommended_next_sections("summary", aggregate)) == 3 diff --git a/tests/test_observability_reader.py b/tests/test_observability_reader.py index 9cda6425..29497827 100644 --- a/tests/test_observability_reader.py +++ b/tests/test_observability_reader.py @@ -429,3 +429,21 @@ def test_cpu_and_pipeline_rollup(tmp_path: Path) -> None: pipe = {group.name: group for group in 
agg.pipeline} assert pipe["memory"].cpu_ms == 2000.0 assert pipe["analysis"].op_count == 1 + + +def test_epoch_ms_and_empty_correlation_filter(tmp_path: Path) -> None: + from codeclone.observability.store.reader import _by_correlations, _epoch_ms + from codeclone.observability.store.schema import ( + observability_store_path, + open_observability_store, + ) + + assert _epoch_ms("") == 0.0 + assert _epoch_ms("not-a-date") == 0.0 + assert _epoch_ms("2026-01-01T00:00:00Z") > 0.0 + + conn = open_observability_store(observability_store_path(tmp_path)) + try: + assert _by_correlations(conn, []) == [] + finally: + conn.close() diff --git a/tests/test_observability_render.py b/tests/test_observability_render.py index c6b78537..b8a82598 100644 --- a/tests/test_observability_render.py +++ b/tests/test_observability_render.py @@ -498,3 +498,41 @@ def test_observability_main_no_store( code = observability_main(["trace", "--root", str(tmp_path)]) assert code == 0 assert "No observability store" in capsys.readouterr().out + + +def test_html_format_helpers_and_semantic_cost_rows() -> None: + from dataclasses import replace + + from codeclone.observability.render_html import _bytes, _mb, _semantic_row, _tokens + + assert _mb(None) == "—" + assert "GB" in _mb(2048.0) + assert "MB" in _mb(512.0) + assert _bytes(None) == "—" + assert "MB" in _bytes(1024 * 1024) + assert "KB" in _bytes(2048) + assert _bytes(12).endswith(" B") + assert _tokens(None) == "—" + assert _tokens(0) == "—" + assert _tokens(1500).endswith("k") + + costly = SpanCostView( + span_id="s1", + name="memory.semantic.reindex", + surface="memory", + operation_id="op", + operation_name="memory.projection.job", + duration_ms=6000.0, + no_op=True, + reason_kind="schema_version_changed", + ) + costly_html = _semantic_row(costly) + assert "no-op · costly" in costly_html + assert "schema_version_changed" in costly_html + + noop = replace(costly, duration_ms=10.0) + assert "no-op" in _semantic_row(noop) + assert "costly" not 
in _semantic_row(noop) + + productive = replace(noop, no_op=False, reason_kind=None) + assert "productive" in _semantic_row(productive) diff --git a/tests/test_observability_runtime.py b/tests/test_observability_runtime.py index 32cea6f7..6fd64f28 100644 --- a/tests/test_observability_runtime.py +++ b/tests/test_observability_runtime.py @@ -116,3 +116,53 @@ def test_operation_records_error_status(tmp_path: Path) -> None: assert row == ("error", "ValueError") finally: conn.close() + + +def test_record_elapsed_span_is_noop_without_active_operation(tmp_path: Path) -> None: + from codeclone.config.observability import ObservabilityConfig + from codeclone.observability import bootstrap, record_elapsed_span, shutdown + + bootstrap(ObservabilityConfig(enabled=True), root=tmp_path) + try: + record_elapsed_span( + "orphan-span", + started_at_utc="2026-01-01T00:00:00Z", + duration_ms=1.0, + ) + finally: + shutdown() + + +def test_runtime_optional_payload_root_and_empty_sql_edges(tmp_path: Path) -> None: + from codeclone.observability import runtime + + bootstrap(ObservabilityConfig(enabled=True), session_id="session") + with operation(name="rootless", surface="mcp") as op: + op.set_request(request_bytes=1) + op.set_request(request_tokens=2) + op.set_response(response_bytes=3) + op.set_response(response_tokens=4) + with span(name="db"): + runtime.record_db_query("") + + first_root = tmp_path / "first" + second_root = tmp_path / "second" + runtime.bind_root(first_root) + runtime.bind_root(second_root) + with operation(name="rooted", surface="mcp"): + pass + shutdown() + + assert observability_store_path(first_root).exists() + assert not observability_store_path(second_root).exists() + + bootstrap(ObservabilityConfig(enabled=False)) + runtime.bind_root(tmp_path / "disabled") + + active = runtime._ActiveRuntime( + ObservabilityConfig(enabled=True), + root=tmp_path, + ) + active._conn = object() + active.close() + assert active._conn is None diff --git 
a/tests/test_semantic_sources.py b/tests/test_semantic_sources.py index 56c849b5..0196010b 100644 --- a/tests/test_semantic_sources.py +++ b/tests/test_semantic_sources.py @@ -10,6 +10,8 @@ from collections.abc import Sequence from pathlib import Path +import pytest + from codeclone.audit.events import EVENT_INTENT_DECLARED, AuditEvent, repo_root_digest from codeclone.audit.writer import SqliteAuditWriter from codeclone.memory.enums import MemoryStatus @@ -246,6 +248,70 @@ def test_trajectory_index_source_projects_bounded_text() -> None: assert '{"secret"' not in projection.text +def test_trajectory_source_name_missing_record_and_pagination() -> None: + trajectory = _trajectory("proj-traj") + source = TrajectoryIndexSource( + _FakeTrajectoryStore([trajectory]), + project_id="proj-traj", + ) + assert source.name() == "trajectory" + + class _MissingTrajectoryStore(_FakeTrajectoryStore): + def find_trajectory(self, trajectory_id: str) -> Trajectory | None: + return None + + assert ( + list( + TrajectoryIndexSource( + _MissingTrajectoryStore([trajectory]), + project_id="proj-traj", + ).iter_projections() + ) + == [] + ) + + trajectories = [ + dataclasses.replace(trajectory, id=f"traj-{index}") for index in range(201) + ] + projections = list( + TrajectoryIndexSource( + _FakeTrajectoryStore(trajectories), + project_id="proj-traj", + ).iter_projections() + ) + assert len(projections) == 201 + + +def test_audit_source_tolerates_query_failure( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + from codeclone.memory.semantic import sources + + db_path = tmp_path / "audit.sqlite3" + db_path.touch() + + class _BrokenConnection: + closed = False + + def execute(self, _sql: str, _params: object) -> object: + raise sqlite3.OperationalError("broken query") + + def close(self) -> None: + self.closed = True + + connection = _BrokenConnection() + monkeypatch.setattr( + sources, + "open_audit_db_readonly", + lambda _path: connection, + ) + assert ( + 
list(AuditIndexSource(enabled=True, db_path=db_path).iter_projections()) == [] + ) + assert connection.closed is True + + def _trajectory(project_id: str) -> Trajectory: return Trajectory( id="traj-1", diff --git a/tests/test_trajectory_rebuild_incremental.py b/tests/test_trajectory_rebuild_incremental.py index d2933535..4a77e181 100644 --- a/tests/test_trajectory_rebuild_incremental.py +++ b/tests/test_trajectory_rebuild_incremental.py @@ -198,3 +198,32 @@ def test_worker_watermark_decision(monkeypatch: pytest.MonkeyPatch) -> None: assert _trajectory_incremental_watermark(conn, project_id="p") is None finally: conn.close() + + +def test_execute_rebuild_reports_full_and_incremental_modes(tmp_path: Path) -> None: + from codeclone.config.memory import resolve_memory_config + from codeclone.memory.trajectory.rebuild_workflow import execute_trajectory_rebuild + + from .memory_fixtures import seed_trajectory_audit_workflow + + with memory_store(tmp_path) as (root, project, store, _db_path): + audit_db = root / ".codeclone" / "db" / "audit.sqlite3" + seed_trajectory_audit_workflow(root=root, audit_db=audit_db) + config = resolve_memory_config(root) + full = execute_trajectory_rebuild( + root_path=root, + config=config, + store=store, + project=project, + ) + assert full["status"] == "ok" + assert full["mode"] == "full" + incremental = execute_trajectory_rebuild( + root_path=root, + config=config, + store=store, + project=project, + incremental_after_event_core_id=1, + ) + assert incremental["status"] == "ok" + assert incremental["mode"] == "incremental" diff --git a/tests/test_workspace_intent_gate.py b/tests/test_workspace_intent_gate.py index 2f864444..bb9cf75c 100644 --- a/tests/test_workspace_intent_gate.py +++ b/tests/test_workspace_intent_gate.py @@ -320,3 +320,126 @@ def test_hook_cleanup_includes_recoverable_cursor_intent_only( unclosed = list_unclosed_workspace_intents_for_hook_cleanup(tmp_path) assert unclosed == 
(UnclosedWorkspaceIntent("intent-cursor-dead-001", "active"),) + + +def test_hook_cleanup_resolves_owner_identity_from_environment( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + import os + from dataclasses import replace + + from codeclone.workspace_intent.gate import ( + list_unclosed_workspace_intents_for_hook_cleanup, + ) + from tests.test_workspace_intents import _record + from tests.workspace_intent_gate_helpers import write_workspace_record + + monkeypatch.setattr( + "codeclone.surfaces.mcp._workspace_intent_pid.is_agent_pid_alive", + lambda _pid: True, + ) + own_pid = os.getpid() + own = replace( + _record(intent_id="intent-own-env-001", status="active"), + agent_pid=own_pid, + agent_start_epoch=42, + ) + write_workspace_record(tmp_path, own) + monkeypatch.setenv("CODECLONE_HOOK_OWN_AGENT_PID", str(own_pid)) + monkeypatch.setenv("CODECLONE_HOOK_OWN_AGENT_START_EPOCH", "42") + + unclosed = list_unclosed_workspace_intents_for_hook_cleanup(tmp_path) + + assert len(unclosed) == 1 + assert unclosed[0].intent_id == "intent-own-env-001" + + +def test_hook_cleanup_record_filter_handles_recoverable_agents() -> None: + import os + from dataclasses import replace + + from codeclone.surfaces.mcp._workspace_intent_lifecycle import utc_now + from codeclone.workspace_intent.gate import _include_record_in_hook_cleanup + from tests.test_workspace_intents import _record + + recoverable = replace( + _record(intent_id="intent-rec-001", status="active"), + agent_pid=os.getpid() + 5000, + agent_label="cursor-vscode/dead", + ) + now = utc_now() + assert ( + _include_record_in_hook_cleanup( + recoverable, + own_pid=os.getpid(), + own_start_epoch=1, + recoverable_agent_label_prefix=None, + include_foreign=False, + now=now, + ) + is False + ) + assert ( + _include_record_in_hook_cleanup( + recoverable, + own_pid=os.getpid(), + own_start_epoch=1, + recoverable_agent_label_prefix="cursor-vscode/", + include_foreign=False, + now=now, + ) + is True + ) + + +def 
test_hook_authorizes_foreign_active_environment_values( + monkeypatch: pytest.MonkeyPatch, +) -> None: + from codeclone.workspace_intent.gate import ( + HOOK_AUTHORIZE_FOREIGN_ENV, + _hook_authorizes_foreign_active, + ) + + monkeypatch.delenv(HOOK_AUTHORIZE_FOREIGN_ENV, raising=False) + assert _hook_authorizes_foreign_active() is True + monkeypatch.setenv(HOOK_AUTHORIZE_FOREIGN_ENV, "maybe") + assert _hook_authorizes_foreign_active() is False + monkeypatch.setenv(HOOK_AUTHORIZE_FOREIGN_ENV, "off") + assert _hook_authorizes_foreign_active() is False + monkeypatch.setenv(HOOK_AUTHORIZE_FOREIGN_ENV, "yes") + assert _hook_authorizes_foreign_active() is True + + +def test_workspace_ownership_honors_foreign_active_authorization( + monkeypatch: pytest.MonkeyPatch, +) -> None: + from codeclone.surfaces.mcp import _workspace_intents as workspace_intents + from codeclone.workspace_intent import gate as gate_mod + + monkeypatch.setattr(gate_mod, "_hook_authorizes_foreign_active", lambda: True) + assert ( + gate_mod._ownership_authorizes_hook( + workspace_intents.IntentOwnership.FOREIGN_ACTIVE, + liveness=workspace_intents.PidLiveness.ALIVE, + ) + is True + ) + monkeypatch.setattr(gate_mod, "_hook_authorizes_foreign_active", lambda: False) + assert ( + gate_mod._ownership_authorizes_hook( + workspace_intents.IntentOwnership.FOREIGN_ACTIVE, + liveness=workspace_intents.PidLiveness.ALIVE, + ) + is False + ) + + +def test_agent_pid_liveness_honors_boolean_probe( + monkeypatch: pytest.MonkeyPatch, +) -> None: + from codeclone.surfaces.mcp import _workspace_intent_pid as pid_mod + from codeclone.surfaces.mcp._workspace_intent_lifecycle import PidLiveness + + monkeypatch.setattr(pid_mod, "is_agent_pid_alive", lambda _pid: False) + assert pid_mod.agent_pid_liveness(123) is PidLiveness.DEAD diff --git a/tests/test_workspace_intent_gate_errors.py b/tests/test_workspace_intent_gate_errors.py index 4575f042..5d53d2b4 100644 --- a/tests/test_workspace_intent_gate_errors.py +++ 
b/tests/test_workspace_intent_gate_errors.py @@ -117,3 +117,52 @@ def test_gate_load_registry_records_file_backend(tmp_path: Path) -> None: ) records = gate_mod._load_registry_records_read_only(tmp_path, config) assert records == () + + +def test_hook_cleanup_reports_registry_configuration_failure( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + from codeclone.workspace_intent.gate import ( + WorkspaceIntentRegistryUnavailable, + list_unclosed_workspace_intents_for_hook_cleanup, + ) + + def _boom(_root: Path) -> object: + raise ValueError("broken registry") + + monkeypatch.setattr( + "codeclone.workspace_intent.gate.resolve_intent_registry_config", + _boom, + ) + with pytest.raises(WorkspaceIntentRegistryUnavailable, match="broken registry"): + list_unclosed_workspace_intents_for_hook_cleanup(tmp_path) + + +def test_hook_cleanup_reports_sqlite_load_failure( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + from codeclone.workspace_intent.gate import ( + WorkspaceIntentRegistryUnavailable, + list_unclosed_workspace_intents_for_hook_cleanup, + ) + + class _Config: + backend = "sqlite" + storage_path = Path(".codeclone/db/intents.sqlite3") + + monkeypatch.setattr( + "codeclone.workspace_intent.gate.resolve_intent_registry_config", + lambda _root: _Config(), + ) + + def _load_fail(*_args: object, **_kwargs: object) -> object: + raise OSError("cannot read sqlite") + + monkeypatch.setattr( + "codeclone.workspace_intent.gate._load_registry_records_read_only", + _load_fail, + ) + with pytest.raises(WorkspaceIntentRegistryUnavailable, match="cannot read sqlite"): + list_unclosed_workspace_intents_for_hook_cleanup(tmp_path) From 446a6f3ce043636a710aeb5726e4704dc4e5b33f Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Fri, 12 Jun 2026 22:23:13 +0500 Subject: [PATCH 262/318] chore(deps): update all deps --- pyproject.toml | 2 +- uv.lock | 108 ++++++++++++++++++++++++------------------------- 2 files changed, 55 insertions(+), 55 
deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 8731f6b4..c7249d2f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -93,7 +93,7 @@ dev = [ "build>=1.4.3", "twine>=6.2.0", "mypy>=1.20.1", - "ruff>=0.15.16", + "ruff>=0.15.17", "pre-commit>=4.5.1", ] diff --git a/uv.lock b/uv.lock index f3e78ea8..b95c530c 100644 --- a/uv.lock +++ b/uv.lock @@ -400,7 +400,7 @@ requires-dist = [ { name = "pytest", marker = "extra == 'dev'", specifier = ">=9.0.3" }, { name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=7.1.0" }, { name = "rich", specifier = ">=15.0.0" }, - { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.15.16" }, + { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.15.17" }, { name = "tiktoken", marker = "extra == 'token-bench'", specifier = ">=0.13.0" }, { name = "tomli", marker = "python_full_version < '3.11'", specifier = ">=2.0.1" }, { name = "twine", marker = "extra == 'dev'", specifier = ">=6.2.0" }, @@ -629,11 +629,11 @@ wheels = [ [[package]] name = "distlib" -version = "0.4.2" +version = "0.4.3" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/46/8d/873e9252ea2c0e0c857884e0a2899ec43ade132345df1925ef24cbe64f18/distlib-0.4.2.tar.gz", hash = "sha256:baeb401c90f27acd15c4861ae0847d1e731c27ac3dbf4210643ba61fa1e813db", size = 614914, upload-time = "2026-06-08T16:24:15.439Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c9/02/bd72be9134d25ed783ecbbc38a539ffaefbf90c78418c7fb7229600dbac7/distlib-0.4.3.tar.gz", hash = "sha256:f152097224a0ae24be5a0f6bae1b9359af82133bce63f98a95f86cae1aede9ed", size = 615141, upload-time = "2026-06-12T08:04:52.847Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c1/60/aa891c893821d4d127292ed66c6940d1d715894bd5a0ce048056bc641773/distlib-0.4.2-py2.py3-none-any.whl", hash = "sha256:ca4cb11e5d746b5ec13c199cbf19ae27a241f89702b54e153a74332955446067", size = 470510, upload-time = "2026-06-08T16:24:13.208Z" }, 
+ { url = "https://files.pythonhosted.org/packages/02/08/9c41fb51ab5b43eb21674aff13df270e8ba6c4b29c8624e328dc7a9482af/distlib-0.4.3-py2.py3-none-any.whl", hash = "sha256:4b0ce306c966eb73bc3a7b6abad017c556dadd92c44701562cd528ac7fde4d5b", size = 470628, upload-time = "2026-06-12T08:04:50.506Z" }, ] [[package]] @@ -650,7 +650,7 @@ name = "exceptiongroup" version = "1.3.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "typing-extensions", marker = "python_full_version < '3.13'" }, + { name = "typing-extensions", marker = "python_full_version < '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/50/79/66800aadf48771f6b62f7eb014e352e5d06856655206165d775e675a02c9/exceptiongroup-1.3.1.tar.gz", hash = "sha256:8b412432c6055b0b7d14c310000ae93352ed6754f70fa8f7c34141f91c4e3219", size = 30371, upload-time = "2025-11-21T23:01:54.787Z" } wheels = [ @@ -786,7 +786,7 @@ wheels = [ [[package]] name = "huggingface-hub" -version = "1.18.0" +version = "1.19.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, @@ -800,9 +800,9 @@ dependencies = [ { name = "typer" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/fb/d8/748ea0a47f0fa15227fe682f7a80826b4b7c096e4818044b8f56d6cb66d6/huggingface_hub-1.18.0.tar.gz", hash = "sha256:f0c5ecd1ef8c6a60f86f61ee278f2c1570ba9e279c9f54de9094210723b3613b", size = 812699, upload-time = "2026-06-05T09:26:33.401Z" } +sdist = { url = "https://files.pythonhosted.org/packages/88/27/629cfe58c582f92ded066c4a07d1a057ff617118ab7973200f770bd853cb/huggingface_hub-1.19.0.tar.gz", hash = "sha256:fd771622182d40977272a923953ee3b1b13538f9f8a7f5d78398f10af0f1c0bd", size = 824721, upload-time = "2026-06-11T12:33:18.665Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/0b/03/40a05316cb6616e5b7efd7773656441ab04b4b022c2199e79bb4622a92a3/huggingface_hub-1.18.0-py3-none-any.whl", hash = 
"sha256:729be4a976fb706dcc02d176bcda8a3f32bdf21a294e8f4b3dda6fbcbc9c1ab1", size = 684411, upload-time = "2026-06-05T09:26:31.48Z" }, + { url = "https://files.pythonhosted.org/packages/b2/a5/558da89f66464d8d0229ff497e8b8666977de2d8cf48c28a2862ecf1250f/huggingface_hub-1.19.0-py3-none-any.whl", hash = "sha256:1dc72e1f6b4d6df6b30eb72e57d00514ef453d660f04af2b87f0e67267f31ee0", size = 693398, upload-time = "2026-06-11T12:33:16.695Z" }, ] [[package]] @@ -852,7 +852,7 @@ name = "importlib-metadata" version = "9.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "zipp" }, + { name = "zipp", marker = "python_full_version < '3.12'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/a9/01/15bb152d77b21318514a96f43af312635eb2500c96b55398d020c93d86ea/importlib_metadata-9.0.0.tar.gz", hash = "sha256:a4f57ab599e6a2e3016d7595cfd72eb4661a5106e787a95bcc90c7105b831efc", size = 56405, upload-time = "2026-03-20T06:42:56.999Z" } wheels = [ @@ -961,19 +961,19 @@ wheels = [ [[package]] name = "lance-namespace" -version = "0.8.2" +version = "0.8.5" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "lance-namespace-urllib3-client" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/33/fd/3a8731b2ed83ba198b15b5963c6df4836736057f23206107b0ab4a5f57fd/lance_namespace-0.8.2.tar.gz", hash = "sha256:78cd6ad2f2764bccded1d8b64474419cc5571956b68a23ad2770977ddaeb03a1", size = 11281, upload-time = "2026-06-05T04:46:23.696Z" } +sdist = { url = "https://files.pythonhosted.org/packages/d0/22/3d8eb4e913edf36cda416f1dca287147af508abe3ca89bf0e619b9fa9f54/lance_namespace-0.8.5.tar.gz", hash = "sha256:b4a5967afcbf9924300a0b9d2fb74c44a23f76907e8734ebed6e0e3a561b0df0", size = 11531, upload-time = "2026-06-11T16:20:26.77Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/6a/cb/7f3cc83b8b35a27a27539c3086562d11010f10ca113808ce1078308ca5c0/lance_namespace-0.8.2-py3-none-any.whl", hash = 
"sha256:6531a4d8b95f201835b954a949f890d03cbc3124aca5f1dd21d999157a08935f", size = 13113, upload-time = "2026-06-05T04:46:27.781Z" }, + { url = "https://files.pythonhosted.org/packages/c0/da/afc3cdc42fc2dcf885a9d3524bf2c3bd2a9df89b1668b1806dec5e436263/lance_namespace-0.8.5-py3-none-any.whl", hash = "sha256:6d3e2b8da586d06409494b56955a63c3152eeae2883cd2e8ba4e80d20dc0de0f", size = 13383, upload-time = "2026-06-11T16:20:26.004Z" }, ] [[package]] name = "lance-namespace-urllib3-client" -version = "0.8.2" +version = "0.8.5" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "pydantic" }, @@ -981,9 +981,9 @@ dependencies = [ { name = "typing-extensions" }, { name = "urllib3" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/5d/98/a0bb656a4f2d5989e1267a62acbb5a9ed8eb15ac45fbfe380b5a59dba642/lance_namespace_urllib3_client-0.8.2.tar.gz", hash = "sha256:82f0a5c9b6b7fde67326d6038b89ed807e8d14692e461246f1a7df5c36b804d6", size = 222291, upload-time = "2026-06-05T04:46:24.958Z" } +sdist = { url = "https://files.pythonhosted.org/packages/44/6f/1291523488523656342d1b424b76b4d91f3af6413b3b4ada43b888a87043/lance_namespace_urllib3_client-0.8.5.tar.gz", hash = "sha256:29922ffb5b0621e24a83183454ec3e5a5828f46d91a95d58efc35db05dec4e62", size = 228595, upload-time = "2026-06-11T16:20:23.985Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ff/58/6a993bf50375170547d0e0bfe9189cc9b378b89482dc2c7bb75ef170a49a/lance_namespace_urllib3_client-0.8.2-py3-none-any.whl", hash = "sha256:cb8dc098fcd42f848eb5206fb49ebc3b5f162ee32b5c4155a5048ffd30a7cd37", size = 364909, upload-time = "2026-06-05T04:46:26.504Z" }, + { url = "https://files.pythonhosted.org/packages/10/e2/62883d1f43a283ac08f00af993c6a2b92e4ca206fa1ccba032420d8dc578/lance_namespace_urllib3_client-0.8.5-py3-none-any.whl", hash = "sha256:8af211ddc6e73df713ffb59368c94780508e732b19dacb4239d937aaff2f8e3c", size = 369857, upload-time = "2026-06-11T16:20:25.006Z" }, ] [[package]] @@ -1874,17 
+1874,17 @@ wheels = [ [[package]] name = "protobuf" -version = "7.35.0" +version = "7.35.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/60/fd/5b1491d9e4b586d621c54f4c36b888714164b6875f8d6afa3f9072906a51/protobuf-7.35.0.tar.gz", hash = "sha256:a2efd84605f41e559f1881b0912b44099d0a2ac9bf46b3474823f10fb393b0e6", size = 458677, upload-time = "2026-05-19T23:02:29.197Z" } +sdist = { url = "https://files.pythonhosted.org/packages/da/01/9ef0afd7999eb9badb3a768b4aedd78c86d4c65cfaf1958ab276199e76b4/protobuf-7.35.1.tar.gz", hash = "sha256:ce115a26fe0c39a2c29973d914d327e516a6455464489fe3cd1e51a1b354f81a", size = 458717, upload-time = "2026-06-11T21:55:40.257Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/83/ee/93d06e358a4aa32280b00e722d3ea0a1f25fc3cc5778d80581c9cca2c10e/protobuf-7.35.0-cp310-abi3-macosx_10_9_universal2.whl", hash = "sha256:66be6c513931c794fa92c080ffee41671390da3d79da219cf9c0c0907f035dda", size = 433225, upload-time = "2026-05-19T23:02:19.884Z" }, - { url = "https://files.pythonhosted.org/packages/8b/39/1c76c2da93f3c507e958e0aecee2391cc44d4625de6c728bbc555195b5a8/protobuf-7.35.0-cp310-abi3-manylinux2014_aarch64.whl", hash = "sha256:fcbe42a4ac09d3ec9c987ddfcd956afd0b15f1ff613bd8371bde9405ffd5c8e5", size = 328847, upload-time = "2026-05-19T23:02:22.3Z" }, - { url = "https://files.pythonhosted.org/packages/91/1a/39f7ce90a238c1a987a4d81ec26379e02ca0aff367de68e4a1fa474215b9/protobuf-7.35.0-cp310-abi3-manylinux2014_s390x.whl", hash = "sha256:4cbf5cc286130e06a6c9bbefac442431173906dfcc979712183d4adcc01b37ee", size = 344030, upload-time = "2026-05-19T23:02:23.591Z" }, - { url = "https://files.pythonhosted.org/packages/70/5b/6baf9008817964454055ff3fe65f1de0b5f1e26c80c82f7fb108b7cd4ea3/protobuf-7.35.0-cp310-abi3-manylinux2014_x86_64.whl", hash = "sha256:6c0f98f10c8a05ea30f8993dfef2de093d27b490fdae78bb60c8343795d55011", size = 327130, upload-time = "2026-05-19T23:02:24.637Z" }, - { url = 
"https://files.pythonhosted.org/packages/8e/e5/e46adb0badc388bfb84877a5f9f026aff63f60e611016cf64dbe77e05446/protobuf-7.35.0-cp310-abi3-win32.whl", hash = "sha256:4c4617b83ade0e279d1d2bfe04025a1adb87f9ed657de038620dc0ff959357f6", size = 428946, upload-time = "2026-05-19T23:02:25.741Z" }, - { url = "https://files.pythonhosted.org/packages/a7/ab/547fbd9e16d879dd13c167478f8ae0a83a428008ca07a5e06acdc23ad473/protobuf-7.35.0-cp310-abi3-win_amd64.whl", hash = "sha256:f05bcadf9a2a6b8dda047007075135fb7d08c73d9177aabc067e1be46881a201", size = 439996, upload-time = "2026-05-19T23:02:26.808Z" }, - { url = "https://files.pythonhosted.org/packages/b8/ef/50433d346c56657a70d27f156c7b349ac59a068b01de4eb796e747eecc43/protobuf-7.35.0-py3-none-any.whl", hash = "sha256:c13f325cf242bad135c350629eeb5d54b24228eb472fb3e2e9ebbd4c5dc20ca0", size = 171659, upload-time = "2026-05-19T23:02:27.842Z" }, + { url = "https://files.pythonhosted.org/packages/10/03/8aeeb7458d22546bf64b5250ca1daeb5ff757d900e8e4a7476c6f0db843e/protobuf-7.35.1-cp310-abi3-macosx_10_9_universal2.whl", hash = "sha256:24f857477359a85c0c235261b8ba905fd51b2562f4a64ca1df5473f29850cbf6", size = 433226, upload-time = "2026-06-11T21:55:31.719Z" }, + { url = "https://files.pythonhosted.org/packages/37/4b/dfb89eb0e652a1ff073c39a59fb5e3a83cfe9b57a2c83fa6d78270101767/protobuf-7.35.1-cp310-abi3-manylinux2014_aarch64.whl", hash = "sha256:11d6b0ec246892d85215b0a13ca6e0233cf5284b68f0ac02646427f4ff88a799", size = 328847, upload-time = "2026-06-11T21:55:34.035Z" }, + { url = "https://files.pythonhosted.org/packages/0f/58/dc12f2cd484951524af6e3382c785869b9b3fb5e52ee95ae23add53ee8f9/protobuf-7.35.1-cp310-abi3-manylinux2014_s390x.whl", hash = "sha256:b73f9489a4b8b1c9cb1f8ed951c736392592edb24b9d6819f36d2e10b171d5b4", size = 344030, upload-time = "2026-06-11T21:55:34.941Z" }, + { url = "https://files.pythonhosted.org/packages/e4/be/5b3cfe508bfab6761414ff944e3366eb13be4fd71efcd69450f89ba39f43/protobuf-7.35.1-cp310-abi3-manylinux2014_x86_64.whl", 
hash = "sha256:74758715c53d7158fb76caf4f0cfdacc5329a4b1bb994f865d6cf302d413a1c4", size = 327130, upload-time = "2026-06-11T21:55:35.921Z" }, + { url = "https://files.pythonhosted.org/packages/d8/bc/6d6c7ba8709c85f8f2c390b2b118d6fb08a783676a572271851bf45a7d22/protobuf-7.35.1-cp310-abi3-win32.whl", hash = "sha256:353652e4efd0bca5b5fc2656abf8307ef351f0cf938c9eba09f0e09c20a25c30", size = 428945, upload-time = "2026-06-11T21:55:37.034Z" }, + { url = "https://files.pythonhosted.org/packages/0a/19/8d0cb6f20a1ef7b18f1c8986ad5783f22f84cce39c6ce9a6e645ea55192e/protobuf-7.35.1-cp310-abi3-win_amd64.whl", hash = "sha256:230a75ddfc2de4806e56696ce9640c1cdfdb6543b7cfce98d42a4c0a0e7bdb87", size = 439996, upload-time = "2026-06-11T21:55:38.123Z" }, + { url = "https://files.pythonhosted.org/packages/19/c7/5f7c636ec43e0c545e28d1f1db71990108306f7bdcb89f069ba97e428e7f/protobuf-7.35.1-py3-none-any.whl", hash = "sha256:4bc97768d8fe4ad6743c8a19403e314511ed9f6d13205b687e52421c023ac1b9", size = 171659, upload-time = "2026-06-11T21:55:39.155Z" }, ] [[package]] @@ -2278,15 +2278,15 @@ wheels = [ [[package]] name = "python-discovery" -version = "1.4.0" +version = "1.4.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "filelock" }, { name = "platformdirs" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a6/12/38c1a0b1e64806780c9563e3fc9f6e472251839662587cfbe9bfaf2ae10a/python_discovery-1.4.0.tar.gz", hash = "sha256:eb8bc7daad3c226c147e45bb4e970a1feb1bf4048ee178e6db59e197b8010ce3", size = 68455, upload-time = "2026-05-28T01:15:37.639Z" } +sdist = { url = "https://files.pythonhosted.org/packages/0b/1a/cbbaf13b730abb0a16b964d984e19f2fe520c21a4dc664051359a3f5a9e7/python_discovery-1.4.2.tar.gz", hash = "sha256:8f3746c4b4968d22afbb97d36e1a0e5b66e6c0f297290f2e95f05b9b8bf18690", size = 70277, upload-time = "2026-06-11T16:10:42.383Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/c8/8d/3d316429f65029532bb1e28ff77b797d86b5ac3915bb44ca4e19aa283d43/python_discovery-1.4.0-py3-none-any.whl", hash = "sha256:26ed78d703e234879a66244c7d4114563fb13ec5cd30a2d1357e5fb4850782da", size = 33217, upload-time = "2026-05-28T01:15:36.573Z" }, + { url = "https://files.pythonhosted.org/packages/1a/82/a70006589557f267f15bd384c0642ad49f0d97b690c3a05b166b9dcbad3b/python_discovery-1.4.2-py3-none-any.whl", hash = "sha256:475803f53b7b2ed6e490e27373f9d8340f7d2eebf9acdaf645d7d714c97bb500", size = 33886, upload-time = "2026-06-11T16:10:41.192Z" }, ] [[package]] @@ -2875,27 +2875,27 @@ wheels = [ [[package]] name = "ruff" -version = "0.15.16" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a6/bd/5f7ec371001337d8fa61701c186ff8b613ecac1651848c5950f4c4d5f2e9/ruff-0.15.16.tar.gz", hash = "sha256:d05e78d38c78caf020b03789e25106c93017db5a0cb6e2819885018c61343b78", size = 4714267, upload-time = "2026-06-04T16:33:09.974Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/0c/42/53ef1c3953f157956db9bf7861e3bc50b9b887ce93300aa48cdba8336fe6/ruff-0.15.16-py3-none-linux_armv6l.whl", hash = "sha256:6ac3c0b3969cc6cf6b158c4e2f8f682acb58e7d700d8a44b65ecdc72d66ab0b2", size = 10709025, upload-time = "2026-06-04T16:32:51.935Z" }, - { url = "https://files.pythonhosted.org/packages/93/9a/a79159346f19134a956607754e57d8d128f7a4c00f4ad2f7514d224c172c/ruff-0.15.16-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:197c207ed75ffba54a0dec23db4aa939a27a3053073e085e0042433cbdc58e4a", size = 11063550, upload-time = "2026-06-04T16:32:42.24Z" }, - { url = "https://files.pythonhosted.org/packages/bc/72/3ce2ac000a5299ec238e01f51397b3b653c93b077d9b1bfe8715bb895f20/ruff-0.15.16-py3-none-macosx_11_0_arm64.whl", hash = "sha256:3a39fec45ab316cc23e7558f23fea4a70403ddb5648ea9a4a3854a16973d0071", size = 10421345, upload-time = "2026-06-04T16:32:37.251Z" }, - { url = 
"https://files.pythonhosted.org/packages/b0/c2/cc7fad3ec9169373f5b6a18f1917b91080feec40c3f9658334a1d28e2f03/ruff-0.15.16-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ba93191d79003116b95128c9d306e045200fdbd0bccb782b110f3cd1d4abc5cf", size = 10757217, upload-time = "2026-06-04T16:32:54.722Z" }, - { url = "https://files.pythonhosted.org/packages/69/d2/3474009eaa0a65b31fa7152a2fad5e2f050c640ceb1e6b02ee6922e94c82/ruff-0.15.16-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c6ee4b90520630120ef032aa5cc10db483852dff950e78b1d717e2993a61ac8d", size = 10507035, upload-time = "2026-06-04T16:33:05.343Z" }, - { url = "https://files.pythonhosted.org/packages/ca/81/b7ae6ccbd11f0c8dc3d5d67fc4be9b57ff57ca86ba56152021378e1277f2/ruff-0.15.16-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4e4215bc938bc3c8215c1472c1aa437e310fee20cd427335fec9d7e609563628", size = 11255291, upload-time = "2026-06-04T16:32:49.49Z" }, - { url = "https://files.pythonhosted.org/packages/d9/e1/46e526f1a7cc90857ce6ddf25fbb77eb6568651ac38d71b033af07076dd5/ruff-0.15.16-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7c8d26be963b090f10e29abc8b3e74a2a321f6fa34e02424e30b5af89350ecbb", size = 12124922, upload-time = "2026-06-04T16:33:07.821Z" }, - { url = "https://files.pythonhosted.org/packages/1a/da/5c791b088b596b24d0deb967fa28ae02ad751a140c0b9ea81c5ab915d6c0/ruff-0.15.16-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f198cf4123602a2280ed46c307bcbafe41758d6fee5b456b6b6058ca1514b3b4", size = 11332186, upload-time = "2026-06-04T16:33:02.971Z" }, - { url = "https://files.pythonhosted.org/packages/72/11/5da87abe20047c8962361473923ebb2f62b595250126aadfad8c20649c1e/ruff-0.15.16-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb27515fa6240fb586ae82b901a59e67d24acff86f2190b433dc542fe0435aeb", size = 11373541, upload-time = "2026-06-04T16:32:47.007Z" }, - { url = 
"https://files.pythonhosted.org/packages/fe/2a/8554754c23a854ae3fd6b507e36ad61ddb121e298c6d5d617dec94ed0f14/ruff-0.15.16-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:a267c46ba1593fc26b8eecbea050b39d40c0b6bb7781ee11c90a02cd10032951", size = 11353014, upload-time = "2026-06-04T16:32:34.795Z" }, - { url = "https://files.pythonhosted.org/packages/62/25/62ea41529ec89f742ea3fed9cb1059c72877ec7cf9b9e99ac9cf3294d1d9/ruff-0.15.16-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:528c68f39a91498a8d50e91ff5985df3d105782bab49cc378e73ac26bff083e8", size = 10737467, upload-time = "2026-06-04T16:32:26.348Z" }, - { url = "https://files.pythonhosted.org/packages/90/17/334d3ad9de4d40f9dd58fdd09e35ce64553bb501e2f19a839e2fb6be14fc/ruff-0.15.16-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:7ed55c58950df60589a9a7a5d2f8fa5f54ebd287163be805adfe6ee95a9de123", size = 10521910, upload-time = "2026-06-04T16:32:32.54Z" }, - { url = "https://files.pythonhosted.org/packages/4d/bd/3ac7c6ae77a885c1004b3dda2446ea401768d24f851c14b4ad4b24f6639c/ruff-0.15.16-py3-none-musllinux_1_2_i686.whl", hash = "sha256:d482feaf51512b50f9790ceb417a56a61dd1e9d9bf967662b9ed27c01b34f53a", size = 10979190, upload-time = "2026-06-04T16:32:57.492Z" }, - { url = "https://files.pythonhosted.org/packages/33/d7/609546e6a413c3f216fbf2a50c928f97c80939154f6a0503114094a86191/ruff-0.15.16-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:1e15bc8c94513dae2a40cc9ef07c94fdd4ecc9e29dabebeebe170f952322c9e3", size = 11477014, upload-time = "2026-06-04T16:32:44.687Z" }, - { url = "https://files.pythonhosted.org/packages/74/0d/f2cd247ad32633a5c36e97141a2c21b11c6279f7957bc2ff360b1e08fddd/ruff-0.15.16-py3-none-win32.whl", hash = "sha256:580378f7bd4aa25f72e74aa54948a9622f142b1e509521dd10902e886681cc1e", size = 10735541, upload-time = "2026-06-04T16:32:30.145Z" }, - { url = "https://files.pythonhosted.org/packages/8b/9e/02e845ef151b1dee585e55c4739f8e1734ae1d9f1221dff65761c162208b/ruff-0.15.16-py3-none-win_amd64.whl", hash = 
"sha256:408256017284eddf98fff77b29aa4fb30f586042d535b2d9befc6512f400aaec", size = 11843403, upload-time = "2026-06-04T16:32:39.76Z" }, - { url = "https://files.pythonhosted.org/packages/15/19/016553f86f207450aebebc2b2b5088d086b901cc8186c02ac4284db3bd88/ruff-0.15.16-py3-none-win_arm64.whl", hash = "sha256:8cd61783afb39638a7133ef0d2dfb1e91277593962f81b5a8423eb0b888a6121", size = 11134555, upload-time = "2026-06-04T16:33:00.136Z" }, +version = "0.15.17" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/8c/a9/3abdf488f1bf3d24c699415e454ed554a6350d5d89ce183be1ee0a3361ac/ruff-0.15.17.tar.gz", hash = "sha256:2ec446937fd16c8c4de2674a209cc5af64d9c6f17d21fbf1151054fa0bcf5219", size = 4743346, upload-time = "2026-06-11T17:54:47.663Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/db/4d/e11259f5da07cb6afb2d074c31bf09da9671993f7329d4f15d2fdc458301/ruff-0.15.17-py3-none-linux_armv6l.whl", hash = "sha256:d9feddb927fc68bd295f5eebc587a7e42cfaf9b65f60ca4a2386febff575da8f", size = 10856677, upload-time = "2026-06-11T17:54:49.533Z" }, + { url = "https://files.pythonhosted.org/packages/29/3e/772d679e1a0dc058e58875bd2c0cb713a0530877b4a76fee3c7966df0d49/ruff-0.15.17-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:25805a226d741c47d274a35ad5c10a7dde175fcddfa511d7cf3da0a21eb3eab7", size = 11223443, upload-time = "2026-06-11T17:55:00.573Z" }, + { url = "https://files.pythonhosted.org/packages/68/58/bd41f7688b2fd5623012605130ed70e60aa7f2244baa3d5066bdd61530c8/ruff-0.15.17-py3-none-macosx_11_0_arm64.whl", hash = "sha256:f6ad73b14c2d18a3bf8ad7cb6974294d7f613a7898604826058e6ac64918ef4d", size = 10566458, upload-time = "2026-06-11T17:55:07.52Z" }, + { url = "https://files.pythonhosted.org/packages/d8/5b/733371013fcf1ec339e477ece6ab42bfe10bdd9bba8ee88a9516aa56bfc0/ruff-0.15.17-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ba0c1e4f95bcb3869d0d30cbd5917071ef2e28665abfec970cdab0492c713ed", 
size = 10914483, upload-time = "2026-06-11T17:55:05.501Z" }, + { url = "https://files.pythonhosted.org/packages/bd/cc/6f24251cc0252f7239391ccb85833f320efad14ebe5b443943f37ced6332/ruff-0.15.17-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:81647960f10bff57d2e51cadd0c3950fe598400c852863a038720ef5b8cca91e", size = 10647497, upload-time = "2026-06-11T17:54:57.733Z" }, + { url = "https://files.pythonhosted.org/packages/68/dd/0d10c17ce1a1624d6fc3156309c3f834fdb5dfaad026ec90c85684f3990e/ruff-0.15.17-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0e01a84ddbc8c16c23055ba3924476850f1bbc1917cebbb9376665a63e74260d", size = 11416967, upload-time = "2026-06-11T17:54:51.461Z" }, + { url = "https://files.pythonhosted.org/packages/2f/91/556bfb156f6144f355e831c23db00b2fc4120f86b3ce81cc5f7fd2df51f3/ruff-0.15.17-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:84fe9f653152f8f294f9f7e03bf3a453d8b4a27f7a59c78c8666167f2b17b96c", size = 12335770, upload-time = "2026-06-11T17:54:45.793Z" }, + { url = "https://files.pythonhosted.org/packages/88/82/8b5999aa13355e926f06d9f42a32dcca862f623bf0363785ff89d607dffd/ruff-0.15.17-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8c0fe88a7676e7a05b73174d4d4a59cb2ac21ff8263583f87a81a6018475a978", size = 11575441, upload-time = "2026-06-11T17:54:32.661Z" }, + { url = "https://files.pythonhosted.org/packages/11/93/f10377bb04109ca0e8cbc483ff1982c54b6d418210041776f93e8cdc7fa9/ruff-0.15.17-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ecfc3c7878fff94633ab0348524e093f9ce3243080416dd7d14f8ba400174719", size = 11557614, upload-time = "2026-06-11T17:54:34.698Z" }, + { url = "https://files.pythonhosted.org/packages/c7/a6/eeeae7f7d5493df41649ab3db92f086b2d0a30199e4efdf8e3dd7a033f24/ruff-0.15.17-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:b8461180b22420b1bdc289909410930761629fddf2a5aaf60fae1ab26cedc4c4", size = 11544450, upload-time = 
"2026-06-11T17:54:39.042Z" }, + { url = "https://files.pythonhosted.org/packages/32/88/5991ce565129a24dd4a00db1254b3b5db2e53018cbe4018ea5a89738e727/ruff-0.15.17-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:6eccbe50a038b503e7140b441aa9c7fc8c1f36edf23ebef9f4165c2f28f568b7", size = 10892524, upload-time = "2026-06-11T17:55:09.432Z" }, + { url = "https://files.pythonhosted.org/packages/f5/1d/0fdd248313425f55223968af04b0a42125466a8d88d21c1d99c6af0a51e8/ruff-0.15.17-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:382fc0521025f5a8ad447d8bdd523545d0d7646adb718eb1c2dac5065ec27c0f", size = 10659573, upload-time = "2026-06-11T17:54:36.824Z" }, + { url = "https://files.pythonhosted.org/packages/9e/0e/072e8260deb9461062ce9311ced27a8e541229a6ffd483013dd37661e43e/ruff-0.15.17-py3-none-musllinux_1_2_i686.whl", hash = "sha256:456d41fcd1b2777ad63f09a6e7121d43f7b688bbc76a800c10f7f8fb1f912c3f", size = 11127818, upload-time = "2026-06-11T17:55:03.124Z" }, + { url = "https://files.pythonhosted.org/packages/ab/b4/55060a34163121498014696b5f656db5b8c6963768f227dbf0d76b311073/ruff-0.15.17-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:b1a04bcc94ae6194e9db05d16ad31f298a7194bfbcb08258bbe589cee1d587b8", size = 11655901, upload-time = "2026-06-11T17:54:53.562Z" }, + { url = "https://files.pythonhosted.org/packages/49/71/9b29d6b87cef468d697f43c6a91e3fae4a80185779d7d5a4ef27d173439f/ruff-0.15.17-py3-none-win32.whl", hash = "sha256:596065960ab1ff593f744220c9fe6580eda00a95003cffa9f4048bb5b1bf0392", size = 10925574, upload-time = "2026-06-11T17:54:55.723Z" }, + { url = "https://files.pythonhosted.org/packages/3d/b2/8fc77f3723228836fa5d12497eb71c808f83782e10d058d2b15cfa14640b/ruff-0.15.17-py3-none-win_amd64.whl", hash = "sha256:6769e5fa1710b179b92e0bfa5a51735b35baea9013dadb06d5f44cbcf9547084", size = 12058788, upload-time = "2026-06-11T17:54:41.042Z" }, + { url = 
"https://files.pythonhosted.org/packages/2d/c7/c53e8dbff9c9dc4b7928773421ae294a5d28fcb8dcda1a089579d3a7e510/ruff-0.15.17-py3-none-win_arm64.whl", hash = "sha256:f3be1fbb34bcdfd146240d8fb92a709d4c2c8191348580a3c044ec60fa0b4456", size = 11355275, upload-time = "2026-06-11T17:54:43.635Z" }, ] [[package]] @@ -2944,15 +2944,15 @@ wheels = [ [[package]] name = "starlette" -version = "1.2.1" +version = "1.3.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/25/44/ec35f1b6e83094b997da438a02c8c9b0ade2b1e84cfc48bd4656780760a6/starlette-1.2.1.tar.gz", hash = "sha256:9b9b5ebb992e67d6093741e63c2f59e4f6fff986f81163c087867bd7b924b3f6", size = 2701854, upload-time = "2026-05-31T01:07:51.847Z" } +sdist = { url = "https://files.pythonhosted.org/packages/eb/e3/7c1dc7381d9f8ab7d854328ebfa884e62cb3f3d8549ddfd37c7814f42afa/starlette-1.3.1.tar.gz", hash = "sha256:05d0213193f2fbaae60e2ecb593b4add4262ad4e46536b54abe36f11a71724e0", size = 2703240, upload-time = "2026-06-12T09:23:11.602Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/1c/54/196d0c1db10af76baa4f64894448505d60d3cdf70ef92cbb35f46a4e4c71/starlette-1.2.1-py3-none-any.whl", hash = "sha256:4de0082d08c8f6764a85a54cf1120d6939507a19905c7768acad2a9f875d2b89", size = 73350, upload-time = "2026-05-31T01:07:50.09Z" }, + { url = "https://files.pythonhosted.org/packages/ec/bb/2799cc2ede3ed41131f8975621e7213dfc7ef4acbbaadfa440f32500c370/starlette-1.3.1-py3-none-any.whl", hash = "sha256:c7372aae11c3c3f26a42df7bd626cec2f47d03483d261d369516a615a53714c6", size = 73632, upload-time = "2026-06-12T09:23:10.017Z" }, ] [[package]] @@ -3202,7 +3202,7 @@ wheels = [ [[package]] name = "virtualenv" -version = "21.4.2" +version = "21.4.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "distlib" }, @@ -3211,9 +3211,9 @@ dependencies = [ { 
name = "python-discovery" }, { name = "typing-extensions", marker = "python_full_version < '3.11'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/e1/0d/4e93c8e6d1001a75763f87d8f5ecda8ebc7f4aa2153dddfaf4ae8892821a/virtualenv-21.4.2.tar.gz", hash = "sha256:38e6ee0a555615c0ea9da2ac7e9998fe8dc3b911dd33ad8eaad2020957653b0c", size = 7613326, upload-time = "2026-05-31T17:01:22.827Z" } +sdist = { url = "https://files.pythonhosted.org/packages/4b/50/7564c805bb8966d9771caaba8a143fa5e57c848ce4e7fdf2d55a1feb2ead/virtualenv-21.4.3.tar.gz", hash = "sha256:938ff0fd3f4e0f0d3a025f67a3d2f25e3c3aabbcd5857ea6170619138d72d141", size = 7644454, upload-time = "2026-06-11T16:47:04.843Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/bf/c4/557dc082be035381b85fdb2b74e21d3d21b57750b74f2b47a32f3a639ff9/virtualenv-21.4.2-py3-none-any.whl", hash = "sha256:854210ca524a1a4d0d744734f4acbc721c3ffe163b85bbf5d56d14d5ae2f0fae", size = 7594079, upload-time = "2026-05-31T17:01:20.735Z" }, + { url = "https://files.pythonhosted.org/packages/a2/8d/84b0d07c6b5f685f85ddf6c87a59d3a8a895a3dfd89e759666fabe951b94/virtualenv-21.4.3-py3-none-any.whl", hash = "sha256:75f4127d4067397c64f38579ce918fec6bf9ca2cd4f48685e82952cc3c035840", size = 7625544, upload-time = "2026-06-11T16:47:01.78Z" }, ] [[package]] From 0401ccd65ce06735da15e0a2d811c309f7609372 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Fri, 12 Jun 2026 22:24:25 +0500 Subject: [PATCH 263/318] fix(tests): mock CI gate and bootstrap span in projection job tests --- tests/test_mcp_memory_jobs.py | 7 +++++++ tests/test_memory_jobs_coverage.py | 27 ++++++++++++++++++++++--- tests/test_observability_correlation.py | 5 +++++ 3 files changed, 36 insertions(+), 3 deletions(-) diff --git a/tests/test_mcp_memory_jobs.py b/tests/test_mcp_memory_jobs.py index 399304fe..2e72d7a0 100644 --- a/tests/test_mcp_memory_jobs.py +++ b/tests/test_mcp_memory_jobs.py @@ -8,6 +8,8 @@ from pathlib import Path +import pytest + from 
codeclone.surfaces.mcp.service import CodeCloneMCPService from .memory_fixtures import cli_memory_repo @@ -27,7 +29,12 @@ def test_mcp_manage_memory_projection_rebuild_status(tmp_path: Path) -> None: def test_mcp_manage_memory_enqueue_projection_rebuild_force_via_policy_off( tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, ) -> None: + monkeypatch.setattr( + "codeclone.memory.jobs.workflow.is_ci_environment", + lambda: False, + ) with cli_memory_repo(tmp_path, with_draft=False) as (root, _project, _store): service = CodeCloneMCPService(history_limit=2) payload = service.manage_engineering_memory( diff --git a/tests/test_memory_jobs_coverage.py b/tests/test_memory_jobs_coverage.py index 43fadf2d..badc546a 100644 --- a/tests/test_memory_jobs_coverage.py +++ b/tests/test_memory_jobs_coverage.py @@ -358,7 +358,14 @@ def test_execute_projection_rebuild_status_payload(tmp_path: Path) -> None: assert isinstance(payload["jobs"], list) -def test_execute_enqueue_skips_when_stimulus_unchanged(tmp_path: Path) -> None: +def test_execute_enqueue_skips_when_stimulus_unchanged( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr( + "codeclone.memory.jobs.workflow.is_ci_environment", + lambda: False, + ) with cli_memory_repo(tmp_path, with_draft=False) as (root, project, _store): config = replace( resolve_memory_config(root), @@ -397,7 +404,14 @@ def test_execute_enqueue_skips_when_stimulus_unchanged(tmp_path: Path) -> None: assert payload["reason"] == "stimulus_unchanged" -def test_execute_enqueue_enqueues_with_spawn_disabled(tmp_path: Path) -> None: +def test_execute_enqueue_enqueues_with_spawn_disabled( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr( + "codeclone.memory.jobs.workflow.is_ci_environment", + lambda: False, + ) with cli_memory_repo(tmp_path, with_draft=False) as (root, _project, _store): payload = execute_enqueue_projection_rebuild( root_path=root, @@ -464,7 +478,14 @@ def 
test_run_projection_jobs_once_completes_pending_job(tmp_path: Path) -> None: assert result.job_id is not None -def test_maybe_auto_enqueue_returns_payload_when_enqueued(tmp_path: Path) -> None: +def test_maybe_auto_enqueue_returns_payload_when_enqueued( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr( + "codeclone.memory.jobs.workflow.is_ci_environment", + lambda: False, + ) with cli_memory_repo(tmp_path, with_draft=False) as (root, _project, _store): config = replace( resolve_memory_config(root), diff --git a/tests/test_observability_correlation.py b/tests/test_observability_correlation.py index ee3740c3..ee232f9f 100644 --- a/tests/test_observability_correlation.py +++ b/tests/test_observability_correlation.py @@ -60,6 +60,11 @@ def test_run_projection_job_links_under_finish( monkeypatch.setattr( worker, "execute_experience_distillation", lambda **_k: {"status": "ok"} ) + monkeypatch.setattr( + worker, + "worker_bootstrap_sample", + lambda: ("2026-01-01T00:00:00.000000Z", 12.5), + ) bootstrap(ObservabilityConfig(enabled=True), root=tmp_path) store = MagicMock() From bbe82ce6c02a696492d62304fe0d647a1fa25345 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sat, 13 Jun 2026 12:54:46 +0500 Subject: [PATCH 264/318] feat(core): rank-preserving RRF fusion for hybrid memory search --- codeclone/memory/retrieval/ranking.py | 31 ++++++++++++++++- codeclone/memory/retrieval/service.py | 50 ++++++++++++++++++++++----- tests/test_semantic_ranking.py | 24 ++++++++++++- tests/test_semantic_search_service.py | 40 +++++++++++++++++++++ 4 files changed, 134 insertions(+), 11 deletions(-) diff --git a/codeclone/memory/retrieval/ranking.py b/codeclone/memory/retrieval/ranking.py index 78ca36ed..fc738fd5 100644 --- a/codeclone/memory/retrieval/ranking.py +++ b/codeclone/memory/retrieval/ranking.py @@ -39,6 +39,11 @@ # Finish-hook module-role drafts are workflow reminders, not durable module # descriptions. 
Keep them visible while placing substantive memory first. _WORKFLOW_CONTEXT_PENALTY = 0.65 +# Reciprocal Rank Fusion damping constant. 60 is the widely used default; a +# larger K flattens the gap between adjacent ranks. Used by hybrid search to +# fuse the lexical (BM25) and vector rankings without letting metadata boosts +# override a strong retrieval-engine match. +_RRF_K = 60 @dataclass(frozen=True, slots=True) @@ -144,4 +149,28 @@ def relevance_score( return round(score, 4) -__all__ = ["RankingContext", "relevance_score", "retrieval_lane"] +def reciprocal_rank_fusion( + *, lexical_rank: int | None = None, vector_rank: int | None = None +) -> float: + """Fuse a record's lexical (BM25) and vector ranks into one score. + + Each present rank contributes ``1 / (_RRF_K + rank)`` (0-based); a record + missing from a list simply omits that term. Higher is better. This keeps the + retrieval engines' own rank order as the lead signal — the caller adds + curation metadata only as a tie-break — so a strong lexical or vector match + is never buried by metadata boosts. 
+ """ + score = 0.0 + if lexical_rank is not None: + score += 1.0 / (_RRF_K + lexical_rank) + if vector_rank is not None: + score += 1.0 / (_RRF_K + vector_rank) + return score + + +__all__ = [ + "RankingContext", + "reciprocal_rank_fusion", + "relevance_score", + "retrieval_lane", +] diff --git a/codeclone/memory/retrieval/service.py b/codeclone/memory/retrieval/service.py index a29dfb31..04627932 100644 --- a/codeclone/memory/retrieval/service.py +++ b/codeclone/memory/retrieval/service.py @@ -44,7 +44,12 @@ trajectory_subject_keys, ) from .context_coverage import build_context_coverage -from .ranking import RankingContext, relevance_score, retrieval_lane +from .ranking import ( + RankingContext, + reciprocal_rank_fusion, + relevance_score, + retrieval_lane, +) from .semantic import audit_event_row if TYPE_CHECKING: @@ -537,9 +542,17 @@ def _rank_records( context: RankingContext, max_records: int, detail_level: MemoryDetailLevel, - proximity: Mapping[str, float] | None = None, + lexical_ranks: Mapping[str, int] | None = None, + vector_ranks: Mapping[str, int] | None = None, ) -> tuple[list[dict[str, object]], bool]: - proximity_map = proximity or {} + # Fusion mode (hybrid search) supplies the lexical (BM25) and/or vector + # rankings. There the metadata relevance_score is only a deterministic + # tie-break, so the vector signal must NOT also be folded into it via + # semantic_proximity (avoid double-counting). Scoped retrieval supplies + # neither map and keeps relevance_score as the sole ordering signal. 
+ fusion_enabled = lexical_ranks is not None or vector_ranks is not None + lexical_map = lexical_ranks or {} + vector_map = vector_ranks or {} candidate_ids = tuple(record.id for record in candidates) subjects_by_id = store.list_subjects_for_memories(candidate_ids) evidence_counts = store.count_evidence_for_memories(candidate_ids) @@ -552,7 +565,6 @@ def _rank_records( subjects=subjects, context=context, evidence_count=evidence_count, - semantic_proximity=proximity_map.get(record.id, 0.0), ) if score <= 0.0 and (context.scope_paths or context.symbols): continue @@ -560,10 +572,17 @@ def _rank_records( relations = _record_relations( store, project_id=project_id, record_ids=[item[1].id for item in base] ) - scored: list[tuple[float, str, dict[str, object]]] = [] + scored: list[tuple[float, float, str, dict[str, object]]] = [] for score, record, subjects, evidence_count in base: record_relations = relations.get(record.id) adjusted = _apply_conflict_penalty(score, record_relations) + if fusion_enabled: + primary = reciprocal_rank_fusion( + lexical_rank=lexical_map.get(record.id), + vector_rank=vector_map.get(record.id), + ) + else: + primary = adjusted summary = _serialize_record_summary( record=record, subjects=subjects, @@ -574,10 +593,10 @@ def _rank_records( ) if record_relations is not None: summary["relations"] = record_relations - scored.append((adjusted, record.id, summary)) - scored.sort(key=lambda item: (-item[0], item[1])) + scored.append((primary, adjusted, record.id, summary)) + scored.sort(key=lambda item: (-item[0], -item[1], item[2])) truncated = len(scored) > max_records - return [item[2] for item in scored[:max_records]], truncated + return [item[3] for item in scored[:max_records]], truncated def _coverage_summary( @@ -1481,6 +1500,18 @@ def _handle_semantic_search_mode( ) ] context = RankingContext.from_scope(scope_paths=(), symbols=(), blast_dependents=()) + # Reciprocal-rank-fusion inputs: the FTS list is already BM25-ordered, and + # the vector 
hits are ranked by descending proximity (id breaks ties for + # determinism). _rank_records fuses these and uses metadata only to break + # ties, so a strong lexical/vector match is no longer re-sorted away by + # metadata boosts. + lexical_ranks = {record.id: rank for rank, record in enumerate(fts_records)} + vector_ranks = { + record_id: rank + for rank, record_id in enumerate( + sorted(proximity, key=lambda rid: (-proximity[rid], rid)) + ) + } payload_records, truncated = _rank_records( store, project_id=project_id, @@ -1488,7 +1519,8 @@ def _handle_semantic_search_mode( context=context, max_records=max_results, detail_level=detail_level, - proximity=proximity, + lexical_ranks=lexical_ranks, + vector_ranks=vector_ranks, ) return { "mode": "search", diff --git a/tests/test_semantic_ranking.py b/tests/test_semantic_ranking.py index c7ecf4cd..d9d9ad9b 100644 --- a/tests/test_semantic_ranking.py +++ b/tests/test_semantic_ranking.py @@ -6,7 +6,11 @@ from __future__ import annotations from codeclone.memory.models import MemorySubject -from codeclone.memory.retrieval.ranking import RankingContext, relevance_score +from codeclone.memory.retrieval.ranking import ( + RankingContext, + reciprocal_rank_fusion, + relevance_score, +) from tests.memory_fixtures import make_module_record @@ -47,6 +51,24 @@ def test_default_proximity_matches_explicit_zero() -> None: assert implicit == explicit +def test_rrf_rewards_membership_in_both_lists() -> None: + # A record matched by both engines outranks one matched by a single engine, + # even when the single-engine match sits at the very top (rank 0). 
+ both = reciprocal_rank_fusion(lexical_rank=2, vector_rank=2) + assert both > reciprocal_rank_fusion(lexical_rank=0) + assert both > reciprocal_rank_fusion(vector_rank=0) + + +def test_rrf_rewards_a_better_rank() -> None: + assert reciprocal_rank_fusion(lexical_rank=0) > reciprocal_rank_fusion( + lexical_rank=5 + ) + + +def test_rrf_absent_from_both_lists_is_zero() -> None: + assert reciprocal_rank_fusion() == 0.0 + + def test_scoped_shortcircuit_beats_semantic() -> None: # A scoped query with no contextual subject match must return 0.0 even # with maximal proximity: semantic cannot inject out-of-scope records. diff --git a/tests/test_semantic_search_service.py b/tests/test_semantic_search_service.py index 53415a3b..2b8b24e1 100644 --- a/tests/test_semantic_search_service.py +++ b/tests/test_semantic_search_service.py @@ -195,6 +195,46 @@ def status(self) -> SemanticIndexStatus: assert trajectory_hits == [] +def test_rank_records_lets_rrf_lead_over_metadata(tmp_path: Path) -> None: + from codeclone.memory.retrieval import service as retrieval_service + from codeclone.memory.retrieval.ranking import RankingContext + + with memory_store(tmp_path) as (_root, project, store, _db_path): + # module_role carries a smaller type boost than document_link, so under + # the old metadata-led ordering doc_link would sort first. 
+ lexical_top = seed_module_role( + store, + project_id=project.id, + file_path="codeclone/a.py", + statement="alpha", + ) + meta_rich = seed_document_link( + store, + project_id=project.id, + doc_file="codeclone/b.py", + ref_path="codeclone/b.py", + statement="beta", + ) + context = RankingContext.from_scope( + scope_paths=(), symbols=(), blast_dependents=() + ) + payload, _truncated = retrieval_service._rank_records( + store, + project_id=project.id, + candidates=[lexical_top, meta_rich], + context=context, + max_records=10, + detail_level="compact", + lexical_ranks={lexical_top.id: 0, meta_rich.id: 1}, + vector_ranks={}, + ) + + ids = [item["id"] for item in payload] + # RRF leads: the BM25 rank-0 match wins even though document_link has the + # higher metadata boost. Metadata only breaks ties. + assert ids == [lexical_top.id, meta_rich.id] + + def test_unavailable_index_falls_back_to_fts(tmp_path: Path) -> None: with memory_store(tmp_path) as (root, project, store, db_path): fts = seed_module_role( From 02313e73f0d8546e44496dd8e59ba68a25c94d1b Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sat, 13 Jun 2026 13:18:41 +0500 Subject: [PATCH 265/318] feat(core): lazy FastEmbed model load with resilient embed degradation --- .../memory/embedding/fastembed_provider.py | 52 ++++++++++------- codeclone/memory/retrieval/service.py | 40 ++++++++----- codeclone/memory/semantic/rebuild_workflow.py | 11 ++++ codeclone/surfaces/cli/memory.py | 4 ++ tests/test_cli_memory_semantic.py | 35 +++++++++++ tests/test_semantic_embedding.py | 25 +++++++- tests/test_semantic_rebuild_workflow.py | 58 ++++++++++++++++++- tests/test_semantic_search_service.py | 40 ++++++++++++- 8 files changed, 227 insertions(+), 38 deletions(-) diff --git a/codeclone/memory/embedding/fastembed_provider.py b/codeclone/memory/embedding/fastembed_provider.py index 47a3ce05..70006bee 100644 --- a/codeclone/memory/embedding/fastembed_provider.py +++ b/codeclone/memory/embedding/fastembed_provider.py @@ 
-7,7 +7,7 @@ from __future__ import annotations import importlib -from collections.abc import Iterable, Sequence +from collections.abc import Callable, Iterable, Sequence from pathlib import Path from typing import Protocol, cast @@ -40,9 +40,16 @@ def __init__( self.dimension = dimension self.cache_dir = cache_dir self.allow_model_download = allow_model_download - self._model = self._load_model() - - def _load_model(self) -> _TextEmbeddingModel: + # Verify the optional package eagerly (cheap) so "extra not installed" + # still fails at construction, but defer the expensive ONNX model load + # (~hundreds of MB / seconds) to the first embed. A provider that is + # built but never embeds — e.g. a semantic query against an index that + # turns out to be unavailable — then costs nothing. Callers degrade + # gracefully when the model is unavailable at embed time. + self._text_embedding = self._resolve_text_embedding() + self._model: _TextEmbeddingModel | None = None + + def _resolve_text_embedding(self) -> Callable[..., object]: try: fastembed = importlib.import_module("fastembed") except ImportError as exc: @@ -55,25 +62,28 @@ def _load_model(self) -> _TextEmbeddingModel: raise MemorySemanticUnavailableError( "fastembed package does not expose TextEmbedding" ) - try: - return cast( - _TextEmbeddingModel, - text_embedding( + return cast("Callable[..., object]", text_embedding) + + def _get_model(self) -> _TextEmbeddingModel: + if self._model is None: + try: + model = self._text_embedding( model_name=self.model_name, cache_dir=str(self.cache_dir), local_files_only=not self.allow_model_download, - ), - ) - except Exception as exc: - mode = ( - "download disabled" - if not self.allow_model_download - else "download allowed" - ) - raise MemorySemanticUnavailableError( - "fastembed embedding model is unavailable " - f"({self.model_name}; {mode}; cache={self.cache_dir}): {exc}" - ) from exc + ) + except Exception as exc: + mode = ( + "download disabled" + if not 
self.allow_model_download + else "download allowed" + ) + raise MemorySemanticUnavailableError( + "fastembed embedding model is unavailable " + f"({self.model_name}; {mode}; cache={self.cache_dir}): {exc}" + ) from exc + self._model = cast(_TextEmbeddingModel, model) + return self._model def embed(self, texts: Sequence[str]) -> list[list[float]]: return self.embed_documents(texts) @@ -87,7 +97,7 @@ def embed_documents(self, texts: Sequence[str]) -> list[list[float]]: def _embed_prefixed(self, texts: Sequence[str]) -> list[list[float]]: try: - raw_vectors = list(self._model.embed(list(texts))) + raw_vectors = list(self._get_model().embed(list(texts))) except Exception as exc: raise MemorySemanticUnavailableError( f"fastembed embedding failed for model {self.model_name}: {exc}" diff --git a/codeclone/memory/retrieval/service.py b/codeclone/memory/retrieval/service.py index 04627932..839d4a2c 100644 --- a/codeclone/memory/retrieval/service.py +++ b/codeclone/memory/retrieval/service.py @@ -14,7 +14,7 @@ from ...contracts import SEMANTIC_INDEX_FORMAT_VERSION from ..embedding import embed_query from ..enums import LinkRelation, MemoryConfidence, MemoryRecordType, MemoryStatus -from ..exceptions import MemoryContractError +from ..exceptions import MemoryContractError, MemorySemanticUnavailableError from ..experience.models import Experience from ..models import MemoryEvidence, MemoryQuery, MemoryRecord, MemorySubject from ..paths import ( @@ -1439,18 +1439,36 @@ def _handle_semantic_search_mode( match_mode=match_mode, ) status = semantic_index.status() if semantic_index is not None else None + proximity: dict[str, float] = {} + audit_hits: list[SemanticHit] = [] + trajectory_hits: list[SemanticHit] = [] + used_block: dict[str, object] | None = None if ( semantic_index is not None and embedding_provider is not None and status is not None and status.available ): - proximity, audit_hits, trajectory_hits = _semantic_hits( - index=semantic_index, - 
provider=embedding_provider, - query=statement, - k=max_results, - ) + try: + proximity, audit_hits, trajectory_hits = _semantic_hits( + index=semantic_index, + provider=embedding_provider, + query=statement, + k=max_results, + ) + except MemorySemanticUnavailableError as exc: + # The embedding model loads lazily, so an unavailable model (e.g. + # download disabled and not cached) first surfaces here. Degrade to + # FTS-only with the reason rather than failing the whole query. + semantic_reason = str(exc) + else: + used_block = _semantic_status_block( + status, + used=True, + provider_label=provider_label, + model=embedding_provider.model_id, + ) + if used_block is not None: candidates = _semantic_search_candidates( store, project_id=project_id, @@ -1467,14 +1485,8 @@ def _handle_semantic_search_mode( hits=trajectory_hits, detail_level=detail_level, ) - semantic_block = _semantic_status_block( - status, - used=True, - provider_label=provider_label, - model=embedding_provider.model_id, - ) + semantic_block = used_block else: - proximity = {} candidates = list(fts_records) audit_events = [] trajectories = [] diff --git a/codeclone/memory/semantic/rebuild_workflow.py b/codeclone/memory/semantic/rebuild_workflow.py index 505d8b71..6944508c 100644 --- a/codeclone/memory/semantic/rebuild_workflow.py +++ b/codeclone/memory/semantic/rebuild_workflow.py @@ -174,6 +174,17 @@ def execute_semantic_index_rebuild( project=resolved_project, ), ) + except MemorySemanticUnavailableError as exc: + # The embedding model loads lazily, so an unavailable model surfaces at + # the first embed here rather than at resolve. Report it the same way an + # unresolved provider does instead of letting the rebuild raise. 
+ return { + **base, + **empty, + "status": "unavailable", + "reason": str(exc), + "embedding_model": None, + } finally: close_semantic_index(writer) if owns_store and active_store is not None: diff --git a/codeclone/surfaces/cli/memory.py b/codeclone/surfaces/cli/memory.py index 2425c84d..f0fcf3db 100644 --- a/codeclone/surfaces/cli/memory.py +++ b/codeclone/surfaces/cli/memory.py @@ -1275,6 +1275,10 @@ def _run_semantic_search( limit=max(1, int(args.limit)), preview_chars=DEFAULT_MEMORY_STATEMENT_PREVIEW_CHARS, ) + except MemorySemanticUnavailableError as exc: + # The embedding model loads lazily, so an unavailable model surfaces at + # the first embed rather than at provider resolution. + return _semantic_unavailable(console, f"Semantic search unavailable: {exc}.") finally: if store is not None: store.close() diff --git a/tests/test_cli_memory_semantic.py b/tests/test_cli_memory_semantic.py index a8846aba..84e2ac43 100644 --- a/tests/test_cli_memory_semantic.py +++ b/tests/test_cli_memory_semantic.py @@ -16,6 +16,7 @@ import codeclone.surfaces.cli.memory as cli_memory from codeclone.config.memory import resolve_memory_config from codeclone.memory.embedding import DeterministicHashEmbeddingProvider +from codeclone.memory.exceptions import MemorySemanticUnavailableError from codeclone.memory.models import MemorySubject, generate_memory_id from codeclone.memory.project import resolve_memory_db_path, resolve_project_identity from codeclone.memory.semantic.models import ( @@ -238,6 +239,40 @@ def test_semantic_status_reports_provider_unavailable( assert "available yet" in out.replace("\n", " ") +def test_semantic_search_degrades_when_model_unavailable_at_embed( + tmp_path: Path, + capsys: pytest.CaptureFixture[str], + monkeypatch: pytest.MonkeyPatch, +) -> None: + _seed_semantic_repo( + tmp_path, statement="recover after restart", monkeypatch=monkeypatch + ) + + class _FailingProvider: + model_id = "diagnostic-hash-v1" + dimension = 64 + + def embed(self, texts: 
Sequence[str]) -> list[list[float]]: + raise MemorySemanticUnavailableError( + "model unavailable (download disabled)" + ) + + # Provider resolves (lazy); the model only fails when the search embeds. + monkeypatch.setattr( + cli_memory, "resolve_embedding_provider", lambda _config: _FailingProvider() + ) + + code = memory_main( + ["semantic", "search", "recover after restart", "--root", str(tmp_path)] + ) + + out = capsys.readouterr().out + assert code != 0 + assert "unavailable" in out.lower() + assert "model unavailable" in out.replace("\n", " ") + assert "Traceback" not in out + + def test_semantic_search_hydrates_and_renders_json( tmp_path: Path, capsys: pytest.CaptureFixture[str], diff --git a/tests/test_semantic_embedding.py b/tests/test_semantic_embedding.py index 4777a463..c05d9d2d 100644 --- a/tests/test_semantic_embedding.py +++ b/tests/test_semantic_embedding.py @@ -181,6 +181,24 @@ def test_fastembed_provider_uses_local_model_cache_and_prefixes( assert provider.embed(["legacy call"]) == [[1.0] * 384] +def test_fastembed_provider_defers_model_load_until_first_embed( + monkeypatch: pytest.MonkeyPatch, +) -> None: + created = _install_fake_fastembed(monkeypatch) + config = SemanticConfig(embedding_provider="fastembed") + + provider = resolve_embedding_provider(config) + # Construction verifies the package but must NOT load the ONNX model yet. + assert created == [] + assert provider.model_id == "fastembed:BAAI/bge-small-en-v1.5" + + embed_query(provider, "first call loads the model") + assert len(created) == 1 + # A second embed reuses the cached model instead of reloading it. + embed_documents(provider, ["reuse the model"]) + assert len(created) == 1 + + def test_fastembed_provider_honors_download_opt_in( monkeypatch: pytest.MonkeyPatch, ) -> None: @@ -192,6 +210,8 @@ def test_fastembed_provider_honors_download_opt_in( provider = resolve_embedding_provider(config) assert provider.dimension == 384 + # The download flag is passed when the model loads — i.e. 
at first embed. + embed_query(provider, "trigger lazy model load") assert [item.local_files_only for item in created] == [False] @@ -221,8 +241,11 @@ def test_fastembed_provider_fails_clear_when_model_unavailable( allow_model_download=allow_model_download, ) + # Resolve succeeds (cheap package check); the model load — and its failure — + # is deferred to the first embed. + provider = resolve_embedding_provider(config) with pytest.raises(MemorySemanticUnavailableError, match=message): - resolve_embedding_provider(config) + embed_query(provider, "boom") def test_fastembed_provider_fails_clear_when_embedding_call_fails( diff --git a/tests/test_semantic_rebuild_workflow.py b/tests/test_semantic_rebuild_workflow.py index 01524a35..9d0ec1ac 100644 --- a/tests/test_semantic_rebuild_workflow.py +++ b/tests/test_semantic_rebuild_workflow.py @@ -5,14 +5,20 @@ # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations +import dataclasses from pathlib import Path import pytest from codeclone.config.memory import resolve_memory_config -from codeclone.memory.exceptions import MemoryContractError +from codeclone.memory.exceptions import ( + MemoryContractError, + MemorySemanticUnavailableError, +) from codeclone.memory.semantic.rebuild_workflow import execute_semantic_index_rebuild +from .memory_fixtures import memory_store + def test_execute_semantic_rebuild_skipped_when_disabled(tmp_path: Path) -> None: config = resolve_memory_config(tmp_path) @@ -76,3 +82,53 @@ def close(self) -> None: with pytest.raises(MemoryContractError, match="database not found"): execute_semantic_index_rebuild(root_path=tmp_path, config=config) assert writer.closed is True + + +def test_execute_semantic_rebuild_unavailable_when_model_fails_at_embed( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + base = resolve_memory_config(tmp_path) + config = dataclasses.replace( + base, + semantic=base.semantic.model_copy( + update={"enabled": True, "embedding_provider": "diagnostic"} 
+ ), + ) + + class _Writer: + def known_ids(self) -> set[str]: + return set() + + def delete(self, ids: object) -> None: + return None + + def upsert(self, rows: object) -> None: + return None + + def close(self) -> None: + return None + + import codeclone.memory.semantic as semantic_pkg + from codeclone.memory.semantic import rebuild_workflow + + monkeypatch.setattr( + semantic_pkg, "resolve_semantic_index_writer", lambda _config: _Writer() + ) + monkeypatch.setattr( + rebuild_workflow, "build_semantic_index_sources", lambda **_kwargs: [] + ) + + def _raise_unavailable(**_kwargs: object) -> object: + # Provider resolves fine (lazy); the model only fails when the rebuild + # actually embeds. + raise MemorySemanticUnavailableError("model unavailable (download disabled)") + + monkeypatch.setattr(rebuild_workflow, "rebuild_semantic_index", _raise_unavailable) + + with memory_store(tmp_path) as (root, project, store, _db_path): + payload = execute_semantic_index_rebuild( + root_path=root, config=config, store=store, project=project + ) + assert payload["status"] == "unavailable" + assert "model unavailable" in str(payload["reason"]) diff --git a/tests/test_semantic_search_service.py b/tests/test_semantic_search_service.py index 2b8b24e1..ca694eb9 100644 --- a/tests/test_semantic_search_service.py +++ b/tests/test_semantic_search_service.py @@ -8,6 +8,8 @@ from collections.abc import Mapping, Sequence from pathlib import Path +from codeclone.memory.embedding import EmbeddingProvider +from codeclone.memory.exceptions import MemorySemanticUnavailableError from codeclone.memory.retrieval import query_engineering_memory from codeclone.memory.semantic.models import SemanticHit, SemanticIndexStatus from codeclone.memory.sqlite_store import SqliteEngineeringMemoryStore @@ -68,6 +70,7 @@ def _search( index: _FakeIndex, audit: Path | None = None, filters: Mapping[str, object] | None = None, + provider: EmbeddingProvider | None = None, ) -> dict[str, object]: return 
query_engineering_memory( store, @@ -79,7 +82,7 @@ def _search( query=query, semantic=True, semantic_index=index, - embedding_provider=_FakeProvider(), + embedding_provider=provider or _FakeProvider(), provider_label="diagnostic", audit_db_path=audit, filters=filters, @@ -235,6 +238,41 @@ def test_rank_records_lets_rrf_lead_over_metadata(tmp_path: Path) -> None: assert ids == [lexical_top.id, meta_rich.id] +class _FailingProvider: + model_id = "diagnostic-hash-v1" + dimension = 8 + + def embed(self, texts: Sequence[str]) -> list[list[float]]: + raise MemorySemanticUnavailableError("model unavailable (download disabled)") + + +def test_semantic_search_degrades_to_fts_when_model_unavailable(tmp_path: Path) -> None: + with memory_store(tmp_path) as (root, project, store, db_path): + fts = seed_module_role( + store, + project_id=project.id, + file_path="codeclone/a.py", + statement="alpha beta gamma", + ) + index = _FakeIndex([SemanticHit(source_id=fts.id, source="memory", score=0.9)]) + result = _search( + store, + root=root, + project_id=project.id, + db_path=db_path, + query="alpha", + index=index, + provider=_FailingProvider(), + ) + block = result["semantic"] + assert isinstance(block, dict) + # The lazy model load fails at embed; the query degrades to FTS-only and + # surfaces the reason instead of raising. 
+ assert block["used"] is False + assert "model unavailable" in str(block["reason"]) + assert fts.id in _record_ids(result) + + def test_unavailable_index_falls_back_to_fts(tmp_path: Path) -> None: with memory_store(tmp_path) as (root, project, store, db_path): fts = seed_module_role( From 8f02f95d3028eee969422239fc14d93f4598c2ac Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sat, 13 Jun 2026 13:21:00 +0500 Subject: [PATCH 266/318] docs(changelog): refine the 2.1.0a1 release narrative --- CHANGELOG.md | 251 +++++++++++++++++++++++---------------------------- 1 file changed, 114 insertions(+), 137 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 092aacfe..9622207c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,140 +1,117 @@ -# Changelog - -## [2.1.0a1] - Unreleased - -`2.1.0a1` opens the v2.1 alpha line with the structural change controller, -Engineering Memory with trajectory and experience layers, semantic retrieval, -Platform Observability, and a fully reorganized documentation site. - -### Added - -- **Structural Change Controller.** `start_controlled_change` / - `finish_controlled_change` reduce the agent edit cycle from 7-11 MCP calls - to 3-4. Blast radius projection (`get_blast_radius`), patch contract - verification with profile-aware depth (`check_patch_contract`), citation-based - claim guard (`validate_review_claims`), and deterministic review receipts - (`create_review_receipt`). 32 default agent-visible MCP tools. -- **Change intent lifecycle.** `manage_change_intent`: declare, check, clear, - queue, promote, recover. Renewable ownership leases with - own/recoverable/foreign-active classification. Optional SQLite backend with - configurable retention (default 7 days, max 14 in open source). -- **Engineering Memory.** SQLite store under `.codeclone/memory/` with typed, - evidence-linked repository facts (contracts, decisions, risks, git - provenance). 
Scoped retrieval via `get_relevant_memory` and - `query_engineering_memory`, FTS5 search, refresh-time staleness engine, - retention-driven vacuum, and MCP auto-sync - (`mcp_sync_policy=bootstrap_if_missing`). Agent-side draft recording with - human-governed promotion (VS Code Memory view or CLI - `codeclone memory approve`). Scope coverage metrics and - `finish_controlled_change(propose_memory=true)` for draft candidates on - accepted patches. -- **Trajectory memory.** Deterministic audit-derived workflow - timelines in Engineering Memory SQLite (`memory trajectory rebuild`), - scoped MCP/CLI retrieval (`trajectories[]`, `trajectory_*` query modes), - optional semantic source, and disabled-by-default local JSONL export profiles. - The `trajectory-v3` projection adds trajectory passports, contract-quality - and complexity scoring, anomaly detection, agent profiles, and dashboard - views. -- **Experience Layer.** Deterministic `experience-v1` advisory patterns are - distilled from canonical trajectories across all outcomes and surfaced through a separate - `experiences[]` retrieval lane with evidence and agent-diversity facets. - Projection jobs can distill experiences automatically, while - `promote_experience` converts a selected pattern into a human-governed - Engineering Memory draft rather than treating it as authority. -- **Semantic retrieval.** Opt-in `[tool.codeclone.memory.semantic]` with - LanceDB sidecar. Local `fastembed` provider (`BAAI/bge-small-en-v1.5`) via - `codeclone[semantic-local]`; `api` provider reserved for Team+; `local_model` - reserved for Enterprise. CLI `codeclone memory semantic status|rebuild|search`, - MCP `query_engineering_memory(mode=search, semantic=true)`. -- **Patch Trail.** Deterministic scope narrative at - `finish_controlled_change`: declared/changed/untouched-in-declared, - boundary-held paths, verification outcome, and audit anchors - (`patch_trail.computed`). 
Rebuild persists Patch Trail from audit into - Engineering Memory schema **1.6** and the current **`trajectory-v3`** - projection; scoped retrieval exposes `patch_trail_summary`. MCP finish accepts - optional `patch_trail_detail`. -- **Trajectory export enrichment (schema 2).** JSONL export rows now populate - `memory_precedents`, `trajectory_precedents`, `citations`, and - `patch_trail_summary`; export deduplicates superseded projection versions; - rebuild repoints trajectory memory evidence and deletes stale workflow rows. - Claim-validation event core stores bounded `validated_citations`; projection - supplements legacy path facts from stored audit payloads when event core lacks - them. -- **IDE surfaces.** VS Code extension Memory view: draft inbox, approve/reject - UX, QuickPick search, memory-for-active-file, search results webview. IDE - governance channel (`--ide-governance-channel`) with session HMAC attestation. - Workspace session stats and controller audit trail webviews (IDE-only MCP - tools; shared payloads in `codeclone/controller_insights/`). Trajectory - dashboard/detail views expose quality passports, anomalies, agent aggregates, - Patch Trail evidence, and a copyable dashboard brief. -- **Platform Observability.** Opt-in, development-only operation/span telemetry - correlates CLI, MCP, analysis, and projection-worker execution without - affecting canonical reports, gates, baselines, memory facts, or edit - authorization. The local SQLite trace captures RSS/CPU, MCP payload sizes, - database query shapes, pipeline costs, agent context pressure, and avoidable - work. CLI JSON/HTML trace views provide a diagnostic cockpit and waterfall; - MCP exposes the bounded `query_platform_observability` slicer. 
-- **Cursor plugin** (`plugins/cursor-codeclone/`): six skills, three rules - (including always-on `change-control-gate`), fail-closed `preToolUse` hook - via `codeclone.workspace_intent`, project hook installer with - `enforce_scope` (`python` | `repo`), and a `codeclone-structural-reviewer` - agent definition. -- **CLI controller query modes:** `--blast-radius`, `--patch-verify`, - `--session-stats`, `--audit`. -- **Documentation reorganization.** Book chapters organized 00-26 in thematic - groups. Four integration guide+contract splits merged into single pages - (VS Code, Claude Desktop, Codex, Cursor). Six-tab nav (Home, Get started, - Guides, Reference, Legal & plans, Maintainers). Doc-scope ownership comments - across guide and contract leaves. Dedicated chapters cover trajectory quality, - the Experience Layer, and Platform Observability with cross-linked diagrams. -- **Edition-specific feature tiers** (plans-and-retention): Engineering Memory - limits and retention, semantic provider editions (fastembed/api/local_model), - audit trail retention, and workspace intent registry limits per Open Source / - Team / Enterprise. -- Workspace hygiene tips when `.codeclone/` is not gitignored. -- MCP tool JSON schemas with per-parameter descriptions; `next_tool` hint in - analysis responses. -- Audit trail events for intent lifecycle and token budget tracking. -- Admonition indentation lint (`scripts/lint_admonitions.py`) covered by the - docs build contract test. - -### Changed - -- Default per-project workspace directory moved from `.cache/codeclone/` to - `.codeclone/`. CLI warns when legacy paths are still present. -- Documentation site build migrated from MkDocs to Zensical (`zensical.toml`); - docs workflow runs `uv run --with zensical==0.0.43 zensical build --clean --strict`. -- `pydantic` is now a base dependency. -- LCOM4 cohesion graph excludes Protocol methods and Pydantic - validation/serialization hooks; `computed_field` remains included. 
-- Repository test coverage gate raised to `>=99%`. - -### Fixed - -- **Memory draft persistence:** `open_memory_db` now uses - `synchronous=FULL` so every commit survives unclean MCP process exit - (SIGKILL, IDE restart, stdio timeout). Intent and audit stores keep - `synchronous=NORMAL` — their writes are frequent and recovery-designed. -- **Memory draft staleness:** refresh-time staleness engine no longer marks - draft records stale before human governance; drafts become subject to - staleness only after promotion to active. -- `finish_controlled_change` hygiene gate blocks only on `missing_evidence` - and `foreign_dirty_overlap`; out-of-scope dirt is advisory - (`accepted_with_external_changes`). Recoverable intents do not grant - foreign attribution. -- `dirty_scope_policy=continue_own_wip` allows resuming own dirty scope when - no foreign overlap. -- Queued foreign intents no longer populate `foreign_dirty_overlaps`. -- Patch verify rejects identical before/after runs for python_structural and - governance_config profiles (`reason: after_run_not_new`). -- Negative `health_delta` surfaces `health_regression_advisory`; Claim Guard - warns on overclaims when `patch_health_delta < 0`. -- MCP doc URLs updated across help topics, plugin READMEs, skill definitions, - and test expectations after book renumber. -- Blast-radius graph core moved to `codeclone/analysis/blast_radius.py` (fixes - CLI-to-MCP import violation). -- `respect_pyproject=false` no longer surfaces golden-fixture clone groups as - false `new` regressions. +Changelog + +[2.1.0a1] - Unreleased + +2.1.0a1 opens the CodeClone 2.1 alpha line with intent-first structural +change control, Engineering Memory, trajectory and experience layers, semantic +retrieval, Platform Observability, native agent integrations, and a reorganized +documentation site. + +Added + +* Structural Change Controller. 
The new + start_controlled_change / finish_controlled_change workflow reduces the + governed agent edit cycle from 7–11 MCP calls to 3–4. It combines workspace + checks, intent declaration, blast-radius mapping, bounded edit scope, patch + verification, review-claim validation, and deterministic review receipts. + CodeClone now exposes 32 agent-visible MCP tools by default. +* Change-intent lifecycle and multi-agent coordination. + manage_change_intent supports declare, check, clear, queue, promote, and + recover operations. Renewable leases, ownership classification, optional + SQLite coordination, retention, workspace hygiene, and recoverable-intent + handling make concurrent agent work explicit and auditable. +* Engineering Memory. A local SQLite knowledge graph stores typed, + evidence-linked repository facts such as contracts, decisions, risks, test + anchors, prior changes, and git provenance. Agents receive ranked, + scope-aware context through get_relevant_memory and + query_engineering_memory; drafts remain human-governed and can be approved + through the CLI or VS Code Memory view. Memory never authorizes edits or + overrides the canonical report, gates, or Patch Trail. +* Trajectory Memory and Patch Trail. Audit-derived trajectories preserve + agent workflows, declared scope, actual changed paths, verification outcomes, + incidents, citations, and review evidence. The current trajectory-v3 + projection adds quality passports, complexity scoring, anomaly detection, + agent profiles, dashboards, semantic retrieval, and deterministic Patch Trail + summaries. Engineering Memory schema 1.6 persists trajectory and Patch + Trail evidence. +* Experience Layer. Deterministic experience-v1 patterns are distilled + from canonical trajectories across all outcomes and exposed through a + separate advisory retrieval lane. 
Experiences retain supporting evidence and + agent-diversity facets, but never become authority automatically; + promote_experience creates a human-governed memory draft. +* Semantic memory retrieval. Optional LanceDB-backed hybrid search combines + FTS5/BM25 and vector retrieval using deterministic Reciprocal Rank Fusion. + Local embeddings are available through codeclone[semantic-local] with + BAAI/bge-small-en-v1.5. Semantic indexing is lazy, failure-tolerant, and + eventually consistent rather than synchronously rebuilt after every finish. +* Platform Observability. Opt-in, development-only telemetry traces + CodeClone’s own CLI, MCP, analysis, database, semantic-index, and projection + worker activity. The local observer captures timings, RSS/CPU, MCP payload and + token pressure, DB query counts and shapes, causal worker chains, and costly + no-ops. JSON/HTML views provide a diagnostic cockpit, while + query_platform_observability exposes bounded MCP sections for development + agents. Observability never affects reports, gates, baselines, memory facts, + or edit authorization. +* IDE and agent integrations. The VS Code extension gains Engineering + Memory governance, trajectory dashboards, controller audit views, and + workspace session statistics. Native integrations are available for Claude + Desktop, Codex, and Cursor. The Cursor plugin includes skills, rules, + fail-closed preToolUse enforcement, scoped workspace-intent checks, and a + structural-review agent. +* Controller and diagnostic CLI surfaces. Added blast-radius, patch + verification, session statistics, controller audit, memory trajectory, + anomaly, agent-profile, semantic-search, and Platform Observability commands. +* Documentation and edition model. 
Documentation is reorganized into a + thematic 00–26 contract book with unified integration guides, dedicated + chapters for the Controller, Engineering Memory, trajectories, Experiences, + and Platform Observability, plus explicit Open Source / Team / Enterprise + retention and capability tiers. +* MCP schemas now include parameter-level descriptions and deterministic + next_tool guidance. Workspace hygiene warnings, audit events, token-budget + tracking, and documentation-contract linting were also added. + +Changed + +* The default project workspace moved from .cache/codeclone/ to + .codeclone/; legacy locations now produce a migration warning. +* Documentation builds now use Zensical with strict, clean builds. +* pydantic is now a base dependency. +* LCOM4 excludes Protocol methods and Pydantic validation/serialization hooks; + computed_field remains part of cohesion analysis. +* Repository test coverage is enforced at >=99%. + +Fixed + +* Durable memory writes. Engineering Memory now uses + synchronous=FULL, preserving committed drafts across unclean MCP process + exits. Intent and audit stores retain recovery-oriented + synchronous=NORMAL. +* Memory lifecycle correctness. Draft records are no longer marked stale + before human promotion. Trajectory rebuilds now deduplicate superseded + projections, repoint evidence, remove stale workflow rows, and preserve + bounded claim-validation citations. +* Workspace hygiene and intent attribution. Finish blocks only on missing + evidence or foreign dirty overlap. Out-of-scope dirt is advisory, + continue_own_wip supports resuming owned work, queued foreign intents no + longer create false overlaps, and recoverable intents do not grant foreign + attribution. +* Patch verification correctness. Identical before/after runs are rejected + for structural and governance profiles. Negative health deltas now surface a + regression advisory, and Claim Guard warns when review text overstates patch + quality. 
+* Semantic retrieval correctness and cost. Hybrid search now preserves + lexical and vector relevance through RRF instead of allowing metadata ranking + to suppress strong matches. Per-source vector retrieval prevents dense lanes + from crowding out other sources. Embedding providers load lazily, failures + preserve documented fallback behavior, and redundant projection jobs are + coalesced or deferred. +* Architecture and import boundaries. Blast-radius graph logic moved into + codeclone/analysis/blast_radius.py, removing the CLI-to-MCP dependency + violation. +* Regression accuracy. respect_pyproject=false no longer reports + golden-fixture clone groups as false new regressions. Documentation URLs, + plugin references, and contract tests were updated after the documentation + reorganization. ## [2.0.2] - 2026-05-19 From de43e0ae66bc89fcfbe93fb220b9db3178c2fa10 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sat, 13 Jun 2026 13:35:05 +0500 Subject: [PATCH 267/318] fix(mcp): wrap tool handlers in a span so MCP DB-query cost is recorded --- codeclone/surfaces/mcp/server.py | 8 +++++- tests/test_observability_mcp_registrar.py | 34 ++++++++++++++++++++++- 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/codeclone/surfaces/mcp/server.py b/codeclone/surfaces/mcp/server.py index 77b43093..a748ed8e 100644 --- a/codeclone/surfaces/mcp/server.py +++ b/codeclone/surfaces/mcp/server.py @@ -28,6 +28,7 @@ operation, payload_capture_enabled, shutdown, + span, ) from .auth import ( MCP_AUTH_TOKEN_ENV, @@ -216,7 +217,12 @@ def wrapper(*args: object, **kwargs: object) -> object: op.set_request( request_bytes=request_bytes, request_tokens=request_tokens ) - result = func(*args, **kwargs) + # Open a root span around the handler: record_db_query attributes + # SQL to the active span, not the operation. Without this span every + # DB-touching MCP tool (start/finish/get_relevant_memory/manage_*) + # records zero db_queries — the operation has no span to hold them. 
+ with span(name=f"mcp.{tool_name}"): + result = func(*args, **kwargs) if payload_capture_enabled() and isinstance(result, Mapping): response_bytes, response_tokens = measure_payload(result) op.set_response( diff --git a/tests/test_observability_mcp_registrar.py b/tests/test_observability_mcp_registrar.py index a73b1dc1..84f55f2f 100644 --- a/tests/test_observability_mcp_registrar.py +++ b/tests/test_observability_mcp_registrar.py @@ -10,10 +10,11 @@ from collections.abc import Iterator from pathlib import Path +import orjson import pytest from codeclone.config.observability import ObservabilityConfig -from codeclone.observability import bootstrap, shutdown +from codeclone.observability import bootstrap, record_db_query, shutdown from codeclone.observability.store.schema import ( observability_store_path, open_observability_store, @@ -60,6 +61,37 @@ def test_registrar_records_operation_with_payload_sizes(tmp_path: Path) -> None: assert row[4] > row[3] +def test_registrar_attributes_db_queries_to_a_span(tmp_path: Path) -> None: + bootstrap(ObservabilityConfig(enabled=True), session_id="mcp-test") + + def _db_tool(root: str) -> dict[str, object]: + # Emulate the sqlite trace callback firing during the handler's DB work. + record_db_query("SELECT 1") + record_db_query("INSERT INTO t (x) VALUES (1)") + return {"root": root} + + wrapped = _instrument_tool(_db_tool) + try: + wrapped(root=str(tmp_path)) + finally: + shutdown() + + conn = open_observability_store(observability_store_path(tmp_path)) + try: + rows = conn.execute( + "SELECT s.counters_json FROM platform_spans s " + "JOIN platform_operations o ON o.operation_id = s.operation_id " + "WHERE o.name = 'mcp._db_tool'" + ).fetchall() + finally: + conn.close() + # The wrapper opens a root span, so the handler's DB queries are attributed + # to the operation instead of being dropped for lack of an active span. 
+ counters = [orjson.loads(row[0]) for row in rows] + assert sum(c.get("db_queries", 0) for c in counters) == 2 + assert sum(c.get("db_writes", 0) for c in counters) == 1 + + def test_registrar_preserves_signature() -> None: wrapped = _instrument_tool(_sample_tool) # The wrapper exposes the same (resolved) parameters as the original so From ada26b41b178e64fe07026184e7cf724bcea830a Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sat, 13 Jun 2026 14:54:36 +0500 Subject: [PATCH 268/318] feat(integrations): add marketplace install surfaces --- AGENTS.md | 393 ++++++++-- CHANGELOG.md | 7 +- CONTRIBUTING.md | 737 ++++++++++++++---- README.md | 394 ++++------ docs/book/02-architecture-map.md | 5 +- docs/book/10-config-and-defaults.md | 6 +- docs/book/README.md | 9 + docs/book/integrations/claude-code-plugin.md | 85 ++ .../integrations/claude-desktop-bundle.md | 6 +- docs/book/integrations/codex-plugin.md | 7 + docs/book/integrations/cursor-plugin.md | 11 + docs/getting-started.md | 32 +- docs/guide/README.md | 5 +- docs/guide/integrations/claude-code/setup.md | 104 +++ .../integrations/claude-desktop/setup.md | 4 + docs/guide/integrations/codex/setup.md | 14 +- .../integrations/cursor/install-and-skills.md | 45 +- docs/guide/mcp/client-setup.md | 38 +- docs/index.md | 5 +- docs/releasing.md | 65 +- docs/terms-of-use.md | 3 +- .../.claude-plugin/plugin.json | 20 + plugins/claude-code-codeclone/.mcp.json | 10 + plugins/claude-code-codeclone/README.md | 75 ++ .../scripts/launch_mcp.py | 22 + .../skills/codeclone-change-control/SKILL.md | 249 ++++++ .../codeclone-engineering-memory/SKILL.md | 150 ++++ .../skills/codeclone-hotspots/SKILL.md | 61 ++ .../skills/codeclone-review/SKILL.md | 82 ++ plugins/codeclone/README.md | 6 +- plugins/cursor-codeclone/README.md | 25 +- .../README.claude-code.root.md | 39 + scripts/integration_dist/README.codex.root.md | 5 +- .../integration_dist/gitignore.claude-code | 18 + .../marketplace.claude-code.json | 27 + 
scripts/sync_integrations.py | 17 + tests/test_claude_code_plugin.py | 87 +++ tests/test_codex_plugin.py | 3 +- tests/test_cursor_plugin.py | 11 + tests/test_sync_integrations.py | 57 ++ zensical.toml | 2 + 41 files changed, 2386 insertions(+), 555 deletions(-) create mode 100644 docs/book/integrations/claude-code-plugin.md create mode 100644 docs/guide/integrations/claude-code/setup.md create mode 100644 plugins/claude-code-codeclone/.claude-plugin/plugin.json create mode 100644 plugins/claude-code-codeclone/.mcp.json create mode 100644 plugins/claude-code-codeclone/README.md create mode 100644 plugins/claude-code-codeclone/scripts/launch_mcp.py create mode 100644 plugins/claude-code-codeclone/skills/codeclone-change-control/SKILL.md create mode 100644 plugins/claude-code-codeclone/skills/codeclone-engineering-memory/SKILL.md create mode 100644 plugins/claude-code-codeclone/skills/codeclone-hotspots/SKILL.md create mode 100644 plugins/claude-code-codeclone/skills/codeclone-review/SKILL.md create mode 100644 scripts/integration_dist/README.claude-code.root.md create mode 100644 scripts/integration_dist/gitignore.claude-code create mode 100644 scripts/integration_dist/marketplace.claude-code.json create mode 100644 tests/test_claude_code_plugin.py diff --git a/AGENTS.md b/AGENTS.md index e3f1a544..2271c004 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,21 +1,29 @@ # AGENTS.md — CodeClone (AI Agent Playbook) This document is the **source of truth** for agent operating rules in this repository. -It is optimized for **determinism**, **CI stability**, and **reproducible changes**. +It is optimized for **explicit scope**, **determinism**, **CI stability**, and +**reproducible, human-reviewable changes**. For architecture, module ownership, and runtime behavior, the **current repository code is the source of truth**. If AGENTS.md and code diverge, follow code and update AGENTS.md accordingly. 
-> Repository goal: maximize **honesty**, **reproducibility**, **determinism**, and **precision** for real‑world CI -> usage. +**CodeClone** is a deterministic **Structural Change Controller** for +AI-assisted Python development. It starts before a diff exists: an agent +declares intent, CodeClone maps the structural blast radius, bounds the edit, +verifies the resulting patch against one canonical report, and leaves an +auditable receipt. + +> Repository goal: make AI-assisted structural change **explicit**, **bounded**, +> **remembered**, and **verifiable** without turning LLM output into truth. --- ## 1) Operating principles (non‑negotiable) -1. **Do not break CI contracts.** - - Treat baseline, analysis cache, canonical report formats, and documented - MCP tool payloads as **public APIs**. +1. **Do not break public contracts.** + - Treat controller workflow semantics, baseline, analysis cache, canonical + report formats, Engineering Memory schemas/governance, documented MCP + payloads, and published client behavior as **public APIs**. - Any contract change must be **versioned**, documented, and accompanied by tests. 2. **Determinism > cleverness.** @@ -46,31 +54,64 @@ If AGENTS.md and code diverge, follow code and update AGENTS.md accordingly. - explicit maintainer approval - Performance alone is never a sufficient reason to change fingerprint semantics. +7. **Control starts before the diff.** + - For repository edits, declare intent and scope before editing. + - `edit_allowed=true` is the authoritative permission signal when the + change-control surface is available. + - Blast radius, do-not-touch boundaries, actual changed files, patch + verification, and the review receipt are part of the change contract. + +8. **Agent-authored code requires human ownership.** + - CodeClone accepts code written with agents and language models. 
+ - A human contributor must inspect and understand the complete diff, verify + tests/contracts/security/provenance, and be able to maintain it. + - Substantive human review is mandatory before merge. Agent-only review, + automated approval, or green CI does not satisfy this requirement. + - Material agent assistance must be disclosed in the pull request. + --- ## 2) Quick orientation -CodeClone provides structural code quality analysis for Python. It supports: - -- **function clones** (strongest signal) -- **block clones** (sliding window of statements, may be noisy on boilerplate) -- **segment clones** (report-only unless explicitly gated) - -Key artifacts: - -- `codeclone.baseline.json` — trusted baseline snapshot (for CI comparisons) -- `.codeclone/cache.json` — analysis cache (integrity-checked) -- `.codeclone/report.html|report.json|report.md|report.sarif|report.txt` — reports +CodeClone controls structural change through this deterministic lifecycle: + +1. declare intent and allowed scope; +2. inspect blast radius, review context, and do-not-touch boundaries; +3. make the bounded edit only after permission is granted; +4. reconcile actual changed files with declared scope; +5. verify structural deltas and review claims against one canonical report; +6. leave an auditable receipt and Patch Trail evidence. + +The controller is built on one deterministic structural analysis. The canonical +report includes function/block/segment clones, structural findings, quality +metrics, coverage and API-surface joins, baseline-aware novelty, and health +signals. CLI, reports, MCP, IDEs, plugins, and CI project the same facts. 
+ +Key state and surfaces: + +- `codeclone.baseline.json` — trusted comparison snapshot for baseline-aware CI +- `.codeclone/cache.json` — integrity-checked analysis optimization, never truth +- `.codeclone/report.html|report.json|report.md|report.sarif|report.txt` — + deterministic projections of the canonical report +- `.codeclone/intents/` or configured SQLite registry — ephemeral, + lease/TTL-bound workspace coordination, never analysis truth +- `.codeclone/db/audit.sqlite3` — optional passive controller evidence +- `.codeclone/memory/engineering_memory.sqlite3` — governed Engineering Memory + with FTS, trajectory, Patch Trail, Experience, and projection-job state +- `.codeclone/memory/semantic_index.lance` — optional semantic sidecar +- `.codeclone/db/platform_observability.sqlite3` — opt-in local diagnostics for + CodeClone itself; never repository quality evidence or a gate input - `codeclone-mcp` — optional MCP server: read-only with respect to source - files, baselines, canonical reports, and analysis cache; stateful only for - session-local review/controller state, ephemeral workspace intent - coordination, and optional audit trail (install via `codeclone[mcp]`) + files, baselines, canonical/generated reports, and analysis cache; explicit + controller, audit, memory, projection, and observability contracts may write + only their documented bounded local state (install via `codeclone[mcp]`) - `extensions/vscode-codeclone/` — stable VS Code extension as a native, read-only IDE client over `codeclone-mcp` - `extensions/claude-desktop-codeclone/` — stable Claude Desktop `.mcpb` bundle as a local install wrapper over `codeclone-mcp` +- `plugins/claude-code-codeclone/` — stable Claude Code plugin source, synchronized to the public + `orenlab/codeclone-claude-code` marketplace with bundled MCP configuration and CodeClone skills - `plugins/codeclone/` + `.agents/plugins/marketplace.json` — stable Codex plugin as a native local discovery layer - over 
`codeclone-mcp`, with bundled CodeClone skills under `plugins/codeclone/skills/` (`codeclone-review`, - `codeclone-hotspots`, `codeclone-change-control`) + over `codeclone-mcp`, with bundled CodeClone skills under `plugins/codeclone/skills/` - `plugins/cursor-codeclone/` — stable Cursor plugin as a native local discovery layer over `codeclone-mcp`, with bundled skills, rules, hooks, and an agent definition - MCP runs are in-memory only. Review markers are session-local. Change intent @@ -83,16 +124,30 @@ Key artifacts: --- -## 3) One command to validate your change +## 3) Validation stages -Run these locally before proposing changes: +The installed `pre-commit` stage runs hygiene checks, Ruff, Mypy, +baseline-aware `codeclone . --ci`, and the docs admonition fixer: ```bash uv run pre-commit run --all-files ``` -Full `pytest` runs enforce package coverage `>=99%` (`fail_under` in -`pyproject.toml`; CI uses `--cov-fail-under=99`). +This does **not** run the `pre-push` pytest hook. Run it explicitly before +pushing: + +```bash +uv run pre-commit run --hook-stage pre-push --all-files +``` + +The pre-push hook and CI enforce package coverage `>=99%`: + +```bash +uv run pytest -q --cov=codeclone --cov-report=term-missing --cov-fail-under=99 +``` + +Hooks may rewrite files. Inspect `git diff` again afterward. Never use +`--no-verify` to bypass a failing hook. If you touched baseline/cache/report contracts or CLI/MCP audit surfaces, also exercise the CLI audit path (`--audit` / `codeclone/surfaces/cli/audit.py`) or the relevant audit/MCP tests. @@ -108,6 +163,17 @@ If you touched the MCP surface, also run: uv run pytest -q tests/test_mcp_service.py tests/test_mcp_server.py ``` +If you touched Engineering Memory, semantic retrieval, trajectories, +Experiences, or projection jobs, run the tests for the nearest owning modules, including the +applicable `tests/test_memory_*.py`, `tests/test_semantic_*.py`, and MCP memory +contract tests.
+ +If you touched Platform Observability, also run: + +```bash +uv run pytest -q tests/test_observability_*.py +``` + If you touched the VS Code extension surface, also run: ```bash @@ -146,22 +212,61 @@ python3 -m json.tool .agents/plugins/marketplace.json >/tmp/codeclone-codex-mark uv run pytest -q tests/test_codex_plugin.py ``` +If you touched the Claude Code plugin surface, also run: + +```bash +python3 -m json.tool plugins/claude-code-codeclone/.claude-plugin/plugin.json >/tmp/codeclone-claude-code-plugin.json +python3 -m json.tool plugins/claude-code-codeclone/.mcp.json >/tmp/codeclone-claude-code-mcp.json +python3 -m json.tool scripts/integration_dist/marketplace.claude-code.json >/tmp/codeclone-claude-code-marketplace.json +claude plugin validate plugins/claude-code-codeclone +uv run pytest -q tests/test_claude_code_plugin.py +``` + +If you touched the Cursor plugin surface, also run: + +```bash +uv run pytest -q tests/test_cursor_plugin.py tests/test_cursor_plugin_hooks.py +``` + +If you touched the GitHub Action helpers, also run: + +```bash +uv run pytest -q tests/test_github_action_helpers.py +``` + +If you touched `scripts/sync_integrations.py`, +`scripts/integration_dist/*`, or integration distribution layouts, also run: + +```bash +uv run pytest -q tests/test_sync_integrations.py +``` + --- ## 4) Baseline contract (v2, stable) ### Versioned constants (single source of truth) -All schema/version constants live in `codeclone/contracts/__init__.py`. 
**Always read them from code, never copy -from another doc.** Current values (verified at write time): - -| Constant | Source | Current value | -|-----------------------------------|-----------------------------------|---------------| -| `BASELINE_SCHEMA_VERSION` | `codeclone/contracts/__init__.py` | `2.1` | -| `BASELINE_FINGERPRINT_VERSION` | `codeclone/contracts/__init__.py` | `1` | -| `CACHE_VERSION` | `codeclone/contracts/__init__.py` | `2.8` | -| `REPORT_SCHEMA_VERSION` | `codeclone/contracts/__init__.py` | `2.11` | -| `METRICS_BASELINE_SCHEMA_VERSION` | `codeclone/contracts/__init__.py` | `1.2` | +Cross-surface schema/version constants live in +`codeclone/contracts/__init__.py`; subsystem-local wire versions may live with +their owning modules. **Always read values from code, never copy from another +doc.** Current central values (verified at write time): + +| Constant | Current value | +|-----------------------------------------|-----------------| +| `BASELINE_SCHEMA_VERSION` | `2.1` | +| `BASELINE_FINGERPRINT_VERSION` | `1` | +| `CACHE_VERSION` | `2.8` | +| `REPORT_SCHEMA_VERSION` | `2.11` | +| `METRICS_BASELINE_SCHEMA_VERSION` | `1.2` | +| `ENGINEERING_MEMORY_SCHEMA_VERSION` | `1.6` | +| `SEMANTIC_INDEX_FORMAT_VERSION` | `1` | +| `PATCH_TRAIL_SCHEMA_VERSION` | `1` | +| `PLATFORM_OBSERVABILITY_SCHEMA_VERSION` | `1.0` | +| `TRAJECTORY_PROJECTION_VERSION` | `trajectory-v3` | +| `TRAJECTORY_QUALITY_SCORE_VERSION` | `2` | +| `EXPERIENCE_DISTILLATION_VERSION` | `experience-v1` | +| `IDE_GOVERNANCE_PROTOCOL_VERSION` | `2` | When updating any doc that mentions a version, re-read `codeclone/contracts/__init__.py` first. Do not derive versions from another document. @@ -254,11 +359,20 @@ Reports come in: MCP is a separate optional interface, not a report format. It must remain read-only with respect to repository source, baselines, canonical reports, -generated reports, and analysis cache. 
Session-local controller state, -workspace intent records, and audit trail are allowed only through explicit -controller/audit contracts. Workspace intent registry files under -`.codeclone/intents/` are advisory coordination state only, not analysis -cache or report truth. +generated reports, and analysis cache. Explicit controller/developer contracts +may maintain bounded local state: + +- session-local runs and review markers; +- ephemeral workspace intent coordination; +- optional controller audit evidence; +- governed Engineering Memory drafts and projection metadata; +- opt-in Platform Observability telemetry. + +These writes must use their owning controller, memory, audit, projection, or +observability contract. They must never alter canonical report identity, +baseline trust, cache compatibility, findings, gates, or edit authorization. +Workspace intent registry files under `.codeclone/intents/` are advisory +coordination state only, not analysis cache or report truth. For file edits, agents should prefer the workflow tools `start_controlled_change` and `finish_controlled_change` — they aggregate @@ -332,23 +446,31 @@ No UI-only heuristics that affect gating. ## 8) How to propose changes (agent workflow) For repository edits, follow `CLAUDE.md` / the active CodeClone change-control -skill first. This section describes what to report after the controlled change, -not a replacement workflow. +skill first. No edit begins until `start_controlled_change` returns +`edit_allowed=true`. Retrieve relevant memory after scope authorization, keep +the patch inside declared boundaries, verify with the profile selected by +`finish_controlled_change`, and leave a receipt. This section describes what to +report around that controlled change, not a replacement workflow. When you implement something: 1. **State the intent** (what user-visible issue does it solve?) -2. **List files touched** and why. -3. **Call out contracts affected**: +2. 
**Declare allowed files, related context, and forbidden paths.** +3. **Inspect blast radius, review context, and do-not-touch boundaries.** +4. **List actual files touched** and why. +5. **Call out contracts affected**: - baseline / cache / report schema - - CLI exit codes / messages -4. **Add/adjust tests** for: + - controller / memory / observability / MCP payloads + - CLI exit codes / messages / integration surfaces +6. **Add/adjust tests** for: - normal-mode behavior - CI gating behavior - determinism (identical output on rerun) - legacy/untrusted scenarios where applicable -5. Run: +7. **Run**: - `ruff`, `mypy`, `pytest` +8. **Request substantive human review** of the complete diff. Automated + analysis, agent review, receipts, and green CI are evidence, not approval. Avoid changing unrelated files (locks, roadmap) unless required. @@ -409,10 +531,17 @@ Before cutting a release: - Don’t embed suppressions into baseline unless explicitly designed as a versioned contract. - Don’t introduce nondeterministic ordering (dict iteration, set ordering, filesystem traversal without sort). - Don’t make the base `codeclone` install depend on optional MCP runtime packages. +- Don’t edit before the controller authorizes the declared scope when the + change-control surface is available. - Don’t let MCP mutate source files, baselines, canonical reports, generated - reports, or analysis cache data. Ephemeral controller state and audit trail - are allowed only through explicit controller/audit contracts. + reports, or analysis cache data. Bounded controller, memory, projection, + audit, and observability state is allowed only through explicit owning + contracts. - Don’t let MCP re-synthesize design findings from raw metrics; read canonical `findings.groups.design` only. +- Don’t let Engineering Memory, trajectories, Experiences, or Platform + Observability authorize edits or override canonical report facts. 
+- Don’t describe agent review, receipts, or automated checks as the mandatory + human review required for merge. --- @@ -420,6 +549,12 @@ Before cutting a release: Architecture is layered, but grounded in current code (not aspirational diagrams): +- **Structural Change Controller** (`codeclone/surfaces/mcp/_session_workflow_mixin.py`, + intent/blast-radius/patch-contract/receipt helpers under + `codeclone/surfaces/mcp/`, `codeclone/analysis/blast_radius.py`, + `codeclone/budget/*`) owns pre-edit scope authorization, deterministic blast + radius, patch verification, claim validation, and review receipts over + canonical report facts. - **CLI entry + orchestration surface** (`codeclone/main.py`, `codeclone/surfaces/cli/*`, `codeclone/ui_messages/*`) owns argument parsing, runtime/config resolution, summaries, report writes, and exit routing. User-facing copy lives in `ui_messages/` submodules (`help`, `labels`, `runtime`, @@ -444,18 +579,40 @@ Architecture is layered, but grounded in current code (not aspirational diagrams facts. HTML is render-only. - **MCP agent interface** (`codeclone/surfaces/mcp/*`, `codeclone/surfaces/mcp/messages/*`) exposes the same pipeline/report contracts as a deterministic MCP surface for AI agents and MCP-capable clients, - read-only with respect to repository artifacts and stateful only for session-local controller/review state, - workspace intent coordination, and optional audit. + read-only with respect to source/baseline/report/cache artifacts and stateful + only through explicit controller, memory, projection, audit, and observability + contracts. +- **Engineering Memory** (`codeclone/memory/*`, `codeclone/config/memory*.py`) + owns the local evidence-linked store, FTS/semantic retrieval, staleness, + governance, trajectory and Patch Trail projection, Experience distillation, + and coalesced projection jobs. It guides agents but never authorizes edits. 
+- **Platform Observability** (`codeclone/observability/*`) owns opt-in local + operation/span telemetry, normalized SQL fingerprints, bounded query + projections, and self-contained JSON/HTML diagnostics for CodeClone + development. It is never repository quality truth or a gate input. +- **Controller insights** (`codeclone/controller_insights/*`) owns shared + session-stat and audit-trail projections used by CLI and IDE-only MCP tools. - **Audit trail** (`codeclone/audit/*`) stores optional passive evidence (SQLite by default via `codeclone/surfaces/cli/audit.py` / MCP audit emit). It must not affect canonical report digests, baseline trust, cache compatibility, or finding identity. - **Patch budget helpers** (`codeclone/budget/*`) provide shared budget estimation for CLI/MCP patch-verify flows. - **Documentation/publishing surface** (`docs/`, `zensical.toml`, `.github/workflows/docs.yml`, `scripts/build_docs_example_report.py`) publishes contract docs and the live sample report. +- **Developer/release scripts** (`scripts/lint_admonitions.py`, + `scripts/sync_integrations.py`, `scripts/integration_dist/*`, + `scripts/launch_mcp`) provide docs hygiene, storefront synchronization, and + launcher adapters. They must remain thin and contract-tested. +- **GitHub Action surface** (`.github/actions/codeclone/*`) packages the public + composite Action over the same CLI contracts; shell inputs, timeouts, outputs, + and exit behavior are contract-sensitive. - **VS Code extension surface** (`extensions/vscode-codeclone/*`) is a native, workspace-only IDE client over `codeclone-mcp`, with baseline-aware, triage-first, source-first review UX. - **Claude Desktop bundle surface** (`extensions/claude-desktop-codeclone/*`) is a native `.mcpb` install wrapper for Claude Desktop that launches the same local `codeclone-mcp` server via local `stdio`. 
+- **Claude Code plugin surface** (`plugins/claude-code-codeclone/*`, + `scripts/integration_dist/marketplace.claude-code.json`) is a native + marketplace plugin over `codeclone-mcp`, with bundled skills and MCP + configuration synchronized to `orenlab/codeclone-claude-code`. - **Codex plugin surface** (`plugins/codeclone/*`, `.agents/plugins/marketplace.json`) is a native local Codex plugin over `codeclone-mcp`, with repo-local discovery metadata and bundled skills under `plugins/codeclone/skills/`. - **Cursor plugin surface** (`plugins/cursor-codeclone/*`) is a native local Cursor plugin over `codeclone-mcp` with @@ -464,13 +621,22 @@ Architecture is layered, but grounded in current code (not aspirational diagrams Non-negotiable interpretation: +- The Controller begins before the diff; intent/scope/blast radius are not + post-hoc review annotations. - Core produces facts; renderers present facts. - Baseline/cache are persistence contracts, not analysis truth. - UI/report must not invent gating semantics. - MCP reuses pipeline/report contracts and must not create a second analysis truth path. +- Engineering Memory, trajectories, Experiences, and Patch Trail are + evidence/context layers, not edit authorization or analysis truth. +- Platform Observability describes CodeClone execution cost, not repository + quality, vulnerabilities, or permission. - The VS Code extension is a guided IDE view over MCP and must not introduce a second analysis or truth path. - The Claude Desktop bundle is a local setup surface over `codeclone-mcp` and must not introduce a second server or truth path. +- The Claude Code plugin is a discovery and guidance surface over + `codeclone-mcp` and must not introduce a second analyzer, MCP server, or + truth path. - The Codex plugin is a local discovery and guidance surface over `codeclone-mcp` and must not introduce a second analyzer, MCP server, or truth path. 
- The Cursor plugin is a local discovery and guidance surface over `codeclone-mcp` and must not introduce a second @@ -481,6 +647,9 @@ Non-negotiable interpretation: Use this map to route changes to the right owner module. - `codeclone/main.py` — public CLI entrypoint only. Keep it tiny. +- `codeclone/analysis/blast_radius.py` — deterministic dependency/blast-radius + graph core shared by CLI/MCP controller projections; keep it independent from + MCP session policy. - `codeclone/surfaces/cli/workflow.py` — top-level CLI orchestration and exit routing. Add CLI control flow here, not in `main.py`. - `codeclone/surfaces/cli/*` — CLI support slices (startup, runtime, execution, post-run handling, summaries, @@ -517,12 +686,14 @@ Use this map to route changes to the right owner module. - `codeclone/report/gates/*` — metric-gate reason derivation over canonical metrics state. - `codeclone/report/*.py` (other modules) — deterministic report support slices such as explainability, suggestions, merge, overview, findings helpers, and source-kind routing. -- `codeclone/memory/*` — Engineering Memory store (`sqlite_store.py`), init ingestion (`extractors.py`, `runner.py`), - scoped retrieval (`retrieval/`), project identity, and CLI/MCP read surfaces. Memory mutations go through memory - tools only — not the general edit workflow. +- `codeclone/memory/*` — Engineering Memory persistence, ingest, scoped + retrieval, semantic sidecar, governance, trajectories, Patch Trail, + Experiences, and projection jobs. Memory mutations go through explicit memory + tools/workflows only — never the general source-edit workflow. - `codeclone/surfaces/mcp/service.py` — typed, in-process MCP service over the current pipeline/report contracts; - keep it deterministic and read-only except for session-local state and - ephemeral workspace intent records under `.codeclone/intents/`. + keep source/baseline/report/cache access read-only. 
Local mutations are + limited to documented controller, memory, projection, audit, and + observability contracts. - `codeclone/surfaces/mcp/server.py` — optional MCP launcher/server wiring, transport config, and MCP tool/resource registration; keep dependency loading lazy so base installs/CI do not require MCP runtime packages. - `codeclone/surfaces/mcp/messages/*` — MCP user-facing copy (tool/resource descriptions, help topics, workflow and @@ -530,6 +701,12 @@ Use this map to route changes to the right owner module. policy centralized like `ui_messages/`. - `codeclone/audit/*` — audit event schema, validation, writer/reader; passive evidence only. - `codeclone/budget/*` — patch/token budget estimation shared by CLI and MCP surfaces. +- `codeclone/controller_insights/*` — shared session-stats and audit-trail + collectors; CLI and IDE projections must reuse these rather than duplicating + insight semantics. +- `codeclone/observability/*` — developer-only instrumentation, local telemetry + persistence, bounded query views, and JSON/HTML rendering. It must remain + independent from findings, gates, baselines, memory facts, and authorization. - `tests/test_mcp_service.py`, `tests/test_mcp_server.py` — MCP contract and integration tests; run these when touching any MCP surface. - `codeclone/contracts/*` — version constants, schema types, exit enum, URLs, and typed exceptions. Treat as contract @@ -543,10 +720,23 @@ Use this map to route changes to the right owner module. gate prefixes). - `docs/`, `zensical.toml`, `.github/workflows/docs.yml`, `scripts/build_docs_example_report.py` — docs-site source, publication workflow, and live sample-report generation; keep published docs aligned with code contracts. +- `scripts/lint_admonitions.py` — deterministic MkDocs admonition/details + indentation validator/fixer used by pre-commit. +- `scripts/sync_integrations.py` + `scripts/integration_dist/*` — guarded + storefront synchronization and distribution overlays. 
Dry-run first; test + with `tests/test_sync_integrations.py`. +- `scripts/launch_mcp` — monorepo adapter to the shared Codex plugin launcher, + not an independent launcher implementation. +- `.github/actions/codeclone/*` — composite GitHub Action surface; pass inputs + through `env:`, keep subprocess timeouts explicit, and preserve documented + CLI/output semantics. - `extensions/vscode-codeclone/*` — stable VS Code extension surface; keep it baseline-aware, triage-first, source-first, and faithful to MCP/canonical report semantics rather than building a second analyzer or report model. - `extensions/claude-desktop-codeclone/*` — stable Claude Desktop bundle surface; keep it local-stdio-only, launcher-focused, and faithful to `codeclone-mcp` rather than re-implementing MCP semantics in the bundle layer. +- `plugins/claude-code-codeclone/*` — stable Claude Code plugin source; keep it + Claude-native, marketplace-installable, skills-guided, and faithful to + `codeclone-mcp` rather than inventing plugin-only analysis logic. - `plugins/codeclone/*`, `.agents/plugins/marketplace.json` — stable Codex plugin surface; keep it Codex-native, conservative-first, skills-guided, and faithful to `codeclone-mcp` rather than inventing plugin-only analysis logic. - `plugins/cursor-codeclone/*` — stable Cursor plugin surface; keep it Cursor-native, skills/rules/hooks-guided, and @@ -569,6 +759,9 @@ Dependency direction is enforceable and partially test-guarded (`tests/test_arch - `codeclone.memory.*` may import `codeclone.contracts`, `codeclone.utils`, blast-radius helpers under `codeclone/analysis/`, and report document types as needed for ingestion. It must NOT import `codeclone.surfaces.*` or `codeclone.ui_messages`. +- `codeclone.observability.*` is diagnostics-only and must not become a + dependency that changes analysis, findings, gates, baselines, memory facts, + or authorization. 
Operational rules: @@ -579,6 +772,8 @@ Operational rules: - Persistence semantics (baseline/cache trust/integrity) must stay in persistence/domain modules, not in render/UI layers. - MCP may depend on pipeline/report/contracts, but core/persistence/report layers must not depend on MCP modules. +- Controller insights are shared projections; CLI/MCP render them but must not + fork their collection semantics. ## 15) Suppression policy @@ -603,21 +798,27 @@ Prefer explicit inline suppressions for runtime/dynamic false positives instead If you change a contract-sensitive zone, route docs/tests/approval deliberately. -| Change zone | Must update docs | Must update tests | Explicit approval required when | Contract-change trigger | -|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------| -| Baseline schema/trust/integrity (`codeclone/baseline/clone_baseline.py`, `codeclone/baseline/trust.py`) | `docs/book/07-baseline.md`, `docs/book/24-compatibility-and-versioning.md`, `docs/book/appendix/b-schema-layouts.md`, `CHANGELOG.md` | `tests/test_baseline.py`, CI/CLI behavior tests 
(`tests/test_cli_inprocess.py`, `tests/test_cli_unit.py`) | schema/trust semantics, compatibility windows, payload integrity logic change | baseline key layout/status semantics/compat rules change | -| Cache schema/profile/integrity (`codeclone/cache/store.py`, `codeclone/cache/versioning.py`, `codeclone/cache/integrity.py`) | `docs/book/08-cache.md`, `docs/book/appendix/b-schema-layouts.md`, `CHANGELOG.md` | `tests/test_cache.py`, pipeline/CLI cache integration tests | cache schema/status/profile compatibility semantics change | cache payload/version/status semantics change | -| Canonical report JSON shape (`codeclone/report/document/*`, report projections) | `docs/book/05-report.md` (+ `docs/book/06-html-render.md` if rendering contract impacted), `docs/sarif.md` when SARIF changes, `CHANGELOG.md` | `tests/test_report.py`, `tests/test_report_contract_coverage.py`, `tests/test_report_branch_invariants.py`, relevant report-format tests | finding/meta/summary schema changes | stable JSON fields/meaning/order guarantees change | -| CLI flags/help/exit behavior (`codeclone/main.py`, `codeclone/surfaces/cli/*`, `codeclone/config/*`, `codeclone/contracts/*`) | `docs/book/11-cli.md`, `docs/book/09-exit-codes.md`, `README.md`, `CHANGELOG.md` | `tests/test_cli_unit.py`, `tests/test_cli_inprocess.py`, `tests/test_cli_smoke.py` | exit-code semantics, script-facing behavior, flag contracts change | user-visible CLI contract changes | -| Fingerprint-adjacent analysis (`codeclone/analysis/units.py`, `codeclone/analysis/_module_walk.py`, `codeclone/analysis/cfg.py`, `codeclone/analysis/normalizer.py`, `codeclone/findings/clones/grouping.py`) | `docs/book/03-core-pipeline.md`, `docs/book/04-cfg-semantics.md`, `docs/book/24-compatibility-and-versioning.md`, `CHANGELOG.md` | `tests/test_fingerprint.py`, `tests/test_extractor.py`, `tests/test_cfg.py`, golden tests (`tests/test_detector_golden.py`, `tests/test_golden_v2.py`) | always (see Section 1.6) | clone identity / NEW-vs-KNOWN 
/ fingerprint inputs change | -| Suppression semantics/reporting (`codeclone/analysis/suppressions.py`, `codeclone/analysis/_module_walk.py` dead-code wiring, report/UI counters) | `docs/book/19-inline-suppressions.md`, `docs/book/17-dead-code-contract.md`, `docs/book/05-report.md`, and interface docs if surfaced (`09-cli`, `10-html-render`) | `tests/test_suppressions.py`, `tests/test_extractor.py`, `tests/test_metrics_modules.py`, `tests/test_pipeline_metrics.py`, report/html/cli tests | declaration scope semantics, rule effect, or contract-visible counters/fields change | suppression changes alter active finding output or contract-visible report payload | -| MCP interface (`codeclone/surfaces/mcp/*`, packaging extra/launcher) | `README.md`, `docs/book/25-mcp-interface/`, `docs/guide/mcp/`, `docs/book/02-architecture-map.md`, `docs/book/24-compatibility-and-versioning.md`, `CHANGELOG.md` | `tests/test_mcp_service.py`, `tests/test_mcp_server.py`, `tests/fixtures/contract_snapshots/mcp_tool_schemas.json`, plus CLI/package tests if launcher/install semantics change | tool/resource shapes, workflow tool payloads, repository-read-only semantics, optional-dependency packaging behavior change | public MCP tool names, workflow tool payloads, resource URIs, launcher/install behavior, or response semantics change | -| Engineering Memory (`codeclone/memory/*`, `codeclone/config/memory*.py`, memory CLI/MCP tools) | `docs/book/13-engineering-memory/`, `docs/guide/mcp/workflows/memory-recipes.md`, `docs/book/25-mcp-interface/`, `docs/book/11-cli.md`, `CLAUDE.md`, plugin skills, `CHANGELOG.md` | `tests/test_memory_*.py`, MCP memory tool tests in `tests/test_mcp_service.py`, contract snapshots when tool schemas change | schema version bumps, store contract semantics, governance transitions (18.4+) | `ENGINEERING_MEMORY_SCHEMA_VERSION`, MCP memory tool payloads, CLI memory commands, or SQLite DDL change | -| VS Code extension surface (`extensions/vscode-codeclone/*`) | 
`README.md`, `docs/guide/integrations/vscode/setup.md`, `docs/book/integrations/vs-code-extension.md`, `docs/book/02-architecture-map.md`, `docs/index.md`, `CHANGELOG.md` | `node --check extensions/vscode-codeclone/src/support.js`, `node --check extensions/vscode-codeclone/src/mcpClient.js`, `node --check extensions/vscode-codeclone/src/extension.js`, `node --test extensions/vscode-codeclone/test/*.test.js`, plus local extension-host smoke and package smoke when surface/manifest/assets change | command/view UX, trust/runtime model, source-first review flow, or packaging metadata change | documented commands/views/setup/trust behavior, packaged assets, or publish metadata change | -| Claude Desktop bundle surface (`extensions/claude-desktop-codeclone/*`) | `docs/guide/integrations/claude-desktop/setup.md`, `docs/book/integrations/claude-desktop-bundle.md`, `docs/guide/mcp/`, `docs/book/02-architecture-map.md`, `docs/index.md`, `CHANGELOG.md` | `node --check extensions/claude-desktop-codeclone/server/index.js`, `node --check extensions/claude-desktop-codeclone/src/launcher.js`, `node --check extensions/claude-desktop-codeclone/scripts/build-mcpb.mjs`, `node --test extensions/claude-desktop-codeclone/test/*.test.js`, plus `.mcpb` build smoke | bundle install/runtime model, launcher UX, local-stdio constraints, or bundle metadata change | documented Claude Desktop install/setup/runtime behavior or packaged bundle semantics change | -| Codex plugin surface (`plugins/codeclone/*`, `.agents/plugins/marketplace.json`) | `docs/guide/integrations/codex/setup.md`, `docs/book/integrations/codex-plugin.md`, `docs/guide/mcp/`, `docs/book/02-architecture-map.md`, `docs/index.md`, `CHANGELOG.md` | `python3 -m json.tool plugins/codeclone/.codex-plugin/plugin.json`, `python3 -m json.tool plugins/codeclone/.mcp.json`, `python3 -m json.tool .agents/plugins/marketplace.json`, `tests/test_codex_plugin.py` | plugin discovery/runtime model, bundled MCP config, bundled skill behavior, or 
plugin metadata change | documented Codex plugin install/discovery/runtime behavior or plugin manifest/marketplace semantics change | -| Cursor plugin surface (`plugins/cursor-codeclone/*`) | `docs/guide/integrations/cursor/install-and-skills.md`, `docs/book/integrations/cursor-plugin.md`, `docs/guide/mcp/`, `docs/book/02-architecture-map.md`, `docs/index.md`, `CHANGELOG.md` | `tests/test_cursor_plugin.py`, `tests/test_cursor_plugin_hooks.py` | plugin discovery/runtime model, bundled MCP config, bundled skill/rule/hook behavior, or plugin metadata change | documented Cursor plugin install/discovery/runtime behavior or plugin manifest semantics change | -| Docs site / sample report publication (`docs/`, `zensical.toml`, `.github/workflows/docs.yml`, `scripts/build_docs_example_report.py`) | `docs/index.md`, `docs/publishing.md`, `docs/examples/report.md`, and any contract pages surfaced by the change, `CHANGELOG.md` when user-visible behavior changes | `zensical build --clean --strict`, sample-report generation smoke path, and relevant report/html tests if generated examples or embeds change | published docs navigation, sample-report generation, or Pages workflow semantics change | published documentation behavior or sample-report generation contract changes | +| Change zone | Must update docs | Must update tests | Explicit approval required when | Contract-change trigger | 
+|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------| +| Baseline schema/trust/integrity (`codeclone/baseline/clone_baseline.py`, `codeclone/baseline/trust.py`) | `docs/book/07-baseline.md`, `docs/book/24-compatibility-and-versioning.md`, `docs/book/appendix/b-schema-layouts.md`, `CHANGELOG.md` | `tests/test_baseline.py`, CI/CLI behavior tests (`tests/test_cli_inprocess.py`, `tests/test_cli_unit.py`) | schema/trust semantics, compatibility windows, payload integrity logic change | baseline key layout/status semantics/compat rules change | +| Cache schema/profile/integrity (`codeclone/cache/store.py`, `codeclone/cache/versioning.py`, `codeclone/cache/integrity.py`) | `docs/book/08-cache.md`, `docs/book/appendix/b-schema-layouts.md`, `CHANGELOG.md` | `tests/test_cache.py`, pipeline/CLI cache integration tests | cache schema/status/profile compatibility semantics change | cache payload/version/status semantics change | +| Canonical report JSON shape (`codeclone/report/document/*`, report projections) | `docs/book/05-report.md` (+ `docs/book/06-html-render.md` 
if rendering contract impacted), `docs/sarif.md` when SARIF changes, `CHANGELOG.md` | `tests/test_report.py`, `tests/test_report_contract_coverage.py`, `tests/test_report_branch_invariants.py`, relevant report-format tests | finding/meta/summary schema changes | stable JSON fields/meaning/order guarantees change | +| CLI flags/help/exit behavior (`codeclone/main.py`, `codeclone/surfaces/cli/*`, `codeclone/config/*`, `codeclone/contracts/*`) | `docs/book/11-cli.md`, `docs/book/09-exit-codes.md`, `README.md`, `CHANGELOG.md` | `tests/test_cli_unit.py`, `tests/test_cli_inprocess.py`, `tests/test_cli_smoke.py` | exit-code semantics, script-facing behavior, flag contracts change | user-visible CLI contract changes | +| Structural Change Controller (intent, blast radius, patch contract, hygiene, claims, receipts, Patch Trail) | `docs/book/12-structural-change-controller/`, `docs/guide/change-control/`, `docs/book/14-claim-guard.md`, MCP/plugin guidance, `README.md`, `CHANGELOG.md` | Controller/intent/verification/claim/receipt tests in `tests/test_mcp_service.py`, `tests/test_mcp_server.py`, `tests/test_verification_profile.py`, `tests/test_patch_trail_*.py`, plus tool-schema snapshots when payloads change | edit authorization, scope/hygiene, verification profile, claim semantics, receipt or Patch Trail contract changes | workflow tool payloads, status transitions, permission signals, verification/receipt schemas change | +| Fingerprint-adjacent analysis (`codeclone/analysis/units.py`, `codeclone/analysis/_module_walk.py`, `codeclone/analysis/cfg.py`, `codeclone/analysis/normalizer.py`, `codeclone/findings/clones/grouping.py`) | `docs/book/03-core-pipeline.md`, `docs/book/04-cfg-semantics.md`, `docs/book/24-compatibility-and-versioning.md`, `CHANGELOG.md` | `tests/test_fingerprint.py`, `tests/test_extractor.py`, `tests/test_cfg.py`, golden tests (`tests/test_detector_golden.py`, `tests/test_golden_v2.py`) | always (see Section 1.6) | clone identity / NEW-vs-KNOWN / 
fingerprint inputs change | +| Suppression semantics/reporting (`codeclone/analysis/suppressions.py`, `codeclone/analysis/_module_walk.py` dead-code wiring, report/UI counters) | `docs/book/19-inline-suppressions.md`, `docs/book/17-dead-code-contract.md`, `docs/book/05-report.md`, and interface docs if surfaced (`11-cli`, `06-html-render`) | `tests/test_suppressions.py`, `tests/test_extractor.py`, `tests/test_metrics_modules.py`, `tests/test_pipeline_metrics.py`, report/html/cli tests | declaration scope semantics, rule effect, or contract-visible counters/fields change | suppression changes alter active finding output or contract-visible report payload | +| MCP interface (`codeclone/surfaces/mcp/*`, packaging extra/launcher) | `README.md`, `docs/book/25-mcp-interface/`, `docs/guide/mcp/`, `docs/book/02-architecture-map.md`, `docs/book/24-compatibility-and-versioning.md`, `CHANGELOG.md` | `tests/test_mcp_service.py`, `tests/test_mcp_server.py`, `tests/fixtures/contract_snapshots/mcp_tool_schemas.json`, plus CLI/package tests if launcher/install semantics change | tool/resource shapes, workflow tool payloads, repository-read-only semantics, optional-dependency packaging behavior change | public MCP tool names, workflow tool payloads, resource URIs, launcher/install behavior, or response semantics change | +| Engineering Memory, semantic retrieval, trajectories, Experiences, projection jobs (`codeclone/memory/*`, `codeclone/config/memory*.py`) | `docs/book/13-engineering-memory/`, trajectory/Experience guides, `docs/book/25-mcp-interface/`, `docs/book/11-cli.md`, plugin skills, `CHANGELOG.md` | Applicable `tests/test_memory_*.py`, `tests/test_semantic_*.py`, projection/trajectory/Experience tests, MCP memory tests, and tool-schema snapshots when payloads change | schema/governance transitions, retrieval/fusion semantics, trajectory quality, Experience promotion, or worker lifecycle change | memory/semantic/projection versions, SQLite DDL, CLI/MCP payloads, 
ranking/filter/governance semantics change | +| Platform Observability (`codeclone/observability/*`, CLI trace, MCP bounded slicer, worker instrumentation) | `docs/book/26-platform-observability.md`, `docs/guide/observability/diagnostics.md`, config/MCP docs, `CHANGELOG.md` when user-visible | `tests/test_observability_*.py`, plus worker/memory/MCP tests for changed instrumentation boundaries | privacy/trust boundary, persisted schema, correlation, payload-size, SQL fingerprint, or public projection changes | `PLATFORM_OBSERVABILITY_SCHEMA_VERSION`, CLI/MCP section payloads, persistence or collection semantics change | +| Controller audit and insights (`codeclone/audit/*`, `codeclone/controller_insights/*`, CLI/MCP session/audit surfaces) | Controller, CLI/config, retention, MCP, and integration docs; `CHANGELOG.md` when public | `tests/test_audit_*.py`, `tests/test_controller_insights.py`, CLI/MCP projection tests | audit event core/schema, retention, token/payload footprint, or shared collector semantics change | audit schema/event core, `--audit`/`--session-stats`, IDE-only insight payloads change | +| VS Code extension surface (`extensions/vscode-codeclone/*`) | `README.md`, `docs/guide/integrations/vscode/setup.md`, `docs/book/integrations/vs-code-extension.md`, `docs/book/02-architecture-map.md`, `docs/index.md`, `CHANGELOG.md` | `node --check extensions/vscode-codeclone/src/support.js`, `node --check extensions/vscode-codeclone/src/mcpClient.js`, `node --check extensions/vscode-codeclone/src/extension.js`, `node --test extensions/vscode-codeclone/test/*.test.js`, plus local extension-host smoke and package smoke when surface/manifest/assets change | command/view UX, trust/runtime model, source-first review flow, or packaging metadata change | documented commands/views/setup/trust behavior, packaged assets, or publish metadata change | +| Claude Desktop bundle surface (`extensions/claude-desktop-codeclone/*`) | 
`docs/guide/integrations/claude-desktop/setup.md`, `docs/book/integrations/claude-desktop-bundle.md`, `docs/guide/mcp/`, `docs/book/02-architecture-map.md`, `docs/index.md`, `CHANGELOG.md` | `node --check extensions/claude-desktop-codeclone/server/index.js`, `node --check extensions/claude-desktop-codeclone/src/launcher.js`, `node --check extensions/claude-desktop-codeclone/scripts/build-mcpb.mjs`, `node --test extensions/claude-desktop-codeclone/test/*.test.js`, plus `.mcpb` build smoke | bundle install/runtime model, launcher UX, local-stdio constraints, or bundle metadata change | documented Claude Desktop install/setup/runtime behavior or packaged bundle semantics change | +| Claude Code plugin surface (`plugins/claude-code-codeclone/*`, `scripts/integration_dist/marketplace.claude-code.json`) | `docs/guide/integrations/claude-code/setup.md`, `docs/book/integrations/claude-code-plugin.md`, `docs/guide/mcp/`, `docs/book/02-architecture-map.md`, `docs/index.md`, `CHANGELOG.md` | `python3 -m json.tool plugins/claude-code-codeclone/.claude-plugin/plugin.json`, `python3 -m json.tool plugins/claude-code-codeclone/.mcp.json`, `python3 -m json.tool scripts/integration_dist/marketplace.claude-code.json`, `claude plugin validate plugins/claude-code-codeclone`, `tests/test_claude_code_plugin.py` | plugin discovery/runtime model, bundled MCP config, bundled skill behavior, launcher behavior, or marketplace metadata change | documented Claude Code install/discovery/runtime behavior or plugin manifest/marketplace semantics change | +| Codex plugin surface (`plugins/codeclone/*`, `.agents/plugins/marketplace.json`) | `docs/guide/integrations/codex/setup.md`, `docs/book/integrations/codex-plugin.md`, `docs/guide/mcp/`, `docs/book/02-architecture-map.md`, `docs/index.md`, `CHANGELOG.md` | `python3 -m json.tool plugins/codeclone/.codex-plugin/plugin.json`, `python3 -m json.tool plugins/codeclone/.mcp.json`, `python3 -m json.tool .agents/plugins/marketplace.json`, 
`tests/test_codex_plugin.py` | plugin discovery/runtime model, bundled MCP config, bundled skill behavior, or plugin metadata change | documented Codex plugin install/discovery/runtime behavior or plugin manifest/marketplace semantics change | +| Cursor plugin surface (`plugins/cursor-codeclone/*`) | `docs/guide/integrations/cursor/install-and-skills.md`, `docs/book/integrations/cursor-plugin.md`, `docs/guide/mcp/`, `docs/book/02-architecture-map.md`, `docs/index.md`, `CHANGELOG.md` | `tests/test_cursor_plugin.py`, `tests/test_cursor_plugin_hooks.py` | plugin discovery/runtime model, bundled MCP config, bundled skill/rule/hook behavior, or plugin metadata change | documented Cursor plugin install/discovery/runtime behavior or plugin manifest semantics change | +| GitHub Action surface (`.github/actions/codeclone/*`) | Action README, main README/getting-started/CI docs, `CHANGELOG.md` when user-visible | `tests/test_github_action_helpers.py`, shell/action smoke for changed workflow behavior | input interpolation, command construction, timeout, output, or exit behavior changes | public action inputs/outputs/runtime behavior changes | +| Storefront sync and distribution overlays (`scripts/sync_integrations.py`, `scripts/integration_dist/*`, launcher copy rules) | `docs/releasing.md`, affected integration docs/READMEs, `CHANGELOG.md` when publish behavior changes | `tests/test_sync_integrations.py`, then target-native package/test smoke after sync | deletion/copy boundary, target layout, launcher override, denylist, manifest provenance, or dirty-source policy changes | distribution layout, copied source set, `SYNC_MANIFEST.json`, storefront launcher/metadata semantics change | +| Docs site / sample report publication (`docs/`, `zensical.toml`, `.github/workflows/docs.yml`, `scripts/build_docs_example_report.py`) | `docs/index.md`, `docs/publishing.md`, `docs/examples/report.md`, and any contract pages surfaced by the change, `CHANGELOG.md` when user-visible behavior 
changes | `zensical build --clean --strict`, sample-report generation smoke path, and relevant report/html tests if generated examples or embeds change | published docs navigation, sample-report generation, or Pages workflow semantics change | published documentation behavior or sample-report generation contract changes | Golden rule: do not “fix” failures by snapshot refresh unless the underlying contract change is intentional, documented, and approved. @@ -628,24 +829,37 @@ Treat tests as specification with explicit intent: - **Unit tests** — module-level behavior and edge conditions (e.g., `tests/test_cfg.py`, `tests/test_normalize.py`, `tests/test_metrics_modules.py`, `tests/test_suppressions.py`). -- **Contract tests** — baseline/cache/report/CLI public semantics (e.g., `tests/test_baseline.py`, - `tests/test_cache.py`, `tests/test_report_contract_coverage.py`, `tests/test_cli_unit.py`). +- **Contract tests** — controller, baseline/cache/report/CLI/MCP/Memory public + semantics (e.g., `tests/test_mcp_service.py`, `tests/test_baseline.py`, + `tests/test_cache.py`, `tests/test_report_contract_coverage.py`, + `tests/test_memory_compact_contract.py`). - **Golden tests** — snapshot sentinels for stable outputs (`tests/test_detector_golden.py`, `tests/test_golden_v2.py`). - **Determinism/invariant tests** — ordering, branch-path invariants, and canonical stability (e.g., - `tests/test_report_branch_invariants.py`, `tests/test_core_branch_coverage.py`). + `tests/test_report_branch_invariants.py`, `tests/test_core_branch_coverage.py`, + `tests/test_semantic_determinism_gate.py`). - **Scenario/regression tests** — multi-step integration and process-level behavior (e.g., - `tests/test_cli_inprocess.py`, `tests/test_pipeline_process.py`, `tests/test_cli_smoke.py`). + `tests/test_cli_inprocess.py`, `tests/test_pipeline_process.py`, + `tests/test_memory_projection_jobs.py`, `tests/test_sync_integrations.py`). 
+- **Developer diagnostics tests** — observer configuration, correlation, + persistence, query, rendering, MCP, and worker chain behavior + (`tests/test_observability_*.py`). Policy: - Expand the closest taxonomy bucket when changing behavior. - If a change touches a public surface, include/adjust contract tests, not only unit tests. - Goldens validate intended contract shifts; they are not a substitute for reasoning or routing. +- Put tests in the test module that owns the behavior. Do not create generic + coverage-uplift or miscellaneous dumping-ground test files. +- Coverage is a guardrail, not a reason to execute lines without asserting + behavior. ## 18) Public vs internal surfaces ### Public / contract-sensitive surfaces +- Structural Change Controller intent, permission, scope/hygiene, blast-radius, + verification-profile, claim, receipt, and Patch Trail semantics. - CLI flags, defaults, exit codes, and stable script-facing messages. - Baseline schema/trust semantics/integrity compatibility (`BASELINE_SCHEMA_VERSION` contract family). - Cache schema/status/profile compatibility/integrity (`CACHE_VERSION` contract family). @@ -655,9 +869,17 @@ Policy: repository-read-only semantics. - Documented MCP workflow tools, verification profiles, workspace intent coordination, queue/promote semantics, and review receipt payloads. +- Engineering Memory schema, governance transitions, retrieval/filter/ranking + semantics, semantic sidecar format, trajectory quality, Experience promotion, + projection jobs, and CLI/MCP payloads. +- Platform Observability environment contract, local schema/privacy boundary, + CLI trace output, bounded MCP sections, and correlation behavior. +- Controller audit/event-core and shared session/audit insight payloads. - Session-local MCP review state semantics (`mark_finding_reviewed`, `exclude_reviewed`) as documented public behavior. 
- Documented VS Code extension behavior: commands, views, setup guidance, trusted-workspace model, and its baseline-aware triage workflow over MCP. +- Documented Claude Desktop, Claude Code, Codex, Cursor, GitHub Action, and + storefront-sync install/runtime/package semantics. - Documented finding families/kinds/ids and suppression-facing report fields. - Metrics baseline schema/compatibility where used by CI/gating. - Benchmark schema/outputs if consumed as a reproducible contract surface. @@ -757,6 +979,17 @@ These rules exist because of real incidents in this repo. They are non-negotiabl - Before starting work, run `git status` and review uncommitted/untracked changes. They may belong to a parallel agent or to the maintainer; do not delete or overwrite them without explicit approval. +### Human review boundary + +- Agents may author substantial contributions, but they do not own merge + approval. +- A human contributor must inspect and understand the complete diff, verify + tests/contracts/security/licensing/provenance, and accept maintenance + responsibility. +- Material agent assistance must be disclosed in the pull request. +- Never describe agent review, CodeClone findings, receipts, or CI as a + substitute for substantive human review. + ### Documentation hygiene - Every doc claim about code (schema version, module path, function name, MCP tool count, exit code, @@ -808,14 +1041,18 @@ These rules exist because of real incidents in this repo. They are non-negotiabl ## 21) Minimal checklist for PRs (agents) +- [ ] Intent and scope were declared before editing; `edit_allowed=true` was observed when available. +- [ ] Actual changed files match declared scope; required verification and receipt completed. - [ ] Change is deterministic. - [ ] Contracts preserved or versioned. -- [ ] Tests added for new behavior. -- [ ] `ruff`, `mypy`, `pytest` green. +- [ ] Tests were added to the owning test module for new behavior. 
+- [ ] Pre-commit and pre-push/coverage validation are green. - [ ] CLI messages remain helpful and stable (don’t break scripts). - [ ] Reports contain provenance fields and reflect trust model correctly. - [ ] Golden snapshots were **not** updated just to satisfy failing tests. - [ ] If any golden snapshot changed, the corresponding contract change is intentional, documented, and approved. +- [ ] Material agent assistance is disclosed. +- [ ] A human reviewed and understood the complete diff before merge. --- diff --git a/CHANGELOG.md b/CHANGELOG.md index 9622207c..50184578 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -55,9 +55,10 @@ Added * IDE and agent integrations. The VS Code extension gains Engineering Memory governance, trajectory dashboards, controller audit views, and workspace session statistics. Native integrations are available for Claude - Desktop, Codex, and Cursor. The Cursor plugin includes skills, rules, - fail-closed preToolUse enforcement, scoped workspace-intent checks, and a - structural-review agent. + Desktop, Claude Code, Codex, and Cursor. Claude Code now has a dedicated + marketplace plugin and storefront, separate from the Desktop `.mcpb` bundle. + The Cursor plugin includes skills, rules, fail-closed preToolUse enforcement, + scoped workspace-intent checks, and a structural-review agent. * Controller and diagnostic CLI surfaces. Added blast-radius, patch verification, session statistics, controller audit, memory trajectory, anomaly, agent-profile, semantic-search, and Platform Observability commands. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f409e169..2b4b853b 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,268 +1,673 @@ # Contributing to CodeClone -Thank you for your interest in contributing to **CodeClone**. +Thank you for contributing to **CodeClone**. 
-CodeClone provides **structural code quality analysis** for Python, including clone detection, -quality metrics, baseline-aware CI governance, and an optional MCP agent interface. +**CodeClone** is a deterministic **Structural Change Controller** for +AI-assisted Python development. -Contributions are welcome — especially those that improve **signal quality**, **CFG semantics**, -and **real-world CI usability**. +It starts before a diff exists: an agent declares intent, CodeClone maps the +structural blast radius, bounds the edit, verifies the resulting patch against +one canonical report, and leaves an auditable receipt. ---- - -## Project Philosophy - -Core principles: - -- **Low noise over high recall** -- **Structural and control-flow similarity**, not semantic equivalence -- **Deterministic and explainable behavior** -- Optimized for **CI usage** and architectural analysis - -If a change increases false positives, reduces determinism, or weakens explainability, -it is unlikely to be accepted. - ---- +```text +intent → blast radius → bounded edit → patch check → review receipt +``` -## Areas Open for Contribution +CodeClone combines structural analysis, baseline-aware CI, Engineering Memory, +agent trajectories, MCP tooling, and IDE integrations without turning LLM +output into truth. It is not an AI reviewer guessing whether code is safe; it +is a deterministic control layer for structural change. + +Contributions are welcome when they preserve the project's central guarantees: +honesty, reproducibility, determinism, explainability, and safe use in real +CI environments. 
+ +## Source of Truth + +Before changing code, read: + +- [`AGENTS.md`](AGENTS.md) for repository-wide operating rules, module + ownership, change routing, and required validation; +- the [architecture map](docs/book/02-architecture-map.md) for current + boundaries and dependency direction; +- [testing as specification](docs/book/23-testing-as-spec.md) for contract and + test ownership; +- the relevant contract chapter under [`docs/book/`](docs/book/). + +The current repository code is the source of truth for implementation +behavior. Version constants must be read from +`codeclone/contracts/__init__.py`, not copied from this file or another +document. If contributor documentation and code diverge, align the +documentation as part of the change. + +## Project Principles + +- **Determinism over cleverness.** Identical inputs and versions must produce + stable findings, ordering, identities, and canonical payloads. +- **Control starts before the diff.** Intent, scope, blast radius, and + do-not-touch boundaries are part of the change contract, not review + commentary added afterward. +- **Evidence over inference.** Core analysis produces facts and metrics; + renderers and clients present them without inventing new gating semantics. +- **Low noise over inflated recall.** Detection changes must account for both + false positives and false negatives. +- **One analysis truth.** CLI, reports, MCP, extensions, and plugins project the + same canonical pipeline and report contracts. +- **Contracts are public APIs.** Baselines, cache compatibility, report + schemas, CLI behavior, MCP payloads, and published integration behavior + require deliberate compatibility handling. +- **Safety first.** Treat source code, paths, configuration, baselines, caches, + and external tool input as untrusted. + +Changes that increase unexplained noise, introduce nondeterminism, weaken +contract boundaries, or silently change trusted artifacts are unlikely to be +accepted. 
+ +## AI-Assisted Contributions + +CodeClone accepts contributions written with coding agents, language models, +and other automated development tools. Agent assistance is welcome, including +substantial agent-authored code, but it does not transfer responsibility away +from people. + +Every AI-assisted contribution must meet all of the following requirements: + +- A human contributor must inspect the complete diff, understand it, and be + able to explain and maintain it. +- A human must verify the relevant tests, contract implications, security + properties, generated artifacts, and documentation before requesting review. +- A substantive human review is mandatory before merge. Agent-only review, + automated approval, or a passing CI run does not satisfy this requirement. +- Material agent assistance must be disclosed in the pull request + description, including what the agent produced or changed. +- The contributor must verify provenance, licensing, and third-party rights for + generated code, text, fixtures, and assets. +- Secrets, private prompts, credentials, unrelated user data, and unreviewed + generated output must not be committed. +- Do not submit code that no human can confidently explain, test, or support. + +CodeClone's Structural Change Controller, Engineering Memory, review receipts, +claim validation, and Platform Observability can strengthen review evidence. +They do not replace human engineering judgment or human approval. 
+ +## Where to Contribute + +Contributions are especially useful in: + +- Structural Change Controller intent, scope, blast-radius, patch-contract, + claim-validation, and receipt workflows; +- Engineering Memory retrieval, semantic indexing, trajectories, Patch Trail, + Experiences, governance, and projection jobs; +- Platform Observability instrumentation and developer diagnostics; +- AST normalization, CFG construction, and structural extraction; +- clone grouping, explainability, and false-positive reduction; +- complexity, coupling, cohesion, dependency, dead-code, coverage, adoption, + API-surface, and health metrics; +- baseline, cache, canonical report, and deterministic renderer contracts; +- MCP tools, resources, messages, and transport behavior; +- VS Code, Claude Desktop, Claude Code, Codex, Cursor, and GitHub Action surfaces; +- performance work that preserves fingerprint and canonical-output semantics; +- documentation, examples, tests, and real-world CI scenarios. + +Use the module ownership table in [`AGENTS.md`](AGENTS.md) and the +[architecture map](docs/book/02-architecture-map.md) to route changes to the +correct layer. + +## Contribution Workflow + +1. Confirm the user-visible problem and identify the owning module. +2. Classify whether the change affects a versioned or public contract. +3. Read the nearest tests and normative documentation before editing. +4. Keep the patch narrowly scoped and preserve unrelated work in the tree. +5. Add tests in the test module that owns the behavior. Do not create generic + coverage-uplift or miscellaneous test dumping grounds. +6. Update documentation when behavior, configuration, commands, payloads, or + public integration surfaces change. +7. Run the relevant focused checks, then the repository validation required + below. +8. Review the final diff as a human-readable change, not merely as passing + automation. 
+ +When CodeClone MCP change control is available, contributors and coding agents +should use `start_controlled_change` before editing and +`finish_controlled_change` after verification. These tools bind intent, scope, +blast radius, patch budget, verification, and the review receipt. The atomic +tools remain available for deeper inspection and recovery. See the +[Structural Change Controller](docs/book/12-structural-change-controller/index.md). -We especially welcome contributions in the following areas: +## Reporting Bugs -- Control Flow Graph (CFG) construction and semantics -- AST normalization improvements -- Segment-level clone detection and reporting -- Quality metrics (complexity, coupling, cohesion, dead-code, dependencies) -- False-positive reduction -- HTML report UX improvements -- MCP server tools and agent workflows -- GitHub Action improvements -- Performance optimizations -- Documentation and real-world examples +Use the appropriate GitHub issue template. Include: ---- +- a minimal reproducer, preferably source text rather than screenshots; +- CodeClone and Python versions; +- the command, configuration, and relevant optional extras; +- expected and actual behavior; +- whether a baseline, cache, coverage XML, MCP client, memory store, semantic + sidecar, projection worker, or observability store was involved; +- sanitized logs or payload excerpts where useful. -## Reporting Bugs +Classify the affected area when possible: change control/blast radius, +Engineering Memory/trajectories, analysis/CFG, normalization, clones, metrics, +baseline/cache/report, CLI, MCP, observability, documentation, or a +client/integration surface. + +For false positives, explain why the detected code is architecturally distinct +in control flow, responsibilities, or structure. Naming, comments, and +formatting alone are not sufficient evidence. 
+ +For a suspected Platform Observability issue, include the operation or +correlation ID and a bounded, sanitized JSON projection when possible. Never +attach raw repository secrets or private source unnecessarily. + +## Design-Sensitive Changes + +### Analysis, CFG, and fingerprints + +For AST normalization, CFG, extraction, or clone identity changes, describe: -Please use the appropriate **GitHub Issue Template**. +- current and proposed behavior; +- concrete positive and negative examples; +- expected false-positive and false-negative impact; +- determinism implications; +- baseline and fingerprint compatibility implications. -When reporting issues related to clone detection, include: +Performance work must not change normalization, fingerprint inputs, clone +identity, or NEW-versus-KNOWN classification while +`BASELINE_FINGERPRINT_VERSION` is unchanged. Fingerprint-adjacent changes +require explicit maintainer approval, version review, migration/release notes, +tests, and documentation. -- minimal reproducible code snippets (preferred over screenshots); -- the CodeClone version; -- the Python version (`python_tag`, e.g. `cp314`); -- whether the issue is primarily: - - AST-related, - - CFG-related, - - normalization-related, - - metrics-related, - - MCP-related, - - reporting / UI-related. +### Golden tests + +Golden tests are contract sentinels. Do not update snapshots merely to make a +failure disappear. A golden update is acceptable only when the contract change +is intentional, reviewed, documented, and versioned where required. + +### Security and safety + +- Preserve path validation and repository-root containment. +- Keep normal-mode fail-open and gating-mode fail-closed behavior only where + the owning contract explicitly defines it. +- Add negative tests for parser, normalization, transport, path, and + persistence boundaries. +- Do not let UI, MCP, memory, observability, or client surfaces invent analysis + facts or authorization. 
+ +See the [security model](docs/book/21-security-model.md). + +## Versioned Contracts + +Current values must always be verified in `codeclone/contracts/__init__.py`. +At the time this document was updated, the main contracts were: + +| Contract | Version | Primary owner | +|---|---:|---| +| Baseline schema | `2.1` | `codeclone/baseline/` | +| Baseline fingerprint | `1` | `codeclone/contracts/__init__.py` | +| Analysis cache | `2.8` | `codeclone/cache/` | +| Canonical report | `2.11` | `codeclone/report/document/` | +| Metrics baseline | `1.2` | `codeclone/baseline/` | +| Engineering Memory | `1.6` | `codeclone/memory/` | +| Semantic index format | `1` | `codeclone/memory/semantic/` | -Screenshots alone are usually insufficient for analysis. +Any schema shape or semantic change requires version review, tests, and +documentation. Compatibility details live in +[compatibility and versioning](docs/book/24-compatibility-and-versioning.md). ---- +### Baseline and CI behavior -## False Positives +- Baseline trust depends on schema compatibility, fingerprint version, Python + tag, generator identity, and canonical payload integrity. +- Regenerate the baseline when fingerprint compatibility or Python tag changes. +- Do not regenerate it for report-only, UI-only, cache-only, or performance-only + work that preserves fingerprint semantics. +- Untrusted baseline state fails fast with exit `2` in gating mode. +- Outside gating mode, an untrusted baseline is ignored with a warning and + comparison proceeds against an empty baseline. +- Baseline novelty is baseline-relative. Patch-local regression claims require + a clean before/after comparison. -False positives are **expected edge cases**, not necessarily bugs. +Public exit categories are: -When reporting a false positive: +- `0`: success; +- `2`: contract or invocation error; +- `3`: analysis/quality gate failure; +- `5`: unexpected internal error. 
-- explain **why the detected code is architecturally distinct**; -- avoid arguments based solely on naming, comments, or formatting; -- focus on **control-flow, responsibilities, or structural differences**. +See [baseline trust](docs/book/07-baseline.md), +[exit codes](docs/book/09-exit-codes.md), and +[metrics and gates](docs/book/16-metrics-and-quality-gates.md). + +## MCP and Agent Surfaces + +The optional `codeclone[mcp]` server is read-only with respect to source files, +baselines, canonical/generated reports, and analysis cache data. + +Explicit controller and developer contracts may maintain bounded local state: -Well-argued false-positive reports are valuable and appreciated. +- session-local runs and review markers; +- ephemeral workspace intent records under `.codeclone/intents/`; +- optional audit evidence under `.codeclone/db/`; +- governed Engineering Memory and projection state under `.codeclone/memory/`; +- optional Platform Observability telemetry under + `.codeclone/db/platform_observability.sqlite3`. ---- +Engineering Memory mutations must use explicit memory tools. Agent-initiated +mutations are limited to the documented refresh, projection, and draft +proposal contracts; approval, rejection, and archival remain human-governed. +None of this state may alter canonical report identity, baseline trust, cache +compatibility, findings, or edit authorization. -## CFG Semantics Discussions +Tool names, parameter fields, response shapes, resource URIs, descriptions, and +error semantics are public surfaces. Keep optional MCP dependencies lazy so the +base package and non-MCP CI do not require them. + +See the [MCP interface](docs/book/25-mcp-interface/index.md) and +[MCP contributor guide](docs/guide/mcp/README.md). + +## Engineering Memory + +Engineering Memory is a local, evidence-linked knowledge store, not a second +analyzer and not analysis cache. 
It combines governed records with report, git, +documentation, audit, trajectory, Patch Trail, and Experience evidence. + +When changing memory behavior: + +- preserve deterministic retrieval and stable bounded payloads; +- keep FTS, semantic sidecar, trajectory, and Experience lanes explicit; +- preserve human governance for durable promoted knowledge; +- treat semantic search as optional and keep the default installation free of + vector-model dependencies; +- keep projection jobs coalesced, watermarked, observable, and independent from + analysis truth; +- test schema migration, staleness, filtering, ranking, scope, governance, and + worker lifecycle as applicable. + +Start with the [Engineering Memory chapter](docs/book/13-engineering-memory/index.md), +[trajectory and Patch Trail contract](docs/book/13-engineering-memory/trajectory-and-patch-trail.md), +[Experience Layer](docs/book/13-engineering-memory/experience-layer.md), and +[projection jobs](docs/book/13-engineering-memory/projection-jobs.md). + +## Platform Observability + +Platform Observability is an opt-in developer diagnostics surface for +CodeClone's own execution. It helps investigate slow CLI/MCP operations, +database cost, projection workers, memory pipelines, redundant work, and +cross-process correlations. + +It is disabled by default and configured only through environment variables. +It stores bounded local telemetry, normalized literal-free SQL fingerprints, +durations, counters, and optional process metrics. It does not store raw prompt +or MCP payload bodies and has no network exporter. + +Most importantly, observer data is **not** repository quality evidence. It must +never affect findings, gates, baseline trust, cache compatibility, memory facts, +permissions, or edit authorization. + +Enable it for a local diagnostic run: -If proposing changes to CFG semantics, include: +```bash +CODECLONE_OBSERVABILITY_ENABLED=1 uv run codeclone . +uv run codeclone observability trace --root . 
+uv run codeclone observability trace \ + --root . \ + --last 50 \ + --html /tmp/codeclone-observer.html +``` -- a description of the current behavior; -- the proposed new behavior; -- the expected impact on clone detection quality (noise/recall); -- concrete code examples; -- a note on determinism implications. +Optional process metrics require the `perf` extra and +`CODECLONE_OBSERVABILITY_PROFILE=1`. Raw payload snapshots are unsupported. +Automatic retention pruning is not currently guaranteed, so developers who +enable persistence own the lifecycle of the local SQLite database. -Such changes often require design-level discussion and may be staged across versions. +Instrumentation must be initialized before instrumented stores/connections are +opened, and worker correlation IDs must be propagated rather than synthesized +independently. New spans and counters must remain numeric, bounded, +deterministic in shape, and privacy-safe. ---- +Read the normative [Platform Observability contract](docs/book/26-platform-observability.md) +and the practical [diagnostics guide](docs/guide/observability/diagnostics.md) +before modifying instrumentation, storage, rendering, or MCP projections. -## Security & Safety Expectations +## Native Clients and Integrations -- Assume **untrusted input** (paths and source code). -- Prefer **fail-closed in gating modes** and **fail-open in normal modes** only when explicitly intended. -- Add **negative tests** for any normalization/CFG change. -- Changes must preserve determinism and avoid introducing new false positives. +VS Code, Claude Desktop, Claude Code, Codex, Cursor, and the composite GitHub +Action are clients or packaging surfaces over the same CodeClone/MCP contracts. +They must not implement a second analyzer, redefine finding semantics, or +silently drift from MCP tool schemas. 
---- +Public commands, views, manifests, bundled skills/rules/hooks, launcher +behavior, trust boundaries, packaged assets, and marketplace metadata require +surface-specific tests and documentation. -## Baseline & CI +Architecture references: -### Baseline contract (v2) +- [VS Code extension](docs/book/integrations/vs-code-extension.md) +- [Claude Desktop bundle](docs/book/integrations/claude-desktop-bundle.md) +- [Claude Code plugin](docs/book/integrations/claude-code-plugin.md) +- [Codex plugin](docs/book/integrations/codex-plugin.md) +- [Cursor plugin](docs/book/integrations/cursor-plugin.md) -- The baseline schema is versioned (`meta.schema_version`, currently `2.1`). -- Compatibility/trust gates include `schema_version`, `fingerprint_version`, `python_tag`, - and `meta.generator.name`. -- Integrity is tamper-evident via `meta.payload_sha256` over canonical payload. -- The baseline may embed a `metrics` section for metrics-baseline-aware CI gating. +For the GitHub Action, never interpolate `${{ inputs.* }}` directly into shell +scripts; pass values through `env:`. Keep subprocess timeouts explicit and +preserve documented output and exit semantics. -### When baseline regeneration is required +## Developer Scripts -- Regenerate baseline with `codeclone . --update-baseline` when - `fingerprint_version` **or** `python_tag` changes. -- Regeneration is **not** required for UI/report/CLI/cache/performance-only changes - if both `fingerprint_version` and `python_tag` are unchanged. +The top-level [`scripts/`](scripts/) directory contains developer, docs, and +release utilities. It is not a miscellaneous home for product behavior: +runtime logic belongs in the owning `codeclone/` module and scripts should stay +thin, explicit, and tested. 
-### Gating behavior +| Path | Purpose | Important boundary | +|---|---|---| +| `scripts/build_docs_example_report.py` | Analyze the repository and stage the live docs example as HTML, JSON, SARIF, and `manifest.json` | Writes generated output, by default under `site/examples/report/live`; use it only for docs example/report publication work | +| `scripts/lint_admonitions.py` | Validate MkDocs admonition/details indentation | `--fix` rewrites Markdown; review the resulting diff | +| `scripts/launch_mcp` | Monorepo adapter that delegates to the shared Codex plugin MCP launcher | Not an independent launcher implementation; keep launcher resolution in `plugins/codeclone/scripts/launch_mcp.py` | +| `scripts/sync_integrations.py` | Synchronize Codex, Claude Code, Cursor, VS Code, and Claude Desktop distribution repositories | Maintainer/release tool that deletes and recopies managed target paths; always dry-run first | +| `scripts/integration_dist/*` | Distribution-only README, `.gitignore`, and marketplace overlays used by storefront sync | Source-controlled release inputs, not generated scratch files | +| `scripts/__init__.py` | Package marker for importing script helpers in tests | Not a command-line entrypoint | -- In `--ci` (or explicit gating flags), **untrusted baseline states fail fast** as a contract error (exit 2). -- Outside gating mode, an untrusted/missing baseline is ignored with a warning and comparison proceeds - against an empty baseline. 
+### Docs utilities -### Exit codes contract +When changing the live sample report or its publication path: -- **0** — success -- **2** — contract error (e.g., missing/untrusted baseline in gating, invalid output path/extension, incompatible - versions) -- **3** — gating failure (new clones detected, `--fail-threshold` exceeded) -- **5** — internal error (unexpected exception; please report) +```bash +uv run python scripts/build_docs_example_report.py \ + --output-dir site/examples/report/live +uv run --with zensical==0.0.43 zensical build --clean --strict +``` ---- +The generator runs CodeClone against the repository, stages its output in a +temporary directory, then copies only the documented artifacts to the +destination. Changes require the relevant report/HTML tests plus +`tests/test_docs_example_report.py`. -## Versioned schemas +Validate admonition indentation without writing: -CodeClone maintains several versioned schema contracts: +```bash +uv run scripts/lint_admonitions.py docs/ +``` -| Schema | Current version | Owner | -|------------------|-----------------|------------------------------------------| -| Baseline | `2.1` | `codeclone/baseline/clone_baseline.py` | -| Report | `2.11` | `codeclone/report/document/*` | -| Cache | `2.8` | `codeclone/cache/store.py` | -| Metrics baseline | `1.2` | `codeclone/baseline/metrics_baseline.py` | +Apply its deterministic indentation repair only when needed: -Any change to schema shape or semantics requires version review, documentation, and tests. +```bash +uv run scripts/lint_admonitions.py docs/ --fix +``` ---- +The pre-commit hook uses `--fix`, so docs commits must be re-reviewed after the +hook runs. -## MCP Interface +### Storefront synchronization -CodeClone includes an optional **read-only MCP server** (`codeclone[mcp]`) for AI agents. 
+`scripts/sync_integrations.py` mirrors monorepo integration sources into sibling +git repositories named `codeclone-codex`, `codeclone-claude-code`, +`codeclone-cursor`, `codeclone-vscode`, and `codeclone-claude-desktop`. It also +writes a `SYNC_MANIFEST.json` containing source commit and package provenance. -When contributing to MCP: +Run it from the monorepo root and inspect a dry run first: -- MCP must remain **read-only** with respect to source files, baselines, analysis - cache, and canonical report artifacts. -- Allowed repo-local writes are limited to ephemeral controller coordination - (`.codeclone/intents/`) and optional audit trail - (`.codeclone/db/audit.sqlite3` when `audit_enabled=true`). -- Session-local review markers and in-memory run history are ephemeral and do - not survive process restart. -- MCP reuses pipeline/report contracts — do not create a second analysis truth path. -- Tool names, resource URIs, and response shapes are public surfaces — changes require tests and docs. +```bash +uv run python scripts/sync_integrations.py \ + --dry-run \ + --all \ + --base-dir .. +``` -See `docs/guide/mcp/README.md` and `docs/book/25-mcp-interface/index.md` for details. +Only after reviewing the plan should a maintainer perform a write: ---- +```bash +uv run python scripts/sync_integrations.py --all --base-dir .. +``` -## GitHub Action +The script refuses a dirty source tree by default, validates target repository +names and containment, rejects copied symlinks, and writes the manifest +atomically. `--allow-dirty` is an emergency override, not a normal release +workflow; its dirty provenance is recorded in the manifest. Sync each target, +inspect its diff, run its native checks, and commit/push each distribution +repository separately. -CodeClone ships a composite GitHub Action (`.github/actions/codeclone/`). 
+Cursor and Claude Code have intentional launcher overrides: their monorepo +launchers are thin delegates, while standalone distributions receive the full +shared launcher implementation. Do not replace this with a blind directory +copy. -When contributing to the Action: +Changes to sync logic, layouts, deny lists, launchers, or +`scripts/integration_dist/*` require: -- Never inline `${{ inputs.* }}` in shell scripts — pass through `env:` variables. -- Prefer major-tag pinning for actions (e.g., `actions/setup-python@v5`). -- Add timeouts to all `subprocess.run` calls. +```bash +uv run pytest -q tests/test_sync_integrations.py +``` ---- +The full operational and post-sync checklist is in +[`docs/releasing.md`](docs/releasing.md). ## Development Setup +CodeClone supports Python 3.10 through 3.14. + ```bash git clone https://github.com/orenlab/codeclone.git cd codeclone -uv sync --all-extras --dev +uv sync --extra dev --extra mcp --extra token-bench uv run pre-commit install ``` -Run tests: +`.pre-commit-config.yaml` installs both `pre-commit` and `pre-push` hooks by +default. Do not use `--no-verify` to bypass them; fix the failure or document a +genuine infrastructure blocker for maintainers. + +The semantic and performance extras are intentionally optional. Install them +only for work that needs those paths, for example: ```bash -uv run pytest +uv sync --extra dev --extra mcp --extra semantic-local --extra perf ``` -Static checks: +## Required Validation + +The pre-commit stage runs repository hygiene checks, Ruff formatting and lint, +Mypy, baseline-aware `codeclone . --ci`, and the docs admonition fixer when +matching Markdown changed: ```bash uv run pre-commit run --all-files ``` -Build documentation (if you touched `docs/` or `zensical.toml`): +Some hooks modify files (`end-of-file-fixer`, trailing whitespace, line endings, +Ruff format, and docs admonition repair). Always inspect `git diff` again after +the hook completes. 
+ +The command above runs the **pre-commit stage only**. It does not run the +pre-push pytest hook. Run that stage explicitly before pushing: ```bash -uv run --with zensical==0.0.43 zensical build --clean --strict +uv run pre-commit run --hook-stage pre-push --all-files ``` -Run MCP tests (if you touched `mcp_service.py` or `mcp_server.py`): +The pre-push hook executes the full test suite with package coverage of at +least 99%. Its underlying CI command is: + +```bash +uv run pytest \ + --cov=codeclone \ + --cov-report=term-missing \ + --cov-fail-under=99 +``` + +CI runs this suite on Python 3.10, 3.11, 3.12, 3.13, and 3.14. A test that only +passes on the contributor's interpreter is not sufficient. + +Run focused tests while developing, but do not use them as a substitute for +the required full validation when the change can affect shared behavior. + +### Contract-specific checks + +For MCP changes: ```bash uv run pytest -q tests/test_mcp_service.py tests/test_mcp_server.py ``` ---- +For Engineering Memory, semantic retrieval, trajectory, Experience, or +projection-job changes, run the nearest owning modules described in +[testing as specification](docs/book/23-testing-as-spec.md), including the +relevant `tests/test_memory_*.py`, `tests/test_semantic_*.py`, and MCP memory +contract tests. -## Commit Messages +For Platform Observability changes: -Use the repository's existing **Conventional Commits** style: +```bash +uv run pytest -q tests/test_observability_*.py +``` -- format: `type(scope): imperative summary` -- keep `type` lowercase (`feat`, `fix`, `docs`, `chore`, ...) -- keep the summary short, imperative, and specific to the user-visible change -- use a narrow scope when it helps (`metrics`, `mcp,vscode`, `core,ci`, ...) 
-- split unrelated changes into separate commits instead of writing one broad summary +For documentation, navigation, publishing, or sample-report changes: -Examples from the current history: +```bash +uv run --with zensical==0.0.43 zensical build --clean --strict +``` -- `fix(core,ci): harden git diff validation, make segment digests canonical, and align CI policy` -- `feat(metrics): add adoption and public API baselines with compact schema-aware storage` -- `chore(docs): align AGENTS and contract docs with current code` +For VS Code extension changes: -If a commit needs extra context, keep the subject line concise and explain the -rest in the commit body. +```bash +node --check extensions/vscode-codeclone/src/support.js +node --check extensions/vscode-codeclone/src/mcpClient.js +node --check extensions/vscode-codeclone/src/extension.js +node --test extensions/vscode-codeclone/test/*.test.js +node extensions/vscode-codeclone/test/runExtensionHost.js +``` ---- +If VS Code packaging metadata or assets changed, also package a `.vsix` with +`vsce package --out /tmp/codeclone.vsix`. 
-## Code Style +For Claude Desktop bundle changes: + +```bash +node --check extensions/claude-desktop-codeclone/server/index.js +node --check extensions/claude-desktop-codeclone/src/launcher.js +node --check extensions/claude-desktop-codeclone/scripts/build-mcpb.mjs +node --test extensions/claude-desktop-codeclone/test/*.test.js +node extensions/claude-desktop-codeclone/scripts/build-mcpb.mjs \ + --out /tmp/codeclone-claude-desktop.mcpb +``` + +For Codex plugin changes: + +```bash +python3 -m json.tool plugins/codeclone/.codex-plugin/plugin.json \ + >/tmp/codeclone-codex-plugin.json +python3 -m json.tool plugins/codeclone/.mcp.json \ + >/tmp/codeclone-codex-mcp.json +python3 -m json.tool .agents/plugins/marketplace.json \ + >/tmp/codeclone-codex-marketplace.json +uv run pytest -q tests/test_codex_plugin.py +``` + +For Claude Code plugin changes: + +```bash +python3 -m json.tool \ + plugins/claude-code-codeclone/.claude-plugin/plugin.json \ + >/tmp/codeclone-claude-code-plugin.json +python3 -m json.tool plugins/claude-code-codeclone/.mcp.json \ + >/tmp/codeclone-claude-code-mcp.json +python3 -m json.tool scripts/integration_dist/marketplace.claude-code.json \ + >/tmp/codeclone-claude-code-marketplace.json +claude plugin validate plugins/claude-code-codeclone +uv run pytest -q tests/test_claude_code_plugin.py +``` + +For Cursor plugin changes: + +```bash +uv run pytest -q tests/test_cursor_plugin.py tests/test_cursor_plugin_hooks.py +``` + +For GitHub Action helper changes: + +```bash +uv run pytest -q tests/test_github_action_helpers.py +``` + +The change-routing matrix in [`AGENTS.md`](AGENTS.md) is authoritative when a +change spans more than one contract or integration. + +## Test Policy + +- Put tests beside the contract they specify, using the owning module's test + file and naming conventions. +- Prefer behavior and invariant assertions over implementation-detail checks. 
+- Cover normal mode, gating mode, error paths, determinism, and legacy or + untrusted states where relevant. +- Public payload changes require contract tests, not only unit tests. +- Avoid sleeps, unstable filesystem ordering, machine-local paths, and + network-dependent assertions. +- Coverage is a guardrail, not a reason to create artificial test modules or + tests that merely execute lines without asserting behavior. +- A bug fix should normally include a regression test that fails before the + fix and passes after it. + +## Pull Requests + +A pull request should state: -- Python **3.10 – 3.14** -- Type annotations are required -- `Any` should be minimized; prefer precise types and small typed helpers -- `mypy` must pass -- `ruff check` must pass -- Code must be formatted with `ruff format` -- Prefer explicit, readable logic over clever or implicit constructs +- the problem and user-visible outcome; +- files and ownership boundaries affected; +- contract, schema, baseline, cache, report, CLI, MCP, memory, observability, or + integration implications; +- tests and validation commands run; +- documentation and migration/release-note impact; +- material use of coding agents or generated content. ---- +Keep unrelated refactors and generated churn out of the patch. Do not claim a +finding is new, fixed, regression-free, or patch-local without the evidence +required by the relevant contract. + +Maintainers may request narrower scope, stronger negative tests, before/after +evidence, or a versioned migration even when CI is green. + +## Commit Messages + +Use the repository's Conventional Commits style: + +- `type(scope): imperative summary`; +- lowercase type such as `feat`, `fix`, `docs`, `test`, or `chore`; +- a narrow scope when useful; +- separate commits for unrelated work; +- a concise subject with explanatory detail in the body when needed. 
+ +Examples: + +- `fix(memory): preserve lane filters during semantic fusion` +- `feat(observability): correlate projection worker spans` +- `docs(contributing): align developer workflow with current surfaces` + +## Code Style -## Versioning +- Python 3.10 through 3.14 +- required type annotations and precise types +- minimal use of `Any` +- `ruff format`, `ruff check`, and `mypy` must pass +- explicit, readable control flow over clever implicit behavior +- comments only where they clarify non-obvious reasoning or contracts +- deterministic sorting and serialization at all public boundaries -CodeClone follows **semantic versioning**: +Follow existing local patterns before introducing new abstractions. -- **MAJOR**: fundamental detection model changes -- **MINOR**: new detection capabilities (e.g., new detectors or major CFG/normalization behavior shifts) -- **PATCH**: bug fixes, performance improvements, and UI/UX polish +## Releases and Changelog -Any change that affects detection behavior must include documentation and tests, -and may require a `fingerprint_version` bump (and thus baseline regeneration). +User-facing features, compatibility changes, migrations, and notable developer +surfaces belong in `CHANGELOG.md`. Routine fixes made during the current +development cycle do not need individual changelog entries unless they alter a +published contract or require user action. ---- +Release work must follow [`docs/releasing.md`](docs/releasing.md), including +artifact, installation, and publishing checks for every affected surface. ## License -By contributing code to CodeClone, you agree that your contributions will be -licensed under **MPL-2.0**. +By contributing code to CodeClone, you agree that the contribution is licensed +under **MPL-2.0**. Documentation contributions are licensed under **MIT**. 
diff --git a/README.md b/README.md index 13ef143d..34f54037 100644 --- a/README.md +++ b/README.md @@ -33,255 +33,226 @@ --- -**CodeClone** is a deterministic **structural change controller** with **Engineering Memory** for AI-assisted -Python development. It governs change *before the diff exists*: an agent declares intent, CodeClone maps the -structural blast radius, bounds the edit, verifies the resulting patch against one canonical report, and leaves an -auditable receipt. Engineering Memory adds a typed, evidence-linked project knowledge graph — contracts, decisions, -incidents, stale assumptions — so agents carry durable context without turning LLM output into truth. - -It is not a linter and not LLM judgment. CodeClone makes structural **scope, context, memory, and verification** -explicit — deterministically, before the diff, and verified after. The same control surface protects human -reviewers, CI pipelines, and pre-merge gates. - -## At a glance - -- **Change control before the diff** — declare intent, inspect blast radius, bound the edit, verify the patch - contract, validate review claims, leave an auditable receipt. -- **Engineering Memory** — typed, evidence-linked project facts (contracts, risks, decisions, prior changes); - durable agent context, trajectory passports, recurring Experiences, human-governed promotion, never LLM-as-truth. -- **One canonical report, many surfaces** — duplication, structural drift, dead code, complexity / coupling / - cohesion, health — the same deterministic facts everywhere, no second engine. -- **Baseline-aware CI** — gates fail only on what got *worse*; accepted legacy debt stays separate from real - regressions. -- **Built for agents and teams** — CLI · HTML · JSON · SARIF · Markdown · MCP · VS Code · Claude Desktop · Codex · - Cursor · GitHub Action · CI. +> [!NOTE] +> This repository and the documentation site track the **unreleased v2.1.0 development line**. 
+> For the current stable release, use +> [CodeClone v2.0.2](https://github.com/orenlab/codeclone/tree/v2.0.2) +> or install [CodeClone 2.0.2 from PyPI](https://pypi.org/project/codeclone/2.0.2/). -## Why CodeClone - -AI coding agents do not just write code faster — they expand scope faster. A prompt asks for one change; the agent -edits the target file, touches another module because it looks "related", updates a helper, rewrites a few tests — -and the final diff still looks plausible. The problem is not speed. It is **silent scope expansion**. +**CodeClone** is a deterministic **Structural Change Controller** for AI-assisted Python development, built on one +canonical structural analysis of the repository. -CodeClone governs that workflow with deterministic structural boundaries: +Before editing, an agent declares intent. CodeClone maps the structural blast radius, establishes explicit edit +boundaries, and exposes the regression budget. After editing, it compares the actual patch with the declared scope, +verifies structural changes, checks review claims against report facts, and leaves an auditable receipt. ```text -declare intent - → inspect structural blast radius - → constrain edit scope - → edit - → verify patch contract - → validate review claims - → leave auditable receipt +intent → blast radius → bounded edit → patch check → review receipt ``` -It does not replace the agent and does not use LLM judgment to decide what is safe. It gives the agent deterministic -boundaries **before the diff exists**, then verifies whether the resulting patch stayed inside them. +CodeClone does not use LLM judgment to classify structural regressions or authorize edits. Structural facts come +from deterministic analysis; the same facts serve agents, human reviewers, IDEs, and CI. 
-## Install +## Install and try -```bash -uv tool install codeclone # recommended -pip install codeclone # or pip - -# with the MCP server for AI agents and IDE clients -uv tool install "codeclone[mcp]" +Stable release: -# with token-accurate MCP payload sizing (adds tiktoken) -uv tool install "codeclone[mcp,token-bench]" +```bash +uv tool install codeclone +codeclone . +codeclone . --html --open-html-report ``` -
    -Run without installing +Run without installing: ```bash uvx codeclone@latest . ``` -
    - -## Quick start +Install the MCP server for local AI agents and IDE clients: ```bash -codeclone . # analyze the current directory -codeclone . --html --open-html-report # interactive HTML report -codeclone . --json --md --sarif --text # every report format -codeclone . --ci # CI mode: baseline-aware gating +uv tool install "codeclone[mcp]" +codeclone-mcp --transport stdio ``` -
    -More commands +Run the current development line from source: ```bash -# Changed-scope review against a branch -codeclone . --changed-only --diff-against main -codeclone . --paths-from-git-diff HEAD~1 - -# Structural Change Controller — CLI surface -codeclone . --blast-radius codeclone/analysis/parser.py -codeclone . --patch-verify --diff-against HEAD~1 +git clone https://github.com/orenlab/codeclone.git +cd codeclone +uv sync --all-extras +uv run codeclone . ``` -
    +## Why CodeClone -## How it works +AI coding agents accelerate implementation, but they also make scope expansion easier to miss. A narrow task can +quietly spread into shared helpers, tests, public APIs, configuration, and unrelated modules while the final diff +still looks reasonable. -
    -Pipeline overview -
    -CodeClone pipeline — parse, analyze, fuse, report, gate -
    +Most review tools start with the completed diff. CodeClone starts with the declared intent. -CodeClone produces **one canonical JSON report** and renders it through every surface — CLI, HTML, Markdown, SARIF, -MCP, IDE extensions, GitHub Action, CI. The same deterministic facts drive human review, baseline-aware gates, and -agent workflows. The canonical report is the source of truth; surfaces render, filter, and explain it — there is -never a second analysis engine. +```text +declare intent + → inspect structural blast radius + → establish edit boundaries + → make the change + → compare declared and actual scope + → verify structural regressions + → record the outcome +``` -[Architecture narrative](https://orenlab.github.io/codeclone/guide/explanation/how-it-works/) · -[CFG semantics](https://orenlab.github.io/codeclone/book/04-cfg-semantics/) +The agent still writes the code. CodeClone makes the declared scope explicit before editing and exposes undeclared +expansion when the patch is verified. ## Structural Change Controller -The Controller governs AI-assisted edits before they become invisible diffs. Every stage is deterministic — -structural facts come from the canonical report, not from LLM inference. 
+The controller reduces the governed agent workflow to four steps: -| Stage | Surface | Purpose | -|------------------------------|---------------------------------------------|------------------------------------------------------------------------------| -| **Start controlled change** | `start_controlled_change` | Pre-edit: workspace check, declare scope, blast radius, patch budget | -| **Finish controlled change** | `finish_controlled_change` | Post-edit: scope check, verify, optional claims/receipt, clear intent | -| **Map blast radius** | `get_blast_radius` · `--blast-radius` | Reverse imports, clone cohorts, review context, do-not-touch boundaries | -| **Check patch contract** | `check_patch_contract` · `--patch-verify` | Pre-edit budget check and post-edit structural verification | -| **Validate claims** | `validate_review_claims` | Cross-check review text against the canonical report | -| **Generate receipt** | `create_review_receipt` | Auditable artifact: intent, scope, blast radius, patch outcome | +```text +analyze → start → edit → finish +``` -Intent execution is **session-local**; cross-agent visibility is optional, advisory, TTL/lease-bound, and stored as -ephemeral workspace coordination state under `.codeclone/intents/`. An optional audit trail records passive -controller events when enabled. CodeClone never mutates source files, baselines, generated reports, or analysis -cache through MCP — **read-only by contract**. +- **Start controlled change** — `start_controlled_change` checks workspace state, records intent, maps blast radius, + separates allowed paths from review context and do-not-touch boundaries, and returns the authoritative + `edit_allowed` permission. +- **Finish controlled change** — `finish_controlled_change` resolves the actual changed files once, checks scope, + verifies the patch against the canonical report, validates optional review claims, and produces a review receipt. 
+- **Patch Trail** — records declared, changed, untouched-in-declared, and boundary-held paths together with + verification and audit anchors. +- **Multi-agent coordination** — lease-bound intents, queues, recovery, and workspace hygiene make concurrent work + visible without treating advisory ownership as structural truth. -[Structural Change Controller docs](https://orenlab.github.io/codeclone/book/12-structural-change-controller/) +Host integrations can enforce the permission model before file edits where the host supports hooks. Regardless of +host enforcement, finish-time verification remains deterministic. -## What CodeClone reviews +[Structural Change Controller documentation](https://orenlab.github.io/codeclone/book/12-structural-change-controller/) -The canonical report the Controller acts on covers: +## One canonical report, every structural surface -| Category | What it covers | -|-------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------| -| **Clone detection** | Function clones via CFG fingerprints, block clones via statement windows, segment clones as report-only review context | -| **Structural findings** | Duplicated branch families, clone guard/exit divergence, clone-cohort drift | -| **Quality metrics** | Cyclomatic complexity, coupling (CBO), cohesion (LCOM4), dependency cycles, adaptive depth profile, dead code, overall health score, overloaded-module profile | -| **Baseline governance** | Separates accepted legacy debt from new regressions — CI fails only on what got worse | -| **Coverage Join** | Fuses external Cobertura XML into the current run to surface untested hotspots and coverage scope gaps | -| **Adoption & API** | Type and docstring annotation coverage, public API surface inventory, baseline-aware API break detection | -| **Security Surfaces** | Report-only inventory of security-relevant capability 
boundaries — no vulnerability claims | -| **Design signals** | Overloaded modules and other report-only structural review context | +CodeClone runs one deterministic structural analysis and renders its canonical report through CLI, HTML, JSON, +Markdown, SARIF, MCP, IDE integrations, GitHub Action, and CI. There is no separate analysis engine for agents. -## AI agents and IDE clients +The report covers: -CodeClone ships an MCP control surface for AI agents and IDE clients, built on the same canonical pipeline as the -CLI. Canonical analysis is **read-only by contract** — MCP tools never mutate source, baselines, reports, or cache; -controller state is session-local or ephemeral workspace coordination. +- function clones through CFG fingerprints; +- block clones through statement windows and report-only segment clones; +- clone-cohort drift, duplicated branch families, and guard/exit divergence; +- cyclomatic complexity, coupling, cohesion, dependency cycles, and dead code; +- overloaded-module and other report-only design context; +- type and docstring adoption; +- public API inventory and baseline-aware API break detection; +- external Cobertura coverage joined with structural hotspots; +- report-only security capability boundaries without vulnerability claims; +- deterministic structural health and review priorities. ```bash -codeclone-mcp --transport stdio # local clients (IDE, agents) -codeclone-mcp --transport streamable-http # HTTP transport +codeclone . --json --html --md --sarif --text ``` -Tools are triage-first (analyze → triage → drill down → focused checks → change control → session), so the full -report never floods agent context. Stable `codeclone://latest/*` and `codeclone://runs/{run_id}/*` resources return -deterministic projections, and run identity is derived from the canonical report integrity digest. 
+[How CodeClone works](https://orenlab.github.io/codeclone/guide/explanation/how-it-works/) · +[Canonical report contract](https://orenlab.github.io/codeclone/book/05-report/) -> [!WARNING] -> Analysis tools require an absolute repository root; relative roots such as `.` are rejected. Keep `stdio` as the -> default transport for local clients — HTTP exposure beyond loopback requires explicit `--allow-remote`. +## Baseline-aware CI -[MCP usage guide](https://orenlab.github.io/codeclone/guide/mcp/) · -[MCP interface contract](https://orenlab.github.io/codeclone/book/25-mcp-interface/) · +CodeClone separates accepted legacy debt from new structural regressions. -### Engineering Memory +```bash +# Create and commit the project baseline once +codeclone . --update-baseline -A local SQLite store of evidence-linked repository facts — contract notes, decisions, risk hotspots, git provenance, -and governed drafts. After `start_controlled_change`, agents read ranked scope context via MCP. Promotion to durable -memory is **human-governed** — agent drafts never become truth automatically. The store auto-bootstraps from the -latest MCP run (`mcp_sync_policy=bootstrap_if_missing`); `codeclone memory init` remains for CI/offline. +# Gate future changes against that baseline +codeclone . --ci +``` -Audit-derived trajectories preserve workflow and Patch Trail evidence. Quality -passports, anomaly and agent dashboards, and deterministic Experiences make -recurring project patterns visible without promoting them to authority. +The baseline is a versioned, integrity-checked contract. CI can reject newly introduced clones and baseline-aware +metric, API, and coverage regressions without requiring the existing codebase to be clean first. Absolute threshold +gates remain opt-in. ```bash -codeclone memory init --root . -codeclone memory search "baseline schema" --match all -codeclone memory approve mem-… # human-only governance +codeclone . --fail-on-new-metrics +codeclone . 
--fail-complexity 20 --fail-coupling 10 --fail-cohesion 4 +codeclone . --fail-cycles --fail-dead-code +codeclone . --coverage coverage.xml --fail-on-untested-hotspots +codeclone . --api-surface --fail-on-api-break ``` -[Engineering Memory docs](https://orenlab.github.io/codeclone/book/13-engineering-memory/) · -[Trajectories and Experiences](https://orenlab.github.io/codeclone/guide/memory/trajectories-and-experiences/) +[Metrics and quality gates](https://orenlab.github.io/codeclone/book/16-metrics-and-quality-gates/) · +[Baseline contract](https://orenlab.github.io/codeclone/book/07-baseline/) -### Platform Observability +## Engineering Memory -Opt-in local diagnostics trace CodeClone's own CLI, MCP, database, analysis, -and projection-worker costs. The observer is disabled by default, stores no raw -payload bodies, and never influences findings, gates, baselines, memory facts, -or edit authorization. +Engineering Memory gives agents durable, repository-specific context without treating model output as project truth. + +The local SQLite store contains typed, evidence-linked knowledge such as contracts, architecture decisions, risks, +test anchors, public surfaces, git provenance, and prior controlled changes. Scope-aware retrieval supports the +current change, while project-wide search can combine FTS5 with optional semantic retrieval. + +Audit-derived trajectories preserve how work actually unfolded. Trajectory passports, anomaly profiles, Patch Trail +evidence, and recurring advisory patterns called **Experiences** make previous successes and failures reusable. +Agent-created records remain drafts until a human approves them. ```bash -CODECLONE_OBSERVABILITY_ENABLED=1 codeclone . -codeclone observability trace --root . --html /tmp/codeclone-observer.html +codeclone memory init --root . 
+codeclone memory search "baseline schema" --match all +codeclone memory approve mem-12345678 ``` -[Platform Observability](https://orenlab.github.io/codeclone/book/26-platform-observability/) +Memory can guide an agent. It cannot authorize edits, override blast radius, change a gate, or replace canonical +report facts. -### Native agent and IDE clients - -| Surface | Install | Docs | -|---------------------------|------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------| -| **VS Code extension** | [VS Code Marketplace](https://marketplace.visualstudio.com/items?itemName=orenlab.codeclone) | [Guide](https://orenlab.github.io/codeclone/guide/integrations/vscode/setup/) | -| **Claude Desktop bundle** | [`orenlab/codeclone-claude-desktop`](https://github.com/orenlab/codeclone-claude-desktop) | [Guide](https://orenlab.github.io/codeclone/guide/integrations/claude-desktop/setup/) | -| **Codex plugin** | [`orenlab/codeclone-codex`](https://github.com/orenlab/codeclone-codex) | [Guide](https://orenlab.github.io/codeclone/guide/integrations/codex/setup/) | -| **Cursor plugin** | [`orenlab/codeclone-cursor`](https://github.com/orenlab/codeclone-cursor) | [Guide](https://orenlab.github.io/codeclone/guide/integrations/cursor/install-and-skills/) | +[Engineering Memory documentation](https://orenlab.github.io/codeclone/book/13-engineering-memory/) · +[Trajectories and Experiences](https://orenlab.github.io/codeclone/guide/memory/trajectories-and-experiences/) -All clients connect to the same `codeclone-mcp` contract — no second analysis engine. +## AI agents and IDE integrations -## CI and quality gates +The MCP server is triage-first: analyze the repository, narrow the problem, inspect evidence, start a controlled +change, and finish with verification. 
Bounded tools and resources keep the full report out of agent context until +deeper evidence is requested. ```bash -# 1. Generate the baseline once, then commit it to your repo -codeclone . --update-baseline - -# 2. Enforce it on every push -codeclone . --ci +codeclone-mcp --transport stdio +codeclone-mcp --transport streamable-http ``` -`--ci` is equivalent to `--fail-on-new --no-color --quiet`, and enables `--fail-on-new-metrics` when a trusted -metrics baseline is present. The baseline becomes the contract CI enforces — separating accepted legacy debt from -real regressions. Exit codes: `0` success · `2` contract error · `3` gating failure · `5` internal -([policy](https://orenlab.github.io/codeclone/book/09-exit-codes/)). +Structural analysis tools do not mutate source files, baselines, generated reports, or analysis cache. Controller +and memory operations update only their explicit state stores. + +> [!WARNING] +> Analysis tools require an absolute repository root. Keep `stdio` as the default transport for local clients. +> Exposing HTTP beyond loopback requires explicit `--allow-remote`. -
    -Quality gate flags +| Surface | Install or source | Documentation | +|---|---|---| +| **VS Code extension** | [VS Code Marketplace](https://marketplace.visualstudio.com/items?itemName=orenlab.codeclone) | [Setup](https://orenlab.github.io/codeclone/guide/integrations/vscode/setup/) | +| **Cursor plugin** | [Cursor storefront](https://github.com/orenlab/codeclone-cursor) | [Install](https://orenlab.github.io/codeclone/guide/integrations/cursor/install-and-skills/) | +| **Claude Code plugin** | [Claude Code marketplace](https://github.com/orenlab/codeclone-claude-code) | [Install](https://orenlab.github.io/codeclone/guide/integrations/claude-code/setup/) | +| **Codex plugin** | [Codex marketplace](https://github.com/orenlab/codeclone-codex) | [Install](https://orenlab.github.io/codeclone/guide/integrations/codex/setup/) | +| **Claude Desktop bundle** | [Bundle repository](https://github.com/orenlab/codeclone-claude-desktop) | [Setup](https://orenlab.github.io/codeclone/guide/integrations/claude-desktop/setup/) | -```bash -# Structural metric thresholds -codeclone . --fail-complexity 20 --fail-coupling 10 --fail-cohesion 4 --fail-health 60 -codeclone . --fail-cycles --fail-dead-code +Every client uses the same `codeclone-mcp` interface and canonical structural facts. + +[MCP usage guide](https://orenlab.github.io/codeclone/guide/mcp/) · +[MCP interface contract](https://orenlab.github.io/codeclone/book/25-mcp-interface/) -# Baseline-aware regression detection -codeclone . --fail-on-new-metrics --fail-on-typing-regression --fail-on-docstring-regression +## Quick workflows -# Adoption, API, and coverage governance -codeclone . --min-typing-coverage 80 --api-surface --fail-on-api-break -codeclone . --coverage coverage.xml --fail-on-untested-hotspots --coverage-min 50 +Review only the current Git scope: + +```bash +codeclone . --changed-only --diff-against main +codeclone . 
--paths-from-git-diff HEAD~1 ``` -[Gate reference](https://orenlab.github.io/codeclone/book/16-metrics-and-quality-gates/) +Inspect structural blast radius and verify a patch: -
    +```bash +codeclone . --blast-radius codeclone/analysis/parser.py +codeclone . --patch-verify --diff-against HEAD~1 +``` -### GitHub Action +Use CodeClone in GitHub Actions: ```yaml - uses: orenlab/codeclone/.github/actions/codeclone@v2 @@ -291,49 +262,30 @@ codeclone . --coverage coverage.xml --fail-on-untested-hotspots --coverage-min 5 pr-comment: "true" ``` -The Action runs baseline-aware gating, generates JSON and SARIF reports, uploads SARIF to GitHub Code Scanning, and -posts or updates a PR summary comment. -[Action docs](https://github.com/orenlab/codeclone/blob/main/.github/actions/codeclone/README.md) +The Action can run baseline-aware gating, publish SARIF to GitHub Code Scanning, upload reports, and maintain a PR +summary comment. -### Pre-commit +[GitHub Action documentation](https://orenlab.github.io/codeclone/getting-started/#github-action) -```yaml -repos: - - repo: local - hooks: - - id: codeclone - name: CodeClone - entry: codeclone - language: system - pass_filenames: false - args: [ ".", "--ci" ] - types: [ python ] -``` +## Platform Observability -## Reports +Platform Observability is an opt-in diagnostics layer for developing CodeClone itself. It correlates CLI, MCP, +analysis, database, semantic-index, and projection-worker execution and exposes timings, RSS/CPU, query shapes, +payload pressure, causal worker chains, and costly no-ops. -All formats render from one canonical JSON payload — same facts, different audiences. - -| Format | Flag | Default path | -|----------|-----------|---------------------------------| -| HTML | `--html` | `.codeclone/report.html` | -| JSON | `--json` | `.codeclone/report.json` | -| Markdown | `--md` | `.codeclone/report.md` | -| SARIF | `--sarif` | `.codeclone/report.sarif` | -| Text | `--text` | `.codeclone/report.txt` | +It is disabled by default, stores no raw payload bodies, and cannot affect repository findings, gates, baselines, +memory facts, or edit authorization. ```bash -codeclone . 
--html --json --md --sarif --text +CODECLONE_OBSERVABILITY_ENABLED=1 codeclone . +codeclone observability trace --root . --html /tmp/codeclone-observer.html ``` -`--open-html-report` opens the HTML in the default browser; `--timestamped-report-paths` appends a UTC timestamp to -default filenames. The canonical JSON (`report_schema_version`, `meta`, `inventory`, `findings`, `metrics`, -`derived`, `integrity`) is documented in the [report contract](https://orenlab.github.io/codeclone/book/05-report/). +[Platform Observability documentation](https://orenlab.github.io/codeclone/book/26-platform-observability/) ## Configuration -CodeClone loads project configuration from `pyproject.toml` — precedence is -**CLI flags > `pyproject.toml` > built-in defaults**. +Project configuration lives in `pyproject.toml`: ```toml [tool.codeclone] @@ -346,23 +298,17 @@ block_min_loc = 20 block_min_stmt = 8 ``` -[Config reference](https://orenlab.github.io/codeclone/book/10-config-and-defaults/) · -[Inline suppressions](https://orenlab.github.io/codeclone/book/19-inline-suppressions/) · -[Baseline contract](https://orenlab.github.io/codeclone/book/07-baseline/) +Precedence is **CLI flags > `pyproject.toml` > built-in defaults**. 
+ +[Configuration reference](https://orenlab.github.io/codeclone/book/10-config-and-defaults/) · +[Inline suppressions](https://orenlab.github.io/codeclone/book/19-inline-suppressions/) ## Documentation -Full docs and contract book: [orenlab.github.io/codeclone](https://orenlab.github.io/codeclone/) +The documentation site contains user guides, interface contracts, report and baseline schemas, configuration +reference, integration setup, and maintainer material: -[Baseline](https://orenlab.github.io/codeclone/book/07-baseline/) · -[Report](https://orenlab.github.io/codeclone/book/05-report/) · -[Metrics & gates](https://orenlab.github.io/codeclone/book/16-metrics-and-quality-gates/) · -[MCP guide](https://orenlab.github.io/codeclone/guide/mcp/) · -[Structural Change Controller](https://orenlab.github.io/codeclone/book/12-structural-change-controller/) · -[Engineering Memory](https://orenlab.github.io/codeclone/book/13-engineering-memory/) · -[Platform Observability](https://orenlab.github.io/codeclone/book/26-platform-observability/) · -[CLI](https://orenlab.github.io/codeclone/book/11-cli/) · -[Benchmarking](https://orenlab.github.io/codeclone/book/20-benchmarking/) +**[orenlab.github.io/codeclone](https://orenlab.github.io/codeclone/)** ## License @@ -371,13 +317,13 @@ Full docs and contract book: [orenlab.github.io/codeclone](https://orenlab.githu ## Links -- **Docs:** +- **Documentation:** - **PyPI:** - **Issues:** - **Discussions:** - **Licenses:** [MPL-2.0](https://github.com/orenlab/codeclone/blob/main/LICENSE) - · [MIT docs](https://github.com/orenlab/codeclone/blob/main/LICENSE-MIT) - · [Scope map](https://github.com/orenlab/codeclone/blob/main/LICENSES.md) + · [MIT documentation license](https://github.com/orenlab/codeclone/blob/main/LICENSE-MIT) + · [License scope map](https://github.com/orenlab/codeclone/blob/main/LICENSES.md) diff --git a/docs/book/02-architecture-map.md b/docs/book/02-architecture-map.md index 8a8a3713..9ddd08e3 100644 --- 
a/docs/book/02-architecture-map.md +++ b/docs/book/02-architecture-map.md @@ -46,7 +46,7 @@ Main ownership layers: | Platform Observability | `codeclone/observability/*` | Opt-in operation/span telemetry, local SQLite store, bounded MCP slicer, and CLI JSON/HTML diagnostics; never analysis truth or a gate input | | Controller insights | `codeclone/controller_insights/*` | Shared session-stats and audit-trail payloads for CLI `--session-stats` / `--audit` and IDE-only MCP `get_workspace_session_stats` / `get_controller_audit_trail` | | Audit trail | `codeclone/audit/*` | Optional controller event and MCP payload footprint recording under `.codeclone/db/` when enabled | -| Client surfaces | `extensions/vscode-codeclone/*`, `extensions/claude-desktop-codeclone/*`, `plugins/codeclone/*`, `plugins/cursor-codeclone/*` | Native clients/install surfaces over `codeclone-mcp` | +| Client surfaces | `extensions/vscode-codeclone/*`, `extensions/claude-desktop-codeclone/*`, `plugins/codeclone/*`, `plugins/cursor-codeclone/*`, `plugins/claude-code-codeclone/*` | Native clients/install surfaces over `codeclone-mcp` | Refs: @@ -67,7 +67,8 @@ Refs: - MCP is read-only and must not create a second analysis truth path. Change control and claim guard are projections over stored run/report semantics, not new analyzers. -- VS Code, Claude Desktop, Codex plugin, and Cursor plugin surfaces are clients over MCP, not second analyzers. +- VS Code, Claude Desktop, Claude Code, Codex, and Cursor surfaces are clients + over MCP, not second analyzers. Refs: diff --git a/docs/book/10-config-and-defaults.md b/docs/book/10-config-and-defaults.md index d4976b5b..d1f6559b 100644 --- a/docs/book/10-config-and-defaults.md +++ b/docs/book/10-config-and-defaults.md @@ -434,9 +434,9 @@ Refs: `plugins/cursor-codeclone/hooks/_hook_io.py`, [integrations/cursor-plugin. ### IDE and MCP launcher passthrough -Set by VS Code, Claude Desktop, Codex/Cursor plugin launchers — not usually edited -in `pyproject.toml`. 
Launchers forward variables prefixed with `CODECLONE_` to the -child `codeclone-mcp` process. +Set by VS Code, Claude Desktop, Claude Code, Codex, and Cursor launchers — not +usually edited in `pyproject.toml`. Launchers forward variables prefixed with +`CODECLONE_` to the child `codeclone-mcp` process. | Variable | Values | Effect | |-----------------------------------|---------------------|-------------------------------------------------------------------------------------------------------------------------------| diff --git a/docs/book/README.md b/docs/book/README.md index b6a8268a..936e10c2 100644 --- a/docs/book/README.md +++ b/docs/book/README.md @@ -77,6 +77,15 @@ If a statement is not enforced by code/tests, it is explicitly marked as non-con - [25-mcp-interface/resources.md](25-mcp-interface/resources.md) — resource URIs - [25-mcp-interface/tools/platform-observability.md](25-mcp-interface/tools/platform-observability.md) — bounded diagnostics tool +### Integrations + +- [integrations/vs-code-extension.md](integrations/vs-code-extension.md) — VS Code extension contract +- [integrations/cursor-plugin.md](integrations/cursor-plugin.md) — Cursor plugin contract +- [integrations/claude-code-plugin.md](integrations/claude-code-plugin.md) — Claude Code plugin contract +- [integrations/codex-plugin.md](integrations/codex-plugin.md) — Codex plugin contract +- [integrations/claude-desktop-bundle.md](integrations/claude-desktop-bundle.md) — Claude Desktop bundle contract +- [integrations/sarif.md](integrations/sarif.md) — SARIF projection contract + ### Appendix - [appendix/a-status-enums.md](appendix/a-status-enums.md) — status enums and typed contracts diff --git a/docs/book/integrations/claude-code-plugin.md b/docs/book/integrations/claude-code-plugin.md new file mode 100644 index 00000000..3a332490 --- /dev/null +++ b/docs/book/integrations/claude-code-plugin.md @@ -0,0 +1,85 @@ + +# Claude Code Plugin + +## Distribution contract + +The monorepo source lives 
under `plugins/claude-code-codeclone/`. +`scripts/sync_integrations.py --target claude-code` publishes it into the +dedicated `orenlab/codeclone-claude-code` storefront. + +The distribution repository contains: + +| Path | Role | +|---|---| +| `.claude-plugin/marketplace.json` | Marketplace catalog named `orenlab-codeclone` | +| `plugins/codeclone/.claude-plugin/plugin.json` | Plugin identity and metadata | +| `plugins/codeclone/.mcp.json` | Local stdio MCP definition | +| `plugins/codeclone/skills/` | Review, hotspots, change control, and memory | +| `plugins/codeclone/scripts/launch_mcp.py` | Standalone workspace-first launcher | + +## Installation contract + +Public installation is the two-step marketplace flow: + +```bash +claude plugin marketplace add orenlab/codeclone-claude-code +claude plugin install codeclone@orenlab-codeclone +``` + +Local `--plugin-dir` loading is a development path, not the user installation +contract. + +## Runtime model + +```mermaid +flowchart TD + A["Marketplace catalog"] --> B["Installed CodeClone plugin"] + B --> C["Namespaced skills"] + B --> D[".mcp.json"] + D --> E["Workspace-first launcher"] + E --> F["Local codeclone-mcp"] + F --> G["Canonical analysis and change control"] +``` + +The plugin is additive. It provides four skills and the standard agent MCP +surface from the locally resolved `codeclone-mcp` version. It does not install +the Python package, filter tools, or create a second analysis model. + +The MCP configuration uses `${CLAUDE_PLUGIN_ROOT}` because Claude Code copies +installed plugins into a versioned cache. Storefront sync replaces the +monorepo delegate launcher with the full standalone implementation. + +The plugin manifest intentionally omits `version`. For a Git-based marketplace, +Claude Code can identify the installed revision by commit SHA; adding an +explicit version would require the distribution release process to bump it for +every plugin change or risk retaining a stale cache entry. 
+ +## Read-only and state boundaries + +The server must not mutate source files, baselines, analysis cache, or canonical +reports. Controller coordination, audit, and Engineering Memory may write only +their documented bounded local state. + +## Separation from Claude Desktop + +Claude Code and Claude Desktop are different install surfaces: + +- Claude Code installs a marketplace plugin with skills and `.mcp.json`. +- Claude Desktop installs the local `.mcpb` bundle. + +Neither surface owns analysis semantics; both connect to `codeclone-mcp`. + +## Current limits + +- `codeclone[mcp]` must already be available in the workspace environment or on + `PATH`. +- Duplicate manual MCP registration can expose the same server twice; keep one + active setup path. +- Plugin skills are namespaced under the `/codeclone:` prefix (for example, `/codeclone:codeclone-review`). + +## Further reading + +- [Claude Code setup](../../guide/integrations/claude-code/setup.md) +- [MCP usage guide](../../guide/mcp/README.md) +- [MCP interface contract](../25-mcp-interface/index.md) +- [Claude Desktop bundle](claude-desktop-bundle.md) diff --git a/docs/book/integrations/claude-desktop-bundle.md b/docs/book/integrations/claude-desktop-bundle.md index f1285673..46d45d0b 100644 --- a/docs/book/integrations/claude-desktop-bundle.md +++ b/docs/book/integrations/claude-desktop-bundle.md @@ -1,6 +1,9 @@ # Claude Desktop Bundle +This contract covers the Claude Desktop `.mcpb` package. Claude Code uses the +separate [Claude Code plugin](claude-code-plugin.md) and marketplace workflow. + ## Bundle workflow 1. Build: `cd extensions/claude-desktop-codeclone && node scripts/build-mcpb.mjs` @@ -79,7 +82,8 @@ See [Privacy Policy](../../privacy-policy.md). - CLI remains the scripting and CI surface. - MCP remains the read-only agent/client contract. -- Claude Code can still register `codeclone-mcp` directly through `mcp add`. +- Claude Code installs the dedicated marketplace plugin; direct `mcp add` + remains a manual fallback.
- The Claude Desktop bundle is the installable local package layer for users who want a native Claude Desktop setup path. diff --git a/docs/book/integrations/codex-plugin.md b/docs/book/integrations/codex-plugin.md index 74219ddc..cc9d4a88 100644 --- a/docs/book/integrations/codex-plugin.md +++ b/docs/book/integrations/codex-plugin.md @@ -30,6 +30,13 @@ tools are VS Code only). development and packaging into `orenlab/codeclone-codex`; it is not the public install path. +Public installation is: + +```bash +codex plugin marketplace add orenlab/codeclone-codex +codex plugin add codeclone@orenlab-codeclone +``` + ## Read-only contract diff --git a/docs/book/integrations/cursor-plugin.md b/docs/book/integrations/cursor-plugin.md index 2b81bc4e..63bb7dcd 100644 --- a/docs/book/integrations/cursor-plugin.md +++ b/docs/book/integrations/cursor-plugin.md @@ -2,6 +2,17 @@ # Cursor Plugin +## Installation contract + +The public source is +`https://github.com/orenlab/codeclone-cursor`. Users install CodeClone from +Cursor's marketplace panel. Team administrators expose the storefront through +**Dashboard → Settings → Plugins → Team Marketplaces → Add Marketplace → +Import from Repo**. + +`~/.cursor/plugins/local` symlinks are development-only and must not be +presented as the normal installation path. + ## Rules All three ship under `plugins/cursor-codeclone/rules/`: diff --git a/docs/getting-started.md b/docs/getting-started.md index f11bcb14..7473f9f7 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -185,26 +185,40 @@ codeclone-mcp --transport streamable-http # remote / HTTP clients See [Claude Desktop guide](guide/integrations/claude-desktop/setup.md). +=== "Claude Code" + + ```bash + claude plugin marketplace add orenlab/codeclone-claude-code + claude plugin install codeclone@orenlab-codeclone + ``` + + The marketplace repository is + [orenlab/codeclone-claude-code](https://github.com/orenlab/codeclone-claude-code). 
+ + See [Claude Code plugin guide](guide/integrations/claude-code/setup.md). + === "Codex" ```bash - marketplace add orenlab/codeclone-codex + codex plugin marketplace add orenlab/codeclone-codex + codex plugin add codeclone@orenlab-codeclone ``` - The source plugin lives in - [`plugins/codeclone/`](https://github.com/orenlab/codeclone/tree/main/plugins/codeclone); - the marketplace distribution is `orenlab/codeclone-codex`. + The marketplace repository is + [orenlab/codeclone-codex](https://github.com/orenlab/codeclone-codex). See [Codex plugin guide](guide/integrations/codex/setup.md). === "Cursor" - Install from the monorepo path - [`plugins/cursor-codeclone/`](https://github.com/orenlab/codeclone/tree/main/plugins/cursor-codeclone) - (symlink into `.cursor/` or use Cursor local plugin discovery). + In Cursor, open **Dashboard → Settings → Plugins → Team Marketplaces**, + choose **Add Marketplace → Import from Repo**, and enter: + + ```text + https://github.com/orenlab/codeclone-cursor + ``` - The Cursor plugin is **not** listed in `.agents/plugins/marketplace.json`; - that file is Codex-only for local monorepo development. + Then install **CodeClone** from the imported marketplace. See [Cursor plugin guide](guide/integrations/cursor/install-and-skills.md). diff --git a/docs/guide/README.md b/docs/guide/README.md index c883fef3..96ca83a4 100644 --- a/docs/guide/README.md +++ b/docs/guide/README.md @@ -7,7 +7,7 @@ enums, payload semantics), use the [Contracts book](../book/README.md). !!! abstract "Who is this for?" - **Developers** — install, CI, first analysis run - **Agent authors** — MCP workflows, change control, memory recipes - - **IDE users** — VS Code, Cursor, Codex, Claude Desktop setup + - **IDE and agent users** — VS Code, Cursor, Claude Code, Codex, Claude Desktop setup ## Start here @@ -37,6 +37,7 @@ enums, payload semantics), use the [Contracts book](../book/README.md). 
|--------|-------------|----------| | VS Code | [Setup](integrations/vscode/setup.md) | [Contract](../book/integrations/vs-code-extension.md) | | Cursor | [Install & skills](integrations/cursor/install-and-skills.md) | [Contract](../book/integrations/cursor-plugin.md) | -| Codex | [Setup](integrations/codex/setup.md) | [Contract](../book/integrations/codex-plugin.md) | +| Claude Code | [Install](integrations/claude-code/setup.md) | [Contract](../book/integrations/claude-code-plugin.md) | +| Codex | [Install](integrations/codex/setup.md) | [Contract](../book/integrations/codex-plugin.md) | | Claude Desktop | [Setup](integrations/claude-desktop/setup.md) | [Contract](../book/integrations/claude-desktop-bundle.md) | | SARIF export | [Export](integrations/sarif/export.md) | [Contract](../book/integrations/sarif.md) | diff --git a/docs/guide/integrations/claude-code/setup.md b/docs/guide/integrations/claude-code/setup.md new file mode 100644 index 00000000..9e3ea815 --- /dev/null +++ b/docs/guide/integrations/claude-code/setup.md @@ -0,0 +1,104 @@ + +# Claude Code setup + +CodeClone ships a native Claude Code plugin through the public +[orenlab/codeclone-claude-code](https://github.com/orenlab/codeclone-claude-code) +marketplace repository. + +This is distinct from the +[Claude Desktop `.mcpb` bundle](../claude-desktop/setup.md). Claude Code loads +skills and the MCP definition; Claude Desktop installs an extension bundle. 
+ +## Prerequisites + +- Claude Code with plugin support +- Python 3.10+ +- a local `codeclone-mcp` installation + +## Install from the marketplace + +Add the marketplace and install the plugin: + +```bash +claude plugin marketplace add orenlab/codeclone-claude-code +claude plugin install codeclone@orenlab-codeclone +``` + +The equivalent interactive commands are: + +```text +/plugin marketplace add orenlab/codeclone-claude-code +/plugin install codeclone@orenlab-codeclone +``` + +Verify: + +```bash +claude plugin marketplace list +claude plugin list +``` + +## Install the MCP launcher + +Global tool installation: + +```bash +uv tool install "codeclone[mcp]" +codeclone-mcp --help +``` + +Workspace-local installation: + +```bash +uv venv +uv pip install --python .venv/bin/python "codeclone[mcp]" +.venv/bin/codeclone-mcp --help +``` + +The plugin launcher resolves a workspace `.venv`, then the current Poetry +environment, then `codeclone-mcp` from `PATH`. + +## Runtime path + +```mermaid +flowchart LR + A["Claude Code"] --> B["CodeClone plugin"] + B --> C["Plugin skills"] + B --> D["Local stdio launcher"] + D --> E["codeclone-mcp"] + E --> F["Canonical report and controller"] +``` + +The plugin does not bundle Python or a second analyzer. It supplies guidance and +a local MCP definition over the same canonical CodeClone server. + +## Skills + +Claude Code namespaces installed plugin skills: + +| Task | Invocation | +|---|---| +| Repository review | `/codeclone:codeclone-review` | +| Hotspot snapshot | `/codeclone:codeclone-hotspots` | +| Controlled edit | `/codeclone:codeclone-change-control` | +| Engineering Memory | `/codeclone:codeclone-engineering-memory` | + +## Update or remove + +```bash +claude plugin marketplace update orenlab-codeclone +claude plugin update codeclone@orenlab-codeclone +claude plugin uninstall codeclone@orenlab-codeclone +``` + +## Local development + +Marketplace installation is the public path. 
For plugin development only: + +```bash +claude --plugin-dir plugins/claude-code-codeclone +claude plugin validate plugins/claude-code-codeclone +``` + +Contract reference: +[Claude Code plugin](../../../book/integrations/claude-code-plugin.md). diff --git a/docs/guide/integrations/claude-desktop/setup.md b/docs/guide/integrations/claude-desktop/setup.md index 04dd61fe..813df556 100644 --- a/docs/guide/integrations/claude-desktop/setup.md +++ b/docs/guide/integrations/claude-desktop/setup.md @@ -4,6 +4,10 @@ Local `.mcpb` bundle that launches `codeclone-mcp` over stdio. Same canonical MCP surface as CLI, VS Code, Codex, and Cursor — no second analyzer or truth path. +For the terminal agent, use the separate +[Claude Code marketplace plugin](../claude-code/setup.md). The `.mcpb` described +here is only for Claude Desktop. + ## Prerequisites - Claude Desktop with extension support diff --git a/docs/guide/integrations/codex/setup.md b/docs/guide/integrations/codex/setup.md index 325bf6ce..91a737e6 100644 --- a/docs/guide/integrations/codex/setup.md +++ b/docs/guide/integrations/codex/setup.md @@ -4,7 +4,19 @@ Install the plugin from the Codex marketplace: ```bash -marketplace add orenlab/codeclone-codex +codex plugin marketplace add orenlab/codeclone-codex +codex plugin add codeclone@orenlab-codeclone +``` + +The first command registers the public marketplace repository. The second +installs the `codeclone` plugin from the marketplace named +`orenlab-codeclone`. + +Verify the configured marketplace and installed plugin: + +```bash +codex plugin marketplace list +codex plugin list ``` The plugin manifest version tracks the CodeClone package release line (currently diff --git a/docs/guide/integrations/cursor/install-and-skills.md b/docs/guide/integrations/cursor/install-and-skills.md index e5d2b927..04d69b60 100644 --- a/docs/guide/integrations/cursor/install-and-skills.md +++ b/docs/guide/integrations/cursor/install-and-skills.md @@ -33,29 +33,41 @@ skills. 
## Install -Install `codeclone[mcp]` so `launch_mcp.py` can resolve `codeclone-mcp`: +### Install from the Cursor marketplace + +The public storefront is +[orenlab/codeclone-cursor](https://github.com/orenlab/codeclone-cursor). + +If CodeClone is already listed in your marketplace panel, select **CodeClone**, +choose user or project scope, and install it. + +To expose the repository as a team marketplace: + +1. Open **Cursor Dashboard → Settings → Plugins**. +2. Under **Team Marketplaces**, select **Add Marketplace**. +3. Select **Import from Repo** and enter + `https://github.com/orenlab/codeclone-cursor`. +4. Add CodeClone, configure team access, and save. +5. Install CodeClone from Cursor's marketplace panel. + +Install `codeclone[mcp]` separately so the bundled launcher can resolve +`codeclone-mcp`: ```bash uv tool install "codeclone[mcp]" codeclone-mcp --help ``` -### Recommended: Cursor plugin discovery +### Local development only -Register the plugin directory (loads manifest, skills, rules, hooks, and -`mcp.json` together): +Use a local symlink only while developing the plugin: ```bash ln -sfn /path/to/codeclone/plugins/cursor-codeclone ~/.cursor/plugins/local/codeclone ``` -Reload Cursor. Enable the plugin for trusted workspaces. - -### Optional: manual `.cursor/` symlinks - -Only if you are not using plugin discovery — symlink skills, rules, agent, and -MCP separately (see monorepo comments in older guides). Prefer plugin discovery -so all three rules and hook manifest stay bundled. +Reload Cursor after changing the local source. Do not present this path to +normal users as the installation flow. ### Project hooks (Hooks UI) @@ -73,9 +85,10 @@ Writes: Do **not** commit generated files (machine-local Python paths). This monorepo ignores `/.cursor/` in `.gitignore`. -!!! note "Marketplace" - Not listed in `.agents/plugins/marketplace.json` (Codex-only). Install from - `plugins/cursor-codeclone/` via Cursor local plugin discovery or symlinks. +!!! 
note "Marketplace catalogs" + `.agents/plugins/marketplace.json` belongs to Codex. Cursor installs this + plugin from the `orenlab/codeclone-cursor` storefront through Cursor's own + marketplace UI. ## Skills @@ -142,8 +155,8 @@ does not treat report-only signals as CI failures or vulnerability claims. ## Distribution - **Monorepo source:** `plugins/cursor-codeclone/` -- **Marketplace:** not in `.agents/plugins/marketplace.json` (Codex-only entry) -- **Install:** Cursor local plugin discovery (recommended) or `.cursor/` symlinks +- **Marketplace source:** `https://github.com/orenlab/codeclone-cursor` +- **Install:** Cursor marketplace panel; local symlink only for development - **Standalone releases:** ship full `plugins/codeclone/scripts/launch_mcp.py` body diff --git a/docs/guide/mcp/client-setup.md b/docs/guide/mcp/client-setup.md index 83670d0c..61c7d983 100644 --- a/docs/guide/mcp/client-setup.md +++ b/docs/guide/mcp/client-setup.md @@ -8,15 +8,24 @@ All clients use the same server. Only the registration format differs. === "Claude Code" ```bash - claude mcp add codeclone -- codeclone-mcp --transport stdio + claude plugin marketplace add orenlab/codeclone-claude-code + claude plugin install codeclone@orenlab-codeclone ``` - Use `--scope project` to store config in `.mcp.json` for the repository. + The native plugin supplies the MCP definition and CodeClone skills. See the + [Claude Code plugin guide](../integrations/claude-code/setup.md). + + Manual MCP registration without the plugin remains available: + + ```bash + claude mcp add --scope project codeclone -- codeclone-mcp --transport stdio + ``` === "Codex" ```bash - marketplace add orenlab/codeclone-codex + codex plugin marketplace add orenlab/codeclone-codex + codex plugin add codeclone@orenlab-codeclone ``` The native plugin includes the MCP definition and CodeClone skills. @@ -30,22 +39,15 @@ All clients use the same server. Only the registration format differs. 
=== "Cursor" - Add to `.cursor/mcp.json`: - - ```json - { - "mcpServers": { - "codeclone": { - "command": "codeclone-mcp", - "args": ["--transport", "stdio"] - } - } - } - ``` + For the complete integration, import + `https://github.com/orenlab/codeclone-cursor` through + **Dashboard → Settings → Plugins → Team Marketplaces → Add Marketplace → + Import from Repo**, then install **CodeClone**. - For intent-first edits with IDE enforcement, use the bundled - [Cursor plugin](../integrations/cursor/install-and-skills.md): install project hooks so `preToolUse` - reads the same workspace intent registry (file or SQLite) as MCP. + The bundled [Cursor plugin](../integrations/cursor/install-and-skills.md) + includes MCP registration, skills, rules, and project hooks. Manual + `.cursor/mcp.json` registration is covered under generic setup below, but + does not install the rest of that surface. === "Claude Desktop" diff --git a/docs/index.md b/docs/index.md index d4dd7c3d..5dc0128e 100644 --- a/docs/index.md +++ b/docs/index.md @@ -61,9 +61,10 @@ patch against the declared boundary, and generates an auditable review receipt. 
| Surface | Guide (how to) | Contract (guarantees) | |-----------------------|----------------|------------------------| | VS Code extension | [Setup](guide/integrations/vscode/setup.md) | [VS Code contract](book/integrations/vs-code-extension.md) | -| Claude Desktop bundle | [Setup](guide/integrations/claude-desktop/setup.md) | [Claude Desktop contract](book/integrations/claude-desktop-bundle.md) | -| Codex plugin | [Setup](guide/integrations/codex/setup.md) | [Codex contract](book/integrations/codex-plugin.md) | | Cursor plugin | [Install & skills](guide/integrations/cursor/install-and-skills.md) | [Cursor contract](book/integrations/cursor-plugin.md) | +| Claude Code plugin | [Install](guide/integrations/claude-code/setup.md) | [Claude Code contract](book/integrations/claude-code-plugin.md) | +| Codex plugin | [Install](guide/integrations/codex/setup.md) | [Codex contract](book/integrations/codex-plugin.md) | +| Claude Desktop bundle | [Setup](guide/integrations/claude-desktop/setup.md) | [Claude Desktop contract](book/integrations/claude-desktop-bundle.md) | | SARIF & code scanning | [Export](guide/integrations/sarif/export.md) | [SARIF contract](book/integrations/sarif.md) | ## Reports diff --git a/docs/releasing.md b/docs/releasing.md index 6ca48745..eb88e489 100644 --- a/docs/releasing.md +++ b/docs/releasing.md @@ -1,9 +1,10 @@ + # Releasing & Storefront Sync ## Integration distribution repos (storefronts) @@ -13,12 +14,13 @@ repositories** under a shared parent directory. The sync driver is `scripts/sync_integrations.py`; contract tests live in `tests/test_sync_integrations.py`. 
-| CLI `--target` | Distribution directory | GitHub / marketplace | Monorepo source paths | -|----------------|------------------------|----------------------|------------------------| -| `codex` | `codeclone-codex/` | `orenlab/codeclone-codex` | `plugins/codeclone/` + overlays under `scripts/integration_dist/` (root `README.md`, `.gitignore`, public `marketplace.json`) | -| `cursor` | `codeclone-cursor/` | Cursor plugin publish flow | `plugins/cursor-codeclone/` + `plugins/codeclone/scripts/launch_mcp.py` → `scripts/launch_mcp.py` + `gitignore.cursor` | -| `vscode` | `codeclone-vscode/` | VS Code Marketplace | `extensions/vscode-codeclone/` (flat) + `gitignore.vscode` | -| `claude-desktop` | `codeclone-claude-desktop/` | Claude Desktop `.mcpb` bundle | `extensions/claude-desktop-codeclone/` (flat) + `gitignore.claude-desktop` | +| CLI `--target` | Distribution directory | GitHub / marketplace | Monorepo source paths | +|------------------|-----------------------------|---------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `codex` | `codeclone-codex/` | `orenlab/codeclone-codex` | `plugins/codeclone/` + overlays under `scripts/integration_dist/` (root `README.md`, `.gitignore`, public `marketplace.json`) | +| `claude-code` | `codeclone-claude-code/` | `orenlab/codeclone-claude-code` | `plugins/claude-code-codeclone/` → `plugins/codeclone/` + shared standalone launcher + root `README.md`, `.gitignore`, and public `marketplace.json` overlays | +| `cursor` | `codeclone-cursor/` | `orenlab/codeclone-cursor` | `plugins/cursor-codeclone/` + `plugins/codeclone/scripts/launch_mcp.py` → `scripts/launch_mcp.py` + `gitignore.cursor` | +| `vscode` | `codeclone-vscode/` | VS Code Marketplace | `extensions/vscode-codeclone/` (flat) + `gitignore.vscode` | +| `claude-desktop` | `codeclone-claude-desktop/` | Claude Desktop `.mcpb` bundle | 
`extensions/claude-desktop-codeclone/` (flat) + `gitignore.claude-desktop` | Each target must be a **git repository** named exactly `codeclone-{target}` (for example `codeclone-cursor`). The script refuses wrong directory names or @@ -27,8 +29,8 @@ non-git targets. ### What sync copies (and what it does not) **Copied:** plugin/extension trees listed above, **distribution overlays** from -`scripts/integration_dist/` (per-target `.gitignore`; Codex-only root `README.md` -and public `.agents/plugins/marketplace.json`), plus generated +`scripts/integration_dist/` (per-target `.gitignore`; Codex and Claude Code root +`README.md` plus their public marketplace manifests), plus generated `SYNC_MANIFEST.json` at the distribution repo root (commit, package version from `pyproject.toml`, file counts, UTC timestamp). @@ -42,14 +44,21 @@ plugin README to the repo root. `scripts/integration_dist/marketplace.codex.json` (`orenlab-codeclone` / `displayName: CodeClone`). +**Claude Code marketplace rule:** the public `codeclone-claude-code` repo gets +`.claude-plugin/marketplace.json` from +`scripts/integration_dist/marketplace.claude-code.json`. The distributable +plugin stays nested under `plugins/codeclone/`, while its root README comes from +`scripts/integration_dist/README.claude-code.root.md`. + **Not copied:** the Python package (`codeclone/`), baselines, analysis cache, canonical reports, monorepo `.cursor/rules` (developer-only; Cursor users get `plugins/cursor-codeclone/rules/`), or arbitrary files already present in a distribution repo (for example `.github/workflows/`, extra CI-only files). -**Flat targets (Cursor, VS Code, Claude):** product `README.md` still comes from -the synced extension/plugin tree at the distribution repo root (same file as in -the monorepo). Only Codex needs a second, distribution-specific root README. 
+**Flat targets (Cursor, VS Code, Claude Desktop):** product `README.md` still +comes from the synced extension/plugin tree at the distribution repo root (same +file as in the monorepo). Codex and Claude Code use separate, +distribution-specific root READMEs. **Denied globally during copy:** `.git`, `__pycache__`, `*.pyc`, `node_modules`, `dist/`, `build/`, `.coverage`. VS Code sync also skips `node_modules/**` and @@ -57,8 +66,9 @@ the monorepo). Only Codex needs a second, distribution-specific root README. ### Layout models -- **Nested (Codex):** `plugins/codeclone/` stays under `plugins/codeclone/` in - `codeclone-codex`. Stale files inside that subtree are removed before copy. +- **Nested (Codex and Claude Code):** `plugins/codeclone/` stays under + `plugins/codeclone/` in the distribution repository. Stale files inside that + subtree are removed before copy. - **Flat (Cursor, VS Code, Claude Desktop):** extension/plugin files land at the distribution repo root. Sync deletes only **top-level names that still exist** in the current source tree, then recopies. If you **remove an entire top-level @@ -66,7 +76,7 @@ the monorepo). Only Codex needs a second, distribution-specific root README. the distribution repo — remove it manually or restore a stub directory before syncing. -### Cursor launcher override +### Standalone launcher overrides `plugins/cursor-codeclone/scripts/launch_mcp.py` in the monorepo is a thin `runpy` delegate to the shared Codex launcher. Distribution **`codeclone-cursor`** @@ -75,6 +85,11 @@ must ship the **full** `plugins/codeclone/scripts/launch_mcp.py` body so applies a second copy pair for that file after the plugin tree (see `test_cursor_sync_ships_standalone_launcher`). +The Claude Code source plugin uses the same monorepo delegation pattern. 
+Distribution **`codeclone-claude-code`** therefore replaces +`plugins/codeclone/scripts/launch_mcp.py` with the same full standalone +implementation (see `test_claude_code_sync_ships_standalone_launcher`). + ## Sync workflow (maintainers) Run from the **monorepo root** (`codeclone/`), with sibling repos checked out @@ -93,7 +108,7 @@ next to it (default `--base-dir ..`) or pass an absolute parent path. uv run python scripts/sync_integrations.py --target codex --base-dir .. ``` -=== "Sync all four storefronts" +=== "Sync all five storefronts" ```bash title="Update every distribution repo" uv run python scripts/sync_integrations.py --all --base-dir .. @@ -119,21 +134,27 @@ Use this after `--all` or a single `--target` before tagging a plugin release: 1. **`SYNC_MANIFEST.json`** — `target` matches repo; `codeclone_version` matches monorepo `pyproject.toml`; `source_dirty` is `false` for release builds; `files_copied` is stable for the same source tree. -2. **`.gitignore` (all four)** — present at distribution repo root; includes +2. **`.gitignore` (all five)** — present at distribution repo root; includes `.idea/`, `.DS_Store`; VS Code copy also lists `node_modules/`, `*.vsix`, `out/`. 3. **Codex (`codeclone-codex`)** — root `README.md` is the distribution stub (not a duplicate of `plugins/codeclone/README.md`); `plugins/codeclone/skills/` has four skills; `plugins/codeclone/.mcp.json` and `scripts/launch_mcp.py` present; `.agents/plugins/marketplace.json` has `name: orenlab-codeclone`. -4. **Cursor (`codeclone-cursor`)** — six skills including `production-triage/` and +4. **Claude Code (`codeclone-claude-code`)** — root `README.md` documents the + two-step marketplace install; `.claude-plugin/marketplace.json` has + `name: orenlab-codeclone`; `plugins/codeclone/.claude-plugin/plugin.json`, + `.mcp.json`, four skills, and the standalone launcher are present. The plugin + manifest omits `version` intentionally so Git commit identity drives cache + updates. +5. 
**Cursor (`codeclone-cursor`)** — six skills including `production-triage/` and `blast-radius/`; three rules under `rules/` (including `change-control-gate.mdc`); `scripts/launch_mcp.py` contains `resolve_launch_target` and **not** `runpy`; `mcp.json` still points at `./scripts/launch_mcp.py`. -5. **VS Code (`codeclone-vscode`)** — `package.json` and `src/` at repo root (no +6. **VS Code (`codeclone-vscode`)** — `package.json` and `src/` at repo root (no `extensions/` mirror path); `codeclone.memory.searchSemantic` and related memory search settings present when the monorepo extension ships them. -6. **Claude Desktop (`codeclone-claude-desktop`)** — `manifest.json`, `server/index.js`, +7. **Claude Desktop (`codeclone-claude-desktop`)** — `manifest.json`, `server/index.js`, `src/launcher.js` at repo root; bundle build smoke: `node extensions/claude-desktop-codeclone/scripts/build-mcpb.mjs` in monorepo or the equivalent script path in the distribution repo after sync. @@ -142,8 +163,8 @@ Automated regression: `uv run pytest -q tests/test_sync_integrations.py`. Byte-for-byte parity: for each synced file, the distribution copy should match the monorepo source file that sync last wrote for that destination (remember -Cursor `scripts/launch_mcp.py` comes from `plugins/codeclone/scripts/`, not from -the monorepo delegate stub). +Cursor and Claude Code standalone launchers come from +`plugins/codeclone/scripts/`, not from their monorepo delegate stubs). ## When to update this page diff --git a/docs/terms-of-use.md b/docs/terms-of-use.md index 48a7deeb..cecf4c7e 100644 --- a/docs/terms-of-use.md +++ b/docs/terms-of-use.md @@ -37,7 +37,8 @@ Integrations: CodeClone integrations do not modify or replace the security, account, privacy, or usage policies of third-party host applications such as -Claude Desktop, Codex, VS Code, Anthropic services, or OpenAI services. +Claude Desktop, Claude Code, Codex, Cursor, VS Code, Anthropic services, or +OpenAI services. 
Those platforms remain governed by their own applicable terms and policies. diff --git a/plugins/claude-code-codeclone/.claude-plugin/plugin.json b/plugins/claude-code-codeclone/.claude-plugin/plugin.json new file mode 100644 index 00000000..69baf1b1 --- /dev/null +++ b/plugins/claude-code-codeclone/.claude-plugin/plugin.json @@ -0,0 +1,20 @@ +{ + "name": "codeclone", + "description": "Structural Change Controller for AI-assisted Python development over local codeclone-mcp.", + "author": { + "name": "Den Rozhnovskiy", + "email": "pytelemonbot@mail.ru", + "url": "https://github.com/orenlab" + }, + "homepage": "https://orenlab.github.io/codeclone/guide/integrations/claude-code/setup/", + "repository": "https://github.com/orenlab/codeclone-claude-code", + "license": "MPL-2.0", + "keywords": [ + "codeclone", + "claude-code", + "mcp", + "structural-review", + "engineering-memory", + "change-control" + ] +} diff --git a/plugins/claude-code-codeclone/.mcp.json b/plugins/claude-code-codeclone/.mcp.json new file mode 100644 index 00000000..27152fb5 --- /dev/null +++ b/plugins/claude-code-codeclone/.mcp.json @@ -0,0 +1,10 @@ +{ + "mcpServers": { + "codeclone": { + "command": "python3", + "args": [ + "${CLAUDE_PLUGIN_ROOT}/scripts/launch_mcp.py" + ] + } + } +} diff --git a/plugins/claude-code-codeclone/README.md b/plugins/claude-code-codeclone/README.md new file mode 100644 index 00000000..3eea3a7b --- /dev/null +++ b/plugins/claude-code-codeclone/README.md @@ -0,0 +1,75 @@ +# CodeClone for Claude Code + +Native Claude Code plugin for the CodeClone **Structural Change Controller**. +It connects Claude Code to the same local `codeclone-mcp` server used by the +CLI, Codex, Cursor, VS Code, and Claude Desktop. 
+ +## Install + +Add the public marketplace and install CodeClone: + +```bash +claude plugin marketplace add orenlab/codeclone-claude-code +claude plugin install codeclone@orenlab-codeclone +``` + +Inside an interactive Claude Code session, the equivalent commands are: + +```text +/plugin marketplace add orenlab/codeclone-claude-code +/plugin install codeclone@orenlab-codeclone +``` + +Install the local MCP server separately: + +```bash +uv tool install "codeclone[mcp]" +codeclone-mcp --help +``` + +For a workspace-local environment: + +```bash +uv venv +uv pip install --python .venv/bin/python "codeclone[mcp]" +.venv/bin/codeclone-mcp --help +``` + +The launcher prefers a workspace `.venv`, then the current Poetry environment, +then `codeclone-mcp` from `PATH`. It uses local stdio and does not rewrite +Claude Code settings. + +## Skills + +Claude Code namespaces plugin skills with the plugin name: + +| Skill | Invocation | +|---|---| +| Repository review | `/codeclone:codeclone-review` | +| Hotspot snapshot | `/codeclone:codeclone-hotspots` | +| Controlled repository edit | `/codeclone:codeclone-change-control` | +| Engineering Memory | `/codeclone:codeclone-engineering-memory` | + +The MCP server remains read-only with respect to source, baselines, cache, and +canonical reports. Change control, audit, and Engineering Memory write only +their documented bounded local state. 
+ +## Development + +Load the monorepo source directly: + +```bash +claude --plugin-dir plugins/claude-code-codeclone +``` + +Validate the plugin: + +```bash +claude plugin validate plugins/claude-code-codeclone +``` + +## Documentation + +- [Claude Code setup](https://orenlab.github.io/codeclone/guide/integrations/claude-code/setup/) +- [MCP usage guide](https://orenlab.github.io/codeclone/guide/mcp/) +- [Structural Change Controller](https://orenlab.github.io/codeclone/book/12-structural-change-controller/) diff --git a/plugins/claude-code-codeclone/scripts/launch_mcp.py b/plugins/claude-code-codeclone/scripts/launch_mcp.py new file mode 100644 index 00000000..8d172b70 --- /dev/null +++ b/plugins/claude-code-codeclone/scripts/launch_mcp.py @@ -0,0 +1,22 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy +"""Claude Code plugin MCP launcher entrypoint. + +The monorepo delegates to the shared launcher. Storefront synchronization +replaces this file with the full standalone launcher implementation. 
+""" + +from __future__ import annotations + +import runpy +from pathlib import Path + +SHARED_LAUNCHER = ( + Path(__file__).resolve().parents[2] / "codeclone" / "scripts" / "launch_mcp.py" +) + +if __name__ == "__main__": + runpy.run_path(str(SHARED_LAUNCHER), run_name="__main__") diff --git a/plugins/claude-code-codeclone/skills/codeclone-change-control/SKILL.md b/plugins/claude-code-codeclone/skills/codeclone-change-control/SKILL.md new file mode 100644 index 00000000..645aa7ed --- /dev/null +++ b/plugins/claude-code-codeclone/skills/codeclone-change-control/SKILL.md @@ -0,0 +1,249 @@ +--- +name: codeclone-change-control +description: MANDATORY HARD GATE before ANY repository file write when CodeClone MCP is connected — read on every implement/fix/refactor task; start_controlled_change before first edit; finish_controlled_change before claiming done; see always-on rule change-control-gate. +--- + +# CodeClone Change Control + +Edit pipeline for the **target Python repository** (source, `tests/`, docs, config). +CodeClone MCP available → follow this pipeline. Coverage/CI/docs labels do **not** +skip intent. Use `dirty_scope_policy="continue_own_wip"` only to resume known +uncommitted WIP in declared scope when start would otherwise block on dirty scope +alone — finish still proves scope via `changed_files` or `diff_ref`. + +**Skip pipeline** only when: no files will change; analysis-only; MCP unavailable +(edits → BLOCKED). Not for read-only review (`codeclone-review`) or snapshots +(`codeclone-hotspots`). + +Findings are source of truth — do not reinterpret. No CLI/local-report fallback. +Never mutate baseline, cache, canonical reports, or generated state; never +auto-suppress. Pass absolute `root` to analysis tools. 
+ +## Tool tiers + +| Tier | Tools | Role | +|--------------------|-----------------------------------------------------------------------------------------------|----------------------------------| +| **Normal** | `analyze_repository`, `start_controlled_change`, `finish_controlled_change` | Every edit cycle — use these | +| **Queue/recovery** | `manage_change_intent` (promote, recover, renew, reset) | Multi-agent wait, crash recovery | +| **Advanced** | `get_blast_radius`, `check_patch_contract`, `validate_review_claims`, `create_review_receipt` | Debugging or legacy servers only | + +Workflow tools orchestrate the same steps as atomic tools. They **never run +analysis**. Do not call atomic verify/receipt/clear in the same cycle when +start/finish are available. + +## Normal pipeline + +One edit cycle: + +``` +1. analyze_repository(root=abs) # before-run; skip if valid recent run +2. start_controlled_change(...) # see decision table — before first edit +3. get_relevant_memory(root=abs, scope=... or intent_id=...) # root required +4. edit inside declared scope only +5. analyze_repository(root=abs) # after-run ONLY if finish will require it +6. record engineering memory (MCP) # REQUIRED before finish if §Incident memory +7. finish_controlled_change(...) # see decision table — same intent_id + # optional: propose_memory=true on accept for draft memory candidates +``` + +Keep `run_id`, `intent_id`, and the before-run from step 1 through the cycle. +Intent binds to the **before-run digest** — do not redeclare on the after-run. + +### Engineering Memory (step 3) + +After `edit_allowed=true`, call `get_relevant_memory` before the first edit. +**Always pass absolute `root`** (same as `analyze_repository`); `intent_id` or +`scope` alone fails MCP validation. Default `mcp_sync_policy=bootstrap_if_missing` +auto-bootstraps when the store is missing and a session run exists; explicit +`refresh_from_run` when you need a forced ingest. No MCP run → auto sync skipped. 
+ +| Need | Tool | +|----------------------|---------------------------------------------------------------------------------------------------------------------| +| Ranked scope context | `get_relevant_memory(root=abs, scope=… \| intent_id=…)` | +| One path | `query_engineering_memory(mode=for_path, path=…)` | +| Keyword search | `query_engineering_memory(mode=search, query=…, filters={match_mode:…})`; optional `semantic=true` when index built | +| Draft observation | `manage_engineering_memory(action=record_candidate, …)` | +| Post-edit proposals | `finish(..., propose_memory=true)` | + +Full playbook: `codeclone-engineering-memory` skill and +`docs/book/13-engineering-memory/index.md`. Human approval via VS Code Memory view (not +MCP) required to promote drafts — agents cannot activate records via MCP. + +Do not use memory to expand scope, override findings, or justify `do_not_touch` +edits. Surface `contradiction_note` and stale warnings to the user. + +### Incident memory (before step 7) + +**Chat does not persist.** If the cycle had an incident, non-trivial complexity, or +a decision the next agent should not rediscover, call +`manage_engineering_memory(action=record_candidate, …)` **before** +`finish_controlled_change` — or use `propose_memory=true` on finish for a batch. + +| Write when | Examples | +|------------|------------------------------------------------------------------------| +| Incident | verify/hygiene surprise, recovery, workaround, blocked then unblocked | +| Complexity | non-obvious root cause, multi-file debug, acted on stale/contradiction | +| Decision | tradeoff, “do not repeat X”, integration quirk | + +Skip for trivial one-liner fixes only. See `change-control-gate` rule and +`codeclone-engineering-memory` skill. + +Before `record_candidate`, compress to one durable fact with `subject_path` set; +target ≤300 chars (hard reject above `max_statement_chars`, default 1000). 
+ +### After `start` (`edit_allowed` gate) + +| Response | Action | +|------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `needs_analysis` | Run step 1 for same `root`, then `start` again | +| `queued` | **No edits.** Wait → `manage_change_intent(promote)`. If `before_run_evicted`: step 1 → `start` again | +| `blocked` | **No edits.** Intent exists — clear via `manage_change_intent(clear)` if abandoning; follow `next_step`. If dirty scope is known WIP with no foreign overlap, retry `start` with `dirty_scope_policy="continue_own_wip"`. | +| `active` | Read `blast_radius` + `budget`. Edit only if `edit_allowed=true`. Budget `gate_preview.would_fail` is advisory — edit may proceed, but verify may reject. | + +**Edit permission:** `status == "active"` alone is not enough — require +`edit_allowed == true`. Treat unknown start statuses as no permission. + +Three independent contours (do not collapse): + +```text +status = persisted registry lifecycle +ownership = runtime view (PID / TTL / lease) +hygiene = git working tree ∩ declared scope +permission = edit_allowed (with status gate) +``` + +Before edit: scan `do_not_touch` (hard boundary), `direct_dependents`, clone +cohort / `review_context` (context only). `get_blast_radius(transitive)` only if +start summary is insufficient. + +Declare in `start`: `allowed_files`, `allowed_related`, `forbidden`, `intent`, +`expected_effects`. Outside scope → stop → user OK (unless already allowed) → +new `start` with wider scope. Silent expansion = failed patch. Foreign overlap → +`on_conflict=queue` unless immediate edit required. 
+ +**Scope declaration rules:** + +| Path kind | Declare in | Notes | +|-----------------------------------|-------------------|-------------------------------------------------------------------------------| +| Files you create or edit | `allowed_files` | **New modules go here**, not only `allowed_related` | +| Tests/docs/helpers you will touch | `allowed_related` | Finish-allowed; may show `scope: expanded` | +| Paths you will not touch | omit | Foreign **active/stale** dirty paths outside your scope are ignored at finish | + +### After edit → `finish` + +Evidence: **`changed_files` XOR `diff_ref`** — exactly one; both or neither is +an error. `before_run_id` is resolved from the intent — do not pass a new declare. + +**Git reconciliation (automatic):** finish cross-checks agent evidence against +the **full git working tree** and the dirty snapshot captured at `start` — not +honor-system. List every path you touched in `changed_files` when possible; the +controller also reads git and blocks under-reporting or silent out-of-scope +edits. You **must** declare scope wide enough at `start`. + +| `finish_block_reason` | Blocks? | Action | +|-------------------------|---------------------------------------|---------------------------------------------------------| +| `missing_evidence` | yes | Add in-scope dirty paths to evidence or revert | +| `foreign_dirty_overlap` | yes | Coordinate foreign intent on overlapping in-scope paths | +| `own_unscoped_dirty` | only if `CODECLONE_STRICT_FINISH` env | Reconcile out-of-scope dirt or widen scope | + +Out-of-scope unattributed dirt (`new_` / `modified_` / `unknown_unattributed_*`) and +`preexisting_unscoped_dirty` are **advisory** — report them; they may elevate status to +`accepted_with_external_changes` without blocking. + +``` +finish_controlled_change( + intent_id=..., + changed_files=[...] 
| diff_ref=..., # XOR + after_run_id=..., # when verification.after_run_required + detail_level=summary|full, # hygiene attribution + patch_trail_detail=summary|full, # patch_trail forensics + claims_text=..., # optional + propose_memory=..., # optional draft batch on accept +) +``` + +Pipeline (do not replicate manually): hygiene → check → verify → patch_trail → +claims → receipt → clear. `patch_trail` does not authorize edits. + +### After `finish` + +| Status | Action | +|-----------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------| +| `accepted` / `accepted_with_external_changes` | Cycle complete only if `intent_cleared=true` **and** §Completion gate + §Advisory acceptance satisfied | +| `unverified` | Intent stays active. Follow `next_step` (usually after-run), then **retry same `intent_id`** | +| `violated` (scope) | Fix files or expand scope via new `start`; retry same `intent_id` | +| `expired` | Before-run digest stale. Re-analyze → new `start` | +| `reason=workspace_hygiene` | **No atomic verify bypass.** Reconcile dirty scope/evidence → retry same `intent_id`. Queued foreign intents do not block finish. | +| `user_action_required=true` | Stop; follow `next_step` or escalate | + +Do not start a new intent unless scope changed or intent expired. + +## Completion gate + +No "done" / "verified" / "implemented" / "ready" unless all hold: + +- `finish.status` is `accepted` or `accepted_with_external_changes` +- `intent_cleared=true` +- claim warnings reported when `claims.valid` is false +- §Advisory acceptance signals reported when present + +`accepted` = patch contract passed for declared scope — **not** "no regressions" or +unchanged health. + +`novelty="known"` is baseline-relative, not patch-relative. 
It means the finding +fingerprint is accepted by the trusted baseline; it does **not** prove the patch +did not introduce or reintroduce it. Patch-local regression claims require clean +before-run to after-run evidence from compare/verify. + +## Advisory acceptance (do not hide) + +Read **before** the user summary, even when `intent_cleared=true`: + +| Field | Report when | +|----------------------------------------------|----------------------------------------------------------------| +| `verification.structural_delta.health_delta` | `< 0` — health dropped; cite delta even when verify `accepted` | +| `health_regression_advisory` | present on accepted finish when delta negative | +| `verification.reason: after_run_not_new` | after-run equals before-run — re-analyze with new run_id | +| `verification.structural_delta.verdict` | `regressed` or `mixed` | +| `external_regressions`, `gate_worsened` | non-empty / true | +| `accepted_with_external_changes` | name external workspace signal | +| `contract_violations` | non-empty (`relaxed` may still accept) | +| `receipt.verdict`, `human_decision_points` | `needs_attention` or non-empty | + +**Anti-pattern:** `status: accepted` → skip reporting health drop or structural +regressions. Contract acceptance clears the intent; structural delta is +user-facing advisory. + +**Example:** docs-only patch → `accepted`, `intent_cleared=true`, but +`health_delta: -2`, `verdict: regressed` → tell the user health fell; do not stop +at "patch accepted". + +## Verify profiles + +Controller derives profile from changed files — read +`verification.verification_profile` and `after_run_required` from finish. +Do not guess. Details: `help(topic="verification_profiles")`. 
+ +## Atomic fallback (legacy / debug only) + +When start/finish unavailable: + +``` +list_workspace → analyze → declare → budget → edit → analyze → check → verify +→ validate_review_claims(text=..., patch_health_delta=verify.structural_delta.health_delta) +→ create_review_receipt → clear +``` + +Say explicitly which tools were skipped. Never mix with normal pipeline in one cycle. + +## Escalate to user + +Scope expansion; touch `do_not_touch`; foreign active without queue; blocked +`next_step`; baseline/cache/report mutation; recover foreign intent. Routine +analyze/queue/promote runs automatically. + +## Claims (do not) + +Report-only ≠ CI fail; Security Surfaces ≠ vulns; baselined debt ≠ new relative +to baseline; patch-local regression needs before/after evidence; dead code vs +runtime reachability; structural verify without profile evidence. diff --git a/plugins/claude-code-codeclone/skills/codeclone-engineering-memory/SKILL.md b/plugins/claude-code-codeclone/skills/codeclone-engineering-memory/SKILL.md new file mode 100644 index 00000000..c9f26439 --- /dev/null +++ b/plugins/claude-code-codeclone/skills/codeclone-engineering-memory/SKILL.md @@ -0,0 +1,150 @@ +--- +name: codeclone-engineering-memory +description: Use CodeClone Engineering Memory via MCP — scope context before edits, FTS search, draft writes, finish proposals, and human approve boundaries. +--- + +# CodeClone Engineering Memory + +Local SQLite store of evidence-linked repository facts. Complements change +control — does **not** replace analysis, blast radius, or patch verify. + +Full contract: `docs/book/13-engineering-memory/index.md`. MCP help: +`help(topic="engineering_memory")`. + +## Prerequisites + +Default `mcp_sync_policy=bootstrap_if_missing` auto-creates the store from the +latest MCP analysis run on first `get_relevant_memory` after `analyze_repository`. 
+ +| Need | Tool | +|------|------| +| Auto bootstrap (default) | `analyze_repository(root=abs)` → `get_relevant_memory(root=abs, …)` | +| Explicit refresh | `manage_engineering_memory(action=refresh_from_run, run_id?)` | +| CI / offline bootstrap | `codeclone memory init [--refresh]` | + +If policy is `off` or no MCP run exists and the DB is missing, call +`refresh_from_run` after `analyze_repository` or ask the user to run CLI init. +Do not invent memory from local files or report dumps. + +## When to read + +| Moment | Tool | Parameters | +|----------------------------------|----------------------------|-------------------------------------------------------------| +| After `start`, before first edit | `get_relevant_memory` | **`root` required**; `scope` or `intent_id` from active intent | +| One file deep-dive | `query_engineering_memory` | `mode=for_path`, `path` | +| Symbol context | `query_engineering_memory` | `mode=for_symbol`, `symbol` | +| Keyword discovery | `query_engineering_memory` | `mode=search`, `query`, `filters={match_mode:"any"\|"all"}`; optional `semantic=true` when index built | +| Store health | `query_engineering_memory` | `mode=status` | +| Stale inventory | `query_engineering_memory` | `mode=stale` | +| Trajectory forensics | `query_engineering_memory` | `mode=trajectory_get\|trajectory_search\|trajectory_status` | +| Trajectory analytics | `query_engineering_memory` | `mode=trajectory_anomalies\|trajectory_agents\|trajectory_dashboard` | + +Defaults exclude **stale**. Keyword `search` excludes drafts unless +`include_drafts=true`; scoped `get_relevant_memory` and `for_path` / +`for_symbol` include draft agent notes automatically so handoffs are visible. +Draft records remain non-authoritative. + +### Optional semantic search (Phase 20) + +Repository default: `memory.semantic.enabled=false`. To use semantic blend: + +1. Enable `[tool.codeclone.memory.semantic]` in `pyproject.toml` +2. `pip install 'codeclone[semantic-lancedb]'` +3. 
`manage_engineering_memory(action=rebuild_semantic_index)` (MCP) or + `codeclone memory semantic rebuild` (CLI/CI) +4. `query_engineering_memory(mode=search, semantic=true, …)` + +Without a built index, search stays FTS-only (`semantic.used: false` in the +response). Default provider `diagnostic` is **deterministic hash vectors**, not +semantic-quality embeddings — do not present hits as LLM recall. + +### Read checklist + +1. Scan ranked records for `contract_note`, `document_link`, `risk_note` +2. Check response warnings for stale linked paths +3. If `contradiction_note` matches scope → **stop and tell the user** +4. Do not treat `draft` / `inferred` as policy + +## When to write (draft only) + +| Situation | Tool | Notes | +|---------------------------------|---------------------------------------------------------------------------------------------|------------------------------| +| Durable observation during edit | `manage_engineering_memory(action=record_candidate, record_type, statement, subject_path)` | Creates **draft** — **subject_path required** | +| Validate claims before finish | `manage_engineering_memory(action=validate_claims, text=…)` | Memory-layer guard | +| Post-edit batch proposal | `finish_controlled_change(..., propose_memory=true)` | On **accept** only | +| Refresh system facts from run | `manage_engineering_memory(action=refresh_from_run, run_id?)` | Force ingest | +| Rebuild semantic LanceDB sidecar | `manage_engineering_memory(action=rebuild_semantic_index)` | After semantic enabled + extras | +| Rebuild trajectories | `manage_engineering_memory(action=rebuild_trajectories)` | After audit-enabled workflows | +| Promote an Experience | `manage_engineering_memory(action=promote_experience, experience_id=…)` | Creates a human-reviewable draft | +| Projection jobs | `manage_engineering_memory(action=enqueue_projection_rebuild)` / `action=projection_rebuild_status` / `action=run_projection_jobs_once` | When policy enabled | +| Atomic fallback 
| `manage_engineering_memory(action=propose_from_receipt, text=…, intent_id?)` | When finish hook unavailable | + +### Write rules + +- **Session chat is ephemeral** — durable notes require `record_candidate` or + `finish(..., propose_memory=true)`; never rely on the assistant message alone. +- Before `finish_controlled_change`, if the cycle had an **incident**, **complexity**, + or a **decision** worth remembering, write at least one `record_candidate` (see + `change-control-gate` and `codeclone-change-control` §Incident memory). +- Agents **never** approve, reject, or archive via MCP +- Ask the user to approve drafts in the CodeClone VS Code **Memory** view (agents + cannot approve through MCP) +- Ask user to run `codeclone memory init --refresh` when policy is `off` and facts drift +- Or call `refresh_from_run` when an MCP run is available +- Memory writes do **not** satisfy change-control scope or verify requirements +- **Never** use project root as memory scope (`"."`, `""`, unscoped retrieval) +- Compress observations before `record_candidate`: one durable fact, target + ≤300 chars; rewrite if >500; hard reject >1000 +- Read compact lanes separately: `records[]` are durable assertions, + `experiences[]` are advisory patterns, `trajectories[]` are bounded examples, + and `coverage` describes evidence availability +- Compact is default: subject lists are bounded with + `subject_count`/`subjects_truncated`; experience diversity uses + `multi_agent`/`dominant_agent_facet`; trajectory contracts, steps, evidence + ids, payloads, and the duplicated root Patch Trail are omitted +- Use `mode=get`, `trajectory_get`, or `detail_level=full` for complete + statements, subjects, agent facets, contracts, evidence, and payloads + +## When NOT to use memory + +- Justifying `do_not_touch` path edits +- Expanding scope beyond declared intent +- Overriding CodeClone findings +- Substituting for `analyze_repository` or `get_blast_radius` +- Treating draft/stale as verified 
project policy +- Treating `trajectories[]` or `patch_trail_summary` as edit authorization + +## Integration with change control + +Normal edit cycle (memory steps: `get_relevant_memory`, `record_candidate`, optional `propose_memory=true`): + +``` +analyze_repository +→ start_controlled_change +→ get_relevant_memory # after edit_allowed=true +→ edit in scope +→ analyze_repository # when after_run required +→ record_candidate # before finish if incident/complexity/decision +→ finish_controlled_change # optional propose_memory=true +``` + +Memory context is **advisory**. Blast radius `do_not_touch` remains a hard boundary. + +## Record types (common) + +| Type | Typical source | Agent trust | +|----------------------|-----------------------------|---------------------------| +| `contract_note` | init ingest | high when active+verified | +| `document_link` | docs ingest | high when active | +| `risk_note` | metrics ingest | informational | +| `module_role` | inventory / finish proposal | context | +| `change_rationale` | finish proposal | draft until approved | +| `contradiction_note` | ingest conflict | **escalate to user** | + +## Escalate to user + +- `contradiction_note` in scope +- Stale warnings on previously approved records you rely on +- Missing memory DB with no MCP run (init or analyze first) +- Draft candidate should become team policy (approve needed) +- System facts outdated after large refactor (refresh needed) diff --git a/plugins/claude-code-codeclone/skills/codeclone-hotspots/SKILL.md b/plugins/claude-code-codeclone/skills/codeclone-hotspots/SKILL.md new file mode 100644 index 00000000..ca1dba2d --- /dev/null +++ b/plugins/claude-code-codeclone/skills/codeclone-hotspots/SKILL.md @@ -0,0 +1,61 @@ +--- +name: codeclone-hotspots +description: Use for quick CodeClone hotspot discovery — health check, top risks, or a single-question quality snapshot without a full review loop. 
+--- + +# CodeClone Hotspots + +Use this skill when the user wants a fast quality snapshot — not a full review +session but a quick answer about health, top risks, or a specific metric. + +## When to use + +- "How healthy is this repo?" +- "What are the worst hotspots?" +- "Any new baseline-relative regressions?" +- "Show me the complexity hotspots." +- Quick pre-merge sanity checks. + +Baseline-relative `new`/`known` is not patch-local proof. For "did my patch +introduce this?", use the change-control before-run to after-run verify path. + +## Workflow + +``` +analyze_repository → get_production_triage +``` + +That's the cheapest useful path. Stop there unless the user asks for more. + +### If the user asks about a specific metric + +``` +analyze_repository → check_complexity | check_coupling | check_cohesion | check_dead_code | check_clones +``` + +For adoption, API-surface, or current-run coverage join questions: + +``` +analyze_repository → get_report_section(section="metrics") +``` + +If external coverage semantics are unclear, call `help(topic="coverage")` +before interpreting `coverage_hotspots` or `scope_gap_hotspots`. + +### If the user wants a gate preview + +``` +analyze_repository → evaluate_gates +``` + +## Rules + +- Use MCP tools only when invoked through the CodeClone plugin. +- If no latest MCP run exists, call `analyze_repository` yourself before reading `latest/*` resources. +- Use default thresholds — this is a quick check, not an exploratory deep-dive. +- For `check_*` tools, use `detail_level="summary"`, `"normal"`, or + `"full"` only. `compact` is valid only for `help(detail="compact")`. +- One tool call is better than three when answering a simple question. +- Summarize concisely — the user wants a snapshot, not a report. +- Do not fall back to CLI or local report files. +- If the result looks concerning, suggest using `codeclone-review` for a proper session. 
diff --git a/plugins/claude-code-codeclone/skills/codeclone-review/SKILL.md b/plugins/claude-code-codeclone/skills/codeclone-review/SKILL.md new file mode 100644 index 00000000..08701b5a --- /dev/null +++ b/plugins/claude-code-codeclone/skills/codeclone-review/SKILL.md @@ -0,0 +1,82 @@ +--- +name: codeclone-review +description: Use when Claude Code should review a Python repository through CodeClone MCP — conservative first pass, baseline-aware triage, changed-files review, or deeper exploratory follow-up. +--- + +# CodeClone Review + +Use this skill for structural review, clone triage, changed-scope review, or +health-oriented refactor planning in a Python repository. + +## Rules + +- Use MCP tools only when invoked through the CodeClone plugin. +- If no latest MCP run exists, call `analyze_repository` or `analyze_changed_paths` yourself before reading `latest/*` resources. +- Start with the default or `pyproject`-resolved CodeClone profile. +- Do not lower thresholds on the first pass. +- Lower-threshold runs are explicit exploratory follow-ups, not silent replacements. +- Prefer production-first and changed-files-first review over broad listing. +- CodeClone is the source of truth — do not reinterpret findings independently. +- Do not fall back to CLI or local report files. +- Never auto-suppress findings or mutate repository state. + +## Workflows + +### Full repository + +``` +analyze_repository → get_production_triage +→ list_hotspots → get_finding → get_remediation +``` + +### Changed files (PR / patch) + +``` +analyze_changed_paths → get_report_section(section="changed") +→ list_findings(changed_paths=..., sort_by="priority") → generate_pr_summary +``` + +### Gate preview + +``` +analyze_repository → evaluate_gates +→ explain reasons, do not change files +``` + +### Current-run metrics and coverage surfaces + +``` +analyze_repository → get_report_section(section="metrics") +``` + +If the question is about external Cobertura join semantics: + +1. 
Call `help(topic="coverage")`. +2. Explain `coverage_hotspots` vs `scope_gap_hotspots` from canonical metrics. +3. Do not turn scope gaps into "untested" claims. + +### Deeper follow-up + +If the default pass looks clean: + +1. Call `help(topic="analysis_profile")` for threshold semantics. +2. Run a second analysis with lower thresholds. +3. Explain this is a higher-sensitivity pass with more noise. +4. Use `compare_runs` to show the delta. + +## Tool preferences + +- Prefer `list_hotspots` or `check_*` before broad `list_findings`. +- For finding/list/check tools, use `detail_level="summary"`, `"normal"`, or + `"full"` only. `compact` is valid only for `help(detail="compact")`. +- Use `get_finding` / `get_remediation` for one finding — not `detail_level=full` on lists. +- Use `source_kind="production"` (or `tests`, `fixtures`, `mixed`, `other`) to cut test noise. +- Use `get_report_section(section="metrics")` for adoption, API-surface, or Coverage Join facts. +- Use `mark_finding_reviewed` + `exclude_reviewed=true` in long sessions. +- Pass absolute `root` — MCP rejects relative roots. + +## Non-goals + +- Do not auto-suppress findings. +- Do not treat report-only `overloaded_modules` as findings or gate data. +- Do not present a clean default pass as proof that no finer-grained issues exist. diff --git a/plugins/codeclone/README.md b/plugins/codeclone/README.md index 2ec8569c..f2b7ca63 100644 --- a/plugins/codeclone/README.md +++ b/plugins/codeclone/README.md @@ -33,7 +33,8 @@ user-facing label stays in `interface.displayName` as `CodeClone`. Install the distribution package from the Codex marketplace: ```bash -marketplace add orenlab/codeclone-codex +codex plugin marketplace add orenlab/codeclone-codex +codex plugin add codeclone@orenlab-codeclone ``` This plugin does not install the MCP server binary. Install CodeClone with the @@ -62,7 +63,8 @@ then `codeclone-mcp` from `PATH`, without relying on `sh -lc`. 
`.agents/plugins/marketplace.json` is the monorepo-local source entry used for development and distribution packaging. Public installs should use -`marketplace add orenlab/codeclone-codex`. +`codex plugin marketplace add orenlab/codeclone-codex`, followed by +`codex plugin add codeclone@orenlab-codeclone`. The plugin does not rewrite `~/.codex/config.toml`. diff --git a/plugins/cursor-codeclone/README.md b/plugins/cursor-codeclone/README.md index d06080fe..2f2afdfa 100644 --- a/plugins/cursor-codeclone/README.md +++ b/plugins/cursor-codeclone/README.md @@ -15,7 +15,27 @@ the `codeclone-mcp` server. - Python workspace - `codeclone-mcp` launcher (`codeclone >= 2.0.0`) -### Install the launcher +## Install from the Cursor marketplace + +The public plugin source is +[orenlab/codeclone-cursor](https://github.com/orenlab/codeclone-cursor). + +If CodeClone is already visible in your Cursor marketplace, open the marketplace +panel, select **CodeClone**, choose user or project scope, and install it. + +For a team marketplace, an administrator imports the storefront repository: + +1. Open **Cursor Dashboard → Settings → Plugins**. +2. Under **Team Marketplaces**, choose **Add Marketplace**. +3. Choose **Import from Repo** and enter + `https://github.com/orenlab/codeclone-cursor`. +4. Add CodeClone to the marketplace, configure team access, and save. +5. Install CodeClone from the marketplace panel in Cursor. + +The old `~/.cursor/plugins/local` symlink path is for plugin development only; +it is not the public installation route. + +### Install the MCP launcher ```bash uv tool install "codeclone[mcp]" @@ -127,6 +147,7 @@ extension launcher. 
## Distribution - **Monorepo source:** `plugins/cursor-codeclone/` +- **Public storefront:** `https://github.com/orenlab/codeclone-cursor` - **Not in** `.agents/plugins/marketplace.json` (Codex-only local marketplace) - **Standalone releases:** embed the full launcher from `plugins/codeclone/scripts/launch_mcp.py`; the monorepo uses a thin delegator @@ -135,7 +156,7 @@ extension launcher. ## Local development -Symlink the plugin directory for local testing: +For plugin development only, symlink the source directory: ```bash ln -s /path/to/codeclone/plugins/cursor-codeclone ~/.cursor/plugins/local/codeclone diff --git a/scripts/integration_dist/README.claude-code.root.md b/scripts/integration_dist/README.claude-code.root.md new file mode 100644 index 00000000..fe55dfee --- /dev/null +++ b/scripts/integration_dist/README.claude-code.root.md @@ -0,0 +1,39 @@ +# CodeClone for Claude Code + +This repository is the public Claude Code marketplace for the CodeClone plugin. +It is synchronized from +[orenlab/codeclone](https://github.com/orenlab/codeclone); see +`SYNC_MANIFEST.json` for the exact source commit and package version. + +## Install + +Add the marketplace, then install the plugin: + +```bash +claude plugin marketplace add orenlab/codeclone-claude-code +claude plugin install codeclone@orenlab-codeclone +``` + +The equivalent commands inside an interactive Claude Code session are: + +```text +/plugin marketplace add orenlab/codeclone-claude-code +/plugin install codeclone@orenlab-codeclone +``` + +The plugin does not bundle the Python MCP server. Install `codeclone[mcp]` in +the workspace or on `PATH`: + +```bash +uv tool install "codeclone[mcp]" +codeclone-mcp --help +``` + +See the [plugin guide](plugins/codeclone/README.md) for skills, runtime +resolution, and trust boundaries. 
+ +## Documentation + +- [Claude Code setup](https://orenlab.github.io/codeclone/guide/integrations/claude-code/setup/) +- [MCP usage guide](https://orenlab.github.io/codeclone/guide/mcp/) +- [Claude Code plugin contract](https://orenlab.github.io/codeclone/book/integrations/claude-code-plugin/) diff --git a/scripts/integration_dist/README.codex.root.md b/scripts/integration_dist/README.codex.root.md index b6541139..97a5a698 100644 --- a/scripts/integration_dist/README.codex.root.md +++ b/scripts/integration_dist/README.codex.root.md @@ -8,7 +8,7 @@ synced from [orenlab/codeclone](https://github.com/orenlab/codeclone); see | Path | Role | |------|------| -| `.agents/plugins/marketplace.json` | Codex marketplace catalog (`marketplace add orenlab/codeclone-codex`) | +| `.agents/plugins/marketplace.json` | Codex marketplace catalog (`codex plugin marketplace add orenlab/codeclone-codex`) | | `plugins/codeclone/` | Plugin root (manifest, skills, MCP launcher, assets) | | `plugins/codeclone/README.md` | **Full** install and usage guide for the plugin tree | @@ -19,7 +19,8 @@ repo root. 
## Install ```bash -marketplace add orenlab/codeclone-codex +codex plugin marketplace add orenlab/codeclone-codex +codex plugin add codeclone@orenlab-codeclone ``` Install `codeclone[mcp]` in your workspace or on `PATH` so the bundled launcher diff --git a/scripts/integration_dist/gitignore.claude-code b/scripts/integration_dist/gitignore.claude-code new file mode 100644 index 00000000..4056fc18 --- /dev/null +++ b/scripts/integration_dist/gitignore.claude-code @@ -0,0 +1,18 @@ +# Distribution repo — synced from orenlab/codeclone (scripts/sync_integrations.py) + +# IDE / OS +.idea/ +.DS_Store + +# Python (plugin launcher under plugins/codeclone/scripts/) +__pycache__/ +*.py[cod] +.venv/ +venv/ + +# Local secrets +.env +.env.* + +# Logs +*.log diff --git a/scripts/integration_dist/marketplace.claude-code.json b/scripts/integration_dist/marketplace.claude-code.json new file mode 100644 index 00000000..6febbaeb --- /dev/null +++ b/scripts/integration_dist/marketplace.claude-code.json @@ -0,0 +1,27 @@ +{ + "name": "orenlab-codeclone", + "owner": { + "name": "OrenLab", + "email": "pytelemonbot@mail.ru" + }, + "metadata": { + "description": "CodeClone Structural Change Controller plugins for Claude Code." 
+ }, + "plugins": [ + { + "name": "codeclone", + "source": "./plugins/codeclone", + "description": "Deterministic structural review, Engineering Memory, and intent-first change control over codeclone-mcp.", + "homepage": "https://orenlab.github.io/codeclone/guide/integrations/claude-code/setup/", + "repository": "https://github.com/orenlab/codeclone-claude-code", + "license": "MPL-2.0", + "category": "developer-tools", + "tags": [ + "python", + "mcp", + "structural-review", + "change-control" + ] + } + ] +} diff --git a/scripts/sync_integrations.py b/scripts/sync_integrations.py index fcbb1082..5cb49fd6 100644 --- a/scripts/sync_integrations.py +++ b/scripts/sync_integrations.py @@ -90,6 +90,23 @@ def _dist_file(name: str, destination: str) -> tuple[str, str]: ), generated=(MANIFEST_NAME,), ), + "claude-code": SyncTarget( + name="claude-code", + copies=( + ("plugins/claude-code-codeclone", "plugins/codeclone"), + ( + "plugins/codeclone/scripts/launch_mcp.py", + "plugins/codeclone/scripts/launch_mcp.py", + ), + _dist_file("README.claude-code.root.md", "README.md"), + _dist_file("gitignore.claude-code", ".gitignore"), + _dist_file( + "marketplace.claude-code.json", + ".claude-plugin/marketplace.json", + ), + ), + generated=(MANIFEST_NAME,), + ), "claude-desktop": SyncTarget( name="claude-desktop", copies=( diff --git a/tests/test_claude_code_plugin.py b/tests/test_claude_code_plugin.py new file mode 100644 index 00000000..90971281 --- /dev/null +++ b/tests/test_claude_code_plugin.py @@ -0,0 +1,87 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy +from __future__ import annotations + +import json +from pathlib import Path + +from tests.plugin_test_helpers import load_json, parse_frontmatter + + +def test_claude_code_plugin_manifest_and_mcp_config() -> None: + root = Path(__file__).resolve().parents[1] + plugin_root = root / "plugins" / "claude-code-codeclone" + manifest = load_json(plugin_root / ".claude-plugin" / "plugin.json") + mcp_config = load_json(plugin_root / ".mcp.json") + + assert isinstance(manifest, dict) + assert isinstance(mcp_config, dict) + assert manifest["name"] == "codeclone" + assert manifest["license"] == "MPL-2.0" + assert ( + manifest["homepage"] + == "https://orenlab.github.io/codeclone/guide/integrations/claude-code/setup/" + ) + assert manifest["repository"] == "https://github.com/orenlab/codeclone-claude-code" + assert "version" not in manifest + + server = mcp_config["mcpServers"]["codeclone"] + assert server == { + "command": "python3", + "args": ["${CLAUDE_PLUGIN_ROOT}/scripts/launch_mcp.py"], + } + assert (plugin_root / "scripts" / "launch_mcp.py").is_file() + + +def test_claude_code_plugin_skills_match_shared_contracts() -> None: + root = Path(__file__).resolve().parents[1] + claude_skills = root / "plugins" / "claude-code-codeclone" / "skills" + codex_skills = root / "plugins" / "codeclone" / "skills" + + for skill_name in ( + "codeclone-review", + "codeclone-hotspots", + "codeclone-change-control", + "codeclone-engineering-memory", + ): + claude_text = (claude_skills / skill_name / "SKILL.md").read_text( + encoding="utf-8" + ) + codex_text = (codex_skills / skill_name / "SKILL.md").read_text( + encoding="utf-8" + ) + claude_frontmatter = parse_frontmatter(claude_text) + codex_frontmatter = parse_frontmatter(codex_text) + assert claude_frontmatter["name"] == codex_frontmatter["name"] + if skill_name == "codeclone-review": + assert "claude code" in claude_frontmatter["description"].lower() + assert 
"codex" in codex_frontmatter["description"].lower() + continue + assert claude_frontmatter == codex_frontmatter + + +def test_claude_code_marketplace_overlay_and_install_docs() -> None: + root = Path(__file__).resolve().parents[1] + marketplace = json.loads( + ( + root / "scripts" / "integration_dist" / "marketplace.claude-code.json" + ).read_text(encoding="utf-8") + ) + readme = (root / "plugins" / "claude-code-codeclone" / "README.md").read_text( + encoding="utf-8" + ) + + assert marketplace["name"] == "orenlab-codeclone" + assert marketplace["metadata"]["description"] + assert marketplace["plugins"][0]["source"] == "./plugins/codeclone" + assert "claude plugin marketplace add orenlab/codeclone-claude-code" in readme + assert "claude plugin install codeclone@orenlab-codeclone" in readme + assert 'uv tool install "codeclone[mcp]"' in readme + + assert ( + root / "docs" / "guide" / "integrations" / "claude-code" / "setup.md" + ).is_file() + assert (root / "docs" / "book" / "integrations" / "claude-code-plugin.md").is_file() diff --git a/tests/test_codex_plugin.py b/tests/test_codex_plugin.py index 76eb6743..4ab56a18 100644 --- a/tests/test_codex_plugin.py +++ b/tests/test_codex_plugin.py @@ -161,7 +161,8 @@ def test_codex_plugin_readme_and_docs_exist() -> None: readme_text = (plugin_root / "README.md").read_text(encoding="utf-8") assert "# CodeClone for Codex" in readme_text - assert "marketplace add orenlab/codeclone-codex" in readme_text + assert "codex plugin marketplace add orenlab/codeclone-codex" in readme_text + assert "codex plugin add codeclone@orenlab-codeclone" in readme_text assert "codex mcp add codeclone -- codeclone-mcp --transport stdio" in readme_text assert "does not rewrite `~/.codex/config.toml`" in readme_text assert "prefers a workspace `.venv`" in readme_text diff --git a/tests/test_cursor_plugin.py b/tests/test_cursor_plugin.py index c6237dd0..d80afbf5 100644 --- a/tests/test_cursor_plugin.py +++ b/tests/test_cursor_plugin.py @@ -104,3 
+104,14 @@ def test_cursor_plugin_version_is_semver() -> None: assert re.fullmatch(r"\d+\.\d+\.\d+", version), ( f"Plugin version must be semver (X.Y.Z), got: {version}" ) + + +def test_cursor_readme_uses_marketplace_install_flow() -> None: + root = Path(__file__).resolve().parents[1] + readme = (root / "plugins" / "cursor-codeclone" / "README.md").read_text( + encoding="utf-8" + ) + + assert "https://github.com/orenlab/codeclone-cursor" in readme + assert "Import from Repo" in readme + assert "development only" in readme.lower() diff --git a/tests/test_sync_integrations.py b/tests/test_sync_integrations.py index 270b9954..7312d4ac 100644 --- a/tests/test_sync_integrations.py +++ b/tests/test_sync_integrations.py @@ -93,6 +93,21 @@ def _make_source(tmp_path: Path) -> Path: source / "plugins" / "codeclone" / "scripts" / "launch_mcp.py", "def resolve_launch_target():\n return None\n", ) + _write( + source / "plugins" / "claude-code-codeclone" / ".claude-plugin" / "plugin.json", + "{}\n", + ) + _write( + source / "plugins" / "claude-code-codeclone" / ".mcp.json", + "{}\n", + ) + _write( + source / "plugins" / "claude-code-codeclone" / "scripts" / "launch_mcp.py", + "import runpy\n", + ) + _write( + source / "plugins" / "claude-code-codeclone" / "skills" / "review" / "SKILL.md" + ) _seed_integration_dist(source) _write( source / "extensions" / "claude-desktop-codeclone" / "manifest.json", @@ -404,6 +419,29 @@ def test_codex_sync_writes_public_marketplace(tmp_path: Path) -> None: assert marketplace["plugins"][0]["source"]["path"] == "./plugins/codeclone" +def test_claude_code_sync_writes_public_marketplace(tmp_path: Path) -> None: + source = _make_source(tmp_path) + target = _make_target(tmp_path, "claude-code") + + sync_target( + source_root=source, + target_root=target, + target=SYNC_TARGETS["claude-code"], + allow_dirty=False, + dry_run=False, + ) + + marketplace = json.loads( + (target / ".claude-plugin/marketplace.json").read_text(encoding="utf-8") + ) + assert 
marketplace["name"] == "orenlab-codeclone" + assert marketplace["plugins"][0]["name"] == "codeclone" + assert marketplace["plugins"][0]["source"] == "./plugins/codeclone" + assert ( + target / "plugins" / "codeclone" / ".claude-plugin" / "plugin.json" + ).is_file() + + def test_sync_writes_gitignore_for_all_targets(tmp_path: Path) -> None: source = _make_source(tmp_path) for name in SYNC_TARGETS: @@ -456,3 +494,22 @@ def test_cursor_sync_ships_standalone_launcher(tmp_path: Path) -> None: launcher = (target / "scripts" / "launch_mcp.py").read_text(encoding="utf-8") assert "resolve_launch_target" in launcher assert "runpy" not in launcher + + +def test_claude_code_sync_ships_standalone_launcher(tmp_path: Path) -> None: + source = _make_source(tmp_path) + target = _make_target(tmp_path, "claude-code") + + sync_target( + source_root=source, + target_root=target, + target=SYNC_TARGETS["claude-code"], + allow_dirty=False, + dry_run=False, + ) + + launcher = ( + target / "plugins" / "codeclone" / "scripts" / "launch_mcp.py" + ).read_text(encoding="utf-8") + assert "resolve_launch_target" in launcher + assert "runpy" not in launcher diff --git a/zensical.toml b/zensical.toml index d864d556..eea1fc09 100644 --- a/zensical.toml +++ b/zensical.toml @@ -57,6 +57,7 @@ nav = [ { "Integrations" = [ { "VS Code" = "guide/integrations/vscode/setup.md" }, { "Cursor" = "guide/integrations/cursor/install-and-skills.md" }, + { "Claude Code" = "guide/integrations/claude-code/setup.md" }, { "Codex" = "guide/integrations/codex/setup.md" }, { "Claude Desktop" = "guide/integrations/claude-desktop/setup.md" }, { "SARIF export" = "guide/integrations/sarif/export.md" }, @@ -148,6 +149,7 @@ nav = [ { "Integrations" = [ { "VS Code" = "book/integrations/vs-code-extension.md" }, { "Cursor plugin" = "book/integrations/cursor-plugin.md" }, + { "Claude Code plugin" = "book/integrations/claude-code-plugin.md" }, { "Codex plugin" = "book/integrations/codex-plugin.md" }, { "Claude Desktop" = 
"book/integrations/claude-desktop-bundle.md" }, { "SARIF" = "book/integrations/sarif.md" }, From ae71cc06742aa78767b35a58a49cec77e7391395 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sat, 13 Jun 2026 15:02:23 +0500 Subject: [PATCH 269/318] chore(docs, deps): reformat docs and refresh transitive deps --- docs/book/00-intro.md | 1 + docs/book/01-terminology.md | 1 + docs/book/02-architecture-map.md | 70 +++++------ docs/book/03-core-pipeline.md | 1 + docs/book/04-cfg-semantics.md | 1 + docs/book/05-report.md | 1 + docs/book/06-html-render.md | 1 + docs/book/07-baseline.md | 1 + docs/book/08-cache.md | 1 + docs/book/09-exit-codes.md | 1 + docs/book/10-config-and-defaults.md | 40 +++--- docs/book/11-cli.md | 1 + .../blast-radius-and-receipt.md | 1 + .../finish-controlled-change.md | 1 - .../12-structural-change-controller/index.md | 31 ++--- .../workflow-tools.md | 2 + .../book/13-engineering-memory/cli-surface.md | 34 +++--- docs/book/13-engineering-memory/index.md | 2 +- .../book/13-engineering-memory/mcp-surface.md | 68 +++++------ .../13-engineering-memory/search-semantic.md | 1 + .../trajectory-and-patch-trail.md | 32 ++--- .../trajectory-labels.md | 36 +++--- .../trajectory-quality-and-passport.md | 16 +-- docs/book/14-claim-guard.md | 1 + docs/book/15-health-score.md | 1 + docs/book/16-metrics-and-quality-gates.md | 1 + docs/book/17-dead-code-contract.md | 3 + docs/book/18-suggestions-and-clone-typing.md | 1 + docs/book/19-inline-suppressions.md | 5 + docs/book/20-benchmarking.md | 1 + docs/book/22-determinism.md | 1 + docs/book/23-testing-as-spec.md | 29 ++--- docs/book/24-compatibility-and-versioning.md | 3 +- .../25-mcp-interface/determinism-and-tests.md | 17 +-- .../25-mcp-interface/payload-conventions.md | 1 + .../tools/atomic-change-control.md | 18 +-- .../tools/platform-observability.md | 16 +-- .../tools/report-and-findings.md | 2 +- .../tools/session-and-memory.md | 6 +- docs/book/26-platform-observability.md | 16 +-- docs/book/README.md | 10 +- 
docs/book/appendix/a-status-enums.md | 1 + docs/book/appendix/b-schema-layouts.md | 48 ++++---- docs/book/appendix/c-error-catalog.md | 1 + docs/book/integrations/claude-code-plugin.md | 15 +-- .../integrations/claude-desktop-bundle.md | 8 +- docs/book/integrations/codex-plugin.md | 7 +- docs/book/integrations/sarif.md | 5 +- docs/book/integrations/vs-code-extension.md | 60 +++++---- docs/examples/report.md | 1 + docs/getting-started.md | 4 +- docs/guide/README.md | 47 +++---- docs/guide/change-control/agent-cycle.md | 1 + docs/guide/change-control/atomic-debug.md | 1 + docs/guide/change-control/overview.md | 17 +-- docs/guide/explanation/how-it-works.md | 19 +-- docs/guide/integrations/claude-code/setup.md | 11 +- .../integrations/claude-desktop/setup.md | 1 + docs/guide/integrations/codex/setup.md | 2 - .../integrations/cursor/install-and-skills.md | 42 +++---- docs/guide/integrations/sarif/export.md | 3 - docs/guide/integrations/vscode/setup.md | 8 -- docs/guide/mcp/README.md | 39 +++--- docs/guide/mcp/architecture.md | 1 + docs/guide/mcp/client-setup.md | 1 + docs/guide/mcp/payload-cheatsheet.md | 1 + docs/guide/mcp/prompts.md | 1 + docs/guide/mcp/troubleshooting.md | 1 + .../guide/mcp/workflows/analyze-and-triage.md | 1 + docs/guide/mcp/workflows/change-control.md | 14 ++- .../mcp/workflows/drill-down-and-checks.md | 1 + docs/guide/mcp/workflows/memory-recipes.md | 25 ++-- .../mcp/workflows/session-and-coverage.md | 1 + docs/guide/memory/overview.md | 17 +-- .../memory/trajectories-and-experiences.md | 3 +- docs/index.md | 41 ++++--- docs/privacy-policy.md | 1 + docs/publishing.md | 1 + docs/terms-of-use.md | 1 + uv.lock | 115 +++++++++--------- 80 files changed, 532 insertions(+), 511 deletions(-) diff --git a/docs/book/00-intro.md b/docs/book/00-intro.md index 9eae1409..0f07d208 100644 --- a/docs/book/00-intro.md +++ b/docs/book/00-intro.md @@ -1,6 +1,7 @@ + # 00. 
Intro ## Purpose diff --git a/docs/book/01-terminology.md b/docs/book/01-terminology.md index 025f3b30..677275a5 100644 --- a/docs/book/01-terminology.md +++ b/docs/book/01-terminology.md @@ -2,6 +2,7 @@ owns: every defined term used across docs and codebase. does-not-own: architecture (→ 02), pipeline (→ 03). rule: new terms go HERE, nowhere else. --> + # 01. Terminology ## Purpose diff --git a/docs/book/02-architecture-map.md b/docs/book/02-architecture-map.md index 9ddd08e3..2a03cecd 100644 --- a/docs/book/02-architecture-map.md +++ b/docs/book/02-architecture-map.md @@ -26,27 +26,27 @@ Main ownership layers: ## Data model -| Layer | Modules | Responsibility | -|-------------------------|-------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| Entry | `codeclone/main.py` | Public CLI entrypoint only | -| CLI surface | `codeclone/surfaces/cli/*`, `codeclone/ui_messages/*` | Parse args, resolve runtime mode, print summaries, write outputs, route exits | -| Report copy | `codeclone/report/messages/*` | Glossary, suggestions, explainability, overview, security, chrome, text/markdown/sarif projections, gate prefixes | -| Config | `codeclone/config/*` | Option specs, parser construction, pyproject loading, CLI > pyproject > defaults merge | -| Core runtime | `codeclone/core/*` | Bootstrap, discovery, worker processing, project metrics, report/gate integration | -| Analysis | `codeclone/analysis/*`, `codeclone/blocks/*`, `codeclone/paths/*`, `codeclone/qualnames/*` | Parse source, normalize AST/CFG facts, extract units, prepare deterministic analysis inputs; includes shared 
blast-radius graph core (`analysis/blast_radius.py`) | -| Findings | `codeclone/findings/clones/*`, `codeclone/findings/structural/*` | Clone grouping and structural finding derivation | -| Metrics | `codeclone/metrics/*` | Complexity, coupling, cohesion, dependencies, dead code, health, adoption, coverage join, API surface | -| Contracts/domain | `codeclone/contracts/*`, `codeclone/models.py`, `codeclone/domain/*` | Version constants, enums, typed exceptions, shared models, domain taxonomies | -| Persistence | `codeclone/baseline/*`, `codeclone/cache/*` | Trusted comparison state and optimization-only cache contracts | -| Canonical report | `codeclone/report/document/*`, `codeclone/report/gates/*`, `codeclone/report/*.py` | Canonical report payload, derived projections, explainability, suggestions, gate reasons | -| Deterministic renderers | `codeclone/report/renderers/*` | Text/Markdown/SARIF/JSON projections over the canonical report | -| HTML render layer | `codeclone/report/html/*` | Render-only HTML view over canonical report/meta facts | -| MCP surface | `codeclone/surfaces/mcp/*`, `codeclone/surfaces/mcp/messages/*` | Read-only MCP tools/resources, change-control projections, Engineering Memory retrieval/governance, dev-only Platform Observability slices, and centralized agent-facing copy | -| Engineering Memory | `codeclone/memory/*`, `codeclone/config/memory*.py` | Local SQLite store, scoped retrieval, semantic sidecar, trajectory + Patch Trail projection, Experience distillation, coalesced rebuild jobs, staleness, governance, and CLI/MCP surfaces over deterministic report/git/doc/audit facts | -| Platform Observability | `codeclone/observability/*` | Opt-in operation/span telemetry, local SQLite store, bounded MCP slicer, and CLI JSON/HTML diagnostics; never analysis truth or a gate input | -| Controller insights | `codeclone/controller_insights/*` | Shared session-stats and audit-trail payloads for CLI `--session-stats` / `--audit` and IDE-only MCP 
`get_workspace_session_stats` / `get_controller_audit_trail` | -| Audit trail | `codeclone/audit/*` | Optional controller event and MCP payload footprint recording under `.codeclone/db/` when enabled | -| Client surfaces | `extensions/vscode-codeclone/*`, `extensions/claude-desktop-codeclone/*`, `plugins/codeclone/*`, `plugins/cursor-codeclone/*`, `plugins/claude-code-codeclone/*` | Native clients/install surfaces over `codeclone-mcp` | +| Layer | Modules | Responsibility | +|-------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Entry | `codeclone/main.py` | Public CLI entrypoint only | +| CLI surface | `codeclone/surfaces/cli/*`, `codeclone/ui_messages/*` | Parse args, resolve runtime mode, print summaries, write outputs, route exits | +| Report copy | `codeclone/report/messages/*` | Glossary, suggestions, explainability, overview, security, chrome, text/markdown/sarif projections, gate prefixes | +| Config | `codeclone/config/*` | Option specs, parser construction, pyproject loading, CLI > pyproject > defaults merge | +| Core runtime | `codeclone/core/*` | Bootstrap, discovery, worker processing, project metrics, report/gate integration | +| Analysis | `codeclone/analysis/*`, `codeclone/blocks/*`, `codeclone/paths/*`, `codeclone/qualnames/*` | Parse source, normalize AST/CFG facts, extract units, prepare deterministic analysis inputs; includes shared blast-radius graph core (`analysis/blast_radius.py`) | +| Findings | `codeclone/findings/clones/*`, `codeclone/findings/structural/*` | Clone grouping and structural finding derivation | +| Metrics | `codeclone/metrics/*` | Complexity, coupling, 
cohesion, dependencies, dead code, health, adoption, coverage join, API surface | +| Contracts/domain | `codeclone/contracts/*`, `codeclone/models.py`, `codeclone/domain/*` | Version constants, enums, typed exceptions, shared models, domain taxonomies | +| Persistence | `codeclone/baseline/*`, `codeclone/cache/*` | Trusted comparison state and optimization-only cache contracts | +| Canonical report | `codeclone/report/document/*`, `codeclone/report/gates/*`, `codeclone/report/*.py` | Canonical report payload, derived projections, explainability, suggestions, gate reasons | +| Deterministic renderers | `codeclone/report/renderers/*` | Text/Markdown/SARIF/JSON projections over the canonical report | +| HTML render layer | `codeclone/report/html/*` | Render-only HTML view over canonical report/meta facts | +| MCP surface | `codeclone/surfaces/mcp/*`, `codeclone/surfaces/mcp/messages/*` | Read-only MCP tools/resources, change-control projections, Engineering Memory retrieval/governance, dev-only Platform Observability slices, and centralized agent-facing copy | +| Engineering Memory | `codeclone/memory/*`, `codeclone/config/memory*.py` | Local SQLite store, scoped retrieval, semantic sidecar, trajectory + Patch Trail projection, Experience distillation, coalesced rebuild jobs, staleness, governance, and CLI/MCP surfaces over deterministic report/git/doc/audit facts | +| Platform Observability | `codeclone/observability/*` | Opt-in operation/span telemetry, local SQLite store, bounded MCP slicer, and CLI JSON/HTML diagnostics; never analysis truth or a gate input | +| Controller insights | `codeclone/controller_insights/*` | Shared session-stats and audit-trail payloads for CLI `--session-stats` / `--audit` and IDE-only MCP `get_workspace_session_stats` / `get_controller_audit_trail` | +| Audit trail | `codeclone/audit/*` | Optional controller event and MCP payload footprint recording under `.codeclone/db/` when enabled | +| Client surfaces | 
`extensions/vscode-codeclone/*`, `extensions/claude-desktop-codeclone/*`, `plugins/codeclone/*`, `plugins/cursor-codeclone/*`, `plugins/claude-code-codeclone/*` | Native clients/install surfaces over `codeclone-mcp` | Refs: @@ -137,18 +137,18 @@ Refs: ## Chapter map -| Topic | Primary chapters | -|---------------------------------------|------------------------------------------------------------------------------------------------------------------| -| CLI behavior and failure routing | [09-exit-codes.md](09-exit-codes.md), [11-cli.md](11-cli.md) | -| Config precedence and defaults | [10-config-and-defaults.md](10-config-and-defaults.md) | -| Core processing pipeline | [03-core-pipeline.md](03-core-pipeline.md) | -| Clone baseline trust/compat/integrity | [07-baseline.md](07-baseline.md) | -| Cache trust and fail-open behavior | [08-cache.md](08-cache.md) | -| Report schema and provenance | [05-report.md](05-report.md), [06-html-render.md](06-html-render.md) | -| MCP agent surface | [25-mcp-interface/index.md](25-mcp-interface/index.md), [14-claim-guard.md](14-claim-guard.md) | +| Topic | Primary chapters | +|---------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| CLI behavior and failure routing | [09-exit-codes.md](09-exit-codes.md), [11-cli.md](11-cli.md) | +| Config precedence and defaults | [10-config-and-defaults.md](10-config-and-defaults.md) | +| Core processing pipeline | [03-core-pipeline.md](03-core-pipeline.md) | +| Clone baseline trust/compat/integrity | [07-baseline.md](07-baseline.md) | +| Cache trust and fail-open behavior | [08-cache.md](08-cache.md) | +| Report schema and provenance | [05-report.md](05-report.md), [06-html-render.md](06-html-render.md) | +| MCP agent surface | 
[25-mcp-interface/index.md](25-mcp-interface/index.md), [14-claim-guard.md](14-claim-guard.md) | | Engineering Memory evidence layers | [13-engineering-memory/index.md](13-engineering-memory/index.md), [13-engineering-memory/trajectory-quality-and-passport.md](13-engineering-memory/trajectory-quality-and-passport.md), [13-engineering-memory/experience-layer.md](13-engineering-memory/experience-layer.md) | -| Platform runtime diagnostics | [26-platform-observability.md](26-platform-observability.md) | -| Health score model | [15-health-score.md](15-health-score.md) | -| Metrics gates and metrics baseline | [16-metrics-and-quality-gates.md](16-metrics-and-quality-gates.md) | -| Dead-code liveness policy | [17-dead-code-contract.md](17-dead-code-contract.md) | -| Determinism and versioning policy | [22-determinism.md](22-determinism.md), [24-compatibility-and-versioning.md](24-compatibility-and-versioning.md) | +| Platform runtime diagnostics | [26-platform-observability.md](26-platform-observability.md) | +| Health score model | [15-health-score.md](15-health-score.md) | +| Metrics gates and metrics baseline | [16-metrics-and-quality-gates.md](16-metrics-and-quality-gates.md) | +| Dead-code liveness policy | [17-dead-code-contract.md](17-dead-code-contract.md) | +| Determinism and versioning policy | [22-determinism.md](22-determinism.md), [24-compatibility-and-versioning.md](24-compatibility-and-versioning.md) | diff --git a/docs/book/03-core-pipeline.md b/docs/book/03-core-pipeline.md index 27d2993a..d70ec393 100644 --- a/docs/book/03-core-pipeline.md +++ b/docs/book/03-core-pipeline.md @@ -1,6 +1,7 @@ + # 03. Core Pipeline ## Purpose diff --git a/docs/book/04-cfg-semantics.md b/docs/book/04-cfg-semantics.md index 6240d7a9..0184d9f9 100644 --- a/docs/book/04-cfg-semantics.md +++ b/docs/book/04-cfg-semantics.md @@ -3,6 +3,7 @@ does-not-own: pipeline contract (→ 03), determinism policy (→ 22), python-tag rules (→ 24). rule: moved here from docs/cfg.md. 
Do not move back. --> + # 04. Control Flow Graph (CFG) — Design and Semantics > Contract-level guarantees are in [Core Pipeline](03-core-pipeline.md) and diff --git a/docs/book/05-report.md b/docs/book/05-report.md index 977389ff..7c29cf5f 100644 --- a/docs/book/05-report.md +++ b/docs/book/05-report.md @@ -2,6 +2,7 @@ owns: report schema, section definitions, integrity metadata, explainability. does-not-own: HTML rendering (→ 06), CLI output modes (→ 11), SARIF projection (→ ../guide/integrations/sarif/export.md). --> + # 05. Report ## Purpose diff --git a/docs/book/06-html-render.md b/docs/book/06-html-render.md index d9bec543..65b44dda 100644 --- a/docs/book/06-html-render.md +++ b/docs/book/06-html-render.md @@ -1,6 +1,7 @@ + # 06. HTML Render ## Purpose diff --git a/docs/book/07-baseline.md b/docs/book/07-baseline.md index 2d15cf9b..c1911ec6 100644 --- a/docs/book/07-baseline.md +++ b/docs/book/07-baseline.md @@ -3,6 +3,7 @@ validation order. does-not-own: CI setup (→ ../getting-started.md), cache (→ 08), versioning policy (→ 24). --> + # 07. Baseline ## Purpose diff --git a/docs/book/08-cache.md b/docs/book/08-cache.md index c2a3a188..3ac7b2ec 100644 --- a/docs/book/08-cache.md +++ b/docs/book/08-cache.md @@ -1,6 +1,7 @@ + # 08. Cache ## Purpose diff --git a/docs/book/09-exit-codes.md b/docs/book/09-exit-codes.md index 17c1a33e..955f85f4 100644 --- a/docs/book/09-exit-codes.md +++ b/docs/book/09-exit-codes.md @@ -2,6 +2,7 @@ owns: exit codes (0, 2, 3, 5), failure-mode classification. does-not-own: CLI flag details (→ 11), config keys (→ 10). rule: this is the CANONICAL exit-code table. CLI chapter references it. --> + # 09. Contracts: Exit Codes ## Purpose diff --git a/docs/book/10-config-and-defaults.md b/docs/book/10-config-and-defaults.md index d1f6559b..82e8ba0a 100644 --- a/docs/book/10-config-and-defaults.md +++ b/docs/book/10-config-and-defaults.md @@ -325,7 +325,7 @@ instead of duplicating tables. 
| Workspace intent registry | `resolve_intent_registry_config` | Documented env > `[tool.codeclone]` registry keys > defaults | | MCP workspace intent TTL / lease | `resolved_ttl_seconds`, `resolved_lease_seconds` | Explicit MCP tool parameter > env > built-in default | | Finish hygiene strict mode | `_strict_finish_enabled` | Env only (no pyproject key) | -| Platform Observability | `resolve_observability_config` | Env only; disabled by default, no pyproject table | +| Platform Observability | `resolve_observability_config` | Env only; disabled by default, no pyproject table | | Cursor / IDE hooks | hook helpers | Env > repo config file (where noted) > built-in default | There is no generic `CODECLONE_MEMORY__*` nested env convention. Each variable @@ -344,16 +344,16 @@ Platform Observability is environment-only and disabled by default. It has no [Platform Observability](26-platform-observability.md) for the data and trust contracts. -| Variable | Values | Effect | -|---|---|---| -| `CODECLONE_OBSERVABILITY_ENABLED` | truthy / falsy | Enable local operation/span instrumentation. | -| `CODECLONE_OBSERVABILITY_FORCE` | truthy / falsy | Lift the CI collection guard; does not enable collection by itself. | -| `CODECLONE_OBSERVABILITY_PROFILE` | truthy / falsy | Capture process metrics; requires `codeclone[perf]`. | -| `CODECLONE_OBSERVABILITY_PERSIST` | truthy / falsy | Persist completed operations; default true when enabled. | -| `CODECLONE_OBSERVABILITY_CAPTURE_PAYLOAD_SIZES` | truthy / falsy | Capture bounded size/token estimates; default true. | -| `CODECLONE_OBSERVABILITY_PAYLOAD_SNAPSHOT` | reserved | Rejected; raw payload snapshots are unsupported. | -| `CODECLONE_OBSERVABILITY_CORRELATION_ID` | internal ID | Worker handoff for cross-process correlation; set by CodeClone. | -| `CODECLONE_OBSERVABILITY_PARENT_OPERATION_ID` | internal ID | Worker handoff for the parent operation; set by CodeClone. 
| +| Variable | Values | Effect | +|-------------------------------------------------|----------------|---------------------------------------------------------------------| +| `CODECLONE_OBSERVABILITY_ENABLED` | truthy / falsy | Enable local operation/span instrumentation. | +| `CODECLONE_OBSERVABILITY_FORCE` | truthy / falsy | Lift the CI collection guard; does not enable collection by itself. | +| `CODECLONE_OBSERVABILITY_PROFILE` | truthy / falsy | Capture process metrics; requires `codeclone[perf]`. | +| `CODECLONE_OBSERVABILITY_PERSIST` | truthy / falsy | Persist completed operations; default true when enabled. | +| `CODECLONE_OBSERVABILITY_CAPTURE_PAYLOAD_SIZES` | truthy / falsy | Capture bounded size/token estimates; default true. | +| `CODECLONE_OBSERVABILITY_PAYLOAD_SNAPSHOT` | reserved | Rejected; raw payload snapshots are unsupported. | +| `CODECLONE_OBSERVABILITY_CORRELATION_ID` | internal ID | Worker handoff for cross-process correlation; set by CodeClone. | +| `CODECLONE_OBSERVABILITY_PARENT_OPERATION_ID` | internal ID | Worker handoff for the parent operation; set by CodeClone. | The internal correlation variables are launcher/worker protocol, not operator tuning knobs. @@ -363,16 +363,16 @@ tuning knobs. Overrides `[tool.codeclone.memory]` and `[tool.codeclone.memory.semantic]` for the listed field only. Paths resolve under the repository root like pyproject paths. 
-| Variable | Values | Overrides | Effect | -|--------------------------------------------------|-------------------------------------------------|----------------------------------------|--------------------------------------------------------------------------------| -| `CODECLONE_MEMORY_DB_PATH` | repo-relative or absolute path under root | `memory.db_path` | SQLite Engineering Memory store location | +| Variable | Values | Overrides | Effect | +|--------------------------------------------------|-------------------------------------------------|----------------------------------------|-------------------------------------------------------------------------------------------| +| `CODECLONE_MEMORY_DB_PATH` | repo-relative or absolute path under root | `memory.db_path` | SQLite Engineering Memory store location | | `CODECLONE_PROJECTION_REBUILD_POLICY` | `off`, `enqueue_when_stale` | `memory.projection_rebuild_policy` | When accepted MCP finish may enqueue async trajectory/semantic/Experience projection jobs | -| `CODECLONE_MEMORY_SEMANTIC_ENABLED` | `true` / `false` | `memory.semantic.enabled` | Turn semantic index sidecar on or off | -| `CODECLONE_MEMORY_SEMANTIC_EMBEDDING_PROVIDER` | `diagnostic`, `fastembed`, `local_model`, `api` | `memory.semantic.embedding_provider` | Embedding backend for semantic rebuild/search | -| `CODECLONE_MEMORY_SEMANTIC_EMBEDDING_MODEL` | model name string | `memory.semantic.embedding_model` | Provider model id (for example FastEmbed model name) | -| `CODECLONE_MEMORY_SEMANTIC_EMBEDDING_CACHE_DIR` | path | `memory.semantic.embedding_cache_dir` | Local ONNX/model cache directory for FastEmbed | -| `CODECLONE_MEMORY_SEMANTIC_ALLOW_MODEL_DOWNLOAD` | `true` / `false` | `memory.semantic.allow_model_download` | When `false`, FastEmbed requires a pre-populated cache | -| `CODECLONE_MEMORY_SEMANTIC_INDEX_PATH` | path | `memory.semantic.index_path` | LanceDB semantic sidecar directory | +| `CODECLONE_MEMORY_SEMANTIC_ENABLED` | `true` / 
`false` | `memory.semantic.enabled` | Turn semantic index sidecar on or off | +| `CODECLONE_MEMORY_SEMANTIC_EMBEDDING_PROVIDER` | `diagnostic`, `fastembed`, `local_model`, `api` | `memory.semantic.embedding_provider` | Embedding backend for semantic rebuild/search | +| `CODECLONE_MEMORY_SEMANTIC_EMBEDDING_MODEL` | model name string | `memory.semantic.embedding_model` | Provider model id (for example FastEmbed model name) | +| `CODECLONE_MEMORY_SEMANTIC_EMBEDDING_CACHE_DIR` | path | `memory.semantic.embedding_cache_dir` | Local ONNX/model cache directory for FastEmbed | +| `CODECLONE_MEMORY_SEMANTIC_ALLOW_MODEL_DOWNLOAD` | `true` / `false` | `memory.semantic.allow_model_download` | When `false`, FastEmbed requires a pre-populated cache | +| `CODECLONE_MEMORY_SEMANTIC_INDEX_PATH` | path | `memory.semantic.index_path` | LanceDB semantic sidecar directory | Memory keys without a documented env override (for example `projection_rebuild_spawn_worker`) are pyproject-only. diff --git a/docs/book/11-cli.md b/docs/book/11-cli.md index b618f16a..f384e1e8 100644 --- a/docs/book/11-cli.md +++ b/docs/book/11-cli.md @@ -3,6 +3,7 @@ does-not-own: exit-code enum (→ 09), config keys (→ 10), memory subcommands (→ 13), session/audit details (→ 12). rule: keep LEAN — push normative tables to their canonical owners. --> + # 11. CLI ## Purpose diff --git a/docs/book/12-structural-change-controller/blast-radius-and-receipt.md b/docs/book/12-structural-change-controller/blast-radius-and-receipt.md index 8ba30ba1..2f631b67 100644 --- a/docs/book/12-structural-change-controller/blast-radius-and-receipt.md +++ b/docs/book/12-structural-change-controller/blast-radius-and-receipt.md @@ -16,6 +16,7 @@ adapters over that core — non-MCP surfaces must not import Long context sections are bounded and include summaries with `total`, `shown`, and `truncated`. 
+ ## Review Receipt Payload `create_review_receipt` returns `format="markdown"` by default and can return a diff --git a/docs/book/12-structural-change-controller/finish-controlled-change.md b/docs/book/12-structural-change-controller/finish-controlled-change.md index 62a6ca0d..a5a74154 100644 --- a/docs/book/12-structural-change-controller/finish-controlled-change.md +++ b/docs/book/12-structural-change-controller/finish-controlled-change.md @@ -64,7 +64,6 @@ On `start_controlled_change` / `finish_controlled_change`, hygiene uses `dirty_attribution`, path classification arrays, and expanded `dirty_snapshot`. Findings/hotspots tools still honor all three levels. - ### Response payloads agents should read | Field | Use | diff --git a/docs/book/12-structural-change-controller/index.md b/docs/book/12-structural-change-controller/index.md index a3a1b74f..a691a0d8 100644 --- a/docs/book/12-structural-change-controller/index.md +++ b/docs/book/12-structural-change-controller/index.md @@ -27,6 +27,7 @@ queries: | Verify ergonomics | Live in `2.1.0a1` | MCP `check_patch_contract` | | MCP payload token budget | Live in `2.1.0a1` | Audit trail, CLI `--audit`, `--session-stats` | | Patch Trail | Live in `2.1.0a1` | MCP `finish_controlled_change(patch_trail_detail=…)`; audit `patch_trail.computed` | + ## Contract - The canonical report remains the source of truth. @@ -41,21 +42,21 @@ queries: - Tools derive responses from existing run/report facts rather than LLM inference. - Report-only context is review context, not an edit prohibition. -!!! note "Claim Guard" - Full pattern catalog: [Claim Guard](../14-claim-guard.md). + !!! note "Claim Guard" + Full pattern catalog: [Claim Guard](../14-claim-guard.md). 
## Chapters -| Topic | Contract | -|-------|----------| -| CLI `--blast-radius`, `--patch-verify` | [CLI controller queries](cli-controller-queries.md) | -| Blast radius & review receipt | [Blast radius & receipt](blast-radius-and-receipt.md) | -| Intent registry & queue | [Intent registry & queue](intent-registry-and-queue.md) | -| Verification profiles | [Verification profiles](verification-profiles.md) | -| Patch contract verify | [Patch contract verify](patch-contract-verify.md) | -| Workflow tools | [Workflow tools](workflow-tools.md) | -| `finish_controlled_change` | [finish_controlled_change](finish-controlled-change.md) | -| Finish hygiene | [Finish hygiene](finish-hygiene.md) | -| Patch Trail | [Patch Trail](patch-trail.md) | -| Payload semantics | [Payload semantics](payload-semantics.md) | -| Token budget | [Token budget](token-budget.md) | +| Topic | Contract | +|----------------------------------------|---------------------------------------------------------| +| CLI `--blast-radius`, `--patch-verify` | [CLI controller queries](cli-controller-queries.md) | +| Blast radius & review receipt | [Blast radius & receipt](blast-radius-and-receipt.md) | +| Intent registry & queue | [Intent registry & queue](intent-registry-and-queue.md) | +| Verification profiles | [Verification profiles](verification-profiles.md) | +| Patch contract verify | [Patch contract verify](patch-contract-verify.md) | +| Workflow tools | [Workflow tools](workflow-tools.md) | +| `finish_controlled_change` | [finish_controlled_change](finish-controlled-change.md) | +| Finish hygiene | [Finish hygiene](finish-hygiene.md) | +| Patch Trail | [Patch Trail](patch-trail.md) | +| Payload semantics | [Payload semantics](payload-semantics.md) | +| Token budget | [Token budget](token-budget.md) | diff --git a/docs/book/12-structural-change-controller/workflow-tools.md b/docs/book/12-structural-change-controller/workflow-tools.md index 4a3421ab..7be3f296 100644 --- 
a/docs/book/12-structural-change-controller/workflow-tools.md +++ b/docs/book/12-structural-change-controller/workflow-tools.md @@ -45,6 +45,7 @@ trusted baseline. Budget payloads use `null` for disabled numeric thresholds rather than sentinel values. Boolean policy gates are named `forbid_*`, for example `forbid_dead_code_regression`. + ## Verify Ergonomics `check_patch_contract(mode="verify")` includes three ergonomic features that @@ -80,6 +81,7 @@ calling `validate_review_claims` is meaningful for the verification profile. It is `true` for `python_structural` and `governance_config` profiles, `false` for `documentation_only`, `non_python_patch`, `state_artifact_change`, and non-accepted outcomes. + ## Workflow consolidation The atomic change control workflow requires 7–11 MCP tool calls per edit diff --git a/docs/book/13-engineering-memory/cli-surface.md b/docs/book/13-engineering-memory/cli-surface.md index 8c2830ad..c26e4b37 100644 --- a/docs/book/13-engineering-memory/cli-surface.md +++ b/docs/book/13-engineering-memory/cli-surface.md @@ -2,24 +2,24 @@ All commands live under `codeclone memory` and accept `--root` (default `.`). 
-| Command | Purpose | -|----------------------------------------------------------------------------|-----------------------------------------------| -| `init [--refresh] [--dry-run]` | Create or refresh the memory store | -| `status` | Schema version, counts, last ingest metadata | -| `for-path PATH [--limit N]` | Records linked to a repo-relative path | -| `search QUERY [--match any\|all] [--semantic] [--active-only] [--limit N]` | FTS search; optional semantic blend | -| `semantic status` | Index availability, provider, row counts | -| `semantic rebuild` | Rebuild LanceDB sidecar from memory + audit | -| `semantic search QUERY [--limit N]` | Search with semantic ranking (index required) | -| `stale [--limit N]` | List stale records and reasons | -| `vacuum [--dry-run]` | Retention purge per config | -| `coverage --scope PATH [PATH...]` | Scope coverage metrics | -| `review-candidates [--limit N]` | List draft records awaiting human review | -| `approve RECORD_ID [--verified-by NAME]` | Promote draft → active | -| `reject RECORD_ID [--reason TEXT]` | Reject draft | -| `archive RECORD_ID [--reason TEXT]` | Archive record | +| Command | Purpose | +|----------------------------------------------------------------------------------------|-------------------------------------------------------| +| `init [--refresh] [--dry-run]` | Create or refresh the memory store | +| `status` | Schema version, counts, last ingest metadata | +| `for-path PATH [--limit N]` | Records linked to a repo-relative path | +| `search QUERY [--match any\|all] [--semantic] [--active-only] [--limit N]` | FTS search; optional semantic blend | +| `semantic status` | Index availability, provider, row counts | +| `semantic rebuild` | Rebuild LanceDB sidecar from memory + audit | +| `semantic search QUERY [--limit N]` | Search with semantic ranking (index required) | +| `stale [--limit N]` | List stale records and reasons | +| `vacuum [--dry-run]` | Retention purge per config | +| `coverage --scope 
PATH [PATH...]` | Scope coverage metrics | +| `review-candidates [--limit N]` | List draft records awaiting human review | +| `approve RECORD_ID [--verified-by NAME]` | Promote draft → active | +| `reject RECORD_ID [--reason TEXT]` | Reject draft | +| `archive RECORD_ID [--reason TEXT]` | Archive record | | `trajectory status\|rebuild\|list\|search\|show\|agents\|anomalies\|dashboard\|export` | Trajectory projection, passport analytics, and export | -| `jobs status\|enqueue\|run-once\|list` | Trajectory + semantic + Experience projection queue | +| `jobs status\|enqueue\|run-once\|list` | Trajectory + semantic + Experience projection queue | Human governance (`approve`, `reject`, `archive`) is available through the **CodeClone VS Code Memory** view (IDE governance channel) and the diff --git a/docs/book/13-engineering-memory/index.md b/docs/book/13-engineering-memory/index.md index 067a84fd..8a3cb04b 100644 --- a/docs/book/13-engineering-memory/index.md +++ b/docs/book/13-engineering-memory/index.md @@ -38,7 +38,7 @@ controlled edits. | 26 | Patch Trail persistence + scoped retrieval | `memory_trajectory_patch_trails`; `patch_trail_summary` on scoped retrieval | | 28 | Incremental projection jobs | Watermarked trajectory rebuild, semantic hash-skip, coalesced worker | | Live | Trajectory quality and passport analytics | Quality/complexity contract, anomalies, agents, dashboard | -| Live | Experience Layer | Distillation job, scoped `experiences[]`, `promote_experience` draft bridge | +| Live | Experience Layer | Distillation job, scoped `experiences[]`, `promote_experience` draft bridge | Schema version constant: `ENGINEERING_MEMORY_SCHEMA_VERSION` in `codeclone/contracts/__init__.py` (currently **`1.6`**). 
diff --git a/docs/book/13-engineering-memory/mcp-surface.md b/docs/book/13-engineering-memory/mcp-surface.md index ad08a6ee..938c3ad0 100644 --- a/docs/book/13-engineering-memory/mcp-surface.md +++ b/docs/book/13-engineering-memory/mcp-surface.md @@ -34,22 +34,22 @@ When auto-sync runs, the response includes a `memory_sync` object (`status`, Mode router for inspection and search. -| `mode` | Required inputs | Purpose | -|--------------|---------------------------------------|-------------------------------------------| -| `search` | `query`; optional `semantic=true` | FTS keyword search; optional vector blend | -| `get` | `record_id` | Single record + subjects + evidence | -| `for_path` | `path` | Path-linked records | -| `for_symbol` | `symbol` | Symbol-linked records | -| `stale` | — | Stale inventory | -| `coverage` | `scope` (non-empty, not project root) | Coverage metrics for paths | -| `status` | — | Store status (like CLI `status`) | -| `drafts` | optional `limit` | Draft inbox (compact by default) | -| `trajectory_status` | — | Trajectory projection run metadata | -| `trajectory_search` | `query`; optional `filters.include_routine` | Search stored trajectories | -| `trajectory_get` | `record_id` (trajectory id) | One trajectory + steps (always full) | -| `trajectory_anomalies` | optional `filters.include_routine` | Detected trajectory contract anomalies | -| `trajectory_agents` | optional `filters.include_routine` | Aggregate quality/outcomes by exact agent label | -| `trajectory_dashboard` | optional `filters.include_routine` | Combined status, agent, and anomaly view | +| `mode` | Required inputs | Purpose | +|------------------------|---------------------------------------------|-------------------------------------------------| +| `search` | `query`; optional `semantic=true` | FTS keyword search; optional vector blend | +| `get` | `record_id` | Single record + subjects + evidence | +| `for_path` | `path` | Path-linked records | +| `for_symbol` | 
`symbol` | Symbol-linked records | +| `stale` | — | Stale inventory | +| `coverage` | `scope` (non-empty, not project root) | Coverage metrics for paths | +| `status` | — | Store status (like CLI `status`) | +| `drafts` | optional `limit` | Draft inbox (compact by default) | +| `trajectory_status` | — | Trajectory projection run metadata | +| `trajectory_search` | `query`; optional `filters.include_routine` | Search stored trajectories | +| `trajectory_get` | `record_id` (trajectory id) | One trajectory + steps (always full) | +| `trajectory_anomalies` | optional `filters.include_routine` | Detected trajectory contract anomalies | +| `trajectory_agents` | optional `filters.include_routine` | Aggregate quality/outcomes by exact agent label | +| `trajectory_dashboard` | optional `filters.include_routine` | Combined status, agent, and anomaly view | List modes (`search`, `stale`, `drafts`, scoped `get_relevant_memory`) default to **compact** payloads: statement preview, `statement_length`, no `payload`. @@ -58,12 +58,12 @@ Use `mode=get` or `detail_level=full` for complete statements and payload. 
Scoped retrieval keeps four typed lanes: -| Lane | Meaning | `compact` | `full` | -|------------------|----------------------------------------------|---------------------------------------------------------------|---------------------------------------------| -| `records[]` | Durable asserted/project memory | Preview; relevance-first bounded `subjects`; count/truncation | Full statement, subjects, record payload | -| `experiences[]` | Advisory patterns distilled from trajectories | Preview; agent-family count, multi-agent flag, dominant facet | Full agent facets and trajectory evidence ids | -| `trajectories[]` | Prior workflow examples/evidence | Bounded preview; no steps or `quality_contract` | Full contract/subjects; use `trajectory_get` for steps | -| `coverage` | Availability of record/trajectory/experience context | Same factual coverage metadata | Same factual coverage metadata | +| Lane | Meaning | `compact` | `full` | +|------------------|------------------------------------------------------|---------------------------------------------------------------|--------------------------------------------------------| +| `records[]` | Durable asserted/project memory | Preview; relevance-first bounded `subjects`; count/truncation | Full statement, subjects, record payload | +| `experiences[]` | Advisory patterns distilled from trajectories | Preview; agent-family count, multi-agent flag, dominant facet | Full agent facets and trajectory evidence ids | +| `trajectories[]` | Prior workflow examples/evidence | Bounded preview; no steps or `quality_contract` | Full contract/subjects; use `trajectory_get` for steps | +| `coverage` | Availability of record/trajectory/experience context | Same factual coverage metadata | Same factual coverage metadata | `subject_count` and `subjects_truncated=true` mean more linked subjects exist; they do not downgrade or discard the record. 
Each compact trajectory retains @@ -85,18 +85,18 @@ CLI equivalent: `codeclone memory search QUERY --match any|all`. #### `manage_engineering_memory` -| `action` | Required params | Effect | -|--------------------------|-----------------------------------------------------|------------------------------------------------------------| -| `refresh_from_run` | optional `run_id` (defaults to latest MCP run) | Force ingest from MCP run report | -| `rebuild_semantic_index` | (none) | Rebuild LanceDB sidecar when `memory.semantic.enabled` | -| `rebuild_trajectories` | (none) | Rebuild trajectory projections from audit event core | -| `enqueue_projection_rebuild` | (none) | Queue trajectory + semantic + Experience projection job | -| `projection_rebuild_status` | (none) | Latest projection job status | -| `run_projection_jobs_once` | (none) | Run one queued projection job inline | -| `record_candidate` | `record_type`, `statement`, **`subject_path`** | Creates **draft** record | -| `promote_experience` | `experience_id` | Convert advisory Experience into human-reviewable draft | -| `validate_claims` | `text` | Memory-layer claim guard (warnings/errors) | -| `propose_from_receipt` | optional `text`, `intent_id` | Draft proposals from finish-like payload (atomic fallback) | +| `action` | Required params | Effect | +|------------------------------|------------------------------------------------|------------------------------------------------------------| +| `refresh_from_run` | optional `run_id` (defaults to latest MCP run) | Force ingest from MCP run report | +| `rebuild_semantic_index` | (none) | Rebuild LanceDB sidecar when `memory.semantic.enabled` | +| `rebuild_trajectories` | (none) | Rebuild trajectory projections from audit event core | +| `enqueue_projection_rebuild` | (none) | Queue trajectory + semantic + Experience projection job | +| `projection_rebuild_status` | (none) | Latest projection job status | +| `run_projection_jobs_once` | (none) | Run one queued 
projection job inline | +| `record_candidate` | `record_type`, `statement`, **`subject_path`** | Creates **draft** record | +| `promote_experience` | `experience_id` | Convert advisory Experience into human-reviewable draft | +| `validate_claims` | `text` | Memory-layer claim guard (warnings/errors) | +| `propose_from_receipt` | optional `text`, `intent_id` | Draft proposals from finish-like payload (atomic fallback) | IDE channel only (VS Code launches MCP with `--ide-governance-channel`): diff --git a/docs/book/13-engineering-memory/search-semantic.md b/docs/book/13-engineering-memory/search-semantic.md index 52b90ef0..6401bf8a 100644 --- a/docs/book/13-engineering-memory/search-semantic.md +++ b/docs/book/13-engineering-memory/search-semantic.md @@ -1,4 +1,5 @@ + # Optional semantic retrieval (Phase 20) Semantic search is **opt-in** and **off by default** (`enabled = false` in diff --git a/docs/book/13-engineering-memory/trajectory-and-patch-trail.md b/docs/book/13-engineering-memory/trajectory-and-patch-trail.md index 2dda05e7..eb7b88bc 100644 --- a/docs/book/13-engineering-memory/trajectory-and-patch-trail.md +++ b/docs/book/13-engineering-memory/trajectory-and-patch-trail.md @@ -85,7 +85,7 @@ Module ownership: | `codeclone/memory/trajectory/patch_trail_projector.py` | Rebuild Patch Trail from audit event cores | | `codeclone/memory/trajectory/projector.py` | Deterministic trajectory projection (`trajectory-v3`) | | `codeclone/memory/trajectory/quality.py` | Contract-quality and separate complexity scoring | -| `codeclone/memory/trajectory/analytics.py` | Dashboard, anomaly, and per-agent aggregates | +| `codeclone/memory/trajectory/analytics.py` | Dashboard, anomaly, and per-agent aggregates | | `codeclone/memory/trajectory/store.py` | SQLite persistence, supersede, rebuild orchestration | | `codeclone/memory/trajectory/retrieval.py` | Scoped ranking + `patch_trail_summary` | | `codeclone/memory/trajectory/export_context.py` | Export v2 context: precedents, 
citations, scope paths | @@ -129,13 +129,13 @@ Export profiles (schema contracts): `agent-change-control-v1`, Export row schema version is **`2`** (`TRAJECTORY_EXPORT_SCHEMA_VERSION`). Each row includes: -| Field | Source | -|---------------------------------|-----------------------------------------------------------------------| -| `context.memory_precedents` | Active memory records overlapping trajectory/path scope | -| `context.trajectory_precedents` | Prior workflows with path overlap | -| `citations` | Claim-validation event cores + report digests | -| `scope.paths` | Resolved from Patch Trail / declare / check event cores | -| `patch_trail_summary` | When persisted in `memory_trajectory_patch_trails` | +| Field | Source | +|---------------------------------|-----------------------------------------------------------------------------------------------------------------------------------| +| `context.memory_precedents` | Active memory records overlapping trajectory/path scope | +| `context.trajectory_precedents` | Prior workflows with path overlap | +| `citations` | Claim-validation event cores + report digests | +| `scope.paths` | Resolved from Patch Trail / declare / check event cores | +| `patch_trail_summary` | When persisted in `memory_trajectory_patch_trails` | | `projection_version` | `trajectory-v1`, `trajectory-v2`, or active `trajectory-v3`; v2 adds Patch Trail digest and v3 adds quality score + agent subject | Rebuild supersedes older projection rows for the same workflow (one canonical @@ -170,14 +170,14 @@ Scoped ranking adds a small boost when query scope paths intersect `query_engineering_memory` modes: -| Mode | Scope | Notes | -|---------------------|---------------|-------------------------------------------------------| -| `trajectory_status` | project | Projection run manifest | -| `trajectory_search` | query text | Requires `query`; excludes `run:*` routine by default | -| `trajectory_get` | trajectory id | `record_id` = trajectory id | 
-| `trajectory_anomalies` | project | Contract anomalies, optionally including routine runs | -| `trajectory_agents` | project | Outcome and quality aggregates by exact agent label | -| `trajectory_dashboard` | project | Combined status, agent, and anomaly payload | +| Mode | Scope | Notes | +|------------------------|---------------|-------------------------------------------------------| +| `trajectory_status` | project | Projection run manifest | +| `trajectory_search` | query text | Requires `query`; excludes `run:*` routine by default | +| `trajectory_get` | trajectory id | `record_id` = trajectory id | +| `trajectory_anomalies` | project | Contract anomalies, optionally including routine runs | +| `trajectory_agents` | project | Outcome and quality aggregates by exact agent label | +| `trajectory_dashboard` | project | Combined status, agent, and anomaly payload | Filter: `filters.include_routine=true` on `trajectory_search` includes single-event `run:*` analysis workflows. diff --git a/docs/book/13-engineering-memory/trajectory-labels.md b/docs/book/13-engineering-memory/trajectory-labels.md index 0facbff2..7e5c5f8a 100644 --- a/docs/book/13-engineering-memory/trajectory-labels.md +++ b/docs/book/13-engineering-memory/trajectory-labels.md @@ -4,24 +4,24 @@ Each projected trajectory carries a sorted **`labels`** list in `memory_trajectories.labels_json`. Labels are deterministic tags derived from audit event cores — not free-form agent text. 
-| Label | When set | -|-------|----------| -| `change_control_workflow` | Any change-controller event (`intent.*`, `patch_contract.*`, …) | -| `verified_finish` | `patch_contract.verified` with accepted outcome | -| `scope_clean` | `intent.checked` with status `clean` or `expanded` | -| `scope_expanded` | `intent.expanded` present | -| `queue_used` | `intent.queued` present | -| `patch_trail_recorded` | `patch_trail.computed` present | -| `receipt_issued` | `review_receipt.created` present | -| `claim_validated` | `claim_validation.completed` present | -| `analysis_observed` | Standalone `analysis.completed` workflow (no change-control events) | -| `memory_used` | `manage_engineering_memory` tool use in the stream | -| `recovered` | `intent.promoted` (queue recovery) | -| `foreign_conflict_seen` | Workspace conflict | -| `hook_blocked` | Hook surface warn/error | -| `claim_guard_failed` | Claim validation violated | -| `baseline_abuse_detected` | Baseline abuse | -| `external_changes_accepted` | Finish accepted with external changes | +| Label | When set | +|-----------------------------|---------------------------------------------------------------------| +| `change_control_workflow` | Any change-controller event (`intent.*`, `patch_contract.*`, …) | +| `verified_finish` | `patch_contract.verified` with accepted outcome | +| `scope_clean` | `intent.checked` with status `clean` or `expanded` | +| `scope_expanded` | `intent.expanded` present | +| `queue_used` | `intent.queued` present | +| `patch_trail_recorded` | `patch_trail.computed` present | +| `receipt_issued` | `review_receipt.created` present | +| `claim_validated` | `claim_validation.completed` present | +| `analysis_observed` | Standalone `analysis.completed` workflow (no change-control events) | +| `memory_used` | `manage_engineering_memory` tool use in the stream | +| `recovered` | `intent.promoted` (queue recovery) | +| `foreign_conflict_seen` | Workspace conflict | +| `hook_blocked` | Hook surface 
warn/error | +| `claim_guard_failed` | Claim validation violated | +| `baseline_abuse_detected` | Baseline abuse | +| `external_changes_accepted` | Finish accepted with external changes | Routine successful edit cycles should carry **`change_control_workflow`** and **`verified_finish`** at minimum. Empty `labels` indicates a projection bug or a diff --git a/docs/book/13-engineering-memory/trajectory-quality-and-passport.md b/docs/book/13-engineering-memory/trajectory-quality-and-passport.md index 8d2aaade..f28badf1 100644 --- a/docs/book/13-engineering-memory/trajectory-quality-and-passport.md +++ b/docs/book/13-engineering-memory/trajectory-quality-and-passport.md @@ -42,14 +42,14 @@ High complexity is not a defect and does not reduce quality by itself. Quality score version `2` is the minimum of six components: -| Component | Scoring | -|---|---| -| Outcome | accepted `100`, accepted external `85`, partial `55`, abandoned `40`, blocked `30`, violated `20` | -| Verification | accepted `100`, accepted external `85`, unverified `50`, violated/blocked `0`, not reached `40` | -| Scope | clean `100`, expanded `85`, partial `70`, violated `0` | -| Incidents | `max(0, 100 - 10 × incident_count)` | -| Anomalies | starts at `100`; error costs `12`, warning costs `5` | -| Receipt | change-control trajectory with receipt `100`, without `85`; non-change workflow `100` | +| Component | Scoring | +|--------------|---------------------------------------------------------------------------------------------------| +| Outcome | accepted `100`, accepted external `85`, partial `55`, abandoned `40`, blocked `30`, violated `20` | +| Verification | accepted `100`, accepted external `85`, unverified `50`, violated/blocked `0`, not reached `40` | +| Scope | clean `100`, expanded `85`, partial `70`, violated `0` | +| Incidents | `max(0, 100 - 10 × incident_count)` | +| Anomalies | starts at `100`; error costs `12`, warning costs `5` | +| Receipt | change-control trajectory with receipt 
`100`, without `85`; non-change workflow `100` | When patch-trail verification is unavailable, the verification component falls back to quality tier: verified `100`, corrected `90`, routine `85`, partial diff --git a/docs/book/14-claim-guard.md b/docs/book/14-claim-guard.md index a258eac8..de7133c6 100644 --- a/docs/book/14-claim-guard.md +++ b/docs/book/14-claim-guard.md @@ -3,6 +3,7 @@ patch_health_delta handling. does-not-own: change controller (→ 12), engineering memory (→ 13), MCP tool schemas (→ 25). --> + # 14. Claim Guard ## Purpose diff --git a/docs/book/15-health-score.md b/docs/book/15-health-score.md index 8e1fd807..f332b7b6 100644 --- a/docs/book/15-health-score.md +++ b/docs/book/15-health-score.md @@ -1,6 +1,7 @@ + # 15. Health Score ## Purpose diff --git a/docs/book/16-metrics-and-quality-gates.md b/docs/book/16-metrics-and-quality-gates.md index a6524938..ec66a18f 100644 --- a/docs/book/16-metrics-and-quality-gates.md +++ b/docs/book/16-metrics-and-quality-gates.md @@ -2,6 +2,7 @@ owns: gate flag definitions, threshold semantics, metrics-baseline contract. does-not-own: health score formula (→ 15), dead-code rules (→ 17), config keys (→ 10). --> + # 16. Metrics and Quality Gates ## Purpose diff --git a/docs/book/17-dead-code-contract.md b/docs/book/17-dead-code-contract.md index 72767e3d..4f93c4b2 100644 --- a/docs/book/17-dead-code-contract.md +++ b/docs/book/17-dead-code-contract.md @@ -1,6 +1,7 @@ + # 17. 
Dead Code Contract ## Purpose @@ -169,9 +170,11 @@ Refs: - `tests/test_extractor.py::test_dead_code_marks_symbol_dead_when_referenced_only_by_tests` - `tests/test_extractor.py::test_dead_code_respects_runtime_hooks_and_inline_suppressions[skip_pep562_hooks]` - + `tests/test_extractor.py::test_dead_code_respects_runtime_hooks_and_inline_suppressions[inline_suppression_per_declaration]` - `tests/test_extractor.py::test_dead_code_respects_runtime_hooks_and_inline_suppressions[suppression_binding_scoped_to_target]` + - `tests/test_extractor.py::test_dead_code_uses_fastapi_route_and_dependency_reachability` - `tests/test_extractor.py::test_dead_code_uses_fastapi_annotated_dependency_reachability` - `tests/test_extractor.py::test_dead_code_uses_fastapi_route_decorator_factory_reachability` diff --git a/docs/book/18-suggestions-and-clone-typing.md b/docs/book/18-suggestions-and-clone-typing.md index 92d12f4d..bf4fa7eb 100644 --- a/docs/book/18-suggestions-and-clone-typing.md +++ b/docs/book/18-suggestions-and-clone-typing.md @@ -1,6 +1,7 @@ + # 18. Suggestions and Clone Typing ## Purpose diff --git a/docs/book/19-inline-suppressions.md b/docs/book/19-inline-suppressions.md index db811208..d023e8ef 100644 --- a/docs/book/19-inline-suppressions.md +++ b/docs/book/19-inline-suppressions.md @@ -1,6 +1,7 @@ + # 19. 
Inline Suppressions ## Purpose @@ -89,13 +90,17 @@ Refs: - `tests/test_suppressions.py::test_extract_suppression_directives_ignores_invalid_forms[unknown_and_malformed]` - `tests/test_suppressions.py::test_bind_suppressions_targets_expected_declaration_scope[adjacent_leading_only]` - + `tests/test_suppressions.py::test_bind_suppressions_targets_expected_declaration_scope[class_inline_does_not_propagate]` + - `tests/test_suppressions.py::test_bind_suppressions_targets_expected_declaration_scope[method_target]` - `tests/test_suppressions.py::test_build_suppression_index_deduplicates_rules_stably` - + `tests/test_extractor.py::test_dead_code_respects_runtime_hooks_and_inline_suppressions[inline_suppression_per_declaration]` - `tests/test_extractor.py::test_dead_code_respects_runtime_hooks_and_inline_suppressions[suppression_binding_scoped_to_target]` + - `tests/test_metrics_modules.py::test_find_unused_applies_inline_dead_code_suppression` - `tests/test_metrics_modules.py::test_find_suppressed_unused_returns_actionable_suppressed_candidates` - `tests/test_report.py::test_report_json_dead_code_suppressed_items_are_reported_separately` diff --git a/docs/book/20-benchmarking.md b/docs/book/20-benchmarking.md index 05f1797c..dc5f791b 100644 --- a/docs/book/20-benchmarking.md +++ b/docs/book/20-benchmarking.md @@ -1,6 +1,7 @@ + # 20. Benchmarking (Docker) ## Purpose diff --git a/docs/book/22-determinism.md b/docs/book/22-determinism.md index 336abbc1..fbb3b9d4 100644 --- a/docs/book/22-determinism.md +++ b/docs/book/22-determinism.md @@ -2,6 +2,7 @@ owns: determinism guarantees, non-determinism sources, sorted-iteration rules. does-not-own: CFG semantics (→ 04), fingerprint version (→ 24), benchmarking (→ 20). --> + # 22. 
Determinism ## Purpose diff --git a/docs/book/23-testing-as-spec.md b/docs/book/23-testing-as-spec.md index 887fbc92..c889f5ac 100644 --- a/docs/book/23-testing-as-spec.md +++ b/docs/book/23-testing-as-spec.md @@ -2,6 +2,7 @@ owns: testing philosophy, test taxonomy, golden/snapshot policy, contract matrix. does-not-own: per-file test inventory (→ test modules), maintainer playbook detail (→ AGENTS.md §17, mirrored here). --> + # 23. Testing as Specification ## Purpose @@ -36,13 +37,13 @@ Contract tests are concentrated in: Treat tests as specification. Every new behavior belongs in the closest bucket; public-surface changes need contract tests, not only unit tests. -| Bucket | Intent | Examples | -|--------|--------|----------| -| **Unit** | Module behavior and edge conditions | `tests/test_cfg.py`, `tests/test_normalize.py`, `tests/test_metrics_modules.py`, `tests/test_suppressions.py` | -| **Contract** | Baseline, cache, report, CLI, MCP public semantics | `tests/test_baseline.py`, `tests/test_cache.py`, `tests/test_report_contract_coverage.py`, `tests/test_cli_unit.py`, `tests/test_mcp_service.py` | -| **Golden** | Snapshot sentinels for stable outputs | `tests/test_detector_golden.py`, `tests/test_golden_v2.py` | -| **Determinism / invariant** | Ordering, branch paths, canonical stability | `tests/test_report_branch_invariants.py`, `tests/test_core_branch_coverage.py`, `tests/test_semantic_determinism_gate.py` | -| **Scenario / regression** | Multi-step integration and process behavior | `tests/test_cli_inprocess.py`, `tests/test_pipeline_process.py`, `tests/test_cli_smoke.py` | +| Bucket | Intent | Examples | +|-----------------------------|----------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------| +| **Unit** | Module behavior and edge conditions | `tests/test_cfg.py`, `tests/test_normalize.py`, 
`tests/test_metrics_modules.py`, `tests/test_suppressions.py` | +| **Contract** | Baseline, cache, report, CLI, MCP public semantics | `tests/test_baseline.py`, `tests/test_cache.py`, `tests/test_report_contract_coverage.py`, `tests/test_cli_unit.py`, `tests/test_mcp_service.py` | +| **Golden** | Snapshot sentinels for stable outputs | `tests/test_detector_golden.py`, `tests/test_golden_v2.py` | +| **Determinism / invariant** | Ordering, branch paths, canonical stability | `tests/test_report_branch_invariants.py`, `tests/test_core_branch_coverage.py`, `tests/test_semantic_determinism_gate.py` | +| **Scenario / regression** | Multi-step integration and process behavior | `tests/test_cli_inprocess.py`, `tests/test_pipeline_process.py`, `tests/test_cli_smoke.py` | Maintainer routing tables and golden-update policy also live in `AGENTS.md` §17 and §16 (change routing); this chapter is the published contract copy. @@ -63,13 +64,13 @@ The following matrix is treated as executable contract: | Framework-aware dead-code reachability facts | `tests/test_extractor.py`, `tests/test_pipeline_metrics.py`, `tests/test_cache.py` | | Golden fixture clone exclusion policy | `tests/test_golden_fixtures.py`, `tests/test_cli_inprocess.py::test_cli_pyproject_golden_fixture_paths_exclude_fixture_clone_groups`, `tests/test_report.py::test_report_json_clone_groups_can_include_suppressed_golden_fixture_bucket` | | Scanner traversal safety | `tests/test_scanner_extra.py`, `tests/test_security.py` | -| Engineering Memory SQLite schema, governance, retrieval | `tests/test_memory_schema.py`, `tests/test_memory_store.py`, `tests/test_memory_governance.py`, `tests/test_memory_retrieval.py`, `tests/test_memory_mcp_sync.py` | -| Semantic index projection, rebuild, LanceDB backend | `tests/test_semantic_projection.py`, `tests/test_semantic_rebuild.py`, `tests/test_semantic_lancedb_backend.py`, `tests/test_semantic_embedding.py` | -| Trajectory projection, quality passport, anomalies, retrieval | 
`tests/test_memory_trajectory_projector.py`, `tests/test_memory_trajectory_quality.py`, `tests/test_memory_trajectory_anomalies.py`, `tests/test_memory_trajectory_retrieval.py` | -| Experience distillation, evidence diversity, scoped retrieval, promotion | `tests/test_memory_experience_distillation.py`, `tests/test_memory_experience_retrieval.py`, `tests/test_memory_experience_promotion.py` | -| Projection queue coalescing, watermarks, worker lifecycle | `tests/test_memory_projection_jobs.py`, `tests/test_memory_projection_jobs_schema.py`, `tests/test_projection_spawn_guard.py` | -| Platform Observability config, correlation, persistence, query, rendering, MCP | `tests/test_observability_config.py`, `tests/test_observability_correlation.py`, `tests/test_observability_store.py`, `tests/test_observability_query.py`, `tests/test_observability_render.py`, `tests/test_observability_mcp_registrar.py` | -| Documentation IA, line budgets, strict site build | `tests/test_docs_ia_contract.py`, `tests/test_docs_build_contract.py` | +| Engineering Memory SQLite schema, governance, retrieval | `tests/test_memory_schema.py`, `tests/test_memory_store.py`, `tests/test_memory_governance.py`, `tests/test_memory_retrieval.py`, `tests/test_memory_mcp_sync.py` | +| Semantic index projection, rebuild, LanceDB backend | `tests/test_semantic_projection.py`, `tests/test_semantic_rebuild.py`, `tests/test_semantic_lancedb_backend.py`, `tests/test_semantic_embedding.py` | +| Trajectory projection, quality passport, anomalies, retrieval | `tests/test_memory_trajectory_projector.py`, `tests/test_memory_trajectory_quality.py`, `tests/test_memory_trajectory_anomalies.py`, `tests/test_memory_trajectory_retrieval.py` | +| Experience distillation, evidence diversity, scoped retrieval, promotion | `tests/test_memory_experience_distillation.py`, `tests/test_memory_experience_retrieval.py`, `tests/test_memory_experience_promotion.py` | +| Projection queue coalescing, watermarks, worker lifecycle | 
`tests/test_memory_projection_jobs.py`, `tests/test_memory_projection_jobs_schema.py`, `tests/test_projection_spawn_guard.py` | +| Platform Observability config, correlation, persistence, query, rendering, MCP | `tests/test_observability_config.py`, `tests/test_observability_correlation.py`, `tests/test_observability_store.py`, `tests/test_observability_query.py`, `tests/test_observability_render.py`, `tests/test_observability_mcp_registrar.py` | +| Documentation IA, line budgets, strict site build | `tests/test_docs_ia_contract.py`, `tests/test_docs_build_contract.py` | | Layer dependency direction | `tests/test_architecture.py` | ## Invariants (MUST) diff --git a/docs/book/24-compatibility-and-versioning.md b/docs/book/24-compatibility-and-versioning.md index c5713f95..0ddaba76 100644 --- a/docs/book/24-compatibility-and-versioning.md +++ b/docs/book/24-compatibility-and-versioning.md @@ -66,7 +66,8 @@ Version bump rules: distillation** versions when their derived identity/formula changes; rebuild derived rows rather than migrating source evidence - bump **semantic index format** when LanceDB projection or stored row fields change - incompatibly — forces index rebuild, not SQLite migration (see [13-engineering-memory/index.md](13-engineering-memory/index.md)) + incompatibly — forces index rebuild, not SQLite migration ( + see [13-engineering-memory/index.md](13-engineering-memory/index.md)) - bump **Platform Observability schema** only for incompatible telemetry-store changes; it remains separate from reports, gates, baselines, and memory facts (see [26-platform-observability.md](26-platform-observability.md)) diff --git a/docs/book/25-mcp-interface/determinism-and-tests.md b/docs/book/25-mcp-interface/determinism-and-tests.md index ba5d10a3..ebf13ae8 100644 --- a/docs/book/25-mcp-interface/determinism-and-tests.md +++ b/docs/book/25-mcp-interface/determinism-and-tests.md @@ -1,4 +1,5 @@ + # MCP Security, Determinism, and Tests Tool inventory and payload 
contracts: @@ -7,17 +8,15 @@ Tool inventory and payload contracts: ## Security model -| Property | Guarantee | -|-------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| Default transport | Local `stdio` | -| Remote exposure | Explicit `--allow-remote` required for non-loopback | -| Lazy loading | Base installs and CI do not require MCP packages | +| Property | Guarantee | +|-------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Default transport | Local `stdio` | +| Remote exposure | Explicit `--allow-remote` required for non-loopback | +| Lazy loading | Base installs and CI do not require MCP packages | | Read-only | Never mutates source, baseline, cache, or canonical report artifacts; may write the ephemeral workspace intent registry under `.codeclone/`, optional audit/observability DBs, Engineering Memory **draft** rows, and projection job metadata when enabled | --- - - ## Determinism - Run identity is derived from canonical report integrity digest. 
@@ -28,8 +27,6 @@ Tool inventory and payload contracts: --- - - ## Locked by tests - `tests/test_mcp_service.py` @@ -40,8 +37,6 @@ Tool inventory and payload contracts: --- - - ## See also - [14-claim-guard.md](../14-claim-guard.md) — citation-based review validation diff --git a/docs/book/25-mcp-interface/payload-conventions.md b/docs/book/25-mcp-interface/payload-conventions.md index a3fc9752..27f1c7d9 100644 --- a/docs/book/25-mcp-interface/payload-conventions.md +++ b/docs/book/25-mcp-interface/payload-conventions.md @@ -1,4 +1,5 @@ + # MCP payload conventions ## Payload conventions diff --git a/docs/book/25-mcp-interface/tools/atomic-change-control.md b/docs/book/25-mcp-interface/tools/atomic-change-control.md index 8e0f0a3b..0a199768 100644 --- a/docs/book/25-mcp-interface/tools/atomic-change-control.md +++ b/docs/book/25-mcp-interface/tools/atomic-change-control.md @@ -1,15 +1,15 @@ ### Atomic change control tools (advanced / diagnostic) -| Tool | Key parameters | Purpose | -|-----------------------------|--------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `manage_change_intent` | `action`, `root`, `run_id`, `intent_id`, `scope`, `on_conflict`, `ttl_seconds`, `lease_seconds`, `changed_files` or `diff_ref` | Intent lifecycle: declare, get, check, clear, renew, promote, list_workspace, gc_workspace, recover, reset_workspace. 
Use for queue/promote/recover operations alongside workflow tools | -| `get_blast_radius` | `run_id`, `files`, `depth`, `include` | Pre-change risk boundary: full transitive graph, custom include filters | -| `get_relevant_memory` | `root`, `scope`, `intent_id`, `symbols`, `max_records`, `include_stale`, `include_drafts`, `detail_level` | Ranked engineering memory for declared edit scope. Compact by default: bounded record/trajectory subjects plus typed `records`, `experiences`, `trajectories`, and `coverage` lanes. Auto-bootstraps store when `mcp_sync_policy=bootstrap_if_missing` (default). See [Engineering Memory](../../13-engineering-memory/index.md) | +| Tool | Key parameters | Purpose | +|-----------------------------|--------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `manage_change_intent` | `action`, `root`, `run_id`, `intent_id`, `scope`, `on_conflict`, `ttl_seconds`, `lease_seconds`, `changed_files` or `diff_ref` | Intent lifecycle: declare, get, check, clear, renew, promote, list_workspace, gc_workspace, recover, reset_workspace. 
Use for queue/promote/recover operations alongside workflow tools | +| `get_blast_radius` | `run_id`, `files`, `depth`, `include` | Pre-change risk boundary: full transitive graph, custom include filters | +| `get_relevant_memory` | `root`, `scope`, `intent_id`, `symbols`, `max_records`, `include_stale`, `include_drafts`, `detail_level` | Ranked engineering memory for declared edit scope. Compact by default: bounded record/trajectory subjects plus typed `records`, `experiences`, `trajectories`, and `coverage` lanes. Auto-bootstraps store when `mcp_sync_policy=bootstrap_if_missing` (default). See [Engineering Memory](../../13-engineering-memory/index.md) | | `query_engineering_memory` | `root`, `mode`, …, optional `semantic` (search only), `detail_level` | Mode router: search, get, for_path, for_symbol, stale, drafts, coverage, status, trajectory_status, trajectory_search, trajectory_get, trajectory_anomalies, trajectory_agents, trajectory_dashboard. List/search modes default compact; `get`, `trajectory_get`, or `detail_level=full` are explicit drill-down. `filters` supports `types`, `statuses`, `confidences`, and `match_mode` (`any`\|`all`) for search. `semantic=true` blends LanceDB proximity when `[tool.codeclone.memory.semantic] enabled` and index built (default off). See [Engineering Memory](../../13-engineering-memory/index.md) | -| `manage_engineering_memory` | `root`, `action`, … | Agent-side: `refresh_from_run`, `record_candidate`, `promote_experience`, `validate_claims`, `propose_from_receipt`, `rebuild_semantic_index`, `rebuild_trajectories`, `enqueue_projection_rebuild`, `projection_rebuild_status`, `run_projection_jobs_once`. `promote_experience` creates a human-reviewable draft; human approve/reject/archive remains VS Code/CLI only. 
See [Engineering Memory](../../13-engineering-memory/index.md) | -| `check_patch_contract` | `mode`, `run_id`, `before_run_id`, `after_run_id`, `intent_id`, `strictness`, `changed_files` or `diff_ref` | Manual budget query or step-by-step verification | -| `create_review_receipt` | `run_id`, `intent_id`, `format`, `include_blast_radius`, `include_patch_contract` | Manual receipt generation | -| `validate_review_claims` | `text`, `run_id`, `require_citations`, `patch_health_delta` | Standalone citation-based overclaim detection; pass `patch_health_delta` from verify when using the atomic workflow | +| `manage_engineering_memory` | `root`, `action`, … | Agent-side: `refresh_from_run`, `record_candidate`, `promote_experience`, `validate_claims`, `propose_from_receipt`, `rebuild_semantic_index`, `rebuild_trajectories`, `enqueue_projection_rebuild`, `projection_rebuild_status`, `run_projection_jobs_once`. `promote_experience` creates a human-reviewable draft; human approve/reject/archive remains VS Code/CLI only. See [Engineering Memory](../../13-engineering-memory/index.md) | +| `check_patch_contract` | `mode`, `run_id`, `before_run_id`, `after_run_id`, `intent_id`, `strictness`, `changed_files` or `diff_ref` | Manual budget query or step-by-step verification | +| `create_review_receipt` | `run_id`, `intent_id`, `format`, `include_blast_radius`, `include_patch_contract` | Manual receipt generation | +| `validate_review_claims` | `text`, `run_id`, `require_citations`, `patch_health_delta` | Standalone citation-based overclaim detection; pass `patch_health_delta` from verify when using the atomic workflow | ??? 
info "Blast radius: do_not_touch vs review_context" `do_not_touch` is limited to actionable negative context: baselines, diff --git a/docs/book/25-mcp-interface/tools/platform-observability.md b/docs/book/25-mcp-interface/tools/platform-observability.md index 19885ccc..e709266a 100644 --- a/docs/book/25-mcp-interface/tools/platform-observability.md +++ b/docs/book/25-mcp-interface/tools/platform-observability.md @@ -11,15 +11,15 @@ privacy, configuration, and trust boundaries. ## Parameters -| Parameter | Contract | -|---|---| -| `root` | Absolute repository root. | -| `section` | One supported diagnostics section. | +| Parameter | Contract | +|----------------|--------------------------------------------------------------------------| +| `root` | Absolute repository root. | +| `section` | One supported diagnostics section. | | `detail_level` | `compact`, `normal`, or `full`; `full` currently downgrades to `normal`. | -| `limit` | Row cap, clamped to `1..50`. | -| `window` | `latest` or a correlation ID. | -| `operation_id` | Reserved; reported in `ignored_parameters`. | -| `span_id` | Reserved; reported in `ignored_parameters`. | +| `limit` | Row cap, clamped to `1..50`. | +| `window` | `latest` or a correlation ID. | +| `operation_id` | Reserved; reported in `ignored_parameters`. | +| `span_id` | Reserved; reported in `ignored_parameters`. 
| Supported sections: diff --git a/docs/book/25-mcp-interface/tools/report-and-findings.md b/docs/book/25-mcp-interface/tools/report-and-findings.md index e73ac7f2..af32df15 100644 --- a/docs/book/25-mcp-interface/tools/report-and-findings.md +++ b/docs/book/25-mcp-interface/tools/report-and-findings.md @@ -6,5 +6,5 @@ | `list_findings` | `run_id`, `family`, `category`, `severity`, `source_kind`, `novelty`, `sort_by`, `detail_level`, changed-scope filters, pagination | Filtered, paginated finding list | | `get_finding` | `finding_id`, `run_id`, `detail_level` | One canonical finding by short or full ID | | `get_remediation` | `finding_id`, `run_id`, `detail_level` | Remediation/explainability for one finding | -| `list_hotspots` | `kind`, `run_id`, `detail_level`, changed-scope filters, `limit`, `max_results` | Priority-ranked hotspot views by kind | +| `list_hotspots` | `kind`, `run_id`, `detail_level`, changed-scope filters, `limit`, `max_results` | Priority-ranked hotspot views by kind | | `generate_pr_summary` | `run_id`, `changed_paths`, `git_diff_ref`, `format` | PR-oriented markdown or JSON summary | diff --git a/docs/book/25-mcp-interface/tools/session-and-memory.md b/docs/book/25-mcp-interface/tools/session-and-memory.md index 53a4ce37..3a0f5c8a 100644 --- a/docs/book/25-mcp-interface/tools/session-and-memory.md +++ b/docs/book/25-mcp-interface/tools/session-and-memory.md @@ -8,9 +8,9 @@ ### Platform observability -| Tool | Key parameters | Purpose | -|----------------------------------|-------------------------------------------------|-------------------------------------------------------------------------| -| `query_platform_observability` | `root`, `section`, `window`, `detail_level`, `limit` | Bounded, read-only slices of CodeClone's own runtime telemetry | +| Tool | Key parameters | Purpose | +|--------------------------------|------------------------------------------------------|----------------------------------------------------------------| +| 
`query_platform_observability` | `root`, `section`, `window`, `detail_level`, `limit` | Bounded, read-only slices of CodeClone's own runtime telemetry | This tool is **development-only**. It reports numeric operation/span, database-cost, payload, agent-context, and pipeline diagnostics for CodeClone diff --git a/docs/book/26-platform-observability.md b/docs/book/26-platform-observability.md index 1e32b6e9..68334999 100644 --- a/docs/book/26-platform-observability.md +++ b/docs/book/26-platform-observability.md @@ -40,14 +40,14 @@ The observer: Configuration is environment-only. There is no `[tool.codeclone]` observability table. -| Variable | Meaning | -|---|---| -| `CODECLONE_OBSERVABILITY_ENABLED=1` | Enable instrumentation. | -| `CODECLONE_OBSERVABILITY_FORCE=1` | Permit observation in CI; it does not enable instrumentation by itself. | -| `CODECLONE_OBSERVABILITY_PROFILE=1` | Capture optional process metrics; requires `codeclone[perf]`. | -| `CODECLONE_OBSERVABILITY_PERSIST=0` | Instrument without persisting completed operations. | -| `CODECLONE_OBSERVABILITY_CAPTURE_PAYLOAD_SIZES=0` | Disable request/response size and token estimates. | -| `CODECLONE_OBSERVABILITY_PAYLOAD_SNAPSHOT=1` | Reserved and rejected: raw payload snapshots are not supported. | +| Variable | Meaning | +|---------------------------------------------------|-------------------------------------------------------------------------| +| `CODECLONE_OBSERVABILITY_ENABLED=1` | Enable instrumentation. | +| `CODECLONE_OBSERVABILITY_FORCE=1` | Permit observation in CI; it does not enable instrumentation by itself. | +| `CODECLONE_OBSERVABILITY_PROFILE=1` | Capture optional process metrics; requires `codeclone[perf]`. | +| `CODECLONE_OBSERVABILITY_PERSIST=0` | Instrument without persisting completed operations. | +| `CODECLONE_OBSERVABILITY_CAPTURE_PAYLOAD_SIZES=0` | Disable request/response size and token estimates. 
| +| `CODECLONE_OBSERVABILITY_PAYLOAD_SNAPSHOT=1` | Reserved and rejected: raw payload snapshots are not supported. | An explicit `CODECLONE_OBSERVABILITY_ENABLED=1` is sufficient in CI. `CODECLONE_OBSERVABILITY_FORCE` never enables observation by itself and is diff --git a/docs/book/README.md b/docs/book/README.md index 936e10c2..d41cbd4a 100644 --- a/docs/book/README.md +++ b/docs/book/README.md @@ -3,6 +3,7 @@ does-not-own: chapter content. rule: other files (index.md, nav) link here — they do NOT duplicate this TOC. Do not add chapter summaries — keep it a pure link list. --> + # CodeClone Contracts Book This book is the contract-level documentation for CodeClone v2.x. @@ -47,8 +48,10 @@ If a statement is not enforced by code/tests, it is explicitly marked as non-con ### Change control - [12-structural-change-controller/index.md](12-structural-change-controller/index.md) — overview -- [12-structural-change-controller/finish-controlled-change.md](12-structural-change-controller/finish-controlled-change.md) — finish pipeline -- [12-structural-change-controller/finish-hygiene.md](12-structural-change-controller/finish-hygiene.md) — hygiene blocking vs advisory +- [12-structural-change-controller/finish-controlled-change.md](12-structural-change-controller/finish-controlled-change.md) — + finish pipeline +- [12-structural-change-controller/finish-hygiene.md](12-structural-change-controller/finish-hygiene.md) — hygiene + blocking vs advisory - [12-structural-change-controller/patch-trail.md](12-structural-change-controller/patch-trail.md) — Patch Trail - [13-engineering-memory/index.md](13-engineering-memory/index.md) — evidence-linked repository memory - [14-claim-guard.md](14-claim-guard.md) — review claim validation @@ -75,7 +78,8 @@ If a statement is not enforced by code/tests, it is explicitly marked as non-con - [25-mcp-interface/index.md](25-mcp-interface/index.md) — MCP interface contract - 
[25-mcp-interface/tools/workflow.md](25-mcp-interface/tools/workflow.md) — workflow tools - [25-mcp-interface/resources.md](25-mcp-interface/resources.md) — resource URIs -- [25-mcp-interface/tools/platform-observability.md](25-mcp-interface/tools/platform-observability.md) — bounded diagnostics tool +- [25-mcp-interface/tools/platform-observability.md](25-mcp-interface/tools/platform-observability.md) — bounded + diagnostics tool ### Integrations diff --git a/docs/book/appendix/a-status-enums.md b/docs/book/appendix/a-status-enums.md index 83d23c33..d15701f0 100644 --- a/docs/book/appendix/a-status-enums.md +++ b/docs/book/appendix/a-status-enums.md @@ -1,6 +1,7 @@ + # Appendix A. Status Enums ## Purpose diff --git a/docs/book/appendix/b-schema-layouts.md b/docs/book/appendix/b-schema-layouts.md index 7f81dec7..c0696514 100644 --- a/docs/book/appendix/b-schema-layouts.md +++ b/docs/book/appendix/b-schema-layouts.md @@ -812,35 +812,35 @@ Schema version stored in `memory_meta.schema_version`. 
Core tables: -| Table | Role | -|--------------------------|-----------------------------------------------------------| -| `memory_records` | Typed statements with status, confidence, origin, payload | -| `memory_subjects` | Path/symbol/module links (`subject_kind`, `subject_key`) | -| `memory_evidence` | Deterministic evidence refs (report, git_commit, doc, …) | -| `memory_fts` | FTS5 search index (schema 1.1+) | -| `memory_revisions` | Governance audit trail | -| `memory_ingestion_runs` | Init/refresh run metadata | +| Table | Role | +|--------------------------|-------------------------------------------------------------| +| `memory_records` | Typed statements with status, confidence, origin, payload | +| `memory_subjects` | Path/symbol/module links (`subject_kind`, `subject_key`) | +| `memory_evidence` | Deterministic evidence refs (report, git_commit, doc, …) | +| `memory_fts` | FTS5 search index (schema 1.1+) | +| `memory_revisions` | Governance audit trail | +| `memory_ingestion_runs` | Init/refresh run metadata | | `memory_projection_jobs` | Coalesced trajectory/semantic/Experience jobs (schema 1.3+) | Trajectory tables (schema **`1.2`**+ trajectory DDL, active projection **`trajectory-v3`**): -| Table | Role | -|-------------------------------------|-----------------------------------------------------------------------| +| Table | Role | +|-------------------------------------|--------------------------------------------------------------------------------| | `memory_trajectories` | One row per `(project_id, workflow_id, projection_version)` with quality score | -| `memory_trajectory_steps` | Ordered audit steps with frozen `event_core_json` | -| `memory_trajectory_subjects` | Path/module subjects linked to a trajectory | -| `memory_trajectory_evidence` | Report/run/audit evidence refs | -| `memory_trajectory_patch_trails` | Patch Trail JSON + digest per trajectory (schema **`1.4`**, Phase 26) | -| `memory_trajectory_projection_runs` | Rebuild run 
manifest | +| `memory_trajectory_steps` | Ordered audit steps with frozen `event_core_json` | +| `memory_trajectory_subjects` | Path/module subjects linked to a trajectory | +| `memory_trajectory_evidence` | Report/run/audit evidence refs | +| `memory_trajectory_patch_trails` | Patch Trail JSON + digest per trajectory (schema **`1.4`**, Phase 26) | +| `memory_trajectory_projection_runs` | Rebuild run manifest | Experience tables (schema **`1.6`**, derived from trajectory evidence): -| Table | Role | -|------------------------------|------------------------------------------------------------| -| `memory_experiences` | Advisory distilled patterns (`experience-v1`) | +| Table | Role | +|------------------------------|--------------------------------------------------------------| +| `memory_experiences` | Advisory distilled patterns (`experience-v1`) | | `memory_experience_facets` | Agent-family facets today; profile/intent kinds are reserved | -| `memory_experience_evidence` | Contributing trajectory ids and outcomes | +| `memory_experience_evidence` | Contributing trajectory ids and outcomes | Patch Trail JSON uses `PATCH_TRAIL_SCHEMA_VERSION` (currently **`1`**) in `codeclone/contracts/__init__.py`. Trajectory JSONL export rows use @@ -872,11 +872,11 @@ Optional local SQLite database at `.codeclone/db/platform_observability.sqlite3`. It is disposable development telemetry, not report, baseline, cache, audit, or Engineering Memory truth. -| Table | Role | -|---|---| -| `platform_meta` | Schema version metadata. | -| `platform_operations` | Surface-level operation identity, correlation, duration, status, bounded payload sizes, and optional process metrics. | -| `platform_spans` | Ordered subsystem timing, reason/dedupe metadata, counters, normalized SQL fingerprints, and optional process metrics. 
| +| Table | Role | +|-----------------------|------------------------------------------------------------------------------------------------------------------------| +| `platform_meta` | Schema version metadata. | +| `platform_operations` | Surface-level operation identity, correlation, duration, status, bounded payload sizes, and optional process metrics. | +| `platform_spans` | Ordered subsystem timing, reason/dedupe metadata, counters, normalized SQL fingerprints, and optional process metrics. | Operation and span rows are persisted together in one transaction. Profile columns are nullable and populated only when profiling is enabled with diff --git a/docs/book/appendix/c-error-catalog.md b/docs/book/appendix/c-error-catalog.md index 8bdbf42e..37393c52 100644 --- a/docs/book/appendix/c-error-catalog.md +++ b/docs/book/appendix/c-error-catalog.md @@ -1,6 +1,7 @@ + # Appendix C. Error Catalog ## Purpose diff --git a/docs/book/integrations/claude-code-plugin.md b/docs/book/integrations/claude-code-plugin.md index 3a332490..a3dbc755 100644 --- a/docs/book/integrations/claude-code-plugin.md +++ b/docs/book/integrations/claude-code-plugin.md @@ -1,4 +1,5 @@ + # Claude Code Plugin ## Distribution contract @@ -9,13 +10,13 @@ dedicated `orenlab/codeclone-claude-code` storefront. 
The distribution repository contains: -| Path | Role | -|---|---| -| `.claude-plugin/marketplace.json` | Marketplace catalog named `orenlab-codeclone` | -| `plugins/codeclone/.claude-plugin/plugin.json` | Plugin identity and metadata | -| `plugins/codeclone/.mcp.json` | Local stdio MCP definition | -| `plugins/codeclone/skills/` | Review, hotspots, change control, and memory | -| `plugins/codeclone/scripts/launch_mcp.py` | Standalone workspace-first launcher | +| Path | Role | +|------------------------------------------------|-----------------------------------------------| +| `.claude-plugin/marketplace.json` | Marketplace catalog named `orenlab-codeclone` | +| `plugins/codeclone/.claude-plugin/plugin.json` | Plugin identity and metadata | +| `plugins/codeclone/.mcp.json` | Local stdio MCP definition | +| `plugins/codeclone/skills/` | Review, hotspots, change control, and memory | +| `plugins/codeclone/scripts/launch_mcp.py` | Standalone workspace-first launcher | ## Installation contract diff --git a/docs/book/integrations/claude-desktop-bundle.md b/docs/book/integrations/claude-desktop-bundle.md index 46d45d0b..a4ad18b6 100644 --- a/docs/book/integrations/claude-desktop-bundle.md +++ b/docs/book/integrations/claude-desktop-bundle.md @@ -1,4 +1,5 @@ + # Claude Desktop Bundle This contract covers the Claude Desktop `.mcpb` package. Claude Code uses the @@ -11,7 +12,6 @@ separate [Claude Code plugin](claude-code-plugin.md) and marketplace workflow. 3. If you want to bypass auto-discovery, set **CodeClone launcher command** in the bundle settings to an absolute path. - ## Settings | Setting | Purpose | @@ -20,7 +20,6 @@ separate [Claude Code plugin](claude-code-plugin.md) and marketplace workflow. | **CodeClone launcher command** | Absolute path or bare command for `codeclone-mcp` | | **Advanced launcher args** | JSON array of extra args (transport is always stdio) | - ## Runtime model Node wrapper launches `codeclone-mcp` via local `stdio`. 
It: @@ -43,13 +42,11 @@ Engineering Memory and optional semantic search follow the server contract in [Engineering Memory](../13-engineering-memory/index.md) (`query_engineering_memory`, `get_relevant_memory`; semantic off by default in pyproject). - ## Privacy Local wrapper only — no telemetry, no cloud sync, no remote listener. See [Privacy Policy](../../privacy-policy.md). - ## Design rules - **Canonical MCP first**: the bundle keeps Claude Desktop on the same @@ -62,7 +59,6 @@ See [Privacy Policy](../../privacy-policy.md). - **Small and deterministic**: package only the wrapper, manifest, icon, and documentation needed for local installation. - ## Non-guarantees - Bundle presentation inside Claude Desktop may evolve with MCPB client UX. @@ -70,14 +66,12 @@ See [Privacy Policy](../../privacy-policy.md). the explicit launcher setting remains stable. - The bundle does not guarantee automatic updates or remote install flows. - ## Current limits - expects either a workspace launcher, a user-local/global launcher, or an explicitly configured absolute launcher path - local install surface, not a hosted service layer - ## Source of truth - CLI remains the scripting and CI surface. diff --git a/docs/book/integrations/codex-plugin.md b/docs/book/integrations/codex-plugin.md index cc9d4a88..23819599 100644 --- a/docs/book/integrations/codex-plugin.md +++ b/docs/book/integrations/codex-plugin.md @@ -1,4 +1,5 @@ + # Codex Plugin ## What ships in the plugin @@ -14,7 +15,6 @@ | `skills/codeclone-engineering-memory/` | Engineering memory read/write skill | | `assets/` | Plugin branding | - ## Runtime model Additive — the marketplace install provides a local MCP definition and **four** @@ -37,7 +37,6 @@ codex plugin marketplace add orenlab/codeclone-codex codex plugin add codeclone@orenlab-codeclone ``` - ## Read-only contract Repository truth stays read-only: MCP must not mutate source files, baselines, @@ -45,7 +44,6 @@ analysis cache, or canonical report artifacts. 
Change-control and session tools may write ephemeral coordination state through the configured workspace intent registry (file or SQLite backend) and optional audit records when enabled. - ## Design rules - **Codex-native packaging**: keep source under `plugins/` and publish the @@ -63,7 +61,6 @@ registry (file or SQLite backend) and optional audit records when enabled. - **Shell-free launch**: the bundled launcher must stay argv-based and local-stdio-only. - ## Non-guarantees - Codex plugin UI presentation may evolve independently of the plugin manifest @@ -71,7 +68,6 @@ registry (file or SQLite backend) and optional audit records when enabled. - Users who already configured `codeclone-mcp` manually may still prefer the direct MCP path over the bundled plugin MCP definition. - ## Current limits - If you already registered `codeclone-mcp` manually, keep only one setup path @@ -79,7 +75,6 @@ registry (file or SQLite backend) and optional audit records when enabled. - The bundled `.mcp.json` prefers `.venv`, then a Poetry env, then `PATH`. - The bundled launcher stays shell-free and local-stdio-only. - ## Further reading - [MCP usage guide](../../guide/mcp/README.md) diff --git a/docs/book/integrations/sarif.md b/docs/book/integrations/sarif.md index 6681a011..2ce04a72 100644 --- a/docs/book/integrations/sarif.md +++ b/docs/book/integrations/sarif.md @@ -1,4 +1,5 @@ + # SARIF ## Source files @@ -7,7 +8,6 @@ - `codeclone/report/document/builder.py` - `codeclone/report/findings.py` - ## Design model CodeClone builds SARIF from the already materialized canonical report document. 
@@ -19,7 +19,6 @@ That means: - severity/confidence/category data comes from canonical report payloads - SARIF ordering remains deterministic - ## Path model To improve IDE and code-scanning integration, SARIF uses repo-relative paths @@ -34,7 +33,6 @@ Current behavior: - `run.invocations[*].workingDirectory` mirrors the scan root URI when available - `run.automationDetails.id` is unique per run - ## Result model Current SARIF output includes: @@ -49,7 +47,6 @@ Current SARIF output includes: Coverage Join may materialize coverage design findings only when the canonical report already contains valid `metrics.families.coverage_join` facts. - ## Validation and tests Relevant tests: diff --git a/docs/book/integrations/vs-code-extension.md b/docs/book/integrations/vs-code-extension.md index cd85b038..adf69989 100644 --- a/docs/book/integrations/vs-code-extension.md +++ b/docs/book/integrations/vs-code-extension.md @@ -1,4 +1,5 @@ + # VS Code Extension ## Trust model @@ -25,7 +26,6 @@ the local MCP launcher. `codeclone.mcp.args`) are machine-scoped. Analysis-depth settings are resource-scoped so they can vary by workspace or folder. - ## Settings Authoritative definitions: `extensions/vscode-codeclone/package.json` → @@ -33,45 +33,45 @@ Authoritative definitions: `extensions/vscode-codeclone/package.json` → ### Launcher (machine-scoped) -| Setting | Default | Notes | -|---------|---------|-------| -| `codeclone.mcp.command` | `auto` | Workspace venv, then `PATH`. User/remote settings. | -| `codeclone.mcp.args` | `[]` | Extra launcher argv. The extension injects `--ide-governance-channel` for Memory governance and session/audit tools (do not duplicate in args). | +| Setting | Default | Notes | +|-------------------------|---------|-------------------------------------------------------------------------------------------------------------------------------------------------| +| `codeclone.mcp.command` | `auto` | Workspace venv, then `PATH`. User/remote settings. 
| +| `codeclone.mcp.args` | `[]` | Extra launcher argv. The extension injects `--ide-governance-channel` for Memory governance and session/audit tools (do not duplicate in args). | ### Analysis (resource-scoped) -| Setting | Default | Notes | -|---------|---------|-------| -| `codeclone.analysis.profile` | `defaults` | `defaults`, `deeperReview`, or `custom`. | -| `codeclone.analysis.cachePolicy` | `reuse` | `reuse` or `off`. | -| `codeclone.analysis.changedDiffRef` | `HEAD` | Git ref for **Review Changes**. | -| `codeclone.analysis.coverageXml` | `""` | Explicit Cobertura path for Coverage Join. | -| `codeclone.analysis.autoDetectCoverageXml` | `true` | Use workspace-root `coverage.xml` when path empty. | -| `codeclone.analysis.minLoc` | `10` | Custom thresholds — only when `profile=custom`. | -| `codeclone.analysis.minStmt` | `6` | Same. | -| `codeclone.analysis.blockMinLoc` | `20` | Same. | -| `codeclone.analysis.blockMinStmt` | `8` | Same. | -| `codeclone.analysis.segmentMinLoc` | `20` | Same. | -| `codeclone.analysis.segmentMinStmt` | `10` | Same. | +| Setting | Default | Notes | +|--------------------------------------------|------------|----------------------------------------------------| +| `codeclone.analysis.profile` | `defaults` | `defaults`, `deeperReview`, or `custom`. | +| `codeclone.analysis.cachePolicy` | `reuse` | `reuse` or `off`. | +| `codeclone.analysis.changedDiffRef` | `HEAD` | Git ref for **Review Changes**. | +| `codeclone.analysis.coverageXml` | `""` | Explicit Cobertura path for Coverage Join. | +| `codeclone.analysis.autoDetectCoverageXml` | `true` | Use workspace-root `coverage.xml` when path empty. | +| `codeclone.analysis.minLoc` | `10` | Custom thresholds — only when `profile=custom`. | +| `codeclone.analysis.minStmt` | `6` | Same. | +| `codeclone.analysis.blockMinLoc` | `20` | Same. | +| `codeclone.analysis.blockMinStmt` | `8` | Same. | +| `codeclone.analysis.segmentMinLoc` | `20` | Same. 
| +| `codeclone.analysis.segmentMinStmt` | `10` | Same. | ### UI (window-scoped) -| Setting | Default | Notes | -|---------|---------|-------| -| `codeclone.ui.showStatusBar` | `true` | Workspace-level status bar item. | +| Setting | Default | Notes | +|------------------------------|---------|----------------------------------| +| `codeclone.ui.showStatusBar` | `true` | Workspace-level status bar item. | ### Engineering Memory search (resource-scoped) These map to MCP `query_engineering_memory` parameters from `extensions/vscode-codeclone/src/memorySearch.js` (`readMemorySearchSettings`). -| Setting | Default | MCP mapping | Notes | -|---------|---------|-------------|-------| -| `codeclone.memory.searchSemantic` | `true` | `semantic` on `mode=search` only | Extension **asks** for semantic blend by default. Server still needs `[tool.codeclone.memory.semantic] enabled`, a built sidecar, and a provider. Use `codeclone[semantic-local]` + `embedding_provider="fastembed"` for semantic-quality recall; otherwise FTS-only or diagnostic/degraded results report `semantic.used: false` / provider details. | -| `codeclone.memory.searchIncludeDrafts` | `false` | `include_drafts` (search) | Drafts are still included automatically on `for_path` per memory contract. | -| `codeclone.memory.searchIncludeStale` | `false` | `include_stale` (search and `for_path`) | | -| `codeclone.memory.searchMaxResults` | `20` | `max_results` (clamped 5–50) | | -| `codeclone.memory.searchDetailLevel` | `compact` | `detail_level`: `compact` or `full` | `mode=get` always returns full records. Not exposed in **Configure Memory Search** (settings UI only). 
| +| Setting | Default | MCP mapping | Notes | +|----------------------------------------|-----------|-----------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `codeclone.memory.searchSemantic` | `true` | `semantic` on `mode=search` only | Extension **asks** for semantic blend by default. Server still needs `[tool.codeclone.memory.semantic] enabled`, a built sidecar, and a provider. Use `codeclone[semantic-local]` + `embedding_provider="fastembed"` for semantic-quality recall; otherwise FTS-only or diagnostic/degraded results report `semantic.used: false` / provider details. | +| `codeclone.memory.searchIncludeDrafts` | `false` | `include_drafts` (search) | Drafts are still included automatically on `for_path` per memory contract. | +| `codeclone.memory.searchIncludeStale` | `false` | `include_stale` (search and `for_path`) | | +| `codeclone.memory.searchMaxResults` | `20` | `max_results` (clamped 5–50) | | +| `codeclone.memory.searchDetailLevel` | `compact` | `detail_level`: `compact` or `full` | `mode=get` always returns full records. Not exposed in **Configure Memory Search** (settings UI only). | !!! important "Extension default differs from server default" `searchSemantic` defaults to **`true` in VS Code** so the IDE requests semantic @@ -85,7 +85,6 @@ These map to MCP `query_engineering_memory` parameters from `searchDetailLevel` is settings-editor only. Search queries must be 2–200 characters without control characters (`sanitizeSearchQuery`). - ## State boundaries The extension keeps three state classes visibly separate: @@ -100,7 +99,6 @@ used by the extension for a workspace. 
only, do not update baseline state, do not rewrite findings, and do not change canonical report truth. - ## Design rules - **Native VS Code first**: tree views, status bar, Quick Pick, CodeLens, and @@ -125,7 +123,6 @@ canonical report truth. - **Restricted Mode honesty**: explain requirements without pretending analysis is available before trust is granted. - ## Non-guarantees - Exact view grouping and copy may evolve between extension releases. @@ -134,7 +131,6 @@ canonical report truth. - Explorer decoration styling, review-loop polish, and other non-contract UI details may evolve without changing the extension contract. - ## Source of truth The extension reads the same canonical analysis semantics already exposed by diff --git a/docs/examples/report.md b/docs/examples/report.md index 742dc34c..611a5cbf 100644 --- a/docs/examples/report.md +++ b/docs/examples/report.md @@ -1,6 +1,7 @@ + # Sample Report This page links to a live example report generated from the current `codeclone` diff --git a/docs/getting-started.md b/docs/getting-started.md index 7473f9f7..88c51e83 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -3,6 +3,7 @@ does-not-own: contract details (→ book/), architecture (→ guide/explanation/how-it-works.md), MCP tool reference (→ guide/mcp/README.md). rule: other pages link HERE for install — never duplicate install blocks. --> + # Getting Started Install CodeClone, run your first analysis, set up CI gating, and connect @@ -268,6 +269,7 @@ See [Config and defaults](book/10-config-and-defaults.md). 
- [Baseline contract](book/07-baseline.md) — trust model and schema - [MCP interface contract](book/25-mcp-interface/index.md) — tool surface and guarantees - [Engineering Memory recipes](guide/mcp/workflows/memory-recipes.md) — scoped context and governed drafts -- [Trajectories and Experiences](guide/memory/trajectories-and-experiences.md) — workflow evidence and recurring patterns +- [Trajectories and Experiences](guide/memory/trajectories-and-experiences.md) — workflow evidence and recurring + patterns - [Platform Observability](guide/observability/diagnostics.md) — diagnose CodeClone's own runtime - [Report contract](book/05-report.md) — canonical JSON schema diff --git a/docs/guide/README.md b/docs/guide/README.md index 96ca83a4..af444ea2 100644 --- a/docs/guide/README.md +++ b/docs/guide/README.md @@ -1,4 +1,5 @@ + # Guide Recipes and workflows for humans and AI agents. For normative guarantees (schemas, @@ -11,33 +12,33 @@ enums, payload semantics), use the [Contracts book](../book/README.md). 
## Start here -| I want to… | Page | -|------------|------| -| Install and run locally | [Getting started](../getting-started.md) | -| Understand the pipeline | [How CodeClone works](explanation/how-it-works.md) | -| Connect an AI agent via MCP | [MCP overview](mcp/README.md) | -| Govern agent edits | [Change control overview](change-control/overview.md) | -| Scope context before edits | [Engineering Memory overview](memory/overview.md) | +| I want to… | Page | +|-----------------------------------------|------------------------------------------------------------------------| +| Install and run locally | [Getting started](../getting-started.md) | +| Understand the pipeline | [How CodeClone works](explanation/how-it-works.md) | +| Connect an AI agent via MCP | [MCP overview](mcp/README.md) | +| Govern agent edits | [Change control overview](change-control/overview.md) | +| Scope context before edits | [Engineering Memory overview](memory/overview.md) | | Inspect trajectory history and patterns | [Trajectories and Experiences](memory/trajectories-and-experiences.md) | -| Diagnose CodeClone runtime cost | [Platform Observability](observability/diagnostics.md) | +| Diagnose CodeClone runtime cost | [Platform Observability](observability/diagnostics.md) | ## MCP workflows -| Task | Recipe | -|------|--------| -| First analysis pass | [Analyze & triage](mcp/workflows/analyze-and-triage.md) | -| Hotspots and checks | [Drill down & checks](mcp/workflows/drill-down-and-checks.md) | -| Declare → edit → finish | [Change control](mcp/workflows/change-control.md) | -| Memory before/after edits | [Memory recipes](mcp/workflows/memory-recipes.md) | -| Session stats and coverage | [Session & coverage](mcp/workflows/session-and-coverage.md) | +| Task | Recipe | +|----------------------------|---------------------------------------------------------------| +| First analysis pass | [Analyze & triage](mcp/workflows/analyze-and-triage.md) | +| Hotspots and checks | [Drill down & 
checks](mcp/workflows/drill-down-and-checks.md) | +| Declare → edit → finish | [Change control](mcp/workflows/change-control.md) | +| Memory before/after edits | [Memory recipes](mcp/workflows/memory-recipes.md) | +| Session stats and coverage | [Session & coverage](mcp/workflows/session-and-coverage.md) | ## Integrations -| Client | Setup guide | Contract | -|--------|-------------|----------| -| VS Code | [Setup](integrations/vscode/setup.md) | [Contract](../book/integrations/vs-code-extension.md) | -| Cursor | [Install & skills](integrations/cursor/install-and-skills.md) | [Contract](../book/integrations/cursor-plugin.md) | -| Claude Code | [Install](integrations/claude-code/setup.md) | [Contract](../book/integrations/claude-code-plugin.md) | -| Codex | [Install](integrations/codex/setup.md) | [Contract](../book/integrations/codex-plugin.md) | -| Claude Desktop | [Setup](integrations/claude-desktop/setup.md) | [Contract](../book/integrations/claude-desktop-bundle.md) | -| SARIF export | [Export](integrations/sarif/export.md) | [Contract](../book/integrations/sarif.md) | +| Client | Setup guide | Contract | +|----------------|---------------------------------------------------------------|-----------------------------------------------------------| +| VS Code | [Setup](integrations/vscode/setup.md) | [Contract](../book/integrations/vs-code-extension.md) | +| Cursor | [Install & skills](integrations/cursor/install-and-skills.md) | [Contract](../book/integrations/cursor-plugin.md) | +| Claude Code | [Install](integrations/claude-code/setup.md) | [Contract](../book/integrations/claude-code-plugin.md) | +| Codex | [Install](integrations/codex/setup.md) | [Contract](../book/integrations/codex-plugin.md) | +| Claude Desktop | [Setup](integrations/claude-desktop/setup.md) | [Contract](../book/integrations/claude-desktop-bundle.md) | +| SARIF export | [Export](integrations/sarif/export.md) | [Contract](../book/integrations/sarif.md) | diff --git 
a/docs/guide/change-control/agent-cycle.md b/docs/guide/change-control/agent-cycle.md index 6f9e49b2..676f6f80 100644 --- a/docs/guide/change-control/agent-cycle.md +++ b/docs/guide/change-control/agent-cycle.md @@ -1,4 +1,5 @@ + # Agent edit cycle Same sequence as [MCP change control workflow](../mcp/workflows/change-control.md). diff --git a/docs/guide/change-control/atomic-debug.md b/docs/guide/change-control/atomic-debug.md index 66482882..68d55515 100644 --- a/docs/guide/change-control/atomic-debug.md +++ b/docs/guide/change-control/atomic-debug.md @@ -1,4 +1,5 @@ + # Atomic debug path For legacy MCP servers or step-by-step debugging: diff --git a/docs/guide/change-control/overview.md b/docs/guide/change-control/overview.md index 63ac9a3c..49dbca2e 100644 --- a/docs/guide/change-control/overview.md +++ b/docs/guide/change-control/overview.md @@ -1,17 +1,18 @@ + # Change control overview CodeClone v2.1 requires agents to **declare scope before editing**, verify the patch against structural boundaries, and finish with evidence-linked hygiene. -| Step | Action | -|------|--------| -| 1 | `analyze_repository` (or reuse valid run) | -| 2 | `start_controlled_change` → `edit_allowed=true` | -| 3 | `get_relevant_memory` (requires absolute `root`) | -| 4 | Edit inside declared scope only | -| 5 | After-run when profile requires it | -| 6 | `finish_controlled_change` with `changed_files` or `diff_ref` | +| Step | Action | +|------|---------------------------------------------------------------| +| 1 | `analyze_repository` (or reuse valid run) | +| 2 | `start_controlled_change` → `edit_allowed=true` | +| 3 | `get_relevant_memory` (requires absolute `root`) | +| 4 | Edit inside declared scope only | +| 5 | After-run when profile requires it | +| 6 | `finish_controlled_change` with `changed_files` or `diff_ref` | MCP recipe: [Change control workflow](../mcp/workflows/change-control.md). 
diff --git a/docs/guide/explanation/how-it-works.md b/docs/guide/explanation/how-it-works.md index 894d5112..342d3fa9 100644 --- a/docs/guide/explanation/how-it-works.md +++ b/docs/guide/explanation/how-it-works.md @@ -5,6 +5,7 @@ CFG semantics (→ book/04), report schema (→ book/05). rule: this is a MAP — 1-2 sentences per topic + link into Reference. Do not shadow-copy book chapters here. --> + # How CodeClone Works > This page is a narrative architecture overview. @@ -169,15 +170,15 @@ Exit codes: [09-exit-codes](../../book/09-exit-codes.md). Every output surface — CLI, HTML, MCP, IDE — is a projection of the same canonical report. No surface adds a second analysis engine. -| Surface | Role | Contract | -|---------|------|----------| -| CLI | Scripting and CI | [CLI](../../book/11-cli.md) | -| MCP | Read-only agent/client integration | [MCP interface](../../book/25-mcp-interface/index.md) | -| VS Code | Guided IDE review | [VS Code](../integrations/vscode/setup.md) | -| Claude Desktop | Local `.mcpb` bundle | [Claude Desktop](../integrations/claude-desktop/setup.md) | -| Codex | Marketplace plugin with skills | [Codex](../integrations/codex/setup.md) | -| Cursor | Plugin with skills, rules, hooks | [Cursor](../integrations/cursor/install-and-skills.md) | -| SARIF | IDE code scanning | [SARIF](../integrations/sarif/export.md) | +| Surface | Role | Contract | +|----------------|------------------------------------|-----------------------------------------------------------| +| CLI | Scripting and CI | [CLI](../../book/11-cli.md) | +| MCP | Read-only agent/client integration | [MCP interface](../../book/25-mcp-interface/index.md) | +| VS Code | Guided IDE review | [VS Code](../integrations/vscode/setup.md) | +| Claude Desktop | Local `.mcpb` bundle | [Claude Desktop](../integrations/claude-desktop/setup.md) | +| Codex | Marketplace plugin with skills | [Codex](../integrations/codex/setup.md) | +| Cursor | Plugin with skills, rules, hooks | 
[Cursor](../integrations/cursor/install-and-skills.md) | +| SARIF | IDE code scanning | [SARIF](../integrations/sarif/export.md) | --- diff --git a/docs/guide/integrations/claude-code/setup.md b/docs/guide/integrations/claude-code/setup.md index 9e3ea815..7db28391 100644 --- a/docs/guide/integrations/claude-code/setup.md +++ b/docs/guide/integrations/claude-code/setup.md @@ -1,4 +1,5 @@ + # Claude Code setup CodeClone ships a native Claude Code plugin through the public @@ -76,11 +77,11 @@ a local MCP definition over the same canonical CodeClone server. Claude Code namespaces installed plugin skills: -| Task | Invocation | -|---|---| -| Repository review | `/codeclone:codeclone-review` | -| Hotspot snapshot | `/codeclone:codeclone-hotspots` | -| Controlled edit | `/codeclone:codeclone-change-control` | +| Task | Invocation | +|--------------------|-------------------------------------------| +| Repository review | `/codeclone:codeclone-review` | +| Hotspot snapshot | `/codeclone:codeclone-hotspots` | +| Controlled edit | `/codeclone:codeclone-change-control` | | Engineering Memory | `/codeclone:codeclone-engineering-memory` | ## Update or remove diff --git a/docs/guide/integrations/claude-desktop/setup.md b/docs/guide/integrations/claude-desktop/setup.md index 813df556..be0e1ed2 100644 --- a/docs/guide/integrations/claude-desktop/setup.md +++ b/docs/guide/integrations/claude-desktop/setup.md @@ -1,4 +1,5 @@ + # Claude Desktop setup Local `.mcpb` bundle that launches `codeclone-mcp` over stdio. 
Same canonical MCP diff --git a/docs/guide/integrations/codex/setup.md b/docs/guide/integrations/codex/setup.md index 91a737e6..48183f5f 100644 --- a/docs/guide/integrations/codex/setup.md +++ b/docs/guide/integrations/codex/setup.md @@ -1,4 +1,3 @@ - ## Install Install the plugin from the Codex marketplace: @@ -45,7 +44,6 @@ Manual MCP registration without the plugin: codex mcp add codeclone -- codeclone-mcp --transport stdio ``` - ## Skills ### codeclone-review diff --git a/docs/guide/integrations/cursor/install-and-skills.md b/docs/guide/integrations/cursor/install-and-skills.md index 04d69b60..92bf7830 100644 --- a/docs/guide/integrations/cursor/install-and-skills.md +++ b/docs/guide/integrations/cursor/install-and-skills.md @@ -1,36 +1,34 @@ - ## What ships in the plugin -| Component | Path | Purpose | -|-----------|------|---------| -| `.cursor-plugin/plugin.json` | Manifest | `skills/`, `rules/`, `agents/`, `hooks/hooks.json`, `mcp.json` | -| `mcp.json` | MCP | `python3` + `./scripts/launch_mcp.py` — resolves `codeclone-mcp` (`.venv` → Poetry → `PATH`) | -| Skills (6) | `skills/*/` | See table below | -| Agent | `agents/structural-reviewer.md` | Invoke id: **`codeclone-structural-reviewer`** | -| Rules (3) | `rules/*.mdc` | See **Rules** | -| Hooks | `hooks/hooks.json` | Dispatches via `hooks/run_hook.py` (plugin manifest; optional project install) | -| `scripts/install-project-hooks.py` | Installer | Writes `.cursor/hooks.json` + `.cursor/codeclone-hooks.json` | -| `assets/` | Branding | Logo and icon | +| Component | Path | Purpose | +|------------------------------------|---------------------------------|----------------------------------------------------------------------------------------------| +| `.cursor-plugin/plugin.json` | Manifest | `skills/`, `rules/`, `agents/`, `hooks/hooks.json`, `mcp.json` | +| `mcp.json` | MCP | `python3` + `./scripts/launch_mcp.py` — resolves `codeclone-mcp` (`.venv` → Poetry → `PATH`) | +| Skills (6) | `skills/*/` | See 
table below | +| Agent | `agents/structural-reviewer.md` | Invoke id: **`codeclone-structural-reviewer`** | +| Rules (3) | `rules/*.mdc` | See **Rules** | +| Hooks | `hooks/hooks.json` | Dispatches via `hooks/run_hook.py` (plugin manifest; optional project install) | +| `scripts/install-project-hooks.py` | Installer | Writes `.cursor/hooks.json` + `.cursor/codeclone-hooks.json` | +| `assets/` | Branding | Logo and icon | ### Skills (directory vs chat command) Chat commands use the `name:` field in each `SKILL.md` (not always the folder name on disk): -| Folder on disk | Chat command (`name`) | Primary MCP flow | -|----------------|----------------------|------------------| -| `production-triage/` | `/codeclone-production-triage` | `analyze_repository` → `get_production_triage` | -| `codeclone-hotspots/` | `/codeclone-hotspots` | `analyze_repository` → hotspots / `check_*` | -| `blast-radius/` | `/codeclone-blast-radius` | `analyze_repository` → `get_blast_radius` (read-only) | -| `codeclone-review/` | `/codeclone-review` | Full review loop (conservative first) | -| `codeclone-change-control/` | `/codeclone-change-control` | `start_controlled_change` → edit → `finish_controlled_change` | -| `codeclone-engineering-memory/` | `/codeclone-engineering-memory` | `get_relevant_memory`, `query_engineering_memory`, drafts | +| Folder on disk | Chat command (`name`) | Primary MCP flow | +|---------------------------------|---------------------------------|---------------------------------------------------------------| +| `production-triage/` | `/codeclone-production-triage` | `analyze_repository` → `get_production_triage` | +| `codeclone-hotspots/` | `/codeclone-hotspots` | `analyze_repository` → hotspots / `check_*` | +| `blast-radius/` | `/codeclone-blast-radius` | `analyze_repository` → `get_blast_radius` (read-only) | +| `codeclone-review/` | `/codeclone-review` | Full review loop (conservative first) | +| `codeclone-change-control/` | `/codeclone-change-control` | 
`start_controlled_change` → edit → `finish_controlled_change` | +| `codeclone-engineering-memory/` | `/codeclone-engineering-memory` | `get_relevant_memory`, `query_engineering_memory`, drafts | Codex plugin ships the overlapping subset (review, hotspots, change-control, engineering-memory) but **not** standalone production-triage or blast-radius skills. - ## Install ### Install from the Cursor marketplace @@ -90,7 +88,6 @@ ignores `/.cursor/` in `.gitignore`. plugin from the `orenlab/codeclone-cursor` storefront through Cursor's own marketplace UI. - ## Skills ### codeclone-production-triage @@ -140,7 +137,6 @@ approve/reject: VS Code **Memory** view only (MCP agents cannot approve). Full contract: [Engineering Memory](../../../book/13-engineering-memory/index.md). - ## Agent ### codeclone-structural-reviewer @@ -151,7 +147,6 @@ intent or modify files. The structural reviewer agent uses CodeClone MCP tools exclusively for evidence, does not modify files or declare change intent, and does not treat report-only signals as CI failures or vulnerability claims. - ## Distribution - **Monorepo source:** `plugins/cursor-codeclone/` @@ -159,7 +154,6 @@ does not treat report-only signals as CI failures or vulnerability claims. - **Install:** Cursor marketplace panel; local symlink only for development - **Standalone releases:** ship full `plugins/codeclone/scripts/launch_mcp.py` body - ## Runtime model Additive: local MCP via `launch_mcp.py`, six skills, three rules (two diff --git a/docs/guide/integrations/sarif/export.md b/docs/guide/integrations/sarif/export.md index c3156f08..6a0072e7 100644 --- a/docs/guide/integrations/sarif/export.md +++ b/docs/guide/integrations/sarif/export.md @@ -1,4 +1,3 @@ - ## Purpose Explain how CodeClone projects canonical findings into SARIF and what IDEs or @@ -7,7 +6,6 @@ code-scanning tools can rely on. SARIF is a deterministic projection layer. The canonical source of truth remains the report document. 
- ## What SARIF is good for here SARIF is useful as: @@ -22,7 +20,6 @@ It is not the source of truth for: - gating semantics - baseline compatibility - ## See also - [05. Report](../../../book/05-report.md) diff --git a/docs/guide/integrations/vscode/setup.md b/docs/guide/integrations/vscode/setup.md index 29e97307..18bfe6a3 100644 --- a/docs/guide/integrations/vscode/setup.md +++ b/docs/guide/integrations/vscode/setup.md @@ -1,4 +1,3 @@ - ## What it is for The extension helps you: @@ -16,7 +15,6 @@ The extension helps you: It does not create a second truth model and it does not mutate the repository. - ## Install requirements Install from the VS Code Marketplace: **`orenlab.codeclone`** (publisher @@ -53,7 +51,6 @@ show a one-time extension hint after the summary. It is suppressed in quiet, CI, and non-interactive runs, and is remembered per CodeClone version next to the resolved project cache path. - ## Main views ### Overview @@ -92,7 +89,6 @@ actions, and human approve/reject through the IDE governance channel extension launches MCP with `--ide-governance-channel` and registers a `SecretStorage` governance key on connect. - ## Review model The extension stays source-first: @@ -110,7 +106,6 @@ The extension stays source-first: `Open in HTML Report` exists as an explicit bridge to the richer human report, not as the primary IDE workflow. - ## Blast radius, session, and audit commands The extension also exposes structural change-controller helpers over MCP: @@ -127,7 +122,6 @@ The extension also exposes structural change-controller helpers over MCP: These commands require workspace trust and an active MCP connection. - ## Engineering Memory in the IDE - **Memory** view — draft inbox, approve/reject through the IDE governance @@ -155,7 +149,6 @@ deterministic diagnostic provider. See Trajectory semantics: [Trajectory quality and passport](../../../book/13-engineering-memory/trajectory-quality-and-passport.md). 
- ## Open Triage **Open Triage** (`orenlab.codeclone.openTriage`) calls `get_production_triage` for @@ -163,7 +156,6 @@ the current run before opening the markdown panel. Repeated opens reuse the cach payload for 5 seconds when the run is unchanged and not marked stale; concurrent opens share one in-flight request. - ## First-run path 1. Open the `CodeClone` view container. diff --git a/docs/guide/mcp/README.md b/docs/guide/mcp/README.md index ae4493e6..b6e98d7f 100644 --- a/docs/guide/mcp/README.md +++ b/docs/guide/mcp/README.md @@ -1,4 +1,5 @@ + # MCP for AI Agents Use CodeClone through `codeclone-mcp` — same pipeline and report as the CLI. @@ -17,29 +18,29 @@ Install: [Getting started — MCP extra](../../getting-started.md#install). ## Setup -| Step | Page | -|------|------| -| Register a client | [Client setup](client-setup.md) | +| Step | Page | +|----------------------|-----------------------------------------------| +| Register a client | [Client setup](client-setup.md) | | Launcher & transport | [Server & transport](server-and-transport.md) | -| Layer diagram | [Architecture](architecture.md) | -| Common failures | [Troubleshooting](troubleshooting.md) | +| Layer diagram | [Architecture](architecture.md) | +| Common failures | [Troubleshooting](troubleshooting.md) | ## Workflows (recommended order) -| Phase | Recipe | -|-------|--------| -| 1. Baseline-aware triage | [Analyze & triage](workflows/analyze-and-triage.md) | -| 2. Focused inspection | [Drill down & checks](workflows/drill-down-and-checks.md) | -| 3. Governed edits | [Change control](workflows/change-control.md) | -| 4. Durable scope context | [Memory recipes](workflows/memory-recipes.md) | -| 5. Coverage & session | [Session & coverage](workflows/session-and-coverage.md) | +| Phase | Recipe | +|--------------------------|-----------------------------------------------------------| +| 1. Baseline-aware triage | [Analyze & triage](workflows/analyze-and-triage.md) | +| 2. 
Focused inspection | [Drill down & checks](workflows/drill-down-and-checks.md) | +| 3. Governed edits | [Change control](workflows/change-control.md) | +| 4. Durable scope context | [Memory recipes](workflows/memory-recipes.md) | +| 5. Coverage & session | [Session & coverage](workflows/session-and-coverage.md) | ## Reference shortcuts -| Need | Page | -|------|------| -| Prompt patterns | [Prompt patterns](prompts.md) | -| Payload field cheat sheet | [Payload cheatsheet](payload-cheatsheet.md) | -| Change control contract | [Structural Change Controller](../../book/12-structural-change-controller/index.md) | -| Engineering Memory contract | [Engineering Memory](../../book/13-engineering-memory/index.md) | -| Runtime diagnostics | [Platform Observability](../observability/diagnostics.md) | +| Need | Page | +|-----------------------------|-------------------------------------------------------------------------------------| +| Prompt patterns | [Prompt patterns](prompts.md) | +| Payload field cheat sheet | [Payload cheatsheet](payload-cheatsheet.md) | +| Change control contract | [Structural Change Controller](../../book/12-structural-change-controller/index.md) | +| Engineering Memory contract | [Engineering Memory](../../book/13-engineering-memory/index.md) | +| Runtime diagnostics | [Platform Observability](../observability/diagnostics.md) | diff --git a/docs/guide/mcp/architecture.md b/docs/guide/mcp/architecture.md index 30339dae..4ae4401f 100644 --- a/docs/guide/mcp/architecture.md +++ b/docs/guide/mcp/architecture.md @@ -1,4 +1,5 @@ + # MCP architecture ## Where MCP fits diff --git a/docs/guide/mcp/client-setup.md b/docs/guide/mcp/client-setup.md index 61c7d983..77600b4c 100644 --- a/docs/guide/mcp/client-setup.md +++ b/docs/guide/mcp/client-setup.md @@ -1,4 +1,5 @@ + # MCP client setup ## Client setup diff --git a/docs/guide/mcp/payload-cheatsheet.md b/docs/guide/mcp/payload-cheatsheet.md index 59bb2921..1ad5684e 100644 --- 
a/docs/guide/mcp/payload-cheatsheet.md +++ b/docs/guide/mcp/payload-cheatsheet.md @@ -1,4 +1,5 @@ + # Payload cheat sheet !!! warning "Non-normative" diff --git a/docs/guide/mcp/prompts.md b/docs/guide/mcp/prompts.md index a0bf9131..ee60de25 100644 --- a/docs/guide/mcp/prompts.md +++ b/docs/guide/mcp/prompts.md @@ -1,4 +1,5 @@ + # MCP prompt patterns ## Prompt patterns diff --git a/docs/guide/mcp/troubleshooting.md b/docs/guide/mcp/troubleshooting.md index d6220108..b81efce5 100644 --- a/docs/guide/mcp/troubleshooting.md +++ b/docs/guide/mcp/troubleshooting.md @@ -1,4 +1,5 @@ + # MCP troubleshooting ## Troubleshooting diff --git a/docs/guide/mcp/workflows/analyze-and-triage.md b/docs/guide/mcp/workflows/analyze-and-triage.md index c015a0c9..f41754b4 100644 --- a/docs/guide/mcp/workflows/analyze-and-triage.md +++ b/docs/guide/mcp/workflows/analyze-and-triage.md @@ -1,4 +1,5 @@ + # Analyze & triage ### Phase 1: Analyze diff --git a/docs/guide/mcp/workflows/change-control.md b/docs/guide/mcp/workflows/change-control.md index 00841d6c..30e4d45c 100644 --- a/docs/guide/mcp/workflows/change-control.md +++ b/docs/guide/mcp/workflows/change-control.md @@ -1,4 +1,5 @@ + # Change control workflow Primary MCP edit cycle (sole sequence diagram for change control in the guide): @@ -24,14 +25,15 @@ sequenceDiagram ## Tool tiers -| Tier | Tools | When | -|------|-------|------| -| Normal | `start_controlled_change`, `finish_controlled_change` | Every edit cycle | -| Queue/recovery | `manage_change_intent` (promote, recover, …) | Multi-agent / crash | -| Advanced | `get_blast_radius`, `check_patch_contract`, … | Debugging only | +| Tier | Tools | When | +|----------------|-------------------------------------------------------|---------------------| +| Normal | `start_controlled_change`, `finish_controlled_change` | Every edit cycle | +| Queue/recovery | `manage_change_intent` (promote, recover, …) | Multi-agent / crash | +| Advanced | `get_blast_radius`, `check_patch_contract`, … 
| Debugging only | Normative tool params: [MCP workflow tools](../../../book/25-mcp-interface/tools/workflow.md). -Finish pipeline and hygiene: [finish_controlled_change](../../../book/12-structural-change-controller/finish-controlled-change.md), +Finish pipeline and +hygiene: [finish_controlled_change](../../../book/12-structural-change-controller/finish-controlled-change.md), [Finish hygiene](../../../book/12-structural-change-controller/finish-hygiene.md). ## Related recipes diff --git a/docs/guide/mcp/workflows/drill-down-and-checks.md b/docs/guide/mcp/workflows/drill-down-and-checks.md index 62e6f95e..5ec06eaf 100644 --- a/docs/guide/mcp/workflows/drill-down-and-checks.md +++ b/docs/guide/mcp/workflows/drill-down-and-checks.md @@ -1,4 +1,5 @@ + # Drill down & focused checks ### Phase 3: Drill down diff --git a/docs/guide/mcp/workflows/memory-recipes.md b/docs/guide/mcp/workflows/memory-recipes.md index 2c9eb874..054ead49 100644 --- a/docs/guide/mcp/workflows/memory-recipes.md +++ b/docs/guide/mcp/workflows/memory-recipes.md @@ -1,4 +1,5 @@ + # Engineering Memory recipes (MCP) Ranked scope context and governed drafts — **not** a second analyzer. Normative @@ -11,11 +12,11 @@ Session-local review markers live in [Session & coverage](session-and-coverage.m When the store is missing, default `mcp_sync_policy=bootstrap_if_missing` ingests from the latest MCP run on the first scoped `get_relevant_memory`. -| Step | Tool / action | -|------|----------------| -| Analyze | `analyze_repository(root=)` | +| Step | Tool / action | +|--------------------------|------------------------------------------------------------------| +| Analyze | `analyze_repository(root=)` | | Optional explicit ingest | `manage_engineering_memory(action=refresh_from_run, root=)` | -| Offline init | `codeclone memory init` (CI/offline; same ingest contract) | +| Offline init | `codeclone memory init` (CI/offline; same ingest contract) | ## 2. 
Scope context after `start_controlled_change` @@ -58,14 +59,14 @@ enqueue projection rebuild when configured. ## 5. Search and drill-down -| Goal | Call | -|------|------| -| Keyword search | `query_engineering_memory(mode=search, query=..., root=, filters={match_mode: any\|all})` | -| Semantic blend | same + `semantic=true` when semantic index is built | -| One path | `query_engineering_memory(mode=for_path, path=..., root=)` | -| Trajectory detail | `query_engineering_memory(mode=trajectory_get, record_id=, root=)` | -| Trajectory dashboard | `query_engineering_memory(mode=trajectory_dashboard, root=)` | -| Playbook | `help(topic=engineering_memory)` | +| Goal | Call | +|----------------------|------------------------------------------------------------------------------------------------| +| Keyword search | `query_engineering_memory(mode=search, query=..., root=, filters={match_mode: any\|all})` | +| Semantic blend | same + `semantic=true` when semantic index is built | +| One path | `query_engineering_memory(mode=for_path, path=..., root=)` | +| Trajectory detail | `query_engineering_memory(mode=trajectory_get, record_id=, root=)` | +| Trajectory dashboard | `query_engineering_memory(mode=trajectory_dashboard, root=)` | +| Playbook | `help(topic=engineering_memory)` | ## 6. Semantic index maintenance diff --git a/docs/guide/mcp/workflows/session-and-coverage.md b/docs/guide/mcp/workflows/session-and-coverage.md index dce71c49..c257c009 100644 --- a/docs/guide/mcp/workflows/session-and-coverage.md +++ b/docs/guide/mcp/workflows/session-and-coverage.md @@ -1,4 +1,5 @@ + # Session & coverage ## Coverage review diff --git a/docs/guide/memory/overview.md b/docs/guide/memory/overview.md index e5267173..d36f2fe5 100644 --- a/docs/guide/memory/overview.md +++ b/docs/guide/memory/overview.md @@ -1,17 +1,18 @@ + # Engineering Memory overview Local SQLite store of evidence-linked repository facts. Complements change control with scoped context before edits. 
-| Task | Page | -|------|------| -| Bootstrap / sync | [MCP memory recipes](../mcp/workflows/memory-recipes.md) | -| MCP contract | [Engineering Memory](../../book/13-engineering-memory/index.md) | -| Trajectories / Experiences | [Practical guide](trajectories-and-experiences.md) | -| Trajectory contract | [Projection and Patch Trail](../../book/13-engineering-memory/trajectory-and-patch-trail.md) | -| Quality passport | [Quality and analytics](../../book/13-engineering-memory/trajectory-quality-and-passport.md) | -| Experience contract | [Experience Layer](../../book/13-engineering-memory/experience-layer.md) | +| Task | Page | +|----------------------------|----------------------------------------------------------------------------------------------| +| Bootstrap / sync | [MCP memory recipes](../mcp/workflows/memory-recipes.md) | +| MCP contract | [Engineering Memory](../../book/13-engineering-memory/index.md) | +| Trajectories / Experiences | [Practical guide](trajectories-and-experiences.md) | +| Trajectory contract | [Projection and Patch Trail](../../book/13-engineering-memory/trajectory-and-patch-trail.md) | +| Quality passport | [Quality and analytics](../../book/13-engineering-memory/trajectory-quality-and-passport.md) | +| Experience contract | [Experience Layer](../../book/13-engineering-memory/experience-layer.md) | Human **approve** of drafts: VS Code Memory view **or** `codeclone memory approve` (not MCP agent tools). diff --git a/docs/guide/memory/trajectories-and-experiences.md b/docs/guide/memory/trajectories-and-experiences.md index 90e4fb00..939bcdb2 100644 --- a/docs/guide/memory/trajectories-and-experiences.md +++ b/docs/guide/memory/trajectories-and-experiences.md @@ -72,5 +72,6 @@ surface. To turn it into a reviewable draft, use `manage_engineering_memory(action="promote_experience", experience_id="...")`. Promotion is idempotent and does not approve the draft. 
-The normative contracts are [Trajectory quality and passport](../../book/13-engineering-memory/trajectory-quality-and-passport.md) +The normative contracts +are [Trajectory quality and passport](../../book/13-engineering-memory/trajectory-quality-and-passport.md) and [Experience layer](../../book/13-engineering-memory/experience-layer.md). diff --git a/docs/index.md b/docs/index.md index 5dc0128e..b2824a9a 100644 --- a/docs/index.md +++ b/docs/index.md @@ -3,6 +3,7 @@ does-not-own: full book TOC (book/README.md), install instructions (getting-started.md), local preview commands (publishing.md). rule: keep under 90 lines. Add links, not content. --> + # CodeClone Docs > Structural Change Controller for AI-assisted Python development — @@ -30,11 +31,11 @@ patch against the declared boundary, and generates an auditable review receipt. ## Getting Started -| Goal | Start here | -|-----------------------|----------------------------------------------| -| First install and run | [Getting started](getting-started.md) | +| Goal | Start here | +|-----------------------|---------------------------------------------------| +| First install and run | [Getting started](getting-started.md) | | Understand the model | [How it works](guide/explanation/how-it-works.md) | -| Terminology lookup | [Terminology](book/01-terminology.md) | +| Terminology lookup | [Terminology](book/01-terminology.md) | ## CI and Gating @@ -47,25 +48,25 @@ patch against the declared boundary, and generates an auditable review receipt. 
## AI Agent Governance -| Goal | Start here | -|------------------------------------|-------------------------------------------------------------------------| -| MCP usage (workflows, setup) | [MCP guide](guide/mcp/README.md) | -| Change controller workflow | [Structural Change Controller](book/12-structural-change-controller/index.md) | -| Engineering Memory (scope context) | [Engineering Memory](book/13-engineering-memory/index.md) | -| Trajectories and recurring patterns | [Trajectories and Experiences](guide/memory/trajectories-and-experiences.md) | -| MCP interface contract | [MCP interface](book/25-mcp-interface/index.md) | -| Diagnose CodeClone runtime | [Platform Observability](guide/observability/diagnostics.md) | +| Goal | Start here | +|-------------------------------------|-------------------------------------------------------------------------------| +| MCP usage (workflows, setup) | [MCP guide](guide/mcp/README.md) | +| Change controller workflow | [Structural Change Controller](book/12-structural-change-controller/index.md) | +| Engineering Memory (scope context) | [Engineering Memory](book/13-engineering-memory/index.md) | +| Trajectories and recurring patterns | [Trajectories and Experiences](guide/memory/trajectories-and-experiences.md) | +| MCP interface contract | [MCP interface](book/25-mcp-interface/index.md) | +| Diagnose CodeClone runtime | [Platform Observability](guide/observability/diagnostics.md) | ## IDE and Agent Clients -| Surface | Guide (how to) | Contract (guarantees) | -|-----------------------|----------------|------------------------| -| VS Code extension | [Setup](guide/integrations/vscode/setup.md) | [VS Code contract](book/integrations/vs-code-extension.md) | -| Cursor plugin | [Install & skills](guide/integrations/cursor/install-and-skills.md) | [Cursor contract](book/integrations/cursor-plugin.md) | -| Claude Code plugin | [Install](guide/integrations/claude-code/setup.md) | [Claude Code 
contract](book/integrations/claude-code-plugin.md) | -| Codex plugin | [Install](guide/integrations/codex/setup.md) | [Codex contract](book/integrations/codex-plugin.md) | -| Claude Desktop bundle | [Setup](guide/integrations/claude-desktop/setup.md) | [Claude Desktop contract](book/integrations/claude-desktop-bundle.md) | -| SARIF & code scanning | [Export](guide/integrations/sarif/export.md) | [SARIF contract](book/integrations/sarif.md) | +| Surface | Guide (how to) | Contract (guarantees) | +|-----------------------|---------------------------------------------------------------------|-----------------------------------------------------------------------| +| VS Code extension | [Setup](guide/integrations/vscode/setup.md) | [VS Code contract](book/integrations/vs-code-extension.md) | +| Cursor plugin | [Install & skills](guide/integrations/cursor/install-and-skills.md) | [Cursor contract](book/integrations/cursor-plugin.md) | +| Claude Code plugin | [Install](guide/integrations/claude-code/setup.md) | [Claude Code contract](book/integrations/claude-code-plugin.md) | +| Codex plugin | [Install](guide/integrations/codex/setup.md) | [Codex contract](book/integrations/codex-plugin.md) | +| Claude Desktop bundle | [Setup](guide/integrations/claude-desktop/setup.md) | [Claude Desktop contract](book/integrations/claude-desktop-bundle.md) | +| SARIF & code scanning | [Export](guide/integrations/sarif/export.md) | [SARIF contract](book/integrations/sarif.md) | ## Reports diff --git a/docs/privacy-policy.md b/docs/privacy-policy.md index 8c76f1e2..47dc60a5 100644 --- a/docs/privacy-policy.md +++ b/docs/privacy-policy.md @@ -2,6 +2,7 @@ owns: privacy policy text. does-not-own: MCP read-only contract (→ book/25, book/21). rule: cross-link to contracts, do not restate them. 
--> + # Privacy Policy This page describes the privacy behavior of CodeClone's local integration diff --git a/docs/publishing.md b/docs/publishing.md index fcbde5b1..cc5f8946 100644 --- a/docs/publishing.md +++ b/docs/publishing.md @@ -3,6 +3,7 @@ local preview commands, maintenance rules. does-not-own: storefront sync (→ releasing.md), contract content (→ book/). rule: split from the former combined publishing page. Do not re-merge. --> + # Publishing the Docs Site ## Purpose diff --git a/docs/terms-of-use.md b/docs/terms-of-use.md index cecf4c7e..8d686fa1 100644 --- a/docs/terms-of-use.md +++ b/docs/terms-of-use.md @@ -2,6 +2,7 @@ owns: terms of use text. does-not-own: MCP read-only contract (→ book/25), security model (→ book/21). rule: cross-link to contracts, do not restate them. --> + # Terms of Use These terms describe the intended operational and integration boundaries of diff --git a/uv.lock b/uv.lock index b95c530c..46ad84da 100644 --- a/uv.lock +++ b/uv.lock @@ -548,62 +548,59 @@ toml = [ [[package]] name = "cryptography" -version = "48.0.1" +version = "49.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "cffi", marker = "platform_python_implementation != 'PyPy'" }, { name = "typing-extensions", marker = "python_full_version < '3.11'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/12/45/870e7f4bef50e5f53b9f51d4428aee5290eedf58ba443f16b1ebb7ab8e66/cryptography-48.0.1.tar.gz", hash = "sha256:266f4ee051abb2f725b74ef8072b521ce1feacf685a3364fa6a6b45548db791a", size = 832989, upload-time = "2026-06-09T22:32:31.8Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/1b/bc/ee4137cbbe105652c0ee4252792b78fc8e7afa4b8e61d9d5dc05a7f45731/cryptography-48.0.1-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:3e4a1a3232eef2e6c732827d5722db29a0cc8b27af2a4d865b094cf954be9ca1", size = 8008324, upload-time = "2026-06-09T22:31:00.702Z" }, - { url = 
"https://files.pythonhosted.org/packages/d5/85/6379d42181bfc713094f081360fc5784d6c816b599d45e7f082502d173ce/cryptography-48.0.1-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:32143b24adb918f078134e1e230f1eb8cc04886b92c28b5f0041aaf3e5699225", size = 4696243, upload-time = "2026-06-09T22:32:33.446Z" }, - { url = "https://files.pythonhosted.org/packages/9c/87/c85d147b53323c7eb4d850920c8901377323c2a0ff8d79c262d4fee89aa2/cryptography-48.0.1-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f0d27a5696721ef7a672b8c810f6aded391058e0b9486e63e6d93baf765da691", size = 4713235, upload-time = "2026-06-09T22:31:40.141Z" }, - { url = "https://files.pythonhosted.org/packages/79/58/67cbf8cf1ee7c54b439ca07bbecf8362c07afc11a3724fea70f745784add/cryptography-48.0.1-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:eb86ce1af36fe65041b6db9a8bb064ee621a7e5fded0f80d475ec243477cd242", size = 4702323, upload-time = "2026-06-09T22:31:42.191Z" }, - { url = "https://files.pythonhosted.org/packages/89/c6/24266ac10c47f6cd2a865f4446062b466da1d1f10b27189eac00e61bf0c9/cryptography-48.0.1-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:b024e784ad6c077ee0147b35ea9cbfc1e34e1fd4c1dcca214c2794d73a12df08", size = 5300085, upload-time = "2026-06-09T22:31:58.703Z" }, - { url = "https://files.pythonhosted.org/packages/d2/bb/cc4b78784f97efc8c5874c2a9743708d172be6663024b34a0467885ae0c8/cryptography-48.0.1-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3752f2dbc8f07a30aad2932c986cea495b03bb554887828225da104f732852b6", size = 4746137, upload-time = "2026-06-09T22:31:31.01Z" }, - { url = "https://files.pythonhosted.org/packages/1f/52/0c44de3f5267f8fbe8e835138017522a333436166e406f0db9b9e6e3033f/cryptography-48.0.1-cp311-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:bd81490cd5801d755cf97bb68ac191f14b708470b1c7cf4580f669b9c9264cd8", size = 4333867, upload-time = "2026-06-09T22:32:28.096Z" }, - { url = 
"https://files.pythonhosted.org/packages/9a/2e/772d7adbfa931537bc401640b7cac9976bff689bda187833e5d63b428e49/cryptography-48.0.1-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:66fd0771e7b9c6dcd44cf1120690d2338d16d72795cf40cae2786a39eba65429", size = 4701805, upload-time = "2026-06-09T22:31:38.284Z" }, - { url = "https://files.pythonhosted.org/packages/f8/a3/b06844f303873493c963caf581c04df31c7035e0c1b0f02c4814d319ec80/cryptography-48.0.1-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:3fd2ca57062b241c856670b073487d2e86c4637937ca5601e48f97bf8e11fc8f", size = 5258461, upload-time = "2026-06-09T22:31:04.187Z" }, - { url = "https://files.pythonhosted.org/packages/9f/13/8b765e2e12b07c74941caadb9d1c8fdc006c4dfbf2b8f2d610519758954d/cryptography-48.0.1-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:0ee6ea481db1ab889cba043ec1eda17bb9c1ea79db6722f779c3667f9f70322f", size = 4745488, upload-time = "2026-06-09T22:32:30.07Z" }, - { url = "https://files.pythonhosted.org/packages/2e/aa/48972bce55049b32a94f4907eda4d75fa385aad8a39506cc2fc72196ecf0/cryptography-48.0.1-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:f2ceef93cb096aa3c4cc4b5c94ca6131f9196d28c64d6111533402a9b2054d41", size = 4830256, upload-time = "2026-06-09T22:31:43.868Z" }, - { url = "https://files.pythonhosted.org/packages/47/a2/e5079a032fb85cf6005046ca92bbd78b0c82dad2b5751ab8c311659da06f/cryptography-48.0.1-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:9bd3f92d76217892b15df84ca256c2c113d386fdda7a7d8691aeeced976507c6", size = 4979117, upload-time = "2026-06-09T22:31:05.845Z" }, - { url = "https://files.pythonhosted.org/packages/b7/a0/8f50cae9c74e718ed769d63ed5c74bd0ea830c9550a74629cebd1b9c7bc7/cryptography-48.0.1-cp311-abi3-win32.whl", hash = "sha256:b9a32b876490d66c8bcc9963ef220199569748434ab01a9d6aaeabf88e7f5158", size = 3304154, upload-time = "2026-06-09T22:32:16.845Z" }, - { url = 
"https://files.pythonhosted.org/packages/c5/69/0572c77dbace6fef72f33755bd52ea399c71367250d366237f8691826b9e/cryptography-48.0.1-cp311-abi3-win_amd64.whl", hash = "sha256:39489bfca54c7a1f6b297efcd8bc608ab92d16c4ca631b0cad4da46724588b24", size = 3817138, upload-time = "2026-06-09T22:32:00.388Z" }, - { url = "https://files.pythonhosted.org/packages/42/06/3e768b4c3bc78201583fa35a0e18f640dd782ff41afba88f8545481a8874/cryptography-48.0.1-cp314-cp314t-macosx_10_9_universal2.whl", hash = "sha256:f817adc181390bd54f2f700107a7419040fb7c1bdf2fc26f36551a06a68c3345", size = 7989830, upload-time = "2026-06-09T22:31:07.8Z" }, - { url = "https://files.pythonhosted.org/packages/8a/13/6476736484b94041110c8340a3eb63962fea4975baea8cb4a512adb44d4d/cryptography-48.0.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d5d30989c6917b478b5817902e85fddaea2261efa8648383d965381ccb9e1ac4", size = 4689201, upload-time = "2026-06-09T22:31:09.745Z" }, - { url = "https://files.pythonhosted.org/packages/79/62/65a87f34d2a431546e2509b85d55e8c90df86d668f6731da64d538512ac2/cryptography-48.0.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:df637c05205ea7c1d7fbcbe54bbfea648a52951155f997af13d895d0ecc96991", size = 4702822, upload-time = "2026-06-09T22:32:24.409Z" }, - { url = "https://files.pythonhosted.org/packages/7f/59/810b5204b0a9b10f4b6bc06bd551a8b609803cd931806bc3b71884b225e5/cryptography-48.0.1-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:869c3b8a53bfe27147832df48b32adadf558249d50e76cb3769d40e986b13265", size = 4694875, upload-time = "2026-06-09T22:32:08.737Z" }, - { url = "https://files.pythonhosted.org/packages/24/dc/d8ca05ffea724eec6d232ea6f18e74c269eb6bdfdcc9bfba689790d1325f/cryptography-48.0.1-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:e361afba8918070d376df76f408a4f67fec0ee9cff81a99e48fe9a233ef59e17", size = 5290385, upload-time = "2026-06-09T22:31:15.212Z" }, - { url = 
"https://files.pythonhosted.org/packages/03/8c/3be6cb4da181f5bb6c19cf560c2359d60644a6b5fc5b57854e528f47b296/cryptography-48.0.1-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:d069066deead00ac7f090be101be875a06855908f7ec004c27b8fefb4acfb411", size = 4737082, upload-time = "2026-06-09T22:32:22.66Z" }, - { url = "https://files.pythonhosted.org/packages/aa/f6/d5f60a5a1434dbfd949e227fd0065d194c7e6b6ac526b17f5c06152b8231/cryptography-48.0.1-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:09f73a725d582cef64b91281a322cd798d14a33b2b6f2b7ad9531dc336d84c02", size = 4325328, upload-time = "2026-06-09T22:32:10.777Z" }, - { url = "https://files.pythonhosted.org/packages/17/b7/ba75dd947a14b6ad907b01ae8f6b5b348cdd1b48142f0063dee9e20c1d9d/cryptography-48.0.1-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:15254441469dd6bf027039453288e2072124f8b6603563f5d759e1c9b69273fa", size = 4694530, upload-time = "2026-06-09T22:31:53.105Z" }, - { url = "https://files.pythonhosted.org/packages/62/29/50d6b9e8aff12d8b67afaeb3569335e32dc83a5723e3bbded24fdac9f809/cryptography-48.0.1-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:8ace4507d1e6533c125f4fac754f8bb8b6a74c08e92179dabd7e16571a3efbf3", size = 5245046, upload-time = "2026-06-09T22:31:25.774Z" }, - { url = "https://files.pythonhosted.org/packages/9f/04/618f4115cfc0add0838c82507aa18a346089428da8653ad38b3ff36f5cb3/cryptography-48.0.1-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:b4e391975f038e66432328639620a4aff2d307513b004f1ca06d6225bced815c", size = 4736660, upload-time = "2026-06-09T22:32:12.676Z" }, - { url = "https://files.pythonhosted.org/packages/24/9c/06e062462a0de28a3b3911322eded4c16deb9f441b1b7575d3dc59488ab5/cryptography-48.0.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:42fcd8e26fe555d9b3577a135f5091fefa0aa4e99129c23fb56787a1bd4ada72", size = 4822229, upload-time = "2026-06-09T22:31:17.062Z" }, - { url = 
"https://files.pythonhosted.org/packages/f4/be/0561971eaaee4b8a0e7d5113c536921063ab91aaf23278ac374eaf881e11/cryptography-48.0.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:c1400da5e32a43253392277eac7490a60e497d810a63dd5608d71bbd7af507c9", size = 4966364, upload-time = "2026-06-09T22:31:32.842Z" }, - { url = "https://files.pythonhosted.org/packages/a4/27/728c77876f12b000820b69ae490f3c4083775e79e07827e9e60be07ad209/cryptography-48.0.1-cp314-cp314t-win32.whl", hash = "sha256:0df56b056bc17c1b7d6821dfa65216e62bd232d8ab05eb3db44e71d235651471", size = 3278498, upload-time = "2026-06-09T22:31:29.154Z" }, - { url = "https://files.pythonhosted.org/packages/06/e3/79a612c6d7b1e6ee0edd43633d53035bec2cfb78c82b76f7864f39e36f34/cryptography-48.0.1-cp314-cp314t-win_amd64.whl", hash = "sha256:9de21387aa95e2a895823d0745b430bed4f33503ba9ab5e0b5311f33e37d66d2", size = 3798790, upload-time = "2026-06-09T22:31:56.697Z" }, - { url = "https://files.pythonhosted.org/packages/ca/6c/00fa2a95997164c8b2072ce327c23d4ab20809ccc323ea5fab91e53a4bba/cryptography-48.0.1-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:4fdc69f8e4316bcf0c8c8ec1f26f285d12e8142d88d96c876a59a03be3f6ae67", size = 7987408, upload-time = "2026-06-09T22:32:20.777Z" }, - { url = "https://files.pythonhosted.org/packages/b0/d9/45f309a7e4e5f3f8f121d6d3be9e94024a7726ec598d6e08ae04edb2f04d/cryptography-48.0.1-cp39-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:48fe40804d4caa2288f24e70ca8c64c42dd826da0ad7e4f1b41b2128d679e6c8", size = 4690196, upload-time = "2026-06-09T22:31:54.74Z" }, - { url = "https://files.pythonhosted.org/packages/5f/9f/a1bc8bcc798811b8527eb374bbccf30a3f3e806829d967118222bf1125eb/cryptography-48.0.1-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:86be3b1b0b6bf09482fb50a979c508d2950ed95f5621ec77f4e385962006b83a", size = 4696782, upload-time = "2026-06-09T22:31:45.615Z" }, - { url = 
"https://files.pythonhosted.org/packages/66/c2/81a4fb4e4373c500bb526bc337ac5719dd31dd15b970b84a238168c6aa08/cryptography-48.0.1-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:4ab0a343c807bbcd90c971cd1ecf072937cd01847a9e002bef88fb47ac6be577", size = 4696618, upload-time = "2026-06-09T22:31:11.564Z" }, - { url = "https://files.pythonhosted.org/packages/e5/0b/aa68b221dde92d09cb29a024ede17550ee21e77a404e59fc093c82bb51e1/cryptography-48.0.1-cp39-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:9621de99d2da096006b629979efd8ae7eb2d8b822488d0c89ee4000c306c59b1", size = 5289970, upload-time = "2026-06-09T22:31:20.368Z" }, - { url = "https://files.pythonhosted.org/packages/78/13/fba657f958d2af66ea959a4ba01212632089249d34af1ae48054136344d7/cryptography-48.0.1-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:88c852a0ae366e262e5a1744b685e6a433dc8788dd2a277e418bf4904203609d", size = 4731873, upload-time = "2026-06-09T22:31:22.253Z" }, - { url = "https://files.pythonhosted.org/packages/4c/4c/9a964756d24a26b3e34dfcb16f961b89838786e6700b635b0d1e3adff4b6/cryptography-48.0.1-cp39-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:43c5835e2cb98c8733d86f57d6fc879b613f5c3478607281c3e36daffc6dd8a6", size = 4330804, upload-time = "2026-06-09T22:31:36.56Z" }, - { url = "https://files.pythonhosted.org/packages/4b/0f/a10f3a6eb12950a10e3a874070283aa2dd5875b2bfd15fad8a3e17b3f13e/cryptography-48.0.1-cp39-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:fe0180af5bf9236518a087e35bf2d9a347d5f5f51e63c579d683ddff424e3d46", size = 4696217, upload-time = "2026-06-09T22:31:13.351Z" }, - { url = "https://files.pythonhosted.org/packages/f3/6f/5cd12f951165ea73ef85266775d97e4c763b2474ccfd816dd69d3a18d6f8/cryptography-48.0.1-cp39-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:b7a2d1a937a738a881737cec135a38bb61470589b17515b9f73f571d0ae10401", size = 5245252, upload-time = "2026-06-09T22:32:02.193Z" }, - { url = 
"https://files.pythonhosted.org/packages/68/ab/8aaa12e4516ec4464033ab79b6f3b592bd5a92102467c4ace8a0d970203f/cryptography-48.0.1-cp39-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:b74ca3b8e5ecdd833bf6a002ca41b4793bb27fb8f1c06ffaf2643c9e9140e31b", size = 4731388, upload-time = "2026-06-09T22:32:04.019Z" }, - { url = "https://files.pythonhosted.org/packages/1b/24/50027ea4dca85ec1f40688f3c24fb32ccacd520583c9592c3cc95628e6fb/cryptography-48.0.1-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:2c37f2461406063b417837f5f3daab668652acd82423efcd7f0a9f04be972de1", size = 4824186, upload-time = "2026-06-09T22:32:18.707Z" }, - { url = "https://files.pythonhosted.org/packages/52/41/04cb5eb17085ade6f50cc611fb657df6a0f5885350de8764ece89c050197/cryptography-48.0.1-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:86fe77abb1bd87afb251d4d02ada7ecf53a32cee9b67d976abb2e45a13297475", size = 4964539, upload-time = "2026-06-09T22:31:18.793Z" }, - { url = "https://files.pythonhosted.org/packages/36/bf/ed70785c496e89d7e73b7cda2d21f2447fd6d4e821714b8d04ff217fed92/cryptography-48.0.1-cp39-abi3-win32.whl", hash = "sha256:6b2c0c3e6ccf3ade7750f836ef3ee36eea250cc467d45c256895573ac08cc6f1", size = 3282307, upload-time = "2026-06-09T22:30:53.162Z" }, - { url = "https://files.pythonhosted.org/packages/b3/ff/371ea7d252656ee1eb6d83eeeef3d1d0c6baf1d6497687d081ea03814670/cryptography-48.0.1-cp39-abi3-win_amd64.whl", hash = "sha256:9a49ca6c81417f6a5edb50375a60cccdd70fa0a91a5211829dbea74eba94d2ac", size = 3793408, upload-time = "2026-06-09T22:32:15.191Z" }, - { url = "https://files.pythonhosted.org/packages/a9/d3/eb4e394e587341fdad09a09101fa76478ead3a78b0ad63e55c22f0d75c02/cryptography-48.0.1-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:08a597acce1ff37f347400087776599e2348a3a8bc53b44120e463cd274efe4a", size = 3951747, upload-time = "2026-06-09T22:31:23.871Z" }, - { url = 
"https://files.pythonhosted.org/packages/e0/4a/3f43451b4f858bfceaaaffc649e6e787e8d4fb332a1d443af39ab02cc8f1/cryptography-48.0.1-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:735824ec41b7f74a7c45fb1591349333e4c696cb6c044e5f46356e560143e4cd", size = 4641226, upload-time = "2026-06-09T22:31:02.532Z" }, - { url = "https://files.pythonhosted.org/packages/73/4e/855584c2c23b09e4ce2d3b9c30e983e679cd60b068c513c6bbdb91e11782/cryptography-48.0.1-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:92a46e1d638daa264ba2971c0b0489c9409787943efae4d60ffda3d091ef832c", size = 4668958, upload-time = "2026-06-09T22:32:06.213Z" }, - { url = "https://files.pythonhosted.org/packages/42/3b/d35750e41d803d1e516fd6d6011f065424924da7af1748cef4cc9cb3ede1/cryptography-48.0.1-pp311-pypy311_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:7e234ac052af99f2700826a5c29ea99d9c1b1f80341cde62d11c8154dc8e0bd9", size = 4640793, upload-time = "2026-06-09T22:32:26.331Z" }, - { url = "https://files.pythonhosted.org/packages/ca/aa/cdb7181fe865285e87e96825aaab239400f1de0c3bfba9bd9769b79f1a92/cryptography-48.0.1-pp311-pypy311_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:33842cf0888951cef5bc7ac724ab844a42044c1727b967b7f8997289a0464f92", size = 4668505, upload-time = "2026-06-09T22:31:27.534Z" }, - { url = "https://files.pythonhosted.org/packages/5d/8c/ce3823c06c2804f194f9e64f0d67fa3f4094a39f2bb1a990cd03603af8fc/cryptography-48.0.1-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:6184ca7b174f28d7c703f1290d4b297217c45355f77a98f67e9b7f14549ac54a", size = 3742204, upload-time = "2026-06-09T22:31:34.773Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/1f/99/d1c90d6041656cc6ee229dc99cd67fd0cd5aec3c5f7d72fffc27cc750054/cryptography-49.0.0.tar.gz", hash = "sha256:f89660a348f4f78a92366240a61404e337586ef7f5909a2fef59ca88ef505493", size = 854345, upload-time = "2026-06-12T20:02:30.512Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/9b/22/adf66990e63584a68dfb50c24f48a125c07b1699899381c8151e63ed458c/cryptography-49.0.0-cp311-abi3-macosx_11_0_arm64.whl", hash = "sha256:966fe0e9c67490071f14c0d2b1cb2dfb3023c5ce39457343931415f08382f2db", size = 4032100, upload-time = "2026-06-12T20:02:32.143Z" }, + { url = "https://files.pythonhosted.org/packages/09/41/3797cfaf69cae04a13ee78ebd83f0678d9c02b4779d21ce24445326f1a69/cryptography-49.0.0-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:36d1709f992593689b45bda411498d62c6e365f2ca00b84657d4dadd24de16db", size = 4692978, upload-time = "2026-06-12T20:01:21.305Z" }, + { url = "https://files.pythonhosted.org/packages/e6/8b/43011f7ebe515a8aa20d61f290a326cd890c2e738e16e59eaff8d9c3a412/cryptography-49.0.0-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0e959b578856a3924bc0cbb710fc12c387b9412a951389f3ca61704a9e25f325", size = 4716422, upload-time = "2026-06-12T20:01:48.566Z" }, + { url = "https://files.pythonhosted.org/packages/4a/91/01ce7303a4579e6d3a6abef01bd322848e9ea7a219adcabc5048b9033571/cryptography-49.0.0-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:53ecee2e23f7169b6117e99fc8a944e5e50f79e69758a83b52a00cb98ab2b2d2", size = 4700503, upload-time = "2026-06-12T20:02:47.091Z" }, + { url = "https://files.pythonhosted.org/packages/62/99/a2c95cf8293f07491e9e27c20cc4dcd18176d944e674679adeb1d0173fd6/cryptography-49.0.0-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:2eda353d8a27bcbcaa4cbed18994a74ab4d19a2ca897db188ea269ab9b71419b", size = 5309779, upload-time = "2026-06-12T20:02:08.987Z" }, + { url = "https://files.pythonhosted.org/packages/20/2c/0622f20ff02b2ef32558733443805dc82fd4c275be01b2d19d14676f3a1b/cryptography-49.0.0-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:2afe9051da7ae7bd5905da5a949280c7d2bb75682e188f650a9d0f2756b834c6", size = 4749683, upload-time = "2026-06-12T20:02:03.335Z" }, + { url = 
"https://files.pythonhosted.org/packages/a3/5b/c5246635d5fd3b64e0d45ae10e99fd32fe9676a79915ccfe5a61ba9af1a5/cryptography-49.0.0-cp311-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:0b82e28ee398a386f0807bba7884d30f25218855690f45115831bcce5d90822c", size = 4337874, upload-time = "2026-06-12T20:02:54.323Z" }, + { url = "https://files.pythonhosted.org/packages/6d/88/05563c7fe2e914e87d1a536d06fe83e66b4e1d95cb593e05aea375531da8/cryptography-49.0.0-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:ccac2bfebc306b862133e3bb71f3f6ee8bb525240089b2d952e4144b3a6d5da7", size = 4700283, upload-time = "2026-06-12T20:01:34.822Z" }, + { url = "https://files.pythonhosted.org/packages/c4/b6/d7696e4e890d6ae1469935164c9e5215c557671cb78d6e3f458ccceaa632/cryptography-49.0.0-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:d0527ce944105f257f605a827d6ebead966c752038b6e8656abb9c5edee6fc68", size = 5265844, upload-time = "2026-06-12T20:01:24.09Z" }, + { url = "https://files.pythonhosted.org/packages/a9/3c/f3ad17eecc1a57b0ba236dc01f90e783c51f4a2f35f64777cc4f47a184b2/cryptography-49.0.0-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:cbc77da8c523d5abd028635ba850a6966fcee2c82e2bf65a41d1d8afe0f98be9", size = 4749290, upload-time = "2026-06-12T20:01:30.848Z" }, + { url = "https://files.pythonhosted.org/packages/4f/01/339573cf1023163a400b0b5d16f6d507de413b9f60be6fd1b77feeaf6737/cryptography-49.0.0-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:b87e65d263b3e5d3bb92a57e2a6638e2f31110fa7aa890c7b2dbba42248d0a3f", size = 4834612, upload-time = "2026-06-12T20:01:29.246Z" }, + { url = "https://files.pythonhosted.org/packages/71/fd/577302e213a1be9468f92d1afef66fcf1ef83d516819d9992ca547f592bd/cryptography-49.0.0-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:66ec79c3904820572d7e987abdf304281f141d37ad9a489b8e97066e7b9b6459", size = 4980804, upload-time = "2026-06-12T20:01:42.853Z" }, + { url = 
"https://files.pythonhosted.org/packages/1f/09/f42b1d190c5ba75f72062a387f8030d1d75f6ab035788f1d9c4b01de6525/cryptography-49.0.0-cp311-abi3-win_amd64.whl", hash = "sha256:e5dfc1e64de5677cec922ffa8da89c546d0415bf6efdf081842e5d44c84e1f0e", size = 3810026, upload-time = "2026-06-12T20:02:39.262Z" }, + { url = "https://files.pythonhosted.org/packages/ec/9e/db72b3ae7fc9cfad53e630e56c6ae83b9b6ff0bf3718ffb8012d20b3aabf/cryptography-49.0.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:73a205dce83953d131a4aa1e0fd917a2fd1c5b1eef251e9d7152efefcbf5caf7", size = 4013892, upload-time = "2026-06-12T20:02:10.735Z" }, + { url = "https://files.pythonhosted.org/packages/86/12/c48a424f38db03027be9f7ed5c7dc5de9933dbee992865f98b13727a009d/cryptography-49.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:196ecd6a36e4e9aa10270393bb98d8df88fccee0bf1e5128b91ae4eb4375896d", size = 4678835, upload-time = "2026-06-12T20:02:48.743Z" }, + { url = "https://files.pythonhosted.org/packages/68/28/8a3ad4653662c93fc44dc4e5d8fd374c25c42e07b34bbfbadf49cf57a5a8/cryptography-49.0.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7abcee80084cda3f7691f3eb1ce480d8df49cec637b429aa35986c1de71738aa", size = 4697239, upload-time = "2026-06-12T20:02:56.03Z" }, + { url = "https://files.pythonhosted.org/packages/a8/b2/2193fc74f81aee4f9b62733133b73b5176718932ed8f2e4b03fa040480a6/cryptography-49.0.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:4ae387c9cb68ea569ca17e490d66d8142b81c3cc814bf179974b7d146e490bbb", size = 4685593, upload-time = "2026-06-12T20:02:50.666Z" }, + { url = "https://files.pythonhosted.org/packages/47/f1/1d3eaa243bfc5de4a187b22aa8c048b3e4980bfbe830ac46e6bac2e66947/cryptography-49.0.0-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:f37d847238971164fdbc68ade6f6574aecc9c0af714190e2083429ff68f4ce9d", size = 5289961, upload-time = "2026-06-12T20:01:46.468Z" }, + { url = 
"https://files.pythonhosted.org/packages/58/39/2d51306721330c486495853eda1c567880ff036de15a14c4b74f399934af/cryptography-49.0.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:c2bc30226390d60ea19d9f82b19db005fe0452154a23c1c410c12ea801e43561", size = 4731145, upload-time = "2026-06-12T20:02:16.832Z" }, + { url = "https://files.pythonhosted.org/packages/17/50/983e838c7fd0d87fd8c969bcdd328edaf5f756e38df5281637424c155873/cryptography-49.0.0-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:07cab27cc7b7e0fd28e5e26bb9eeedde5c135c868b46de4a27845abe94af6122", size = 4321719, upload-time = "2026-06-12T20:02:52.611Z" }, + { url = "https://files.pythonhosted.org/packages/a7/f5/8f571d7e27c55bce9f76f026143bcb1e040a4233149ecca0bea5fa5dd5f7/cryptography-49.0.0-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:b20133d204d2bb56ba047642199603876c872026ca53e79c35b83772ab2cc505", size = 4685209, upload-time = "2026-06-12T20:02:07.282Z" }, + { url = "https://files.pythonhosted.org/packages/e7/84/0e27016a6fc5a0886f797018b26aa42f40c09a82332bff77822a451deaaa/cryptography-49.0.0-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:b970c6da94d5bb18629db453d14f2a1300f6bf59b61e9b82377931ef95504866", size = 5246285, upload-time = "2026-06-12T20:01:32.439Z" }, + { url = "https://files.pythonhosted.org/packages/11/2d/5e1fb307cb5931881516b464c98774b3f2c36b5d4bb9a2830253cf553cad/cryptography-49.0.0-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:d8ecde755e2e91bf773fc94e8c9d730cd7f2007004cb492263a794ec3899a1c8", size = 4730441, upload-time = "2026-06-12T20:02:01.469Z" }, + { url = "https://files.pythonhosted.org/packages/e4/c0/bff5a02ee731d207d6a1ed51732549d8c53d2bc8da1d10ec6f2844201d68/cryptography-49.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e3fb64c420688e5319ae25113a354015abbd8dffbfbc41781a1ea66fc7622ac3", size = 4815869, upload-time = "2026-06-12T20:01:36.574Z" }, + { url = 
"https://files.pythonhosted.org/packages/b9/26/814681d14248d95d73d5c3eea0c39a94eb8302df966f670a2c60de90974b/cryptography-49.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:32703d93296f5c1f4b53349ad3a250c2cae0fdecd3a3dd5d47e616d8d616af27", size = 4960948, upload-time = "2026-06-12T20:02:18.688Z" }, + { url = "https://files.pythonhosted.org/packages/4c/fe/93ecac273d3738939d023612ad12cca9a3740a5345d69fda04134c43fd96/cryptography-49.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:33cd0565932807baddb67b96dbee92f2c374b5c89dee09fd74079aeb8c8dba61", size = 3799153, upload-time = "2026-06-12T20:01:39.059Z" }, + { url = "https://files.pythonhosted.org/packages/19/2a/5bb823f5bedcf80718cea7fbc95ec5515cca3769633c4b01a32be7f30e7c/cryptography-49.0.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:ec5e529fb80935c94fe7b729f9972b50e351a0e6b50aa294fd5cabb109fcc29a", size = 4025947, upload-time = "2026-06-12T20:01:25.745Z" }, + { url = "https://files.pythonhosted.org/packages/3d/df/40577043ca124e17012f408ddddaeb213b856336ac82ddb3bc915f39e29f/cryptography-49.0.0-cp39-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f78ff2c9ed8dc2d036b0f4d640e22522213d047c1b14e61205a7e55c80a494d4", size = 4692429, upload-time = "2026-06-12T20:01:53.628Z" }, + { url = "https://files.pythonhosted.org/packages/2c/99/2d13299eb3dd27b02dcfaafcc91d6b5cb3329f7cbd6d8f51921acd566c1a/cryptography-49.0.0-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:35b151772baff2c74cba7fa290ceaff4c3b11c0c881eb93eb5dbc05a7cfbba18", size = 4700968, upload-time = "2026-06-12T20:02:45.383Z" }, + { url = "https://files.pythonhosted.org/packages/a5/4d/9c0cd02f95e2602dd5e563da149ee0830abef3537be8b34dc56281ebe27a/cryptography-49.0.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:0f21641cf4b30fca7aee061ced0ec7ad7b073518088b7c9969a297c0ae796c69", size = 4697758, upload-time = "2026-06-12T20:01:41.13Z" }, + { url = 
"https://files.pythonhosted.org/packages/24/01/186c825898477d77e2324d5360fefe622ff1d8d1963ec0554e2cada8ec77/cryptography-49.0.0-cp39-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:9e82dcc8e56052715fb18b2429e3bca4823b1629136a2084fc45a9a5cecb9b64", size = 5298863, upload-time = "2026-06-12T20:02:24.579Z" }, + { url = "https://files.pythonhosted.org/packages/b8/7b/62cbbab75d0659865bf0273790031544a0b16c8072d258f9428dcd8190dc/cryptography-49.0.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:6f2debedf9ca60cf1d5bd466475638af5130f89965605cd818484d19987d3a21", size = 4735983, upload-time = "2026-06-12T20:01:50.14Z" }, + { url = "https://files.pythonhosted.org/packages/6c/72/3e798c064bc39e471008075d0f9bc9daf77a80879c092e4a8e170c585ed4/cryptography-49.0.0-cp39-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:8c25ceb16df5b9435f3f6a9829204985b0e0cbee3b48aacd432c7d2c850b44d9", size = 4334173, upload-time = "2026-06-12T20:01:44.743Z" }, + { url = "https://files.pythonhosted.org/packages/f0/ee/6fca21d1ac73e06f8bef71940abfd4d2f6472b4bca284d770f32bd4086f6/cryptography-49.0.0-cp39-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:28d8b15e6275f12c8a207dc309dfa957903c927d08d0cc937ee3f63f200693cc", size = 4697298, upload-time = "2026-06-12T20:02:20.918Z" }, + { url = "https://files.pythonhosted.org/packages/67/d0/a5fcd3515f0bae49a7b6d0413cc1bdccdcc1fc0047037a0d480642cdc5d6/cryptography-49.0.0-cp39-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:6fc361c34fb6aac015ce19435876635e5c6d21db31998b0920f675f131e043b8", size = 5254338, upload-time = "2026-06-12T20:02:22.737Z" }, + { url = "https://files.pythonhosted.org/packages/a0/84/84fe36f19caf857d61cb7fc9c63035a47ffabd84ea12d1d393148efa3615/cryptography-49.0.0-cp39-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:2400ef9c9e2299a25614eb1dea3db54a69b1349efd043bfac9c67630d136df36", size = 4735650, upload-time = "2026-06-12T20:02:41.389Z" }, + { url = 
"https://files.pythonhosted.org/packages/6c/a0/db537264e234f7273a73ec020873d6d6b39dfd8a53db78b550ca8320440e/cryptography-49.0.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:67e1d20ad9ef3a563c59ef22e7a8a0b8210bd26604369ea4a30a7c66aefe504e", size = 4834820, upload-time = "2026-06-12T20:01:51.847Z" }, + { url = "https://files.pythonhosted.org/packages/93/77/8df9eb486495979bccecd1062e2eaf435250e84437040295b57d09048b0b/cryptography-49.0.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:42b0684e0e40cf26122427802486f6d93aea593612603a94fbf260c7eb1e9c1b", size = 4967968, upload-time = "2026-06-12T20:02:12.524Z" }, + { url = "https://files.pythonhosted.org/packages/c2/e6/f60198ea8d9dfa15fff9ed4ca02ce362f6eadd9ba757dcc50634c4257b63/cryptography-49.0.0-cp39-abi3-win_amd64.whl", hash = "sha256:026ac7423e6fa66872d3bf889be5974507da3944f866f704fa200eadacd00001", size = 3785547, upload-time = "2026-06-12T20:02:26.847Z" }, + { url = "https://files.pythonhosted.org/packages/63/d3/4a83af35d65e3fad632c926fad684c193ea4398569ccb0bbbc7fe8f5dc9a/cryptography-49.0.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:fc1e275c2f1d97b1a6450b8b0ea3ebfa6e087a611c2b26cb2404d48588abab7b", size = 3993685, upload-time = "2026-06-12T20:02:14.883Z" }, + { url = "https://files.pythonhosted.org/packages/d6/a7/f9dac0ab7f80368c56993a7bf638ef9935f825c91902798481fac0898138/cryptography-49.0.0-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:c83782480a4a9da4d0feb51950131ba32e12e70813848b3343f6e18c28a66838", size = 4676239, upload-time = "2026-06-12T20:02:28.793Z" }, + { url = "https://files.pythonhosted.org/packages/d7/70/2ba3769dd0ae167e2f33dfa9592d45db6ff9a61d62ca1a5b3d1bdd09068f/cryptography-49.0.0-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:b39efa323140595abd3ecca8529d321ae50f55f3aa3ba9cc81ea56a6011953d5", size = 4715584, upload-time = "2026-06-12T20:01:27.495Z" }, + { url = 
"https://files.pythonhosted.org/packages/94/64/2923570ac1c0bd3a737aa366ac3abbbbde273042308b8cde95e2364a6e6a/cryptography-49.0.0-pp311-pypy311_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:b47db11c2c3525083296069b98ac5221907455e989ae0c2e3008bde851921615", size = 4675885, upload-time = "2026-06-12T20:01:55.49Z" }, + { url = "https://files.pythonhosted.org/packages/ab/f8/614dc7e051418cfe53d55173c1e24c6b0085e89996fe90508c2fdf769aef/cryptography-49.0.0-pp311-pypy311_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:084ef1af862eb07ec46d25f68689f2102a9fc0e05ce7b80f14f5fe51e4eef0f6", size = 4715449, upload-time = "2026-06-12T20:02:05.469Z" }, + { url = "https://files.pythonhosted.org/packages/aa/50/a9caea39ad19c431c1a3f8a31114df65b260cdfe67786b6c7e7c040c4c44/cryptography-49.0.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:be9fcb48a55f023493482827d4f459bd263cc20efde64f204b97c123201850c6", size = 3783731, upload-time = "2026-06-12T20:02:43.319Z" }, ] [[package]] @@ -650,7 +647,7 @@ name = "exceptiongroup" version = "1.3.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "typing-extensions", marker = "python_full_version < '3.11'" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/50/79/66800aadf48771f6b62f7eb014e352e5d06856655206165d775e675a02c9/exceptiongroup-1.3.1.tar.gz", hash = "sha256:8b412432c6055b0b7d14c310000ae93352ed6754f70fa8f7c34141f91c4e3219", size = 30371, upload-time = "2025-11-21T23:01:54.787Z" } wheels = [ @@ -852,7 +849,7 @@ name = "importlib-metadata" version = "9.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "zipp", marker = "python_full_version < '3.12'" }, + { name = "zipp" }, ] sdist = { url = "https://files.pythonhosted.org/packages/a9/01/15bb152d77b21318514a96f43af312635eb2500c96b55398d020c93d86ea/importlib_metadata-9.0.0.tar.gz", hash = 
"sha256:a4f57ab599e6a2e3016d7595cfd72eb4661a5106e787a95bcc90c7105b831efc", size = 56405, upload-time = "2026-03-20T06:42:56.999Z" } wheels = [ @@ -961,19 +958,19 @@ wheels = [ [[package]] name = "lance-namespace" -version = "0.8.5" +version = "0.8.6" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "lance-namespace-urllib3-client" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d0/22/3d8eb4e913edf36cda416f1dca287147af508abe3ca89bf0e619b9fa9f54/lance_namespace-0.8.5.tar.gz", hash = "sha256:b4a5967afcbf9924300a0b9d2fb74c44a23f76907e8734ebed6e0e3a561b0df0", size = 11531, upload-time = "2026-06-11T16:20:26.77Z" } +sdist = { url = "https://files.pythonhosted.org/packages/af/12/f7ab93b29be3edbf5fc3610714bf2d06088e7f4524bfb38dfd6852458b08/lance_namespace-0.8.6.tar.gz", hash = "sha256:18232e721c8188145f4ec9389cc2dfbeeabf54a619d94885ea1b3375bee9f4af", size = 11529, upload-time = "2026-06-12T17:36:41.651Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c0/da/afc3cdc42fc2dcf885a9d3524bf2c3bd2a9df89b1668b1806dec5e436263/lance_namespace-0.8.5-py3-none-any.whl", hash = "sha256:6d3e2b8da586d06409494b56955a63c3152eeae2883cd2e8ba4e80d20dc0de0f", size = 13383, upload-time = "2026-06-11T16:20:26.004Z" }, + { url = "https://files.pythonhosted.org/packages/a0/1b/5b1668ee2dc8910965f390640359112a31157092fcf8e000b89c79b58708/lance_namespace-0.8.6-py3-none-any.whl", hash = "sha256:571eae34f9aad70e5b05020416c2860889b9ec82993ccd0eb015e7b39c3ea309", size = 13383, upload-time = "2026-06-12T17:36:43.456Z" }, ] [[package]] name = "lance-namespace-urllib3-client" -version = "0.8.5" +version = "0.8.6" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "pydantic" }, @@ -981,9 +978,9 @@ dependencies = [ { name = "typing-extensions" }, { name = "urllib3" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/44/6f/1291523488523656342d1b424b76b4d91f3af6413b3b4ada43b888a87043/lance_namespace_urllib3_client-0.8.5.tar.gz", hash = "sha256:29922ffb5b0621e24a83183454ec3e5a5828f46d91a95d58efc35db05dec4e62", size = 228595, upload-time = "2026-06-11T16:20:23.985Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c7/80/fb224b4a89c1c1638cde949cb6cce6c3aca7759effbfea46a3d9c3960b21/lance_namespace_urllib3_client-0.8.6.tar.gz", hash = "sha256:b6fb1d306e74a7576e5309919020be744527de484a63dbf5eed10f8b368548df", size = 228772, upload-time = "2026-06-12T17:36:42.609Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/10/e2/62883d1f43a283ac08f00af993c6a2b92e4ca206fa1ccba032420d8dc578/lance_namespace_urllib3_client-0.8.5-py3-none-any.whl", hash = "sha256:8af211ddc6e73df713ffb59368c94780508e732b19dacb4239d937aaff2f8e3c", size = 369857, upload-time = "2026-06-11T16:20:25.006Z" }, + { url = "https://files.pythonhosted.org/packages/c5/90/1e27de15cd1b16785a1c7312beb0a59e75c8344a815f600f58173a565bd1/lance_namespace_urllib3_client-0.8.6-py3-none-any.whl", hash = "sha256:9d78249c3fb15aa3d15d668f78f04a275af3d08d800a7027492f37996ac4968b", size = 369950, upload-time = "2026-06-12T17:36:40.438Z" }, ] [[package]] From b09f9c338ee6d4a750ee4f4cd73e26def1db95cb Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sat, 13 Jun 2026 18:54:30 +0500 Subject: [PATCH 270/318] feat(analytics): corpus analytics slice 1 (intent lane), shared embedding cache --- CHANGELOG.md | 6 + codeclone/analytics/__init__.py | 11 + codeclone/analytics/agent_labels.py | 47 ++ codeclone/analytics/capabilities.py | 63 +++ codeclone/analytics/clustering/__init__.py | 0 .../analytics/clustering/canonicalize.py | 105 ++++ codeclone/analytics/clustering/diagnostics.py | 233 ++++++++ codeclone/analytics/clustering/models.py | 55 ++ codeclone/analytics/clustering/pipeline.py | 162 ++++++ codeclone/analytics/clustering/sweep.py | 148 +++++ codeclone/analytics/contracts.py | 
126 +++++ codeclone/analytics/corpus/__init__.py | 0 .../analytics/corpus/adapters/__init__.py | 0 .../corpus/adapters/intent_historical.py | 428 +++++++++++++++ codeclone/analytics/corpus/keys.py | 55 ++ codeclone/analytics/corpus/normalizer.py | 72 +++ .../analytics/corpus/registry_overlay.py | 84 +++ .../corpus/representations/__init__.py | 0 .../corpus/representations/intent.py | 120 +++++ codeclone/analytics/corpus/snapshot.py | 131 +++++ .../analytics/corpus/trajectory_selection.py | 83 +++ codeclone/analytics/embedding/__init__.py | 0 codeclone/analytics/embedding/generation.py | 164 ++++++ codeclone/analytics/exceptions.py | 31 ++ codeclone/analytics/export/__init__.py | 0 codeclone/analytics/export/json_export.py | 163 ++++++ codeclone/analytics/report/__init__.py | 0 codeclone/analytics/report/html.py | 132 +++++ codeclone/analytics/schema.py | 175 ++++++ codeclone/analytics/store/__init__.py | 0 codeclone/analytics/store/protocols.py | 149 +++++ codeclone/analytics/store/sqlite.py | 508 ++++++++++++++++++ codeclone/analytics/store/vectors_lancedb.py | 179 ++++++ codeclone/analytics/workflow.py | 436 +++++++++++++++ codeclone/audit/reader.py | 35 ++ codeclone/audit/schema.py | 18 +- codeclone/config/analytics.py | 174 ++++++ codeclone/config/analytics_specs.py | 24 + codeclone/config/pyproject_loader.py | 43 +- codeclone/contracts/__init__.py | 17 + codeclone/memory/schema.py | 19 +- codeclone/memory/trajectory/store.py | 19 + codeclone/observability/sqlite_access.py | 52 ++ codeclone/surfaces/cli/analytics.py | 335 ++++++++++++ codeclone/surfaces/cli/workflow.py | 4 + .../surfaces/mcp/_workspace_intent_schema.py | 19 +- docs/README-pypi.md | 6 +- docs/book/02-architecture-map.md | 2 + docs/book/10-config-and-defaults.md | 15 + docs/book/11-cli.md | 8 + docs/book/24-compatibility-and-versioning.md | 8 + docs/book/27-corpus-analytics.md | 129 +++++ docs/book/README.md | 1 + docs/book/appendix/b-schema-layouts.md | 20 + docs/guide/README.md | 1 + 
docs/guide/analytics/overview.md | 67 +++ docs/index.md | 1 + pyproject.toml | 16 + tests/fixtures/analytics/helpers.py | 74 +++ tests/test_analytics_foundation.py | 186 +++++++ tests/test_analytics_integration.py | 228 ++++++++ tests/test_analytics_trajectory_selection.py | 104 ++++ tests/test_architecture.py | 26 +- tests/test_config_analytics.py | 55 ++ tests/test_sqlite_readonly_openers.py | 49 +- uv.lock | 422 ++++++++++++++- zensical.toml | 2 + 67 files changed, 5995 insertions(+), 50 deletions(-) create mode 100644 codeclone/analytics/__init__.py create mode 100644 codeclone/analytics/agent_labels.py create mode 100644 codeclone/analytics/capabilities.py create mode 100644 codeclone/analytics/clustering/__init__.py create mode 100644 codeclone/analytics/clustering/canonicalize.py create mode 100644 codeclone/analytics/clustering/diagnostics.py create mode 100644 codeclone/analytics/clustering/models.py create mode 100644 codeclone/analytics/clustering/pipeline.py create mode 100644 codeclone/analytics/clustering/sweep.py create mode 100644 codeclone/analytics/contracts.py create mode 100644 codeclone/analytics/corpus/__init__.py create mode 100644 codeclone/analytics/corpus/adapters/__init__.py create mode 100644 codeclone/analytics/corpus/adapters/intent_historical.py create mode 100644 codeclone/analytics/corpus/keys.py create mode 100644 codeclone/analytics/corpus/normalizer.py create mode 100644 codeclone/analytics/corpus/registry_overlay.py create mode 100644 codeclone/analytics/corpus/representations/__init__.py create mode 100644 codeclone/analytics/corpus/representations/intent.py create mode 100644 codeclone/analytics/corpus/snapshot.py create mode 100644 codeclone/analytics/corpus/trajectory_selection.py create mode 100644 codeclone/analytics/embedding/__init__.py create mode 100644 codeclone/analytics/embedding/generation.py create mode 100644 codeclone/analytics/exceptions.py create mode 100644 codeclone/analytics/export/__init__.py create mode 
100644 codeclone/analytics/export/json_export.py create mode 100644 codeclone/analytics/report/__init__.py create mode 100644 codeclone/analytics/report/html.py create mode 100644 codeclone/analytics/schema.py create mode 100644 codeclone/analytics/store/__init__.py create mode 100644 codeclone/analytics/store/protocols.py create mode 100644 codeclone/analytics/store/sqlite.py create mode 100644 codeclone/analytics/store/vectors_lancedb.py create mode 100644 codeclone/analytics/workflow.py create mode 100644 codeclone/config/analytics.py create mode 100644 codeclone/config/analytics_specs.py create mode 100644 codeclone/observability/sqlite_access.py create mode 100644 codeclone/surfaces/cli/analytics.py create mode 100644 docs/book/27-corpus-analytics.md create mode 100644 docs/guide/analytics/overview.md create mode 100644 tests/fixtures/analytics/helpers.py create mode 100644 tests/test_analytics_foundation.py create mode 100644 tests/test_analytics_integration.py create mode 100644 tests/test_analytics_trajectory_selection.py create mode 100644 tests/test_config_analytics.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 50184578..d96bd09f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -70,6 +70,12 @@ Added * MCP schemas now include parameter-level descriptions and deterministic next_tool guidance. Workspace hygiene warnings, audit events, token-budget tracking, and documentation-contract linting were also added. +* **Corpus Analytics (intent lane, Slice 1).** Optional offline clustering of + historical change-control intents via `codeclone analytics corpus …`. + Requires `codeclone[analytics]`. Reads audit + trajectory (+ optional registry + overlay), writes SQLite/LanceDB artifacts under `.codeclone/analytics/`, and + exports JSON/HTML. Separate embedding contract from Engineering Memory semantic + index; `[tool.codeclone.analytics]` configures paths and clustering defaults. 
Changed diff --git a/codeclone/analytics/__init__.py b/codeclone/analytics/__init__.py new file mode 100644 index 00000000..e4875c65 --- /dev/null +++ b/codeclone/analytics/__init__.py @@ -0,0 +1,11 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Corpus analytics — derived clustering over intent historical evidence.""" + +from __future__ import annotations + +__all__: list[str] = [] diff --git a/codeclone/analytics/agent_labels.py b/codeclone/analytics/agent_labels.py new file mode 100644 index 00000000..f5d96d74 --- /dev/null +++ b/codeclone/analytics/agent_labels.py @@ -0,0 +1,47 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Sequence + +from ..contracts import CORPUS_AGENT_LABEL_CONTRACT_VERSION + +_AGENT_FAMILY_RULES: tuple[tuple[str, str], ...] 
= ( + ("cursor-", "cursor"), + ("claude-", "claude"), + ("codex-", "codex"), + ("vscode-", "vscode"), + ("mcp-client", "mcp"), +) + + +def map_agent_family(agent_client_raw: str | None) -> str: + """Map raw agent client label to a deterministic agent family string.""" + if not agent_client_raw: + return "unknown" + normalized = agent_client_raw.strip().lower() + if not normalized: + return "unknown" + for prefix, family in _AGENT_FAMILY_RULES: + if normalized.startswith(prefix) or prefix in normalized: + return family + return "unknown" + + +def agent_label_contract_version() -> str: + return CORPUS_AGENT_LABEL_CONTRACT_VERSION + + +def agent_family_rules() -> Sequence[tuple[str, str]]: + return _AGENT_FAMILY_RULES + + +__all__ = [ + "agent_family_rules", + "agent_label_contract_version", + "map_agent_family", +] diff --git a/codeclone/analytics/capabilities.py b/codeclone/analytics/capabilities.py new file mode 100644 index 00000000..2b42bbbb --- /dev/null +++ b/codeclone/analytics/capabilities.py @@ -0,0 +1,63 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import importlib +from dataclasses import dataclass +from typing import Literal + +AnalyticsCapability = Literal["base", "embed", "cluster", "full"] + + +@dataclass(frozen=True, slots=True) +class CapabilityStatus: + available: bool + missing_packages: tuple[str, ...] 
+ + +def _package_available(name: str) -> bool: + try: + importlib.import_module(name) + except ImportError: + return False + return True + + +def check_capability(capability: AnalyticsCapability) -> CapabilityStatus: + if capability == "base": + return CapabilityStatus(available=True, missing_packages=()) + missing: list[str] = [] + if capability in {"embed", "full"}: + missing.extend( + package + for package in ("fastembed", "lancedb") + if not _package_available(package) + ) + if capability in {"cluster", "full"}: + missing.extend( + package + for package in ("sklearn", "hdbscan") + if not _package_available(package) + ) + return CapabilityStatus( + available=not missing, + missing_packages=tuple(sorted(set(missing))), + ) + + +def install_hint(missing_packages: tuple[str, ...]) -> str: + if not missing_packages: + return "uv sync --extra analytics" + return "uv sync --extra analytics" + + +__all__ = [ + "AnalyticsCapability", + "CapabilityStatus", + "check_capability", + "install_hint", +] diff --git a/codeclone/analytics/clustering/__init__.py b/codeclone/analytics/clustering/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/codeclone/analytics/clustering/canonicalize.py b/codeclone/analytics/clustering/canonicalize.py new file mode 100644 index 00000000..ec344e94 --- /dev/null +++ b/codeclone/analytics/clustering/canonicalize.py @@ -0,0 +1,105 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import math +from collections.abc import Sequence + +from ..corpus.keys import membership_digest +from .models import NOISE_LABEL, ClusterPartition + + +def canonicalize_partitions( + partitions: Sequence[ClusterPartition], +) -> tuple[ClusterPartition, ...]: + """Assign display order: size desc, medoid asc, membership_digest asc.""" + non_noise = [part for part in partitions if part.cluster_label != NOISE_LABEL] + noise = [part for part in partitions if part.cluster_label == NOISE_LABEL] + non_noise.sort( + key=lambda part: ( + -len(part.snapshot_item_ids), + part.snapshot_item_ids[0] if part.snapshot_item_ids else "", + part.membership_digest, + ) + ) + canonical: list[ClusterPartition] = [] + for _display_id, part in enumerate(non_noise, start=1): + canonical.append( + ClusterPartition( + cluster_label=part.cluster_label, + snapshot_item_ids=part.snapshot_item_ids, + membership_digest=part.membership_digest, + ) + ) + canonical.extend(noise) + return tuple(canonical) + + +def display_cluster_id_map( + partitions: Sequence[ClusterPartition], +) -> dict[int, int | None]: + mapping: dict[int, int | None] = {} + display = 1 + for part in partitions: + if part.cluster_label == NOISE_LABEL: + mapping[part.cluster_label] = None + continue + mapping[part.cluster_label] = display + display += 1 + return mapping + + +def medoid_item_id( + *, + member_ids: Sequence[str], + coordinates: dict[str, tuple[float, ...]], +) -> str: + if not member_ids: + return "" + if len(member_ids) == 1: + return member_ids[0] + + def average_distance(item_id: str) -> float: + anchor = coordinates.get(item_id) + if anchor is None: + return float("inf") + total = 0.0 + count = 0 + for other_id in member_ids: + if other_id == item_id: + continue + other = coordinates.get(other_id) + if other is None: + continue + total += _euclidean(anchor, other) + count += 1 + return total / 
count if count else float("inf") + + return min(member_ids, key=lambda item_id: (average_distance(item_id), item_id)) + + +def _euclidean(left: Sequence[float], right: Sequence[float]) -> float: + return math.sqrt(float(sum((a - b) ** 2 for a, b in zip(left, right, strict=True)))) + + +def partition_membership_map( + partitions: Sequence[ClusterPartition], +) -> dict[str, str]: + mapping: dict[str, str] = {} + for part in partitions: + digest = membership_digest(list(part.snapshot_item_ids)) + for item_id in part.snapshot_item_ids: + mapping[item_id] = digest + return mapping + + +__all__ = [ + "canonicalize_partitions", + "display_cluster_id_map", + "medoid_item_id", + "partition_membership_map", +] diff --git a/codeclone/analytics/clustering/diagnostics.py b/codeclone/analytics/clustering/diagnostics.py new file mode 100644 index 00000000..2f2596d7 --- /dev/null +++ b/codeclone/analytics/clustering/diagnostics.py @@ -0,0 +1,233 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import json +import math +import re +from collections import Counter +from collections.abc import Mapping, Sequence +from dataclasses import dataclass + +from ..contracts import CorpusItemRecord +from .canonicalize import medoid_item_id +from .models import NOISE_LABEL, ClusterPartition + + +@dataclass(frozen=True, slots=True) +class CorrelationCell: + numerator: int + denominator: int + rate: float | None + insufficient_sample: bool + + +@dataclass(frozen=True, slots=True) +class NoiseExplorerFlags: + short_text: bool + long_text: bool + multiple_paragraphs: bool + high_conjunction_count: bool + template_match: bool + low_membership_strength: bool + + +def cluster_size_percent(size: int, total: int) -> float: + if total <= 0: + return 0.0 + return (size / total) * 100.0 + + +def metadata_distribution( + items: Sequence[CorpusItemRecord], + *, + field: str, +) -> dict[str, CorrelationCell]: + counts: Counter[str] = Counter() + for item in items: + payload = _metadata_object(item.metadata_json) + value = payload.get(field) + key = str(value) if value is not None else "null" + counts[key] += 1 + total = len(items) + return { + key: _cell(count, total, min_sample_size=5) + for key, count in sorted(counts.items()) + } + + +def correlation_rate( + *, + numerator: int, + denominator: int, + min_sample_size: int, +) -> CorrelationCell: + return _cell(numerator, denominator, min_sample_size=min_sample_size) + + +def build_cluster_diagnostics( + *, + partition: ClusterPartition, + items_by_id: Mapping[str, CorpusItemRecord], + coordinates: Mapping[str, tuple[float, ...]], + membership_strengths: Mapping[str, float | None], + total_items: int, + min_correlation_sample_size: int, +) -> dict[str, object]: + member_items = [ + items_by_id[item_id] + for item_id in partition.snapshot_item_ids + if item_id in items_by_id + ] + size = len(member_items) + medoid = 
medoid_item_id( + member_ids=partition.snapshot_item_ids, + coordinates=dict(coordinates), + ) + strengths = [ + membership_strengths.get(item_id) for item_id in partition.snapshot_item_ids + ] + avg_strength = _average([value for value in strengths if value is not None]) + metadata_fields = ( + "agent_family", + "outcome", + "quality_tier", + "scope_check_status", + "verification_status", + ) + distributions = { + field: { + key: { + "numerator": cell.numerator, + "denominator": cell.denominator, + "rate": cell.rate, + "insufficient_sample": cell.insufficient_sample, + } + for key, cell in metadata_distribution( + member_items, + field=field, + ).items() + } + for field in metadata_fields + } + return { + "cluster_label": partition.cluster_label, + "membership_digest": partition.membership_digest, + "size": size, + "size_percent": cluster_size_percent(size, total_items), + "medoid_snapshot_item_id": medoid, + "average_membership_strength": avg_strength, + "representatives": list(partition.snapshot_item_ids[:5]), + "metadata_distributions": distributions, + "min_correlation_sample_size": min_correlation_sample_size, + } + + +def noise_explorer_flags( + *, + item: CorpusItemRecord, + membership_strength: float | None, + strength_threshold: float = 0.2, +) -> NoiseExplorerFlags: + text = item.normalized_text + conjunctions = len(re.findall(r"\b(and|or|but|while|whereas)\b", text, re.I)) + return NoiseExplorerFlags( + short_text=len(text) < 40, + long_text=len(text) > 800, + multiple_paragraphs=text.count("\n\n") >= 2, + high_conjunction_count=conjunctions >= 4, + template_match=text.startswith("<"), + low_membership_strength=( + membership_strength is not None and membership_strength < strength_threshold + ), + ) + + +def nearest_cluster_ids( + *, + cluster_label: int, + centroids: Mapping[int, tuple[float, ...]], + limit: int = 3, +) -> tuple[int, ...]: + origin = centroids.get(cluster_label) + if origin is None: + return () + distances: list[tuple[float, int]] = [] 
def _euclidean(left: Sequence[float], right: Sequence[float]) -> float:
    """Return the Euclidean distance between two equal-length vectors.

    Uses ``math.dist`` rather than squaring the differences by hand: the manual
    ``(a - b) ** 2`` form raises ``OverflowError`` once a difference exceeds
    roughly 1e154, whereas ``math.dist`` scales internally and stays finite.

    Args:
        left: First coordinate vector.
        right: Second coordinate vector; must have the same length as ``left``.

    Returns:
        The distance as a float (``0.0`` for two empty vectors).

    Raises:
        ValueError: If the vectors differ in length.
    """
    return math.dist(left, right)
@dataclass(frozen=True, slots=True)
class ClusterPartition:
    """Immutable description of one cluster label and the items assigned to it."""

    # Integer label for the cluster; the module-level NOISE_LABEL (-1) is the
    # value treated as noise by the centroid/nearest-cluster helpers.
    cluster_label: int
    # Ids of the member items. The clustering pipeline in this change set
    # stores them sorted; the dataclass itself does not enforce any order.
    snapshot_item_ids: tuple[str, ...]
    # Digest identifying the membership set; the pipeline computes it with
    # corpus.keys.membership_digest over the same ids.
    membership_digest: str
def _l2_normalize(matrix: list[list[float]]) -> list[list[float]]:
    """Scale every row of ``matrix`` to unit Euclidean length.

    Rows whose norm is zero (all-zero or empty rows) are divided by 1.0, so
    they come back unchanged instead of triggering a division by zero.

    Args:
        matrix: Row-major list of float vectors.

    Returns:
        A new list of new row lists; the input is not modified.
    """

    def _unit(row: list[float]) -> list[float]:
        length = math.sqrt(sum(component * component for component in row))
        if not length:
            # Zero norm: fall back to a neutral divisor.
            length = 1.0
        return [component / length for component in row]

    return [_unit(row) for row in matrix]
def run_clustering_pipeline(
    *,
    snapshot_item_ids: Sequence[str],
    embeddings: Sequence[Sequence[float]],
    requested: ClusteringParameters,
    random_seed: int = 42,
) -> ClusteringPipelineResult | None:
    """L2-normalize embeddings, reduce them with PCA, and cluster with HDBSCAN.

    Args:
        snapshot_item_ids: One id per embedding row, in the same order.
        embeddings: Row vectors aligned with ``snapshot_item_ids``.
        requested: Requested parameters; they are passed through
            ``resolve_effective_parameters`` before use.
        random_seed: Forwarded to the PCA constructor as ``random_state``.

    Returns:
        The clustering result, or ``None`` when there are no items or when
        ``resolve_effective_parameters`` rejects the request for this input size.

    Raises:
        ValueError: If the id and embedding sequences differ in length.
        AnalyticsCapabilityError: If scikit-learn or hdbscan cannot be imported
            (raised by the loader helpers).
    """
    sample_count = len(snapshot_item_ids)
    if sample_count != len(embeddings):
        msg = "snapshot_item_ids and embeddings length mismatch"
        raise ValueError(msg)
    if not snapshot_item_ids:
        return None

    if embeddings:
        feature_count = len(embeddings[0])
    else:
        feature_count = 0
    effective = resolve_effective_parameters(
        requested,
        n_samples=sample_count,
        n_features=feature_count,
    )
    if effective is None:
        return None

    # Dimensionality reduction on unit-length rows.
    unit_rows = _l2_normalize([list(vector) for vector in embeddings])
    pca_factory = _load_sklearn_pca()
    projected = pca_factory(
        n_components=effective.pca_dimensions,
        whiten=False,
        svd_solver="full",
        random_state=random_seed,
    ).fit_transform(unit_rows)
    coordinate_rows = tuple(
        tuple(float(component) for component in row) for row in projected.tolist()
    )

    # Density clustering on the reduced coordinates.
    hdbscan_module = _load_hdbscan()
    model = hdbscan_module.HDBSCAN(
        min_cluster_size=effective.min_cluster_size,
        min_samples=effective.min_samples,
        metric="euclidean",
        cluster_selection_method=effective.cluster_selection_method,
        core_dist_n_jobs=1,
    )
    assigned = tuple(int(raw) for raw in model.fit_predict(projected).tolist())

    strengths: tuple[float | None, ...]
    probabilities = getattr(model, "probabilities_", None)
    if probabilities is None:
        # No per-item probabilities exposed: record an explicit None per item.
        strengths = tuple(None for _ in assigned)
    else:
        strengths = tuple(float(value) for value in probabilities.tolist())

    members_by_label: dict[int, list[str]] = {}
    for item_id, label in zip(snapshot_item_ids, assigned, strict=True):
        if label not in members_by_label:
            members_by_label[label] = []
        members_by_label[label].append(item_id)

    def _partition(label: int) -> ClusterPartition:
        ordered_ids = sorted(members_by_label[label])
        return ClusterPartition(
            cluster_label=label,
            snapshot_item_ids=tuple(ordered_ids),
            membership_digest=membership_digest(ordered_ids),
        )

    return ClusteringPipelineResult(
        # Partitions are emitted in ascending label order.
        partitions=tuple(_partition(label) for label in sorted(members_by_label)),
        labels=assigned,
        membership_strengths=strengths,
        reduced_coordinates=coordinate_rows,
        effective_parameters=effective,
    )
def rank_sweep_results(
    results: Sequence[SweepCandidateResult],
) -> SweepCandidateResult | None:
    """Pick the preferred sweep result, or ``None`` when there are none.

    The highest score wins. Ties are broken by the smallest effective
    ``pca_dimensions``, then ``min_cluster_size``, then ``min_samples``, and
    finally the lexicographically smallest ``cluster_selection_method``.

    Args:
        results: Scored sweep candidates.

    Returns:
        The winning element of ``results`` (the same object), or ``None``.
    """

    def _preference(entry: SweepCandidateResult) -> tuple[float, int, int, int, str]:
        params = entry.candidate.effective
        return (
            -entry.score,  # negate so that min() prefers the highest score
            params.pca_dimensions,
            params.min_cluster_size,
            params.min_samples,
            params.cluster_selection_method,
        )

    return min(results, key=_preference) if results else None
@dataclass(frozen=True, slots=True)
class CorpusItemRecord:
    """Immutable row describing one materialized corpus item within a snapshot."""

    # Identifier of the snapshot this item belongs to.
    snapshot_id: str
    # The *_key / *_id fields presumably hold the hex digests produced by the
    # helpers of the same names in corpus/keys.py — confirm against the writer.
    representation_key: str
    snapshot_item_id: str
    source_record_key: str
    project_id: str
    intent_id: str
    # Normalized representation text; this is the field the noise-explorer
    # heuristics read.
    normalized_text: str
    normalized_digest: str
    normalizer_version: str
    representation_digest: str
    # JSON-encoded metadata object (stored as text).
    metadata_json: str
    # JSON-encoded registry overlay, or None when no overlay is attached.
    registry_overlay_json: str | None
@dataclass(frozen=True, slots=True)
class ClusterAssignmentRecord:
    """Immutable row linking one snapshot item to its cluster within a run."""

    # Run this assignment belongs to.
    clustering_run_id: str
    # Item being assigned.
    snapshot_item_id: str
    # Cluster label for the item; presumably -1 (models.NOISE_LABEL) marks
    # noise, as in the clustering pipeline — confirm against the writer.
    cluster_label: int
    # Membership strength, or None when none was reported for the item.
    membership_strength: float | None
    # Digest of the membership set of the cluster the item was assigned to
    # (NOTE(review): inferred from the field name — confirm).
    membership_digest: str
new file mode 100644 index 00000000..e69de29b diff --git a/codeclone/analytics/corpus/adapters/__init__.py b/codeclone/analytics/corpus/adapters/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/codeclone/analytics/corpus/adapters/intent_historical.py b/codeclone/analytics/corpus/adapters/intent_historical.py new file mode 100644 index 00000000..d56025e1 --- /dev/null +++ b/codeclone/analytics/corpus/adapters/intent_historical.py @@ -0,0 +1,428 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import json +import sqlite3 +from collections import defaultdict +from collections.abc import Mapping, Sequence +from dataclasses import dataclass +from pathlib import Path + +from ....audit.events import repo_root_digest +from ....audit.reader import AuditRecord, read_intent_declared_records +from ....audit.validation import DEFAULT_AUDIT_PATH +from ....config.intent_registry import ( + IntentRegistryConfigError, + resolve_intent_registry_config, +) +from ....contracts import ( + CORPUS_NORMALIZER_VERSION, + ENGINEERING_MEMORY_SCHEMA_VERSION, + PATCH_TRAIL_SCHEMA_VERSION, +) +from ....memory.project import compute_project_id, resolve_memory_db_path +from ....memory.schema import open_memory_db_readonly +from ....memory.trajectory.agents import trajectory_agent_label +from ....memory.trajectory.anomalies import detect_trajectory_anomalies +from ....memory.trajectory.models import Trajectory +from ....memory.trajectory.patch_trail import patch_trail_from_mapping +from ....memory.trajectory.store import ( + list_trajectories_for_intent_id, + load_trajectory_patch_trail, +) +from ....utils.json_io import json_text +from ...agent_labels import map_agent_family +from ..keys import ( + 
def _payload_mapping(record: AuditRecord) -> dict[str, object]:
    """Return the first JSON object found on an audit record.

    ``payload_json`` is tried first and ``event_core_json`` second. A source is
    skipped when it is empty, is not valid JSON, or decodes to something other
    than an object. Previously a malformed ``payload_json`` returned ``{}``
    immediately, while a well-formed non-object one fell through to
    ``event_core_json``; both cases now fall through consistently.

    Args:
        record: Audit record exposing ``payload_json`` and ``event_core_json``.

    Returns:
        The decoded mapping, or an empty dict when neither source yields one.
    """
    for raw in (record.payload_json, record.event_core_json):
        if not raw:
            continue
        try:
            parsed = json.loads(raw)
        except json.JSONDecodeError:
            # Corrupt source: try the next one rather than giving up.
            continue
        if isinstance(parsed, dict):
            return parsed
    return {}
def _resolved_registry_db_path(
    root_path: Path,
    registry_db_path: Path | None,
) -> Path | None:
    """Choose the intent-registry database path to read overlays from.

    Args:
        root_path: Repository root handed to the registry config resolver.
        registry_db_path: Explicit override; returned unchanged when given.

    Returns:
        The override if present; otherwise the configured storage path when the
        resolved backend is ``"sqlite"``; ``None`` when the config cannot be
        resolved or uses another backend.
    """
    if registry_db_path is None:
        try:
            registry_config = resolve_intent_registry_config(root_path)
        except (IntentRegistryConfigError, OSError, ValueError):
            # Best effort: an unreadable or invalid config means no overlay.
            return None
        if registry_config.backend == "sqlite":
            return registry_config.storage_path
        return None
    return registry_db_path
list[HistoricalIntentSourceItem] = [] + try: + for (group_project_id, intent_id), group_records in sorted(grouped.items()): + canonical, description_conflict, declaration_event_ids = ( + _canonical_declaration(group_records) + ) + payload = _payload_mapping(canonical) + description = _intent_description(payload) + if not description.strip(): + continue + trajectories: tuple[Trajectory, ...] = () + patch_trail_payload: dict[str, object] | None = None + selected_trajectory = None + discarded_ids: tuple[str, ...] = () + if memory_conn is not None: + trajectories = list_trajectories_for_intent_id( + memory_conn, + project_id=group_project_id, + intent_id=intent_id, + ) + selection = select_trajectory_for_intent(trajectories) + selected_trajectory = selection.selected + discarded_ids = selection.discarded_ids + if selected_trajectory is not None: + patch_trail_payload = load_trajectory_patch_trail( + memory_conn, + trajectory_id=selected_trajectory.id, + ) + + patch_trail_digest: str | None = None + if patch_trail_payload is not None: + trail = patch_trail_from_mapping(patch_trail_payload) + if trail is not None: + patch_trail_digest = trail.patch_trail_digest + + provenance: dict[str, object] = { + "description": { + "source": "audit", + "event_id": canonical.event_id, + "audit_sequence": canonical.audit_sequence, + "duplicate_declaration_count": len(group_records), + "description_conflict": description_conflict, + "declaration_event_ids": list(declaration_event_ids), + }, + "trajectory": { + "selected_trajectory_id": ( + selected_trajectory.id if selected_trajectory else None + ), + "discarded_trajectory_ids": list(discarded_ids), + "selection_rule_version": TRAJECTORY_SELECTION_RULE_VERSION, + }, + "patch_trail": { + "source": "patch_trail", + "digest": patch_trail_digest, + }, + "registry_overlay": {"present": False}, + } + + metadata: dict[str, object] = {} + agent_raw: str | None = None + if selected_trajectory is not None: + agent_raw = 
trajectory_agent_label(selected_trajectory) + metadata["outcome"] = selected_trajectory.outcome + metadata["quality_tier"] = selected_trajectory.quality_tier + metadata["finished_at_utc"] = selected_trajectory.finished_at_utc + metadata["scope_expanded"] = scope_expanded_from_labels( + selected_trajectory.labels + ) + anomalies = detect_trajectory_anomalies( + selected_trajectory, + patch_trail_payload=patch_trail_payload, + ) + metadata["anomaly_kinds"] = sorted({item.kind for item in anomalies}) + elif canonical.agent_label.strip(): + agent_raw = canonical.agent_label.strip() + + metadata["agent_client_raw"] = agent_raw + metadata["agent_family"] = map_agent_family(agent_raw) + + if patch_trail_payload is not None: + trail = patch_trail_from_mapping(patch_trail_payload) + if trail is not None: + metadata["scope_check_status"] = trail.scope_check_status + metadata["verification_status"] = trail.verification_status + metadata["declared_file_count"] = len(trail.declared_files) + metadata["changed_file_count"] = len(trail.changed_files) + + registry_overlay = ( + read_registry_overlay(resolved_registry_db, intent_id=intent_id) + if resolved_registry_db is not None + else None + ) + if registry_overlay is not None: + provenance["registry_overlay"] = {"present": True} + + rep_input = IntentRepresentationInput( + description=description, + intent_kind=_intent_kind(payload), + declared_path_families=declared_path_families_from_patch_trail( + patch_trail_payload + ), + declared_constraints=declared_constraints_from_audit_payload(payload), + ) + + items.append( + HistoricalIntentSourceItem( + project_id=group_project_id, + intent_id=intent_id, + source_record_key_value=source_record_key( + project_id=group_project_id, + intent_id=intent_id, + ), + source_content_digest=source_content_digest(description), + provenance=provenance, + metadata=metadata, + registry_overlay=registry_overlay, + representation_input=rep_input, + ) + ) + finally: + if memory_conn is not None: + 
def materialize_corpus_item(
    *,
    snapshot_id: str,
    lane: str,
    representation_kind: str,
    item: HistoricalIntentSourceItem,
) -> tuple[str, str, str, str, str, str, str, str, str | None, str]:
    """Compute the keys, normalized text, and JSON payloads for one corpus item.

    Args:
        snapshot_id: Snapshot the item is materialized into.
        lane: Corpus lane, mixed into the representation key.
        representation_kind: Selects how the representation text is built.
        item: Extracted source item.

    Returns:
        A 10-tuple, in order: representation key, snapshot item id, source
        record key, normalized text, normalized digest, normalizer version,
        representation digest, metadata JSON, registry overlay JSON (or
        ``None``), representation version.

    Raises:
        ValueError: If the normalized representation text is empty.
    """
    version = representation_version_for_kind(representation_kind)
    record_key = item.source_record_key_value
    key = representation_key(
        lane=lane,
        representation_kind=representation_kind,
        representation_version=version,
        source_record_key_value=record_key,
    )
    item_id = snapshot_item_id(
        snapshot_id=snapshot_id,
        representation_key_value=key,
    )

    representation_text = build_representation_text(
        representation_kind=representation_kind,
        payload=item.representation_input,
    )
    normalized = normalize_corpus_text(representation_text)
    if not normalized.text:
        msg = "normalized representation text is empty"
        raise ValueError(msg)

    digest = compute_representation_digest(
        representation_kind=representation_kind,
        normalized_text=normalized.text,
    )
    metadata_payload = json_text(item.metadata, sort_keys=True)
    if item.registry_overlay is None:
        overlay_payload = None
    else:
        overlay_payload = json_text(item.registry_overlay, sort_keys=True)

    return (
        key,
        item_id,
        record_key,
        normalized.text,
        normalized.digest,
        normalized.normalizer_version,
        digest,
        metadata_payload,
        overlay_payload,
        version,
    )
def sha256_hex(text: str) -> str:
    """Return the lowercase hex SHA-256 digest of ``text`` encoded as UTF-8."""
    hasher = hashlib.sha256()
    hasher.update(text.encode("utf-8"))
    return hasher.hexdigest()
def normalize_corpus_text(raw: str) -> NormalizedText:
    """Normalize free text for the corpus and return it with its digest.

    Steps, in order: NFC normalization; newline unification; collapsing runs of
    spaces/tabs; capping blank-line runs at one blank line; trimming; removal of
    UUIDs, hex digests, ISO-8601 ``Z`` timestamps and absolute paths; and
    stripping one leading template verb (``implement``, ``fix``, ...).

    UUIDs are removed *before* generic hex digests. The digest pattern matches
    any 8-64 character hex run between word boundaries, which includes the
    first and last groups of a UUID; running it first left
    ``-xxxx-xxxx-xxxx-`` residue behind and meant the UUID pattern could never
    match.

    NOTE(review): this changes the normalized text (and therefore the digest)
    for inputs containing UUIDs; bump ``CORPUS_NORMALIZER_VERSION`` if digests
    produced by the previous ordering have been persisted.

    Args:
        raw: Unnormalized source text.

    Returns:
        The normalized text, its SHA-256 hex digest, and the normalizer version.
    """
    text = unicodedata.normalize("NFC", raw)
    text = text.replace("\r\n", "\n").replace("\r", "\n")
    text = re.sub(r"[ \t]+", " ", text)
    text = re.sub(r"\n{3,}", "\n\n", text)
    text = text.strip()
    # Order matters: the most specific pattern (UUID) must run before the
    # generic hex-digest pattern that would otherwise consume parts of it.
    text = _UUID_PATTERN.sub("", text)
    text = _DIGEST_PATTERN.sub("", text)
    text = _ISO_TIMESTAMP_PATTERN.sub("", text)
    text = _ABS_PATH_PATTERN.sub("", text)
    lowered = text.lower()
    for prefix in _TEMPLATE_PREFIXES:
        if lowered.startswith(prefix):
            # Only the first matching template verb is stripped.
            text = text[len(prefix) :].lstrip()
            break
    digest = sha256_hex(text)
    return NormalizedText(
        text=text,
        digest=digest,
        normalizer_version=CORPUS_NORMALIZER_VERSION,
    )
subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import json +import sqlite3 +from pathlib import Path + +from ...utils.sqlite_store import get_meta_value + +_INTENT_REGISTRY_META_TABLE = "intent_registry_meta" +_SUPPORTED_REGISTRY_SCHEMA_VERSIONS = frozenset({"1", "2"}) + + +def _validate_registry_readonly_schema(conn: sqlite3.Connection) -> None: + version = get_meta_value( + conn, + meta_table=_INTENT_REGISTRY_META_TABLE, + key="schema_version", + ) + if version not in _SUPPORTED_REGISTRY_SCHEMA_VERSIONS: + msg = f"unsupported intent registry schema version: {version!r}" + raise sqlite3.DatabaseError(msg) + + +def read_registry_overlay( + registry_db: Path, + *, + intent_id: str, +) -> dict[str, object] | None: + """Optional live coordination overlay; excluded from corpus digests.""" + + if not registry_db.is_file(): + return None + try: + from ...observability.sqlite_access import open_instrumented_sqlite_db_readonly + + conn = open_instrumented_sqlite_db_readonly( + registry_db, + validate_schema=_validate_registry_readonly_schema, + ) + except (OSError, sqlite3.Error): + return None + try: + row = conn.execute( + """ + SELECT payload_json, declared_at_utc, closed_at_utc + FROM workspace_intents + WHERE intent_id=? 
+ ORDER BY declared_at_utc DESC, agent_pid DESC, intent_id ASC + LIMIT 1 + """, + (intent_id,), + ).fetchone() + except sqlite3.Error: + return None + finally: + conn.close() + if row is None: + return None + payload_json = row[0] + status: str | None = None + if isinstance(payload_json, str): + try: + parsed = json.loads(payload_json) + if isinstance(parsed, dict): + raw_status = parsed.get("status") + if isinstance(raw_status, str): + status = raw_status + except json.JSONDecodeError: + status = None + return { + "present": True, + "status": status, + "declared_at_utc": row[1], + "closed_at_utc": row[2], + } + + +__all__ = ["read_registry_overlay"] diff --git a/codeclone/analytics/corpus/representations/__init__.py b/codeclone/analytics/corpus/representations/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/codeclone/analytics/corpus/representations/intent.py b/codeclone/analytics/corpus/representations/intent.py new file mode 100644 index 00000000..7763055a --- /dev/null +++ b/codeclone/analytics/corpus/representations/intent.py @@ -0,0 +1,120 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Mapping, Sequence +from dataclasses import dataclass + +from ...contracts import ( + INTENT_REPRESENTATION_DESCRIPTION, + INTENT_REPRESENTATION_DESCRIPTION_WITH_FRAME, +) +from ..keys import sha256_hex +from ..normalizer import normalize_corpus_text + + +@dataclass(frozen=True, slots=True) +class IntentRepresentationInput: + description: str + intent_kind: str | None + declared_path_families: Sequence[str] + declared_constraints: Sequence[str] + + +def build_intent_description_v1(description: str) -> str: + normalized = normalize_corpus_text(description) + return normalized.text + + +def build_intent_description_with_frame_v1(payload: IntentRepresentationInput) -> str: + normalized_description = normalize_corpus_text(payload.description) + kind = (payload.intent_kind or "").strip() + families = ", ".join(sorted(set(payload.declared_path_families))) + constraints = "; ".join(sorted(set(payload.declared_constraints))) + parts = [ + "DESCRIPTION:", + normalized_description.text, + "INTENT_KIND:", + kind, + "DECLARED_PATH_FAMILIES:", + families, + "DECLARED_CONSTRAINTS:", + constraints, + ] + return "\n".join(parts) + + +def representation_digest(*, representation_kind: str, normalized_text: str) -> str: + return sha256_hex(f"{representation_kind}\n{normalized_text}") + + +def build_representation_text( + *, + representation_kind: str, + payload: IntentRepresentationInput, +) -> str: + if representation_kind == INTENT_REPRESENTATION_DESCRIPTION: + return build_intent_description_v1(payload.description) + if representation_kind == INTENT_REPRESENTATION_DESCRIPTION_WITH_FRAME: + return build_intent_description_with_frame_v1(payload) + msg = f"unsupported representation kind: {representation_kind}" + raise ValueError(msg) + + +def declared_path_families_from_patch_trail( + patch_trail: Mapping[str, object] | None, + *, + limit: int 
= 12, +) -> tuple[str, ...]: + if patch_trail is None: + return () + declared = patch_trail.get("declared_files") + if not isinstance(declared, list): + return () + families: set[str] = set() + for item in declared: + if not isinstance(item, str): + continue + path = item.strip().replace("\\", "/") + while path.startswith("./"): + path = path[2:] + if not path: + continue + top = path.split("/", 1)[0] + if top: + families.add(top) + return tuple(sorted(families)[:limit]) + + +def declared_constraints_from_audit_payload( + payload: Mapping[str, object] | None, +) -> tuple[str, ...]: + if payload is None: + return () + constraints: list[str] = [] + for key in ("verification_profile", "dirty_scope_policy", "on_conflict"): + value = payload.get(key) + if isinstance(value, str) and value.strip(): + constraints.append(f"{key}={value.strip()}") + scope = payload.get("scope") + if isinstance(scope, Mapping): + for scope_key in ("allowed_files", "allowed_related", "forbidden"): + items = scope.get(scope_key) + if isinstance(items, list) and items: + constraints.append(f"scope.{scope_key}_count={len(items)}") + return tuple(sorted(constraints)) + + +__all__ = [ + "IntentRepresentationInput", + "build_intent_description_v1", + "build_intent_description_with_frame_v1", + "build_representation_text", + "declared_constraints_from_audit_payload", + "declared_path_families_from_patch_trail", + "representation_digest", +] diff --git a/codeclone/analytics/corpus/snapshot.py b/codeclone/analytics/corpus/snapshot.py new file mode 100644 index 00000000..182fe0cd --- /dev/null +++ b/codeclone/analytics/corpus/snapshot.py @@ -0,0 +1,131 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import uuid +from pathlib import Path + +from ...audit.validation import DEFAULT_AUDIT_PATH +from ...config.analytics import AnalyticsConfig, resolve_analytics_config +from ...memory.project import compute_project_id, resolve_memory_db_path +from ...report.meta import current_report_timestamp_utc +from ...utils.json_io import json_text +from ..contracts import CorpusItemRecord, CorpusLane, CorpusSnapshotRecord +from ..store.protocols import CorpusStore, SnapshotBuildResult +from ..store.sqlite import SqliteCorpusAnalyticsStore +from .adapters.intent_historical import ( + compute_source_digest, + default_source_schema_versions, + extract_historical_intent_items, + materialize_corpus_item, +) +from .keys import representation_version_for_kind + + +def _relative_store_paths(root_path: Path) -> dict[str, str]: + return { + "audit": DEFAULT_AUDIT_PATH, + "memory": ".codeclone/memory/engineering_memory.sqlite3", + "analytics": ".codeclone/analytics/corpus_clustering.sqlite3", + } + + +def build_intent_snapshot( + *, + root_path: Path, + representation_kind: str, + config: AnalyticsConfig | None = None, + registry_db_path: Path | None = None, + store: CorpusStore | None = None, +) -> SnapshotBuildResult: + resolved_root = root_path.resolve() + analytics_config = config or resolve_analytics_config(resolved_root) + owned_store = store is None + active_store = store or SqliteCorpusAnalyticsStore.open(analytics_config.db_path) + try: + lane: CorpusLane = "intent" + rep_version = representation_version_for_kind(representation_kind) + source_items = extract_historical_intent_items( + root_path=resolved_root, + representation_kind=representation_kind, + memory_db_path=resolve_memory_db_path(resolved_root), + registry_db_path=registry_db_path, + ) + source_digest = compute_source_digest( + items=source_items, + lane=lane, + representation_kind=representation_kind, + 
representation_version=rep_version, + source_schema_versions=default_source_schema_versions(), + ) + snapshot_id = f"snap-{uuid.uuid4().hex[:16]}" + created_at = current_report_timestamp_utc() + project_id = compute_project_id(resolved_root) + corpus_items: list[CorpusItemRecord] = [] + for source_item in source_items: + ( + rep_key, + snap_item_id, + source_key, + normalized_text, + normalized_digest, + normalizer_version, + rep_digest, + metadata_json, + overlay_json, + _rep_version, + ) = materialize_corpus_item( + snapshot_id=snapshot_id, + lane=lane, + representation_kind=representation_kind, + item=source_item, + ) + corpus_items.append( + CorpusItemRecord( + snapshot_id=snapshot_id, + representation_key=rep_key, + snapshot_item_id=snap_item_id, + source_record_key=source_key, + project_id=project_id, + intent_id=source_item.intent_id, + normalized_text=normalized_text, + normalized_digest=normalized_digest, + normalizer_version=normalizer_version, + representation_digest=rep_digest, + metadata_json=metadata_json, + registry_overlay_json=overlay_json, + ) + ) + snapshot = CorpusSnapshotRecord( + snapshot_id=snapshot_id, + lane=lane, + representation_kind=representation_kind, + representation_version=rep_version, + source_stores_json=json_text( + _relative_store_paths(resolved_root), sort_keys=True + ), + source_schema_versions_json=json_text( + default_source_schema_versions(), + sort_keys=True, + ), + record_count=len(corpus_items), + source_digest=source_digest, + created_at_utc=created_at, + ) + active_store.insert_snapshot(snapshot, corpus_items) + active_store.commit() + return SnapshotBuildResult( + snapshot_id=snapshot_id, + source_digest=source_digest, + record_count=len(corpus_items), + ) + finally: + if owned_store: + active_store.close() + + +__all__ = ["build_intent_snapshot"] diff --git a/codeclone/analytics/corpus/trajectory_selection.py b/codeclone/analytics/corpus/trajectory_selection.py new file mode 100644 index 00000000..73b8c6b1 --- 
/dev/null +++ b/codeclone/analytics/corpus/trajectory_selection.py @@ -0,0 +1,83 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Sequence +from dataclasses import dataclass + +from ...audit.events import EVENT_INTENT_CLEARED, EVENT_PATCH_VERIFIED +from ...contracts import TRAJECTORY_PROJECTION_VERSION +from ...memory.trajectory.models import Trajectory, TrajectoryLabel + +TRAJECTORY_SELECTION_RULE_VERSION = "1" + + +@dataclass(frozen=True, slots=True) +class TrajectorySelectionResult: + selected: Trajectory | None + discarded_ids: tuple[str, ...] + + +def _has_verified_finish(trajectory: Trajectory) -> bool: + if "verified_finish" in trajectory.labels: + return True + for step in trajectory.steps: + if step.event_type == EVENT_INTENT_CLEARED: + return True + if step.event_type == EVENT_PATCH_VERIFIED and step.status in { + "accepted", + "accepted_with_external_changes", + }: + return True + return False + + +def _terminal_audit_sequence(trajectory: Trajectory) -> int: + if not trajectory.steps: + return -1 + return max(step.audit_sequence for step in trajectory.steps) + + +def select_trajectory_for_intent( + trajectories: Sequence[Trajectory], +) -> TrajectorySelectionResult: + """Deterministic trajectory selection per spec §4.4.""" + candidates = [ + trajectory + for trajectory in trajectories + if trajectory.projection_version == TRAJECTORY_PROJECTION_VERSION + ] + if not candidates: + return TrajectorySelectionResult(selected=None, discarded_ids=()) + + finish_candidates = [item for item in candidates if _has_verified_finish(item)] + pool = finish_candidates if finish_candidates else list(candidates) + pool.sort( + key=lambda item: ( + 
-_terminal_audit_sequence(item), + item.id, + ) + ) + selected = pool[0] + discarded = tuple( + sorted( + trajectory.id for trajectory in candidates if trajectory.id != selected.id + ) + ) + return TrajectorySelectionResult(selected=selected, discarded_ids=discarded) + + +def scope_expanded_from_labels(labels: Sequence[TrajectoryLabel | str]) -> bool: + return "scope_expanded" in labels + + +__all__ = [ + "TRAJECTORY_SELECTION_RULE_VERSION", + "TrajectorySelectionResult", + "scope_expanded_from_labels", + "select_trajectory_for_intent", +] diff --git a/codeclone/analytics/embedding/__init__.py b/codeclone/analytics/embedding/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/codeclone/analytics/embedding/generation.py b/codeclone/analytics/embedding/generation.py new file mode 100644 index 00000000..5b04c81d --- /dev/null +++ b/codeclone/analytics/embedding/generation.py @@ -0,0 +1,164 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import importlib +import uuid +from collections.abc import Sequence +from dataclasses import dataclass + +from ...config.analytics import AnalyticsConfig +from ...contracts import CORPUS_EMBEDDING_CONTRACT_VERSION +from ...memory.embedding import EmbeddingProvider, embed_documents +from ...memory.embedding.fastembed_provider import FastEmbedEmbeddingProvider +from ...report.meta import current_report_timestamp_utc +from ..contracts import CorpusItemRecord, EmbeddingGenerationRecord, EmbeddingItemRecord +from ..exceptions import AnalyticsCapabilityError +from ..store.protocols import CorpusStore +from ..store.vectors_lancedb import AnalyticsVectorStore, vector_digest, vector_row_key + + +@dataclass(frozen=True, slots=True) +class EmbeddingBatchResult: + embedding_generation_id: str + item_count: int + + +def _resolve_fastembed_provider(config: AnalyticsConfig) -> FastEmbedEmbeddingProvider: + try: + importlib.import_module("fastembed") + except ImportError as exc: + raise AnalyticsCapabilityError( + "fastembed is required for analytics embeddings; " + "install with: uv sync --extra analytics" + ) from exc + return FastEmbedEmbeddingProvider( + model_name=config.embedding_model, + dimension=config.embedding_dimension, + cache_dir=config.embedding_cache_dir, + allow_model_download=config.allow_model_download, + ) + + +def _provider_package_version(provider_id: str) -> str: + if provider_id == "fastembed": + module = importlib.import_module("fastembed") + return str(getattr(module, "__version__", "unknown")) + return "unknown" + + +def generate_embeddings_for_snapshot( + *, + store: CorpusStore, + vector_store: AnalyticsVectorStore, + config: AnalyticsConfig, + snapshot_id: str, + provider: EmbeddingProvider | None = None, +) -> EmbeddingBatchResult: + items = store.list_items(snapshot_id) + if not items: + msg = f"snapshot has no items: {snapshot_id}" + 
raise ValueError(msg) + active_provider = provider or _resolve_fastembed_provider(config) + texts = [item.normalized_text for item in items] + vectors = embed_documents(active_provider, texts) + generation_id = f"emb-{uuid.uuid4().hex[:16]}" + provider_id = active_provider.model_id.split(":", 1)[0] + if provider_id not in {"fastembed", "diagnostic-hash-v1"}: + provider_id = ( + "fastembed" if "fastembed" in active_provider.model_id else "custom" + ) + if active_provider.model_id.startswith("fastembed:"): + provider_id = "fastembed" + model_id = ( + active_provider.model_id.split(":", 1)[1] + if ":" in active_provider.model_id + else active_provider.model_id + ) + generation = EmbeddingGenerationRecord( + embedding_generation_id=generation_id, + provider_id=provider_id, + provider_package_version=_provider_package_version(provider_id), + model_id=model_id, + model_revision=None, + model_artifact_fingerprint=None, + exact_model_artifact_reproducibility=False, + dimensions=active_provider.dimension, + embedding_contract_version=CORPUS_EMBEDDING_CONTRACT_VERSION, + embedding_similarity_metric="cosine", + vector_preprocessing="l2_normalize", + created_at_utc=current_report_timestamp_utc(), + ) + store.insert_embedding_generation(generation) + embedding_items: list[EmbeddingItemRecord] = [] + vector_rows: list[dict[str, object]] = [] + for item, vector in zip(items, vectors, strict=True): + row_key = vector_row_key( + embedding_generation_id=generation_id, + snapshot_item_id=item.snapshot_item_id, + ) + digest = vector_digest(vector) + embedding_items.append( + EmbeddingItemRecord( + embedding_generation_id=generation_id, + snapshot_item_id=item.snapshot_item_id, + vector_row_key=row_key, + vector_digest=digest, + dimensions=len(vector), + ) + ) + vector_rows.append( + { + "snapshot_item_id": item.snapshot_item_id, + "vector": vector, + } + ) + store.insert_embedding_items(embedding_items) + stored_items = store.list_embedding_items(embedding_generation_id=generation_id) 
+ if len(stored_items) != len(embedding_items): + msg = ( + "embedding item count mismatch after persist: " + f"expected {len(embedding_items)}, stored {len(stored_items)}" + ) + raise ValueError(msg) + vector_store.write_vectors( + embedding_generation_id=generation_id, + rows=vector_rows, + ) + store.commit() + return EmbeddingBatchResult( + embedding_generation_id=generation_id, + item_count=len(items), + ) + + +def load_snapshot_vectors( + *, + vector_store: AnalyticsVectorStore, + embedding_generation_id: str, + items: Sequence[CorpusItemRecord], +) -> list[list[float]]: + item_ids = [item.snapshot_item_id for item in items] + loaded = vector_store.read_vectors( + embedding_generation_id=embedding_generation_id, + snapshot_item_ids=item_ids, + ) + vectors: list[list[float]] = [] + for item_id in item_ids: + vector = loaded.get(item_id) + if vector is None: + msg = f"missing vector for snapshot item: {item_id}" + raise ValueError(msg) + vectors.append(vector) + return vectors + + +__all__ = [ + "EmbeddingBatchResult", + "generate_embeddings_for_snapshot", + "load_snapshot_vectors", +] diff --git a/codeclone/analytics/exceptions.py b/codeclone/analytics/exceptions.py new file mode 100644 index 00000000..d13db131 --- /dev/null +++ b/codeclone/analytics/exceptions.py @@ -0,0 +1,31 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
# SPDX-License-Identifier: MPL-2.0
# Copyright (c) 2026 Den Rozhnovskiy

from __future__ import annotations


class AnalyticsError(Exception):
    """Root of the corpus analytics exception hierarchy."""


class AnalyticsCapabilityError(AnalyticsError):
    """An optional dependency that analytics needs is not installed."""


class AnalyticsStoreError(AnalyticsError):
    """The analytics SQLite store reported a problem."""


class AnalyticsWorkflowError(AnalyticsError):
    """Orchestration failed or its input did not validate."""


__all__ = [
    "AnalyticsCapabilityError",
    "AnalyticsError",
    "AnalyticsStoreError",
    "AnalyticsWorkflowError",
]
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import json + +from ...contracts import CORPUS_EXPORT_SCHEMA_VERSION +from ...utils.json_io import json_text +from ..contracts import ClusteringRunRecord, CorpusItemRecord, CorpusSnapshotRecord +from ..store.sqlite import SqliteCorpusAnalyticsStore + + +def export_clustering_json( + *, + store: SqliteCorpusAnalyticsStore, + snapshot_id: str, + clustering_run_id: str, +) -> str: + snapshot = store.get_snapshot(snapshot_id) + if snapshot is None: + msg = f"unknown snapshot: {snapshot_id}" + raise ValueError(msg) + run = store.get_clustering_run(clustering_run_id) + if run is None: + msg = f"unknown clustering run: {clustering_run_id}" + raise ValueError(msg) + items = store.list_items(snapshot_id) + assignments = store.list_assignments(clustering_run_id) + summaries = store.list_summaries(clustering_run_id) + generation = store.get_embedding_generation(run.embedding_generation_id) + payload: dict[str, object] = { + "schema_version": CORPUS_EXPORT_SCHEMA_VERSION, + "snapshot": _snapshot_dict(snapshot), + "embedding_generation": _generation_dict(generation) if generation else None, + "clustering_run": _run_dict(run), + "clusters": [_summary_dict(summary) for summary in summaries], + "assignments": [_assignment_dict(item) for item in assignments], + "items": [_item_dict(item) for item in items], + "exact_model_artifact_reproducibility": ( + generation.exact_model_artifact_reproducibility if generation else False + ), + } + return json_text(payload, sort_keys=True, indent=True, trailing_newline=True) + + +def export_sweep_comparison_json( + *, + store: SqliteCorpusAnalyticsStore, + snapshot_id: str, + embedding_generation_id: str, +) -> str: + runs = store.list_clustering_runs( + snapshot_id=snapshot_id, + embedding_generation_id=embedding_generation_id, + ) + payload = { + "schema_version": CORPUS_EXPORT_SCHEMA_VERSION, + "snapshot_id": snapshot_id, + 
"embedding_generation_id": embedding_generation_id, + "candidates": [_run_dict(run) for run in runs], + } + return json_text(payload, sort_keys=True, indent=True, trailing_newline=True) + + +def _snapshot_dict(snapshot: CorpusSnapshotRecord) -> dict[str, object]: + return { + "snapshot_id": snapshot.snapshot_id, + "lane": snapshot.lane, + "representation_kind": snapshot.representation_kind, + "representation_version": snapshot.representation_version, + "source_stores": json.loads(snapshot.source_stores_json), + "source_schema_versions": json.loads(snapshot.source_schema_versions_json), + "record_count": snapshot.record_count, + "source_digest": snapshot.source_digest, + "created_at_utc": snapshot.created_at_utc, + } + + +def _run_dict(run: ClusteringRunRecord) -> dict[str, object]: + return { + "clustering_run_id": run.clustering_run_id, + "snapshot_id": run.snapshot_id, + "embedding_generation_id": run.embedding_generation_id, + "requested_parameters": json.loads(run.requested_parameters_json), + "effective_parameters": json.loads(run.effective_parameters_json), + "random_seed": run.random_seed, + "run_digest": run.run_digest, + "recommended_by_heuristic": run.recommended_by_heuristic, + "selected_by_maintainer": run.selected_by_maintainer, + "status": run.status, + "created_at_utc": run.created_at_utc, + "finished_at_utc": run.finished_at_utc, + "error_message": run.error_message, + } + + +def _summary_dict(summary: object) -> dict[str, object]: + from ..contracts import ClusterSummaryRecord + + assert isinstance(summary, ClusterSummaryRecord) + return { + "cluster_label": summary.cluster_label, + "display_cluster_id": summary.display_cluster_id, + "membership_digest": summary.membership_digest, + "size": summary.size, + "diagnostics": json.loads(summary.diagnostics_json), + } + + +def _assignment_dict(assignment: object) -> dict[str, object]: + from ..contracts import ClusterAssignmentRecord + + assert isinstance(assignment, ClusterAssignmentRecord) + return { + 
"snapshot_item_id": assignment.snapshot_item_id, + "cluster_label": assignment.cluster_label, + "membership_strength": assignment.membership_strength, + "membership_digest": assignment.membership_digest, + } + + +def _item_dict(item: CorpusItemRecord) -> dict[str, object]: + return { + "snapshot_item_id": item.snapshot_item_id, + "intent_id": item.intent_id, + "normalized_digest": item.normalized_digest, + "representation_digest": item.representation_digest, + "metadata": json.loads(item.metadata_json), + "registry_overlay": ( + json.loads(item.registry_overlay_json) + if item.registry_overlay_json is not None + else None + ), + } + + +def _generation_dict(generation: object) -> dict[str, object]: + from ..contracts import EmbeddingGenerationRecord + + assert isinstance(generation, EmbeddingGenerationRecord) + return { + "embedding_generation_id": generation.embedding_generation_id, + "provider_id": generation.provider_id, + "provider_package_version": generation.provider_package_version, + "model_id": generation.model_id, + "model_revision": generation.model_revision, + "model_artifact_fingerprint": generation.model_artifact_fingerprint, + "exact_model_artifact_reproducibility": ( + generation.exact_model_artifact_reproducibility + ), + "dimensions": generation.dimensions, + "embedding_contract_version": generation.embedding_contract_version, + "embedding_similarity_metric": generation.embedding_similarity_metric, + "vector_preprocessing": generation.vector_preprocessing, + "created_at_utc": generation.created_at_utc, + } + + +__all__ = ["export_clustering_json", "export_sweep_comparison_json"] diff --git a/codeclone/analytics/report/__init__.py b/codeclone/analytics/report/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/codeclone/analytics/report/html.py b/codeclone/analytics/report/html.py new file mode 100644 index 00000000..1cfdeb98 --- /dev/null +++ b/codeclone/analytics/report/html.py @@ -0,0 +1,132 @@ +# This Source Code Form is 
subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import html +import json +from collections.abc import Sequence + +from ..contracts import ClusteringRunRecord, ClusterSummaryRecord, CorpusSnapshotRecord +from ..store.sqlite import SqliteCorpusAnalyticsStore + + +def render_analytics_html( + *, + store: SqliteCorpusAnalyticsStore, + snapshot: CorpusSnapshotRecord, + run: ClusteringRunRecord, + comparison_only: bool = False, +) -> str: + summaries = store.list_summaries(run.clustering_run_id) + generation = store.get_embedding_generation(run.embedding_generation_id) + reproducibility_note = "" + if generation is not None and not generation.exact_model_artifact_reproducibility: + reproducibility_note = ( + "

    Full vector reproducibility is not guaranteed from model id " + "alone.

    " + ) + if comparison_only: + body = _render_comparison_table(store, snapshot.snapshot_id, run) + title = "Corpus Analytics Sweep Comparison" + else: + body = _render_detail_view(summaries) + title = "Corpus Analytics Cluster Report" + return f""" + + + +{html.escape(title)} + + + +

    {html.escape(title)}

    +

    Snapshot: {html.escape(snapshot.snapshot_id)}

    +

    Run: {html.escape(run.clustering_run_id)}

    +

    Recommended by heuristic: {run.recommended_by_heuristic}

    +

    Selected by maintainer: {run.selected_by_maintainer}

    +{reproducibility_note} +{body} + + +""" + + +def _render_comparison_table( + store: SqliteCorpusAnalyticsStore, + snapshot_id: str, + current_run: ClusteringRunRecord, +) -> str: + runs = store.list_clustering_runs( + snapshot_id=snapshot_id, + embedding_generation_id=current_run.embedding_generation_id, + ) + rows = [ + "" + f"{html.escape(run.clustering_run_id)}" + f"{html.escape(run.effective_parameters_json)}" + f"{run.recommended_by_heuristic}" + f"{run.selected_by_maintainer}" + "" + for run in runs + ] + return ( + "" + "" + "" + "" + "".join(rows) + "
    RunEffective ParametersRecommendedSelected
    " + ) + + +def _render_detail_view(summaries: Sequence[ClusterSummaryRecord]) -> str: + sections: list[str] = ["

    Clusters

    "] + for summary in summaries: + diagnostics = json.loads(summary.diagnostics_json) + if not isinstance(diagnostics, dict): + diagnostics = {} + display = summary.display_cluster_id + label = "noise" if display is None else str(display) + sections.append(f"

    Cluster {html.escape(label)}

    ") + sections.append(f"

    Size: {summary.size}

    ") + distributions = diagnostics.get("metadata_distributions") + if isinstance(distributions, dict): + sections.append(_render_distributions(distributions)) + return "\n".join(sections) + + +def _render_distributions(distributions: dict[str, object]) -> str: + parts = [ + "" + ] + for field, values in sorted(distributions.items()): + if not isinstance(values, dict): + continue + for key, cell in sorted(values.items()): + if not isinstance(cell, dict): + continue + numerator = cell.get("numerator") + denominator = cell.get("denominator") + rate = cell.get("rate") + insufficient = bool(cell.get("insufficient_sample")) + rate_text = "n/a" if insufficient else str(rate) + css = ' class="insufficient"' if insufficient else "" + parts.append( + f"" + f"" + f"" + ) + parts.append("
    FieldValueRate
    {html.escape(str(field))}{html.escape(str(key))}{html.escape(rate_text)} " + f"({numerator}/{denominator})
    ") + return "".join(parts) + + +__all__ = ["render_analytics_html"] diff --git a/codeclone/analytics/schema.py b/codeclone/analytics/schema.py new file mode 100644 index 00000000..05e88c6b --- /dev/null +++ b/codeclone/analytics/schema.py @@ -0,0 +1,175 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import sqlite3 +from pathlib import Path + +from ..contracts import CORPUS_ANALYTICS_STORE_SCHEMA_VERSION +from ..report.meta import current_report_timestamp_utc +from ..utils.sqlite_store import ( + get_meta_value, + initialize_schema_v1, +) +from .exceptions import AnalyticsStoreError + +_ANALYTICS_META_TABLE = "analytics_meta" + +_DDL = ( + """ + CREATE TABLE IF NOT EXISTS corpus_snapshots ( + snapshot_id TEXT PRIMARY KEY, + lane TEXT NOT NULL, + representation_kind TEXT NOT NULL, + representation_version TEXT NOT NULL, + source_stores_json TEXT NOT NULL, + source_schema_versions_json TEXT NOT NULL, + record_count INTEGER NOT NULL, + source_digest TEXT NOT NULL, + created_at_utc TEXT NOT NULL + ) + """, + """ + CREATE TABLE IF NOT EXISTS corpus_items ( + snapshot_id TEXT NOT NULL, + representation_key TEXT NOT NULL, + snapshot_item_id TEXT NOT NULL, + source_record_key TEXT NOT NULL, + project_id TEXT NOT NULL, + intent_id TEXT NOT NULL, + normalized_text TEXT NOT NULL, + normalized_digest TEXT NOT NULL, + normalizer_version TEXT NOT NULL, + representation_digest TEXT NOT NULL, + metadata_json TEXT NOT NULL, + registry_overlay_json TEXT, + PRIMARY KEY (snapshot_id, representation_key) + ) + """, + """ + CREATE TABLE IF NOT EXISTS embedding_generations ( + embedding_generation_id TEXT PRIMARY KEY, + provider_id TEXT NOT NULL, + provider_package_version TEXT NOT NULL, + model_id TEXT NOT NULL, + 
model_revision TEXT, + model_artifact_fingerprint TEXT, + exact_model_artifact_reproducibility INTEGER NOT NULL, + dimensions INTEGER NOT NULL, + embedding_contract_version TEXT NOT NULL, + embedding_similarity_metric TEXT NOT NULL, + vector_preprocessing TEXT NOT NULL, + created_at_utc TEXT NOT NULL + ) + """, + """ + CREATE TABLE IF NOT EXISTS embedding_items ( + embedding_generation_id TEXT NOT NULL, + snapshot_item_id TEXT NOT NULL, + vector_row_key TEXT NOT NULL, + vector_digest TEXT NOT NULL, + dimensions INTEGER NOT NULL, + PRIMARY KEY (embedding_generation_id, snapshot_item_id) + ) + """, + """ + CREATE TABLE IF NOT EXISTS clustering_runs ( + clustering_run_id TEXT PRIMARY KEY, + snapshot_id TEXT NOT NULL, + embedding_generation_id TEXT NOT NULL, + requested_parameters_json TEXT NOT NULL, + effective_parameters_json TEXT NOT NULL, + random_seed INTEGER NOT NULL, + run_digest TEXT NOT NULL, + recommended_by_heuristic INTEGER NOT NULL DEFAULT 0, + selected_by_maintainer INTEGER NOT NULL DEFAULT 0, + status TEXT NOT NULL, + created_at_utc TEXT NOT NULL, + finished_at_utc TEXT, + error_message TEXT + ) + """, + """ + CREATE TABLE IF NOT EXISTS cluster_assignments ( + clustering_run_id TEXT NOT NULL, + snapshot_item_id TEXT NOT NULL, + cluster_label INTEGER NOT NULL, + membership_strength REAL, + membership_digest TEXT NOT NULL, + PRIMARY KEY (clustering_run_id, snapshot_item_id) + ) + """, + """ + CREATE TABLE IF NOT EXISTS cluster_summaries ( + clustering_run_id TEXT NOT NULL, + cluster_label INTEGER NOT NULL, + display_cluster_id INTEGER, + membership_digest TEXT NOT NULL, + size INTEGER NOT NULL, + diagnostics_json TEXT NOT NULL, + PRIMARY KEY (clustering_run_id, cluster_label) + ) + """, + f""" + CREATE TABLE IF NOT EXISTS {_ANALYTICS_META_TABLE} ( + key TEXT PRIMARY KEY, + value TEXT NOT NULL + ) + """, +) + +_INDEXES = ( + "CREATE UNIQUE INDEX IF NOT EXISTS idx_corpus_items_snapshot_item " + "ON corpus_items(snapshot_id, snapshot_item_id)", + "CREATE 
INDEX IF NOT EXISTS idx_corpus_items_intent " + "ON corpus_items(project_id, intent_id)", + "CREATE INDEX IF NOT EXISTS idx_clustering_runs_snapshot " + "ON clustering_runs(snapshot_id, embedding_generation_id)", + "CREATE INDEX IF NOT EXISTS idx_cluster_assignments_run " + "ON cluster_assignments(clustering_run_id, cluster_label)", +) + + +def ensure_analytics_schema(conn: sqlite3.Connection) -> None: + current = get_meta_value( + conn, meta_table=_ANALYTICS_META_TABLE, key="schema_version" + ) + if current is not None and current != CORPUS_ANALYTICS_STORE_SCHEMA_VERSION: + raise AnalyticsStoreError(f"unsupported analytics schema version: {current}") + if current is None: + initialize_schema_v1( + conn, + ddl_statements=_DDL, + index_statements=_INDEXES, + meta_table=_ANALYTICS_META_TABLE, + seed_meta={ + "schema_version": CORPUS_ANALYTICS_STORE_SCHEMA_VERSION, + "created_at_utc": current_report_timestamp_utc(), + }, + ) + + +def open_analytics_db(path: Path) -> sqlite3.Connection: + from ..observability.sqlite_access import open_instrumented_sqlite_db + + return open_instrumented_sqlite_db(path, ensure_schema=ensure_analytics_schema) + + +def open_analytics_db_readonly(path: Path) -> sqlite3.Connection: + from ..observability.sqlite_access import open_instrumented_sqlite_db_readonly + + return open_instrumented_sqlite_db_readonly( + path, + validate_schema=ensure_analytics_schema, + ) + + +__all__ = [ + "ensure_analytics_schema", + "open_analytics_db", + "open_analytics_db_readonly", +] diff --git a/codeclone/analytics/store/__init__.py b/codeclone/analytics/store/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/codeclone/analytics/store/protocols.py b/codeclone/analytics/store/protocols.py new file mode 100644 index 00000000..c31c0186 --- /dev/null +++ b/codeclone/analytics/store/protocols.py @@ -0,0 +1,149 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
class CorpusStore(Protocol):
    """Structural interface for the relational analytics store.

    Groups the persistence operations for corpus snapshots, embedding
    generations, clustering runs and per-item clustering results. Every
    member is a declaration only; behavior is defined by implementations.
    """

    # --- corpus snapshots and their items ---------------------------------

    def insert_snapshot(
        self,
        snapshot: CorpusSnapshotRecord,
        items: Sequence[CorpusItemRecord],
    ) -> None: ...

    # Lookup by id; ``None`` signals that no such snapshot is stored.
    def get_snapshot(self, snapshot_id: str) -> CorpusSnapshotRecord | None: ...

    def list_snapshots(self) -> tuple[CorpusSnapshotRecord, ...]: ...

    def list_items(self, snapshot_id: str) -> tuple[CorpusItemRecord, ...]: ...

    # --- embedding generations and their per-item rows --------------------

    def insert_embedding_generation(
        self,
        generation: EmbeddingGenerationRecord,
    ) -> None: ...

    def insert_embedding_items(
        self,
        items: Sequence[EmbeddingItemRecord],
    ) -> None: ...

    def get_embedding_generation(
        self,
        embedding_generation_id: str,
    ) -> EmbeddingGenerationRecord | None: ...

    def list_embedding_items(
        self,
        *,
        embedding_generation_id: str,
    ) -> tuple[EmbeddingItemRecord, ...]: ...

    # --- clustering runs ---------------------------------------------------

    def insert_clustering_run(self, run: ClusteringRunRecord) -> None: ...

    def update_clustering_run(self, run: ClusteringRunRecord) -> None: ...

    def get_clustering_run(
        self,
        clustering_run_id: str,
    ) -> ClusteringRunRecord | None: ...

    # ``embedding_generation_id`` is an optional extra filter; the default
    # ``None`` leaves the listing restricted by ``snapshot_id`` only.
    def list_clustering_runs(
        self,
        *,
        snapshot_id: str,
        embedding_generation_id: str | None = None,
    ) -> tuple[ClusteringRunRecord, ...]: ...

    # The two setters below share one signature: they identify a single run
    # within a (snapshot, embedding generation) pair.
    def set_recommended_run(
        self,
        *,
        snapshot_id: str,
        embedding_generation_id: str,
        clustering_run_id: str,
    ) -> None: ...

    def set_selected_run(
        self,
        *,
        snapshot_id: str,
        embedding_generation_id: str,
        clustering_run_id: str,
    ) -> None: ...

    # --- per-run clustering results ---------------------------------------

    def insert_cluster_assignments(
        self,
        assignments: Sequence[ClusterAssignmentRecord],
    ) -> None: ...

    def insert_cluster_summaries(
        self,
        summaries: Sequence[ClusterSummaryRecord],
    ) -> None: ...

    def list_assignments(
        self,
        clustering_run_id: str,
    ) -> tuple[ClusterAssignmentRecord, ...]: ...

    def list_summaries(
        self,
        clustering_run_id: str,
    ) -> tuple[ClusterSummaryRecord, ...]: ...

    # --- lifecycle ---------------------------------------------------------

    def commit(self) -> None: ...

    def close(self) -> None: ...


class VectorGenerationStore(Protocol):
    """Structural interface for a store of embedding vectors.

    Vectors are written and read per embedding generation and are
    addressed by snapshot item id.
    """

    # Each row is a string-keyed mapping; the required keys are defined by
    # the implementation, not by this protocol.
    def write_vectors(
        self,
        *,
        embedding_generation_id: str,
        rows: Sequence[Mapping[str, object]],
    ) -> None: ...

    # Returns a mapping from snapshot item id to its vector.
    def read_vectors(
        self,
        *,
        embedding_generation_id: str,
        snapshot_item_ids: Sequence[str],
    ) -> dict[str, list[float]]: ...

    def close(self) -> None: ...


class CorpusSnapshotReader(Protocol):
    """Structural interface for reading the items of one corpus snapshot."""

    def read_items(self, snapshot_id: str) -> tuple[CorpusItemRecord, ...]: ...


@dataclass(frozen=True, slots=True)
class SnapshotBuildResult:
    """Immutable summary of a built corpus snapshot."""

    # Identifier of the snapshot.
    snapshot_id: str
    # Digest string associated with the snapshot's sources.
    source_digest: str
    # Number of records in the snapshot.
    record_count: int


# Public names exported by this module.
__all__ = [
    "CorpusSnapshotReader",
    "CorpusStore",
    "SnapshotBuildResult",
    "VectorGenerationStore",
]
class SqliteCorpusAnalyticsStore:
    """SQLite implementation of CorpusStore.

    Wraps a single ``sqlite3.Connection``. The write methods never call
    ``commit`` themselves; callers finish a unit of work with
    :meth:`commit`.
    """

    # Boolean flag columns of ``clustering_runs`` that may be rewritten by
    # ``_set_exclusive_flag``. The column name is interpolated into SQL, so
    # it is restricted to this fixed set.
    _RUN_FLAG_COLUMNS = frozenset(
        {"recommended_by_heuristic", "selected_by_maintainer"}
    )

    def __init__(self, conn: sqlite3.Connection) -> None:
        self._conn = conn
        # Row objects allow the ``row["column"]`` access used by the mappers.
        self._conn.row_factory = sqlite3.Row

    @classmethod
    def open(cls, path: Path) -> SqliteCorpusAnalyticsStore:
        """Open (and, if needed, initialize) the analytics database at *path*."""
        return cls(open_analytics_db(path))

    @classmethod
    def open_readonly(cls, path: Path) -> SqliteCorpusAnalyticsStore:
        """Open the analytics database at *path* through the read-only opener."""
        return cls(open_analytics_db_readonly(path))

    def insert_snapshot(
        self,
        snapshot: CorpusSnapshotRecord,
        items: Sequence[CorpusItemRecord],
    ) -> None:
        """Insert one snapshot row followed by all of its item rows."""
        self._conn.execute(
            """
            INSERT INTO corpus_snapshots (
                snapshot_id, lane, representation_kind, representation_version,
                source_stores_json, source_schema_versions_json,
                record_count, source_digest, created_at_utc
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
            """,
            (
                snapshot.snapshot_id,
                snapshot.lane,
                snapshot.representation_kind,
                snapshot.representation_version,
                snapshot.source_stores_json,
                snapshot.source_schema_versions_json,
                snapshot.record_count,
                snapshot.source_digest,
                snapshot.created_at_utc,
            ),
        )
        self._conn.executemany(
            """
            INSERT INTO corpus_items (
                snapshot_id, representation_key, snapshot_item_id,
                source_record_key, project_id, intent_id,
                normalized_text, normalized_digest, normalizer_version,
                representation_digest, metadata_json, registry_overlay_json
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """,
            [
                (
                    item.snapshot_id,
                    item.representation_key,
                    item.snapshot_item_id,
                    item.source_record_key,
                    item.project_id,
                    item.intent_id,
                    item.normalized_text,
                    item.normalized_digest,
                    item.normalizer_version,
                    item.representation_digest,
                    item.metadata_json,
                    item.registry_overlay_json,
                )
                for item in items
            ],
        )

    def get_snapshot(self, snapshot_id: str) -> CorpusSnapshotRecord | None:
        """Return the snapshot with *snapshot_id*, or ``None`` if absent."""
        row = self._conn.execute(
            "SELECT * FROM corpus_snapshots WHERE snapshot_id=?",
            (snapshot_id,),
        ).fetchone()
        return _snapshot_from_row(row) if row is not None else None

    def list_snapshots(self) -> tuple[CorpusSnapshotRecord, ...]:
        """Return all snapshots, newest ``created_at_utc`` first."""
        rows = self._conn.execute(
            "SELECT * FROM corpus_snapshots "
            "ORDER BY created_at_utc DESC, snapshot_id ASC"
        ).fetchall()
        return tuple(_snapshot_from_row(row) for row in rows)

    def list_items(self, snapshot_id: str) -> tuple[CorpusItemRecord, ...]:
        """Return the items of a snapshot in a stable key order."""
        rows = self._conn.execute(
            "SELECT * FROM corpus_items WHERE snapshot_id=? "
            "ORDER BY source_record_key ASC, representation_key ASC",
            (snapshot_id,),
        ).fetchall()
        return tuple(_item_from_row(row) for row in rows)

    def insert_embedding_generation(
        self,
        generation: EmbeddingGenerationRecord,
    ) -> None:
        """Insert one embedding generation row."""
        self._conn.execute(
            """
            INSERT INTO embedding_generations (
                embedding_generation_id, provider_id, provider_package_version,
                model_id, model_revision, model_artifact_fingerprint,
                exact_model_artifact_reproducibility, dimensions,
                embedding_contract_version, embedding_similarity_metric,
                vector_preprocessing, created_at_utc
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """,
            (
                generation.embedding_generation_id,
                generation.provider_id,
                generation.provider_package_version,
                generation.model_id,
                generation.model_revision,
                generation.model_artifact_fingerprint,
                # Booleans are stored as 0/1 integers.
                int(generation.exact_model_artifact_reproducibility),
                generation.dimensions,
                generation.embedding_contract_version,
                generation.embedding_similarity_metric,
                generation.vector_preprocessing,
                generation.created_at_utc,
            ),
        )

    def insert_embedding_items(
        self,
        items: Sequence[EmbeddingItemRecord],
    ) -> None:
        """Insert the per-item rows of an embedding generation."""
        self._conn.executemany(
            """
            INSERT INTO embedding_items (
                embedding_generation_id, snapshot_item_id,
                vector_row_key, vector_digest, dimensions
            ) VALUES (?, ?, ?, ?, ?)
            """,
            [
                (
                    item.embedding_generation_id,
                    item.snapshot_item_id,
                    item.vector_row_key,
                    item.vector_digest,
                    item.dimensions,
                )
                for item in items
            ],
        )

    def get_embedding_generation(
        self,
        embedding_generation_id: str,
    ) -> EmbeddingGenerationRecord | None:
        """Return the generation with the given id, or ``None`` if absent."""
        row = self._conn.execute(
            "SELECT * FROM embedding_generations WHERE embedding_generation_id=?",
            (embedding_generation_id,),
        ).fetchone()
        return _generation_from_row(row) if row is not None else None

    def list_embedding_items(
        self,
        *,
        embedding_generation_id: str,
    ) -> tuple[EmbeddingItemRecord, ...]:
        """Return a generation's item rows ordered by ``snapshot_item_id``."""
        rows = self._conn.execute(
            "SELECT * FROM embedding_items WHERE embedding_generation_id=? "
            "ORDER BY snapshot_item_id ASC",
            (embedding_generation_id,),
        ).fetchall()
        return tuple(_embedding_item_from_row(row) for row in rows)

    def insert_clustering_run(self, run: ClusteringRunRecord) -> None:
        """Insert one clustering run row."""
        self._conn.execute(
            """
            INSERT INTO clustering_runs (
                clustering_run_id, snapshot_id, embedding_generation_id,
                requested_parameters_json, effective_parameters_json,
                random_seed, run_digest, recommended_by_heuristic,
                selected_by_maintainer, status, created_at_utc,
                finished_at_utc, error_message
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """,
            (
                run.clustering_run_id,
                run.snapshot_id,
                run.embedding_generation_id,
                run.requested_parameters_json,
                run.effective_parameters_json,
                run.random_seed,
                run.run_digest,
                int(run.recommended_by_heuristic),
                int(run.selected_by_maintainer),
                run.status,
                run.created_at_utc,
                run.finished_at_utc,
                run.error_message,
            ),
        )

    def update_clustering_run(self, run: ClusteringRunRecord) -> None:
        """Overwrite the mutable columns of the run identified by *run*'s id.

        ``snapshot_id``, ``embedding_generation_id`` and ``created_at_utc``
        are not part of the UPDATE and keep their stored values.
        """
        self._conn.execute(
            """
            UPDATE clustering_runs SET
                requested_parameters_json=?,
                effective_parameters_json=?,
                random_seed=?,
                run_digest=?,
                recommended_by_heuristic=?,
                selected_by_maintainer=?,
                status=?,
                finished_at_utc=?,
                error_message=?
            WHERE clustering_run_id=?
            """,
            (
                run.requested_parameters_json,
                run.effective_parameters_json,
                run.random_seed,
                run.run_digest,
                int(run.recommended_by_heuristic),
                int(run.selected_by_maintainer),
                run.status,
                run.finished_at_utc,
                run.error_message,
                run.clustering_run_id,
            ),
        )

    def get_clustering_run(
        self,
        clustering_run_id: str,
    ) -> ClusteringRunRecord | None:
        """Return the run with the given id, or ``None`` if absent."""
        row = self._conn.execute(
            "SELECT * FROM clustering_runs WHERE clustering_run_id=?",
            (clustering_run_id,),
        ).fetchone()
        return _run_from_row(row) if row is not None else None

    def list_clustering_runs(
        self,
        *,
        snapshot_id: str,
        embedding_generation_id: str | None = None,
    ) -> tuple[ClusteringRunRecord, ...]:
        """Return a snapshot's runs, oldest first.

        When *embedding_generation_id* is given, only runs of that
        generation are returned.
        """
        if embedding_generation_id is None:
            rows = self._conn.execute(
                "SELECT * FROM clustering_runs WHERE snapshot_id=? "
                "ORDER BY created_at_utc ASC, clustering_run_id ASC",
                (snapshot_id,),
            ).fetchall()
        else:
            rows = self._conn.execute(
                "SELECT * FROM clustering_runs WHERE snapshot_id=? "
                "AND embedding_generation_id=? "
                "ORDER BY created_at_utc ASC, clustering_run_id ASC",
                (snapshot_id, embedding_generation_id),
            ).fetchall()
        return tuple(_run_from_row(row) for row in rows)

    def _set_exclusive_flag(
        self,
        *,
        column: str,
        snapshot_id: str,
        embedding_generation_id: str,
        clustering_run_id: str,
    ) -> None:
        """Set *column* to 1 on one run and to 0 on its siblings.

        Siblings are all runs sharing *snapshot_id* and
        *embedding_generation_id*. A single UPDATE is used so that only the
        flag column is written; no other column is read back and rewritten.
        """
        if column not in self._RUN_FLAG_COLUMNS:
            raise ValueError(f"not a clustering run flag column: {column}")
        self._conn.execute(
            f"UPDATE clustering_runs SET {column} = "
            "CASE WHEN clustering_run_id=? THEN 1 ELSE 0 END "
            "WHERE snapshot_id=? AND embedding_generation_id=?",
            (clustering_run_id, snapshot_id, embedding_generation_id),
        )

    def set_recommended_run(
        self,
        *,
        snapshot_id: str,
        embedding_generation_id: str,
        clustering_run_id: str,
    ) -> None:
        """Mark one run as heuristic-recommended and unmark its siblings."""
        self._set_exclusive_flag(
            column="recommended_by_heuristic",
            snapshot_id=snapshot_id,
            embedding_generation_id=embedding_generation_id,
            clustering_run_id=clustering_run_id,
        )

    def set_selected_run(
        self,
        *,
        snapshot_id: str,
        embedding_generation_id: str,
        clustering_run_id: str,
    ) -> None:
        """Mark one run as maintainer-selected and unmark its siblings."""
        self._set_exclusive_flag(
            column="selected_by_maintainer",
            snapshot_id=snapshot_id,
            embedding_generation_id=embedding_generation_id,
            clustering_run_id=clustering_run_id,
        )

    def insert_cluster_assignments(
        self,
        assignments: Sequence[ClusterAssignmentRecord],
    ) -> None:
        """Insert per-item cluster assignments."""
        self._conn.executemany(
            """
            INSERT INTO cluster_assignments (
                clustering_run_id, snapshot_item_id, cluster_label,
                membership_strength, membership_digest
            ) VALUES (?, ?, ?, ?, ?)
            """,
            [
                (
                    item.clustering_run_id,
                    item.snapshot_item_id,
                    item.cluster_label,
                    item.membership_strength,
                    item.membership_digest,
                )
                for item in assignments
            ],
        )

    def insert_cluster_summaries(
        self,
        summaries: Sequence[ClusterSummaryRecord],
    ) -> None:
        """Insert per-cluster summary rows."""
        self._conn.executemany(
            """
            INSERT INTO cluster_summaries (
                clustering_run_id, cluster_label, display_cluster_id,
                membership_digest, size, diagnostics_json
            ) VALUES (?, ?, ?, ?, ?, ?)
            """,
            [
                (
                    item.clustering_run_id,
                    item.cluster_label,
                    item.display_cluster_id,
                    item.membership_digest,
                    item.size,
                    item.diagnostics_json,
                )
                for item in summaries
            ],
        )

    def list_assignments(
        self,
        clustering_run_id: str,
    ) -> tuple[ClusterAssignmentRecord, ...]:
        """Return a run's assignments ordered by ``snapshot_item_id``."""
        rows = self._conn.execute(
            "SELECT * FROM cluster_assignments WHERE clustering_run_id=? "
            "ORDER BY snapshot_item_id ASC",
            (clustering_run_id,),
        ).fetchall()
        return tuple(_assignment_from_row(row) for row in rows)

    def list_summaries(
        self,
        clustering_run_id: str,
    ) -> tuple[ClusterSummaryRecord, ...]:
        """Return a run's summaries by display id, rows without one last."""
        # "x IS NULL" sorts as 0/1 and gives the same order as
        # "NULLS LAST" without requiring SQLite 3.30+.
        rows = self._conn.execute(
            "SELECT * FROM cluster_summaries WHERE clustering_run_id=? "
            "ORDER BY display_cluster_id IS NULL ASC, "
            "display_cluster_id ASC, cluster_label ASC",
            (clustering_run_id,),
        ).fetchall()
        return tuple(_summary_from_row(row) for row in rows)

    def commit(self) -> None:
        """Commit the current transaction on the wrapped connection."""
        self._conn.commit()

    def close(self) -> None:
        """Close the wrapped connection."""
        self._conn.close()


def _snapshot_from_row(row: sqlite3.Row) -> CorpusSnapshotRecord:
    """Map a ``corpus_snapshots`` row to a record."""
    return CorpusSnapshotRecord(
        snapshot_id=str(row["snapshot_id"]),
        lane=str(row["lane"]),  # type: ignore[arg-type]
        representation_kind=str(row["representation_kind"]),
        representation_version=str(row["representation_version"]),
        source_stores_json=str(row["source_stores_json"]),
        source_schema_versions_json=str(row["source_schema_versions_json"]),
        record_count=int(row["record_count"]),
        source_digest=str(row["source_digest"]),
        created_at_utc=str(row["created_at_utc"]),
    )


def _item_from_row(row: sqlite3.Row) -> CorpusItemRecord:
    """Map a ``corpus_items`` row to a record; a NULL overlay stays ``None``."""
    overlay = row["registry_overlay_json"]
    return CorpusItemRecord(
        snapshot_id=str(row["snapshot_id"]),
        representation_key=str(row["representation_key"]),
        snapshot_item_id=str(row["snapshot_item_id"]),
        source_record_key=str(row["source_record_key"]),
        project_id=str(row["project_id"]),
        intent_id=str(row["intent_id"]),
        normalized_text=str(row["normalized_text"]),
        normalized_digest=str(row["normalized_digest"]),
        normalizer_version=str(row["normalizer_version"]),
        representation_digest=str(row["representation_digest"]),
        metadata_json=str(row["metadata_json"]),
        registry_overlay_json=str(overlay) if overlay is not None else None,
    )


def _generation_from_row(row: sqlite3.Row) -> EmbeddingGenerationRecord:
    """Map an ``embedding_generations`` row to a record."""
    return EmbeddingGenerationRecord(
        embedding_generation_id=str(row["embedding_generation_id"]),
        provider_id=str(row["provider_id"]),
        provider_package_version=str(row["provider_package_version"]),
        model_id=str(row["model_id"]),
        model_revision=_optional_str(row["model_revision"]),
        model_artifact_fingerprint=_optional_str(row["model_artifact_fingerprint"]),
        exact_model_artifact_reproducibility=bool(
            int(row["exact_model_artifact_reproducibility"])
        ),
        dimensions=int(row["dimensions"]),
        embedding_contract_version=str(row["embedding_contract_version"]),
        embedding_similarity_metric=str(row["embedding_similarity_metric"]),
        vector_preprocessing=str(row["vector_preprocessing"]),
        created_at_utc=str(row["created_at_utc"]),
    )


def _embedding_item_from_row(row: sqlite3.Row) -> EmbeddingItemRecord:
    """Map an ``embedding_items`` row to a record."""
    return EmbeddingItemRecord(
        embedding_generation_id=str(row["embedding_generation_id"]),
        snapshot_item_id=str(row["snapshot_item_id"]),
        vector_row_key=str(row["vector_row_key"]),
        vector_digest=str(row["vector_digest"]),
        dimensions=int(row["dimensions"]),
    )


def _run_from_row(row: sqlite3.Row) -> ClusteringRunRecord:
    """Map a ``clustering_runs`` row to a record (0/1 flags become bools)."""
    return ClusteringRunRecord(
        clustering_run_id=str(row["clustering_run_id"]),
        snapshot_id=str(row["snapshot_id"]),
        embedding_generation_id=str(row["embedding_generation_id"]),
        requested_parameters_json=str(row["requested_parameters_json"]),
        effective_parameters_json=str(row["effective_parameters_json"]),
        random_seed=int(row["random_seed"]),
        run_digest=str(row["run_digest"]),
        recommended_by_heuristic=bool(int(row["recommended_by_heuristic"])),
        selected_by_maintainer=bool(int(row["selected_by_maintainer"])),
        status=str(row["status"]),  # type: ignore[arg-type]
        created_at_utc=str(row["created_at_utc"]),
        finished_at_utc=_optional_str(row["finished_at_utc"]),
        error_message=_optional_str(row["error_message"]),
    )


def _assignment_from_row(row: sqlite3.Row) -> ClusterAssignmentRecord:
    """Map a ``cluster_assignments`` row; a NULL strength stays ``None``."""
    strength = row["membership_strength"]
    return ClusterAssignmentRecord(
        clustering_run_id=str(row["clustering_run_id"]),
        snapshot_item_id=str(row["snapshot_item_id"]),
        cluster_label=int(row["cluster_label"]),
        membership_strength=float(strength) if strength is not None else None,
        membership_digest=str(row["membership_digest"]),
    )


def _summary_from_row(row: sqlite3.Row) -> ClusterSummaryRecord:
    """Map a ``cluster_summaries`` row; a NULL display id stays ``None``."""
    display = row["display_cluster_id"]
    return ClusterSummaryRecord(
        clustering_run_id=str(row["clustering_run_id"]),
        cluster_label=int(row["cluster_label"]),
        display_cluster_id=int(display) if display is not None else None,
        membership_digest=str(row["membership_digest"]),
        size=int(row["size"]),
        diagnostics_json=str(row["diagnostics_json"]),
    )


def _optional_str(value: object) -> str | None:
    """Return *value* as ``str`` when it is a string, otherwise ``None``."""
    return str(value) if isinstance(value, str) else None


def parse_json_object(text: str) -> dict[str, object]:
    """Parse *text* as JSON and require the top-level value to be an object.

    Raises:
        AnalyticsStoreError: if the parsed value is not a JSON object.
        json.JSONDecodeError: if *text* is not valid JSON (propagated from
            ``json.loads`` unchanged).
    """
    parsed = json.loads(text)
    if not isinstance(parsed, dict):
        raise AnalyticsStoreError("expected JSON object")
    return parsed


__all__ = ["SqliteCorpusAnalyticsStore", "parse_json_object"]
# Name of the LanceDB table opened or created by AnalyticsVectorStore.
_TABLE_NAME = "corpus_vectors"
# Upper bound on how many snapshot item ids go into one IN (...) filter
# when vectors are read back.
_ID_QUERY_BATCH = 500


class _LanceSearchQuery(Protocol):
    """Structural type for the chained query object used by this module.

    Only the builder calls made by this module are declared. Each builder
    method returns a query again so calls can be chained; ``to_list``
    materializes the result rows as dictionaries.
    """

    def select(self, columns: list[str]) -> _LanceSearchQuery: ...

    def where(self, predicate: str) -> _LanceSearchQuery: ...

    def limit(self, k: int) -> _LanceSearchQuery: ...

    def to_list(self) -> list[dict[str, object]]: ...


class _LanceMergeInsert(Protocol):
    """Structural type for the chained merge-insert (upsert) builder."""

    def when_matched_update_all(self) -> _LanceMergeInsert: ...

    def when_not_matched_insert_all(self) -> _LanceMergeInsert: ...

    def execute(self, records: list[dict[str, object]]) -> None: ...


class _LanceTable(Protocol):
    """Structural type for the table operations used by this module."""

    # ``vector`` defaults to ``None``, i.e. a search without a query vector.
    def search(self, vector: list[float] | None = None) -> _LanceSearchQuery: ...

    # ``key`` names the column on which rows are matched for the merge.
    def merge_insert(self, key: str) -> _LanceMergeInsert: ...


class _LanceConnection(Protocol):
    """Structural type for the connection operations used by this module."""

    def open_table(self, name: str) -> _LanceTable: ...

    def create_table(
        self, name: str, schema: object, *, exist_ok: bool = False
    ) -> _LanceTable: ...
def _load_lancedb() -> ModuleType:
    """Import and return the optional ``lancedb`` module.

    Raises:
        AnalyticsCapabilityError: if ``lancedb`` is not installed.
    """
    try:
        return importlib.import_module("lancedb")
    except ImportError as exc:
        raise AnalyticsCapabilityError(
            "lancedb is required for analytics embeddings; "
            "install with: uv sync --extra analytics"
        ) from exc


def _schema(pa: ModuleType, dimension: int) -> object:
    """Build the Arrow schema of the vector table.

    *pa* is the imported ``pyarrow`` module; the ``vector`` column is a
    fixed-size list of float32 with *dimension* entries.
    """
    return pa.schema(
        [
            pa.field("vector_row_key", pa.string()),
            pa.field("embedding_generation_id", pa.string()),
            pa.field("snapshot_item_id", pa.string()),
            pa.field("vector_digest", pa.string()),
            pa.field("vector", pa.list_(pa.float32(), dimension)),
        ]
    )


def vector_row_key(*, embedding_generation_id: str, snapshot_item_id: str) -> str:
    """Return the row key derived from a generation id and an item id."""
    return sha256_hex(f"{embedding_generation_id}\n{snapshot_item_id}")


def vector_digest(vector: Sequence[float]) -> str:
    """Return the SHA-256 hex digest of *vector*.

    Components are rendered with 8 decimal places and joined by commas
    before hashing, so values that agree to 8 decimals share a digest.
    """
    payload = ",".join(f"{value:.8f}" for value in vector)
    return hashlib.sha256(payload.encode("utf-8")).hexdigest()


class AnalyticsVectorStore:
    """Separate LanceDB sidecar for analytics corpus vectors."""

    def __init__(self, *, path: Path, dimension: int) -> None:
        """Connect to the LanceDB directory at *path*, creating it if needed.

        Raises:
            AnalyticsCapabilityError: if ``lancedb`` is not installed.
        """
        lancedb = _load_lancedb()
        pyarrow = importlib.import_module("pyarrow")
        self._dimension = dimension
        path.mkdir(parents=True, exist_ok=True)
        self._conn = cast(_LanceConnection, lancedb.connect(str(path)))
        self._table = self._open_or_create_table(pyarrow)

    @staticmethod
    def _sql_literal(value: str) -> str:
        """Return *value* as a single-quoted SQL string literal.

        Embedded single quotes are doubled so that an id containing ``'``
        cannot terminate the literal and alter the filter predicate.
        """
        return "'" + value.replace("'", "''") + "'"

    def _open_or_create_table(self, pyarrow: ModuleType) -> _LanceTable:
        """Open the vector table, creating it when it does not exist yet."""
        try:
            return self._conn.open_table(_TABLE_NAME)
        except ValueError as exc:
            # Only the "table missing" error is treated as "create it";
            # any other ValueError is propagated unchanged.
            if f"Table '{_TABLE_NAME}' was not found" not in str(exc):
                raise
        return self._conn.create_table(
            _TABLE_NAME,
            schema=_schema(pyarrow, self._dimension),
            exist_ok=True,
        )

    def write_vectors(
        self,
        *,
        embedding_generation_id: str,
        rows: Sequence[Mapping[str, object]],
    ) -> None:
        """Upsert vectors for one embedding generation.

        Each row must provide ``"snapshot_item_id"`` and ``"vector"`` (a
        ``list`` of numbers). Rows are merged on ``vector_row_key``, so
        writing the same item again replaces its stored row.

        Raises:
            TypeError: if a row's ``"vector"`` is not a ``list``.
            KeyError: if a row lacks a required key.
        """
        records: list[dict[str, object]] = []
        for row in rows:
            snapshot_item_id = str(row["snapshot_item_id"])
            vector = row["vector"]
            if not isinstance(vector, list):
                msg = "vector must be a list of floats"
                raise TypeError(msg)
            float_vector = [float(value) for value in vector]
            records.append(
                {
                    "vector_row_key": vector_row_key(
                        embedding_generation_id=embedding_generation_id,
                        snapshot_item_id=snapshot_item_id,
                    ),
                    "embedding_generation_id": embedding_generation_id,
                    "snapshot_item_id": snapshot_item_id,
                    "vector_digest": vector_digest(float_vector),
                    "vector": float_vector,
                }
            )
        if not records:
            return
        (
            self._table.merge_insert("vector_row_key")
            .when_matched_update_all()
            .when_not_matched_insert_all()
            .execute(records)
        )

    def read_vectors(
        self,
        *,
        embedding_generation_id: str,
        snapshot_item_ids: Sequence[str],
    ) -> dict[str, list[float]]:
        """Return stored vectors for the requested items of one generation.

        Ids are de-duplicated and queried in batches of ``_ID_QUERY_BATCH``.
        Items without a stored vector are simply absent from the result.
        """
        if not snapshot_item_ids:
            return {}
        loaded: dict[str, list[float]] = {}
        # Invariant across batches, so quote it once.
        generation_literal = self._sql_literal(embedding_generation_id)
        ordered = sorted(set(snapshot_item_ids))
        for start in range(0, len(ordered), _ID_QUERY_BATCH):
            batch = ordered[start : start + _ID_QUERY_BATCH]
            quoted = ", ".join(self._sql_literal(item) for item in batch)
            rows = (
                self._table.search(None)
                .select(["snapshot_item_id", "vector"])
                .where(
                    f"embedding_generation_id = {generation_literal} "
                    f"AND snapshot_item_id IN ({quoted})"
                )
                .limit(len(batch))
                .to_list()
            )
            for row in rows:
                item_id = row.get("snapshot_item_id")
                vector = row.get("vector")
                if isinstance(item_id, str) and isinstance(vector, list):
                    loaded[item_id] = [float(value) for value in vector]
        return loaded

    def close(self) -> None:
        """No-op; present to satisfy the store interface."""
        return None


__all__ = ["AnalyticsVectorStore", "vector_digest", "vector_row_key"]
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import uuid +from collections.abc import Sequence +from dataclasses import dataclass +from pathlib import Path + +from ..config.analytics import AnalyticsConfig, resolve_analytics_config +from ..observability import operation +from ..report.meta import current_report_timestamp_utc +from ..utils.json_io import json_text +from .clustering.canonicalize import ( + canonicalize_partitions, + display_cluster_id_map, + partition_membership_map, +) +from .clustering.diagnostics import ( + build_cluster_diagnostics, + compute_centroids, + nearest_cluster_ids, +) +from .clustering.models import NOISE_LABEL, ClusteringParameters +from .clustering.pipeline import run_clustering_pipeline +from .clustering.sweep import ( + SweepCandidateResult, + iter_sweep_candidates, + rank_sweep_results, + run_digest, + score_clustering_result, +) +from .contracts import ( + ClusterAssignmentRecord, + ClusteringRunRecord, + ClusterSummaryRecord, + CorpusItemRecord, +) +from .corpus.snapshot import build_intent_snapshot +from .embedding.generation import ( + EmbeddingBatchResult, + generate_embeddings_for_snapshot, + load_snapshot_vectors, +) +from .exceptions import AnalyticsWorkflowError +from .store.protocols import SnapshotBuildResult +from .store.sqlite import SqliteCorpusAnalyticsStore +from .store.vectors_lancedb import AnalyticsVectorStore + + +@dataclass(frozen=True, slots=True) +class ClusterRunResult: + clustering_run_id: str + cluster_count: int + noise_count: int + + +@dataclass(frozen=True, slots=True) +class BuildResult: + snapshot_id: str + embedding_generation_id: str + clustering_run_ids: tuple[str, ...] 
+ recommended_run_id: str | None + + +def run_snapshot( + *, + root_path: Path, + representation_kind: str, + config: AnalyticsConfig | None = None, +) -> SnapshotBuildResult: + with operation(name="analytics.snapshot", surface="cli"): + return build_intent_snapshot( + root_path=root_path, + representation_kind=representation_kind, + config=config, + ) + + +def run_embed( + *, + root_path: Path, + snapshot_id: str, + config: AnalyticsConfig | None = None, +) -> EmbeddingBatchResult: + resolved_config = config or resolve_analytics_config(root_path) + store = SqliteCorpusAnalyticsStore.open(resolved_config.db_path) + vector_store = AnalyticsVectorStore( + path=resolved_config.vectors_path, + dimension=resolved_config.embedding_dimension, + ) + try: + if store.get_snapshot(snapshot_id) is None: + known = ", ".join(item.snapshot_id for item in store.list_snapshots()[:5]) + msg = f"unknown snapshot: {snapshot_id}" + if known: + msg = f"{msg}; known snapshots: {known}" + raise AnalyticsWorkflowError(msg) + with operation(name="analytics.embed", surface="cli"): + return generate_embeddings_for_snapshot( + store=store, + vector_store=vector_store, + config=resolved_config, + snapshot_id=snapshot_id, + ) + finally: + store.close() + vector_store.close() + + +def run_clustering( + *, + root_path: Path, + snapshot_id: str, + embedding_generation_id: str, + requested: ClusteringParameters | None = None, + sweep: bool = False, + config: AnalyticsConfig | None = None, +) -> tuple[str, ...]: + resolved_config = config or resolve_analytics_config(root_path) + store = SqliteCorpusAnalyticsStore.open(resolved_config.db_path) + vector_store = AnalyticsVectorStore( + path=resolved_config.vectors_path, + dimension=resolved_config.embedding_dimension, + ) + try: + with operation(name="analytics.cluster", surface="cli"): + items = store.list_items(snapshot_id) + if not items: + raise AnalyticsWorkflowError("snapshot has no corpus items") + vectors = load_snapshot_vectors( + 
vector_store=vector_store, + embedding_generation_id=embedding_generation_id, + items=items, + ) + item_ids = [item.snapshot_item_id for item in items] + if sweep: + return _run_sweep( + store=store, + snapshot_id=snapshot_id, + embedding_generation_id=embedding_generation_id, + item_ids=item_ids, + items=items, + vectors=vectors, + config=resolved_config, + ) + params = requested or ClusteringParameters( + pca_dimensions=resolved_config.default_pca_dimensions, + min_cluster_size=resolved_config.default_min_cluster_size, + min_samples=resolved_config.default_min_samples, + cluster_selection_method=resolved_config.default_cluster_selection_method, + ) + run_id = _execute_single_run( + store=store, + snapshot_id=snapshot_id, + embedding_generation_id=embedding_generation_id, + item_ids=item_ids, + items=items, + vectors=vectors, + requested=params, + config=resolved_config, + recommended_by_heuristic=False, + ) + store.commit() + return (run_id,) + finally: + store.close() + vector_store.close() + + +def select_cluster_run( + *, + root_path: Path, + clustering_run_id: str, + config: AnalyticsConfig | None = None, +) -> None: + resolved_config = config or resolve_analytics_config(root_path) + store = SqliteCorpusAnalyticsStore.open(resolved_config.db_path) + try: + run = store.get_clustering_run(clustering_run_id) + if run is None: + raise AnalyticsWorkflowError(f"unknown clustering run: {clustering_run_id}") + store.set_selected_run( + snapshot_id=run.snapshot_id, + embedding_generation_id=run.embedding_generation_id, + clustering_run_id=clustering_run_id, + ) + store.commit() + finally: + store.close() + + +def run_build( + *, + root_path: Path, + representation_kind: str, + sweep: bool = False, + use_recommended: bool = False, + config: AnalyticsConfig | None = None, +) -> BuildResult: + resolved_config = config or resolve_analytics_config(root_path) + with operation(name="analytics.build", surface="cli"): + snapshot = run_snapshot( + root_path=root_path, + 
representation_kind=representation_kind, + config=resolved_config, + ) + embed = run_embed( + root_path=root_path, + snapshot_id=snapshot.snapshot_id, + config=resolved_config, + ) + run_ids = run_clustering( + root_path=root_path, + snapshot_id=snapshot.snapshot_id, + embedding_generation_id=embed.embedding_generation_id, + sweep=sweep, + config=resolved_config, + ) + recommended: str | None = None + if sweep and run_ids: + store = SqliteCorpusAnalyticsStore.open(resolved_config.db_path) + try: + runs = store.list_clustering_runs( + snapshot_id=snapshot.snapshot_id, + embedding_generation_id=embed.embedding_generation_id, + ) + for run in runs: + if run.recommended_by_heuristic: + recommended = run.clustering_run_id + break + finally: + store.close() + if use_recommended and recommended is None and run_ids: + recommended = run_ids[0] + return BuildResult( + snapshot_id=snapshot.snapshot_id, + embedding_generation_id=embed.embedding_generation_id, + clustering_run_ids=run_ids, + recommended_run_id=recommended, + ) + + +def _run_sweep( + *, + store: SqliteCorpusAnalyticsStore, + snapshot_id: str, + embedding_generation_id: str, + item_ids: list[str], + items: Sequence[CorpusItemRecord], + vectors: list[list[float]], + config: AnalyticsConfig, +) -> tuple[str, ...]: + candidates = iter_sweep_candidates( + n_samples=len(item_ids), + n_features=len(vectors[0]) if vectors else 0, + ) + run_ids: list[str] = [] + scored: list[SweepCandidateResult] = [] + for candidate in candidates: + run_id = _execute_single_run( + store=store, + snapshot_id=snapshot_id, + embedding_generation_id=embedding_generation_id, + item_ids=item_ids, + items=items, + vectors=vectors, + requested=candidate.requested, + config=config, + recommended_by_heuristic=False, + ) + run_ids.append(run_id) + result = store.get_clustering_run(run_id) + if result is None: + continue + assignments = store.list_assignments(run_id) + noise = sum(1 for item in assignments if item.cluster_label == NOISE_LABEL) + 
cluster_labels = { + item.cluster_label + for item in assignments + if item.cluster_label != NOISE_LABEL + } + scored.append( + SweepCandidateResult( + candidate=candidate, + score=score_clustering_result( + cluster_count=len(cluster_labels), + noise_fraction=noise / len(assignments) if assignments else 1.0, + n_samples=len(assignments), + ), + cluster_count=len(cluster_labels), + noise_fraction=noise / len(assignments) if assignments else 1.0, + ) + ) + best = rank_sweep_results(scored) + if best is not None and run_ids: + best_run_id = run_ids[scored.index(best)] + store.set_recommended_run( + snapshot_id=snapshot_id, + embedding_generation_id=embedding_generation_id, + clustering_run_id=best_run_id, + ) + store.commit() + return tuple(run_ids) + + +def _execute_single_run( + *, + store: SqliteCorpusAnalyticsStore, + snapshot_id: str, + embedding_generation_id: str, + item_ids: list[str], + items: Sequence[CorpusItemRecord], + vectors: list[list[float]], + requested: ClusteringParameters, + config: AnalyticsConfig, + recommended_by_heuristic: bool, +) -> str: + pipeline = run_clustering_pipeline( + snapshot_item_ids=item_ids, + embeddings=vectors, + requested=requested, + random_seed=config.cluster_random_seed, + ) + if pipeline is None: + raise AnalyticsWorkflowError("clustering parameters produced no valid run") + run_id = f"run-{uuid.uuid4().hex[:16]}" + created_at = current_report_timestamp_utc() + effective = pipeline.effective_parameters + run = ClusteringRunRecord( + clustering_run_id=run_id, + snapshot_id=snapshot_id, + embedding_generation_id=embedding_generation_id, + requested_parameters_json=json_text( + { + "pca_dimensions": requested.pca_dimensions, + "min_cluster_size": requested.min_cluster_size, + "min_samples": requested.min_samples, + "cluster_selection_method": requested.cluster_selection_method, + }, + sort_keys=True, + ), + effective_parameters_json=json_text( + { + "pca_dimensions": effective.pca_dimensions, + "min_cluster_size": 
effective.min_cluster_size, + "min_samples": effective.min_samples, + "cluster_selection_method": effective.cluster_selection_method, + "n_samples": effective.n_samples, + "n_features": effective.n_features, + }, + sort_keys=True, + ), + random_seed=config.cluster_random_seed, + run_digest=run_digest( + snapshot_id=snapshot_id, + embedding_generation_id=embedding_generation_id, + effective=effective, + random_seed=config.cluster_random_seed, + ), + recommended_by_heuristic=recommended_by_heuristic, + selected_by_maintainer=False, + status="completed", + created_at_utc=created_at, + finished_at_utc=current_report_timestamp_utc(), + error_message=None, + ) + store.insert_clustering_run(run) + partitions = canonicalize_partitions(pipeline.partitions) + membership_map = partition_membership_map(partitions) + coordinates = dict(zip(item_ids, pipeline.reduced_coordinates, strict=True)) + items_by_id = {item.snapshot_item_id: item for item in items} + strength_by_id = dict(zip(item_ids, pipeline.membership_strengths, strict=True)) + assignments: list[ClusterAssignmentRecord] = [] + for item_id, label, strength in zip( + item_ids, + pipeline.labels, + pipeline.membership_strengths, + strict=True, + ): + assignments.append( + ClusterAssignmentRecord( + clustering_run_id=run_id, + snapshot_item_id=item_id, + cluster_label=label, + membership_strength=strength, + membership_digest=membership_map.get(item_id, ""), + ) + ) + store.insert_cluster_assignments(assignments) + display_map = display_cluster_id_map(partitions) + centroids = compute_centroids(partitions=partitions, coordinates=coordinates) + summaries: list[ClusterSummaryRecord] = [] + for partition in partitions: + diagnostics = build_cluster_diagnostics( + partition=partition, + items_by_id=items_by_id, + coordinates=coordinates, + membership_strengths=strength_by_id, + total_items=len(items), + min_correlation_sample_size=config.min_correlation_sample_size, + ) + if partition.cluster_label != NOISE_LABEL: + 
diagnostics["nearest_clusters"] = list( + nearest_cluster_ids( + cluster_label=partition.cluster_label, + centroids=centroids, + ) + ) + summaries.append( + ClusterSummaryRecord( + clustering_run_id=run_id, + cluster_label=partition.cluster_label, + display_cluster_id=display_map.get(partition.cluster_label), + membership_digest=partition.membership_digest, + size=len(partition.snapshot_item_ids), + diagnostics_json=json_text(diagnostics, sort_keys=True), + ) + ) + store.insert_cluster_summaries(summaries) + return run_id + + +__all__ = [ + "BuildResult", + "ClusterRunResult", + "run_build", + "run_clustering", + "run_embed", + "run_snapshot", + "select_cluster_run", +] diff --git a/codeclone/audit/reader.py b/codeclone/audit/reader.py index ca186ab3..c0ba8c52 100644 --- a/codeclone/audit/reader.py +++ b/codeclone/audit/reader.py @@ -269,6 +269,40 @@ def read_audit_summary(*, db_path: Path, limit: int = 50) -> AuditSummary: ) +def read_intent_declared_records( + *, + db_path: Path, + repo_root_digest: str, +) -> tuple[AuditRecord, ...]: + """Return audit ``intent.declared`` rows for a repository, ordered by id ASC.""" + + from .events import EVENT_INTENT_DECLARED + + if not db_path.is_file(): + return () + try: + conn = open_audit_db_readonly(db_path) + except (sqlite3.Error, AuditSchemaError, OSError) as exc: + raise AuditReadError(f"cannot open audit database: {exc}") from exc + try: + rows = conn.execute( + "SELECT id, event_id, event_type, severity, created_at_utc, run_id, " + "intent_id, report_digest, workflow_id, surface, tool_name, " + "event_core_json, event_core_sha256, payload_sha256, " + "status, agent_label, summary, " + "estimated_tokens, token_encoding, payload_characters, payload_json " + "FROM controller_events " + "WHERE repo_root_digest = ? AND event_type = ? 
" + "ORDER BY id ASC", + (repo_root_digest, EVENT_INTENT_DECLARED), + ).fetchall() + except (sqlite3.Error, AuditSchemaError) as exc: + raise AuditReadError(f"cannot read audit database: {exc}") from exc + finally: + conn.close() + return tuple(_record_from_row(row) for row in rows) + + def read_audit_event_core_records( *, db_path: Path, @@ -741,5 +775,6 @@ def _short_run_id(run_id: str | None, payload: Mapping[str, object]) -> str | No "payload_footprint_to_dict", "read_audit_event_core_records", "read_audit_summary", + "read_intent_declared_records", "read_latest_analysis_run", ] diff --git a/codeclone/audit/schema.py b/codeclone/audit/schema.py index 867031ac..af85cf87 100644 --- a/codeclone/audit/schema.py +++ b/codeclone/audit/schema.py @@ -14,8 +14,6 @@ from ..utils.sqlite_store import ( get_meta_value, initialize_schema_v1, - open_sqlite_db, - open_sqlite_db_readonly, ) from .validation import AUDIT_SCHEMA_VERSION, AuditSchemaError @@ -118,21 +116,19 @@ def open_audit_db(path: Path) -> sqlite3.Connection: - conn = open_sqlite_db(path, ensure_schema=ensure_schema) - from ..observability import instrument_db_connection + from ..observability.sqlite_access import open_instrumented_sqlite_db - instrument_db_connection(conn) - return conn + return open_instrumented_sqlite_db(path, ensure_schema=ensure_schema) def open_audit_db_readonly(path: Path) -> sqlite3.Connection: """Open a structurally readable audit database without mutating it.""" + from ..observability.sqlite_access import open_instrumented_sqlite_db_readonly - conn = open_sqlite_db_readonly(path, validate_schema=_validate_readonly_schema) - from ..observability import instrument_db_connection - - instrument_db_connection(conn) - return conn + return open_instrumented_sqlite_db_readonly( + path, + validate_schema=_validate_readonly_schema, + ) def ensure_schema(conn: sqlite3.Connection) -> None: diff --git a/codeclone/config/analytics.py b/codeclone/config/analytics.py new file mode 100644 index 
00000000..2f988464 --- /dev/null +++ b/codeclone/config/analytics.py @@ -0,0 +1,174 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from dataclasses import dataclass +from pathlib import Path + +from pydantic import BaseModel, ConfigDict, Field, ValidationError + +from ..utils.repo_paths import RepoPathPolicy, resolve_under_repo_root +from .analytics_specs import ANALYTICS_NESTED_TABLE_KEY +from .memory import resolve_memory_config +from .pyproject_loader import load_pyproject_config + +DEFAULT_ANALYTICS_DB_RELATIVE = ".codeclone/analytics/corpus_clustering.sqlite3" +DEFAULT_ANALYTICS_VECTORS_RELATIVE = ".codeclone/analytics/corpus_vectors" +DEFAULT_EMBEDDING_MODEL = "BAAI/bge-small-en-v1.5" +DEFAULT_EMBEDDING_DIMENSION = 384 +DEFAULT_EMBEDDING_PROVIDER = "fastembed" +DEFAULT_MIN_CORRELATION_SAMPLE_SIZE = 5 +DEFAULT_CLUSTER_RANDOM_SEED = 42 +DEFAULT_PCA_DIMENSIONS = 64 +DEFAULT_MIN_CLUSTER_SIZE = 8 +DEFAULT_MIN_SAMPLES = 3 +DEFAULT_CLUSTER_SELECTION_METHOD = "eom" + + +class AnalyticsPyprojectTable(BaseModel): + """Validated ``[tool.codeclone.analytics]`` table.""" + + model_config = ConfigDict(extra="forbid", frozen=True) + + db_path: str | None = None + vectors_path: str | None = None + embedding_model: str | None = None + embedding_dimension: int | None = Field(default=None, gt=0) + embedding_provider: str | None = None + embedding_cache_dir: str | None = None + min_correlation_sample_size: int | None = Field(default=None, gt=0) + cluster_random_seed: int | None = None + default_pca_dimensions: int | None = Field(default=None, gt=0) + default_min_cluster_size: int | None = Field(default=None, gt=0) + default_min_samples: int | None = Field(default=None, gt=0) + default_cluster_selection_method: 
str | None = None + allow_model_download: bool | None = None + + +@dataclass(frozen=True, slots=True) +class AnalyticsConfig: + db_path: Path + vectors_path: Path + embedding_model: str + embedding_dimension: int + embedding_provider: str + embedding_cache_dir: Path + min_correlation_sample_size: int + cluster_random_seed: int + default_pca_dimensions: int + default_min_cluster_size: int + default_min_samples: int + default_cluster_selection_method: str + allow_model_download: bool + + +def _resolve_path(root_path: Path, raw: str | None, default_relative: str) -> Path: + policy = RepoPathPolicy(allow_absolute=True) + selected = raw if raw is not None else default_relative + return resolve_under_repo_root(root_path, selected, policy=policy) + + +def _load_pyproject_table(root_path: Path) -> AnalyticsPyprojectTable | None: + payload = load_pyproject_config(root_path) + raw = payload.get(ANALYTICS_NESTED_TABLE_KEY) + if raw is None: + return None + if not isinstance(raw, dict): + msg = "tool.codeclone.analytics must be a table" + raise TypeError(msg) + try: + return AnalyticsPyprojectTable.model_validate(raw) + except ValidationError as exc: + raise ValueError(str(exc)) from exc + + +def resolve_analytics_config(root_path: Path) -> AnalyticsConfig: + resolved_root = root_path.resolve() + table = _load_pyproject_table(resolved_root) + # The FastEmbed model artifact is a multi-hundred-MB download; analytics + # vectors are kept separate (own LanceDB sidecar + embedding_generation_id), + # but the model weights are shared with Engineering Memory rather than + # re-downloaded into a second cache. Default the model cache + download + # policy to the resolved memory semantic config (single source of truth). 
+ memory_semantic = resolve_memory_config(resolved_root).semantic + return AnalyticsConfig( + db_path=_resolve_path( + resolved_root, + table.db_path if table is not None else None, + DEFAULT_ANALYTICS_DB_RELATIVE, + ), + vectors_path=_resolve_path( + resolved_root, + table.vectors_path if table is not None else None, + DEFAULT_ANALYTICS_VECTORS_RELATIVE, + ), + embedding_model=( + table.embedding_model + if table is not None and table.embedding_model is not None + else DEFAULT_EMBEDDING_MODEL + ), + embedding_dimension=( + table.embedding_dimension + if table is not None and table.embedding_dimension is not None + else DEFAULT_EMBEDDING_DIMENSION + ), + embedding_provider=( + table.embedding_provider + if table is not None and table.embedding_provider is not None + else DEFAULT_EMBEDDING_PROVIDER + ), + embedding_cache_dir=_resolve_path( + resolved_root, + table.embedding_cache_dir if table is not None else None, + memory_semantic.embedding_cache_dir, + ), + min_correlation_sample_size=( + table.min_correlation_sample_size + if table is not None and table.min_correlation_sample_size is not None + else DEFAULT_MIN_CORRELATION_SAMPLE_SIZE + ), + cluster_random_seed=( + table.cluster_random_seed + if table is not None and table.cluster_random_seed is not None + else DEFAULT_CLUSTER_RANDOM_SEED + ), + default_pca_dimensions=( + table.default_pca_dimensions + if table is not None and table.default_pca_dimensions is not None + else DEFAULT_PCA_DIMENSIONS + ), + default_min_cluster_size=( + table.default_min_cluster_size + if table is not None and table.default_min_cluster_size is not None + else DEFAULT_MIN_CLUSTER_SIZE + ), + default_min_samples=( + table.default_min_samples + if table is not None and table.default_min_samples is not None + else DEFAULT_MIN_SAMPLES + ), + default_cluster_selection_method=( + table.default_cluster_selection_method + if table is not None and table.default_cluster_selection_method is not None + else DEFAULT_CLUSTER_SELECTION_METHOD + ), + 
allow_model_download=( + table.allow_model_download + if table is not None and table.allow_model_download is not None + else memory_semantic.allow_model_download + ), + ) + + +__all__ = [ + "DEFAULT_ANALYTICS_DB_RELATIVE", + "DEFAULT_ANALYTICS_VECTORS_RELATIVE", + "DEFAULT_MIN_CORRELATION_SAMPLE_SIZE", + "AnalyticsConfig", + "AnalyticsPyprojectTable", + "resolve_analytics_config", +] diff --git a/codeclone/config/analytics_specs.py b/codeclone/config/analytics_specs.py new file mode 100644 index 00000000..663412ec --- /dev/null +++ b/codeclone/config/analytics_specs.py @@ -0,0 +1,24 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from typing import Final + +ANALYTICS_NESTED_TABLE_KEY: Final = "analytics" + +ANALYTICS_PATH_CONFIG_KEYS: Final = frozenset( + { + "db_path", + "vectors_path", + "embedding_cache_dir", + } +) + +__all__ = [ + "ANALYTICS_NESTED_TABLE_KEY", + "ANALYTICS_PATH_CONFIG_KEYS", +] diff --git a/codeclone/config/pyproject_loader.py b/codeclone/config/pyproject_loader.py index 015f0260..2b81aaaa 100644 --- a/codeclone/config/pyproject_loader.py +++ b/codeclone/config/pyproject_loader.py @@ -15,6 +15,10 @@ GoldenFixturePatternError, normalize_golden_fixture_patterns, ) +from .analytics_specs import ( + ANALYTICS_NESTED_TABLE_KEY, + ANALYTICS_PATH_CONFIG_KEYS, +) from .memory_specs import ( INGEST_NESTED_TABLE_KEY, MEMORY_CONFIG_KEY_SPECS, @@ -129,7 +133,9 @@ def load_pyproject_config( ) unknown = sorted( - set(codeclone_obj.keys()) - set(config_key_specs) - {MEMORY_NESTED_TABLE_KEY} + set(codeclone_obj.keys()) + - set(config_key_specs) + - {MEMORY_NESTED_TABLE_KEY, ANALYTICS_NESTED_TABLE_KEY} ) if unknown: raise ConfigValidationError( @@ -138,7 +144,7 @@ def load_pyproject_config( 
validated: dict[str, object] = {} for key in sorted(codeclone_obj.keys()): - if key == MEMORY_NESTED_TABLE_KEY: + if key in {MEMORY_NESTED_TABLE_KEY, ANALYTICS_NESTED_TABLE_KEY}: continue value = validate_config_value( key=key, @@ -159,9 +165,42 @@ def load_pyproject_config( root_path=root_path, config_path=config_path, ) + analytics_obj = codeclone_obj.get(ANALYTICS_NESTED_TABLE_KEY) + if analytics_obj is not None: + validated[ANALYTICS_NESTED_TABLE_KEY] = _validate_nested_analytics_table( + analytics_obj=analytics_obj, + root_path=root_path, + config_path=config_path, + ) return validated +def _validate_nested_analytics_table( + *, + analytics_obj: object, + root_path: Path, + config_path: Path, +) -> dict[str, object]: + if not isinstance(analytics_obj, dict): + raise ConfigValidationError( + "Invalid pyproject payload at " + f"{config_path}: 'tool.codeclone.analytics' must be object" + ) + normalized: dict[str, object] = {} + for key in sorted(analytics_obj.keys()): + value = analytics_obj[key] + if key in ANALYTICS_PATH_CONFIG_KEYS and isinstance(value, str): + normalized[key] = normalize_path_config_value( + key=key, + value=value, + root_path=root_path, + path_config_keys=ANALYTICS_PATH_CONFIG_KEYS, + ) + else: + normalized[key] = value + return normalized + + def _validate_nested_memory_table( *, memory_obj: object, diff --git a/codeclone/contracts/__init__.py b/codeclone/contracts/__init__.py index 51085ee6..10d1d202 100644 --- a/codeclone/contracts/__init__.py +++ b/codeclone/contracts/__init__.py @@ -39,6 +39,16 @@ # IDE governance HMAC attestation protocol version (VS Code Memory channel). IDE_GOVERNANCE_PROTOCOL_VERSION: Final = 2 +# Corpus analytics store (.codeclone/analytics/corpus_clustering.sqlite3) and +# derived export/representation contracts. Bump independently from memory schema. 
+CORPUS_ANALYTICS_STORE_SCHEMA_VERSION: Final = "1.0" +CORPUS_EXPORT_SCHEMA_VERSION: Final = "1.0" +CORPUS_REPRESENTATION_CONTRACT_VERSION: Final = "1" +CORPUS_NORMALIZER_VERSION: Final = "1" +CORPUS_EMBEDDING_CONTRACT_VERSION: Final = "1" +CORPUS_AGENT_LABEL_CONTRACT_VERSION: Final = "1" +CORPUS_PARTITION_MAP_VERSION: Final = "1" + DEFAULT_COMPLEXITY_THRESHOLD: Final = 20 DEFAULT_COUPLING_THRESHOLD: Final = 10 DEFAULT_COHESION_THRESHOLD: Final = 4 @@ -123,6 +133,13 @@ def cli_help_epilog() -> str: "COHESION_RISK_MEDIUM_MAX", "COMPLEXITY_RISK_LOW_MAX", "COMPLEXITY_RISK_MEDIUM_MAX", + "CORPUS_AGENT_LABEL_CONTRACT_VERSION", + "CORPUS_ANALYTICS_STORE_SCHEMA_VERSION", + "CORPUS_EMBEDDING_CONTRACT_VERSION", + "CORPUS_EXPORT_SCHEMA_VERSION", + "CORPUS_NORMALIZER_VERSION", + "CORPUS_PARTITION_MAP_VERSION", + "CORPUS_REPRESENTATION_CONTRACT_VERSION", "COUPLING_RISK_LOW_MAX", "COUPLING_RISK_MEDIUM_MAX", "DEFAULT_BASELINE_PATH", diff --git a/codeclone/memory/schema.py b/codeclone/memory/schema.py index 8015d79f..512ea69b 100644 --- a/codeclone/memory/schema.py +++ b/codeclone/memory/schema.py @@ -14,7 +14,6 @@ from ..report.meta import current_report_timestamp_utc from ..utils.sqlite_store import ( initialize_schema_v1, - open_sqlite_db, ) from .exceptions import MemorySchemaError from .schema_experience import ( @@ -231,20 +230,21 @@ def open_memory_db(path: Path) -> sqlite3.Connection: - # synchronous=FULL: every commit survives unclean process exit. - # Memory records are few, each governance-governed and valuable. - conn = open_sqlite_db( + from ..observability.sqlite_access import open_instrumented_sqlite_db + + return open_instrumented_sqlite_db( path, ensure_schema=ensure_schema, foreign_keys=True, synchronous="FULL", ) - # Performance telemetry only: count SQL per active observability span so the - # cockpit can attribute span cost to DB work. No-op when disabled. 
- from ..observability import instrument_db_connection - instrument_db_connection(conn) - return conn + +def open_memory_db_readonly(path: Path) -> sqlite3.Connection: + """Open an existing engineering-memory database without allowing writes.""" + from ..observability.sqlite_access import open_instrumented_sqlite_db_readonly + + return open_instrumented_sqlite_db_readonly(path, validate_schema=ensure_schema) def ensure_schema(conn: sqlite3.Connection) -> None: @@ -293,5 +293,6 @@ def create_schema_v1(conn: sqlite3.Connection) -> None: "ensure_schema", "get_meta", "open_memory_db", + "open_memory_db_readonly", "set_meta", ] diff --git a/codeclone/memory/trajectory/store.py b/codeclone/memory/trajectory/store.py index a65b46bd..c0c95ddd 100644 --- a/codeclone/memory/trajectory/store.py +++ b/codeclone/memory/trajectory/store.py @@ -500,6 +500,24 @@ def list_trajectories_for_subjects( return _find_trajectories_by_ids(conn, [str(row["id"]) for row in rows]) +def list_trajectories_for_intent_id( + conn: sqlite3.Connection, + *, + project_id: str, + intent_id: str, +) -> tuple[Trajectory, ...]: + rows = conn.execute( + """ + SELECT id + FROM memory_trajectories + WHERE project_id=? AND intent_id=? + ORDER BY finished_at_utc DESC, id ASC + """, + (project_id, intent_id), + ).fetchall() + return tuple(_find_trajectories_by_ids(conn, [str(row["id"]) for row in rows])) + + def search_trajectories( conn: sqlite3.Connection, *, @@ -915,6 +933,7 @@ def load_trajectory_patch_trails( "find_trajectory", "latest_projection_run", "list_trajectories", + "list_trajectories_for_intent_id", "list_trajectories_for_subjects", "load_trajectory_patch_trail", "load_trajectory_patch_trails", diff --git a/codeclone/observability/sqlite_access.py b/codeclone/observability/sqlite_access.py new file mode 100644 index 00000000..372ea71b --- /dev/null +++ b/codeclone/observability/sqlite_access.py @@ -0,0 +1,52 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 
2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Shared SQLite open helpers with observability instrumentation.""" + +from __future__ import annotations + +import sqlite3 +from collections.abc import Callable +from pathlib import Path + +from ..utils.sqlite_store import open_sqlite_db, open_sqlite_db_readonly + + +def open_instrumented_sqlite_db( + path: Path, + *, + ensure_schema: Callable[[sqlite3.Connection], None], + foreign_keys: bool = False, + synchronous: str | None = None, +) -> sqlite3.Connection: + conn = open_sqlite_db( + path, + ensure_schema=ensure_schema, + foreign_keys=foreign_keys, + synchronous=synchronous, + ) + from codeclone.observability.runtime import instrument_db_connection + + instrument_db_connection(conn) + return conn + + +def open_instrumented_sqlite_db_readonly( + path: Path, + *, + validate_schema: Callable[[sqlite3.Connection], None], +) -> sqlite3.Connection: + conn = open_sqlite_db_readonly(path, validate_schema=validate_schema) + from codeclone.observability.runtime import instrument_db_connection + + instrument_db_connection(conn) + return conn + + +__all__ = [ + "open_instrumented_sqlite_db", + "open_instrumented_sqlite_db_readonly", +] diff --git a/codeclone/surfaces/cli/analytics.py b/codeclone/surfaces/cli/analytics.py new file mode 100644 index 00000000..1ff97977 --- /dev/null +++ b/codeclone/surfaces/cli/analytics.py @@ -0,0 +1,335 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Corpus analytics CLI subcommands.""" + +from __future__ import annotations + +import argparse +import sys +from collections.abc import Callable +from pathlib import Path + +from ...analytics.capabilities import ( + AnalyticsCapability, + check_capability, + install_hint, +) +from ...analytics.clustering.models import NOISE_LABEL +from ...analytics.contracts import ( + INTENT_REPRESENTATION_DESCRIPTION, + INTENT_REPRESENTATION_DESCRIPTION_WITH_FRAME, +) +from ...analytics.exceptions import AnalyticsCapabilityError, AnalyticsWorkflowError +from ...analytics.export.json_export import ( + export_clustering_json, + export_sweep_comparison_json, +) +from ...analytics.report.html import render_analytics_html +from ...analytics.store.sqlite import SqliteCorpusAnalyticsStore +from ...analytics.workflow import ( + BuildResult, + run_build, + run_clustering, + run_embed, + run_snapshot, + select_cluster_run, +) +from ...config.analytics import resolve_analytics_config +from ...contracts import ExitCode +from ...utils.json_io import write_json_document_atomically + + +def _representation_kind(raw: str) -> str: + if raw == "description": + return INTENT_REPRESENTATION_DESCRIPTION + if raw == "description_with_frame": + return INTENT_REPRESENTATION_DESCRIPTION_WITH_FRAME + msg = f"unsupported representation: {raw}" + raise AnalyticsWorkflowError(msg) + + +def _require_capability(capability: AnalyticsCapability) -> None: + status = check_capability(capability) + if not status.available: + missing = ", ".join(status.missing_packages) + raise AnalyticsCapabilityError( + f"missing analytics dependencies: {missing}. 
" + f"Install with: {install_hint(status.missing_packages)}" + ) + + +def _add_root(parser: argparse.ArgumentParser) -> None: + parser.add_argument( + "--root", + default=".", + help="Repository root (default: .)", + ) + + +def _build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser(prog="codeclone analytics") + sub = parser.add_subparsers(dest="command", required=True) + + snapshot = sub.add_parser("snapshot", help="Build immutable intent corpus snapshot") + _add_root(snapshot) + snapshot.add_argument( + "--representation", + choices=("description", "description_with_frame"), + default="description", + ) + snapshot.add_argument("--output-json", type=Path, default=None) + + embed = sub.add_parser("embed", help="Generate analytics embeddings for snapshot") + _add_root(embed) + embed.add_argument("--snapshot-id", required=True) + + cluster = sub.add_parser("cluster", help="Cluster embedded snapshot") + _add_root(cluster) + cluster.add_argument("--snapshot-id", required=True) + cluster.add_argument("--embedding-generation-id", required=True) + cluster.add_argument("--sweep", action="store_true") + cluster.add_argument("--select-run", dest="select_run", default=None) + + build = sub.add_parser("build", help="Snapshot, embed, and cluster end-to-end") + _add_root(build) + build.add_argument( + "--lane", + choices=("intent",), + default="intent", + ) + build.add_argument( + "--representation", + choices=("description", "description_with_frame"), + default="description", + ) + build.add_argument("--sweep", action="store_true") + build.add_argument("--use-recommended", action="store_true") + build.add_argument("--html-out", type=Path, default=None) + build.add_argument("--json-out", type=Path, default=None) + + clusters = sub.add_parser("clusters", help="List clustering runs for snapshot") + _add_root(clusters) + clusters.add_argument("--snapshot-id", required=True) + + cluster_show = sub.add_parser("cluster-show", help="Export one clustering run 
JSON") + _add_root(cluster_show) + cluster_show.add_argument("--snapshot-id", required=True) + cluster_show.add_argument("--run-id", required=True) + cluster_show.add_argument("--output", type=Path, default=None) + + outliers = sub.add_parser("outliers", help="Show noise cluster assignments") + _add_root(outliers) + outliers.add_argument("--snapshot-id", required=True) + outliers.add_argument("--run-id", required=True) + + return parser + + +def _run_snapshot_command(args: argparse.Namespace, root: Path) -> int: + _require_capability("base") + snapshot_result = run_snapshot( + root_path=root, + representation_kind=_representation_kind(args.representation), + ) + payload = { + "snapshot_id": snapshot_result.snapshot_id, + "source_digest": snapshot_result.source_digest, + "record_count": snapshot_result.record_count, + } + if args.output_json is not None: + write_json_document_atomically(args.output_json, payload) + else: + print(payload) + return ExitCode.SUCCESS + + +def _run_embed_command(args: argparse.Namespace, root: Path) -> int: + _require_capability("embed") + embed_result = run_embed(root_path=root, snapshot_id=args.snapshot_id) + print( + { + "embedding_generation_id": embed_result.embedding_generation_id, + "item_count": embed_result.item_count, + } + ) + return ExitCode.SUCCESS + + +def _run_cluster_command(args: argparse.Namespace, root: Path) -> int: + if args.select_run: + _require_capability("base") + select_cluster_run(root_path=root, clustering_run_id=args.select_run) + print({"selected_run_id": args.select_run}) + return ExitCode.SUCCESS + _require_capability("cluster") + run_ids = run_clustering( + root_path=root, + snapshot_id=args.snapshot_id, + embedding_generation_id=args.embedding_generation_id, + sweep=args.sweep, + ) + print({"clustering_run_ids": list(run_ids)}) + return ExitCode.SUCCESS + + +def _write_build_exports( + *, + args: argparse.Namespace, + root: Path, + build_result: BuildResult, +) -> None: + config = 
resolve_analytics_config(root) + store = SqliteCorpusAnalyticsStore.open(config.db_path) + try: + snapshot = store.get_snapshot(build_result.snapshot_id) + if snapshot is None: + raise AnalyticsWorkflowError("snapshot missing after build") + primary_run_id = build_result.recommended_run_id or ( + build_result.clustering_run_ids[0] + if build_result.clustering_run_ids + else None + ) + if args.json_out is not None and primary_run_id is not None: + if args.sweep and not args.use_recommended: + text = export_sweep_comparison_json( + store=store, + snapshot_id=build_result.snapshot_id, + embedding_generation_id=build_result.embedding_generation_id, + ) + else: + text = export_clustering_json( + store=store, + snapshot_id=build_result.snapshot_id, + clustering_run_id=primary_run_id, + ) + args.json_out.write_text(text, encoding="utf-8") + if args.html_out is not None and primary_run_id is not None: + run = store.get_clustering_run(primary_run_id) + if run is None: + raise AnalyticsWorkflowError("clustering run missing after build") + html = render_analytics_html( + store=store, + snapshot=snapshot, + run=run, + comparison_only=args.sweep and not args.use_recommended, + ) + args.html_out.write_text(html, encoding="utf-8") + finally: + store.close() + + +def _run_build_command(args: argparse.Namespace, root: Path) -> int: + _require_capability("full") + build_result = run_build( + root_path=root, + representation_kind=_representation_kind(args.representation), + sweep=args.sweep, + use_recommended=args.use_recommended, + ) + if args.json_out is not None or args.html_out is not None: + _write_build_exports(args=args, root=root, build_result=build_result) + print( + { + "snapshot_id": build_result.snapshot_id, + "embedding_generation_id": build_result.embedding_generation_id, + "clustering_run_ids": list(build_result.clustering_run_ids), + "recommended_run_id": build_result.recommended_run_id, + } + ) + return ExitCode.SUCCESS + + +def _run_clusters_command(args: 
argparse.Namespace, root: Path) -> int: + _require_capability("base") + config = resolve_analytics_config(root) + store = SqliteCorpusAnalyticsStore.open_readonly(config.db_path) + try: + runs = store.list_clustering_runs(snapshot_id=args.snapshot_id) + print( + [ + { + "clustering_run_id": run.clustering_run_id, + "recommended_by_heuristic": run.recommended_by_heuristic, + "selected_by_maintainer": run.selected_by_maintainer, + "status": run.status, + } + for run in runs + ] + ) + finally: + store.close() + return ExitCode.SUCCESS + + +def _run_cluster_show_command(args: argparse.Namespace, root: Path) -> int: + _require_capability("base") + config = resolve_analytics_config(root) + store = SqliteCorpusAnalyticsStore.open_readonly(config.db_path) + try: + text = export_clustering_json( + store=store, + snapshot_id=args.snapshot_id, + clustering_run_id=args.run_id, + ) + if args.output is not None: + args.output.write_text(text, encoding="utf-8") + else: + print(text) + finally: + store.close() + return ExitCode.SUCCESS + + +def _run_outliers_command(args: argparse.Namespace, root: Path) -> int: + _require_capability("base") + config = resolve_analytics_config(root) + store = SqliteCorpusAnalyticsStore.open_readonly(config.db_path) + try: + assignments = store.list_assignments(args.run_id) + noise = [ + item.snapshot_item_id + for item in assignments + if item.cluster_label == NOISE_LABEL + ] + print({"noise_items": noise}) + finally: + store.close() + return ExitCode.SUCCESS + + +_CommandHandler = Callable[[argparse.Namespace, Path], int] + +_COMMAND_HANDLERS: dict[str, _CommandHandler] = { + "snapshot": _run_snapshot_command, + "embed": _run_embed_command, + "cluster": _run_cluster_command, + "build": _run_build_command, + "clusters": _run_clusters_command, + "cluster-show": _run_cluster_show_command, + "outliers": _run_outliers_command, +} + + +def analytics_main(argv: list[str] | None = None) -> int: + parser = _build_parser() + args = parser.parse_args(argv) + 
root = Path(args.root).resolve() + handler = _COMMAND_HANDLERS.get(args.command) + if handler is None: + parser.error(f"unknown command: {args.command}") + return ExitCode.INTERNAL_ERROR + try: + return handler(args, root) + except AnalyticsCapabilityError as exc: + print(str(exc), file=sys.stderr) + return ExitCode.CONTRACT_ERROR + except AnalyticsWorkflowError as exc: + print(str(exc), file=sys.stderr) + return ExitCode.CONTRACT_ERROR + + +__all__ = ["analytics_main"] diff --git a/codeclone/surfaces/cli/workflow.py b/codeclone/surfaces/cli/workflow.py index 920fc245..822bd168 100644 --- a/codeclone/surfaces/cli/workflow.py +++ b/codeclone/surfaces/cli/workflow.py @@ -806,6 +806,10 @@ def _report_digest_from_document(report_document: dict[str, object]) -> str: def main() -> None: + if len(sys.argv) > 1 and sys.argv[1] == "analytics": + from .analytics import analytics_main + + raise SystemExit(analytics_main(sys.argv[2:])) if len(sys.argv) > 1 and sys.argv[1] == "memory": from .memory import memory_main diff --git a/codeclone/surfaces/mcp/_workspace_intent_schema.py b/codeclone/surfaces/mcp/_workspace_intent_schema.py index d4f93efb..146d50ef 100644 --- a/codeclone/surfaces/mcp/_workspace_intent_schema.py +++ b/codeclone/surfaces/mcp/_workspace_intent_schema.py @@ -14,8 +14,6 @@ from ...utils.sqlite_store import ( get_meta_value, initialize_schema_v1, - open_sqlite_db, - open_sqlite_db_readonly, ) INTENT_REGISTRY_SCHEMA_VERSION = "2" @@ -69,21 +67,18 @@ class IntentRegistrySchemaError(RuntimeError): def open_intent_registry_db(path: Path) -> sqlite3.Connection: - conn = open_sqlite_db(path, ensure_schema=ensure_schema) - from ...observability import instrument_db_connection + from ...observability.sqlite_access import open_instrumented_sqlite_db - instrument_db_connection(conn) - return conn + return open_instrumented_sqlite_db(path, ensure_schema=ensure_schema) def open_intent_registry_db_readonly(path: Path) -> sqlite3.Connection: - """Open a current registry 
without creating or migrating coordination state.""" + from ...observability.sqlite_access import open_instrumented_sqlite_db_readonly - conn = open_sqlite_db_readonly(path, validate_schema=_validate_readonly_schema) - from ...observability import instrument_db_connection - - instrument_db_connection(conn) - return conn + return open_instrumented_sqlite_db_readonly( + path, + validate_schema=_validate_readonly_schema, + ) def ensure_schema(conn: sqlite3.Connection) -> None: diff --git a/docs/README-pypi.md b/docs/README-pypi.md index 1430a370..911959ac 100644 --- a/docs/README-pypi.md +++ b/docs/README-pypi.md @@ -49,6 +49,7 @@ codeclone . --ci # CI mode - **Engineering Memory** — governed records, trajectory passports, and advisory Experiences - **MCP server** — 32-tool default interface for IDE and agent clients - **Platform Observability** — opt-in local diagnostics for CodeClone's own runtime +- **Corpus Analytics** — optional offline intent clustering (`codeclone[analytics]`) - **Reports** — HTML, JSON, Markdown, SARIF, text from one canonical payload ## MCP Server @@ -60,9 +61,11 @@ codeclone-mcp --transport stdio Native clients: VS Code extension, Claude Desktop bundle, Codex plugin. -Engineering Memory and runtime diagnostics: +Engineering Memory, Corpus Analytics, and runtime diagnostics: ```bash +uv tool install "codeclone[analytics]" +codeclone analytics build --root . --use-recommended codeclone memory trajectory dashboard --root . CODECLONE_OBSERVABILITY_ENABLED=1 codeclone . codeclone observability trace --root . --html /tmp/codeclone-observer.html @@ -73,6 +76,7 @@ codeclone observability trace --root . 
--html /tmp/codeclone-observer.html - Documentation: - Engineering Memory: - Platform Observability: +- Corpus Analytics: - Source: - Issues: diff --git a/docs/book/02-architecture-map.md b/docs/book/02-architecture-map.md index 2a03cecd..8055fb8b 100644 --- a/docs/book/02-architecture-map.md +++ b/docs/book/02-architecture-map.md @@ -44,6 +44,7 @@ Main ownership layers: | MCP surface | `codeclone/surfaces/mcp/*`, `codeclone/surfaces/mcp/messages/*` | Read-only MCP tools/resources, change-control projections, Engineering Memory retrieval/governance, dev-only Platform Observability slices, and centralized agent-facing copy | | Engineering Memory | `codeclone/memory/*`, `codeclone/config/memory*.py` | Local SQLite store, scoped retrieval, semantic sidecar, trajectory + Patch Trail projection, Experience distillation, coalesced rebuild jobs, staleness, governance, and CLI/MCP surfaces over deterministic report/git/doc/audit facts | | Platform Observability | `codeclone/observability/*` | Opt-in operation/span telemetry, local SQLite store, bounded MCP slicer, and CLI JSON/HTML diagnostics; never analysis truth or a gate input | +| Corpus Analytics | `codeclone/analytics/*`, `codeclone/config/analytics.py` | Optional offline intent corpus clustering (`codeclone[analytics]`); audit/trajectory ingestion, separate analytics embeddings, SQLite + LanceDB under `.codeclone/analytics/`; never report/gate/memory authority | | Controller insights | `codeclone/controller_insights/*` | Shared session-stats and audit-trail payloads for CLI `--session-stats` / `--audit` and IDE-only MCP `get_workspace_session_stats` / `get_controller_audit_trail` | | Audit trail | `codeclone/audit/*` | Optional controller event and MCP payload footprint recording under `.codeclone/db/` when enabled | | Client surfaces | `extensions/vscode-codeclone/*`, `extensions/claude-desktop-codeclone/*`, `plugins/codeclone/*`, `plugins/cursor-codeclone/*`, `plugins/claude-code-codeclone/*` | Native 
clients/install surfaces over `codeclone-mcp` | @@ -148,6 +149,7 @@ Refs: | MCP agent surface | [25-mcp-interface/index.md](25-mcp-interface/index.md), [14-claim-guard.md](14-claim-guard.md) | | Engineering Memory evidence layers | [13-engineering-memory/index.md](13-engineering-memory/index.md), [13-engineering-memory/trajectory-quality-and-passport.md](13-engineering-memory/trajectory-quality-and-passport.md), [13-engineering-memory/experience-layer.md](13-engineering-memory/experience-layer.md) | | Platform runtime diagnostics | [26-platform-observability.md](26-platform-observability.md) | +| Corpus analytics (intent clustering) | [27-corpus-analytics.md](27-corpus-analytics.md) | | Health score model | [15-health-score.md](15-health-score.md) | | Metrics gates and metrics baseline | [16-metrics-and-quality-gates.md](16-metrics-and-quality-gates.md) | | Dead-code liveness policy | [17-dead-code-contract.md](17-dead-code-contract.md) | diff --git a/docs/book/10-config-and-defaults.md b/docs/book/10-config-and-defaults.md index 82e8ba0a..9adc384d 100644 --- a/docs/book/10-config-and-defaults.md +++ b/docs/book/10-config-and-defaults.md @@ -326,6 +326,7 @@ instead of duplicating tables. | MCP workspace intent TTL / lease | `resolved_ttl_seconds`, `resolved_lease_seconds` | Explicit MCP tool parameter > env > built-in default | | Finish hygiene strict mode | `_strict_finish_enabled` | Env only (no pyproject key) | | Platform Observability | `resolve_observability_config` | Env only; disabled by default, no pyproject table | +| Corpus Analytics | `resolve_analytics_config` | `[tool.codeclone.analytics]` > built-in defaults; no env overrides in Slice 1 | | Cursor / IDE hooks | hook helpers | Env > repo config file (where noted) > built-in default | There is no generic `CODECLONE_MEMORY__*` nested env convention. Each variable @@ -344,6 +345,20 @@ Platform Observability is environment-only and disabled by default. 
It has no [Platform Observability](26-platform-observability.md) for the data and trust contracts. +### Corpus Analytics + +Optional intent corpus clustering uses `[tool.codeclone.analytics]`. Install +`codeclone[analytics]` before running `codeclone analytics …`. Paths resolve +under the repository root. Full key list: +[Corpus Analytics](27-corpus-analytics.md#configuration). + +Refs: + +- `codeclone/config/analytics.py:resolve_analytics_config` +- `codeclone/config/pyproject_loader.py:load_pyproject_config` + +### Platform Observability (environment) + | Variable | Values | Effect | |-------------------------------------------------|----------------|---------------------------------------------------------------------| | `CODECLONE_OBSERVABILITY_ENABLED` | truthy / falsy | Enable local operation/span instrumentation. | diff --git a/docs/book/11-cli.md b/docs/book/11-cli.md index f384e1e8..4e968995 100644 --- a/docs/book/11-cli.md +++ b/docs/book/11-cli.md @@ -137,6 +137,14 @@ Refs: cockpit views. - A missing local store is an informational success state. - Full contract: [Platform Observability](26-platform-observability.md). +- Corpus Analytics commands are terminal-only, offline clustering of historical + intents (requires `codeclone[analytics]`): + - `codeclone analytics snapshot|embed|cluster|build|clusters|cluster-show|outliers` + - `build` runs snapshot → embed → cluster; `--use-recommended` selects the + heuristic sweep winner. + - Representations: `description` (default) or `description_with_frame`. + - Artifacts live under `.codeclone/analytics/` (SQLite metadata + LanceDB vectors). + - Full contract: [Corpus Analytics](27-corpus-analytics.md). - Controller and workspace query flags are mutually exclusive where enforced: - `--blast-radius` and `--patch-verify` cannot be combined. - `--strictness {ci,strict,relaxed}` is valid only with `--patch-verify`. 
diff --git a/docs/book/24-compatibility-and-versioning.md b/docs/book/24-compatibility-and-versioning.md index 0ddaba76..f31f9128 100644 --- a/docs/book/24-compatibility-and-versioning.md +++ b/docs/book/24-compatibility-and-versioning.md @@ -41,6 +41,10 @@ Current contract versions: - `EXPERIENCE_DISTILLATION_VERSION = "experience-v1"` (derived Experience rows) - `SEMANTIC_INDEX_FORMAT_VERSION = "1"` (LanceDB sidecar; separate from SQLite memory schema) - `PLATFORM_OBSERVABILITY_SCHEMA_VERSION = "1.0"` (dev-only telemetry SQLite) +- `CORPUS_ANALYTICS_STORE_SCHEMA_VERSION = "1.0"` (corpus analytics SQLite) +- `CORPUS_EXPORT_SCHEMA_VERSION = "1.0"` (clustering JSON export) +- `CORPUS_REPRESENTATION_CONTRACT_VERSION = "1"` (intent representation payloads) +- `CORPUS_EMBEDDING_CONTRACT_VERSION = "1"` (analytics embedding sidecar) Refs: @@ -71,6 +75,10 @@ Version bump rules: - bump **Platform Observability schema** only for incompatible telemetry-store changes; it remains separate from reports, gates, baselines, and memory facts (see [26-platform-observability.md](26-platform-observability.md)) +- bump **corpus analytics store/export/representation/embedding** versions when + SQLite layout or export semantics change incompatibly; rebuild analytics + artifacts rather than treating them as analysis truth + (see [27-corpus-analytics.md](27-corpus-analytics.md)) Operational compatibility rules: diff --git a/docs/book/27-corpus-analytics.md b/docs/book/27-corpus-analytics.md new file mode 100644 index 00000000..e9dc504b --- /dev/null +++ b/docs/book/27-corpus-analytics.md @@ -0,0 +1,129 @@ +# Corpus Analytics + +Corpus Analytics is an optional, offline analytics lane for clustering historical +change-control intents. It reads audit, Engineering Memory trajectory, and optional +workspace intent registry overlays, builds immutable corpus snapshots, generates +**separate** analytics embeddings, and runs deterministic PCA + HDBSCAN clustering.
+ +It is **not** analysis truth: outputs never affect reports, gates, baselines, +cache compatibility, Engineering Memory records, or edit authorization. + +For a practical walkthrough, see the +[Corpus Analytics guide](../guide/analytics/overview.md). + +## Trust boundary + +```mermaid +flowchart LR + A["Audit DB<br/>intent.declared"] --> B["Corpus snapshot<br/>SQLite metadata"] + C["Trajectory / patch trail"] --> B + D["Optional registry overlay"] --> B + B --> E["Analytics embeddings<br/>LanceDB sidecar"] + E --> F["PCA + HDBSCAN clustering"] + F --> G["JSON / HTML export"] + B -. "must not influence" .-> H["Reports, gates, baseline, memory authority"] +``` + +Properties: + +- requires `codeclone[analytics]` optional dependencies; +- stores artifacts under `.codeclone/analytics/` (SQLite + LanceDB); +- uses dedicated contract versions in `codeclone/contracts/__init__.py` + (`CORPUS_ANALYTICS_STORE_SCHEMA_VERSION`, `CORPUS_EXPORT_SCHEMA_VERSION`, …); +- keeps analytics vectors separate from Engineering Memory semantic index, while + reusing the shared FastEmbed model artifact (not a second download); +- excludes live registry text from normalized corpus digests; +- opens all SQLite through `codeclone/observability/sqlite_access.py` (wrapping + `codeclone/utils/sqlite_store.py`) so connection-open is instrumented only when + observability is enabled (lazy import). + +## Install + +```bash +uv sync --extra analytics +# or +pip install "codeclone[analytics]" +``` + +Capability tiers: + +| Tier | Packages | Commands | +|-----------|----------------------------------|-----------------------------------| +| `base` | core only | snapshot metadata, list runs | +| `embed` | fastembed + lancedb | `embed`, vector IO | +| `cluster` | scikit-learn + hdbscan | `cluster`, sweep, diagnostics | +| `full` | all of the above | `build` end-to-end | + +`umap-learn` (in the `analytics` extra, Python versions before 3.14) is optional and +used only for the HTML report's 2-D visualization — never for clustering input. + +## Configuration + +`[tool.codeclone.analytics]` in `pyproject.toml` overrides repository-local +defaults. Paths resolve under the repository root unless absolute.
+ +| Key | Default | Role | +|-----------------------------------|-------------------------------------------------|-------------------------------------------| +| `db_path` | `.codeclone/analytics/corpus_clustering.sqlite3`| Snapshot / clustering metadata SQLite | +| `vectors_path` | `.codeclone/analytics/corpus_vectors` | Analytics LanceDB directory | +| `embedding_model` | `BAAI/bge-small-en-v1.5` | FastEmbed model id | +| `embedding_dimension` | `384` | Vector width contract | +| `embedding_provider` | `fastembed` | Embedding backend | +| `embedding_cache_dir` | inherits memory (`.codeclone/memory/fastembed`) | Shared FastEmbed model artifact cache | +| `min_correlation_sample_size` | `5` | Minimum sample size for correlation stats | +| `cluster_random_seed` | `42` | Deterministic clustering seed | +| `default_pca_dimensions` | `64` | PCA projection width | +| `default_min_cluster_size` | `8` | HDBSCAN `min_cluster_size` default | +| `default_min_samples` | `3` | HDBSCAN `min_samples` default | +| `default_cluster_selection_method`| `eom` | HDBSCAN selection method | +| `allow_model_download` | inherits memory (`false` by default) | FastEmbed may download the model when `true` | + +Resolver: `codeclone/config/analytics.py:resolve_analytics_config`. + +`embedding_cache_dir` and `allow_model_download` default to the resolved +`[tool.codeclone.memory.semantic]` values, so the FastEmbed model is downloaded +once and shared with Engineering Memory. Only the analytics **vectors** +(`vectors_path`) and snapshot/clustering metadata (`db_path`) live under +`.codeclone/analytics/`. 
+ +## CLI + +Terminal-only commands under `codeclone analytics`: + +| Command | Purpose | +|-----------------|------------------------------------------------------| +| `snapshot` | Build immutable intent corpus snapshot | +| `embed` | Generate analytics embeddings for a snapshot | +| `cluster` | Cluster embedded snapshot (optional `--sweep`) | +| `build` | Snapshot → embed → cluster end-to-end | +| `clusters` | List clustering runs for a snapshot | +| `cluster-show` | Export one clustering run as JSON | +| `outliers` | Show noise-cluster assignments | + +Representations: + +- `description` — intent text only (default); +- `description_with_frame` — adds bounded structural frame fields. + +Sweep modes write both `recommended_by_heuristic` and +`selected_by_maintainer` metadata; maintainer selection is explicit via +`cluster --select-run`. + +Full CLI contract: [11-cli.md](11-cli.md). + +## Module map + +| Area | Path | +|-----------------|-------------------------------------------| +| Config | `codeclone/config/analytics.py` | +| Workflow | `codeclone/analytics/workflow.py` | +| Corpus adapters | `codeclone/analytics/corpus/` | +| Store | `codeclone/analytics/store/` | +| Clustering | `codeclone/analytics/clustering/` | +| CLI | `codeclone/surfaces/cli/analytics.py` | + +## Refs + +- `codeclone/contracts/__init__.py` — corpus analytics version constants +- `tests/test_analytics_foundation.py`, `tests/test_analytics_integration.py` +- `docs/book/appendix/b-schema-layouts.md` — store layout summary diff --git a/docs/book/README.md b/docs/book/README.md index d41cbd4a..457de36f 100644 --- a/docs/book/README.md +++ b/docs/book/README.md @@ -72,6 +72,7 @@ If a statement is not enforced by code/tests, it is explicitly marked as non-con - [23-testing-as-spec.md](23-testing-as-spec.md) — tests as specification - [24-compatibility-and-versioning.md](24-compatibility-and-versioning.md) — compatibility and versioning rules - 
[26-platform-observability.md](26-platform-observability.md) — local diagnostics for CodeClone's own runtime +- [27-corpus-analytics.md](27-corpus-analytics.md) — offline intent corpus clustering (optional `[analytics]`) ### MCP interface diff --git a/docs/book/appendix/b-schema-layouts.md b/docs/book/appendix/b-schema-layouts.md index c0696514..feabc69f 100644 --- a/docs/book/appendix/b-schema-layouts.md +++ b/docs/book/appendix/b-schema-layouts.md @@ -886,6 +886,26 @@ stores. See [Platform Observability](../26-platform-observability.md) for configuration, privacy, query, and anti-inference rules. +## Corpus analytics store (`1.0`) + +Optional SQLite database (default `.codeclone/analytics/corpus_clustering.sqlite3`) +and LanceDB vector directory (default `.codeclone/analytics/corpus_vectors`). +Derived offline analytics — not report, baseline, cache, audit, or Engineering +Memory truth. + +| Artifact | Role | +|---------------------|-------------------------------------------------------------| +| `corpus_snapshots` | Immutable snapshot metadata and source digests | +| `embedding_runs` | Analytics embedding generation records | +| `clustering_runs` | PCA/HDBSCAN parameters, diagnostics, membership digest | +| LanceDB sidecar | Separate vectors from Engineering Memory semantic index | + +Store schema version: `CORPUS_ANALYTICS_STORE_SCHEMA_VERSION` in +`codeclone/contracts/__init__.py` (currently **`1.0`**). + +See [Corpus Analytics](../27-corpus-analytics.md) for CLI, configuration, and trust +boundaries. + ## Refs - `codeclone/baseline/clone_baseline.py` diff --git a/docs/guide/README.md b/docs/guide/README.md index af444ea2..08804708 100644 --- a/docs/guide/README.md +++ b/docs/guide/README.md @@ -21,6 +21,7 @@ enums, payload semantics), use the [Contracts book](../book/README.md). 
| Scope context before edits | [Engineering Memory overview](memory/overview.md) | | Inspect trajectory history and patterns | [Trajectories and Experiences](memory/trajectories-and-experiences.md) | | Diagnose CodeClone runtime cost | [Platform Observability](observability/diagnostics.md) | +| Cluster historical agent intents | [Corpus Analytics](analytics/overview.md) | ## MCP workflows diff --git a/docs/guide/analytics/overview.md b/docs/guide/analytics/overview.md new file mode 100644 index 00000000..82778a7e --- /dev/null +++ b/docs/guide/analytics/overview.md @@ -0,0 +1,67 @@ +# Corpus Analytics + +Use Corpus Analytics when you want **offline clustering of historical change-control +intents** — for example to compare agent workflow cohorts, inspect outliers, or +export HTML/JSON summaries for maintainer review. + +## Prerequisites + +1. A repository with audit enabled and historical `intent.declared` events. +2. Engineering Memory trajectory projection (optional but improves selection). +3. Install optional dependencies: + +```bash +uv sync --extra analytics +``` + +## Quick start + +Build snapshot, embeddings, and a recommended clustering run in one step: + +```bash +codeclone analytics build --root . --use-recommended +``` + +Write artifacts to explicit paths: + +```bash +codeclone analytics build \ + --root . \ + --representation description \ + --html-out /tmp/corpus-clusters.html \ + --json-out /tmp/corpus-clusters.json +``` + +## Step-by-step + +```bash +# 1. Immutable snapshot from audit + trajectory (+ optional registry overlay) +codeclone analytics snapshot --root . + +# 2. Analytics embeddings (separate LanceDB sidecar) +codeclone analytics embed --root . --snapshot-id SNAPSHOT_ID + +# 3. Cluster (add --sweep for parameter sweep) +codeclone analytics cluster \ + --root . \ + --snapshot-id SNAPSHOT_ID \ + --embedding-generation-id GENERATION_ID + +# 4. Inspect runs +codeclone analytics clusters --root . 
--snapshot-id SNAPSHOT_ID +codeclone analytics cluster-show \ + --root . --snapshot-id SNAPSHOT_ID --run-id RUN_ID +``` + +## Configuration + +Defaults live in `[tool.codeclone.analytics]` inside `pyproject.toml`. See +[Corpus Analytics contract](../../book/27-corpus-analytics.md) for the full table. + +## What this is not + +- Not a second analyzer — it does not replace `codeclone` structural reports. +- Not Engineering Memory semantic search — vectors are stored separately. +- Not MCP-visible in Slice 1 — CLI only. + +Contract reference: [27-corpus-analytics.md](../../book/27-corpus-analytics.md). diff --git a/docs/index.md b/docs/index.md index b2824a9a..34a08053 100644 --- a/docs/index.md +++ b/docs/index.md @@ -56,6 +56,7 @@ patch against the declared boundary, and generates an auditable review receipt. | Trajectories and recurring patterns | [Trajectories and Experiences](guide/memory/trajectories-and-experiences.md) | | MCP interface contract | [MCP interface](book/25-mcp-interface/index.md) | | Diagnose CodeClone runtime | [Platform Observability](guide/observability/diagnostics.md) | +| Cluster historical intents | [Corpus Analytics](guide/analytics/overview.md) | ## IDE and Agent Clients diff --git a/pyproject.toml b/pyproject.toml index c7249d2f..07e65869 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -84,6 +84,13 @@ semantic-local = [ "lancedb>=0.33.0", "fastembed>=0.8.0,<0.9", ] +analytics = [ + "hdbscan>=0.8.0", + "scikit-learn>=1.5.0", + "lancedb>=0.33.0", + "fastembed>=0.8.0,<0.9", + "umap-learn>=0.5.0; python_version < '3.14'", +] perf = [ "psutil>=7,<8", ] @@ -105,6 +112,15 @@ codeclone-mcp = "codeclone.surfaces.mcp.server:main" packages = [ "codeclone", "codeclone.analysis", + "codeclone.analytics", + "codeclone.analytics.clustering", + "codeclone.analytics.corpus", + "codeclone.analytics.corpus.adapters", + "codeclone.analytics.corpus.representations", + "codeclone.analytics.embedding", + "codeclone.analytics.export", + 
"codeclone.analytics.report", + "codeclone.analytics.store", "codeclone.audit", "codeclone.baseline", "codeclone.blocks", diff --git a/tests/fixtures/analytics/helpers.py b/tests/fixtures/analytics/helpers.py new file mode 100644 index 00000000..8e2bd0f9 --- /dev/null +++ b/tests/fixtures/analytics/helpers.py @@ -0,0 +1,74 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import hashlib +import json +import sqlite3 +from pathlib import Path + +from codeclone.audit.events import EVENT_INTENT_DECLARED, repo_root_digest +from codeclone.audit.schema import open_audit_db +from codeclone.memory.schema import ensure_schema as ensure_memory_schema + + +def write_intent_declared_event( + *, + db_path: Path, + repo_root: Path, + intent_id: str, + description: str, + audit_sequence: int = 1, + agent_label: str = "cursor-agent", + intent_kind: str | None = None, +) -> None: + digest = repo_root_digest(repo_root.resolve()) + conn = open_audit_db(db_path) + try: + payload = { + "intent_description": description, + "intent_kind": intent_kind, + "scope": {"allowed_files": ["codeclone/analytics"]}, + } + conn.execute( + """ + INSERT INTO controller_events ( + event_id, event_type, severity, created_at_utc, + repo_root_digest, intent_id, workflow_id, agent_label, agent_pid, + status, payload_json + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
+ """, + ( + f"evt-{audit_sequence}", + EVENT_INTENT_DECLARED, + "info", + f"2026-01-01T00:00:{audit_sequence:02d}Z", + digest, + intent_id, + f"intent:{intent_id}", + agent_label, + 1, + "active", + json.dumps(payload, sort_keys=True), + ), + ) + conn.commit() + finally: + conn.close() + + +def seed_memory_db(path: Path) -> sqlite3.Connection: + path.parent.mkdir(parents=True, exist_ok=True) + conn = sqlite3.connect(str(path)) + conn.row_factory = sqlite3.Row + ensure_memory_schema(conn) + return conn + + +def trajectory_digest(payload: dict[str, object]) -> str: + text = json.dumps(payload, sort_keys=True, separators=(",", ":")) + return hashlib.sha256(text.encode("utf-8")).hexdigest() diff --git a/tests/test_analytics_foundation.py b/tests/test_analytics_foundation.py new file mode 100644 index 00000000..f938d0ea --- /dev/null +++ b/tests/test_analytics_foundation.py @@ -0,0 +1,186 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from dataclasses import replace +from pathlib import Path + +from codeclone.analytics.agent_labels import map_agent_family +from codeclone.analytics.clustering.diagnostics import correlation_rate +from codeclone.analytics.clustering.sweep import iter_sweep_candidates +from codeclone.analytics.contracts import INTENT_REPRESENTATION_DESCRIPTION +from codeclone.analytics.corpus.adapters.intent_historical import ( + compute_source_digest, + extract_historical_intent_items, + materialize_corpus_item, +) +from codeclone.analytics.corpus.keys import ( + representation_key, + snapshot_item_id, + source_record_key, +) +from tests.fixtures.analytics.helpers import write_intent_declared_event + + +def _audit_db(root: Path) -> Path: + path = root / ".codeclone" / "db" / "audit.sqlite3" + path.parent.mkdir(parents=True, exist_ok=True) + return path + + +def _seed_intent_repo( + tmp_path: Path, *, description: str, audit_sequence: int = 1 +) -> Path: + """Create a repo root and write one intent.declared audit event.""" + root = tmp_path / "repo" + root.mkdir() + write_intent_declared_event( + db_path=_audit_db(root), + repo_root=root, + intent_id="intent-a", + description=description, + audit_sequence=audit_sequence, + ) + return root + + +def test_identity_keys() -> None: + project_id = "proj-abc" + intent_id = "intent-1" + source_key = source_record_key(project_id=project_id, intent_id=intent_id) + rep_key = representation_key( + lane="intent", + representation_kind=INTENT_REPRESENTATION_DESCRIPTION, + representation_version="1", + source_record_key_value=source_key, + ) + snap_item = snapshot_item_id(snapshot_id="snap-1", representation_key_value=rep_key) + assert len(source_key) == 64 + assert len(rep_key) == 64 + assert len(snap_item) == 64 + assert source_key != rep_key != snap_item + + +def test_registry_not_in_normalized_text(tmp_path: Path) -> None: + root 
= tmp_path / "repo" + root.mkdir() + audit_db = _audit_db(root) + write_intent_declared_event( + db_path=audit_db, + repo_root=root, + intent_id="intent-a", + description="Add analytics module", + ) + items = extract_historical_intent_items( + root_path=root, + representation_kind=INTENT_REPRESENTATION_DESCRIPTION, + ) + assert len(items) == 1 + before = materialize_corpus_item( + snapshot_id="snap-1", + lane="intent", + representation_kind=INTENT_REPRESENTATION_DESCRIPTION, + item=items[0], + ) + overlay_item = replace( + items[0], + registry_overlay={"present": True, "status": "active"}, + ) + after = materialize_corpus_item( + snapshot_id="snap-1", + lane="intent", + representation_kind=INTENT_REPRESENTATION_DESCRIPTION, + item=overlay_item, + ) + assert before[3] == after[3] + assert before[6] == after[6] + + +def test_intent_adapter_audit_first(tmp_path: Path) -> None: + root = _seed_intent_repo(tmp_path, description="First description") + write_intent_declared_event( + db_path=_audit_db(root), + repo_root=root, + intent_id="intent-a", + description="Later description", + audit_sequence=2, + ) + items = extract_historical_intent_items( + root_path=root, + representation_kind=INTENT_REPRESENTATION_DESCRIPTION, + ) + assert len(items) == 1 + assert items[0].representation_input.description == "First description" + + +def test_duplicate_declaration_conflict(tmp_path: Path) -> None: + root = _seed_intent_repo(tmp_path, description="Alpha") + write_intent_declared_event( + db_path=_audit_db(root), + repo_root=root, + intent_id="intent-a", + description="Beta", + audit_sequence=2, + ) + items = extract_historical_intent_items( + root_path=root, + representation_kind=INTENT_REPRESENTATION_DESCRIPTION, + ) + description = items[0].provenance["description"] + assert isinstance(description, dict) + assert description["description_conflict"] is True + + +def test_source_digest_stable(tmp_path: Path) -> None: + root = _seed_intent_repo(tmp_path, description="Stable 
intent") + items = extract_historical_intent_items( + root_path=root, + representation_kind=INTENT_REPRESENTATION_DESCRIPTION, + ) + digest_a = compute_source_digest( + items=items, + lane="intent", + representation_kind=INTENT_REPRESENTATION_DESCRIPTION, + representation_version="1", + source_schema_versions={"audit": "4"}, + ) + digest_b = compute_source_digest( + items=items, + lane="intent", + representation_kind=INTENT_REPRESENTATION_DESCRIPTION, + representation_version="1", + source_schema_versions={"audit": "4"}, + ) + assert digest_a == digest_b + + +def test_session_intent_never_in_corpus(tmp_path: Path) -> None: + root = tmp_path / "repo" + root.mkdir() + _audit_db(root) + items = extract_historical_intent_items( + root_path=root, + representation_kind=INTENT_REPRESENTATION_DESCRIPTION, + ) + assert items == () + + +def test_correlation_sample_guard() -> None: + cell = correlation_rate(numerator=2, denominator=4, min_sample_size=5) + assert cell.insufficient_sample is True + assert cell.rate is None + + +def test_sweep_effective_dedup() -> None: + candidates = iter_sweep_candidates(n_samples=10, n_features=384) + keys = [candidate.dedupe_key for candidate in candidates] + assert len(keys) == len(set(keys)) + + +def test_agent_family_mapping() -> None: + assert map_agent_family("cursor-vscode") == "cursor" + assert map_agent_family(None) == "unknown" diff --git a/tests/test_analytics_integration.py b/tests/test_analytics_integration.py new file mode 100644 index 00000000..08c2fbb7 --- /dev/null +++ b/tests/test_analytics_integration.py @@ -0,0 +1,228 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from pathlib import Path + +import pytest + +from codeclone.analytics.capabilities import check_capability +from codeclone.analytics.clustering.models import ClusteringParameters +from codeclone.analytics.clustering.pipeline import run_clustering_pipeline +from codeclone.analytics.contracts import INTENT_REPRESENTATION_DESCRIPTION +from codeclone.analytics.embedding.generation import EmbeddingBatchResult +from codeclone.analytics.schema import open_analytics_db +from codeclone.analytics.store.protocols import SnapshotBuildResult +from codeclone.analytics.store.sqlite import SqliteCorpusAnalyticsStore +from codeclone.analytics.workflow import ( + run_clustering, + run_embed, + run_snapshot, + select_cluster_run, +) +from codeclone.config.analytics import AnalyticsConfig, resolve_analytics_config +from codeclone.memory.embedding import DeterministicHashEmbeddingProvider +from tests.fixtures.analytics.helpers import write_intent_declared_event + + +@pytest.fixture +def analytics_repo(tmp_path: Path) -> tuple[Path, Path, AnalyticsConfig]: + root = tmp_path / "repo" + root.mkdir() + audit_db = root / ".codeclone" / "db" / "audit.sqlite3" + audit_db.parent.mkdir(parents=True) + write_intent_declared_event( + db_path=audit_db, + repo_root=root, + intent_id="intent-a", + description="Implement corpus analytics slice", + audit_sequence=1, + ) + write_intent_declared_event( + db_path=audit_db, + repo_root=root, + intent_id="intent-b", + description="Refactor clustering pipeline", + audit_sequence=2, + ) + for index in range(3, 13): + write_intent_declared_event( + db_path=audit_db, + repo_root=root, + intent_id=f"intent-{index}", + description=f"Intent workload {index} for clustering", + audit_sequence=index, + ) + config = resolve_analytics_config(root) + return root, audit_db, config + + +def _snapshot_and_embed( + analytics_repo: tuple[Path, Path, 
AnalyticsConfig], + monkeypatch: pytest.MonkeyPatch, +) -> tuple[Path, AnalyticsConfig, SnapshotBuildResult, EmbeddingBatchResult]: + """Build a snapshot and embed it with the deterministic provider, with the + memory DB stubbed out so the corpus is audit-only.""" + root, _audit_db, config = analytics_repo + monkeypatch.setattr( + "codeclone.analytics.corpus.snapshot.resolve_memory_db_path", + lambda _root: config.db_path.parent / "missing.sqlite3", + ) + snapshot = run_snapshot( + root_path=root, + representation_kind=INTENT_REPRESENTATION_DESCRIPTION, + config=config, + ) + provider = DeterministicHashEmbeddingProvider(dimension=config.embedding_dimension) + monkeypatch.setattr( + "codeclone.analytics.embedding.generation._resolve_fastembed_provider", + lambda _config: provider, + ) + embed = run_embed(root_path=root, snapshot_id=snapshot.snapshot_id, config=config) + return root, config, snapshot, embed + + +def test_embedding_lancedb_only( + analytics_repo: tuple[Path, Path, AnalyticsConfig], + monkeypatch: pytest.MonkeyPatch, +) -> None: + _root, config, _snapshot, embed = _snapshot_and_embed(analytics_repo, monkeypatch) + conn = open_analytics_db(config.db_path) + try: + rows = conn.execute( + "SELECT vector_row_key, vector_digest FROM embedding_items " + "WHERE embedding_generation_id=?", + (embed.embedding_generation_id,), + ).fetchall() + assert rows + for row in rows: + assert row[0] + assert row[1] + blob_rows = conn.execute( + "SELECT sql FROM sqlite_master WHERE name='embedding_items'" + ).fetchone() + assert blob_rows is not None + assert "BLOB" not in str(blob_rows[0]).upper() + finally: + conn.close() + + +def test_inspect_commands_without_fastembed( + analytics_repo: tuple[Path, Path, AnalyticsConfig], + monkeypatch: pytest.MonkeyPatch, +) -> None: + root, _audit_db, config = analytics_repo + monkeypatch.setattr( + "codeclone.analytics.corpus.snapshot.resolve_memory_db_path", + lambda _root: config.db_path.parent / "missing.sqlite3", + ) + snapshot 
= run_snapshot( + root_path=root, + representation_kind=INTENT_REPRESENTATION_DESCRIPTION, + config=config, + ) + store = SqliteCorpusAnalyticsStore.open_readonly(config.db_path) + try: + runs = store.list_clustering_runs(snapshot_id=snapshot.snapshot_id) + assert runs == () + finally: + store.close() + assert check_capability("base").available is True + + +@pytest.mark.skipif( + not check_capability("cluster").available, + reason="analytics clustering deps not installed", +) +def test_cluster_partition_deterministic( + analytics_repo: tuple[Path, Path, AnalyticsConfig], + monkeypatch: pytest.MonkeyPatch, +) -> None: + _root, config, snapshot, embed = _snapshot_and_embed(analytics_repo, monkeypatch) + params = ClusteringParameters( + pca_dimensions=8, + min_cluster_size=3, + min_samples=1, + cluster_selection_method="eom", + ) + store = SqliteCorpusAnalyticsStore.open(config.db_path) + vector_store_path = config.vectors_path + try: + from codeclone.analytics.embedding.generation import load_snapshot_vectors + from codeclone.analytics.store.vectors_lancedb import AnalyticsVectorStore + + items = store.list_items(snapshot.snapshot_id) + vectors = load_snapshot_vectors( + vector_store=AnalyticsVectorStore( + path=vector_store_path, + dimension=config.embedding_dimension, + ), + embedding_generation_id=embed.embedding_generation_id, + items=items, + ) + item_ids = [item.snapshot_item_id for item in items] + first = run_clustering_pipeline( + snapshot_item_ids=item_ids, + embeddings=vectors, + requested=params, + ) + second = run_clustering_pipeline( + snapshot_item_ids=item_ids, + embeddings=vectors, + requested=params, + ) + assert first is not None and second is not None + first_digests = sorted(part.membership_digest for part in first.partitions) + second_digests = sorted(part.membership_digest for part in second.partitions) + assert first_digests == second_digests + finally: + store.close() + + +@pytest.mark.skipif( + not check_capability("cluster").available, + 
reason="analytics clustering deps not installed", +) +def test_sweep_selection_flags( + analytics_repo: tuple[Path, Path, AnalyticsConfig], + monkeypatch: pytest.MonkeyPatch, +) -> None: + root, config, snapshot, embed = _snapshot_and_embed(analytics_repo, monkeypatch) + run_ids = run_clustering( + root_path=root, + snapshot_id=snapshot.snapshot_id, + embedding_generation_id=embed.embedding_generation_id, + sweep=True, + config=config, + ) + store = SqliteCorpusAnalyticsStore.open(config.db_path) + try: + runs = store.list_clustering_runs( + snapshot_id=snapshot.snapshot_id, + embedding_generation_id=embed.embedding_generation_id, + ) + recommended = [run for run in runs if run.recommended_by_heuristic] + assert len(recommended) == 1 + assert all(not run.selected_by_maintainer for run in runs) + select_cluster_run(root_path=root, clustering_run_id=run_ids[0], config=config) + runs = store.list_clustering_runs( + snapshot_id=snapshot.snapshot_id, + embedding_generation_id=embed.embedding_generation_id, + ) + selected = [run for run in runs if run.selected_by_maintainer] + assert len(selected) == 1 + assert selected[0].clustering_run_id == run_ids[0] + finally: + store.close() + + +def test_no_semantic_index_reuse( + analytics_repo: tuple[Path, Path, AnalyticsConfig], +) -> None: + _root, _audit_db, config = analytics_repo + assert config.vectors_path.name == "corpus_vectors" + assert ".codeclone/analytics/corpus_vectors" in config.vectors_path.as_posix() diff --git a/tests/test_analytics_trajectory_selection.py b/tests/test_analytics_trajectory_selection.py new file mode 100644 index 00000000..424d078b --- /dev/null +++ b/tests/test_analytics_trajectory_selection.py @@ -0,0 +1,104 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from codeclone.analytics.corpus.trajectory_selection import select_trajectory_for_intent +from codeclone.contracts import TRAJECTORY_PROJECTION_VERSION +from codeclone.memory.trajectory.models import ( + Trajectory, + TrajectoryEvidence, + TrajectoryLabel, + TrajectoryStep, + TrajectorySubject, +) + + +def _trajectory( + *, + trajectory_id: str, + labels: tuple[TrajectoryLabel, ...] = (), + terminal_sequence: int = 10, + projection_version: str = TRAJECTORY_PROJECTION_VERSION, + terminal_event_type: str = "intent.cleared", +) -> Trajectory: + step = TrajectoryStep( + step_index=0, + audit_sequence=terminal_sequence, + event_id=f"evt-{trajectory_id}", + event_type=terminal_event_type, + status="accepted", + run_id="run-1", + report_digest=None, + event_core_sha256="abc", + event_core_json="{}", + summary=None, + created_at_utc="2026-01-01T00:00:00Z", + ) + return Trajectory( + id=trajectory_id, + project_id="proj-1", + repo_root_digest="digest", + workflow_id="intent:intent-a", + intent_id="intent-a", + primary_run_id="run-1", + first_run_id="run-1", + last_run_id="run-1", + report_digest=None, + outcome="accepted", + quality_tier="verified", + quality_score=90, + labels=labels, + summary="done", + trajectory_digest=f"digest-{trajectory_id}", + source_event_stream_digest="stream", + projection_version=projection_version, + event_count=1, + step_count=1, + incident_count=0, + started_at_utc="2026-01-01T00:00:00Z", + finished_at_utc="2026-01-01T00:01:00Z", + projected_at_utc="2026-01-01T00:01:00Z", + updated_at_utc="2026-01-01T00:01:00Z", + steps=(step,), + subjects=( + TrajectorySubject( + subject_kind="agent", + subject_key="cursor-agent", + relation="actor", + ), + ), + evidence=( + TrajectoryEvidence( + evidence_kind="audit", + ref="evt-1", + locator=None, + digest=None, + created_at_utc="2026-01-01T00:00:00Z", + ), + ), + ) + + +def 
test_trajectory_selection_deterministic() -> None: + first = _trajectory( + trajectory_id="traj-a", + terminal_sequence=20, + terminal_event_type="intent.declared", + ) + second = _trajectory( + trajectory_id="traj-b", + labels=("verified_finish",), + terminal_sequence=10, + ) + legacy = _trajectory( + trajectory_id="traj-legacy", + projection_version="trajectory-v1", + ) + result = select_trajectory_for_intent((first, second, legacy)) + assert result.selected is not None + assert result.selected.id == "traj-b" + assert set(result.discarded_ids) == {"traj-a"} diff --git a/tests/test_architecture.py b/tests/test_architecture.py index c91e33da..944c272c 100644 --- a/tests/test_architecture.py +++ b/tests/test_architecture.py @@ -212,16 +212,18 @@ def test_non_mcp_surfaces_do_not_import_mcp_blast_radius_module() -> None: assert violations == [] -def test_memory_package_does_not_import_forbidden_surfaces() -> None: +_FORBIDDEN_SURFACE_PREFIXES = ( + "codeclone.surfaces.", + "codeclone.ui_messages", + "codeclone.report.html", +) + + +def _assert_no_forbidden_surface_imports(package_prefix: str) -> None: root = Path(__file__).resolve().parents[1] - forbidden_prefixes = ( - "codeclone.surfaces.", - "codeclone.ui_messages", - "codeclone.report.html", - ) violations: list[str] = [] for module_name, path in _iter_codeclone_modules(root): - if not module_name.startswith("codeclone.memory"): + if not module_name.startswith(package_prefix): continue imports = _iter_local_imports(module_name, path.read_text("utf-8")) violations.extend( @@ -229,7 +231,15 @@ def test_memory_package_does_not_import_forbidden_surfaces() -> None: for import_name in imports if any( import_name == prefix.rstrip(".") or import_name.startswith(prefix) - for prefix in forbidden_prefixes + for prefix in _FORBIDDEN_SURFACE_PREFIXES ) ) assert violations == [] + + +def test_analytics_package_does_not_import_forbidden_surfaces() -> None: + _assert_no_forbidden_surface_imports("codeclone.analytics") + + +def 
test_memory_package_does_not_import_forbidden_surfaces() -> None: + _assert_no_forbidden_surface_imports("codeclone.memory") diff --git a/tests/test_config_analytics.py b/tests/test_config_analytics.py new file mode 100644 index 00000000..33fc00fd --- /dev/null +++ b/tests/test_config_analytics.py @@ -0,0 +1,55 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from pathlib import Path + +import pytest + +from codeclone.config.analytics import resolve_analytics_config + + +def _write_pyproject(root: Path, body: str) -> None: + (root / "pyproject.toml").write_text(body, encoding="utf-8") + + +def test_analytics_defaults_when_table_absent(tmp_path: Path) -> None: + config = resolve_analytics_config(tmp_path) + assert config.embedding_model == "BAAI/bge-small-en-v1.5" + assert config.embedding_dimension == 384 + assert config.min_correlation_sample_size == 5 + assert config.default_min_cluster_size == 8 + assert config.allow_model_download is False + assert config.db_path == tmp_path / ".codeclone/analytics/corpus_clustering.sqlite3" + + +def test_analytics_nested_table_parsed(tmp_path: Path) -> None: + _write_pyproject( + tmp_path, + """ +[tool.codeclone.analytics] +db_path = "custom/analytics.sqlite3" +default_min_cluster_size = 12 +allow_model_download = true +""", + ) + config = resolve_analytics_config(tmp_path) + assert config.db_path == tmp_path / "custom/analytics.sqlite3" + assert config.default_min_cluster_size == 12 + assert config.allow_model_download is True + + +def test_analytics_unknown_key_rejected(tmp_path: Path) -> None: + _write_pyproject( + tmp_path, + """ +[tool.codeclone.analytics] +unexpected = true +""", + ) + with pytest.raises(ValueError, match="unexpected"): + 
resolve_analytics_config(tmp_path) diff --git a/tests/test_sqlite_readonly_openers.py b/tests/test_sqlite_readonly_openers.py index 05bb207c..22ac951e 100644 --- a/tests/test_sqlite_readonly_openers.py +++ b/tests/test_sqlite_readonly_openers.py @@ -7,6 +7,7 @@ from __future__ import annotations import sqlite3 +import sys from pathlib import Path import pytest @@ -16,6 +17,7 @@ open_audit_db_readonly, ) from codeclone.audit.validation import AuditSchemaError +from codeclone.memory.schema import open_memory_db, open_memory_db_readonly from codeclone.surfaces.mcp._workspace_intent_schema import ( IntentRegistrySchemaError, open_intent_registry_db, @@ -110,9 +112,41 @@ def test_intent_readonly_opener_rejects_stale_schema_without_migration( raw.close() +@pytest.mark.parametrize( + "module_name", + [ + "codeclone.audit.schema", + "codeclone.memory.schema", + "codeclone.analytics.schema", + "codeclone.surfaces.mcp._workspace_intent_schema", + ], +) +def test_schema_module_import_does_not_load_observability(module_name: str) -> None: + for name in list(sys.modules): + if name == "codeclone.observability" or name.startswith( + "codeclone.observability." 
+ ): + sys.modules.pop(name, None) + + import importlib + + importlib.import_module(module_name) + + assert not any( + name == "codeclone.observability" or name.startswith("codeclone.observability.") + for name in sys.modules + ) + + @pytest.mark.parametrize( "opener", - [open_audit_db, open_audit_db_readonly, open_intent_registry_db], + [ + open_audit_db, + open_audit_db_readonly, + open_intent_registry_db, + open_memory_db, + open_memory_db_readonly, + ], ) def test_domain_openers_attach_observability( tmp_path: Path, @@ -121,16 +155,23 @@ def test_domain_openers_attach_observability( ) -> None: audit_path = tmp_path / "audit.sqlite3" intent_path = tmp_path / "intents.sqlite3" + memory_path = tmp_path / "memory.sqlite3" open_audit_db(audit_path).close() + open_memory_db(memory_path).close() calls: list[sqlite3.Connection] = [] monkeypatch.setattr( - "codeclone.observability.instrument_db_connection", + "codeclone.observability.runtime.instrument_db_connection", calls.append, ) selected = opener assert callable(selected) - path = intent_path if selected is open_intent_registry_db else audit_path + if selected is open_intent_registry_db: + path = intent_path + elif selected in (open_memory_db, open_memory_db_readonly): + path = memory_path + else: + path = audit_path conn = selected(path) try: assert calls == [conn] @@ -146,7 +187,7 @@ def test_intent_readonly_opener_attaches_observability( open_intent_registry_db(db_path).close() calls: list[sqlite3.Connection] = [] monkeypatch.setattr( - "codeclone.observability.instrument_db_connection", + "codeclone.observability.runtime.instrument_db_connection", calls.append, ) diff --git a/uv.lock b/uv.lock index 46ad84da..45910b16 100644 --- a/uv.lock +++ b/uv.lock @@ -347,6 +347,14 @@ dependencies = [ ] [package.optional-dependencies] +analytics = [ + { name = "fastembed" }, + { name = "hdbscan" }, + { name = "lancedb" }, + { name = "scikit-learn", version = "1.7.2", source = { registry = "https://pypi.org/simple" }, 
marker = "python_full_version < '3.11'" }, + { name = "scikit-learn", version = "1.9.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "umap-learn", marker = "python_full_version < '3.14'" }, +] coverage-xml = [ { name = "defusedxml" }, ] @@ -384,9 +392,12 @@ token-bench = [ requires-dist = [ { name = "build", marker = "extra == 'dev'", specifier = ">=1.4.3" }, { name = "defusedxml", marker = "extra == 'coverage-xml'", specifier = ">=0.7.1,<0.8" }, + { name = "fastembed", marker = "extra == 'analytics'", specifier = ">=0.8.0,<0.9" }, { name = "fastembed", marker = "extra == 'semantic-fastembed'", specifier = ">=0.8.0,<0.9" }, { name = "fastembed", marker = "extra == 'semantic-local'", specifier = ">=0.8.0,<0.9" }, + { name = "hdbscan", marker = "extra == 'analytics'", specifier = ">=0.8.0" }, { name = "httpx", marker = "extra == 'mcp'", specifier = ">=0.27.1,<1" }, + { name = "lancedb", marker = "extra == 'analytics'", specifier = ">=0.33.0" }, { name = "lancedb", marker = "extra == 'semantic-lancedb'", specifier = ">=0.33.0" }, { name = "lancedb", marker = "extra == 'semantic-local'", specifier = ">=0.33.0" }, { name = "mcp", marker = "extra == 'mcp'", specifier = ">=1.27.2,<2" }, @@ -401,11 +412,13 @@ requires-dist = [ { name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=7.1.0" }, { name = "rich", specifier = ">=15.0.0" }, { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.15.17" }, + { name = "scikit-learn", marker = "extra == 'analytics'", specifier = ">=1.5.0" }, { name = "tiktoken", marker = "extra == 'token-bench'", specifier = ">=0.13.0" }, { name = "tomli", marker = "python_full_version < '3.11'", specifier = ">=2.0.1" }, { name = "twine", marker = "extra == 'dev'", specifier = ">=6.2.0" }, + { name = "umap-learn", marker = "python_full_version < '3.14' and extra == 'analytics'", specifier = ">=0.5.0" }, ] -provides-extras = ["mcp", "token-bench", "coverage-xml", 
"semantic-lancedb", "semantic-fastembed", "semantic-local", "perf", "dev"] +provides-extras = ["mcp", "token-bench", "coverage-xml", "semantic-lancedb", "semantic-fastembed", "semantic-local", "analytics", "perf", "dev"] [[package]] name = "colorama" @@ -647,7 +660,7 @@ name = "exceptiongroup" version = "1.3.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "typing-extensions", marker = "python_full_version < '3.13'" }, + { name = "typing-extensions", marker = "python_full_version < '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/50/79/66800aadf48771f6b62f7eb014e352e5d06856655206165d775e675a02c9/exceptiongroup-1.3.1.tar.gz", hash = "sha256:8b412432c6055b0b7d14c310000ae93352ed6754f70fa8f7c34141f91c4e3219", size = 30371, upload-time = "2025-11-21T23:01:54.787Z" } wheels = [ @@ -712,6 +725,39 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" }, ] +[[package]] +name = "hdbscan" +version = "0.8.44" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "joblib" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "scikit-learn", version = "1.7.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "scikit-learn", version = "1.9.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = 
"scipy", version = "1.17.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b0/51/b476849d27980d1ce5ba0a172891d37441b0112047894350951f6b169266/hdbscan-0.8.44.tar.gz", hash = "sha256:1ac6196fabdd42072284b60c9be7b9b504b5f4f25cf7a551a8af29a3c7963a4d", size = 7094270, upload-time = "2026-06-01T18:56:31.342Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ed/11/2eaf3dda078c19b26194c4332e387d21b0a8986eb6cab18a0c2a0b51d9c7/hdbscan-0.8.44-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:80101ed4897ecbbb5ec36358c4d4c5c38ce4e53b9da5996923d1ec72d21ce665", size = 2615940, upload-time = "2026-06-01T18:55:48.073Z" }, + { url = "https://files.pythonhosted.org/packages/0a/70/dee6ed06d46a45f2054c2c9b5a1563058e775a0ace1412a52a41e5c1d826/hdbscan-0.8.44-cp310-cp310-macosx_15_0_x86_64.whl", hash = "sha256:91dac2f5668b946e3b6335bf6ea4c95fd446af45f7169cf0ab93fea34d4b0e73", size = 2025466, upload-time = "2026-06-01T19:13:28.405Z" }, + { url = "https://files.pythonhosted.org/packages/96/2f/646551ef9caf71211648a0accec50fa625a391e1530abc0d0db9a1c5ab78/hdbscan-0.8.44-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c72c1f357adfd9e9c0f402d7cc256bcf38a04ed833754661ec2600cc04e47c1b", size = 5712965, upload-time = "2026-06-01T18:56:39.348Z" }, + { url = "https://files.pythonhosted.org/packages/ea/6a/7c94a9b888248074b2082fe88ca72a705c2ff7601a7bdbb824111f3686e1/hdbscan-0.8.44-cp310-cp310-win_amd64.whl", hash = "sha256:be163d32e71a7ca9e3fd0c6867fdaa7a0c1989f3edc5048ba211beb3949eb96d", size = 1965101, upload-time = "2026-06-01T18:56:23.273Z" }, + { url = "https://files.pythonhosted.org/packages/b6/75/7ea947869eeb877c3d2b23ce2cbfcc9c5e8e31dd10d81e04add53f3f0c7e/hdbscan-0.8.44-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:e83276c6147d1ec74359ac2d07b2df9d0deb5e248194660967bcde6437a8518c", size = 2603287, 
upload-time = "2026-06-01T18:55:43.43Z" }, + { url = "https://files.pythonhosted.org/packages/2a/99/17dd8a7e845504c3b8f9443bb1b15d20461c70c0c305186fa3526bb55277/hdbscan-0.8.44-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6089b8accd805a3a7409b20247e29e54dea6118785771127a4fc6c5eda24f0ef", size = 5929276, upload-time = "2026-06-01T18:56:33.065Z" }, + { url = "https://files.pythonhosted.org/packages/13/8d/eec0040bd273c9e43409672c8cec1956775aca3ebafad8a9e3f5d25cfd80/hdbscan-0.8.44-cp311-cp311-win_amd64.whl", hash = "sha256:df6d6268022747a60c9990cecf446bc7a71621ff92bc51c86f5958d1cd451870", size = 1964877, upload-time = "2026-06-01T18:56:12.515Z" }, + { url = "https://files.pythonhosted.org/packages/5e/f0/4f719c1275158a13918124ce7d798be4b4ae2898469b467216fde974e443/hdbscan-0.8.44-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:cc4917a57f73984137bc6f14cddd1a140907102522174af8d8add42668a5e196", size = 2590501, upload-time = "2026-06-01T18:55:52.587Z" }, + { url = "https://files.pythonhosted.org/packages/5c/95/4f08d8adeba894453a057f1b87d24fb155e040601137cb82536d9708ff30/hdbscan-0.8.44-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d67f2b3628a80764a07fff3a994df2c6b2d9a6b9c8024edde7b36c184f6657f6", size = 5868686, upload-time = "2026-06-01T18:56:33.787Z" }, + { url = "https://files.pythonhosted.org/packages/b5/a2/959f617dbff9ebe0ef9b55f61c84b7ad00e5eb91b47b838b3a1a2bf0aa95/hdbscan-0.8.44-cp312-cp312-win_amd64.whl", hash = "sha256:29bffe0ef8a8191e6cd5af3dc7fdf5e7f6334eea8fa39ee8488f2f16911c1cdf", size = 1947427, upload-time = "2026-06-01T18:56:14.433Z" }, + { url = "https://files.pythonhosted.org/packages/a2/cb/c4d5c76145a053a7ac04d5106872ce44b199783a4249536c2d9f140c3c07/hdbscan-0.8.44-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:9256a7028f017b257c5eba05fe332a744a410c0dcb9971eaa9ed6c5efaac50d5", size = 2580855, upload-time = "2026-06-01T18:55:37.972Z" }, 
+ { url = "https://files.pythonhosted.org/packages/0e/73/12786f8e999959dbf56dfd2b0e4a7e7cc3ccfdcf3a464b0f019bc93e9556/hdbscan-0.8.44-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3400d93a299228e86dec77cc114b834cdd48413f7999eaddaecba045c38ee915", size = 5850796, upload-time = "2026-06-01T18:56:40.535Z" }, + { url = "https://files.pythonhosted.org/packages/4a/83/02a8d5c03e1fa535965c1339ee3f4d2c0ffd30163904aa817a970d1200c9/hdbscan-0.8.44-cp313-cp313-win_amd64.whl", hash = "sha256:0f4fa78c76459e1e00282fd8d67a6834ca46ba232e38414f69bb1bacc2c263ce", size = 1947052, upload-time = "2026-06-01T18:57:01.255Z" }, + { url = "https://files.pythonhosted.org/packages/15/7e/bd23accbaf40a4cc7a179c22129b81023b5daeb3a4d7af6c5f825f29e0ab/hdbscan-0.8.44-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:2351eb81f21491b5b50bdc5ecaa2d88f58608f51f5906774cb38747f21b18053", size = 2603654, upload-time = "2026-06-01T18:55:41.641Z" }, + { url = "https://files.pythonhosted.org/packages/87/10/1b1379a3576e3d9af5c931f942d321017fd2129dad21bc850063706a8ec9/hdbscan-0.8.44-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f7ee278a8da6043671031079b245488ee27565f48a83616637dae0b2c4886ea1", size = 5817791, upload-time = "2026-06-01T18:56:40.999Z" }, + { url = "https://files.pythonhosted.org/packages/db/ae/74d1aee2a7c6c39b16a8d4fa7aa3d029d7d08d0c5461d7b55f9619b48598/hdbscan-0.8.44-cp314-cp314-win_amd64.whl", hash = "sha256:5ea248dcaca951861e811411bf3eb9954f932f3a90c8bbe5629b5ee8479e011e", size = 1969244, upload-time = "2026-06-01T18:56:29.289Z" }, +] + [[package]] name = "hf-xet" version = "1.5.1" @@ -849,7 +895,7 @@ name = "importlib-metadata" version = "9.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "zipp" }, + { name = "zipp", marker = "python_full_version < '3.12'" }, ] sdist = { url = 
"https://files.pythonhosted.org/packages/a9/01/15bb152d77b21318514a96f43af312635eb2500c96b55398d020c93d86ea/importlib_metadata-9.0.0.tar.gz", hash = "sha256:a4f57ab599e6a2e3016d7595cfd72eb4661a5106e787a95bcc90c7105b831efc", size = 56405, upload-time = "2026-03-20T06:42:56.999Z" } wheels = [ @@ -910,6 +956,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b2/a3/e137168c9c44d18eff0376253da9f1e9234d0239e0ee230d2fee6cea8e55/jeepney-0.9.0-py3-none-any.whl", hash = "sha256:97e5714520c16fc0a45695e5365a2e11b81ea79bba796e26f9f1d178cb182683", size = 49010, upload-time = "2025-02-27T18:51:00.104Z" }, ] +[[package]] +name = "joblib" +version = "1.5.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/41/f2/d34e8b3a08a9cc79a50b2208a93dce981fe615b64d5a4d4abee421d898df/joblib-1.5.3.tar.gz", hash = "sha256:8561a3269e6801106863fd0d6d84bb737be9e7631e33aaed3fb9ce5953688da3", size = 331603, upload-time = "2025-12-15T08:41:46.427Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7b/91/984aca2ec129e2757d1e4e3c81c3fcda9d0f85b74670a094cc443d9ee949/joblib-1.5.3-py3-none-any.whl", hash = "sha256:5fc3c5039fc5ca8c0276333a188bbd59d6b7ab37fe6632daa76bc7f9ec18e713", size = 309071, upload-time = "2025-12-15T08:41:44.973Z" }, +] + [[package]] name = "jsonschema" version = "4.26.0" @@ -1092,6 +1147,38 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ce/62/b40b382fa0c66fee1478073eb8db352a4a6beda4a1adccf1df911d8c289c/librt-0.11.0-cp314-cp314t-win_arm64.whl", hash = "sha256:dee008f20b542e3cd162ba338a7f9ec0f6d23d395f66fe8aeeec3c9d067ea253", size = 102572, upload-time = "2026-05-10T18:17:06.809Z" }, ] +[[package]] +name = "llvmlite" +version = "0.47.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/01/88/a8952b6d5c21e74cbf158515b779666f692846502623e9e3c39d8e8ba25f/llvmlite-0.47.0.tar.gz", hash = 
"sha256:62031ce968ec74e95092184d4b0e857e444f8fdff0b8f9213707699570c33ccc", size = 193614, upload-time = "2026-03-31T18:29:53.497Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f4/f5/a1bde3aa8c43524b0acaf3f72fb3d80a32dd29dbb42d7dc434f84584cdcc/llvmlite-0.47.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:41270b0b1310717f717cf6f2a9c68d3c43bd7905c33f003825aebc361d0d1b17", size = 37232772, upload-time = "2026-03-31T18:28:12.198Z" }, + { url = "https://files.pythonhosted.org/packages/7c/fb/76d88fc05ee1f9c1a6efe39eb493c4a727e5d1690412469017cd23bcb776/llvmlite-0.47.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f9d118bc1dd7623e0e65ca9ac485ec6dd543c3b77bc9928ddc45ebd34e1e30a7", size = 56275179, upload-time = "2026-03-31T18:28:15.725Z" }, + { url = "https://files.pythonhosted.org/packages/4d/08/29da7f36217abd56a0c389ef9a18bea47960826e691ced1a36c92c6ce93c/llvmlite-0.47.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9ea5cfb04a6ab5b18e46be72b41b015975ba5980c4ddb41f1975b83e19031063", size = 55128632, upload-time = "2026-03-31T18:28:19.946Z" }, + { url = "https://files.pythonhosted.org/packages/df/f8/5e12e9ed447d65f04acf6fcf2d79cded2355640b5131a46cee4c99a5949d/llvmlite-0.47.0-cp310-cp310-win_amd64.whl", hash = "sha256:166b896a2262a2039d5fc52df5ee1659bd1ccd081183df7a2fba1b74702dd5ea", size = 38138402, upload-time = "2026-03-31T18:28:23.327Z" }, + { url = "https://files.pythonhosted.org/packages/34/0b/b9d1911cfefa61399821dfb37f486d83e0f42630a8d12f7194270c417002/llvmlite-0.47.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:74090f0dcfd6f24ebbef3f21f11e38111c4d7e6919b54c4416e1e357c3446b07", size = 37232770, upload-time = "2026-03-31T18:28:26.765Z" }, + { url = "https://files.pythonhosted.org/packages/46/27/5799b020e4cdfb25a7c951c06a96397c135efcdc21b78d853bbd9c814c7d/llvmlite-0.47.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = 
"sha256:ca14f02e29134e837982497959a8e2193d6035235de1cb41a9cb2bd6da4eedbb", size = 56275177, upload-time = "2026-03-31T18:28:31.01Z" }, + { url = "https://files.pythonhosted.org/packages/7e/51/48a53fedf01cb1f3f43ef200be17ebf83c8d9a04018d3783c1a226c342c2/llvmlite-0.47.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:12a69d4bb05f402f30477e21eeabe81911e7c251cecb192bed82cd83c9db10d8", size = 55128631, upload-time = "2026-03-31T18:28:36.046Z" }, + { url = "https://files.pythonhosted.org/packages/a2/50/59227d06bdc96e23322713c381af4e77420949d8cd8a042c79e0043096cc/llvmlite-0.47.0-cp311-cp311-win_amd64.whl", hash = "sha256:c37d6eb7aaabfa83ab9c2ff5b5cdb95a5e6830403937b2c588b7490724e05327", size = 38138400, upload-time = "2026-03-31T18:28:40.076Z" }, + { url = "https://files.pythonhosted.org/packages/fa/48/4b7fe0e34c169fa2f12532916133e0b219d2823b540733651b34fdac509a/llvmlite-0.47.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:306a265f408c259067257a732c8e159284334018b4083a9e35f67d19792b164f", size = 37232769, upload-time = "2026-03-31T18:28:43.735Z" }, + { url = "https://files.pythonhosted.org/packages/e6/4b/e3f2cd17822cf772a4a51a0a8080b0032e6d37b2dbe8cfb724eac4e31c52/llvmlite-0.47.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5853bf26160857c0c2573415ff4efe01c4c651e59e2c55c2a088740acfee51cd", size = 56275178, upload-time = "2026-03-31T18:28:48.342Z" }, + { url = "https://files.pythonhosted.org/packages/b6/55/a3b4a543185305a9bdf3d9759d53646ed96e55e7dfd43f53e7a421b8fbae/llvmlite-0.47.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:003bcf7fa579e14db59c1a1e113f93ab8a06b56a4be31c7f08264d1d4072d077", size = 55128632, upload-time = "2026-03-31T18:28:52.901Z" }, + { url = "https://files.pythonhosted.org/packages/2f/f5/d281ae0f79378a5a91f308ea9fdb9f9cc068fddd09629edc0725a5a8fde1/llvmlite-0.47.0-cp312-cp312-win_amd64.whl", hash = 
"sha256:f3079f25bdc24cd9d27c4b2b5e68f5f60c4fdb7e8ad5ee2b9b006007558f9df7", size = 38138692, upload-time = "2026-03-31T18:28:57.147Z" }, + { url = "https://files.pythonhosted.org/packages/77/6f/4615353e016799f80fa52ccb270a843c413b22361fadda2589b2922fb9b0/llvmlite-0.47.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:a3c6a735d4e1041808434f9d440faa3d78d9b4af2ee64d05a66f351883b6ceec", size = 37232771, upload-time = "2026-03-31T18:29:01.324Z" }, + { url = "https://files.pythonhosted.org/packages/31/b8/69f5565f1a280d032525878a86511eebed0645818492feeb169dfb20ae8e/llvmlite-0.47.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2699a74321189e812d476a43d6d7f652f51811e7b5aad9d9bba842a1c7927acb", size = 56275178, upload-time = "2026-03-31T18:29:05.748Z" }, + { url = "https://files.pythonhosted.org/packages/d6/da/b32cafcb926fb0ce2aa25553bf32cb8764af31438f40e2481df08884c947/llvmlite-0.47.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6c6951e2b29930227963e53ee152441f0e14be92e9d4231852102d986c761e40", size = 55128632, upload-time = "2026-03-31T18:29:11.235Z" }, + { url = "https://files.pythonhosted.org/packages/46/9f/4898b44e4042c60fafcb1162dfb7014f6f15b1ec19bf29cfea6bf26df90d/llvmlite-0.47.0-cp313-cp313-win_amd64.whl", hash = "sha256:c2e9adf8698d813a9a5efb2d4370caf344dbc1e145019851fee6a6f319ba760e", size = 38138695, upload-time = "2026-03-31T18:29:15.43Z" }, + { url = "https://files.pythonhosted.org/packages/1c/d4/33c8af00f0bf6f552d74f3a054f648af2c5bc6bece97972f3bfadce4f5ec/llvmlite-0.47.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:de966c626c35c9dff5ae7bf12db25637738d0df83fc370cf793bc94d43d92d14", size = 37232773, upload-time = "2026-03-31T18:29:19.453Z" }, + { url = "https://files.pythonhosted.org/packages/64/1d/a760e993e0c0ba6db38d46b9f48f6c7dceb8ac838824997fb9e25f97bc04/llvmlite-0.47.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = 
"sha256:ddbccff2aeaff8670368340a158abefc032fe9b3ccf7d9c496639263d00151aa", size = 56275176, upload-time = "2026-03-31T18:29:24.149Z" }, + { url = "https://files.pythonhosted.org/packages/84/3b/e679bc3b29127182a7f4aa2d2e9e5bea42adb93fb840484147d59c236299/llvmlite-0.47.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d4a7b778a2e144fc64468fb9bf509ac1226c9813a00b4d7afea5d988c4e22fca", size = 55128631, upload-time = "2026-03-31T18:29:29.536Z" }, + { url = "https://files.pythonhosted.org/packages/be/f7/19e2a09c62809c9e63bbd14ce71fb92c6ff7b7b3045741bb00c781efc3c9/llvmlite-0.47.0-cp314-cp314-win_amd64.whl", hash = "sha256:694e3c2cdc472ed2bd8bd4555ca002eec4310961dd58ef791d508f57b5cc4c94", size = 39153826, upload-time = "2026-03-31T18:29:33.681Z" }, + { url = "https://files.pythonhosted.org/packages/40/a1/581a8c707b5e80efdbbe1dd94527404d33fe50bceb71f39d5a7e11bd57b7/llvmlite-0.47.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:92ec8a169a20b473c1c54d4695e371bde36489fc1efa3688e11e99beba0abf9c", size = 37232772, upload-time = "2026-03-31T18:29:37.952Z" }, + { url = "https://files.pythonhosted.org/packages/11/03/16090dd6f74ba2b8b922276047f15962fbeea0a75d5601607edb301ba945/llvmlite-0.47.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fa1cbd800edd3b20bc141521f7fd45a6185a5b84109aa6855134e81397ffe72b", size = 56275178, upload-time = "2026-03-31T18:29:42.58Z" }, + { url = "https://files.pythonhosted.org/packages/f5/cb/0abf1dd4c5286a95ffe0c1d8c67aec06b515894a0dd2ac97f5e27b82ab0b/llvmlite-0.47.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f6725179b89f03b17dabe236ff3422cb8291b4c1bf40af152826dfd34e350ae8", size = 55128632, upload-time = "2026-03-31T18:29:46.939Z" }, + { url = "https://files.pythonhosted.org/packages/4f/79/d3bbab197e86e0ff4f9c07122895b66a3e0d024247fcff7f12c473cb36d9/llvmlite-0.47.0-cp314-cp314t-win_amd64.whl", hash = 
"sha256:6842cf6f707ec4be3d985a385ad03f72b2d724439e118fcbe99b2929964f0453", size = 39153839, upload-time = "2026-03-31T18:29:51.004Z" }, +] + [[package]] name = "loguru" version = "0.7.3" @@ -1351,6 +1438,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" }, ] +[[package]] +name = "narwhals" +version = "2.22.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/62/3c/c4ef2164a71c1a63d7f1ae411c4082c5fa872405106db60a4b7114989ad7/narwhals-2.22.1.tar.gz", hash = "sha256:d62920805a0a43b7ff8b54b0c0d3142d796f8a9301836ada37e573d6a33cbcd9", size = 647493, upload-time = "2026-06-05T12:34:34.051Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/48/ca/36339329c4604adbcc99c899b7eb1ce1a555c499b6a6860757dc9bfed36d/narwhals-2.22.1-py3-none-any.whl", hash = "sha256:60567d774edf77db53906f89d9fbd164e66e56d66d388e1e6990f17ac33cfb53", size = 454815, upload-time = "2026-06-05T12:34:32.289Z" }, +] + [[package]] name = "nh3" version = "0.3.5" @@ -1394,6 +1490,43 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/88/b2/d0896bdcdc8d28a7fc5717c305f1a861c26e18c05047949fb371034d98bd/nodeenv-1.10.0-py2.py3-none-any.whl", hash = "sha256:5bb13e3eed2923615535339b3c620e76779af4cb4c6a90deccc9e36b274d3827", size = 23438, upload-time = "2025-12-20T14:08:52.782Z" }, ] +[[package]] +name = "numba" +version = "0.65.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "llvmlite" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= 
'3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f6/c5/db2ac3685833d626c0dcae6bd2330cd68433e1fd248d15f70998160d3ad7/numba-0.65.1.tar.gz", hash = "sha256:19357146c32fe9ed25059ab915e8465fb13951cf6b0aace3826b76886373ab23", size = 2765600, upload-time = "2026-04-24T02:02:56.551Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/de/1b/3c5a7daf683a95465bf23504bcd1a2d5db8cd5e5e276ca87505d020dffe9/numba-0.65.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:9d993ed0a257aa4116e6f553f114004bcfdee540c7276ab8ea48f650d514c452", size = 2680870, upload-time = "2026-04-24T02:02:10.623Z" }, + { url = "https://files.pythonhosted.org/packages/0f/a4/1831836814018a898e7d252aebe09c0f3ce1f26d145b68264b4ae0be6822/numba-0.65.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5f098109f361681e57295f7e84d8ab2426902539a141811de0703ace52826981", size = 3739780, upload-time = "2026-04-24T02:02:13.097Z" }, + { url = "https://files.pythonhosted.org/packages/9c/1b/a813ddc81def09e257d2b1f67521982ce4b06204a87268796ffc8187271c/numba-0.65.1-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:973fd8173f2312815e6b7aaae887c4ce8a817eeff46a4f8840b828305b75bc95", size = 3446722, upload-time = "2026-04-24T02:02:15.083Z" }, + { url = "https://files.pythonhosted.org/packages/09/52/ee1d8b3becda384fe0552221641e05aa668a35e8a77470db4db7f6475000/numba-0.65.1-cp310-cp310-win_amd64.whl", hash = "sha256:c63aa0c4193694026452da55d0ef9d85156c1a7a333454c103bb30dec81b7bf8", size = 2747539, upload-time = "2026-04-24T02:02:16.79Z" }, + { url = "https://files.pythonhosted.org/packages/96/b3/650500c2eab4534d98e9166f4298e0f3c69c742afdf24e6eabccd1f16ad8/numba-0.65.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:7020d74b19cdb8cff16506542fdd510756e28c5e7f3bd0b7f574f0f42272fcd9", size = 2680563, upload-time = "2026-04-24T02:02:18.414Z" }, + { url = 
"https://files.pythonhosted.org/packages/44/0b/0615dbedb98f5b32a35a53290fbdc6e22306968109278d7e58df82d7a9f6/numba-0.65.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f80ed83774b5173abd6581cd8d2165d1d38e13d2e5c8155c0c0b421784745420", size = 3745018, upload-time = "2026-04-24T02:02:20.252Z" }, + { url = "https://files.pythonhosted.org/packages/49/aa/4361698f35bf63bff67dfe6c90493731177f48ede954f77b0588731537bc/numba-0.65.1-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7ed425a43b0a5f9772f2f4e2dd0bbd12eabecae1af0b24efcfd4e053f012aac6", size = 3450962, upload-time = "2026-04-24T02:02:22.449Z" }, + { url = "https://files.pythonhosted.org/packages/bd/9a/af61ec03b3116c161fd7a06b9e8a265729a8718458333e8ffbb06d9a3978/numba-0.65.1-cp311-cp311-win_amd64.whl", hash = "sha256:df40a5028a975b9ea66f6a2a3f7abbdbd541a863070e34ed367aff21141248e4", size = 2747417, upload-time = "2026-04-24T02:02:24.43Z" }, + { url = "https://files.pythonhosted.org/packages/57/bc/76f8f8c5cf9adee47fdb7bbb03be8900f76f902d451d7477cf12b845e1de/numba-0.65.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:ac3f1e77c352dd0ea9712732c2d8f9ca507717435eec5b5013bf138ac33c4a08", size = 2681371, upload-time = "2026-04-24T02:02:26.105Z" }, + { url = "https://files.pythonhosted.org/packages/69/47/a415af0283e4db0398104c6d1c11c9861a98dc67a7aa442a7769ed5d6196/numba-0.65.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:52bc6f3ceb8fcaff9b2ae26b4c6b1e9fee39db8d355534c0fe4f39a901246b84", size = 3802467, upload-time = "2026-04-24T02:02:27.712Z" }, + { url = "https://files.pythonhosted.org/packages/46/36/246f73ec99cfeab2f2cb2ce7d4218766cc36a2da418901223f4f4da9c813/numba-0.65.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:90ca10b3463bae0bd70589726fe3c77d01d6b5fc86bee54bcdf9fb6b47c28977", size = 3502628, upload-time = "2026-04-24T02:02:29.763Z" }, + { url = 
"https://files.pythonhosted.org/packages/db/9e/3c679b2ee078425b9e99a91e44f8d132a6830d8ccce5227bc5e9181aeed8/numba-0.65.1-cp312-cp312-win_amd64.whl", hash = "sha256:5971c632be2a2351500431f46213821dba8d02b18a9f7d02fd36bd2743e41a6a", size = 2750611, upload-time = "2026-04-24T02:02:31.477Z" }, + { url = "https://files.pythonhosted.org/packages/79/37/14a4579049c1eb673afd0de0cb4842982acd55b9ce2643e763db858bcea0/numba-0.65.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:1735c15c1134a5108b4d6a5c77fc0947924ea066a738dc09a52008c13df9cad3", size = 2681344, upload-time = "2026-04-24T02:02:33.65Z" }, + { url = "https://files.pythonhosted.org/packages/a0/22/b8d873f6466b20aa563fc9b33acd48dec89a07803ddaa2f1c8ca1cd33126/numba-0.65.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c09f49117ef255e1f1c6dad0c7a1ed39868243862a73be5706793241a3755f1b", size = 3810619, upload-time = "2026-04-24T02:02:36.041Z" }, + { url = "https://files.pythonhosted.org/packages/62/08/e16a8b5d9a018962ebb5c66be662317cde32b9f5dab08441f90bed5522fb/numba-0.65.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:594a8680b3fadac99e97e489b1fd89007177e5336713745c3b769528c635a464", size = 3509783, upload-time = "2026-04-24T02:02:38.245Z" }, + { url = "https://files.pythonhosted.org/packages/fd/a5/03c970d57f4c1741354837353ce39fb5206952ae1dba8922d29c86f64805/numba-0.65.1-cp313-cp313-win_amd64.whl", hash = "sha256:85be74c0d036842699a30058f82fb88fc5ffdc59f7615cab5792ea92914c9b62", size = 2750534, upload-time = "2026-04-24T02:02:39.903Z" }, + { url = "https://files.pythonhosted.org/packages/4f/2e/8aed9b726d9ba5f11ad287645fd479e88278db3060a25cb1225d730eb2b7/numba-0.65.1-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:33f5eb68eb1c843511615d14663ce60258525d6a4c65ab040e2c2b0c4cf17450", size = 2681554, upload-time = "2026-04-24T02:02:41.812Z" }, + { url = 
"https://files.pythonhosted.org/packages/87/96/f3eb235fafa82a34e2ab5dd7dc9ffff998ebf5f0bbc23fa56a96aeb44da6/numba-0.65.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:71e73029bf53a62cc6afcf96be4bd942290d8b4c55f0a454fb536158115790f7", size = 3779602, upload-time = "2026-04-24T02:02:43.726Z" }, + { url = "https://files.pythonhosted.org/packages/09/90/b0f09b48752d23640b8284f22aa597737e8adaddc7fbfacc4708b7f73a4c/numba-0.65.1-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3a07635e0be926b9bdbffb09137c230fb13f6ec0e564914ba937cee12ce3eb35", size = 3479532, upload-time = "2026-04-24T02:02:45.427Z" }, + { url = "https://files.pythonhosted.org/packages/56/46/3f7fc04fb853559e74b210e0b62c19974ec844cefec611f9e535f4da3761/numba-0.65.1-cp314-cp314-win_amd64.whl", hash = "sha256:2a20fcdabdefbdacf88d85caf70c3b18c4bcb7ebb8f82e6a19486383dd26ab63", size = 2752637, upload-time = "2026-04-24T02:02:47.664Z" }, + { url = "https://files.pythonhosted.org/packages/81/7b/c1a341a9067367778f4152a5f01061cf281fb09582c92c510ec4918cabf6/numba-0.65.1-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:548dd4b3a4508d5062768d1514b2cd7b015f9a25ec7af651c50dee243965e652", size = 2684600, upload-time = "2026-04-24T02:02:49.653Z" }, + { url = "https://files.pythonhosted.org/packages/03/36/98ddbcf3e4f04a6dd07e1c67249955920579ba4af6bb6868e3088f4ed282/numba-0.65.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:78abc28feff2c2ff8307fff3975b6438352759c9acb797ecd6b1fb6e7e39e31d", size = 3817198, upload-time = "2026-04-24T02:02:51.266Z" }, + { url = "https://files.pythonhosted.org/packages/a3/83/0dad21057ece5a835599f5d24099b091703995e23dbbf894f259e91c010b/numba-0.65.1-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee7676cb389555805f9b9a1840cbcd1ea6c8bd5376ab6918e3a29c5ea1dbda20", size = 3533862, upload-time = "2026-04-24T02:02:52.987Z" }, + { url = 
"https://files.pythonhosted.org/packages/32/36/8be7118ffd4c8440881046eac3d0982cc5ab42909508cf5d67024d62a2e4/numba-0.65.1-cp314-cp314t-win_amd64.whl", hash = "sha256:20609346e3bd75204950dcbbfe383a8d7dbf4902f442aedbf00f97fef4aa8f38", size = 2758237, upload-time = "2026-04-24T02:02:54.612Z" }, +] + [[package]] name = "numpy" version = "2.2.6" @@ -2211,6 +2344,24 @@ crypto = [ { name = "cryptography" }, ] +[[package]] +name = "pynndescent" +version = "0.6.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "joblib" }, + { name = "llvmlite" }, + { name = "numba" }, + { name = "scikit-learn", version = "1.7.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "scikit-learn", version = "1.9.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "scipy", version = "1.17.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/4a/fb/7f58c397fb31666756457ee2ac4c0289ef2daad57f4ae4be8dec12f80b03/pynndescent-0.6.0.tar.gz", hash = "sha256:7ffde0fb5b400741e055a9f7d377e3702e02250616834231f6c209e39aac24f5", size = 2992987, upload-time = "2026-01-08T21:29:58.943Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b2/e6/94145d714402fd5ade00b5661f2d0ab981219e07f7db9bfa16786cdb9c04/pynndescent-0.6.0-py3-none-any.whl", hash = "sha256:dc8c74844e4c7f5cbd1e0cd6909da86fdc789e6ff4997336e344779c3d5538ef", size = 73511, upload-time = "2026-01-08T21:29:57.306Z" }, +] + [[package]] name = "pyproject-hooks" version = "1.2.0" @@ -2895,6 +3046,242 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2d/c7/c53e8dbff9c9dc4b7928773421ae294a5d28fcb8dcda1a089579d3a7e510/ruff-0.15.17-py3-none-win_arm64.whl", 
hash = "sha256:f3be1fbb34bcdfd146240d8fb92a709d4c2c8191348580a3c044ec60fa0b4456", size = 11355275, upload-time = "2026-06-11T17:54:43.635Z" }, ] +[[package]] +name = "scikit-learn" +version = "1.7.2" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.11'", +] +dependencies = [ + { name = "joblib", marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "threadpoolctl", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/98/c2/a7855e41c9d285dfe86dc50b250978105dce513d6e459ea66a6aeb0e1e0c/scikit_learn-1.7.2.tar.gz", hash = "sha256:20e9e49ecd130598f1ca38a1d85090e1a600147b9c02fa6f15d69cb53d968fda", size = 7193136, upload-time = "2025-09-09T08:21:29.075Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ba/3e/daed796fd69cce768b8788401cc464ea90b306fb196ae1ffed0b98182859/scikit_learn-1.7.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6b33579c10a3081d076ab403df4a4190da4f4432d443521674637677dc91e61f", size = 9336221, upload-time = "2025-09-09T08:20:19.328Z" }, + { url = "https://files.pythonhosted.org/packages/1c/ce/af9d99533b24c55ff4e18d9b7b4d9919bbc6cd8f22fe7a7be01519a347d5/scikit_learn-1.7.2-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:36749fb62b3d961b1ce4fedf08fa57a1986cd409eff2d783bca5d4b9b5fce51c", size = 8653834, upload-time = "2025-09-09T08:20:22.073Z" }, + { url = "https://files.pythonhosted.org/packages/58/0e/8c2a03d518fb6bd0b6b0d4b114c63d5f1db01ff0f9925d8eb10960d01c01/scikit_learn-1.7.2-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7a58814265dfc52b3295b1900cfb5701589d30a8bb026c7540f1e9d3499d5ec8", size = 9660938, upload-time = 
"2025-09-09T08:20:24.327Z" }, + { url = "https://files.pythonhosted.org/packages/2b/75/4311605069b5d220e7cf5adabb38535bd96f0079313cdbb04b291479b22a/scikit_learn-1.7.2-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4a847fea807e278f821a0406ca01e387f97653e284ecbd9750e3ee7c90347f18", size = 9477818, upload-time = "2025-09-09T08:20:26.845Z" }, + { url = "https://files.pythonhosted.org/packages/7f/9b/87961813c34adbca21a6b3f6b2bea344c43b30217a6d24cc437c6147f3e8/scikit_learn-1.7.2-cp310-cp310-win_amd64.whl", hash = "sha256:ca250e6836d10e6f402436d6463d6c0e4d8e0234cfb6a9a47835bd392b852ce5", size = 8886969, upload-time = "2025-09-09T08:20:29.329Z" }, + { url = "https://files.pythonhosted.org/packages/43/83/564e141eef908a5863a54da8ca342a137f45a0bfb71d1d79704c9894c9d1/scikit_learn-1.7.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c7509693451651cd7361d30ce4e86a1347493554f172b1c72a39300fa2aea79e", size = 9331967, upload-time = "2025-09-09T08:20:32.421Z" }, + { url = "https://files.pythonhosted.org/packages/18/d6/ba863a4171ac9d7314c4d3fc251f015704a2caeee41ced89f321c049ed83/scikit_learn-1.7.2-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:0486c8f827c2e7b64837c731c8feff72c0bd2b998067a8a9cbc10643c31f0fe1", size = 8648645, upload-time = "2025-09-09T08:20:34.436Z" }, + { url = "https://files.pythonhosted.org/packages/ef/0e/97dbca66347b8cf0ea8b529e6bb9367e337ba2e8be0ef5c1a545232abfde/scikit_learn-1.7.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:89877e19a80c7b11a2891a27c21c4894fb18e2c2e077815bcade10d34287b20d", size = 9715424, upload-time = "2025-09-09T08:20:36.776Z" }, + { url = "https://files.pythonhosted.org/packages/f7/32/1f3b22e3207e1d2c883a7e09abb956362e7d1bd2f14458c7de258a26ac15/scikit_learn-1.7.2-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8da8bf89d4d79aaec192d2bda62f9b56ae4e5b4ef93b6a56b5de4977e375c1f1", size = 9509234, upload-time = "2025-09-09T08:20:38.957Z" }, + { 
url = "https://files.pythonhosted.org/packages/9f/71/34ddbd21f1da67c7a768146968b4d0220ee6831e4bcbad3e03dd3eae88b6/scikit_learn-1.7.2-cp311-cp311-win_amd64.whl", hash = "sha256:9b7ed8d58725030568523e937c43e56bc01cadb478fc43c042a9aca1dacb3ba1", size = 8894244, upload-time = "2025-09-09T08:20:41.166Z" }, + { url = "https://files.pythonhosted.org/packages/a7/aa/3996e2196075689afb9fce0410ebdb4a09099d7964d061d7213700204409/scikit_learn-1.7.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:8d91a97fa2b706943822398ab943cde71858a50245e31bc71dba62aab1d60a96", size = 9259818, upload-time = "2025-09-09T08:20:43.19Z" }, + { url = "https://files.pythonhosted.org/packages/43/5d/779320063e88af9c4a7c2cf463ff11c21ac9c8bd730c4a294b0000b666c9/scikit_learn-1.7.2-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:acbc0f5fd2edd3432a22c69bed78e837c70cf896cd7993d71d51ba6708507476", size = 8636997, upload-time = "2025-09-09T08:20:45.468Z" }, + { url = "https://files.pythonhosted.org/packages/5c/d0/0c577d9325b05594fdd33aa970bf53fb673f051a45496842caee13cfd7fe/scikit_learn-1.7.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e5bf3d930aee75a65478df91ac1225ff89cd28e9ac7bd1196853a9229b6adb0b", size = 9478381, upload-time = "2025-09-09T08:20:47.982Z" }, + { url = "https://files.pythonhosted.org/packages/82/70/8bf44b933837ba8494ca0fc9a9ab60f1c13b062ad0197f60a56e2fc4c43e/scikit_learn-1.7.2-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b4d6e9deed1a47aca9fe2f267ab8e8fe82ee20b4526b2c0cd9e135cea10feb44", size = 9300296, upload-time = "2025-09-09T08:20:50.366Z" }, + { url = "https://files.pythonhosted.org/packages/c6/99/ed35197a158f1fdc2fe7c3680e9c70d0128f662e1fee4ed495f4b5e13db0/scikit_learn-1.7.2-cp312-cp312-win_amd64.whl", hash = "sha256:6088aa475f0785e01bcf8529f55280a3d7d298679f50c0bb70a2364a82d0b290", size = 8731256, upload-time = "2025-09-09T08:20:52.627Z" }, + { url = 
"https://files.pythonhosted.org/packages/ae/93/a3038cb0293037fd335f77f31fe053b89c72f17b1c8908c576c29d953e84/scikit_learn-1.7.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0b7dacaa05e5d76759fb071558a8b5130f4845166d88654a0f9bdf3eb57851b7", size = 9212382, upload-time = "2025-09-09T08:20:54.731Z" }, + { url = "https://files.pythonhosted.org/packages/40/dd/9a88879b0c1104259136146e4742026b52df8540c39fec21a6383f8292c7/scikit_learn-1.7.2-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:abebbd61ad9e1deed54cca45caea8ad5f79e1b93173dece40bb8e0c658dbe6fe", size = 8592042, upload-time = "2025-09-09T08:20:57.313Z" }, + { url = "https://files.pythonhosted.org/packages/46/af/c5e286471b7d10871b811b72ae794ac5fe2989c0a2df07f0ec723030f5f5/scikit_learn-1.7.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:502c18e39849c0ea1a5d681af1dbcf15f6cce601aebb657aabbfe84133c1907f", size = 9434180, upload-time = "2025-09-09T08:20:59.671Z" }, + { url = "https://files.pythonhosted.org/packages/f1/fd/df59faa53312d585023b2da27e866524ffb8faf87a68516c23896c718320/scikit_learn-1.7.2-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7a4c328a71785382fe3fe676a9ecf2c86189249beff90bf85e22bdb7efaf9ae0", size = 9283660, upload-time = "2025-09-09T08:21:01.71Z" }, + { url = "https://files.pythonhosted.org/packages/a7/c7/03000262759d7b6f38c836ff9d512f438a70d8a8ddae68ee80de72dcfb63/scikit_learn-1.7.2-cp313-cp313-win_amd64.whl", hash = "sha256:63a9afd6f7b229aad94618c01c252ce9e6fa97918c5ca19c9a17a087d819440c", size = 8702057, upload-time = "2025-09-09T08:21:04.234Z" }, + { url = "https://files.pythonhosted.org/packages/55/87/ef5eb1f267084532c8e4aef98a28b6ffe7425acbfd64b5e2f2e066bc29b3/scikit_learn-1.7.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:9acb6c5e867447b4e1390930e3944a005e2cb115922e693c08a323421a6966e8", size = 9558731, upload-time = "2025-09-09T08:21:06.381Z" }, + { url = 
"https://files.pythonhosted.org/packages/93/f8/6c1e3fc14b10118068d7938878a9f3f4e6d7b74a8ddb1e5bed65159ccda8/scikit_learn-1.7.2-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:2a41e2a0ef45063e654152ec9d8bcfc39f7afce35b08902bfe290c2498a67a6a", size = 9038852, upload-time = "2025-09-09T08:21:08.628Z" }, + { url = "https://files.pythonhosted.org/packages/83/87/066cafc896ee540c34becf95d30375fe5cbe93c3b75a0ee9aa852cd60021/scikit_learn-1.7.2-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:98335fb98509b73385b3ab2bd0639b1f610541d3988ee675c670371d6a87aa7c", size = 9527094, upload-time = "2025-09-09T08:21:11.486Z" }, + { url = "https://files.pythonhosted.org/packages/9c/2b/4903e1ccafa1f6453b1ab78413938c8800633988c838aa0be386cbb33072/scikit_learn-1.7.2-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:191e5550980d45449126e23ed1d5e9e24b2c68329ee1f691a3987476e115e09c", size = 9367436, upload-time = "2025-09-09T08:21:13.602Z" }, + { url = "https://files.pythonhosted.org/packages/b5/aa/8444be3cfb10451617ff9d177b3c190288f4563e6c50ff02728be67ad094/scikit_learn-1.7.2-cp313-cp313t-win_amd64.whl", hash = "sha256:57dc4deb1d3762c75d685507fbd0bc17160144b2f2ba4ccea5dc285ab0d0e973", size = 9275749, upload-time = "2025-09-09T08:21:15.96Z" }, + { url = "https://files.pythonhosted.org/packages/d9/82/dee5acf66837852e8e68df6d8d3a6cb22d3df997b733b032f513d95205b7/scikit_learn-1.7.2-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:fa8f63940e29c82d1e67a45d5297bdebbcb585f5a5a50c4914cc2e852ab77f33", size = 9208906, upload-time = "2025-09-09T08:21:18.557Z" }, + { url = "https://files.pythonhosted.org/packages/3c/30/9029e54e17b87cb7d50d51a5926429c683d5b4c1732f0507a6c3bed9bf65/scikit_learn-1.7.2-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:f95dc55b7902b91331fa4e5845dd5bde0580c9cd9612b1b2791b7e80c3d32615", size = 8627836, upload-time = "2025-09-09T08:21:20.695Z" }, + { url = 
"https://files.pythonhosted.org/packages/60/18/4a52c635c71b536879f4b971c2cedf32c35ee78f48367885ed8025d1f7ee/scikit_learn-1.7.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9656e4a53e54578ad10a434dc1f993330568cfee176dff07112b8785fb413106", size = 9426236, upload-time = "2025-09-09T08:21:22.645Z" }, + { url = "https://files.pythonhosted.org/packages/99/7e/290362f6ab582128c53445458a5befd471ed1ea37953d5bcf80604619250/scikit_learn-1.7.2-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:96dc05a854add0e50d3f47a1ef21a10a595016da5b007c7d9cd9d0bffd1fcc61", size = 9312593, upload-time = "2025-09-09T08:21:24.65Z" }, + { url = "https://files.pythonhosted.org/packages/8e/87/24f541b6d62b1794939ae6422f8023703bbf6900378b2b34e0b4384dfefd/scikit_learn-1.7.2-cp314-cp314-win_amd64.whl", hash = "sha256:bb24510ed3f9f61476181e4db51ce801e2ba37541def12dc9333b946fc7a9cf8", size = 8820007, upload-time = "2025-09-09T08:21:26.713Z" }, +] + +[[package]] +name = "scikit-learn" +version = "1.9.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.15'", + "python_full_version == '3.14.*'", + "python_full_version == '3.13.*'", + "python_full_version == '3.12.*'", + "python_full_version == '3.11.*'", +] +dependencies = [ + { name = "joblib", marker = "python_full_version >= '3.11'" }, + { name = "narwhals", marker = "python_full_version >= '3.11'" }, + { name = "numpy", version = "2.4.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "scipy", version = "1.17.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "threadpoolctl", marker = "python_full_version >= '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fa/6f/37092bdb25f712817231799fc5674d8e704066a8a70c1d2d40517e18b4ab/scikit_learn-1.9.0.tar.gz", hash = 
"sha256:8833266989d3a5110178a9fae30783675460724d0e1efb13b14901d2c660c557", size = 7750767, upload-time = "2026-06-02T11:54:32.706Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f5/be/e844fd9586e66540a15b71924d17a6cbc1bb749e81ddd0a796bcdba4c055/scikit_learn-1.9.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9db6f4d34e68c8899e4cab27fdf8eafe6ed21f2ba52ceb25ea250cd237f8e47b", size = 8789686, upload-time = "2026-06-02T11:53:05.439Z" }, + { url = "https://files.pythonhosted.org/packages/42/e2/ff880f62677a17d035817d543cb0fc8727d01eccbee81c5f7fc733a9d856/scikit_learn-1.9.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:f401448645a3e7bc115aa3c094097865155b34bff1cba8101857d9104e99074c", size = 8256782, upload-time = "2026-06-02T11:53:08.904Z" }, + { url = "https://files.pythonhosted.org/packages/25/64/eb40435e1a508ab1b4e284ce43ae80f6a162e5be5e38ed5a6fab467a9ea4/scikit_learn-1.9.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fd3a8ef0c758555a3b23c03adaa858af32f7736785ded50ad5991f59c4ed03fa", size = 8992419, upload-time = "2026-06-02T11:53:11.551Z" }, + { url = "https://files.pythonhosted.org/packages/8d/da/4810a28e473185429e45a57eebcc91fc991b33d889cc0676063e671db03d/scikit_learn-1.9.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f7e254636164090da847715a27f8e5478feb98c40a9e0ee90cbd277de9e5ceb8", size = 9281411, upload-time = "2026-06-02T11:53:15.063Z" }, + { url = "https://files.pythonhosted.org/packages/3b/67/be3d369f40d8178ba3bd86635d132e08cb5329b023e4669d9426d84bc007/scikit_learn-1.9.0-cp311-cp311-win_amd64.whl", hash = "sha256:5dc1818c77575d149e25fce9ef82dd7b7263ae372f03494158668ad632a69759", size = 8272736, upload-time = "2026-06-02T11:53:18.108Z" }, + { url = "https://files.pythonhosted.org/packages/37/79/a733f02dc2118da7e77a134b34f39f40201a353311b011d20859d2db3556/scikit_learn-1.9.0-cp311-cp311-win_arm64.whl", hash = 
"sha256:366652351f092b219c248f1e72821e841960a63d8f358f1dcfd54dc1cbdbbc28", size = 7919564, upload-time = "2026-06-02T11:53:21.2Z" }, + { url = "https://files.pythonhosted.org/packages/ac/20/75f915ff375d6249e6550ac740fdbbd66159a068fd3af1400ff62036b07a/scikit_learn-1.9.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2bd41b0d201bc81575531b96b713d3eb5e5f50fb0b82101ff0f92294fdc236ac", size = 8741122, upload-time = "2026-06-02T11:53:24.08Z" }, + { url = "https://files.pythonhosted.org/packages/cc/d5/2b5148f2279196775e1db2aeb85d14b70ac80e7e32b3b28e7ebeafb0901d/scikit_learn-1.9.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:5be45aa4a42a68a533913a6ed736cf309de2226411c79ef8d609a5456f1939b1", size = 8261512, upload-time = "2026-06-02T11:53:27.183Z" }, + { url = "https://files.pythonhosted.org/packages/a0/ee/5adbc77656b71f9456a2f5a7a9fdb4bcf9207a6b962889f1c2f9323afa4e/scikit_learn-1.9.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5e50ed4da51974e86e940690e9a3d82e729b62b5a49f7c9bac534d515d39d86f", size = 8837603, upload-time = "2026-06-02T11:53:30.328Z" }, + { url = "https://files.pythonhosted.org/packages/6c/c2/63fdda36c56437eeb44aaf9493c8bcd62ce230ab1598924fc626ffbfa943/scikit_learn-1.9.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:056c92bb67ad4c28463c2f2653d9701449201e7e7a9e94e321be0f71c4fef2b8", size = 9132097, upload-time = "2026-06-02T11:53:33.456Z" }, + { url = "https://files.pythonhosted.org/packages/83/a4/c8e67227c680e2259c8864ae72ff48b06e16a6f51253a22167aa02a8aa4e/scikit_learn-1.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:4306775fad04cc4b472a1b15af1ae9cede1540fbfcc17fbce3767cd8dc7ae283", size = 8211173, upload-time = "2026-06-02T11:53:36.602Z" }, + { url = "https://files.pythonhosted.org/packages/cf/fd/3c0863792e98e67e9184aa4029288a175935eb65443afcd30d4f143450cf/scikit_learn-1.9.0-cp312-cp312-win_arm64.whl", hash = 
"sha256:26e22435f63bcdcf396b574273f29f13dd531f5ea035801f5be10ba1540a4e60", size = 7867451, upload-time = "2026-06-02T11:53:39.075Z" }, + { url = "https://files.pythonhosted.org/packages/3c/01/cf3310626b6d48d3e9be69a1223f9180360b5e6edb045f50fade723ce494/scikit_learn-1.9.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:80746d63bd4b6eaca54d36fe5feaf4d28bb38dc6f9470f81c7cad7c40155f119", size = 8705188, upload-time = "2026-06-02T11:53:41.964Z" }, + { url = "https://files.pythonhosted.org/packages/3e/04/5acd7ae280c5f93b6ac5ef6cdec14eef4c8d1cd91d85b3292989c94d96b1/scikit_learn-1.9.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:5b934c45c252844a91d69fda3a34cff5e7307e1db10d77cb10a3980312c74713", size = 8228299, upload-time = "2026-06-02T11:53:44.817Z" }, + { url = "https://files.pythonhosted.org/packages/0c/39/ffe829a5b8ecb40a518724a997794657fdc354ada5e8fe8e64d998c0bac9/scikit_learn-1.9.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:38c3dcb9a1ffb85505ec53d54c7b4aea0cff70050425a7760c2af661ac85df05", size = 8789690, upload-time = "2026-06-02T11:53:47.461Z" }, + { url = "https://files.pythonhosted.org/packages/1f/88/8dab5de10c638c083772a6be83a3d8106ced492f74a928c8693638e5bb50/scikit_learn-1.9.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:da76d09304a4706db7cc1e3ebaa3b6b98a67365cc11d2996c4f1e58ba47df714", size = 9087723, upload-time = "2026-06-02T11:53:50.702Z" }, + { url = "https://files.pythonhosted.org/packages/20/3f/7917ca72464038f6240ec70c29f94862d08a34a74291ae4d4ec5eb8186a0/scikit_learn-1.9.0-cp313-cp313-win_amd64.whl", hash = "sha256:5808d98f15c6bf6d9d96d2348c1997392a5888ce7097e664105f930c4bca1277", size = 8184330, upload-time = "2026-06-02T11:53:53.396Z" }, + { url = "https://files.pythonhosted.org/packages/78/c7/15739eb2f61fda3c54639e9942414e5a19ad8a8d1f5a3266afad7cb7df80/scikit_learn-1.9.0-cp313-cp313-win_arm64.whl", hash = 
"sha256:d77f54c017633791bc0225a43e2f8d03745fdcfe4880268fcc4df15f505dec2e", size = 7840653, upload-time = "2026-06-02T11:53:56.035Z" }, + { url = "https://files.pythonhosted.org/packages/f4/7d/c9a35cf59b20a86fec24d306f1547b78dec194b08d367ce2a3e4854169d9/scikit_learn-1.9.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:9656acd4e93f74e0b66c8a36c88830a99252dfa900044d36bc2212ae89a47162", size = 8713289, upload-time = "2026-06-02T11:53:58.788Z" }, + { url = "https://files.pythonhosted.org/packages/3c/a7/552a7821597c632b907f7bfe8f36f9f572777af8ef8a48353041cf8e091a/scikit_learn-1.9.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:24360002ae845e7866522b0a5bbf690802e7bc388cac8663502e78aa98598aa2", size = 8245141, upload-time = "2026-06-02T11:54:01.694Z" }, + { url = "https://files.pythonhosted.org/packages/7d/79/f4a0c4fe9711154cddabf913471153af79056382ddc612cfe5ee0ff4b72e/scikit_learn-1.9.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5162ad10a418c8a282dde04c9aa06965de3e9a65f33c1440c0ae69bb1a09d913", size = 8847671, upload-time = "2026-06-02T11:54:04.448Z" }, + { url = "https://files.pythonhosted.org/packages/f0/af/4d72d9e475ac83719160c662619e4bf7b95c19507cd582e7d0167a3c3dae/scikit_learn-1.9.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1fea2cc5677ab49d6f5bade978c866da44957b712d92e9635e8b4f723013c3cb", size = 9118104, upload-time = "2026-06-02T11:54:07.205Z" }, + { url = "https://files.pythonhosted.org/packages/a2/d5/6a58eea2cb9abbb9b3f2bb8b2cfb3243d1152d69f442d256c7af71304769/scikit_learn-1.9.0-cp314-cp314-win_amd64.whl", hash = "sha256:64fa347efc1c839c487433e40c5144d38c336e8a2b59c81aa8660373945c2673", size = 8290674, upload-time = "2026-06-02T11:54:10.087Z" }, + { url = "https://files.pythonhosted.org/packages/65/5b/d4c879cf358f1187141cf90ced473f087183489090244f50c124a2ee478b/scikit_learn-1.9.0-cp314-cp314-win_arm64.whl", hash = 
"sha256:1b944b6db288f6b926e3650026ddafb988929de95d11fc2cc5fa117773c9ba42", size = 7978807, upload-time = "2026-06-02T11:54:12.769Z" }, + { url = "https://files.pythonhosted.org/packages/8a/43/bfae3121ec67ae09150d453c442c7c1cc166e9aefe056e6ab3b7728a5cfc/scikit_learn-1.9.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:4ccacf04ca5f4b492158a5f28afe0ace43f81b2571e4b9a66d34848b46128949", size = 9031941, upload-time = "2026-06-02T11:54:15.436Z" }, + { url = "https://files.pythonhosted.org/packages/75/b0/20a4546eb17f3b25d3c66df15810411c14ed5065bcfab50b53c96fb627b2/scikit_learn-1.9.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:ee1a8db2c18c08e34c7412d4b10be1cac214cd4ea7dc9715a6a327eb49a37c96", size = 8613528, upload-time = "2026-06-02T11:54:18.842Z" }, + { url = "https://files.pythonhosted.org/packages/18/3c/e440e039bb82cd19004edaaad00acbde0fb9b461083c3ecf37941c557312/scikit_learn-1.9.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:147e9329ef0e39f75d4cffa02b2aa48d827832684926cd5210d9a2cb5c57246b", size = 8855050, upload-time = "2026-06-02T11:54:21.699Z" }, + { url = "https://files.pythonhosted.org/packages/43/26/b341b8dab5998da6270a3a42c2152c578501354d36f944b5856757035ef8/scikit_learn-1.9.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5bad8f8b9950321b54c965fdcbac6c6c55e79e16646b49977bcf3668d3870a1a", size = 9097190, upload-time = "2026-06-02T11:54:24.454Z" }, + { url = "https://files.pythonhosted.org/packages/fb/de/b650b4d69b84468cfa2e28a3ff7b8103743029e6446ce1a97fe060ef688c/scikit_learn-1.9.0-cp314-cp314t-win_amd64.whl", hash = "sha256:78fc56eafd4edb9575d2d8950d1dd152061abb573341a1cb7e099fc40f6c6666", size = 8963204, upload-time = "2026-06-02T11:54:27.428Z" }, + { url = "https://files.pythonhosted.org/packages/ee/f3/ff83d76d7418112e5a61326443cdda87be3545dd8d6599c95b2481a4419e/scikit_learn-1.9.0-cp314-cp314t-win_arm64.whl", hash = 
"sha256:051075bda8b7aab87b1906ab3d4740a1e1224a19d7b3781a576736edc94e76aa", size = 8222661, upload-time = "2026-06-02T11:54:30.192Z" }, +] + +[[package]] +name = "scipy" +version = "1.15.3" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.11'", +] +dependencies = [ + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/0f/37/6964b830433e654ec7485e45a00fc9a27cf868d622838f6b6d9c5ec0d532/scipy-1.15.3.tar.gz", hash = "sha256:eae3cf522bc7df64b42cad3925c876e1b0b6c35c1337c93e12c0f366f55b0eaf", size = 59419214, upload-time = "2025-05-08T16:13:05.955Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/2f/4966032c5f8cc7e6a60f1b2e0ad686293b9474b65246b0c642e3ef3badd0/scipy-1.15.3-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:a345928c86d535060c9c2b25e71e87c39ab2f22fc96e9636bd74d1dbf9de448c", size = 38702770, upload-time = "2025-05-08T16:04:20.849Z" }, + { url = "https://files.pythonhosted.org/packages/a0/6e/0c3bf90fae0e910c274db43304ebe25a6b391327f3f10b5dcc638c090795/scipy-1.15.3-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:ad3432cb0f9ed87477a8d97f03b763fd1d57709f1bbde3c9369b1dff5503b253", size = 30094511, upload-time = "2025-05-08T16:04:27.103Z" }, + { url = "https://files.pythonhosted.org/packages/ea/b1/4deb37252311c1acff7f101f6453f0440794f51b6eacb1aad4459a134081/scipy-1.15.3-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:aef683a9ae6eb00728a542b796f52a5477b78252edede72b8327a886ab63293f", size = 22368151, upload-time = "2025-05-08T16:04:31.731Z" }, + { url = "https://files.pythonhosted.org/packages/38/7d/f457626e3cd3c29b3a49ca115a304cebb8cc6f31b04678f03b216899d3c6/scipy-1.15.3-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:1c832e1bd78dea67d5c16f786681b28dd695a8cb1fb90af2e27580d3d0967e92", size = 25121732, upload-time = "2025-05-08T16:04:36.596Z" }, 
+ { url = "https://files.pythonhosted.org/packages/db/0a/92b1de4a7adc7a15dcf5bddc6e191f6f29ee663b30511ce20467ef9b82e4/scipy-1.15.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:263961f658ce2165bbd7b99fa5135195c3a12d9bef045345016b8b50c315cb82", size = 35547617, upload-time = "2025-05-08T16:04:43.546Z" }, + { url = "https://files.pythonhosted.org/packages/8e/6d/41991e503e51fc1134502694c5fa7a1671501a17ffa12716a4a9151af3df/scipy-1.15.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e2abc762b0811e09a0d3258abee2d98e0c703eee49464ce0069590846f31d40", size = 37662964, upload-time = "2025-05-08T16:04:49.431Z" }, + { url = "https://files.pythonhosted.org/packages/25/e1/3df8f83cb15f3500478c889be8fb18700813b95e9e087328230b98d547ff/scipy-1.15.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:ed7284b21a7a0c8f1b6e5977ac05396c0d008b89e05498c8b7e8f4a1423bba0e", size = 37238749, upload-time = "2025-05-08T16:04:55.215Z" }, + { url = "https://files.pythonhosted.org/packages/93/3e/b3257cf446f2a3533ed7809757039016b74cd6f38271de91682aa844cfc5/scipy-1.15.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:5380741e53df2c566f4d234b100a484b420af85deb39ea35a1cc1be84ff53a5c", size = 40022383, upload-time = "2025-05-08T16:05:01.914Z" }, + { url = "https://files.pythonhosted.org/packages/d1/84/55bc4881973d3f79b479a5a2e2df61c8c9a04fcb986a213ac9c02cfb659b/scipy-1.15.3-cp310-cp310-win_amd64.whl", hash = "sha256:9d61e97b186a57350f6d6fd72640f9e99d5a4a2b8fbf4b9ee9a841eab327dc13", size = 41259201, upload-time = "2025-05-08T16:05:08.166Z" }, + { url = "https://files.pythonhosted.org/packages/96/ab/5cc9f80f28f6a7dff646c5756e559823614a42b1939d86dd0ed550470210/scipy-1.15.3-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:993439ce220d25e3696d1b23b233dd010169b62f6456488567e830654ee37a6b", size = 38714255, upload-time = "2025-05-08T16:05:14.596Z" }, + { url = 
"https://files.pythonhosted.org/packages/4a/4a/66ba30abe5ad1a3ad15bfb0b59d22174012e8056ff448cb1644deccbfed2/scipy-1.15.3-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:34716e281f181a02341ddeaad584205bd2fd3c242063bd3423d61ac259ca7eba", size = 30111035, upload-time = "2025-05-08T16:05:20.152Z" }, + { url = "https://files.pythonhosted.org/packages/4b/fa/a7e5b95afd80d24313307f03624acc65801846fa75599034f8ceb9e2cbf6/scipy-1.15.3-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:3b0334816afb8b91dab859281b1b9786934392aa3d527cd847e41bb6f45bee65", size = 22384499, upload-time = "2025-05-08T16:05:24.494Z" }, + { url = "https://files.pythonhosted.org/packages/17/99/f3aaddccf3588bb4aea70ba35328c204cadd89517a1612ecfda5b2dd9d7a/scipy-1.15.3-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:6db907c7368e3092e24919b5e31c76998b0ce1684d51a90943cb0ed1b4ffd6c1", size = 25152602, upload-time = "2025-05-08T16:05:29.313Z" }, + { url = "https://files.pythonhosted.org/packages/56/c5/1032cdb565f146109212153339f9cb8b993701e9fe56b1c97699eee12586/scipy-1.15.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:721d6b4ef5dc82ca8968c25b111e307083d7ca9091bc38163fb89243e85e3889", size = 35503415, upload-time = "2025-05-08T16:05:34.699Z" }, + { url = "https://files.pythonhosted.org/packages/bd/37/89f19c8c05505d0601ed5650156e50eb881ae3918786c8fd7262b4ee66d3/scipy-1.15.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39cb9c62e471b1bb3750066ecc3a3f3052b37751c7c3dfd0fd7e48900ed52982", size = 37652622, upload-time = "2025-05-08T16:05:40.762Z" }, + { url = "https://files.pythonhosted.org/packages/7e/31/be59513aa9695519b18e1851bb9e487de66f2d31f835201f1b42f5d4d475/scipy-1.15.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:795c46999bae845966368a3c013e0e00947932d68e235702b5c3f6ea799aa8c9", size = 37244796, upload-time = "2025-05-08T16:05:48.119Z" }, + { url = 
"https://files.pythonhosted.org/packages/10/c0/4f5f3eeccc235632aab79b27a74a9130c6c35df358129f7ac8b29f562ac7/scipy-1.15.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:18aaacb735ab38b38db42cb01f6b92a2d0d4b6aabefeb07f02849e47f8fb3594", size = 40047684, upload-time = "2025-05-08T16:05:54.22Z" }, + { url = "https://files.pythonhosted.org/packages/ab/a7/0ddaf514ce8a8714f6ed243a2b391b41dbb65251affe21ee3077ec45ea9a/scipy-1.15.3-cp311-cp311-win_amd64.whl", hash = "sha256:ae48a786a28412d744c62fd7816a4118ef97e5be0bee968ce8f0a2fba7acf3bb", size = 41246504, upload-time = "2025-05-08T16:06:00.437Z" }, + { url = "https://files.pythonhosted.org/packages/37/4b/683aa044c4162e10ed7a7ea30527f2cbd92e6999c10a8ed8edb253836e9c/scipy-1.15.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6ac6310fdbfb7aa6612408bd2f07295bcbd3fda00d2d702178434751fe48e019", size = 38766735, upload-time = "2025-05-08T16:06:06.471Z" }, + { url = "https://files.pythonhosted.org/packages/7b/7e/f30be3d03de07f25dc0ec926d1681fed5c732d759ac8f51079708c79e680/scipy-1.15.3-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:185cd3d6d05ca4b44a8f1595af87f9c372bb6acf9c808e99aa3e9aa03bd98cf6", size = 30173284, upload-time = "2025-05-08T16:06:11.686Z" }, + { url = "https://files.pythonhosted.org/packages/07/9c/0ddb0d0abdabe0d181c1793db51f02cd59e4901da6f9f7848e1f96759f0d/scipy-1.15.3-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:05dc6abcd105e1a29f95eada46d4a3f251743cfd7d3ae8ddb4088047f24ea477", size = 22446958, upload-time = "2025-05-08T16:06:15.97Z" }, + { url = "https://files.pythonhosted.org/packages/af/43/0bce905a965f36c58ff80d8bea33f1f9351b05fad4beaad4eae34699b7a1/scipy-1.15.3-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:06efcba926324df1696931a57a176c80848ccd67ce6ad020c810736bfd58eb1c", size = 25242454, upload-time = "2025-05-08T16:06:20.394Z" }, + { url = 
"https://files.pythonhosted.org/packages/56/30/a6f08f84ee5b7b28b4c597aca4cbe545535c39fe911845a96414700b64ba/scipy-1.15.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c05045d8b9bfd807ee1b9f38761993297b10b245f012b11b13b91ba8945f7e45", size = 35210199, upload-time = "2025-05-08T16:06:26.159Z" }, + { url = "https://files.pythonhosted.org/packages/0b/1f/03f52c282437a168ee2c7c14a1a0d0781a9a4a8962d84ac05c06b4c5b555/scipy-1.15.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:271e3713e645149ea5ea3e97b57fdab61ce61333f97cfae392c28ba786f9bb49", size = 37309455, upload-time = "2025-05-08T16:06:32.778Z" }, + { url = "https://files.pythonhosted.org/packages/89/b1/fbb53137f42c4bf630b1ffdfc2151a62d1d1b903b249f030d2b1c0280af8/scipy-1.15.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6cfd56fc1a8e53f6e89ba3a7a7251f7396412d655bca2aa5611c8ec9a6784a1e", size = 36885140, upload-time = "2025-05-08T16:06:39.249Z" }, + { url = "https://files.pythonhosted.org/packages/2e/2e/025e39e339f5090df1ff266d021892694dbb7e63568edcfe43f892fa381d/scipy-1.15.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:0ff17c0bb1cb32952c09217d8d1eed9b53d1463e5f1dd6052c7857f83127d539", size = 39710549, upload-time = "2025-05-08T16:06:45.729Z" }, + { url = "https://files.pythonhosted.org/packages/e6/eb/3bf6ea8ab7f1503dca3a10df2e4b9c3f6b3316df07f6c0ded94b281c7101/scipy-1.15.3-cp312-cp312-win_amd64.whl", hash = "sha256:52092bc0472cfd17df49ff17e70624345efece4e1a12b23783a1ac59a1b728ed", size = 40966184, upload-time = "2025-05-08T16:06:52.623Z" }, + { url = "https://files.pythonhosted.org/packages/73/18/ec27848c9baae6e0d6573eda6e01a602e5649ee72c27c3a8aad673ebecfd/scipy-1.15.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:2c620736bcc334782e24d173c0fdbb7590a0a436d2fdf39310a8902505008759", size = 38728256, upload-time = "2025-05-08T16:06:58.696Z" }, + { url = 
"https://files.pythonhosted.org/packages/74/cd/1aef2184948728b4b6e21267d53b3339762c285a46a274ebb7863c9e4742/scipy-1.15.3-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:7e11270a000969409d37ed399585ee530b9ef6aa99d50c019de4cb01e8e54e62", size = 30109540, upload-time = "2025-05-08T16:07:04.209Z" }, + { url = "https://files.pythonhosted.org/packages/5b/d8/59e452c0a255ec352bd0a833537a3bc1bfb679944c4938ab375b0a6b3a3e/scipy-1.15.3-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:8c9ed3ba2c8a2ce098163a9bdb26f891746d02136995df25227a20e71c396ebb", size = 22383115, upload-time = "2025-05-08T16:07:08.998Z" }, + { url = "https://files.pythonhosted.org/packages/08/f5/456f56bbbfccf696263b47095291040655e3cbaf05d063bdc7c7517f32ac/scipy-1.15.3-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:0bdd905264c0c9cfa74a4772cdb2070171790381a5c4d312c973382fc6eaf730", size = 25163884, upload-time = "2025-05-08T16:07:14.091Z" }, + { url = "https://files.pythonhosted.org/packages/a2/66/a9618b6a435a0f0c0b8a6d0a2efb32d4ec5a85f023c2b79d39512040355b/scipy-1.15.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79167bba085c31f38603e11a267d862957cbb3ce018d8b38f79ac043bc92d825", size = 35174018, upload-time = "2025-05-08T16:07:19.427Z" }, + { url = "https://files.pythonhosted.org/packages/b5/09/c5b6734a50ad4882432b6bb7c02baf757f5b2f256041da5df242e2d7e6b6/scipy-1.15.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9deabd6d547aee2c9a81dee6cc96c6d7e9a9b1953f74850c179f91fdc729cb7", size = 37269716, upload-time = "2025-05-08T16:07:25.712Z" }, + { url = "https://files.pythonhosted.org/packages/77/0a/eac00ff741f23bcabd352731ed9b8995a0a60ef57f5fd788d611d43d69a1/scipy-1.15.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:dde4fc32993071ac0c7dd2d82569e544f0bdaff66269cb475e0f369adad13f11", size = 36872342, upload-time = "2025-05-08T16:07:31.468Z" }, + { url = 
"https://files.pythonhosted.org/packages/fe/54/4379be86dd74b6ad81551689107360d9a3e18f24d20767a2d5b9253a3f0a/scipy-1.15.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f77f853d584e72e874d87357ad70f44b437331507d1c311457bed8ed2b956126", size = 39670869, upload-time = "2025-05-08T16:07:38.002Z" }, + { url = "https://files.pythonhosted.org/packages/87/2e/892ad2862ba54f084ffe8cc4a22667eaf9c2bcec6d2bff1d15713c6c0703/scipy-1.15.3-cp313-cp313-win_amd64.whl", hash = "sha256:b90ab29d0c37ec9bf55424c064312930ca5f4bde15ee8619ee44e69319aab163", size = 40988851, upload-time = "2025-05-08T16:08:33.671Z" }, + { url = "https://files.pythonhosted.org/packages/1b/e9/7a879c137f7e55b30d75d90ce3eb468197646bc7b443ac036ae3fe109055/scipy-1.15.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:3ac07623267feb3ae308487c260ac684b32ea35fd81e12845039952f558047b8", size = 38863011, upload-time = "2025-05-08T16:07:44.039Z" }, + { url = "https://files.pythonhosted.org/packages/51/d1/226a806bbd69f62ce5ef5f3ffadc35286e9fbc802f606a07eb83bf2359de/scipy-1.15.3-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:6487aa99c2a3d509a5227d9a5e889ff05830a06b2ce08ec30df6d79db5fcd5c5", size = 30266407, upload-time = "2025-05-08T16:07:49.891Z" }, + { url = "https://files.pythonhosted.org/packages/e5/9b/f32d1d6093ab9eeabbd839b0f7619c62e46cc4b7b6dbf05b6e615bbd4400/scipy-1.15.3-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:50f9e62461c95d933d5c5ef4a1f2ebf9a2b4e83b0db374cb3f1de104d935922e", size = 22540030, upload-time = "2025-05-08T16:07:54.121Z" }, + { url = "https://files.pythonhosted.org/packages/e7/29/c278f699b095c1a884f29fda126340fcc201461ee8bfea5c8bdb1c7c958b/scipy-1.15.3-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:14ed70039d182f411ffc74789a16df3835e05dc469b898233a245cdfd7f162cb", size = 25218709, upload-time = "2025-05-08T16:07:58.506Z" }, + { url = 
"https://files.pythonhosted.org/packages/24/18/9e5374b617aba742a990581373cd6b68a2945d65cc588482749ef2e64467/scipy-1.15.3-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a769105537aa07a69468a0eefcd121be52006db61cdd8cac8a0e68980bbb723", size = 34809045, upload-time = "2025-05-08T16:08:03.929Z" }, + { url = "https://files.pythonhosted.org/packages/e1/fe/9c4361e7ba2927074360856db6135ef4904d505e9b3afbbcb073c4008328/scipy-1.15.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9db984639887e3dffb3928d118145ffe40eff2fa40cb241a306ec57c219ebbbb", size = 36703062, upload-time = "2025-05-08T16:08:09.558Z" }, + { url = "https://files.pythonhosted.org/packages/b7/8e/038ccfe29d272b30086b25a4960f757f97122cb2ec42e62b460d02fe98e9/scipy-1.15.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:40e54d5c7e7ebf1aa596c374c49fa3135f04648a0caabcb66c52884b943f02b4", size = 36393132, upload-time = "2025-05-08T16:08:15.34Z" }, + { url = "https://files.pythonhosted.org/packages/10/7e/5c12285452970be5bdbe8352c619250b97ebf7917d7a9a9e96b8a8140f17/scipy-1.15.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:5e721fed53187e71d0ccf382b6bf977644c533e506c4d33c3fb24de89f5c3ed5", size = 38979503, upload-time = "2025-05-08T16:08:21.513Z" }, + { url = "https://files.pythonhosted.org/packages/81/06/0a5e5349474e1cbc5757975b21bd4fad0e72ebf138c5592f191646154e06/scipy-1.15.3-cp313-cp313t-win_amd64.whl", hash = "sha256:76ad1fb5f8752eabf0fa02e4cc0336b4e8f021e2d5f061ed37d6d264db35e3ca", size = 40308097, upload-time = "2025-05-08T16:08:27.627Z" }, +] + +[[package]] +name = "scipy" +version = "1.17.1" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.15'", + "python_full_version == '3.14.*'", + "python_full_version == '3.13.*'", + "python_full_version == '3.12.*'", + "python_full_version == '3.11.*'", +] +dependencies = [ + { name = "numpy", version = "2.4.6", source = { registry = 
"https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/7a/97/5a3609c4f8d58b039179648e62dd220f89864f56f7357f5d4f45c29eb2cc/scipy-1.17.1.tar.gz", hash = "sha256:95d8e012d8cb8816c226aef832200b1d45109ed4464303e997c5b13122b297c0", size = 30573822, upload-time = "2026-02-23T00:26:24.851Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/df/75/b4ce781849931fef6fd529afa6b63711d5a733065722d0c3e2724af9e40a/scipy-1.17.1-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:1f95b894f13729334fb990162e911c9e5dc1ab390c58aa6cbecb389c5b5e28ec", size = 31613675, upload-time = "2026-02-23T00:16:00.13Z" }, + { url = "https://files.pythonhosted.org/packages/f7/58/bccc2861b305abdd1b8663d6130c0b3d7cc22e8d86663edbc8401bfd40d4/scipy-1.17.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:e18f12c6b0bc5a592ed23d3f7b891f68fd7f8241d69b7883769eb5d5dfb52696", size = 28162057, upload-time = "2026-02-23T00:16:09.456Z" }, + { url = "https://files.pythonhosted.org/packages/6d/ee/18146b7757ed4976276b9c9819108adbc73c5aad636e5353e20746b73069/scipy-1.17.1-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:a3472cfbca0a54177d0faa68f697d8ba4c80bbdc19908c3465556d9f7efce9ee", size = 20334032, upload-time = "2026-02-23T00:16:17.358Z" }, + { url = "https://files.pythonhosted.org/packages/ec/e6/cef1cf3557f0c54954198554a10016b6a03b2ec9e22a4e1df734936bd99c/scipy-1.17.1-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:766e0dc5a616d026a3a1cffa379af959671729083882f50307e18175797b3dfd", size = 22709533, upload-time = "2026-02-23T00:16:25.791Z" }, + { url = "https://files.pythonhosted.org/packages/4d/60/8804678875fc59362b0fb759ab3ecce1f09c10a735680318ac30da8cd76b/scipy-1.17.1-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:744b2bf3640d907b79f3fd7874efe432d1cf171ee721243e350f55234b4cec4c", size = 33062057, upload-time = "2026-02-23T00:16:36.931Z" }, + { url = 
"https://files.pythonhosted.org/packages/09/7d/af933f0f6e0767995b4e2d705a0665e454d1c19402aa7e895de3951ebb04/scipy-1.17.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:43af8d1f3bea642559019edfe64e9b11192a8978efbd1539d7bc2aaa23d92de4", size = 35349300, upload-time = "2026-02-23T00:16:49.108Z" }, + { url = "https://files.pythonhosted.org/packages/b4/3d/7ccbbdcbb54c8fdc20d3b6930137c782a163fa626f0aef920349873421ba/scipy-1.17.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:cd96a1898c0a47be4520327e01f874acfd61fb48a9420f8aa9f6483412ffa444", size = 35127333, upload-time = "2026-02-23T00:17:01.293Z" }, + { url = "https://files.pythonhosted.org/packages/e8/19/f926cb11c42b15ba08e3a71e376d816ac08614f769b4f47e06c3580c836a/scipy-1.17.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4eb6c25dd62ee8d5edf68a8e1c171dd71c292fdae95d8aeb3dd7d7de4c364082", size = 37741314, upload-time = "2026-02-23T00:17:12.576Z" }, + { url = "https://files.pythonhosted.org/packages/95/da/0d1df507cf574b3f224ccc3d45244c9a1d732c81dcb26b1e8a766ae271a8/scipy-1.17.1-cp311-cp311-win_amd64.whl", hash = "sha256:d30e57c72013c2a4fe441c2fcb8e77b14e152ad48b5464858e07e2ad9fbfceff", size = 36607512, upload-time = "2026-02-23T00:17:23.424Z" }, + { url = "https://files.pythonhosted.org/packages/68/7f/bdd79ceaad24b671543ffe0ef61ed8e659440eb683b66f033454dcee90eb/scipy-1.17.1-cp311-cp311-win_arm64.whl", hash = "sha256:9ecb4efb1cd6e8c4afea0daa91a87fbddbce1b99d2895d151596716c0b2e859d", size = 24599248, upload-time = "2026-02-23T00:17:34.561Z" }, + { url = "https://files.pythonhosted.org/packages/35/48/b992b488d6f299dbe3f11a20b24d3dda3d46f1a635ede1c46b5b17a7b163/scipy-1.17.1-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:35c3a56d2ef83efc372eaec584314bd0ef2e2f0d2adb21c55e6ad5b344c0dcb8", size = 31610954, upload-time = "2026-02-23T00:17:49.855Z" }, + { url = 
"https://files.pythonhosted.org/packages/b2/02/cf107b01494c19dc100f1d0b7ac3cc08666e96ba2d64db7626066cee895e/scipy-1.17.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:fcb310ddb270a06114bb64bbe53c94926b943f5b7f0842194d585c65eb4edd76", size = 28172662, upload-time = "2026-02-23T00:18:01.64Z" }, + { url = "https://files.pythonhosted.org/packages/cf/a9/599c28631bad314d219cf9ffd40e985b24d603fc8a2f4ccc5ae8419a535b/scipy-1.17.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:cc90d2e9c7e5c7f1a482c9875007c095c3194b1cfedca3c2f3291cdc2bc7c086", size = 20344366, upload-time = "2026-02-23T00:18:12.015Z" }, + { url = "https://files.pythonhosted.org/packages/35/f5/906eda513271c8deb5af284e5ef0206d17a96239af79f9fa0aebfe0e36b4/scipy-1.17.1-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:c80be5ede8f3f8eded4eff73cc99a25c388ce98e555b17d31da05287015ffa5b", size = 22704017, upload-time = "2026-02-23T00:18:21.502Z" }, + { url = "https://files.pythonhosted.org/packages/da/34/16f10e3042d2f1d6b66e0428308ab52224b6a23049cb2f5c1756f713815f/scipy-1.17.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e19ebea31758fac5893a2ac360fedd00116cbb7628e650842a6691ba7ca28a21", size = 32927842, upload-time = "2026-02-23T00:18:35.367Z" }, + { url = "https://files.pythonhosted.org/packages/01/8e/1e35281b8ab6d5d72ebe9911edcdffa3f36b04ed9d51dec6dd140396e220/scipy-1.17.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:02ae3b274fde71c5e92ac4d54bc06c42d80e399fec704383dcd99b301df37458", size = 35235890, upload-time = "2026-02-23T00:18:49.188Z" }, + { url = "https://files.pythonhosted.org/packages/c5/5c/9d7f4c88bea6e0d5a4f1bc0506a53a00e9fcb198de372bfe4d3652cef482/scipy-1.17.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8a604bae87c6195d8b1045eddece0514d041604b14f2727bbc2b3020172045eb", size = 35003557, upload-time = "2026-02-23T00:18:54.74Z" }, + { url = 
"https://files.pythonhosted.org/packages/65/94/7698add8f276dbab7a9de9fb6b0e02fc13ee61d51c7c3f85ac28b65e1239/scipy-1.17.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:f590cd684941912d10becc07325a3eeb77886fe981415660d9265c4c418d0bea", size = 37625856, upload-time = "2026-02-23T00:19:00.307Z" }, + { url = "https://files.pythonhosted.org/packages/a2/84/dc08d77fbf3d87d3ee27f6a0c6dcce1de5829a64f2eae85a0ecc1f0daa73/scipy-1.17.1-cp312-cp312-win_amd64.whl", hash = "sha256:41b71f4a3a4cab9d366cd9065b288efc4d4f3c0b37a91a8e0947fb5bd7f31d87", size = 36549682, upload-time = "2026-02-23T00:19:07.67Z" }, + { url = "https://files.pythonhosted.org/packages/bc/98/fe9ae9ffb3b54b62559f52dedaebe204b408db8109a8c66fdd04869e6424/scipy-1.17.1-cp312-cp312-win_arm64.whl", hash = "sha256:f4115102802df98b2b0db3cce5cb9b92572633a1197c77b7553e5203f284a5b3", size = 24547340, upload-time = "2026-02-23T00:19:12.024Z" }, + { url = "https://files.pythonhosted.org/packages/76/27/07ee1b57b65e92645f219b37148a7e7928b82e2b5dbeccecb4dff7c64f0b/scipy-1.17.1-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:5e3c5c011904115f88a39308379c17f91546f77c1667cea98739fe0fccea804c", size = 31590199, upload-time = "2026-02-23T00:19:17.192Z" }, + { url = "https://files.pythonhosted.org/packages/ec/ae/db19f8ab842e9b724bf5dbb7db29302a91f1e55bc4d04b1025d6d605a2c5/scipy-1.17.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:6fac755ca3d2c3edcb22f479fceaa241704111414831ddd3bc6056e18516892f", size = 28154001, upload-time = "2026-02-23T00:19:22.241Z" }, + { url = "https://files.pythonhosted.org/packages/5b/58/3ce96251560107b381cbd6e8413c483bbb1228a6b919fa8652b0d4090e7f/scipy-1.17.1-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:7ff200bf9d24f2e4d5dc6ee8c3ac64d739d3a89e2326ba68aaf6c4a2b838fd7d", size = 20325719, upload-time = "2026-02-23T00:19:26.329Z" }, + { url = 
"https://files.pythonhosted.org/packages/b2/83/15087d945e0e4d48ce2377498abf5ad171ae013232ae31d06f336e64c999/scipy-1.17.1-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:4b400bdc6f79fa02a4d86640310dde87a21fba0c979efff5248908c6f15fad1b", size = 22683595, upload-time = "2026-02-23T00:19:30.304Z" }, + { url = "https://files.pythonhosted.org/packages/b4/e0/e58fbde4a1a594c8be8114eb4aac1a55bcd6587047efc18a61eb1f5c0d30/scipy-1.17.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2b64ca7d4aee0102a97f3ba22124052b4bd2152522355073580bf4845e2550b6", size = 32896429, upload-time = "2026-02-23T00:19:35.536Z" }, + { url = "https://files.pythonhosted.org/packages/f5/5f/f17563f28ff03c7b6799c50d01d5d856a1d55f2676f537ca8d28c7f627cd/scipy-1.17.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:581b2264fc0aa555f3f435a5944da7504ea3a065d7029ad60e7c3d1ae09c5464", size = 35203952, upload-time = "2026-02-23T00:19:42.259Z" }, + { url = "https://files.pythonhosted.org/packages/8d/a5/9afd17de24f657fdfe4df9a3f1ea049b39aef7c06000c13db1530d81ccca/scipy-1.17.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:beeda3d4ae615106d7094f7e7cef6218392e4465cc95d25f900bebabfded0950", size = 34979063, upload-time = "2026-02-23T00:19:47.547Z" }, + { url = "https://files.pythonhosted.org/packages/8b/13/88b1d2384b424bf7c924f2038c1c409f8d88bb2a8d49d097861dd64a57b2/scipy-1.17.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6609bc224e9568f65064cfa72edc0f24ee6655b47575954ec6339534b2798369", size = 37598449, upload-time = "2026-02-23T00:19:53.238Z" }, + { url = "https://files.pythonhosted.org/packages/35/e5/d6d0e51fc888f692a35134336866341c08655d92614f492c6860dc45bb2c/scipy-1.17.1-cp313-cp313-win_amd64.whl", hash = "sha256:37425bc9175607b0268f493d79a292c39f9d001a357bebb6b88fdfaff13f6448", size = 36510943, upload-time = "2026-02-23T00:20:50.89Z" }, + { url = 
"https://files.pythonhosted.org/packages/2a/fd/3be73c564e2a01e690e19cc618811540ba5354c67c8680dce3281123fb79/scipy-1.17.1-cp313-cp313-win_arm64.whl", hash = "sha256:5cf36e801231b6a2059bf354720274b7558746f3b1a4efb43fcf557ccd484a87", size = 24545621, upload-time = "2026-02-23T00:20:55.871Z" }, + { url = "https://files.pythonhosted.org/packages/6f/6b/17787db8b8114933a66f9dcc479a8272e4b4da75fe03b0c282f7b0ade8cd/scipy-1.17.1-cp313-cp313t-macosx_10_14_x86_64.whl", hash = "sha256:d59c30000a16d8edc7e64152e30220bfbd724c9bbb08368c054e24c651314f0a", size = 31936708, upload-time = "2026-02-23T00:19:58.694Z" }, + { url = "https://files.pythonhosted.org/packages/38/2e/524405c2b6392765ab1e2b722a41d5da33dc5c7b7278184a8ad29b6cb206/scipy-1.17.1-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:010f4333c96c9bb1a4516269e33cb5917b08ef2166d5556ca2fd9f082a9e6ea0", size = 28570135, upload-time = "2026-02-23T00:20:03.934Z" }, + { url = "https://files.pythonhosted.org/packages/fd/c3/5bd7199f4ea8556c0c8e39f04ccb014ac37d1468e6cfa6a95c6b3562b76e/scipy-1.17.1-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:2ceb2d3e01c5f1d83c4189737a42d9cb2fc38a6eeed225e7515eef71ad301dce", size = 20741977, upload-time = "2026-02-23T00:20:07.935Z" }, + { url = "https://files.pythonhosted.org/packages/d9/b8/8ccd9b766ad14c78386599708eb745f6b44f08400a5fd0ade7cf89b6fc93/scipy-1.17.1-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:844e165636711ef41f80b4103ed234181646b98a53c8f05da12ca5ca289134f6", size = 23029601, upload-time = "2026-02-23T00:20:12.161Z" }, + { url = "https://files.pythonhosted.org/packages/6d/a0/3cb6f4d2fb3e17428ad2880333cac878909ad1a89f678527b5328b93c1d4/scipy-1.17.1-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:158dd96d2207e21c966063e1635b1063cd7787b627b6f07305315dd73d9c679e", size = 33019667, upload-time = "2026-02-23T00:20:17.208Z" }, + { url = 
"https://files.pythonhosted.org/packages/f3/c3/2d834a5ac7bf3a0c806ad1508efc02dda3c8c61472a56132d7894c312dea/scipy-1.17.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:74cbb80d93260fe2ffa334efa24cb8f2f0f622a9b9febf8b483c0b865bfb3475", size = 35264159, upload-time = "2026-02-23T00:20:23.087Z" }, + { url = "https://files.pythonhosted.org/packages/4d/77/d3ed4becfdbd217c52062fafe35a72388d1bd82c2d0ba5ca19d6fcc93e11/scipy-1.17.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:dbc12c9f3d185f5c737d801da555fb74b3dcfa1a50b66a1a93e09190f41fab50", size = 35102771, upload-time = "2026-02-23T00:20:28.636Z" }, + { url = "https://files.pythonhosted.org/packages/bd/12/d19da97efde68ca1ee5538bb261d5d2c062f0c055575128f11a2730e3ac1/scipy-1.17.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:94055a11dfebe37c656e70317e1996dc197e1a15bbcc351bcdd4610e128fe1ca", size = 37665910, upload-time = "2026-02-23T00:20:34.743Z" }, + { url = "https://files.pythonhosted.org/packages/06/1c/1172a88d507a4baaf72c5a09bb6c018fe2ae0ab622e5830b703a46cc9e44/scipy-1.17.1-cp313-cp313t-win_amd64.whl", hash = "sha256:e30bdeaa5deed6bc27b4cc490823cd0347d7dae09119b8803ae576ea0ce52e4c", size = 36562980, upload-time = "2026-02-23T00:20:40.575Z" }, + { url = "https://files.pythonhosted.org/packages/70/b0/eb757336e5a76dfa7911f63252e3b7d1de00935d7705cf772db5b45ec238/scipy-1.17.1-cp313-cp313t-win_arm64.whl", hash = "sha256:a720477885a9d2411f94a93d16f9d89bad0f28ca23c3f8daa521e2dcc3f44d49", size = 24856543, upload-time = "2026-02-23T00:20:45.313Z" }, + { url = "https://files.pythonhosted.org/packages/cf/83/333afb452af6f0fd70414dc04f898647ee1423979ce02efa75c3b0f2c28e/scipy-1.17.1-cp314-cp314-macosx_10_14_x86_64.whl", hash = "sha256:a48a72c77a310327f6a3a920092fa2b8fd03d7deaa60f093038f22d98e096717", size = 31584510, upload-time = "2026-02-23T00:21:01.015Z" }, + { url = 
"https://files.pythonhosted.org/packages/ed/a6/d05a85fd51daeb2e4ea71d102f15b34fedca8e931af02594193ae4fd25f7/scipy-1.17.1-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:45abad819184f07240d8a696117a7aacd39787af9e0b719d00285549ed19a1e9", size = 28170131, upload-time = "2026-02-23T00:21:05.888Z" }, + { url = "https://files.pythonhosted.org/packages/db/7b/8624a203326675d7746a254083a187398090a179335b2e4a20e2ddc46e83/scipy-1.17.1-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:3fd1fcdab3ea951b610dc4cef356d416d5802991e7e32b5254828d342f7b7e0b", size = 20342032, upload-time = "2026-02-23T00:21:09.904Z" }, + { url = "https://files.pythonhosted.org/packages/c9/35/2c342897c00775d688d8ff3987aced3426858fd89d5a0e26e020b660b301/scipy-1.17.1-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:7bdf2da170b67fdf10bca777614b1c7d96ae3ca5794fd9587dce41eb2966e866", size = 22678766, upload-time = "2026-02-23T00:21:14.313Z" }, + { url = "https://files.pythonhosted.org/packages/ef/f2/7cdb8eb308a1a6ae1e19f945913c82c23c0c442a462a46480ce487fdc0ac/scipy-1.17.1-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:adb2642e060a6549c343603a3851ba76ef0b74cc8c079a9a58121c7ec9fe2350", size = 32957007, upload-time = "2026-02-23T00:21:19.663Z" }, + { url = "https://files.pythonhosted.org/packages/0b/2e/7eea398450457ecb54e18e9d10110993fa65561c4f3add5e8eccd2b9cd41/scipy-1.17.1-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:eee2cfda04c00a857206a4330f0c5e3e56535494e30ca445eb19ec624ae75118", size = 35221333, upload-time = "2026-02-23T00:21:25.278Z" }, + { url = "https://files.pythonhosted.org/packages/d9/77/5b8509d03b77f093a0d52e606d3c4f79e8b06d1d38c441dacb1e26cacf46/scipy-1.17.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:d2650c1fb97e184d12d8ba010493ee7b322864f7d3d00d3f9bb97d9c21de4068", size = 35042066, upload-time = "2026-02-23T00:21:31.358Z" }, + { url = 
"https://files.pythonhosted.org/packages/f9/df/18f80fb99df40b4070328d5ae5c596f2f00fffb50167e31439e932f29e7d/scipy-1.17.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:08b900519463543aa604a06bec02461558a6e1cef8fdbb8098f77a48a83c8118", size = 37612763, upload-time = "2026-02-23T00:21:37.247Z" }, + { url = "https://files.pythonhosted.org/packages/4b/39/f0e8ea762a764a9dc52aa7dabcfad51a354819de1f0d4652b6a1122424d6/scipy-1.17.1-cp314-cp314-win_amd64.whl", hash = "sha256:3877ac408e14da24a6196de0ddcace62092bfc12a83823e92e49e40747e52c19", size = 37290984, upload-time = "2026-02-23T00:22:35.023Z" }, + { url = "https://files.pythonhosted.org/packages/7c/56/fe201e3b0f93d1a8bcf75d3379affd228a63d7e2d80ab45467a74b494947/scipy-1.17.1-cp314-cp314-win_arm64.whl", hash = "sha256:f8885db0bc2bffa59d5c1b72fad7a6a92d3e80e7257f967dd81abb553a90d293", size = 25192877, upload-time = "2026-02-23T00:22:39.798Z" }, + { url = "https://files.pythonhosted.org/packages/96/ad/f8c414e121f82e02d76f310f16db9899c4fcde36710329502a6b2a3c0392/scipy-1.17.1-cp314-cp314t-macosx_10_14_x86_64.whl", hash = "sha256:1cc682cea2ae55524432f3cdff9e9a3be743d52a7443d0cba9017c23c87ae2f6", size = 31949750, upload-time = "2026-02-23T00:21:42.289Z" }, + { url = "https://files.pythonhosted.org/packages/7c/b0/c741e8865d61b67c81e255f4f0a832846c064e426636cd7de84e74d209be/scipy-1.17.1-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:2040ad4d1795a0ae89bfc7e8429677f365d45aa9fd5e4587cf1ea737f927b4a1", size = 28585858, upload-time = "2026-02-23T00:21:47.706Z" }, + { url = "https://files.pythonhosted.org/packages/ed/1b/3985219c6177866628fa7c2595bfd23f193ceebbe472c98a08824b9466ff/scipy-1.17.1-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:131f5aaea57602008f9822e2115029b55d4b5f7c070287699fe45c661d051e39", size = 20757723, upload-time = "2026-02-23T00:21:52.039Z" }, + { url = 
"https://files.pythonhosted.org/packages/c0/19/2a04aa25050d656d6f7b9e7b685cc83d6957fb101665bfd9369ca6534563/scipy-1.17.1-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:9cdc1a2fcfd5c52cfb3045feb399f7b3ce822abdde3a193a6b9a60b3cb5854ca", size = 23043098, upload-time = "2026-02-23T00:21:56.185Z" }, + { url = "https://files.pythonhosted.org/packages/86/f1/3383beb9b5d0dbddd030335bf8a8b32d4317185efe495374f134d8be6cce/scipy-1.17.1-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6e3dcd57ab780c741fde8dc68619de988b966db759a3c3152e8e9142c26295ad", size = 33030397, upload-time = "2026-02-23T00:22:01.404Z" }, + { url = "https://files.pythonhosted.org/packages/41/68/8f21e8a65a5a03f25a79165ec9d2b28c00e66dc80546cf5eb803aeeff35b/scipy-1.17.1-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a9956e4d4f4a301ebf6cde39850333a6b6110799d470dbbb1e25326ac447f52a", size = 35281163, upload-time = "2026-02-23T00:22:07.024Z" }, + { url = "https://files.pythonhosted.org/packages/84/8d/c8a5e19479554007a5632ed7529e665c315ae7492b4f946b0deb39870e39/scipy-1.17.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:a4328d245944d09fd639771de275701ccadf5f781ba0ff092ad141e017eccda4", size = 35116291, upload-time = "2026-02-23T00:22:12.585Z" }, + { url = "https://files.pythonhosted.org/packages/52/52/e57eceff0e342a1f50e274264ed47497b59e6a4e3118808ee58ddda7b74a/scipy-1.17.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a77cbd07b940d326d39a1d1b37817e2ee4d79cb30e7338f3d0cddffae70fcaa2", size = 37682317, upload-time = "2026-02-23T00:22:18.513Z" }, + { url = "https://files.pythonhosted.org/packages/11/2f/b29eafe4a3fbc3d6de9662b36e028d5f039e72d345e05c250e121a230dd4/scipy-1.17.1-cp314-cp314t-win_amd64.whl", hash = "sha256:eb092099205ef62cd1782b006658db09e2fed75bffcae7cc0d44052d8aa0f484", size = 37345327, upload-time = "2026-02-23T00:22:24.442Z" }, + { url = 
"https://files.pythonhosted.org/packages/07/39/338d9219c4e87f3e708f18857ecd24d22a0c3094752393319553096b98af/scipy-1.17.1-cp314-cp314t-win_arm64.whl", hash = "sha256:200e1050faffacc162be6a486a984a0497866ec54149a01270adc8a59b7c7d21", size = 25489165, upload-time = "2026-02-23T00:22:29.563Z" }, +] + [[package]] name = "secretstorage" version = "3.5.0" @@ -2964,6 +3351,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a2/09/77d55d46fd61b4a135c444fc97158ef34a095e5681d0a6c10b75bf356191/sympy-1.14.0-py3-none-any.whl", hash = "sha256:e091cc3e99d2141a0ba2847328f5479b05d94a6635cb96148ccb3f34671bd8f5", size = 6299353, upload-time = "2025-04-27T18:04:59.103Z" }, ] +[[package]] +name = "threadpoolctl" +version = "3.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b7/4d/08c89e34946fce2aec4fbb45c9016efd5f4d7f24af8e5d93296e935631d8/threadpoolctl-3.6.0.tar.gz", hash = "sha256:8ab8b4aa3491d812b623328249fab5302a68d2d71745c8a4c719a2fcaba9f44e", size = 21274, upload-time = "2025-03-13T13:49:23.031Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/32/d5/f9a850d79b0851d1d4ef6456097579a9005b31fea68726a4ae5f2d82ddd9/threadpoolctl-3.6.0-py3-none-any.whl", hash = "sha256:43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb", size = 18638, upload-time = "2025-03-13T13:49:21.846Z" }, +] + [[package]] name = "tiktoken" version = "0.13.0" @@ -3174,6 +3570,26 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7", size = 14611, upload-time = "2025-10-01T02:14:40.154Z" }, ] +[[package]] +name = "umap-learn" +version = "0.5.12" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numba" }, + { name = "numpy", version = "2.2.6", source = { registry = 
"https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "pynndescent" }, + { name = "scikit-learn", version = "1.7.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "scikit-learn", version = "1.9.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "scipy", version = "1.17.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "tqdm" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/02/ee/af4171241117f85c74b5ca6448ea1033cc28d599c13651d67289bacd4083/umap_learn-0.5.12.tar.gz", hash = "sha256:6aff02ecac5f2aad9f3c65ee518d7ae93e1a985ae38721fdcffceee4232c33c7", size = 96672, upload-time = "2026-04-08T20:03:54.012Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1b/98/f63318ccbe75c810011fe9233884c5d348d94d90005de1b79e5f93bef9c0/umap_learn-0.5.12-py3-none-any.whl", hash = "sha256:f2a85d2a2adcb52b541bed9b27a23ca169b56bb1b23283abeebfb8dfb8a42fe5", size = 91849, upload-time = "2026-04-08T20:03:52.561Z" }, +] + [[package]] name = "urllib3" version = "2.7.0" diff --git a/zensical.toml b/zensical.toml index eea1fc09..9f83a1ab 100644 --- a/zensical.toml +++ b/zensical.toml @@ -38,6 +38,7 @@ nav = [ ] }, { "Diagnostics" = [ { "Platform Observability" = "guide/observability/diagnostics.md" }, + { "Corpus Analytics" = "guide/analytics/overview.md" }, ] }, { "MCP" = [ { "Overview" = "guide/mcp/README.md" }, @@ -129,6 +130,7 @@ nav = [ { "Testing as Spec" = "book/23-testing-as-spec.md" }, { "Compatibility and Versioning" = "book/24-compatibility-and-versioning.md" }, { "Platform Observability" = 
"book/26-platform-observability.md" }, + { "Corpus Analytics" = "book/27-corpus-analytics.md" }, ] }, { "MCP interface" = [ { "Overview" = "book/25-mcp-interface/index.md" }, From 52f9800f4206c10faf4d65e6acca64f886f7be70 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sat, 13 Jun 2026 19:04:51 +0500 Subject: [PATCH 271/318] feat(core): retrieval telemetry counters + record_counter span primitive --- codeclone/memory/retrieval/service.py | 36 ++++++++++++++++++++++++++- codeclone/observability/__init__.py | 2 ++ codeclone/observability/runtime.py | 13 ++++++++++ tests/test_observability_runtime.py | 24 ++++++++++++++++++ tests/test_semantic_search_service.py | 34 +++++++++++++++++++++++++ 5 files changed, 108 insertions(+), 1 deletion(-) diff --git a/codeclone/memory/retrieval/service.py b/codeclone/memory/retrieval/service.py index 839d4a2c..705656bc 100644 --- a/codeclone/memory/retrieval/service.py +++ b/codeclone/memory/retrieval/service.py @@ -12,6 +12,7 @@ from ...config.memory_defaults import DEFAULT_MEMORY_STATEMENT_PREVIEW_CHARS from ...contracts import SEMANTIC_INDEX_FORMAT_VERSION +from ...observability import is_observability_enabled, record_counter, span from ..embedding import embed_query from ..enums import LinkRelation, MemoryConfidence, MemoryRecordType, MemoryStatus from ..exceptions import MemoryContractError, MemorySemanticUnavailableError @@ -1303,7 +1304,10 @@ def _semantic_hits( # Search each lane with its own top-k budget so a dense source (e.g. audit) # cannot crowd memory hits out of one shared top-k (#3). The index applies # the source filter, so results arrive already lane-scoped. - vector = embed_query(provider, query) + # The embed is the expensive step (lazy model load); give it its own span so + # embedding load time is observable separately from the vector search. 
+ with span(name="retrieval.embed_query"): + vector = embed_query(provider, query) proximity: dict[str, float] = {} for hit in index.search(vector, k=k, source="memory"): proximity.setdefault(hit.source_id, hit.score) @@ -1312,6 +1316,28 @@ def _semantic_hits( return proximity, audit_hits, trajectory_hits +def _record_search_telemetry( + *, + fts_records: Sequence[MemoryRecord], + proximity: Mapping[str, float], + audit_hits: Sequence[SemanticHit], + trajectory_hits: Sequence[SemanticHit], + candidates: Sequence[MemoryRecord], +) -> None: + # Lane-hit telemetry for the hybrid search, attributed to the active span. + # No-op outside an observability span; the caller guards the call so the + # set-math below is skipped entirely when observability is disabled. + candidate_ids = {record.id for record in candidates} + overlap = sum(1 for record in fts_records if record.id in proximity) + filtered = sum(1 for record_id in proximity if record_id not in candidate_ids) + record_counter("retrieval.fts_hits", len(fts_records)) + record_counter("retrieval.vector_memory_hits", len(proximity)) + record_counter("retrieval.vector_audit_hits", len(audit_hits)) + record_counter("retrieval.vector_trajectory_hits", len(trajectory_hits)) + record_counter("retrieval.fts_vector_overlap", overlap) + record_counter("retrieval.semantic_filtered", filtered) + + def _hydrate_audit_events( audit_db_path: Path | None, hits: Sequence[SemanticHit] ) -> list[dict[str, object]]: @@ -1501,6 +1527,14 @@ def _handle_semantic_search_mode( if status is not None else _semantic_disabled_block() ) + if is_observability_enabled(): + _record_search_telemetry( + fts_records=fts_records, + proximity=proximity, + audit_hits=audit_hits, + trajectory_hits=trajectory_hits, + candidates=candidates, + ) effective_stale = include_stale or "stale" in statuses visible = [ record diff --git a/codeclone/observability/__init__.py b/codeclone/observability/__init__.py index 18a89f2e..f9f0b3a8 100644 --- 
a/codeclone/observability/__init__.py +++ b/codeclone/observability/__init__.py @@ -24,6 +24,7 @@ is_observability_enabled, operation, payload_capture_enabled, + record_counter, record_db_query, record_elapsed_span, shutdown, @@ -40,6 +41,7 @@ "is_observability_enabled", "operation", "payload_capture_enabled", + "record_counter", "record_db_query", "record_elapsed_span", "shutdown", diff --git a/codeclone/observability/runtime.py b/codeclone/observability/runtime.py index 5fddb4d4..63129fbe 100644 --- a/codeclone/observability/runtime.py +++ b/codeclone/observability/runtime.py @@ -480,6 +480,18 @@ def record_db_query(sql: str) -> None: span_handle.add_db_fingerprint(fingerprint) +def record_counter(key: str, value: int = 1) -> None: + """Add ``value`` to the named counter on the active span. No-op outside a + span (or when disabled). Companion to ``record_db_query`` for non-SQL + counters — e.g. retrieval lane hits emitted by the memory query path. + Performance telemetry only — never audit or contract truth. + """ + span_handle = _CURRENT_SPAN.get() + if span_handle is None: + return + span_handle.add_counter(key, value) + + def instrument_db_connection(conn: sqlite3.Connection) -> None: """Attach the per-span DB-query counter to ``conn``. No-op (and no per-query trace overhead) when observability is disabled for this process. 
@@ -498,6 +510,7 @@ def instrument_db_connection(conn: sqlite3.Connection) -> None: "is_observability_enabled", "operation", "payload_capture_enabled", + "record_counter", "record_db_query", "record_elapsed_span", "shutdown", diff --git a/tests/test_observability_runtime.py b/tests/test_observability_runtime.py index 6fd64f28..4ba5011c 100644 --- a/tests/test_observability_runtime.py +++ b/tests/test_observability_runtime.py @@ -10,6 +10,7 @@ from collections.abc import Iterator from pathlib import Path +import orjson import pytest from codeclone.config.observability import ObservabilityConfig @@ -17,6 +18,7 @@ bootstrap, is_observability_enabled, operation, + record_counter, shutdown, span, ) @@ -81,6 +83,28 @@ def test_enabled_persists_operation_and_nested_spans(tmp_path: Path) -> None: conn.close() +def test_record_counter_attributes_to_active_span(tmp_path: Path) -> None: + bootstrap(ObservabilityConfig(enabled=True), root=tmp_path) + with operation(name="search", surface="mcp"), span(name="root"): + record_counter("retrieval.fts_hits", 3) + record_counter("retrieval.fts_hits", 2) # accumulates onto the same key + record_counter("retrieval.vector_memory_hits") # default value 1 + # Outside any span the counter is inert (no raise, nothing recorded). 
+ record_counter("retrieval.fts_hits", 99) + shutdown() + + conn = open_observability_store(observability_store_path(tmp_path)) + try: + (counters_json,) = conn.execute( + "SELECT counters_json FROM platform_spans WHERE name='root'" + ).fetchone() + finally: + conn.close() + counters = orjson.loads(counters_json) + assert counters["retrieval.fts_hits"] == 5 + assert counters["retrieval.vector_memory_hits"] == 1 + + def test_cross_process_correlation_and_parent(tmp_path: Path) -> None: bootstrap(ObservabilityConfig(enabled=True), root=tmp_path) with operation(name="A", surface="mcp", correlation_id="corrX") as a: diff --git a/tests/test_semantic_search_service.py b/tests/test_semantic_search_service.py index ca694eb9..187f16ae 100644 --- a/tests/test_semantic_search_service.py +++ b/tests/test_semantic_search_service.py @@ -5,9 +5,12 @@ # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations +import dataclasses from collections.abc import Mapping, Sequence from pathlib import Path +import pytest + from codeclone.memory.embedding import EmbeddingProvider from codeclone.memory.exceptions import MemorySemanticUnavailableError from codeclone.memory.retrieval import query_engineering_memory @@ -15,6 +18,7 @@ from codeclone.memory.sqlite_store import SqliteEngineeringMemoryStore from tests.memory_fixtures import ( insert_audit_event, + make_module_record, memory_store, seed_document_link, seed_module_role, @@ -273,6 +277,36 @@ def test_semantic_search_degrades_to_fts_when_model_unavailable(tmp_path: Path) assert fts.id in _record_ids(result) +def test_record_search_telemetry_emits_lane_counters( + monkeypatch: pytest.MonkeyPatch, +) -> None: + from codeclone.memory.retrieval import service as service_module + + captured: dict[str, int] = {} + monkeypatch.setattr( + service_module, + "record_counter", + lambda key, value=1: captured.__setitem__(key, value), + ) + record_a = dataclasses.replace(make_module_record("proj", "codeclone/a.py"), id="a") + record_b 
= dataclasses.replace(make_module_record("proj", "codeclone/b.py"), id="b") + service_module._record_search_telemetry( + fts_records=[record_a, record_b], + proximity={"b": 0.9, "c": 0.8}, # b overlaps FTS; c is vector-only + audit_hits=[SemanticHit(source_id="e1", source="audit", score=0.5)], + trajectory_hits=[], + candidates=[record_a, record_b], # c was dropped by the filter + ) + assert captured == { + "retrieval.fts_hits": 2, + "retrieval.vector_memory_hits": 2, + "retrieval.vector_audit_hits": 1, + "retrieval.vector_trajectory_hits": 0, + "retrieval.fts_vector_overlap": 1, + "retrieval.semantic_filtered": 1, + } + + def test_unavailable_index_falls_back_to_fts(tmp_path: Path) -> None: with memory_store(tmp_path) as (root, project, store, db_path): fts = seed_module_role( From 92cf143ac9dcf1f6b8521cfa2c8e6d920d40470a Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sat, 13 Jun 2026 19:22:09 +0500 Subject: [PATCH 272/318] refactor(core): batch trajectory hydration to fix experience.distill N+1 --- codeclone/memory/trajectory/store.py | 186 +++++++++++++++++++------- tests/test_memory_trajectory_store.py | 39 +++++- 2 files changed, 171 insertions(+), 54 deletions(-) diff --git a/codeclone/memory/trajectory/store.py b/codeclone/memory/trajectory/store.py index c0c95ddd..00006f85 100644 --- a/codeclone/memory/trajectory/store.py +++ b/codeclone/memory/trajectory/store.py @@ -10,8 +10,9 @@ import sqlite3 import uuid from collections import Counter, defaultdict -from collections.abc import Iterable, Mapping, Sequence +from collections.abc import Callable, Iterable, Mapping, Sequence from pathlib import Path +from typing import TypeVar import orjson @@ -43,6 +44,47 @@ _SQLITE_IN_QUERY_BATCH = 500 +_T = TypeVar("_T") + +# Batch child-table loads for trajectory hydration. Each is ORDER BY +# trajectory_id first (rows for one trajectory stay contiguous) then the same +# keys the per-trajectory query uses, so grouping preserves identical ordering. 
+_STEPS_BATCH_SQL = ( + "SELECT * FROM memory_trajectory_steps " + "WHERE trajectory_id IN ({placeholders}) " + "ORDER BY trajectory_id ASC, step_index ASC" +) +_SUBJECTS_BATCH_SQL = ( + "SELECT trajectory_id, subject_kind, subject_key, relation " + "FROM memory_trajectory_subjects " + "WHERE trajectory_id IN ({placeholders}) " + "ORDER BY trajectory_id ASC, subject_kind ASC, subject_key ASC" +) +_EVIDENCE_BATCH_SQL = ( + "SELECT trajectory_id, evidence_kind, ref, locator, digest, created_at_utc " + "FROM memory_trajectory_evidence " + "WHERE trajectory_id IN ({placeholders}) " + "ORDER BY trajectory_id ASC, created_at_utc ASC, evidence_kind ASC, ref ASC" +) + + +def _group_rows_by_trajectory_id( + conn: sqlite3.Connection, + *, + ids: Sequence[str], + sql: str, + build: Callable[[sqlite3.Row], _T], +) -> dict[str, list[_T]]: + """Run ``sql`` (one ``{placeholders}`` slot) over ``ids`` in chunks and group + the rows by ``trajectory_id``, preserving SQL order within each group.""" + grouped: dict[str, list[_T]] = {trajectory_id: [] for trajectory_id in ids} + for batch in chunked(tuple(ids), _SQLITE_IN_QUERY_BATCH): + placeholders = ", ".join("?" 
for _ in batch) + rows = conn.execute(sql.format(placeholders=placeholders), batch).fetchall() + for row in rows: + grouped.setdefault(str(row["trajectory_id"]), []).append(build(row)) + return grouped + def _project_and_upsert_workflow( conn: sqlite3.Connection, @@ -567,16 +609,13 @@ def search_trajectories( return _find_trajectories_by_ids(conn, [str(row["id"]) for row in rows]) -def find_trajectory(conn: sqlite3.Connection, trajectory_id: str) -> Trajectory | None: - row = conn.execute( - "SELECT * FROM memory_trajectories WHERE id=?", - (trajectory_id,), - ).fetchone() - if row is None: - return None - steps = _steps_for_trajectory(conn, trajectory_id) - subjects = _subjects_for_trajectory(conn, trajectory_id) - evidence = _evidence_for_trajectory(conn, trajectory_id) +def _row_to_trajectory( + row: sqlite3.Row, + *, + steps: Sequence[TrajectoryStep], + subjects: Sequence[TrajectorySubject], + evidence: Sequence[TrajectoryEvidence], +) -> Trajectory: return Trajectory( id=str(row["id"]), project_id=str(row["project_id"]), @@ -608,16 +647,58 @@ def find_trajectory(conn: sqlite3.Connection, trajectory_id: str) -> Trajectory ) +def find_trajectory(conn: sqlite3.Connection, trajectory_id: str) -> Trajectory | None: + row = conn.execute( + "SELECT * FROM memory_trajectories WHERE id=?", + (trajectory_id,), + ).fetchone() + if row is None: + return None + return _row_to_trajectory( + row, + steps=_steps_for_trajectory(conn, trajectory_id), + subjects=_subjects_for_trajectory(conn, trajectory_id), + evidence=_evidence_for_trajectory(conn, trajectory_id), + ) + + def _find_trajectories_by_ids( conn: sqlite3.Connection, ids: Sequence[str], ) -> list[Trajectory]: - hydrated: list[Trajectory] = [] - for trajectory_id in ids: - trajectory = find_trajectory(conn, trajectory_id) - if trajectory is not None: - hydrated.append(trajectory) - return hydrated + # Batch hydration: 4 chunked IN(...) queries total instead of 4 per id + # (row + steps + subjects + evidence). 
Preserves input id order and the + # per-trajectory child ordering of the single-id path. + id_list = [str(trajectory_id) for trajectory_id in ids] + if not id_list: + return [] + rows_by_id: dict[str, sqlite3.Row] = {} + for batch in chunked(tuple(id_list), _SQLITE_IN_QUERY_BATCH): + placeholders = ", ".join("?" for _ in batch) + for row in conn.execute( + f"SELECT * FROM memory_trajectories WHERE id IN ({placeholders})", + batch, + ).fetchall(): + rows_by_id[str(row["id"])] = row + present_ids = [tid for tid in id_list if tid in rows_by_id] + steps_by_id = _group_rows_by_trajectory_id( + conn, ids=present_ids, sql=_STEPS_BATCH_SQL, build=_row_to_step + ) + subjects_by_id = _group_rows_by_trajectory_id( + conn, ids=present_ids, sql=_SUBJECTS_BATCH_SQL, build=_row_to_subject + ) + evidence_by_id = _group_rows_by_trajectory_id( + conn, ids=present_ids, sql=_EVIDENCE_BATCH_SQL, build=_row_to_evidence + ) + return [ + _row_to_trajectory( + rows_by_id[tid], + steps=steps_by_id.get(tid, []), + subjects=subjects_by_id.get(tid, []), + evidence=evidence_by_id.get(tid, []), + ) + for tid in present_ids + ] def count_trajectories(conn: sqlite3.Connection, *, project_id: str) -> int: @@ -744,6 +825,40 @@ def _insert_evidence( ) +def _row_to_step(row: sqlite3.Row) -> TrajectoryStep: + return TrajectoryStep( + step_index=int(row["step_index"]), + audit_sequence=int(row["audit_sequence"]), + event_id=str(row["event_id"]), + event_type=str(row["event_type"]), + status=_optional_text(row["status"]), + run_id=_optional_text(row["run_id"]), + report_digest=_optional_text(row["report_digest"]), + event_core_sha256=str(row["event_core_sha256"]), + event_core_json=str(row["event_core_json"]), + summary=_optional_text(row["summary"]), + created_at_utc=str(row["created_at_utc"]), + ) + + +def _row_to_subject(row: sqlite3.Row) -> TrajectorySubject: + return TrajectorySubject( + subject_kind=str(row["subject_kind"]), + subject_key=str(row["subject_key"]), + relation=str(row["relation"]), + 
) + + +def _row_to_evidence(row: sqlite3.Row) -> TrajectoryEvidence: + return TrajectoryEvidence( + evidence_kind=str(row["evidence_kind"]), + ref=str(row["ref"]), + locator=_optional_text(row["locator"]), + digest=_optional_text(row["digest"]), + created_at_utc=str(row["created_at_utc"]), + ) + + def _steps_for_trajectory( conn: sqlite3.Connection, trajectory_id: str, @@ -753,22 +868,7 @@ def _steps_for_trajectory( "ORDER BY step_index ASC", (trajectory_id,), ).fetchall() - return [ - TrajectoryStep( - step_index=int(row["step_index"]), - audit_sequence=int(row["audit_sequence"]), - event_id=str(row["event_id"]), - event_type=str(row["event_type"]), - status=_optional_text(row["status"]), - run_id=_optional_text(row["run_id"]), - report_digest=_optional_text(row["report_digest"]), - event_core_sha256=str(row["event_core_sha256"]), - event_core_json=str(row["event_core_json"]), - summary=_optional_text(row["summary"]), - created_at_utc=str(row["created_at_utc"]), - ) - for row in rows - ] + return [_row_to_step(row) for row in rows] def _subjects_for_trajectory( @@ -780,14 +880,7 @@ def _subjects_for_trajectory( "WHERE trajectory_id=? 
ORDER BY subject_kind ASC, subject_key ASC", (trajectory_id,), ).fetchall() - return [ - TrajectorySubject( - subject_kind=str(row["subject_kind"]), - subject_key=str(row["subject_key"]), - relation=str(row["relation"]), - ) - for row in rows - ] + return [_row_to_subject(row) for row in rows] def _evidence_for_trajectory( @@ -800,16 +893,7 @@ def _evidence_for_trajectory( "ORDER BY created_at_utc ASC, evidence_kind ASC, ref ASC", (trajectory_id,), ).fetchall() - return [ - TrajectoryEvidence( - evidence_kind=str(row["evidence_kind"]), - ref=str(row["ref"]), - locator=_optional_text(row["locator"]), - digest=_optional_text(row["digest"]), - created_at_utc=str(row["created_at_utc"]), - ) - for row in rows - ] + return [_row_to_evidence(row) for row in rows] def _projection_run_id( diff --git a/tests/test_memory_trajectory_store.py b/tests/test_memory_trajectory_store.py index 6074e3e0..1c8f0c8a 100644 --- a/tests/test_memory_trajectory_store.py +++ b/tests/test_memory_trajectory_store.py @@ -16,7 +16,9 @@ from .memory_fixtures import memory_store -def _write_workflow_events(root: Path, audit_db: Path) -> None: +def _write_workflow_events( + root: Path, audit_db: Path, *, intent_id: str = "intent-test-001" +) -> None: root_digest = repo_root_digest(root.resolve()) writer = SqliteAuditWriter( db_path=audit_db, @@ -31,7 +33,7 @@ def _write_workflow_events(root: Path, audit_db: Path) -> None: repo_root_digest=root_digest, agent_pid=100, agent_label="tester", - intent_id="intent-test-001", + intent_id=intent_id, run_id="abc12345", report_digest="1" * 64, status="active", @@ -53,7 +55,7 @@ def _write_workflow_events(root: Path, audit_db: Path) -> None: repo_root_digest=root_digest, agent_pid=100, agent_label="tester", - intent_id="intent-test-001", + intent_id=intent_id, run_id="def67890", report_digest="2" * 64, status="accepted", @@ -106,6 +108,37 @@ def test_rebuild_trajectories_from_audit_is_idempotent(tmp_path: Path) -> None: assert 
store.latest_trajectory_projection_run(project_id=project.id) is not None +def test_find_trajectories_by_ids_batch_matches_single_path(tmp_path: Path) -> None: + from codeclone.memory.trajectory import store as trajectory_store + + with memory_store(tmp_path) as (root, project, store, _db_path): + audit_db = tmp_path / "audit.sqlite3" + for intent_id in ("intent-test-001", "intent-test-002", "intent-test-003"): + _write_workflow_events(root, audit_db, intent_id=intent_id) + store.rebuild_trajectories_from_audit( + project=project, root_path=root, audit_db_path=audit_db + ) + + conn = store._conn + ids = [item.id for item in store.list_trajectories(project_id=project.id)] + assert len(ids) == 3 + + # Batch hydration is identical to the per-id single path, in input order. + single = [trajectory_store.find_trajectory(conn, tid) for tid in ids] + assert trajectory_store._find_trajectories_by_ids(conn, ids) == single + + # Order is preserved, missing ids are skipped, empty input yields []. + reversed_ids = list(reversed(ids)) + assert trajectory_store._find_trajectories_by_ids(conn, reversed_ids) == [ + trajectory_store.find_trajectory(conn, tid) for tid in reversed_ids + ] + assert ( + trajectory_store._find_trajectories_by_ids(conn, ["missing", *ids]) + == single + ) + assert trajectory_store._find_trajectories_by_ids(conn, []) == [] + + def test_rebuild_supersedes_duplicate_workflow_projection_rows(tmp_path: Path) -> None: with memory_store(tmp_path) as (root, project, store, _db_path): audit_db = tmp_path / "audit.sqlite3" From 2b675d233a16c1b2865f7fd72fd842f08640e384 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sat, 13 Jun 2026 23:27:01 +0500 Subject: [PATCH 273/318] refactor(core): batch semantic.reindex sources to fix N+1 --- codeclone/memory/semantic/sources.py | 44 ++++++++++-------- codeclone/memory/sqlite_store.py | 8 ++++ codeclone/memory/trajectory/store.py | 11 ++--- tests/test_memory_trajectory_store.py | 9 ++-- tests/test_semantic_sources.py | 64 
++++++++++++++++++++++++--- 5 files changed, 102 insertions(+), 34 deletions(-) diff --git a/codeclone/memory/semantic/sources.py b/codeclone/memory/semantic/sources.py index 379598db..8d8b079b 100644 --- a/codeclone/memory/semantic/sources.py +++ b/codeclone/memory/semantic/sources.py @@ -30,6 +30,13 @@ _PAGE_SIZE = 200 +def _primary_path(subjects: Sequence[MemorySubject]) -> str | None: + for subject in subjects: + if subject.subject_kind == "path": + return subject.subject_key + return None + + class IndexSource(Protocol): """A source of deterministic projections to feed the semantic index. @@ -49,7 +56,9 @@ class _MemoryReadStore(Protocol): def query_records(self, query: MemoryQuery) -> Sequence[MemoryRecord]: ... - def list_subjects_for_memory(self, memory_id: str) -> list[MemorySubject]: ... + def list_subjects_for_memories( + self, memory_ids: Sequence[str] + ) -> dict[str, list[MemorySubject]]: ... class _TrajectoryReadStore(Protocol): @@ -60,7 +69,7 @@ def list_trajectories( limit: int = 20, ) -> list[TrajectoryListItem]: ... - def find_trajectory(self, trajectory_id: str) -> Trajectory | None: ... + def find_trajectories(self, trajectory_ids: Sequence[str]) -> list[Trajectory]: ... class MemoryIndexSource: @@ -91,25 +100,25 @@ def iter_projections(self) -> Iterator[SemanticProjection]: offset=offset, ) ) - for record in records: - if not is_indexed_memory_type(record.type): - continue - if record.status not in _INDEXED_STATUSES: - continue + indexed = [ + record + for record in records + if is_indexed_memory_type(record.type) + and record.status in _INDEXED_STATUSES + ] + # One batched subject load per page instead of a query per record. 
+ subjects_by_id = self._store.list_subjects_for_memories( + [record.id for record in indexed] + ) + for record in indexed: yield project_memory_record( record, - subject_path=self._primary_path(record.id), + subject_path=_primary_path(subjects_by_id.get(record.id, [])), ) if len(records) < _PAGE_SIZE: return offset += _PAGE_SIZE - def _primary_path(self, memory_id: str) -> str | None: - for subject in self._store.list_subjects_for_memory(memory_id): - if subject.subject_kind == "path": - return subject.subject_key - return None - class TrajectoryIndexSource: """Trajectory memory as a semantic source. @@ -136,10 +145,9 @@ def iter_projections(self) -> Iterator[SemanticProjection]: limit=_PAGE_SIZE + offset, ) page = items[offset : offset + _PAGE_SIZE] - for item in page: - trajectory = self._store.find_trajectory(item.id) - if trajectory is not None: - yield project_trajectory(trajectory) + # Batch-hydrate the page instead of one find_trajectory per item. + for trajectory in self._store.find_trajectories([item.id for item in page]): + yield project_trajectory(trajectory) if len(page) < _PAGE_SIZE: return offset += _PAGE_SIZE diff --git a/codeclone/memory/sqlite_store.py b/codeclone/memory/sqlite_store.py index 5b9a240b..66034ed2 100644 --- a/codeclone/memory/sqlite_store.py +++ b/codeclone/memory/sqlite_store.py @@ -198,6 +198,14 @@ def find_trajectory(self, trajectory_id: str) -> Trajectory | None: return find_trajectory(self._conn, trajectory_id) + def find_trajectories( + self, + trajectory_ids: Sequence[str], + ) -> list[Trajectory]: + from .trajectory.store import find_trajectories_by_ids + + return find_trajectories_by_ids(self._conn, trajectory_ids) + def load_trajectory_patch_trail( self, trajectory_id: str, diff --git a/codeclone/memory/trajectory/store.py b/codeclone/memory/trajectory/store.py index 00006f85..49faf5f7 100644 --- a/codeclone/memory/trajectory/store.py +++ b/codeclone/memory/trajectory/store.py @@ -438,7 +438,7 @@ def 
list_canonical_trajectories_for_export( """, (project_id, max(1, int(limit))), ).fetchall() - trajectories = _find_trajectories_by_ids(conn, [str(row["id"]) for row in rows]) + trajectories = find_trajectories_by_ids(conn, [str(row["id"]) for row in rows]) from .export_context import select_canonical_trajectories return select_canonical_trajectories(trajectories) @@ -539,7 +539,7 @@ def list_trajectories_for_subjects( """, (*params, max(1, int(limit))), ).fetchall() - return _find_trajectories_by_ids(conn, [str(row["id"]) for row in rows]) + return find_trajectories_by_ids(conn, [str(row["id"]) for row in rows]) def list_trajectories_for_intent_id( @@ -557,7 +557,7 @@ def list_trajectories_for_intent_id( """, (project_id, intent_id), ).fetchall() - return tuple(_find_trajectories_by_ids(conn, [str(row["id"]) for row in rows])) + return tuple(find_trajectories_by_ids(conn, [str(row["id"]) for row in rows])) def search_trajectories( @@ -606,7 +606,7 @@ def search_trajectories( """, (*params, max(1, int(limit))), ).fetchall() - return _find_trajectories_by_ids(conn, [str(row["id"]) for row in rows]) + return find_trajectories_by_ids(conn, [str(row["id"]) for row in rows]) def _row_to_trajectory( @@ -662,7 +662,7 @@ def find_trajectory(conn: sqlite3.Connection, trajectory_id: str) -> Trajectory ) -def _find_trajectories_by_ids( +def find_trajectories_by_ids( conn: sqlite3.Connection, ids: Sequence[str], ) -> list[Trajectory]: @@ -1014,6 +1014,7 @@ def load_trajectory_patch_trails( __all__ = [ "count_trajectories", + "find_trajectories_by_ids", "find_trajectory", "latest_projection_run", "list_trajectories", diff --git a/tests/test_memory_trajectory_store.py b/tests/test_memory_trajectory_store.py index 1c8f0c8a..5f7d2029 100644 --- a/tests/test_memory_trajectory_store.py +++ b/tests/test_memory_trajectory_store.py @@ -125,18 +125,17 @@ def test_find_trajectories_by_ids_batch_matches_single_path(tmp_path: Path) -> N # Batch hydration is identical to the per-id single 
path, in input order. single = [trajectory_store.find_trajectory(conn, tid) for tid in ids] - assert trajectory_store._find_trajectories_by_ids(conn, ids) == single + assert trajectory_store.find_trajectories_by_ids(conn, ids) == single # Order is preserved, missing ids are skipped, empty input yields []. reversed_ids = list(reversed(ids)) - assert trajectory_store._find_trajectories_by_ids(conn, reversed_ids) == [ + assert trajectory_store.find_trajectories_by_ids(conn, reversed_ids) == [ trajectory_store.find_trajectory(conn, tid) for tid in reversed_ids ] assert ( - trajectory_store._find_trajectories_by_ids(conn, ["missing", *ids]) - == single + trajectory_store.find_trajectories_by_ids(conn, ["missing", *ids]) == single ) - assert trajectory_store._find_trajectories_by_ids(conn, []) == [] + assert trajectory_store.find_trajectories_by_ids(conn, []) == [] def test_rebuild_supersedes_duplicate_workflow_projection_rows(tmp_path: Path) -> None: diff --git a/tests/test_semantic_sources.py b/tests/test_semantic_sources.py index 0196010b..f87c7311 100644 --- a/tests/test_semantic_sources.py +++ b/tests/test_semantic_sources.py @@ -48,8 +48,12 @@ def __init__( def query_records(self, query: MemoryQuery) -> Sequence[MemoryRecord]: return self._records[query.offset : query.offset + query.limit] - def list_subjects_for_memory(self, memory_id: str) -> list[MemorySubject]: - return self._subjects.get(memory_id, []) + def list_subjects_for_memories( + self, memory_ids: Sequence[str] + ) -> dict[str, list[MemorySubject]]: + return { + memory_id: self._subjects.get(memory_id, []) for memory_id in memory_ids + } class _FakeTrajectoryStore: @@ -79,8 +83,12 @@ def list_trajectories( ] return items[:limit] - def find_trajectory(self, trajectory_id: str) -> Trajectory | None: - return self._trajectories.get(trajectory_id) + def find_trajectories(self, trajectory_ids: Sequence[str]) -> list[Trajectory]: + return [ + self._trajectories[tid] + for tid in trajectory_ids + if tid in 
self._trajectories + ] def _prose( @@ -257,8 +265,8 @@ def test_trajectory_source_name_missing_record_and_pagination() -> None: assert source.name() == "trajectory" class _MissingTrajectoryStore(_FakeTrajectoryStore): - def find_trajectory(self, trajectory_id: str) -> Trajectory | None: - return None + def find_trajectories(self, trajectory_ids: Sequence[str]) -> list[Trajectory]: + return [] assert ( list( @@ -312,6 +320,50 @@ def close(self) -> None: assert connection.closed is True +def test_memory_index_source_batches_subjects_per_page() -> None: + project_id = "proj-batch" + records = [_prose(project_id, statement=f"note {index}") for index in range(250)] + + class _CountingStore(_FakeStore): + batch_calls = 0 + + def list_subjects_for_memories( + self, memory_ids: Sequence[str] + ) -> dict[str, list[MemorySubject]]: + self.batch_calls += 1 + return super().list_subjects_for_memories(memory_ids) + + store = _CountingStore(records, {}) + projections = list( + MemoryIndexSource(store, project_id=project_id).iter_projections() + ) + assert len(projections) == 250 + # 250 records over 2 pages (200 + 50) -> 2 batched subject loads, not 250. + assert store.batch_calls == 2 + + +def test_trajectory_index_source_batches_hydration_per_page() -> None: + base = _trajectory("proj-traj") + trajectories = [ + dataclasses.replace(base, id=f"traj-{index}") for index in range(250) + ] + + class _CountingTrajectoryStore(_FakeTrajectoryStore): + batch_calls = 0 + + def find_trajectories(self, trajectory_ids: Sequence[str]) -> list[Trajectory]: + self.batch_calls += 1 + return super().find_trajectories(trajectory_ids) + + store = _CountingTrajectoryStore(trajectories) + projections = list( + TrajectoryIndexSource(store, project_id="proj-traj").iter_projections() + ) + assert len(projections) == 250 + # 2 pages (200 + 50) -> 2 batched hydrations, not 250 find_trajectory calls. 
+ assert store.batch_calls == 2 + + def _trajectory(project_id: str) -> Trajectory: return Trajectory( id="traj-1", From 26a30152e5d404f58cc785763399c3941c14e644 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sat, 13 Jun 2026 23:33:04 +0500 Subject: [PATCH 274/318] feat(analytics): corpus clustering inspectability (sweep, diagnostics, noise) + integrity --- CHANGELOG.md | 9 +- .../analytics/clustering/canonicalize.py | 7 +- codeclone/analytics/clustering/diagnostics.py | 124 +++- codeclone/analytics/clustering/pipeline.py | 19 +- codeclone/analytics/clustering/sweep.py | 31 + .../corpus/adapters/intent_historical.py | 45 +- codeclone/analytics/corpus/normalizer.py | 6 +- codeclone/analytics/corpus/snapshot.py | 31 +- .../analytics/corpus/trajectory_selection.py | 8 +- codeclone/analytics/embedding/generation.py | 36 +- codeclone/analytics/export/json_export.py | 220 +++++- codeclone/analytics/integrity.py | 210 ++++++ codeclone/analytics/report/html.py | 283 ++++++-- codeclone/analytics/schema.py | 323 ++++++++- codeclone/analytics/store/protocols.py | 18 + codeclone/analytics/store/sqlite.py | 3 + codeclone/analytics/store/vectors_lancedb.py | 124 +++- codeclone/analytics/workflow.py | 135 +++- codeclone/config/analytics.py | 40 +- codeclone/contracts/__init__.py | 8 +- codeclone/surfaces/cli/analytics.py | 127 ++-- docs/README-pypi.md | 2 +- docs/book/10-config-and-defaults.md | 5 +- docs/book/11-cli.md | 10 +- docs/book/24-compatibility-and-versioning.md | 19 +- docs/book/27-corpus-analytics.md | 402 ++++++++--- docs/book/appendix/b-schema-layouts.md | 33 +- docs/guide/analytics/overview.md | 43 +- tests/test_analytics_cli.py | 610 ++++++++++++++++ tests/test_analytics_foundation.py | 672 +++++++++++++++++- tests/test_analytics_integration.py | 472 +++++++++++- tests/test_analytics_integrity.py | 360 ++++++++++ tests/test_analytics_reporting.py | 362 ++++++++++ tests/test_analytics_store.py | 392 ++++++++++ tests/test_analytics_trajectory_selection.py | 11 
+ tests/test_config_analytics.py | 35 + tests/test_sqlite_readonly_openers.py | 52 ++ 37 files changed, 4913 insertions(+), 374 deletions(-) create mode 100644 codeclone/analytics/integrity.py create mode 100644 tests/test_analytics_cli.py create mode 100644 tests/test_analytics_integrity.py create mode 100644 tests/test_analytics_reporting.py create mode 100644 tests/test_analytics_store.py diff --git a/CHANGELOG.md b/CHANGELOG.md index d96bd09f..8516a7ed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -71,11 +71,14 @@ Added next_tool guidance. Workspace hygiene warnings, audit events, token-budget tracking, and documentation-contract linting were also added. * **Corpus Analytics (intent lane, Slice 1).** Optional offline clustering of - historical change-control intents via `codeclone analytics corpus …`. + historical change-control intents via `codeclone analytics …`. Requires `codeclone[analytics]`. Reads audit + trajectory (+ optional registry overlay), writes SQLite/LanceDB artifacts under `.codeclone/analytics/`, and - exports JSON/HTML. Separate embedding contract from Engineering Memory semantic - index; `[tool.codeclone.analytics]` configures paths and clustering defaults. + exports inspectable JSON/HTML with sweep comparison, cluster diagnostics, + noise exploration, explicit heuristic recommendation vs maintainer selection, + and runtime observability spans. Analytics embeddings and their lifecycle are + separate from the Engineering Memory semantic index; + `[tool.codeclone.analytics]` configures paths and clustering defaults. 
Changed diff --git a/codeclone/analytics/clustering/canonicalize.py b/codeclone/analytics/clustering/canonicalize.py index ec344e94..4715d211 100644 --- a/codeclone/analytics/clustering/canonicalize.py +++ b/codeclone/analytics/clustering/canonicalize.py @@ -15,6 +15,8 @@ def canonicalize_partitions( partitions: Sequence[ClusterPartition], + *, + coordinates: dict[str, tuple[float, ...]], ) -> tuple[ClusterPartition, ...]: """Assign display order: size desc, medoid asc, membership_digest asc.""" non_noise = [part for part in partitions if part.cluster_label != NOISE_LABEL] @@ -22,7 +24,10 @@ def canonicalize_partitions( non_noise.sort( key=lambda part: ( -len(part.snapshot_item_ids), - part.snapshot_item_ids[0] if part.snapshot_item_ids else "", + medoid_item_id( + member_ids=part.snapshot_item_ids, + coordinates=coordinates, + ), part.membership_digest, ) ) diff --git a/codeclone/analytics/clustering/diagnostics.py b/codeclone/analytics/clustering/diagnostics.py index 2f2596d7..3eb17c64 100644 --- a/codeclone/analytics/clustering/diagnostics.py +++ b/codeclone/analytics/clustering/diagnostics.py @@ -46,16 +46,16 @@ def metadata_distribution( items: Sequence[CorpusItemRecord], *, field: str, + min_sample_size: int, ) -> dict[str, CorrelationCell]: counts: Counter[str] = Counter() for item in items: payload = _metadata_object(item.metadata_json) - value = payload.get(field) - key = str(value) if value is not None else "null" - counts[key] += 1 + for key in _metadata_values(payload.get(field)): + counts[key] += 1 total = len(items) return { - key: _cell(count, total, min_sample_size=5) + key: _cell(count, total, min_sample_size=min_sample_size) for key, count in sorted(counts.items()) } @@ -98,6 +98,10 @@ def build_cluster_diagnostics( "quality_tier", "scope_check_status", "verification_status", + "scope_expanded", + "anomaly_kinds", + "declared_file_count", + "changed_file_count", ) distributions = { field: { @@ -110,21 +114,51 @@ def build_cluster_diagnostics( for 
key, cell in metadata_distribution( member_items, field=field, + min_sample_size=min_correlation_sample_size, ).items() } for field in metadata_fields } - return { + representatives = _representative_ids( + member_ids=partition.snapshot_item_ids, + medoid=medoid, + coordinates=coordinates, + membership_strengths=membership_strengths, + ) + boundary_items = _boundary_ids( + member_ids=partition.snapshot_item_ids, + medoid=medoid, + coordinates=coordinates, + membership_strengths=membership_strengths, + ) + diagnostics: dict[str, object] = { "cluster_label": partition.cluster_label, "membership_digest": partition.membership_digest, "size": size, "size_percent": cluster_size_percent(size, total_items), "medoid_snapshot_item_id": medoid, "average_membership_strength": avg_strength, - "representatives": list(partition.snapshot_item_ids[:5]), + "representatives": list(representatives), + "boundary_items": list(boundary_items), "metadata_distributions": distributions, "min_correlation_sample_size": min_correlation_sample_size, } + if partition.cluster_label == NOISE_LABEL: + diagnostics["noise_items"] = [ + { + "snapshot_item_id": item.snapshot_item_id, + "flags": _flags_dict( + noise_explorer_flags( + item=item, + membership_strength=membership_strengths.get( + item.snapshot_item_id + ), + ) + ), + } + for item in sorted(member_items, key=lambda entry: entry.snapshot_item_id) + ] + return diagnostics def noise_explorer_flags( @@ -199,6 +233,84 @@ def _metadata_object(text: str) -> dict[str, object]: return parsed if isinstance(parsed, dict) else {} +def _metadata_values(value: object) -> tuple[str, ...]: + if value is None: + return ("null",) + if isinstance(value, list): + normalized = tuple(sorted({str(item) for item in value})) + return normalized or ("none",) + if isinstance(value, bool): + return ("true" if value else "false",) + return (str(value),) + + +def _representative_ids( + *, + member_ids: Sequence[str], + medoid: str, + coordinates: Mapping[str, 
tuple[float, ...]], + membership_strengths: Mapping[str, float | None], + limit: int = 5, +) -> tuple[str, ...]: + if not member_ids: + return () + ordered = sorted( + (item_id for item_id in member_ids if item_id != medoid), + key=lambda item_id: ( + -_strength(membership_strengths.get(item_id)), + _distance_from(item_id, medoid, coordinates), + item_id, + ), + ) + return tuple(([medoid] if medoid else []) + ordered)[:limit] + + +def _boundary_ids( + *, + member_ids: Sequence[str], + medoid: str, + coordinates: Mapping[str, tuple[float, ...]], + membership_strengths: Mapping[str, float | None], + limit: int = 5, +) -> tuple[str, ...]: + ordered = sorted( + member_ids, + key=lambda item_id: ( + _strength(membership_strengths.get(item_id)), + -_distance_from(item_id, medoid, coordinates), + item_id, + ), + ) + return tuple(ordered[:limit]) + + +def _strength(value: float | None) -> float: + return value if value is not None else 1.0 + + +def _distance_from( + item_id: str, + anchor_id: str, + coordinates: Mapping[str, tuple[float, ...]], +) -> float: + item = coordinates.get(item_id) + anchor = coordinates.get(anchor_id) + if item is None or anchor is None: + return float("inf") + return _euclidean(item, anchor) + + +def _flags_dict(flags: NoiseExplorerFlags) -> dict[str, bool]: + return { + "short_text": flags.short_text, + "long_text": flags.long_text, + "multiple_paragraphs": flags.multiple_paragraphs, + "high_conjunction_count": flags.high_conjunction_count, + "template_match": flags.template_match, + "low_membership_strength": flags.low_membership_strength, + } + + def _cell(numerator: int, denominator: int, *, min_sample_size: int) -> CorrelationCell: insufficient = denominator < min_sample_size rate = (numerator / denominator) if denominator and not insufficient else None diff --git a/codeclone/analytics/clustering/pipeline.py b/codeclone/analytics/clustering/pipeline.py index da57d56b..0ec6e9f3 100644 --- a/codeclone/analytics/clustering/pipeline.py +++ 
b/codeclone/analytics/clustering/pipeline.py @@ -55,6 +55,23 @@ def _l2_normalize(matrix: list[list[float]]) -> list[list[float]]: return normalized +def _validate_embedding_matrix(embeddings: Sequence[Sequence[float]]) -> int: + if not embeddings: + return 0 + width = len(embeddings[0]) + if width <= 0: + raise ValueError("embedding vectors must not be empty") + for index, row in enumerate(embeddings): + if len(row) != width: + raise ValueError( + f"embedding dimension mismatch at row {index}: " + f"actual={len(row)}, expected={width}" + ) + if not all(math.isfinite(float(value)) for value in row): + raise ValueError(f"embedding row {index} contains non-finite values") + return width + + def _load_sklearn_pca() -> Any: # Any: optional sklearn import boundary try: decomposition = importlib.import_module("sklearn.decomposition") @@ -89,7 +106,7 @@ def run_clustering_pipeline( if not snapshot_item_ids: return None n_samples = len(snapshot_item_ids) - n_features = len(embeddings[0]) if embeddings else 0 + n_features = _validate_embedding_matrix(embeddings) effective = resolve_effective_parameters( requested, n_samples=n_samples, diff --git a/codeclone/analytics/clustering/sweep.py b/codeclone/analytics/clustering/sweep.py index 3ea3153f..9bc9d5b0 100644 --- a/codeclone/analytics/clustering/sweep.py +++ b/codeclone/analytics/clustering/sweep.py @@ -6,8 +6,10 @@ from __future__ import annotations +import sys from collections.abc import Sequence from dataclasses import dataclass +from importlib.metadata import PackageNotFoundError, version from ...utils.json_io import json_text from ..corpus.keys import sha256_hex @@ -119,6 +121,7 @@ def run_digest( embedding_generation_id: str, effective: EffectiveClusteringParameters, random_seed: int, + algorithm_manifest: dict[str, object], ) -> str: payload = { "snapshot_id": snapshot_id, @@ -128,12 +131,39 @@ def run_digest( "min_cluster_size": effective.min_cluster_size, "min_samples": effective.min_samples, 
"cluster_selection_method": effective.cluster_selection_method, + "n_samples": effective.n_samples, + "n_features": effective.n_features, }, "random_seed": random_seed, + "algorithm_manifest": algorithm_manifest, } return sha256_hex(json_text(payload, sort_keys=True)) +def clustering_algorithm_manifest() -> dict[str, object]: + return { + "python_version": f"{sys.version_info.major}.{sys.version_info.minor}", + "numpy_version": _package_version("numpy"), + "scipy_version": _package_version("scipy"), + "scikit_learn_version": _package_version("scikit-learn"), + "hdbscan_version": _package_version("hdbscan"), + "vector_preprocessing": "l2_normalize", + "pca_solver": "full", + "pca_whiten": False, + "clustering_input": "pca_reduced_coordinates", + "hdbscan_implementation": "hdbscan", + "clustering_metric": "euclidean", + "hdbscan_core_dist_n_jobs": 1, + } + + +def _package_version(distribution: str) -> str: + try: + return version(distribution) + except PackageNotFoundError: + return "unknown" + + __all__ = [ "SWEEP_MIN_CLUSTER_SIZES", "SWEEP_MIN_SAMPLES", @@ -141,6 +171,7 @@ def run_digest( "SWEEP_SELECTION_METHODS", "SweepCandidate", "SweepCandidateResult", + "clustering_algorithm_manifest", "iter_sweep_candidates", "rank_sweep_results", "run_digest", diff --git a/codeclone/analytics/corpus/adapters/intent_historical.py b/codeclone/analytics/corpus/adapters/intent_historical.py index d56025e1..695af7ab 100644 --- a/codeclone/analytics/corpus/adapters/intent_historical.py +++ b/codeclone/analytics/corpus/adapters/intent_historical.py @@ -15,7 +15,7 @@ from ....audit.events import repo_root_digest from ....audit.reader import AuditRecord, read_intent_declared_records -from ....audit.validation import DEFAULT_AUDIT_PATH +from ....audit.validation import AUDIT_SCHEMA_VERSION, DEFAULT_AUDIT_PATH from ....config.intent_registry import ( IntentRegistryConfigError, resolve_intent_registry_config, @@ -236,10 +236,21 @@ def extract_historical_intent_items( "source": 
"patch_trail", "digest": patch_trail_digest, }, - "registry_overlay": {"present": False}, } - metadata: dict[str, object] = {} + metadata: dict[str, object] = { + "agent_client_raw": None, + "agent_family": "unknown", + "outcome": None, + "quality_tier": None, + "finished_at_utc": None, + "scope_expanded": None, + "anomaly_kinds": None, + "scope_check_status": None, + "verification_status": None, + "declared_file_count": None, + "changed_file_count": None, + } agent_raw: str | None = None if selected_trajectory is not None: agent_raw = trajectory_agent_label(selected_trajectory) @@ -273,8 +284,6 @@ def extract_historical_intent_items( if resolved_registry_db is not None else None ) - if registry_overlay is not None: - provenance["registry_overlay"] = {"present": True} rep_input = IntentRepresentationInput( description=description, @@ -293,7 +302,12 @@ def extract_historical_intent_items( project_id=group_project_id, intent_id=intent_id, ), - source_content_digest=source_content_digest(description), + source_content_digest=source_content_digest( + _raw_representation_inputs( + representation_kind=representation_kind, + payload=rep_input, + ) + ), provenance=provenance, metadata=metadata, registry_overlay=registry_overlay, @@ -326,6 +340,23 @@ def build_source_digest_items( return tuple(sorted(digest_items, key=lambda entry: entry.source_record_key)) +def _raw_representation_inputs( + *, + representation_kind: str, + payload: IntentRepresentationInput, +) -> dict[str, object]: + raw: dict[str, object] = {"description": payload.description} + if representation_kind.endswith("description_with_frame.v1"): + raw.update( + { + "intent_kind": payload.intent_kind, + "declared_path_families": sorted(set(payload.declared_path_families)), + "declared_constraints": sorted(set(payload.declared_constraints)), + } + ) + return raw + + def compute_source_digest( *, items: Sequence[HistoricalIntentSourceItem], @@ -411,7 +442,7 @@ def materialize_corpus_item( def 
default_source_schema_versions() -> dict[str, str]: return { - "audit": "4", + "audit": AUDIT_SCHEMA_VERSION, "memory": ENGINEERING_MEMORY_SCHEMA_VERSION, "patch_trail": PATCH_TRAIL_SCHEMA_VERSION, } diff --git a/codeclone/analytics/corpus/normalizer.py b/codeclone/analytics/corpus/normalizer.py index 7bca5a28..78e94e99 100644 --- a/codeclone/analytics/corpus/normalizer.py +++ b/codeclone/analytics/corpus/normalizer.py @@ -11,6 +11,7 @@ from dataclasses import dataclass from ...contracts import CORPUS_NORMALIZER_VERSION +from ...utils.json_io import json_text from .keys import sha256_hex _DIGEST_PATTERN = re.compile( @@ -65,8 +66,9 @@ def normalize_corpus_text(raw: str) -> NormalizedText: ) -def source_content_digest(raw: str) -> str: - return normalize_corpus_text(raw).digest +def source_content_digest(raw_inputs: object) -> str: + """Hash canonical raw representation inputs before text normalization.""" + return sha256_hex(json_text(raw_inputs, sort_keys=True)) __all__ = ["NormalizedText", "normalize_corpus_text", "source_content_digest"] diff --git a/codeclone/analytics/corpus/snapshot.py b/codeclone/analytics/corpus/snapshot.py index 182fe0cd..4d5eb7ad 100644 --- a/codeclone/analytics/corpus/snapshot.py +++ b/codeclone/analytics/corpus/snapshot.py @@ -9,7 +9,6 @@ import uuid from pathlib import Path -from ...audit.validation import DEFAULT_AUDIT_PATH from ...config.analytics import AnalyticsConfig, resolve_analytics_config from ...memory.project import compute_project_id, resolve_memory_db_path from ...report.meta import current_report_timestamp_utc @@ -26,11 +25,22 @@ from .keys import representation_version_for_kind -def _relative_store_paths(root_path: Path) -> dict[str, str]: +def _manifest_path(root_path: Path, path: Path) -> str: + try: + return path.resolve().relative_to(root_path.resolve()).as_posix() + except ValueError: + return "" + + +def _relative_store_paths( + root_path: Path, + *, + audit_db_path: Path, + memory_db_path: Path, +) -> dict[str, 
str]: return { - "audit": DEFAULT_AUDIT_PATH, - "memory": ".codeclone/memory/engineering_memory.sqlite3", - "analytics": ".codeclone/analytics/corpus_clustering.sqlite3", + "audit": _manifest_path(root_path, audit_db_path), + "memory": _manifest_path(root_path, memory_db_path), } @@ -49,10 +59,12 @@ def build_intent_snapshot( try: lane: CorpusLane = "intent" rep_version = representation_version_for_kind(representation_kind) + memory_db_path = resolve_memory_db_path(resolved_root) source_items = extract_historical_intent_items( root_path=resolved_root, representation_kind=representation_kind, - memory_db_path=resolve_memory_db_path(resolved_root), + audit_db_path=analytics_config.audit_db_path, + memory_db_path=memory_db_path, registry_db_path=registry_db_path, ) source_digest = compute_source_digest( @@ -106,7 +118,12 @@ def build_intent_snapshot( representation_kind=representation_kind, representation_version=rep_version, source_stores_json=json_text( - _relative_store_paths(resolved_root), sort_keys=True + _relative_store_paths( + resolved_root, + audit_db_path=analytics_config.audit_db_path, + memory_db_path=memory_db_path, + ), + sort_keys=True, ), source_schema_versions_json=json_text( default_source_schema_versions(), diff --git a/codeclone/analytics/corpus/trajectory_selection.py b/codeclone/analytics/corpus/trajectory_selection.py index 73b8c6b1..c58413c4 100644 --- a/codeclone/analytics/corpus/trajectory_selection.py +++ b/codeclone/analytics/corpus/trajectory_selection.py @@ -56,13 +56,13 @@ def select_trajectory_for_intent( finish_candidates = [item for item in candidates if _has_verified_finish(item)] pool = finish_candidates if finish_candidates else list(candidates) - pool.sort( + selected = max( + pool, key=lambda item: ( - -_terminal_audit_sequence(item), + _terminal_audit_sequence(item), item.id, - ) + ), ) - selected = pool[0] discarded = tuple( sorted( trajectory.id for trajectory in candidates if trajectory.id != selected.id diff --git 
a/codeclone/analytics/embedding/generation.py b/codeclone/analytics/embedding/generation.py index 5b04c81d..19518d15 100644 --- a/codeclone/analytics/embedding/generation.py +++ b/codeclone/analytics/embedding/generation.py @@ -9,6 +9,7 @@ import importlib import uuid from collections.abc import Sequence +from contextlib import suppress from dataclasses import dataclass from ...config.analytics import AnalyticsConfig @@ -17,7 +18,7 @@ from ...memory.embedding.fastembed_provider import FastEmbedEmbeddingProvider from ...report.meta import current_report_timestamp_utc from ..contracts import CorpusItemRecord, EmbeddingGenerationRecord, EmbeddingItemRecord -from ..exceptions import AnalyticsCapabilityError +from ..exceptions import AnalyticsCapabilityError, AnalyticsWorkflowError from ..store.protocols import CorpusStore from ..store.vectors_lancedb import AnalyticsVectorStore, vector_digest, vector_row_key @@ -61,11 +62,13 @@ def generate_embeddings_for_snapshot( ) -> EmbeddingBatchResult: items = store.list_items(snapshot_id) if not items: - msg = f"snapshot has no items: {snapshot_id}" - raise ValueError(msg) + raise AnalyticsWorkflowError(f"snapshot has no items: {snapshot_id}") active_provider = provider or _resolve_fastembed_provider(config) texts = [item.normalized_text for item in items] - vectors = embed_documents(active_provider, texts) + try: + vectors = embed_documents(active_provider, texts) + except Exception as exc: + raise AnalyticsWorkflowError(f"analytics embedding failed: {exc}") from exc generation_id = f"emb-{uuid.uuid4().hex[:16]}" provider_id = active_provider.model_id.split(":", 1)[0] if provider_id not in {"fastembed", "diagnostic-hash-v1"}: @@ -93,7 +96,6 @@ def generate_embeddings_for_snapshot( vector_preprocessing="l2_normalize", created_at_utc=current_report_timestamp_utc(), ) - store.insert_embedding_generation(generation) embedding_items: list[EmbeddingItemRecord] = [] vector_rows: list[dict[str, object]] = [] for item, vector in 
zip(items, vectors, strict=True): @@ -117,19 +119,19 @@ def generate_embeddings_for_snapshot( "vector": vector, } ) - store.insert_embedding_items(embedding_items) - stored_items = store.list_embedding_items(embedding_generation_id=generation_id) - if len(stored_items) != len(embedding_items): - msg = ( - "embedding item count mismatch after persist: " - f"expected {len(embedding_items)}, stored {len(stored_items)}" + try: + store.insert_embedding_generation(generation) + store.insert_embedding_items(embedding_items) + vector_store.write_vectors( + embedding_generation_id=generation_id, + rows=vector_rows, ) - raise ValueError(msg) - vector_store.write_vectors( - embedding_generation_id=generation_id, - rows=vector_rows, - ) - store.commit() + store.commit() + except Exception: + store.rollback() + with suppress(Exception): + vector_store.delete_generation(generation_id) + raise return EmbeddingBatchResult( embedding_generation_id=generation_id, item_count=len(items), diff --git a/codeclone/analytics/export/json_export.py b/codeclone/analytics/export/json_export.py index a1da7378..5019860c 100644 --- a/codeclone/analytics/export/json_export.py +++ b/codeclone/analytics/export/json_export.py @@ -10,9 +10,25 @@ from ...contracts import CORPUS_EXPORT_SCHEMA_VERSION from ...utils.json_io import json_text -from ..contracts import ClusteringRunRecord, CorpusItemRecord, CorpusSnapshotRecord +from ..clustering.models import NOISE_LABEL +from ..clustering.sweep import score_clustering_result +from ..contracts import ( + ClusterAssignmentRecord, + ClusteringRunRecord, + ClusterSummaryRecord, + CorpusItemRecord, + CorpusSnapshotRecord, + EmbeddingGenerationRecord, + EmbeddingItemRecord, +) +from ..exceptions import AnalyticsWorkflowError +from ..integrity import validate_generation_metadata, validate_persisted_run from ..store.sqlite import SqliteCorpusAnalyticsStore +_REPRODUCIBILITY_NOTE = ( + "Full vector reproducibility is not guaranteed from model id alone." 
+) + def export_clustering_json( *, @@ -20,29 +36,48 @@ def export_clustering_json( snapshot_id: str, clustering_run_id: str, ) -> str: - snapshot = store.get_snapshot(snapshot_id) - if snapshot is None: - msg = f"unknown snapshot: {snapshot_id}" - raise ValueError(msg) - run = store.get_clustering_run(clustering_run_id) - if run is None: - msg = f"unknown clustering run: {clustering_run_id}" - raise ValueError(msg) - items = store.list_items(snapshot_id) - assignments = store.list_assignments(clustering_run_id) - summaries = store.list_summaries(clustering_run_id) - generation = store.get_embedding_generation(run.embedding_generation_id) + snapshot, generation = _validated_context( + store=store, + snapshot_id=snapshot_id, + clustering_run_id=clustering_run_id, + ) + run = validate_persisted_run( + store=store, + snapshot_id=snapshot_id, + clustering_run_id=clustering_run_id, + ) + detail = _run_detail(store=store, run=run) payload: dict[str, object] = { "schema_version": CORPUS_EXPORT_SCHEMA_VERSION, "snapshot": _snapshot_dict(snapshot), - "embedding_generation": _generation_dict(generation) if generation else None, - "clustering_run": _run_dict(run), - "clusters": [_summary_dict(summary) for summary in summaries], - "assignments": [_assignment_dict(item) for item in assignments], - "items": [_item_dict(item) for item in items], + "embedding_generation": _generation_dict(generation), + "embedding_items": [ + _embedding_item_dict(item) + for item in store.list_embedding_items( + embedding_generation_id=run.embedding_generation_id + ) + ], + "clustering_run": detail["run"], + "clusters": detail["clusters"], + "assignments": detail["assignments"], + "noise_items": detail["noise_items"], + "items": [_item_dict(item) for item in store.list_items(snapshot_id)], "exact_model_artifact_reproducibility": ( - generation.exact_model_artifact_reproducibility if generation else False + generation.exact_model_artifact_reproducibility ), + "reproducibility_statement": ( + None + 
if generation.exact_model_artifact_reproducibility + else _REPRODUCIBILITY_NOTE + ), + "sweep_candidates": [ + _run_summary(store=store, run=candidate) + for candidate in store.list_clustering_runs( + snapshot_id=snapshot_id, + embedding_generation_id=run.embedding_generation_id, + ) + if candidate.status == "completed" + ], } return json_text(payload, sort_keys=True, indent=True, trailing_newline=True) @@ -53,19 +88,123 @@ def export_sweep_comparison_json( snapshot_id: str, embedding_generation_id: str, ) -> str: - runs = store.list_clustering_runs( + snapshot = store.get_snapshot(snapshot_id) + if snapshot is None: + raise AnalyticsWorkflowError(f"unknown snapshot: {snapshot_id}") + generation = store.get_embedding_generation(embedding_generation_id) + if generation is None: + raise AnalyticsWorkflowError( + f"unknown embedding generation: {embedding_generation_id}" + ) + validate_generation_metadata( + store=store, snapshot_id=snapshot_id, embedding_generation_id=embedding_generation_id, + items=store.list_items(snapshot_id), ) + candidates: list[dict[str, object]] = [] + for run in store.list_clustering_runs( + snapshot_id=snapshot_id, + embedding_generation_id=embedding_generation_id, + ): + if run.status != "completed": + continue + validate_persisted_run( + store=store, + snapshot_id=snapshot_id, + clustering_run_id=run.clustering_run_id, + ) + candidates.append(_run_detail(store=store, run=run)) payload = { "schema_version": CORPUS_EXPORT_SCHEMA_VERSION, - "snapshot_id": snapshot_id, - "embedding_generation_id": embedding_generation_id, - "candidates": [_run_dict(run) for run in runs], + "snapshot": _snapshot_dict(snapshot), + "embedding_generation": _generation_dict(generation), + "embedding_items": [ + _embedding_item_dict(item) + for item in store.list_embedding_items( + embedding_generation_id=embedding_generation_id + ) + ], + "candidates": candidates, + "exact_model_artifact_reproducibility": ( + generation.exact_model_artifact_reproducibility + ), + 
"reproducibility_statement": ( + None + if generation.exact_model_artifact_reproducibility + else _REPRODUCIBILITY_NOTE + ), } return json_text(payload, sort_keys=True, indent=True, trailing_newline=True) +def _validated_context( + *, + store: SqliteCorpusAnalyticsStore, + snapshot_id: str, + clustering_run_id: str, +) -> tuple[CorpusSnapshotRecord, EmbeddingGenerationRecord]: + run = validate_persisted_run( + store=store, + snapshot_id=snapshot_id, + clustering_run_id=clustering_run_id, + ) + snapshot = store.get_snapshot(snapshot_id) + generation = store.get_embedding_generation(run.embedding_generation_id) + assert snapshot is not None + assert generation is not None + return snapshot, generation + + +def _run_detail( + *, + store: SqliteCorpusAnalyticsStore, + run: ClusteringRunRecord, +) -> dict[str, object]: + assignments = store.list_assignments(run.clustering_run_id) + summaries = store.list_summaries(run.clustering_run_id) + noise_items = [ + item.snapshot_item_id + for item in assignments + if item.cluster_label == NOISE_LABEL + ] + cluster_count = len( + { + item.cluster_label + for item in assignments + if item.cluster_label != NOISE_LABEL + } + ) + noise_fraction = len(noise_items) / len(assignments) if assignments else 1.0 + return { + "run": { + **_run_dict(run), + "score": score_clustering_result( + cluster_count=cluster_count, + noise_fraction=noise_fraction, + n_samples=len(assignments), + ), + "cluster_count": cluster_count, + "noise_count": len(noise_items), + "noise_fraction": noise_fraction, + }, + "clusters": [_summary_dict(summary) for summary in summaries], + "assignments": [_assignment_dict(item) for item in assignments], + "noise_items": noise_items, + } + + +def _run_summary( + *, + store: SqliteCorpusAnalyticsStore, + run: ClusteringRunRecord, +) -> dict[str, object]: + detail = _run_detail(store=store, run=run) + payload = detail["run"] + assert isinstance(payload, dict) + return dict(payload) + + def _snapshot_dict(snapshot: 
CorpusSnapshotRecord) -> dict[str, object]: return { "snapshot_id": snapshot.snapshot_id, @@ -81,12 +220,19 @@ def _snapshot_dict(snapshot: CorpusSnapshotRecord) -> dict[str, object]: def _run_dict(run: ClusteringRunRecord) -> dict[str, object]: + effective_parameters = json.loads(run.effective_parameters_json) + algorithm_manifest = ( + effective_parameters.get("algorithm_manifest", {}) + if isinstance(effective_parameters, dict) + else {} + ) return { "clustering_run_id": run.clustering_run_id, "snapshot_id": run.snapshot_id, "embedding_generation_id": run.embedding_generation_id, "requested_parameters": json.loads(run.requested_parameters_json), - "effective_parameters": json.loads(run.effective_parameters_json), + "effective_parameters": effective_parameters, + "algorithm_manifest": algorithm_manifest, "random_seed": run.random_seed, "run_digest": run.run_digest, "recommended_by_heuristic": run.recommended_by_heuristic, @@ -98,10 +244,7 @@ def _run_dict(run: ClusteringRunRecord) -> dict[str, object]: } -def _summary_dict(summary: object) -> dict[str, object]: - from ..contracts import ClusterSummaryRecord - - assert isinstance(summary, ClusterSummaryRecord) +def _summary_dict(summary: ClusterSummaryRecord) -> dict[str, object]: return { "cluster_label": summary.cluster_label, "display_cluster_id": summary.display_cluster_id, @@ -111,10 +254,7 @@ def _summary_dict(summary: object) -> dict[str, object]: } -def _assignment_dict(assignment: object) -> dict[str, object]: - from ..contracts import ClusterAssignmentRecord - - assert isinstance(assignment, ClusterAssignmentRecord) +def _assignment_dict(assignment: ClusterAssignmentRecord) -> dict[str, object]: return { "snapshot_item_id": assignment.snapshot_item_id, "cluster_label": assignment.cluster_label, @@ -138,10 +278,9 @@ def _item_dict(item: CorpusItemRecord) -> dict[str, object]: } -def _generation_dict(generation: object) -> dict[str, object]: - from ..contracts import EmbeddingGenerationRecord - - assert 
isinstance(generation, EmbeddingGenerationRecord) +def _generation_dict( + generation: EmbeddingGenerationRecord, +) -> dict[str, object]: return { "embedding_generation_id": generation.embedding_generation_id, "provider_id": generation.provider_id, @@ -160,4 +299,13 @@ def _generation_dict(generation: object) -> dict[str, object]: } +def _embedding_item_dict(item: EmbeddingItemRecord) -> dict[str, object]: + return { + "snapshot_item_id": item.snapshot_item_id, + "vector_row_key": item.vector_row_key, + "vector_digest": item.vector_digest, + "dimensions": item.dimensions, + } + + __all__ = ["export_clustering_json", "export_sweep_comparison_json"] diff --git a/codeclone/analytics/integrity.py b/codeclone/analytics/integrity.py new file mode 100644 index 00000000..e47bbc9a --- /dev/null +++ b/codeclone/analytics/integrity.py @@ -0,0 +1,210 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections import defaultdict +from collections.abc import Sequence + +from ..contracts import CORPUS_EMBEDDING_CONTRACT_VERSION +from .contracts import ( + ClusteringRunRecord, + CorpusItemRecord, + EmbeddingGenerationRecord, + EmbeddingItemRecord, +) +from .corpus.keys import membership_digest +from .exceptions import AnalyticsWorkflowError +from .store.protocols import CorpusStore, VectorGenerationStore +from .store.vectors_lancedb import vector_digest, vector_row_key + + +def validate_generation_metadata( + *, + store: CorpusStore, + snapshot_id: str, + embedding_generation_id: str, + items: Sequence[CorpusItemRecord], +) -> tuple[EmbeddingGenerationRecord, tuple[EmbeddingItemRecord, ...]]: + generation = store.get_embedding_generation(embedding_generation_id) + if generation is None: + raise AnalyticsWorkflowError( + f"unknown embedding generation: {embedding_generation_id}" + ) + if generation.embedding_contract_version != CORPUS_EMBEDDING_CONTRACT_VERSION: + raise AnalyticsWorkflowError( + "unsupported analytics embedding contract: " + f"{generation.embedding_contract_version}; " + f"expected {CORPUS_EMBEDDING_CONTRACT_VERSION}. " + "Generate a new analytics embedding batch." 
+ ) + if ( + generation.embedding_similarity_metric != "cosine" + or generation.vector_preprocessing != "l2_normalize" + ): + raise AnalyticsWorkflowError( + "embedding generation does not match the fixed analytics " + "cosine/L2 preprocessing contract" + ) + expected_ids = {item.snapshot_item_id for item in items} + embedding_items = store.list_embedding_items( + embedding_generation_id=embedding_generation_id + ) + actual_ids = {item.snapshot_item_id for item in embedding_items} + if actual_ids != expected_ids: + raise AnalyticsWorkflowError( + "embedding generation does not match snapshot " + f"{snapshot_id}: missing={len(expected_ids - actual_ids)}, " + f"foreign={len(actual_ids - expected_ids)}" + ) + for item in embedding_items: + if item.dimensions != generation.dimensions: + raise AnalyticsWorkflowError( + "embedding dimension mismatch in metadata for " + f"{item.snapshot_item_id}: item={item.dimensions}, " + f"generation={generation.dimensions}" + ) + expected_key = vector_row_key( + embedding_generation_id=embedding_generation_id, + snapshot_item_id=item.snapshot_item_id, + ) + if item.vector_row_key != expected_key: + raise AnalyticsWorkflowError( + f"invalid vector row key for {item.snapshot_item_id}" + ) + return generation, embedding_items + + +def load_validated_snapshot_vectors( + *, + store: CorpusStore, + vector_store: VectorGenerationStore, + snapshot_id: str, + embedding_generation_id: str, + items: Sequence[CorpusItemRecord], +) -> list[list[float]]: + generation, embedding_items = validate_generation_metadata( + store=store, + snapshot_id=snapshot_id, + embedding_generation_id=embedding_generation_id, + items=items, + ) + metadata_by_id = {item.snapshot_item_id: item for item in embedding_items} + sidecar_ids = set( + vector_store.list_generation_item_ids( + embedding_generation_id=embedding_generation_id, + limit=len(metadata_by_id) + 1, + ) + ) + if sidecar_ids != set(metadata_by_id): + raise AnalyticsWorkflowError( + "analytics vector 
generation does not match embedding metadata: " + f"missing={len(set(metadata_by_id) - sidecar_ids)}, " + f"foreign={len(sidecar_ids - set(metadata_by_id))}" + ) + rows = vector_store.read_vector_rows( + embedding_generation_id=embedding_generation_id, + snapshot_item_ids=[item.snapshot_item_id for item in items], + ) + if set(rows) != set(metadata_by_id): + raise AnalyticsWorkflowError( + "analytics vector sidecar does not match embedding metadata: " + f"missing={len(set(metadata_by_id) - set(rows))}, " + f"foreign={len(set(rows) - set(metadata_by_id))}" + ) + vectors: list[list[float]] = [] + for corpus_item in items: + item_id = corpus_item.snapshot_item_id + row = rows[item_id] + metadata = metadata_by_id[item_id] + vector = row["vector"] + if not isinstance(vector, list): + raise AnalyticsWorkflowError(f"invalid vector payload for {item_id}") + typed_vector = [float(value) for value in vector] + if len(typed_vector) != generation.dimensions: + raise AnalyticsWorkflowError( + f"vector dimension mismatch for {item_id}: " + f"actual={len(typed_vector)}, expected={generation.dimensions}" + ) + actual_digest = vector_digest(typed_vector) + if ( + row["vector_digest"] != actual_digest + or metadata.vector_digest != actual_digest + ): + raise AnalyticsWorkflowError(f"vector digest mismatch for {item_id}") + if row["vector_row_key"] != metadata.vector_row_key: + raise AnalyticsWorkflowError(f"vector row key mismatch for {item_id}") + vectors.append(typed_vector) + return vectors + + +def validate_persisted_run( + *, + store: CorpusStore, + snapshot_id: str, + clustering_run_id: str, +) -> ClusteringRunRecord: + snapshot = store.get_snapshot(snapshot_id) + if snapshot is None: + raise AnalyticsWorkflowError(f"unknown snapshot: {snapshot_id}") + run = store.get_clustering_run(clustering_run_id) + if run is None: + raise AnalyticsWorkflowError(f"unknown clustering run: {clustering_run_id}") + if run.snapshot_id != snapshot_id: + raise AnalyticsWorkflowError( + 
f"clustering run {clustering_run_id} belongs to snapshot " + f"{run.snapshot_id}, not {snapshot_id}" + ) + if run.status != "completed": + raise AnalyticsWorkflowError( + f"clustering run is not completed: {clustering_run_id} ({run.status})" + ) + items = store.list_items(snapshot_id) + validate_generation_metadata( + store=store, + snapshot_id=snapshot_id, + embedding_generation_id=run.embedding_generation_id, + items=items, + ) + expected_ids = {item.snapshot_item_id for item in items} + assignments = store.list_assignments(clustering_run_id) + actual_ids = {item.snapshot_item_id for item in assignments} + if actual_ids != expected_ids: + raise AnalyticsWorkflowError( + "clustering assignments do not match snapshot items: " + f"missing={len(expected_ids - actual_ids)}, " + f"foreign={len(actual_ids - expected_ids)}" + ) + members_by_label: defaultdict[int, list[str]] = defaultdict(list) + for assignment in assignments: + members_by_label[assignment.cluster_label].append(assignment.snapshot_item_id) + summaries = store.list_summaries(clustering_run_id) + if {item.cluster_label for item in summaries} != set(members_by_label): + raise AnalyticsWorkflowError("cluster summaries do not match assignments") + for summary in summaries: + members = members_by_label[summary.cluster_label] + digest = membership_digest(members) + if summary.size != len(members) or summary.membership_digest != digest: + raise AnalyticsWorkflowError( + f"cluster summary integrity mismatch for label {summary.cluster_label}" + ) + if any( + item.membership_digest != digest + for item in assignments + if item.cluster_label == summary.cluster_label + ): + raise AnalyticsWorkflowError( + "assignment membership digest mismatch for label " + f"{summary.cluster_label}" + ) + return run + + +__all__ = [ + "load_validated_snapshot_vectors", + "validate_generation_metadata", + "validate_persisted_run", +] diff --git a/codeclone/analytics/report/html.py b/codeclone/analytics/report/html.py index 
1cfdeb98..0ae7c484 100644 --- a/codeclone/analytics/report/html.py +++ b/codeclone/analytics/report/html.py @@ -8,9 +8,13 @@ import html import json -from collections.abc import Sequence +from collections.abc import Mapping, Sequence +from ..clustering.models import NOISE_LABEL +from ..clustering.sweep import score_clustering_result from ..contracts import ClusteringRunRecord, ClusterSummaryRecord, CorpusSnapshotRecord +from ..exceptions import AnalyticsWorkflowError +from ..integrity import validate_persisted_run from ..store.sqlite import SqliteCorpusAnalyticsStore @@ -21,39 +25,61 @@ def render_analytics_html( run: ClusteringRunRecord, comparison_only: bool = False, ) -> str: - summaries = store.list_summaries(run.clustering_run_id) + if run.snapshot_id != snapshot.snapshot_id: + raise AnalyticsWorkflowError( + f"run {run.clustering_run_id} does not belong to {snapshot.snapshot_id}" + ) generation = store.get_embedding_generation(run.embedding_generation_id) - reproducibility_note = "" - if generation is not None and not generation.exact_model_artifact_reproducibility: - reproducibility_note = ( - "

    Full vector reproducibility is not guaranteed from model id " - "alone.

    " + if generation is None: + raise AnalyticsWorkflowError( + f"missing embedding generation: {run.embedding_generation_id}" ) if comparison_only: body = _render_comparison_table(store, snapshot.snapshot_id, run) title = "Corpus Analytics Sweep Comparison" + run_line = "" else: - body = _render_detail_view(summaries) + validate_persisted_run( + store=store, + snapshot_id=snapshot.snapshot_id, + clustering_run_id=run.clustering_run_id, + ) + body = _render_detail_view(store=store, snapshot=snapshot, run=run) title = "Corpus Analytics Cluster Report" + run_line = f"

    Run: {html.escape(run.clustering_run_id)}

    " + reproducibility_note = "" + if not generation.exact_model_artifact_reproducibility: + reproducibility_note = ( + '

    Reproducibility: Full vector ' + "reproducibility is not guaranteed from model id alone.

    " + ) return f""" + {html.escape(title)}

    {html.escape(title)}

    -

    Snapshot: {html.escape(snapshot.snapshot_id)}

    -

    Run: {html.escape(run.clustering_run_id)}

    -

    Recommended by heuristic: {run.recommended_by_heuristic}

    -

    Selected by maintainer: {run.selected_by_maintainer}

    +

    Snapshot: {html.escape(snapshot.snapshot_id)}

    +{run_line} {reproducibility_note} {body} @@ -66,46 +92,155 @@ def _render_comparison_table( snapshot_id: str, current_run: ClusteringRunRecord, ) -> str: - runs = store.list_clustering_runs( + rows: list[str] = [] + for run in store.list_clustering_runs( snapshot_id=snapshot_id, embedding_generation_id=current_run.embedding_generation_id, - ) - rows = [ - "" - f"{html.escape(run.clustering_run_id)}" - f"{html.escape(run.effective_parameters_json)}" - f"{run.recommended_by_heuristic}" - f"{run.selected_by_maintainer}" - "" - for run in runs - ] + ): + if run.status != "completed": + continue + validate_persisted_run( + store=store, + snapshot_id=snapshot_id, + clustering_run_id=run.clustering_run_id, + ) + assignments = store.list_assignments(run.clustering_run_id) + cluster_count = len( + { + item.cluster_label + for item in assignments + if item.cluster_label != NOISE_LABEL + } + ) + noise_count = sum( + 1 for item in assignments if item.cluster_label == NOISE_LABEL + ) + noise_fraction = noise_count / len(assignments) if assignments else 1.0 + score = score_clustering_result( + cluster_count=cluster_count, + noise_fraction=noise_fraction, + n_samples=len(assignments), + ) + rows.append( + "" + f"{html.escape(run.clustering_run_id)}" + f"{html.escape(run.requested_parameters_json)}" + f"{html.escape(run.effective_parameters_json)}" + f"{cluster_count}{noise_fraction:.3f}" + f"{score:.3f}" + f"{run.recommended_by_heuristic}" + f"{run.selected_by_maintainer}" + "" + ) return ( - "" - "" - "" + "

    Candidate runs

    " + '

    Recommendation is heuristic evidence; maintainer ' + "selection remains an explicit separate decision.

    " + "
    RunEffective ParametersRecommendedSelected
    " + "" + "" "" + "".join(rows) + "
    RunRequestedEffectiveClustersNoise fractionScoreRecommendedMaintainer selected
    " ) -def _render_detail_view(summaries: Sequence[ClusterSummaryRecord]) -> str: - sections: list[str] = ["

    Clusters

    "] +def _render_detail_view( + *, + store: SqliteCorpusAnalyticsStore, + snapshot: CorpusSnapshotRecord, + run: ClusteringRunRecord, +) -> str: + summaries = store.list_summaries(run.clustering_run_id) + assignments = store.list_assignments(run.clustering_run_id) + items = { + item.snapshot_item_id: item for item in store.list_items(snapshot.snapshot_id) + } + noise_count = sum(1 for item in assignments if item.cluster_label == NOISE_LABEL) + cluster_count = sum(1 for item in summaries if item.cluster_label != NOISE_LABEL) + sections = [ + "

    Overview

    ", + "" + f"" + f"" + f"" + "" + f"" + "" + f"" + "" + f"" + "" + f"" + "
    Corpus items{snapshot.record_count}
    Clusters{cluster_count}
    Noise items{noise_count}
    Recommended by heuristic{run.recommended_by_heuristic}
    Selected by maintainer{run.selected_by_maintainer}
    Requested parameters{html.escape(run.requested_parameters_json)}
    Effective parameters{html.escape(run.effective_parameters_json)}
    ", + _render_cluster_index(summaries), + ] for summary in summaries: - diagnostics = json.loads(summary.diagnostics_json) - if not isinstance(diagnostics, dict): - diagnostics = {} - display = summary.display_cluster_id - label = "noise" if display is None else str(display) - sections.append(f"

    Cluster {html.escape(label)}

    ") - sections.append(f"

    Size: {summary.size}

    ") - distributions = diagnostics.get("metadata_distributions") - if isinstance(distributions, dict): - sections.append(_render_distributions(distributions)) + diagnostics = _diagnostics(summary) + sections.append(_render_cluster_panel(summary, diagnostics, items)) return "\n".join(sections) +def _render_cluster_index(summaries: Sequence[ClusterSummaryRecord]) -> str: + rows = [] + for summary in summaries: + diagnostics = _diagnostics(summary) + label = _display_label(summary) + rows.append( + "" + f"{html.escape(label)}{summary.size}" + f"{_float_value(diagnostics.get('size_percent')):.2f}%" + f"{html.escape(str(diagnostics.get('average_membership_strength')))}" + "" + f"{html.escape(str(diagnostics.get('medoid_snapshot_item_id', '')))}" + "" + "" + ) + return ( + "

    Cluster index

    " + "" + "" + "" + "".join(rows) + "
    ClusterSizeCorpus %Average membershipMedoid
    " + ) + + +def _render_cluster_panel( + summary: ClusterSummaryRecord, + diagnostics: Mapping[str, object], + items: Mapping[str, object], +) -> str: + label = _display_label(summary) + css = "cluster noise" if summary.cluster_label == NOISE_LABEL else "cluster" + parts = [ + f'

    Cluster {html.escape(label)}

    ', + f"

    Size: {summary.size}; membership digest: " + f"{html.escape(summary.membership_digest)}

    ", + _render_id_group("Representatives", diagnostics.get("representatives")), + _render_id_group("Boundary items", diagnostics.get("boundary_items")), + _render_id_group("Nearest clusters", diagnostics.get("nearest_clusters")), + ] + distributions = diagnostics.get("metadata_distributions") + if isinstance(distributions, dict): + parts.append("

    Metadata correlations

    ") + parts.append(_render_distributions(distributions)) + if summary.cluster_label == NOISE_LABEL: + parts.append(_render_noise_explorer(diagnostics, items)) + parts.append("
    ") + return "\n".join(parts) + + +def _render_id_group(title: str, value: object) -> str: + if not isinstance(value, list) or not value: + return f'

    {html.escape(title)}

    None

    ' + pills = "".join( + f'{html.escape(str(item))}' for item in value + ) + return f"

    {html.escape(title)}

    {pills}

    " + + def _render_distributions(distributions: dict[str, object]) -> str: parts = [ - "" + "
    FieldValueRate
    " + "" + "" ] for field, values in sorted(distributions.items()): if not isinstance(values, dict): @@ -113,20 +248,70 @@ def _render_distributions(distributions: dict[str, object]) -> str: for key, cell in sorted(values.items()): if not isinstance(cell, dict): continue - numerator = cell.get("numerator") - denominator = cell.get("denominator") - rate = cell.get("rate") insufficient = bool(cell.get("insufficient_sample")) - rate_text = "n/a" if insufficient else str(rate) css = ' class="insufficient"' if insufficient else "" + rate = "n/a" if insufficient else str(cell.get("rate")) parts.append( f"" f"" - f"" + f"" + f"" + f"" ) parts.append("
    FieldValueNumeratorDenominatorRate
    {html.escape(str(field))}{html.escape(str(key))}{html.escape(rate_text)} " - f"({numerator}/{denominator})
    {html.escape(str(cell.get('numerator')))}{html.escape(str(cell.get('denominator')))}{html.escape(rate)}
    ") return "".join(parts) +def _render_noise_explorer( + diagnostics: Mapping[str, object], + items: Mapping[str, object], +) -> str: + rows: list[str] = [] + noise_items = diagnostics.get("noise_items") + if not isinstance(noise_items, list): + return '

    Noise explorer

    No noise items.

    ' + for entry in noise_items: + if not isinstance(entry, dict): + continue + item_id = str(entry.get("snapshot_item_id", "")) + item = items.get(item_id) + text = str(getattr(item, "normalized_text", "")) + flags = entry.get("flags") + active_flags = [] + if isinstance(flags, dict): + active_flags = sorted(key for key, enabled in flags.items() if enabled) + rows.append( + "" + f"{html.escape(item_id)}" + f"{html.escape(', '.join(active_flags) or 'none')}" + f"{html.escape(text[:240])}" + "" + ) + return ( + "

    Noise explorer

    " + "" + "" + "".join(rows) + "
    ItemObservable flagsNormalized text preview
    " + ) + + +def _diagnostics(summary: ClusterSummaryRecord) -> dict[str, object]: + try: + payload = json.loads(summary.diagnostics_json) + except json.JSONDecodeError: + return {} + return payload if isinstance(payload, dict) else {} + + +def _display_label(summary: ClusterSummaryRecord) -> str: + if summary.display_cluster_id is None: + return "noise" + return str(summary.display_cluster_id) + + +def _float_value(value: object) -> float: + if isinstance(value, int | float): + return float(value) + return 0.0 + + __all__ = ["render_analytics_html"] diff --git a/codeclone/analytics/schema.py b/codeclone/analytics/schema.py index 05e88c6b..27a5a0f2 100644 --- a/codeclone/analytics/schema.py +++ b/codeclone/analytics/schema.py @@ -131,13 +131,314 @@ "ON clustering_runs(snapshot_id, embedding_generation_id)", "CREATE INDEX IF NOT EXISTS idx_cluster_assignments_run " "ON cluster_assignments(clustering_run_id, cluster_label)", + "CREATE UNIQUE INDEX IF NOT EXISTS idx_embedding_items_vector_row_key " + "ON embedding_items(vector_row_key)", + "CREATE UNIQUE INDEX IF NOT EXISTS idx_cluster_summaries_display " + "ON cluster_summaries(clustering_run_id, display_cluster_id) " + "WHERE display_cluster_id IS NOT NULL", +) + +_INTEGRITY_TRIGGERS = ( + """ + CREATE TRIGGER IF NOT EXISTS analytics_corpus_item_snapshot_guard + BEFORE INSERT ON corpus_items + WHEN NOT EXISTS ( + SELECT 1 FROM corpus_snapshots WHERE snapshot_id=NEW.snapshot_id + ) + BEGIN + SELECT RAISE(ABORT, 'unknown corpus snapshot'); + END + """, + """ + CREATE TRIGGER IF NOT EXISTS analytics_corpus_item_snapshot_update_guard + BEFORE UPDATE OF snapshot_id ON corpus_items + WHEN NOT EXISTS ( + SELECT 1 FROM corpus_snapshots WHERE snapshot_id=NEW.snapshot_id + ) + BEGIN + SELECT RAISE(ABORT, 'unknown corpus snapshot'); + END + """, + """ + CREATE TRIGGER IF NOT EXISTS analytics_snapshot_delete_guard + BEFORE DELETE ON corpus_snapshots + WHEN EXISTS ( + SELECT 1 FROM corpus_items WHERE 
snapshot_id=OLD.snapshot_id + ) OR EXISTS ( + SELECT 1 FROM clustering_runs WHERE snapshot_id=OLD.snapshot_id + ) + BEGIN + SELECT RAISE(ABORT, 'corpus snapshot is still referenced'); + END + """, + """ + CREATE TRIGGER IF NOT EXISTS analytics_corpus_item_delete_guard + BEFORE DELETE ON corpus_items + WHEN EXISTS ( + SELECT 1 FROM embedding_items + WHERE snapshot_item_id=OLD.snapshot_item_id + ) OR EXISTS ( + SELECT 1 FROM cluster_assignments + WHERE snapshot_item_id=OLD.snapshot_item_id + ) + BEGIN + SELECT RAISE(ABORT, 'corpus item is still referenced'); + END + """, + """ + CREATE TRIGGER IF NOT EXISTS analytics_embedding_item_generation_guard + BEFORE INSERT ON embedding_items + WHEN NOT EXISTS ( + SELECT 1 FROM embedding_generations + WHERE embedding_generation_id=NEW.embedding_generation_id + ) + BEGIN + SELECT RAISE(ABORT, 'unknown embedding generation'); + END + """, + """ + CREATE TRIGGER IF NOT EXISTS analytics_embedding_item_update_guard + BEFORE UPDATE OF embedding_generation_id, snapshot_item_id + ON embedding_items + WHEN NOT EXISTS ( + SELECT 1 FROM embedding_generations + WHERE embedding_generation_id=NEW.embedding_generation_id + ) OR NOT EXISTS ( + SELECT 1 FROM corpus_items + WHERE snapshot_item_id=NEW.snapshot_item_id + ) + BEGIN + SELECT RAISE(ABORT, 'invalid embedding item reference'); + END + """, + """ + CREATE TRIGGER IF NOT EXISTS analytics_generation_delete_guard + BEFORE DELETE ON embedding_generations + WHEN EXISTS ( + SELECT 1 FROM embedding_items + WHERE embedding_generation_id=OLD.embedding_generation_id + ) OR EXISTS ( + SELECT 1 FROM clustering_runs + WHERE embedding_generation_id=OLD.embedding_generation_id + ) + BEGIN + SELECT RAISE(ABORT, 'embedding generation is still referenced'); + END + """, + """ + CREATE TRIGGER IF NOT EXISTS analytics_embedding_item_snapshot_guard + BEFORE INSERT ON embedding_items + WHEN NOT EXISTS ( + SELECT 1 FROM corpus_items + WHERE snapshot_item_id=NEW.snapshot_item_id + ) + BEGIN + SELECT 
RAISE(ABORT, 'unknown snapshot item'); + END + """, + """ + CREATE TRIGGER IF NOT EXISTS analytics_clustering_run_guard + BEFORE INSERT ON clustering_runs + WHEN NOT EXISTS ( + SELECT 1 FROM corpus_snapshots WHERE snapshot_id=NEW.snapshot_id + ) OR NOT EXISTS ( + SELECT 1 FROM embedding_generations + WHERE embedding_generation_id=NEW.embedding_generation_id + ) OR NEW.status NOT IN ('pending', 'running', 'completed', 'failed') + BEGIN + SELECT RAISE(ABORT, 'invalid clustering run reference or status'); + END + """, + """ + CREATE TRIGGER IF NOT EXISTS analytics_clustering_run_update_guard + BEFORE UPDATE OF snapshot_id, embedding_generation_id, status + ON clustering_runs + WHEN NOT EXISTS ( + SELECT 1 FROM corpus_snapshots WHERE snapshot_id=NEW.snapshot_id + ) OR NOT EXISTS ( + SELECT 1 FROM embedding_generations + WHERE embedding_generation_id=NEW.embedding_generation_id + ) OR NEW.status NOT IN ('pending', 'running', 'completed', 'failed') + BEGIN + SELECT RAISE(ABORT, 'invalid clustering run reference or status'); + END + """, + """ + CREATE TRIGGER IF NOT EXISTS analytics_clustering_run_delete_guard + BEFORE DELETE ON clustering_runs + WHEN EXISTS ( + SELECT 1 FROM cluster_assignments + WHERE clustering_run_id=OLD.clustering_run_id + ) OR EXISTS ( + SELECT 1 FROM cluster_summaries + WHERE clustering_run_id=OLD.clustering_run_id + ) + BEGIN + SELECT RAISE(ABORT, 'clustering run is still referenced'); + END + """, + """ + CREATE TRIGGER IF NOT EXISTS analytics_assignment_guard + BEFORE INSERT ON cluster_assignments + WHEN NOT EXISTS ( + SELECT 1 + FROM clustering_runs AS run + JOIN corpus_items AS item ON item.snapshot_id=run.snapshot_id + WHERE run.clustering_run_id=NEW.clustering_run_id + AND item.snapshot_item_id=NEW.snapshot_item_id + ) + BEGIN + SELECT RAISE(ABORT, 'assignment does not belong to run snapshot'); + END + """, + """ + CREATE TRIGGER IF NOT EXISTS analytics_assignment_update_guard + BEFORE UPDATE OF clustering_run_id, snapshot_item_id + ON 
cluster_assignments + WHEN NOT EXISTS ( + SELECT 1 + FROM clustering_runs AS run + JOIN corpus_items AS item ON item.snapshot_id=run.snapshot_id + WHERE run.clustering_run_id=NEW.clustering_run_id + AND item.snapshot_item_id=NEW.snapshot_item_id + ) + BEGIN + SELECT RAISE(ABORT, 'assignment does not belong to run snapshot'); + END + """, + """ + CREATE TRIGGER IF NOT EXISTS analytics_summary_guard + BEFORE INSERT ON cluster_summaries + WHEN NOT EXISTS ( + SELECT 1 FROM clustering_runs + WHERE clustering_run_id=NEW.clustering_run_id + ) OR NOT EXISTS ( + SELECT 1 FROM cluster_assignments + WHERE clustering_run_id=NEW.clustering_run_id + AND cluster_label=NEW.cluster_label + ) + BEGIN + SELECT RAISE(ABORT, 'summary has no matching run assignments'); + END + """, + """ + CREATE TRIGGER IF NOT EXISTS analytics_summary_update_guard + BEFORE UPDATE OF clustering_run_id, cluster_label + ON cluster_summaries + WHEN NOT EXISTS ( + SELECT 1 FROM clustering_runs + WHERE clustering_run_id=NEW.clustering_run_id + ) OR NOT EXISTS ( + SELECT 1 FROM cluster_assignments + WHERE clustering_run_id=NEW.clustering_run_id + AND cluster_label=NEW.cluster_label + ) + BEGIN + SELECT RAISE(ABORT, 'summary has no matching run assignments'); + END + """, ) +def _install_integrity_triggers(conn: sqlite3.Connection) -> None: + for statement in _INTEGRITY_TRIGGERS: + conn.execute(statement) + + +def _migrate_1_0_to_1_1(conn: sqlite3.Connection) -> None: + orphan_checks = ( + ( + "corpus_items", + "SELECT COUNT(*) FROM corpus_items AS item " + "LEFT JOIN corpus_snapshots AS snap " + "ON snap.snapshot_id=item.snapshot_id " + "WHERE snap.snapshot_id IS NULL", + ), + ( + "embedding_items", + "SELECT COUNT(*) FROM embedding_items AS item " + "LEFT JOIN embedding_generations AS generation " + "ON generation.embedding_generation_id=item.embedding_generation_id " + "LEFT JOIN corpus_items AS corpus " + "ON corpus.snapshot_item_id=item.snapshot_item_id " + "WHERE generation.embedding_generation_id IS 
NULL " + "OR corpus.snapshot_item_id IS NULL", + ), + ( + "clustering_runs", + "SELECT COUNT(*) FROM clustering_runs AS run " + "LEFT JOIN corpus_snapshots AS snap " + "ON snap.snapshot_id=run.snapshot_id " + "LEFT JOIN embedding_generations AS generation " + "ON generation.embedding_generation_id=run.embedding_generation_id " + "WHERE snap.snapshot_id IS NULL " + "OR generation.embedding_generation_id IS NULL " + "OR run.status NOT IN ('pending','running','completed','failed')", + ), + ( + "cluster_assignments", + "SELECT COUNT(*) FROM cluster_assignments AS assignment " + "LEFT JOIN clustering_runs AS run " + "ON run.clustering_run_id=assignment.clustering_run_id " + "LEFT JOIN corpus_items AS item " + "ON item.snapshot_id=run.snapshot_id " + "AND item.snapshot_item_id=assignment.snapshot_item_id " + "WHERE run.clustering_run_id IS NULL " + "OR item.snapshot_item_id IS NULL", + ), + ( + "cluster_summaries", + "SELECT COUNT(*) FROM cluster_summaries AS summary " + "LEFT JOIN clustering_runs AS run " + "ON run.clustering_run_id=summary.clustering_run_id " + "LEFT JOIN cluster_assignments AS assignment " + "ON assignment.clustering_run_id=summary.clustering_run_id " + "AND assignment.cluster_label=summary.cluster_label " + "WHERE run.clustering_run_id IS NULL " + "OR assignment.snapshot_item_id IS NULL", + ), + ( + "embedding_items.vector_row_key", + "SELECT COUNT(*) FROM (" + "SELECT vector_row_key FROM embedding_items " + "GROUP BY vector_row_key HAVING COUNT(*) > 1" + ")", + ), + ( + "cluster_summaries.display_cluster_id", + "SELECT COUNT(*) FROM (" + "SELECT clustering_run_id, display_cluster_id " + "FROM cluster_summaries " + "WHERE display_cluster_id IS NOT NULL " + "GROUP BY clustering_run_id, display_cluster_id " + "HAVING COUNT(*) > 1" + ")", + ), + ) + for table, query in orphan_checks: + count = int(conn.execute(query).fetchone()[0]) + if count: + raise AnalyticsStoreError( + f"cannot migrate analytics schema: {table} has {count} " + "invalid 
reference(s)" + ) + for statement in _INDEXES: + conn.execute(statement) + _install_integrity_triggers(conn) + conn.execute( + f"UPDATE {_ANALYTICS_META_TABLE} SET value=? WHERE key='schema_version'", + (CORPUS_ANALYTICS_STORE_SCHEMA_VERSION,), + ) + conn.commit() + + def ensure_analytics_schema(conn: sqlite3.Connection) -> None: current = get_meta_value( conn, meta_table=_ANALYTICS_META_TABLE, key="schema_version" ) + if current == "1.0": + _migrate_1_0_to_1_1(conn) + return if current is not None and current != CORPUS_ANALYTICS_STORE_SCHEMA_VERSION: raise AnalyticsStoreError(f"unsupported analytics schema version: {current}") if current is None: @@ -151,12 +452,29 @@ def ensure_analytics_schema(conn: sqlite3.Connection) -> None: "created_at_utc": current_report_timestamp_utc(), }, ) + _install_integrity_triggers(conn) + conn.commit() + + +def validate_analytics_schema(conn: sqlite3.Connection) -> None: + current = get_meta_value( + conn, meta_table=_ANALYTICS_META_TABLE, key="schema_version" + ) + if current != CORPUS_ANALYTICS_STORE_SCHEMA_VERSION: + raise AnalyticsStoreError( + "analytics store requires writable migration to schema " + f"{CORPUS_ANALYTICS_STORE_SCHEMA_VERSION}; found {current or 'missing'}" + ) def open_analytics_db(path: Path) -> sqlite3.Connection: from ..observability.sqlite_access import open_instrumented_sqlite_db - return open_instrumented_sqlite_db(path, ensure_schema=ensure_analytics_schema) + return open_instrumented_sqlite_db( + path, + ensure_schema=ensure_analytics_schema, + foreign_keys=True, + ) def open_analytics_db_readonly(path: Path) -> sqlite3.Connection: @@ -164,7 +482,7 @@ def open_analytics_db_readonly(path: Path) -> sqlite3.Connection: return open_instrumented_sqlite_db_readonly( path, - validate_schema=ensure_analytics_schema, + validate_schema=validate_analytics_schema, ) @@ -172,4 +490,5 @@ def open_analytics_db_readonly(path: Path) -> sqlite3.Connection: "ensure_analytics_schema", "open_analytics_db", 
"open_analytics_db_readonly", + "validate_analytics_schema", ] diff --git a/codeclone/analytics/store/protocols.py b/codeclone/analytics/store/protocols.py index c31c0186..85f87031 100644 --- a/codeclone/analytics/store/protocols.py +++ b/codeclone/analytics/store/protocols.py @@ -109,6 +109,8 @@ def list_summaries( def commit(self) -> None: ... + def rollback(self) -> None: ... + def close(self) -> None: ... @@ -127,6 +129,22 @@ def read_vectors( snapshot_item_ids: Sequence[str], ) -> dict[str, list[float]]: ... + def read_vector_rows( + self, + *, + embedding_generation_id: str, + snapshot_item_ids: Sequence[str], + ) -> dict[str, dict[str, object]]: ... + + def list_generation_item_ids( + self, + *, + embedding_generation_id: str, + limit: int, + ) -> tuple[str, ...]: ... + + def delete_generation(self, embedding_generation_id: str) -> None: ... + def close(self) -> None: ... diff --git a/codeclone/analytics/store/sqlite.py b/codeclone/analytics/store/sqlite.py index 46e2baea..65d09198 100644 --- a/codeclone/analytics/store/sqlite.py +++ b/codeclone/analytics/store/sqlite.py @@ -388,6 +388,9 @@ def list_summaries( def commit(self) -> None: self._conn.commit() + def rollback(self) -> None: + self._conn.rollback() + def close(self) -> None: self._conn.close() diff --git a/codeclone/analytics/store/vectors_lancedb.py b/codeclone/analytics/store/vectors_lancedb.py index d8c268a1..509d3ceb 100644 --- a/codeclone/analytics/store/vectors_lancedb.py +++ b/codeclone/analytics/store/vectors_lancedb.py @@ -8,13 +8,15 @@ import hashlib import importlib +import math +import struct from collections.abc import Mapping, Sequence from pathlib import Path from types import ModuleType from typing import Protocol, cast from ..corpus.keys import sha256_hex -from ..exceptions import AnalyticsCapabilityError +from ..exceptions import AnalyticsCapabilityError, AnalyticsStoreError _TABLE_NAME = "corpus_vectors" _ID_QUERY_BATCH = 500 @@ -38,11 +40,30 @@ def 
when_not_matched_insert_all(self) -> _LanceMergeInsert: ... def execute(self, records: list[dict[str, object]]) -> None: ... +class _ArrowType(Protocol): + @property + def list_size(self) -> int: ... + + +class _ArrowField(Protocol): + @property + def type(self) -> _ArrowType: ... + + +class _ArrowSchema(Protocol): + def field(self, name: str) -> _ArrowField: ... + + class _LanceTable(Protocol): + @property + def schema(self) -> _ArrowSchema: ... + def search(self, vector: list[float] | None = None) -> _LanceSearchQuery: ... def merge_insert(self, key: str) -> _LanceMergeInsert: ... + def delete(self, predicate: str) -> None: ... + class _LanceConnection(Protocol): def open_table(self, name: str) -> _LanceTable: ... @@ -79,8 +100,8 @@ def vector_row_key(*, embedding_generation_id: str, snapshot_item_id: str) -> st def vector_digest(vector: Sequence[float]) -> str: - payload = ",".join(f"{value:.8f}" for value in vector) - return hashlib.sha256(payload.encode("utf-8")).hexdigest() + payload = b"".join(struct.pack(" None: def _open_or_create_table(self, pyarrow: ModuleType) -> _LanceTable: try: - return self._conn.open_table(_TABLE_NAME) + table = self._conn.open_table(_TABLE_NAME) except ValueError as exc: if f"Table '{_TABLE_NAME}' was not found" not in str(exc): raise - return self._conn.create_table( - _TABLE_NAME, - schema=_schema(pyarrow, self._dimension), - exist_ok=True, - ) + return self._conn.create_table( + _TABLE_NAME, + schema=_schema(pyarrow, self._dimension), + exist_ok=True, + ) + field = table.schema.field("vector") + actual_dimension = getattr(field.type, "list_size", None) + if actual_dimension != self._dimension: + raise AnalyticsStoreError( + "analytics vector store dimension mismatch: " + f"existing={actual_dimension}, configured={self._dimension}" + ) + return table def write_vectors( self, @@ -120,6 +149,13 @@ def write_vectors( msg = "vector must be a list of floats" raise TypeError(msg) float_vector = [float(value) for value in vector] + if 
len(float_vector) != self._dimension: + raise AnalyticsStoreError( + f"vector dimension mismatch: actual={len(float_vector)}, " + f"expected={self._dimension}" + ) + if not all(math.isfinite(value) for value in float_vector): + raise AnalyticsStoreError("vectors must contain only finite values") row_key = vector_row_key( embedding_generation_id=embedding_generation_id, snapshot_item_id=snapshot_item_id, @@ -148,18 +184,42 @@ def read_vectors( embedding_generation_id: str, snapshot_item_ids: Sequence[str], ) -> dict[str, list[float]]: + loaded: dict[str, list[float]] = {} + for item_id, row in self.read_vector_rows( + embedding_generation_id=embedding_generation_id, + snapshot_item_ids=snapshot_item_ids, + ).items(): + vector = row.get("vector") + if isinstance(vector, list): + loaded[item_id] = [float(value) for value in vector] + return loaded + + def read_vector_rows( + self, + *, + embedding_generation_id: str, + snapshot_item_ids: Sequence[str], + ) -> dict[str, dict[str, object]]: if not snapshot_item_ids: return {} - loaded: dict[str, list[float]] = {} + loaded: dict[str, dict[str, object]] = {} ordered = sorted(set(snapshot_item_ids)) for start in range(0, len(ordered), _ID_QUERY_BATCH): batch = ordered[start : start + _ID_QUERY_BATCH] - quoted = ", ".join(f"'{item}'" for item in batch) + quoted = ", ".join(_sql_literal(item) for item in batch) rows = ( self._table.search(None) - .select(["snapshot_item_id", "vector"]) + .select( + [ + "vector_row_key", + "snapshot_item_id", + "vector_digest", + "vector", + ] + ) .where( - f"embedding_generation_id = '{embedding_generation_id}' " + "embedding_generation_id = " + f"{_sql_literal(embedding_generation_id)} " f"AND snapshot_item_id IN ({quoted})" ) .limit(len(batch)) @@ -169,11 +229,47 @@ def read_vectors( item_id = row.get("snapshot_item_id") vector = row.get("vector") if isinstance(item_id, str) and isinstance(vector, list): - loaded[item_id] = [float(value) for value in vector] + loaded[item_id] = { + 
"vector_row_key": str(row.get("vector_row_key", "")), + "vector_digest": str(row.get("vector_digest", "")), + "vector": [float(value) for value in vector], + } return loaded + def delete_generation(self, embedding_generation_id: str) -> None: + self._table.delete( + f"embedding_generation_id = {_sql_literal(embedding_generation_id)}" + ) + + def list_generation_item_ids( + self, + *, + embedding_generation_id: str, + limit: int, + ) -> tuple[str, ...]: + if limit <= 0: + return () + rows = ( + self._table.search(None) + .select(["snapshot_item_id"]) + .where(f"embedding_generation_id = {_sql_literal(embedding_generation_id)}") + .limit(limit) + .to_list() + ) + return tuple( + sorted( + str(item_id) + for row in rows + if isinstance((item_id := row.get("snapshot_item_id")), str) + ) + ) + def close(self) -> None: return None +def _sql_literal(value: str) -> str: + return "'" + value.replace("'", "''") + "'" + + __all__ = ["AnalyticsVectorStore", "vector_digest", "vector_row_key"] diff --git a/codeclone/analytics/workflow.py b/codeclone/analytics/workflow.py index 574dd42a..71bcf5d1 100644 --- a/codeclone/analytics/workflow.py +++ b/codeclone/analytics/workflow.py @@ -8,11 +8,11 @@ import uuid from collections.abc import Sequence -from dataclasses import dataclass +from dataclasses import dataclass, replace from pathlib import Path from ..config.analytics import AnalyticsConfig, resolve_analytics_config -from ..observability import operation +from ..observability import span from ..report.meta import current_report_timestamp_utc from ..utils.json_io import json_text from .clustering.canonicalize import ( @@ -25,10 +25,16 @@ compute_centroids, nearest_cluster_ids, ) -from .clustering.models import NOISE_LABEL, ClusteringParameters -from .clustering.pipeline import run_clustering_pipeline +from .clustering.models import ( + NOISE_LABEL, + ClusteringParameters, + ClusteringPipelineResult, + ClusterPartition, +) +from .clustering.pipeline import 
resolve_effective_parameters, run_clustering_pipeline from .clustering.sweep import ( SweepCandidateResult, + clustering_algorithm_manifest, iter_sweep_candidates, rank_sweep_results, run_digest, @@ -44,9 +50,9 @@ from .embedding.generation import ( EmbeddingBatchResult, generate_embeddings_for_snapshot, - load_snapshot_vectors, ) from .exceptions import AnalyticsWorkflowError +from .integrity import load_validated_snapshot_vectors, validate_persisted_run from .store.protocols import SnapshotBuildResult from .store.sqlite import SqliteCorpusAnalyticsStore from .store.vectors_lancedb import AnalyticsVectorStore @@ -73,7 +79,7 @@ def run_snapshot( representation_kind: str, config: AnalyticsConfig | None = None, ) -> SnapshotBuildResult: - with operation(name="analytics.snapshot", surface="cli"): + with span(name="analytics.snapshot"): return build_intent_snapshot( root_path=root_path, representation_kind=representation_kind, @@ -100,7 +106,7 @@ def run_embed( if known: msg = f"{msg}; known snapshots: {known}" raise AnalyticsWorkflowError(msg) - with operation(name="analytics.embed", surface="cli"): + with span(name="analytics.embed"): return generate_embeddings_for_snapshot( store=store, vector_store=vector_store, @@ -128,12 +134,14 @@ def run_clustering( dimension=resolved_config.embedding_dimension, ) try: - with operation(name="analytics.cluster", surface="cli"): + with span(name="analytics.cluster"): items = store.list_items(snapshot_id) if not items: raise AnalyticsWorkflowError("snapshot has no corpus items") - vectors = load_snapshot_vectors( + vectors = load_validated_snapshot_vectors( + store=store, vector_store=vector_store, + snapshot_id=snapshot_id, embedding_generation_id=embedding_generation_id, items=items, ) @@ -184,6 +192,11 @@ def select_cluster_run( run = store.get_clustering_run(clustering_run_id) if run is None: raise AnalyticsWorkflowError(f"unknown clustering run: {clustering_run_id}") + validate_persisted_run( + store=store, + 
snapshot_id=run.snapshot_id, + clustering_run_id=clustering_run_id, + ) store.set_selected_run( snapshot_id=run.snapshot_id, embedding_generation_id=run.embedding_generation_id, @@ -203,7 +216,9 @@ def run_build( config: AnalyticsConfig | None = None, ) -> BuildResult: resolved_config = config or resolve_analytics_config(root_path) - with operation(name="analytics.build", surface="cli"): + if use_recommended and not sweep: + raise AnalyticsWorkflowError("--use-recommended requires --sweep") + with span(name="analytics.build"): snapshot = run_snapshot( root_path=root_path, representation_kind=representation_kind, @@ -235,8 +250,6 @@ def run_build( break finally: store.close() - if use_recommended and recommended is None and run_ids: - recommended = run_ids[0] return BuildResult( snapshot_id=snapshot.snapshot_id, embedding_generation_id=embed.embedding_generation_id, @@ -259,6 +272,10 @@ def _run_sweep( n_samples=len(item_ids), n_features=len(vectors[0]) if vectors else 0, ) + if not candidates: + raise AnalyticsWorkflowError( + "corpus is too small for the configured clustering sweep" + ) run_ids: list[str] = [] scored: list[SweepCandidateResult] = [] for candidate in candidates: @@ -320,17 +337,16 @@ def _execute_single_run( config: AnalyticsConfig, recommended_by_heuristic: bool, ) -> str: - pipeline = run_clustering_pipeline( - snapshot_item_ids=item_ids, - embeddings=vectors, - requested=requested, - random_seed=config.cluster_random_seed, + effective = resolve_effective_parameters( + requested, + n_samples=len(item_ids), + n_features=len(vectors[0]) if vectors else 0, ) - if pipeline is None: + if effective is None: raise AnalyticsWorkflowError("clustering parameters produced no valid run") run_id = f"run-{uuid.uuid4().hex[:16]}" created_at = current_report_timestamp_utc() - effective = pipeline.effective_parameters + algorithm_manifest = clustering_algorithm_manifest() run = ClusteringRunRecord( clustering_run_id=run_id, snapshot_id=snapshot_id, @@ -352,6 
+368,7 @@ def _execute_single_run( "cluster_selection_method": effective.cluster_selection_method, "n_samples": effective.n_samples, "n_features": effective.n_features, + "algorithm_manifest": algorithm_manifest, }, sort_keys=True, ), @@ -361,18 +378,76 @@ def _execute_single_run( embedding_generation_id=embedding_generation_id, effective=effective, random_seed=config.cluster_random_seed, + algorithm_manifest=algorithm_manifest, ), recommended_by_heuristic=recommended_by_heuristic, selected_by_maintainer=False, - status="completed", + status="running", created_at_utc=created_at, - finished_at_utc=current_report_timestamp_utc(), + finished_at_utc=None, error_message=None, ) store.insert_clustering_run(run) - partitions = canonicalize_partitions(pipeline.partitions) + store.commit() + try: + pipeline = run_clustering_pipeline( + snapshot_item_ids=item_ids, + embeddings=vectors, + requested=requested, + random_seed=config.cluster_random_seed, + ) + if pipeline is None: + raise AnalyticsWorkflowError("clustering parameters produced no valid run") + coordinates = dict(zip(item_ids, pipeline.reduced_coordinates, strict=True)) + partitions = canonicalize_partitions( + pipeline.partitions, + coordinates=coordinates, + ) + _persist_run_artifacts( + store=store, + run_id=run_id, + item_ids=item_ids, + items=items, + pipeline=pipeline, + partitions=partitions, + coordinates=coordinates, + config=config, + ) + store.update_clustering_run( + replace( + run, + status="completed", + finished_at_utc=current_report_timestamp_utc(), + ) + ) + store.commit() + except Exception as exc: + store.rollback() + store.update_clustering_run( + replace( + run, + status="failed", + finished_at_utc=current_report_timestamp_utc(), + error_message=str(exc), + ) + ) + store.commit() + raise + return run_id + + +def _persist_run_artifacts( + *, + store: SqliteCorpusAnalyticsStore, + run_id: str, + item_ids: list[str], + items: Sequence[CorpusItemRecord], + pipeline: ClusteringPipelineResult, + 
partitions: Sequence[ClusterPartition], + coordinates: dict[str, tuple[float, ...]], + config: AnalyticsConfig, +) -> None: membership_map = partition_membership_map(partitions) - coordinates = dict(zip(item_ids, pipeline.reduced_coordinates, strict=True)) items_by_id = {item.snapshot_item_id: item for item in items} strength_by_id = dict(zip(item_ids, pipeline.membership_strengths, strict=True)) assignments: list[ClusterAssignmentRecord] = [] @@ -405,12 +480,15 @@ def _execute_single_run( min_correlation_sample_size=config.min_correlation_sample_size, ) if partition.cluster_label != NOISE_LABEL: - diagnostics["nearest_clusters"] = list( - nearest_cluster_ids( - cluster_label=partition.cluster_label, - centroids=centroids, - ) + nearest_labels = nearest_cluster_ids( + cluster_label=partition.cluster_label, + centroids=centroids, ) + diagnostics["nearest_clusters"] = [ + display_id + for label in nearest_labels + if (display_id := display_map.get(label)) is not None + ] summaries.append( ClusterSummaryRecord( clustering_run_id=run_id, @@ -422,7 +500,6 @@ def _execute_single_run( ) ) store.insert_cluster_summaries(summaries) - return run_id __all__ = [ diff --git a/codeclone/config/analytics.py b/codeclone/config/analytics.py index 2f988464..726ea56f 100644 --- a/codeclone/config/analytics.py +++ b/codeclone/config/analytics.py @@ -8,9 +8,11 @@ from dataclasses import dataclass from pathlib import Path +from typing import Literal -from pydantic import BaseModel, ConfigDict, Field, ValidationError +from pydantic import BaseModel, ConfigDict, Field +from ..audit.validation import DEFAULT_AUDIT_PATH from ..utils.repo_paths import RepoPathPolicy, resolve_under_repo_root from .analytics_specs import ANALYTICS_NESTED_TABLE_KEY from .memory import resolve_memory_config @@ -38,14 +40,14 @@ class AnalyticsPyprojectTable(BaseModel): vectors_path: str | None = None embedding_model: str | None = None embedding_dimension: int | None = Field(default=None, gt=0) - 
embedding_provider: str | None = None + embedding_provider: Literal["fastembed"] | None = None embedding_cache_dir: str | None = None min_correlation_sample_size: int | None = Field(default=None, gt=0) cluster_random_seed: int | None = None default_pca_dimensions: int | None = Field(default=None, gt=0) default_min_cluster_size: int | None = Field(default=None, gt=0) default_min_samples: int | None = Field(default=None, gt=0) - default_cluster_selection_method: str | None = None + default_cluster_selection_method: Literal["eom", "leaf"] | None = None allow_model_download: bool | None = None @@ -53,6 +55,7 @@ class AnalyticsPyprojectTable(BaseModel): class AnalyticsConfig: db_path: Path vectors_path: Path + audit_db_path: Path embedding_model: str embedding_dimension: int embedding_provider: str @@ -72,23 +75,15 @@ def _resolve_path(root_path: Path, raw: str | None, default_relative: str) -> Pa return resolve_under_repo_root(root_path, selected, policy=policy) -def _load_pyproject_table(root_path: Path) -> AnalyticsPyprojectTable | None: - payload = load_pyproject_config(root_path) - raw = payload.get(ANALYTICS_NESTED_TABLE_KEY) - if raw is None: - return None - if not isinstance(raw, dict): - msg = "tool.codeclone.analytics must be a table" - raise TypeError(msg) - try: - return AnalyticsPyprojectTable.model_validate(raw) - except ValidationError as exc: - raise ValueError(str(exc)) from exc - - def resolve_analytics_config(root_path: Path) -> AnalyticsConfig: resolved_root = root_path.resolve() - table = _load_pyproject_table(resolved_root) + payload = load_pyproject_config(resolved_root) + raw_table = payload.get(ANALYTICS_NESTED_TABLE_KEY) + table = ( + AnalyticsPyprojectTable.model_validate(raw_table) + if isinstance(raw_table, dict) + else None + ) # The FastEmbed model artifact is a multi-hundred-MB download; analytics # vectors are kept separate (own LanceDB sidecar + embedding_generation_id), # but the model weights are shared with Engineering Memory rather 
than @@ -106,6 +101,15 @@ def resolve_analytics_config(root_path: Path) -> AnalyticsConfig: table.vectors_path if table is not None else None, DEFAULT_ANALYTICS_VECTORS_RELATIVE, ), + audit_db_path=_resolve_path( + resolved_root, + ( + str(payload["audit_path"]) + if payload.get("audit_path") is not None + else None + ), + DEFAULT_AUDIT_PATH, + ), embedding_model=( table.embedding_model if table is not None and table.embedding_model is not None diff --git a/codeclone/contracts/__init__.py b/codeclone/contracts/__init__.py index 10d1d202..90fdf4e6 100644 --- a/codeclone/contracts/__init__.py +++ b/codeclone/contracts/__init__.py @@ -41,11 +41,11 @@ # Corpus analytics store (.codeclone/analytics/corpus_clustering.sqlite3) and # derived export/representation contracts. Bump independently from memory schema. -CORPUS_ANALYTICS_STORE_SCHEMA_VERSION: Final = "1.0" -CORPUS_EXPORT_SCHEMA_VERSION: Final = "1.0" -CORPUS_REPRESENTATION_CONTRACT_VERSION: Final = "1" +CORPUS_ANALYTICS_STORE_SCHEMA_VERSION: Final = "1.1" +CORPUS_EXPORT_SCHEMA_VERSION: Final = "1.1" +CORPUS_REPRESENTATION_CONTRACT_VERSION: Final = "2" CORPUS_NORMALIZER_VERSION: Final = "1" -CORPUS_EMBEDDING_CONTRACT_VERSION: Final = "1" +CORPUS_EMBEDDING_CONTRACT_VERSION: Final = "2" CORPUS_AGENT_LABEL_CONTRACT_VERSION: Final = "1" CORPUS_PARTITION_MAP_VERSION: Final = "1" diff --git a/codeclone/surfaces/cli/analytics.py b/codeclone/surfaces/cli/analytics.py index 1ff97977..af45df8c 100644 --- a/codeclone/surfaces/cli/analytics.py +++ b/codeclone/surfaces/cli/analytics.py @@ -9,6 +9,7 @@ from __future__ import annotations import argparse +import sqlite3 import sys from collections.abc import Callable from pathlib import Path @@ -23,11 +24,16 @@ INTENT_REPRESENTATION_DESCRIPTION, INTENT_REPRESENTATION_DESCRIPTION_WITH_FRAME, ) -from ...analytics.exceptions import AnalyticsCapabilityError, AnalyticsWorkflowError +from ...analytics.exceptions import ( + AnalyticsCapabilityError, + AnalyticsError, + 
AnalyticsWorkflowError, +) from ...analytics.export.json_export import ( export_clustering_json, export_sweep_comparison_json, ) +from ...analytics.integrity import validate_persisted_run from ...analytics.report.html import render_analytics_html from ...analytics.store.sqlite import SqliteCorpusAnalyticsStore from ...analytics.workflow import ( @@ -39,8 +45,14 @@ select_cluster_run, ) from ...config.analytics import resolve_analytics_config +from ...config.observability import resolve_observability_config from ...contracts import ExitCode -from ...utils.json_io import write_json_document_atomically +from ...observability import bootstrap, operation, shutdown, span +from ...utils.json_io import ( + json_text, + write_json_document_atomically, + write_json_text_atomically, +) def _representation_kind(raw: str) -> str: @@ -89,8 +101,8 @@ def _build_parser() -> argparse.ArgumentParser: cluster = sub.add_parser("cluster", help="Cluster embedded snapshot") _add_root(cluster) - cluster.add_argument("--snapshot-id", required=True) - cluster.add_argument("--embedding-generation-id", required=True) + cluster.add_argument("--snapshot-id") + cluster.add_argument("--embedding-generation-id") cluster.add_argument("--sweep", action="store_true") cluster.add_argument("--select-run", dest="select_run", default=None) @@ -143,14 +155,14 @@ def _run_snapshot_command(args: argparse.Namespace, root: Path) -> int: if args.output_json is not None: write_json_document_atomically(args.output_json, payload) else: - print(payload) + _print_json(payload) return ExitCode.SUCCESS def _run_embed_command(args: argparse.Namespace, root: Path) -> int: _require_capability("embed") embed_result = run_embed(root_path=root, snapshot_id=args.snapshot_id) - print( + _print_json( { "embedding_generation_id": embed_result.embedding_generation_id, "item_count": embed_result.item_count, @@ -163,8 +175,13 @@ def _run_cluster_command(args: argparse.Namespace, root: Path) -> int: if args.select_run: 
_require_capability("base") select_cluster_run(root_path=root, clustering_run_id=args.select_run) - print({"selected_run_id": args.select_run}) + _print_json({"selected_run_id": args.select_run}) return ExitCode.SUCCESS + if not args.snapshot_id or not args.embedding_generation_id: + raise AnalyticsWorkflowError( + "--snapshot-id and --embedding-generation-id are required " + "unless --select-run is used" + ) _require_capability("cluster") run_ids = run_clustering( root_path=root, @@ -172,7 +189,7 @@ def _run_cluster_command(args: argparse.Namespace, root: Path) -> int: embedding_generation_id=args.embedding_generation_id, sweep=args.sweep, ) - print({"clustering_run_ids": list(run_ids)}) + _print_json({"clustering_run_ids": list(run_ids)}) return ExitCode.SUCCESS @@ -183,7 +200,7 @@ def _write_build_exports( build_result: BuildResult, ) -> None: config = resolve_analytics_config(root) - store = SqliteCorpusAnalyticsStore.open(config.db_path) + store = SqliteCorpusAnalyticsStore.open_readonly(config.db_path) try: snapshot = store.get_snapshot(build_result.snapshot_id) if snapshot is None: @@ -193,36 +210,41 @@ def _write_build_exports( if build_result.clustering_run_ids else None ) - if args.json_out is not None and primary_run_id is not None: - if args.sweep and not args.use_recommended: - text = export_sweep_comparison_json( + with span(name="analytics.report"): + if args.json_out is not None and primary_run_id is not None: + if args.sweep and not args.use_recommended: + text = export_sweep_comparison_json( + store=store, + snapshot_id=build_result.snapshot_id, + embedding_generation_id=build_result.embedding_generation_id, + ) + else: + text = export_clustering_json( + store=store, + snapshot_id=build_result.snapshot_id, + clustering_run_id=primary_run_id, + ) + args.json_out.parent.mkdir(parents=True, exist_ok=True) + write_json_text_atomically(args.json_out, text) + if args.html_out is not None and primary_run_id is not None: + run = 
store.get_clustering_run(primary_run_id) + if run is None: + raise AnalyticsWorkflowError("clustering run missing after build") + rendered = render_analytics_html( store=store, - snapshot_id=build_result.snapshot_id, - embedding_generation_id=build_result.embedding_generation_id, + snapshot=snapshot, + run=run, + comparison_only=args.sweep and not args.use_recommended, ) - else: - text = export_clustering_json( - store=store, - snapshot_id=build_result.snapshot_id, - clustering_run_id=primary_run_id, - ) - args.json_out.write_text(text, encoding="utf-8") - if args.html_out is not None and primary_run_id is not None: - run = store.get_clustering_run(primary_run_id) - if run is None: - raise AnalyticsWorkflowError("clustering run missing after build") - html = render_analytics_html( - store=store, - snapshot=snapshot, - run=run, - comparison_only=args.sweep and not args.use_recommended, - ) - args.html_out.write_text(html, encoding="utf-8") + args.html_out.parent.mkdir(parents=True, exist_ok=True) + write_json_text_atomically(args.html_out, rendered) finally: store.close() def _run_build_command(args: argparse.Namespace, root: Path) -> int: + if args.use_recommended and not args.sweep: + raise AnalyticsWorkflowError("--use-recommended requires --sweep") _require_capability("full") build_result = run_build( root_path=root, @@ -232,7 +254,7 @@ def _run_build_command(args: argparse.Namespace, root: Path) -> int: ) if args.json_out is not None or args.html_out is not None: _write_build_exports(args=args, root=root, build_result=build_result) - print( + _print_json( { "snapshot_id": build_result.snapshot_id, "embedding_generation_id": build_result.embedding_generation_id, @@ -248,8 +270,10 @@ def _run_clusters_command(args: argparse.Namespace, root: Path) -> int: config = resolve_analytics_config(root) store = SqliteCorpusAnalyticsStore.open_readonly(config.db_path) try: + if store.get_snapshot(args.snapshot_id) is None: + raise AnalyticsWorkflowError(f"unknown snapshot: 
{args.snapshot_id}") runs = store.list_clustering_runs(snapshot_id=args.snapshot_id) - print( + _print_json( [ { "clustering_run_id": run.clustering_run_id, @@ -276,9 +300,10 @@ def _run_cluster_show_command(args: argparse.Namespace, root: Path) -> int: clustering_run_id=args.run_id, ) if args.output is not None: - args.output.write_text(text, encoding="utf-8") + args.output.parent.mkdir(parents=True, exist_ok=True) + write_json_text_atomically(args.output, text) else: - print(text) + print(text, end="") finally: store.close() return ExitCode.SUCCESS @@ -289,13 +314,18 @@ def _run_outliers_command(args: argparse.Namespace, root: Path) -> int: config = resolve_analytics_config(root) store = SqliteCorpusAnalyticsStore.open_readonly(config.db_path) try: + validate_persisted_run( + store=store, + snapshot_id=args.snapshot_id, + clustering_run_id=args.run_id, + ) assignments = store.list_assignments(args.run_id) noise = [ item.snapshot_item_id for item in assignments if item.cluster_label == NOISE_LABEL ] - print({"noise_items": noise}) + _print_json({"noise_items": noise}) finally: store.close() return ExitCode.SUCCESS @@ -318,18 +348,23 @@ def analytics_main(argv: list[str] | None = None) -> int: parser = _build_parser() args = parser.parse_args(argv) root = Path(args.root).resolve() - handler = _COMMAND_HANDLERS.get(args.command) - if handler is None: - parser.error(f"unknown command: {args.command}") - return ExitCode.INTERNAL_ERROR - try: - return handler(args, root) - except AnalyticsCapabilityError as exc: - print(str(exc), file=sys.stderr) + if not root.is_dir(): + print(f"repository root is not a directory: {root}", file=sys.stderr) return ExitCode.CONTRACT_ERROR - except AnalyticsWorkflowError as exc: + handler = _COMMAND_HANDLERS[args.command] + try: + bootstrap(resolve_observability_config(), root=root) + with operation(name=f"cli.analytics.{args.command}", surface="cli"): + return handler(args, root) + except (AnalyticsError, OSError, ValueError, 
sqlite3.Error) as exc: print(str(exc), file=sys.stderr) return ExitCode.CONTRACT_ERROR + finally: + shutdown() + + +def _print_json(payload: object) -> None: + print(json_text(payload, sort_keys=True)) __all__ = ["analytics_main"] diff --git a/docs/README-pypi.md b/docs/README-pypi.md index 911959ac..62fcd4bd 100644 --- a/docs/README-pypi.md +++ b/docs/README-pypi.md @@ -65,7 +65,7 @@ Engineering Memory, Corpus Analytics, and runtime diagnostics: ```bash uv tool install "codeclone[analytics]" -codeclone analytics build --root . --use-recommended +codeclone analytics build --root . --sweep --use-recommended codeclone memory trajectory dashboard --root . CODECLONE_OBSERVABILITY_ENABLED=1 codeclone . codeclone observability trace --root . --html /tmp/codeclone-observer.html diff --git a/docs/book/10-config-and-defaults.md b/docs/book/10-config-and-defaults.md index 9adc384d..1cd2189e 100644 --- a/docs/book/10-config-and-defaults.md +++ b/docs/book/10-config-and-defaults.md @@ -349,7 +349,10 @@ contracts. Optional intent corpus clustering uses `[tool.codeclone.analytics]`. Install `codeclone[analytics]` before running `codeclone analytics …`. Paths resolve -under the repository root. Full key list: +under the repository root. The historical audit source is inherited from the +top-level `[tool.codeclone].audit_path`; it is not duplicated in the analytics +table. `embedding_provider` currently accepts only `fastembed`, and +`default_cluster_selection_method` accepts `eom` or `leaf`. Full key list: [Corpus Analytics](27-corpus-analytics.md#configuration). 
Refs: diff --git a/docs/book/11-cli.md b/docs/book/11-cli.md index 4e968995..88701e79 100644 --- a/docs/book/11-cli.md +++ b/docs/book/11-cli.md @@ -140,10 +140,16 @@ Refs: - Corpus Analytics commands are terminal-only, offline clustering of historical intents (requires `codeclone[analytics]`): - `codeclone analytics snapshot|embed|cluster|build|clusters|cluster-show|outliers` - - `build` runs snapshot → embed → cluster; `--use-recommended` selects the - heuristic sweep winner. + - `build` runs snapshot → embed → cluster. `--use-recommended` requires + `--sweep` and renders the heuristic winner; it does not mark a maintainer + selection. + - `cluster --select-run RUN_ID` is the only CLI action that sets + `selected_by_maintainer=true`. - Representations: `description` (default) or `description_with_frame`. - Artifacts live under `.codeclone/analytics/` (SQLite metadata + LanceDB vectors). + - Expected capability, schema, ownership, and integrity errors exit `2` + without a traceback. Inspection/export commands require only the base + install and open analytics metadata read-only. - Full contract: [Corpus Analytics](27-corpus-analytics.md). - Controller and workspace query flags are mutually exclusive where enforced: - `--blast-radius` and `--patch-verify` cannot be combined. 
diff --git a/docs/book/24-compatibility-and-versioning.md b/docs/book/24-compatibility-and-versioning.md index f31f9128..285f768b 100644 --- a/docs/book/24-compatibility-and-versioning.md +++ b/docs/book/24-compatibility-and-versioning.md @@ -41,10 +41,10 @@ Current contract versions: - `EXPERIENCE_DISTILLATION_VERSION = "experience-v1"` (derived Experience rows) - `SEMANTIC_INDEX_FORMAT_VERSION = "1"` (LanceDB sidecar; separate from SQLite memory schema) - `PLATFORM_OBSERVABILITY_SCHEMA_VERSION = "1.0"` (dev-only telemetry SQLite) -- `CORPUS_ANALYTICS_STORE_SCHEMA_VERSION = "1.0"` (corpus analytics SQLite) -- `CORPUS_EXPORT_SCHEMA_VERSION = "1.0"` (clustering JSON export) -- `CORPUS_REPRESENTATION_CONTRACT_VERSION = "1"` (intent representation payloads) -- `CORPUS_EMBEDDING_CONTRACT_VERSION = "1"` (analytics embedding sidecar) +- `CORPUS_ANALYTICS_STORE_SCHEMA_VERSION = "1.1"` (corpus analytics SQLite) +- `CORPUS_EXPORT_SCHEMA_VERSION = "1.1"` (clustering JSON export) +- `CORPUS_REPRESENTATION_CONTRACT_VERSION = "2"` (intent representation payloads) +- `CORPUS_EMBEDDING_CONTRACT_VERSION = "2"` (analytics embedding sidecar) Refs: @@ -79,6 +79,15 @@ Version bump rules: SQLite layout or export semantics change incompatibly; rebuild analytics artifacts rather than treating them as analysis truth ( see [27-corpus-analytics.md](27-corpus-analytics.md)) + - store `1.1` adds migration-time orphan checks, relationship guards, and + unique vector/display keys; + - export `1.1` includes complete snapshot/generation/run manifests, + assignments, diagnostics, noise, and sweep candidates; + - representation `2` hashes raw representation-owned inputs before + normalization and excludes live registry state from source identity; + - embedding `2` defines vector digests over canonical little-endian + float32 bytes. Older embedding generations are rejected and must be + regenerated. 
Operational compatibility rules: @@ -135,6 +144,8 @@ Refs: | Engineering Memory schema bump | Older DBs migrate or re-init per `schema_migrate.py` | | Semantic index format bump | LanceDB sidecar invalidated; run `memory semantic rebuild` | | Platform Observability bump | Local diagnostic store reader/writer must migrate together | +| Corpus analytics store bump | Writable open migrates supported stores; read-only open rejects stale schema | +| Corpus embedding contract bump | Existing generations must be regenerated before clustering | ## Determinism / canonicalization diff --git a/docs/book/27-corpus-analytics.md b/docs/book/27-corpus-analytics.md index e9dc504b..e7583b63 100644 --- a/docs/book/27-corpus-analytics.md +++ b/docs/book/27-corpus-analytics.md @@ -1,43 +1,80 @@ # Corpus Analytics -Corpus Analytics is an optional, offline analytics lane for clustering historical -change-control intents. It reads audit, Engineering Memory trajectory, and optional -workspace intent registry overlays, builds immutable corpus snapshots, generates -**separate** analytics embeddings, and runs deterministic PCA + HDBSCAN clustering. +Corpus Analytics is an optional, offline analytics lane for clustering +historical change-control intents. It reconstructs an intent corpus from +retained controller evidence, creates immutable-by-contract snapshots, writes +separate analytics embeddings, and runs deterministic PCA + HDBSCAN clustering. -It is **not** analysis truth: outputs never affect reports, gates, baselines, -cache compatibility, Engineering Memory records, or edit authorization. +It is **derived evidence, not authority**. Corpus Analytics never changes the +canonical structural report, reports/gates/baselines, cache compatibility, +Engineering Memory governance, or edit authorization. -For a practical walkthrough, see the -[Corpus Analytics guide](../guide/analytics/overview.md). 
+For a command-oriented walkthrough, see the +[Corpus Analytics guide](../guide/analytics/overview.md). Configuration is +indexed in [Config and Defaults](10-config-and-defaults.md), CLI behavior in +[CLI](11-cli.md), and storage layout in +[Schema Layouts](appendix/b-schema-layouts.md). -## Trust boundary +## Trust Boundary ```mermaid flowchart LR - A["Audit DB<br/>intent.declared"] --> B["Corpus snapshot<br/>SQLite metadata"] - C["Trajectory / patch trail"] --> B - D["Optional registry overlay"] --> B - B --> E["Analytics embeddings<br/>LanceDB sidecar"] - E --> F["PCA + HDBSCAN clustering"] - F --> G["JSON / HTML export"] - B -. "must not influence" .-> H["Reports, gates, baseline, memory authority"] + A["Audit DB<br/>intent.declared"] --> S["Corpus snapshot<br/>SQLite metadata"] + T["Trajectory projection<br/>outcome and quality"] --> S + P["Patch Trail<br/>scope and verification facts"] --> S + R["Live registry overlay<br/>inspection only"] -.-> S + S --> E["Embedding generation<br/>separate LanceDB sidecar"] + E --> C["L2 normalize<br/>PCA(full)<br/>HDBSCAN(euclidean)"] + C --> D["Persisted assignments<br/>summaries and diagnostics"] + D --> J["JSON export"] + D --> H["Self-contained HTML"] + S -. "never authorizes or gates" .-> X["Structural report<br/>baseline, gates, memory governance"] ``` -Properties: +Source ownership is explicit: -- requires `codeclone[analytics]` optional dependencies; -- stores artifacts under `.codeclone/analytics/` (SQLite + LanceDB); -- uses dedicated contract versions in `codeclone/contracts/__init__.py` - (`CORPUS_ANALYTICS_STORE_SCHEMA_VERSION`, `CORPUS_EXPORT_SCHEMA_VERSION`, …); -- keeps analytics vectors separate from Engineering Memory semantic index, while - reusing the shared FastEmbed model artifact (not a second download); -- excludes live registry text from normalized corpus digests; -- opens all SQLite through `codeclone/observability/sqlite_access.py` (wrapping - `codeclone/utils/sqlite_store.py`) so connection-open is instrumented only when - observability is enabled (lazy import). +| Fact | Owner | +|------|-------| +| Original description and declaration order | Earliest audit `intent.declared` by `audit_sequence` | +| Declared/changed files and verification facts | Patch Trail | +| Outcome, quality tier, labels, anomalies | Selected current-version trajectory | +| Lease/status and other live coordination state | Optional registry overlay | -## Install +The registry overlay is exported for inspection when present, but it never +changes normalized text, representation identity, or `source_digest`. + +## End-to-End Lifecycle + +```mermaid +sequenceDiagram + participant U as Maintainer + participant CLI as codeclone analytics + participant SQL as Analytics SQLite + participant V as Analytics LanceDB + participant O as Platform Observability + + U->>CLI: snapshot --root . + CLI->>O: analytics.snapshot span + CLI->>SQL: snapshot + corpus items + U->>CLI: embed --snapshot-id SNAPSHOT + CLI->>O: analytics.embed span + CLI->>V: float32 vectors + CLI->>SQL: generation + row keys + digests + U->>CLI: cluster --snapshot-id ... --embedding-generation-id ... 
+ CLI->>O: analytics.cluster span + CLI->>SQL: running run + CLI->>V: validated vectors + CLI->>SQL: completed assignments and summaries + U->>CLI: cluster-show or build outputs + CLI->>O: analytics.report span + CLI-->>U: atomic JSON / HTML +``` + +Every clustering run is inserted as `running`. A successful run atomically +commits assignments, summaries, and `completed`; a processing error rolls those +artifacts back and persists `failed` with an error message. + +## Installation ```bash uv sync --extra analytics @@ -47,83 +84,240 @@ pip install "codeclone[analytics]" Capability tiers: -| Tier | Packages | Commands | -|-----------|----------------------------------|-----------------------------------| -| `base` | core only | snapshot metadata, list runs | -| `embed` | fastembed + lancedb | `embed`, vector IO | -| `cluster` | scikit-learn + hdbscan | `cluster`, sweep, diagnostics | -| `full` | all of the above | `build` end-to-end | +| Tier | Packages | Commands | +|------|----------|----------| +| `base` | core only | `snapshot`, `clusters`, `cluster-show`, `outliers`, `cluster --select-run` | +| `embed` | FastEmbed + LanceDB | `embed` | +| `cluster` | scikit-learn + external `hdbscan` | clustering and sweep | +| `full` | all of the above | `build` | + +Missing optional dependencies are contract errors (exit `2`) with an install +hint. Inspection/export commands do not import FastEmbed. -`umap-learn` (in the `analytics` extra, Python versions before 3.14) is optional and -used only for the HTML report's 2-D visualization — never for clustering input. +`umap-learn` remains an optional dependency on supported Python versions, but +Slice 1 does not emit a UMAP visualization. Any later UMAP view must be labeled +visualization-only and must never feed clustering. ## Configuration -`[tool.codeclone.analytics]` in `pyproject.toml` overrides repository-local -defaults. Paths resolve under the repository root unless absolute. 
- -| Key | Default | Role | -|-----------------------------------|-------------------------------------------------|-------------------------------------------| -| `db_path` | `.codeclone/analytics/corpus_clustering.sqlite3`| Snapshot / clustering metadata SQLite | -| `vectors_path` | `.codeclone/analytics/corpus_vectors` | Analytics LanceDB directory | -| `embedding_model` | `BAAI/bge-small-en-v1.5` | FastEmbed model id | -| `embedding_dimension` | `384` | Vector width contract | -| `embedding_provider` | `fastembed` | Embedding backend | -| `embedding_cache_dir` | inherits memory (`.codeclone/memory/fastembed`) | Shared FastEmbed model artifact cache | -| `min_correlation_sample_size` | `5` | Minimum sample size for correlation stats | -| `cluster_random_seed` | `42` | Deterministic clustering seed | -| `default_pca_dimensions` | `64` | PCA projection width | -| `default_min_cluster_size` | `8` | HDBSCAN `min_cluster_size` default | -| `default_min_samples` | `3` | HDBSCAN `min_samples` default | -| `default_cluster_selection_method`| `eom` | HDBSCAN selection method | -| `allow_model_download` | inherits memory (`false` by default) | FastEmbed may download the model when `true` | - -Resolver: `codeclone/config/analytics.py:resolve_analytics_config`. - -`embedding_cache_dir` and `allow_model_download` default to the resolved -`[tool.codeclone.memory.semantic]` values, so the FastEmbed model is downloaded -once and shared with Engineering Memory. Only the analytics **vectors** -(`vectors_path`) and snapshot/clustering metadata (`db_path`) live under -`.codeclone/analytics/`. 
- -## CLI - -Terminal-only commands under `codeclone analytics`: - -| Command | Purpose | -|-----------------|------------------------------------------------------| -| `snapshot` | Build immutable intent corpus snapshot | -| `embed` | Generate analytics embeddings for a snapshot | -| `cluster` | Cluster embedded snapshot (optional `--sweep`) | -| `build` | Snapshot → embed → cluster end-to-end | -| `clusters` | List clustering runs for a snapshot | -| `cluster-show` | Export one clustering run as JSON | -| `outliers` | Show noise-cluster assignments | - -Representations: - -- `description` — intent text only (default); -- `description_with_frame` — adds bounded structural frame fields. - -Sweep modes write both `recommended_by_heuristic` and -`selected_by_maintainer` metadata; maintainer selection is explicit via -`cluster --select-run`. - -Full CLI contract: [11-cli.md](11-cli.md). - -## Module map - -| Area | Path | -|-----------------|-------------------------------------------| -| Config | `codeclone/config/analytics.py` | -| Workflow | `codeclone/analytics/workflow.py` | -| Corpus adapters | `codeclone/analytics/corpus/` | -| Store | `codeclone/analytics/store/` | -| Clustering | `codeclone/analytics/clustering/` | -| CLI | `codeclone/surfaces/cli/analytics.py` | - -## Refs - -- `codeclone/contracts/__init__.py` — corpus analytics version constants -- `tests/test_analytics_foundation.py`, `tests/test_analytics_integration.py` -- `docs/book/appendix/b-schema-layouts.md` — store layout summary +`[tool.codeclone.analytics]` overrides repository-local defaults. Relative paths +resolve from the repository root; absolute paths are allowed but are represented +as `` in snapshot manifests so user-specific paths do not enter +portable identity. 
+ +| Key | Default | Contract | +|-----|---------|----------| +| `db_path` | `.codeclone/analytics/corpus_clustering.sqlite3` | Analytics metadata store | +| `vectors_path` | `.codeclone/analytics/corpus_vectors` | Dedicated LanceDB vectors | +| `embedding_model` | `BAAI/bge-small-en-v1.5` | FastEmbed model id | +| `embedding_dimension` | `384` | Vector width | +| `embedding_provider` | `fastembed` | Only supported provider in Slice 1 | +| `embedding_cache_dir` | memory semantic cache | Shared model artifact cache, not shared vectors | +| `min_correlation_sample_size` | `5` | Correlation denominator guard | +| `cluster_random_seed` | `42` | PCA deterministic seed | +| `default_pca_dimensions` | `64` | Requested PCA width | +| `default_min_cluster_size` | `8` | HDBSCAN default | +| `default_min_samples` | `3` | HDBSCAN default | +| `default_cluster_selection_method` | `eom` | `eom` or `leaf` | +| `allow_model_download` | memory semantic setting | Whether FastEmbed may download | + +The historical audit database follows top-level +`[tool.codeclone].audit_path`. This prevents Analytics from silently reading a +different audit source than the controller. + +## Identity And Digests + +Corpus identity has three layers: + +```text +source_record_key = sha256(project_id + "\n" + intent_id) +representation_key = sha256(lane + kind + version + source_record_key) +snapshot_item_id = sha256(snapshot_id + "\n" + representation_key) +``` + +`source_digest` hashes source schema versions, lane, representation contract, +normalizer version, and sorted source/provenance digests. It excludes: + +- snapshot ids and timestamps; +- absolute source paths; +- live registry overlay state. + +Representation contract `2` hashes raw representation-owned fields **before** +normalization. For `description_with_frame`, that includes description, intent +kind, declared path families, and typed declared constraints. 
+ +Cluster membership identity is: + +```text +membership_digest = sha256(sorted(snapshot_item_ids) joined by "\n") +``` + +HDBSCAN numeric labels are not stable identity. Display ids are assigned after +canonical ordering by size descending, actual PCA-space medoid item id, then +membership digest. Noise remains an explicit non-display bucket. + +## Storage And Integrity + +Current analytics store schema is `1.1`. + +- Writable open migrates supported `1.0` stores after checking for orphan and + duplicate records. +- Read-only open never migrates and rejects a stale schema. +- SQLite relationship triggers reject orphan-producing inserts/updates/deletes. +- Vector row keys and non-null display cluster ids are unique. +- Reporting and inspection open the metadata store read-only. + +SQLite and LanceDB cannot participate in one physical transaction. The +embedding workflow therefore: + +1. computes a new generation; +2. stages SQLite metadata; +3. writes LanceDB rows; +4. commits SQLite only after the sidecar write succeeds; +5. rolls back metadata and removes the generation on ordinary failures. + +Before clustering, CodeClone validates the generation contract, exact snapshot +item set, dimensions, row keys, and vector digests. Cross-snapshot runs, +missing sidecar rows, stale embedding contracts, or corrupted float32 payloads +are rejected rather than accepted as completed analytics. + +## Embedding Reproducibility + +Embedding contract `2` stores: + +- provider and provider package version; +- model id, optional revision, optional artifact fingerprint; +- dimensions and embedding contract version; +- cosine similarity manifest and L2 preprocessing contract; +- vector row key and SHA-256 digest over canonical little-endian float32 bytes. + +When model revision/artifact fingerprint is unavailable, +`exact_model_artifact_reproducibility=false`. JSON and HTML then state: + +> Full vector reproducibility is not guaranteed from model id alone. 
+ +Exact reproduction additionally depends on the model artifact, provider and +numeric-library versions, hardware/runtime behavior, and identical normalized +inputs. Old embedding contract generations must be regenerated. + +## Clustering Contract + +The fixed path is: + +```text +float32 embeddings + -> L2 normalization + -> PCA(svd_solver="full", whiten=false, random_state=42) + -> external hdbscan.HDBSCAN(metric="euclidean", core_dist_n_jobs=1) + -> canonical partitions + -> diagnostics +``` + +The run manifest records Python, NumPy, SciPy, scikit-learn, and HDBSCAN +versions plus all fixed algorithm choices. `run_digest` covers snapshot, +embedding generation, effective sample/feature dimensions, effective +parameters, random seed, and the algorithm manifest. + +A sweep discards invalid small-corpus candidates and deduplicates requested +settings that collapse to the same effective parameters. A corpus with no valid +candidate fails explicitly instead of producing an empty successful sweep. + +Sweep ranking sets exactly one `recommended_by_heuristic=true`. +`selected_by_maintainer` remains false until an explicit: + +```bash +codeclone analytics cluster --root . --select-run RUN_ID +``` + +Recommendation is evidence, not a human decision. + +## Diagnostics + +Each cluster summary includes: + +- size and corpus percentage; +- average membership strength; +- PCA-space medoid; +- representatives and low-strength/far-boundary items; +- nearest cluster ids by PCA centroid distance; +- metadata distributions with numerator and denominator; +- explicit `insufficient_sample` when the denominator is below the configured + guard. + +The noise explorer emits only observable text/membership flags: +`short_text`, `long_text`, `multiple_paragraphs`, +`high_conjunction_count`, `template_match`, and +`low_membership_strength`. It does not invent semantic classes. 
+ +## CLI And Reports + +The approved direct namespace is `codeclone analytics`: + +| Command | Purpose | +|---------|---------| +| `snapshot` | Build an intent corpus snapshot | +| `embed` | Generate a separate analytics embedding generation | +| `cluster` | Run one configuration or a bounded sweep | +| `build` | Run snapshot → embed → cluster | +| `clusters` | List runs for a snapshot | +| `cluster-show` | Export one completed run as JSON | +| `outliers` | Emit noise assignment ids | + +`build --sweep --use-recommended` renders the heuristic winner but does not +record a maintainer selection. `--use-recommended` without `--sweep` is rejected +before dependency checks or artifact creation. + +Output behavior: + +- single-run JSON contains snapshot/generation manifests, embedding references, + run and algorithm manifest, clusters, assignments, noise, item metadata, and + compatible sweep summaries; +- sweep JSON contains every completed candidate with full assignments, + diagnostics, and recommendation/selection flags; +- sweep HTML without `--use-recommended` is comparison-only; +- detailed HTML includes overview, cluster index, representative/boundary + groups, correlations with denominators, and the noise explorer; +- JSON and HTML are self-contained and written atomically to explicit output + paths. + +Expected user/config/capability/schema/integrity errors exit `2` on stderr +without a traceback. + +## Observability + +With `CODECLONE_OBSERVABILITY_ENABLED=1`, the CLI creates one operation named +`cli.analytics.<command>` with nested spans: + +- `analytics.snapshot` +- `analytics.embed` +- `analytics.cluster` +- `analytics.build` +- `analytics.report` when an export is rendered + +Observability is bootstrapped before analytics stores open, so instrumented +SQLite queries are attributed to the active stage. These measurements are +development telemetry only; see +[Platform Observability](26-platform-observability.md). 
+ +## Cross-Links + +- Historical trajectory evidence: + [Trajectory Quality and Passport](13-engineering-memory/trajectory-quality-and-passport.md) +- Runtime configuration: + [Config and Defaults](10-config-and-defaults.md) +- Exit semantics and terminal surfaces: + [CLI](11-cli.md) +- Version bump rules: + [Compatibility and Versioning](24-compatibility-and-versioning.md) +- SQLite/LanceDB layout: + [Schema Layouts](appendix/b-schema-layouts.md) + +## Locked By Tests + +- `tests/test_analytics_foundation.py` +- `tests/test_analytics_trajectory_selection.py` +- `tests/test_analytics_integration.py` +- `tests/test_analytics_cli.py` +- `tests/test_config_analytics.py` +- `tests/test_sqlite_readonly_openers.py` +- `tests/test_architecture.py::test_analytics_package_does_not_import_forbidden_surfaces` diff --git a/docs/book/appendix/b-schema-layouts.md b/docs/book/appendix/b-schema-layouts.md index feabc69f..e6a3b53e 100644 --- a/docs/book/appendix/b-schema-layouts.md +++ b/docs/book/appendix/b-schema-layouts.md @@ -886,22 +886,39 @@ stores. See [Platform Observability](../26-platform-observability.md) for configuration, privacy, query, and anti-inference rules. -## Corpus analytics store (`1.0`) +## Corpus analytics store (`1.1`) Optional SQLite database (default `.codeclone/analytics/corpus_clustering.sqlite3`) and LanceDB vector directory (default `.codeclone/analytics/corpus_vectors`). Derived offline analytics — not report, baseline, cache, audit, or Engineering Memory truth. 
-| Artifact | Role | -|---------------------|-------------------------------------------------------------| -| `corpus_snapshots` | Immutable snapshot metadata and source digests | -| `embedding_runs` | Analytics embedding generation records | -| `clustering_runs` | PCA/HDBSCAN parameters, diagnostics, membership digest | -| LanceDB sidecar | Separate vectors from Engineering Memory semantic index | +| Artifact | Role | +|-------------------------|-----------------------------------------------------------------------| +| `corpus_snapshots` | Immutable-by-contract snapshot metadata and source digests | +| `corpus_items` | Normalized representation, metadata, and optional registry overlay | +| `embedding_generations` | Provider/model/preprocessing manifest | +| `embedding_items` | Vector row keys, float32 digests, dimensions; no vector blobs | +| `clustering_runs` | Requested/effective parameters, algorithm manifest, lifecycle status | +| `cluster_assignments` | Per-run item label, strength, and membership digest | +| `cluster_summaries` | Canonical display id and persisted diagnostics per cluster/noise | +| LanceDB sidecar | Separate float32 vectors from Engineering Memory semantic index | Store schema version: `CORPUS_ANALYTICS_STORE_SCHEMA_VERSION` in -`codeclone/contracts/__init__.py` (currently **`1.0`**). +`codeclone/contracts/__init__.py` (currently **`1.1`**). + +Writable open migrates `1.0` to `1.1` only after checking existing snapshots, +embedding references, clustering assignments/summaries, vector row keys, and +display ids. Read-only open never migrates and rejects stale schema. SQLite +triggers prevent orphan-producing inserts, relationship updates, and parent +deletes; unique indexes protect vector row keys and non-null display cluster ids. + +The SQLite transaction and LanceDB sidecar cannot share one physical +transaction. 
The embedding workflow therefore writes metadata and vectors as +one controlled operation, rolls SQLite back and removes the generation on +ordinary failures, and validates row keys, dimensions, and float32 digests +before clustering. Crash residue is detected as an integrity error rather than +accepted as a completed generation. See [Corpus Analytics](../27-corpus-analytics.md) for CLI, configuration, and trust boundaries. diff --git a/docs/guide/analytics/overview.md b/docs/guide/analytics/overview.md index 82778a7e..86a08b15 100644 --- a/docs/guide/analytics/overview.md +++ b/docs/guide/analytics/overview.md @@ -19,10 +19,13 @@ uv sync --extra analytics Build snapshot, embeddings, and a recommended clustering run in one step: ```bash -codeclone analytics build --root . --use-recommended +codeclone analytics build --root . --sweep --use-recommended ``` -Write artifacts to explicit paths: +`--use-recommended` requires `--sweep`. It renders the heuristic winner for +inspection; it does **not** set `selected_by_maintainer`. + +Write a detailed single-run report to explicit paths: ```bash codeclone analytics build \ @@ -32,6 +35,16 @@ codeclone analytics build \ --json-out /tmp/corpus-clusters.json ``` +Write a sweep comparison without choosing a primary detail view: + +```bash +codeclone analytics build \ + --root . \ + --sweep \ + --html-out /tmp/corpus-sweep.html \ + --json-out /tmp/corpus-sweep.json +``` + ## Step-by-step ```bash @@ -51,12 +64,38 @@ codeclone analytics cluster \ codeclone analytics clusters --root . --snapshot-id SNAPSHOT_ID codeclone analytics cluster-show \ --root . --snapshot-id SNAPSHOT_ID --run-id RUN_ID + +# 5. Record an explicit maintainer choice +codeclone analytics cluster --root . --select-run RUN_ID ``` ## Configuration Defaults live in `[tool.codeclone.analytics]` inside `pyproject.toml`. See [Corpus Analytics contract](../../book/27-corpus-analytics.md) for the full table. 
+The historical audit source follows top-level `[tool.codeclone].audit_path`. + +## Reproducibility + +Exports persist snapshot and embedding manifests, vector digests, requested and +effective parameters, fixed PCA/HDBSCAN settings, package versions, and the +random seed. Unless the model revision and artifact fingerprint are known, +CodeClone explicitly reports that full vector reproducibility is not guaranteed +from the model id alone. + +Existing embedding generations created under an incompatible embedding contract +are rejected. Run `embed` again for the same snapshot to create a compatible +generation. + +## Failure behavior + +- Expected input, capability, schema, and artifact-integrity errors exit with + code `2` and no traceback. +- A clustering run is persisted as `running`, then becomes `completed` or + `failed`; failed runs contain no committed assignments or summaries. +- JSON and HTML outputs are written atomically. +- Snapshot, embed, cluster, and report spans are recorded only when + `CODECLONE_OBSERVABILITY_ENABLED=1`. ## What this is not diff --git a/tests/test_analytics_cli.py b/tests/test_analytics_cli.py new file mode 100644 index 00000000..2726cd8e --- /dev/null +++ b/tests/test_analytics_cli.py @@ -0,0 +1,610 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import json +from argparse import Namespace +from collections.abc import Iterator +from contextlib import contextmanager +from pathlib import Path +from types import SimpleNamespace +from typing import cast + +import pytest + +from codeclone.analytics.capabilities import CapabilityStatus +from codeclone.analytics.contracts import ( + ClusterAssignmentRecord, + ClusteringRunRecord, + ClusteringRunStatus, + CorpusSnapshotRecord, +) +from codeclone.analytics.embedding.generation import EmbeddingBatchResult +from codeclone.analytics.exceptions import ( + AnalyticsCapabilityError, + AnalyticsWorkflowError, +) +from codeclone.analytics.store.protocols import SnapshotBuildResult +from codeclone.analytics.store.sqlite import SqliteCorpusAnalyticsStore +from codeclone.analytics.workflow import BuildResult +from codeclone.contracts import ExitCode +from codeclone.observability.store.schema import ( + observability_store_path, + open_observability_store, +) +from codeclone.surfaces.cli import analytics as analytics_cli +from tests.fixtures.analytics.helpers import write_intent_declared_event + + +def _snapshot() -> CorpusSnapshotRecord: + return CorpusSnapshotRecord( + snapshot_id="snapshot", + lane="intent", + representation_kind="intent.description.v1", + representation_version="2", + source_stores_json="{}", + source_schema_versions_json="{}", + record_count=2, + source_digest="digest", + created_at_utc="2026-01-01T00:00:00Z", + ) + + +def _run( + run_id: str = "run", + *, + status: str = "completed", +) -> ClusteringRunRecord: + return ClusteringRunRecord( + clustering_run_id=run_id, + snapshot_id="snapshot", + embedding_generation_id="embedding", + requested_parameters_json="{}", + effective_parameters_json="{}", + random_seed=42, + run_digest="digest", + recommended_by_heuristic=True, + selected_by_maintainer=False, + status=cast(ClusteringRunStatus, 
status), + created_at_utc="2026-01-01T00:00:00Z", + finished_at_utc="2026-01-01T00:00:01Z", + error_message=None, + ) + + +class _ReadStore: + def __init__(self) -> None: + self.snapshot: CorpusSnapshotRecord | None = _snapshot() + self.runs: tuple[ClusteringRunRecord, ...] = (_run(),) + self.assignments = ( + ClusterAssignmentRecord("run", "noise", -1, 0.1, "noise-digest"), + ClusterAssignmentRecord("run", "clustered", 0, 0.9, "cluster-digest"), + ) + self.closed = False + + def get_snapshot(self, _snapshot_id: str) -> CorpusSnapshotRecord | None: + return self.snapshot + + def list_clustering_runs( + self, + *, + snapshot_id: str, + embedding_generation_id: str | None = None, + ) -> tuple[ClusteringRunRecord, ...]: + assert snapshot_id == "snapshot" + assert embedding_generation_id in {None, "embedding"} + return self.runs + + def get_clustering_run(self, run_id: str) -> ClusteringRunRecord | None: + return next( + (run for run in self.runs if run.clustering_run_id == run_id), + None, + ) + + def list_assignments( + self, + _run_id: str, + ) -> tuple[ClusterAssignmentRecord, ...]: + return self.assignments + + def close(self) -> None: + self.closed = True + + +@pytest.fixture +def quiet_cli_runtime(monkeypatch: pytest.MonkeyPatch) -> None: + @contextmanager + def fake_operation(**_kwargs: object) -> Iterator[None]: + yield + + monkeypatch.setattr(analytics_cli, "bootstrap", lambda *_args, **_kwargs: None) + monkeypatch.setattr(analytics_cli, "shutdown", lambda: None) + monkeypatch.setattr(analytics_cli, "operation", fake_operation) + + +def test_analytics_namespace_is_direct() -> None: + parser = analytics_cli._build_parser() + help_text = parser.format_help() + + assert help_text.startswith("usage: codeclone analytics") + assert " analytics corpus " not in help_text + assert set(analytics_cli._COMMAND_HANDLERS) == { + "build", + "cluster", + "cluster-show", + "clusters", + "embed", + "outliers", + "snapshot", + } + + +def 
def test_use_recommended_requires_sweep_before_capability_check(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
    capsys: pytest.CaptureFixture[str],
) -> None:
    """``--use-recommended`` without ``--sweep`` fails before capability checks."""

    def capability_check_must_not_run(_capability: object) -> None:
        pytest.fail("capability check must not run")

    monkeypatch.setattr(
        analytics_cli,
        "_require_capability",
        capability_check_must_not_run,
    )
    argv = ["build", "--root", str(tmp_path), "--use-recommended"]

    exit_code = analytics_cli.analytics_main(argv)
    stderr = capsys.readouterr().err

    assert exit_code == int(ExitCode.CONTRACT_ERROR)
    assert "--use-recommended requires --sweep" in stderr
    # The argument error must be reported before any state directory is created.
    assert not (tmp_path / ".codeclone").exists()
def test_snapshot_observability_captures_span_and_db_queries(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A snapshot run with observability on records an operation and a span."""
    audit_db = tmp_path / ".codeclone/db/audit.sqlite3"
    missing_memory_db = tmp_path / ".codeclone/memory/missing.sqlite3"
    write_intent_declared_event(
        db_path=audit_db,
        repo_root=tmp_path,
        intent_id="intent-a",
        description="Observe analytics snapshot",
    )
    monkeypatch.setenv("CODECLONE_OBSERVABILITY_ENABLED", "1")
    monkeypatch.setattr(
        "codeclone.analytics.corpus.snapshot.resolve_memory_db_path",
        lambda _root: missing_memory_db,
    )

    exit_code = analytics_cli.analytics_main(["snapshot", "--root", str(tmp_path)])

    assert exit_code == int(ExitCode.SUCCESS)
    operations_query = "SELECT name, status FROM platform_operations"
    spans_query = (
        "SELECT name, counters_json FROM platform_spans "
        "WHERE name='analytics.snapshot'"
    )
    store = open_observability_store(observability_store_path(tmp_path))
    try:
        operation_row = store.execute(operations_query).fetchone()
        span_row = store.execute(spans_query).fetchone()
    finally:
        store.close()

    assert operation_row == ("cli.analytics.snapshot", "ok")
    assert span_row is not None
    span_counters = json.loads(span_row[1])
    assert span_counters["db_queries"] > 0
def test_clusters_command_rejects_unknown_snapshot_and_lists_runs(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
    capsys: pytest.CaptureFixture[str],
) -> None:
    """``clusters`` lists runs for a known snapshot and rejects an unknown one."""
    monkeypatch.setattr(analytics_cli, "_require_capability", lambda _value: None)
    config = SimpleNamespace(db_path=tmp_path / "analytics.sqlite3")
    monkeypatch.setattr(analytics_cli, "resolve_analytics_config", lambda _root: config)

    # Known snapshot: the single canned run is printed as JSON.
    store = _ReadStore()
    monkeypatch.setattr(
        SqliteCorpusAnalyticsStore,
        "open_readonly",
        lambda _path: store,
    )
    exit_code = analytics_cli._run_clusters_command(
        Namespace(snapshot_id="snapshot"),
        tmp_path,
    )
    assert exit_code == ExitCode.SUCCESS
    expected_listing = [
        {
            "clustering_run_id": "run",
            "recommended_by_heuristic": True,
            "selected_by_maintainer": False,
            "status": "completed",
        }
    ]
    assert json.loads(capsys.readouterr().out) == expected_listing
    assert store.closed is True

    # Unknown snapshot: the command raises, and the store is still closed.
    missing = _ReadStore()
    missing.snapshot = None
    monkeypatch.setattr(
        SqliteCorpusAnalyticsStore,
        "open_readonly",
        lambda _path: missing,
    )
    unknown_args = Namespace(snapshot_id="missing")
    with pytest.raises(AnalyticsWorkflowError, match="unknown snapshot"):
        analytics_cli._run_clusters_command(unknown_args, tmp_path)
    assert missing.closed is True
def test_build_export_routing_and_missing_artifacts(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Sweep exports are written to both targets; missing artifacts raise."""
    config = SimpleNamespace(db_path=tmp_path / "analytics.sqlite3")
    monkeypatch.setattr(analytics_cli, "resolve_analytics_config", lambda _root: config)
    store = _ReadStore()
    monkeypatch.setattr(
        SqliteCorpusAnalyticsStore,
        "open_readonly",
        lambda _path: store,
    )
    monkeypatch.setattr(
        analytics_cli,
        "export_sweep_comparison_json",
        lambda **_kwargs: '{"kind":"sweep"}\n',
    )
    # The HTML stub echoes ``comparison_only`` so the routing flag is observable.
    monkeypatch.setattr(
        analytics_cli,
        "render_analytics_html",
        lambda **kwargs: f"{kwargs['comparison_only']}",
    )
    export_dir = tmp_path / "nested"
    args = Namespace(
        json_out=export_dir / "sweep.json",
        html_out=export_dir / "sweep.html",
        sweep=True,
        use_recommended=False,
    )

    def write_exports() -> None:
        analytics_cli._write_build_exports(
            args=args,
            root=tmp_path,
            build_result=BuildResult("snapshot", "embedding", ("run",), None),
        )

    write_exports()
    assert json.loads(args.json_out.read_text(encoding="utf-8")) == {"kind": "sweep"}
    assert args.html_out.read_text(encoding="utf-8") == "True"

    store.snapshot = None
    with pytest.raises(AnalyticsWorkflowError, match="snapshot missing"):
        write_exports()

    store.snapshot = _snapshot()
    store.runs = ()
    with pytest.raises(AnalyticsWorkflowError, match="clustering run missing"):
        write_exports()
check_capability, install_hint +from codeclone.analytics.clustering.canonicalize import ( + canonicalize_partitions, + display_cluster_id_map, + medoid_item_id, + partition_membership_map, +) +from codeclone.analytics.clustering.diagnostics import ( + build_cluster_diagnostics, + cluster_size_percent, + compute_centroids, + correlation_rate, + metadata_distribution, + nearest_cluster_ids, + noise_explorer_flags, +) +from codeclone.analytics.clustering.models import ( + ClusteringParameters, + ClusterPartition, +) +from codeclone.analytics.clustering.pipeline import ( + is_noise_label, + resolve_effective_parameters, + run_clustering_pipeline, +) +from codeclone.analytics.clustering.sweep import ( + SweepCandidate, + SweepCandidateResult, + clustering_algorithm_manifest, + iter_sweep_candidates, + rank_sweep_results, + run_digest, + score_clustering_result, +) +from codeclone.analytics.contracts import ( + INTENT_REPRESENTATION_DESCRIPTION, + INTENT_REPRESENTATION_DESCRIPTION_WITH_FRAME, + CorpusItemRecord, +) +from codeclone.analytics.corpus.adapters import intent_historical from codeclone.analytics.corpus.adapters.intent_historical import ( + HistoricalIntentSourceItem, compute_source_digest, extract_historical_intent_items, materialize_corpus_item, @@ -23,6 +71,19 @@ snapshot_item_id, source_record_key, ) +from codeclone.analytics.corpus.normalizer import source_content_digest +from codeclone.analytics.corpus.representations.intent import ( + IntentRepresentationInput, + build_intent_description_v1, + build_intent_description_with_frame_v1, + build_representation_text, + declared_constraints_from_audit_payload, + declared_path_families_from_patch_trail, + representation_digest, +) +from codeclone.analytics.exceptions import AnalyticsCapabilityError +from codeclone.audit.reader import AuditRecord +from codeclone.surfaces.mcp._workspace_intent_schema import open_intent_registry_db from tests.fixtures.analytics.helpers import write_intent_declared_event @@ -48,6 
def _corpus_item(
    item_id: str = "item",
    *,
    text: str = "short",
    metadata_json: str = "{}",
) -> CorpusItemRecord:
    """Build a corpus item whose derived fields are all suffixed with *item_id*."""

    def tagged(prefix: str) -> str:
        return f"{prefix}-{item_id}"

    record = CorpusItemRecord(
        snapshot_id="snap",
        project_id="project",
        normalizer_version="1",
        registry_overlay_json=None,
        snapshot_item_id=item_id,
        normalized_text=text,
        metadata_json=metadata_json,
        representation_key=tagged("rep"),
        source_record_key=tagged("source"),
        intent_id=tagged("intent"),
        normalized_digest=tagged("normalized"),
        representation_digest=tagged("representation"),
    )
    return record
def test_source_content_digest_hashes_raw_inputs_before_normalization() -> None:
    """Different raw descriptions or intent kinds must give different digests."""
    longer_digest = source_content_digest({"description": "Add validation"})
    shorter_digest = source_content_digest({"description": "validation"})
    assert longer_digest != shorter_digest

    plain = IntentRepresentationInput(
        description="Validate request",
        intent_kind="feature",
        declared_path_families=(),
        declared_constraints=(),
    )
    changed = replace(plain, intent_kind="fix")
    plain_digest = source_content_digest(
        {
            "description": plain.description,
            "intent_kind": plain.intent_kind,
        }
    )
    changed_digest = source_content_digest(
        {
            "description": changed.description,
            "intent_kind": changed.intent_kind,
        }
    )
    # Only ``intent_kind`` differs between the two payloads.
    assert plain_digest != changed_digest
def test_capability_matrix_and_import_probe(monkeypatch: pytest.MonkeyPatch) -> None:
    """Check the capability matrix and the real package-availability probe.

    The genuine ``_package_available`` is bound *before* the module attribute
    is monkeypatched. Importing it after the patch (as this test used to do)
    returns the stub lambda, so the final assertion would pass without ever
    running the real probe.
    """
    from codeclone.analytics.capabilities import _package_available

    # Capability matrix: pretend only lancedb and sklearn are installed.
    monkeypatch.setattr(
        "codeclone.analytics.capabilities._package_available",
        lambda package: package in {"lancedb", "sklearn"},
    )
    assert check_capability("base").available is True
    assert check_capability("embed").missing_packages == ("fastembed",)
    assert check_capability("cluster").missing_packages == ("hdbscan",)
    assert check_capability("full").missing_packages == ("fastembed", "hdbscan")
    assert install_hint(()) == "uv sync --extra analytics"
    assert install_hint(("fastembed",)) == "uv sync --extra analytics"

    def missing_import(_name: str) -> object:
        raise ImportError

    # Import probe: the real function must report an unimportable package as
    # unavailable rather than propagate the ImportError.
    monkeypatch.setattr(importlib, "import_module", missing_import)
    assert _package_available("missing") is False
def test_canonical_helpers_cover_empty_missing_and_noise() -> None:
    """Exercise medoid selection and canonical maps on edge-case inputs."""
    no_members = medoid_item_id(member_ids=(), coordinates={})
    assert no_members == ""
    single_member = medoid_item_id(member_ids=("only",), coordinates={})
    assert single_member == "only"
    partly_located = medoid_item_id(
        member_ids=("missing", "present"),
        coordinates={"present": (0.0,)},
    )
    assert partly_located == "missing"

    clustered = ClusterPartition(1, ("a", "b"), "stored")
    noise = ClusterPartition(-1, ("noise",), "noise-stored")
    canonical = canonicalize_partitions(
        (clustered, noise),
        coordinates={"a": (0.0,)},
    )
    # The partition labelled -1 gets no display id.
    assert display_cluster_id_map(canonical) == {1: 1, -1: None}

    membership = partition_membership_map(canonical)
    assert membership["a"] == membership["b"]
    assert membership["noise"] != membership["a"]
"rich", + text="