diff --git a/CHANGELOG.md b/CHANGELOG.md
index 560b808..54da0d5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,7 @@ This project has a published GitHub Release line, but no stable support or API g
 
 ## [Unreleased]
 
 ### Added
 
+- Added non-UTF-8 error-contract test coverage for `dedupe` and `conflicts`.
 - Added a consolidated post-audit action plan for the v0.4.0 hardening and release-preparation sequence.
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 6d4cd9f..f9d85b6 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -268,6 +268,34 @@ def test_conflicts_returns_two_for_symlinked_instruction_file(self) -> None:
             "conflicts: AGENTS.md\n",
         )
 
+    def test_conflicts_returns_two_for_non_utf8_instruction_file(self) -> None:
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            root = Path(tmp_dir)
+            # 0xFF and 0xFE are bytes that never occur in well-formed UTF-8.
+            original_bytes = b"# Agent instructions\n\n\xff\xfe\n"
+            agents_file = root / "AGENTS.md"
+            agents_file.write_bytes(original_bytes)
+
+            stdout = io.StringIO()
+            stderr = io.StringIO()
+
+            with redirect_stdout(stdout), redirect_stderr(stderr):
+                exit_code = main(["conflicts", str(root)])
+
+            # The command must leave the file's bytes unchanged.
+            self.assertEqual(agents_file.read_bytes(), original_bytes)
+
+        self.assertEqual(exit_code, 2)
+        self.assertEqual(stdout.getvalue(), "")
+        self.assertEqual(
+            stderr.getvalue(),
+            "ERROR: instruction file is not valid UTF-8 and cannot be checked "
+            "for conflicts: AGENTS.md\n",
+        )
+        # The message must not echo the offending byte, escaped or as a number.
+        self.assertNotIn("\\xff", stderr.getvalue())
+        self.assertNotIn("255", stderr.getvalue())
+
     def test_dedupe_reports_duplicate_lines(self) -> None:
         with tempfile.TemporaryDirectory() as tmp_dir:
             root = Path(tmp_dir)
@@ -297,6 +325,34 @@ def test_dedupe_reports_duplicate_lines(self) -> None:
         self.assertIn("AGENTS.md:3", text)
         self.assertIn("CLAUDE.md:3", text)
 
+    def test_dedupe_returns_two_for_non_utf8_instruction_file(self) -> None:
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            root = Path(tmp_dir)
+            # 0xFF and 0xFE are bytes that never occur in well-formed UTF-8.
+            original_bytes = b"# Agent instructions\n\n\xff\xfe\n"
+            agents_file = root / "AGENTS.md"
+            agents_file.write_bytes(original_bytes)
+
+            stdout = io.StringIO()
+            stderr = io.StringIO()
+
+            with redirect_stdout(stdout), redirect_stderr(stderr):
+                exit_code = main(["dedupe", str(root)])
+
+            # The command must leave the file's bytes unchanged.
+            self.assertEqual(agents_file.read_bytes(), original_bytes)
+
+        self.assertEqual(exit_code, 2)
+        self.assertEqual(stdout.getvalue(), "")
+        self.assertEqual(
+            stderr.getvalue(),
+            "ERROR: instruction file is not valid UTF-8 and cannot be "
+            "deduplicated: AGENTS.md\n",
+        )
+        # The message must not echo the offending byte, escaped or as a number.
+        self.assertNotIn("\\xff", stderr.getvalue())
+        self.assertNotIn("255", stderr.getvalue())
+
     def test_dedupe_returns_one_when_no_instruction_files_are_found(self) -> None:
         output = io.StringIO()
 
diff --git a/tests/test_conflicts.py b/tests/test_conflicts.py
index a8d1009..8fb1fdd 100644
--- a/tests/test_conflicts.py
+++ b/tests/test_conflicts.py
@@ -125,6 +125,20 @@ def test_reports_all_current_rule_family_conflicts(self) -> None:
         self.assertEqual([location.path for location in group.allow_locations], ["AGENTS.md"])
         self.assertEqual([location.path for location in group.block_locations], ["CLAUDE.md"])
 
+    def test_rejects_non_utf8_instruction_files(self) -> None:
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            root = Path(tmp_dir)
+            # 0xFF and 0xFE are bytes that never occur in well-formed UTF-8.
+            original_bytes = b"# Agent instructions\n\n\xff\xfe\n"
+            agents_file = root / "AGENTS.md"
+            agents_file.write_bytes(original_bytes)
+
+            with self.assertRaisesRegex(ValueError, "not valid UTF-8"):
+                build_conflict_report(root, discover_instruction_files(root))
+
+            # Rejecting the file must leave its bytes unchanged.
+            self.assertEqual(agents_file.read_bytes(), original_bytes)
+
     def test_rejects_symlinked_instruction_files(self) -> None:
         with tempfile.TemporaryDirectory() as tmp_dir:
             root = Path(tmp_dir)
diff --git a/tests/test_dedupe.py b/tests/test_dedupe.py
index 37ba2e1..9ef8582 100644
--- a/tests/test_dedupe.py
+++ b/tests/test_dedupe.py
@@ -42,6 +42,20 @@ def test_ignores_short_boilerplate_lines(self) -> None:
 
         self.assertEqual(report.duplicate_group_count, 0)
 
+    def test_rejects_non_utf8_instruction_files(self) -> None:
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            root = Path(tmp_dir)
+            # 0xFF and 0xFE are bytes that never occur in well-formed UTF-8.
+            original_bytes = b"# Agent instructions\n\n\xff\xfe\n"
+            agents_file = root / "AGENTS.md"
+            agents_file.write_bytes(original_bytes)
+
+            with self.assertRaisesRegex(ValueError, "not valid UTF-8"):
+                build_dedupe_report(root, discover_instruction_files(root))
+
+            # Rejecting the file must leave its bytes unchanged.
+            self.assertEqual(agents_file.read_bytes(), original_bytes)
+
     def test_rejects_symlinked_instruction_files(self) -> None:
         with tempfile.TemporaryDirectory() as tmp_dir:
             root = Path(tmp_dir)