Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ This project has a published GitHub Release line, but no stable support or API g

## [Unreleased]

- Added non-UTF-8 error-contract test coverage for `dedupe` and `conflicts`.

### Added

- Added a consolidated post-audit action plan for the v0.4.0 hardening and release-preparation sequence.
Expand Down
52 changes: 52 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,32 @@ def test_conflicts_returns_two_for_symlinked_instruction_file(self) -> None:
"conflicts: AGENTS.md\n",
)


def test_conflicts_returns_two_for_non_utf8_instruction_file(self) -> None:
    """Pin the ``conflicts`` CLI error contract for a non-UTF-8 ``AGENTS.md``.

    Expects exit code 2, empty stdout, one exact error line on stderr, and
    the instruction file's bytes left unchanged.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        root = Path(tmp_dir)
        # The trailing 0xFF 0xFE bytes are not decodable as UTF-8.
        payload = b"# Agent instructions\n\n\xff\xfe\n"
        target = root / "AGENTS.md"
        target.write_bytes(payload)

        captured_out, captured_err = io.StringIO(), io.StringIO()
        with redirect_stdout(captured_out), redirect_stderr(captured_err):
            exit_code = main(["conflicts", str(root)])

        # The command must not have modified the file on disk.
        self.assertEqual(target.read_bytes(), payload)

        self.assertEqual(exit_code, 2)
        self.assertEqual(captured_out.getvalue(), "")

        err_text = captured_err.getvalue()
        self.assertEqual(
            err_text,
            "ERROR: instruction file is not valid UTF-8 and cannot be checked "
            "for conflicts: AGENTS.md\n",
        )
        # Neither the escaped byte nor its decimal value may appear in the
        # message (presumably guarding against leaked decoder details).
        for leaked in ("\\xff", "255"):
            self.assertNotIn(leaked, err_text)

def test_dedupe_reports_duplicate_lines(self) -> None:
with tempfile.TemporaryDirectory() as tmp_dir:
root = Path(tmp_dir)
Expand Down Expand Up @@ -297,6 +323,32 @@ def test_dedupe_reports_duplicate_lines(self) -> None:
self.assertIn("AGENTS.md:3", text)
self.assertIn("CLAUDE.md:3", text)


def test_dedupe_returns_two_for_non_utf8_instruction_file(self) -> None:
    """Pin the ``dedupe`` CLI error contract for a non-UTF-8 ``AGENTS.md``.

    Expects exit code 2, empty stdout, one exact error line on stderr, and
    the instruction file's bytes left unchanged.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        root = Path(tmp_dir)
        # The trailing 0xFF 0xFE bytes are not decodable as UTF-8.
        payload = b"# Agent instructions\n\n\xff\xfe\n"
        target = root / "AGENTS.md"
        target.write_bytes(payload)

        captured_out, captured_err = io.StringIO(), io.StringIO()
        with redirect_stdout(captured_out), redirect_stderr(captured_err):
            exit_code = main(["dedupe", str(root)])

        # The command must not have modified the file on disk.
        self.assertEqual(target.read_bytes(), payload)

        self.assertEqual(exit_code, 2)
        self.assertEqual(captured_out.getvalue(), "")

        err_text = captured_err.getvalue()
        self.assertEqual(
            err_text,
            "ERROR: instruction file is not valid UTF-8 and cannot be "
            "deduplicated: AGENTS.md\n",
        )
        # Neither the escaped byte nor its decimal value may appear in the
        # message (presumably guarding against leaked decoder details).
        for leaked in ("\\xff", "255"):
            self.assertNotIn(leaked, err_text)

def test_dedupe_returns_one_when_no_instruction_files_are_found(self) -> None:
output = io.StringIO()

Expand Down
13 changes: 13 additions & 0 deletions tests/test_conflicts.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,19 @@ def test_reports_all_current_rule_family_conflicts(self) -> None:
self.assertEqual([location.path for location in group.allow_locations], ["AGENTS.md"])
self.assertEqual([location.path for location in group.block_locations], ["CLAUDE.md"])


def test_rejects_non_utf8_instruction_files(self) -> None:
    """Check that the conflict report raises ``ValueError`` on non-UTF-8 input.

    The error message must match "not valid UTF-8", and the instruction
    file's bytes must be unchanged afterwards.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        root = Path(tmp_dir)
        # The trailing 0xFF 0xFE bytes are not decodable as UTF-8.
        payload = b"# Agent instructions\n\n\xff\xfe\n"
        target = root / "AGENTS.md"
        target.write_bytes(payload)

        with self.assertRaisesRegex(ValueError, "not valid UTF-8"):
            # Discovery stays inside the context so an error raised there
            # is subject to the same expectation.
            discovered = discover_instruction_files(root)
            build_conflict_report(root, discovered)

        # The failed report must not have modified the file on disk.
        self.assertEqual(target.read_bytes(), payload)

def test_rejects_symlinked_instruction_files(self) -> None:
with tempfile.TemporaryDirectory() as tmp_dir:
root = Path(tmp_dir)
Expand Down
13 changes: 13 additions & 0 deletions tests/test_dedupe.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,19 @@ def test_ignores_short_boilerplate_lines(self) -> None:

self.assertEqual(report.duplicate_group_count, 0)


def test_rejects_non_utf8_instruction_files(self) -> None:
    """Check that the dedupe report raises ``ValueError`` on non-UTF-8 input.

    The error message must match "not valid UTF-8", and the instruction
    file's bytes must be unchanged afterwards.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        root = Path(tmp_dir)
        # The trailing 0xFF 0xFE bytes are not decodable as UTF-8.
        payload = b"# Agent instructions\n\n\xff\xfe\n"
        target = root / "AGENTS.md"
        target.write_bytes(payload)

        with self.assertRaisesRegex(ValueError, "not valid UTF-8"):
            # Discovery stays inside the context so an error raised there
            # is subject to the same expectation.
            discovered = discover_instruction_files(root)
            build_dedupe_report(root, discovered)

        # The failed report must not have modified the file on disk.
        self.assertEqual(target.read_bytes(), payload)

def test_rejects_symlinked_instruction_files(self) -> None:
with tempfile.TemporaryDirectory() as tmp_dir:
root = Path(tmp_dir)
Expand Down