diff --git a/.gitignore b/.gitignore index 73e921b..a89bec2 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,7 @@ # Python cache __pycache__/ *.pyc +.hypothesis/ # IDE and editor .idea/ @@ -33,6 +34,7 @@ settings.json # Build output build/ +dist/ # Serena working directory .serena/ diff --git a/pyproject.toml b/pyproject.toml index 9d9abe4..0b48039 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dacli" -version = "0.4.36" +version = "0.4.37" description = "Documentation Access CLI - Navigate and query large documentation projects" readme = "README.md" license = { text = "MIT" } diff --git a/scripts/build_zipapp.sh b/scripts/build_zipapp.sh new file mode 100755 index 0000000..e6a1416 --- /dev/null +++ b/scripts/build_zipapp.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash +# Build a self-contained dacli.pyz using Python's zipapp module. +# +# The resulting .pyz file can be dropped into any directory and run with: +# python3 dacli.pyz --help +# +# Requirements (build machine only): Python 3.12+, pip +# The .pyz is cross-platform (Linux, macOS, Windows) because all +# bundled dependencies are pure Python. +set -euo pipefail + +PROJECT_DIR="$(cd "$(dirname "$0")/.." && pwd)" +BUILD_DIR=$(mktemp -d) +OUTPUT="${PROJECT_DIR}/dist/dacli.pyz" + +cleanup() { rm -rf "$BUILD_DIR"; } +trap cleanup EXIT + +mkdir -p "$(dirname "$OUTPUT")" + +echo "Installing CLI dependencies into build directory..." +pip install --quiet --target "$BUILD_DIR" click pyyaml pathspec + +echo "Copying dacli source..." +cp -r "$PROJECT_DIR/src/dacli" "$BUILD_DIR/dacli" + +# Create __main__.py for zipapp entry point +cat > "$BUILD_DIR/__main__.py" << 'ENTRY' +from dacli.cli import cli + +cli() +ENTRY + +echo "Building zipapp..." 
+python3 -m zipapp "$BUILD_DIR" -o "$OUTPUT" -p "/usr/bin/env python3" -c + +echo "" +echo "Built: $OUTPUT ($(du -h "$OUTPUT" | cut -f1))" +echo "Usage: python3 $OUTPUT --help" diff --git a/src/dacli/__init__.py b/src/dacli/__init__.py index 3f7dfc4..319a7ec 100644 --- a/src/dacli/__init__.py +++ b/src/dacli/__init__.py @@ -4,4 +4,4 @@ through hierarchical, content-aware access via the Model Context Protocol (MCP). """ -__version__ = "0.4.36" +__version__ = "0.4.37" diff --git a/src/dacli/cli.py b/src/dacli/cli.py index 7563718..280389b 100644 --- a/src/dacli/cli.py +++ b/src/dacli/cli.py @@ -29,7 +29,7 @@ from dacli.asciidoc_parser import AsciidocStructureParser from dacli.file_handler import FileReadError, FileSystemHandler, FileWriteError from dacli.markdown_parser import MarkdownStructureParser -from dacli.mcp_app import _build_index +from dacli.index_builder import build_index from dacli.services import ( compute_hash, get_project_metadata, @@ -290,7 +290,7 @@ def __init__( self.markdown_parser = MarkdownStructureParser(base_path=docs_root) # Build index - _build_index( + build_index( docs_root, self.index, self.asciidoc_parser, diff --git a/src/dacli/index_builder.py b/src/dacli/index_builder.py new file mode 100644 index 0000000..ae0759c --- /dev/null +++ b/src/dacli/index_builder.py @@ -0,0 +1,149 @@ +"""Index builder for dacli. + +Builds the in-memory StructureIndex from documents in a docs root directory. +This module has no heavy external dependencies (no fastmcp, pydantic, etc.), +enabling the CLI to be packaged as a lightweight cross-platform zipapp. 
+""" + +import logging +from pathlib import Path + +from dacli.asciidoc_parser import AsciidocStructureParser, CircularIncludeError +from dacli.file_utils import find_doc_files +from dacli.markdown_parser import MarkdownStructureParser +from dacli.models import Document +from dacli.structure_index import StructureIndex + +logger = logging.getLogger(__name__) + + +def build_index( + docs_root: Path, + index: StructureIndex, + asciidoc_parser: AsciidocStructureParser, + markdown_parser: MarkdownStructureParser, + *, + respect_gitignore: bool = True, + include_hidden: bool = False, +) -> None: + """Build the structure index from documents in docs_root. + + Args: + docs_root: Root directory containing documentation + index: StructureIndex to populate + asciidoc_parser: Parser for AsciiDoc files + markdown_parser: Parser for Markdown files + respect_gitignore: If True, exclude files matching .gitignore patterns + include_hidden: If True, include files in hidden directories + """ + documents: list[Document] = [] + + # Find all AsciiDoc files first (Issue #184) + all_adoc_files = list( + find_doc_files( + docs_root, "*.adoc", respect_gitignore=respect_gitignore, include_hidden=include_hidden + ) + ) + + # Scan for include directives to identify included files (Issue #184) + # Included files should not be parsed as separate root documents + included_files: set[Path] = set() + for adoc_file in all_adoc_files: + included_files.update(AsciidocStructureParser.scan_includes(adoc_file)) + + # Filter: only parse files that are NOT included by others (Issue #184) + root_adoc_files = [f for f in all_adoc_files if f not in included_files] + + # Issue #251: Detect circular includes in the include graph + # Files that include each other circularly all end up in included_files + # with none of them becoming root documents. Detect these cycles. 
+ circular_include_errors: list[dict] = [] + if all_adoc_files: + include_graph: dict[Path, set[Path]] = {} + for adoc_file in all_adoc_files: + resolved = adoc_file.resolve() + includes = AsciidocStructureParser.scan_includes(adoc_file) + include_graph[resolved] = includes + + circular_files: set[Path] = set() + visited: set[Path] = set() + in_stack: set[Path] = set() + + def _find_cycles(node: Path, path_list: list[Path]) -> None: + if node in in_stack: + cycle_start = path_list.index(node) + for f in path_list[cycle_start:]: + circular_files.add(f) + return + if node in visited: + return + visited.add(node) + in_stack.add(node) + path_list.append(node) + for neighbor in include_graph.get(node, set()): + _find_cycles(neighbor, path_list) + path_list.pop() + in_stack.remove(node) + + for adoc_file in all_adoc_files: + _find_cycles(adoc_file.resolve(), []) + + for circ_file in circular_files: + message = f"Circular include detected: {circ_file.name} " f"is part of an include cycle" + circular_include_errors.append( + { + "file": circ_file, + "include_chain": list(circular_files), + "message": message, + } + ) + + logger.info( + f"Found {len(all_adoc_files)} AsciiDoc files, " + f"{len(included_files)} included, " + f"{len(root_adoc_files)} root documents" + ) + + # Parse root AsciiDoc files only + for adoc_file in root_adoc_files: + try: + doc = asciidoc_parser.parse_file(adoc_file) + documents.append(doc) + except CircularIncludeError as e: + # Issue #251: Catch circular includes during parsing too + logger.warning("Circular include in %s: %s", adoc_file, e) + circular_include_errors.append( + { + "file": adoc_file, + "include_chain": e.include_chain, + "message": str(e), + } + ) + except Exception as e: + # Log but continue with other files + logger.warning("Failed to parse %s: %s", adoc_file, e) + + # Find and parse Markdown files + for md_file in find_doc_files( + docs_root, "*.md", respect_gitignore=respect_gitignore, include_hidden=include_hidden + ): + try: + 
md_doc = markdown_parser.parse_file(md_file) + # Convert MarkdownDocument to Document + doc = Document( + file_path=md_doc.file_path, + title=md_doc.title, + sections=md_doc.sections, + elements=md_doc.elements, + ) + documents.append(doc) + except Exception as e: + logger.warning("Failed to parse %s: %s", md_file, e) + + # Build index + warnings = index.build_from_documents(documents) + for warning in warnings: + logger.warning("Index: %s", warning) + + # Issue #251: Store circular include errors on the index for validation + index._circular_include_errors = circular_include_errors diff --git a/src/dacli/mcp_app.py b/src/dacli/mcp_app.py index 27afaf8..1442142 100644 --- a/src/dacli/mcp_app.py +++ b/src/dacli/mcp_app.py @@ -20,11 +20,10 @@ from fastmcp import FastMCP from dacli import __version__ -from dacli.asciidoc_parser import AsciidocStructureParser, CircularIncludeError +from dacli.asciidoc_parser import AsciidocStructureParser from dacli.file_handler import FileReadError, FileSystemHandler, FileWriteError -from dacli.file_utils import find_doc_files +from dacli.index_builder import build_index as _build_index from dacli.markdown_parser import MarkdownStructureParser -from dacli.models import Document from dacli.services import ( compute_hash, get_project_metadata, @@ -632,135 +631,3 @@ def validate_structure() -> dict: return service_validate_structure(index, docs_root) return mcp - - -def _build_index( - docs_root: Path, - index: StructureIndex, - asciidoc_parser: AsciidocStructureParser, - markdown_parser: MarkdownStructureParser, - *, - respect_gitignore: bool = True, - include_hidden: bool = False, -) -> None: - """Build the structure index from documents in docs_root. 
- - Args: - docs_root: Root directory containing documentation - index: StructureIndex to populate - asciidoc_parser: Parser for AsciiDoc files - markdown_parser: Parser for Markdown files - respect_gitignore: If True, exclude files matching .gitignore patterns - include_hidden: If True, include files in hidden directories - """ - documents: list[Document] = [] - - # Find all AsciiDoc files first (Issue #184) - all_adoc_files = list( - find_doc_files( - docs_root, "*.adoc", respect_gitignore=respect_gitignore, include_hidden=include_hidden - ) - ) - - # Scan for include directives to identify included files (Issue #184) - # Included files should not be parsed as separate root documents - included_files: set[Path] = set() - for adoc_file in all_adoc_files: - included_files.update(AsciidocStructureParser.scan_includes(adoc_file)) - - # Filter: only parse files that are NOT included by others (Issue #184) - root_adoc_files = [f for f in all_adoc_files if f not in included_files] - - # Issue #251: Detect circular includes in the include graph - # Files that include each other circularly all end up in included_files - # with none of them becoming root documents. Detect these cycles. 
- circular_include_errors: list[dict] = [] - if all_adoc_files: - include_graph: dict[Path, set[Path]] = {} - for adoc_file in all_adoc_files: - resolved = adoc_file.resolve() - includes = AsciidocStructureParser.scan_includes(adoc_file) - include_graph[resolved] = includes - - circular_files: set[Path] = set() - visited: set[Path] = set() - in_stack: set[Path] = set() - - def _find_cycles(node: Path, path_list: list[Path]) -> None: - if node in in_stack: - cycle_start = path_list.index(node) - for f in path_list[cycle_start:]: - circular_files.add(f) - return - if node in visited: - return - visited.add(node) - in_stack.add(node) - path_list.append(node) - for neighbor in include_graph.get(node, set()): - _find_cycles(neighbor, path_list) - path_list.pop() - in_stack.remove(node) - - for adoc_file in all_adoc_files: - _find_cycles(adoc_file.resolve(), []) - - for circ_file in circular_files: - message = f"Circular include detected: {circ_file.name} " f"is part of an include cycle" - circular_include_errors.append( - { - "file": circ_file, - "include_chain": list(circular_files), - "message": message, - } - ) - - logger.info( - f"Found {len(all_adoc_files)} AsciiDoc files, " - f"{len(included_files)} included, " - f"{len(root_adoc_files)} root documents" - ) - - # Parse root AsciiDoc files only - for adoc_file in root_adoc_files: - try: - doc = asciidoc_parser.parse_file(adoc_file) - documents.append(doc) - except CircularIncludeError as e: - # Issue #251: Catch circular includes during parsing too - logger.warning("Circular include in %s: %s", adoc_file, e) - circular_include_errors.append( - { - "file": adoc_file, - "include_chain": e.include_chain, - "message": str(e), - } - ) - except Exception as e: - # Log but continue with other files - logger.warning("Failed to parse %s: %s", adoc_file, e) - - # Find and parse Markdown files - for md_file in find_doc_files( - docs_root, "*.md", respect_gitignore=respect_gitignore, include_hidden=include_hidden - ): - try: - 
md_doc = markdown_parser.parse_file(md_file) - # Convert MarkdownDocument to Document - doc = Document( - file_path=md_doc.file_path, - title=md_doc.title, - sections=md_doc.sections, - elements=md_doc.elements, - ) - documents.append(doc) - except Exception as e: - logger.warning("Failed to parse %s: %s", md_file, e) - - # Build index - warnings = index.build_from_documents(documents) - for warning in warnings: - logger.warning("Index: %s", warning) - - # Issue #251: Store circular include errors on the index for validation - index._circular_include_errors = circular_include_errors diff --git a/tests/test_cli_packaging.py b/tests/test_cli_packaging.py new file mode 100644 index 0000000..fe5ab71 --- /dev/null +++ b/tests/test_cli_packaging.py @@ -0,0 +1,109 @@ +"""Tests for CLI packaging: verify the CLI can work without heavy MCP dependencies. + +The CLI was refactored to import build_index from index_builder instead of +mcp_app, enabling packaging as a cross-platform zipapp with only pure-Python +dependencies (click, pyyaml, pathspec). 
+""" + +import ast +import importlib +import sys +from pathlib import Path + +SRC_DIR = Path(__file__).parent.parent / "src" + + +class TestIndexBuilderModule: + """Test that index_builder.py is self-contained and works independently.""" + + def test_index_builder_imports_no_heavy_deps(self): + """index_builder.py must not directly import any package listed in heavy_deps.""" + source = (SRC_DIR / "dacli" / "index_builder.py").read_text() + tree = ast.parse(source) + + heavy_deps = {"fastmcp", "pydantic", "cryptography", "fastapi", "uvicorn", "pydocket"} + imported_modules = set() + + for node in ast.walk(tree): + if isinstance(node, ast.Import): + for alias in node.names: + imported_modules.add(alias.name.split(".")[0]) + elif isinstance(node, ast.ImportFrom) and node.module: + imported_modules.add(node.module.split(".")[0]) + + violations = imported_modules & heavy_deps + assert not violations, ( + f"index_builder.py imports heavy dependencies: {violations}. " + "This breaks cross-platform zipapp packaging." 
+ ) + + def test_build_index_function_exists(self): + """build_index function should be importable from index_builder.""" + from dacli.index_builder import build_index + + assert callable(build_index) + + def test_build_index_works(self, tmp_path): + """build_index should successfully index documents.""" + from dacli.asciidoc_parser import AsciidocStructureParser + from dacli.index_builder import build_index + from dacli.markdown_parser import MarkdownStructureParser + from dacli.structure_index import StructureIndex + + # Create a minimal AsciiDoc file + doc = tmp_path / "test.adoc" + doc.write_text("= Test Document\n\n== Section One\n\nContent here.\n") + + index = StructureIndex() + build_index( + tmp_path, + index, + AsciidocStructureParser(tmp_path), + MarkdownStructureParser(tmp_path), + respect_gitignore=False, + ) + + structure = index.get_structure() + assert len(structure["sections"]) > 0 + + +class TestCliImportChain: + """Test that cli.py does not directly import the heavy MCP modules.""" + + def test_cli_module_does_not_import_fastmcp_directly(self): + """cli.py must not directly import from mcp_app or fastmcp.""" + source = (SRC_DIR / "dacli" / "cli.py").read_text() + tree = ast.parse(source) + + forbidden = {"fastmcp", "dacli.mcp_app"} + imported_modules = set() + + for node in ast.walk(tree): + if isinstance(node, ast.Import): + for alias in node.names: + imported_modules.add(alias.name) + elif isinstance(node, ast.ImportFrom) and node.module: + imported_modules.add(node.module) + + violations = imported_modules & forbidden + assert not violations, ( + f"cli.py imports from {violations}. " + "The CLI must import build_index from index_builder, not mcp_app." 
+ ) + + +class TestMcpAppBackwardCompat: + """Test that mcp_app still exposes _build_index for backward compatibility.""" + + def test_build_index_importable_from_mcp_app(self): + """_build_index should still be importable from mcp_app.""" + from dacli.mcp_app import _build_index + + assert callable(_build_index) + + def test_mcp_app_build_index_is_same_function(self): + """mcp_app._build_index should be the same as index_builder.build_index.""" + from dacli.index_builder import build_index + from dacli.mcp_app import _build_index + + assert _build_index is build_index diff --git a/tests/test_included_files_integration.py b/tests/test_included_files_integration.py index 09812a1..5e5415a 100644 --- a/tests/test_included_files_integration.py +++ b/tests/test_included_files_integration.py @@ -5,7 +5,7 @@ from dacli.asciidoc_parser import AsciidocStructureParser from dacli.markdown_parser import MarkdownStructureParser -from dacli.mcp_app import _build_index +from dacli.index_builder import build_index as _build_index from dacli.structure_index import StructureIndex diff --git a/uv.lock b/uv.lock index 84565e7..6b90add 100644 --- a/uv.lock +++ b/uv.lock @@ -386,7 +386,7 @@ wheels = [ [[package]] name = "dacli" -version = "0.4.36" +version = "0.4.37" source = { editable = "." } dependencies = [ { name = "click" },