Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "dacli"
version = "0.4.29"
version = "0.4.30"
description = "Documentation Access CLI - Navigate and query large documentation projects"
readme = "README.md"
license = { text = "MIT" }
Expand Down
2 changes: 1 addition & 1 deletion src/dacli/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@
"""


__version__ = "0.4.29"
__version__ = "0.4.30"
10 changes: 6 additions & 4 deletions src/dacli/asciidoc_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
collect_all_sections,
find_section_by_path,
slugify,
strip_doc_extension,
)

# Regex patterns from specification
Expand Down Expand Up @@ -189,6 +190,9 @@ def _get_file_prefix(self, file_path: Path) -> str:
The file prefix is the relative path from base_path to file_path,
without the file extension. This ensures unique paths across documents.

Issue #266: Only strips known extensions (.md, .adoc, .asciidoc) to preserve
dots in filenames (e.g. version numbers like "report_v1.2.3.adoc").

Args:
file_path: Path to the document being parsed

Expand All @@ -198,10 +202,8 @@ def _get_file_prefix(self, file_path: Path) -> str:
try:
relative = file_path.relative_to(self.base_path)
except ValueError:
# file_path is not relative to base_path, use just the stem
relative = Path(file_path.stem)
# Remove extension and convert to forward slashes
return str(relative.with_suffix("")).replace("\\", "/")
relative = Path(file_path.name)
return strip_doc_extension(relative)

def get_section(
self, doc: AsciidocDocument, path: str
Expand Down
2 changes: 1 addition & 1 deletion src/dacli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ def __init__(
self.index = StructureIndex()
self.file_handler = FileSystemHandler()
self.asciidoc_parser = AsciidocStructureParser(base_path=docs_root)
self.markdown_parser = MarkdownStructureParser()
self.markdown_parser = MarkdownStructureParser(base_path=docs_root)

# Build index
_build_index(
Expand Down
13 changes: 7 additions & 6 deletions src/dacli/markdown_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
collect_all_sections,
find_section_by_path,
slugify,
strip_doc_extension,
)

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -118,6 +119,9 @@ def _get_file_prefix(self, file_path: Path) -> str:
The file prefix is the relative path from base_path to file_path,
without the file extension. This ensures unique paths across documents.

Issue #266: Only strips known extensions (.md, .adoc, .asciidoc) to preserve
dots in filenames (e.g. version numbers like "report_v1.2.3.md").

Args:
file_path: Path to the document being parsed

Expand All @@ -128,13 +132,10 @@ def _get_file_prefix(self, file_path: Path) -> str:
try:
relative = file_path.relative_to(self.base_path)
except ValueError:
# file_path is not relative to base_path, use just the stem
relative = Path(file_path.stem)
relative = Path(file_path.name)
else:
# No base_path provided, use just the stem
relative = Path(file_path.stem)
# Remove extension and convert to forward slashes
return str(relative.with_suffix("")).replace("\\", "/")
relative = Path(file_path.name)
return strip_doc_extension(relative)

def parse_file(self, file_path: Path) -> MarkdownDocument:
"""Parse a single Markdown file.
Expand Down
2 changes: 1 addition & 1 deletion src/dacli/mcp_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ def create_mcp_server(
index = StructureIndex()
file_handler = FileSystemHandler()
asciidoc_parser = AsciidocStructureParser(base_path=docs_root)
markdown_parser = MarkdownStructureParser()
markdown_parser = MarkdownStructureParser(base_path=docs_root)

# Build initial index
_build_index(
Expand Down
25 changes: 25 additions & 0 deletions src/dacli/parser_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,34 @@
"""

import re
from pathlib import Path

from dacli.models import Section

# Known document extensions to strip from file paths (Issue #266).
# Kept as a frozenset so this shared module-level constant cannot be mutated by accident.
KNOWN_DOC_EXTENSIONS = frozenset({".md", ".adoc", ".asciidoc"})


def strip_doc_extension(file_path: Path) -> str:
    """Remove only a known document extension from a file path.

    Unlike ``Path.with_suffix("")``, which drops whatever follows the last dot,
    this strips the final suffix only when it is one of KNOWN_DOC_EXTENSIONS
    (compared case-insensitively). Dots that belong to the file name itself are
    preserved, e.g. "report_v1.2.3.md" -> "report_v1.2.3", while
    "data_v3.2.1" is returned unchanged.

    Args:
        file_path: Path to strip the extension from.

    Returns:
        The path as a string with backslashes converted to forward slashes and
        the known document extension (if any) removed.
    """
    normalized = str(file_path).replace("\\", "/")
    suffix = file_path.suffix
    if suffix.lower() not in KNOWN_DOC_EXTENSIONS:
        # Unknown or missing suffix: the trailing ".x" is part of the name, keep it.
        return normalized
    # Cut exactly the original suffix (its length is unaffected by the lower-casing above).
    return normalized[: len(normalized) - len(suffix)]


def slugify(text: str) -> str:
"""Convert text to URL-friendly slug.
Expand Down
14 changes: 14 additions & 0 deletions src/dacli/services/validation_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,20 @@ def validate_structure(index: StructureIndex, docs_root: Path) -> dict:
"message": pw.message,
})

# Issue #268: Include duplicate-path warnings from index build
for build_warning in index._build_warnings:
if "Duplicate section path" in build_warning:
# Parse the warning string to extract the path
# Format: "Duplicate section path: 'path' (first at file:line, duplicate at file:line)"
import re
match = re.search(r"Duplicate section path: '([^']+)'", build_warning)
dup_path = match.group(1) if match else "unknown"
warnings.append({
"type": "duplicate_path",
"path": dup_path,
"message": build_warning,
})

# Issue #219: Check for unresolved includes
for doc in index._documents:
# Only AsciiDoc documents have includes (check for attribute)
Expand Down
3 changes: 3 additions & 0 deletions src/dacli/structure_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ def __init__(self) -> None:
self._documents: list[Document] = []
self._top_level_sections: list[Section] = []
self._circular_include_errors: list[dict] = []
self._build_warnings: list[str] = [] # Issue #268: Store duplicate path warnings
self._index_ready: bool = False

def build_from_documents(self, documents: list[Document]) -> list[str]:
Expand Down Expand Up @@ -99,6 +100,7 @@ def build_from_documents(self, documents: list[Document]) -> list[str]:
for element in doc.elements:
self._index_element(element)

self._build_warnings = warnings # Issue #268: Store for validation
self._index_ready = True
logger.info(
f"Index built: {len(self._path_to_section)} sections, "
Expand Down Expand Up @@ -494,6 +496,7 @@ def clear(self) -> None:
self._documents.clear()
self._top_level_sections.clear()
self._circular_include_errors.clear()
self._build_warnings.clear()
self._index_ready = False

def stats(self) -> dict:
Expand Down
89 changes: 89 additions & 0 deletions tests/test_dotted_filenames_266.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
"""Tests for dotted filenames producing unique paths (Issue #266)."""

from dacli.markdown_parser import MarkdownStructureParser
from dacli.structure_index import StructureIndex


class TestDottedFilenames:
    """Markdown files whose names contain dots (e.g. versions) must get distinct paths."""

    def test_version_numbered_files_have_unique_paths(self, tmp_path):
        """RESULTS_v0.4.27.md and RESULTS_v0.4.28.md must not collide in the index."""
        older = tmp_path / "RESULTS_v0.4.27.md"
        newer = tmp_path / "RESULTS_v0.4.28.md"
        older.write_text("# Results v0.4.27\n\nContent.\n")
        newer.write_text("# Results v0.4.28\n\nContent.\n")

        md_parser = MarkdownStructureParser(base_path=tmp_path)
        documents = [md_parser.parse_file(older), md_parser.parse_file(newer)]

        index = StructureIndex()
        build_warnings = index.build_from_documents(documents)

        # A collision between the two files would surface as a build warning.
        assert len(build_warnings) == 0, f"Unexpected warnings: {build_warnings}"

        section_paths = [entry["path"] for entry in index.get_structure()["sections"]]
        assert len(section_paths) == len(set(section_paths)), (
            f"Duplicate paths found: {section_paths}"
        )

    def test_cli_context_passes_base_path_to_markdown_parser(self, tmp_path):
        """CliContext must hand docs_root to MarkdownStructureParser as base_path."""
        from dacli.cli import CliContext

        (tmp_path / "test_v1.2.3.md").write_text("# Test v1.2.3\n")

        context = CliContext(docs_root=tmp_path, output_format="json", pretty=False)

        # The parser owned by the context must know the documentation root.
        assert context.markdown_parser.base_path == tmp_path

    def test_file_prefix_without_base_path_strips_only_known_extensions(self, tmp_path):
        """With no base_path, only the .md extension is removed, not the version dots."""
        source = tmp_path / "report_v2.1.5.md"
        source.write_text("# Report\n")

        # BUG #266: without base_path, Path(stem).with_suffix("") used to strip ".5".
        document = MarkdownStructureParser().parse_file(source)  # deliberately no base_path

        # The full version number must survive in the section path.
        assert document.sections[0].path == "report_v2.1.5"

    def test_get_file_prefix_preserves_version_dots(self, tmp_path):
        """_get_file_prefix must keep version-like dotted parts of the file name."""
        md_parser = MarkdownStructureParser(base_path=tmp_path)
        assert md_parser._get_file_prefix(tmp_path / "data_v3.2.1.md") == "data_v3.2.1"

    def test_subdirectory_file_with_dots(self, tmp_path):
        """Dotted file names inside a subdirectory also yield the correct path."""
        reports_dir = tmp_path / "reports"
        reports_dir.mkdir()
        sprint_notes = reports_dir / "sprint_2.0.1.md"
        sprint_notes.write_text("# Sprint 2.0.1\n\nNotes.\n")

        document = MarkdownStructureParser(base_path=tmp_path).parse_file(sprint_notes)

        assert document.sections[0].path == "reports/sprint_2.0.1"


class TestDottedFilenamesAsciiDoc:
    """AsciiDoc files whose names contain dots must keep them in their section paths."""

    def test_asciidoc_file_with_version_dots(self, tmp_path):
        """Parsing release_v1.2.3.adoc must yield a path that keeps the version dots."""
        from dacli.asciidoc_parser import AsciidocStructureParser

        release_notes = tmp_path / "release_v1.2.3.adoc"
        release_notes.write_text("= Release v1.2.3\n\nContent.\n")

        document = AsciidocStructureParser(base_path=tmp_path).parse_file(release_notes)

        assert document.sections[0].path == "release_v1.2.3"
111 changes: 111 additions & 0 deletions tests/test_validate_duplicate_warnings_268.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
"""Tests for duplicate-path warnings in JSON validate output (Issue #268)."""

from dacli.models import Document, Section, SourceLocation
from dacli.services.validation_service import validate_structure
from dacli.structure_index import StructureIndex


class TestDuplicatePathWarningsInValidation:
    """validate_structure must report duplicate section paths found during index build."""

    @staticmethod
    def _single_section_doc(root, file_name, doc_title, section_title, section_path, line):
        """Build a Document holding one level-1 section located at ``line`` of its file."""
        doc_file = root / file_name
        return Document(
            file_path=doc_file,
            title=doc_title,
            sections=[
                Section(
                    title=section_title,
                    level=1,
                    path=section_path,
                    source_location=SourceLocation(file=doc_file, line=line),
                )
            ],
            elements=[],
        )

    def test_duplicate_paths_appear_in_validation_warnings(self, tmp_path):
        """Two documents sharing a section path produce a duplicate_path warning."""
        first = self._single_section_doc(tmp_path, "a.md", "A", "Introduction", "intro", 1)
        # Same section path as the first document on purpose.
        second = self._single_section_doc(tmp_path, "b.md", "B", "Introduction", "intro", 1)

        index = StructureIndex()
        build_warnings = index.build_from_documents([first, second])
        assert len(build_warnings) > 0, "Should have duplicate path warnings"

        result = validate_structure(index, tmp_path)
        reported_types = [entry["type"] for entry in result["warnings"]]
        assert "duplicate_path" in reported_types, (
            f"duplicate_path not in warnings: {result['warnings']}"
        )

    def test_duplicate_path_warning_includes_details(self, tmp_path):
        """Exactly one duplicate_path warning is emitted, carrying the path and a message."""
        first = self._single_section_doc(tmp_path, "a.md", "A", "Setup", "setup", 5)
        second = self._single_section_doc(tmp_path, "b.md", "B", "Setup", "setup", 3)

        index = StructureIndex()
        index.build_from_documents([first, second])

        result = validate_structure(index, tmp_path)
        duplicates = [entry for entry in result["warnings"] if entry["type"] == "duplicate_path"]
        assert len(duplicates) == 1

        (only_warning,) = duplicates
        assert only_warning["path"] == "setup"
        assert "message" in only_warning

    def test_no_duplicate_warnings_when_paths_unique(self, tmp_path):
        """A single document with a unique path yields no duplicate_path warnings."""
        lone_doc = self._single_section_doc(tmp_path, "a.md", "A", "Intro", "intro", 1)

        index = StructureIndex()
        index.build_from_documents([lone_doc])

        result = validate_structure(index, tmp_path)
        duplicates = [entry for entry in result["warnings"] if entry["type"] == "duplicate_path"]
        assert len(duplicates) == 0
2 changes: 1 addition & 1 deletion uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading