Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ Development happens on a fork to keep `upstream/main` stable for `uv tool instal

- Documentation, Issues, Pull-Requests etc. are always written in English
- use responsible-vibe-mcp wherever suitable
- **Use dacli for documentation access:** When reading or modifying the project documentation in `src/docs/`, use `uv run dacli --docs-root src/docs` instead of reading files directly. Use `dacli search` to find relevant sections, `dacli section` to read content, and `dacli update`/`dacli insert` for modifications. This eats our own dog food and validates the tool while working.

## Commands

Expand Down Expand Up @@ -221,6 +222,7 @@ Located in `src/docs/arc42/chapters/09_architecture_decisions.adoc`:
| `validate_structure` | Validate documentation structure |
| `update_section` | Update section content (with optimistic locking) |
| `insert_content` | Insert content before/after sections |
| `ask_documentation_tool` | [experimental] Ask a question about the docs using an LLM |

For detailed tool documentation, see `src/docs/50-user-manual/`.

5 changes: 4 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "dacli"
version = "0.4.27"
version = "0.4.28"
description = "Documentation Access CLI - Navigate and query large documentation projects"
readme = "README.md"
license = { text = "MIT" }
Expand Down Expand Up @@ -33,6 +33,9 @@ Repository = "https://github.com/docToolchain/dacli"
Issues = "https://github.com/docToolchain/dacli/issues"

[project.optional-dependencies]
llm = [
"anthropic>=0.40.0",
]
dev = [
"pytest>=8.0.0",
"pytest-asyncio>=0.24.0",
Expand Down
2 changes: 1 addition & 1 deletion src/dacli/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@
"""


__version__ = "0.4.27"
__version__ = "0.4.28"
49 changes: 49 additions & 0 deletions src/dacli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
from dacli.markdown_parser import MarkdownStructureParser
from dacli.mcp_app import _build_index
from dacli.services import (
ask_documentation,
compute_hash,
get_project_metadata,
get_section_metadata,
Expand Down Expand Up @@ -133,6 +134,7 @@ def _get_section_append_line(

# Command aliases for shorter typing
COMMAND_ALIASES = {
"a": "ask",
"s": "search",
"sec": "section",
"str": "structure",
Expand All @@ -149,6 +151,7 @@ def _get_section_append_line(
"Read": ["section", "elements"],
"Validate": ["validate"],
"Edit": ["update", "insert"],
"Experimental": ["ask"],
}

# Reverse lookup: command -> alias
Expand Down Expand Up @@ -847,5 +850,51 @@ def ensure_trailing_blank_line(content: str) -> str:
sys.exit(EXIT_WRITE_ERROR)


@cli.command(epilog="""
\b
[experimental] This command uses an LLM to answer questions.
Requires Claude Code CLI or ANTHROPIC_API_KEY.

Examples:
dacli ask "What is this project about?"
dacli ask "How do I install?" --provider anthropic-api
dacli a "What commands are available?" # Using alias
""")
@click.argument("question")
@click.option(
    "--provider",
    default=None,
    help="LLM provider: claude-code or anthropic-api (default: auto-detect)",
)
@click.option(
    "--max-sections",
    type=int,
    default=None,
    help="Limit number of sections to check (default: all)",
)
@pass_context
def ask(ctx: CliContext, question: str, provider: str | None, max_sections: int | None):
    """[experimental] Ask a question about the documentation using an LLM."""

    def _progress(current: int, total: int, filename: str):
        # Progress goes to stderr so stdout stays clean for the answer
        # (important when output is piped or machine-parsed).
        # BUG FIX: the `filename` parameter was ignored and a literal
        # placeholder string was printed instead of the file name.
        click.echo(f" Checking file {current}/{total}: {filename}...", err=True)
        sys.stderr.flush()

    result = ask_documentation(
        question=question,
        index=ctx.index,
        file_handler=ctx.file_handler,
        provider_name=provider,
        max_sections=max_sections,
        progress_callback=_progress,
    )

    # Emit the formatted result in every case; exit non-zero on error.
    # (Previously the echo call was duplicated on the error and success paths.)
    click.echo(format_output(ctx, result))
    if "error" in result:
        sys.exit(EXIT_ERROR)


if __name__ == "__main__":
cli()
31 changes: 31 additions & 0 deletions src/dacli/mcp_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from dacli.markdown_parser import MarkdownStructureParser
from dacli.models import Document
from dacli.services import (
ask_documentation,
compute_hash,
get_project_metadata,
get_section_metadata,
Expand Down Expand Up @@ -618,6 +619,36 @@ def validate_structure() -> dict:
"""
return service_validate_structure(index, docs_root)

@mcp.tool()
def ask_documentation_tool(
    question: str,
    provider: str | None = None,
    max_sections: int | None = None,
) -> dict:
    """[experimental] Ask a question about the documentation using an LLM.

    Searches for relevant documentation sections, builds a context prompt,
    and calls an LLM provider to generate an answer. Requires Claude Code CLI
    or ANTHROPIC_API_KEY environment variable.

    Args:
        question: The question to ask about the documentation.
        provider: LLM provider to use - 'claude-code' or 'anthropic-api'.
            If None, auto-detects (prefers Claude Code CLI).
        max_sections: Limit sections to check (default: all sections).

    Returns:
        Dictionary with 'answer', 'provider', 'model', 'sections_used',
        and 'experimental' flag. On error, returns dict with 'error' key.
    """
    # NOTE: this docstring doubles as the MCP tool description shown to
    # clients, so its wording is part of the tool's public surface.
    # Thin wrapper: `index` and `file_handler` are closure variables —
    # presumably bound in the enclosing MCP-server factory (not visible in
    # this chunk); verify against the factory when modifying.
    return ask_documentation(
        question=question,
        index=index,
        file_handler=file_handler,
        provider_name=provider,
        max_sections=max_sections,
    )

return mcp


Expand Down
2 changes: 2 additions & 0 deletions src/dacli/services/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,13 @@
Services accept dependencies (index, file_handler) and return dict results.
"""

from dacli.services.ask_service import ask_documentation
from dacli.services.content_service import compute_hash, update_section
from dacli.services.metadata_service import get_project_metadata, get_section_metadata
from dacli.services.validation_service import validate_structure

__all__ = [
"ask_documentation",
"get_project_metadata",
"get_section_metadata",
"validate_structure",
Expand Down
214 changes: 214 additions & 0 deletions src/dacli/services/ask_service.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,214 @@
"""Ask service for the experimental LLM-powered documentation Q&A.

Implements iterative context building as described in Issue #186:
1. Collect all documentation files from the index
2. Iterate through files one by one, passing each file's content + question
+ previous findings to the LLM — the LLM decides relevance
3. Consolidate all findings into a final answer with source references

File-based iteration is more efficient than section-based: a typical project
has ~35 files vs ~460 sections, reducing LLM calls by ~13x while providing
better context (full file content) per call.
"""

from collections.abc import Callable
from pathlib import Path

from dacli.file_handler import FileSystemHandler
from dacli.services.llm_provider import get_provider
from dacli.structure_index import StructureIndex

# Prompt sent once per documentation file. The LLM sees the question, the
# findings accumulated from earlier files, and the current file's full
# content, and must answer in the fixed KEY_POINTS/MISSING format that the
# next iteration folds back into `previous_findings`.
ITERATION_PROMPT = """\
Question: {question}

Previous findings:
{previous_findings}

Current file: {file_path}
---
{file_content}
---

Task:
1. Does this file contain information relevant to the question?
2. If yes, extract key points.
3. Note what information is still missing to fully answer the question.

Respond concisely:
KEY_POINTS: [bullet list of relevant findings, or "none"]
MISSING: [what's still needed, or "nothing"]"""

# Final prompt: merges all per-file findings and the list of consulted
# files into a single consolidated answer for the user.
CONSOLIDATION_PROMPT = """\
Question: {question}

All findings from documentation:
{accumulated_findings}

Files consulted:
{sources_list}

Task: Provide a final, consolidated answer that:
1. Directly answers the question
2. Synthesizes information from all files
3. Is clear and well-structured

Provide only the answer, no meta-commentary."""


def _get_all_files(index: StructureIndex) -> list[dict]:
    """Collect every documentation file known to the index.

    Each entry is a dict with a 'file' key (the Path) and a 'name' key
    (its base name). Sorting by path keeps the iteration order
    deterministic across runs.
    """
    # NOTE(review): reads the index's private `_file_to_sections` mapping —
    # a public accessor on StructureIndex would be cleaner.
    return [
        {"file": path, "name": path.name}
        for path in sorted(index._file_to_sections.keys())
    ]


def _read_file_content(
    file_path: Path,
    file_handler: FileSystemHandler,
) -> str | None:
    """Read the full content of a documentation file.

    Returns None if the file cannot be read. The broad except is
    deliberate best-effort behavior: an unreadable file is skipped by
    the caller rather than aborting the whole Q&A run.
    """
    try:
        content = file_handler.read_file(file_path)
    except Exception:
        return None
    return content


def ask_documentation(
    question: str,
    index: StructureIndex,
    file_handler: FileSystemHandler,
    provider_name: str | None = None,
    max_sections: int | None = None,
    progress_callback: Callable[[int, int, str], None] | None = None,
) -> dict:
    """Answer a question about the documentation using iterative LLM reasoning.

    Implements the iterative approach from Issue #186:
    1. Collect all documentation files
    2. Iterate through each file, letting the LLM decide relevance
       and accumulate findings
    3. Consolidate all findings into a final answer

    No keyword search is used — the LLM handles semantic matching,
    so synonyms and natural language questions work correctly.

    Args:
        question: The user's question.
        index: Structure index for searching.
        file_handler: File handler for reading content.
        provider_name: LLM provider name (None for auto-detect).
        max_sections: Limit files to iterate (None = all files).
            Negative values are treated as 0.
        progress_callback: Optional callable invoked before each LLM call
            with (iteration_number, total_files, file_name). Iteration
            numbers count only readable, non-empty files, so the final
            value may be below total_files when files are skipped.

    Returns:
        Dict with 'answer', 'provider', 'model', 'sources', 'iterations',
        'sections_used', and 'experimental' keys.
        On error, returns dict with 'error' key.
    """
    try:
        provider = get_provider(preferred=provider_name)
    except RuntimeError as e:
        # No usable provider (e.g. neither Claude Code CLI nor API key).
        return {"error": str(e)}

    # Step 1: Get all documentation files
    all_files = _get_all_files(index)

    # Optionally limit files (None = all). Clamp negatives to 0 so a bad
    # value cannot silently select files from the END of the list via
    # negative slicing.
    if max_sections is not None:
        files_to_check = all_files[: max(0, max_sections)]
    else:
        files_to_check = all_files

    # Step 2: Iterate through files, accumulating findings
    accumulated_findings = ""
    sources = []
    iterations = 0

    total_files = len(files_to_check)

    for file_info in files_to_check:
        content = _read_file_content(file_info["file"], file_handler)
        if content is None or not content.strip():
            # Unreadable or empty files contribute nothing; skip silently.
            continue

        iterations += 1

        if progress_callback:
            progress_callback(iterations, total_files, file_info["name"])

        prompt = ITERATION_PROMPT.format(
            question=question,
            previous_findings=accumulated_findings or "(none yet)",
            file_path=file_info["name"],
            file_content=content,
        )

        try:
            response = provider.ask(
                "You are analyzing documentation files to answer a question. "
                "Extract relevant key points concisely.",
                prompt,
            )
            accumulated_findings += (
                f"\n\nFrom '{file_info['name']}':\n"
                f"{response.text}"
            )
            sources.append({
                "file": str(file_info["file"]),
                "name": file_info["name"],
            })
        except RuntimeError:
            # A single failed LLM call should not abort the whole run;
            # the file is simply not recorded as a source.
            continue

    # Step 3: Consolidation — merge per-file findings into one answer.
    if accumulated_findings:
        sources_list = "\n".join(
            f"- {s['name']}" for s in sources
        )
        consolidation_prompt = CONSOLIDATION_PROMPT.format(
            question=question,
            accumulated_findings=accumulated_findings,
            sources_list=sources_list,
        )
        try:
            final_response = provider.ask(
                "You are a documentation assistant. Provide a clear, "
                "consolidated answer based on the findings. Answer in "
                "the same language as the question.",
                consolidation_prompt,
            )
            answer = final_response.text
        except RuntimeError as e:
            return {"error": f"Consolidation failed: {e}"}
    else:
        # No readable content at all: still ask the provider so the user
        # gets a polite "nothing found" answer in their own language.
        try:
            response = provider.ask(
                "You are a documentation assistant.",
                f"No documentation files were available.\n\n"
                f"Question: {question}\n\n"
                f"Please let the user know that no documentation "
                f"content was found.",
            )
            answer = response.text
        except RuntimeError as e:
            return {"error": str(e)}

    return {
        "answer": answer,
        "provider": provider.name,
        # Not every provider exposes a model attribute; None when absent.
        "model": getattr(provider, "model", None),
        "sources": sources,
        "iterations": iterations,
        "sections_used": len(sources),
        "experimental": True,
    }
Loading