Skip to content

Commit 6480eea

Browse files
Johann-Peter Hartmann and claude committed
Update research agent to use pre-indexed symbol data
Replace disk-based tree-sitter tools with database-backed symbol tools:

- symbol_outline: Query Symbol table for file structure
- symbol_find: Search symbols by name across codebase
- symbol_callers: Find callers via SymbolEdge graph
- symbol_callees: Find callees via SymbolEdge graph

This leverages the pre-indexed symbol data from sync instead of re-parsing files at query time.

The agent now uses:

- RAG: hybrid_search for semantic code search
- Symbol Index: symbol_* tools for code graph navigation
- LSP: lsp_* tools for live definition/reference lookups

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 6146c5c commit 6480eea

File tree

1 file changed

+226
-89
lines changed
  • packages/core/contextmine_core/research

1 file changed

+226
-89
lines changed

packages/core/contextmine_core/research/agent.py

Lines changed: 226 additions & 89 deletions
Original file line number | Diff line number | Diff line change
@@ -356,117 +356,256 @@ async def lsp_hover(file_path: str, line: int, column: int = 0) -> str:
356356
return f"LSP hover failed: {e}"
357357

358358
# =========================================================================
359-
# TREE-SITTER TOOLS
359+
# SYMBOL INDEX TOOLS (use pre-indexed data from database)
360360
# =========================================================================
361361

362362
@tool
363-
async def ts_outline(file_path: str) -> str:
364-
"""Get the outline of all functions, classes, and symbols in a file.
363+
async def symbol_outline(file_path: str) -> str:
364+
"""Get the outline of all indexed functions, classes, and symbols in a file.
365365
366-
Use this to quickly understand the structure of a file without reading all code.
367-
Returns a list of symbols with their line numbers.
366+
Uses the pre-indexed symbol database for fast lookup.
367+
Returns a list of symbols with their line numbers and signatures.
368368
"""
369369
try:
370-
from contextmine_core.treesitter import extract_outline
370+
from contextmine_core.database import get_async_session
371+
from contextmine_core.models import Document, Symbol
372+
from sqlalchemy import select
371373

372-
symbols = extract_outline(file_path, include_children=True)
374+
async with get_async_session() as session:
375+
# Find document by URI
376+
doc_stmt = select(Document).where(Document.uri == file_path)
377+
doc_result = await session.execute(doc_stmt)
378+
doc = doc_result.scalar_one_or_none()
373379

374-
if not symbols:
375-
return f"No symbols found in {file_path}"
380+
if not doc:
381+
return f"File not found in index: {file_path}"
376382

377-
outline_lines = []
378-
for sym in symbols:
379-
outline_lines.append(
380-
f"{sym.kind.value} {sym.name} (L{sym.start_line}-{sym.end_line})"
383+
# Get all symbols for this document, ordered by line
384+
sym_stmt = (
385+
select(Symbol).where(Symbol.document_id == doc.id).order_by(Symbol.start_line)
381386
)
382-
for child in sym.children:
387+
sym_result = await session.execute(sym_stmt)
388+
symbols = sym_result.scalars().all()
389+
390+
if not symbols:
391+
return f"No symbols indexed for {file_path}"
392+
393+
outline_lines = []
394+
for sym in symbols:
395+
indent = " " if sym.parent_name else ""
396+
sig = f" - {sym.signature}" if sym.signature else ""
383397
outline_lines.append(
384-
f" {child.kind.value} {child.name} (L{child.start_line}-{child.end_line})"
398+
f"{indent}{sym.kind.value} {sym.name} (L{sym.start_line}-{sym.end_line}){sig}"
385399
)
386400

387-
summary = f"Found {len(symbols)} top-level symbols:\n" + "\n".join(outline_lines[:30])
388-
if len(outline_lines) > 30:
389-
summary += f"\n... and {len(outline_lines) - 30} more"
401+
summary = f"Found {len(symbols)} indexed symbols:\n" + "\n".join(outline_lines[:40])
402+
if len(outline_lines) > 40:
403+
summary += f"\n... and {len(outline_lines) - 40} more"
390404

391-
return summary
405+
return summary
392406

393407
except Exception as e:
394-
logger.warning("ts_outline failed: %s", e)
395-
return f"Tree-sitter outline failed: {e}"
408+
logger.warning("symbol_outline failed: %s", e)
409+
return f"Symbol outline failed: {e}"
396410

397411
@tool
398-
async def ts_find_symbol(file_path: str, name: str) -> str:
399-
"""Find a specific function, class, or method by name in a file.
412+
async def symbol_find(name: str, file_path: str | None = None) -> str:
413+
"""Find a symbol by name in the indexed codebase.
400414
401-
Use this when you know the symbol name but want to see its full implementation.
415+
Uses the pre-indexed symbol database. Optionally filter by file path.
402416
Returns the symbol's source code as evidence.
403417
"""
404418
run = run_holder["run"]
405419
try:
406-
from contextmine_core.treesitter import find_symbol_by_name, get_symbol_content
407-
408-
symbol = find_symbol_by_name(file_path, name)
420+
from contextmine_core.database import get_async_session
421+
from contextmine_core.models import Document, Symbol
422+
from sqlalchemy import select
409423

410-
if not symbol:
411-
return f"Symbol '{name}' not found in {file_path}"
424+
async with get_async_session() as session:
425+
stmt = select(Symbol).join(Document)
412426

413-
content = get_symbol_content(symbol)
427+
if file_path:
428+
stmt = stmt.where(Document.uri == file_path)
414429

415-
evidence = Evidence(
416-
id=f"ev-{run.run_id[:8]}-{len(run.evidence) + 1:03d}",
417-
file_path=file_path,
418-
start_line=symbol.start_line,
419-
end_line=symbol.end_line,
420-
content=content[:2000],
421-
reason=f"Found {symbol.kind.value} '{name}' via Tree-sitter",
422-
provenance="treesitter",
423-
symbol_id=symbol.name,
424-
symbol_kind=symbol.kind.value,
425-
)
426-
run.add_evidence(evidence)
430+
# Search by name (exact match first, then contains)
431+
stmt = stmt.where(Symbol.name == name)
432+
result = await session.execute(stmt)
433+
symbols = result.scalars().all()
434+
435+
if not symbols:
436+
# Try partial match
437+
stmt = select(Symbol).join(Document).where(Symbol.name.ilike(f"%{name}%"))
438+
if file_path:
439+
stmt = stmt.where(Document.uri == file_path)
440+
stmt = stmt.limit(10)
441+
result = await session.execute(stmt)
442+
symbols = result.scalars().all()
443+
444+
if not symbols:
445+
return f"Symbol '{name}' not found in index"
446+
447+
output_parts = []
448+
for sym in symbols[:5]:
449+
# Get document content for the symbol
450+
doc = sym.document
451+
lines = (doc.content or "").split("\n")
452+
start_idx = max(0, sym.start_line - 1)
453+
end_idx = min(len(lines), sym.end_line)
454+
content = "\n".join(lines[start_idx:end_idx])
455+
456+
evidence = Evidence(
457+
id=f"ev-{run.run_id[:8]}-{len(run.evidence) + 1:03d}",
458+
file_path=doc.uri or "unknown",
459+
start_line=sym.start_line,
460+
end_line=sym.end_line,
461+
content=content[:2000],
462+
reason=f"Found indexed {sym.kind.value} '{sym.name}'",
463+
provenance="symbol_index",
464+
symbol_id=sym.qualified_name,
465+
symbol_kind=sym.kind.value,
466+
)
467+
run.add_evidence(evidence)
468+
output_parts.append(
469+
f"[{evidence.id}] {sym.kind.value} '{sym.qualified_name}' at {doc.uri}:{sym.start_line}-{sym.end_line}\n```\n{content[:800]}\n```"
470+
)
427471

428-
return f"[{evidence.id}] {symbol.kind.value} '{name}' at {file_path}:{symbol.start_line}-{symbol.end_line}\n```\n{content[:1000]}\n```"
472+
return f"Found {len(symbols)} symbol(s):\n\n" + "\n\n".join(output_parts)
429473

430474
except Exception as e:
431-
logger.warning("ts_find_symbol failed: %s", e)
432-
return f"Tree-sitter find_symbol failed: {e}"
475+
logger.warning("symbol_find failed: %s", e)
476+
return f"Symbol find failed: {e}"
433477

434478
@tool
435-
async def ts_enclosing_symbol(file_path: str, line: int) -> str:
436-
"""Find what function, class, or method contains a specific line.
479+
async def symbol_callers(name: str, file_path: str | None = None) -> str:
480+
"""Find all functions/methods that call a given symbol.
437481
438-
Use this to understand the context of a code location.
439-
Returns the enclosing symbol's source code as evidence.
482+
Uses the pre-indexed symbol graph (SymbolEdge table).
483+
Returns callers as evidence.
440484
"""
441485
run = run_holder["run"]
442486
try:
443-
from contextmine_core.treesitter import find_enclosing_symbol, get_symbol_content
487+
from contextmine_core.database import get_async_session
488+
from contextmine_core.models import Document, Symbol, SymbolEdge, SymbolEdgeType
489+
from sqlalchemy import select
490+
from sqlalchemy.orm import selectinload
491+
492+
async with get_async_session() as session:
493+
# Find the target symbol
494+
stmt = select(Symbol).join(Document).where(Symbol.name == name)
495+
if file_path:
496+
stmt = stmt.where(Document.uri == file_path)
497+
stmt = stmt.options(selectinload(Symbol.incoming_edges))
498+
result = await session.execute(stmt)
499+
target_symbols = result.scalars().all()
500+
501+
if not target_symbols:
502+
return f"Symbol '{name}' not found in index"
503+
504+
output_parts = []
505+
for target in target_symbols[:3]:
506+
# Get incoming CALLS edges
507+
edges_stmt = (
508+
select(SymbolEdge)
509+
.where(SymbolEdge.target_symbol_id == target.id)
510+
.where(SymbolEdge.edge_type == SymbolEdgeType.CALLS)
511+
.options(
512+
selectinload(SymbolEdge.source_symbol).selectinload(Symbol.document)
513+
)
514+
)
515+
edges_result = await session.execute(edges_stmt)
516+
edges = edges_result.scalars().all()
517+
518+
for edge in edges[:10]:
519+
caller = edge.source_symbol
520+
doc = caller.document
521+
lines = (doc.content or "").split("\n")
522+
start_idx = max(0, caller.start_line - 1)
523+
end_idx = min(len(lines), caller.end_line)
524+
content = "\n".join(lines[start_idx:end_idx])
525+
526+
evidence = Evidence(
527+
id=f"ev-{run.run_id[:8]}-{len(run.evidence) + 1:03d}",
528+
file_path=doc.uri or "unknown",
529+
start_line=caller.start_line,
530+
end_line=caller.end_line,
531+
content=content[:2000],
532+
reason=f"Caller of '{name}' (line {edge.source_line})",
533+
provenance="symbol_graph",
534+
symbol_id=caller.qualified_name,
535+
symbol_kind=caller.kind.value,
536+
)
537+
run.add_evidence(evidence)
538+
output_parts.append(
539+
f"[{evidence.id}] {caller.kind.value} '{caller.qualified_name}' calls '{name}' at line {edge.source_line}\n {doc.uri}:{caller.start_line}"
540+
)
541+
542+
if not output_parts:
543+
return f"No callers found for '{name}'"
544+
545+
return f"Found {len(output_parts)} caller(s):\n" + "\n".join(output_parts)
546+
547+
except Exception as e:
548+
logger.warning("symbol_callers failed: %s", e)
549+
return f"Symbol callers failed: {e}"
550+
551+
@tool
552+
async def symbol_callees(name: str, file_path: str | None = None) -> str:
553+
"""Find all functions/methods that a given symbol calls.
554+
555+
Uses the pre-indexed symbol graph (SymbolEdge table).
556+
Returns callees as a list.
557+
"""
558+
try:
559+
from contextmine_core.database import get_async_session
560+
from contextmine_core.models import Document, Symbol, SymbolEdge, SymbolEdgeType
561+
from sqlalchemy import select
562+
from sqlalchemy.orm import selectinload
444563

445-
symbol = find_enclosing_symbol(file_path, line)
564+
async with get_async_session() as session:
565+
# Find the source symbol
566+
stmt = select(Symbol).join(Document).where(Symbol.name == name)
567+
if file_path:
568+
stmt = stmt.where(Document.uri == file_path)
569+
stmt = stmt.options(selectinload(Symbol.outgoing_edges))
570+
result = await session.execute(stmt)
571+
source_symbols = result.scalars().all()
572+
573+
if not source_symbols:
574+
return f"Symbol '{name}' not found in index"
575+
576+
output_parts = []
577+
for source in source_symbols[:3]:
578+
# Get outgoing CALLS edges
579+
edges_stmt = (
580+
select(SymbolEdge)
581+
.where(SymbolEdge.source_symbol_id == source.id)
582+
.where(SymbolEdge.edge_type == SymbolEdgeType.CALLS)
583+
.options(
584+
selectinload(SymbolEdge.target_symbol).selectinload(Symbol.document)
585+
)
586+
)
587+
edges_result = await session.execute(edges_stmt)
588+
edges = edges_result.scalars().all()
446589

447-
if not symbol:
448-
return f"Line {line} is not inside any symbol in {file_path}"
590+
for edge in edges[:10]:
591+
callee = edge.target_symbol
592+
doc = callee.document
593+
sig = f" - {callee.signature}" if callee.signature else ""
449594

450-
content = get_symbol_content(symbol)
595+
output_parts.append(
596+
f"{callee.kind.value} '{callee.qualified_name}'{sig}\n {doc.uri}:{callee.start_line}"
597+
)
451598

452-
evidence = Evidence(
453-
id=f"ev-{run.run_id[:8]}-{len(run.evidence) + 1:03d}",
454-
file_path=file_path,
455-
start_line=symbol.start_line,
456-
end_line=symbol.end_line,
457-
content=content[:2000],
458-
reason=f"Enclosing {symbol.kind.value} for line {line}",
459-
provenance="treesitter",
460-
symbol_id=symbol.name,
461-
symbol_kind=symbol.kind.value,
462-
)
463-
run.add_evidence(evidence)
599+
if not output_parts:
600+
return f"No callees found for '{name}'"
464601

465-
return f"[{evidence.id}] Line {line} is inside {symbol.kind.value} '{symbol.name}' (L{symbol.start_line}-{symbol.end_line})\n```\n{content[:1000]}\n```"
602+
return f"'{name}' calls {len(output_parts)} function(s):\n" + "\n".join(
603+
output_parts
604+
)
466605

467606
except Exception as e:
468-
logger.warning("ts_enclosing_symbol failed: %s", e)
469-
return f"Tree-sitter enclosing_symbol failed: {e}"
607+
logger.warning("symbol_callees failed: %s", e)
608+
return f"Symbol callees failed: {e}"
470609

471610
# Build tools list
472611
tools = [hybrid_search, open_span, finalize]
@@ -479,13 +618,8 @@ async def ts_enclosing_symbol(file_path: str, line: int) -> str:
479618
except ImportError:
480619
logger.info("LSP tools not available (multilspy not installed)")
481620

482-
# Add Tree-sitter tools (may fail if tree-sitter not available)
483-
try:
484-
from contextmine_core.treesitter import extract_outline # noqa: F401
485-
486-
tools.extend([ts_outline, ts_find_symbol, ts_enclosing_symbol])
487-
except ImportError:
488-
logger.info("Tree-sitter tools not available")
621+
# Symbol index tools always available (use database)
622+
tools.extend([symbol_outline, symbol_find, symbol_callers, symbol_callees])
489623

490624
return tools
491625

@@ -743,28 +877,31 @@ def _build_system_prompt(self, question: str, scope: str | None) -> str:
743877
744878
## Available Tools
745879
746-
### Search & Read
747-
- **hybrid_search** - Search the codebase using BM25 + vector retrieval
748-
- **open_span** - Read specific lines from a file
880+
### RAG Search
881+
- **hybrid_search** - Search the codebase using BM25 + vector retrieval (pre-indexed chunks)
882+
883+
### Symbol Index (pre-indexed via Tree-sitter)
884+
- **symbol_outline** - Get all indexed symbols in a file (functions, classes, methods)
885+
- **symbol_find** - Find a symbol by name across the codebase
886+
- **symbol_callers** - Find all functions that call a given symbol
887+
- **symbol_callees** - Find all functions that a symbol calls
749888
750889
### LSP (Language Server Protocol)
751-
- **lsp_definition** - Jump to where a symbol is defined
752-
- **lsp_references** - Find all usages of a symbol across the codebase
753-
- **lsp_hover** - Get type signature and documentation for a symbol
890+
- **lsp_definition** - Jump to where a symbol is defined (live analysis)
891+
- **lsp_references** - Find all usages of a symbol (live analysis)
892+
- **lsp_hover** - Get type signature and documentation
754893
755-
### Tree-sitter (Code Structure)
756-
- **ts_outline** - Get outline of functions, classes, and symbols in a file
757-
- **ts_find_symbol** - Find a specific symbol by name and get its source code
758-
- **ts_enclosing_symbol** - Find what function/class contains a specific line
894+
### Read
895+
- **open_span** - Read specific lines from a file
759896
760897
### Finalize
761898
- **finalize** - Submit your final answer with citations
762899
763900
## Instructions
764901
765-
1. Start by searching for relevant code using hybrid_search
766-
2. Use LSP tools to navigate definitions and find usages
767-
3. Use Tree-sitter tools to understand file structure
902+
1. Start by searching for relevant code using hybrid_search (RAG)
903+
2. Use symbol_* tools to navigate the pre-indexed code graph
904+
3. Use LSP tools for precise definition/reference lookups
768905
4. Use open_span to examine specific code sections in detail
769906
5. Collect evidence until you can confidently answer the question
770907
6. Call finalize with your answer including citation IDs like [ev-abc-001]

0 commit comments

Comments
 (0)