@@ -356,117 +356,256 @@ async def lsp_hover(file_path: str, line: int, column: int = 0) -> str:
356356 return f"LSP hover failed: { e } "
357357
358358 # =========================================================================
359- # TREE-SITTER TOOLS
359+ # SYMBOL INDEX TOOLS (use pre-indexed data from database)
360360 # =========================================================================
361361
@tool
async def symbol_outline(file_path: str) -> str:
    """Get the outline of all indexed functions, classes, and symbols in a file.

    Reads the pre-indexed symbol database instead of parsing the file.
    Returns one line per symbol with its kind, name, line range and, when
    one is stored, its signature. At most 40 lines are listed.
    """
    try:
        from contextmine_core.database import get_async_session
        from contextmine_core.models import Document, Symbol
        from sqlalchemy import select

        async with get_async_session() as session:
            # The index keys documents by URI, so file_path must match exactly.
            document = (
                await session.execute(select(Document).where(Document.uri == file_path))
            ).scalar_one_or_none()
            if not document:
                return f"File not found in index: {file_path}"

            # Source order makes the outline read top-to-bottom like the file.
            ordered_query = (
                select(Symbol)
                .where(Symbol.document_id == document.id)
                .order_by(Symbol.start_line)
            )
            symbols = (await session.execute(ordered_query)).scalars().all()
            if not symbols:
                return f"No symbols indexed for {file_path}"

            def render(sym) -> str:
                # Symbols that have a parent are indented one level.
                prefix = "  " if sym.parent_name else ""
                suffix = f" - {sym.signature}" if sym.signature else ""
                return (
                    f"{prefix}{sym.kind.value} {sym.name} "
                    f"(L{sym.start_line}-{sym.end_line}){suffix}"
                )

            outline_lines = [render(sym) for sym in symbols]
            hidden = len(outline_lines) - 40

            summary = f"Found {len(symbols)} indexed symbols:\n" + "\n".join(outline_lines[:40])
            if hidden > 0:
                summary += f"\n... and {hidden} more"
            return summary

    except Exception as e:
        # Tool errors are returned as text so the calling agent can react to them.
        logger.warning("symbol_outline failed: %s", e)
        return f"Symbol outline failed: {e}"
396410
@tool
async def symbol_find(name: str, file_path: str | None = None) -> str:
    """Find a symbol by name in the indexed codebase.

    Uses the pre-indexed symbol database. Tries an exact name match first and
    falls back to a case-insensitive substring match. Optionally filter by
    file path. Returns the source code of up to 5 matches as evidence.
    """
    run = run_holder["run"]
    try:
        from contextmine_core.database import get_async_session
        from contextmine_core.models import Document, Symbol
        from sqlalchemy import select
        from sqlalchemy.orm import selectinload

        max_shown = 5

        def build_query(name_filter):
            stmt = select(Symbol).join(Document).where(name_filter)
            if file_path:
                stmt = stmt.where(Document.uri == file_path)
            return (
                stmt
                # The document is read for every hit below. Load it eagerly:
                # a lazy relationship load inside an AsyncSession would need
                # implicit IO, which the asyncio extension does not allow.
                .options(selectinload(Symbol.document))
                # Stable order so repeated calls show the same hits.
                .order_by(Document.uri, Symbol.start_line)
                # One extra row tells us whether the result was truncated
                # without fetching every match.
                .limit(max_shown + 1)
            )

        async with get_async_session() as session:
            result = await session.execute(build_query(Symbol.name == name))
            symbols = result.scalars().all()

            if not symbols:
                # Fallback: substring match. Escape LIKE wildcards so that
                # "_" and "%" in identifiers are matched literally.
                escaped = (
                    name.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
                )
                partial = Symbol.name.ilike(f"%{escaped}%", escape="\\")
                result = await session.execute(build_query(partial))
                symbols = result.scalars().all()

            if not symbols:
                return f"Symbol '{name}' not found in index"

            shown = symbols[:max_shown]
            output_parts = []
            for sym in shown:
                doc = sym.document
                # start_line/end_line are used as 1-based inclusive bounds;
                # clamp them to the stored content.
                lines = (doc.content or "").split("\n")
                start_idx = max(0, sym.start_line - 1)
                end_idx = min(len(lines), sym.end_line)
                content = "\n".join(lines[start_idx:end_idx])

                evidence = Evidence(
                    id=f"ev-{run.run_id[:8]}-{len(run.evidence) + 1:03d}",
                    file_path=doc.uri or "unknown",
                    start_line=sym.start_line,
                    end_line=sym.end_line,
                    content=content[:2000],
                    reason=f"Found indexed {sym.kind.value} '{sym.name}'",
                    provenance="symbol_index",
                    symbol_id=sym.qualified_name,
                    symbol_kind=sym.kind.value,
                )
                run.add_evidence(evidence)
                output_parts.append(
                    f"[{evidence.id}] {sym.kind.value} '{sym.qualified_name}' "
                    f"at {doc.uri}:{sym.start_line}-{sym.end_line}\n```\n{content[:800]}\n```"
                )

            # Report the number actually shown, and say so when more exist.
            header = f"Found {len(shown)} symbol(s)"
            if len(symbols) > max_shown:
                header += f" (showing first {max_shown}; more matches exist, pass file_path to narrow)"
            return header + ":\n\n" + "\n\n".join(output_parts)

    except Exception as e:
        # Tool errors are returned as text so the calling agent can react to them.
        logger.warning("symbol_find failed: %s", e)
        return f"Symbol find failed: {e}"
433477
@tool
async def symbol_callers(name: str, file_path: str | None = None) -> str:
    """Find all functions/methods that call a given symbol.

    Uses the pre-indexed symbol graph (SymbolEdge table). Optionally filter
    the target symbol by file path. Considers up to 3 symbols with this name
    and up to 10 callers of each. Returns callers as evidence.
    """
    run = run_holder["run"]
    try:
        from contextmine_core.database import get_async_session
        from contextmine_core.models import Document, Symbol, SymbolEdge, SymbolEdgeType
        from sqlalchemy import select
        from sqlalchemy.orm import selectinload

        max_targets = 3
        max_callers_per_target = 10

        async with get_async_session() as session:
            # Resolve the target symbol(s). Only their ids are needed here, so
            # no relationship is eager-loaded; edges are queried per target
            # below with the CALLS filter applied in SQL.
            stmt = select(Symbol).join(Document).where(Symbol.name == name)
            if file_path:
                stmt = stmt.where(Document.uri == file_path)
            stmt = stmt.limit(max_targets)
            result = await session.execute(stmt)
            target_symbols = result.scalars().all()

            if not target_symbols:
                return f"Symbol '{name}' not found in index"

            output_parts = []
            for target in target_symbols:
                # Incoming CALLS edges. The caller and its document are loaded
                # eagerly because both are read for every edge.
                edges_stmt = (
                    select(SymbolEdge)
                    .where(SymbolEdge.target_symbol_id == target.id)
                    .where(SymbolEdge.edge_type == SymbolEdgeType.CALLS)
                    .options(
                        selectinload(SymbolEdge.source_symbol).selectinload(Symbol.document)
                    )
                    .limit(max_callers_per_target)
                )
                edges_result = await session.execute(edges_stmt)
                edges = edges_result.scalars().all()

                for edge in edges:
                    caller = edge.source_symbol
                    doc = caller.document
                    # start_line/end_line are used as 1-based inclusive bounds;
                    # clamp them to the stored content.
                    lines = (doc.content or "").split("\n")
                    start_idx = max(0, caller.start_line - 1)
                    end_idx = min(len(lines), caller.end_line)
                    content = "\n".join(lines[start_idx:end_idx])

                    evidence = Evidence(
                        id=f"ev-{run.run_id[:8]}-{len(run.evidence) + 1:03d}",
                        file_path=doc.uri or "unknown",
                        start_line=caller.start_line,
                        end_line=caller.end_line,
                        content=content[:2000],
                        reason=f"Caller of '{name}' (line {edge.source_line})",
                        provenance="symbol_graph",
                        symbol_id=caller.qualified_name,
                        symbol_kind=caller.kind.value,
                    )
                    run.add_evidence(evidence)
                    output_parts.append(
                        f"[{evidence.id}] {caller.kind.value} '{caller.qualified_name}' "
                        f"calls '{name}' at line {edge.source_line}\n"
                        f"{doc.uri}:{caller.start_line}"
                    )

            if not output_parts:
                return f"No callers found for '{name}'"

            return f"Found {len(output_parts)} caller(s):\n" + "\n".join(output_parts)

    except Exception as e:
        # Tool errors are returned as text so the calling agent can react to them.
        logger.warning("symbol_callers failed: %s", e)
        return f"Symbol callers failed: {e}"
550+
@tool
async def symbol_callees(name: str, file_path: str | None = None) -> str:
    """Find all functions/methods that a given symbol calls.

    Uses the pre-indexed symbol graph (SymbolEdge table). Optionally filter
    the source symbol by file path. Considers up to 3 symbols with this name
    and up to 10 callees of each. Returns callees as a list.
    """
    try:
        from contextmine_core.database import get_async_session
        from contextmine_core.models import Document, Symbol, SymbolEdge, SymbolEdgeType
        from sqlalchemy import select
        from sqlalchemy.orm import selectinload

        max_sources = 3
        max_callees_per_source = 10

        async with get_async_session() as session:
            # Resolve the calling symbol(s). Only their ids are needed here, so
            # no relationship is eager-loaded; edges are queried per source
            # below with the CALLS filter applied in SQL.
            stmt = select(Symbol).join(Document).where(Symbol.name == name)
            if file_path:
                stmt = stmt.where(Document.uri == file_path)
            stmt = stmt.limit(max_sources)
            result = await session.execute(stmt)
            source_symbols = result.scalars().all()

            if not source_symbols:
                return f"Symbol '{name}' not found in index"

            output_parts = []
            for source in source_symbols:
                # Outgoing CALLS edges. The callee and its document are loaded
                # eagerly because both are read for every edge.
                edges_stmt = (
                    select(SymbolEdge)
                    .where(SymbolEdge.source_symbol_id == source.id)
                    .where(SymbolEdge.edge_type == SymbolEdgeType.CALLS)
                    .options(
                        selectinload(SymbolEdge.target_symbol).selectinload(Symbol.document)
                    )
                    .limit(max_callees_per_source)
                )
                edges_result = await session.execute(edges_stmt)
                edges = edges_result.scalars().all()

                for edge in edges:
                    callee = edge.target_symbol
                    doc = callee.document
                    sig = f" - {callee.signature}" if callee.signature else ""

                    output_parts.append(
                        f"{callee.kind.value} '{callee.qualified_name}'{sig}\n"
                        f"{doc.uri}:{callee.start_line}"
                    )

            if not output_parts:
                return f"No callees found for '{name}'"

            return f"'{name}' calls {len(output_parts)} function(s):\n" + "\n".join(
                output_parts
            )

    except Exception as e:
        # Tool errors are returned as text so the calling agent can react to them.
        logger.warning("symbol_callees failed: %s", e)
        return f"Symbol callees failed: {e}"
470609
471610 # Build tools list
472611 tools = [hybrid_search , open_span , finalize ]
@@ -479,13 +618,8 @@ async def ts_enclosing_symbol(file_path: str, line: int) -> str:
479618 except ImportError :
480619 logger .info ("LSP tools not available (multilspy not installed)" )
481620
482- # Add Tree-sitter tools (may fail if tree-sitter not available)
483- try :
484- from contextmine_core .treesitter import extract_outline # noqa: F401
485-
486- tools .extend ([ts_outline , ts_find_symbol , ts_enclosing_symbol ])
487- except ImportError :
488- logger .info ("Tree-sitter tools not available" )
621+ # Symbol index tools always available (use database)
622+ tools .extend ([symbol_outline , symbol_find , symbol_callers , symbol_callees ])
489623
490624 return tools
491625
@@ -743,28 +877,31 @@ def _build_system_prompt(self, question: str, scope: str | None) -> str:
743877
744878## Available Tools
745879
746- ### Search & Read
747- - **hybrid_search** - Search the codebase using BM25 + vector retrieval
748- - **open_span** - Read specific lines from a file
880+ ### RAG Search
881+ - **hybrid_search** - Search the codebase using BM25 + vector retrieval (pre-indexed chunks)
882+
883+ ### Symbol Index (pre-indexed via Tree-sitter)
884+ - **symbol_outline** - Get all indexed symbols in a file (functions, classes, methods)
885+ - **symbol_find** - Find a symbol by name across the codebase
886+ - **symbol_callers** - Find all functions that call a given symbol
887+ - **symbol_callees** - Find all functions that a symbol calls
749888
750889### LSP (Language Server Protocol)
751- - **lsp_definition** - Jump to where a symbol is defined
752- - **lsp_references** - Find all usages of a symbol across the codebase
753- - **lsp_hover** - Get type signature and documentation for a symbol
890+ - **lsp_definition** - Jump to where a symbol is defined (live analysis)
891+ - **lsp_references** - Find all usages of a symbol (live analysis)
892+ - **lsp_hover** - Get type signature and documentation
754893
755- ### Tree-sitter (Code Structure)
756- - **ts_outline** - Get outline of functions, classes, and symbols in a file
757- - **ts_find_symbol** - Find a specific symbol by name and get its source code
758- - **ts_enclosing_symbol** - Find what function/class contains a specific line
894+ ### Read
895+ - **open_span** - Read specific lines from a file
759896
760897### Finalize
761898- **finalize** - Submit your final answer with citations
762899
763900## Instructions
764901
765- 1. Start by searching for relevant code using hybrid_search
766- 2. Use LSP tools to navigate definitions and find usages
767- 3. Use Tree-sitter tools to understand file structure
902+ 1. Start by searching for relevant code using hybrid_search (RAG)
903+ 2. Use symbol_* tools to navigate the pre-indexed code graph
904+ 3. Use LSP tools for precise definition/reference lookups
7689054. Use open_span to examine specific code sections in detail
7699065. Collect evidence until you can confidently answer the question
7709076. Call finalize with your answer including citation IDs like [ev-abc-001]
0 commit comments