@@ -13,6 +13,10 @@ import cc.unitmesh.agent.tool.impl.CodebaseInsightsTool
1313import cc.unitmesh.agent.tool.impl.HotFileInfo
1414import cc.unitmesh.agent.tool.schema.DeclarativeToolSchema
1515import cc.unitmesh.agent.tool.schema.SchemaPropertyBuilder.string
16+ import cc.unitmesh.codegraph.model.CodeElementType
17+ import cc.unitmesh.codegraph.model.CodeNode
18+ import cc.unitmesh.codegraph.parser.CodeParser
19+ import cc.unitmesh.codegraph.parser.Language
1620import cc.unitmesh.devins.filesystem.ProjectFileSystem
1721import cc.unitmesh.devins.parser.CodeFence
1822import cc.unitmesh.indexer.DomainDictService
@@ -68,22 +72,25 @@ data class DomainDictCallbacks(
6872)
6973
7074/* *
71- * DomainDictAgent - Simple, DDD-focused domain dictionary generator
75+ * DomainDictAgent - DDD-focused domain dictionary generator
7276 *
73- * Design principles:
74- * 1. Extract REAL data from codebase (class names, patterns)
75- * 2. Filter and clean (remove generic terms, tests)
76- * 3. Use AI ONLY for translation/description (with strict input)
77+ * Design Principles (DDD perspective):
78+ * 1. Extract REAL business entities from code (not technical infrastructure)
79+ * 2. Focus on HOT FILES (frequently changed = core business logic)
80+ * 3. Use TreeSitter to parse class/function names from important files
81+ * 4. Filter out technical suffixes (Controller, Service, Repository, etc.)
82+ * 5. AI only translates business concepts, NOT implementation details
7783 *
7884 * 3-Step Process:
79- * 1. Analyze: Scan codebase for meaningful class/concept names
80- * 2. Generate: Use AI to translate names to Chinese with descriptions
85+ * 1. Analyze: Scan Git history for hot files, use TreeSitter to extract class/function names
86+ * 2. Generate: Use AI with DDD principles to translate business concepts
8187 * 3. Save: Merge with existing dictionary
8288 */
8389class DomainDictAgent (
8490 private val llmService : KoogLLMService ,
8591 private val fileSystem : ProjectFileSystem ,
8692 private val domainDictService : DomainDictService ,
93+ private val codeParser : CodeParser ? = null ,
8794 maxDefaultIterations : Int = 1 ,
8895 private val enableStreaming : Boolean = true
8996) : SubAgent<DomainDictContext, ToolResult.AgentResult>(
@@ -271,28 +278,41 @@ class DomainDictAgent(
271278 return result
272279 }
273280
274- private fun extractMeaningfulNames (
281+ /* *
282+ * Extract meaningful names using TreeSitter parsing on hot files
283+ * Priority: Hot files (frequently changed) contain core business logic
284+ */
285+ private suspend fun extractMeaningfulNames (
275286 insights : CodebaseInsightsResult ,
276287 onProgress : (String ) -> Unit
277288 ): List <String > {
278289 val names = mutableSetOf<String >()
279290
280- // 1. Extract from hot file names (most important)
291+ // 1. Use TreeSitter to parse hot files and extract class/function names
292+ if (codeParser != null ) {
293+ onProgress(" 🌲 Using TreeSitter to parse hot files..." )
294+ val hotFilesWithCode = parseHotFilesWithTreeSitter(insights.hotFiles, onProgress)
295+ names.addAll(hotFilesWithCode)
296+ }
297+
298+ // 2. Also extract from file and class names (runs whether or not TreeSitter parsing was used)
281299 for (file in insights.hotFiles) {
282300 val fileName = file.path.substringAfterLast(" /" ).substringBeforeLast(" ." )
283- if (isValidDomainName(fileName)) {
284- names.add(fileName)
301+ val domainName = extractDomainFromFileName(fileName)
302+ if (domainName != null && isValidDomainName(domainName)) {
303+ names.add(domainName)
285304 }
286305
287306 // Extract class name if available
288307 file.className?.let { className ->
289- if (isValidDomainName(className)) {
290- names.add(className)
308+ val extracted = extractDomainFromClassName(className)
309+ if (extracted != null && isValidDomainName(extracted)) {
310+ names.add(extracted)
291311 }
292312 }
293313 }
294314
295- // 2 . Extract from domain concepts (filtered)
315+ // 3 . Extract from domain concepts (filtered)
296316 for (concept in insights.domainConcepts) {
297317 if (isValidDomainName(concept.name) && concept.occurrences >= 2 ) {
298318 names.add(concept.name)
@@ -302,14 +322,149 @@ class DomainDictAgent(
302322 return names.toList().sortedBy { it }
303323 }
304324
/**
 * Parses the leading hot files with the configured [codeParser] and collects candidate
 * domain names from the declarations found in them.
 *
 * Only the first 30 entries of [hotFiles] are considered. A file is skipped when
 * [detectLanguage] does not recognise its extension or when its content cannot be read.
 * Class, interface and enum names are reduced with [extractDomainFromClassName]; method and
 * function names are reduced with [extractDomainFromMethodName]. A candidate is kept only if
 * [isValidDomainName] accepts it.
 *
 * Parsing is best-effort: a failure on one file does not prevent the remaining files from
 * being processed. Coroutine cancellation is propagated instead of being swallowed.
 *
 * @param hotFiles hot files reported by the codebase insights
 * @param onProgress receives a one-line summary when at least one file was parsed
 * @return the distinct accepted names; empty when no [codeParser] is configured
 */
private suspend fun parseHotFilesWithTreeSitter(
    hotFiles: List<HotFileInfo>,
    onProgress: (String) -> Unit
): Set<String> {
    val names = mutableSetOf<String>()
    val parser = codeParser ?: return names

    // Deep analysis is limited to the first 30 hot files.
    val topHotFiles = hotFiles.take(30)
    var parsedCount = 0

    for (file in topHotFiles) {
        val language = detectLanguage(file.path) ?: continue

        try {
            val content = fileSystem.readFile(file.path) ?: continue
            val nodes = parser.parseNodes(content, file.path, language)

            for (node in nodes) {
                val candidate = when (node.type) {
                    CodeElementType.CLASS, CodeElementType.INTERFACE, CodeElementType.ENUM ->
                        extractDomainFromClassName(node.name)
                    CodeElementType.METHOD, CodeElementType.FUNCTION ->
                        extractDomainFromMethodName(node.name)
                    else -> null
                }
                if (candidate != null && isValidDomainName(candidate)) {
                    names.add(candidate)
                }
            }
            parsedCount++
        } catch (e: kotlin.coroutines.cancellation.CancellationException) {
            // Cancellation must reach the caller; otherwise a cancelled job would keep
            // reading and parsing the remaining files.
            throw e
        } catch (e: Exception) {
            // Best-effort: skip files that fail to read or parse.
        }
    }

    if (parsedCount > 0) {
        onProgress("📦 Parsed $parsedCount hot files, found ${names.size} domain concepts")
    }

    return names
}
378+
/**
 * Maps a file path to a [Language] based on the text after its last dot, compared
 * case-insensitively.
 *
 * @param filePath path whose extension is inspected
 * @return the matching language, or `null` when the path contains no dot or the extension
 *         is not one of the listed ones
 */
private fun detectLanguage(filePath: String): Language? =
    when (filePath.substringAfterLast(".", "").lowercase()) {
        "kt", "kts" -> Language.KOTLIN
        "java" -> Language.JAVA
        "ts", "tsx" -> Language.TYPESCRIPT
        "js", "jsx" -> Language.JAVASCRIPT
        "py" -> Language.PYTHON
        "go" -> Language.GO
        "rs" -> Language.RUST
        else -> null
    }
395+
/**
 * Reduces a file (or class) name to its domain part by removing trailing technical suffixes.
 *
 * Suffixes are removed repeatedly, so stacked names are fully reduced:
 * - `"DomainDictAgent"` -> `"DomainDict"`
 * - `"UserServiceImpl"` -> `"User"`
 * - `"OrderControllerTest"` -> `"Order"`
 *
 * A suffix is only removed when something remains in front of it, so a name that consists of
 * a suffix alone (e.g. `"Service"`) is returned unchanged.
 *
 * @param fileName name without directory or extension
 * @return the reduced name, or `null` when fewer than 3 characters remain
 */
private fun extractDomainFromFileName(fileName: String): String? {
    val suffixes = listOf(
        "Controller", "Service", "Repository", "Dao", "Mapper",
        "Impl", "Helper", "Utils", "Util", "Factory", "Builder",
        "Handler", "Listener", "Adapter", "Wrapper", "Provider",
        "Agent", "Tool", "Config", "Configuration", "Settings",
        "Test", "Spec", "Mock", "Fake", "Stub"
    )

    var name = fileName
    // Each pass removes one trailing suffix; the name gets strictly shorter, so this terminates.
    while (true) {
        val suffix = suffixes.firstOrNull { name.length > it.length && name.endsWith(it) } ?: break
        name = name.removeSuffix(suffix)
    }

    return if (name.length >= 3) name else null
}
420+
/**
 * Reduces a class name to its domain part.
 *
 * Class names follow the same rules as file names, so this delegates to
 * [extractDomainFromFileName].
 *
 * @param className simple class name
 * @return whatever [extractDomainFromFileName] returns for [className]
 */
private fun extractDomainFromClassName(className: String): String? =
    extractDomainFromFileName(className)
427+
/**
 * Derives a domain concept from a method name by dropping one leading verb-like prefix.
 *
 * Examples:
 * - `"createBlogPost"` -> `"BlogPost"`
 * - `"validatePayment"` -> `"Payment"`
 *
 * A prefix counts only when the character right after it is upper case, so `"tokenize"` is
 * not treated as `"to"` + `"kenize"`. At most one prefix is removed, chosen in list order.
 *
 * @param methodName method or function name as written in code
 * @return the remaining name when it has at least 4 characters and starts with an upper-case
 *         letter; otherwise `null`
 */
private fun extractDomainFromMethodName(methodName: String): String? {
    val verbPrefixes = listOf(
        "get", "set", "is", "has", "can", "should", "will",
        "create", "update", "delete", "find", "fetch", "load",
        "save", "add", "remove", "build", "parse", "validate",
        "check", "process", "handle", "execute", "run", "init",
        "on", "to", "from"
    )

    // First prefix (in list order) that is followed by an upper-case character.
    val matched = verbPrefixes.firstOrNull { candidate ->
        methodName.length > candidate.length &&
            methodName.startsWith(candidate) &&
            methodName[candidate.length].isUpperCase()
    }
    val concept = if (matched == null) methodName else methodName.substring(matched.length)

    return concept.takeIf { it.length >= 4 && it[0].isUpperCase() }
}
456+
305457 /* *
306458 * Check if a name is a valid domain concept (not a generic term)
459+ * Using DDD principles to filter out technical infrastructure
307460 */
308461 private fun isValidDomainName (name : String ): Boolean {
309462 if (name.length < 4 ) return false // Skip very short names
310463 if (name.length > 50 ) return false
311464
312- // Skip generic/common terms
465+ val lowerName = name.lowercase()
466+
467+ // Skip generic/common terms (infrastructure, not domain)
313468 val skipTerms = setOf (
314469 // Testing
315470 " test" , " tests" , " spec" , " mock" , " stub" , " fake" ,
@@ -335,25 +490,42 @@ class DomainDictAgent(
335490 " button" , " text" , " label" , " field" , " input" , " output" ,
336491 " editor" , " renderer" , " painter" , " drawer" ,
337492 " exception" , " error" , " warning" , " message" ,
338- " checks" , " diff" , " check"
493+ " checks" , " diff" , " check" , " unknown "
339494 )
340495
341- val lowerName = name.lowercase()
342-
343496 // Exact match skip
344497 if (lowerName in skipTerms) return false
345498
346- // Skip IntelliJ platform concepts
499+ // Skip IntelliJ platform concepts (infrastructure)
347500 val platformTerms = setOf (
348501 " anaction" , " applicationmanager" , " project" , " psifile" , " psielement" ,
349502 " virtualfile" , " document" , " editor" , " intention" , " inspection" ,
350503 " psiclass" , " psimethod" , " psifield" , " psitype" , " psivariable" ,
351504 " language" , " filetype" , " module" , " facet" , " artifact" ,
352505 " toolwindow" , " notification" , " progress" , " indicator" ,
353- " runnable" , " callable" , " future" , " promise" , " deferred"
506+ " runnable" , " callable" , " future" , " promise" , " deferred" ,
507+ // JetBrains specific
508+ " jbcolor" , " jbinsets" , " jbui" , " jbpopup" , " jblist" ,
509+ // Java Swing/AWT
510+ " jcomponent" , " jpanel" , " jbutton" , " jlabel" , " jframe" ,
511+ " swing" , " awt" , " graphics"
354512 )
355513 if (platformTerms.any { lowerName.contains(it) }) return false
356514
515+ // Skip technical suffixes that indicate infrastructure
516+ val technicalSuffixes = setOf (
517+ " controller" , " service" , " repository" , " dao" , " mapper" ,
518+ " dto" , " vo" , " po" , " entity" , " request" , " response" ,
519+ " config" , " configuration" , " settings" , " properties" ,
520+ " handler" , " listener" , " callback" , " adapter" , " wrapper" ,
521+ " factory" , " builder" , " provider" , " manager" , " registry" ,
522+ " helper" , " util" , " utils" , " tool" , " tools" ,
523+ " impl" , " implementation" , " abstract" , " base" , " default" ,
524+ " exception" , " error" , " filter" , " interceptor" ,
525+ " capable" , " aware" , " enabled" , " disabled"
526+ )
527+ if (technicalSuffixes.any { lowerName.endsWith(it) }) return false
528+
357529 // Contains skip (for compound names like "TestHelper")
358530 val containsSkip = setOf (" test" , " spec" , " mock" , " fake" , " stub" , " factory" , " util" )
359531 if (containsSkip.any { lowerName.contains(it) }) return false
@@ -384,27 +556,48 @@ class DomainDictAgent(
384556
385557 val namesList = names.joinToString(" \n " ) { " - $it " }
386558
559+ // DDD-focused prompt, inspired by indexer.vm
387560 val prompt = """
388- 你是一个技术文档翻译专家。请将以下代码中的类名/概念名翻译成简洁的中文术语。
389-
390- ## 要翻译的名称:
561+ 你是一个 DDD(领域驱动设计)专家,负责构建业务导向的中英文词典。请从以下代码名称中提取重要的业务概念。
562+
563+ **提取原则:**
564+
565+ ✅ 应该提取的内容:
566+ - 核心业务实体(如:Blog、Comment、Payment、User 等名词)
567+ - 业务概念和领域模型(如:Member、Points、Order)
568+ - 难以理解的词汇或拼音缩写
569+ - 领域特定术语
570+
571+ ❌ 应该排除的内容:
572+ 1. 技术词汇:Controller、Service、Repository、Mapper、DTO、VO、PO、Entity、Request、Response、Config 等
573+ 2. 实现细节和数据传输对象:包含 "Request"、"Response"、"Dto"、"Entity" 后缀的条目
574+ 3. 技术操作动词:validate、check、convert、deserialize、serialize、encode、decode 等
575+ 4. 方法名中的技术操作:如 "checkIfVipAccount" 应只提取 "VIP Account"
576+ 5. 通用库 API(如 Spring、OkHttp)和通用类名(如 List、Map)
577+
578+ **处理规则:**
579+ 1. 如果提取的条目包含技术后缀(如 "CreateCommentDto"),转换为纯业务概念(如 "Comment")
580+ 2. 如果方法名包含技术操作(如 "checkIfVipAccount"),提取业务含义("VIP Account")
581+ 3. 如果类名包含技术词汇后缀,移除后缀再添加到词典
582+
583+ ## 要分析的名称:
391584$namesList
392-
585+
393586## 输出格式 (JSON):
394- ```json
395- {
396- "entries": [
397- {"chinese": "中文术语 ", "codeTranslation": "ClassName ", "description": "一句话描述功能 "}
398- ]
399- }
400- ```
401-
402- ## 规则 :
403- 1. chinese: 简洁的中文术语( 2-6个字)
404- 2. codeTranslation: 保持原始类名
405- 3. description: 一句话描述(不超过30字)
406- 4. 只翻译有意义的领域概念
407- 5. 跳过无法理解或太通用的名称
587+ ```json
588+ {
589+ "entries": [
590+ {"chinese": "博客 ", "codeTranslation": "Blog ", "description": "博客文章 "}
591+ ]
592+ }
593+ ```
594+
595+ ## 输出规则 :
596+ 1. chinese: 简洁的中文术语( 2-6个字)
597+ 2. codeTranslation: 纯业务概念名(移除技术后缀)
598+ 3. description: 一句话业务描述(不超过20字)
599+ 4. 只输出有意义的业务概念,跳过技术实现细节
600+ 5. 如果无法理解或太通用,直接跳过不输出
408601
409602请直接输出JSON,不要其他解释。
410603 """ .trimIndent()
0 commit comments