Skip to content

Commit d456553

Browse files
authored
feat: add collection language config (#1436)
1 parent 2710a81 commit d456553

File tree

6 files changed

+48
-28
lines changed

6 files changed

+48
-28
lines changed

aperag/api/components/schemas/collection.yaml

Lines changed: 10 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -113,11 +113,6 @@ knowledgeGraphConfig:
113113
type: object
114114
description: Configuration for knowledge graph generation
115115
properties:
116-
language:
117-
type: string
118-
description: Language for entity extraction and query responses
119-
default: "simplified chinese"
120-
example: "simplified chinese"
121116
entity_types:
122117
type: array
123118
items:
@@ -157,8 +152,17 @@ collectionConfig:
157152
allOf:
158153
- $ref: '#/knowledgeGraphConfig'
159154
default:
160-
language: "simplified chinese"
161155
entity_types: ["organization", "person", "geo", "event", "product", "technology", "date", "category"]
156+
language:
157+
type: string
158+
description: Language for the collection content and processing
159+
default: "zh-CN"
160+
enum:
161+
- "zh-CN"
162+
- "en-US"
163+
- "ja-JP"
164+
- "ko-KR"
165+
example: "zh-CN"
162166
embedding:
163167
$ref: './model.yaml#/modelSpec'
164168
completion:

aperag/graph/lightrag_manager.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -83,9 +83,12 @@ async def create_lightrag_instance(collection: Collection) -> LightRAG:
8383
kg_config = config.knowledge_graph_config
8484
language = LightRAGConfig.DEFAULT_LANGUAGE
8585
entity_types = PROMPTS["DEFAULT_ENTITY_TYPES"]
86+
87+
# Use collection-level language if available
88+
if config.language:
89+
language = config.language
90+
8691
if kg_config:
87-
if kg_config.language:
88-
language = kg_config.language
8992
if kg_config.entity_types:
9093
entity_types = kg_config.entity_types
9194

aperag/schema/view_models.py

Lines changed: 7 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@
1414

1515
# generated by datamodel-codegen:
1616
# filename: openapi.merged.yaml
17-
# timestamp: 2026-01-29T03:34:01+00:00
17+
# timestamp: 2026-02-03T09:01:18+00:00
1818

1919
from __future__ import annotations
2020

@@ -71,11 +71,6 @@ class KnowledgeGraphConfig(BaseModel):
7171
Configuration for knowledge graph generation
7272
"""
7373

74-
language: Optional[str] = Field(
75-
'simplified chinese',
76-
description='Language for entity extraction and query responses',
77-
examples=['simplified chinese'],
78-
)
7974
entity_types: Optional[list[str]] = Field(
8075
[
8176
'organization',
@@ -114,7 +109,6 @@ class CollectionConfig(BaseModel):
114109
knowledge_graph_config: Optional[KnowledgeGraphConfig] = Field(
115110
default_factory=lambda: KnowledgeGraphConfig.model_validate(
116111
{
117-
'language': 'simplified chinese',
118112
'entity_types': [
119113
'organization',
120114
'person',
@@ -124,10 +118,15 @@ class CollectionConfig(BaseModel):
124118
'technology',
125119
'date',
126120
'category',
127-
],
121+
]
128122
}
129123
)
130124
)
125+
language: Optional[Literal['zh-CN', 'en-US', 'ja-JP', 'ko-KR']] = Field(
126+
'zh-CN',
127+
description='Language for the collection content and processing',
128+
examples=['zh-CN'],
129+
)
131130
embedding: Optional[ModelSpec] = None
132131
completion: Optional[ModelSpec] = None
133132
path: Optional[str] = Field(None, description='Path for local and ftp sources')

web/src/api/models/collection-config.ts

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -68,6 +68,12 @@ export interface CollectionConfig {
6868
* @memberof CollectionConfig
6969
*/
7070
'knowledge_graph_config'?: KnowledgeGraphConfig;
71+
/**
72+
* Language for the collection content and processing
73+
* @type {string}
74+
* @memberof CollectionConfig
75+
*/
76+
'language'?: CollectionConfigLanguageEnum;
7177
/**
7278
*
7379
* @type {ModelSpec}
@@ -201,3 +207,13 @@ export interface CollectionConfig {
201207
'space_id'?: string;
202208
}
203209

210+
export const CollectionConfigLanguageEnum = {
211+
zh_CN: 'zh-CN',
212+
en_US: 'en-US',
213+
ja_JP: 'ja-JP',
214+
ko_KR: 'ko-KR'
215+
} as const;
216+
217+
export type CollectionConfigLanguageEnum = typeof CollectionConfigLanguageEnum[keyof typeof CollectionConfigLanguageEnum];
218+
219+

web/src/api/models/knowledge-graph-config.ts

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -20,12 +20,6 @@
2020
* @interface KnowledgeGraphConfig
2121
*/
2222
export interface KnowledgeGraphConfig {
23-
/**
24-
* Language for entity extraction and query responses
25-
* @type {string}
26-
* @memberof KnowledgeGraphConfig
27-
*/
28-
'language'?: string;
2923
/**
3024
* List of entity types to extract during graph indexing
3125
* @type {Array<string>}

web/src/api/openapi.merged.yaml

Lines changed: 10 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -4580,11 +4580,6 @@ components:
45804580
type: object
45814581
description: Configuration for knowledge graph generation
45824582
properties:
4583-
language:
4584-
type: string
4585-
description: Language for entity extraction and query responses
4586-
default: simplified chinese
4587-
example: simplified chinese
45884583
entity_types:
45894584
type: array
45904585
items:
@@ -4635,7 +4630,6 @@ components:
46354630
allOf:
46364631
- $ref: '#/components/schemas/knowledgeGraphConfig'
46374632
default:
4638-
language: simplified chinese
46394633
entity_types:
46404634
- organization
46414635
- person
@@ -4645,6 +4639,16 @@ components:
46454639
- technology
46464640
- date
46474641
- category
4642+
language:
4643+
type: string
4644+
description: Language for the collection content and processing
4645+
default: zh-CN
4646+
enum:
4647+
- zh-CN
4648+
- en-US
4649+
- ja-JP
4650+
- ko-KR
4651+
example: zh-CN
46484652
embedding:
46494653
$ref: '#/components/schemas/modelSpec'
46504654
completion:

0 commit comments

Comments
 (0)