Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 5 additions & 7 deletions .claude/skills/dingo-verify/scripts/fact_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,13 +169,11 @@ def build_config(
"key": api_key,
"model": model,
"api_url": api_url,
"parameters": {
"temperature": 0,
"agent_config": {
"max_concurrent_claims": max_concurrent,
"max_iterations": 50,
"tools": tools_config,
}
"temperature": 0,
"agent_config": {
"max_concurrent_claims": max_concurrent,
"max_iterations": 50,
"tools": tools_config,
}
}
}]
Expand Down
12 changes: 5 additions & 7 deletions clawhub/scripts/fact_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,13 +164,11 @@ def build_config(
"key": api_key,
"model": model,
"api_url": api_url,
"parameters": {
"temperature": 0,
"agent_config": {
"max_concurrent_claims": max_concurrent,
"max_iterations": 50,
"tools": tools_config,
}
"temperature": 0,
"agent_config": {
"max_concurrent_claims": max_concurrent,
"max_iterations": 50,
"tools": tools_config,
}
}
}]
Expand Down
3 changes: 2 additions & 1 deletion dingo/config/input_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,10 +102,11 @@ class EmbeddingConfigArgs(BaseModel):


class EvaluatorLLMArgs(BaseModel):
model_config = {"extra": "allow"}

model: Optional[str] = None
key: Optional[str] = None
api_url: Optional[str] = None
parameters: Optional[dict] = None
embedding_config: Optional[EmbeddingConfigArgs] = None


Expand Down
53 changes: 25 additions & 28 deletions dingo/model/llm/agent/agent_article_fact_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,23 +343,21 @@ class ArticleFactChecker(BaseAgent):
"config": {
"key": "your-openai-api-key",
"model": "gpt-4o-mini",
"parameters": {
"agent_config": {
"max_iterations": 10,
"overall_timeout": 900,
"max_concurrent_claims": 5,
"tools": {
"claims_extractor": {
"api_key": "your-openai-api-key",
"max_claims": 50,
"claim_types": ["factual", "institutional", "statistical", "attribution"]
},
"tavily_search": {
"api_key": "your-tavily-api-key",
"max_results": 5
},
"arxiv_search": {"max_results": 5}
}
"agent_config": {
"max_iterations": 10,
"overall_timeout": 900,
"max_concurrent_claims": 5,
"tools": {
"claims_extractor": {
"api_key": "your-openai-api-key",
"max_claims": 50,
"claim_types": ["factual", "institutional", "statistical", "attribution"]
},
"tavily_search": {
"api_key": "your-tavily-api-key",
"max_results": 5
},
"arxiv_search": {"max_results": 5}
}
}
}
Expand Down Expand Up @@ -399,8 +397,8 @@ def _get_output_dir(cls) -> Optional[str]:
Returns:
Output directory path (created if needed), or None if saving is disabled.
"""
params = cls.dynamic_config.parameters or {}
agent_cfg = params.get('agent_config') or {}
extra_params = cls.dynamic_config.model_extra
agent_cfg = extra_params.get('agent_config') or {}

explicit_path = agent_cfg.get('output_path')
if explicit_path:
Expand Down Expand Up @@ -821,9 +819,8 @@ def eval(cls, input_data: Data) -> EvalDetail:
output_dir = cls._get_output_dir()

if cls.dynamic_config:
if cls.dynamic_config.parameters is None:
cls.dynamic_config.parameters = {}
cls.dynamic_config.parameters.setdefault("temperature", 0)
if 'temperature' not in cls.dynamic_config.model_extra:
cls.dynamic_config.temperature = 0

if output_dir and input_data.content:
cls._save_article_content(output_dir, input_data.content)
Expand Down Expand Up @@ -946,8 +943,8 @@ async def _async_extract_claims(cls, input_data: Data) -> List[Dict]:
"""
from dingo.model.llm.agent.tools.claims_extractor import ClaimsExtractor, ClaimsExtractorConfig

params = cls.dynamic_config.parameters or {}
agent_cfg = params.get('agent_config') or {}
extra_params = cls.dynamic_config.model_extra
agent_cfg = extra_params.get('agent_config') or {}
extractor_cfg = agent_cfg.get('tools', {}).get('claims_extractor', {})

config_kwargs: Dict[str, Any] = {
Expand Down Expand Up @@ -1043,8 +1040,8 @@ async def _async_verify_single_claim(
@classmethod
def _get_max_concurrent_claims(cls) -> int:
"""Read max_concurrent_claims from agent_config or use class default."""
params = cls.dynamic_config.parameters or {}
agent_cfg = params.get('agent_config') or {}
extra_params = cls.dynamic_config.model_extra
agent_cfg = extra_params.get('agent_config') or {}
return agent_cfg.get('max_concurrent_claims', cls.max_concurrent_claims)

@classmethod
Expand All @@ -1054,8 +1051,8 @@ def _get_overall_timeout(cls) -> float:
Returns:
Positive timeout in seconds, clamped to [30, 7200].
"""
params = cls.dynamic_config.parameters or {}
agent_cfg = params.get('agent_config') or {}
extra_params = cls.dynamic_config.model_extra
agent_cfg = extra_params.get('agent_config') or {}
raw = agent_cfg.get('overall_timeout', cls.overall_timeout)
try:
timeout = float(raw)
Expand Down
16 changes: 7 additions & 9 deletions dingo/model/llm/agent/agent_fact_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,15 +70,13 @@ class AgentFactCheck(BaseAgent):
"key": "your-openai-api-key",
"api_url": "https://api.openai.com/v1",
"model": "gpt-4.1-mini-2025-04-14",
"parameters": {
"agent_config": {
"max_iterations": 5,
"tools": {
"tavily_search": {
"api_key": "your-tavily-api-key",
"max_results": 5,
"search_depth": "advanced"
}
"agent_config": {
"max_iterations": 5,
"tools": {
"tavily_search": {
"api_key": "your-tavily-api-key",
"max_results": 5,
"search_depth": "advanced"
}
}
}
Expand Down
16 changes: 7 additions & 9 deletions dingo/model/llm/agent/agent_hallucination.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,15 +82,13 @@ class AgentHallucination(BaseAgent):
"key": "your-openai-api-key",
"api_url": "https://api.openai.com/v1",
"model": "gpt-4.1-mini-2025-04-14",
"parameters": {
"agent_config": {
"max_iterations": 3,
"tools": {
"tavily_search": {
"api_key": "your-tavily-api-key",
"max_results": 5,
"search_depth": "advanced"
}
"agent_config": {
"max_iterations": 3,
"tools": {
"tavily_search": {
"api_key": "your-tavily-api-key",
"max_results": 5,
"search_depth": "advanced"
}
}
}
Expand Down
12 changes: 6 additions & 6 deletions dingo/model/llm/agent/agent_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,22 +327,22 @@ def get_openai_llm_from_dingo_config(dynamic_config):
)

# Extract parameters
params = dynamic_config.parameters or {}
extra_params = dynamic_config.model_extra

# Create ChatOpenAI instance
llm = ChatOpenAI(
api_key=dynamic_config.key,
base_url=dynamic_config.api_url,
model=dynamic_config.model or "gpt-4.1-mini",
temperature=params.get("temperature", 0.3),
max_tokens=params.get("max_tokens", 4096),
top_p=params.get("top_p", 1.0),
timeout=params.get("timeout", 30)
temperature=extra_params.get("temperature", 0.3),
max_tokens=extra_params.get("max_tokens", 4096),
top_p=extra_params.get("top_p", 1.0),
timeout=extra_params.get("timeout", 30)
)

log.debug(
f"Created ChatOpenAI: model={dynamic_config.model}, "
f"temp={params.get('temperature', 0.3)}"
f"temp={extra_params.get('temperature', 0.3)}"
)

return llm
10 changes: 5 additions & 5 deletions dingo/model/llm/agent/base_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,16 +146,16 @@ def get_tool_config(cls, tool_name: str) -> Dict[str, Any]:
Extract tool configuration from agent's dynamic_config.

Configuration is expected in:
dynamic_config.parameters.agent_config.tools.{tool_name}
dynamic_config.agent_config.tools.{tool_name}

Args:
tool_name: Name of the tool

Returns:
Dict of configuration values for the tool
"""
params = cls.dynamic_config.parameters or {}
agent_config = params.get('agent_config', {})
extra_params = cls.dynamic_config.model_extra
agent_config = extra_params.get('agent_config', {})
tools_config = agent_config.get('tools', {})
return tools_config.get(tool_name, {})

Expand Down Expand Up @@ -184,8 +184,8 @@ def get_max_iterations(cls) -> int:
Returns:
Maximum number of iterations allowed
"""
params = cls.dynamic_config.parameters or {}
agent_config = params.get('agent_config', {})
extra_params = cls.dynamic_config.model_extra
agent_config = extra_params.get('agent_config', {})
return agent_config.get('max_iterations', cls.max_iterations)

@classmethod
Expand Down
12 changes: 4 additions & 8 deletions dingo/model/llm/base_openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,22 +82,18 @@ def send_messages(cls, messages: List):
else:
model_name = cls.client.models.list().data[0].id

params = cls.dynamic_config.parameters
cls.validate_config(params)
extra_params = cls.dynamic_config.model_extra
cls.validate_config(extra_params)

completions = cls.client.chat.completions.create(
model=model_name,
messages=messages,
temperature=params.get("temperature", 0.3) if params else 0.3,
top_p=params.get("top_p", 1) if params else 1,
max_tokens=params.get("max_tokens", 4000) if params else 4000,
presence_penalty=params.get("presence_penalty", 0) if params else 0,
frequency_penalty=params.get("frequency_penalty", 0) if params else 0,
**extra_params,
)
Comment on lines +85 to 92
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

Using `**extra_params` directly in `chat.completions.create` is problematic for two reasons:

  1. TypeError Risk: `model_extra` contains all additional fields from the configuration, including evaluator-specific settings like `threshold`, `agent_config`, `min_difficulty`, etc. Passing these unknown keyword arguments to the OpenAI client will cause a `TypeError` and crash the evaluation. This is especially critical for evaluators like `LLMInstructionClarity` or `LLMTaskDifficulty` that store their own parameters in the same config object.
  2. Loss of Defaults: This change removes the previous default values for `temperature` (0.3), `top_p` (1), and `max_tokens` (4000). If these are not explicitly provided in the configuration, the OpenAI client will use its own defaults (e.g., `temperature=1.0`), which may lead to inconsistent or lower-quality results compared to previous versions.

It is safer to explicitly extract the supported LLM parameters with their defaults, ensuring compatibility and maintaining consistent behavior, similar to the implementation in `AgentWrapper.get_openai_llm_from_dingo_config`.

        extra_params = cls.dynamic_config.model_extra or {}
        cls.validate_config(extra_params)

        completions = cls.client.chat.completions.create(
            model=model_name,
            messages=messages,
            temperature=extra_params.get("temperature", 0.3),
            top_p=extra_params.get("top_p", 1),
            max_tokens=extra_params.get("max_tokens", 4000),
            presence_penalty=extra_params.get("presence_penalty", 0),
            frequency_penalty=extra_params.get("frequency_penalty", 0),
        )


if completions.choices[0].finish_reason == "length":
raise ExceedMaxTokens(
f"Exceed max tokens: {params.get('max_tokens', 4000) if params else 4000}"
f"Exceed max tokens: {extra_params.get('max_tokens', 4000)}"
)

return str(completions.choices[0].message.content)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -283,8 +283,8 @@ def process_response(cls, response: str) -> EvalDetail:

# 判断是否通过(默认阈值 6.0)
threshold = 6.0
if hasattr(cls, 'dynamic_config') and cls.dynamic_config.parameters:
threshold = cls.dynamic_config.parameters.get('threshold', 6.0)
if hasattr(cls, 'dynamic_config'):
threshold = cls.dynamic_config.model_extra.get('threshold', 6.0)

if score >= threshold:
result.status = False
Expand Down
8 changes: 4 additions & 4 deletions dingo/model/llm/instruction_quality/llm_task_difficulty.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,14 +321,14 @@ def process_response(cls, response: str) -> EvalDetail:

# 难度评估没有"通过/不通过"的概念,只是描述性的
# 但为了兼容框架,我们设置一个合理的默认行为
# 可以通过 parameters 配置 min_difficulty 和 max_difficulty
# 可以通过 config 中的 min_difficulty 和 max_difficulty 配置难度范围
result.status = False # 默认不标记为问题
result.label = [f"TASK_DIFFICULTY.{difficulty_level.upper()}"]

# 如果配置了难度范围要求,进行检查
if hasattr(cls, 'dynamic_config') and cls.dynamic_config.parameters:
min_difficulty = cls.dynamic_config.parameters.get('min_difficulty', 0)
max_difficulty = cls.dynamic_config.parameters.get('max_difficulty', 10)
if hasattr(cls, 'dynamic_config'):
min_difficulty = cls.dynamic_config.model_extra.get('min_difficulty', 0)
max_difficulty = cls.dynamic_config.model_extra.get('max_difficulty', 10)

if difficulty_score < min_difficulty:
result.status = True
Expand Down
17 changes: 5 additions & 12 deletions dingo/model/llm/rag/llm_rag_answer_relevancy.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,14 +242,8 @@ def eval(cls, input_data: Data) -> EvalDetail:

try:
# 增加温度参数以提高问题生成的随机性
if hasattr(cls, 'dynamic_config') and cls.dynamic_config.parameters:
if 'temperature' not in cls.dynamic_config.parameters:
cls.dynamic_config.parameters['temperature'] = 0.7
else:
# 如果没有parameters,创建一个包含temperature的parameters
current_params = cls.dynamic_config.parameters or {}
current_params['temperature'] = 0.7
cls.dynamic_config.parameters = current_params
if hasattr(cls, 'dynamic_config') and 'temperature' not in cls.dynamic_config.model_extra:
cls.dynamic_config.temperature = 0.7

# 生成多个相关问题
generated_questions = cls.generate_multiple_questions(input_data, cls.strictness)
Expand All @@ -263,10 +257,9 @@ def eval(cls, input_data: Data) -> EvalDetail:

# 根据分数判断是否通过,默认阈值为5
threshold = 5
if hasattr(cls, 'dynamic_config') and cls.dynamic_config.parameters:
threshold = cls.dynamic_config.parameters.get('threshold', 5)
# 检查是否有自定义的strictness参数
cls.strictness = cls.dynamic_config.parameters.get('strictness', 3)
if hasattr(cls, 'dynamic_config'):
threshold = cls.dynamic_config.model_extra.get('threshold', 5)
cls.strictness = cls.dynamic_config.model_extra.get('strictness', 3)

# 构建详细的reason文本
all_reasons = []
Expand Down
4 changes: 2 additions & 2 deletions dingo/model/llm/rag/llm_rag_context_precision.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,8 +256,8 @@ def process_response(cls, responses: List[str]) -> EvalDetail:

# 根据分数判断是否通过,默认阈值为5
threshold = 5
if hasattr(cls, 'dynamic_config') and cls.dynamic_config.parameters:
threshold = cls.dynamic_config.parameters.get('threshold', 5)
if hasattr(cls, 'dynamic_config'):
threshold = cls.dynamic_config.model_extra.get('threshold', 5)

if score >= threshold:
result.status = False
Expand Down
4 changes: 2 additions & 2 deletions dingo/model/llm/rag/llm_rag_context_recall.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,8 +215,8 @@ def process_response(cls, response: str) -> EvalDetail:

# 根据分数判断是否通过,默认阈值为5
threshold = 5
if hasattr(cls, 'dynamic_config') and cls.dynamic_config.parameters:
threshold = cls.dynamic_config.parameters.get('threshold', 5)
if hasattr(cls, 'dynamic_config'):
threshold = cls.dynamic_config.model_extra.get('threshold', 5)

if score >= threshold:
result.status = False
Expand Down
4 changes: 2 additions & 2 deletions dingo/model/llm/rag/llm_rag_context_relevancy.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,8 +206,8 @@ def process_response(cls, response: str) -> EvalDetail:

# 根据分数判断是否通过,默认阈值为5
threshold = 5
if hasattr(cls, 'dynamic_config') and cls.dynamic_config.parameters:
threshold = cls.dynamic_config.parameters.get('threshold', 5)
if hasattr(cls, 'dynamic_config'):
threshold = cls.dynamic_config.model_extra.get('threshold', 5)

if score >= threshold:
result.status = False
Expand Down
4 changes: 2 additions & 2 deletions dingo/model/llm/rag/llm_rag_faithfulness.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,8 +290,8 @@ def process_response(cls, response: str) -> EvalDetail:

# 根据分数判断是否通过,默认阈值为5
threshold = 5
if hasattr(cls, 'dynamic_config') and cls.dynamic_config.parameters:
threshold = cls.dynamic_config.parameters.get('threshold', 5)
if hasattr(cls, 'dynamic_config'):
threshold = cls.dynamic_config.model_extra.get('threshold', 5)

if score >= threshold:
result.status = False
Expand Down
4 changes: 2 additions & 2 deletions dingo/model/llm/vlm_layout_quality.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,8 +201,8 @@ def send_messages(cls, messages: List):
else:
model_name = cls.client.models.list().data[0].id

params = cls.dynamic_config.parameters
cls.validate_config(params)
extra_params = cls.dynamic_config.model_extra
cls.validate_config(extra_params)

completions = cls.client.chat.completions.create(
model=model_name,
Expand Down
Loading
Loading