Skip to content

Commit 94be5eb

Browse files
[CI] Add CI case for MTP accept ratio (#5570)
* Implement test for MTP accept ratio

  Add test for MTP accept ratio with assertions on results and metrics.

* Update test_ernie_21b_mtp.py

* Refactor test_mtp_accept_ratio for baseline comparison

  Refactor test_mtp_accept_ratio to compare results against baseline file and metrics.

* Fix formatting issues in test_ernie_21b_mtp.py

---------

Co-authored-by: YuBaoku <49938469+EmmonsCurse@users.noreply.github.com>
1 parent ac73165 commit 94be5eb

File tree

1 file changed

+68
-0
lines changed

1 file changed

+68
-0
lines changed

tests/e2e/test_ernie_21b_mtp.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -297,3 +297,71 @@ def test_non_chat_usage_non_stream(api_url):
297297
assert payload["max_tokens"] >= usage["completion_tokens"], "completion_tokens大于max_tokens"
298298
assert payload["metadata"]["min_tokens"] <= usage["completion_tokens"], "completion_tokens小于min_tokens"
299299
assert usage["total_tokens"] == total_tokens, "total_tokens不等于prompt_tokens + completion_tokens"
300+
301+
302+
def test_mtp_accept_ratio(api_url):
    """Test the MTP accept ratio.

    Sends the same streaming chat request several times and checks that the
    last response matches the stored baseline text, that its
    ``speculate_metrics`` equal the expected values, and that the reported
    ``cached_tokens`` equals ``prompt_tokens`` rounded down to a multiple of 64.

    Args:
        api_url: Endpoint passed through to ``send_request``.

    Raises:
        AssertionError: If ``MODEL_PATH`` is unset or any comparison fails.
        Exception: Any error raised while sending or parsing the warm-up
            requests is re-raised after being printed.
    """
    payload = {
        "model": "default",
        "messages": [
            {
                "role": "user",
                "content": "国外项目风险管理研究起步较早,理论体系成熟。早期研究集中于保险与金融领域,后逐步扩展至工程项目、"
                "公共管理等多领域。在理论层面,COSO《企业风险管理——整合框架》和ISO31000标准为风险管理提供了系统性"
                "指导,强调风险识别、评估、应对与监控的全流程管理。风险识别方法包括故障树分析、事件树分析等;风险评估"
                "则广泛应用VaR模型、蒙特卡洛模拟等量化工具。应对策略涵盖规避、转移、减轻和接受等,并衍生出风险共享、"
                "升级等复杂策略。此外,组织文化、管理层支持等因素对风险管理有效性影响显著。近年来,随着科技发展,"
                "人工智能、大数据等技术被引入风险管理,推动其向智能化、自动化方向发展。请介绍一下国外关于项目风险管理"
                "的文献研究综述,300字以内",
            },
        ],
        "stream": True,
        "stream_options": {"include_usage": True, "continuous_usage_stats": True},
        # Fixed seed with zero temperature/top_p — presumably to keep the
        # output reproducible across requests; confirm against the server.
        "temperature": 0,
        "seed": 23,
        "top_p": 0,
    }

    print("fastdeploy answer is :")

    try:
        # TODO: the first and second responses differ while later ones are
        # consistent, so issue one extra request for now.
        response = send_request(url=api_url, payload=payload)
        chunks = get_stream_chunks(response)
        response = send_request(url=api_url, payload=payload)
        chunks = get_stream_chunks(response)
        for idx, chunk in enumerate(chunks):
            print(f"\nchunk[{idx}]:\n{json.dumps(chunk, ensure_ascii=False)}")
        # The last chunk is excluded from the text; it is the one read for
        # ``usage`` further down.
        result = "".join(x["choices"][0]["delta"]["content"] for x in chunks[:-1])
        speculate_metrics = chunks[-2]["choices"][0]["speculate_metrics"]
    except Exception as e:
        print(f"解析失败: {e}")
        # Re-raise: continuing would hit a NameError on ``result`` or
        # ``speculate_metrics`` below and hide the real failure.
        raise
    print("\nresult:\n", result)

    base_path = os.getenv("MODEL_PATH")
    # Fail with a clear message instead of a TypeError from os.path.join(None, ...).
    assert base_path, "MODEL_PATH environment variable is not set"
    baseline_path = os.path.join(base_path, "21b_mtp_accept_ratio_baseline.txt")
    with open(baseline_path, "r", encoding="utf-8") as f:
        baseline = f.read()
    baseline_ratio = {
        "accepted_tokens": 131,
        "rejected_tokens": 23,
        "accept_ratio": 0.4122137404580153,
        "average_accept_length": 1.7012987012987013,
        "accept_ratio_per_head": [0.7012987012987013],
    }

    response = send_request(url=api_url, payload=payload)
    chunks = get_stream_chunks(response)
    result_2 = "".join(x["choices"][0]["delta"]["content"] for x in chunks[:-1])
    speculate_metrics_2 = chunks[-2]["choices"][0]["speculate_metrics"]
    print("chunks:", chunks[-2])
    # NOTE: this prints the previous request's metrics under the label
    # "baseline"; the values actually asserted against are ``baseline_ratio``.
    print("baseline", speculate_metrics)
    print("speculate_metrics_2", speculate_metrics_2)
    # Report ``result_2`` (the value being compared), not the earlier ``result``.
    assert result_2 == baseline, f"与baseline存在diff,result_2: {result_2}\n baseline: {baseline}"
    assert speculate_metrics_2 == baseline_ratio, (
        f"speculate_metrics存在diff," f"speculate_metrics_2: {speculate_metrics_2}\n " f"baseline: {baseline_ratio}"
    )
    assert speculate_metrics_2["accept_ratio"] > 0, "accept_ratio异常"
    prompt_tokens = chunks[-1]["usage"]["prompt_tokens"]
    cached_tokens = chunks[-1]["usage"]["prompt_tokens_details"]["cached_tokens"]
    # Expect the cached count to be prompt_tokens rounded down to a multiple
    # of 64 — presumably the prefix-cache block size; confirm with the server config.
    assert cached_tokens == prompt_tokens // 64 * 64, "cached_tokens数量有问题"

0 commit comments

Comments
 (0)