@@ -297,3 +297,71 @@ def test_non_chat_usage_non_stream(api_url):
297297 assert payload ["max_tokens" ] >= usage ["completion_tokens" ], "completion_tokens大于max_tokens"
298298 assert payload ["metadata" ]["min_tokens" ] <= usage ["completion_tokens" ], "completion_tokens小于min_tokens"
299299 assert usage ["total_tokens" ] == total_tokens , "total_tokens不等于prompt_tokens + completion_tokens"
300+
301+
def test_mtp_accept_ratio(api_url):
    """Test the MTP accept ratio reported by a streaming chat completion.

    The same deterministic request (temperature 0, top_p 0, fixed seed) is
    sent three times. The first two responses are treated as warm-up; the
    third one is checked against:

    * the text stored in ``21b_mtp_accept_ratio_baseline.txt`` under the
      directory named by the ``MODEL_PATH`` environment variable, and
    * the hard-coded ``speculate_metrics`` values in ``baseline_ratio``.

    Finally the ``cached_tokens`` count from the usage chunk is required to
    equal ``prompt_tokens`` rounded down to a multiple of 64.

    Args:
        api_url: Passed as ``url`` to ``send_request``.
    """
    payload = {
        "model": "default",
        "messages": [
            {
                "role": "user",
                "content": "国外项目风险管理研究起步较早,理论体系成熟。早期研究集中于保险与金融领域,后逐步扩展至工程项目、"
                "公共管理等多领域。在理论层面,COSO《企业风险管理——整合框架》和ISO31000标准为风险管理提供了系统性"
                "指导,强调风险识别、评估、应对与监控的全流程管理。风险识别方法包括故障树分析、事件树分析等;风险评估"
                "则广泛应用VaR模型、蒙特卡洛模拟等量化工具。应对策略涵盖规避、转移、减轻和接受等,并衍生出风险共享、"
                "升级等复杂策略。此外,组织文化、管理层支持等因素对风险管理有效性影响显著。近年来,随着科技发展,"
                "人工智能、大数据等技术被引入风险管理,推动其向智能化、自动化方向发展。请介绍一下国外关于项目风险管理"
                "的文献研究综述,300字以内",
            },
        ],
        "stream": True,
        "stream_options": {"include_usage": True, "continuous_usage_stats": True},
        "temperature": 0,
        "seed": 23,
        "top_p": 0,
    }

    print("fastdeploy answer is :")

    # Bind defaults first: if the warm-up fails, the prints below must still
    # work instead of raising NameError and hiding the real cause.
    result = ""
    speculate_metrics = None
    try:
        # TODO: the first and second responses differ while later ones are
        # stable, so send one extra request for now.
        for _ in range(2):
            response = send_request(url=api_url, payload=payload)
            chunks = get_stream_chunks(response)
        for idx, chunk in enumerate(chunks):
            print(f"\n chunk[{idx} ]:\n {json.dumps(chunk, ensure_ascii=False)} ")
        # The last chunk is skipped when joining the content; presumably it
        # is the usage-only chunk without "choices" -- TODO confirm.
        result = "".join([x["choices"][0]["delta"]["content"] for x in chunks[:-1]])
        speculate_metrics = chunks[-2]["choices"][0]["speculate_metrics"]
    except Exception as e:
        # Warm-up is best-effort: report the problem and go on to the real check.
        print(f"解析失败: {e} ")
    print("\n result:\n ", result)

    base_path = os.getenv("MODEL_PATH")
    # Fail with a clear message instead of a TypeError from os.path.join(None, ...).
    assert base_path, "MODEL_PATH environment variable is not set"
    baseline_path = os.path.join(base_path, "21b_mtp_accept_ratio_baseline.txt")
    with open(baseline_path, "r", encoding="utf-8") as f:
        baseline = f.read()
    # Expected speculate_metrics for this exact prompt and sampling setup.
    baseline_ratio = {
        "accepted_tokens": 131,
        "rejected_tokens": 23,
        "accept_ratio": 0.4122137404580153,
        "average_accept_length": 1.7012987012987013,
        "accept_ratio_per_head": [0.7012987012987013],
    }

    # The request whose output is actually verified.
    response = send_request(url=api_url, payload=payload)
    chunks = get_stream_chunks(response)
    result_2 = "".join([x["choices"][0]["delta"]["content"] for x in chunks[:-1]])
    speculate_metrics_2 = chunks[-2]["choices"][0]["speculate_metrics"]
    print("chunks:", chunks[-2])
    # NOTE: the value printed under "baseline" is the warm-up run's metrics,
    # not baseline_ratio.
    print("baseline", speculate_metrics)
    print("speculate_metrics_2", speculate_metrics_2)
    # Report result_2 (the checked text) in the failure message, not the warm-up text.
    assert result_2 == baseline, f"与baseline存在diff,result_2: {result_2} \n baseline: {baseline} "
    assert speculate_metrics_2 == baseline_ratio, (
        f"speculate_metrics存在diff," f"speculate_metrics_2: {speculate_metrics_2} \n " f"baseline: {baseline_ratio} "
    )
    assert speculate_metrics_2["accept_ratio"] > 0, "accept_ratio异常"
    prompt_tokens = chunks[-1]["usage"]["prompt_tokens"]
    cached_tokens = chunks[-1]["usage"]["prompt_tokens_details"]["cached_tokens"]
    # cached_tokens must equal prompt_tokens rounded down to a multiple of 64
    # (presumably the prefix-cache block size -- TODO confirm).
    assert cached_tokens == prompt_tokens // 64 * 64, "cached_tokens数量有问题"
0 commit comments