@@ -74,6 +74,8 @@ class RequestFuncOutput:
7474 tpot : float = 0.0 # avg next-token latencies
7575 prompt_len : int = 0
7676 prompt_tokens : int = 0 # 推理侧返回输入token数
77+ reasoning_tokens : int = 0 # 思考长度
78+ res_ttft : int = 0 # 包含思考首token时延
7779 error : str = ""
7880 metrics : dict = field (default_factory = dict )
7981
@@ -198,11 +200,14 @@ async def async_request_eb_openai_chat_completions(
198200 request_id = "None"
199201
200202 ttft = 0.0
203+ res_ttft = 0.0
201204 st = time .perf_counter ()
202205 most_recent_timestamp = st
203206 token_timestamps = []
204207 try :
205- async with session .post (url = api_url , json = payload , headers = headers ) as response :
208+ async with session .post (
209+ url = api_url , json = payload , headers = headers , read_bufsize = 10 * 1024 * 1024
210+ ) as response :
206211 data = {}
207212 if response .status == 200 :
208213 async for chunk_bytes in response .content :
@@ -242,6 +247,14 @@ async def async_request_eb_openai_chat_completions(
242247 else :
243248 output .itl .append (timestamp - most_recent_timestamp )
244249
250+ # response首token
251+ if res_ttft == 0.0 :
252+ if content :
253+ res_ttft = choices [0 ]["arrival_time" ]
254+ output .res_ttft = res_ttft
255+ usage = data .get ("usage" , {})
256+ output .reasoning_tokens = max (usage .get ("completion_tokens" , 0 ) - 1 , 0 )
257+
245258 output .generated_text += content or ""
246259 output .reasoning_content += reason_content or ""
247260 # print(f"####content:{data}")
@@ -262,6 +275,7 @@ async def async_request_eb_openai_chat_completions(
262275
263276 if output .generated_text .strip () == "" :
264277 output .success = False
278+ output .reasoning_tokens = output .output_tokens
265279 output .error = "No generated text found!"
266280 else :
267281 output .success = True
@@ -284,7 +298,7 @@ async def async_request_eb_openai_chat_completions(
284298 output .request_id = request_id
285299
286300 # 保存失败请求结果
287- if not output .success :
301+ if not output .success or output . output_tokens == 0 :
288302 with open ("error_output.txt" , "a" ) as f :
289303 f .write (str (output ) + "\n " )
290304 if pbar :
0 commit comments