Skip to content

Commit f45c131

Browse files
authored
update (#5625)
1 parent 94be5eb commit f45c131

File tree

3 files changed

+195
-49
lines changed

3 files changed

+195
-49
lines changed

benchmarks/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ python -m pip install -r requirements.txt
4646
--shuffle:是否打乱数据集,默认False不打乱
4747
--seed:打乱数据集时的随机种子,默认0
4848
--pd-metrics:开启PD分离metrics指标收集,会添加请求参数collect_metrics=True,默认False
49+
--ip-list:支持传入多个以逗号分隔的ip:port,总请求数和总并发数会按整除取余的方式均分到每个地址。例:0.0.0.0:1211,0.0.0.0:1222,默认为空
4950
```
5051

5152
##### /v1/chat/completions接口压测单条数据调试

benchmarks/backend_request_func.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,8 @@ class RequestFuncOutput:
7474
tpot: float = 0.0 # avg next-token latencies
7575
prompt_len: int = 0
7676
prompt_tokens: int = 0 # 推理侧返回输入token数
77+
reasoning_tokens: int = 0 # 思考长度
78+
res_ttft: int = 0 # 包含思考首token时延
7779
error: str = ""
7880
metrics: dict = field(default_factory=dict)
7981

@@ -198,11 +200,14 @@ async def async_request_eb_openai_chat_completions(
198200
request_id = "None"
199201

200202
ttft = 0.0
203+
res_ttft = 0.0
201204
st = time.perf_counter()
202205
most_recent_timestamp = st
203206
token_timestamps = []
204207
try:
205-
async with session.post(url=api_url, json=payload, headers=headers) as response:
208+
async with session.post(
209+
url=api_url, json=payload, headers=headers, read_bufsize=10 * 1024 * 1024
210+
) as response:
206211
data = {}
207212
if response.status == 200:
208213
async for chunk_bytes in response.content:
@@ -242,6 +247,14 @@ async def async_request_eb_openai_chat_completions(
242247
else:
243248
output.itl.append(timestamp - most_recent_timestamp)
244249

250+
# response首token
251+
if res_ttft == 0.0:
252+
if content:
253+
res_ttft = choices[0]["arrival_time"]
254+
output.res_ttft = res_ttft
255+
usage = data.get("usage", {})
256+
output.reasoning_tokens = max(usage.get("completion_tokens", 0) - 1, 0)
257+
245258
output.generated_text += content or ""
246259
output.reasoning_content += reason_content or ""
247260
# print(f"####content:{data}")
@@ -262,6 +275,7 @@ async def async_request_eb_openai_chat_completions(
262275

263276
if output.generated_text.strip() == "":
264277
output.success = False
278+
output.reasoning_tokens = output.output_tokens
265279
output.error = "No generated text found!"
266280
else:
267281
output.success = True
@@ -284,7 +298,7 @@ async def async_request_eb_openai_chat_completions(
284298
output.request_id = request_id
285299

286300
# 保存失败请求结果
287-
if not output.success:
301+
if not output.success or output.output_tokens == 0:
288302
with open("error_output.txt", "a") as f:
289303
f.write(str(output) + "\n")
290304
if pbar:

0 commit comments

Comments
 (0)