Skip to content

Commit e602dcd

Browse files
author
wangzaijun
committed
fix
1 parent bbd8769 commit e602dcd

File tree

2 files changed

+12
-7
lines changed

2 files changed

+12
-7
lines changed

lightllm/server/api_cli.py

Lines changed: 11 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -318,23 +318,28 @@ def make_argument_parser() -> argparse.ArgumentParser:
318318
type=str,
319319
nargs="+",
320320
choices=["None", "triton", "fa3", "flashinfer"],
321-
default=["None"],
322-
help="""prefill attention kernel used in llm""",
321+
default=["triton"],
322+
help="""prefill attention kernel used in llm.
323+
None: automatically select backend based on current GPU device,
324+
not supported yet, will support in future""",
323325
)
324326
parser.add_argument(
325327
"--llm_decode_att_backend",
326328
type=str,
327329
nargs="+",
328330
choices=["None", "triton", "fa3", "flashinfer"],
329-
default=["None"],
330-
help="""decode attention kernel used in llm""",
331+
default=["triton"],
332+
help="""decode attention kernel used in llm.
333+
None: automatically select backend based on current GPU device,
334+
not supported yet, will support in future""",
331335
)
332336
parser.add_argument(
333337
"--llm_kv_type",
334338
type=str,
335-
choices=["None", "int8kv", "int4kv", "fp8kv"],
339+
choices=["None", "int8kv", "int4kv"],
336340
default="None",
337-
help="""kv type used in llm, None for dtype that llm used in config.json""",
341+
help="""kv type used in llm, None for dtype that llm used in config.json.
342+
fp8kv: not fully supported yet, will support in future""",
338343
)
339344
parser.add_argument(
340345
"--llm_kv_quant_group_size",

lightllm/utils/envs_utils.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -215,7 +215,7 @@ def get_diverse_max_batch_shared_group_size() -> int:
215215

216216
@lru_cache(maxsize=None)
217217
def enable_diverse_mode_gqa_decode_fast_kernel() -> bool:
218-
return get_env_start_args().diverse_mode and "ppl_int8kv_flashdecoding_diverse" in get_env_start_args().mode
218+
return get_env_start_args().diverse_mode and "int8kv" == get_env_start_args().llm_kv_type
219219

220220

221221
@lru_cache(maxsize=None)

0 commit comments

Comments
 (0)