Skip to content

Commit 7877ef4

Browse files
committed
Add docker cmdline options to handle long context
Signed-off-by: Neelesh Gokhale <neelesh.gokhale@intel.com>
1 parent 7982652 commit 7877ef4

File tree

4 files changed

+77
-43
lines changed

4 files changed

+77
-43
lines changed

.cd/server/server_output.env

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ VLLM_DECODE_BS_BUCKET_MIN
1414
VLLM_DECODE_BS_BUCKET_STEP
1515
VLLM_PROMPT_SEQ_BUCKET_MIN
1616
VLLM_PROMPT_SEQ_BUCKET_STEP
17+
VLLM_PROMPT_CTX_BUCKET_STEP
1718
VLLM_DECODE_BLOCK_BUCKET_MIN
1819
VLLM_DECODE_BLOCK_BUCKET_STEP
1920
NUM_HIDDEN_LAYERS
@@ -51,7 +52,6 @@ EST_GRAPH_RESERVE_MEM
5152
VLLM_GRAPH_RESERVED_MEM
5253
KV_CACHE_MEM
5354
MAX_NUM_SEQS
54-
VLLM_PROMPT_SEQ_BUCKET_MAX
5555
VLLM_CONTIGUOUS_PA
5656
VLLM_DEFRAG
5757
ASYNC_SCHEDULING

.cd/server/server_user.env

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ VLLM_DECODE_BS_BUCKET_STEP
55
VLLM_PROMPT_BS_BUCKET_STEP
66
VLLM_PROMPT_BS_BUCKET_MAX
77
VLLM_PROMPT_SEQ_BUCKET_STEP
8+
VLLM_PROMPT_CTX_BUCKET_STEP
89
VLLM_SKIP_WARMUP
910
MAX_MODEL_LEN
1011
MAX_NUM_SEQS

.cd/server/settings_vllm.csv

Lines changed: 18 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,18 @@
1-
MODEL,TENSOR_PARALLEL_SIZE,MAX_MODEL_LEN,TOTAL_GPU_MEM,UNAVAILABLE_MEM_ABS,MODEL_MEM_FROM_CONFIG,MODEL_DTYPE,QUANT_DTYPE,MODEL_MEM,PROFILER_MEM_OVERHEAD,APPROX_MEM_PER_GRAPH_MB,fsdpa,GPU_FREE_MEM_TARGET,BLOCK_SIZE,VLLM_PROMPT_BS_BUCKET_MIN,VLLM_PROMPT_BS_BUCKET_STEP,VLLM_DECODE_BS_BUCKET_MIN,VLLM_DECODE_BS_BUCKET_STEP,VLLM_PROMPT_SEQ_BUCKET_MIN,VLLM_PROMPT_SEQ_BUCKET_STEP,VLLM_DECODE_BLOCK_BUCKET_MIN,VLLM_DECODE_BLOCK_BUCKET_STEP,VLLM_PROMPT_BS_BUCKET_MAX,NUM_HIDDEN_LAYERS,HIDDEN_SIZE,NUM_KEY_VALUE_HEADS,NUM_ATTENTION_HEADS,CACHE_DTYPE_BYTES,LIMIT_MODEL_LEN,PT_HPU_LAZY_MODE,VLLM_SKIP_WARMUP,VLLM_EXPONENTIAL_BUCKETING,MAX_NUM_BATCHED_TOKENS,ENABLE_PREFIX_CACHING,VLLM_CONTIGUOUS_PA,VLLM_DEFRAG,ASYNC_SCHEDULING,VLLM_WEIGHT_LOAD_FORCE_SYNC
2-
meta-llama/Llama-3.1-8B-Instruct,1,4352,128,2,16060522496,2,2,14.95752716,5.5,10,1,1,128,1,32,1,32,128,256,128,256,1,32,4096,8,32,2,131072,1,FALSE,FALSE,2048,false,true,true,1,0
3-
meta-llama/Llama-3.1-70B-Instruct,4,4352,512,2,1.41107E+11,2,2,131.4165192,5.5,20,1,1,128,1,32,1,32,128,256,128,256,1,80,8192,8,64,2,131072,1,FALSE,FALSE,2048,false,true,true,1,0
4-
meta-llama/Llama-3.3-70B-Instruct,4,4352,512,2,1.41107E+11,2,2,131.4165192,5.5,20,1,1,128,1,32,1,32,128,256,128,256,1,80,8192,8,64,2,131072,1,FALSE,FALSE,2048,false,true,true,1,0
5-
meta-llama/Llama-3.2-1B-Instruct,1,4352,128,2,2471645608,2,2,2.301899351,5.5,5,1,1,128,1,32,1,32,128,256,128,256,1,16,2048,8,32,2,131072,1,FALSE,FALSE,2048,false,true,true,1,0
6-
meta-llama/Llama-3.2-3B-Instruct,1,4352,128,2,6425499648,2,2,5.984212875,5.5,10,1,1,128,1,32,1,32,128,256,128,256,1,28,3072,8,24,2,131072,1,FALSE,FALSE,2048,false,true,true,1,0
7-
mistralai/Mixtral-8x7B-Instruct-v0.1,2,4352,256,2,93405585408,2,2,86.99073029,5.5,10,1,1,128,1,32,1,32,128,256,128,256,1,32,4096,8,32,2,32768,1,FALSE,FALSE,2048,false,true,true,1,0
8-
mistralai/Mixtral-8x22B-Instruct-v0.1,4,4352,512,2,2.8126E+11,2,2,261.9439201,5.5,10,1,1,128,1,32,1,32,128,256,128,256,1,56,6144,8,48,2,65536,1,FALSE,FALSE,2048,false,true,true,1,1
9-
mistralai/Mistral-7B-Instruct-v0.2,1,4352,128,2,14483464192,2,2,13.48877716,5.5,10,1,9,128,1,32,1,32,128,256,128,256,1,32,4096,8,32,2,32768,1,FALSE,FALSE,2048,false,true,true,1,0
10-
meta-llama/Llama-3.1-405B-Instruct,8,4352,1024,2,8.11707E+11,2,2,755.9608459,5.5,20,1,1,128,1,32,1,32,128,256,128,256,1,126,16384,8,128,2,131072,1,FALSE,FALSE,2048,false,true,true,1,1
11-
Qwen/Qwen2.5-14B-Instruct,1,4352,128,2,29540067328,2,2,27.51133156,5.5,10,0,12,128,1,32,1,32,128,256,128,256,1,48,5120,8,40,2,32768,1,FALSE,FALSE,2048,false,true,true,1,0
12-
deepseek-ai/DeepSeek-R1-Distill-Llama-70B,4,4352,512,2,1.41107E+11,2,2,131.4165192,5.5,20,1,1,128,1,32,1,32,128,256,128,256,1,80,8192,8,64,2,131072,1,FALSE,FALSE,2048,false,true,true,1,0
13-
Qwen/Qwen2.5-32B-Instruct,1,4352,128,2,65527752704,2,2,61.02747536,5.5,10,1,16,128,1,32,1,32,128,256,128,256,1,64,5120,8,40,2,32768,1,FALSE,FALSE,2048,false,true,true,1,1
14-
Qwen/Qwen2.5-72B-Instruct,4,4352,512,2,1.45412E+11,2,2,135.4258575,5.5,10,0,3,128,1,32,1,32,128,256,128,256,1,80,8192,8,64,2,32768,1,FALSE,FALSE,2048,false,true,true,1,1
15-
Qwen/Qwen2.5-7B-Instruct,1,4352,128,2,15231233024,2,2,14.18519115,5.5,10,0,3,128,1,32,1,32,128,256,128,256,1,28,3584,4,28,2,32768,1,FALSE,FALSE,2048,false,true,true,1,0
16-
Qwen/Qwen2.5-32B-Instruct,1,4352,128,2,65527752704,2,2,61.02747536,5.5,10,0,3,128,1,32,1,32,128,256,128,256,1,64,5120,8,40,2,32768,1,FALSE,FALSE,2048,false,true,true,1,0
17-
ibm-granite/granite-8b-code-instruct-4k,1,4096,128,2,21474836480,2,2,20.00000000,5.5,10,0,3,128,1,32,1,32,128,256,128,256,1,36,4096,8,32,2,32768,1,FALSE,FALSE,2048,false,true,true,1,0
18-
ibm-granite/granite-20b-code-instruct-8k,1,4352,128,2,40133986304,2,2,37.37,5.5,10,0,3,128,1,32,1,32,128,256,128,256,1,52,6144,1,48,2,65536,1,FALSE,FALSE,2048,false,true,true,1,0
19-
Qwen/Qwen2.5-VL-7B-Instruct,1,8448,128,2,15231233024,2,2,14.18519115,5.5,10,0,3,128,1,32,1,32,128,256,128,256,1,28,3584,4,28,2,32768,1,FALSE,FALSE,2048,false,false,false,1,0
1+
MODEL,TENSOR_PARALLEL_SIZE,MAX_MODEL_LEN,TOTAL_GPU_MEM,UNAVAILABLE_MEM_ABS,MODEL_MEM_FROM_CONFIG,MODEL_DTYPE,QUANT_DTYPE,MODEL_MEM,PROFILER_MEM_OVERHEAD,APPROX_MEM_PER_GRAPH_MB,fsdpa,GPU_FREE_MEM_TARGET,BLOCK_SIZE,VLLM_PROMPT_BS_BUCKET_MIN,VLLM_PROMPT_BS_BUCKET_STEP,VLLM_DECODE_BS_BUCKET_MIN,VLLM_DECODE_BS_BUCKET_STEP,VLLM_PROMPT_SEQ_BUCKET_MIN,VLLM_PROMPT_SEQ_BUCKET_STEP,VLLM_PROMPT_CTX_BUCKET_STEP,VLLM_DECODE_BLOCK_BUCKET_MIN,VLLM_DECODE_BLOCK_BUCKET_STEP,VLLM_PROMPT_BS_BUCKET_MAX,NUM_HIDDEN_LAYERS,HIDDEN_SIZE,NUM_KEY_VALUE_HEADS,NUM_ATTENTION_HEADS,CACHE_DTYPE_BYTES,LIMIT_MODEL_LEN,PT_HPU_LAZY_MODE,VLLM_SKIP_WARMUP,VLLM_EXPONENTIAL_BUCKETING,MAX_NUM_BATCHED_TOKENS,ENABLE_PREFIX_CACHING,VLLM_CONTIGUOUS_PA,VLLM_DEFRAG,ASYNC_SCHEDULING,VLLM_WEIGHT_LOAD_FORCE_SYNC
2+
meta-llama/Llama-3.1-8B-Instruct,1,4352,128,2,16060522496,2,2,14.95752716,0,10,1,9,128,1,32,1,32,128,256,1,128,256,1,32,4096,8,32,2,131072,1,FALSE,FALSE,2048,false,true,true,1,0
3+
meta-llama/Llama-3.1-70B-Instruct,4,4352,512,2,1.41107E+11,2,2,131.4165192,0,20,1,5,128,1,32,1,32,128,256,1,128,256,1,80,8192,8,64,2,131072,1,FALSE,FALSE,2048,false,true,true,1,0
4+
meta-llama/Llama-3.3-70B-Instruct,4,4352,512,2,1.41107E+11,2,2,131.4165192,0,20,1,5,128,1,32,1,32,128,256,1,128,256,1,80,8192,8,64,2,131072,1,FALSE,FALSE,2048,false,true,true,1,0
5+
meta-llama/Llama-3.2-1B-Instruct,1,4352,128,2,2471645608,2,2,2.301899351,5.5,5,1,1,128,1,32,1,32,128,256,1,128,256,1,16,2048,8,32,2,131072,1,FALSE,FALSE,2048,false,true,true,1,0
6+
meta-llama/Llama-3.2-3B-Instruct,1,4352,128,2,6425499648,2,2,5.984212875,5.5,10,1,1,128,1,32,1,32,128,256,1,128,256,1,28,3072,8,24,2,131072,1,FALSE,FALSE,2048,false,true,true,1,0
7+
mistralai/Mixtral-8x7B-Instruct-v0.1,2,4352,256,2,93405585408,2,2,86.99073029,5.5,10,1,1,128,1,32,1,32,128,256,1,128,256,1,32,4096,8,32,2,32768,1,FALSE,FALSE,2048,false,true,true,1,0
8+
mistralai/Mixtral-8x22B-Instruct-v0.1,4,4352,512,2,2.8126E+11,2,2,261.9439201,5.5,10,1,1,128,1,32,1,32,128,256,1,128,256,1,56,6144,8,48,2,65536,1,FALSE,FALSE,2048,false,true,true,1,1
9+
mistralai/Mistral-7B-Instruct-v0.2,1,4352,128,2,14483464192,2,2,13.48877716,5.5,10,1,9,128,1,32,1,32,128,256,1,128,256,1,32,4096,8,32,2,32768,1,FALSE,FALSE,2048,false,true,true,1,0
10+
meta-llama/Llama-3.1-405B-Instruct,8,4352,1024,2,8.11707E+11,2,2,755.9608459,5.5,20,1,1,128,1,32,1,32,128,256,1,128,256,1,126,16384,8,128,2,131072,1,FALSE,FALSE,2048,false,true,true,1,1
11+
Qwen/Qwen2.5-14B-Instruct,1,4352,128,2,29540067328,2,2,27.51133156,5.5,10,0,12,128,1,32,1,32,128,256,1,128,256,1,48,5120,8,40,2,32768,1,FALSE,FALSE,2048,false,true,true,1,0
12+
deepseek-ai/DeepSeek-R1-Distill-Llama-70B,4,4352,512,2,1.41107E+11,2,2,131.4165192,5.5,20,32,128,1,32,1,1,1,128,256,1,128,256,1,80,8192,8,64,2,131072,1,FALSE,FALSE,2048,false,true,true,1,0
13+
Qwen/Qwen2.5-32B-Instruct,1,4352,128,2,65527752704,2,2,61.02747536,5.5,10,1,16,128,1,32,1,32,128,256,1,128,256,1,64,5120,8,40,2,32768,1,FALSE,FALSE,2048,false,true,true,1,1
14+
Qwen/Qwen2.5-72B-Instruct,4,4352,512,2,1.45412E+11,2,2,135.4258575,5.5,10,0,3,128,1,32,1,32,128,256,1,128,256,1,80,8192,8,64,2,32768,1,FALSE,FALSE,2048,false,true,true,1,1
15+
Qwen/Qwen2.5-7B-Instruct,1,4352,128,2,15231233024,2,2,14.18519115,5.5,10,0,3,128,1,32,1,32,128,256,1,128,256,1,28,3584,4,28,2,32768,1,FALSE,FALSE,2048,false,true,true,1,0
16+
ibm-granite/granite-8b-code-instruct-4k,1,4096,128,2,21474836480,2,2,20.00000000,5.5,10,0,3,128,1,32,1,32,128,256,1,128,256,1,36,4096,8,32,2,32768,1,FALSE,FALSE,2048,false,true,true,1,0
17+
ibm-granite/granite-20b-code-instruct-8k,1,4352,128,2,40133986304,2,2,37.37,5.5,10,0,3,128,1,32,1,32,128,256,1,128,256,1,52,6144,1,48,2,65536,1,FALSE,FALSE,2048,false,true,true,1,0
18+
Qwen/Qwen2.5-VL-7B-Instruct,1,8448,128,2,15231233024,2,2,14.18519115,5.5,10,0,3,128,1,32,1,32,128,256,1,128,256,1,28,3584,4,28,2,32768,1,FALSE,FALSE,2048,false,false,false,1,0

.cd/server/vllm_autocalc_rules.py

Lines changed: 57 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -70,22 +70,36 @@ def calc_EST_HPU_BLOCKS(ctx):
7070

7171

7272
def calc_DECODE_BS_RAMP_GRAPHS(ctx):
73-
return 1 + int(math.log(ctx['VLLM_DECODE_BS_BUCKET_STEP'] / ctx['VLLM_DECODE_BS_BUCKET_MIN'], 2))
73+
if ctx['VLLM_EXPONENTIAL_BUCKETING']:
74+
return 1 + math.ceil(math.log(ctx['EST_MAX_NUM_SEQS'], 2))
75+
else:
76+
return 1 + int(math.log(ctx['VLLM_DECODE_BS_BUCKET_STEP'] / ctx['VLLM_DECODE_BS_BUCKET_MIN'], 2))
7477

7578

7679
def calc_DECODE_BS_STEP_GRAPHS(ctx):
77-
return max(
78-
0, int(1 + (ctx['EST_MAX_NUM_SEQS'] - ctx['VLLM_DECODE_BS_BUCKET_STEP']) / ctx['VLLM_DECODE_BS_BUCKET_STEP']))
80+
if ctx['VLLM_EXPONENTIAL_BUCKETING']:
81+
return 0
82+
else:
83+
return max(
84+
0,
85+
int(1 + (ctx['EST_MAX_NUM_SEQS'] - ctx['VLLM_DECODE_BS_BUCKET_STEP']) / ctx['VLLM_DECODE_BS_BUCKET_STEP']))
7986

8087

8188
def calc_DECODE_BLOCK_RAMP_GRAPHS(ctx):
82-
return 1 + int(math.log(ctx['VLLM_DECODE_BLOCK_BUCKET_STEP'] / ctx['VLLM_DECODE_BLOCK_BUCKET_MIN'], 2))
89+
if ctx['VLLM_EXPONENTIAL_BUCKETING']:
90+
return 1 + math.ceil(math.log(ctx['EST_HPU_BLOCKS'], 2))
91+
else:
92+
return 1 + int(math.log(ctx['VLLM_DECODE_BLOCK_BUCKET_STEP'] / ctx['VLLM_DECODE_BLOCK_BUCKET_MIN'], 2))
8393

8494

8595
def calc_DECODE_BLOCK_STEP_GRAPHS(ctx):
86-
return max(
87-
0,
88-
int(1 + (ctx['EST_HPU_BLOCKS'] - ctx['VLLM_DECODE_BLOCK_BUCKET_STEP']) / ctx['VLLM_DECODE_BLOCK_BUCKET_STEP']))
96+
if ctx['VLLM_EXPONENTIAL_BUCKETING']:
97+
return 0
98+
else:
99+
return max(
100+
0,
101+
int(1 +
102+
(ctx['EST_HPU_BLOCKS'] - ctx['VLLM_DECODE_BLOCK_BUCKET_STEP']) / ctx['VLLM_DECODE_BLOCK_BUCKET_STEP']))
89103

90104

91105
def calc_NUM_DECODE_GRAPHS(ctx):
@@ -99,26 +113,38 @@ def calc_NUM_DECODE_GRAPHS(ctx):
99113

100114

101115
def calc_PROMPT_BS_RAMP_GRAPHS(ctx):
102-
return 1 + int(
103-
math.log(
104-
min(ctx['VLLM_PROMPT_BS_BUCKET_MAX'], ctx['VLLM_PROMPT_BS_BUCKET_STEP']) / ctx['VLLM_PROMPT_BS_BUCKET_MIN'],
105-
2))
116+
if ctx['VLLM_EXPONENTIAL_BUCKETING']:
117+
return 1 + math.ceil(math.log(ctx['VLLM_PROMPT_BS_BUCKET_MAX'], 2))
118+
else:
119+
return 1 + int(
120+
math.log(
121+
min(ctx['VLLM_PROMPT_BS_BUCKET_MAX'], ctx['VLLM_PROMPT_BS_BUCKET_STEP']) /
122+
ctx['VLLM_PROMPT_BS_BUCKET_MIN'], 2))
106123

107124

108125
def calc_PROMPT_BS_STEP_GRAPHS(ctx):
109-
return max(
110-
0,
111-
int(1 +
112-
(ctx['VLLM_PROMPT_BS_BUCKET_MAX'] - ctx['VLLM_PROMPT_BS_BUCKET_STEP']) / ctx['VLLM_PROMPT_BS_BUCKET_STEP']))
126+
if ctx['VLLM_EXPONENTIAL_BUCKETING']:
127+
return 0
128+
else:
129+
return max(
130+
0,
131+
int(1 + (ctx['VLLM_PROMPT_BS_BUCKET_MAX'] - ctx['VLLM_PROMPT_BS_BUCKET_STEP']) /
132+
ctx['VLLM_PROMPT_BS_BUCKET_STEP']))
113133

114134

115135
def calc_PROMPT_SEQ_RAMP_GRAPHS(ctx):
116-
return 1 + int(math.log(ctx['VLLM_PROMPT_SEQ_BUCKET_STEP'] / ctx['VLLM_PROMPT_SEQ_BUCKET_MIN'], 2))
136+
if ctx['VLLM_EXPONENTIAL_BUCKETING']:
137+
return 1 + math.ceil(math.log(ctx['MAX_NUM_BATCHED_TOKENS'], 2))
138+
else:
139+
return 1 + int(math.log(ctx['VLLM_PROMPT_SEQ_BUCKET_STEP'] / ctx['VLLM_PROMPT_SEQ_BUCKET_MIN'], 2))
117140

118141

119142
def calc_PROMPT_SEQ_STEP_GRAPHS(ctx):
120-
return int(1 + (min(ctx['MAX_NUM_BATCHED_TOKENS'], ctx['MAX_MODEL_LEN']) - ctx['VLLM_PROMPT_SEQ_BUCKET_STEP']) /
121-
ctx['VLLM_PROMPT_SEQ_BUCKET_STEP'])
143+
if ctx['VLLM_EXPONENTIAL_BUCKETING']:
144+
return 0
145+
else:
146+
return int(1 + (min(ctx['MAX_NUM_BATCHED_TOKENS'], ctx['MAX_MODEL_LEN']) - ctx['VLLM_PROMPT_SEQ_BUCKET_STEP']) /
147+
ctx['VLLM_PROMPT_SEQ_BUCKET_STEP'])
122148

123149

124150
def calc_EST_NUM_PROMPT_GRAPHS(ctx):
@@ -127,19 +153,27 @@ def calc_EST_NUM_PROMPT_GRAPHS(ctx):
127153
graphs_2d = prompt_bs_graphs * prompt_seq_graphs
128154
if prompt_bs_graphs > 1:
129155
graphs_2d = graphs_2d / 2
130-
ctx_block_graphs_max = (ctx['MAX_MODEL_LEN'] - ctx['VLLM_PROMPT_SEQ_BUCKET_MIN']) / ctx['BLOCK_SIZE']
131-
ctx_block_graphs_min = max(1, (ctx['MAX_MODEL_LEN'] - ctx['MAX_NUM_BATCHED_TOKENS']) / ctx['BLOCK_SIZE'])
156+
ctx_blocks_max = max(1, (ctx['MAX_MODEL_LEN'] - ctx['VLLM_PROMPT_SEQ_BUCKET_MIN']) / ctx['BLOCK_SIZE'])
157+
ctx_blocks_min = max(1, (ctx['MAX_MODEL_LEN'] - ctx['MAX_NUM_BATCHED_TOKENS']) / ctx['BLOCK_SIZE'])
158+
if ctx['VLLM_EXPONENTIAL_BUCKETING']:
159+
ctx_block_graphs_max = 2 if ctx_blocks_max == 1 else math.ceil(math.log(ctx_blocks_max, 2))
160+
ctx_block_graphs_min = 2 if ctx_blocks_min == 1 else math.ceil(math.log(ctx_blocks_min, 2))
161+
else:
162+
ctx_block_graphs_max = max(1, ctx_blocks_max / ctx['VLLM_PROMPT_CTX_BUCKET_STEP']) # ctx step
163+
ctx_block_graphs_min = max(1, ctx_blocks_min / ctx['VLLM_PROMPT_CTX_BUCKET_STEP']) # ctx step
132164
graphs_3d = graphs_2d * (ctx_block_graphs_max + ctx_block_graphs_min) / 2
133165
return graphs_3d
134166

135167

136168
def calc_EST_GRAPH_PROMPT_RATIO(ctx):
137-
return math.ceil(ctx['EST_NUM_PROMPT_GRAPHS'] /
138-
(ctx['EST_NUM_PROMPT_GRAPHS'] + ctx['NUM_DECODE_GRAPHS']) * 100) / 100
169+
est_prompt_graph_mem = ctx['EST_NUM_PROMPT_GRAPHS'] * ctx['APPROX_MEM_PER_GRAPH_MB']
170+
est_decode_graph_mem = ctx['NUM_DECODE_GRAPHS'] * ctx['APPROX_MEM_PER_GRAPH_MB']
171+
est_graph_prompt_ratio = est_prompt_graph_mem / (est_prompt_graph_mem + est_decode_graph_mem)
172+
return est_graph_prompt_ratio
139173

140174

141175
def calc_VLLM_GRAPH_PROMPT_RATIO(ctx):
142-
return math.ceil(min(max(ctx['EST_GRAPH_PROMPT_RATIO'], 0.1), 0.9) * 10) / 10
176+
return math.ceil(min(max(ctx['EST_GRAPH_PROMPT_RATIO'], 0.01), 0.99) * 100) / 100
143177

144178

145179
def calc_DECODE_GRAPH_TARGET_GB(ctx):

0 commit comments

Comments (0)