Skip to content

Commit dac1fe0

Browse files
committed
Support NeMo-Gym config
Signed-off-by: ruit <ruit@nvidia.com>
1 parent 5835ce7 commit dac1fe0

File tree

1 file changed

+7
-5
lines changed

1 file changed

+7
-5
lines changed

examples/nemo_gym/grpo_dapo17k_bytedtsinghua_qwen3_4binstruct_nf.yaml

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ policy:
6565
max_total_sequence_length: 32768
6666
precision: "bfloat16"
6767
logprob_chunk_size: 1024
68+
offload_optimizer_for_logprob: false
6869

6970
dtensor_cfg:
7071
_v2: false
@@ -210,6 +211,7 @@ policy:
210211
num_first_layers_in_bf16: 0
211212
expose_http_server: true
212213
skip_tokenizer_init: false
214+
kv_cache_dtype: null
213215
http_server_serving_chat_kwargs:
214216
# This is the tool parser for Qwen 3 4B Instruct. This needs to be changed for other models.
215217
enable_auto_tools: true
@@ -232,8 +234,8 @@ policy:
232234
num_nodes: null # Decides number of nodes to be dedicated to generation
233235

234236
data:
235-
train_jsonl_fpath: 3rdparty/Gym-workspace/Gym/data/bytedtsinghua_dapo17k/train.jsonl
236-
validation_jsonl_fpath: 3rdparty/Gym-workspace/Gym/data/bytedtsinghua_dapo17k/validation.jsonl
237+
train_jsonl_fpath: 3rdparty/Gym-workspace/Gym/data/train.jsonl
238+
validation_jsonl_fpath: 3rdparty/Gym-workspace/Gym/data/validation.jsonl
237239
shuffle: true
238240
num_workers: 0
239241

@@ -243,10 +245,10 @@ env:
243245
nemo_gym: # This is passed into NeMo-Gym as the initial_global_config_dict
244246
config_paths:
245247
- responses_api_models/vllm_model/configs/vllm_model_for_training.yaml # Required! And it must be *for_training
246-
- resources_servers/library_judge_math/configs/library_judge_math.yaml
247-
library_judge_math:
248+
- resources_servers/math_with_judge/configs/math_with_judge.yaml
249+
math_with_judge:
248250
resources_servers:
249-
library_judge_math:
251+
math_with_judge:
250252
judge_model_server:
251253
name: policy_model
252254
should_use_judge: false

0 commit comments

Comments
 (0)