|
17 | 17 | import os |
18 | 18 | import pprint |
19 | 19 | from itertools import chain, repeat |
20 | | -from typing import Optional |
| 20 | +from typing import Dict, Optional |
21 | 21 |
|
22 | 22 | # Increase the W&B single object size warning threshold. Initially 100_000 (100 KB) -> 10_000_000 (10 MB) |
23 | 23 | import wandb.util |
24 | 24 |
|
25 | 25 | wandb.util.VALUE_BYTES_LIMIT = 10_000_000 |
26 | 26 |
|
27 | 27 | import ray |
| 28 | +from datasets import concatenate_datasets |
28 | 29 | from omegaconf import OmegaConf |
29 | 30 | from wandb import Table |
30 | 31 |
|
|
42 | 43 | setup, |
43 | 44 | ) |
44 | 45 | from nemo_rl.algorithms.utils import get_tokenizer |
45 | | -from nemo_rl.data.datasets import AllTaskProcessedDataset |
46 | | -from nemo_rl.data.interfaces import DatumSpec |
47 | | -from nemo_rl.distributed.ray_actor_environment_registry import ( |
48 | | - get_actor_python_env, |
| 46 | +from nemo_rl.data.datasets import ( |
| 47 | + AllTaskProcessedDataset, |
| 48 | + load_response_dataset, |
| 49 | + update_single_dataset_config, |
49 | 50 | ) |
| 51 | +from nemo_rl.data.interfaces import DatumSpec |
50 | 52 | from nemo_rl.distributed.virtual_cluster import init_ray |
51 | 53 | from nemo_rl.environments.nemo_gym import ( |
52 | | - NemoGym, |
53 | 54 | NemoGymConfig, |
54 | 55 | nemo_gym_example_to_nemo_rl_datum_spec, |
55 | 56 | setup_nemo_gym_config, |
56 | 57 | ) |
| 58 | +from nemo_rl.environments.utils import create_env |
57 | 59 | from nemo_rl.experience.rollouts import run_async_nemo_gym_rollout |
58 | 60 | from nemo_rl.models.generation import configure_generation_config |
59 | 61 | from nemo_rl.utils.config import load_config, parse_hydra_overrides |
@@ -109,6 +111,80 @@ def setup_single_nemo_gym_dataset( |
109 | 111 | ) |
110 | 112 |
|
111 | 113 |
|
| 114 | +def setup_data( |
| 115 | + tokenizer: TokenizerType, |
| 116 | + data_config: Dict, |
| 117 | + env_configs: Dict, |
| 118 | + seed: int, |
 | 119 | +) -> tuple[AllTaskProcessedDataset, Optional[AllTaskProcessedDataset]]:
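 | | +    """Load, merge, and wrap the configured train/validation datasets.
 | | +
 | | +    Returns the train dataset and an optional validation dataset. Note that
 | | +    env_configs is currently unused here; the NeMo-Gym environment is created
 | | +    separately in main() via create_env.
 | | +    """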
 | 126 | +    # set up the train dataset
| 127 | + data_list = [] |
| 128 | + task_data_processors = {} |
| 129 | + |
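 | | +    # data_config["train"] may be a single dataset config (dict) or a list of them; normalize to a list.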
| 130 | + if isinstance(data_config["train"], dict): |
| 131 | + data_config["train"] = [data_config["train"]] |
| 132 | + for cfg in data_config["train"]: |
| 133 | + update_single_dataset_config(cfg, data_config["default"]) |
| 134 | + data = load_response_dataset(cfg, seed) |
| 135 | + data_list.append(data) |
| 136 | + task_data_processors[data.task_name] = (data.task_spec, data.processor) |
| 137 | + |
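 | | +    # Merge the per-task datasets; task_data_processors lets AllTaskProcessedDataset route each sample to its task's processor.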
| 138 | + merged_data = concatenate_datasets([data.dataset for data in data_list]) |
| 139 | + dataset = AllTaskProcessedDataset( |
| 140 | + merged_data, |
| 141 | + tokenizer, |
| 142 | + None, |
| 143 | + task_data_processors, |
| 144 | + max_seq_length=data_config["max_input_seq_length"], |
| 145 | + ) |
| 146 | + print(f" ✓ Training dataset loaded with {len(dataset)} samples.") |
| 147 | + |
 | 148 | +    # set up the validation dataset
| 149 | + val_task_data_processors = {} |
| 150 | + val_data_list = [] |
| 151 | + |
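 | | +    # Datasets loaded for training may carry their own validation split; collect those first.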
| 152 | + for data in data_list: |
| 153 | + if hasattr(data, "val_dataset") and data.val_dataset is not None: |
| 154 | + val_data_list.append(data.val_dataset) |
 | 155 | +            # reuse the train-side task spec/processor for this task
| 156 | + task_name = data.task_name |
| 157 | + val_task_data_processors[task_name] = task_data_processors[task_name] |
| 158 | + |
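 | | +    # Then load any explicitly configured validation datasets on top of the bundled splits.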
| 159 | + if data_config["validation"] is not None: |
| 160 | + if isinstance(data_config["validation"], dict): |
| 161 | + data_config["validation"] = [data_config["validation"]] |
| 162 | + |
| 163 | + for cfg in data_config["validation"]: |
| 164 | + update_single_dataset_config(cfg, data_config["default"]) |
| 165 | + val_data = load_response_dataset(cfg, seed) |
| 166 | + val_data_list.append(val_data.dataset) |
 | 167 | +            # register this task's spec/processor for validation
| 168 | + val_task_data_processors[val_data.task_name] = ( |
| 169 | + val_data.task_spec, |
| 170 | + val_data.processor, |
| 171 | + ) |
| 172 | + |
| 173 | + val_dataset = None |
| 174 | + if len(val_data_list) > 0: |
| 175 | + merged_val_data = concatenate_datasets(val_data_list) |
| 176 | + val_dataset = AllTaskProcessedDataset( |
| 177 | + merged_val_data, |
| 178 | + tokenizer, |
| 179 | + None, |
| 180 | + val_task_data_processors, |
| 181 | + max_seq_length=data_config["max_input_seq_length"], |
| 182 | + ) |
| 183 | + print(f" ✓ Validation dataset loaded with {len(val_dataset)} samples.") |
| 184 | + |
| 185 | + return dataset, val_dataset |
| 186 | + |
| 187 | + |
112 | 188 | # These types are imported directly from grpo_train so that any change to its architecture fails here immediately. |
113 | 189 | def collect_trajectories( |
114 | 190 | policy: ColocatablePolicyInterface, |
@@ -202,13 +278,11 @@ def main() -> None: |
202 | 278 | assert _should_use_nemo_gym(config) |
203 | 279 |
|
204 | 280 | print("\n▶ Setting up data...") |
205 | | - train_dataset = setup_single_nemo_gym_dataset( |
206 | | - jsonl_fpath=config["data"]["train_jsonl_fpath"], |
207 | | - tokenizer=tokenizer, |
208 | | - ) |
209 | | - val_dataset = setup_single_nemo_gym_dataset( |
210 | | - jsonl_fpath=config["data"]["validation_jsonl_fpath"], |
| 281 | + train_dataset, val_dataset = setup_data( |
211 | 282 | tokenizer=tokenizer, |
| 283 | + data_config=config["data"], |
| 284 | + env_configs=config["env"], |
| 285 | + seed=config["grpo"]["seed"], |
212 | 286 | ) |
213 | 287 |
|
214 | 288 | # Validation dataset config setup. |
@@ -254,13 +328,7 @@ def main() -> None: |
254 | 328 | base_urls=policy_generation.dp_openai_server_base_urls, |
255 | 329 | initial_global_config_dict=config["env"]["nemo_gym"], |
256 | 330 | ) |
257 | | - nemo_gym = NemoGym.options( |
258 | | - runtime_env={ |
259 | | - "py_executable": get_actor_python_env( |
260 | | - "nemo_rl.environments.nemo_gym.NemoGym" |
261 | | - ), |
262 | | - } |
263 | | - ).remote(nemo_gym_config) |
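 | | +    # create_env wraps the Ray actor creation (including resolving the per-environment python executable) previously inlined here.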
| 331 | + nemo_gym = create_env(env_name="nemo_gym", env_config=nemo_gym_config) |
264 | 332 | # Blocking wait for NeMo-Gym to spin up |
265 | 333 | ray.get(nemo_gym.health_check.remote()) |
266 | 334 | task_to_env = {"nemo_gym": nemo_gym} |
|