NVIDIA-NeMo
diff --git a/‎resources_servers/tau2_bench/README.md‎
Lines changed: 3 additions & 0 deletions b/‎resources_servers/tau2_bench/README.md‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎resources_servers/tau2_bench/app.py‎
Lines changed: 47 additions & 0 deletions b/‎resources_servers/tau2_bench/app.py‎
Lines changed: 47 additions & 0 deletions
diff --git a/‎resources_servers/tau2_bench/configs/tau2_bench.yaml‎
Lines changed: 20 additions & 0 deletions b/‎resources_servers/tau2_bench/configs/tau2_bench.yaml‎
Lines changed: 20 additions & 0 deletions
diff --git a/‎resources_servers/tau2_bench/data/.gitignore‎
Lines changed: 9 additions & 0 deletions b/‎resources_servers/tau2_bench/data/.gitignore‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎resources_servers/tau2_bench/data/example_retail_demo.jsonl‎
Lines changed: 8 additions & 0 deletions b/‎resources_servers/tau2_bench/data/example_retail_demo.jsonl‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎resources_servers/tau2_bench/requirements.txt‎
Lines changed: 1 addition & 0 deletions b/‎resources_servers/tau2_bench/requirements.txt‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎resources_servers/tau2_bench/tests/test_app.py‎
Lines changed: 32 additions & 0 deletions b/‎resources_servers/tau2_bench/tests/test_app.py‎
Lines changed: 32 additions & 0 deletions
diff --git a/‎responses_api_agents/tau2_agent/.gitignore‎
Lines changed: 2 additions & 0 deletions b/‎responses_api_agents/tau2_agent/.gitignore‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎responses_api_agents/tau2_agent/README.md‎
Lines changed: 60 additions & 0 deletions b/‎responses_api_agents/tau2_agent/README.md‎
Lines changed: 60 additions & 0 deletions
diff --git a/‎responses_api_agents/tau2_agent/__init__.py‎ b/‎responses_api_agents/tau2_agent/__init__.py‎
@@ -0,0 +1,3 @@
+# Description
+
+Please note that this is a dummy resource environment: the tau^2 agent runs as a separate agent under `responses_api_agents`, and its implementation and instructions can be found at https://github.com/NVIDIA-NeMo/Gym/tree/main/responses_api_agents/tau2_agent.
@@ -0,0 +1,47 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from pydantic import BaseModel
+
+from fastapi import FastAPI
+
+from nemo_gym.base_resources_server import (
+    SimpleResourcesServer,
+    BaseResourcesServerConfig,
+    BaseVerifyRequest,
+    BaseVerifyResponse,
+)
+
+
+class Tau2BenchResourcesServerConfig(BaseResourcesServerConfig):  # Config type for the tau2_bench resources server.
+    pass  # No fields are added on top of BaseResourcesServerConfig.
+
+
+class Tau2BenchResourcesServer(SimpleResourcesServer):  # Placeholder resources server: verify() grants a fixed reward.
+    config: Tau2BenchResourcesServerConfig  # Typed configuration for this server.
+
+    def setup_webserver(self) -> FastAPI:  # Returns the parent's FastAPI app without modification.
+        app = super().setup_webserver()
+
+        # No extra routes are registered here. To add one, attach it to `app`, e.g.:
+        # app.post("/get_weather")(self.get_weather)
+
+        return app
+
+    async def verify(self, body: BaseVerifyRequest) -> BaseVerifyResponse:  # Echoes the request fields back with a reward.
+        return BaseVerifyResponse(**body.model_dump(), reward=1.0)  # Reward is always 1.0; nothing in `body` is inspected.
+
+
+if __name__ == "__main__":  # Runs only when this file is executed as a script, not on import.
+    Tau2BenchResourcesServer.run_webserver()
@@ -0,0 +1,20 @@
+tau2_bench_resources_server:
+  resources_servers:
+    tau2_bench:
+      entrypoint: app.py
+      domain: agent
+tau2_agent:
+  responses_api_agents:
+    tau2_agent:
+      entrypoint: app.py
+      resources_server:
+        type: resources_servers
+        name: tau2_bench_resources_server
+      model_server:
+        type: responses_api_models
+        name: policy_model
+      # user_model_server:
+      #   type: responses_api_models
+      #   name: policy_model
+      concurrency: 16
+      tau2_domain: airline
@@ -0,0 +1,9 @@
+*train.jsonl
+*validation.jsonl
+*train_prepare.jsonl
+*validation_prepare.jsonl
+*example_prepare.jsonl
+tau2/
+simulations/
+*rollouts*.jsonl
+*rollouts*.json
@@ -0,0 +1,8 @@
+{"id": 0, "task_domain": "retail", "task_id": 0, "responses_create_params": {"input": []}, "agent_ref": {"type": "responses_api_agents", "name": "tau2_agent"}}
+{"id": 1, "task_domain": "retail", "task_id": 1, "responses_create_params": {"input": []}, "agent_ref": {"type": "responses_api_agents", "name": "tau2_agent"}}
+{"id": 2, "task_domain": "retail", "task_id": 2, "responses_create_params": {"input": []}, "agent_ref": {"type": "responses_api_agents", "name": "tau2_agent"}}
+{"id": 3, "task_domain": "retail", "task_id": 3, "responses_create_params": {"input": []}, "agent_ref": {"type": "responses_api_agents", "name": "tau2_agent"}}
+{"id": 4, "task_domain": "retail", "task_id": 4, "responses_create_params": {"input": []}, "agent_ref": {"type": "responses_api_agents", "name": "tau2_agent"}}
+{"id": 5, "task_domain": "retail", "task_id": 5, "responses_create_params": {"input": []}, "agent_ref": {"type": "responses_api_agents", "name": "tau2_agent"}}
+{"id": 6, "task_domain": "retail", "task_id": 6, "responses_create_params": {"input": []}, "agent_ref": {"type": "responses_api_agents", "name": "tau2_agent"}}
+{"id": 7, "task_domain": "retail", "task_id": 7, "responses_create_params": {"input": []}, "agent_ref": {"type": "responses_api_agents", "name": "tau2_agent"}}
@@ -0,0 +1 @@
+-e nemo-gym[dev] @ ../../
@@ -0,0 +1,32 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from unittest.mock import MagicMock
+
+from nemo_gym.server_utils import ServerClient
+from resources_servers.tau2_bench.app import (
+    Tau2BenchResourcesServer,
+    Tau2BenchResourcesServerConfig,
+)
+
+
+class TestApp:  # Smoke tests for Tau2BenchResourcesServer.
+    def test_sanity(self) -> None:  # Passes if the server can be constructed without raising.
+        config = Tau2BenchResourcesServerConfig(
+            name="tau2_bench_agent",
+            host="0.0.0.0",
+            port=8080,
+            entrypoint="",
+        )
+        Tau2BenchResourcesServer(config=config, server_client=MagicMock(spec=ServerClient))  # Client is a mock limited to the ServerClient spec.
@@ -0,0 +1,2 @@
+results/
+data/
@@ -0,0 +1,60 @@
+Tau2 agent — how to run experiments
+=================================
+
+This document shows the minimal steps to run tau2 experiments locally.
+
+*Steps*
+1) Configure your API Key
+```bash
+echo "policy_base_url: https://api.openai.com/v1
+policy_api_key: your-openai-api-key
+policy_model_name: gpt-4.1-2025-04-14" > env.yaml
+```
+
+2) Set up Tau^2 data
+
+- Download the `tau2` folder (https://github.com/sierra-research/tau2-bench/tree/main/data/tau2). 
+- Save it to `resources_servers/tau2_bench/data/`.
+- Configure the data path (*don't forget* to modify the path accordingly):
+```bash
+export TAU2_DATA_DIR="/your_path/to/resources_servers/tau2_bench/data/"
+```
+
+3) Launch the NeMo Gym server
+- In the *first terminal*, launch the server.
+
+Example server for `openai_model`:
+```bash
+config_paths="responses_api_agents/tau2_agent/configs/tau2_agent.yaml,\
+responses_api_models/openai_model/configs/openai_model.yaml,\
+resources_servers/tau2_bench/configs/tau2_bench.yaml"
+
+ng_run "+config_paths=[$config_paths]" \
++tau2_agent.responses_api_agents.tau2_agent.resources_server.name=tau2_bench_resources_server
+```
+
+Example server for `vllm_model`:
+```bash
+config_paths="responses_api_agents/tau2_agent/configs/tau2_agent.yaml,\
+responses_api_models/vllm_model/configs/vllm_model.yaml,\
+resources_servers/tau2_bench/configs/tau2_bench.yaml"
+
+ng_run "+config_paths=[$config_paths]" \
+    +tau2_agent.responses_api_agents.tau2_agent.resources_server.name=tau2_bench_resources_server \
++policy_model.responses_api_models.vllm_model.return_token_id_information=true
+```
+
+4) Prepare experiment input
+- Prepare an input JSONL file describing which domain/task(s) to run, and pass its path as `input_jsonl_fpath` (see step 5). An example is in `resources_servers/tau2_bench/data/example_retail_demo.jsonl`.
+
+5) Collect rollouts from Tau^2 Bench (separate terminal)
+- In the *second (separate) terminal*, launch the rollout script to kick off the experiment:
+
+```bash
+ng_collect_rollouts +agent_name=tau2_agent \
+    +input_jsonl_fpath=resources_servers/tau2_bench/data/example_retail_demo.jsonl \
+    +output_jsonl_fpath=resources_servers/tau2_bench/data/example_retail_demo_rollouts.jsonl \
+    +limit=1 \
+    +num_repeats=1 \
+    +num_samples_in_parallel=null
+```
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+# Description`
	`2`	`+`
	`3`	`+Please note that this is a dummy resource environment: the tau^2 agent runs as a separate agent under `responses_api_agents`, and its implementation and instructions can be found at https://github.com/NVIDIA-NeMo/Gym/tree/main/responses_api_agents/tau2_agent.`