Commit 3d4e14d

feat: added support for libero
1 parent ac7a9c9 commit 3d4e14d

File tree

2 files changed: +84 -7 lines changed


README.md

Lines changed: 16 additions & 2 deletions
@@ -28,9 +28,20 @@ pip install -ve .
 
 ### Environment and Policy Installation
 On top of vlagents you can then install a simulation environment where the agent acts.
-We currently support [maniskill](https://github.com/haosulab/ManiSkill) with more to come.
+We currently support the following environments:
+- [maniskill](https://github.com/haosulab/ManiSkill)
+- [robot control stack](https://github.com/RobotControlStack/robot-control-stack)
+- [libero](https://github.com/Lifelong-Robot-Learning/LIBERO)
+
+
 In order to avoid dependency conflicts, use a second conda/pip environment to install your policy.
-We currently support [octo](https://github.com/octo-models/octo) and [openvla](https://github.com/openvla/openvla).
+We currently support the following policies:
+- [octo](https://github.com/octo-models/octo)
+- [openvla](https://github.com/openvla/openvla)
+- [openpi](https://github.com/Physical-Intelligence/openpi)
+- [vjepa2-ac](https://github.com/facebookresearch/vjepa2)
+- [diffusion policy](https://github.com/real-stanford/diffusion_policy)
+
 
 ### Octo
 To use Octo as an agent/policy you need to create a new conda environment:
@@ -135,6 +146,9 @@ pip install -ve .
 
 ```
 
+### Diffusion Policy
+Currently located on the branch `diffusion_policy`.
+
 ## Usage
 To start a vlagents server use the `start-server` command where `kwargs` is a dictionary of the constructor arguments of the policy you want to start, e.g.
 ```shell
src/vlagents/evaluator_envs.py

Lines changed: 68 additions & 5 deletions
@@ -132,11 +132,7 @@ def __init__(self, env_id, seed, **env_kwargs):
         # TODO: one could save only every nth episode by adding an episode counter which steps the record env only
         # when the counter is divisible by n otherwise steps the normal env
         logging.info(f"Creating ManiSkill env {env_id}")
-        if "video_dir" in env_kwargs:
-            output_dir = env_kwargs["video_dir"]
-            del env_kwargs["video_dir"]
-        else:
-            output_dir = None
+        output_dir = env_kwargs.pop("video_dir", None)
         super().__init__(env_id, seed, **env_kwargs)
         logging.info(f"Created ManiSkill env {env_id}")
         if "human_render_camera_configs" in env_kwargs:
@@ -204,11 +200,78 @@ def do_import():
 EvaluatorEnv.register("PokeCube-v1", ManiSkill)
 
 
+class Libero(EvaluatorEnv):
+
+    def __init__(self, env_id: str, seed: int, **env_kwargs) -> None:
+        logging.info("Creating Libero env")
+        self.env, self._language_instruction, self.task_name, self.task_suite, self.task_id, self.task = self._make_gym(
+            env_id, seed, **env_kwargs
+        )
+        logging.info(
+            f"Created Libero env, task suite: {env_id}, task id: {self.task_id}, task name {self.task_name}, instruction: {self._language_instruction}"
+        )
+        self.env_id = env_id
+        self.seed = seed
+
+    def _make_gym(self, env_id, seed, **env_kwargs):
+        from libero.libero import benchmark, get_libero_path
+        from libero.libero.envs import OffScreenRenderEnv
+
+        benchmark_dict = benchmark.get_benchmark_dict()
+
+        task_suite = benchmark_dict[env_id]()
+        task_id = min(max(env_kwargs.pop("task_id", 0), 0), task_suite.n_tasks - 1)
+        task = task_suite.get_task(task_id)
+
+        task_bddl_file = os.path.join(get_libero_path("bddl_files"), task.problem_folder, task.bddl_file)
+        env = OffScreenRenderEnv(
+            bddl_file_name=task_bddl_file,
+            **env_kwargs,
+        )
+        env.seed(seed)
+        return env, task.language, task.name, task_suite, task_id, task
+
+    def translate_obs(self, obs: dict[str, Any]) -> Obs:
+        return Obs(
+            cameras=dict(rgb_side=obs["agentview_image"]),
+            gripper=obs["robot0_gripper_qpos"] / 0.04,  # normalize
+        )
+
+    def step(self, action: Act) -> tuple[Obs, float, bool, bool, dict]:
+        # change gripper to libero format (-1, 1) where -1 is open
+        action.action[-1] = (1 - action.action[-1]) * 2 - 1.0
+        obs, reward, done, info = self.env.step(action.action)
+        return self.translate_obs(obs), reward, done, done, info
+
+    def reset(self, seed: int | None = None, options: dict[str, Any] | None = None) -> tuple[Obs, dict[str, Any]]:
+        obs, info = self.env.reset()
+        init_states = self.task_suite.get_task_init_states(
+            self.task_id
+        )  # for benchmarking purposes, we fix a set of initial states
+        init_state_id = 0
+        self.env.set_init_state(init_states[init_state_id])
+
+        return self.translate_obs(obs), info
+
+    @property
+    def language_instruction(self) -> str:
+        return self._language_instruction
+
+
+EvaluatorEnv.register("libero_10", Libero)
+EvaluatorEnv.register("libero_90", Libero)
+EvaluatorEnv.register("libero_100", Libero)
+EvaluatorEnv.register("libero_spatial", Libero)
+EvaluatorEnv.register("libero_object", Libero)
+EvaluatorEnv.register("libero_goal", Libero)
+
+
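For orientation, here is a rough smoke-test sketch of how the new `Libero` wrapper could be driven directly, outside the evaluator loop. It only relies on what the diff above shows; the `Act` type is replaced by a stub because its import path is not part of this diff, and the 7-dimensional action (6-DoF arm command plus gripper), the `task_id` value, and the use of `libero_spatial` are illustrative assumptions rather than values fixed by this commit.

```python
# Hypothetical usage sketch for the new Libero wrapper (not part of this commit).
from dataclasses import dataclass

import numpy as np

from vlagents.evaluator_envs import Libero


@dataclass
class ActStub:
    """Stand-in for vlagents' Act; step() above only reads and writes .action."""
    action: np.ndarray


# task_id is clamped to [0, task_suite.n_tasks - 1] by _make_gym
env = Libero("libero_spatial", seed=0, task_id=0)
print(env.language_instruction)        # natural-language task description

obs, info = env.reset()                # resets to a fixed benchmark init state
print(obs.cameras["rgb_side"].shape)   # agentview RGB frame

# Assumed 7-dim action; the last entry is the gripper channel, which step()
# converts to Libero's (-1, 1) convention where -1 means open.
obs, reward, terminated, truncated, info = env.step(ActStub(action=np.zeros(7)))
```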
 @dataclass
 class EvalConfig:
     env_id: str
     env_kwargs: dict[str, Any]
     max_steps_per_episode: int = 100
+    # TODO: add seed, on same machine and jpeg encoding
 
 
 @dataclass
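A corresponding `EvalConfig` for one of the newly registered suites might then look like the sketch below. `task_id` is the only Libero-specific kwarg introduced by this commit; `camera_heights`/`camera_widths` are assumed to be forwarded to LIBERO's `OffScreenRenderEnv`, and, like the step budget, are illustrative values rather than defaults set here.

```python
# Hypothetical evaluation config for a Libero suite (values are illustrative).
from vlagents.evaluator_envs import EvalConfig

config = EvalConfig(
    env_id="libero_goal",        # any of the suites registered above
    env_kwargs={
        "task_id": 3,            # clamped to the suite's task range by Libero._make_gym
        "camera_heights": 256,   # assumed OffScreenRenderEnv kwargs, not set in this commit
        "camera_widths": 256,
    },
    max_steps_per_episode=300,
)
```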
