Commit 4a75e52

Merge pull request #11 from RobotControlStack/juelg/libero
feat: libero support
2 parents bcfe3f1 + a62b8b0

File tree

6 files changed: +284 -272 lines


.github/workflows/pipeline.yaml

Lines changed: 1 addition & 1 deletion
@@ -37,7 +37,7 @@ jobs:
           path: dist/
 
   upload_pypi:
-    needs: [build_dist]
+    needs: [pipeline]
     runs-on: ubuntu-latest
     # Upload to PyPI on release
     if: always() && github.event_name == 'release' && github.event.action == 'published'

Makefile

Lines changed: 2 additions & 2 deletions
@@ -2,8 +2,8 @@ PYSRC = src
 
 # Python
 checkformat:
-	isort --check-only ${PYSRC}
-	black --check ${PYSRC}
+	isort --check-only ${PYSRC}/vlagents
+	black --check ${PYSRC}/vlagents
 
 format:
 	isort ${PYSRC}

README.md

Lines changed: 23 additions & 9 deletions
@@ -13,23 +13,35 @@ The work also includes a section on related engineering challenges regarding jax
 
 ## Installation
 
+### Pip Installation (Recommended)
+```shell
+pip install vlagents
+```
+
 ### Local Installation
 ```shell
-git clone https://github.com/juelg/vlagents.git
+git clone https://github.com/RobotControlStack/vlagents.git
 cd vlagents
 pip install -ve .
 ```
 
-### Repo Installation
-```shell
-pip install git+https://github.com/juelg/vlagents.git
-```
 
 ### Environment and Policy Installation
 On top of vlagents you can then install a simulation environment where the agent acts.
-We currently support [maniskill](https://github.com/haosulab/ManiSkill) with more to come.
+We currently support the following environments:
+- [maniskill](https://github.com/haosulab/ManiSkill)
+- [robot control stack](https://github.com/RobotControlStack/robot-control-stack)
+- [libero](https://github.com/Lifelong-Robot-Learning/LIBERO)
+
+
 In order to avoid dependency conflicts, use a second conda/pip environment to install your policy.
-We currently support [octo](https://github.com/octo-models/octo) and [openvla](https://github.com/openvla/openvla).
+We currently support the following policies:
+- [octo](https://github.com/octo-models/octo)
+- [openvla](https://github.com/openvla/openvla)
+- [openpi](https://github.com/Physical-Intelligence/openpi)
+- [vjepa2-ac](https://github.com/facebookresearch/vjepa2)
+- [diffusion policy](https://github.com/real-stanford/diffusion_policy)
+
 
 ### Octo
 To use Octo as an agent/policy you need to create a new conda environment:

@@ -134,6 +146,9 @@ pip install -ve .
 
 ```
 
+### Diffusion Policy
+Currently located on the branch `diffusion_policy`.
+
 ## Usage
 To start a vlagents server use the `start-server` command, where `kwargs` is a dictionary of the constructor arguments of the policy you want to start, e.g.
 ```shell

@@ -190,9 +205,8 @@ If you find the agent useful for your work, please consider citing the original
 ```
 @inproceedings{juelg2025refinedpolicydistillationvla,
   title={{Refined Policy Distillation}: {F}rom {VLA} Generalists to {RL} Experts},
-  author={Tobias Jülg and Wolfram Burgard and Florian Walter},
+  author={Tobias J{\"u}lg and Wolfram Burgard and Florian Walter},
   year={2025},
   booktitle={Proc.~of the IEEE/RSJ Int.~Conf.~on Intelligent Robots and Systems (IROS)},
-  note={Accepted for publication.}
 }
 ```
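The LIBERO entry added to the environment list above is exercised end to end by the new test file below. For quick reference, the core call condenses to the following sketch; every name in it is taken from that test, and the `localhost:8080` agent address assumes a policy server was started separately with the `start-server` command described in the Usage section:

```python
# Condensed from src/tests/test_libero.py (added in this commit).
# Assumes a vlagents policy server is already listening on localhost:8080.
from vlagents.__main__ import _run_eval
from vlagents.evaluator_envs import AgentConfig, EvalConfig

_run_eval(
    output_path="test_output",
    eval_cfgs=[
        EvalConfig(
            env_id="libero_10",  # LIBERO task suite to evaluate on
            env_kwargs={"controller": "OSC_POSE", "camera_heights": 256, "camera_widths": 256},
            max_steps_per_episode=100,
        )
    ],
    agent_cfg=AgentConfig(host="localhost", port=8080, agent_name="test", agent_kwargs={}),
    n_processes=1,  # single worker process
    n_gpus=1,
    episodes=1,  # one rollout per task
)
```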

src/tests/test_libero.py

Lines changed: 51 additions & 0 deletions
@@ -0,0 +1,51 @@
+if __name__ == "__main__":
+    import datetime
+    import os
+
+    import numpy as np
+    from PIL import Image
+
+    from lerobot.envs.libero import LiberoEnv
+    from vlagents.__main__ import _run_eval
+    from vlagents.evaluator_envs import AgentConfig, EvalConfig
+
+    # main_app()
+    # test
+    os.environ["RUN_PATH"] = "test_output"
+    _run_eval(
+        output_path="test_output",
+        eval_cfgs=[
+            EvalConfig(
+                env_id="libero_10",
+                env_kwargs={"controller": "OSC_POSE", "camera_heights": 256, "camera_widths": 256},
+                max_steps_per_episode=100,
+            )
+        ],
+        agent_cfg=AgentConfig(host="localhost", port=8080, agent_name="test", agent_kwargs={}),
+        n_processes=1,
+        n_gpus=1,
+        episodes=1,
+    )
+
+    # from libero.libero import benchmark, get_libero_path
+    # from libero.libero.envs import OffScreenRenderEnv
+    # benchmark_dict = benchmark.get_benchmark_dict()
+
+    # task_suite = benchmark_dict["libero_10"]()
+    # env = LiberoEnv(task_suite, task_id=0, task_suite_name="libero_10")
+    # env.reset()
+    # im = []
+    # im2 = []
+    # for i in range(100):
+    #     obs, reward, done, truncated, info = env.step(np.zeros(7))
+    #     im2.append(Image.fromarray(obs["pixels"]["image"]))
+    #     im.append(Image.fromarray(obs["pixels"]["image2"]))
+    # env.close()
+
+    # im[0].save(
+    #     f"{str(datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))}.gif",
+    #     save_all=True,
+    #     append_images=im[1:],
+    #     duration=0.2 * 1000,
+    #     loop=0,
+    # )
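The commented-out block at the bottom of the new test sketches a manual LIBERO rollout with GIF export. For reference, a cleaned-up standalone version might look like the sketch below; the `LiberoEnv` constructor arguments and the `obs["pixels"]` frame layout are assumptions carried over from those comments, not a confirmed API:

```python
# A minimal standalone rollout, reconstructed from the commented-out code in
# src/tests/test_libero.py above. The LiberoEnv constructor arguments and the
# obs["pixels"] layout are assumptions taken from those comments.
import datetime

import numpy as np
from PIL import Image

from lerobot.envs.libero import LiberoEnv
from libero.libero import benchmark

benchmark_dict = benchmark.get_benchmark_dict()
task_suite = benchmark_dict["libero_10"]()  # the LIBERO-10 task suite
env = LiberoEnv(task_suite, task_id=0, task_suite_name="libero_10")
env.reset()

frames = []
for _ in range(100):
    # Step with a zero 7-DoF action (a no-op) just to render the scene.
    obs, reward, done, truncated, info = env.step(np.zeros(7))
    frames.append(Image.fromarray(obs["pixels"]["image"]))
env.close()

# Save the rollout as an animated GIF at 200 ms per frame.
frames[0].save(
    f"{datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.gif",
    save_all=True,
    append_images=frames[1:],
    duration=200,
    loop=0,
)
```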
