
Commit

Merge branch 'dev' into ms3-docs
StoneT2000 committed Mar 6, 2024
2 parents ec56452 + 49d2b50 commit 618e88b
Showing 160 changed files with 2,275 additions and 9,992 deletions.
README.md (2 changes: 1 addition & 1 deletion)

@@ -31,7 +31,7 @@ From pip:

```bash
# This is temporary in order to install a in-dev version of sapien 3
pip install manualtest/sapien-3.0.0.dev20240221+fa245b5-cp39-cp39-manylinux2014_x86_64.whl
pip install manualtest/sapien-3.0.0.dev20240305+5d84989-cp310-cp310-manylinux2014_x86_64.whl
```

From github:
examples/baselines/ppo/README.md (2 changes: 1 addition & 1 deletion)

@@ -5,7 +5,7 @@ Code adapted from [CleanRL](https://github.com/vwxyzjn/cleanrl/)
State based
```bash
python ppo.py --num_envs=1024 --update_epochs=8 --num_minibatches=32 --env_id="PickCube-v1" --total_timesteps=50000000
python ppo.py --num_envs=2048 --update_epochs=8 --num_minibatches=32 --env_id="PushCube-v1" --total_timesteps=100000000 --num-steps=12
python ppo.py --num_envs=2048 --update_epochs=8 --num_minibatches=32 --env_id="PushCube-v1" --total_timesteps=10000000 --eval_freq=10
python ppo.py --num_envs=1024 --update_epochs=8 --num_minibatches=32 --env_id="StackCube-v1" --total_timesteps=100000000
python ppo.py --num_envs=512 --update_epochs=8 --num_minibatches=32 --env_id="TwoRobotStackCube-v1" --total_timesteps=100000000 --num-steps=100
python ppo.py --num_envs=512 --update_epochs=8 --num_minibatches=32 --env_id="TwoRobotPickCube-v1" --total_timesteps=100000000 --num-steps=100
examples/baselines/ppo/ppo.py (63 changes: 43 additions & 20 deletions)

@@ -57,6 +57,8 @@ class Args:
"""the number of parallel evaluation environments"""
num_steps: int = 50
"""the number of steps to run in each environment per policy rollout"""
num_eval_steps: int = 50
"""the number of steps to run in each evaluation environment during evaluation"""
anneal_lr: bool = False
"""Toggle learning rate annealing for policy and value networks"""
gamma: float = 0.8
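
For context, in CleanRL-style scripts these `Args` fields double as command-line flags: the dataclass is handed to a CLI parser and each field's docstring becomes the flag's help text. The parser itself is not visible in this excerpt; the sketch below assumes tyro, as used in upstream CleanRL, so treat the exact flag spelling as illustrative.

```python
# Hypothetical sketch of the dataclass-as-CLI pattern behind the Args class
# above. tyro is assumed (not shown in this diff); flag spelling depends on
# the parser's naming convention (e.g. --num-eval-steps).
from dataclasses import dataclass

import tyro


@dataclass
class Args:
    num_steps: int = 50
    """the number of steps to run in each environment per policy rollout"""
    num_eval_steps: int = 50
    """the number of steps to run in each evaluation environment during evaluation"""


if __name__ == "__main__":
    args = tyro.cli(Args)
    print(args.num_steps, args.num_eval_steps)
```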
@@ -183,9 +185,9 @@ def get_action_and_value(self, x, action=None):
envs = FlattenActionSpaceWrapper(envs)
eval_envs = FlattenActionSpaceWrapper(eval_envs)
if args.capture_video:
eval_envs = RecordEpisode(eval_envs, output_dir=f"runs/{run_name}/videos", save_trajectory=False, video_fps=30)
envs = ManiSkillVectorEnv(envs, args.num_envs, ignore_terminations=False, **env_kwargs)
eval_envs = ManiSkillVectorEnv(eval_envs, args.num_eval_envs, ignore_terminations=True, **env_kwargs)
eval_envs = RecordEpisode(eval_envs, output_dir=f"runs/{run_name}/videos", save_trajectory=False, max_steps_per_video=args.num_eval_steps, video_fps=30)
envs = ManiSkillVectorEnv(envs, args.num_envs, **env_kwargs)
eval_envs = ManiSkillVectorEnv(eval_envs, args.num_eval_envs, ignore_terminations=False, **env_kwargs)
assert isinstance(envs.single_action_space, gym.spaces.Box), "only continuous action space is supported"

agent = Agent(envs).to(device)
@@ -225,18 +227,37 @@ def clip_action(action: torch.Tensor):
if iteration % args.eval_freq == 1:
# evaluate
print("Evaluating")
eval_done = False
while not eval_done:
eval_envs.reset()
returns = []
eps_lens = []
successes = []
failures = []
for _ in range(args.num_eval_steps):
with torch.no_grad():
eval_obs, _, eval_terminations, eval_truncations, eval_infos = eval_envs.step(agent.get_action(eval_obs, deterministic=True))
if eval_truncations.any():
eval_done = True
info = eval_infos["final_info"]
episodic_return = info['episode']['r'].mean().cpu().numpy()
print(f"eval_episodic_return={episodic_return}")
writer.add_scalar("charts/eval_success_rate", info["success"].float().mean().cpu().numpy(), global_step)
writer.add_scalar("charts/eval_episodic_return", episodic_return, global_step)
writer.add_scalar("charts/eval_episodic_length", info["elapsed_steps"].float().mean().cpu().numpy(), global_step)
if "final_info" in eval_infos:
mask = eval_infos["_final_info"]
eps_lens.append(eval_infos["final_info"]["elapsed_steps"][mask].cpu().numpy())
returns.append(eval_infos["final_info"]["episode"]["r"][mask].cpu().numpy())
if "success" in eval_infos:
successes.append(eval_infos["final_info"]["success"][mask].cpu().numpy())
if "fail" in eval_infos:
failures.append(eval_infos["final_info"]["fail"][mask].cpu().numpy())
returns = np.concatenate(returns)
eps_lens = np.concatenate(eps_lens)
print(f"Evaluated {args.num_eval_steps * args.num_envs} steps resulting in {len(eps_lens)} episodes")
if len(successes) > 0:
successes = np.concatenate(successes)
writer.add_scalar("charts/eval_success_rate", successes.mean(), global_step)
print(f"eval_success_rate={successes.mean()}")
if len(failures) > 0:
failures = np.concatenate(failures)
writer.add_scalar("charts/eval_fail_rate", failures.mean(), global_step)
print(f"eval_fail_rate={failures.mean()}")

print(f"eval_episodic_return={returns.mean()}")
writer.add_scalar("charts/eval_episodic_return", returns.mean(), global_step)
writer.add_scalar("charts/eval_episodic_length", eps_lens.mean(), global_step)

if args.save_model and iteration % args.eval_freq == 1:
model_path = f"runs/{run_name}/{args.exp_name}_{iteration}.cleanrl_model"
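
For readers unfamiliar with the vectorized-env convention used in the evaluation block above: when some sub-environments finish during a step, the wrapper reports their terminal statistics under `final_info`, and the boolean mask `_final_info` marks which sub-environments actually ended on that step. A condensed, hypothetical helper illustrating that aggregation (the script inlines this logic; the `evaluate` function itself is not part of the commit):

```python
# Illustrative helper, not part of this commit: aggregate per-episode stats
# from a fixed-length evaluation rollout using the _final_info mask.
import numpy as np
import torch


def evaluate(eval_envs, agent, num_eval_steps: int) -> dict:
    eval_obs, _ = eval_envs.reset()
    returns, successes = [], []
    for _ in range(num_eval_steps):
        with torch.no_grad():
            action = agent.get_action(eval_obs, deterministic=True)
        eval_obs, _, _, _, infos = eval_envs.step(action)
        if "final_info" in infos:
            # Only sub-envs that finished on this step carry valid final stats.
            mask = infos["_final_info"]
            final = infos["final_info"]
            returns.append(final["episode"]["r"][mask].cpu().numpy())
            if "success" in final:
                successes.append(final["success"][mask].cpu().numpy())
    stats = {}
    if returns:
        stats["return"] = float(np.concatenate(returns).mean())
    if successes:
        stats["success_rate"] = float(np.concatenate(successes).mean())
    return stats
```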
@@ -266,15 +287,17 @@ def clip_action(action: torch.Tensor):
rewards[step] = reward.view(-1)

if "final_info" in infos:
info = infos["final_info"]
done_mask = info["_final_info"]
episodic_return = info['episode']['r'][done_mask].mean().cpu().numpy()
# print(f"global_step={global_step}, episodic_return={episodic_return}")
writer.add_scalar("charts/success_rate", info["success"][done_mask].float().mean().cpu().numpy(), global_step)
final_info = infos["final_info"]
done_mask = final_info["_final_info"]
episodic_return = final_info['episode']['r'][done_mask].cpu().numpy().mean()
if "success" in final_info:
writer.add_scalar("charts/success_rate", final_info["success"][done_mask].cpu().numpy().mean(), global_step)
if "fail" in final_info:
writer.add_scalar("charts/fail_rate", final_info["fail"][done_mask].cpu().numpy().mean(), global_step)
writer.add_scalar("charts/episodic_return", episodic_return, global_step)
writer.add_scalar("charts/episodic_length", info["elapsed_steps"][done_mask].float().mean().cpu().numpy(), global_step)
writer.add_scalar("charts/episodic_length", final_info["elapsed_steps"][done_mask].cpu().numpy().mean(), global_step)

final_values[step, torch.arange(args.num_envs, device=device)[done_mask]] = agent.get_value(info["final_observation"][done_mask]).view(-1)
final_values[step, torch.arange(args.num_envs, device=device)[done_mask]] = agent.get_value(final_info["final_observation"][done_mask]).view(-1)

# bootstrap value according to termination and truncation
with torch.no_grad():
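
The body of this bootstrapping step is cut off in the excerpt. Conceptually, the `final_values` buffer filled above lets the return computation treat truncation (time limit reached) differently from termination: at a truncated step the target bootstraps from the value of the final observation instead of dropping to zero. A generic one-step illustration, not the script's actual GAE code:

```python
# Generic illustration of bootstrapping on truncation vs. termination;
# the names and the single-step form are illustrative, not the script's code.
import torch


def one_step_target(reward, next_value, final_value, terminated, truncated, gamma=0.8):
    # At truncation, the episode was cut short: bootstrap from the stored
    # value of the final observation. At termination, future return is zero.
    next_value = torch.where(truncated, final_value, next_value)
    next_value = torch.where(terminated, torch.zeros_like(next_value), next_value)
    return reward + gamma * next_value


target = one_step_target(
    reward=torch.tensor([1.0, 0.5]),
    next_value=torch.tensor([2.0, 2.0]),
    final_value=torch.tensor([1.5, 1.5]),
    terminated=torch.tensor([True, False]),
    truncated=torch.tensor([False, True]),
)
# target == tensor([1.0000, 1.7000])
```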
examples/benchmarking/benchmark_maniskill.py (8 changes: 0 additions & 8 deletions)

@@ -1,21 +1,13 @@
# py-spy record -f speedscope -r 1000 -o profile -- python manualtest/benchmark_gpu_sim.py
# python manualtest/benchmark_orbit_sim.py --task "Isaac-Lift-Cube-Franka-v0" --num_envs 512 --headless
import argparse
import time

import gymnasium as gym
import numpy as np
import sapien
import sapien.physx
import sapien.render
import torch
import tqdm

import mani_skill2.envs
from mani_skill2.envs.scenes.tasks.planner.planner import PickSubtask
from mani_skill2.envs.scenes.tasks.sequential_task import SequentialTaskEnv
from mani_skill2.utils.scene_builder.ai2thor.variants import ArchitecTHORSceneBuilder
from mani_skill2.utils.scene_builder.replicacad.scene_builder import ReplicaCADSceneBuilder
from mani_skill2.vector.wrappers.gymnasium import ManiSkillVectorEnv
from profiling import Profiler
from mani_skill2.utils.visualization.misc import images_to_video, tile_images
mani_skill2/agents/__init__.py (1 change: 1 addition & 0 deletions)

@@ -0,0 +1 @@
from .registration import REGISTERED_AGENTS
mani_skill2/agents/base_agent.py (15 changes: 5 additions & 10 deletions)

@@ -11,13 +11,8 @@

from mani_skill2 import format_path
from mani_skill2.sensors.base_sensor import BaseSensor, BaseSensorConfig
from mani_skill2.utils.sapien_utils import (
apply_urdf_config,
check_urdf_config,
parse_urdf_config,
)
from mani_skill2.utils.structs.actor import Actor
from mani_skill2.utils.structs.articulation import Articulation
from mani_skill2.utils import sapien_utils
from mani_skill2.utils.structs import Actor, Articulation

from .controllers.base_controller import (
BaseController,
@@ -98,11 +93,11 @@ def _load_articulation(self):

urdf_path = format_path(str(self.urdf_path))

urdf_config = parse_urdf_config(self.urdf_config, self.scene)
check_urdf_config(urdf_config)
urdf_config = sapien_utils.parse_urdf_config(self.urdf_config, self.scene)
sapien_utils.check_urdf_config(urdf_config)

# TODO(jigu): support loading multiple convex collision shapes
apply_urdf_config(loader, urdf_config)
sapien_utils.apply_urdf_config(loader, urdf_config)
self.robot: Articulation = loader.load(urdf_path)
assert self.robot is not None, f"Fail to load URDF from {urdf_path}"

mani_skill2/agents/controllers/base_controller.py (20 changes: 6 additions & 14 deletions)

@@ -16,8 +16,8 @@
get_active_joint_indices,
get_joints_by_names,
)
from mani_skill2.utils import sapien_utils
from mani_skill2.utils.common import clip_and_scale_action, normalize_action_space
from mani_skill2.utils.sapien_utils import to_tensor
from mani_skill2.utils.structs.articulation import Articulation
from mani_skill2.utils.structs.joint import Joint
from mani_skill2.utils.structs.types import Array
@@ -37,8 +37,10 @@ class BaseController:
"""the action space. If the number of parallel environments is > 1, this action space is also batched"""
single_action_space: spaces.Space
"""The unbatched version of the action space which is also typically already normalized by this class"""
"""The batched version of the action space which is also typically already normalized by this class"""
_original_single_action_space: spaces.Space
"""The unbatched, original action space without any additional processing like normalization"""
"""The batched, original action space without any additional processing like normalization"""

def __init__(
self,
@@ -162,8 +164,8 @@ def _clip_and_scale_action_space(self):
self._original_single_action_space.low,
self._original_single_action_space.high,
)
self.action_space_low = to_tensor(low)
self.action_space_high = to_tensor(high)
self.action_space_low = sapien_utils.to_tensor(low)
self.action_space_high = sapien_utils.to_tensor(high)

def _clip_and_scale_action(self, action):
return clip_and_scale_action(
@@ -205,7 +207,6 @@ def __init__(
)
self._initialize_action_space()
self._initialize_joints()
self._assert_fully_actuated()

self.action_space = self.single_action_space
if self.scene.num_envs > 1:
@@ -229,15 +230,6 @@ def _initialize_joints(self):
self.joints.extend(controller.joints)
self.joint_indices.extend(controller.joint_indices)

def _assert_fully_actuated(self):
active_joints = self.articulation.get_active_joints()
if len(active_joints) != len(self.joints) or set(active_joints) != set(
self.joints
):
print("active_joints:", [x.name for x in active_joints])
print("controlled_joints:", [x.name for x in self.joints])
raise AssertionError("{} is not fully actuated".format(self.articulation))

def set_drive_property(self):
for controller in self.controllers.values():
controller.set_drive_property()
@@ -251,7 +243,7 @@ def reset(self):

def set_action(self, action: Dict[str, np.ndarray]):
for uid, controller in self.controllers.items():
controller.set_action(to_tensor(action[uid]))
controller.set_action(sapien_utils.to_tensor(action[uid]))

def before_simulation_step(self):
if physx.is_gpu_enabled():
mani_skill2/agents/controllers/pd_ee_pose.py (32 changes: 23 additions & 9 deletions)

@@ -12,12 +12,13 @@
import torch
from gymnasium import spaces

from mani_skill2 import logger
from mani_skill2.utils import sapien_utils
from mani_skill2.utils.common import clip_and_scale_action
from mani_skill2.utils.geometry.rotation_conversions import (
euler_angles_to_matrix,
matrix_to_quaternion,
)
from mani_skill2.utils.sapien_utils import get_obj_by_name, to_numpy, to_tensor
from mani_skill2.utils.structs.pose import Pose, vectorize_pose
from mani_skill2.utils.structs.types import Array

@@ -28,6 +29,7 @@
# NOTE(jigu): not necessary to inherit, just for convenience
class PDEEPosController(PDJointPosController):
config: "PDEEPosControllerConfig"
_target_pose = None

def _initialize_joints(self):
self.initial_qpos = None
@@ -59,12 +61,15 @@ def suppress_stdout_stderr():
self.qmask[self.joint_indices] = 1

if self.config.ee_link:
self.ee_link = get_obj_by_name(
self.ee_link = sapien_utils.get_obj_by_name(
self.articulation.get_links(), self.config.ee_link
)
else:
# The child link of last joint is assumed to be the end-effector.
self.ee_link = self.joints[-1].get_child_link()
logger.warn(
"Configuration did not define a ee_link name, using the child link of the last joint"
)
self.ee_link_idx = self.articulation.get_links().index(self.ee_link)

def _initialize_action_space(self):
@@ -87,7 +92,13 @@ def ee_pose_at_base(self):

def reset(self):
super().reset()
self._target_pose = self.ee_pose_at_base
if self._target_pose is None:
self._target_pose = self.ee_pose_at_base
else:
# TODO (stao): this is a strange way to mask setting individual batched pose parts
self._target_pose.raw_pose[
self.scene._reset_mask
] = self.ee_pose_at_base.raw_pose[self.scene._reset_mask]

def compute_ik(self, target_pose: Pose, action: Array, max_iterations=100):
# Assume the target pose is defined in the base frame
@@ -105,12 +116,14 @@ def compute_ik(self, target_pose: Pose, action: Array, max_iterations=100):
result, success, error = self.pmodel.compute_inverse_kinematics(
self.ee_link_idx,
target_pose.sp,
initial_qpos=to_numpy(self.articulation.get_qpos()).squeeze(0),
initial_qpos=sapien_utils.to_numpy(
self.articulation.get_qpos()
).squeeze(0),
active_qmask=self.qmask,
max_iterations=max_iterations,
)
if success:
return to_tensor([result[self.joint_indices]])
return sapien_utils.to_tensor([result[self.joint_indices]])
else:
return None

@@ -152,13 +165,14 @@ def set_action(self, action: Array):

def get_state(self) -> dict:
if self.config.use_target:
return {"target_pose": vectorize_pose(self._target_pose)}
return {"target_pose": self._target_pose.raw_pose}
return {}

def set_state(self, state: dict):
if self.config.use_target:
target_pose = state["target_pose"]
self._target_pose = sapien.Pose(target_pose[:3], target_pose[3:])
# if self.config.use_target:
# target_pose = state["target_pose"]
# self._target_pose = sapien.Pose(target_pose[:3], target_pose[3:])
raise NotImplementedError()


@dataclass
mani_skill2/agents/controllers/pd_joint_pos.py (27 changes: 21 additions & 6 deletions)

@@ -5,14 +5,16 @@
import torch
from gymnasium import spaces

from mani_skill2.utils.sapien_utils import to_tensor
from mani_skill2.utils import sapien_utils
from mani_skill2.utils.structs.types import Array

from .base_controller import BaseController, ControllerConfig


class PDJointPosController(BaseController):
config: "PDJointPosControllerConfig"
_start_qpos = None
_target_qpos = None

def _get_joint_limits(self):
qlimits = self.articulation.get_qlimits()[0, self.joint_indices].cpu().numpy()
@@ -44,8 +46,19 @@ def set_drive_property(self):
def reset(self):
super().reset()
self._step = 0 # counter of simulation steps after action is set
self._start_qpos = self.qpos
self._target_qpos = self.qpos
if self._start_qpos is None:
self._start_qpos = self.qpos.clone()
else:

self._start_qpos[self.scene._reset_mask] = self.qpos[
self.scene._reset_mask
].clone()
if self._target_qpos is None:
self._target_qpos = self.qpos.clone()
else:
self._target_qpos[self.scene._reset_mask] = self.qpos[
self.scene._reset_mask
].clone()

def set_drive_targets(self, targets):
self.articulation.set_joint_drive_targets(
@@ -54,7 +67,7 @@ def set_drive_targets(self, targets):

def set_action(self, action: Array):
action = self._preprocess_action(action)
action = to_tensor(action)
action = sapien_utils.to_tensor(action)
self._step = 0
self._start_qpos = self.qpos
if self.config.use_delta:
@@ -63,8 +76,10 @@ def set_action(self, action: Array):
else:
self._target_qpos = self._start_qpos + action
else:
# Compatible with mimic controllers
self._target_qpos = torch.broadcast_to(action, self._start_qpos.shape)
# Compatible with mimic controllers. Need to clone here otherwise cannot do in-place replacements in the reset function
self._target_qpos = torch.broadcast_to(
action, self._start_qpos.shape
).clone()
if self.config.interpolate:
self._step_size = (self._target_qpos - self._start_qpos) / self._sim_steps
else:
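
Both controller diffs above (pd_ee_pose.py and pd_joint_pos.py) change `reset()` from unconditionally overwriting the buffered targets to a masked, in-place update keyed on `scene._reset_mask`, so that in GPU-vectorized scenes only the sub-environments currently being reset lose their targets. A self-contained sketch of that masked-overwrite pattern, with illustrative tensor names:

```python
# Minimal sketch of the partial-reset masking used in the controllers above;
# tensor names and sizes are illustrative.
import torch

num_envs, dof = 4, 7
target_qpos = torch.ones(num_envs, dof)    # persistent per-env drive targets
current_qpos = torch.zeros(num_envs, dof)  # joint positions read after reset
reset_mask = torch.tensor([True, False, True, False])  # cf. scene._reset_mask

# In-place masked overwrite: envs 0 and 2 adopt the new positions as their
# targets, while envs 1 and 3 keep the targets they were already tracking.
target_qpos[reset_mask] = current_qpos[reset_mask].clone()
print(target_qpos[:, 0])  # tensor([0., 1., 0., 1.])
```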
(Diffs for the remaining changed files are not shown in this excerpt.)
