From 45e24bf5c5dfa7c9c7ec5927608533d108afb7dc Mon Sep 17 00:00:00 2001 From: Stone Tao Date: Tue, 5 Mar 2024 18:35:46 -0800 Subject: [PATCH] work --- .../utils/building/articulation_builder.py | 8 +-- mani_skill2/utils/building/articulations.py | 6 +-- mani_skill2/utils/common.py | 12 ++--- mani_skill2/utils/structs/actor.py | 6 +-- mani_skill2/utils/structs/articulation.py | 42 ++++++++------- mani_skill2/utils/structs/base.py | 22 ++++---- mani_skill2/utils/structs/joint.py | 6 +-- mani_skill2/utils/structs/pose.py | 22 ++++---- mani_skill2/utils/structs/render_camera.py | 18 +++++-- mani_skill2/utils/wrappers/flatten.py | 4 +- mani_skill2/utils/wrappers/record.py | 51 +++++++++++-------- 11 files changed, 109 insertions(+), 88 deletions(-) diff --git a/mani_skill2/utils/building/articulation_builder.py b/mani_skill2/utils/building/articulation_builder.py index 55c7987b8..be4930c64 100644 --- a/mani_skill2/utils/building/articulation_builder.py +++ b/mani_skill2/utils/building/articulation_builder.py @@ -11,7 +11,7 @@ ) from sapien.wrapper.articulation_builder import LinkBuilder -from mani_skill2.utils.sapien_utils import to_tensor +from mani_skill2.utils import sapien_utils from mani_skill2.utils.structs.articulation import Articulation if TYPE_CHECKING: @@ -111,10 +111,12 @@ def build(self, name=None, fix_root_link=None): len(self.scene_mask) == self.scene.num_envs ), "Scene mask size is not correct. Must be the same as the number of sub scenes" num_arts = np.sum(num_arts) - self.scene_mask = to_tensor(self.scene_mask) + self.scene_mask = sapien_utils.to_tensor(self.scene_mask) else: # if scene mask is none, set it here - self.scene_mask = to_tensor(torch.ones((self.scene.num_envs), dtype=bool)) + self.scene_mask = sapien_utils.to_tensor( + torch.ones((self.scene.num_envs), dtype=bool) + ) articulations = [] diff --git a/mani_skill2/utils/building/articulations.py b/mani_skill2/utils/building/articulations.py index 2088f2e0d..dcad2cc54 100644 --- a/mani_skill2/utils/building/articulations.py +++ b/mani_skill2/utils/building/articulations.py @@ -14,12 +14,12 @@ from mani_skill2 import ASSET_DIR, PACKAGE_ASSET_DIR from mani_skill2.envs.scene import ManiSkillScene +from mani_skill2.utils import sapien_utils from mani_skill2.utils.geometry.trimesh_utils import ( get_articulation_meshes, merge_meshes, ) from mani_skill2.utils.io_utils import load_json -from mani_skill2.utils.sapien_utils import apply_urdf_config, parse_urdf_config @dataclass @@ -105,8 +105,8 @@ def build_preprocessed_partnet_mobility_articulation( # loader.multiple_collisions_decomposition="coacd" loader.disable_self_collisions = True urdf_path = MODEL_DBS["PartnetMobility"]["model_urdf_paths"][model_id] - urdf_config = parse_urdf_config(urdf_config or {}, scene) - apply_urdf_config(loader, urdf_config) + urdf_config = sapien_utils.parse_urdf_config(urdf_config or {}, scene) + sapien_utils.apply_urdf_config(loader, urdf_config) articulation = loader.load(str(urdf_path), name=name, scene_mask=scene_mask) metadata = ArticulationMetadata( joints=dict(), links=dict(), movable_links=[], bbox=None, scale=loader.scale diff --git a/mani_skill2/utils/common.py b/mani_skill2/utils/common.py index 72c1157e6..2776a50b6 100644 --- a/mani_skill2/utils/common.py +++ b/mani_skill2/utils/common.py @@ -7,7 +7,7 @@ import torch from gymnasium import spaces -from mani_skill2.utils.sapien_utils import to_tensor +from mani_skill2.utils import sapien_utils from mani_skill2.utils.structs.types import Array, Device from .logging_utils import logger 
@@ -209,20 +209,20 @@ def flatten_state_dict( if state.size == 0: state = None if use_torch: - state = to_tensor(state) + state = sapien_utils.to_tensor(state) elif isinstance(value, (tuple, list)): state = None if len(value) == 0 else value if use_torch: - state = to_tensor(state) + state = sapien_utils.to_tensor(state) elif isinstance(value, (bool, np.bool_, int, np.int32, np.int64)): # x = np.array(1) > 0 is np.bool_ instead of ndarray state = int(value) if use_torch: - state = to_tensor(state) + state = sapien_utils.to_tensor(state) elif isinstance(value, (float, np.float32, np.float64)): state = np.float32(value) if use_torch: - state = to_tensor(state) + state = sapien_utils.to_tensor(state) elif isinstance(value, np.ndarray): if value.ndim > 2: raise AssertionError( @@ -230,7 +230,7 @@ def flatten_state_dict( ) state = value if value.size > 0 else None if use_torch: - state = to_tensor(state) + state = sapien_utils.to_tensor(state) else: is_torch_tensor = False diff --git a/mani_skill2/utils/structs/actor.py b/mani_skill2/utils/structs/actor.py index 2e92a74d0..30f8cafef 100644 --- a/mani_skill2/utils/structs/actor.py +++ b/mani_skill2/utils/structs/actor.py @@ -10,7 +10,7 @@ import sapien.render import torch -from mani_skill2.utils.sapien_utils import to_numpy, to_tensor +from mani_skill2.utils import sapien_utils from mani_skill2.utils.structs.base import BaseStruct, PhysxRigidDynamicComponentStruct from mani_skill2.utils.structs.pose import Pose, to_sapien_pose, vectorize_pose from mani_skill2.utils.structs.types import Array @@ -130,12 +130,12 @@ def get_state(self): def set_state(self, state: Array): if physx.is_gpu_enabled(): - state = to_tensor(state) + state = sapien_utils.to_tensor(state) self.set_pose(Pose.create(state[:, :7])) self.set_linear_velocity(state[:, 7:10]) self.set_angular_velocity(state[:, 10:13]) else: - state = to_numpy(state[0]) + state = sapien_utils.to_numpy(state[0]) self.set_pose(sapien.Pose(state[0:3], state[3:7])) if self.px_body_type == "dynamic": self.set_linear_velocity(state[7:10]) diff --git a/mani_skill2/utils/structs/articulation.py b/mani_skill2/utils/structs/articulation.py index c7f210cc8..3b4de9c43 100644 --- a/mani_skill2/utils/structs/articulation.py +++ b/mani_skill2/utils/structs/articulation.py @@ -11,13 +11,8 @@ import torch import trimesh +from mani_skill2.utils import sapien_utils from mani_skill2.utils.geometry.trimesh_utils import get_component_meshes, merge_meshes -from mani_skill2.utils.sapien_utils import ( - compute_total_impulse, - get_articulation_contacts, - to_numpy, - to_tensor, -) from mani_skill2.utils.structs.base import BaseStruct from mani_skill2.utils.structs.joint import Joint from mani_skill2.utils.structs.link import Link @@ -201,7 +196,7 @@ def get_state(self): def set_state(self, state: Array): if physx.is_gpu_enabled(): - state = to_tensor(state) + state = sapien_utils.to_tensor(state) self.set_root_pose(Pose.create(state[:, :7])) self.set_root_linear_velocity(state[:, 7:10]) self.set_root_angular_velocity(state[:, 10:13]) @@ -209,7 +204,7 @@ def set_state(self, state: Array): self.set_qpos(state[:, 13 : 13 + self.max_dof]) self.set_qvel(state[:, 13 + self.max_dof :]) else: - state = to_numpy(state[0]) + state = sapien_utils.to_numpy(state[0]) self.set_root_pose(sapien.Pose(state[0:3], state[3:7])) self.set_root_linear_velocity(state[7:10]) self.set_root_angular_velocity(state[10:13]) @@ -283,13 +278,16 @@ def get_net_contact_forces(self, link_names: Union[List[str], Tuple[str]]): ) else: - body_contacts = 
get_articulation_contacts( + body_contacts = sapien_utils.get_articulation_contacts( self.px.get_contacts(), self._objs[0], included_links=[self.link_map[k]._objs[0] for k in link_names], ) net_force = ( - to_tensor(compute_total_impulse(body_contacts)) / self._scene.timestep + sapien_utils.to_tensor( + sapien_utils.compute_total_impulse(body_contacts) + ) + / self._scene.timestep ) return net_force[None, :] @@ -451,12 +449,12 @@ def qf(self): @qf.setter def qf(self, arg1: torch.Tensor): if physx.is_gpu_enabled(): - arg1 = to_tensor(arg1) + arg1 = sapien_utils.to_tensor(arg1) self.px.cuda_articulation_qf.torch()[ self._data_index[self._scene._reset_mask], : self.max_dof ] = arg1 else: - arg1 = to_numpy(arg1) + arg1 = sapien_utils.to_numpy(arg1) if len(arg1.shape) == 2: arg1 = arg1[0] self._objs[0].qf = arg1 @@ -489,12 +487,12 @@ def qpos(self): @qpos.setter def qpos(self, arg1: torch.Tensor): if physx.is_gpu_enabled(): - arg1 = to_tensor(arg1) + arg1 = sapien_utils.to_tensor(arg1) self.px.cuda_articulation_qpos.torch()[ self._data_index[self._scene._reset_mask], : self.max_dof ] = arg1 else: - arg1 = to_numpy(arg1) + arg1 = sapien_utils.to_numpy(arg1) if len(arg1.shape) == 2: arg1 = arg1[0] self._objs[0].qpos = arg1 @@ -511,12 +509,12 @@ def qvel(self): @qvel.setter def qvel(self, arg1: torch.Tensor): if physx.is_gpu_enabled(): - arg1 = to_tensor(arg1) + arg1 = sapien_utils.to_tensor(arg1) self.px.cuda_articulation_qvel.torch()[ self._data_index[self._scene._reset_mask], : self.max_dof ] = arg1 else: - arg1 = to_numpy(arg1) + arg1 = sapien_utils.to_numpy(arg1) if len(arg1.shape) == 2: arg1 = arg1[0] self._objs[0].qvel = arg1 @@ -528,12 +526,12 @@ def root_angular_velocity(self) -> torch.Tensor: @root_angular_velocity.setter def root_angular_velocity(self, arg1: Array) -> None: if physx.is_gpu_enabled(): - arg1 = to_tensor(arg1) + arg1 = sapien_utils.to_tensor(arg1) self.px.cuda_rigid_body_data.torch()[ self.root._body_data_index[self._scene._reset_mask], 10:13 ] = arg1 else: - arg1 = to_numpy(arg1) + arg1 = sapien_utils.to_numpy(arg1) if len(arg1.shape) == 2: arg1 = arg1[0] self._objs[0].set_root_angular_velocity(arg1) @@ -545,12 +543,12 @@ def root_linear_velocity(self) -> torch.Tensor: @root_linear_velocity.setter def root_linear_velocity(self, arg1: Array) -> None: if physx.is_gpu_enabled(): - arg1 = to_tensor(arg1) + arg1 = sapien_utils.to_tensor(arg1) self.px.cuda_rigid_body_data.torch()[ self.root._body_data_index[self._scene._reset_mask], 7:10 ] = arg1 else: - arg1 = to_numpy(arg1) + arg1 = sapien_utils.to_numpy(arg1) if len(arg1.shape) == 2: arg1 = arg1[0] self._objs[0].set_root_linear_velocity(arg1) @@ -590,7 +588,7 @@ def set_joint_drive_targets( TODO (stao): can we use joint indices for the CPU sim as well? Some global joint indices? """ if physx.is_gpu_enabled(): - targets = to_tensor(targets) + targets = sapien_utils.to_tensor(targets) if joint_indices not in self._cached_joint_target_indices: self._cached_joint_target_indices[joint_indices] = torch.meshgrid( self._data_index, joint_indices, indexing="ij" @@ -613,7 +611,7 @@ def set_joint_drive_velocity_targets( TODO (stao): can we use joint indices for the CPU sim as well? Some global joint indices? 
""" if physx.is_gpu_enabled(): - targets = to_tensor(targets) + targets = sapien_utils.to_tensor(targets) if joint_indices not in self._cached_joint_target_indices: self._cached_joint_target_indices[joint_indices] = torch.meshgrid( self._data_index, joint_indices, indexing="ij" diff --git a/mani_skill2/utils/structs/base.py b/mani_skill2/utils/structs/base.py index d9fba8745..cb146a474 100644 --- a/mani_skill2/utils/structs/base.py +++ b/mani_skill2/utils/structs/base.py @@ -7,12 +7,7 @@ import sapien.physx as physx import torch -from mani_skill2.utils.sapien_utils import ( - compute_total_impulse, - get_actor_contacts, - to_numpy, - to_tensor, -) +from mani_skill2.utils import sapien_utils from mani_skill2.utils.structs.decorators import before_gpu_init from mani_skill2.utils.structs.types import Array @@ -94,11 +89,14 @@ def get_net_contact_forces(self): / self._scene.timestep ) else: - body_contacts = get_actor_contacts( + body_contacts = sapien_utils.get_actor_contacts( self.px.get_contacts(), self._bodies[0].entity ) net_force = ( - to_tensor(compute_total_impulse(body_contacts)) / self._scene.timestep + sapien_utils.to_tensor( + sapien_utils.compute_total_impulse(body_contacts) + ) + / self._scene.timestep ) return net_force[None, :] @@ -299,12 +297,12 @@ def angular_velocity(self) -> torch.Tensor: @angular_velocity.setter def angular_velocity(self, arg1: Array): if physx.is_gpu_enabled(): - arg1 = to_tensor(arg1) + arg1 = sapien_utils.to_tensor(arg1) self._body_data[ self._body_data_index[self._scene._reset_mask], 10:13 ] = arg1 else: - arg1 = to_numpy(arg1) + arg1 = sapien_utils.to_numpy(arg1) if len(arg1.shape) == 2: arg1 = arg1[0] self._bodies[0].angular_velocity = arg1 @@ -368,10 +366,10 @@ def linear_velocity(self) -> torch.Tensor: @linear_velocity.setter def linear_velocity(self, arg1: Array): if physx.is_gpu_enabled(): - arg1 = to_tensor(arg1) + arg1 = sapien_utils.to_tensor(arg1) self._body_data[self._body_data_index[self._scene._reset_mask], 7:10] = arg1 else: - arg1 = to_numpy(arg1) + arg1 = sapien_utils.to_numpy(arg1) if len(arg1.shape) == 2: arg1 = arg1[0] self._bodies[0].linear_velocity = arg1 diff --git a/mani_skill2/utils/structs/joint.py b/mani_skill2/utils/structs/joint.py index 8f3cb6573..2c6b56c02 100644 --- a/mani_skill2/utils/structs/joint.py +++ b/mani_skill2/utils/structs/joint.py @@ -8,7 +8,7 @@ import sapien.physx as physx import torch -from mani_skill2.utils.sapien_utils import to_tensor +from mani_skill2.utils import sapien_utils from mani_skill2.utils.structs.base import BaseStruct from mani_skill2.utils.structs.decorators import before_gpu_init from mani_skill2.utils.structs.link import Link @@ -194,7 +194,7 @@ def drive_target(self) -> torch.Tensor: @drive_target.setter def drive_target(self, arg1: Array) -> None: if physx.is_gpu_enabled(): - arg1 = to_tensor(arg1) + arg1 = sapien_utils.to_tensor(arg1) raise NotImplementedError( "Setting drive targets of individual joints is not implemented yet." 
) @@ -217,7 +217,7 @@ def drive_velocity_target(self) -> torch.Tensor: @drive_velocity_target.setter def drive_velocity_target(self, arg1: Array) -> None: if physx.is_gpu_enabled(): - arg1 = to_tensor(arg1) + arg1 = sapien_utils.to_tensor(arg1) raise NotImplementedError( "Cannot set drive velocity targets at the moment in GPU simulation" ) diff --git a/mani_skill2/utils/structs/pose.py b/mani_skill2/utils/structs/pose.py index f32fd9d64..aebbab658 100644 --- a/mani_skill2/utils/structs/pose.py +++ b/mani_skill2/utils/structs/pose.py @@ -6,12 +6,12 @@ import sapien.physx as physx import torch +from mani_skill2.utils import sapien_utils from mani_skill2.utils.geometry.rotation_conversions import ( quaternion_apply, quaternion_multiply, quaternion_to_matrix, ) -from mani_skill2.utils.sapien_utils import to_numpy, to_tensor from mani_skill2.utils.structs.types import Array, Device @@ -24,7 +24,7 @@ def add_batch_dim(x): def to_batched_tensor(x: Union[List, Array]): if x is None: return None - return add_batch_dim(to_tensor(x)) + return add_batch_dim(sapien_utils.to_tensor(x)) @dataclass @@ -95,13 +95,15 @@ def create_from_pq( @classmethod def create(cls, pose: Union[torch.Tensor, sapien.Pose, "Pose"]): if isinstance(pose, sapien.Pose): - raw_pose = torch.hstack([to_tensor(pose.p), to_tensor(pose.q)]) + raw_pose = torch.hstack( + [sapien_utils.to_tensor(pose.p), sapien_utils.to_tensor(pose.q)] + ) return cls(raw_pose=add_batch_dim(raw_pose)) elif isinstance(pose, cls): return pose else: assert len(pose.shape) <= 2 and len(pose.shape) > 0 - pose = to_tensor(pose) + pose = sapien_utils.to_tensor(pose) pose = add_batch_dim(pose) if pose.shape[-1] == 3: return cls.create_from_pq(p=pose, device=pose.device) @@ -167,7 +169,7 @@ def p(self): @p.setter def p(self, arg1: torch.Tensor): - self.raw_pose[..., :3] = to_tensor(arg1) + self.raw_pose[..., :3] = sapien_utils.to_tensor(arg1) @property def q(self): @@ -175,7 +177,7 @@ def q(self): @q.setter def q(self, arg1: torch.Tensor): - self.raw_pose[..., 3:] = to_tensor(arg1) + self.raw_pose[..., 3:] = sapien_utils.to_tensor(arg1) # @property # def rpy(self) -> numpy.ndarray[numpy.float32, _Shape, _Shape[3]]: @@ -193,7 +195,9 @@ def vectorize_pose(pose: Union[sapien.Pose, Pose]) -> torch.Tensor: """ if isinstance(pose, sapien.Pose): if physx.is_gpu_enabled(): - return torch.concatenate([to_tensor(pose.p), to_tensor(pose.q)]) + return torch.concatenate( + [sapien_utils.to_tensor(pose.p), sapien_utils.to_tensor(pose.q)] + ) else: return np.hstack([pose.p, pose.q]) elif isinstance(pose, Pose): @@ -213,7 +217,7 @@ def to_sapien_pose(pose: Union[torch.Tensor, sapien.Pose, Pose]) -> sapien.Pose: ), "pose is batched. Note that sapien Poses are not batched. If you want to use a batched Pose object use from mani_skill2.utils.structs.pose import Pose" if len(pose.shape) == 2: pose = pose[0] - pose = to_numpy(pose) + pose = sapien_utils.to_numpy(pose) return sapien.Pose(pose[:3], pose[3:]) else: assert len(pose.shape) == 1 or ( @@ -221,5 +225,5 @@ def to_sapien_pose(pose: Union[torch.Tensor, sapien.Pose, Pose]) -> sapien.Pose: ), "pose is batched. Note that sapien Poses are not batched. 
If you want to use a batched Pose object use from mani_skill2.utils.structs.pose import Pose" if len(pose.shape) == 2: pose = pose[0] - pose = to_numpy(pose) + pose = sapien_utils.to_numpy(pose) return sapien.Pose(pose[:3], pose[3:]) diff --git a/mani_skill2/utils/structs/render_camera.py b/mani_skill2/utils/structs/render_camera.py index 71ee0527b..bd89980b5 100644 --- a/mani_skill2/utils/structs/render_camera.py +++ b/mani_skill2/utils/structs/render_camera.py @@ -7,7 +7,7 @@ import sapien.render import torch -from mani_skill2.utils.sapien_utils import to_tensor +from mani_skill2.utils import sapien_utils # NOTE (stao): commented out functions are functions that are not confirmed to be working in the wrapped class but the original class has @@ -48,7 +48,9 @@ def __hash__(self): # TODO (stao): support extrinsic matrix changing @cache def get_extrinsic_matrix(self): - return to_tensor(self._render_cameras[0].get_extrinsic_matrix())[None, :] + return sapien_utils.to_tensor(self._render_cameras[0].get_extrinsic_matrix())[ + None, : + ] def get_far(self) -> float: return self._render_cameras[0].get_far() @@ -61,14 +63,18 @@ def get_height(self) -> int: @cache def get_intrinsic_matrix(self): - return to_tensor(self._render_cameras[0].get_intrinsic_matrix())[None, :] + return sapien_utils.to_tensor(self._render_cameras[0].get_intrinsic_matrix())[ + None, : + ] def get_local_pose(self) -> sapien.Pose: return self._render_cameras[0].get_local_pose() @cache def get_model_matrix(self): - return to_tensor(self._render_cameras[0].get_model_matrix())[None, :] + return sapien_utils.to_tensor(self._render_cameras[0].get_model_matrix())[ + None, : + ] def get_near(self) -> float: return self._render_cameras[0].get_near() @@ -77,7 +83,9 @@ def get_picture(self, name: str): if physx.is_gpu_enabled(): return self.camera_group.get_picture_cuda(name).torch() else: - return to_tensor(self._render_cameras[0].get_picture(name))[None, ...] + return sapien_utils.to_tensor(self._render_cameras[0].get_picture(name))[ + None, ... 
+ ] def get_picture_cuda(self, name: str): return self._render_cameras[0].get_picture_cuda(name) diff --git a/mani_skill2/utils/wrappers/flatten.py b/mani_skill2/utils/wrappers/flatten.py index 757d657e5..3c22276df 100644 --- a/mani_skill2/utils/wrappers/flatten.py +++ b/mani_skill2/utils/wrappers/flatten.py @@ -5,8 +5,8 @@ from gymnasium.vector.utils import batch_space from mani_skill2.envs.sapien_env import BaseEnv +from mani_skill2.utils import sapien_utils from mani_skill2.utils.common import flatten_state_dict -from mani_skill2.utils.sapien_utils import batch class FlattenObservationWrapper(gym.ObservationWrapper): @@ -49,7 +49,7 @@ def action(self, action): self.base_env.num_envs == 1 and action.shape == self.single_action_space.shape ): - action = batch(action) + action = sapien_utils.batch(action) # TODO (stao): This code only supports flat dictionary at the moment unflattened_action = dict() diff --git a/mani_skill2/utils/wrappers/record.py b/mani_skill2/utils/wrappers/record.py index 077568693..2811e384e 100644 --- a/mani_skill2/utils/wrappers/record.py +++ b/mani_skill2/utils/wrappers/record.py @@ -11,12 +11,12 @@ from mani_skill2 import get_commit_info from mani_skill2.envs.sapien_env import BaseEnv +from mani_skill2.utils import sapien_utils from mani_skill2.utils.common import ( extract_scalars_from_info, find_max_episode_steps_value, ) from mani_skill2.utils.io_utils import dump_json -from mani_skill2.utils.sapien_utils import batch, to_numpy from mani_skill2.utils.structs.types import Array from mani_skill2.utils.visualization.misc import ( images_to_video, @@ -278,7 +278,7 @@ def _base_env(self) -> BaseEnv: def capture_image(self): img = self.env.render() - img = to_numpy(img) + img = sapien_utils.to_numpy(img) if len(img.shape) > 3: img = tile_images(img, nrows=self.video_nrows) return img @@ -297,16 +297,18 @@ def reset( if "env_idx" not in options: self.flush_trajectory(env_idxs_to_flush=np.arange(self.num_envs)) else: - self.flush_trajectory(env_idxs_to_flush=to_numpy(options["env_idx"])) + self.flush_trajectory( + env_idxs_to_flush=sapien_utils.to_numpy(options["env_idx"]) + ) obs, info = super().reset(*args, seed=seed, options=options, **kwargs) if self.save_trajectory: state_dict = self._base_env.get_state_dict() - action = batch(self.action_space.sample()) + action = sapien_utils.batch(self.action_space.sample()) first_step = Step( - state=to_numpy(batch(state_dict)), - observation=to_numpy(batch(obs)), + state=sapien_utils.to_numpy(sapien_utils.batch(state_dict)), + observation=sapien_utils.to_numpy(sapien_utils.batch(obs)), # note first reward/action etc. 
are ignored when saving trajectories to disk action=action, reward=np.zeros( @@ -326,11 +328,11 @@ def reset( env_episode_ptr=np.zeros((self.num_envs,), dtype=int), ) if self.num_envs == 1: - first_step.observation = batch(first_step.observation) - first_step.action = batch(first_step.action) + first_step.observation = sapien_utils.batch(first_step.observation) + first_step.action = sapien_utils.batch(first_step.action) env_idx = np.arange(self.num_envs) if "env_idx" in options: - env_idx = to_numpy(options["env_idx"]) + env_idx = sapien_utils.to_numpy(options["env_idx"]) if self._trajectory_buffer is None: # Initialize trajectory buffer on the first episode based on given observation (which should be generated after all wrappers) self._trajectory_buffer = first_step @@ -363,7 +365,7 @@ def recursive_replace(x, y): if self._trajectory_buffer.fail is not None: recursive_replace(self._trajectory_buffer.fail, first_step.fail) if "env_idx" in options: - options["env_idx"] = to_numpy(options["env_idx"]) + options["env_idx"] = sapien_utils.to_numpy(options["env_idx"]) self.last_reset_kwargs = copy.deepcopy( dict(seed=seed, options=options, **kwargs) ) @@ -386,41 +388,50 @@ def step(self, action): truncated = self._base_env.elapsed_steps >= self.max_episode_steps state_dict = self._base_env.get_state_dict() self._trajectory_buffer.state = append_dict_array( - self._trajectory_buffer.state, to_numpy(batch(state_dict)) + self._trajectory_buffer.state, + sapien_utils.to_numpy(sapien_utils.batch(state_dict)), ) self._trajectory_buffer.observation = append_dict_array( - self._trajectory_buffer.observation, to_numpy(batch(obs)) + self._trajectory_buffer.observation, + sapien_utils.to_numpy(sapien_utils.batch(obs)), ) self._trajectory_buffer.action = append_dict_array( - self._trajectory_buffer.action, to_numpy(batch(action)) + self._trajectory_buffer.action, + sapien_utils.to_numpy(sapien_utils.batch(action)), ) self._trajectory_buffer.reward = append_dict_array( - self._trajectory_buffer.reward, to_numpy(batch(rew)) + self._trajectory_buffer.reward, + sapien_utils.to_numpy(sapien_utils.batch(rew)), ) self._trajectory_buffer.terminated = append_dict_array( - self._trajectory_buffer.terminated, to_numpy(batch(terminated)) + self._trajectory_buffer.terminated, + sapien_utils.to_numpy(sapien_utils.batch(terminated)), ) self._trajectory_buffer.truncated = append_dict_array( - self._trajectory_buffer.truncated, to_numpy(batch(truncated)) + self._trajectory_buffer.truncated, + sapien_utils.to_numpy(sapien_utils.batch(truncated)), ) done = terminated | truncated self._trajectory_buffer.done = append_dict_array( - self._trajectory_buffer.done, to_numpy(batch(done)) + self._trajectory_buffer.done, + sapien_utils.to_numpy(sapien_utils.batch(done)), ) if "success" in info: self._trajectory_buffer.success = append_dict_array( - self._trajectory_buffer.success, to_numpy(batch(info["success"])) + self._trajectory_buffer.success, + sapien_utils.to_numpy(sapien_utils.batch(info["success"])), ) else: self._trajectory_buffer.success = None if "fail" in info: self._trajectory_buffer.fail = append_dict_array( - self._trajectory_buffer.fail, to_numpy(batch(info["fail"])) + self._trajectory_buffer.fail, + sapien_utils.to_numpy(sapien_utils.batch(info["fail"])), ) else: self._trajectory_buffer.fail = None - self._last_info = to_numpy(info) + self._last_info = sapien_utils.to_numpy(info) if self.save_video: self._video_steps += 1