Quadruped rl (#294)
* init

* fixes

* Update README.md

* fixes
StoneT2000 authored Apr 29, 2024
1 parent 469a975 commit 4cb96d1
Showing 6 changed files with 41 additions and 15 deletions.
6 changes: 6 additions & 0 deletions examples/baselines/ppo/README.md
@@ -67,6 +67,12 @@ python ppo.py --env_id="MS-CartPole-v1" \
--gamma=0.99 --gae_lambda=0.95 \
--eval_freq=5

+# Note: quadruped simulation is still being optimized, and this task's reward
+# function, while functional, is not yet well tuned.
python ppo.py --env_id="AnymalC-Reach-v1" \
--num_envs=1024 --update_epochs=8 --num_minibatches=32 \
--total_timesteps=75_000_000 --num-steps=200 --num-eval-steps=200 \
--gamma=0.99 --gae_lambda=0.95
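
For scale, these flags imply the following rollout arithmetic (an aside, assuming the usual CleanRL-style PPO loop behind this script, where each iteration collects num_envs * num_steps transitions):

num_envs = 1024
num_steps = 200
total_timesteps = 75_000_000
batch_size = num_envs * num_steps               # 204,800 transitions per iteration
num_iterations = total_timesteps // batch_size  # ~366 PPO iterations in total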
python ppo.py --env_id="UnitreeH1Stand-v1" \
--num_envs=1024 --update_epochs=8 --num_minibatches=32 \
--total_timesteps=100_000_000 --num-steps=100 --num-eval-steps=1000 \
11 changes: 6 additions & 5 deletions mani_skill/agents/robots/anymal/anymal_c.py
@@ -3,7 +3,7 @@
import sapien
import torch

-from mani_skill import PACKAGE_ASSET_DIR
+from mani_skill import ASSET_DIR
from mani_skill.agents.base_agent import BaseAgent, Keyframe
from mani_skill.agents.controllers import *
from mani_skill.agents.registration import register_agent
@@ -14,7 +14,7 @@
@register_agent()
class ANYmalC(BaseAgent):
uid = "anymal_c"
urdf_path = f"{PACKAGE_ASSET_DIR}/robots/anymal_c/urdf/anymal_old.urdf"
urdf_path = f"{ASSET_DIR}/robots/anymal_c/urdf/anymal.urdf"
urdf_config = dict(
_materials=dict(
foot=dict(static_friction=2.0, dynamic_friction=2.0, restitution=0.0)
@@ -109,6 +109,7 @@ def is_standing(self, ground_height=0):
high_enough = self.robot.pose.p[:, 2] > 0.35 + ground_height
return aligned & high_enough

-def is_fallen(self, ground_height=0):
-    """This quadruped is considered fallen if its body is 0.175m off the ground"""
-    return self.robot.pose.p[:, 2] < 0.175 + ground_height
+def is_fallen(self):
+    """This quadruped is considered fallen if its body contacts the ground"""
+    forces = self.robot.get_net_contact_forces(["base"])
+    return torch.norm(forces, dim=-1).max(-1).values > 1
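
The height-based check is replaced by a contact-based one: the robot counts as fallen when the net contact force on its base link exceeds 1 N. A minimal self-contained sketch of the same logic (the (num_envs, num_links, 3) shape is an assumption about what get_net_contact_forces returns here):

import torch

def fallen_from_contacts(forces: torch.Tensor, threshold: float = 1.0) -> torch.Tensor:
    # forces: (num_envs, num_links, 3) net contact forces on the queried links
    # ("base" above). Fallen if any queried link feels more than `threshold` N.
    return torch.norm(forces, dim=-1).max(-1).values > threshold

# toy check: env 0 has a 5 N contact on its base, env 1 is untouched
forces = torch.tensor([[[0.0, 0.0, 5.0]], [[0.0, 0.0, 0.0]]])
print(fallen_from_contacts(forces))  # tensor([ True, False])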
28 changes: 23 additions & 5 deletions mani_skill/envs/tasks/quadruped/quadruped_reach.py
@@ -12,14 +12,17 @@
from mani_skill.utils.building import actors
from mani_skill.utils.building.ground import build_ground
from mani_skill.utils.registration import register_env
+from mani_skill.utils.structs.articulation_joint import ArticulationJoint
from mani_skill.utils.structs.pose import Pose
from mani_skill.utils.structs.types import GPUMemoryConfig, SceneConfig, SimConfig


class QuadrupedReachEnv(BaseEnv):
SUPPORTED_ROBOTS = ["anymal-c"]
SUPPORTED_ROBOTS = ["anymal_c"]
agent: ANYmalC

+_UNDESIRED_CONTACT_LINK_NAMES: ArticulationJoint = None

def __init__(self, *args, robot_uids="anymal-c", **kwargs):
super().__init__(*args, robot_uids=robot_uids, **kwargs)

@@ -83,9 +86,9 @@ def _initialize_episode(self, env_idx: torch.Tensor, options: dict):
self.agent.robot.set_qpos(keyframe.qpos)
# sample random goal
xyz = torch.zeros((b, 3))
-xyz[:, 0] = 2.5
+noise_scale = 1
+xyz[:, 0] = torch.rand(size=(b,)) * noise_scale - noise_scale / 2 + 2.5
noise_scale = 4
xyz[:, 1] = torch.rand(size=(b,)) * noise_scale - noise_scale / 2
self.goal.set_pose(Pose.create_from_pq(xyz))
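
Net effect: the goal is no longer pinned at x = 2.5 m in front of the robot; it is now sampled uniformly over x in [2.0, 3.0) and y in [-2.0, 2.0). A quick derivation, since torch.rand draws from [0, 1):

x = rand * 1 - 0.5 + 2.5   # uniform in [2.0, 3.0)
y = rand * 4 - 2.0         # uniform in [-2.0, 2.0)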

@@ -112,6 +115,13 @@ def _get_obs_extra(self, info: Dict):
)
return obs

+def _compute_undesired_contacts(self, threshold=1.0):
+    forces = self.agent.robot.get_net_contact_forces(
+        self._UNDESIRED_CONTACT_LINK_NAMES
+    )
+    contact_exists = torch.norm(forces, dim=-1).max(-1).values > threshold
+    return contact_exists
+
def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict):
robot_to_goal_dist = info["robot_to_goal_dist"]
reaching_reward = 1 - torch.tanh(1 * robot_to_goal_dist)
@@ -121,8 +131,14 @@ def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict):
ang_vel_xy_l2 = (
torch.square(self.agent.robot.root_angular_velocity[:, :2])
).sum(axis=1)
-penalties = lin_vel_z_l2 * -0.15 + ang_vel_xy_l2 * -0.05
+penalties = (
+    lin_vel_z_l2 * -2
+    + ang_vel_xy_l2 * -0.05
+    + self._compute_undesired_contacts() * -1
+)
reward = reaching_reward + penalties

reward[info["fail"]] = -100
return reward
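
Putting the hunks together, the shaped reward is now a reaching term minus stability penalties, with a large terminal penalty when the fall check fires. A condensed sketch of the computation (argument names and tensor shapes are assumptions, not the env's API):

import torch

def dense_reward(goal_dist, lin_vel_z, ang_vel_xy, undesired_contact, failed):
    # goal_dist, lin_vel_z, undesired_contact, failed: (num_envs,); ang_vel_xy: (num_envs, 2)
    reaching = 1 - torch.tanh(1 * goal_dist)       # in (0, 1], largest at the goal
    penalties = (
        lin_vel_z.square() * -2                    # penalize vertical bouncing
        + ang_vel_xy.square().sum(dim=1) * -0.05   # penalize roll/pitch rates
        + undesired_contact.float() * -1           # penalize knee-link contact
    )
    reward = reaching + penalties
    reward[failed] = -100                          # info["fail"] above
    return reward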

def compute_normalized_dense_reward(
@@ -132,7 +148,9 @@
return self.compute_dense_reward(obs=obs, action=action, info=info) / max_reward


-# @register_env("AnymalC-Reach-v1", max_episode_steps=200)
+@register_env("AnymalC-Reach-v1", max_episode_steps=200)
class AnymalCReachEnv(QuadrupedReachEnv):
def __init__(self, *args, robot_uids="anymal-c", **kwargs):
_UNDESIRED_CONTACT_LINK_NAMES = ["LF_KFE", "RF_KFE", "LH_KFE", "RH_KFE"]

def __init__(self, *args, robot_uids="anymal_c", **kwargs):
super().__init__(*args, robot_uids=robot_uids, **kwargs)
2 changes: 1 addition & 1 deletion mani_skill/utils/download_asset.py
@@ -160,7 +160,7 @@ def initialize_extra_sources():
target_path="robots/ur10e",
)
DATA_SOURCES["anymal_c"] = DataSource(
url="https://github.com/haosulab/ManiSkill-ANYmalC/archive/refs/tags/v0.1.0.zip",
url="https://github.com/haosulab/ManiSkill-ANYmalC/archive/refs/tags/v0.1.1.zip",
target_path="robots/anymal_c",
)
DATA_SOURCES["unitree_h1"] = DataSource(
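Since the source archive is re-tagged to v0.1.1, previously downloaded anymal_c assets need to be fetched again. This file doubles as the asset-download CLI, so re-running it by UID should pull the new bundle (a usage sketch; the exact invocation is assumed from the module path):

python -m mani_skill.utils.download_asset anymal_c
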
8 changes: 4 additions & 4 deletions mani_skill/utils/structs/actor.py
@@ -1,7 +1,7 @@
from __future__ import annotations

from dataclasses import dataclass
-from functools import cache
+from functools import cached_property
from typing import TYPE_CHECKING, List, Literal, Union

import numpy as np
@@ -147,7 +147,7 @@ def set_state(self, state: Array):
self.set_linear_velocity(state[7:10])
self.set_angular_velocity(state[10:13])

-@cache
+@cached_property
def has_collision_shapes(self):
assert (
not self.merged
@@ -169,7 +169,7 @@ def hide_visual(self):
As a result we do not permit hiding and showing visuals of objects with collision shapes as this affects the actual simulation.
Note that this operation can also be fairly slow as we need to run px.gpu_apply_rigid_dynamic_data and px.gpu_fetch_rigid_dynamic_data.
"""
-assert not self.has_collision_shapes()
+assert not self.has_collision_shapes
if self.hidden:
return
if physx.is_gpu_enabled():
@@ -191,7 +191,7 @@ def hide_visual(self):
self.hidden = True

def show_visual(self):
-assert not self.has_collision_shapes()
+assert not self.has_collision_shapes
if not self.hidden:
return
# set hidden *before* setting/getting so not applied to self.before_hide_pose erroneously
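The switch from functools.cache to functools.cached_property is why the two assertions above drop their parentheses: cache on a method keys its result on self (holding a reference to every instance in a class-level cache) and keeps call syntax, while cached_property computes once and stores the value on the instance as a plain attribute. A minimal illustration of the difference:

from functools import cache, cached_property

class Demo:
    @cache            # class-level cache keyed on self; still called like a method
    def as_method(self):
        print("computed (method)")
        return 42

    @cached_property  # result stored on the instance; read as an attribute
    def as_attr(self):
        print("computed (property)")
        return 42

d = Demo()
d.as_method(); d.as_method()  # prints "computed (method)" once
d.as_attr; d.as_attr          # prints "computed (property)" once
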
1 change: 1 addition & 0 deletions mani_skill/utils/structs/articulation.py
@@ -380,6 +380,7 @@ def get_net_contact_forces(self, link_names: Union[List[str], Tuple[str]]):
common.to_tensor(sapien_utils.compute_total_impulse(body_contacts))
/ self._scene.timestep
)
+# TODO (stao): (unify contacts api between gpu / cpu)
return net_force[None, :]

# -------------------------------------------------------------------------- #
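One detail worth noting in the snippet above: the contact query reports impulses, so the division by the simulation timestep is what converts an accumulated impulse (N*s) into an average force (N) for the step, i.e. F = J / dt. A toy worked example:

impulse = 0.02              # N*s accumulated over one physics step (made-up number)
timestep = 1 / 100          # 100 Hz physics step (assumed)
force = impulse / timestep  # 2.0 N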
