diff --git a/examples/baselines/ppo/README.md b/examples/baselines/ppo/README.md
index be1d7b965..fbbd69482 100644
--- a/examples/baselines/ppo/README.md
+++ b/examples/baselines/ppo/README.md
@@ -67,6 +67,12 @@ python ppo.py --env_id="MS-CartPole-v1" \
   --gamma=0.99 --gae_lambda=0.95 \
   --eval_freq=5

+# Note that quadruped simulation is still being optimized and the reward function for this
+# task works but is not yet well tuned.
+python ppo.py --env_id="AnymalC-Reach-v1" \
+  --num_envs=1024 --update_epochs=8 --num_minibatches=32 \
+  --total_timesteps=75_000_000 --num-steps=200 --num-eval-steps=200 \
+  --gamma=0.99 --gae_lambda=0.95
 python ppo.py --env_id="UnitreeH1Stand-v1" \
   --num_envs=1024 --update_epochs=8 --num_minibatches=32 \
   --total_timesteps=100_000_000 --num-steps=100 --num-eval-steps=1000 \
diff --git a/mani_skill/agents/robots/anymal/anymal_c.py b/mani_skill/agents/robots/anymal/anymal_c.py
index d8cf3c301..038c35496 100644
--- a/mani_skill/agents/robots/anymal/anymal_c.py
+++ b/mani_skill/agents/robots/anymal/anymal_c.py
@@ -3,7 +3,7 @@
 import sapien
 import torch

-from mani_skill import PACKAGE_ASSET_DIR
+from mani_skill import ASSET_DIR
 from mani_skill.agents.base_agent import BaseAgent, Keyframe
 from mani_skill.agents.controllers import *
 from mani_skill.agents.registration import register_agent
@@ -14,7 +14,7 @@
 @register_agent()
 class ANYmalC(BaseAgent):
     uid = "anymal_c"
-    urdf_path = f"{PACKAGE_ASSET_DIR}/robots/anymal_c/urdf/anymal_old.urdf"
+    urdf_path = f"{ASSET_DIR}/robots/anymal_c/urdf/anymal.urdf"
     urdf_config = dict(
         _materials=dict(
             foot=dict(static_friction=2.0, dynamic_friction=2.0, restitution=0.0)
@@ -109,6 +109,7 @@ def is_standing(self, ground_height=0):
         high_enough = self.robot.pose.p[:, 2] > 0.35 + ground_height
         return aligned & high_enough

-    def is_fallen(self, ground_height=0):
-        """This quadruped is considered fallen if its body is 0.175m off the ground"""
-        return self.robot.pose.p[:, 2] < 0.175 + ground_height
+    def is_fallen(self):
+        """This quadruped is considered fallen if its body contacts the ground"""
+        forces = self.robot.get_net_contact_forces(["base"])
+        return torch.norm(forces, dim=-1).max(-1).values > 1
diff --git a/mani_skill/envs/tasks/quadruped/quadruped_reach.py b/mani_skill/envs/tasks/quadruped/quadruped_reach.py
index f1e37ab50..2003e2a89 100644
--- a/mani_skill/envs/tasks/quadruped/quadruped_reach.py
+++ b/mani_skill/envs/tasks/quadruped/quadruped_reach.py
@@ -12,14 +12,17 @@
 from mani_skill.utils.building import actors
 from mani_skill.utils.building.ground import build_ground
 from mani_skill.utils.registration import register_env
+from mani_skill.utils.structs.articulation_joint import ArticulationJoint
 from mani_skill.utils.structs.pose import Pose
 from mani_skill.utils.structs.types import GPUMemoryConfig, SceneConfig, SimConfig


 class QuadrupedReachEnv(BaseEnv):
-    SUPPORTED_ROBOTS = ["anymal-c"]
+    SUPPORTED_ROBOTS = ["anymal_c"]
     agent: ANYmalC

+    _UNDESIRED_CONTACT_LINK_NAMES: ArticulationJoint = None
+
     def __init__(self, *args, robot_uids="anymal-c", **kwargs):
         super().__init__(*args, robot_uids=robot_uids, **kwargs)

@@ -83,9 +86,9 @@ def _initialize_episode(self, env_idx: torch.Tensor, options: dict):
         self.agent.robot.set_qpos(keyframe.qpos)
         # sample random goal
         xyz = torch.zeros((b, 3))
+        xyz[:, 0] = 2.5
         noise_scale = 1
         xyz[:, 0] = torch.rand(size=(b,)) * noise_scale - noise_scale / 2 + 2.5
-        noise_scale = 4
         xyz[:, 1] = torch.rand(size=(b,)) * noise_scale - noise_scale / 2
         self.goal.set_pose(Pose.create_from_pq(xyz))

@@ -112,6 +115,13 @@ def _get_obs_extra(self, info: Dict):
         )
         return obs

+    def _compute_undesired_contacts(self, threshold=1.0):
+        forces = self.agent.robot.get_net_contact_forces(
+            self._UNDESIRED_CONTACT_LINK_NAMES
+        )
+        contact_exists = torch.norm(forces, dim=-1).max(-1).values > threshold
+        return contact_exists
+
     def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict):
         robot_to_goal_dist = info["robot_to_goal_dist"]
         reaching_reward = 1 - torch.tanh(1 * robot_to_goal_dist)
@@ -121,8 +131,14 @@ def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict):
         ang_vel_xy_l2 = (
             torch.square(self.agent.robot.root_angular_velocity[:, :2])
         ).sum(axis=1)
-        penalties = lin_vel_z_l2 * -0.15 + ang_vel_xy_l2 * -0.05
+        penalties = (
+            lin_vel_z_l2 * -2
+            + ang_vel_xy_l2 * -0.05
+            + self._compute_undesired_contacts() * -1
+        )
         reward = reaching_reward + penalties
+
+        reward[info["fail"]] = -100
         return reward

     def compute_normalized_dense_reward(
@@ -132,7 +148,9 @@
         return self.compute_dense_reward(obs=obs, action=action, info=info) / max_reward


-# @register_env("AnymalC-Reach-v1", max_episode_steps=200)
+@register_env("AnymalC-Reach-v1", max_episode_steps=200)
 class AnymalCReachEnv(QuadrupedReachEnv):
-    def __init__(self, *args, robot_uids="anymal-c", **kwargs):
+    _UNDESIRED_CONTACT_LINK_NAMES = ["LF_KFE", "RF_KFE", "LH_KFE", "RH_KFE"]
+
+    def __init__(self, *args, robot_uids="anymal_c", **kwargs):
         super().__init__(*args, robot_uids=robot_uids, **kwargs)
diff --git a/mani_skill/utils/download_asset.py b/mani_skill/utils/download_asset.py
index b07459eee..c0224e1cf 100644
--- a/mani_skill/utils/download_asset.py
+++ b/mani_skill/utils/download_asset.py
@@ -160,7 +160,7 @@ def initialize_extra_sources():
         target_path="robots/ur10e",
     )
     DATA_SOURCES["anymal_c"] = DataSource(
-        url="https://github.com/haosulab/ManiSkill-ANYmalC/archive/refs/tags/v0.1.0.zip",
+        url="https://github.com/haosulab/ManiSkill-ANYmalC/archive/refs/tags/v0.1.1.zip",
         target_path="robots/anymal_c",
     )
     DATA_SOURCES["unitree_h1"] = DataSource(
diff --git a/mani_skill/utils/structs/actor.py b/mani_skill/utils/structs/actor.py
index e7a181245..9e242ec9e 100644
--- a/mani_skill/utils/structs/actor.py
+++ b/mani_skill/utils/structs/actor.py
@@ -1,7 +1,7 @@
 from __future__ import annotations

 from dataclasses import dataclass
-from functools import cache
+from functools import cached_property
 from typing import TYPE_CHECKING, List, Literal, Union

 import numpy as np
@@ -147,7 +147,7 @@ def set_state(self, state: Array):
         self.set_linear_velocity(state[7:10])
         self.set_angular_velocity(state[10:13])

-    @cache
+    @cached_property
     def has_collision_shapes(self):
         assert (
             not self.merged
@@ -169,7 +169,7 @@ def hide_visual(self):
         As a result we do not permit hiding and showing visuals of objects with collision shapes as this affects the actual simulation.
         Note that this operation can also be fairly slow as we need to run px.gpu_apply_rigid_dynamic_data and px.gpu_fetch_rigid_dynamic_data.
         """
-        assert not self.has_collision_shapes()
+        assert not self.has_collision_shapes
         if self.hidden:
             return
         if physx.is_gpu_enabled():
@@ -191,7 +191,7 @@ def hide_visual(self):
         self.hidden = True

     def show_visual(self):
-        assert not self.has_collision_shapes()
+        assert not self.has_collision_shapes
         if not self.hidden:
             return
         # set hidden *before* setting/getting so not applied to self.before_hide_pose erroenously
diff --git a/mani_skill/utils/structs/articulation.py b/mani_skill/utils/structs/articulation.py
index 4a1b08596..e110fa9ef 100644
--- a/mani_skill/utils/structs/articulation.py
+++ b/mani_skill/utils/structs/articulation.py
@@ -380,6 +380,7 @@ def get_net_contact_forces(self, link_names: Union[List[str], Tuple[str]]):
             common.to_tensor(sapien_utils.compute_total_impulse(body_contacts))
             / self._scene.timestep
         )
+        # TODO (stao): (unify contacts api between gpu / cpu)
         return net_force[None, :]

     # -------------------------------------------------------------------------- #