Quadruped rl (#294)
* init

* fixes

* Update README.md

* fixes
StoneT2000 authored Apr 29, 2024
1 parent 469a975 commit 4cb96d1
Showing 6 changed files with 41 additions and 15 deletions.
6 changes: 6 additions & 0 deletions examples/baselines/ppo/README.md
@@ -67,6 +67,12 @@ python ppo.py --env_id="MS-CartPole-v1" \
--gamma=0.99 --gae_lambda=0.95 \
--eval_freq=5

+# Note: quadruped simulation is still being optimized, and this task's reward
+# function, while functional, is not yet well tuned.
python ppo.py --env_id="AnymalC-Reach-v1" \
--num_envs=1024 --update_epochs=8 --num_minibatches=32 \
--total_timesteps=75_000_000 --num-steps=200 --num-eval-steps=200 \
--gamma=0.99 --gae_lambda=0.95
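
For scale, these flags imply the following rollout arithmetic (an aside, assuming the usual CleanRL-style PPO loop behind this script, where each iteration collects num_envs * num_steps transitions):

num_envs = 1024
num_steps = 200
total_timesteps = 75_000_000
batch_size = num_envs * num_steps               # 204,800 transitions per iteration
num_iterations = total_timesteps // batch_size  # ~366 PPO iterations in total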
python ppo.py --env_id="UnitreeH1Stand-v1" \
--num_envs=1024 --update_epochs=8 --num_minibatches=32 \
--total_timesteps=100_000_000 --num-steps=100 --num-eval-steps=1000 \
11 changes: 6 additions & 5 deletions mani_skill/agents/robots/anymal/anymal_c.py
@@ -3,7 +3,7 @@
import sapien
import torch

-from mani_skill import PACKAGE_ASSET_DIR
+from mani_skill import ASSET_DIR
from mani_skill.agents.base_agent import BaseAgent, Keyframe
from mani_skill.agents.controllers import *
from mani_skill.agents.registration import register_agent
@@ -14,7 +14,7 @@
@register_agent()
class ANYmalC(BaseAgent):
uid = "anymal_c"
urdf_path = f"{PACKAGE_ASSET_DIR}/robots/anymal_c/urdf/anymal_old.urdf"
urdf_path = f"{ASSET_DIR}/robots/anymal_c/urdf/anymal.urdf"
urdf_config = dict(
_materials=dict(
foot=dict(static_friction=2.0, dynamic_friction=2.0, restitution=0.0)
@@ -109,6 +109,7 @@ def is_standing(self, ground_height=0):
high_enough = self.robot.pose.p[:, 2] > 0.35 + ground_height
return aligned & high_enough

-def is_fallen(self, ground_height=0):
-    """This quadruped is considered fallen if its body is 0.175m off the ground"""
-    return self.robot.pose.p[:, 2] < 0.175 + ground_height
+def is_fallen(self):
+    """This quadruped is considered fallen if its body contacts the ground"""
+    forces = self.robot.get_net_contact_forces(["base"])
+    return torch.norm(forces, dim=-1).max(-1).values > 1
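
The height-based check is replaced by a contact-based one: the robot counts as fallen when the net contact force on its base link exceeds 1 N. A minimal self-contained sketch of the same logic (the (num_envs, num_links, 3) shape is an assumption about what get_net_contact_forces returns here):

import torch

def fallen_from_contacts(forces: torch.Tensor, threshold: float = 1.0) -> torch.Tensor:
    # forces: (num_envs, num_links, 3) net contact forces on the queried links
    # ("base" above). Fallen if any queried link feels more than `threshold` N.
    return torch.norm(forces, dim=-1).max(-1).values > threshold

# toy check: env 0 has a 5 N contact on its base, env 1 is untouched
forces = torch.tensor([[[0.0, 0.0, 5.0]], [[0.0, 0.0, 0.0]]])
print(fallen_from_contacts(forces))  # tensor([ True, False])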
28 changes: 23 additions & 5 deletions mani_skill/envs/tasks/quadruped/quadruped_reach.py
@@ -12,14 +12,17 @@
from mani_skill.utils.building import actors
from mani_skill.utils.building.ground import build_ground
from mani_skill.utils.registration import register_env
+from mani_skill.utils.structs.articulation_joint import ArticulationJoint
from mani_skill.utils.structs.pose import Pose
from mani_skill.utils.structs.types import GPUMemoryConfig, SceneConfig, SimConfig


class QuadrupedReachEnv(BaseEnv):
SUPPORTED_ROBOTS = ["anymal-c"]
SUPPORTED_ROBOTS = ["anymal_c"]
agent: ANYmalC

+_UNDESIRED_CONTACT_LINK_NAMES: ArticulationJoint = None

def __init__(self, *args, robot_uids="anymal-c", **kwargs):
super().__init__(*args, robot_uids=robot_uids, **kwargs)

@@ -83,9 +86,9 @@ def _initialize_episode(self, env_idx: torch.Tensor, options: dict):
self.agent.robot.set_qpos(keyframe.qpos)
# sample random goal
xyz = torch.zeros((b, 3))
-xyz[:, 0] = 2.5
+noise_scale = 1
+xyz[:, 0] = torch.rand(size=(b,)) * noise_scale - noise_scale / 2 + 2.5
noise_scale = 4
xyz[:, 1] = torch.rand(size=(b,)) * noise_scale - noise_scale / 2
self.goal.set_pose(Pose.create_from_pq(xyz))
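
Net effect: the goal is no longer pinned at x = 2.5 m in front of the robot; it is now sampled uniformly over x in [2.0, 3.0) and y in [-2.0, 2.0). A quick derivation, since torch.rand draws from [0, 1):

x = rand * 1 - 0.5 + 2.5   # uniform in [2.0, 3.0)
y = rand * 4 - 2.0         # uniform in [-2.0, 2.0)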

@@ -112,6 +115,13 @@ def _get_obs_extra(self, info: Dict):
)
return obs

+def _compute_undesired_contacts(self, threshold=1.0):
+    forces = self.agent.robot.get_net_contact_forces(
+        self._UNDESIRED_CONTACT_LINK_NAMES
+    )
+    contact_exists = torch.norm(forces, dim=-1).max(-1).values > threshold
+    return contact_exists
+
def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict):
robot_to_goal_dist = info["robot_to_goal_dist"]
reaching_reward = 1 - torch.tanh(1 * robot_to_goal_dist)
@@ -121,8 +131,14 @@ def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict):
ang_vel_xy_l2 = (
torch.square(self.agent.robot.root_angular_velocity[:, :2])
).sum(axis=1)
-penalties = lin_vel_z_l2 * -0.15 + ang_vel_xy_l2 * -0.05
+penalties = (
+    lin_vel_z_l2 * -2
+    + ang_vel_xy_l2 * -0.05
+    + self._compute_undesired_contacts() * -1
+)
reward = reaching_reward + penalties

reward[info["fail"]] = -100
return reward
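
Putting the hunks together, the shaped reward is now a reaching term minus stability penalties, with a large terminal penalty when the fall check fires. A condensed sketch of the computation (argument names and tensor shapes are assumptions, not the env's API):

import torch

def dense_reward(goal_dist, lin_vel_z, ang_vel_xy, undesired_contact, failed):
    # goal_dist, lin_vel_z, undesired_contact, failed: (num_envs,); ang_vel_xy: (num_envs, 2)
    reaching = 1 - torch.tanh(1 * goal_dist)       # in (0, 1], largest at the goal
    penalties = (
        lin_vel_z.square() * -2                    # penalize vertical bouncing
        + ang_vel_xy.square().sum(dim=1) * -0.05   # penalize roll/pitch rates
        + undesired_contact.float() * -1           # penalize knee-link contact
    )
    reward = reaching + penalties
    reward[failed] = -100                          # info["fail"] above
    return reward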

def compute_normalized_dense_reward(
@@ -132,7 +148,9 @@
return self.compute_dense_reward(obs=obs, action=action, info=info) / max_reward


-# @register_env("AnymalC-Reach-v1", max_episode_steps=200)
+@register_env("AnymalC-Reach-v1", max_episode_steps=200)
class AnymalCReachEnv(QuadrupedReachEnv):
def __init__(self, *args, robot_uids="anymal-c", **kwargs):
_UNDESIRED_CONTACT_LINK_NAMES = ["LF_KFE", "RF_KFE", "LH_KFE", "RH_KFE"]

def __init__(self, *args, robot_uids="anymal_c", **kwargs):
super().__init__(*args, robot_uids=robot_uids, **kwargs)
2 changes: 1 addition & 1 deletion mani_skill/utils/download_asset.py
@@ -160,7 +160,7 @@ def initialize_extra_sources():
target_path="robots/ur10e",
)
DATA_SOURCES["anymal_c"] = DataSource(
url="https://github.com/haosulab/ManiSkill-ANYmalC/archive/refs/tags/v0.1.0.zip",
url="https://github.com/haosulab/ManiSkill-ANYmalC/archive/refs/tags/v0.1.1.zip",
target_path="robots/anymal_c",
)
DATA_SOURCES["unitree_h1"] = DataSource(
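Since the source archive is re-tagged to v0.1.1, previously downloaded anymal_c assets need to be fetched again. This file doubles as the asset-download CLI, so re-running it by UID should pull the new bundle (a usage sketch; the exact invocation is assumed from the module path):

python -m mani_skill.utils.download_asset anymal_c
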
8 changes: 4 additions & 4 deletions mani_skill/utils/structs/actor.py
@@ -1,7 +1,7 @@
from __future__ import annotations

from dataclasses import dataclass
-from functools import cache
+from functools import cached_property
from typing import TYPE_CHECKING, List, Literal, Union

import numpy as np
@@ -147,7 +147,7 @@ def set_state(self, state: Array):
self.set_linear_velocity(state[7:10])
self.set_angular_velocity(state[10:13])

-@cache
+@cached_property
def has_collision_shapes(self):
assert (
not self.merged
@@ -169,7 +169,7 @@ def hide_visual(self):
As a result we do not permit hiding and showing visuals of objects with collision shapes as this affects the actual simulation.
Note that this operation can also be fairly slow as we need to run px.gpu_apply_rigid_dynamic_data and px.gpu_fetch_rigid_dynamic_data.
"""
-assert not self.has_collision_shapes()
+assert not self.has_collision_shapes
if self.hidden:
return
if physx.is_gpu_enabled():
@@ -191,7 +191,7 @@ def hide_visual(self):
self.hidden = True

def show_visual(self):
-assert not self.has_collision_shapes()
+assert not self.has_collision_shapes
if not self.hidden:
return
# set hidden *before* setting/getting so not applied to self.before_hide_pose erroneously
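The switch from functools.cache to functools.cached_property is why the two assertions above drop their parentheses: cache on a method keys its result on self (holding a reference to every instance in a class-level cache) and keeps call syntax, while cached_property computes once and stores the value on the instance as a plain attribute. A minimal illustration of the difference:

from functools import cache, cached_property

class Demo:
    @cache            # class-level cache keyed on self; still called like a method
    def as_method(self):
        print("computed (method)")
        return 42

    @cached_property  # result stored on the instance; read as an attribute
    def as_attr(self):
        print("computed (property)")
        return 42

d = Demo()
d.as_method(); d.as_method()  # prints "computed (method)" once
d.as_attr; d.as_attr          # prints "computed (property)" once
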
1 change: 1 addition & 0 deletions mani_skill/utils/structs/articulation.py
@@ -380,6 +380,7 @@ def get_net_contact_forces(self, link_names: Union[List[str], Tuple[str]]):
common.to_tensor(sapien_utils.compute_total_impulse(body_contacts))
/ self._scene.timestep
)
+# TODO (stao): (unify contacts api between gpu / cpu)
return net_force[None, :]

# -------------------------------------------------------------------------- #
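One detail worth noting in the snippet above: the contact query reports impulses, so the division by the simulation timestep is what converts an accumulated impulse (N*s) into an average force (N) for the step, i.e. F = J / dt. A toy worked example:

impulse = 0.02              # N*s accumulated over one physics step (made-up number)
timestep = 1 / 100          # 100 Hz physics step (assumed)
force = impulse / timestep  # 2.0 N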
