From 3fb5aa4284e88857d57851463e96ccef79f529fa Mon Sep 17 00:00:00 2001 From: Tom Silver Date: Tue, 18 Jul 2023 13:19:51 -0400 Subject: [PATCH] wip --- scripts/kitchen_precompute_prm.py | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/scripts/kitchen_precompute_prm.py b/scripts/kitchen_precompute_prm.py index 51d1597273..9e01c83e8d 100644 --- a/scripts/kitchen_precompute_prm.py +++ b/scripts/kitchen_precompute_prm.py @@ -7,6 +7,25 @@ from predicators.settings import CFG +class OrnsteinUhlenbeckActionNoise: + """Copied from https://github.com/openai/baselines/blob/master/baselines/ddpg/noise.py""" + def __init__(self, mu, sigma, theta=.15, dt=1e-2, x0=None): + self.theta = theta + self.mu = mu + self.sigma = sigma + self.dt = dt + self.x0 = x0 + self.reset() + + def __call__(self): + x = self.x_prev + self.theta * (self.mu - self.x_prev) * self.dt + self.sigma * np.sqrt(self.dt) * np.random.normal(size=self.mu.shape) + self.x_prev = x + return x + + def reset(self): + self.x_prev = self.x0 if self.x0 is not None else np.zeros_like(self.mu) + + def _main() -> None: utils.reset_config({ "env": "kitchen", @@ -28,10 +47,12 @@ def _main() -> None: # Sample random trajectories in 7 DOF space. rng = np.random.default_rng(CFG.seed) - max_delta = 0.1 + noise_scale = 0.1 max_steps_per_traj = 1000 + noise = OrnsteinUhlenbeckActionNoise(np.zeros(7), sigma=noise_scale * np.ones(7)) + noise.reset() for _ in range(max_steps_per_traj): - delta_act = rng.uniform(-max_delta, max_delta, size=7) + delta_act = noise() current_pos = gym_env.sim.data.qpos[:7] main_act = np.clip(current_pos + delta_act, joint_lower_lim, joint_upper_lim) act = np.concatenate([main_act, gym_env.sim.data.qpos[7:9]])