wip

bdaiinstitute · Jul 18, 2023 · 3fb5aa4 · 3fb5aa4
1 parent 22b9351
commit 3fb5aa4
Showing 1 changed file with 23 additions and 2 deletions.
diff --git a/scripts/kitchen_precompute_prm.py b/scripts/kitchen_precompute_prm.py
@@ -7,6 +7,25 @@
 from predicators.settings import CFG
 
 
+class OrnsteinUhlenbeckActionNoise:
+    """Copied from https://github.com/openai/baselines/blob/master/baselines/ddpg/noise.py"""
+    def __init__(self, mu, sigma, theta=.15, dt=1e-2, x0=None):
+        self.theta = theta
+        self.mu = mu
+        self.sigma = sigma
+        self.dt = dt
+        self.x0 = x0
+        self.reset()
+
+    def __call__(self):
+        x = self.x_prev + self.theta * (self.mu - self.x_prev) * self.dt + self.sigma * np.sqrt(self.dt) * np.random.normal(size=self.mu.shape)
+        self.x_prev = x
+        return x
+
+    def reset(self):
+        self.x_prev = self.x0 if self.x0 is not None else np.zeros_like(self.mu)
+
+
 def _main() -> None:
     utils.reset_config({
         "env": "kitchen",
@@ -28,10 +47,12 @@ def _main() -> None:
 
     # Sample random trajectories in 7 DOF space.
     rng = np.random.default_rng(CFG.seed)
-    max_delta = 0.1
+    noise_scale = 0.1
     max_steps_per_traj = 1000
+    noise = OrnsteinUhlenbeckActionNoise(np.zeros(7), sigma=noise_scale * np.ones(7))
+    noise.reset()
     for _ in range(max_steps_per_traj):
-        delta_act = rng.uniform(-max_delta, max_delta, size=7)
+        delta_act = noise()
         current_pos = gym_env.sim.data.qpos[:7]
         main_act = np.clip(current_pos + delta_act, joint_lower_lim, joint_upper_lim)
         act = np.concatenate([main_act, gym_env.sim.data.qpos[7:9]])