Add sampling functions with action delay;

sample the input from the true inputs
lasgroup · Oct 18, 2023 · 8ac4d24 · 8ac4d24
1 parent de7cfae
commit 8ac4d24
Show file tree

Hide file tree

Showing 3 changed files with 2 additions and 5 deletions.
diff --git a/experiments/data_provider.py b/experiments/data_provider.py
@@ -269,7 +269,6 @@ def provide_data_and_sim(data_source: str, data_spec: Dict[str, Any], data_seed:
             car_id = data_spec.get('car_id', 2)
             num_stacked_actions = data_spec.get('num_stacked_actions', 3)
             assert num_stacked_actions == 3, "We only support 3 stacked actions for now"
-            num_test = data_spec.get('num_samples_test', DEFAULTS_RACECAR_REAL['num_samples_test'])
 
             # Prepare simulator for bnn_training (the only difference is that here we can also have a low-fidelity sim)
             sim = RaceCarSim(encode_angle=True, use_blend=use_hf_sim, car_id=car_id)
@@ -321,13 +320,9 @@ def provide_data_and_sim(data_source: str, data_spec: Dict[str, Any], data_seed:
                                                                 num_stacked_actions=3,
                                                                 action_size=2)
 
-
             y_train = sim_for_sampling_data._typical_f(x_train)
             y_test = sim_for_sampling_data._typical_f(x_test)
             return x_train, y_train, x_test, y_test, sim
-
-
-
         elif data_source == 'racecar_hf':
             sim_hf = RaceCarSim(encode_angle=True, use_blend=True, only_pose=False)
             sim_lf = RaceCarSim(encode_angle=True, use_blend=False, only_pose=False)

diff --git a/experiments/offline_rl_from_recorded_data/exp.py b/experiments/offline_rl_from_recorded_data/exp.py
@@ -139,6 +139,7 @@ def experiment(horizon_len: int,
             data_source='racecar_from_true_input_data',
             data_spec={'num_samples_train': num_offline_collected_transitions,
                        'use_hf_sim': bool(high_fidelity),
+                       'sampling': 'iid',
                        'num_stacked_actions': 3},
             data_seed=int(int_data_seed),
         )

diff --git a/sim_transfer/rl/rl_on_offline_data.py b/sim_transfer/rl/rl_on_offline_data.py
@@ -367,6 +367,7 @@ def evaluate_policy_on_the_simulator(self,
         def reward_on_simulator(key: chex.PRNGKey):
             actions_buffer = jnp.zeros(shape=(self.action_dim * self.num_frame_stack))
             sim = RCCarSimEnv(encode_angle=True, use_tire_model=True,
+                              action_delay=1/30 * self.num_frame_stack,
                               margin_factor=self.car_reward_kwargs['margin_factor'],
                               ctrl_cost_weight=self.car_reward_kwargs['ctrl_cost_weight'], )
             obs = sim.reset(key)