diff --git a/experiments/offline_rl_from_recorded_data/exp.py b/experiments/offline_rl_from_recorded_data/exp.py index 255cd0b..25a301e 100644 --- a/experiments/offline_rl_from_recorded_data/exp.py +++ b/experiments/offline_rl_from_recorded_data/exp.py @@ -44,6 +44,7 @@ def experiment(horizon_len: int, num_epochs: int = 50, max_train_steps: int = 100_000, length_scale_aditive_sim_gp: float = 1.0, + input_from_recorded_data: int = 1, ): bnn_train_steps = min(num_epochs * num_offline_collected_transitions, max_train_steps) @@ -119,6 +120,7 @@ def experiment(horizon_len: int, num_epochs=num_epochs, max_train_steps=max_train_steps, length_scale_aditive_sim_gp=length_scale_aditive_sim_gp, + input_from_recorded_data=input_from_recorded_data ) total_config = SAC_KWARGS | config_dict @@ -135,8 +137,9 @@ def experiment(horizon_len: int, key, key_data_seed = jr.split(key, 2) int_data_seed = jr.randint(key_data_seed, (), minval=0, maxval=2 ** 13 - 1) if data_from_simulation: + source = 'racecar_from_true_input_data' if input_from_recorded_data else 'racecar_actionstack' x_train, y_train, x_test, y_test, sim = provide_data_and_sim( - data_source='racecar_from_true_input_data', + data_source=source, data_spec={'num_samples_train': num_offline_collected_transitions, 'use_hf_sim': bool(high_fidelity), 'sampling': 'iid', @@ -281,7 +284,8 @@ def main(args): bandwidth_svgd=args.bandwidth_svgd, num_epochs=args.num_epochs, max_train_steps=args.max_train_steps, - length_scale_aditive_sim_gp=args.length_scale_aditive_sim_gp + length_scale_aditive_sim_gp=args.length_scale_aditive_sim_gp, + input_from_recorded_data=args.input_from_recorded_data, ) @@ -317,5 +321,6 @@ def main(args): parser.add_argument('--num_epochs', type=int, default=20) parser.add_argument('--max_train_steps', type=int, default=2_000) parser.add_argument('--length_scale_aditive_sim_gp', type=float, default=1.0) + parser.add_argument('--input_from_recorded_data', type=int, default=1) args = parser.parse_args() main(args) diff --git a/experiments/offline_rl_from_recorded_data/launcher.py b/experiments/offline_rl_from_recorded_data/launcher.py index 2c14666..78b9282 100644 --- a/experiments/offline_rl_from_recorded_data/launcher.py +++ b/experiments/offline_rl_from_recorded_data/launcher.py @@ -1,7 +1,7 @@ import exp from experiments.util import generate_run_commands, generate_base_command, dict_permutations -PROJECT_NAME = 'OfflineRLCarV2BigExp' +PROJECT_NAME = 'OfflineRLSimulationWithDelay' _applicable_configs = { 'horizon_len': [200], @@ -17,7 +17,7 @@ 'margin_factor': [20.0], 'ctrl_cost_weight': [0.005], 'ctrl_diff_weight': [1.0], - 'num_offline_collected_transitions': [200, 400, 800, 1600, 2_000, 2_500, 3_000, 4_000, 5_000, 7_500, 10_000], + 'num_offline_collected_transitions': [20_000], 'test_data_ratio': [0.0], 'eval_on_all_offline_data': [1], 'eval_only_on_init_states': [1], @@ -25,10 +25,11 @@ 'bnn_batch_size': [32], 'likelihood_exponent': [1.0], 'train_sac_only_from_init_states': [0], - 'data_from_simulation': [0], + 'data_from_simulation': [1], 'num_frame_stack': [3], 'bandwidth_svgd': [0.05, 0.1, 0.2], - 'length_scale_aditive_sim_gp': [0.2, 1.0, 5.0] + 'length_scale_aditive_sim_gp': [0.2, 1.0, 5.0], + 'input_from_recorded_data': [1], } _applicable_configs_no_sim_prior = {'use_sim_prior': [0],