Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
lenarttreven committed Nov 14, 2023
2 parents e6ef088 + 783bae9 commit f60c138
Show file tree
Hide file tree
Showing 6 changed files with 364 additions and 115 deletions.
14 changes: 6 additions & 8 deletions experiments/data_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,8 +141,8 @@ def get_rccar_recorded_data_new(encode_angle: bool = True, skip_first_n_points:
if car_id == 1:
num_train_traj = 8
recordings_dir = os.path.join(DATA_DIR, 'recordings_rc_car_v1')
file_name = [f'recording_sep6_{i}.pickle' for i in
[1, 5, 12, 8, 3, 7, 10, 2, 4, 9, 6, 11]]
import glob
file_name = glob.glob(recordings_dir + '/*.pickle')
elif car_id == 2:
num_train_traj = 12
recordings_dir = os.path.join(DATA_DIR, 'recordings_rc_car_v2')
Expand Down Expand Up @@ -222,7 +222,7 @@ def prepare_rccar_data(transitions: Transition, encode_angles: bool = False, ski

def provide_data_and_sim(data_source: str, data_spec: Dict[str, Any], data_seed: int = 845672):
# load data
key_train, key_test, key_data = jax.random.split(jax.random.PRNGKey(data_seed), 3)
key_train, key_test = jax.random.split(jax.random.PRNGKey(data_seed), 2)
if data_source == 'sinusoids1d' or data_source == 'sinusoids2d':
from sim_transfer.sims.simulators import SinusoidsSim
defaults = DEFAULTS_SINUSOIDS
Expand Down Expand Up @@ -302,7 +302,7 @@ def provide_data_and_sim(data_source: str, data_spec: Dict[str, Any], data_seed:
# 1st: load data from the real car
x_train, y_train, x_test, y_test = get_rccar_recorded_data_new(encode_angle=True, action_stacking=True,
action_delay=num_stacked_actions,
car_id=car_id, key_data=key_data)
car_id=car_id)

# We delete y_train, y_test and replace it with the simulator output
del y_train, y_test
Expand Down Expand Up @@ -390,13 +390,11 @@ def provide_data_and_sim(data_source: str, data_spec: Dict[str, Any], data_seed:

if data_source.startswith('real_racecar_new_actionstack'):
x_train, y_train, x_test, y_test = get_rccar_recorded_data_new(encode_angle=True, action_stacking=True,
action_delay=3, car_id=car_id,
key_data=key_data)
action_delay=3, car_id=car_id)
sim_lf = StackedActionSimWrapper(sim_lf, num_stacked_actions=3, action_size=2)
elif data_source.startswith('real_racecar_new'):
x_train, y_train, x_test, y_test = get_rccar_recorded_data_new(encode_angle=True, action_stacking=False,
action_delay=3, car_id=car_id,
key_data=key_data)
action_delay=3, car_id=car_id)
else:
x_train, y_train, x_test, y_test = get_rccar_recorded_data(encode_angle=True)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,14 @@
from sim_transfer.hardware.car_env import CarEnv
from sim_transfer.rl.rl_on_offline_data import RLFromOfflineData
from sim_transfer.sims.util import plot_rc_trajectory
import pickle

ENTITY = 'sukhijab'


class RunSpec(NamedTuple):
    """Identifies one previously trained run that should be re-evaluated.

    Instances are built in `run_all_hardware_experiments` from the runs that
    match the desired config, collected into a list and pickled to
    `runs_spec.pkl`.
    """
    # Group of the source run (filled from `run.group`).
    group_name: str
    # Identifier of the source run (filled from `run.id`).
    run_id: str
    # Reward settings copied from the source run's config; the caller fills in
    # the keys 'encode_angle', 'ctrl_cost_weight', 'margin_factor' and
    # 'ctrl_diff_weight'. Defaults to None when no reward config is supplied.
    reward_config: dict| None = None


def run_all_hardware_experiments(project_name_load: str,
Expand Down Expand Up @@ -58,8 +58,15 @@ def run_all_hardware_experiments(project_name_load: str,
for file in run.files():
if file.name.startswith(dir_to_save):
file.download(replace=True, root=os.path.join(local_dir, run.group, run.id))
runs_spec.append(RunSpec(group_name=run.group,
run_id=run.id))

keys = ['encode_angle', 'ctrl_cost_weight', 'margin_factor', 'ctrl_diff_weight']
reward_config = {}
for key in keys:
reward_config[key] = run.config[key]
runs_spec.append(RunSpec(group_name=run.group,
run_id=run.id,
reward_config=reward_config,
))

with open(os.path.join(local_dir, 'runs_spec.pkl'), 'wb') as handle:
pickle.dump(runs_spec, handle)
Expand All @@ -83,6 +90,7 @@ def run_all_hardware_experiments(project_name_load: str,
project_name=project_name_save,
group_name=run_spec.group_name,
run_id=run_spec.run_id,
reward_config=reward_config,
control_time_ms=control_time_ms)


Expand All @@ -91,15 +99,14 @@ def run_with_learned_policy(policy_params,
project_name: str,
group_name: str,
run_id: str,
reward_config: dict,
encode_angle: bool = True,
control_time_ms: float = 32,
):
"""
Num stacked frames: 3
"""
car_reward_kwargs = dict(encode_angle=encode_angle,
ctrl_cost_weight=0.005,
margin_factor=20)
car_reward_kwargs = reward_config

num_frame_stack = 3
action_dim = 2
Expand All @@ -109,9 +116,10 @@ def run_with_learned_policy(policy_params,
sac_kwargs=SAC_KWARGS,
x_train=jnp.zeros((10, state_dim + (num_frame_stack + 1) * action_dim)),
y_train=jnp.zeros((10, state_dim)),
x_test=jnp.zeros((10, state_dim + (num_frame_stack + 1)* action_dim)),
x_test=jnp.zeros((10, state_dim + (num_frame_stack + 1) * action_dim)),
y_test=jnp.zeros((10, state_dim)),
car_reward_kwargs=car_reward_kwargs)
car_reward_kwargs=car_reward_kwargs,
load_pretrained_bnn_model=False)
wandb.init(
project=project_name,
group=group_name,
Expand All @@ -125,7 +133,7 @@ def run_with_learned_policy(policy_params,
# env = CarEnv(encode_angle=True, num_frame_stacks=0, max_throttle=0.4,
# control_time_ms=27.9)
env = CarEnv(car_id=2, encode_angle=encode_angle, max_throttle=0.4, control_time_ms=control_time_ms,
num_frame_stacks=3)
num_frame_stacks=3, car_reward_kwargs=car_reward_kwargs)
obs, _ = env.reset()
print(obs)
observations = []
Expand Down Expand Up @@ -235,7 +243,7 @@ def run_with_learned_policy(policy_params,
wandb.log({'Trajectory_on_true_model': wandb.Image(fig)})
sim_obs = sim_obs[:state_dim]
for i in range(200):
obs = jnp.stack([sim_obs, sim_stacked_actions], axis=0)
obs = jnp.concatenate([sim_obs, sim_stacked_actions], axis=0)
sim_action = policy(obs)
# sim_action = np.array(sim_action)
z = jnp.concatenate([obs, sim_action], axis=-1)
Expand Down Expand Up @@ -296,11 +304,11 @@ def plot_error_on_the_trajectory(data):
filename_bnn_model = 'saved_data/use_sim_prior=1_use_grey_box=0_high_fidelity=0_num_offline_data' \
'=2500_share_of_x0s=0.5_train_sac_only_from_init_states=0_0.5/tshlnhs0/models/bnn_model.pkl'

with open(filename_policy, 'rb') as handle:
policy_params = pickle.load(handle)
# with open(filename_policy, 'rb') as handle:
# policy_params = pickle.load(handle)

with open(filename_bnn_model, 'rb') as handle:
bnn_model = pickle.load(handle)
# with open(filename_bnn_model, 'rb') as handle:
# bnn_model = pickle.load(handle)

# observations_for_plotting, actions_for_plotting = run_with_learned_policy(bnn_model=bnn_model,
# policy_params=policy_params,
Expand All @@ -311,8 +319,8 @@ def plot_error_on_the_trajectory(data):
# )

run_all_hardware_experiments(
project_name_load='OfflineRLHW_without_frame_stack',
project_name_save='OfflineRLHW_without_frame_stack_evaluation',
desired_config={'bandwidth_svgd': 0.2},
project_name_load='OfflineRLRunsWithGreyBox',
project_name_save='OfflineRLRunsWithGreyBox_evaluation',
desired_config={'bandwidth_svgd': 0.2, 'data_from_simulation': 0},
control_time_ms=32,
)
93 changes: 66 additions & 27 deletions experiments/offline_rl_from_recorded_data/exp.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from sim_transfer.rl.rl_on_offline_data import RLFromOfflineData
from sim_transfer.sims.simulators import AdditiveSim, PredictStateChangeWrapper, GaussianProcessSim

ENTITY = 'sukhijab'
ENTITY = 'rojonas'


def experiment(horizon_len: int,
Expand Down Expand Up @@ -45,6 +45,7 @@ def experiment(horizon_len: int,
min_train_steps: int = 40_000,
length_scale_aditive_sim_gp: float = 1.0,
input_from_recorded_data: int = 1,
obtain_consecutive_data: int = 1,
):
bnn_train_steps = min(num_epochs * num_offline_collected_transitions, max_train_steps)
bnn_train_steps = max(bnn_train_steps, min_train_steps)
Expand All @@ -55,7 +56,8 @@ def experiment(horizon_len: int,
high_fidelity=high_fidelity,
num_offline_data=num_offline_collected_transitions,
share_of_x0s=share_of_x0s_in_sac_buffer,
train_sac_only_from_init_states=train_sac_only_from_init_states, )
train_sac_only_from_init_states=train_sac_only_from_init_states,
)
group_name = '_'.join(list(str(key) + '=' + str(value) for key, value in config_dict.items() if key != 'seed'))

car_reward_kwargs = dict(encode_angle=True,
Expand All @@ -69,7 +71,7 @@ def experiment(horizon_len: int,

SAC_KWARGS = dict(num_timesteps=sac_num_env_steps,
num_evals=20,
reward_scaling=10,
reward_scaling=1,
episode_length=horizon_len,
episode_length_eval=200,
action_repeat=1,
Expand All @@ -92,7 +94,9 @@ def experiment(horizon_len: int,
critic_hidden_layer_sizes=(64, 64),
normalize_observations=True,
deterministic_eval=True,
wandb_logging=True)
wandb_logging=True,
max_grad_norm=100,
)

config_dict = dict(horizon_len=horizon_len,
seed=seed,
Expand Down Expand Up @@ -121,7 +125,9 @@ def experiment(horizon_len: int,
max_train_steps=max_train_steps,
min_train_steps=min_train_steps,
length_scale_aditive_sim_gp=length_scale_aditive_sim_gp,
input_from_recorded_data=input_from_recorded_data
input_from_recorded_data=input_from_recorded_data,
data_from_simulation=data_from_simulation,
likelihood_exponent=likelihood_exponent,
)

total_config = SAC_KWARGS | config_dict | car_reward_kwargs
Expand All @@ -137,26 +143,53 @@ def experiment(horizon_len: int,
key = jr.PRNGKey(seed)
key, key_data_seed = jr.split(key, 2)
int_data_seed = jr.randint(key_data_seed, (), minval=0, maxval=2 ** 13 - 1)
if data_from_simulation:
source = 'racecar_from_true_input_data' if input_from_recorded_data else 'racecar_actionstack'
x_train, y_train, x_test, y_test, sim = provide_data_and_sim(
data_source=source,
data_spec={'num_samples_train': num_offline_collected_transitions,
'use_hf_sim': bool(high_fidelity),
'sampling': 'iid',
'num_stacked_actions': num_frame_stack},
data_seed=int(int_data_seed),
)
assert num_offline_collected_transitions <= 20_000, "Cannot have more than 20_000 points for training"
if bool(obtain_consecutive_data):
if bool(data_from_simulation):
source = 'racecar_from_true_input_data' if input_from_recorded_data else 'racecar_actionstack'
x_train, y_train, x_test, y_test, sim = provide_data_and_sim(
data_source=source,
data_spec={'num_samples_train': 20_000,
'use_hf_sim': bool(high_fidelity),
'sampling': 'iid',
'num_stacked_actions': num_frame_stack},
data_seed=int(int_data_seed),
)

else:
x_train, y_train, x_test, y_test, sim = provide_data_and_sim(
data_source='real_racecar_new_actionstack',
data_spec={'num_samples_train': 20_000,
'use_hf_sim': bool(high_fidelity),
'sampling': 'iid',
'num_stacked_actions': num_frame_stack,
},
data_seed=int(int_data_seed), )

x_train, y_train = x_train[:int(num_offline_collected_transitions)], \
y_train[:int(num_offline_collected_transitions)]
else:
x_train, y_train, x_test, y_test, sim = provide_data_and_sim(
data_source='real_racecar_new_actionstack',
data_spec={'num_samples_train': num_offline_collected_transitions,
'use_hf_sim': bool(high_fidelity),
'sampling': 'iid',
'num_stacked_actions': num_frame_stack,
},
data_seed=int(int_data_seed), )
if bool(data_from_simulation):
source = 'racecar_from_true_input_data' if input_from_recorded_data else 'racecar_actionstack'
x_train, y_train, x_test, y_test, sim = provide_data_and_sim(
data_source=source,
data_spec={'num_samples_train': int(num_offline_collected_transitions),
'use_hf_sim': bool(high_fidelity),
'sampling': 'iid',
'num_stacked_actions': num_frame_stack},
data_seed=int(int_data_seed),
)

else:
x_train, y_train, x_test, y_test, sim = provide_data_and_sim(
data_source='real_racecar_new_actionstack',
data_spec={'num_samples_train': int(num_offline_collected_transitions),
'use_hf_sim': bool(high_fidelity),
'sampling': 'iid',
'num_stacked_actions': num_frame_stack,
},
data_seed=int(int_data_seed), )


# Deal with randomness
key = jr.PRNGKey(seed)
Expand Down Expand Up @@ -197,7 +230,8 @@ def experiment(horizon_len: int,
function_sim=sim,
score_estimator='gp',
num_train_steps=bnn_train_steps,
num_f_samples=512,
num_f_samples=256,
lr=3e-4,
bandwidth_svgd=bandwidth_svgd,
num_measurement_points=num_measurement_points,
)
Expand All @@ -207,6 +241,7 @@ def experiment(horizon_len: int,
model = BNN_FSVGD_GreyBox(
**standard_params,
sim=sim,
lr=3e-4,
num_train_steps=bnn_train_steps,
bandwidth_svgd=bandwidth_svgd,
)
Expand All @@ -215,6 +250,7 @@ def experiment(horizon_len: int,
**standard_params,
num_train_steps=bnn_train_steps,
domain=sim.domain,
lr=3e-4,
bandwidth_svgd=bandwidth_svgd,
)

Expand Down Expand Up @@ -287,8 +323,10 @@ def main(args):
bandwidth_svgd=args.bandwidth_svgd,
num_epochs=args.num_epochs,
max_train_steps=args.max_train_steps,
min_train_steps=args.min_train_steps,
length_scale_aditive_sim_gp=args.length_scale_aditive_sim_gp,
input_from_recorded_data=args.input_from_recorded_data,
obtain_consecutive_data=args.obtain_consecutive_data,
)


Expand All @@ -308,8 +346,8 @@ def main(args):
parser.add_argument('--ctrl_diff_weight', type=float, default=0.01)
parser.add_argument('--num_offline_collected_transitions', type=int, default=20_000)
parser.add_argument('--use_sim_prior', type=int, default=0)
parser.add_argument('--use_grey_box', type=int, default=0)
parser.add_argument('--high_fidelity', type=int, default=0)
parser.add_argument('--use_grey_box', type=int, default=1)
parser.add_argument('--high_fidelity', type=int, default=1)
parser.add_argument('--num_measurement_points', type=int, default=8)
parser.add_argument('--bnn_batch_size', type=int, default=32)
parser.add_argument('--test_data_ratio', type=float, default=0.1)
Expand All @@ -318,13 +356,14 @@ def main(args):
parser.add_argument('--eval_on_all_offline_data', type=int, default=1)
parser.add_argument('--train_sac_only_from_init_states', type=int, default=0)
parser.add_argument('--likelihood_exponent', type=float, default=1.0)
parser.add_argument('--data_from_simulation', type=int, default=0)
parser.add_argument('--data_from_simulation', type=int, default=1)
parser.add_argument('--num_frame_stack', type=int, default=3)
parser.add_argument('--bandwidth_svgd', type=float, default=0.2)
parser.add_argument('--num_epochs', type=int, default=20)
parser.add_argument('--max_train_steps', type=int, default=100_000)
parser.add_argument('--min_train_steps', type=int, default=40_000)
parser.add_argument('--length_scale_aditive_sim_gp', type=float, default=1.0)
parser.add_argument('--input_from_recorded_data', type=int, default=1)
parser.add_argument('--obtain_consecutive_data', type=int, default=1)
args = parser.parse_args()
main(args)
Loading

0 comments on commit f60c138

Please sign in to comment.