From 42b22ea920beae69057bffd421cf15116788ab86 Mon Sep 17 00:00:00 2001 From: Tom Silver Date: Fri, 30 Jun 2023 09:41:52 -0400 Subject: [PATCH 1/5] manual feature selection for active sampler learning (#1490) --- .../active_sampler_learning_approach.py | 97 ++++++++++--------- predicators/ml_models.py | 3 + predicators/settings.py | 4 +- predicators/utils.py | 39 ++++++++ scripts/configs/active_sampler_learning.yaml | 54 ++++++++++- .../create_active_sampler_learning_plots.py | 12 ++- .../test_active_sampler_learning_approach.py | 18 ++-- 7 files changed, 168 insertions(+), 59 deletions(-) diff --git a/predicators/approaches/active_sampler_learning_approach.py b/predicators/approaches/active_sampler_learning_approach.py index 6f4493e129..c6b3fe2e27 100644 --- a/predicators/approaches/active_sampler_learning_approach.py +++ b/predicators/approaches/active_sampler_learning_approach.py @@ -22,8 +22,8 @@ from predicators.approaches.online_nsrt_learning_approach import \ OnlineNSRTLearningApproach from predicators.explorers import BaseExplorer, create_explorer -from predicators.ml_models import BinaryClassifierEnsemble, \ - MLPBinaryClassifier, MLPRegressor +from predicators.ml_models import BinaryClassifier, BinaryClassifierEnsemble, \ + KNeighborsClassifier, MLPBinaryClassifier, MLPRegressor from predicators.settings import CFG from predicators.structs import NSRT, Array, GroundAtom, LowLevelTrajectory, \ NSRTSampler, Object, ParameterizedOption, Predicate, Segment, State, \ @@ -133,7 +133,8 @@ def _update_sampler_data(self) -> None: continue if CFG.active_sampler_learning_model in [ - "myopic_classifier", "myopic_classifier_ensemble" + "myopic_classifier_mlp", "myopic_classifier_ensemble", + "myopic_classifier_knn" ]: label: Any = success else: @@ -159,7 +160,9 @@ def _check_option_success(self, option: _Option, segment: Segment) -> bool: def _learn_wrapped_samplers(self, online_learning_cycle: Optional[int]) -> None: """Update the NSRTs in place.""" - if CFG.active_sampler_learning_model == "myopic_classifier": + if CFG.active_sampler_learning_model in [ + "myopic_classifier_mlp", "myopic_classifier_knn" + ]: learner: _WrappedSamplerLearner = _ClassifierWrappedSamplerLearner( self._get_current_nsrts(), self._get_current_predicates(), online_learning_cycle) @@ -244,34 +247,38 @@ class _ClassifierWrappedSamplerLearner(_WrappedSamplerLearner): def _learn_nsrt_sampler(self, nsrt_data: _OptionSamplerDataset, nsrt: NSRT) -> Tuple[NSRTSampler, NSRTSampler]: - X_classifier: List[List[Array]] = [] + X_classifier: List[Array] = [] y_classifier: List[int] = [] for state, option, _, label in nsrt_data: objects = option.objects params = option.params - # input is state features and option parameters - X_classifier.append([np.array(1.0)]) # start with bias term - for obj in objects: - X_classifier[-1].extend(state[obj]) - X_classifier[-1].extend(params) - assert not CFG.sampler_learning_use_goals + x_arr = utils.construct_active_sampler_input( + state, objects, params, option.parent) + X_classifier.append(x_arr) y_classifier.append(label) X_arr_classifier = np.array(X_classifier) # output is binary signal y_arr_classifier = np.array(y_classifier) - classifier = MLPBinaryClassifier( - seed=CFG.seed, - balance_data=CFG.mlp_classifier_balance_data, - max_train_iters=CFG.sampler_mlp_classifier_max_itr, - learning_rate=CFG.learning_rate, - weight_decay=CFG.weight_decay, - use_torch_gpu=CFG.use_torch_gpu, - train_print_every=CFG.pytorch_train_print_every, - n_iter_no_change=CFG.mlp_classifier_n_iter_no_change, - hid_sizes=CFG.mlp_classifier_hid_sizes, - n_reinitialize_tries=CFG. - sampler_mlp_classifier_n_reinitialize_tries, - weight_init="default") + if CFG.active_sampler_learning_model.endswith("mlp"): + classifier: BinaryClassifier = MLPBinaryClassifier( + seed=CFG.seed, + balance_data=CFG.mlp_classifier_balance_data, + max_train_iters=CFG.sampler_mlp_classifier_max_itr, + learning_rate=CFG.learning_rate, + weight_decay=CFG.weight_decay, + use_torch_gpu=CFG.use_torch_gpu, + train_print_every=CFG.pytorch_train_print_every, + n_iter_no_change=CFG.mlp_classifier_n_iter_no_change, + hid_sizes=CFG.mlp_classifier_hid_sizes, + n_reinitialize_tries=CFG. + sampler_mlp_classifier_n_reinitialize_tries, + weight_init="default") + else: + assert CFG.active_sampler_learning_model.endswith("knn") + n_neighbors = min(len(X_arr_classifier), + CFG.active_sampler_learning_knn_neighbors) + classifier = KNeighborsClassifier(seed=CFG.seed, + n_neighbors=n_neighbors) classifier.fit(X_arr_classifier, y_arr_classifier) # Save the sampler classifier for external analysis. @@ -281,6 +288,11 @@ def _learn_nsrt_sampler(self, nsrt_data: _OptionSamplerDataset, with open(save_path, "wb") as f: pkl.dump(classifier, f) logging.info(f"Saved sampler classifier to {save_path}.") + save_path = f"{approach_save_path}_{nsrt.name}_" + \ + f"{self._online_learning_cycle}.sampler_classifier_data" + with open(save_path, "wb") as f: + pkl.dump((X_arr_classifier, y_arr_classifier), f) + logging.info(f"Saved sampler classifier data to {save_path}.") # Easiest way to access the base sampler. base_sampler = nsrt._sampler # pylint: disable=protected-access @@ -297,17 +309,14 @@ class _ClassifierEnsembleWrappedSamplerLearner(_WrappedSamplerLearner): def _learn_nsrt_sampler(self, nsrt_data: _OptionSamplerDataset, nsrt: NSRT) -> Tuple[NSRTSampler, NSRTSampler]: - X_classifier: List[List[Array]] = [] + X_classifier: List[Array] = [] y_classifier: List[int] = [] for state, option, _, label in nsrt_data: objects = option.objects params = option.params - # input is state features and option parameters - X_classifier.append([np.array(1.0)]) # start with bias term - for obj in objects: - X_classifier[-1].extend(state[obj]) - X_classifier[-1].extend(params) - assert not CFG.sampler_learning_use_goals + x_arr = utils.construct_active_sampler_input( + state, objects, params, option.parent) + X_classifier.append(x_arr) y_classifier.append(label) X_arr_classifier = np.array(X_classifier) # output is binary signal @@ -443,17 +452,14 @@ def _sample_options_from_state(self, return sampled_options def _fit_regressor(self, nsrt_data: _OptionSamplerDataset) -> MLPRegressor: - X_regressor: List[List[Array]] = [] + X_regressor: List[Array] = [] y_regressor: List[Array] = [] for state, option, _, target in nsrt_data: objects = option.objects params = option.params - # input is state features and option parameters - X_regressor.append([np.array(1.0)]) # start with bias term - for obj in objects: - X_regressor[-1].extend(state[obj]) - X_regressor[-1].extend(params) - assert not CFG.sampler_learning_use_goals + x_arr = utils.construct_active_sampler_input( + state, objects, params, option.parent) + X_regressor.append(x_arr) y_regressor.append(np.array([target])) X_arr_regressor = np.array(X_regressor) y_arr_regressor = np.array(y_regressor) @@ -500,19 +506,18 @@ def _vector_score_fn_to_score_fn(vector_fn: Callable[[Array], float], def _score_fn(state: State, objects: Sequence[Object], param_lst: List[Array]) -> List[float]: - x_lst: List[Any] = [1.0] # start with bias term - sub = dict(zip(nsrt.parameters, objects)) - for var in nsrt.parameters: - x_lst.extend(state[sub[var]]) - assert not CFG.sampler_learning_use_goals - x = np.array(x_lst) - scores = [vector_fn(np.r_[x, p]) for p in param_lst] + xs = [ + utils.construct_active_sampler_input(state, objects, p, + nsrt.option) + for p in param_lst + ] + scores = [vector_fn(x) for x in xs] return scores return _score_fn -def _classifier_to_score_fn(classifier: MLPBinaryClassifier, +def _classifier_to_score_fn(classifier: BinaryClassifier, nsrt: NSRT) -> _ScoreFn: return _vector_score_fn_to_score_fn(classifier.predict_proba, nsrt) diff --git a/predicators/ml_models.py b/predicators/ml_models.py index 724f76ba73..a40ed1a257 100644 --- a/predicators/ml_models.py +++ b/predicators/ml_models.py @@ -278,6 +278,9 @@ def classify(self, x: Array) -> bool: def predict_proba(self, x: Array) -> float: probs = self._model.predict_proba([x])[0] + # Special case: only one class. + if probs.shape == (1, ): + return float(self.classify(x)) assert probs.shape == (2, ) # [P(x is class 0), P(x is class 1)] return probs[1] # return the second element of probs diff --git a/predicators/settings.py b/predicators/settings.py index b3a7660c55..76021d005b 100644 --- a/predicators/settings.py +++ b/predicators/settings.py @@ -518,7 +518,9 @@ class GlobalSettings: online_learning_max_novelty_count = 0 # active sampler learning parameters - active_sampler_learning_model = "myopic_classifier" + active_sampler_learning_model = "myopic_classifier_mlp" + active_sampler_learning_feature_selection = "all" + active_sampler_learning_knn_neighbors = 3 active_sampler_learning_use_teacher = True active_sampler_learning_num_samples = 100 active_sampler_learning_score_gamma = 0.5 diff --git a/predicators/utils.py b/predicators/utils.py index 03e8ef3153..92025cbe4a 100644 --- a/predicators/utils.py +++ b/predicators/utils.py @@ -255,6 +255,45 @@ def create_json_dict_from_task(task: Task) -> Dict[str, Any]: return {"objects": object_dict, "init": init_dict, "goal": goal_dict} +def construct_active_sampler_input(state: State, objects: Sequence[Object], + params: Array, + param_option: ParameterizedOption) -> Array: + """Helper function for active sampler learning and explorer.""" + + assert not CFG.sampler_learning_use_goals + sampler_input_lst = [1.0] # start with bias term + if CFG.active_sampler_learning_feature_selection == "all": + for obj in objects: + sampler_input_lst.extend(state[obj]) + sampler_input_lst.extend(params) + + else: + assert CFG.active_sampler_learning_feature_selection == "oracle" + assert CFG.env == "bumpy_cover" + if param_option.name == "Pick": + # In this case, the x-data should be + # [block_bumpy, relative_pick_loc] + assert len(objects) == 1 + block = objects[0] + block_pos = state[block][3] + block_bumpy = state[block][5] + sampler_input_lst.append(block_bumpy) + assert len(params) == 1 + sampler_input_lst.append(params[0] - block_pos) + else: + assert param_option.name == "Place" + assert len(objects) == 2 + block, target = objects + target_pos = state[target][3] + grasp = state[block][4] + target_width = state[target][2] + sampler_input_lst.extend([grasp, target_width]) + assert len(params) == 1 + sampler_input_lst.append(params[0] - target_pos) + + return np.array(sampler_input_lst) + + class _Geom2D(abc.ABC): """A 2D shape that contains some points.""" diff --git a/scripts/configs/active_sampler_learning.yaml b/scripts/configs/active_sampler_learning.yaml index 696a790f5c..32f95e51e5 100644 --- a/scripts/configs/active_sampler_learning.yaml +++ b/scripts/configs/active_sampler_learning.yaml @@ -33,15 +33,15 @@ NUM_SEEDS: 10 # with teacher. --- APPROACHES: - myopic_classifier: + myopic_classifier_mlp: NAME: "active_sampler_learning" FLAGS: - active_sampler_learning_model: "myopic_classifier" + active_sampler_learning_model: "myopic_classifier_mlp" active_sampler_learning_use_teacher: False teacher_classifier: NAME: "active_sampler_learning" FLAGS: - active_sampler_learning_model: "myopic_classifier" + active_sampler_learning_model: "myopic_classifier_mlp" active_sampler_learning_use_teacher: True fitted_q: NAME: "active_sampler_learning" @@ -75,7 +75,7 @@ NUM_SEEDS: 10 # Experiment comparing random_nsrts exploration to active_sampler exploration # both with and without an ensemble in the regional bumpy cover environment. # Use fewer online learning requests per cycle because we're only learning -the pick bumpy classifier. +# the pick bumpy classifier. --- APPROACHES: # No ensemble approach @@ -123,3 +123,49 @@ FLAGS: num_online_learning_cycles: 25 START_SEED: 123 NUM_SEEDS: 10 + +# Compare manual feature design to default feature design, with MLP vs KNN. +--- +APPROACHES: + mlp_manual_features: + NAME: "active_sampler_learning" + FLAGS: + active_sampler_learning_model: "myopic_classifier_mlp" + active_sampler_learning_feature_selection: "oracle" + mlp_all_features: + NAME: "active_sampler_learning" + FLAGS: + active_sampler_learning_model: "myopic_classifier_mlp" + active_sampler_learning_feature_selection: "all" + knn_manual_features: + NAME: "active_sampler_learning" + FLAGS: + active_sampler_learning_model: "myopic_classifier_knn" + active_sampler_learning_feature_selection: "oracle" + knn_all_features: + NAME: "active_sampler_learning" + FLAGS: + active_sampler_learning_model: "myopic_classifier_knn" + active_sampler_learning_feature_selection: "all" +ENVS: + bumpy_cover: + NAME: "bumpy_cover" +ARGS: + - "debug" +FLAGS: + active_sampler_learning_use_teacher: False + strips_learner: "oracle" + sampler_learner: "oracle" + bilevel_plan_without_sim: "True" + max_initial_demos: 0 + num_train_tasks: 1000 + num_test_tasks: 100 + max_num_steps_interaction_request: 50 + sampler_mlp_classifier_max_itr: 100000 + mlp_classifier_balance_data: False + pytorch_train_print_every: 10000 + explorer: "random_nsrts" # NOTE + online_nsrt_learning_requests_per_cycle: 1 + num_online_learning_cycles: 50 +START_SEED: 123 +NUM_SEEDS: 10 diff --git a/scripts/plotting/create_active_sampler_learning_plots.py b/scripts/plotting/create_active_sampler_learning_plots.py index 49868be04d..be0fbd834b 100644 --- a/scripts/plotting/create_active_sampler_learning_plots.py +++ b/scripts/plotting/create_active_sampler_learning_plots.py @@ -58,7 +58,7 @@ ], "Shifted Bumpy Cover": [ ("Myopic Classifier", "green", lambda df: df["EXPERIMENT_ID"].apply( - lambda v: "bumpy_cover-myopic_classifier" in v)), + lambda v: "bumpy_cover-myopic_classifier_mlp" in v)), ("Fitted Q", "purple", lambda df: df["EXPERIMENT_ID"].apply( lambda v: "bumpy_cover-fitted_q" in v)), ("Teacher Classifier", "brown", lambda df: df["EXPERIMENT_ID"].apply( @@ -72,6 +72,16 @@ ("Random Explore", "red", lambda df: df["EXPERIMENT_ID"].apply( lambda v: "regional_bumpy_cover-random-explore" in v)), ], + "Bumpy Cover Feature Design": [ + ("All Feats (MLP)", "black", lambda df: df["EXPERIMENT_ID"].apply( + lambda v: "bumpy_cover-mlp_all_features" in v)), + ("Oracle Feats (MLP)", "green", lambda df: df["EXPERIMENT_ID"].apply( + lambda v: "bumpy_cover-mlp_manual_features" in v)), + ("All Feats (KNN)", "blue", lambda df: df["EXPERIMENT_ID"].apply( + lambda v: "bumpy_cover-knn_all_features" in v)), + ("Oracle Feats (KNN)", "red", lambda df: df["EXPERIMENT_ID"].apply( + lambda v: "bumpy_cover-knn_manual_features" in v)), + ], } # If True, add (0, 0) to every plot. diff --git a/tests/approaches/test_active_sampler_learning_approach.py b/tests/approaches/test_active_sampler_learning_approach.py index a1e76d9ccd..6e310ac45c 100644 --- a/tests/approaches/test_active_sampler_learning_approach.py +++ b/tests/approaches/test_active_sampler_learning_approach.py @@ -16,18 +16,22 @@ from predicators.teacher import Teacher -@pytest.mark.parametrize("model_name,right_targets,num_demo", - [("myopic_classifier", False, 0), - ("myopic_classifier", True, 1), - ("myopic_classifier_ensemble", False, 0), - ("myopic_classifier_ensemble", False, 1), - ("fitted_q", False, 0), ("fitted_q", True, 0)]) -def test_active_sampler_learning_approach(model_name, right_targets, num_demo): +@pytest.mark.parametrize("model_name,right_targets,num_demo,feat_type", + [("myopic_classifier_mlp", False, 0, "all"), + ("myopic_classifier_mlp", True, 1, "all"), + ("myopic_classifier_ensemble", False, 0, "all"), + ("myopic_classifier_ensemble", False, 1, "all"), + ("fitted_q", False, 0, "all"), + ("fitted_q", True, 0, "all"), + ("myopic_classifier_knn", False, 0, "oracle")]) +def test_active_sampler_learning_approach(model_name, right_targets, num_demo, + feat_type): """Test for ActiveSamplerLearningApproach class, entire pipeline.""" utils.reset_config({ "env": "bumpy_cover", "approach": "active_sampler_learning", "active_sampler_learning_model": model_name, + "active_sampler_learning_feature_selection": feat_type, "timeout": 10, "strips_learner": "oracle", "sampler_learner": "oracle", From c6f95e96590356e675b0905fb4d9174cfec03696 Mon Sep 17 00:00:00 2001 From: Lilian Luong <43945489+lilianluong@users.noreply.github.com> Date: Tue, 4 Jul 2023 18:48:58 -0700 Subject: [PATCH 2/5] Refinement cost learning changes (#1492) * [refinement estimation] collect training data by timing refinement for each abstract action separately * [refinement estimation] GNN refinement estimator * [refinement estimation] formatting / minor changes to pass checks * [refinement estimation] shuffle training data + add weight decay for GNN estimator * [refinement estimation] disable normalization of some parts of the graph if desired * [refinement estimation] disable normalization of some parts of the graph if desired * [gnn refinement] autoformat/typing * [refinement learning] tests for GNNRefinementEstimator * [gnn refinement] improved tests for GNNRefinementEstimator * [gnn refinement] Encode the NSRT parameters in GNNRefinementEstimator graph inputs * [gnn refinement] minor edits after PR review * [gnn refinement] autoformat * [gnn refinement] autoformat * [exit garage] simplified exit_garage by compressing PickupObstacle and StoreObstacle into one option/nsrt ClearObstacle * [exit_garage] plot carried obstacle on top of others * [refinement learning] include execution cost in cost estimation, generate all skeletons before collecting refinement data * [exit_garage] bugfixes/coverage tests/autoformatting for exit_garage * [scripts] supercloud launch scripts/configs for refinement estimation * [supercloud scripts] Add SKIP argument to envs and approaches in supercloud configs * [refinement estimation] Option to save dataset at intervals while collecting, using flag --refinement_data_save_every * [refinement learning] option to pass in a number of data points to use instead of the full dataset * [refinement learning] actually pass in a fraction of the dataset to use during training, not an absolute number * [refinement learning] Added supercloud launch config for experiments/evaluation for refinement cost learning * [refinement learning] fix scripts, add print_every for CNN training * [scripts] local launch.py entry point addition + use_torch_gpu true for refinement learning * [refinement learning] add gnn specs to fixed_passage experiment * [refinement learning] fix for cnn with gpu * [gnn refinement] fix for gnn with gpu * [gnn refinement] coverage test fix for gnn with GPU * [gpu usage] reassign CUDA_VISIBLE_DEVICES if necessary in utils.py, revert CNNRefinementEstimator 'fix' * [gpu usage] ignore coverage tests for the CUDA_VISIBLE_DEVICES fix * [gnn_utils] handle empty edges, nodes, globals automatically * [exit_garage] exit_garage adjustments * minor changes * [refinement learning] learn to output time and low level action count separately then combine in get_cost() * [gnn refinement] bugfix * [refinement learning] refinement cost in results analysis * [refinement learning] bugfix * [refinement learning] bugfix * updated refinement cost learning config files * hacky fix for the delay issue for the first test of GNN/CNN estimators * checks * bugfix * one fix * fix segmentation * add back --------- Co-authored-by: Tom Silver --- .../approaches/bilevel_planning_approach.py | 2 +- predicators/envs/exit_garage.py | 39 +- predicators/gnn/gnn.py | 9 +- predicators/gnn/gnn_utils.py | 81 ++-- .../ground_truth_models/exit_garage/nsrts.py | 40 +- .../exit_garage/options.py | 120 +++--- predicators/main.py | 8 + predicators/nsrt_learning/segmentation.py | 4 +- predicators/planning.py | 52 ++- .../base_refinement_estimator.py | 3 + .../cnn_refinement_estimator.py | 19 +- .../gnn_refinement_estimator.py | 364 ++++++++++++++++++ .../oracle_refinement_estimator.py | 21 +- .../per_skeleton_estimator.py | 4 + .../tabular_refinement_estimator.py | 32 +- predicators/settings.py | 13 +- predicators/structs.py | 2 +- predicators/train_refinement_estimator.py | 112 ++++-- predicators/utils.py | 8 + scripts/analyze_results_directory.py | 1 + scripts/cluster_utils.py | 15 +- scripts/configs/refinement_cost_learning.yaml | 76 ++++ .../refinement_cost_learning_test.yaml | 83 ++++ scripts/local/launch.py | 6 +- scripts/supercloud/launch.py | 10 +- scripts/supercloud/submit_supercloud_job.py | 10 +- tests/approaches/test_oracle_approach.py | 4 +- tests/envs/test_exit_garage.py | 89 ++--- tests/nsrt_learning/test_segmentation.py | 2 +- .../test_base_refinement_estimator.py | 2 + .../test_cnn_refinement_estimator.py | 15 +- .../test_gnn_refinement_estimator.py | 220 +++++++++++ .../test_oracle_refinement_estimator.py | 25 +- .../test_tabular_refinement_estimator.py | 26 +- tests/test_train_refinement_estimator.py | 33 +- 35 files changed, 1198 insertions(+), 352 deletions(-) create mode 100644 predicators/refinement_estimators/gnn_refinement_estimator.py create mode 100644 scripts/configs/refinement_cost_learning.yaml create mode 100644 scripts/configs/refinement_cost_learning_test.yaml create mode 100644 tests/refinement_estimators/test_gnn_refinement_estimator.py diff --git a/predicators/approaches/bilevel_planning_approach.py b/predicators/approaches/bilevel_planning_approach.py index e101f7726a..6466e5bde4 100644 --- a/predicators/approaches/bilevel_planning_approach.py +++ b/predicators/approaches/bilevel_planning_approach.py @@ -150,7 +150,7 @@ def _save_metrics(self, metrics: Metrics, nsrts: Set[NSRT], for metric in [ "num_samples", "num_skeletons_optimized", "num_failures_discovered", "num_nodes_expanded", - "num_nodes_created", "plan_length" + "num_nodes_created", "plan_length", "refinement_time" ]: self._metrics[f"total_{metric}"] += metrics[metric] self._metrics["total_num_nsrts"] += len(nsrts) diff --git a/predicators/envs/exit_garage.py b/predicators/envs/exit_garage.py index 4f5fb2c82e..2e5effc213 100644 --- a/predicators/envs/exit_garage.py +++ b/predicators/envs/exit_garage.py @@ -51,7 +51,7 @@ class ExitGarageEnv(BaseEnv): robot_starting_y: ClassVar[float] = 0.8 obstacle_area_left_padding: ClassVar[float] = 0.4 obstacle_area_right_padding: ClassVar[float] = 0.1 - obstacle_area_vertical_padding: ClassVar[float] = 0.1 + obstacle_area_vertical_padding: ClassVar[float] = 0.05 car_starting_x: ClassVar[float] = 0.15 car_starting_y: ClassVar[float] = 0.3 @@ -64,7 +64,7 @@ class ExitGarageEnv(BaseEnv): _robot_type = Type("robot", ["x", "y", "carrying"]) # carrying: bool _obstacle_type = Type("obstacle", ["x", "y", "carried"]) # carried: bool # Convenience type for storage area, storing number of obstacles in it - # This is used in the StoreObstacle option to calculate where to place the + # This is used in the ClearObstacle option to calculate where to place the # a new obstacle in the storage area. _storage_type = Type("storage", ["num_stored"]) @@ -73,12 +73,6 @@ def __init__(self, use_gui: bool = True) -> None: # Predicates self._CarHasExited = Predicate("CarHasExited", [self._car_type], self._CarHasExited_holds) - self._CarryingObstacle = Predicate( - "CarryingObstacle", [self._robot_type, self._obstacle_type], - self._CarryingObstacle_holds) - self._NotCarryingObstacle = Predicate("NotCarryingObstacle", - [self._robot_type], - self._NotCarryingObstacle_holds) self._ObstacleCleared = Predicate("ObstacleCleared", [self._obstacle_type], self._ObstacleCleared_holds) @@ -150,8 +144,7 @@ def simulate(self, state: State, action: Action) -> State: else: # Place the current obstacle if in storage area and there is # no collision caused by doing so - if ry > 1.0 - self.storage_area_height and not \ - self._placed_object_collides(state, rx, ry): + if ry > 1.0 - self.storage_area_height: next_state.set(carried_obstacle, "x", rx) next_state.set(carried_obstacle, "y", ry) next_state.set(carried_obstacle, "carried", 0) @@ -179,9 +172,7 @@ def _generate_test_tasks(self) -> List[EnvironmentTask]: @property def predicates(self) -> Set[Predicate]: return { - self._CarHasExited, self._CarryingObstacle, - self._NotCarryingObstacle, self._ObstacleCleared, - self._ObstacleNotCleared + self._CarHasExited, self._ObstacleCleared, self._ObstacleNotCleared } @property @@ -232,6 +223,7 @@ def render_state_plt( self._exit_geom.plot(ax, color=exit_color) # Draw obstacles + carried_obstacle_geom: Optional[utils.Circle] = None for obstacle in state.get_objects(self._obstacle_type): if state.get(obstacle, "carried") == 1: # Obstacle is being carried, so draw it under the robot instead @@ -240,11 +232,12 @@ def render_state_plt( robot_y = state.get(self._robot, "y") carried_obstacle_geom = utils.Circle(robot_x, robot_y, self.obstacle_radius) - carried_obstacle_geom.plot(ax, color=carried_color) else: # Obstacle is not being carried, just draw normally obstacle_geom = self._object_to_geom(obstacle, state) obstacle_geom.plot(ax, color=obstacle_color) + if carried_obstacle_geom: + carried_obstacle_geom.plot(ax, color=carried_color) # Draw robot robot_geom = self._object_to_geom(self._robot, state) @@ -333,18 +326,6 @@ def _CarHasExited_holds(self, state: State, car_geom = self._object_to_geom(car, state) return car_geom.intersects(self._exit_geom) - def _CarryingObstacle_holds(self, state: State, - objects: Sequence[Object]) -> bool: - robot, obstacle = objects - robot_carrying_something = state.get(robot, "carrying") == 1 - obstacle_is_carried = state.get(obstacle, "carried") == 1 - return robot_carrying_something and obstacle_is_carried - - def _NotCarryingObstacle_holds(self, state: State, - objects: Sequence[Object]) -> bool: - robot, = objects - return state.get(robot, "carrying") == 0 - def _ObstacleCleared_holds(self, state: State, objects: Sequence[Object]) -> bool: obstacle, = objects @@ -398,12 +379,6 @@ def get_car_collision_object(cls, state: State) -> Optional[Object]: return obstacle return None - @classmethod - def _placed_object_collides(cls, state: State, new_x: float, - new_y: float) -> bool: - """Returns True if an obstacle placed at (new_x, new_y) would collide - with an existing obstacle in the storage area.""" - @classmethod def _robot_carrying_obstacle(cls, state: State) -> Optional[Object]: """If the robot is currently carrying an obstacle, return it; else diff --git a/predicators/gnn/gnn.py b/predicators/gnn/gnn.py index be0e97e6f8..fc3d7e51a4 100755 --- a/predicators/gnn/gnn.py +++ b/predicators/gnn/gnn.py @@ -45,7 +45,7 @@ def _aggregation_func(graph: Dict) -> Tuple[torch.Tensor, Array]: def _prepare_receiver_matrix(graph: Dict) -> torch.Tensor: num_nodes = graph['nodes'].size()[0] - columns = torch.arange(0, num_nodes).long() + columns = torch.arange(0, num_nodes).long().to(graph['nodes'].device) rec_m = graph['receivers'].view(-1)[:, None] == columns return rec_m.float() @@ -53,10 +53,11 @@ def _prepare_receiver_matrix(graph: Dict) -> torch.Tensor: def _aggregate_globals(graph: Dict, global_node_idxs: Array, global_edge_idxs: Array) -> torch.Tensor: num_graphs = graph['globals'].size()[0] - columns = torch.arange(0, num_graphs).long() + device = graph['globals'].device + columns = torch.arange(0, num_graphs).long().to(device) - node_idxs = torch.LongTensor(global_node_idxs)[:, None] - edge_idxs = torch.LongTensor(global_edge_idxs)[:, None] + node_idxs = torch.LongTensor(global_node_idxs)[:, None].to(device) + edge_idxs = torch.LongTensor(global_edge_idxs)[:, None].to(device) nodes_agg = torch.mm(graph['nodes'].t(), (node_idxs == columns).float()).t() diff --git a/predicators/gnn/gnn_utils.py b/predicators/gnn/gnn_utils.py index 103c5e5ebc..3a6f069a04 100755 --- a/predicators/gnn/gnn_utils.py +++ b/predicators/gnn/gnn_utils.py @@ -5,7 +5,7 @@ import collections import logging import time -from typing import Any, Callable, Dict, List, OrderedDict, Tuple +from typing import Any, Callable, Dict, List, Optional, OrderedDict, Tuple import numpy as np import torch @@ -14,13 +14,17 @@ from predicators.structs import Array -def train_model(model: Any, dataloaders: Dict, - optimizer: torch.optim.Optimizer, - criterion: Callable[[torch.Tensor, torch.Tensor], - torch.Tensor], - global_criterion: Callable[[torch.Tensor, torch.Tensor], - torch.Tensor], num_epochs: int, - do_validation: bool) -> OrderedDict[str, torch.Tensor]: +def train_model( + model: Any, + dataloaders: Dict, + optimizer: torch.optim.Optimizer, + criterion: Optional[Callable[[torch.Tensor, torch.Tensor], torch.Tensor]], + global_criterion: Optional[Callable[[torch.Tensor, torch.Tensor], + torch.Tensor]], + num_epochs: int, + do_validation: bool, + device: Optional[torch.device] = None, +) -> OrderedDict[str, torch.Tensor]: """Optimize the model and save checkpoints.""" since = time.perf_counter() @@ -32,7 +36,7 @@ def train_model(model: Any, dataloaders: Dict, for epoch in range(num_epochs): if epoch % 100 == 0: - logging.info(f'Epoch {epoch}/{num_epochs-1}') + logging.info(f'Epoch {epoch}/{num_epochs - 1}') logging.info('-' * 10) # Each epoch has a training and validation phase if epoch % 100 == 0 and do_validation: @@ -64,6 +68,8 @@ def train_model(model: Any, dataloaders: Dict, output = outputs[-1] loss = torch.tensor(0.0) + if device is not None: + loss = loss.to(device) if criterion is not None: loss += criterion(output['nodes'], targets['nodes']) @@ -84,7 +90,7 @@ def train_model(model: Any, dataloaders: Dict, logging.info(f"running_loss: {running_loss}") if do_validation and \ - running_loss['val'] < best_seen_running_validation_loss: + running_loss['val'] < best_seen_running_validation_loss: best_seen_running_validation_loss = running_loss['val'] best_seen_model_weights = model.state_dict() best_seen_model_train_loss = running_loss['train'] @@ -110,7 +116,12 @@ def train_model(model: Any, dataloaders: Dict, return best_seen_model_weights -def compute_normalizers(data: List[Dict]) -> Dict[str, Tuple[Array, Array]]: +def compute_normalizers( + data: List[Dict], + normalize_nodes: bool = True, + normalize_edges: bool = True, + normalize_globals: bool = True, +) -> Dict[str, Tuple[Array, Array]]: """Compute the normalizers of the given list of graphs. These can be passed into normalize_graph. @@ -123,14 +134,17 @@ def compute_normalizers(data: List[Dict]) -> Dict[str, Tuple[Array, Array]]: node_data = np.array(node_data_lst) edge_data = np.array(edge_data_lst) global_data = np.array(global_data_lst) - node_normalizers = _compute_normalizer_array(node_data) - edge_normalizers = _compute_normalizer_array(edge_data) - global_normalizers = _compute_normalizer_array(global_data) - return { - "nodes": node_normalizers, - "edges": edge_normalizers, - "globals": global_normalizers - } + normalizers = {} + if normalize_nodes and len(node_data): + node_normalizers = _compute_normalizer_array(node_data) + normalizers["nodes"] = node_normalizers + if normalize_edges and len(edge_data): + edge_normalizers = _compute_normalizer_array(edge_data) + normalizers["edges"] = edge_normalizers + if normalize_globals and len(global_data): + global_normalizers = _compute_normalizer_array(global_data) + normalizers["globals"] = global_normalizers + return normalizers def _compute_normalizer_array(array_data: Array) -> Tuple[Array, Array]: @@ -157,7 +171,6 @@ def normalize_graph(graph: Dict, if k in normalizers: new_graph[k] = transform(graph[k], normalizers[k]) else: - assert k in ['n_node', 'n_edge', 'senders', 'receivers'] new_graph[k] = graph[k] return new_graph @@ -176,11 +189,13 @@ def _invert_normalize_array(array_data: Array, return (array_data * scale) + shift -def get_single_model_prediction(model: Any, single_input: Dict) -> Dict: +def get_single_model_prediction(model: Any, + single_input: Dict, + device: Optional[torch.device] = None) -> Dict: """Get a prediction from the given model on the given input.""" model.train(False) model.eval() - inputs = _create_super_graph([single_input]) + inputs = _create_super_graph([single_input], device=device) outputs = model(inputs.copy()) graphs = split_graphs(_convert_to_data(outputs[-1])) assert len(graphs) == 1 @@ -216,7 +231,7 @@ def _compute_stacked_offsets(sizes: List[Array], def _convert_to_data(graph: Dict) -> Dict: for key in graph.keys(): if graph[key] is not None: - graph[key] = graph[key].data + graph[key] = graph[key].cpu().data return graph @@ -337,7 +352,8 @@ def __getitem__(self, idx: int) -> Dict: return sample -def _create_super_graph(batches: List[Dict]) -> Dict: +def _create_super_graph(batches: List[Dict], + device: Optional[torch.device] = None) -> Dict: nodes = batches[0]['nodes'] edges = batches[0]['edges'] receivers = batches[0]['receivers'][:, None] @@ -362,7 +378,7 @@ def _create_super_graph(batches: List[Dict]) -> Dict: num_nodes = np.vstack((num_nodes, b['n_node'])) num_edges = np.vstack((num_edges, b['n_edge'])) - return { + super_graph = { 'n_node': torch.from_numpy(num_nodes), 'n_edge': @@ -378,15 +394,26 @@ def _create_super_graph(batches: List[Dict]) -> Dict: 'globals': (torch.from_numpy(globals_).float().requires_grad_() if globals_ is not None else None), } + # Convert Tensors to device + if device is not None: + for key, val in super_graph.items(): + super_graph[key] = val.to(device) if val is not None else val + return super_graph -def graph_batch_collate(batch: List[Dict]) -> Dict: +def graph_batch_collate(batch: List[Dict], + device: Optional[torch.device] = None) -> Dict: """Collate the given batch of graphs. Assumes batch is a dictionary where each key contains a list of graphs. """ return { - key: _create_super_graph([d[key] for d in batch]) + key: _create_super_graph([d[key] for d in batch], device=device) for key in batch[0] } + + +def get_graph_batch_collate_with_device(device: torch.device) -> Callable: + """Return a graph_batch_collate function that is given a device.""" + return lambda batch: graph_batch_collate(batch, device=device) diff --git a/predicators/ground_truth_models/exit_garage/nsrts.py b/predicators/ground_truth_models/exit_garage/nsrts.py index 237b986a33..272c467308 100644 --- a/predicators/ground_truth_models/exit_garage/nsrts.py +++ b/predicators/ground_truth_models/exit_garage/nsrts.py @@ -27,15 +27,12 @@ def get_nsrts(env_name: str, types: Dict[str, Type], # Predicates CarHasExited = predicates["CarHasExited"] - CarryingObstacle = predicates["CarryingObstacle"] - NotCarryingObstacle = predicates["NotCarryingObstacle"] ObstacleCleared = predicates["ObstacleCleared"] ObstacleNotCleared = predicates["ObstacleNotCleared"] # Options DriveCarToExit = options["DriveCarToExit"] - PickupObstacle = options["PickupObstacle"] - StoreObstacle = options["StoreObstacle"] + ClearObstacle = options["ClearObstacle"] nsrts = set() @@ -63,50 +60,25 @@ def random_sampler(state: State, goal: Set[GroundAtom], option_vars, random_sampler) nsrts.add(drive_car_to_exit_nsrt) - # PickupObstacle + # ClearObstacle robot = Variable("?robot", robot_type) obstacle = Variable("?obstacle", obstacle_type) parameters = [robot, obstacle] option_vars = [robot, obstacle] - option = PickupObstacle + option = ClearObstacle preconditions = { - LiftedAtom(NotCarryingObstacle, [robot]), LiftedAtom(ObstacleNotCleared, [obstacle]), } add_effects = { - LiftedAtom(CarryingObstacle, [robot, obstacle]), - } - delete_effects = { - LiftedAtom(NotCarryingObstacle, [robot]), - LiftedAtom(ObstacleNotCleared, [obstacle]), - } - ignore_effects = set() - pickup_obstacle_nsrt = NSRT("PickupObstacle", parameters, - preconditions, add_effects, delete_effects, - ignore_effects, option, option_vars, - random_sampler) - nsrts.add(pickup_obstacle_nsrt) - - # StoreObstacle - robot = Variable("?robot", robot_type) - obstacle = Variable("?obstacle", obstacle_type) - parameters = [robot, obstacle] - option_vars = [robot, obstacle] - option = StoreObstacle - preconditions = { - LiftedAtom(CarryingObstacle, [robot, obstacle]), - } - add_effects = { - LiftedAtom(NotCarryingObstacle, [robot]), LiftedAtom(ObstacleCleared, [obstacle]), } delete_effects = { - LiftedAtom(CarryingObstacle, [robot, obstacle]), + LiftedAtom(ObstacleNotCleared, [obstacle]), } ignore_effects = set() - store_obstacle_nsrt = NSRT("StoreObstacle", parameters, preconditions, + clear_obstacle_nsrt = NSRT("ClearObstacle", parameters, preconditions, add_effects, delete_effects, ignore_effects, option, option_vars, random_sampler) - nsrts.add(store_obstacle_nsrt) + nsrts.add(clear_obstacle_nsrt) return nsrts diff --git a/predicators/ground_truth_models/exit_garage/options.py b/predicators/ground_truth_models/exit_garage/options.py index 95dac21647..5da18fbb8e 100644 --- a/predicators/ground_truth_models/exit_garage/options.py +++ b/predicators/ground_truth_models/exit_garage/options.py @@ -31,8 +31,6 @@ def get_options(cls, env_name: str, types: Dict[str, Type], storage_type = types["storage"] CarHasExited = predicates["CarHasExited"] - CarryingObstacle = predicates["CarryingObstacle"] - NotCarryingObstacle = predicates["NotCarryingObstacle"] ObstacleCleared = predicates["ObstacleCleared"] ObstacleNotCleared = predicates["ObstacleNotCleared"] @@ -71,7 +69,14 @@ def _goal_fn(pt: Array) -> bool: target_y = 0.4 - ExitGarageEnv.exit_width / 2 target_theta = 0 if CFG.exit_garage_motion_planning_ignore_obstacles: - cls._plan_direct(state, memory, params, car, + start_pos_list = [ + state.get(car, "x"), + state.get(car, "y"), + ] + start_position = np.array(start_pos_list) + memory["action_plan"] = [] + memory["position_plan"] = [] + cls._plan_direct(memory, params, start_position, np.array([target_x, target_y]), 0, 1) return True success = cls._run_rrt(state, @@ -92,80 +97,68 @@ def _goal_fn(pt: Array) -> bool: terminal=_DriveCarToExit_terminal, ) - # PickupObstacle - def _PickupObstacle_terminal(state: State, memory: Dict, - objects: Sequence[Object], - params: Array) -> bool: - del memory, params # unused - return CarryingObstacle.holds(state, objects) - - def _PickupObstacle_initiable(state: State, memory: Dict, - objects: Sequence[Object], - params: Array) -> bool: - robot, obstacle = objects - if not ObstacleNotCleared.holds(state, objects[1:]): - return False # obstacle already picked or cleared - if not NotCarryingObstacle.holds(state, objects[:1]): - return False # robot already carrying something else - # Set up the target input for the motion planner. - target_x = state.get(obstacle, "x") - target_y = state.get(obstacle, "y") - cls._plan_direct(state, memory, params, robot, - np.array([target_x, target_y]), 2, 3) - # Append pickup action to memory action plan - memory["action_plan"].append( - Action(np.array([0.0, 0.0, 0.0, 0.0, 1.0], dtype=np.float32))) - # Picking an obstacle takes a bit of time to plan, artificially - time.sleep(CFG.exit_garage_pick_place_refine_penalty) - return True - - PickupObstacle = ParameterizedOption( - "PickupObstacle", - types=[robot_type, obstacle_type], - params_space=Box(0, 1, (1, )), - policy=_motion_plan_policy, - initiable=_PickupObstacle_initiable, - terminal=_PickupObstacle_terminal, - ) - - # StoreObstacle - def _StoreObstacle_terminal(state: State, memory: Dict, + # ClearObstacle + def _ClearObstacle_terminal(state: State, memory: Dict, objects: Sequence[Object], params: Array) -> bool: del memory, params # unused - return ObstacleCleared.holds(state, objects[1:]) + _, obstacle = objects + return ObstacleCleared.holds(state, [obstacle]) - def _StoreObstacle_initiable(state: State, memory: Dict, + def _ClearObstacle_initiable(state: State, memory: Dict, objects: Sequence[Object], params: Array) -> bool: - robot, _ = objects - if not CarryingObstacle.holds(state, objects): - return False # obstacle isn't being carried, so can't store + robot, obstacle = objects + if not ObstacleNotCleared.holds(state, [obstacle]): + return False # obstacle already cleared + + memory["action_plan"] = [] + memory["position_plan"] = [] + start_pos_list = [ + state.get(robot, "x"), + state.get(robot, "y"), + ] + start_position = np.array(start_pos_list) + + # Straight-line plan to pickup obstacle + pickup_target_x = state.get(obstacle, "x") + pickup_target_y = state.get(obstacle, "y") + pickup_position = np.array([pickup_target_x, pickup_target_y]) + cls._plan_direct(memory, params, start_position, pickup_position, + 2, 3) + # Append pickup action to memory plans + memory["action_plan"].append( + Action(np.array([0.0, 0.0, 0.0, 0.0, 1.0], dtype=np.float32))) + + # Straight-line plan to place obstacle storage, = state.get_objects(storage_type) num_stored = state.get(storage, "num_stored") # Set up the target input for the motion planner. target_x = (0.01 + ExitGarageEnv.obstacle_radius * 2) * num_stored target_x += ExitGarageEnv.obstacle_radius - target_y = 1.0 - ExitGarageEnv.storage_area_height / 2 - cls._plan_direct(state, memory, params, robot, + target_y = (ExitGarageEnv.y_ub - + ExitGarageEnv.storage_area_height / 2) + cls._plan_direct(memory, params, pickup_position, np.array([target_x, target_y]), 2, 3) # Append place action to memory action plan memory["action_plan"].append( Action(np.array([0.0, 0.0, 0.0, 0.0, 1.0], dtype=np.float32))) - # Placing an obstacle takes a bit of time to plan, artificially - time.sleep(CFG.exit_garage_pick_place_refine_penalty) + + # Moving an obstacle takes a bit of time to plan, artificially + time.sleep(CFG.exit_garage_clear_refine_penalty) + return True - StoreObstacle = ParameterizedOption( - "StoreObstacle", + ClearObstacle = ParameterizedOption( + "ClearObstacle", types=[robot_type, obstacle_type], params_space=Box(0, 1, (1, )), policy=_motion_plan_policy, - initiable=_StoreObstacle_initiable, - terminal=_StoreObstacle_terminal, + initiable=_ClearObstacle_initiable, + terminal=_ClearObstacle_terminal, ) - return {DriveCarToExit, PickupObstacle, StoreObstacle} + return {DriveCarToExit, ClearObstacle} @classmethod def _run_rrt(cls, state: State, memory: Dict, params: Array, @@ -195,7 +188,7 @@ def _distance_fn(from_pt: Array, to_pt: Array) -> float: angle_dist = (from_pt[2] - to_pt[2] + np.pi) % (2 * np.pi) - np.pi # We need to scale the weight of the angle for the distance down # because it should matter but not as much as the position diff - scaled_angle_dist = angle_dist / (10 * np.pi) + scaled_angle_dist = angle_dist / (2 * np.pi) distance += scaled_angle_dist**2 return distance @@ -281,9 +274,9 @@ def _collision_fn(pt: Array) -> bool: return True @classmethod - def _plan_direct(cls, state: State, memory: Dict, params: Array, - move_obj: Object, target_position: Array, - x_action_idx: int, y_action_idx: int) -> None: + def _plan_direct(cls, memory: Dict, params: Array, start_position: Array, + target_position: Array, x_action_idx: int, + y_action_idx: int) -> None: """Set position and action plans for a straight line from the starting position to the target position. @@ -299,16 +292,11 @@ def _extend_fn(pt1: Array, pt2: Array) -> Iterator[Array]: yield pt1 * (1 - i / num) + pt2 * i / num # Run planning. - start_pos_list = [ - state.get(move_obj, "x"), - state.get(move_obj, "y"), - ] - start_position = np.array(start_pos_list) extender = _extend_fn(start_position, target_position) position_plan = [start_position] + list(extender) # The position plan is used for the termination check, and possibly # can be used for debug drawing in the rendering in the future. - memory["position_plan"] = position_plan + memory["position_plan"].extend(position_plan) # Convert the plan from position space to action space. deltas = np.subtract(position_plan[1:], position_plan[:-1]) @@ -319,4 +307,4 @@ def _create_action(dx: float, dy: float) -> Action: return Action(arr) action_plan = [_create_action(dx, dy) for (dx, dy) in deltas] - memory["action_plan"] = action_plan + memory["action_plan"].extend(action_plan) diff --git a/predicators/main.py b/predicators/main.py index 0109f49355..f6a72fc77e 100644 --- a/predicators/main.py +++ b/predicators/main.py @@ -292,6 +292,7 @@ def _run_testing(env: BaseEnv, cogman: CogMan) -> Metrics: num_solved = 0 cogman.reset_metrics() total_suc_time = 0.0 + total_low_level_action_cost = 0.0 total_num_solve_timeouts = 0 total_num_solve_failures = 0 total_num_execution_timeouts = 0 @@ -354,6 +355,10 @@ def _run_testing(env: BaseEnv, cogman: CogMan) -> Metrics: monitor=monitor) exec_time = execution_metrics["policy_call_time"] metrics[f"PER_TASK_task{test_task_idx}_exec_time"] = exec_time + if CFG.refinement_data_include_execution_cost: + total_low_level_action_cost += ( + len(traj[1]) * + CFG.refinement_data_low_level_execution_cost) # Save the successful trajectory, e.g., for playback on a robot. traj_file = f"{save_prefix}__task{test_task_idx+1}.traj" traj_file_path = Path(CFG.eval_trajectories_dir) / traj_file @@ -399,6 +404,9 @@ def _run_testing(env: BaseEnv, cogman: CogMan) -> Metrics: metrics["num_total"] = len(test_tasks) metrics["avg_suc_time"] = (total_suc_time / num_solved if num_solved > 0 else float("inf")) + metrics["avg_ref_cost"] = ((total_low_level_action_cost + + cogman.metrics["total_refinement_time"]) / + num_solved if num_solved > 0 else float("inf")) metrics["min_num_samples"] = cogman.metrics[ "min_num_samples"] if cogman.metrics["min_num_samples"] < float( "inf") else 0 diff --git a/predicators/nsrt_learning/segmentation.py b/predicators/nsrt_learning/segmentation.py index 68e8c1c466..b6f89c190a 100644 --- a/predicators/nsrt_learning/segmentation.py +++ b/predicators/nsrt_learning/segmentation.py @@ -59,9 +59,7 @@ def _segment_with_contact_changes( elif CFG.env == "coffee": keep_pred_names = {"Holding", "HandEmpty", "MachineOn", "CupFilled"} elif CFG.env == "exit_garage": - keep_pred_names = { - "CarryingObstacle", "NotCarryingObstacle", "CarHasExited" - } + keep_pred_names = {"ObstacleCleared", "CarHasExited"} else: raise NotImplementedError("Contact-based segmentation not implemented " f"for environment {CFG.env}.") diff --git a/predicators/planning.py b/predicators/planning.py index 848afed46f..97e046e1e6 100644 --- a/predicators/planning.py +++ b/predicators/planning.py @@ -168,6 +168,7 @@ def _sesame_plan_with_astar( gen = iter( sorted(proposed_skeletons, key=lambda s: estimator.get_cost(task, *s))) + refinement_start_time = time.perf_counter() for skeleton, atoms_sequence in gen: if CFG.sesame_use_necessary_atoms: atoms_seq = utils.compute_necessary_atoms_seq( @@ -188,6 +189,8 @@ def _sesame_plan_with_astar( f" samples, discovering " f"{int(metrics['num_failures_discovered'])} failures") metrics["plan_length"] = len(plan) + metrics["refinement_time"] = (time.perf_counter() - + refinement_start_time) return plan, skeleton, metrics partial_refinements.append((skeleton, plan)) if time.perf_counter() - start_time > timeout: @@ -488,11 +491,17 @@ def _skeleton_generator( raise _SkeletonSearchTimeout -def run_low_level_search(task: Task, option_model: _OptionModelBase, - skeleton: List[_GroundNSRT], - atoms_sequence: List[Set[GroundAtom]], seed: int, - timeout: float, metrics: Metrics, - max_horizon: int) -> Tuple[List[_Option], bool]: +def run_low_level_search( + task: Task, + option_model: _OptionModelBase, + skeleton: List[_GroundNSRT], + atoms_sequence: List[Set[GroundAtom]], + seed: int, + timeout: float, + metrics: Metrics, + max_horizon: int, + refinement_time: Optional[List[float]] = None +) -> Tuple[List[_Option], bool]: """Backtracking search over continuous values. Returns a sequence of options and a boolean. If the boolean is True, @@ -515,6 +524,12 @@ def run_low_level_search(task: Task, option_model: _OptionModelBase, if nsrt.option.params_space.shape[0] > 0 else 1 for nsrt in skeleton ] plan: List[_Option] = [DummyOption for _ in skeleton] + # If refinement_time list is passed, record the refinement time + # distributed across each step of the skeleton + if refinement_time is not None: + assert len(refinement_time) == 0 + for _ in skeleton: + refinement_time.append(0) # The number of actions taken by each option in the plan. This is to # make sure that we do not exceed the task horizon. num_actions_per_option = [0 for _ in plan] @@ -525,10 +540,12 @@ def run_low_level_search(task: Task, option_model: _OptionModelBase, discovered_failures: List[Optional[_DiscoveredFailure]] = [ None for _ in skeleton ] + plan_found = False while cur_idx < len(skeleton): if time.perf_counter() - start_time > timeout: return longest_failed_refinement, False assert num_tries[cur_idx] < max_tries[cur_idx] + try_start_time = time.perf_counter() # Good debug point #2: if you have a skeleton that you think is # reasonable, but sampling isn't working, print num_tries here to # see at what step the backtracking search is getting stuck. @@ -592,7 +609,7 @@ def run_low_level_search(task: Task, option_model: _OptionModelBase, if all(a.holds(traj[cur_idx]) for a in expected_atoms): can_continue_on = True if cur_idx == len(skeleton): - return plan, True # success! + plan_found = True else: can_continue_on = False else: @@ -601,11 +618,17 @@ def run_low_level_search(task: Task, option_model: _OptionModelBase, can_continue_on = True if cur_idx == len(skeleton): if task.goal_holds(traj[cur_idx]): - return plan, True # success! - can_continue_on = False + plan_found = True + else: + can_continue_on = False else: # The option is not initiable. can_continue_on = False + if refinement_time is not None: + try_end_time = time.perf_counter() + refinement_time[cur_idx - 1] += try_end_time - try_start_time + if plan_found: + return plan, True # success! if not can_continue_on: # we got stuck, time to resample / backtrack! # Update the longest_failed_refinement found so far. if cur_idx > len(longest_failed_refinement): @@ -616,7 +639,7 @@ def run_low_level_search(task: Task, option_model: _OptionModelBase, # the longest_failed_refinement first. possible_failure = discovered_failures[cur_idx - 1] if possible_failure is not None and \ - CFG.sesame_propagate_failures == "immediately": + CFG.sesame_propagate_failures == "immediately": raise _DiscoveredFailureException( "Discovered a failure", possible_failure, {"longest_failed_refinement": longest_failed_refinement}) @@ -638,7 +661,7 @@ def run_low_level_search(task: Task, option_model: _OptionModelBase, # high-level search continues. for possible_failure in discovered_failures: if possible_failure is not None and \ - CFG.sesame_propagate_failures == "after_exhaust": + CFG.sesame_propagate_failures == "after_exhaust": raise _DiscoveredFailureException( "Discovered a failure", possible_failure, { "longest_failed_refinement": @@ -731,7 +754,7 @@ def _update_sas_file_with_failure(discovered_failure: _DiscoveredFailure, assert line.isdigit() num_variables = int(line) # Change num variables - new_sas_file_lines.append(f"{num_variables+1}\n") + new_sas_file_lines.append(f"{num_variables + 1}\n") elif "end_variable" in line: count_variables += 1 new_sas_file_lines.append(line) @@ -820,7 +843,7 @@ def _update_sas_file_with_failure(discovered_failure: _DiscoveredFailure, # Append preconditions if operator_str.replace("\n", "") == ground_op_str: new_sas_file_lines.append( - f"{num_precondition_conditons+1}\n") + f"{num_precondition_conditons + 1}\n") new_sas_file_lines.append( f"{num_variables} 0\n") # additional precondition else: @@ -831,7 +854,7 @@ def _update_sas_file_with_failure(discovered_failure: _DiscoveredFailure, j]) # Append effects if obj.name.lower() in operator_str: - new_sas_file_lines.append(f"{num_effects+1}\n") + new_sas_file_lines.append(f"{num_effects + 1}\n") new_sas_file_lines.append( f"0 {num_variables} -1 0\n") # additional effect else: @@ -1066,6 +1089,7 @@ def _sesame_plan_with_fast_downward( try: necessary_atoms_seq = utils.compute_necessary_atoms_seq( skeleton, atoms_sequence, task.goal) + refinement_start_time = time.perf_counter() plan, suc = run_low_level_search(task, option_model, skeleton, necessary_atoms_seq, seed, low_level_timeout, metrics, @@ -1075,6 +1099,8 @@ def _sesame_plan_with_fast_downward( raise PlanningTimeout("Planning timed out in refinement!") raise PlanningFailure("Skeleton produced by FD not refinable!") metrics["plan_length"] = len(plan) + metrics["refinement_time"] = (time.perf_counter() - + refinement_start_time) return plan, skeleton, metrics except _DiscoveredFailureException as e: metrics["num_failures_discovered"] += 1 diff --git a/predicators/refinement_estimators/base_refinement_estimator.py b/predicators/refinement_estimators/base_refinement_estimator.py index 02feabc61a..ce4936190f 100644 --- a/predicators/refinement_estimators/base_refinement_estimator.py +++ b/predicators/refinement_estimators/base_refinement_estimator.py @@ -4,6 +4,8 @@ from pathlib import Path from typing import List, Set +import numpy as np + from predicators.envs import get_or_create_env from predicators.settings import CFG from predicators.structs import GroundAtom, Task, _GroundNSRT @@ -14,6 +16,7 @@ class BaseRefinementEstimator(abc.ABC): def __init__(self) -> None: self._env = get_or_create_env(CFG.env) + self._rng = np.random.default_rng(CFG.seed) @classmethod @abc.abstractmethod diff --git a/predicators/refinement_estimators/cnn_refinement_estimator.py b/predicators/refinement_estimators/cnn_refinement_estimator.py index 2080d33c4e..bc595a11ed 100644 --- a/predicators/refinement_estimators/cnn_refinement_estimator.py +++ b/predicators/refinement_estimators/cnn_refinement_estimator.py @@ -26,8 +26,12 @@ def get_name(cls) -> str: def _model_predict(self, model: CNNRegressor, initial_task: Task) -> float: input_img = self._get_rendered_initial_state(initial_task) - cost = model.predict(input_img) - return cost[0] + refinement_time, low_level_count = model.predict(input_img) + cost = refinement_time + if CFG.refinement_data_include_execution_cost: + cost += (low_level_count * + CFG.refinement_data_low_level_execution_cost) + return cost def train(self, data: List[RefinementDatapoint]) -> None: """Train the CNN regressors on the data points for that skeleton, @@ -36,17 +40,18 @@ def train(self, data: List[RefinementDatapoint]) -> None: # Go through data and group them by skeleton grouped_input_imgs = defaultdict(list) grouped_targets = defaultdict(list) - for task, skeleton, atoms_sequence, succeeded, refinement_time in data: + for (task, skeleton, atoms_sequence, succeeded, refinement_time, + low_level_count) in data: # Convert skeleton and atoms_sequence into an immutable dict key key = self._immutable_model_dict_key(skeleton, atoms_sequence) # Render the initial state for use as an input image matrix img = self._get_rendered_initial_state(task) grouped_input_imgs[key].append(img) # Compute target value from refinement time and possible failure - value = refinement_time + target_time = sum(refinement_time) if not succeeded: - value += CFG.refinement_data_failed_refinement_penalty - grouped_targets[key].append([value]) + target_time += CFG.refinement_data_failed_refinement_penalty + grouped_targets[key].append([target_time, sum(low_level_count)]) # For each (skeleton, atoms_sequence) key, fit a CNNRegressor self._model_dict = {} @@ -55,7 +60,7 @@ def train(self, data: List[RefinementDatapoint]) -> None: X = np.stack(grouped_input_imgs[key]) assert len(X.shape) == 4 # expect (N, 3, H, W) Y = np.array(grouped_targets[key]) - assert Y.shape == (X.shape[0], 1) + assert Y.shape == (X.shape[0], 2) model = self._create_regressor() logging.info(f"Training CNN for skeleton {i}/{total_num_keys} " f"using {X.shape[0]} data points...") diff --git a/predicators/refinement_estimators/gnn_refinement_estimator.py b/predicators/refinement_estimators/gnn_refinement_estimator.py new file mode 100644 index 0000000000..3533af5d91 --- /dev/null +++ b/predicators/refinement_estimators/gnn_refinement_estimator.py @@ -0,0 +1,364 @@ +"""A learning-based refinement cost estimator that trains a GNN regression +model mapping initial state, intermediate atoms, goal, and operator to cost, +and estimates refinement cost for a full skeleton by summing the model output +over individual actions in the skeleton.""" + +import functools +import logging +from collections import defaultdict +from pathlib import Path +from typing import Any, DefaultDict, Dict, List, Optional, Set, Tuple + +import dill as pkl +import numpy as np +import torch +from torch.utils.data import DataLoader + +from predicators import utils +from predicators.gnn.gnn import EncodeProcessDecode, setup_graph_net +from predicators.gnn.gnn_utils import GraphDictDataset, compute_normalizers, \ + get_graph_batch_collate_with_device, get_single_model_prediction, \ + normalize_graph, train_model +from predicators.ground_truth_models import get_gt_nsrts, get_gt_options +from predicators.refinement_estimators import BaseRefinementEstimator +from predicators.settings import CFG +from predicators.structs import NSRT, GroundAtom, NDArray, Predicate, \ + RefinementDatapoint, State, Task, _GroundNSRT + + +class GNNRefinementEstimator(BaseRefinementEstimator): + """A refinement cost estimator that uses a GNN to predict refinement cost + from an initial state, intermediate atoms, goal, and abstract action.""" + + def __init__(self) -> None: + super().__init__() + self._gnn: Optional[EncodeProcessDecode] = None + self._data_exemplar: Tuple[Dict, Dict] = ({}, {}) + self._nsrts: List[NSRT] = [] + self._max_nsrt_objects = 0 + self._node_feature_to_index: Dict[Any, int] = {} + self._edge_feature_to_index: Dict[Any, int] = {} + self._nullary_predicates: List[Predicate] = [] + self._input_normalizers: Dict = {} + self._target_normalizers: Dict = {} + self._mse_loss = torch.nn.MSELoss() + self._device = torch.device("cuda:0" if CFG.use_torch_gpu + and torch.cuda.is_available() else "cpu") + self._setup_fields() + + @classmethod + def get_name(cls) -> str: + return "gnn" + + @property + def is_learning_based(self) -> bool: + return True + + def get_cost(self, initial_task: Task, skeleton: List[_GroundNSRT], + atoms_sequence: List[Set[GroundAtom]]) -> float: + assert self._gnn is not None, "Need to train" + cost = 0 + state, goal = initial_task.init, initial_task.goal + # Run each step of the skeleton through the GNN model to estimate cost + for i, action in enumerate(skeleton): + atoms = atoms_sequence[i] + in_graph = self._graphify_single_input(state, atoms, goal, action) + if CFG.gnn_do_normalization: + in_graph = normalize_graph(in_graph, self._input_normalizers) + out_graph = get_single_model_prediction(self._gnn, + in_graph, + device=self._device) + if CFG.gnn_do_normalization: + out_graph = normalize_graph(out_graph, + self._target_normalizers, + invert=True) + refinement_time, low_level_count = out_graph["globals"] + cost += refinement_time + if CFG.refinement_data_include_execution_cost: + cost += (low_level_count * + CFG.refinement_data_low_level_execution_cost) + return cost + + def train(self, data: List[RefinementDatapoint]) -> None: + """Split up each RefinementDatapoint into distinct training data points + for the per-action GNN, and train the GNN regressor.""" + graph_inputs = [] + graph_targets = [] + for (task, skeleton, atoms_sequence, succeeded, refinement_time, + low_level_count) in data: + state, goal = task.init, task.goal + for i, action in enumerate(skeleton): + atoms = atoms_sequence[i] + target_time = refinement_time[i] + # Add failed penalty to the value if failure occurred + if not succeeded: + target_time += CFG.refinement_data_failed_refinement_penalty + # Convert input and target to graphs + graph_inputs.append( + self._graphify_single_input(state, atoms, goal, action)) + graph_targets.append( + self._graphify_single_target( + target_time, low_level_count[i] if succeeded else 0)) + assert len(graph_inputs) and len(graph_targets), "No usable data" + self._data_exemplar = (graph_inputs[0], graph_targets[0]) + + # Normalize if needed + if CFG.gnn_do_normalization: + # Update normalization constants. Note that we do this for both + # the input graph and the target graph. + self._input_normalizers = compute_normalizers(graph_inputs) + self._target_normalizers = compute_normalizers( + graph_targets, + normalize_nodes=False, + normalize_edges=False, + ) + graph_inputs = [ + normalize_graph(g, self._input_normalizers) + for g in graph_inputs + ] + graph_targets = [ + normalize_graph(g, self._target_normalizers) + for g in graph_targets + ] + # Run training. + if CFG.gnn_use_validation_set: + ## Split data, using 10% for validation. + num_validation = max(1, int(len(graph_inputs) * 0.1)) + else: + num_validation = 0 + shuffled_indices = self._rng.permutation(len(graph_inputs)) + graph_inputs = [graph_inputs[i] for i in shuffled_indices] + graph_targets = [graph_targets[i] for i in shuffled_indices] + train_inputs = graph_inputs[num_validation:] + train_targets = graph_targets[num_validation:] + val_inputs = graph_inputs[:num_validation] + val_targets = graph_targets[:num_validation] + train_dataset = GraphDictDataset(train_inputs, train_targets) + val_dataset = GraphDictDataset(val_inputs, val_targets) + # Set up model + self._gnn = setup_graph_net(train_dataset, + num_steps=CFG.gnn_num_message_passing, + layer_size=CFG.gnn_layer_size).to( + self._device) + # Set up Adam optimizer and dataloaders. + optimizer = torch.optim.Adam(self._gnn.parameters(), + lr=CFG.gnn_learning_rate, + weight_decay=CFG.gnn_weight_decay) + graph_batch_collate = get_graph_batch_collate_with_device(self._device) + train_dataloader = DataLoader(train_dataset, + batch_size=CFG.gnn_batch_size, + shuffle=True, + num_workers=0, + collate_fn=graph_batch_collate) + val_dataloader = DataLoader(val_dataset, + batch_size=CFG.gnn_batch_size, + shuffle=False, + num_workers=0, + collate_fn=graph_batch_collate) + dataloaders = {"train": train_dataloader, "val": val_dataloader} + ## Launch training code. + logging.info(f"Training GNN on {len(train_inputs)} examples") + best_model_dict = train_model(self._gnn, + dataloaders, + optimizer=optimizer, + criterion=None, + global_criterion=self._global_criterion, + num_epochs=CFG.gnn_num_epochs, + do_validation=CFG.gnn_use_validation_set, + device=self._device) + self._gnn.load_state_dict(best_model_dict) + + def _global_criterion(self, output: torch.Tensor, + target: torch.Tensor) -> torch.Tensor: + """Global criterion function for training GNN.""" + return self._mse_loss(output, target) + + def _graphify_single_input(self, state: State, atoms: Set[GroundAtom], + goal: Set[GroundAtom], + action: _GroundNSRT) -> Dict: + """Convert (initial state, atoms, goal, action) to graph.""" + all_objects = list(state) + object_to_node = {obj: i for i, obj in enumerate(all_objects)} + num_objects = len(all_objects) + num_node_features = len(self._node_feature_to_index) + num_edge_features = max(len(self._edge_feature_to_index), 1) + + G = functools.partial(utils.wrap_predicate, prefix="GOAL-") + R = functools.partial(utils.wrap_predicate, prefix="REV-") + + # Add 1 node per object and create node features array + graph: Dict[str, NDArray[np.float64]] = { + "n_node": np.reshape(num_objects, [1]).astype(np.int64) + } + node_features = np.zeros((num_objects, num_node_features)) + # Handle each object's state features + for obj in state: + obj_index = object_to_node[obj] + for feat, val in zip(obj.type.feature_names, state[obj]): + feat_index = self._node_feature_to_index[f"feat_{feat}"] + node_features[obj_index, feat_index] = val + + # Initialize feature vectors for nullary/binary predicates + edge_features_dict: DefaultDict[ + Tuple[int, int], + np.ndarray] = defaultdict(lambda: np.zeros(num_edge_features)) + atoms_globals = np.zeros(len(self._nullary_predicates), dtype=np.int64) + goal_globals = np.zeros(len(self._nullary_predicates), dtype=np.int64) + + # Handle atoms + for atom in atoms: + arity = atom.predicate.arity + if arity == 0: + atoms_globals[self._nullary_predicates.index( + atom.predicate)] = 1 + continue + obj0_index = object_to_node[atom.objects[0]] + if arity == 1: + atom_index = self._node_feature_to_index[atom.predicate] + node_features[obj0_index, atom_index] = 1 + elif arity == 2: + obj1_index = object_to_node[atom.objects[1]] + atom_index = self._edge_feature_to_index[atom.predicate] + edge_features_dict[(obj0_index, obj1_index)][atom_index] = 1 + rev_index = self._edge_feature_to_index[R(atom.predicate)] + edge_features_dict[(obj1_index, obj0_index)][rev_index] = 1 + + # Handle goal atoms + for atom in goal: + arity = atom.predicate.arity + if arity == 0: + goal_globals[self._nullary_predicates.index( + atom.predicate)] = 1 + continue + obj0_index = object_to_node[atom.objects[0]] + if arity == 1: + atom_index = self._node_feature_to_index[G(atom.predicate)] + node_features[obj0_index, atom_index] = 1 + elif arity == 2: + obj1_index = object_to_node[atom.objects[1]] + atom_index = self._edge_feature_to_index[G(atom.predicate)] + edge_features_dict[(obj0_index, obj1_index)][atom_index] = 1 + rev_index = self._edge_feature_to_index[G(R(atom.predicate))] + edge_features_dict[(obj1_index, obj0_index)][rev_index] = 1 + + # Handle action globals + action_globals = np.zeros(len(self._nsrts), dtype=np.int64) + action_globals[self._nsrts.index(action.parent)] = 1 + for i, action_obj in enumerate(action.objects): + obj_index = object_to_node[action_obj] + feat_index = self._node_feature_to_index[f"nsrt-{i}"] + node_features[obj_index, feat_index] = 1 + + # Organize + graph["nodes"] = node_features.astype(np.float32) + graph["globals"] = np.r_[atoms_globals, goal_globals, action_globals] + senders, receivers, edges = [], [], [] + for (sender, receiver), edge in edge_features_dict.items(): + senders.append(sender) + receivers.append(receiver) + edges.append(edge) + n_edge = len(edges) + graph["senders"] = np.reshape(senders, [n_edge]).astype(np.int64) + graph["receivers"] = np.reshape(receivers, [n_edge]).astype(np.int64) + graph["edges"] = np.reshape(edges, [n_edge, num_edge_features]) + graph["n_edge"] = np.reshape(n_edge, [1]).astype(np.int64) + + return graph + + @staticmethod + def _graphify_single_target(refinement_time: float, + low_level_count: int) -> Dict: + """Convert target cost into a graph.""" + graph = { + "n_node": np.array([1], dtype=np.int64), + "nodes": np.array([]), + "n_edge": np.array([0], dtype=np.int64), + "edges": np.array([]), + "senders": np.array([]), + "receivers": np.array([]), + "globals": np.array([refinement_time, low_level_count]), + } + return graph + + def _setup_fields(self) -> None: + """Assign indices to each node and edge feature, and also identify list + of nullary predicates.""" + self._node_feature_to_index = {} + self._edge_feature_to_index = {} + node_feature_index = 0 + edge_feature_index = 0 + self._nullary_predicates = [] + + G = functools.partial(utils.wrap_predicate, prefix="GOAL-") + R = functools.partial(utils.wrap_predicate, prefix="REV-") + + # Identify object types + obj_attrs_set = set() + for obj_type in sorted(self._env.types): + self._node_feature_to_index[ + f"type_{obj_type.name}"] = node_feature_index + node_feature_index += 1 + # Also list object features to add to node features later + for feat in obj_type.feature_names: + obj_attrs_set.add(f"feat_{feat}") + + # Identify predicates + for predicate in sorted(self._env.predicates): + arity = predicate.arity + assert arity <= 2, "Predicates with arity > 2 are not supported" + if arity == 0: + self._nullary_predicates.append(predicate) + elif arity == 1: + for feature in (predicate, G(predicate)): + self._node_feature_to_index[feature] = node_feature_index + node_feature_index += 1 + elif arity == 2: + for feature in (predicate, R(predicate), G(predicate), + G(R(predicate))): + self._edge_feature_to_index[feature] = edge_feature_index + edge_feature_index += 1 + + # Identify NSRTs + gt_nsrts = get_gt_nsrts(CFG.env, self._env.predicates, + get_gt_options(self._env.get_name())) + self._nsrts = sorted(gt_nsrts) + max_nsrt_objects = 0 + for nsrt in self._nsrts: + max_nsrt_objects = max(max_nsrt_objects, len(nsrt.parameters)) + self._max_nsrt_objects = max_nsrt_objects + for i in range(max_nsrt_objects): + self._node_feature_to_index[f"nsrt-{i}"] = node_feature_index + node_feature_index += 1 + + # Add object features + for obj_attr in sorted(obj_attrs_set): + self._node_feature_to_index[obj_attr] = node_feature_index + node_feature_index += 1 + + def save_model(self, filepath: Path) -> None: + info = { + "exemplar": self._data_exemplar, + "state_dict": self._gnn.state_dict() if self._gnn else None, + "input_normalizers": self._input_normalizers, + "target_normalizers": self._target_normalizers, + } + with open(filepath, "wb") as f: + pkl.dump(info, f) + + def load_model(self, filepath: Path) -> None: + with open(filepath, "rb") as f: + info = pkl.load(f) + self._data_exemplar = info["exemplar"] + ex_input, ex_target = self._data_exemplar + example_dataset = GraphDictDataset([ex_input], [ex_target]) + self._gnn = setup_graph_net(example_dataset, + num_steps=CFG.gnn_num_message_passing, + layer_size=CFG.gnn_layer_size).to( + self._device) + state_dict = info["state_dict"] + if state_dict is not None: + self._gnn.load_state_dict(info["state_dict"]) + self._input_normalizers = info["input_normalizers"] + self._target_normalizers = info["target_normalizers"] + # Run GNN once to avoid the weird delay issue + get_single_model_prediction(self._gnn, ex_input, device=self._device) diff --git a/predicators/refinement_estimators/oracle_refinement_estimator.py b/predicators/refinement_estimators/oracle_refinement_estimator.py index 197eba39c2..35878d7a0b 100644 --- a/predicators/refinement_estimators/oracle_refinement_estimator.py +++ b/predicators/refinement_estimators/oracle_refinement_estimator.py @@ -3,6 +3,7 @@ from typing import List, Set from predicators.envs import BaseEnv +from predicators.envs.exit_garage import ExitGarageEnv from predicators.refinement_estimators import BaseRefinementEstimator from predicators.settings import CFG from predicators.structs import GroundAtom, State, Task, _GroundNSRT @@ -75,11 +76,23 @@ def exit_garage_oracle_estimator( atoms_sequence: List[Set[GroundAtom]], ) -> float: """Oracle refinement estimation function for exit_garage env.""" - del env, initial_state, atoms_sequence # unused + del atoms_sequence # unused + + assert isinstance(env, ExitGarageEnv) + obstacle_radius = env.obstacle_radius + obstruction_ub = env.exit_top + 2 * obstacle_radius + obstruction_lb = env.exit_top - env.exit_height - 2 * obstacle_radius # Each picked-up obstacle decreases the refinement cost of DriveCarToExit - cost = 0 + # if it is in the direct path of the car to the exit, otherwise it has a + # positive cost and should be avoided + cost: float = 0 for ground_nsrt in skeleton: - if ground_nsrt.name == "PickupObstacle": - cost -= 1 + if ground_nsrt.name == "ClearObstacle": + obstacle = ground_nsrt.objects[1] + obstacle_y = initial_state.get(obstacle, "y") + if obstruction_lb < obstacle_y < obstruction_ub: + cost -= 1 + else: + cost += 0.5 return cost diff --git a/predicators/refinement_estimators/per_skeleton_estimator.py b/predicators/refinement_estimators/per_skeleton_estimator.py index 366781fffc..2b374b76fa 100644 --- a/predicators/refinement_estimators/per_skeleton_estimator.py +++ b/predicators/refinement_estimators/per_skeleton_estimator.py @@ -64,3 +64,7 @@ def save_model(self, filepath: Path) -> None: def load_model(self, filepath: Path) -> None: with open(filepath, "rb") as f: self._model_dict = pkl.load(f) + # Run every model once to avoid weird delay issue + if self._model_dict is not None: + for v in self._model_dict.values(): + self._model_predict(v, self._env.get_train_tasks()[0].task) diff --git a/predicators/refinement_estimators/tabular_refinement_estimator.py b/predicators/refinement_estimators/tabular_refinement_estimator.py index c5391046d8..b1e9d8c436 100644 --- a/predicators/refinement_estimators/tabular_refinement_estimator.py +++ b/predicators/refinement_estimators/tabular_refinement_estimator.py @@ -2,7 +2,7 @@ and atoms_sequence to average refinement time.""" from collections import defaultdict -from typing import List +from typing import List, Tuple import numpy as np @@ -12,7 +12,8 @@ from predicators.structs import RefinementDatapoint, Task -class TabularRefinementEstimator(PerSkeletonRefinementEstimator[float]): +class TabularRefinementEstimator(PerSkeletonRefinementEstimator[Tuple[float, + float]]): """A refinement cost estimator that memorizes refinement data using a tabular method.""" @@ -20,25 +21,34 @@ class TabularRefinementEstimator(PerSkeletonRefinementEstimator[float]): def get_name(cls) -> str: return "tabular" - def _model_predict(self, model: float, initial_task: Task) -> float: - return model + def _model_predict(self, model: Tuple[float, float], + initial_task: Task) -> float: + refinement_time, low_level_count = model + cost = refinement_time + if CFG.refinement_data_include_execution_cost: + cost += (low_level_count * + CFG.refinement_data_low_level_execution_cost) + return cost def train(self, data: List[RefinementDatapoint]) -> None: """Train the tabular refinement estimator on data by computing average refinement time per (skeleton, atoms_sequence) pair.""" - grouped_data = defaultdict(list) + grouped_times = defaultdict(list) + grouped_counts = defaultdict(list) # Go through data and group them by skeleton - for _, skeleton, atoms_sequence, succeeded, refinement_time in data: + for (_, skeleton, atoms_sequence, succeeded, refinement_time, + low_level_count) in data: # Convert skeleton and atoms_sequence into an immutable dict key key = self._immutable_model_dict_key(skeleton, atoms_sequence) - value = refinement_time + target_time = sum(refinement_time) # Add failed refinement penalty to the value if failure occurred if not succeeded: - value += CFG.refinement_data_failed_refinement_penalty - grouped_data[key].append(value) + target_time += CFG.refinement_data_failed_refinement_penalty + grouped_times[key].append(target_time) + grouped_counts[key].append(sum(low_level_count)) # Compute average time for each (skeleton, atoms_sequence) key processed_data = { - key: float(np.mean(times)) - for key, times in grouped_data.items() + key: (float(np.mean(times)), float(np.mean(grouped_counts[key]))) + for key, times in grouped_times.items() } self._model_dict = processed_data diff --git a/predicators/settings.py b/predicators/settings.py index 76021d005b..91978e43a9 100644 --- a/predicators/settings.py +++ b/predicators/settings.py @@ -285,7 +285,7 @@ class GlobalSettings: doors_draw_debug = False # narrow_passage env parameters - narrow_passage_open_door_refine_penalty = 0.2 + narrow_passage_open_door_refine_penalty = 0 narrow_passage_door_width_padding_lb = 1e-4 narrow_passage_door_width_padding_ub = 0.015 narrow_passage_passage_width_padding_lb = 5e-4 @@ -295,12 +295,12 @@ class GlobalSettings: narrow_passage_birrt_smooth_amt = 50 # exit_garage env parameters - exit_garage_pick_place_refine_penalty = 0.2 + exit_garage_clear_refine_penalty = 0 exit_garage_min_num_obstacles = 2 - exit_garage_max_num_obstacles = 4 # inclusive - exit_garage_rrt_extend_fn_threshold = 1e-4 + exit_garage_max_num_obstacles = 3 # inclusive + exit_garage_rrt_extend_fn_threshold = 1e-3 exit_garage_rrt_num_control_samples = 100 - exit_garage_rrt_num_attempts = 10 + exit_garage_rrt_num_attempts = 3 exit_garage_rrt_num_iters = 100 exit_garage_rrt_sample_goal_eps = 0.1 exit_garage_motion_planning_ignore_obstacles = False @@ -333,6 +333,7 @@ class GlobalSettings: gnn_num_message_passing = 3 gnn_layer_size = 16 gnn_learning_rate = 1e-3 + gnn_weight_decay = 0 gnn_num_epochs = 25000 gnn_batch_size = 128 gnn_do_normalization = False # performs worse in Cover when True @@ -539,6 +540,8 @@ class GlobalSettings: refinement_data_skeleton_generator_timeout = 20 refinement_data_low_level_search_timeout = 5 # timeout for refinement try refinement_data_failed_refinement_penalty = 5 # added time on failure + refinement_data_include_execution_cost = True + refinement_data_low_level_execution_cost = 0.05 # per action cost to add # CNN refinement cost estimator image pre-processing parameters cnn_refinement_estimator_crop = False # True diff --git a/predicators/structs.py b/predicators/structs.py index 4e3555895f..fe476ec0d2 100644 --- a/predicators/structs.py +++ b/predicators/structs.py @@ -1803,7 +1803,7 @@ def __len__(self) -> int: SamplerDatapoint = Tuple[State, VarToObjSub, _Option, Optional[Set[GroundAtom]]] RefinementDatapoint = Tuple[Task, List[_GroundNSRT], List[Set[GroundAtom]], - bool, float] + bool, List[float], List[int]] # For PDDLEnv environments, given a desired number of problems and an rng, # returns a list of that many PDDL problem strings. PDDLProblemGenerator = Callable[[int, np.random.Generator], List[str]] diff --git a/predicators/train_refinement_estimator.py b/predicators/train_refinement_estimator.py index a7ac3a397d..0490054b49 100644 --- a/predicators/train_refinement_estimator.py +++ b/predicators/train_refinement_estimator.py @@ -121,6 +121,12 @@ def _train_refinement_estimation_approach() -> None: assert refinement_estimator.is_learning_based, \ "Refinement estimator (--refinement_estimator) must be learning-based" + # Train with only a subset of dataset if desired + if CFG.refinement_train_with_frac_data >= 0: + num_points = int(len(dataset) * CFG.refinement_train_with_frac_data) + dataset = dataset[:num_points] + logging.info(f"Using {len(dataset)} data points") + # Train estimator train_start_time = time.perf_counter() refinement_estimator.train(dataset) @@ -145,8 +151,12 @@ def _get_refinement_estimation_parser() -> ArgumentParser: parser = utils.create_arg_parser() # Add script-specific flags to the parser parser.add_argument("--refinement_data_file_name", default="", type=str) + parser.add_argument("--refinement_data_save_every", default=-1, type=int) parser.add_argument("--skip_refinement_estimator_training", action="store_true") + parser.add_argument("--refinement_train_with_frac_data", + default=-1, + type=float) return parser @@ -158,36 +168,40 @@ def _generate_refinement_data( nsrts = get_gt_nsrts(CFG.env, preds, options) option_model = create_option_model(CFG.option_model_name) + # Create saved data directory. + os.makedirs(CFG.data_dir, exist_ok=True) + # Create file path. + temp_file_path = _get_data_file_path(temp=True) + data_file_path = _get_data_file_path() + # Generate the dataset and save it to file. dataset: List[RefinementDatapoint] = [] for test_task_idx, task in enumerate(train_tasks): try: - _collect_refinement_data_for_task(task, option_model, nsrts, preds, - env.types, + _collect_refinement_data_for_task(env, task, option_model, nsrts, + preds, env.types, CFG.seed + test_task_idx, dataset) logging.info(f"Task {test_task_idx+1} / {num_tasks}: Success") except (PlanningTimeout, _SkeletonSearchTimeout) as e: logging.info(f"Task {test_task_idx+1} / {num_tasks} failed by " f"timing out: {e}") + + # Save the intermediate dataset after every N training tasks + if CFG.refinement_data_save_every > 0 and \ + (test_task_idx + 1) % CFG.refinement_data_save_every == 0: + logging.info(f"Writing intermediate dataset to {temp_file_path}") + with open(temp_file_path, "wb") as f: + pkl.dump(dataset, f) + logging.info(f"Got {len(dataset)} data points.") - # Create saved data directory. - os.makedirs(CFG.data_dir, exist_ok=True) - # Create file path. - data_file_path = _get_data_file_path() - # Store the train tasks just in case we need it in the future. - # (Note: unpickling this doesn't work...) - # data_content = { - # "tasks": train_tasks, - # "data": dataset, - # } logging.info(f"Writing dataset to {data_file_path}") with open(data_file_path, "wb") as f: pkl.dump(dataset, f) return dataset -def _collect_refinement_data_for_task(task: Task, +def _collect_refinement_data_for_task(env: BaseEnv, task: Task, option_model: _OptionModelBase, nsrts: Set[NSRT], predicates: Set[Predicate], @@ -212,40 +226,64 @@ def _collect_refinement_data_for_task(task: Task, heuristic = utils.create_task_planning_heuristic( CFG.sesame_task_planning_heuristic, init_atoms, task.goal, reachable_nsrts, predicates, objects) + generated_skeletons = [] try: - gen = _skeleton_generator( - task, reachable_nsrts, init_atoms, heuristic, seed, - CFG.refinement_data_skeleton_generator_timeout, metrics, - CFG.refinement_data_num_skeletons) - for skeleton, atoms_sequence in gen: - necessary_atoms_seq = utils.compute_necessary_atoms_seq( - skeleton, atoms_sequence, task.goal) - refinement_start_time = time.perf_counter() - _, suc = run_low_level_search( - task, option_model, skeleton, necessary_atoms_seq, seed, - CFG.refinement_data_low_level_search_timeout, metrics, - CFG.horizon) - # Calculate time taken for refinement. - refinement_time = time.perf_counter() - refinement_start_time - # Add datapoint to dataset - data.append(( - task, - skeleton, - atoms_sequence, - suc, - refinement_time, - )) + for item in _skeleton_generator( + task, reachable_nsrts, init_atoms, heuristic, seed, + CFG.refinement_data_skeleton_generator_timeout, metrics, + CFG.refinement_data_num_skeletons): + generated_skeletons.append(item) except _MaxSkeletonsFailure: # Done finding skeletons - return + pass + logging.info(f"Trying to refine {len(generated_skeletons)} skeletons") + for skeleton, atoms_sequence in generated_skeletons: + necessary_atoms_seq = utils.compute_necessary_atoms_seq( + skeleton, atoms_sequence, task.goal) + # This list will be mutated by run_low_level_search to record + # the refinement time for each step of the skeleton + refinement_time_list: List[float] = [] + plan, suc = run_low_level_search( + task, + option_model, + skeleton, + necessary_atoms_seq, + seed, + CFG.refinement_data_low_level_search_timeout, + metrics, + CFG.horizon, + refinement_time=refinement_time_list) + assert len(refinement_time_list) == len(skeleton) + low_level_action_count: List[int] = [] + # On plan success, count the low level actions per abstract action + if suc and CFG.refinement_data_include_execution_cost: + s = task.init + for action in plan: + action_count = 0 + while not action.terminal(s): + s = env.simulate(s, action.policy(s)) + action_count += 1 + low_level_action_count.append(action_count) + assert len(low_level_action_count) == len(skeleton) + # Add datapoint to dataset + data.append(( + task, + skeleton, + atoms_sequence, + suc, + refinement_time_list, + low_level_action_count, + )) -def _get_data_file_path() -> Path: +def _get_data_file_path(temp: bool = False) -> Path: if len(CFG.refinement_data_file_name): file_name = CFG.refinement_data_file_name else: config_path_str = utils.get_config_path_str() file_name = f"refinement_data_{config_path_str}.data" + if temp: + file_name += ".temp" data_file_path = Path(CFG.data_dir) / file_name return data_file_path diff --git a/predicators/utils.py b/predicators/utils.py index 92025cbe4a..416abb0079 100644 --- a/predicators/utils.py +++ b/predicators/utils.py @@ -57,6 +57,14 @@ matplotlib.use("Agg") +# Unpickling CUDA models errs out if the device isn't recognized because of +# an unusual name, including in supercloud, but we can set it manually +if "CUDA_VISIBLE_DEVICES" in os.environ: # pragma: no cover + cuda_visible_devices = os.environ["CUDA_VISIBLE_DEVICES"].split(",") + if len(cuda_visible_devices) and cuda_visible_devices[0] != "0": + cuda_visible_devices[0] = "0" + os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(cuda_visible_devices) + def count_positives_for_ops( strips_ops: List[STRIPSOperator], diff --git a/scripts/analyze_results_directory.py b/scripts/analyze_results_directory.py index 29aa47c451..e44e6ff722 100644 --- a/scripts/analyze_results_directory.py +++ b/scripts/analyze_results_directory.py @@ -32,6 +32,7 @@ ("AVG_TEST_TIME", "avg_suc_time"), ("AVG_NODES_CREATED", "avg_num_nodes_created"), ("LEARNING_TIME", "learning_time"), + ("AVG_REF_COST", "avg_ref_cost"), # ("AVG_SAMPLES_PER_PLAN", "avg_num_samples"), # ("MIN_SAMPLES_PER_PLAN", "min_num_samples"), # ("MAX_SAMPLES_PER_PLAN", "max_num_samples"), diff --git a/scripts/cluster_utils.py b/scripts/cluster_utils.py index 2d1883d7a7..979062e2da 100644 --- a/scripts/cluster_utils.py +++ b/scripts/cluster_utils.py @@ -21,6 +21,7 @@ class RunConfig: args: List[str] # e.g. --make_test_videos flags: Dict[str, Any] # e.g. --num_train_tasks 1 use_gpu: bool # e.g. --use_gpu True + train_refinement_estimator: bool # e.g. --train_refinement_estimator True def __post_init__(self) -> None: # For simplicity, disallow overrides of the SAVE_DIRS. @@ -50,7 +51,9 @@ def config_to_logfile(cfg: RunConfig, suffix: str = ".log") -> str: else: assert isinstance(cfg, BatchSeedRunConfig) seed = None - return f"{cfg.env}__{cfg.approach}__{cfg.experiment_id}__{seed}" + suffix + name = "train_" if cfg.train_refinement_estimator else "" + name += f"{cfg.env}__{cfg.approach}__{cfg.experiment_id}__{seed}" + suffix + return name def config_to_cmd_flags(cfg: RunConfig) -> str: @@ -89,11 +92,19 @@ def generate_run_configs(config_filename: str, use_gpu = config["USE_GPU"] else: use_gpu = False + if "TRAIN_REFINEMENT_ESTIMATOR" in config.keys(): + train_refinement_estimator = config["TRAIN_REFINEMENT_ESTIMATOR"] + else: + train_refinement_estimator = False # Loop over approaches. for approach_exp_id, approach_config in config["APPROACHES"].items(): + if approach_config.get("SKIP", False): + continue approach = approach_config["NAME"] # Loop over envs. for env_exp_id, env_config in config["ENVS"].items(): + if env_config.get("SKIP", False): + continue env = env_config["NAME"] # Create the experiment ID, args, and flags. experiment_id = f"{env_exp_id}-{approach_exp_id}" @@ -111,11 +122,13 @@ def generate_run_configs(config_filename: str, if batch_seeds: yield BatchSeedRunConfig(experiment_id, approach, env, run_args, run_flags, use_gpu, + train_refinement_estimator, start_seed, num_seeds) else: for seed in range(start_seed, start_seed + num_seeds): yield SingleSeedRunConfig(experiment_id, approach, env, run_args, run_flags, use_gpu, + train_refinement_estimator, seed) diff --git a/scripts/configs/refinement_cost_learning.yaml b/scripts/configs/refinement_cost_learning.yaml new file mode 100644 index 0000000000..67ba95b98a --- /dev/null +++ b/scripts/configs/refinement_cost_learning.yaml @@ -0,0 +1,76 @@ +# Data collection and training for refinement cost estimation +--- +APPROACHES: + tabular: + NAME: "refinement_estimation" + SKIP: true + FLAGS: + refinement_estimator: "tabular" + cnn: + NAME: "refinement_estimation" + SKIP: true + FLAGS: + refinement_estimator: "cnn" + refinement_train_with_frac_data: 0.25 + gnn: + NAME: "refinement_estimation" + SKIP: true + FLAGS: + refinement_estimator: "gnn" +ENVS: + variable_passage: + NAME: "narrow_passage" + SKIP: true + FLAGS: + # cnn + learning_rate: 0.0002 + weight_decay: 0.001 + # gnn + gnn_num_message_passing: 3 + gnn_layer_size: 32 + gnn_learning_rate: 0.00001 + gnn_weight_decay: 0.0005 + gnn_num_epochs: 10000 + gnn_batch_size: 300 + gnn_do_normalization: true + gnn_use_validation_set: true + # data collection + num_train_tasks: 10000 + exit_garage: + NAME: "exit_garage" + SKIP: true + FLAGS: + exit_garage_min_num_obstacles: 2 + exit_garage_max_num_obstacles: 3 # inclusive + exit_garage_rrt_num_attempts: 3 + refinement_data_num_skeletons: 15 + refinement_estimation_num_skeletons_generated: 15 + refinement_data_low_level_search_timeout: 15 + refinement_data_failed_refinement_penalty: 20 + # cnn + learning_rate: 0.0002 + weight_decay: 0.001 + # gnn + gnn_num_message_passing: 3 + gnn_layer_size: 32 + gnn_learning_rate: 0.00001 + gnn_weight_decay: 0.0001 + gnn_num_epochs: 10000 + gnn_batch_size: 250 + gnn_do_normalization: true + gnn_use_validation_set: true + # data collection + num_train_tasks: 500 +ARGS: +# - "load_data" + - "skip_refinement_estimator_training" +FLAGS: # general flags + refinement_data_save_every: 100 + num_test_tasks: 50 + use_torch_gpu: true + pytorch_train_print_every: 10 + # refinement_data_file_name: "refinement_data_narrow_passage_10000_new2.data" +START_SEED: 457 +NUM_SEEDS: 1 +USE_GPU: true +TRAIN_REFINEMENT_ESTIMATOR: true diff --git a/scripts/configs/refinement_cost_learning_test.yaml b/scripts/configs/refinement_cost_learning_test.yaml new file mode 100644 index 0000000000..69a1c40474 --- /dev/null +++ b/scripts/configs/refinement_cost_learning_test.yaml @@ -0,0 +1,83 @@ +# Run experiments for refinement cost estimation +--- +APPROACHES: + benchmark: + NAME: "oracle" + SKIP: false + oracle: + NAME: "refinement_estimation" + SKIP: false + FLAGS: + refinement_estimator: "oracle" + tabular: + NAME: "refinement_estimation" + SKIP: false + FLAGS: + refinement_estimator: "tabular" + cnn: + NAME: "refinement_estimation" + SKIP: false + FLAGS: + refinement_estimator: "cnn" + gnn: + NAME: "refinement_estimation" + SKIP: false + FLAGS: + refinement_estimator: "gnn" +ENVS: + variable_passage: + NAME: "narrow_passage" + SKIP: false + FLAGS: + # gnn + gnn_num_message_passing: 3 + gnn_layer_size: 32 + gnn_do_normalization: true + timeout: 1 + fixed_passage: + NAME: "narrow_passage" + SKIP: false + FLAGS: + narrow_passage_door_width_padding_lb: 0.01 + narrow_passage_door_width_padding_ub: 0.01 + narrow_passage_passage_width_padding_lb: 0.0005 + narrow_passage_passage_width_padding_ub: 0.0005 + # gnn + gnn_num_message_passing: 3 + gnn_layer_size: 32 + gnn_do_normalization: true + timeout: 1 + exit_garage: + NAME: "exit_garage" + SKIP: true + FLAGS: + exit_garage_min_num_obstacles: 2 + exit_garage_max_num_obstacles: 3 # inclusive + exit_garage_rrt_num_attempts: 3 + refinement_estimation_num_skeletons_generated: 15 + # gnn + gnn_num_message_passing: 3 + gnn_layer_size: 32 + gnn_do_normalization: true + timeout: 20 + cluttered_garage: + NAME: "exit_garage" + SKIP: true + FLAGS: + exit_garage_min_num_obstacles: 4 + exit_garage_max_num_obstacles: 4 + exit_garage_rrt_num_attempts: 3 + refinement_estimation_num_skeletons_generated: 50 + # gnn + gnn_num_message_passing: 3 + gnn_layer_size: 32 + gnn_do_normalization: true + timeout: 30 +ARGS: [] +FLAGS: # general flags + num_test_tasks: 50 + use_torch_gpu: true + refinement_data_low_level_execution_cost: 0.05 +START_SEED: 456 +NUM_SEEDS: 5 +USE_GPU: true diff --git a/scripts/local/launch.py b/scripts/local/launch.py index 391fa41177..cbbdccad38 100644 --- a/scripts/local/launch.py +++ b/scripts/local/launch.py @@ -30,7 +30,11 @@ def _main() -> None: cmd_flags = config_to_cmd_flags(cfg) logfile = os.path.join("logs", config_to_logfile(cfg)) cmd_flags = config_to_cmd_flags(cfg) - cmd = f"python predicators/main.py {cmd_flags} > {logfile}" + if cfg.train_refinement_estimator: + entry_point = "train_refinement_estimator.py" + else: + entry_point = "main.py" + cmd = f"python predicators/{entry_point} {cmd_flags} > {logfile}" cmds.append(cmd) # Run the commands in order. num_cmds = len(cmds) diff --git a/scripts/supercloud/launch.py b/scripts/supercloud/launch.py index 78a6ef1034..3889c14495 100755 --- a/scripts/supercloud/launch.py +++ b/scripts/supercloud/launch.py @@ -57,9 +57,13 @@ def _launch_experiments(config_file: str) -> None: log_dir = "logs" log_prefix = config_to_logfile(cfg, suffix="") # Launch a job for this experiment. - submit_supercloud_job(cfg.experiment_id, log_dir, log_prefix, - cmd_flags, cfg.start_seed, cfg.num_seeds, - cfg.use_gpu) + if cfg.train_refinement_estimator: + entry_point = "train_refinement_estimator.py" + else: + entry_point = "main.py" + submit_supercloud_job(entry_point, cfg.experiment_id, log_dir, + log_prefix, cmd_flags, cfg.start_seed, + cfg.num_seeds, cfg.use_gpu) if __name__ == "__main__": diff --git a/scripts/supercloud/submit_supercloud_job.py b/scripts/supercloud/submit_supercloud_job.py index 2d52181314..eb37b9de27 100644 --- a/scripts/supercloud/submit_supercloud_job.py +++ b/scripts/supercloud/submit_supercloud_job.py @@ -19,11 +19,12 @@ def _run() -> None: log_dir = CFG.log_dir logfile_prefix = utils.get_config_path_str() args_and_flags_str = " ".join(sys.argv[1:]) - return submit_supercloud_job(job_name, log_dir, logfile_prefix, + return submit_supercloud_job("main.py", job_name, log_dir, logfile_prefix, args_and_flags_str, START_SEED, NUM_SEEDS) -def submit_supercloud_job(job_name: str, +def submit_supercloud_job(entry_point: str, + job_name: str, log_dir: str, logfile_prefix: str, args_and_flags_str: str, @@ -31,12 +32,13 @@ def submit_supercloud_job(job_name: str, num_seeds: int, use_gpu: bool = False) -> None: """Launch the supercloud job.""" + assert entry_point in ("main.py", "train_refinement_estimator.py") os.makedirs(log_dir, exist_ok=True) logfile_pattern = os.path.join(log_dir, f"{logfile_prefix}__%j.log") assert logfile_pattern.count("None") == 1 logfile_pattern = logfile_pattern.replace("None", "%a") - mystr = (f"#!/bin/bash\npython predicators/main.py {args_and_flags_str} " - f"--seed $SLURM_ARRAY_TASK_ID") + mystr = (f"#!/bin/bash\npython predicators/{entry_point} " + f"{args_and_flags_str} --seed $SLURM_ARRAY_TASK_ID") temp_run_file = "temp_run_file.sh" assert not os.path.exists(temp_run_file) with open(temp_run_file, "w", encoding="utf-8") as f: diff --git a/tests/approaches/test_oracle_approach.py b/tests/approaches/test_oracle_approach.py index e9b1c819ca..c1e85972ac 100644 --- a/tests/approaches/test_oracle_approach.py +++ b/tests/approaches/test_oracle_approach.py @@ -174,7 +174,7 @@ "doors_max_obstacles_per_room": 1, }] EXTRA_ARGS_ORACLE_APPROACH["exit_garage"] = [{ - "exit_garage_pick_place_refine_penalty": + "exit_garage_clear_refine_penalty": 0, "exit_garage_min_num_obstacles": 1, @@ -185,7 +185,7 @@ "exit_garage_rrt_sample_goal_eps": 0.3, }, { - "exit_garage_pick_place_refine_penalty": + "exit_garage_clear_refine_penalty": 0, "exit_garage_min_num_obstacles": 3, diff --git a/tests/envs/test_exit_garage.py b/tests/envs/test_exit_garage.py index 4de8b6e831..8109a0fd4b 100644 --- a/tests/envs/test_exit_garage.py +++ b/tests/envs/test_exit_garage.py @@ -21,21 +21,16 @@ def test_exit_garage_properties(): for task in env.get_test_tasks(): for obj in task.init: assert len(obj.type.feature_names) == len(task.init[obj]) - assert len(env.predicates) == 5 - (CarHasExited, CarryingObstacle, NotCarryingObstacle, ObstacleCleared, - ObstacleNotCleared) = sorted(env.predicates) + assert len(env.predicates) == 3 + CarHasExited, ObstacleCleared, ObstacleNotCleared = sorted(env.predicates) assert CarHasExited.name == "CarHasExited" - assert CarryingObstacle.name == "CarryingObstacle" - assert NotCarryingObstacle.name == "NotCarryingObstacle" assert ObstacleCleared.name == "ObstacleCleared" assert ObstacleNotCleared.name == "ObstacleNotCleared" assert env.goal_predicates == {CarHasExited} - assert len(get_gt_options(env.get_name())) == 3 - (DriveCarToExit, PickupObstacle, - StoreObstacle) = sorted(get_gt_options(env.get_name())) + assert len(get_gt_options(env.get_name())) == 2 + ClearObstacle, DriveCarToExit = sorted(get_gt_options(env.get_name())) + assert ClearObstacle.name == "ClearObstacle" assert DriveCarToExit.name == "DriveCarToExit" - assert PickupObstacle.name == "PickupObstacle" - assert StoreObstacle.name == "StoreObstacle" assert len(env.types) == 4 car_type, obstacle_type, robot_type, storage_type = sorted(env.types) assert car_type.name == "car" @@ -54,8 +49,7 @@ def test_exit_garage_actions(): "num_train_tasks": 1, }) env = ExitGarageEnv() - (CarHasExited, CarryingObstacle, NotCarryingObstacle, ObstacleCleared, - ObstacleNotCleared) = sorted(env.predicates) + CarHasExited, ObstacleCleared, ObstacleNotCleared = sorted(env.predicates) car_type, obstacle_type, robot_type, storage_type = sorted(env.types) # Create task with fixed initial state @@ -76,10 +70,8 @@ def test_exit_garage_actions(): storage, = state.get_objects(storage_type) # Assert starting state predicates assert not GroundAtom(CarHasExited, [car]).holds(state) - assert not GroundAtom(CarryingObstacle, [robot, obstacle]).holds(state) assert not GroundAtom(ObstacleCleared, [obstacle]).holds(state) assert GroundAtom(ObstacleNotCleared, [obstacle]).holds(state) - assert GroundAtom(NotCarryingObstacle, [robot]).holds(state) task = EnvironmentTask(state, goal) # Fixed action sequences to test (each is a list of action arrays) @@ -122,15 +114,15 @@ def test_exit_garage_actions(): assert s.get(robot, "x") == true_x assert s.get(robot, "y") == true_y # Robot shouldn't have picked up anything since it wasn't on an obstacle - assert not GroundAtom(CarryingObstacle, [robot, obstacle]).holds(s) - assert GroundAtom(NotCarryingObstacle, [robot]).holds(s) + assert s.get(robot, "carrying") == 0 + assert s.get(obstacle, "carried") == 0 # Test that going and picking up the obstacle works for action in pickup_actions: s = env.simulate(s, Action(action)) - assert GroundAtom(CarryingObstacle, [robot, obstacle]).holds(s) + assert s.get(robot, "carrying") == 1 + assert s.get(obstacle, "carried") == 1 assert not GroundAtom(ObstacleNotCleared, [obstacle]).holds(s) - assert not GroundAtom(NotCarryingObstacle, [robot]).holds(s) # Test that trying to place the obstacle outside storage does nothing true_x = s.get(robot, "x") @@ -140,18 +132,18 @@ def test_exit_garage_actions(): assert s.get(robot, "x") == true_x assert s.get(robot, "y") == true_y # Robot should still be carrying obstacle - assert GroundAtom(CarryingObstacle, [robot, obstacle]).holds(s) + assert s.get(robot, "carrying") == 1 + assert s.get(obstacle, "carried") == 1 assert not GroundAtom(ObstacleNotCleared, [obstacle]).holds(s) - assert not GroundAtom(NotCarryingObstacle, [robot]).holds(s) # Test that moving to storage and placing the obstacle works assert s.get(storage, "num_stored") == 0 for action in store_actions: s = env.simulate(s, Action(action)) # Check obstacle is placed - assert not GroundAtom(CarryingObstacle, [robot, obstacle]).holds(s) + assert s.get(robot, "carrying") == 0 + assert s.get(obstacle, "carried") == 0 assert GroundAtom(ObstacleCleared, [obstacle]).holds(s) - assert GroundAtom(NotCarryingObstacle, [robot]).holds(s) # Check obstacle and robot are in storage area assert s.get(robot, "y") > 0.8 assert s.get(obstacle, "y") > 0.8 @@ -160,7 +152,8 @@ def test_exit_garage_actions(): # Test that picking up in storage area does nothing s = env.simulate(s, Action(bad_robot_action)) - assert GroundAtom(NotCarryingObstacle, [robot]).holds(s) + assert s.get(robot, "carrying") == 0 + assert s.get(obstacle, "carried") == 0 # Test moving car to exit for action in drive_actions: @@ -303,7 +296,7 @@ def test_exit_garage_options(): """Tests for exit garage parametrized options.""" utils.reset_config({ "env": "exit_garage", - "exit_garage_pick_place_refine_penalty": 0, + "exit_garage_clear_refine_penalty": 0, "exit_garage_min_num_obstacles": 2, "exit_garage_max_num_obstacles": 2, "exit_garage_rrt_num_control_samples": 15, @@ -311,10 +304,8 @@ def test_exit_garage_options(): "num_train_tasks": 1, }) env = ExitGarageEnv() - (CarHasExited, CarryingObstacle, NotCarryingObstacle, ObstacleCleared, - ObstacleNotCleared) = sorted(env.predicates) - (DriveCarToExit, PickupObstacle, - StoreObstacle) = sorted(get_gt_options(env.get_name())) + CarHasExited, ObstacleCleared, ObstacleNotCleared = sorted(env.predicates) + ClearObstacle, DriveCarToExit = sorted(get_gt_options(env.get_name())) car_type, obstacle_type, robot_type, _ = sorted(env.types) # Create task with fixed initial state @@ -327,13 +318,12 @@ def test_exit_garage_options(): state.set(obstacle1, "x", 0.5) state.set(obstacle1, "y", 0.3) state.set(obstacle2, "x", 0.8) - state.set(obstacle2, "y", 0.1) + state.set(obstacle2, "y", 0.05) task = EnvironmentTask(state, goal) - # Test PickupObstacle, StoreObstacle, then DriveCarToExit + # Test ClearObstacle, then DriveCarToExit option_plan = [ - PickupObstacle.ground([robot, obstacle2], [0.2]), - StoreObstacle.ground([robot, obstacle2], [0.6]), + ClearObstacle.ground([robot, obstacle1], [0.2]), DriveCarToExit.ground([car], [0.7]), ] policy = utils.option_plan_to_policy(option_plan) @@ -346,37 +336,20 @@ def test_exit_garage_options(): exceptions_to_break_on={utils.OptionExecutionFailure}, ) final_state = traj.states[-1] + assert final_state.get(robot, "carrying") == 0 + assert final_state.get(obstacle1, "carried") == 0 + assert GroundAtom(ObstacleCleared, [obstacle1]).holds(final_state) + assert not GroundAtom(ObstacleNotCleared, [obstacle1]).holds(final_state) assert GroundAtom(CarHasExited, [car]).holds(final_state) - assert not GroundAtom(CarryingObstacle, - [robot, obstacle2]).holds(final_state) - assert GroundAtom(ObstacleCleared, [obstacle2]).holds(final_state) - assert not GroundAtom(ObstacleNotCleared, [obstacle2]).holds(final_state) - assert GroundAtom(NotCarryingObstacle, [robot]).holds(final_state) assert task.task.goal_holds(final_state) # Test scenarios where options shouldn't be initiable - # Test StoreObstacle when robot isn't carrying anything - assert GroundAtom(NotCarryingObstacle, [robot]).holds(state) - store_obstacle = StoreObstacle.ground([robot, obstacle1], [0.3]) - assert not store_obstacle.initiable(state) - - # Test StoreObstacle when robot carrying different obstacle - test_state = state.copy() - test_state.set(robot, "carrying", 1) - test_state.set(obstacle2, "carried", 1) - assert not store_obstacle.initiable(test_state) - - # Test PickupObstacle when robot carrying obstacle already - pickup_obstacle = PickupObstacle.ground([robot, obstacle1], [0.8]) - assert not pickup_obstacle.initiable(test_state) - - # Test PickupObstacle when obstacle already picked or stored - pickup_obstacle = PickupObstacle.ground([robot, obstacle2], [0.4]) - assert not pickup_obstacle.initiable(test_state) # already picked + # Test ClearObstacle when obstacle already picked or stored + clear_obstacle = ClearObstacle.ground([robot, obstacle2], [0.4]) test_state = state.copy() test_state.set(obstacle2, "y", 0.9) # obstacle2 already in storage - assert not pickup_obstacle.initiable(test_state) + assert not clear_obstacle.initiable(test_state) # Test DriveCarToExit when car is already in collision for some reason test_state.set(car, "x", 0.5) @@ -389,7 +362,7 @@ def test_exit_garage_failed_rrt(): if motion planning fails.""" utils.reset_config({ "env": "exit_garage", - "exit_garage_pick_place_refine_penalty": 0, + "exit_garage_clear_refine_penalty": 0, "exit_garage_min_num_obstacles": 6, "exit_garage_max_num_obstacles": 6, "exit_garage_rrt_num_attempts": 1, @@ -398,7 +371,7 @@ def test_exit_garage_failed_rrt(): "num_train_tasks": 1, }) env = ExitGarageEnv() - DriveCarToExit, _, _ = sorted(get_gt_options(env.get_name())) + _, DriveCarToExit = sorted(get_gt_options(env.get_name())) car_type, obstacle_type, _, _ = sorted(env.types) # Create task with fixed initial state diff --git a/tests/nsrt_learning/test_segmentation.py b/tests/nsrt_learning/test_segmentation.py index 0f4560006f..1418fdd7df 100644 --- a/tests/nsrt_learning/test_segmentation.py +++ b/tests/nsrt_learning/test_segmentation.py @@ -214,7 +214,7 @@ def test_contact_based_segmentation(env): "doors_min_room_exists_frac": 1.0, "doors_max_room_exists_frac": 1.0, "doors_birrt_smooth_amt": 0, - "exit_garage_pick_place_refine_penalty": 0, + "exit_garage_clear_refine_penalty": 0, "exit_garage_min_num_obstacles": 3, "exit_garage_max_num_obstacles": 3, "exit_garage_raise_environment_failure": True, diff --git a/tests/refinement_estimators/test_base_refinement_estimator.py b/tests/refinement_estimators/test_base_refinement_estimator.py index 5fec76af49..c996b1e60f 100644 --- a/tests/refinement_estimators/test_base_refinement_estimator.py +++ b/tests/refinement_estimators/test_base_refinement_estimator.py @@ -10,6 +10,8 @@ from predicators.refinement_estimators import BaseRefinementEstimator, \ create_refinement_estimator +# We don't run these tests for gnn because training on an empty dataset is +# not possible ESTIMATOR_NAMES = ["oracle", "tabular", "cnn"] diff --git a/tests/refinement_estimators/test_cnn_refinement_estimator.py b/tests/refinement_estimators/test_cnn_refinement_estimator.py index db847918ce..6b2118947c 100644 --- a/tests/refinement_estimators/test_cnn_refinement_estimator.py +++ b/tests/refinement_estimators/test_cnn_refinement_estimator.py @@ -28,10 +28,10 @@ def test_cnn_refinement_estimator(): assert estimator.get_name() == "cnn" assert estimator.is_learning_based with pytest.raises(AssertionError): - sample_task = NarrowPassageEnv().get_train_tasks()[0] + sample_task = NarrowPassageEnv().get_train_tasks()[0].task estimator.get_cost(sample_task, [], []) # Check that train actually runs - sample_data = [(sample_task, [], [], False, 5)] + sample_data = [(sample_task, [], [], False, [], [])] estimator.train(sample_data) # Check that get_cost works now that the estimator is trained estimator.get_cost(sample_task, [], []) @@ -49,6 +49,7 @@ def test_narrow_passage_cnn_refinement_estimator(): "cnn_refinement_estimator_crop": True, "cnn_refinement_estimator_crop_bounds": (0, 10, 0, 10), "cnn_refinement_estimator_downsample": 2, + "refinement_data_include_execution_cost": True, }) estimator = CNNRefinementEstimator() @@ -56,7 +57,7 @@ def test_narrow_passage_cnn_refinement_estimator(): env = NarrowPassageEnv() DoorIsClosed, DoorIsOpen, TouchedGoal = sorted(env.predicates) door_type, _, robot_type, target_type, _ = sorted(env.types) - sample_task = env.get_train_tasks()[0] + sample_task = env.get_train_tasks()[0].task sample_state = sample_task.init door, = sample_state.get_objects(door_type) robot, = sample_state.get_objects(robot_type) @@ -91,10 +92,12 @@ def test_narrow_passage_cnn_refinement_estimator(): # Create sample data to train using sample_data = [ - (sample_task, move_direct_skeleton, move_direct_atoms_seq, True, 4), + (sample_task, move_direct_skeleton, move_direct_atoms_seq, True, [4], + [3]), (sample_task, move_through_door_skeleton, move_through_door_atoms_seq, - True, 2), - (sample_task, move_direct_skeleton, move_direct_atoms_seq, False, 5), + True, [0.5, 1.5], [3, 5]), + (sample_task, move_direct_skeleton, move_direct_atoms_seq, False, [5], + []), ] estimator.train(sample_data) diff --git a/tests/refinement_estimators/test_gnn_refinement_estimator.py b/tests/refinement_estimators/test_gnn_refinement_estimator.py new file mode 100644 index 0000000000..bd1faed77a --- /dev/null +++ b/tests/refinement_estimators/test_gnn_refinement_estimator.py @@ -0,0 +1,220 @@ +"""Test cases for the GNN refinement cost estimator.""" + +import os +import shutil +from pathlib import Path +from unittest.mock import PropertyMock, patch + +import numpy as np +import pytest +from gym.spaces import Box + +import predicators.envs.narrow_passage +from predicators import utils +from predicators.envs.narrow_passage import NarrowPassageEnv +from predicators.ground_truth_models import get_gt_nsrts, get_gt_options +from predicators.ground_truth_models.narrow_passage import \ + NarrowPassageGroundTruthNSRTFactory, \ + NarrowPassageGroundTruthOptionFactory +from predicators.refinement_estimators.gnn_refinement_estimator import \ + GNNRefinementEstimator +from predicators.settings import CFG +from predicators.structs import NSRT, Action, GroundAtom, \ + ParameterizedOption, Predicate, Task, Variable + +_ENV_MODULE_NAME = predicators.envs.narrow_passage.__name__ + + +def test_gnn_refinement_estimator(): + """Test general properties of GNN refinement cost estimator.""" + utils.reset_config({ + "env": "narrow_passage", + "gnn_num_message_passing": 1, + "gnn_layer_size": 3, + "gnn_num_epochs": 1, + }) + estimator = GNNRefinementEstimator() + assert estimator.get_name() == "gnn" + assert estimator.is_learning_based + with pytest.raises(AssertionError): + sample_task = NarrowPassageEnv().get_train_tasks()[0].task + estimator.get_cost(sample_task, [], []) + + +def test_narrow_passage_gnn_refinement_estimator(): + """Test GNN refinement cost estimator for narrow_passage env.""" + utils.reset_config({ + "env": "narrow_passage", + "gnn_num_message_passing": 1, + "gnn_layer_size": 3, + "gnn_num_epochs": 1, + "gnn_do_normalization": True, + "refinement_data_include_execution_cost": True, + }) + estimator = GNNRefinementEstimator() + + # Get env objects and NSRTs + env = NarrowPassageEnv() + DoorIsClosed, DoorIsOpen, TouchedGoal = sorted(env.predicates) + door_type, _, robot_type, target_type, _ = sorted(env.types) + sample_task = env.get_train_tasks()[0].task + sample_state = sample_task.init + door, = sample_state.get_objects(door_type) + robot, = sample_state.get_objects(robot_type) + target, = sample_state.get_objects(target_type) + options = get_gt_options(env.get_name()) + gt_nsrts = get_gt_nsrts(CFG.env, env.predicates, options) + move_and_open_door_nsrt, move_to_target_nsrt = sorted(gt_nsrts) + + # Ground NSRTs using objects + ground_move_and_open_door = move_and_open_door_nsrt.ground([robot, door]) + ground_move_to_target = move_to_target_nsrt.ground([robot, target]) + # Ground atoms using objects + ground_door_is_closed = GroundAtom(DoorIsClosed, [door]) + ground_door_is_open = GroundAtom(DoorIsOpen, [door]) + ground_touched_goal = GroundAtom(TouchedGoal, [robot, target]) + + # Make valid test skeletons and atom_sequences + move_direct_skeleton = [ground_move_to_target] + move_direct_atoms_seq = [ + {ground_door_is_closed}, + {ground_door_is_closed, ground_touched_goal}, + ] + move_through_door_skeleton = [ + ground_move_and_open_door, + ground_move_to_target, + ] + move_through_door_atoms_seq = [ + {ground_door_is_closed}, + {ground_door_is_open}, + {ground_door_is_closed, ground_touched_goal}, + ] + + # Create sample data to train using + sample_data = [ + (sample_task, move_direct_skeleton, move_direct_atoms_seq, True, [4], + [3]), + (sample_task, move_through_door_skeleton, move_through_door_atoms_seq, + True, [0.5, 1.5], [3, 5]), + (sample_task, move_direct_skeleton, move_direct_atoms_seq, False, [5], + []), + ] + estimator.train(sample_data) + + # Test direct MoveToTarget skeleton returns finite cost + move_direct_cost = estimator.get_cost(sample_task, move_direct_skeleton, + move_direct_atoms_seq) + assert move_direct_cost < float('inf') + + # Test open door then move skeleton returns finite cost + move_through_door_cost = estimator.get_cost(sample_task, + move_through_door_skeleton, + move_through_door_atoms_seq) + assert move_through_door_cost < float('inf') + + +def test_gnn_refinement_estimator_arities(): + """Test GNN refinement cost estimator on mocked predicate/NSRT sets that + are 0-arity, unary, and binary.""" + utils.reset_config({ + "env": "narrow_passage", + "gnn_num_message_passing": 1, + "gnn_layer_size": 3, + "gnn_num_epochs": 1, + "gnn_use_validation_set": False, + }) + + # Get base environment, types, predicates, NSRTs + env = NarrowPassageEnv() + _, DoorIsOpen, TouchedGoal = sorted(env.predicates) + door_type, _, robot_type, target_type, _ = sorted(env.types) + base_options = get_gt_options(env.get_name()) + gt_nsrts = get_gt_nsrts(CFG.env, env.predicates, base_options) + move_and_open_door_option, _ = sorted(base_options) + move_and_open_door_nsrt, _ = sorted(gt_nsrts) + + # Make predicates of all arities + ZeroArityPred = Predicate("ZeroArityPred", [], lambda s, o: False) + UnaryPred = DoorIsOpen + BinaryPred = TouchedGoal + + # Make dummy options and NSRTs of all arities + + _policy = lambda _1, _2, _3, _4: Action( + np.array([0, 0, 0], dtype=np.float32)) + _initiable = lambda _1, _2, _3, _4: True + _sampler = lambda _1, _2, rng, _4: np.array([rng.uniform()], + dtype=np.float32) + + ZeroArityOption = ParameterizedOption("ZeroArityOption", [], + Box(0, 1, + (1, )), _policy, _initiable, + lambda _1, _2, _3, _4: True) + ZeroArityNSRT = NSRT("ZeroArityNSRT", [], set(), set(), set(), set(), + ZeroArityOption, [], _sampler) + + UnaryOption = ParameterizedOption("UnaryOption", [robot_type], + Box(0, 1, (1, )), _policy, _initiable, + lambda _1, _2, _3, _4: True) + robot = Variable("?robot", robot_type) + UnaryNSRT = NSRT("UnaryNSRT", [robot], set(), set(), set(), set(), + UnaryOption, [robot], _sampler) + BinaryNSRT = move_and_open_door_nsrt + + mock_preds = {ZeroArityPred, UnaryPred, BinaryPred} + mock_options = {ZeroArityOption, UnaryOption, move_and_open_door_option} + mock_nsrts = {ZeroArityNSRT, UnaryNSRT, BinaryNSRT} + + with patch(f"{_ENV_MODULE_NAME}.NarrowPassageEnv.predicates", + new_callable=PropertyMock) as mock_env, \ + patch.object(NarrowPassageGroundTruthOptionFactory, + "get_options", + return_value=mock_options), \ + patch.object(NarrowPassageGroundTruthNSRTFactory, + "get_nsrts", + return_value=mock_nsrts): + mock_env.return_value = mock_preds + # Test that _setup_fields() works + estimator = GNNRefinementEstimator() + estimator2 = GNNRefinementEstimator() + + # Make a test Task with all types of predicates/NSRTs involved + sample_task = env.get_train_tasks()[0].task + initial_state = sample_task.init + robot, = initial_state.get_objects(robot_type) + door, = initial_state.get_objects(door_type) + target, = initial_state.get_objects(target_type) + goal = { + GroundAtom(ZeroArityPred, []), + GroundAtom(UnaryPred, [door]), + GroundAtom(BinaryPred, [robot, target]), + } + task = Task(initial_state, goal) + + # Make a test skeleton and atoms_sequence + skeleton = [ZeroArityNSRT.ground([])] + atoms_sequence = [goal, goal] + + # Create sample refinement training data + data = [(task, skeleton, atoms_sequence, False, [5.0], [4])] + # Check that train() and _graphify_single_input() successfully run + estimator.train(data) + + # Test that getting a cost returns a finite cost + test_cost = estimator.get_cost(task, skeleton, atoms_sequence) + assert test_cost < float('inf') + + # Create fake directory to test saving and loading model + parent_dir = os.path.dirname(__file__) + approach_dir = os.path.join(parent_dir, "_fake_approach") + os.makedirs(approach_dir, exist_ok=True) + test_approach_path = Path(approach_dir) / "test.estimator" + estimator.save_model(test_approach_path) + estimator2.load_model(test_approach_path) + + # Check that the loaded model is the same as the saved one + test_cost2 = estimator2.get_cost(task, skeleton, atoms_sequence) + assert test_cost2 == test_cost + + # Remove temp directory + shutil.rmtree(approach_dir) diff --git a/tests/refinement_estimators/test_oracle_refinement_estimator.py b/tests/refinement_estimators/test_oracle_refinement_estimator.py index c73185c0a2..dbe2eea32a 100644 --- a/tests/refinement_estimators/test_oracle_refinement_estimator.py +++ b/tests/refinement_estimators/test_oracle_refinement_estimator.py @@ -18,7 +18,7 @@ def test_oracle_refinement_estimator(): assert estimator.get_name() == "oracle" assert not estimator.is_learning_based with pytest.raises(NotImplementedError): - sample_task = NarrowPassageEnv().get_train_tasks()[0] + sample_task = NarrowPassageEnv().get_train_tasks()[0].task estimator.get_cost(sample_task, [], []) @@ -94,6 +94,8 @@ def test_exit_garage_oracle_refinement_estimator(): """Test oracle refinement cost estimator for exit_garage env.""" utils.reset_config({ "env": "exit_garage", + "exit_garage_min_num_obstacles": 2, + "exit_garage_max_num_obstacles": 2, }) estimator = OracleRefinementEstimator() @@ -104,21 +106,19 @@ def test_exit_garage_oracle_refinement_estimator(): sample_state = sample_task.init car, = sample_state.get_objects(car_type) robot, = sample_state.get_objects(robot_type) - obstacles = sample_state.get_objects(obstacle_type) + obstacle1, obstacle2 = sample_state.get_objects(obstacle_type) + sample_state.set(obstacle1, "y", 0.6) + sample_state.set(obstacle2, "y", 0.4) task = Task(sample_state, sample_task.goal) gt_nsrts = get_gt_nsrts(CFG.env, env.predicates, get_gt_options(env.get_name())) - (drive_car_to_exit_nsrt, pickup_obstacle_nsrt, - store_obstacle_nsrt) = sorted(gt_nsrts) + clear_obstacle_nsrt, drive_car_to_exit_nsrt = sorted(gt_nsrts) # Ground NSRTs using objects ground_drive_car_to_exit = drive_car_to_exit_nsrt.ground([car]) - def ground_pickup_obstacle(obstacle): - return pickup_obstacle_nsrt.ground([robot, obstacle]) - - def ground_store_obstacle(obstacle): - return store_obstacle_nsrt.ground([robot, obstacle]) + def ground_clear_obstacle(obstacle): + return clear_obstacle_nsrt.ground([robot, obstacle]) # Test direct DriveCarToExit skeleton drive_direct_skeleton = [ground_drive_car_to_exit] @@ -127,13 +127,12 @@ def ground_store_obstacle(obstacle): # Test pickups and stores before driving long_skeleton = [ - ground_pickup_obstacle(obstacles[0]), - ground_store_obstacle(obstacles[0]), - ground_pickup_obstacle(obstacles[1]), + ground_clear_obstacle(obstacle1), + ground_clear_obstacle(obstacle2), ground_drive_car_to_exit, ] long_cost = estimator.get_cost(task, long_skeleton, []) - assert long_cost == -2 + assert long_cost == -0.5 # Make sure that sorting the costs considers the long skeleton cheaper assert sorted([drive_direct_cost, diff --git a/tests/refinement_estimators/test_tabular_refinement_estimator.py b/tests/refinement_estimators/test_tabular_refinement_estimator.py index 96cc9506fa..2e844cd442 100644 --- a/tests/refinement_estimators/test_tabular_refinement_estimator.py +++ b/tests/refinement_estimators/test_tabular_refinement_estimator.py @@ -20,22 +20,24 @@ def test_tabular_refinement_estimator(): assert estimator.get_name() == "tabular" assert estimator.is_learning_based with pytest.raises(AssertionError): - sample_task = NarrowPassageEnv().get_train_tasks()[0] + sample_task = NarrowPassageEnv().get_train_tasks()[0].task estimator.get_cost(sample_task, [], []) # Check that train actually runs - sample_data = [(sample_task, [], [], False, 5)] + sample_data = [(sample_task, [], [], False, [], [])] estimator.train(sample_data) # Check that the resulting dictionary is correct cost_dict = estimator._model_dict # pylint: disable=protected-access - assert cost_dict == {(tuple(), tuple()): 8} - assert estimator.get_cost(sample_task, [], []) == 8 + assert cost_dict == {(tuple(), tuple()): (3, 0)} + assert estimator.get_cost(sample_task, [], []) == 3 def test_narrow_passage_tabular_refinement_estimator(): """Test tabular refinement cost estimator for narrow_passage env.""" utils.reset_config({ "env": "narrow_passage", - "refinement_data_failed_refinement_penalty": 3 + "refinement_data_failed_refinement_penalty": 3, + "refinement_data_include_execution_cost": True, + "refinement_data_low_level_execution_cost": 0.01, }) estimator = TabularRefinementEstimator() @@ -43,7 +45,7 @@ def test_narrow_passage_tabular_refinement_estimator(): env = NarrowPassageEnv() DoorIsClosed, DoorIsOpen, TouchedGoal = sorted(env.predicates) door_type, _, robot_type, target_type, _ = sorted(env.types) - sample_task = env.get_train_tasks()[0] + sample_task = env.get_train_tasks()[0].task sample_state = sample_task.init door, = sample_state.get_objects(door_type) robot, = sample_state.get_objects(robot_type) @@ -78,23 +80,25 @@ def test_narrow_passage_tabular_refinement_estimator(): # Create sample data to train using sample_data = [ - (sample_task, move_direct_skeleton, move_direct_atoms_seq, True, 4), + (sample_task, move_direct_skeleton, move_direct_atoms_seq, True, [4], + [3]), (sample_task, move_through_door_skeleton, move_through_door_atoms_seq, - True, 2), - (sample_task, move_direct_skeleton, move_direct_atoms_seq, False, 5), + True, [0.5, 1.5], [3, 5]), + (sample_task, move_direct_skeleton, move_direct_atoms_seq, False, [5], + []), ] estimator.train(sample_data) # Test direct MoveToTarget skeleton move_direct_cost = estimator.get_cost(sample_task, move_direct_skeleton, move_direct_atoms_seq) - assert move_direct_cost == 6 # average of 2 samples: 4 and 5 + 3 + assert abs(move_direct_cost - 6.015) < 1e-5 # average of 4.03 and (5 + 3) # Test open door then move skeleton move_through_door_cost = estimator.get_cost(sample_task, move_through_door_skeleton, move_through_door_atoms_seq) - assert move_through_door_cost == 2 + assert abs(move_through_door_cost - 2.08) < 1e-5 # Test an impossible skeleton impossible_skeleton = [ diff --git a/tests/test_train_refinement_estimator.py b/tests/test_train_refinement_estimator.py index 070cde6c71..ad85e11fd8 100644 --- a/tests/test_train_refinement_estimator.py +++ b/tests/test_train_refinement_estimator.py @@ -53,11 +53,29 @@ def test_train_refinement_estimator(): # Test successful data generation and training temp_log_file = tempfile.NamedTemporaryFile(delete=False).name train_sys_argv = [ - "dummy", "--env", "narrow_passage", "--approach", - "refinement_estimation", "--refinement_estimator", "tabular", "--seed", - "123", "--num_train_tasks", "1", "--approach_dir", approach_dir, - "--data_dir", data_dir, "--refinement_data_file_name", "test.data", - "--log_file", temp_log_file + "dummy", + "--env", + "narrow_passage", + "--approach", + "refinement_estimation", + "--refinement_estimator", + "tabular", + "--seed", + "123", + "--num_train_tasks", + "1", + "--approach_dir", + approach_dir, + "--data_dir", + data_dir, + "--refinement_data_file_name", + "test.data", + "--refinement_data_save_every", + "1", + "--log_file", + temp_log_file, + "--refinement_train_with_frac_data", + "1.1", ] sys.argv = train_sys_argv _train_refinement_estimation_approach() @@ -114,5 +132,6 @@ def test_train_refinement_estimator(): sample_option_model = create_option_model("oracle") utils.reset_config_with_parser(parser) with pytest.raises(PlanningFailure): - _collect_refinement_data_for_task(sample_task, sample_option_model, - set(), set(), set(), 0, []) + _collect_refinement_data_for_task(sample_env, sample_task, + sample_option_model, set(), set(), + set(), 0, []) From 2d3849b2533acb8255e1e7b94b0f19a56eeca511 Mon Sep 17 00:00:00 2001 From: Nishanth Kumar Date: Thu, 6 Jul 2023 16:58:44 -0400 Subject: [PATCH 3/5] Implements new variant of cover env for testing active sampler learning approaches (#1493) * i think this works?? * done * done testing * revert unnecessary comments * fix tom comments --- predicators/envs/cover.py | 23 +++++++++++++++++ .../ground_truth_models/cover/nsrts.py | 16 +++++++++--- .../ground_truth_models/cover/options.py | 21 +++++++++++----- predicators/settings.py | 25 +++++++++++++------ tests/approaches/test_oracle_approach.py | 5 ++-- tests/envs/test_cover.py | 5 +++- tests/explorers/test_online_learning.py | 2 ++ 7 files changed, 77 insertions(+), 20 deletions(-) diff --git a/predicators/envs/cover.py b/predicators/envs/cover.py index b8eae2a805..84a3adef5a 100644 --- a/predicators/envs/cover.py +++ b/predicators/envs/cover.py @@ -996,6 +996,29 @@ def _Covers_holds(state: State, objects: Sequence[Object]) -> bool: (by - bh == 0) +class CoverEnvPlaceHard(CoverEnv): + """A cover environment where the only thing that's hard is placing. + Specifically, there is only one block and one target, and the default grasp + sampler always picks up the block directly in the middle. The robot is + allowed to place anywhere, and the default sampler tries placing in a + region that's 2x bigger than the target, often missing the target. The only + thing that needs to be learned is how to place to correctly cover the + target. + + This environment is specifically useful for testing various aspects + of different sampler learning approaches. + """ + _allow_free_space_placing: ClassVar[bool] = True + + @classmethod + def get_name(cls) -> str: + return "cover_place_hard" + + def _get_hand_regions(self, state: State) -> List[Tuple[float, float]]: + # Allow placing anywhere! + return [(0.0, 1.0)] + + class BumpyCoverEnv(CoverEnvRegrasp): """A variation on the cover regrasp environment where some blocks are 'bumpy', as indicated by a new feature of blocks. diff --git a/predicators/ground_truth_models/cover/nsrts.py b/predicators/ground_truth_models/cover/nsrts.py index f16cd73508..e82bb0967e 100644 --- a/predicators/ground_truth_models/cover/nsrts.py +++ b/predicators/ground_truth_models/cover/nsrts.py @@ -19,7 +19,7 @@ def get_env_names(cls) -> Set[str]: return { "cover", "cover_hierarchical_types", "cover_typed_options", "cover_regrasp", "cover_multistep_options", "pybullet_cover", - "cover_handempty", "bumpy_cover" + "cover_handempty", "bumpy_cover", "cover_place_hard" } @staticmethod @@ -48,7 +48,7 @@ def get_nsrts(env_name: str, types: Dict[str, Type], "cover_regrasp", "cover_handempty"): PickPlace = options["PickPlace"] elif env_name in ("cover_typed_options", "cover_multistep_options", - "bumpy_cover"): + "bumpy_cover", "cover_place_hard"): Pick, Place = options["Pick"], options["Place"] nsrts = set() @@ -77,7 +77,7 @@ def get_nsrts(env_name: str, types: Dict[str, Type], elif env_name == "bumpy_cover": option = Pick option_vars = [block] - elif env_name == "cover_typed_options": + elif env_name in ("cover_typed_options", "cover_place_hard"): option = Pick option_vars = [block] elif env_name == "cover_multistep_options": @@ -158,6 +158,8 @@ def pick_sampler(state: State, goal: Set[GroundAtom], ub = float( state.get(b, "pose") + state.get(b, "width") / 2) ub = min(ub, 1.0) + elif env_name == ("cover_place_hard"): + return np.array([state.get(b, "pose")], dtype=np.float32) return np.array(rng.uniform(lb, ub, size=(1, )), dtype=np.float32) @@ -196,9 +198,12 @@ def pick_sampler(state: State, goal: Set[GroundAtom], elif env_name == "bumpy_cover": option = Place option_vars = [block, target] - elif env_name == "cover_typed_options": + elif env_name in "cover_typed_options": option = Place option_vars = [target] + elif env_name == "cover_place_hard": + option = Place + option_vars = [block, target] elif env_name == "cover_multistep_options": option = Place option_vars = [block, robot, target] @@ -271,6 +276,9 @@ def place_sampler(state: State, goal: Set[GroundAtom], center += 3 * state.get(t, "width") / 4 lb = center - state.get(t, "width") / 2 ub = center + state.get(t, "width") / 2 + elif env_name == "cover_place_hard": + lb = float(state.get(t, "pose") - state.get(t, "width")) + ub = float(state.get(t, "pose") + state.get(t, "width")) else: lb = float( state.get(t, "pose") - state.get(t, "width") / 10) diff --git a/predicators/ground_truth_models/cover/options.py b/predicators/ground_truth_models/cover/options.py index 504e340043..ab6a6dfc70 100644 --- a/predicators/ground_truth_models/cover/options.py +++ b/predicators/ground_truth_models/cover/options.py @@ -142,7 +142,7 @@ class CoverTypedOptionsGroundTruthOptionFactory(GroundTruthOptionFactory): @classmethod def get_env_names(cls) -> Set[str]: - return {"cover_typed_options"} + return {"cover_typed_options", "cover_place_hard"} @classmethod def get_options(cls, env_name: str, types: Dict[str, Type], @@ -157,25 +157,34 @@ def _Pick_policy(s: State, m: Dict, o: Sequence[Object], del m # unused # The pick parameter is a RELATIVE position, so we need to # add the pose of the object. - pick_pose = s.get(o[0], "pose") + p[0] - pick_pose = min(max(pick_pose, 0.0), 1.0) - return Action(np.array([pick_pose], dtype=np.float32)) + if CFG.env == "cover_typed_options": + pick_pose = s.get(o[0], "pose") + p[0] + pick_pose = min(max(pick_pose, 0.0), 1.0) + return Action(np.array([pick_pose], dtype=np.float32)) + return Action(p) + + lb, ub = (0.0, 1.0) + if CFG.env == "cover_typed_options": + lb, ub = (-0.1, 0.1) Pick = utils.SingletonParameterizedOption("Pick", _Pick_policy, types=[block_type], params_space=Box( - -0.1, 0.1, (1, ))) + lb, ub, (1, ))) def _Place_policy(state: State, memory: Dict, objects: Sequence[Object], params: Array) -> Action: del state, memory, objects # unused return Action(params) # action is simply the parameter + place_types = [block_type, target_type] + if CFG.env == "cover_typed_options": + place_types = [target_type] Place = utils.SingletonParameterizedOption( "Place", _Place_policy, # use the parent class's policy - types=[target_type], + types=place_types, params_space=Box(0, 1, (1, ))) return {Pick, Place} diff --git a/predicators/settings.py b/predicators/settings.py index 91978e43a9..1be2b8e694 100644 --- a/predicators/settings.py +++ b/predicators/settings.py @@ -46,13 +46,6 @@ class GlobalSettings: # your call to utils.reset_config(). render_state_dpi = 150 - # cover env parameters - cover_num_blocks = 2 - cover_num_targets = 2 - cover_block_widths = [0.1, 0.07] - cover_target_widths = [0.05, 0.03] - cover_initial_holding_prob = 0.75 - # cover_multistep_options env parameters cover_multistep_action_limits = [-np.inf, np.inf] cover_multistep_degenerate_oracle_samplers = False @@ -693,6 +686,24 @@ def get_arg_specific_settings(args: Dict[str, Any]) -> Dict[str, Any]: # For the tools environment, keep it much lower. "tools": 1, })[args.get("env", "")], + + # Parameters specific to the cover environment. + # cover env parameters + cover_num_blocks=defaultdict(lambda: 2, { + "cover_place_hard": 1, + })[args.get("env", "")], + cover_num_targets=defaultdict(lambda: 2, { + "cover_place_hard": 1, + })[args.get("env", "")], + cover_block_widths=defaultdict(lambda: [0.1, 0.07], { + "cover_place_hard": [0.1], + })[args.get("env", "")], + cover_target_widths=defaultdict(lambda: [0.05, 0.03], { + "cover_place_hard": [0.05], + })[args.get("env", "")], + cover_initial_holding_prob=defaultdict(lambda: 0.75, { + "cover_place_hard": 0.0, + })[args.get("env", "")], ) diff --git a/tests/approaches/test_oracle_approach.py b/tests/approaches/test_oracle_approach.py index c1e85972ac..dccbbfcf69 100644 --- a/tests/approaches/test_oracle_approach.py +++ b/tests/approaches/test_oracle_approach.py @@ -15,8 +15,8 @@ ClutteredTablePlaceEnv from predicators.envs.coffee import CoffeeEnv from predicators.envs.cover import BumpyCoverEnv, CoverEnv, \ - CoverEnvHierarchicalTypes, CoverEnvRegrasp, CoverEnvTypedOptions, \ - CoverMultistepOptions, RegionalBumpyCoverEnv + CoverEnvHierarchicalTypes, CoverEnvPlaceHard, CoverEnvRegrasp, \ + CoverEnvTypedOptions, CoverMultistepOptions, RegionalBumpyCoverEnv from predicators.envs.doors import DoorsEnv from predicators.envs.exit_garage import ExitGarageEnv from predicators.envs.narrow_passage import NarrowPassageEnv @@ -46,6 +46,7 @@ ENV_NAME_AND_CLS = [ ("cover", CoverEnv), ("cover_typed_options", CoverEnvTypedOptions), + ("cover_place_hard", CoverEnvPlaceHard), ("cover_hierarchical_types", CoverEnvHierarchicalTypes), ("cover_regrasp", CoverEnvRegrasp), ("bumpy_cover", BumpyCoverEnv), ("cover_multistep_options", CoverMultistepOptions), diff --git a/tests/envs/test_cover.py b/tests/envs/test_cover.py index ac3cdb7e55..076a3b79e4 100644 --- a/tests/envs/test_cover.py +++ b/tests/envs/test_cover.py @@ -125,7 +125,10 @@ def test_cover(env_name): def test_cover_typed_options(): """Tests for CoverEnvTypedOptions class.""" - utils.reset_config({"env": "cover", "cover_initial_holding_prob": 0.0}) + utils.reset_config({ + "env": "cover_typed_options", + "cover_initial_holding_prob": 0.0 + }) env = CoverEnvTypedOptions() for task in env.get_train_tasks(): for obj in task.init: diff --git a/tests/explorers/test_online_learning.py b/tests/explorers/test_online_learning.py index 76bf30f298..fb24d3a13d 100644 --- a/tests/explorers/test_online_learning.py +++ b/tests/explorers/test_online_learning.py @@ -121,6 +121,7 @@ def test_interaction(): "load_data": True, "make_interaction_videos": False, }) + env = create_new_env("cover") # Invalid query type. with pytest.raises(AssertionError) as e: _run_pipeline(env, cogman, train_tasks, dataset) @@ -189,4 +190,5 @@ def test_interaction(): "make_interaction_videos": True, "max_num_steps_interaction_request": 3, }) + env = create_new_env("cover") _run_pipeline(env, cogman, train_tasks, dataset) From 1bdea20b165f09810f97a60a64256a09ac9b5a90 Mon Sep 17 00:00:00 2001 From: Tom Silver Date: Mon, 17 Jul 2023 17:15:13 -0400 Subject: [PATCH 4/5] active sampler explorer: consider multiple goals in case one is not reachable (#1494) --- .../explorers/active_sampler_explorer.py | 66 ++++++++++++------- .../explorers/test_active_sampler_explorer.py | 2 +- 2 files changed, 42 insertions(+), 26 deletions(-) diff --git a/predicators/explorers/active_sampler_explorer.py b/predicators/explorers/active_sampler_explorer.py index 91d8c61382..e79035dfca 100644 --- a/predicators/explorers/active_sampler_explorer.py +++ b/predicators/explorers/active_sampler_explorer.py @@ -1,14 +1,14 @@ """An explorer for active sampler learning.""" import logging -from typing import Callable, Dict, List, Optional, Set +from typing import Callable, Dict, Iterator, List, Optional, Set import numpy as np from gym.spaces import Box from predicators import utils from predicators.explorers.base_explorer import BaseExplorer -from predicators.planning import run_task_plan_once +from predicators.planning import PlanningFailure, run_task_plan_once from predicators.settings import CFG from predicators.structs import NSRT, ExplorationStrategy, GroundAtom, \ NSRTSampler, ParameterizedOption, Predicate, State, Task, Type, \ @@ -108,25 +108,47 @@ def _option_policy(state: State) -> _Option: if current_policy is None: # If the assigned goal hasn't yet been reached, try for it. if not assigned_task_goal_reached: - goal = assigned_task.goal - logging.info( - f"[Explorer] Pursuing assigned task goal: {goal}") + logging.info("[Explorer] Pursuing assigned task goal") + + def generate_goals() -> Iterator[Set[GroundAtom]]: + # Just a single goal. + yield assigned_task.goal + # Otherwise, practice. else: - # If there are no ground NSRTs that we've tried so far, - # just wait until we have tried to solve some task. - if len(self._ground_op_hist) == 0: - raise utils.OptionExecutionFailure( - "No ground operators to practice yet") - next_practice_nsrt = self._get_practice_ground_nsrt() - logging.info("[Explorer] Pursuing NRST preconditions " - f"{next_practice_nsrt.name}" - f"{next_practice_nsrt.objects}") - goal = next_practice_nsrt.preconditions - task = Task(state, goal) - logging.info(f"[Explorer] Replanning to {task.goal}") - current_policy = self._get_option_policy_for_task(task) - + logging.info("[Explorer] Pursuing NSRT preconditions") + + def generate_goals() -> Iterator[Set[GroundAtom]]: + nonlocal next_practice_nsrt + # Generate goals sorted by their descending score. + for op in sorted(self._ground_op_hist, + key=self._score_ground_op, + reverse=True): + nsrt = [ + n for n in self._nsrts if n.op == op.parent + ][0] + # NOTE: setting nonlocal variable. + next_practice_nsrt = nsrt.ground(op.objects) + yield next_practice_nsrt.preconditions + + # Try to plan to each goal until a task plan is found. + for goal in generate_goals(): + task = Task(state, goal) + logging.info(f"[Explorer] Replanning to {task.goal}") + try: + current_policy = self._get_option_policy_for_task(task) + # Not covering this case because the intention of this + # explorer is to be used in environments where any goal can + # be reached from anywhere, but we still don't want to + # crash in case that assumption is not met. + except PlanningFailure: # pragma: no cover + continue + logging.info("[Explorer] Plan found.") + break + # Terminate early if no goal could be found. + else: + logging.info("[Explorer] No reachable goal found.") + raise utils.RequestActPolicyFailure("Failed to find goal.") # Query the current policy. assert current_policy is not None try: @@ -179,12 +201,6 @@ def _update_ground_op_hist(self, state: State) -> None: self._ground_op_hist[last_executed_op] = [] self._ground_op_hist[last_executed_op].append(success) - def _get_practice_ground_nsrt(self) -> _GroundNSRT: - best_op = max(self._ground_op_hist, key=self._score_ground_op) - logging.info(f"[Explorer] Practicing {best_op.name}{best_op.objects}") - nsrt = [n for n in self._nsrts if n.op == best_op.parent][0] - return nsrt.ground(best_op.objects) - def _get_option_policy_for_task(self, task: Task) -> Callable[[State], _Option]: # Run task planning and then greedily execute. diff --git a/tests/explorers/test_active_sampler_explorer.py b/tests/explorers/test_active_sampler_explorer.py index b50133e901..5a2b1647b9 100644 --- a/tests/explorers/test_active_sampler_explorer.py +++ b/tests/explorers/test_active_sampler_explorer.py @@ -74,7 +74,7 @@ def test_active_sampler_explorer(): nsrt_to_explorer_sampler=nsrt_to_explorer_sampler) task_idx = 0 policy, _ = explorer.get_exploration_strategy(task_idx, 500) - with pytest.raises(utils.OptionExecutionFailure): + with pytest.raises(utils.RequestActPolicyFailure): policy(state) # Test that the PickFromBumpy operator is tried more than the others when From 2d943925ecbd076cc9d42b4475e2541dba88d421 Mon Sep 17 00:00:00 2001 From: Tom Silver Date: Mon, 17 Jul 2023 17:26:49 -0400 Subject: [PATCH 5/5] yapf --- tests/approaches/test_oracle_approach.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/approaches/test_oracle_approach.py b/tests/approaches/test_oracle_approach.py index e60cb20882..d7b4504fe3 100644 --- a/tests/approaches/test_oracle_approach.py +++ b/tests/approaches/test_oracle_approach.py @@ -45,7 +45,8 @@ _PDDL_ENV_MODULE_PATH = predicators.envs.pddl_env.__name__ ENV_NAME_AND_CLS = [ - ("cover", CoverEnv), ("cover_typed_options", CoverEnvTypedOptions), + ("cover", CoverEnv), + ("cover_typed_options", CoverEnvTypedOptions), ("cover_place_hard", CoverEnvPlaceHard), ("cover_hierarchical_types", CoverEnvHierarchicalTypes), ("cover_regrasp", CoverEnvRegrasp),