From 42b22ea920beae69057bffd421cf15116788ab86 Mon Sep 17 00:00:00 2001
From: Tom Silver <tomsilver@users.noreply.github.com>
Date: Fri, 30 Jun 2023 09:41:52 -0400
Subject: [PATCH 1/5] manual feature selection for active sampler learning
 (#1490)

---
 .../active_sampler_learning_approach.py       | 97 ++++++++++---------
 predicators/ml_models.py                      |  3 +
 predicators/settings.py                       |  4 +-
 predicators/utils.py                          | 39 ++++++++
 scripts/configs/active_sampler_learning.yaml  | 54 ++++++++++-
 .../create_active_sampler_learning_plots.py   | 12 ++-
 .../test_active_sampler_learning_approach.py  | 18 ++--
 7 files changed, 168 insertions(+), 59 deletions(-)

diff --git a/predicators/approaches/active_sampler_learning_approach.py b/predicators/approaches/active_sampler_learning_approach.py
index 6f4493e129..c6b3fe2e27 100644
--- a/predicators/approaches/active_sampler_learning_approach.py
+++ b/predicators/approaches/active_sampler_learning_approach.py
@@ -22,8 +22,8 @@
 from predicators.approaches.online_nsrt_learning_approach import \
     OnlineNSRTLearningApproach
 from predicators.explorers import BaseExplorer, create_explorer
-from predicators.ml_models import BinaryClassifierEnsemble, \
-    MLPBinaryClassifier, MLPRegressor
+from predicators.ml_models import BinaryClassifier, BinaryClassifierEnsemble, \
+    KNeighborsClassifier, MLPBinaryClassifier, MLPRegressor
 from predicators.settings import CFG
 from predicators.structs import NSRT, Array, GroundAtom, LowLevelTrajectory, \
     NSRTSampler, Object, ParameterizedOption, Predicate, Segment, State, \
@@ -133,7 +133,8 @@ def _update_sampler_data(self) -> None:
                             continue
 
                 if CFG.active_sampler_learning_model in [
-                        "myopic_classifier", "myopic_classifier_ensemble"
+                        "myopic_classifier_mlp", "myopic_classifier_ensemble",
+                        "myopic_classifier_knn"
                 ]:
                     label: Any = success
                 else:
@@ -159,7 +160,9 @@ def _check_option_success(self, option: _Option, segment: Segment) -> bool:
     def _learn_wrapped_samplers(self,
                                 online_learning_cycle: Optional[int]) -> None:
         """Update the NSRTs in place."""
-        if CFG.active_sampler_learning_model == "myopic_classifier":
+        if CFG.active_sampler_learning_model in [
+                "myopic_classifier_mlp", "myopic_classifier_knn"
+        ]:
             learner: _WrappedSamplerLearner = _ClassifierWrappedSamplerLearner(
                 self._get_current_nsrts(), self._get_current_predicates(),
                 online_learning_cycle)
@@ -244,34 +247,38 @@ class _ClassifierWrappedSamplerLearner(_WrappedSamplerLearner):
 
     def _learn_nsrt_sampler(self, nsrt_data: _OptionSamplerDataset,
                             nsrt: NSRT) -> Tuple[NSRTSampler, NSRTSampler]:
-        X_classifier: List[List[Array]] = []
+        X_classifier: List[Array] = []
         y_classifier: List[int] = []
         for state, option, _, label in nsrt_data:
             objects = option.objects
             params = option.params
-            # input is state features and option parameters
-            X_classifier.append([np.array(1.0)])  # start with bias term
-            for obj in objects:
-                X_classifier[-1].extend(state[obj])
-            X_classifier[-1].extend(params)
-            assert not CFG.sampler_learning_use_goals
+            x_arr = utils.construct_active_sampler_input(
+                state, objects, params, option.parent)
+            X_classifier.append(x_arr)
             y_classifier.append(label)
         X_arr_classifier = np.array(X_classifier)
         # output is binary signal
         y_arr_classifier = np.array(y_classifier)
-        classifier = MLPBinaryClassifier(
-            seed=CFG.seed,
-            balance_data=CFG.mlp_classifier_balance_data,
-            max_train_iters=CFG.sampler_mlp_classifier_max_itr,
-            learning_rate=CFG.learning_rate,
-            weight_decay=CFG.weight_decay,
-            use_torch_gpu=CFG.use_torch_gpu,
-            train_print_every=CFG.pytorch_train_print_every,
-            n_iter_no_change=CFG.mlp_classifier_n_iter_no_change,
-            hid_sizes=CFG.mlp_classifier_hid_sizes,
-            n_reinitialize_tries=CFG.
-            sampler_mlp_classifier_n_reinitialize_tries,
-            weight_init="default")
+        if CFG.active_sampler_learning_model.endswith("mlp"):
+            classifier: BinaryClassifier = MLPBinaryClassifier(
+                seed=CFG.seed,
+                balance_data=CFG.mlp_classifier_balance_data,
+                max_train_iters=CFG.sampler_mlp_classifier_max_itr,
+                learning_rate=CFG.learning_rate,
+                weight_decay=CFG.weight_decay,
+                use_torch_gpu=CFG.use_torch_gpu,
+                train_print_every=CFG.pytorch_train_print_every,
+                n_iter_no_change=CFG.mlp_classifier_n_iter_no_change,
+                hid_sizes=CFG.mlp_classifier_hid_sizes,
+                n_reinitialize_tries=CFG.
+                sampler_mlp_classifier_n_reinitialize_tries,
+                weight_init="default")
+        else:
+            assert CFG.active_sampler_learning_model.endswith("knn")
+            n_neighbors = min(len(X_arr_classifier),
+                              CFG.active_sampler_learning_knn_neighbors)
+            classifier = KNeighborsClassifier(seed=CFG.seed,
+                                              n_neighbors=n_neighbors)
         classifier.fit(X_arr_classifier, y_arr_classifier)
 
         # Save the sampler classifier for external analysis.
@@ -281,6 +288,11 @@ def _learn_nsrt_sampler(self, nsrt_data: _OptionSamplerDataset,
         with open(save_path, "wb") as f:
             pkl.dump(classifier, f)
         logging.info(f"Saved sampler classifier to {save_path}.")
+        save_path = f"{approach_save_path}_{nsrt.name}_" + \
+            f"{self._online_learning_cycle}.sampler_classifier_data"
+        with open(save_path, "wb") as f:
+            pkl.dump((X_arr_classifier, y_arr_classifier), f)
+        logging.info(f"Saved sampler classifier data to {save_path}.")
 
         # Easiest way to access the base sampler.
         base_sampler = nsrt._sampler  # pylint: disable=protected-access
@@ -297,17 +309,14 @@ class _ClassifierEnsembleWrappedSamplerLearner(_WrappedSamplerLearner):
 
     def _learn_nsrt_sampler(self, nsrt_data: _OptionSamplerDataset,
                             nsrt: NSRT) -> Tuple[NSRTSampler, NSRTSampler]:
-        X_classifier: List[List[Array]] = []
+        X_classifier: List[Array] = []
         y_classifier: List[int] = []
         for state, option, _, label in nsrt_data:
             objects = option.objects
             params = option.params
-            # input is state features and option parameters
-            X_classifier.append([np.array(1.0)])  # start with bias term
-            for obj in objects:
-                X_classifier[-1].extend(state[obj])
-            X_classifier[-1].extend(params)
-            assert not CFG.sampler_learning_use_goals
+            x_arr = utils.construct_active_sampler_input(
+                state, objects, params, option.parent)
+            X_classifier.append(x_arr)
             y_classifier.append(label)
         X_arr_classifier = np.array(X_classifier)
         # output is binary signal
@@ -443,17 +452,14 @@ def _sample_options_from_state(self,
         return sampled_options
 
     def _fit_regressor(self, nsrt_data: _OptionSamplerDataset) -> MLPRegressor:
-        X_regressor: List[List[Array]] = []
+        X_regressor: List[Array] = []
         y_regressor: List[Array] = []
         for state, option, _, target in nsrt_data:
             objects = option.objects
             params = option.params
-            # input is state features and option parameters
-            X_regressor.append([np.array(1.0)])  # start with bias term
-            for obj in objects:
-                X_regressor[-1].extend(state[obj])
-            X_regressor[-1].extend(params)
-            assert not CFG.sampler_learning_use_goals
+            x_arr = utils.construct_active_sampler_input(
+                state, objects, params, option.parent)
+            X_regressor.append(x_arr)
             y_regressor.append(np.array([target]))
         X_arr_regressor = np.array(X_regressor)
         y_arr_regressor = np.array(y_regressor)
@@ -500,19 +506,18 @@ def _vector_score_fn_to_score_fn(vector_fn: Callable[[Array], float],
 
     def _score_fn(state: State, objects: Sequence[Object],
                   param_lst: List[Array]) -> List[float]:
-        x_lst: List[Any] = [1.0]  # start with bias term
-        sub = dict(zip(nsrt.parameters, objects))
-        for var in nsrt.parameters:
-            x_lst.extend(state[sub[var]])
-        assert not CFG.sampler_learning_use_goals
-        x = np.array(x_lst)
-        scores = [vector_fn(np.r_[x, p]) for p in param_lst]
+        xs = [
+            utils.construct_active_sampler_input(state, objects, p,
+                                                 nsrt.option)
+            for p in param_lst
+        ]
+        scores = [vector_fn(x) for x in xs]
         return scores
 
     return _score_fn
 
 
-def _classifier_to_score_fn(classifier: MLPBinaryClassifier,
+def _classifier_to_score_fn(classifier: BinaryClassifier,
                             nsrt: NSRT) -> _ScoreFn:
     return _vector_score_fn_to_score_fn(classifier.predict_proba, nsrt)
 
diff --git a/predicators/ml_models.py b/predicators/ml_models.py
index 724f76ba73..a40ed1a257 100644
--- a/predicators/ml_models.py
+++ b/predicators/ml_models.py
@@ -278,6 +278,9 @@ def classify(self, x: Array) -> bool:
 
     def predict_proba(self, x: Array) -> float:
         probs = self._model.predict_proba([x])[0]
+        # Special case: only one class.
+        if probs.shape == (1, ):
+            return float(self.classify(x))
         assert probs.shape == (2, )  # [P(x is class 0), P(x is class 1)]
         return probs[1]  # return the second element of probs
 
diff --git a/predicators/settings.py b/predicators/settings.py
index b3a7660c55..76021d005b 100644
--- a/predicators/settings.py
+++ b/predicators/settings.py
@@ -518,7 +518,9 @@ class GlobalSettings:
     online_learning_max_novelty_count = 0
 
     # active sampler learning parameters
-    active_sampler_learning_model = "myopic_classifier"
+    active_sampler_learning_model = "myopic_classifier_mlp"
+    active_sampler_learning_feature_selection = "all"
+    active_sampler_learning_knn_neighbors = 3
     active_sampler_learning_use_teacher = True
     active_sampler_learning_num_samples = 100
     active_sampler_learning_score_gamma = 0.5
diff --git a/predicators/utils.py b/predicators/utils.py
index 03e8ef3153..92025cbe4a 100644
--- a/predicators/utils.py
+++ b/predicators/utils.py
@@ -255,6 +255,45 @@ def create_json_dict_from_task(task: Task) -> Dict[str, Any]:
     return {"objects": object_dict, "init": init_dict, "goal": goal_dict}
 
 
+def construct_active_sampler_input(state: State, objects: Sequence[Object],
+                                   params: Array,
+                                   param_option: ParameterizedOption) -> Array:
+    """Helper function for active sampler learning and explorer."""
+
+    assert not CFG.sampler_learning_use_goals
+    sampler_input_lst = [1.0]  # start with bias term
+    if CFG.active_sampler_learning_feature_selection == "all":
+        for obj in objects:
+            sampler_input_lst.extend(state[obj])
+        sampler_input_lst.extend(params)
+
+    else:  # hand-picked features; asserted below to be bumpy_cover only
+        assert CFG.active_sampler_learning_feature_selection == "oracle"
+        assert CFG.env == "bumpy_cover"
+        if param_option.name == "Pick":
+            # In this case, the x-data (after the bias term) should be
+            # [block_bumpy, relative_pick_loc]
+            assert len(objects) == 1
+            block = objects[0]
+            block_pos = state[block][3]
+            block_bumpy = state[block][5]
+            sampler_input_lst.append(block_bumpy)
+            assert len(params) == 1
+            sampler_input_lst.append(params[0] - block_pos)
+        else:  # x-data after bias: [grasp, target_width, relative_place_loc]
+            assert param_option.name == "Place"
+            assert len(objects) == 2
+            block, target = objects
+            target_pos = state[target][3]
+            grasp = state[block][4]
+            target_width = state[target][2]
+            sampler_input_lst.extend([grasp, target_width])
+            assert len(params) == 1
+            sampler_input_lst.append(params[0] - target_pos)
+
+    return np.array(sampler_input_lst)
+
+
 class _Geom2D(abc.ABC):
     """A 2D shape that contains some points."""
 
diff --git a/scripts/configs/active_sampler_learning.yaml b/scripts/configs/active_sampler_learning.yaml
index 696a790f5c..32f95e51e5 100644
--- a/scripts/configs/active_sampler_learning.yaml
+++ b/scripts/configs/active_sampler_learning.yaml
@@ -33,15 +33,15 @@ NUM_SEEDS: 10
 # with teacher.
 ---
 APPROACHES:
-  myopic_classifier:
+  myopic_classifier_mlp:
     NAME: "active_sampler_learning"
     FLAGS:
-      active_sampler_learning_model: "myopic_classifier"
+      active_sampler_learning_model: "myopic_classifier_mlp"
       active_sampler_learning_use_teacher: False
   teacher_classifier:
     NAME: "active_sampler_learning"
     FLAGS:
-      active_sampler_learning_model: "myopic_classifier"
+      active_sampler_learning_model: "myopic_classifier_mlp"
       active_sampler_learning_use_teacher: True
   fitted_q:
     NAME: "active_sampler_learning"
@@ -75,7 +75,7 @@ NUM_SEEDS: 10
 # Experiment comparing random_nsrts exploration to active_sampler exploration
 # both with and without an ensemble in the regional bumpy cover environment.
 # Use fewer online learning requests per cycle because we're only learning
-the pick bumpy classifier.
+# the pick bumpy classifier.
 ---
 APPROACHES:
   # No ensemble approach
@@ -123,3 +123,49 @@ FLAGS:
   num_online_learning_cycles: 25
 START_SEED: 123
 NUM_SEEDS: 10
+
+# Compare manual feature design to default feature design, with MLP vs KNN.
+---
+APPROACHES:
+  mlp_manual_features:
+    NAME: "active_sampler_learning"
+    FLAGS:
+      active_sampler_learning_model: "myopic_classifier_mlp"
+      active_sampler_learning_feature_selection: "oracle"
+  mlp_all_features:
+    NAME: "active_sampler_learning"
+    FLAGS:
+      active_sampler_learning_model: "myopic_classifier_mlp"
+      active_sampler_learning_feature_selection: "all"
+  knn_manual_features:
+    NAME: "active_sampler_learning"
+    FLAGS:
+      active_sampler_learning_model: "myopic_classifier_knn"
+      active_sampler_learning_feature_selection: "oracle"
+  knn_all_features:
+    NAME: "active_sampler_learning"
+    FLAGS:
+      active_sampler_learning_model: "myopic_classifier_knn"
+      active_sampler_learning_feature_selection: "all"
+ENVS:
+  bumpy_cover:
+    NAME: "bumpy_cover"
+ARGS:
+  - "debug"
+FLAGS:
+  active_sampler_learning_use_teacher: False
+  strips_learner: "oracle"
+  sampler_learner: "oracle"
+  bilevel_plan_without_sim: "True"
+  max_initial_demos: 0
+  num_train_tasks: 1000
+  num_test_tasks: 100
+  max_num_steps_interaction_request: 50
+  sampler_mlp_classifier_max_itr: 100000
+  mlp_classifier_balance_data: False
+  pytorch_train_print_every: 10000
+  explorer: "random_nsrts"  # NOTE
+  online_nsrt_learning_requests_per_cycle: 1
+  num_online_learning_cycles: 50
+START_SEED: 123
+NUM_SEEDS: 10
diff --git a/scripts/plotting/create_active_sampler_learning_plots.py b/scripts/plotting/create_active_sampler_learning_plots.py
index 49868be04d..be0fbd834b 100644
--- a/scripts/plotting/create_active_sampler_learning_plots.py
+++ b/scripts/plotting/create_active_sampler_learning_plots.py
@@ -58,7 +58,7 @@
     ],
     "Shifted Bumpy Cover": [
         ("Myopic Classifier", "green", lambda df: df["EXPERIMENT_ID"].apply(
-            lambda v: "bumpy_cover-myopic_classifier" in v)),
+            lambda v: "bumpy_cover-myopic_classifier_mlp" in v)),
         ("Fitted Q", "purple", lambda df: df["EXPERIMENT_ID"].apply(
             lambda v: "bumpy_cover-fitted_q" in v)),
         ("Teacher Classifier", "brown", lambda df: df["EXPERIMENT_ID"].apply(
@@ -72,6 +72,16 @@
         ("Random Explore", "red", lambda df: df["EXPERIMENT_ID"].apply(
             lambda v: "regional_bumpy_cover-random-explore" in v)),
     ],
+    "Bumpy Cover Feature Design": [
+        ("All Feats (MLP)", "black", lambda df: df["EXPERIMENT_ID"].apply(
+            lambda v: "bumpy_cover-mlp_all_features" in v)),
+        ("Oracle Feats (MLP)", "green", lambda df: df["EXPERIMENT_ID"].apply(
+            lambda v: "bumpy_cover-mlp_manual_features" in v)),
+        ("All Feats (KNN)", "blue", lambda df: df["EXPERIMENT_ID"].apply(
+            lambda v: "bumpy_cover-knn_all_features" in v)),
+        ("Oracle Feats (KNN)", "red", lambda df: df["EXPERIMENT_ID"].apply(
+            lambda v: "bumpy_cover-knn_manual_features" in v)),
+    ],
 }
 
 # If True, add (0, 0) to every plot.
diff --git a/tests/approaches/test_active_sampler_learning_approach.py b/tests/approaches/test_active_sampler_learning_approach.py
index a1e76d9ccd..6e310ac45c 100644
--- a/tests/approaches/test_active_sampler_learning_approach.py
+++ b/tests/approaches/test_active_sampler_learning_approach.py
@@ -16,18 +16,22 @@
 from predicators.teacher import Teacher
 
 
-@pytest.mark.parametrize("model_name,right_targets,num_demo",
-                         [("myopic_classifier", False, 0),
-                          ("myopic_classifier", True, 1),
-                          ("myopic_classifier_ensemble", False, 0),
-                          ("myopic_classifier_ensemble", False, 1),
-                          ("fitted_q", False, 0), ("fitted_q", True, 0)])
-def test_active_sampler_learning_approach(model_name, right_targets, num_demo):
+@pytest.mark.parametrize("model_name,right_targets,num_demo,feat_type",
+                         [("myopic_classifier_mlp", False, 0, "all"),
+                          ("myopic_classifier_mlp", True, 1, "all"),
+                          ("myopic_classifier_ensemble", False, 0, "all"),
+                          ("myopic_classifier_ensemble", False, 1, "all"),
+                          ("fitted_q", False, 0, "all"),
+                          ("fitted_q", True, 0, "all"),
+                          ("myopic_classifier_knn", False, 0, "oracle")])
+def test_active_sampler_learning_approach(model_name, right_targets, num_demo,
+                                          feat_type):
     """Test for ActiveSamplerLearningApproach class, entire pipeline."""
     utils.reset_config({
         "env": "bumpy_cover",
         "approach": "active_sampler_learning",
         "active_sampler_learning_model": model_name,
+        "active_sampler_learning_feature_selection": feat_type,
         "timeout": 10,
         "strips_learner": "oracle",
         "sampler_learner": "oracle",

From c6f95e96590356e675b0905fb4d9174cfec03696 Mon Sep 17 00:00:00 2001
From: Lilian Luong <43945489+lilianluong@users.noreply.github.com>
Date: Tue, 4 Jul 2023 18:48:58 -0700
Subject: [PATCH 2/5] Refinement cost learning changes (#1492)

* [refinement estimation] collect training data by timing refinement for each abstract action separately

* [refinement estimation] GNN refinement estimator

* [refinement estimation] formatting / minor changes to pass checks

* [refinement estimation] shuffle training data + add weight decay for GNN estimator

* [refinement estimation] disable normalization of some parts of the graph if desired

* [refinement estimation] disable normalization of some parts of the graph if desired

* [gnn refinement] autoformat/typing

* [refinement learning] tests for GNNRefinementEstimator

* [gnn refinement] improved tests for GNNRefinementEstimator

* [gnn refinement] Encode the NSRT parameters in GNNRefinementEstimator graph inputs

* [gnn refinement] minor edits after PR review

* [gnn refinement] autoformat

* [gnn refinement] autoformat

* [exit garage] simplified exit_garage by compressing PickupObstacle and StoreObstacle into one option/nsrt ClearObstacle

* [exit_garage] plot carried obstacle on top of others

* [refinement learning] include execution cost in cost estimation, generate all skeletons before collecting refinement data

* [exit_garage] bugfixes/coverage tests/autoformatting for exit_garage

* [scripts] supercloud launch scripts/configs for refinement estimation

* [supercloud scripts] Add SKIP argument to envs and approaches in supercloud configs

* [refinement estimation] Option to save dataset at intervals while collecting, using flag --refinement_data_save_every

* [refinement learning] option to pass in a number of data points to use instead of the full dataset

* [refinement learning] actually pass in a fraction of the dataset to use during training, not an absolute number

* [refinement learning] Added supercloud launch config for experiments/evaluation for refinement cost learning

* [refinement learning] fix scripts, add print_every for CNN training

* [scripts] local launch.py entry point addition + use_torch_gpu true for refinement learning

* [refinement learning] add gnn specs to fixed_passage experiment

* [refinement learning] fix for cnn with gpu

* [gnn refinement] fix for gnn with gpu

* [gnn refinement] coverage test fix for gnn with GPU

* [gpu usage] reassign CUDA_VISIBLE_DEVICES if necessary in utils.py, revert CNNRefinementEstimator 'fix'

* [gpu usage] ignore coverage tests for the CUDA_VISIBLE_DEVICES fix

* [gnn_utils] handle empty edges, nodes, globals automatically

* [exit_garage] exit_garage adjustments

* minor changes

* [refinement learning] learn to output time and low level action count separately then combine in get_cost()

* [gnn refinement] bugfix

* [refinement learning] refinement cost in results analysis

* [refinement learning] bugfix

* [refinement learning] bugfix

* updated refinement cost learning config files

* hacky fix for the delay issue for the first test of GNN/CNN estimators

* checks

* bugfix

* one fix

* fix segmentation

* add back

---------

Co-authored-by: Tom Silver <tomssilver@gmail.com>
---
 .../approaches/bilevel_planning_approach.py   |   2 +-
 predicators/envs/exit_garage.py               |  39 +-
 predicators/gnn/gnn.py                        |   9 +-
 predicators/gnn/gnn_utils.py                  |  81 ++--
 .../ground_truth_models/exit_garage/nsrts.py  |  40 +-
 .../exit_garage/options.py                    | 120 +++---
 predicators/main.py                           |   8 +
 predicators/nsrt_learning/segmentation.py     |   4 +-
 predicators/planning.py                       |  52 ++-
 .../base_refinement_estimator.py              |   3 +
 .../cnn_refinement_estimator.py               |  19 +-
 .../gnn_refinement_estimator.py               | 364 ++++++++++++++++++
 .../oracle_refinement_estimator.py            |  21 +-
 .../per_skeleton_estimator.py                 |   4 +
 .../tabular_refinement_estimator.py           |  32 +-
 predicators/settings.py                       |  13 +-
 predicators/structs.py                        |   2 +-
 predicators/train_refinement_estimator.py     | 112 ++++--
 predicators/utils.py                          |   8 +
 scripts/analyze_results_directory.py          |   1 +
 scripts/cluster_utils.py                      |  15 +-
 scripts/configs/refinement_cost_learning.yaml |  76 ++++
 .../refinement_cost_learning_test.yaml        |  83 ++++
 scripts/local/launch.py                       |   6 +-
 scripts/supercloud/launch.py                  |  10 +-
 scripts/supercloud/submit_supercloud_job.py   |  10 +-
 tests/approaches/test_oracle_approach.py      |   4 +-
 tests/envs/test_exit_garage.py                |  89 ++---
 tests/nsrt_learning/test_segmentation.py      |   2 +-
 .../test_base_refinement_estimator.py         |   2 +
 .../test_cnn_refinement_estimator.py          |  15 +-
 .../test_gnn_refinement_estimator.py          | 220 +++++++++++
 .../test_oracle_refinement_estimator.py       |  25 +-
 .../test_tabular_refinement_estimator.py      |  26 +-
 tests/test_train_refinement_estimator.py      |  33 +-
 35 files changed, 1198 insertions(+), 352 deletions(-)
 create mode 100644 predicators/refinement_estimators/gnn_refinement_estimator.py
 create mode 100644 scripts/configs/refinement_cost_learning.yaml
 create mode 100644 scripts/configs/refinement_cost_learning_test.yaml
 create mode 100644 tests/refinement_estimators/test_gnn_refinement_estimator.py

diff --git a/predicators/approaches/bilevel_planning_approach.py b/predicators/approaches/bilevel_planning_approach.py
index e101f7726a..6466e5bde4 100644
--- a/predicators/approaches/bilevel_planning_approach.py
+++ b/predicators/approaches/bilevel_planning_approach.py
@@ -150,7 +150,7 @@ def _save_metrics(self, metrics: Metrics, nsrts: Set[NSRT],
         for metric in [
                 "num_samples", "num_skeletons_optimized",
                 "num_failures_discovered", "num_nodes_expanded",
-                "num_nodes_created", "plan_length"
+                "num_nodes_created", "plan_length", "refinement_time"
         ]:
             self._metrics[f"total_{metric}"] += metrics[metric]
         self._metrics["total_num_nsrts"] += len(nsrts)
diff --git a/predicators/envs/exit_garage.py b/predicators/envs/exit_garage.py
index 4f5fb2c82e..2e5effc213 100644
--- a/predicators/envs/exit_garage.py
+++ b/predicators/envs/exit_garage.py
@@ -51,7 +51,7 @@ class ExitGarageEnv(BaseEnv):
     robot_starting_y: ClassVar[float] = 0.8
     obstacle_area_left_padding: ClassVar[float] = 0.4
     obstacle_area_right_padding: ClassVar[float] = 0.1
-    obstacle_area_vertical_padding: ClassVar[float] = 0.1
+    obstacle_area_vertical_padding: ClassVar[float] = 0.05
     car_starting_x: ClassVar[float] = 0.15
     car_starting_y: ClassVar[float] = 0.3
 
@@ -64,7 +64,7 @@ class ExitGarageEnv(BaseEnv):
     _robot_type = Type("robot", ["x", "y", "carrying"])  # carrying: bool
     _obstacle_type = Type("obstacle", ["x", "y", "carried"])  # carried: bool
     # Convenience type for storage area, storing number of obstacles in it
-    # This is used in the StoreObstacle option to calculate where to place the
+    # This is used in the ClearObstacle option to calculate where to place
     # a new obstacle in the storage area.
     _storage_type = Type("storage", ["num_stored"])
 
@@ -73,12 +73,6 @@ def __init__(self, use_gui: bool = True) -> None:
         # Predicates
         self._CarHasExited = Predicate("CarHasExited", [self._car_type],
                                        self._CarHasExited_holds)
-        self._CarryingObstacle = Predicate(
-            "CarryingObstacle", [self._robot_type, self._obstacle_type],
-            self._CarryingObstacle_holds)
-        self._NotCarryingObstacle = Predicate("NotCarryingObstacle",
-                                              [self._robot_type],
-                                              self._NotCarryingObstacle_holds)
         self._ObstacleCleared = Predicate("ObstacleCleared",
                                           [self._obstacle_type],
                                           self._ObstacleCleared_holds)
@@ -150,8 +144,7 @@ def simulate(self, state: State, action: Action) -> State:
             else:
                 # Place the current obstacle if in storage area and there is
                 # no collision caused by doing so
-                if ry > 1.0 - self.storage_area_height and not \
-                        self._placed_object_collides(state, rx, ry):
+                if ry > 1.0 - self.storage_area_height:
                     next_state.set(carried_obstacle, "x", rx)
                     next_state.set(carried_obstacle, "y", ry)
                     next_state.set(carried_obstacle, "carried", 0)
@@ -179,9 +172,7 @@ def _generate_test_tasks(self) -> List[EnvironmentTask]:
     @property
     def predicates(self) -> Set[Predicate]:
         return {
-            self._CarHasExited, self._CarryingObstacle,
-            self._NotCarryingObstacle, self._ObstacleCleared,
-            self._ObstacleNotCleared
+            self._CarHasExited, self._ObstacleCleared, self._ObstacleNotCleared
         }
 
     @property
@@ -232,6 +223,7 @@ def render_state_plt(
         self._exit_geom.plot(ax, color=exit_color)
 
         # Draw obstacles
+        carried_obstacle_geom: Optional[utils.Circle] = None
         for obstacle in state.get_objects(self._obstacle_type):
             if state.get(obstacle, "carried") == 1:
                 # Obstacle is being carried, so draw it under the robot instead
@@ -240,11 +232,12 @@ def render_state_plt(
                 robot_y = state.get(self._robot, "y")
                 carried_obstacle_geom = utils.Circle(robot_x, robot_y,
                                                      self.obstacle_radius)
-                carried_obstacle_geom.plot(ax, color=carried_color)
             else:
                 # Obstacle is not being carried, just draw normally
                 obstacle_geom = self._object_to_geom(obstacle, state)
                 obstacle_geom.plot(ax, color=obstacle_color)
+        if carried_obstacle_geom:
+            carried_obstacle_geom.plot(ax, color=carried_color)
 
         # Draw robot
         robot_geom = self._object_to_geom(self._robot, state)
@@ -333,18 +326,6 @@ def _CarHasExited_holds(self, state: State,
         car_geom = self._object_to_geom(car, state)
         return car_geom.intersects(self._exit_geom)
 
-    def _CarryingObstacle_holds(self, state: State,
-                                objects: Sequence[Object]) -> bool:
-        robot, obstacle = objects
-        robot_carrying_something = state.get(robot, "carrying") == 1
-        obstacle_is_carried = state.get(obstacle, "carried") == 1
-        return robot_carrying_something and obstacle_is_carried
-
-    def _NotCarryingObstacle_holds(self, state: State,
-                                   objects: Sequence[Object]) -> bool:
-        robot, = objects
-        return state.get(robot, "carrying") == 0
-
     def _ObstacleCleared_holds(self, state: State,
                                objects: Sequence[Object]) -> bool:
         obstacle, = objects
@@ -398,12 +379,6 @@ def get_car_collision_object(cls, state: State) -> Optional[Object]:
                 return obstacle
         return None
 
-    @classmethod
-    def _placed_object_collides(cls, state: State, new_x: float,
-                                new_y: float) -> bool:
-        """Returns True if an obstacle placed at (new_x, new_y) would collide
-        with an existing obstacle in the storage area."""
-
     @classmethod
     def _robot_carrying_obstacle(cls, state: State) -> Optional[Object]:
         """If the robot is currently carrying an obstacle, return it; else
diff --git a/predicators/gnn/gnn.py b/predicators/gnn/gnn.py
index be0e97e6f8..fc3d7e51a4 100755
--- a/predicators/gnn/gnn.py
+++ b/predicators/gnn/gnn.py
@@ -45,7 +45,7 @@ def _aggregation_func(graph: Dict) -> Tuple[torch.Tensor, Array]:
 
 def _prepare_receiver_matrix(graph: Dict) -> torch.Tensor:
     num_nodes = graph['nodes'].size()[0]
-    columns = torch.arange(0, num_nodes).long()
+    columns = torch.arange(0, num_nodes).long().to(graph['nodes'].device)
     rec_m = graph['receivers'].view(-1)[:, None] == columns
     return rec_m.float()
 
@@ -53,10 +53,11 @@ def _prepare_receiver_matrix(graph: Dict) -> torch.Tensor:
 def _aggregate_globals(graph: Dict, global_node_idxs: Array,
                        global_edge_idxs: Array) -> torch.Tensor:
     num_graphs = graph['globals'].size()[0]
-    columns = torch.arange(0, num_graphs).long()
+    device = graph['globals'].device
+    columns = torch.arange(0, num_graphs).long().to(device)
 
-    node_idxs = torch.LongTensor(global_node_idxs)[:, None]
-    edge_idxs = torch.LongTensor(global_edge_idxs)[:, None]
+    node_idxs = torch.LongTensor(global_node_idxs)[:, None].to(device)
+    edge_idxs = torch.LongTensor(global_edge_idxs)[:, None].to(device)
 
     nodes_agg = torch.mm(graph['nodes'].t(),
                          (node_idxs == columns).float()).t()
diff --git a/predicators/gnn/gnn_utils.py b/predicators/gnn/gnn_utils.py
index 103c5e5ebc..3a6f069a04 100755
--- a/predicators/gnn/gnn_utils.py
+++ b/predicators/gnn/gnn_utils.py
@@ -5,7 +5,7 @@
 import collections
 import logging
 import time
-from typing import Any, Callable, Dict, List, OrderedDict, Tuple
+from typing import Any, Callable, Dict, List, Optional, OrderedDict, Tuple
 
 import numpy as np
 import torch
@@ -14,13 +14,17 @@
 from predicators.structs import Array
 
 
-def train_model(model: Any, dataloaders: Dict,
-                optimizer: torch.optim.Optimizer,
-                criterion: Callable[[torch.Tensor, torch.Tensor],
-                                    torch.Tensor],
-                global_criterion: Callable[[torch.Tensor, torch.Tensor],
-                                           torch.Tensor], num_epochs: int,
-                do_validation: bool) -> OrderedDict[str, torch.Tensor]:
+def train_model(
+    model: Any,
+    dataloaders: Dict,
+    optimizer: torch.optim.Optimizer,
+    criterion: Optional[Callable[[torch.Tensor, torch.Tensor], torch.Tensor]],
+    global_criterion: Optional[Callable[[torch.Tensor, torch.Tensor],
+                                        torch.Tensor]],
+    num_epochs: int,
+    do_validation: bool,
+    device: Optional[torch.device] = None,
+) -> OrderedDict[str, torch.Tensor]:
     """Optimize the model and save checkpoints."""
     since = time.perf_counter()
 
@@ -32,7 +36,7 @@ def train_model(model: Any, dataloaders: Dict,
 
     for epoch in range(num_epochs):
         if epoch % 100 == 0:
-            logging.info(f'Epoch {epoch}/{num_epochs-1}')
+            logging.info(f'Epoch {epoch}/{num_epochs - 1}')
             logging.info('-' * 10)
         # Each epoch has a training and validation phase
         if epoch % 100 == 0 and do_validation:
@@ -64,6 +68,8 @@ def train_model(model: Any, dataloaders: Dict,
                 output = outputs[-1]
 
                 loss = torch.tensor(0.0)
+                if device is not None:
+                    loss = loss.to(device)
                 if criterion is not None:
                     loss += criterion(output['nodes'], targets['nodes'])
 
@@ -84,7 +90,7 @@ def train_model(model: Any, dataloaders: Dict,
             logging.info(f"running_loss: {running_loss}")
 
             if do_validation and \
-               running_loss['val'] < best_seen_running_validation_loss:
+                    running_loss['val'] < best_seen_running_validation_loss:
                 best_seen_running_validation_loss = running_loss['val']
                 best_seen_model_weights = model.state_dict()
                 best_seen_model_train_loss = running_loss['train']
@@ -110,7 +116,12 @@ def train_model(model: Any, dataloaders: Dict,
     return best_seen_model_weights
 
 
-def compute_normalizers(data: List[Dict]) -> Dict[str, Tuple[Array, Array]]:
+def compute_normalizers(
+    data: List[Dict],
+    normalize_nodes: bool = True,
+    normalize_edges: bool = True,
+    normalize_globals: bool = True,
+) -> Dict[str, Tuple[Array, Array]]:
     """Compute the normalizers of the given list of graphs.
 
     These can be passed into normalize_graph.
@@ -123,14 +134,17 @@ def compute_normalizers(data: List[Dict]) -> Dict[str, Tuple[Array, Array]]:
     node_data = np.array(node_data_lst)
     edge_data = np.array(edge_data_lst)
     global_data = np.array(global_data_lst)
-    node_normalizers = _compute_normalizer_array(node_data)
-    edge_normalizers = _compute_normalizer_array(edge_data)
-    global_normalizers = _compute_normalizer_array(global_data)
-    return {
-        "nodes": node_normalizers,
-        "edges": edge_normalizers,
-        "globals": global_normalizers
-    }
+    normalizers = {}
+    if normalize_nodes and len(node_data):
+        node_normalizers = _compute_normalizer_array(node_data)
+        normalizers["nodes"] = node_normalizers
+    if normalize_edges and len(edge_data):
+        edge_normalizers = _compute_normalizer_array(edge_data)
+        normalizers["edges"] = edge_normalizers
+    if normalize_globals and len(global_data):
+        global_normalizers = _compute_normalizer_array(global_data)
+        normalizers["globals"] = global_normalizers
+    return normalizers
 
 
 def _compute_normalizer_array(array_data: Array) -> Tuple[Array, Array]:
@@ -157,7 +171,6 @@ def normalize_graph(graph: Dict,
         if k in normalizers:
             new_graph[k] = transform(graph[k], normalizers[k])
         else:
-            assert k in ['n_node', 'n_edge', 'senders', 'receivers']
             new_graph[k] = graph[k]
     return new_graph
 
@@ -176,11 +189,13 @@ def _invert_normalize_array(array_data: Array,
     return (array_data * scale) + shift
 
 
-def get_single_model_prediction(model: Any, single_input: Dict) -> Dict:
+def get_single_model_prediction(model: Any,
+                                single_input: Dict,
+                                device: Optional[torch.device] = None) -> Dict:
     """Get a prediction from the given model on the given input."""
     model.train(False)
     model.eval()
-    inputs = _create_super_graph([single_input])
+    inputs = _create_super_graph([single_input], device=device)
     outputs = model(inputs.copy())
     graphs = split_graphs(_convert_to_data(outputs[-1]))
     assert len(graphs) == 1
@@ -216,7 +231,7 @@ def _compute_stacked_offsets(sizes: List[Array],
 def _convert_to_data(graph: Dict) -> Dict:
     for key in graph.keys():
         if graph[key] is not None:
-            graph[key] = graph[key].data
+            graph[key] = graph[key].cpu().data
     return graph
 
 
@@ -337,7 +352,8 @@ def __getitem__(self, idx: int) -> Dict:
         return sample
 
 
-def _create_super_graph(batches: List[Dict]) -> Dict:
+def _create_super_graph(batches: List[Dict],
+                        device: Optional[torch.device] = None) -> Dict:
     nodes = batches[0]['nodes']
     edges = batches[0]['edges']
     receivers = batches[0]['receivers'][:, None]
@@ -362,7 +378,7 @@ def _create_super_graph(batches: List[Dict]) -> Dict:
         num_nodes = np.vstack((num_nodes, b['n_node']))
         num_edges = np.vstack((num_edges, b['n_edge']))
 
-    return {
+    super_graph = {
         'n_node':
         torch.from_numpy(num_nodes),
         'n_edge':
@@ -378,15 +394,26 @@ def _create_super_graph(batches: List[Dict]) -> Dict:
         'globals': (torch.from_numpy(globals_).float().requires_grad_()
                     if globals_ is not None else None),
     }
+    # Move all tensors to the requested device (None entries are left as-is)
+    if device is not None:
+        for key, val in super_graph.items():
+            super_graph[key] = val.to(device) if val is not None else val
+    return super_graph
 
 
-def graph_batch_collate(batch: List[Dict]) -> Dict:
+def graph_batch_collate(batch: List[Dict],
+                        device: Optional[torch.device] = None) -> Dict:
     """Collate the given batch of graphs.
 
     Assumes batch is a dictionary where each key contains a list of
     graphs.
     """
     return {
-        key: _create_super_graph([d[key] for d in batch])
+        key: _create_super_graph([d[key] for d in batch], device=device)
         for key in batch[0]
     }
+
+
+def get_graph_batch_collate_with_device(device: torch.device) -> Callable:
+    """Return a graph_batch_collate wrapper bound to the given device."""
+    return lambda batch: graph_batch_collate(batch, device=device)
diff --git a/predicators/ground_truth_models/exit_garage/nsrts.py b/predicators/ground_truth_models/exit_garage/nsrts.py
index 237b986a33..272c467308 100644
--- a/predicators/ground_truth_models/exit_garage/nsrts.py
+++ b/predicators/ground_truth_models/exit_garage/nsrts.py
@@ -27,15 +27,12 @@ def get_nsrts(env_name: str, types: Dict[str, Type],
 
         # Predicates
         CarHasExited = predicates["CarHasExited"]
-        CarryingObstacle = predicates["CarryingObstacle"]
-        NotCarryingObstacle = predicates["NotCarryingObstacle"]
         ObstacleCleared = predicates["ObstacleCleared"]
         ObstacleNotCleared = predicates["ObstacleNotCleared"]
 
         # Options
         DriveCarToExit = options["DriveCarToExit"]
-        PickupObstacle = options["PickupObstacle"]
-        StoreObstacle = options["StoreObstacle"]
+        ClearObstacle = options["ClearObstacle"]
 
         nsrts = set()
 
@@ -63,50 +60,25 @@ def random_sampler(state: State, goal: Set[GroundAtom],
                                       option_vars, random_sampler)
         nsrts.add(drive_car_to_exit_nsrt)
 
-        # PickupObstacle
+        # ClearObstacle
         robot = Variable("?robot", robot_type)
         obstacle = Variable("?obstacle", obstacle_type)
         parameters = [robot, obstacle]
         option_vars = [robot, obstacle]
-        option = PickupObstacle
+        option = ClearObstacle
         preconditions = {
-            LiftedAtom(NotCarryingObstacle, [robot]),
             LiftedAtom(ObstacleNotCleared, [obstacle]),
         }
         add_effects = {
-            LiftedAtom(CarryingObstacle, [robot, obstacle]),
-        }
-        delete_effects = {
-            LiftedAtom(NotCarryingObstacle, [robot]),
-            LiftedAtom(ObstacleNotCleared, [obstacle]),
-        }
-        ignore_effects = set()
-        pickup_obstacle_nsrt = NSRT("PickupObstacle", parameters,
-                                    preconditions, add_effects, delete_effects,
-                                    ignore_effects, option, option_vars,
-                                    random_sampler)
-        nsrts.add(pickup_obstacle_nsrt)
-
-        # StoreObstacle
-        robot = Variable("?robot", robot_type)
-        obstacle = Variable("?obstacle", obstacle_type)
-        parameters = [robot, obstacle]
-        option_vars = [robot, obstacle]
-        option = StoreObstacle
-        preconditions = {
-            LiftedAtom(CarryingObstacle, [robot, obstacle]),
-        }
-        add_effects = {
-            LiftedAtom(NotCarryingObstacle, [robot]),
             LiftedAtom(ObstacleCleared, [obstacle]),
         }
         delete_effects = {
-            LiftedAtom(CarryingObstacle, [robot, obstacle]),
+            LiftedAtom(ObstacleNotCleared, [obstacle]),
         }
         ignore_effects = set()
-        store_obstacle_nsrt = NSRT("StoreObstacle", parameters, preconditions,
+        clear_obstacle_nsrt = NSRT("ClearObstacle", parameters, preconditions,
                                    add_effects, delete_effects, ignore_effects,
                                    option, option_vars, random_sampler)
-        nsrts.add(store_obstacle_nsrt)
+        nsrts.add(clear_obstacle_nsrt)
 
         return nsrts
diff --git a/predicators/ground_truth_models/exit_garage/options.py b/predicators/ground_truth_models/exit_garage/options.py
index 95dac21647..5da18fbb8e 100644
--- a/predicators/ground_truth_models/exit_garage/options.py
+++ b/predicators/ground_truth_models/exit_garage/options.py
@@ -31,8 +31,6 @@ def get_options(cls, env_name: str, types: Dict[str, Type],
         storage_type = types["storage"]
 
         CarHasExited = predicates["CarHasExited"]
-        CarryingObstacle = predicates["CarryingObstacle"]
-        NotCarryingObstacle = predicates["NotCarryingObstacle"]
         ObstacleCleared = predicates["ObstacleCleared"]
         ObstacleNotCleared = predicates["ObstacleNotCleared"]
 
@@ -71,7 +69,14 @@ def _goal_fn(pt: Array) -> bool:
             target_y = 0.4 - ExitGarageEnv.exit_width / 2
             target_theta = 0
             if CFG.exit_garage_motion_planning_ignore_obstacles:
-                cls._plan_direct(state, memory, params, car,
+                start_pos_list = [
+                    state.get(car, "x"),
+                    state.get(car, "y"),
+                ]
+                start_position = np.array(start_pos_list)
+                memory["action_plan"] = []
+                memory["position_plan"] = []
+                cls._plan_direct(memory, params, start_position,
                                  np.array([target_x, target_y]), 0, 1)
                 return True
             success = cls._run_rrt(state,
@@ -92,80 +97,68 @@ def _goal_fn(pt: Array) -> bool:
             terminal=_DriveCarToExit_terminal,
         )
 
-        # PickupObstacle
-        def _PickupObstacle_terminal(state: State, memory: Dict,
-                                     objects: Sequence[Object],
-                                     params: Array) -> bool:
-            del memory, params  # unused
-            return CarryingObstacle.holds(state, objects)
-
-        def _PickupObstacle_initiable(state: State, memory: Dict,
-                                      objects: Sequence[Object],
-                                      params: Array) -> bool:
-            robot, obstacle = objects
-            if not ObstacleNotCleared.holds(state, objects[1:]):
-                return False  # obstacle already picked or cleared
-            if not NotCarryingObstacle.holds(state, objects[:1]):
-                return False  # robot already carrying something else
-            # Set up the target input for the motion planner.
-            target_x = state.get(obstacle, "x")
-            target_y = state.get(obstacle, "y")
-            cls._plan_direct(state, memory, params, robot,
-                             np.array([target_x, target_y]), 2, 3)
-            # Append pickup action to memory action plan
-            memory["action_plan"].append(
-                Action(np.array([0.0, 0.0, 0.0, 0.0, 1.0], dtype=np.float32)))
-            # Picking an obstacle takes a bit of time to plan, artificially
-            time.sleep(CFG.exit_garage_pick_place_refine_penalty)
-            return True
-
-        PickupObstacle = ParameterizedOption(
-            "PickupObstacle",
-            types=[robot_type, obstacle_type],
-            params_space=Box(0, 1, (1, )),
-            policy=_motion_plan_policy,
-            initiable=_PickupObstacle_initiable,
-            terminal=_PickupObstacle_terminal,
-        )
-
-        # StoreObstacle
-        def _StoreObstacle_terminal(state: State, memory: Dict,
+        # ClearObstacle
+        def _ClearObstacle_terminal(state: State, memory: Dict,
                                     objects: Sequence[Object],
                                     params: Array) -> bool:
             del memory, params  # unused
-            return ObstacleCleared.holds(state, objects[1:])
+            _, obstacle = objects
+            return ObstacleCleared.holds(state, [obstacle])
 
-        def _StoreObstacle_initiable(state: State, memory: Dict,
+        def _ClearObstacle_initiable(state: State, memory: Dict,
                                      objects: Sequence[Object],
                                      params: Array) -> bool:
-            robot, _ = objects
-            if not CarryingObstacle.holds(state, objects):
-                return False  # obstacle isn't being carried, so can't store
+            robot, obstacle = objects
+            if not ObstacleNotCleared.holds(state, [obstacle]):
+                return False  # obstacle already cleared
+
+            memory["action_plan"] = []
+            memory["position_plan"] = []
+            start_pos_list = [
+                state.get(robot, "x"),
+                state.get(robot, "y"),
+            ]
+            start_position = np.array(start_pos_list)
+
+            # Straight-line plan to pick up obstacle
+            pickup_target_x = state.get(obstacle, "x")
+            pickup_target_y = state.get(obstacle, "y")
+            pickup_position = np.array([pickup_target_x, pickup_target_y])
+            cls._plan_direct(memory, params, start_position, pickup_position,
+                             2, 3)
+            # Append pickup action to memory action plan
+            memory["action_plan"].append(
+                Action(np.array([0.0, 0.0, 0.0, 0.0, 1.0], dtype=np.float32)))
+
+            # Straight-line plan to place obstacle
             storage, = state.get_objects(storage_type)
             num_stored = state.get(storage, "num_stored")
             # Set up the target input for the motion planner.
             target_x = (0.01 + ExitGarageEnv.obstacle_radius * 2) * num_stored
             target_x += ExitGarageEnv.obstacle_radius
-            target_y = 1.0 - ExitGarageEnv.storage_area_height / 2
-            cls._plan_direct(state, memory, params, robot,
+            target_y = (ExitGarageEnv.y_ub -
+                        ExitGarageEnv.storage_area_height / 2)
+            cls._plan_direct(memory, params, pickup_position,
                              np.array([target_x, target_y]), 2, 3)
             # Append place action to memory action plan
             memory["action_plan"].append(
                 Action(np.array([0.0, 0.0, 0.0, 0.0, 1.0], dtype=np.float32)))
-            # Placing an obstacle takes a bit of time to plan, artificially
-            time.sleep(CFG.exit_garage_pick_place_refine_penalty)
+
+            # Sleep to artificially simulate the time to plan an obstacle move
+            time.sleep(CFG.exit_garage_clear_refine_penalty)
+
             return True
 
-        StoreObstacle = ParameterizedOption(
-            "StoreObstacle",
+        ClearObstacle = ParameterizedOption(
+            "ClearObstacle",
             types=[robot_type, obstacle_type],
             params_space=Box(0, 1, (1, )),
             policy=_motion_plan_policy,
-            initiable=_StoreObstacle_initiable,
-            terminal=_StoreObstacle_terminal,
+            initiable=_ClearObstacle_initiable,
+            terminal=_ClearObstacle_terminal,
         )
 
-        return {DriveCarToExit, PickupObstacle, StoreObstacle}
+        return {DriveCarToExit, ClearObstacle}
 
     @classmethod
     def _run_rrt(cls, state: State, memory: Dict, params: Array,
@@ -195,7 +188,7 @@ def _distance_fn(from_pt: Array, to_pt: Array) -> float:
             angle_dist = (from_pt[2] - to_pt[2] + np.pi) % (2 * np.pi) - np.pi
             # We need to scale the weight of the angle for the distance down
             # because it should matter but not as much as the position diff
-            scaled_angle_dist = angle_dist / (10 * np.pi)
+            scaled_angle_dist = angle_dist / (2 * np.pi)
             distance += scaled_angle_dist**2
             return distance
 
@@ -281,9 +274,9 @@ def _collision_fn(pt: Array) -> bool:
         return True
 
     @classmethod
-    def _plan_direct(cls, state: State, memory: Dict, params: Array,
-                     move_obj: Object, target_position: Array,
-                     x_action_idx: int, y_action_idx: int) -> None:
+    def _plan_direct(cls, memory: Dict, params: Array, start_position: Array,
+                     target_position: Array, x_action_idx: int,
+                     y_action_idx: int) -> None:
         """Set position and action plans for a straight line from the starting
         position to the target position.
 
@@ -299,16 +292,11 @@ def _extend_fn(pt1: Array, pt2: Array) -> Iterator[Array]:
                 yield pt1 * (1 - i / num) + pt2 * i / num
 
         # Run planning.
-        start_pos_list = [
-            state.get(move_obj, "x"),
-            state.get(move_obj, "y"),
-        ]
-        start_position = np.array(start_pos_list)
         extender = _extend_fn(start_position, target_position)
         position_plan = [start_position] + list(extender)
         # The position plan is used for the termination check, and possibly
         # can be used for debug drawing in the rendering in the future.
-        memory["position_plan"] = position_plan
+        memory["position_plan"].extend(position_plan)
         # Convert the plan from position space to action space.
         deltas = np.subtract(position_plan[1:], position_plan[:-1])
 
@@ -319,4 +307,4 @@ def _create_action(dx: float, dy: float) -> Action:
             return Action(arr)
 
         action_plan = [_create_action(dx, dy) for (dx, dy) in deltas]
-        memory["action_plan"] = action_plan
+        memory["action_plan"].extend(action_plan)
diff --git a/predicators/main.py b/predicators/main.py
index 0109f49355..f6a72fc77e 100644
--- a/predicators/main.py
+++ b/predicators/main.py
@@ -292,6 +292,7 @@ def _run_testing(env: BaseEnv, cogman: CogMan) -> Metrics:
     num_solved = 0
     cogman.reset_metrics()
     total_suc_time = 0.0
+    total_low_level_action_cost = 0.0
     total_num_solve_timeouts = 0
     total_num_solve_failures = 0
     total_num_execution_timeouts = 0
@@ -354,6 +355,10 @@ def _run_testing(env: BaseEnv, cogman: CogMan) -> Metrics:
                 monitor=monitor)
             exec_time = execution_metrics["policy_call_time"]
             metrics[f"PER_TASK_task{test_task_idx}_exec_time"] = exec_time
+            if CFG.refinement_data_include_execution_cost:
+                total_low_level_action_cost += (
+                    len(traj[1]) *
+                    CFG.refinement_data_low_level_execution_cost)
             # Save the successful trajectory, e.g., for playback on a robot.
             traj_file = f"{save_prefix}__task{test_task_idx+1}.traj"
             traj_file_path = Path(CFG.eval_trajectories_dir) / traj_file
@@ -399,6 +404,9 @@ def _run_testing(env: BaseEnv, cogman: CogMan) -> Metrics:
     metrics["num_total"] = len(test_tasks)
     metrics["avg_suc_time"] = (total_suc_time /
                                num_solved if num_solved > 0 else float("inf"))
+    metrics["avg_ref_cost"] = ((total_low_level_action_cost +
+                                cogman.metrics["total_refinement_time"]) /
+                               num_solved if num_solved > 0 else float("inf"))
     metrics["min_num_samples"] = cogman.metrics[
         "min_num_samples"] if cogman.metrics["min_num_samples"] < float(
             "inf") else 0
diff --git a/predicators/nsrt_learning/segmentation.py b/predicators/nsrt_learning/segmentation.py
index 68e8c1c466..b6f89c190a 100644
--- a/predicators/nsrt_learning/segmentation.py
+++ b/predicators/nsrt_learning/segmentation.py
@@ -59,9 +59,7 @@ def _segment_with_contact_changes(
     elif CFG.env == "coffee":
         keep_pred_names = {"Holding", "HandEmpty", "MachineOn", "CupFilled"}
     elif CFG.env == "exit_garage":
-        keep_pred_names = {
-            "CarryingObstacle", "NotCarryingObstacle", "CarHasExited"
-        }
+        keep_pred_names = {"ObstacleCleared", "CarHasExited"}
     else:
         raise NotImplementedError("Contact-based segmentation not implemented "
                                   f"for environment {CFG.env}.")
diff --git a/predicators/planning.py b/predicators/planning.py
index 848afed46f..97e046e1e6 100644
--- a/predicators/planning.py
+++ b/predicators/planning.py
@@ -168,6 +168,7 @@ def _sesame_plan_with_astar(
                 gen = iter(
                     sorted(proposed_skeletons,
                            key=lambda s: estimator.get_cost(task, *s)))
+            refinement_start_time = time.perf_counter()
             for skeleton, atoms_sequence in gen:
                 if CFG.sesame_use_necessary_atoms:
                     atoms_seq = utils.compute_necessary_atoms_seq(
@@ -188,6 +189,8 @@ def _sesame_plan_with_astar(
                         f" samples, discovering "
                         f"{int(metrics['num_failures_discovered'])} failures")
                     metrics["plan_length"] = len(plan)
+                    metrics["refinement_time"] = (time.perf_counter() -
+                                                  refinement_start_time)
                     return plan, skeleton, metrics
                 partial_refinements.append((skeleton, plan))
                 if time.perf_counter() - start_time > timeout:
@@ -488,11 +491,17 @@ def _skeleton_generator(
     raise _SkeletonSearchTimeout
 
 
-def run_low_level_search(task: Task, option_model: _OptionModelBase,
-                         skeleton: List[_GroundNSRT],
-                         atoms_sequence: List[Set[GroundAtom]], seed: int,
-                         timeout: float, metrics: Metrics,
-                         max_horizon: int) -> Tuple[List[_Option], bool]:
+def run_low_level_search(
+    task: Task,
+    option_model: _OptionModelBase,
+    skeleton: List[_GroundNSRT],
+    atoms_sequence: List[Set[GroundAtom]],
+    seed: int,
+    timeout: float,
+    metrics: Metrics,
+    max_horizon: int,
+    refinement_time: Optional[List[float]] = None
+) -> Tuple[List[_Option], bool]:
     """Backtracking search over continuous values.
 
     Returns a sequence of options and a boolean. If the boolean is True,
@@ -515,6 +524,12 @@ def run_low_level_search(task: Task, option_model: _OptionModelBase,
         if nsrt.option.params_space.shape[0] > 0 else 1 for nsrt in skeleton
     ]
     plan: List[_Option] = [DummyOption for _ in skeleton]
+    # If an empty refinement_time list is passed, record in it the time
+    # spent refining each step of the skeleton (one entry per step)
+    if refinement_time is not None:
+        assert len(refinement_time) == 0
+        for _ in skeleton:
+            refinement_time.append(0)
     # The number of actions taken by each option in the plan. This is to
     # make sure that we do not exceed the task horizon.
     num_actions_per_option = [0 for _ in plan]
@@ -525,10 +540,12 @@ def run_low_level_search(task: Task, option_model: _OptionModelBase,
     discovered_failures: List[Optional[_DiscoveredFailure]] = [
         None for _ in skeleton
     ]
+    plan_found = False
     while cur_idx < len(skeleton):
         if time.perf_counter() - start_time > timeout:
             return longest_failed_refinement, False
         assert num_tries[cur_idx] < max_tries[cur_idx]
+        try_start_time = time.perf_counter()
         # Good debug point #2: if you have a skeleton that you think is
         # reasonable, but sampling isn't working, print num_tries here to
         # see at what step the backtracking search is getting stuck.
@@ -592,7 +609,7 @@ def run_low_level_search(task: Task, option_model: _OptionModelBase,
                     if all(a.holds(traj[cur_idx]) for a in expected_atoms):
                         can_continue_on = True
                         if cur_idx == len(skeleton):
-                            return plan, True  # success!
+                            plan_found = True
                     else:
                         can_continue_on = False
                 else:
@@ -601,11 +618,17 @@ def run_low_level_search(task: Task, option_model: _OptionModelBase,
                     can_continue_on = True
                     if cur_idx == len(skeleton):
                         if task.goal_holds(traj[cur_idx]):
-                            return plan, True  # success!
-                        can_continue_on = False
+                            plan_found = True
+                        else:
+                            can_continue_on = False
         else:
             # The option is not initiable.
             can_continue_on = False
+        if refinement_time is not None:
+            try_end_time = time.perf_counter()
+            refinement_time[cur_idx - 1] += try_end_time - try_start_time
+        if plan_found:
+            return plan, True  # success!
         if not can_continue_on:  # we got stuck, time to resample / backtrack!
             # Update the longest_failed_refinement found so far.
             if cur_idx > len(longest_failed_refinement):
@@ -616,7 +639,7 @@ def run_low_level_search(task: Task, option_model: _OptionModelBase,
             # the longest_failed_refinement first.
             possible_failure = discovered_failures[cur_idx - 1]
             if possible_failure is not None and \
-               CFG.sesame_propagate_failures == "immediately":
+                CFG.sesame_propagate_failures == "immediately":
                 raise _DiscoveredFailureException(
                     "Discovered a failure", possible_failure,
                     {"longest_failed_refinement": longest_failed_refinement})
@@ -638,7 +661,7 @@ def run_low_level_search(task: Task, option_model: _OptionModelBase,
                     # high-level search continues.
                     for possible_failure in discovered_failures:
                         if possible_failure is not None and \
-                           CFG.sesame_propagate_failures == "after_exhaust":
+                            CFG.sesame_propagate_failures == "after_exhaust":
                             raise _DiscoveredFailureException(
                                 "Discovered a failure", possible_failure, {
                                     "longest_failed_refinement":
@@ -731,7 +754,7 @@ def _update_sas_file_with_failure(discovered_failure: _DiscoveredFailure,
                 assert line.isdigit()
                 num_variables = int(line)
                 # Change num variables
-                new_sas_file_lines.append(f"{num_variables+1}\n")
+                new_sas_file_lines.append(f"{num_variables + 1}\n")
             elif "end_variable" in line:
                 count_variables += 1
                 new_sas_file_lines.append(line)
@@ -820,7 +843,7 @@ def _update_sas_file_with_failure(discovered_failure: _DiscoveredFailure,
                 # Append preconditions
                 if operator_str.replace("\n", "") == ground_op_str:
                     new_sas_file_lines.append(
-                        f"{num_precondition_conditons+1}\n")
+                        f"{num_precondition_conditons + 1}\n")
                     new_sas_file_lines.append(
                         f"{num_variables} 0\n")  # additional precondition
                 else:
@@ -831,7 +854,7 @@ def _update_sas_file_with_failure(discovered_failure: _DiscoveredFailure,
                                                         j])
                 # Append effects
                 if obj.name.lower() in operator_str:
-                    new_sas_file_lines.append(f"{num_effects+1}\n")
+                    new_sas_file_lines.append(f"{num_effects + 1}\n")
                     new_sas_file_lines.append(
                         f"0 {num_variables} -1 0\n")  # additional effect
                 else:
@@ -1066,6 +1089,7 @@ def _sesame_plan_with_fast_downward(
         try:
             necessary_atoms_seq = utils.compute_necessary_atoms_seq(
                 skeleton, atoms_sequence, task.goal)
+            refinement_start_time = time.perf_counter()
             plan, suc = run_low_level_search(task, option_model, skeleton,
                                              necessary_atoms_seq, seed,
                                              low_level_timeout, metrics,
@@ -1075,6 +1099,8 @@ def _sesame_plan_with_fast_downward(
                     raise PlanningTimeout("Planning timed out in refinement!")
                 raise PlanningFailure("Skeleton produced by FD not refinable!")
             metrics["plan_length"] = len(plan)
+            metrics["refinement_time"] = (time.perf_counter() -
+                                          refinement_start_time)
             return plan, skeleton, metrics
         except _DiscoveredFailureException as e:
             metrics["num_failures_discovered"] += 1
diff --git a/predicators/refinement_estimators/base_refinement_estimator.py b/predicators/refinement_estimators/base_refinement_estimator.py
index 02feabc61a..ce4936190f 100644
--- a/predicators/refinement_estimators/base_refinement_estimator.py
+++ b/predicators/refinement_estimators/base_refinement_estimator.py
@@ -4,6 +4,8 @@
 from pathlib import Path
 from typing import List, Set
 
+import numpy as np
+
 from predicators.envs import get_or_create_env
 from predicators.settings import CFG
 from predicators.structs import GroundAtom, Task, _GroundNSRT
@@ -14,6 +16,7 @@ class BaseRefinementEstimator(abc.ABC):
 
     def __init__(self) -> None:
         self._env = get_or_create_env(CFG.env)
+        self._rng = np.random.default_rng(CFG.seed)
 
     @classmethod
     @abc.abstractmethod
diff --git a/predicators/refinement_estimators/cnn_refinement_estimator.py b/predicators/refinement_estimators/cnn_refinement_estimator.py
index 2080d33c4e..bc595a11ed 100644
--- a/predicators/refinement_estimators/cnn_refinement_estimator.py
+++ b/predicators/refinement_estimators/cnn_refinement_estimator.py
@@ -26,8 +26,12 @@ def get_name(cls) -> str:
 
     def _model_predict(self, model: CNNRegressor, initial_task: Task) -> float:
         input_img = self._get_rendered_initial_state(initial_task)
-        cost = model.predict(input_img)
-        return cost[0]
+        refinement_time, low_level_count = model.predict(input_img)
+        cost = refinement_time
+        if CFG.refinement_data_include_execution_cost:
+            cost += (low_level_count *
+                     CFG.refinement_data_low_level_execution_cost)
+        return cost
 
     def train(self, data: List[RefinementDatapoint]) -> None:
         """Train the CNN regressors on the data points for that skeleton,
@@ -36,17 +40,18 @@ def train(self, data: List[RefinementDatapoint]) -> None:
         # Go through data and group them by skeleton
         grouped_input_imgs = defaultdict(list)
         grouped_targets = defaultdict(list)
-        for task, skeleton, atoms_sequence, succeeded, refinement_time in data:
+        for (task, skeleton, atoms_sequence, succeeded, refinement_time,
+             low_level_count) in data:
             # Convert skeleton and atoms_sequence into an immutable dict key
             key = self._immutable_model_dict_key(skeleton, atoms_sequence)
             # Render the initial state for use as an input image matrix
             img = self._get_rendered_initial_state(task)
             grouped_input_imgs[key].append(img)
             # Compute target value from refinement time and possible failure
-            value = refinement_time
+            target_time = sum(refinement_time)
             if not succeeded:
-                value += CFG.refinement_data_failed_refinement_penalty
-            grouped_targets[key].append([value])
+                target_time += CFG.refinement_data_failed_refinement_penalty
+            grouped_targets[key].append([target_time, sum(low_level_count)])
 
         # For each (skeleton, atoms_sequence) key, fit a CNNRegressor
         self._model_dict = {}
@@ -55,7 +60,7 @@ def train(self, data: List[RefinementDatapoint]) -> None:
             X = np.stack(grouped_input_imgs[key])
             assert len(X.shape) == 4  # expect (N, 3, H, W)
             Y = np.array(grouped_targets[key])
-            assert Y.shape == (X.shape[0], 1)
+            assert Y.shape == (X.shape[0], 2)
             model = self._create_regressor()
             logging.info(f"Training CNN for skeleton {i}/{total_num_keys} "
                          f"using {X.shape[0]} data points...")
diff --git a/predicators/refinement_estimators/gnn_refinement_estimator.py b/predicators/refinement_estimators/gnn_refinement_estimator.py
new file mode 100644
index 0000000000..3533af5d91
--- /dev/null
+++ b/predicators/refinement_estimators/gnn_refinement_estimator.py
@@ -0,0 +1,364 @@
+"""A learning-based refinement cost estimator that trains a GNN regression
+model mapping initial state, intermediate atoms, goal, and operator to cost,
+and estimates refinement cost for a full skeleton by summing the model output
+over individual actions in the skeleton."""
+
+import functools
+import logging
+from collections import defaultdict
+from pathlib import Path
+from typing import Any, DefaultDict, Dict, List, Optional, Set, Tuple
+
+import dill as pkl
+import numpy as np
+import torch
+from torch.utils.data import DataLoader
+
+from predicators import utils
+from predicators.gnn.gnn import EncodeProcessDecode, setup_graph_net
+from predicators.gnn.gnn_utils import GraphDictDataset, compute_normalizers, \
+    get_graph_batch_collate_with_device, get_single_model_prediction, \
+    normalize_graph, train_model
+from predicators.ground_truth_models import get_gt_nsrts, get_gt_options
+from predicators.refinement_estimators import BaseRefinementEstimator
+from predicators.settings import CFG
+from predicators.structs import NSRT, GroundAtom, NDArray, Predicate, \
+    RefinementDatapoint, State, Task, _GroundNSRT
+
+
+class GNNRefinementEstimator(BaseRefinementEstimator):
+    """A refinement cost estimator that uses a GNN to predict refinement cost
+    from an initial state, intermediate atoms, goal, and abstract action."""
+
+    def __init__(self) -> None:
+        super().__init__()
+        self._gnn: Optional[EncodeProcessDecode] = None  # set by train/load
+        self._data_exemplar: Tuple[Dict, Dict] = ({}, {})  # (input, target)
+        self._nsrts: List[NSRT] = []  # filled in by _setup_fields
+        self._max_nsrt_objects = 0  # filled in by _setup_fields
+        self._node_feature_to_index: Dict[Any, int] = {}
+        self._edge_feature_to_index: Dict[Any, int] = {}
+        self._nullary_predicates: List[Predicate] = []
+        self._input_normalizers: Dict = {}  # used if gnn_do_normalization
+        self._target_normalizers: Dict = {}  # used if gnn_do_normalization
+        self._mse_loss = torch.nn.MSELoss()  # used by _global_criterion
+        self._device = torch.device("cuda:0" if CFG.use_torch_gpu
+                                    and torch.cuda.is_available() else "cpu")
+        self._setup_fields()  # builds the feature index maps and NSRT list
+
+    @classmethod
+    def get_name(cls) -> str:
+        return "gnn"  # string identifier for this estimator class
+
+    @property
+    def is_learning_based(self) -> bool:
+        return True  # get_cost asserts that a GNN was trained or loaded
+
+    def get_cost(self, initial_task: Task, skeleton: List[_GroundNSRT],
+                 atoms_sequence: List[Set[GroundAtom]]) -> float:
+        """Sum the GNN's per-action cost predictions over the skeleton."""
+        assert self._gnn is not None, "Need to train"
+        cost = 0.0
+        state, goal = initial_task.init, initial_task.goal
+        for i, action in enumerate(skeleton):  # one GNN query per action
+            atoms = atoms_sequence[i]
+            in_graph = self._graphify_single_input(state, atoms, goal, action)
+            if CFG.gnn_do_normalization:
+                in_graph = normalize_graph(in_graph, self._input_normalizers)
+            out_graph = get_single_model_prediction(self._gnn,
+                                                    in_graph,
+                                                    device=self._device)
+            if CFG.gnn_do_normalization:
+                out_graph = normalize_graph(out_graph,
+                                            self._target_normalizers,
+                                            invert=True)
+            refinement_time, low_level_count = out_graph["globals"]
+            cost += refinement_time
+            if CFG.refinement_data_include_execution_cost:
+                cost += (low_level_count *
+                         CFG.refinement_data_low_level_execution_cost)
+        return float(cost)  # built-in float, not a NumPy scalar
+
+    def train(self, data: List[RefinementDatapoint]) -> None:
+        """Turn every action of every RefinementDatapoint into its own
+        (input graph, target graph) example, then fit the GNN on them."""
+        graph_inputs = []
+        graph_targets = []
+        for (task, skeleton, atoms_sequence, succeeded, refinement_time,
+             low_level_count) in data:
+            state, goal = task.init, task.goal
+            for i, action in enumerate(skeleton):
+                atoms = atoms_sequence[i]
+                target_time = refinement_time[i]
+                # Every action of a failed refinement gets the penalty added
+                if not succeeded:
+                    target_time += CFG.refinement_data_failed_refinement_penalty
+                # Graphify input and target; failed runs get a count of 0
+                graph_inputs.append(
+                    self._graphify_single_input(state, atoms, goal, action))
+                graph_targets.append(
+                    self._graphify_single_target(
+                        target_time, low_level_count[i] if succeeded else 0))
+        assert len(graph_inputs) and len(graph_targets), "No usable data"
+        self._data_exemplar = (graph_inputs[0], graph_targets[0])
+
+        # Normalize if needed
+        if CFG.gnn_do_normalization:
+            # Update normalization constants for inputs and targets. Node and
+            # edge normalization is switched off for the target graphs.
+            self._input_normalizers = compute_normalizers(graph_inputs)
+            self._target_normalizers = compute_normalizers(
+                graph_targets,
+                normalize_nodes=False,
+                normalize_edges=False,
+            )
+            graph_inputs = [
+                normalize_graph(g, self._input_normalizers)
+                for g in graph_inputs
+            ]
+            graph_targets = [
+                normalize_graph(g, self._target_normalizers)
+                for g in graph_targets
+            ]
+        # Shuffle and split the data, then run training.
+        if CFG.gnn_use_validation_set:
+            # Hold out 10% of the data (at least one example).
+            num_validation = max(1, int(len(graph_inputs) * 0.1))
+        else:
+            num_validation = 0
+        shuffled_indices = self._rng.permutation(len(graph_inputs))
+        graph_inputs = [graph_inputs[i] for i in shuffled_indices]
+        graph_targets = [graph_targets[i] for i in shuffled_indices]
+        train_inputs = graph_inputs[num_validation:]
+        train_targets = graph_targets[num_validation:]
+        val_inputs = graph_inputs[:num_validation]
+        val_targets = graph_targets[:num_validation]
+        train_dataset = GraphDictDataset(train_inputs, train_targets)
+        val_dataset = GraphDictDataset(val_inputs, val_targets)
+        # Set up model
+        self._gnn = setup_graph_net(train_dataset,
+                                    num_steps=CFG.gnn_num_message_passing,
+                                    layer_size=CFG.gnn_layer_size).to(
+                                        self._device)
+        # Set up Adam optimizer and dataloaders.
+        optimizer = torch.optim.Adam(self._gnn.parameters(),
+                                     lr=CFG.gnn_learning_rate,
+                                     weight_decay=CFG.gnn_weight_decay)
+        graph_batch_collate = get_graph_batch_collate_with_device(self._device)
+        train_dataloader = DataLoader(train_dataset,
+                                      batch_size=CFG.gnn_batch_size,
+                                      shuffle=True,
+                                      num_workers=0,
+                                      collate_fn=graph_batch_collate)
+        val_dataloader = DataLoader(val_dataset,
+                                    batch_size=CFG.gnn_batch_size,
+                                    shuffle=False,
+                                    num_workers=0,
+                                    collate_fn=graph_batch_collate)
+        dataloaders = {"train": train_dataloader, "val": val_dataloader}
+        # Train, then load the state dict returned by train_model.
+        logging.info(f"Training GNN on {len(train_inputs)} examples")
+        best_model_dict = train_model(self._gnn,
+                                      dataloaders,
+                                      optimizer=optimizer,
+                                      criterion=None,
+                                      global_criterion=self._global_criterion,
+                                      num_epochs=CFG.gnn_num_epochs,
+                                      do_validation=CFG.gnn_use_validation_set,
+                                      device=self._device)
+        self._gnn.load_state_dict(best_model_dict)
+
+    def _global_criterion(self, output: torch.Tensor,
+                          target: torch.Tensor) -> torch.Tensor:
+        """MSE loss handed to train_model as its global_criterion."""
+        return self._mse_loss(output, target)
+
+    def _graphify_single_input(self, state: State, atoms: Set[GroundAtom],
+                               goal: Set[GroundAtom],
+                               action: _GroundNSRT) -> Dict:
+        """Convert (initial state, atoms, goal, action) to graph: objects are
+        nodes, binary atoms are edges, nullary atoms and the NSRT globals."""
+        object_to_node = {obj: i for i, obj in enumerate(state)}
+        num_objects = len(object_to_node)
+        num_node_features = len(self._node_feature_to_index)
+        num_edge_features = max(len(self._edge_feature_to_index), 1)
+
+        G = functools.partial(utils.wrap_predicate, prefix="GOAL-")
+        R = functools.partial(utils.wrap_predicate, prefix="REV-")
+
+        # Add 1 node per object and create node features array
+        graph: Dict[str, NDArray[np.float64]] = {
+            "n_node": np.reshape(num_objects, [1]).astype(np.int64)
+        }
+        node_features = np.zeros((num_objects, num_node_features))
+        # Handle each object's type (one-hot) and state features
+        for obj in state:
+            obj_index = object_to_node[obj]
+            type_index = self._node_feature_to_index[f"type_{obj.type.name}"]
+            node_features[obj_index, type_index] = 1
+            for feat, val in zip(obj.type.feature_names, state[obj]):
+                feat_index = self._node_feature_to_index[f"feat_{feat}"]
+                node_features[obj_index, feat_index] = val
+
+        # Initialize feature vectors for nullary/binary predicates
+        edge_features_dict: DefaultDict[
+            Tuple[int, int],
+            np.ndarray] = defaultdict(lambda: np.zeros(num_edge_features))
+        atoms_globals = np.zeros(len(self._nullary_predicates), dtype=np.int64)
+        goal_globals = np.zeros(len(self._nullary_predicates), dtype=np.int64)
+
+        # Handle atoms
+        for atom in atoms:
+            arity = atom.predicate.arity
+            if arity == 0:
+                atoms_globals[self._nullary_predicates.index(
+                    atom.predicate)] = 1
+                continue
+            obj0_index = object_to_node[atom.objects[0]]
+            if arity == 1:
+                atom_index = self._node_feature_to_index[atom.predicate]
+                node_features[obj0_index, atom_index] = 1
+            elif arity == 2:
+                obj1_index = object_to_node[atom.objects[1]]
+                atom_index = self._edge_feature_to_index[atom.predicate]
+                edge_features_dict[(obj0_index, obj1_index)][atom_index] = 1
+                rev_index = self._edge_feature_to_index[R(atom.predicate)]
+                edge_features_dict[(obj1_index, obj0_index)][rev_index] = 1
+
+        # Handle goal atoms
+        for atom in goal:
+            arity = atom.predicate.arity
+            if arity == 0:
+                goal_globals[self._nullary_predicates.index(
+                    atom.predicate)] = 1
+                continue
+            obj0_index = object_to_node[atom.objects[0]]
+            if arity == 1:
+                atom_index = self._node_feature_to_index[G(atom.predicate)]
+                node_features[obj0_index, atom_index] = 1
+            elif arity == 2:
+                obj1_index = object_to_node[atom.objects[1]]
+                atom_index = self._edge_feature_to_index[G(atom.predicate)]
+                edge_features_dict[(obj0_index, obj1_index)][atom_index] = 1
+                rev_index = self._edge_feature_to_index[G(R(atom.predicate))]
+                edge_features_dict[(obj1_index, obj0_index)][rev_index] = 1
+
+        # Handle action globals
+        action_globals = np.zeros(len(self._nsrts), dtype=np.int64)
+        action_globals[self._nsrts.index(action.parent)] = 1
+        for i, action_obj in enumerate(action.objects):
+            obj_index = object_to_node[action_obj]
+            feat_index = self._node_feature_to_index[f"nsrt-{i}"]
+            node_features[obj_index, feat_index] = 1
+
+        # Organize
+        graph["nodes"] = node_features.astype(np.float32)
+        graph["globals"] = np.r_[atoms_globals, goal_globals, action_globals]
+        senders = [sender for sender, _ in edge_features_dict]
+        receivers = [receiver for _, receiver in edge_features_dict]
+        edges = list(edge_features_dict.values())
+        n_edge = len(edges)
+        graph["senders"] = np.reshape(senders, [n_edge]).astype(np.int64)
+        graph["receivers"] = np.reshape(receivers, [n_edge]).astype(np.int64)
+        graph["edges"] = np.reshape(edges, [n_edge, num_edge_features])
+        graph["n_edge"] = np.reshape(n_edge, [1]).astype(np.int64)
+
+        return graph
+
+    @staticmethod
+    def _graphify_single_target(refinement_time: float,
+                                low_level_count: int) -> Dict:
+        """Package the two regression targets as the globals of a graph."""
+        # Node and edge arrays stay empty; only "globals" holds the targets.
+        return {
+            "n_node": np.array([1], dtype=np.int64),
+            "nodes": np.array([]),
+            "n_edge": np.array([0], dtype=np.int64),
+            "edges": np.array([]),
+            "senders": np.array([]),
+            "receivers": np.array([]),
+            "globals": np.array([refinement_time, low_level_count]),
+        }
+
+    def _setup_fields(self) -> None:
+        """Assign an index to each node and edge feature, and collect the
+        nullary predicates and the sorted ground-truth NSRTs."""
+        self._node_feature_to_index = {}
+        self._edge_feature_to_index = {}
+        node_feature_index = 0
+        edge_feature_index = 0
+        self._nullary_predicates = []
+
+        G = functools.partial(utils.wrap_predicate, prefix="GOAL-")
+        R = functools.partial(utils.wrap_predicate, prefix="REV-")
+
+        # One node feature per object type, in sorted order
+        obj_attrs_set = set()
+        for obj_type in sorted(self._env.types):
+            self._node_feature_to_index[
+                f"type_{obj_type.name}"] = node_feature_index
+            node_feature_index += 1
+            # Collect feature names; same-named features share one index
+            for feat in obj_type.feature_names:
+                obj_attrs_set.add(f"feat_{feat}")
+
+        # Unary predicates become node features, binary ones edge features
+        for predicate in sorted(self._env.predicates):
+            arity = predicate.arity
+            assert arity <= 2, "Predicates with arity > 2 are not supported"
+            if arity == 0:
+                self._nullary_predicates.append(predicate)
+            elif arity == 1:
+                for feature in (predicate, G(predicate)):
+                    self._node_feature_to_index[feature] = node_feature_index
+                    node_feature_index += 1
+            elif arity == 2:
+                for feature in (predicate, R(predicate), G(predicate),
+                                G(R(predicate))):
+                    self._edge_feature_to_index[feature] = edge_feature_index
+                    edge_feature_index += 1
+
+        # One node feature per NSRT parameter position (nsrt-0, nsrt-1, ...)
+        gt_nsrts = get_gt_nsrts(CFG.env, self._env.predicates,
+                                get_gt_options(self._env.get_name()))
+        self._nsrts = sorted(gt_nsrts)
+        max_nsrt_objects = 0
+        for nsrt in self._nsrts:
+            max_nsrt_objects = max(max_nsrt_objects, len(nsrt.parameters))
+        self._max_nsrt_objects = max_nsrt_objects
+        for i in range(max_nsrt_objects):
+            self._node_feature_to_index[f"nsrt-{i}"] = node_feature_index
+            node_feature_index += 1
+
+        # Object state features come last in the node feature vector
+        for obj_attr in sorted(obj_attrs_set):
+            self._node_feature_to_index[obj_attr] = node_feature_index
+            node_feature_index += 1
+
+    def save_model(self, filepath: Path) -> None:
+        info = {  # exactly the keys that load_model reads back
+            "exemplar": self._data_exemplar,
+            "state_dict": self._gnn.state_dict() if self._gnn else None,
+            "input_normalizers": self._input_normalizers,
+            "target_normalizers": self._target_normalizers,
+        }
+        with open(filepath, "wb") as f:
+            pkl.dump(info, f)  # pkl is dill (see the module imports)
+
+    def load_model(self, filepath: Path) -> None:
+        """Rebuild the GNN from its saved exemplar, then restore its state."""
+        with open(filepath, "rb") as model_file:
+            saved = pkl.load(model_file)  # dill unpickle: trusted files only
+        self._data_exemplar = saved["exemplar"]
+        ex_in, ex_out = self._data_exemplar
+        exemplar_dataset = GraphDictDataset([ex_in], [ex_out])
+        self._gnn = setup_graph_net(exemplar_dataset,
+                                    num_steps=CFG.gnn_num_message_passing,
+                                    layer_size=CFG.gnn_layer_size).to(
+                                        self._device)
+        if saved["state_dict"] is not None:
+            self._gnn.load_state_dict(saved["state_dict"])
+        self._input_normalizers = saved["input_normalizers"]
+        self._target_normalizers = saved["target_normalizers"]
+        # Warm-up call: run the GNN once to avoid the weird delay issue
+        get_single_model_prediction(self._gnn, ex_in, device=self._device)
diff --git a/predicators/refinement_estimators/oracle_refinement_estimator.py b/predicators/refinement_estimators/oracle_refinement_estimator.py
index 197eba39c2..35878d7a0b 100644
--- a/predicators/refinement_estimators/oracle_refinement_estimator.py
+++ b/predicators/refinement_estimators/oracle_refinement_estimator.py
@@ -3,6 +3,7 @@
 from typing import List, Set
 
 from predicators.envs import BaseEnv
+from predicators.envs.exit_garage import ExitGarageEnv
 from predicators.refinement_estimators import BaseRefinementEstimator
 from predicators.settings import CFG
 from predicators.structs import GroundAtom, State, Task, _GroundNSRT
@@ -75,11 +76,23 @@ def exit_garage_oracle_estimator(
     atoms_sequence: List[Set[GroundAtom]],
 ) -> float:
     """Oracle refinement estimation function for exit_garage env."""
-    del env, initial_state, atoms_sequence  # unused
+    del atoms_sequence  # unused
+
+    assert isinstance(env, ExitGarageEnv)
+    obstacle_radius = env.obstacle_radius
+    obstruction_ub = env.exit_top + 2 * obstacle_radius
+    obstruction_lb = env.exit_top - env.exit_height - 2 * obstacle_radius
 
     # Each picked-up obstacle decreases the refinement cost of DriveCarToExit
-    cost = 0
+    # if it is in the direct path of the car to the exit, otherwise it has a
+    # positive cost and should be avoided
+    cost: float = 0
     for ground_nsrt in skeleton:
-        if ground_nsrt.name == "PickupObstacle":
-            cost -= 1
+        if ground_nsrt.name == "ClearObstacle":
+            obstacle = ground_nsrt.objects[1]
+            obstacle_y = initial_state.get(obstacle, "y")
+            if obstruction_lb < obstacle_y < obstruction_ub:
+                cost -= 1
+            else:
+                cost += 0.5
     return cost
diff --git a/predicators/refinement_estimators/per_skeleton_estimator.py b/predicators/refinement_estimators/per_skeleton_estimator.py
index 366781fffc..2b374b76fa 100644
--- a/predicators/refinement_estimators/per_skeleton_estimator.py
+++ b/predicators/refinement_estimators/per_skeleton_estimator.py
@@ -64,3 +64,7 @@ def save_model(self, filepath: Path) -> None:
     def load_model(self, filepath: Path) -> None:
         with open(filepath, "rb") as f:
             self._model_dict = pkl.load(f)
+        # Run every model once to avoid weird delay issue
+        if self._model_dict is not None:
+            for v in self._model_dict.values():
+                self._model_predict(v, self._env.get_train_tasks()[0].task)
diff --git a/predicators/refinement_estimators/tabular_refinement_estimator.py b/predicators/refinement_estimators/tabular_refinement_estimator.py
index c5391046d8..b1e9d8c436 100644
--- a/predicators/refinement_estimators/tabular_refinement_estimator.py
+++ b/predicators/refinement_estimators/tabular_refinement_estimator.py
@@ -2,7 +2,7 @@
 and atoms_sequence to average refinement time."""
 
 from collections import defaultdict
-from typing import List
+from typing import List, Tuple
 
 import numpy as np
 
@@ -12,7 +12,8 @@
 from predicators.structs import RefinementDatapoint, Task
 
 
-class TabularRefinementEstimator(PerSkeletonRefinementEstimator[float]):
+class TabularRefinementEstimator(PerSkeletonRefinementEstimator[Tuple[float,
+                                                                      float]]):
     """A refinement cost estimator that memorizes refinement data using a
     tabular method."""
 
@@ -20,25 +21,34 @@ class TabularRefinementEstimator(PerSkeletonRefinementEstimator[float]):
     def get_name(cls) -> str:
         return "tabular"
 
-    def _model_predict(self, model: float, initial_task: Task) -> float:
-        return model
+    def _model_predict(self, model: Tuple[float, float],
+                       initial_task: Task) -> float:
+        refinement_time, low_level_count = model
+        cost = refinement_time
+        if CFG.refinement_data_include_execution_cost:
+            cost += (low_level_count *
+                     CFG.refinement_data_low_level_execution_cost)
+        return cost
 
     def train(self, data: List[RefinementDatapoint]) -> None:
         """Train the tabular refinement estimator on data by computing average
         refinement time per (skeleton, atoms_sequence) pair."""
-        grouped_data = defaultdict(list)
+        grouped_times = defaultdict(list)
+        grouped_counts = defaultdict(list)
         # Go through data and group them by skeleton
-        for _, skeleton, atoms_sequence, succeeded, refinement_time in data:
+        for (_, skeleton, atoms_sequence, succeeded, refinement_time,
+             low_level_count) in data:
             # Convert skeleton and atoms_sequence into an immutable dict key
             key = self._immutable_model_dict_key(skeleton, atoms_sequence)
-            value = refinement_time
+            target_time = sum(refinement_time)
             # Add failed refinement penalty to the value if failure occurred
             if not succeeded:
-                value += CFG.refinement_data_failed_refinement_penalty
-            grouped_data[key].append(value)
+                target_time += CFG.refinement_data_failed_refinement_penalty
+            grouped_times[key].append(target_time)
+            grouped_counts[key].append(sum(low_level_count))
         # Compute average time for each (skeleton, atoms_sequence) key
         processed_data = {
-            key: float(np.mean(times))
-            for key, times in grouped_data.items()
+            key: (float(np.mean(times)), float(np.mean(grouped_counts[key])))
+            for key, times in grouped_times.items()
         }
         self._model_dict = processed_data
diff --git a/predicators/settings.py b/predicators/settings.py
index 76021d005b..91978e43a9 100644
--- a/predicators/settings.py
+++ b/predicators/settings.py
@@ -285,7 +285,7 @@ class GlobalSettings:
     doors_draw_debug = False
 
     # narrow_passage env parameters
-    narrow_passage_open_door_refine_penalty = 0.2
+    narrow_passage_open_door_refine_penalty = 0
     narrow_passage_door_width_padding_lb = 1e-4
     narrow_passage_door_width_padding_ub = 0.015
     narrow_passage_passage_width_padding_lb = 5e-4
@@ -295,12 +295,12 @@ class GlobalSettings:
     narrow_passage_birrt_smooth_amt = 50
 
     # exit_garage env parameters
-    exit_garage_pick_place_refine_penalty = 0.2
+    exit_garage_clear_refine_penalty = 0
     exit_garage_min_num_obstacles = 2
-    exit_garage_max_num_obstacles = 4  # inclusive
-    exit_garage_rrt_extend_fn_threshold = 1e-4
+    exit_garage_max_num_obstacles = 3  # inclusive
+    exit_garage_rrt_extend_fn_threshold = 1e-3
     exit_garage_rrt_num_control_samples = 100
-    exit_garage_rrt_num_attempts = 10
+    exit_garage_rrt_num_attempts = 3
     exit_garage_rrt_num_iters = 100
     exit_garage_rrt_sample_goal_eps = 0.1
     exit_garage_motion_planning_ignore_obstacles = False
@@ -333,6 +333,7 @@ class GlobalSettings:
     gnn_num_message_passing = 3
     gnn_layer_size = 16
     gnn_learning_rate = 1e-3
+    gnn_weight_decay = 0
     gnn_num_epochs = 25000
     gnn_batch_size = 128
     gnn_do_normalization = False  # performs worse in Cover when True
@@ -539,6 +540,8 @@ class GlobalSettings:
     refinement_data_skeleton_generator_timeout = 20
     refinement_data_low_level_search_timeout = 5  # timeout for refinement try
     refinement_data_failed_refinement_penalty = 5  # added time on failure
+    refinement_data_include_execution_cost = True
+    refinement_data_low_level_execution_cost = 0.05  # per action cost to add
 
     # CNN refinement cost estimator image pre-processing parameters
     cnn_refinement_estimator_crop = False  # True
diff --git a/predicators/structs.py b/predicators/structs.py
index 4e3555895f..fe476ec0d2 100644
--- a/predicators/structs.py
+++ b/predicators/structs.py
@@ -1803,7 +1803,7 @@ def __len__(self) -> int:
 SamplerDatapoint = Tuple[State, VarToObjSub, _Option,
                          Optional[Set[GroundAtom]]]
 RefinementDatapoint = Tuple[Task, List[_GroundNSRT], List[Set[GroundAtom]],
-                            bool, float]
+                            bool, List[float], List[int]]
 # For PDDLEnv environments, given a desired number of problems and an rng,
 # returns a list of that many PDDL problem strings.
 PDDLProblemGenerator = Callable[[int, np.random.Generator], List[str]]
diff --git a/predicators/train_refinement_estimator.py b/predicators/train_refinement_estimator.py
index a7ac3a397d..0490054b49 100644
--- a/predicators/train_refinement_estimator.py
+++ b/predicators/train_refinement_estimator.py
@@ -121,6 +121,12 @@ def _train_refinement_estimation_approach() -> None:
     assert refinement_estimator.is_learning_based, \
         "Refinement estimator (--refinement_estimator) must be learning-based"
 
+    # Train with only a subset of dataset if desired
+    if CFG.refinement_train_with_frac_data >= 0:
+        num_points = int(len(dataset) * CFG.refinement_train_with_frac_data)
+        dataset = dataset[:num_points]
+        logging.info(f"Using {len(dataset)} data points")
+
     # Train estimator
     train_start_time = time.perf_counter()
     refinement_estimator.train(dataset)
@@ -145,8 +151,12 @@ def _get_refinement_estimation_parser() -> ArgumentParser:
     parser = utils.create_arg_parser()
     # Add script-specific flags to the parser
     parser.add_argument("--refinement_data_file_name", default="", type=str)
+    parser.add_argument("--refinement_data_save_every", default=-1, type=int)
     parser.add_argument("--skip_refinement_estimator_training",
                         action="store_true")
+    parser.add_argument("--refinement_train_with_frac_data",
+                        default=-1,
+                        type=float)
     return parser
 
 
@@ -158,36 +168,40 @@ def _generate_refinement_data(
     nsrts = get_gt_nsrts(CFG.env, preds, options)
     option_model = create_option_model(CFG.option_model_name)
 
+    # Create saved data directory.
+    os.makedirs(CFG.data_dir, exist_ok=True)
+    # Create file path.
+    temp_file_path = _get_data_file_path(temp=True)
+    data_file_path = _get_data_file_path()
+
     # Generate the dataset and save it to file.
     dataset: List[RefinementDatapoint] = []
     for test_task_idx, task in enumerate(train_tasks):
         try:
-            _collect_refinement_data_for_task(task, option_model, nsrts, preds,
-                                              env.types,
+            _collect_refinement_data_for_task(env, task, option_model, nsrts,
+                                              preds, env.types,
                                               CFG.seed + test_task_idx,
                                               dataset)
             logging.info(f"Task {test_task_idx+1} / {num_tasks}: Success")
         except (PlanningTimeout, _SkeletonSearchTimeout) as e:
             logging.info(f"Task {test_task_idx+1} / {num_tasks} failed by "
                          f"timing out: {e}")
+
+        # Save the intermediate dataset after every N training tasks
+        if CFG.refinement_data_save_every > 0 and \
+                (test_task_idx + 1) % CFG.refinement_data_save_every == 0:
+            logging.info(f"Writing intermediate dataset to {temp_file_path}")
+            with open(temp_file_path, "wb") as f:
+                pkl.dump(dataset, f)
+
     logging.info(f"Got {len(dataset)} data points.")
-    # Create saved data directory.
-    os.makedirs(CFG.data_dir, exist_ok=True)
-    # Create file path.
-    data_file_path = _get_data_file_path()
-    # Store the train tasks just in case we need it in the future.
-    # (Note: unpickling this doesn't work...)
-    # data_content = {
-    #     "tasks": train_tasks,
-    #     "data": dataset,
-    # }
     logging.info(f"Writing dataset to {data_file_path}")
     with open(data_file_path, "wb") as f:
         pkl.dump(dataset, f)
     return dataset
 
 
-def _collect_refinement_data_for_task(task: Task,
+def _collect_refinement_data_for_task(env: BaseEnv, task: Task,
                                       option_model: _OptionModelBase,
                                       nsrts: Set[NSRT],
                                       predicates: Set[Predicate],
@@ -212,40 +226,64 @@ def _collect_refinement_data_for_task(task: Task,
     heuristic = utils.create_task_planning_heuristic(
         CFG.sesame_task_planning_heuristic, init_atoms, task.goal,
         reachable_nsrts, predicates, objects)
+    generated_skeletons = []
     try:
-        gen = _skeleton_generator(
-            task, reachable_nsrts, init_atoms, heuristic, seed,
-            CFG.refinement_data_skeleton_generator_timeout, metrics,
-            CFG.refinement_data_num_skeletons)
-        for skeleton, atoms_sequence in gen:
-            necessary_atoms_seq = utils.compute_necessary_atoms_seq(
-                skeleton, atoms_sequence, task.goal)
-            refinement_start_time = time.perf_counter()
-            _, suc = run_low_level_search(
-                task, option_model, skeleton, necessary_atoms_seq, seed,
-                CFG.refinement_data_low_level_search_timeout, metrics,
-                CFG.horizon)
-            # Calculate time taken for refinement.
-            refinement_time = time.perf_counter() - refinement_start_time
-            # Add datapoint to dataset
-            data.append((
-                task,
-                skeleton,
-                atoms_sequence,
-                suc,
-                refinement_time,
-            ))
+        for item in _skeleton_generator(
+                task, reachable_nsrts, init_atoms, heuristic, seed,
+                CFG.refinement_data_skeleton_generator_timeout, metrics,
+                CFG.refinement_data_num_skeletons):
+            generated_skeletons.append(item)
     except _MaxSkeletonsFailure:
         # Done finding skeletons
-        return
+        pass
+    logging.info(f"Trying to refine {len(generated_skeletons)} skeletons")
+    for skeleton, atoms_sequence in generated_skeletons:
+        necessary_atoms_seq = utils.compute_necessary_atoms_seq(
+            skeleton, atoms_sequence, task.goal)
+        # This list will be mutated by run_low_level_search to record
+        # the refinement time for each step of the skeleton
+        refinement_time_list: List[float] = []
+        plan, suc = run_low_level_search(
+            task,
+            option_model,
+            skeleton,
+            necessary_atoms_seq,
+            seed,
+            CFG.refinement_data_low_level_search_timeout,
+            metrics,
+            CFG.horizon,
+            refinement_time=refinement_time_list)
+        assert len(refinement_time_list) == len(skeleton)
+        low_level_action_count: List[int] = []
+        # On plan success, count the low level actions per abstract action
+        if suc and CFG.refinement_data_include_execution_cost:
+            s = task.init
+            for action in plan:
+                action_count = 0
+                while not action.terminal(s):
+                    s = env.simulate(s, action.policy(s))
+                    action_count += 1
+                low_level_action_count.append(action_count)
+            assert len(low_level_action_count) == len(skeleton)
+        # Add datapoint to dataset
+        data.append((
+            task,
+            skeleton,
+            atoms_sequence,
+            suc,
+            refinement_time_list,
+            low_level_action_count,
+        ))
 
 
-def _get_data_file_path() -> Path:
+def _get_data_file_path(temp: bool = False) -> Path:
     if len(CFG.refinement_data_file_name):
         file_name = CFG.refinement_data_file_name
     else:
         config_path_str = utils.get_config_path_str()
         file_name = f"refinement_data_{config_path_str}.data"
+    if temp:
+        file_name += ".temp"
     data_file_path = Path(CFG.data_dir) / file_name
     return data_file_path
 
diff --git a/predicators/utils.py b/predicators/utils.py
index 92025cbe4a..416abb0079 100644
--- a/predicators/utils.py
+++ b/predicators/utils.py
@@ -57,6 +57,14 @@
 
 matplotlib.use("Agg")
 
+# Unpickling CUDA models errors out if the device is not recognized due to an
+# unusual name (e.g. on supercloud), so rewrite the first visible device to 0.
+if "CUDA_VISIBLE_DEVICES" in os.environ:  # pragma: no cover
+    cuda_visible_devices = os.environ["CUDA_VISIBLE_DEVICES"].split(",")
+    if len(cuda_visible_devices) and cuda_visible_devices[0] != "0":
+        cuda_visible_devices[0] = "0"
+        os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(cuda_visible_devices)
+
 
 def count_positives_for_ops(
     strips_ops: List[STRIPSOperator],
diff --git a/scripts/analyze_results_directory.py b/scripts/analyze_results_directory.py
index 29aa47c451..e44e6ff722 100644
--- a/scripts/analyze_results_directory.py
+++ b/scripts/analyze_results_directory.py
@@ -32,6 +32,7 @@
     ("AVG_TEST_TIME", "avg_suc_time"),
     ("AVG_NODES_CREATED", "avg_num_nodes_created"),
     ("LEARNING_TIME", "learning_time"),
+    ("AVG_REF_COST", "avg_ref_cost"),
     # ("AVG_SAMPLES_PER_PLAN", "avg_num_samples"),
     # ("MIN_SAMPLES_PER_PLAN", "min_num_samples"),
     # ("MAX_SAMPLES_PER_PLAN", "max_num_samples"),
diff --git a/scripts/cluster_utils.py b/scripts/cluster_utils.py
index 2d1883d7a7..979062e2da 100644
--- a/scripts/cluster_utils.py
+++ b/scripts/cluster_utils.py
@@ -21,6 +21,7 @@ class RunConfig:
     args: List[str]  # e.g. --make_test_videos
     flags: Dict[str, Any]  # e.g. --num_train_tasks 1
     use_gpu: bool  # e.g. --use_gpu True
+    train_refinement_estimator: bool  # run train_refinement_estimator.py
 
     def __post_init__(self) -> None:
         # For simplicity, disallow overrides of the SAVE_DIRS.
@@ -50,7 +51,9 @@ def config_to_logfile(cfg: RunConfig, suffix: str = ".log") -> str:
     else:
         assert isinstance(cfg, BatchSeedRunConfig)
         seed = None
-    return f"{cfg.env}__{cfg.approach}__{cfg.experiment_id}__{seed}" + suffix
+    name = "train_" if cfg.train_refinement_estimator else ""
+    name += f"{cfg.env}__{cfg.approach}__{cfg.experiment_id}__{seed}" + suffix
+    return name
 
 
 def config_to_cmd_flags(cfg: RunConfig) -> str:
@@ -89,11 +92,19 @@ def generate_run_configs(config_filename: str,
             use_gpu = config["USE_GPU"]
         else:
             use_gpu = False
+        if "TRAIN_REFINEMENT_ESTIMATOR" in config.keys():
+            train_refinement_estimator = config["TRAIN_REFINEMENT_ESTIMATOR"]
+        else:
+            train_refinement_estimator = False
         # Loop over approaches.
         for approach_exp_id, approach_config in config["APPROACHES"].items():
+            if approach_config.get("SKIP", False):
+                continue
             approach = approach_config["NAME"]
             # Loop over envs.
             for env_exp_id, env_config in config["ENVS"].items():
+                if env_config.get("SKIP", False):
+                    continue
                 env = env_config["NAME"]
                 # Create the experiment ID, args, and flags.
                 experiment_id = f"{env_exp_id}-{approach_exp_id}"
@@ -111,11 +122,13 @@ def generate_run_configs(config_filename: str,
                 if batch_seeds:
                     yield BatchSeedRunConfig(experiment_id, approach, env,
                                              run_args, run_flags, use_gpu,
+                                             train_refinement_estimator,
                                              start_seed, num_seeds)
                 else:
                     for seed in range(start_seed, start_seed + num_seeds):
                         yield SingleSeedRunConfig(experiment_id, approach, env,
                                                   run_args, run_flags, use_gpu,
+                                                  train_refinement_estimator,
                                                   seed)
 
 
diff --git a/scripts/configs/refinement_cost_learning.yaml b/scripts/configs/refinement_cost_learning.yaml
new file mode 100644
index 0000000000..67ba95b98a
--- /dev/null
+++ b/scripts/configs/refinement_cost_learning.yaml
@@ -0,0 +1,76 @@
+# Data collection and training for refinement cost estimation
+---
+APPROACHES:
+  tabular:
+    NAME: "refinement_estimation"
+    SKIP: true
+    FLAGS:
+      refinement_estimator: "tabular"
+  cnn:
+    NAME: "refinement_estimation"
+    SKIP: true
+    FLAGS:
+      refinement_estimator: "cnn"
+      refinement_train_with_frac_data: 0.25
+  gnn:
+    NAME: "refinement_estimation"
+    SKIP: true
+    FLAGS:
+      refinement_estimator: "gnn"
+ENVS:
+  variable_passage:
+    NAME: "narrow_passage"
+    SKIP: true
+    FLAGS:
+      # cnn
+      learning_rate: 0.0002
+      weight_decay: 0.001
+      # gnn
+      gnn_num_message_passing: 3
+      gnn_layer_size: 32
+      gnn_learning_rate: 0.00001
+      gnn_weight_decay: 0.0005
+      gnn_num_epochs: 10000
+      gnn_batch_size: 300
+      gnn_do_normalization: true
+      gnn_use_validation_set: true
+      # data collection
+      num_train_tasks: 10000
+  exit_garage:
+    NAME: "exit_garage"
+    SKIP: true
+    FLAGS:
+      exit_garage_min_num_obstacles: 2
+      exit_garage_max_num_obstacles: 3  # inclusive
+      exit_garage_rrt_num_attempts: 3
+      refinement_data_num_skeletons: 15
+      refinement_estimation_num_skeletons_generated: 15
+      refinement_data_low_level_search_timeout: 15
+      refinement_data_failed_refinement_penalty: 20
+      # cnn
+      learning_rate: 0.0002
+      weight_decay: 0.001
+      # gnn
+      gnn_num_message_passing: 3
+      gnn_layer_size: 32
+      gnn_learning_rate: 0.00001
+      gnn_weight_decay: 0.0001
+      gnn_num_epochs: 10000
+      gnn_batch_size: 250
+      gnn_do_normalization: true
+      gnn_use_validation_set: true
+      # data collection
+      num_train_tasks: 500
+ARGS:
+#  - "load_data"
+ - "skip_refinement_estimator_training"
+FLAGS:  # general flags
+  refinement_data_save_every: 100
+  num_test_tasks: 50
+  use_torch_gpu: true
+  pytorch_train_print_every: 10
+  # refinement_data_file_name: "refinement_data_narrow_passage_10000_new2.data"
+START_SEED: 457
+NUM_SEEDS: 1
+USE_GPU: true
+TRAIN_REFINEMENT_ESTIMATOR: true
diff --git a/scripts/configs/refinement_cost_learning_test.yaml b/scripts/configs/refinement_cost_learning_test.yaml
new file mode 100644
index 0000000000..69a1c40474
--- /dev/null
+++ b/scripts/configs/refinement_cost_learning_test.yaml
@@ -0,0 +1,83 @@
+# Run experiments for refinement cost estimation
+---
+APPROACHES:
+  benchmark:
+    NAME: "oracle"
+    SKIP: false
+  oracle:
+    NAME: "refinement_estimation"
+    SKIP: false
+    FLAGS:
+      refinement_estimator: "oracle"
+  tabular:
+    NAME: "refinement_estimation"
+    SKIP: false
+    FLAGS:
+      refinement_estimator: "tabular"
+  cnn:
+    NAME: "refinement_estimation"
+    SKIP: false
+    FLAGS:
+      refinement_estimator: "cnn"
+  gnn:
+    NAME: "refinement_estimation"
+    SKIP: false
+    FLAGS:
+      refinement_estimator: "gnn"
+ENVS:
+  variable_passage:
+    NAME: "narrow_passage"
+    SKIP: false
+    FLAGS:
+      # gnn
+      gnn_num_message_passing: 3
+      gnn_layer_size: 32
+      gnn_do_normalization: true
+      timeout: 1
+  fixed_passage:
+    NAME: "narrow_passage"
+    SKIP: false
+    FLAGS:
+      narrow_passage_door_width_padding_lb: 0.01
+      narrow_passage_door_width_padding_ub: 0.01
+      narrow_passage_passage_width_padding_lb: 0.0005
+      narrow_passage_passage_width_padding_ub: 0.0005
+      # gnn
+      gnn_num_message_passing: 3
+      gnn_layer_size: 32
+      gnn_do_normalization: true
+      timeout: 1
+  exit_garage:
+    NAME: "exit_garage"
+    SKIP: true
+    FLAGS:
+      exit_garage_min_num_obstacles: 2
+      exit_garage_max_num_obstacles: 3  # inclusive
+      exit_garage_rrt_num_attempts: 3
+      refinement_estimation_num_skeletons_generated: 15
+      # gnn
+      gnn_num_message_passing: 3
+      gnn_layer_size: 32
+      gnn_do_normalization: true
+      timeout: 20
+  cluttered_garage:
+    NAME: "exit_garage"
+    SKIP: true
+    FLAGS:
+      exit_garage_min_num_obstacles: 4
+      exit_garage_max_num_obstacles: 4
+      exit_garage_rrt_num_attempts: 3
+      refinement_estimation_num_skeletons_generated: 50
+      # gnn
+      gnn_num_message_passing: 3
+      gnn_layer_size: 32
+      gnn_do_normalization: true
+      timeout: 30
+ARGS: []
+FLAGS:  # general flags
+  num_test_tasks: 50
+  use_torch_gpu: true
+  refinement_data_low_level_execution_cost: 0.05
+START_SEED: 456
+NUM_SEEDS: 5
+USE_GPU: true
diff --git a/scripts/local/launch.py b/scripts/local/launch.py
index 391fa41177..cbbdccad38 100644
--- a/scripts/local/launch.py
+++ b/scripts/local/launch.py
@@ -30,7 +30,11 @@ def _main() -> None:
         cmd_flags = config_to_cmd_flags(cfg)
         logfile = os.path.join("logs", config_to_logfile(cfg))
         cmd_flags = config_to_cmd_flags(cfg)
-        cmd = f"python predicators/main.py {cmd_flags} > {logfile}"
+        if cfg.train_refinement_estimator:
+            entry_point = "train_refinement_estimator.py"
+        else:
+            entry_point = "main.py"
+        cmd = f"python predicators/{entry_point} {cmd_flags} > {logfile}"
         cmds.append(cmd)
     # Run the commands in order.
     num_cmds = len(cmds)
diff --git a/scripts/supercloud/launch.py b/scripts/supercloud/launch.py
index 78a6ef1034..3889c14495 100755
--- a/scripts/supercloud/launch.py
+++ b/scripts/supercloud/launch.py
@@ -57,9 +57,13 @@ def _launch_experiments(config_file: str) -> None:
         log_dir = "logs"
         log_prefix = config_to_logfile(cfg, suffix="")
         # Launch a job for this experiment.
-        submit_supercloud_job(cfg.experiment_id, log_dir, log_prefix,
-                              cmd_flags, cfg.start_seed, cfg.num_seeds,
-                              cfg.use_gpu)
+        if cfg.train_refinement_estimator:
+            entry_point = "train_refinement_estimator.py"
+        else:
+            entry_point = "main.py"
+        submit_supercloud_job(entry_point, cfg.experiment_id, log_dir,
+                              log_prefix, cmd_flags, cfg.start_seed,
+                              cfg.num_seeds, cfg.use_gpu)
 
 
 if __name__ == "__main__":
diff --git a/scripts/supercloud/submit_supercloud_job.py b/scripts/supercloud/submit_supercloud_job.py
index 2d52181314..eb37b9de27 100644
--- a/scripts/supercloud/submit_supercloud_job.py
+++ b/scripts/supercloud/submit_supercloud_job.py
@@ -19,11 +19,12 @@ def _run() -> None:
     log_dir = CFG.log_dir
     logfile_prefix = utils.get_config_path_str()
     args_and_flags_str = " ".join(sys.argv[1:])
-    return submit_supercloud_job(job_name, log_dir, logfile_prefix,
+    return submit_supercloud_job("main.py", job_name, log_dir, logfile_prefix,
                                  args_and_flags_str, START_SEED, NUM_SEEDS)
 
 
-def submit_supercloud_job(job_name: str,
+def submit_supercloud_job(entry_point: str,
+                          job_name: str,
                           log_dir: str,
                           logfile_prefix: str,
                           args_and_flags_str: str,
@@ -31,12 +32,13 @@ def submit_supercloud_job(job_name: str,
                           num_seeds: int,
                           use_gpu: bool = False) -> None:
     """Launch the supercloud job."""
+    assert entry_point in ("main.py", "train_refinement_estimator.py")
     os.makedirs(log_dir, exist_ok=True)
     logfile_pattern = os.path.join(log_dir, f"{logfile_prefix}__%j.log")
     assert logfile_pattern.count("None") == 1
     logfile_pattern = logfile_pattern.replace("None", "%a")
-    mystr = (f"#!/bin/bash\npython predicators/main.py {args_and_flags_str} "
-             f"--seed $SLURM_ARRAY_TASK_ID")
+    mystr = (f"#!/bin/bash\npython predicators/{entry_point} "
+             f"{args_and_flags_str} --seed $SLURM_ARRAY_TASK_ID")
     temp_run_file = "temp_run_file.sh"
     assert not os.path.exists(temp_run_file)
     with open(temp_run_file, "w", encoding="utf-8") as f:
diff --git a/tests/approaches/test_oracle_approach.py b/tests/approaches/test_oracle_approach.py
index e9b1c819ca..c1e85972ac 100644
--- a/tests/approaches/test_oracle_approach.py
+++ b/tests/approaches/test_oracle_approach.py
@@ -174,7 +174,7 @@
     "doors_max_obstacles_per_room": 1,
 }]
 EXTRA_ARGS_ORACLE_APPROACH["exit_garage"] = [{
-    "exit_garage_pick_place_refine_penalty":
+    "exit_garage_clear_refine_penalty":
     0,
     "exit_garage_min_num_obstacles":
     1,
@@ -185,7 +185,7 @@
     "exit_garage_rrt_sample_goal_eps":
     0.3,
 }, {
-    "exit_garage_pick_place_refine_penalty":
+    "exit_garage_clear_refine_penalty":
     0,
     "exit_garage_min_num_obstacles":
     3,
diff --git a/tests/envs/test_exit_garage.py b/tests/envs/test_exit_garage.py
index 4de8b6e831..8109a0fd4b 100644
--- a/tests/envs/test_exit_garage.py
+++ b/tests/envs/test_exit_garage.py
@@ -21,21 +21,16 @@ def test_exit_garage_properties():
     for task in env.get_test_tasks():
         for obj in task.init:
             assert len(obj.type.feature_names) == len(task.init[obj])
-    assert len(env.predicates) == 5
-    (CarHasExited, CarryingObstacle, NotCarryingObstacle, ObstacleCleared,
-     ObstacleNotCleared) = sorted(env.predicates)
+    assert len(env.predicates) == 3
+    CarHasExited, ObstacleCleared, ObstacleNotCleared = sorted(env.predicates)
     assert CarHasExited.name == "CarHasExited"
-    assert CarryingObstacle.name == "CarryingObstacle"
-    assert NotCarryingObstacle.name == "NotCarryingObstacle"
     assert ObstacleCleared.name == "ObstacleCleared"
     assert ObstacleNotCleared.name == "ObstacleNotCleared"
     assert env.goal_predicates == {CarHasExited}
-    assert len(get_gt_options(env.get_name())) == 3
-    (DriveCarToExit, PickupObstacle,
-     StoreObstacle) = sorted(get_gt_options(env.get_name()))
+    assert len(get_gt_options(env.get_name())) == 2
+    ClearObstacle, DriveCarToExit = sorted(get_gt_options(env.get_name()))
+    assert ClearObstacle.name == "ClearObstacle"
     assert DriveCarToExit.name == "DriveCarToExit"
-    assert PickupObstacle.name == "PickupObstacle"
-    assert StoreObstacle.name == "StoreObstacle"
     assert len(env.types) == 4
     car_type, obstacle_type, robot_type, storage_type = sorted(env.types)
     assert car_type.name == "car"
@@ -54,8 +49,7 @@ def test_exit_garage_actions():
         "num_train_tasks": 1,
     })
     env = ExitGarageEnv()
-    (CarHasExited, CarryingObstacle, NotCarryingObstacle, ObstacleCleared,
-     ObstacleNotCleared) = sorted(env.predicates)
+    CarHasExited, ObstacleCleared, ObstacleNotCleared = sorted(env.predicates)
     car_type, obstacle_type, robot_type, storage_type = sorted(env.types)
 
     # Create task with fixed initial state
@@ -76,10 +70,8 @@ def test_exit_garage_actions():
     storage, = state.get_objects(storage_type)
     # Assert starting state predicates
     assert not GroundAtom(CarHasExited, [car]).holds(state)
-    assert not GroundAtom(CarryingObstacle, [robot, obstacle]).holds(state)
     assert not GroundAtom(ObstacleCleared, [obstacle]).holds(state)
     assert GroundAtom(ObstacleNotCleared, [obstacle]).holds(state)
-    assert GroundAtom(NotCarryingObstacle, [robot]).holds(state)
     task = EnvironmentTask(state, goal)
 
     # Fixed action sequences to test (each is a list of action arrays)
@@ -122,15 +114,15 @@ def test_exit_garage_actions():
     assert s.get(robot, "x") == true_x
     assert s.get(robot, "y") == true_y
     # Robot shouldn't have picked up anything since it wasn't on an obstacle
-    assert not GroundAtom(CarryingObstacle, [robot, obstacle]).holds(s)
-    assert GroundAtom(NotCarryingObstacle, [robot]).holds(s)
+    assert s.get(robot, "carrying") == 0
+    assert s.get(obstacle, "carried") == 0
 
     # Test that going and picking up the obstacle works
     for action in pickup_actions:
         s = env.simulate(s, Action(action))
-    assert GroundAtom(CarryingObstacle, [robot, obstacle]).holds(s)
+    assert s.get(robot, "carrying") == 1
+    assert s.get(obstacle, "carried") == 1
     assert not GroundAtom(ObstacleNotCleared, [obstacle]).holds(s)
-    assert not GroundAtom(NotCarryingObstacle, [robot]).holds(s)
 
     # Test that trying to place the obstacle outside storage does nothing
     true_x = s.get(robot, "x")
@@ -140,18 +132,18 @@ def test_exit_garage_actions():
     assert s.get(robot, "x") == true_x
     assert s.get(robot, "y") == true_y
     # Robot should still be carrying obstacle
-    assert GroundAtom(CarryingObstacle, [robot, obstacle]).holds(s)
+    assert s.get(robot, "carrying") == 1
+    assert s.get(obstacle, "carried") == 1
     assert not GroundAtom(ObstacleNotCleared, [obstacle]).holds(s)
-    assert not GroundAtom(NotCarryingObstacle, [robot]).holds(s)
 
     # Test that moving to storage and placing the obstacle works
     assert s.get(storage, "num_stored") == 0
     for action in store_actions:
         s = env.simulate(s, Action(action))
     # Check obstacle is placed
-    assert not GroundAtom(CarryingObstacle, [robot, obstacle]).holds(s)
+    assert s.get(robot, "carrying") == 0
+    assert s.get(obstacle, "carried") == 0
     assert GroundAtom(ObstacleCleared, [obstacle]).holds(s)
-    assert GroundAtom(NotCarryingObstacle, [robot]).holds(s)
     # Check obstacle and robot are in storage area
     assert s.get(robot, "y") > 0.8
     assert s.get(obstacle, "y") > 0.8
@@ -160,7 +152,8 @@ def test_exit_garage_actions():
 
     # Test that picking up in storage area does nothing
     s = env.simulate(s, Action(bad_robot_action))
-    assert GroundAtom(NotCarryingObstacle, [robot]).holds(s)
+    assert s.get(robot, "carrying") == 0
+    assert s.get(obstacle, "carried") == 0
 
     # Test moving car to exit
     for action in drive_actions:
@@ -303,7 +296,7 @@ def test_exit_garage_options():
     """Tests for exit garage parametrized options."""
     utils.reset_config({
         "env": "exit_garage",
-        "exit_garage_pick_place_refine_penalty": 0,
+        "exit_garage_clear_refine_penalty": 0,
         "exit_garage_min_num_obstacles": 2,
         "exit_garage_max_num_obstacles": 2,
         "exit_garage_rrt_num_control_samples": 15,
@@ -311,10 +304,8 @@ def test_exit_garage_options():
         "num_train_tasks": 1,
     })
     env = ExitGarageEnv()
-    (CarHasExited, CarryingObstacle, NotCarryingObstacle, ObstacleCleared,
-     ObstacleNotCleared) = sorted(env.predicates)
-    (DriveCarToExit, PickupObstacle,
-     StoreObstacle) = sorted(get_gt_options(env.get_name()))
+    CarHasExited, ObstacleCleared, ObstacleNotCleared = sorted(env.predicates)
+    ClearObstacle, DriveCarToExit = sorted(get_gt_options(env.get_name()))
     car_type, obstacle_type, robot_type, _ = sorted(env.types)
 
     # Create task with fixed initial state
@@ -327,13 +318,12 @@ def test_exit_garage_options():
     state.set(obstacle1, "x", 0.5)
     state.set(obstacle1, "y", 0.3)
     state.set(obstacle2, "x", 0.8)
-    state.set(obstacle2, "y", 0.1)
+    state.set(obstacle2, "y", 0.05)
     task = EnvironmentTask(state, goal)
 
-    # Test PickupObstacle, StoreObstacle, then DriveCarToExit
+    # Test ClearObstacle, then DriveCarToExit
     option_plan = [
-        PickupObstacle.ground([robot, obstacle2], [0.2]),
-        StoreObstacle.ground([robot, obstacle2], [0.6]),
+        ClearObstacle.ground([robot, obstacle1], [0.2]),
         DriveCarToExit.ground([car], [0.7]),
     ]
     policy = utils.option_plan_to_policy(option_plan)
@@ -346,37 +336,20 @@ def test_exit_garage_options():
         exceptions_to_break_on={utils.OptionExecutionFailure},
     )
     final_state = traj.states[-1]
+    assert final_state.get(robot, "carrying") == 0
+    assert final_state.get(obstacle1, "carried") == 0
+    assert GroundAtom(ObstacleCleared, [obstacle1]).holds(final_state)
+    assert not GroundAtom(ObstacleNotCleared, [obstacle1]).holds(final_state)
     assert GroundAtom(CarHasExited, [car]).holds(final_state)
-    assert not GroundAtom(CarryingObstacle,
-                          [robot, obstacle2]).holds(final_state)
-    assert GroundAtom(ObstacleCleared, [obstacle2]).holds(final_state)
-    assert not GroundAtom(ObstacleNotCleared, [obstacle2]).holds(final_state)
-    assert GroundAtom(NotCarryingObstacle, [robot]).holds(final_state)
     assert task.task.goal_holds(final_state)
 
     # Test scenarios where options shouldn't be initiable
 
-    # Test StoreObstacle when robot isn't carrying anything
-    assert GroundAtom(NotCarryingObstacle, [robot]).holds(state)
-    store_obstacle = StoreObstacle.ground([robot, obstacle1], [0.3])
-    assert not store_obstacle.initiable(state)
-
-    # Test StoreObstacle when robot carrying different obstacle
-    test_state = state.copy()
-    test_state.set(robot, "carrying", 1)
-    test_state.set(obstacle2, "carried", 1)
-    assert not store_obstacle.initiable(test_state)
-
-    # Test PickupObstacle when robot carrying obstacle already
-    pickup_obstacle = PickupObstacle.ground([robot, obstacle1], [0.8])
-    assert not pickup_obstacle.initiable(test_state)
-
-    # Test PickupObstacle when obstacle already picked or stored
-    pickup_obstacle = PickupObstacle.ground([robot, obstacle2], [0.4])
-    assert not pickup_obstacle.initiable(test_state)  # already picked
+    # Test ClearObstacle when obstacle already picked or stored
+    clear_obstacle = ClearObstacle.ground([robot, obstacle2], [0.4])
     test_state = state.copy()
     test_state.set(obstacle2, "y", 0.9)  # obstacle2 already in storage
-    assert not pickup_obstacle.initiable(test_state)
+    assert not clear_obstacle.initiable(test_state)
 
     # Test DriveCarToExit when car is already in collision for some reason
     test_state.set(car, "x", 0.5)
@@ -389,7 +362,7 @@ def test_exit_garage_failed_rrt():
     if motion planning fails."""
     utils.reset_config({
         "env": "exit_garage",
-        "exit_garage_pick_place_refine_penalty": 0,
+        "exit_garage_clear_refine_penalty": 0,
         "exit_garage_min_num_obstacles": 6,
         "exit_garage_max_num_obstacles": 6,
         "exit_garage_rrt_num_attempts": 1,
@@ -398,7 +371,7 @@ def test_exit_garage_failed_rrt():
         "num_train_tasks": 1,
     })
     env = ExitGarageEnv()
-    DriveCarToExit, _, _ = sorted(get_gt_options(env.get_name()))
+    _, DriveCarToExit = sorted(get_gt_options(env.get_name()))
     car_type, obstacle_type, _, _ = sorted(env.types)
 
     # Create task with fixed initial state
diff --git a/tests/nsrt_learning/test_segmentation.py b/tests/nsrt_learning/test_segmentation.py
index 0f4560006f..1418fdd7df 100644
--- a/tests/nsrt_learning/test_segmentation.py
+++ b/tests/nsrt_learning/test_segmentation.py
@@ -214,7 +214,7 @@ def test_contact_based_segmentation(env):
         "doors_min_room_exists_frac": 1.0,
         "doors_max_room_exists_frac": 1.0,
         "doors_birrt_smooth_amt": 0,
-        "exit_garage_pick_place_refine_penalty": 0,
+        "exit_garage_clear_refine_penalty": 0,
         "exit_garage_min_num_obstacles": 3,
         "exit_garage_max_num_obstacles": 3,
         "exit_garage_raise_environment_failure": True,
diff --git a/tests/refinement_estimators/test_base_refinement_estimator.py b/tests/refinement_estimators/test_base_refinement_estimator.py
index 5fec76af49..c996b1e60f 100644
--- a/tests/refinement_estimators/test_base_refinement_estimator.py
+++ b/tests/refinement_estimators/test_base_refinement_estimator.py
@@ -10,6 +10,8 @@
 from predicators.refinement_estimators import BaseRefinementEstimator, \
     create_refinement_estimator
 
+# We don't run these tests for gnn because training on an empty dataset is
+# not possible
 ESTIMATOR_NAMES = ["oracle", "tabular", "cnn"]
 
 
diff --git a/tests/refinement_estimators/test_cnn_refinement_estimator.py b/tests/refinement_estimators/test_cnn_refinement_estimator.py
index db847918ce..6b2118947c 100644
--- a/tests/refinement_estimators/test_cnn_refinement_estimator.py
+++ b/tests/refinement_estimators/test_cnn_refinement_estimator.py
@@ -28,10 +28,10 @@ def test_cnn_refinement_estimator():
     assert estimator.get_name() == "cnn"
     assert estimator.is_learning_based
     with pytest.raises(AssertionError):
-        sample_task = NarrowPassageEnv().get_train_tasks()[0]
+        sample_task = NarrowPassageEnv().get_train_tasks()[0].task
         estimator.get_cost(sample_task, [], [])
     # Check that train actually runs
-    sample_data = [(sample_task, [], [], False, 5)]
+    sample_data = [(sample_task, [], [], False, [], [])]
     estimator.train(sample_data)
     # Check that get_cost works now that the estimator is trained
     estimator.get_cost(sample_task, [], [])
@@ -49,6 +49,7 @@ def test_narrow_passage_cnn_refinement_estimator():
         "cnn_refinement_estimator_crop": True,
         "cnn_refinement_estimator_crop_bounds": (0, 10, 0, 10),
         "cnn_refinement_estimator_downsample": 2,
+        "refinement_data_include_execution_cost": True,
     })
     estimator = CNNRefinementEstimator()
 
@@ -56,7 +57,7 @@ def test_narrow_passage_cnn_refinement_estimator():
     env = NarrowPassageEnv()
     DoorIsClosed, DoorIsOpen, TouchedGoal = sorted(env.predicates)
     door_type, _, robot_type, target_type, _ = sorted(env.types)
-    sample_task = env.get_train_tasks()[0]
+    sample_task = env.get_train_tasks()[0].task
     sample_state = sample_task.init
     door, = sample_state.get_objects(door_type)
     robot, = sample_state.get_objects(robot_type)
@@ -91,10 +92,12 @@ def test_narrow_passage_cnn_refinement_estimator():
 
     # Create sample data to train using
     sample_data = [
-        (sample_task, move_direct_skeleton, move_direct_atoms_seq, True, 4),
+        (sample_task, move_direct_skeleton, move_direct_atoms_seq, True, [4],
+         [3]),
         (sample_task, move_through_door_skeleton, move_through_door_atoms_seq,
-         True, 2),
-        (sample_task, move_direct_skeleton, move_direct_atoms_seq, False, 5),
+         True, [0.5, 1.5], [3, 5]),
+        (sample_task, move_direct_skeleton, move_direct_atoms_seq, False, [5],
+         []),
     ]
     estimator.train(sample_data)
 
diff --git a/tests/refinement_estimators/test_gnn_refinement_estimator.py b/tests/refinement_estimators/test_gnn_refinement_estimator.py
new file mode 100644
index 0000000000..bd1faed77a
--- /dev/null
+++ b/tests/refinement_estimators/test_gnn_refinement_estimator.py
@@ -0,0 +1,220 @@
+"""Test cases for the GNN refinement cost estimator."""
+
+import os
+import shutil
+from pathlib import Path
+from unittest.mock import PropertyMock, patch
+
+import numpy as np
+import pytest
+from gym.spaces import Box
+
+import predicators.envs.narrow_passage
+from predicators import utils
+from predicators.envs.narrow_passage import NarrowPassageEnv
+from predicators.ground_truth_models import get_gt_nsrts, get_gt_options
+from predicators.ground_truth_models.narrow_passage import \
+    NarrowPassageGroundTruthNSRTFactory, \
+    NarrowPassageGroundTruthOptionFactory
+from predicators.refinement_estimators.gnn_refinement_estimator import \
+    GNNRefinementEstimator
+from predicators.settings import CFG
+from predicators.structs import NSRT, Action, GroundAtom, \
+    ParameterizedOption, Predicate, Task, Variable
+
+_ENV_MODULE_NAME = predicators.envs.narrow_passage.__name__
+
+
+def test_gnn_refinement_estimator():
+    """Test general properties of the untrained GNN refinement estimator."""
+    utils.reset_config({
+        "env": "narrow_passage",
+        "gnn_num_message_passing": 1,
+        "gnn_layer_size": 3,
+        "gnn_num_epochs": 1,
+    })
+    estimator = GNNRefinementEstimator()
+    assert estimator.get_name() == "gnn"
+    assert estimator.is_learning_based
+    sample_task = NarrowPassageEnv().get_train_tasks()[0].task
+    with pytest.raises(AssertionError):
+        estimator.get_cost(sample_task, [], [])
+
+
+def test_narrow_passage_gnn_refinement_estimator():
+    """Test GNN refinement cost estimator for narrow_passage env."""
+    utils.reset_config({
+        "env": "narrow_passage",
+        "gnn_num_message_passing": 1,
+        "gnn_layer_size": 3,
+        "gnn_num_epochs": 1,
+        "gnn_do_normalization": True,
+        "refinement_data_include_execution_cost": True,
+    })
+    estimator = GNNRefinementEstimator()
+
+    # Get env objects and NSRTs
+    env = NarrowPassageEnv()
+    DoorIsClosed, DoorIsOpen, TouchedGoal = sorted(env.predicates)
+    door_type, _, robot_type, target_type, _ = sorted(env.types)
+    sample_task = env.get_train_tasks()[0].task
+    sample_state = sample_task.init
+    door, = sample_state.get_objects(door_type)
+    robot, = sample_state.get_objects(robot_type)
+    target, = sample_state.get_objects(target_type)
+    options = get_gt_options(env.get_name())
+    gt_nsrts = get_gt_nsrts(CFG.env, env.predicates, options)
+    move_and_open_door_nsrt, move_to_target_nsrt = sorted(gt_nsrts)
+
+    # Ground NSRTs using objects
+    ground_move_and_open_door = move_and_open_door_nsrt.ground([robot, door])
+    ground_move_to_target = move_to_target_nsrt.ground([robot, target])
+    # Ground atoms using objects
+    ground_door_is_closed = GroundAtom(DoorIsClosed, [door])
+    ground_door_is_open = GroundAtom(DoorIsOpen, [door])
+    ground_touched_goal = GroundAtom(TouchedGoal, [robot, target])
+
+    # Make valid test skeletons and atom_sequences
+    move_direct_skeleton = [ground_move_to_target]
+    move_direct_atoms_seq = [
+        {ground_door_is_closed},
+        {ground_door_is_closed, ground_touched_goal},
+    ]
+    move_through_door_skeleton = [
+        ground_move_and_open_door,
+        ground_move_to_target,
+    ]
+    move_through_door_atoms_seq = [
+        {ground_door_is_closed},
+        {ground_door_is_open},
+        {ground_door_is_open, ground_touched_goal},
+    ]
+
+    # Create sample data to train using
+    sample_data = [
+        (sample_task, move_direct_skeleton, move_direct_atoms_seq, True, [4],
+         [3]),
+        (sample_task, move_through_door_skeleton, move_through_door_atoms_seq,
+         True, [0.5, 1.5], [3, 5]),
+        (sample_task, move_direct_skeleton, move_direct_atoms_seq, False, [5],
+         []),
+    ]
+    estimator.train(sample_data)
+
+    # Test direct MoveToTarget skeleton returns finite cost
+    move_direct_cost = estimator.get_cost(sample_task, move_direct_skeleton,
+                                          move_direct_atoms_seq)
+    assert move_direct_cost < float('inf')
+
+    # Test open door then move skeleton returns finite cost
+    move_through_door_cost = estimator.get_cost(sample_task,
+                                                move_through_door_skeleton,
+                                                move_through_door_atoms_seq)
+    assert move_through_door_cost < float('inf')
+
+
+def test_gnn_refinement_estimator_arities():
+    """Test GNN refinement cost estimator on mocked predicate/NSRT sets that
+    are 0-arity, unary, and binary."""
+    utils.reset_config({
+        "env": "narrow_passage",
+        "gnn_num_message_passing": 1,
+        "gnn_layer_size": 3,
+        "gnn_num_epochs": 1,
+        "gnn_use_validation_set": False,
+    })
+
+    # Get base environment, types, predicates, NSRTs
+    env = NarrowPassageEnv()
+    _, DoorIsOpen, TouchedGoal = sorted(env.predicates)
+    door_type, _, robot_type, target_type, _ = sorted(env.types)
+    base_options = get_gt_options(env.get_name())
+    gt_nsrts = get_gt_nsrts(CFG.env, env.predicates, base_options)
+    move_and_open_door_option, _ = sorted(base_options)
+    move_and_open_door_nsrt, _ = sorted(gt_nsrts)
+
+    # Make predicates of all arities
+    ZeroArityPred = Predicate("ZeroArityPred", [], lambda s, o: False)
+    UnaryPred = DoorIsOpen
+    BinaryPred = TouchedGoal
+
+    # Make dummy options and NSRTs of all arities
+
+    _policy = lambda _1, _2, _3, _4: Action(
+        np.array([0, 0, 0], dtype=np.float32))
+    _initiable = lambda _1, _2, _3, _4: True
+    _sampler = lambda _1, _2, rng, _4: np.array([rng.uniform()],
+                                                dtype=np.float32)
+
+    ZeroArityOption = ParameterizedOption("ZeroArityOption", [],
+                                          Box(0, 1,
+                                              (1, )), _policy, _initiable,
+                                          lambda _1, _2, _3, _4: True)
+    ZeroArityNSRT = NSRT("ZeroArityNSRT", [], set(), set(), set(), set(),
+                         ZeroArityOption, [], _sampler)
+
+    UnaryOption = ParameterizedOption("UnaryOption", [robot_type],
+                                      Box(0, 1, (1, )), _policy, _initiable,
+                                      lambda _1, _2, _3, _4: True)
+    robot = Variable("?robot", robot_type)
+    UnaryNSRT = NSRT("UnaryNSRT", [robot], set(), set(), set(), set(),
+                     UnaryOption, [robot], _sampler)
+    BinaryNSRT = move_and_open_door_nsrt
+
+    mock_preds = {ZeroArityPred, UnaryPred, BinaryPred}
+    mock_options = {ZeroArityOption, UnaryOption, move_and_open_door_option}
+    mock_nsrts = {ZeroArityNSRT, UnaryNSRT, BinaryNSRT}
+
+    with patch(f"{_ENV_MODULE_NAME}.NarrowPassageEnv.predicates",
+               new_callable=PropertyMock) as mock_env, \
+            patch.object(NarrowPassageGroundTruthOptionFactory,
+                         "get_options",
+                         return_value=mock_options), \
+            patch.object(NarrowPassageGroundTruthNSRTFactory,
+                         "get_nsrts",
+                         return_value=mock_nsrts):
+        mock_env.return_value = mock_preds
+        # Test that _setup_fields() works
+        estimator = GNNRefinementEstimator()
+        estimator2 = GNNRefinementEstimator()
+
+    # Make a test Task with all types of predicates/NSRTs involved
+    sample_task = env.get_train_tasks()[0].task
+    initial_state = sample_task.init
+    robot, = initial_state.get_objects(robot_type)
+    door, = initial_state.get_objects(door_type)
+    target, = initial_state.get_objects(target_type)
+    goal = {
+        GroundAtom(ZeroArityPred, []),
+        GroundAtom(UnaryPred, [door]),
+        GroundAtom(BinaryPred, [robot, target]),
+    }
+    task = Task(initial_state, goal)
+
+    # Make a test skeleton and atoms_sequence
+    skeleton = [ZeroArityNSRT.ground([])]
+    atoms_sequence = [goal, goal]
+
+    # Create sample refinement training data
+    data = [(task, skeleton, atoms_sequence, False, [5.0], [4])]
+    # Check that train() and _graphify_single_input() successfully run
+    estimator.train(data)
+
+    # Test that getting a cost returns a finite cost
+    test_cost = estimator.get_cost(task, skeleton, atoms_sequence)
+    assert test_cost < float('inf')
+
+    # Create fake directory to test saving and loading model
+    parent_dir = os.path.dirname(__file__)
+    approach_dir = os.path.join(parent_dir, "_fake_approach")
+    os.makedirs(approach_dir, exist_ok=True)
+    test_approach_path = Path(approach_dir) / "test.estimator"
+    try:
+        estimator.save_model(test_approach_path)
+        estimator2.load_model(test_approach_path)
+        # Check that the loaded model is the same as the saved one
+        test_cost2 = estimator2.get_cost(task, skeleton, atoms_sequence)
+        assert test_cost2 == test_cost
+    finally:
+        # Remove temp directory even if a step above fails
+        shutil.rmtree(approach_dir)
diff --git a/tests/refinement_estimators/test_oracle_refinement_estimator.py b/tests/refinement_estimators/test_oracle_refinement_estimator.py
index c73185c0a2..dbe2eea32a 100644
--- a/tests/refinement_estimators/test_oracle_refinement_estimator.py
+++ b/tests/refinement_estimators/test_oracle_refinement_estimator.py
@@ -18,7 +18,7 @@ def test_oracle_refinement_estimator():
     assert estimator.get_name() == "oracle"
     assert not estimator.is_learning_based
     with pytest.raises(NotImplementedError):
-        sample_task = NarrowPassageEnv().get_train_tasks()[0]
+        sample_task = NarrowPassageEnv().get_train_tasks()[0].task
         estimator.get_cost(sample_task, [], [])
 
 
@@ -94,6 +94,8 @@ def test_exit_garage_oracle_refinement_estimator():
     """Test oracle refinement cost estimator for exit_garage env."""
     utils.reset_config({
         "env": "exit_garage",
+        "exit_garage_min_num_obstacles": 2,
+        "exit_garage_max_num_obstacles": 2,
     })
     estimator = OracleRefinementEstimator()
 
@@ -104,21 +106,19 @@ def test_exit_garage_oracle_refinement_estimator():
     sample_state = sample_task.init
     car, = sample_state.get_objects(car_type)
     robot, = sample_state.get_objects(robot_type)
-    obstacles = sample_state.get_objects(obstacle_type)
+    obstacle1, obstacle2 = sample_state.get_objects(obstacle_type)
+    sample_state.set(obstacle1, "y", 0.6)
+    sample_state.set(obstacle2, "y", 0.4)
     task = Task(sample_state, sample_task.goal)
     gt_nsrts = get_gt_nsrts(CFG.env, env.predicates,
                             get_gt_options(env.get_name()))
-    (drive_car_to_exit_nsrt, pickup_obstacle_nsrt,
-     store_obstacle_nsrt) = sorted(gt_nsrts)
+    clear_obstacle_nsrt, drive_car_to_exit_nsrt = sorted(gt_nsrts)
 
     # Ground NSRTs using objects
     ground_drive_car_to_exit = drive_car_to_exit_nsrt.ground([car])
 
-    def ground_pickup_obstacle(obstacle):
-        return pickup_obstacle_nsrt.ground([robot, obstacle])
-
-    def ground_store_obstacle(obstacle):
-        return store_obstacle_nsrt.ground([robot, obstacle])
+    def ground_clear_obstacle(obstacle):
+        return clear_obstacle_nsrt.ground([robot, obstacle])
 
     # Test direct DriveCarToExit skeleton
     drive_direct_skeleton = [ground_drive_car_to_exit]
@@ -127,13 +127,12 @@ def ground_store_obstacle(obstacle):
 
     # Test pickups and stores before driving
     long_skeleton = [
-        ground_pickup_obstacle(obstacles[0]),
-        ground_store_obstacle(obstacles[0]),
-        ground_pickup_obstacle(obstacles[1]),
+        ground_clear_obstacle(obstacle1),
+        ground_clear_obstacle(obstacle2),
         ground_drive_car_to_exit,
     ]
     long_cost = estimator.get_cost(task, long_skeleton, [])
-    assert long_cost == -2
+    assert long_cost == -0.5
 
     # Make sure that sorting the costs considers the long skeleton cheaper
     assert sorted([drive_direct_cost,
diff --git a/tests/refinement_estimators/test_tabular_refinement_estimator.py b/tests/refinement_estimators/test_tabular_refinement_estimator.py
index 96cc9506fa..2e844cd442 100644
--- a/tests/refinement_estimators/test_tabular_refinement_estimator.py
+++ b/tests/refinement_estimators/test_tabular_refinement_estimator.py
@@ -20,22 +20,24 @@ def test_tabular_refinement_estimator():
     assert estimator.get_name() == "tabular"
     assert estimator.is_learning_based
     with pytest.raises(AssertionError):
-        sample_task = NarrowPassageEnv().get_train_tasks()[0]
+        sample_task = NarrowPassageEnv().get_train_tasks()[0].task
         estimator.get_cost(sample_task, [], [])
     # Check that train actually runs
-    sample_data = [(sample_task, [], [], False, 5)]
+    sample_data = [(sample_task, [], [], False, [], [])]
     estimator.train(sample_data)
     # Check that the resulting dictionary is correct
     cost_dict = estimator._model_dict  # pylint: disable=protected-access
-    assert cost_dict == {(tuple(), tuple()): 8}
-    assert estimator.get_cost(sample_task, [], []) == 8
+    assert cost_dict == {(tuple(), tuple()): (3, 0)}
+    assert estimator.get_cost(sample_task, [], []) == 3
 
 
 def test_narrow_passage_tabular_refinement_estimator():
     """Test tabular refinement cost estimator for narrow_passage env."""
     utils.reset_config({
         "env": "narrow_passage",
-        "refinement_data_failed_refinement_penalty": 3
+        "refinement_data_failed_refinement_penalty": 3,
+        "refinement_data_include_execution_cost": True,
+        "refinement_data_low_level_execution_cost": 0.01,
     })
     estimator = TabularRefinementEstimator()
 
@@ -43,7 +45,7 @@ def test_narrow_passage_tabular_refinement_estimator():
     env = NarrowPassageEnv()
     DoorIsClosed, DoorIsOpen, TouchedGoal = sorted(env.predicates)
     door_type, _, robot_type, target_type, _ = sorted(env.types)
-    sample_task = env.get_train_tasks()[0]
+    sample_task = env.get_train_tasks()[0].task
     sample_state = sample_task.init
     door, = sample_state.get_objects(door_type)
     robot, = sample_state.get_objects(robot_type)
@@ -78,23 +80,25 @@ def test_narrow_passage_tabular_refinement_estimator():
 
     # Create sample data to train using
     sample_data = [
-        (sample_task, move_direct_skeleton, move_direct_atoms_seq, True, 4),
+        (sample_task, move_direct_skeleton, move_direct_atoms_seq, True, [4],
+         [3]),
         (sample_task, move_through_door_skeleton, move_through_door_atoms_seq,
-         True, 2),
-        (sample_task, move_direct_skeleton, move_direct_atoms_seq, False, 5),
+         True, [0.5, 1.5], [3, 5]),
+        (sample_task, move_direct_skeleton, move_direct_atoms_seq, False, [5],
+         []),
     ]
     estimator.train(sample_data)
 
     # Test direct MoveToTarget skeleton
     move_direct_cost = estimator.get_cost(sample_task, move_direct_skeleton,
                                           move_direct_atoms_seq)
-    assert move_direct_cost == 6  # average of 2 samples: 4 and 5 + 3
+    assert abs(move_direct_cost - 6.015) < 1e-5  # average of 4.03 and (5 + 3)
 
     # Test open door then move skeleton
     move_through_door_cost = estimator.get_cost(sample_task,
                                                 move_through_door_skeleton,
                                                 move_through_door_atoms_seq)
-    assert move_through_door_cost == 2
+    assert abs(move_through_door_cost - 2.08) < 1e-5
 
     # Test an impossible skeleton
     impossible_skeleton = [
diff --git a/tests/test_train_refinement_estimator.py b/tests/test_train_refinement_estimator.py
index 070cde6c71..ad85e11fd8 100644
--- a/tests/test_train_refinement_estimator.py
+++ b/tests/test_train_refinement_estimator.py
@@ -53,11 +53,29 @@ def test_train_refinement_estimator():
     # Test successful data generation and training
     temp_log_file = tempfile.NamedTemporaryFile(delete=False).name
     train_sys_argv = [
-        "dummy", "--env", "narrow_passage", "--approach",
-        "refinement_estimation", "--refinement_estimator", "tabular", "--seed",
-        "123", "--num_train_tasks", "1", "--approach_dir", approach_dir,
-        "--data_dir", data_dir, "--refinement_data_file_name", "test.data",
-        "--log_file", temp_log_file
+        "dummy",
+        "--env",
+        "narrow_passage",
+        "--approach",
+        "refinement_estimation",
+        "--refinement_estimator",
+        "tabular",
+        "--seed",
+        "123",
+        "--num_train_tasks",
+        "1",
+        "--approach_dir",
+        approach_dir,
+        "--data_dir",
+        data_dir,
+        "--refinement_data_file_name",
+        "test.data",
+        "--refinement_data_save_every",
+        "1",
+        "--log_file",
+        temp_log_file,
+        "--refinement_train_with_frac_data",
+        "1.1",
     ]
     sys.argv = train_sys_argv
     _train_refinement_estimation_approach()
@@ -114,5 +132,6 @@ def test_train_refinement_estimator():
     sample_option_model = create_option_model("oracle")
     utils.reset_config_with_parser(parser)
     with pytest.raises(PlanningFailure):
-        _collect_refinement_data_for_task(sample_task, sample_option_model,
-                                          set(), set(), set(), 0, [])
+        _collect_refinement_data_for_task(sample_env, sample_task,
+                                          sample_option_model, set(), set(),
+                                          set(), 0, [])

From 2d3849b2533acb8255e1e7b94b0f19a56eeca511 Mon Sep 17 00:00:00 2001
From: Nishanth Kumar <NishanthJKumar@users.noreply.github.com>
Date: Thu, 6 Jul 2023 16:58:44 -0400
Subject: [PATCH 3/5] Implements new variant of cover env for testing active
 sampler learning approaches (#1493)

* i think this works??

* done

* done testing

* revert unnecessary comments

* fix tom comments
---
 predicators/envs/cover.py                     | 23 +++++++++++++++++
 .../ground_truth_models/cover/nsrts.py        | 16 +++++++++---
 .../ground_truth_models/cover/options.py      | 21 +++++++++++-----
 predicators/settings.py                       | 25 +++++++++++++------
 tests/approaches/test_oracle_approach.py      |  5 ++--
 tests/envs/test_cover.py                      |  5 +++-
 tests/explorers/test_online_learning.py       |  2 ++
 7 files changed, 77 insertions(+), 20 deletions(-)

diff --git a/predicators/envs/cover.py b/predicators/envs/cover.py
index b8eae2a805..84a3adef5a 100644
--- a/predicators/envs/cover.py
+++ b/predicators/envs/cover.py
@@ -996,6 +996,29 @@ def _Covers_holds(state: State, objects: Sequence[Object]) -> bool:
                (by - bh == 0)
 
 
+class CoverEnvPlaceHard(CoverEnv):
+    """A cover environment where the only thing that's hard is placing.
+    Specifically, there is only one block and one target, and the default grasp
+    sampler always picks up the block directly in the middle. The robot is
+    allowed to place anywhere, and the default sampler tries placing in a
+    region that's 2x bigger than the target, often missing the target. The only
+    thing that needs to be learned is how to place to correctly cover the
+    target.
+
+    This environment is specifically useful for testing various aspects
+    of different sampler learning approaches.
+    """
+    _allow_free_space_placing: ClassVar[bool] = True
+
+    @classmethod
+    def get_name(cls) -> str:
+        return "cover_place_hard"
+
+    def _get_hand_regions(self, state: State) -> List[Tuple[float, float]]:
+        # One region spanning the full [0.0, 1.0] range: allow placing anywhere!
+        return [(0.0, 1.0)]
+
+
 class BumpyCoverEnv(CoverEnvRegrasp):
     """A variation on the cover regrasp environment where some blocks are
     'bumpy', as indicated by a new feature of blocks.
diff --git a/predicators/ground_truth_models/cover/nsrts.py b/predicators/ground_truth_models/cover/nsrts.py
index f16cd73508..e82bb0967e 100644
--- a/predicators/ground_truth_models/cover/nsrts.py
+++ b/predicators/ground_truth_models/cover/nsrts.py
@@ -19,7 +19,7 @@ def get_env_names(cls) -> Set[str]:
         return {
             "cover", "cover_hierarchical_types", "cover_typed_options",
             "cover_regrasp", "cover_multistep_options", "pybullet_cover",
-            "cover_handempty", "bumpy_cover"
+            "cover_handempty", "bumpy_cover", "cover_place_hard"
         }
 
     @staticmethod
@@ -48,7 +48,7 @@ def get_nsrts(env_name: str, types: Dict[str, Type],
                         "cover_regrasp", "cover_handempty"):
             PickPlace = options["PickPlace"]
         elif env_name in ("cover_typed_options", "cover_multistep_options",
-                          "bumpy_cover"):
+                          "bumpy_cover", "cover_place_hard"):
             Pick, Place = options["Pick"], options["Place"]
 
         nsrts = set()
@@ -77,7 +77,7 @@ def get_nsrts(env_name: str, types: Dict[str, Type],
         elif env_name == "bumpy_cover":
             option = Pick
             option_vars = [block]
-        elif env_name == "cover_typed_options":
+        elif env_name in ("cover_typed_options", "cover_place_hard"):
             option = Pick
             option_vars = [block]
         elif env_name == "cover_multistep_options":
@@ -158,6 +158,8 @@ def pick_sampler(state: State, goal: Set[GroundAtom],
                     ub = float(
                         state.get(b, "pose") + state.get(b, "width") / 2)
                     ub = min(ub, 1.0)
+                elif env_name == "cover_place_hard":
+                    return np.array([state.get(b, "pose")], dtype=np.float32)
                 return np.array(rng.uniform(lb, ub, size=(1, )),
                                 dtype=np.float32)
 
@@ -196,9 +198,12 @@ def pick_sampler(state: State, goal: Set[GroundAtom],
         elif env_name == "bumpy_cover":
             option = Place
             option_vars = [block, target]
-        elif env_name == "cover_typed_options":
+        elif env_name == "cover_typed_options":
             option = Place
             option_vars = [target]
+        elif env_name == "cover_place_hard":
+            option = Place
+            option_vars = [block, target]
         elif env_name == "cover_multistep_options":
             option = Place
             option_vars = [block, robot, target]
@@ -271,6 +276,9 @@ def place_sampler(state: State, goal: Set[GroundAtom],
                         center += 3 * state.get(t, "width") / 4
                     lb = center - state.get(t, "width") / 2
                     ub = center + state.get(t, "width") / 2
+                elif env_name == "cover_place_hard":
+                    lb = float(state.get(t, "pose") - state.get(t, "width"))
+                    ub = float(state.get(t, "pose") + state.get(t, "width"))
                 else:
                     lb = float(
                         state.get(t, "pose") - state.get(t, "width") / 10)
diff --git a/predicators/ground_truth_models/cover/options.py b/predicators/ground_truth_models/cover/options.py
index 504e340043..ab6a6dfc70 100644
--- a/predicators/ground_truth_models/cover/options.py
+++ b/predicators/ground_truth_models/cover/options.py
@@ -142,7 +142,7 @@ class CoverTypedOptionsGroundTruthOptionFactory(GroundTruthOptionFactory):
 
     @classmethod
     def get_env_names(cls) -> Set[str]:
-        return {"cover_typed_options"}
+        return {"cover_typed_options", "cover_place_hard"}
 
     @classmethod
     def get_options(cls, env_name: str, types: Dict[str, Type],
@@ -157,25 +157,34 @@ def _Pick_policy(s: State, m: Dict, o: Sequence[Object],
             del m  # unused
             # The pick parameter is a RELATIVE position, so we need to
             # add the pose of the object.
-            pick_pose = s.get(o[0], "pose") + p[0]
-            pick_pose = min(max(pick_pose, 0.0), 1.0)
-            return Action(np.array([pick_pose], dtype=np.float32))
+            if CFG.env == "cover_typed_options":
+                pick_pose = s.get(o[0], "pose") + p[0]
+                pick_pose = min(max(pick_pose, 0.0), 1.0)
+                return Action(np.array([pick_pose], dtype=np.float32))
+            return Action(p)
+
+        lb, ub = (0.0, 1.0)
+        if CFG.env == "cover_typed_options":
+            lb, ub = (-0.1, 0.1)
 
         Pick = utils.SingletonParameterizedOption("Pick",
                                                   _Pick_policy,
                                                   types=[block_type],
                                                   params_space=Box(
-                                                      -0.1, 0.1, (1, )))
+                                                      lb, ub, (1, )))
 
         def _Place_policy(state: State, memory: Dict,
                           objects: Sequence[Object], params: Array) -> Action:
             del state, memory, objects  # unused
             return Action(params)  # action is simply the parameter
 
+        place_types = [block_type, target_type]
+        if CFG.env == "cover_typed_options":
+            place_types = [target_type]
         Place = utils.SingletonParameterizedOption(
             "Place",
             _Place_policy,  # use the parent class's policy
-            types=[target_type],
+            types=place_types,
             params_space=Box(0, 1, (1, )))
 
         return {Pick, Place}
diff --git a/predicators/settings.py b/predicators/settings.py
index 91978e43a9..1be2b8e694 100644
--- a/predicators/settings.py
+++ b/predicators/settings.py
@@ -46,13 +46,6 @@ class GlobalSettings:
     # your call to utils.reset_config().
     render_state_dpi = 150
 
-    # cover env parameters
-    cover_num_blocks = 2
-    cover_num_targets = 2
-    cover_block_widths = [0.1, 0.07]
-    cover_target_widths = [0.05, 0.03]
-    cover_initial_holding_prob = 0.75
-
     # cover_multistep_options env parameters
     cover_multistep_action_limits = [-np.inf, np.inf]
     cover_multistep_degenerate_oracle_samplers = False
@@ -693,6 +686,24 @@ def get_arg_specific_settings(args: Dict[str, Any]) -> Dict[str, Any]:
                     # For the tools environment, keep it much lower.
                     "tools": 1,
                 })[args.get("env", "")],
+
+            # Parameters specific to the cover environments; the
+            # cover_place_hard variant overrides each default below.
+            cover_num_blocks=defaultdict(lambda: 2, {
+                "cover_place_hard": 1,
+            })[args.get("env", "")],
+            cover_num_targets=defaultdict(lambda: 2, {
+                "cover_place_hard": 1,
+            })[args.get("env", "")],
+            cover_block_widths=defaultdict(lambda: [0.1, 0.07], {
+                "cover_place_hard": [0.1],
+            })[args.get("env", "")],
+            cover_target_widths=defaultdict(lambda: [0.05, 0.03], {
+                "cover_place_hard": [0.05],
+            })[args.get("env", "")],
+            cover_initial_holding_prob=defaultdict(lambda: 0.75, {
+                "cover_place_hard": 0.0,
+            })[args.get("env", "")],
         )
 
 
diff --git a/tests/approaches/test_oracle_approach.py b/tests/approaches/test_oracle_approach.py
index c1e85972ac..dccbbfcf69 100644
--- a/tests/approaches/test_oracle_approach.py
+++ b/tests/approaches/test_oracle_approach.py
@@ -15,8 +15,8 @@
     ClutteredTablePlaceEnv
 from predicators.envs.coffee import CoffeeEnv
 from predicators.envs.cover import BumpyCoverEnv, CoverEnv, \
-    CoverEnvHierarchicalTypes, CoverEnvRegrasp, CoverEnvTypedOptions, \
-    CoverMultistepOptions, RegionalBumpyCoverEnv
+    CoverEnvHierarchicalTypes, CoverEnvPlaceHard, CoverEnvRegrasp, \
+    CoverEnvTypedOptions, CoverMultistepOptions, RegionalBumpyCoverEnv
 from predicators.envs.doors import DoorsEnv
 from predicators.envs.exit_garage import ExitGarageEnv
 from predicators.envs.narrow_passage import NarrowPassageEnv
@@ -46,6 +46,7 @@
 
 ENV_NAME_AND_CLS = [
     ("cover", CoverEnv), ("cover_typed_options", CoverEnvTypedOptions),
+    ("cover_place_hard", CoverEnvPlaceHard),
     ("cover_hierarchical_types", CoverEnvHierarchicalTypes),
     ("cover_regrasp", CoverEnvRegrasp), ("bumpy_cover", BumpyCoverEnv),
     ("cover_multistep_options", CoverMultistepOptions),
diff --git a/tests/envs/test_cover.py b/tests/envs/test_cover.py
index ac3cdb7e55..076a3b79e4 100644
--- a/tests/envs/test_cover.py
+++ b/tests/envs/test_cover.py
@@ -125,7 +125,10 @@ def test_cover(env_name):
 
 def test_cover_typed_options():
     """Tests for CoverEnvTypedOptions class."""
-    utils.reset_config({"env": "cover", "cover_initial_holding_prob": 0.0})
+    utils.reset_config({
+        "env": "cover_typed_options",
+        "cover_initial_holding_prob": 0.0
+    })
     env = CoverEnvTypedOptions()
     for task in env.get_train_tasks():
         for obj in task.init:
diff --git a/tests/explorers/test_online_learning.py b/tests/explorers/test_online_learning.py
index 76bf30f298..fb24d3a13d 100644
--- a/tests/explorers/test_online_learning.py
+++ b/tests/explorers/test_online_learning.py
@@ -121,6 +121,7 @@ def test_interaction():
         "load_data": True,
         "make_interaction_videos": False,
     })
+    env = create_new_env("cover")
     # Invalid query type.
     with pytest.raises(AssertionError) as e:
         _run_pipeline(env, cogman, train_tasks, dataset)
@@ -189,4 +190,5 @@ def test_interaction():
         "make_interaction_videos": True,
         "max_num_steps_interaction_request": 3,
     })
+    env = create_new_env("cover")
     _run_pipeline(env, cogman, train_tasks, dataset)

From 1bdea20b165f09810f97a60a64256a09ac9b5a90 Mon Sep 17 00:00:00 2001
From: Tom Silver <tomsilver@users.noreply.github.com>
Date: Mon, 17 Jul 2023 17:15:13 -0400
Subject: [PATCH 4/5] active sampler explorer: consider multiple goals in case
 one is not reachable (#1494)

---
 .../explorers/active_sampler_explorer.py      | 66 ++++++++++++-------
 .../explorers/test_active_sampler_explorer.py |  2 +-
 2 files changed, 42 insertions(+), 26 deletions(-)

diff --git a/predicators/explorers/active_sampler_explorer.py b/predicators/explorers/active_sampler_explorer.py
index 91d8c61382..e79035dfca 100644
--- a/predicators/explorers/active_sampler_explorer.py
+++ b/predicators/explorers/active_sampler_explorer.py
@@ -1,14 +1,14 @@
 """An explorer for active sampler learning."""
 
 import logging
-from typing import Callable, Dict, List, Optional, Set
+from typing import Callable, Dict, Iterator, List, Optional, Set
 
 import numpy as np
 from gym.spaces import Box
 
 from predicators import utils
 from predicators.explorers.base_explorer import BaseExplorer
-from predicators.planning import run_task_plan_once
+from predicators.planning import PlanningFailure, run_task_plan_once
 from predicators.settings import CFG
 from predicators.structs import NSRT, ExplorationStrategy, GroundAtom, \
     NSRTSampler, ParameterizedOption, Predicate, State, Task, Type, \
@@ -108,25 +108,47 @@ def _option_policy(state: State) -> _Option:
             if current_policy is None:
                 # If the assigned goal hasn't yet been reached, try for it.
                 if not assigned_task_goal_reached:
-                    goal = assigned_task.goal
-                    logging.info(
-                        f"[Explorer] Pursuing assigned task goal: {goal}")
+                    logging.info("[Explorer] Pursuing assigned task goal")
+
+                    def generate_goals() -> Iterator[Set[GroundAtom]]:
+                        # Just a single goal.
+                        yield assigned_task.goal
+
                 # Otherwise, practice.
                 else:
-                    # If there are no ground NSRTs that we've tried so far,
-                    # just wait until we have tried to solve some task.
-                    if len(self._ground_op_hist) == 0:
-                        raise utils.OptionExecutionFailure(
-                            "No ground operators to practice yet")
-                    next_practice_nsrt = self._get_practice_ground_nsrt()
-                    logging.info("[Explorer] Pursuing NRST preconditions "
-                                 f"{next_practice_nsrt.name}"
-                                 f"{next_practice_nsrt.objects}")
-                    goal = next_practice_nsrt.preconditions
-                task = Task(state, goal)
-                logging.info(f"[Explorer] Replanning to {task.goal}")
-                current_policy = self._get_option_policy_for_task(task)
-
+                    logging.info("[Explorer] Pursuing NSRT preconditions")
+
+                    def generate_goals() -> Iterator[Set[GroundAtom]]:
+                        nonlocal next_practice_nsrt
+                        # Generate goals in descending order of score.
+                        for op in sorted(self._ground_op_hist,
+                                         key=self._score_ground_op,
+                                         reverse=True):
+                            nsrt = [
+                                n for n in self._nsrts if n.op == op.parent
+                            ][0]
+                            # NOTE: setting nonlocal variable.
+                            next_practice_nsrt = nsrt.ground(op.objects)
+                            yield next_practice_nsrt.preconditions
+
+                # Try to plan to each goal until a task plan is found.
+                for goal in generate_goals():
+                    task = Task(state, goal)
+                    logging.info(f"[Explorer] Replanning to {task.goal}")
+                    try:
+                        current_policy = self._get_option_policy_for_task(task)
+                    # Not covering this case because the intention of this
+                    # explorer is to be used in environments where any goal can
+                    # be reached from anywhere, but we still don't want to
+                    # crash in case that assumption is not met.
+                    except PlanningFailure:  # pragma: no cover
+                        continue
+                    logging.info("[Explorer] Plan found.")
+                    break
+                # Terminate early if no goal could be planned to.
+                else:
+                    logging.info("[Explorer] No reachable goal found.")
+                    raise utils.RequestActPolicyFailure("Failed to find goal.")
             # Query the current policy.
             assert current_policy is not None
             try:
@@ -179,12 +201,6 @@ def _update_ground_op_hist(self, state: State) -> None:
             self._ground_op_hist[last_executed_op] = []
         self._ground_op_hist[last_executed_op].append(success)
 
-    def _get_practice_ground_nsrt(self) -> _GroundNSRT:
-        best_op = max(self._ground_op_hist, key=self._score_ground_op)
-        logging.info(f"[Explorer] Practicing {best_op.name}{best_op.objects}")
-        nsrt = [n for n in self._nsrts if n.op == best_op.parent][0]
-        return nsrt.ground(best_op.objects)
-
     def _get_option_policy_for_task(self,
                                     task: Task) -> Callable[[State], _Option]:
         # Run task planning and then greedily execute.
diff --git a/tests/explorers/test_active_sampler_explorer.py b/tests/explorers/test_active_sampler_explorer.py
index b50133e901..5a2b1647b9 100644
--- a/tests/explorers/test_active_sampler_explorer.py
+++ b/tests/explorers/test_active_sampler_explorer.py
@@ -74,7 +74,7 @@ def test_active_sampler_explorer():
         nsrt_to_explorer_sampler=nsrt_to_explorer_sampler)
     task_idx = 0
     policy, _ = explorer.get_exploration_strategy(task_idx, 500)
-    with pytest.raises(utils.OptionExecutionFailure):
+    with pytest.raises(utils.RequestActPolicyFailure):
         policy(state)
 
     # Test that the PickFromBumpy operator is tried more than the others when

From 2d943925ecbd076cc9d42b4475e2541dba88d421 Mon Sep 17 00:00:00 2001
From: Tom Silver <tsilver@theaiinstitute.com>
Date: Mon, 17 Jul 2023 17:26:49 -0400
Subject: [PATCH 5/5] yapf

---
 tests/approaches/test_oracle_approach.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/approaches/test_oracle_approach.py b/tests/approaches/test_oracle_approach.py
index e60cb20882..d7b4504fe3 100644
--- a/tests/approaches/test_oracle_approach.py
+++ b/tests/approaches/test_oracle_approach.py
@@ -45,7 +45,8 @@
 _PDDL_ENV_MODULE_PATH = predicators.envs.pddl_env.__name__
 
 ENV_NAME_AND_CLS = [
-    ("cover", CoverEnv), ("cover_typed_options", CoverEnvTypedOptions),
+    ("cover", CoverEnv),
+    ("cover_typed_options", CoverEnvTypedOptions),
     ("cover_place_hard", CoverEnvPlaceHard),
     ("cover_hierarchical_types", CoverEnvHierarchicalTypes),
     ("cover_regrasp", CoverEnvRegrasp),