From a8a6e85f940f2a3b45c97e99625b48461721cdb0 Mon Sep 17 00:00:00 2001 From: Katrina Ashton <146023091+kashton-bdai@users.noreply.github.com> Date: Mon, 16 Oct 2023 12:17:54 -0400 Subject: [PATCH] Fixes for things that go wrong when running (#15) Fix the caption not being updated when a non-default caption is passed, and fix a check that only tested whether a value was None instead of also testing whether it was False. Also fix a mypy pre-commit issue. --- vlfm/policy/base_objectnav_policy.py | 2 +- vlfm/vlm/grounding_dino.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/vlfm/policy/base_objectnav_policy.py b/vlfm/policy/base_objectnav_policy.py index 0ac3783..abc73c8 100644 --- a/vlfm/policy/base_objectnav_policy.py +++ b/vlfm/policy/base_objectnav_policy.py @@ -350,7 +350,7 @@ def _update_object_map( # If we are using vqa, then use the BLIP2 model to visually confirm whether # the contours are actually correct. - if self._use_vqa is not None: + if (self._use_vqa is not None) and self._use_vqa: contours, _ = cv2.findContours( object_mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE ) diff --git a/vlfm/vlm/grounding_dino.py b/vlfm/vlm/grounding_dino.py index e2aa92d..25808e4 100644 --- a/vlfm/vlm/grounding_dino.py +++ b/vlfm/vlm/grounding_dino.py @@ -39,9 +39,7 @@ def __init__( self.box_threshold = box_threshold self.text_threshold = text_threshold - def predict( - self, image: np.ndarray, caption: Optional[str] = "" - ) -> ObjectDetections: + def predict(self, image: np.ndarray, caption: str = "") -> ObjectDetections: """ This function makes predictions on an input image tensor or numpy array using a pretrained model. @@ -62,6 +60,8 @@ def predict( ) if caption == "": caption_to_use = self.caption + else: + caption_to_use = caption print("Caption:", caption_to_use) with torch.inference_mode(): boxes, logits, phrases = predict(