diff --git a/.github/workflows/pre_commit.yml b/.github/workflows/pre_commit.yml
new file mode 100644
index 0000000..84e6a1f
--- /dev/null
+++ b/.github/workflows/pre_commit.yml
@@ -0,0 +1,19 @@
+name: Pre-Commit
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+
+jobs:
+  pre-commit:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v3
+        with:
+          python-version: 3.9.16
+      - uses: pre-commit/action@v3.0.0
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
new file mode 100644
index 0000000..6beece6
--- /dev/null
+++ b/.github/workflows/test.yml
@@ -0,0 +1,30 @@
+# Copyright [2023] Boston Dynamics AI Institute, Inc.
+
+name: ZSOS - Main Build
+
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+
+jobs:
+  build:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ['3.9.16']
+        os: [ubuntu-latest]
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install package
+        run: |
+          sudo apt-get install -y libgl1-mesa-dev
+          pip install -e .[dev]
+      - name: Pytest
+        run: |
+          pytest test
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 6e3d90d..8205803 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -26,7 +26,7 @@ repos:
     rev: 23.3.0
     hooks:
       - id: black
-        language_version: python3.10
+        language_version: python3.9
         args: ['--config', 'pyproject.toml']
         exclude: 'dreamerv3/.*|grpc_infra/.*'
         verbose: true
diff --git a/pyproject.toml b/pyproject.toml
index f09ebd8..ea120e3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -9,13 +9,18 @@ name = "zsos"
 version = "0.1"
 description = "Zero shot object search"
 authors = [
-    {name = "Naoki Yokoyama", email = "naokiyokoyama@github"},
+    {name = "Naoki Yokoyama", email = "nyokoyama@theaiinstitute.com"},
 ]
 readme = "README.md"
-requires-python = ">=3.10"
+requires-python = ">=3.9"
 dependencies = [
     "torch >= 1.13.1",
-    # "habitat @ git+https://github.com/facebookresearch/habitat-sim.git",
+    "habitat-sim @ git+https://github.com/facebookresearch/habitat-sim.git",
+    "habitat-baselines >= 0.2.4",
+    "habitat-lab",
+    "frontier_exploration @ git+https://github.com/naokiyokoyama/frontier_exploration.git",
+    "transformers == 4.28.0",  # higher versions break BLIP-2
+    "flask >= 2.3.2"
 ]
 
 [project.optional-dependencies]
@@ -29,6 +34,9 @@ dev = [
 "Homepage" = "theaiinstitute.com"
 "GitHub" = "https://github.com/bdaiinstitute/llm-object-search"
 
+[tool.setuptools]
+packages = ["zsos", "config"]
+
 [tool.ruff]
 # Enable pycodestyle (`E`), Pyflakes (`F`), and import sorting (`I`)
 select = ["E", "F", "I"]
@@ -69,8 +77,8 @@ line-length = 120
 # Allow unused variables when underscore-prefixed.
 dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
 
-# Assume Python 3.10.
-target-version = "py310"
+# Assume Python 3.9.
+target-version = "py39"
 
 [tool.ruff.per-file-ignores]
 "__init__.py" = ["F401"]
@@ -81,7 +89,7 @@ max-complexity = 10
 
 [tool.black]
 line-length = 88
-target-version = ['py310']
+target-version = ['py39']
 include = '\.pyi?$'
 # `extend-exclude` is not honored when `black` is passed a file path explicitly,
 # as is typical when `black` is invoked via `pre-commit`.
@@ -95,7 +103,7 @@ preview = true
 
 # mypy configuration
 [tool.mypy]
-python_version = "3.10"
+python_version = "3.9"
 disallow_untyped_defs = true
 ignore_missing_imports = true
 explicit_package_bases = true
diff --git a/scripts/eval_llm_policy.sh b/scripts/eval_llm_policy.sh
index 4d790f5..d8ca789 100644
--- a/scripts/eval_llm_policy.sh
+++ b/scripts/eval_llm_policy.sh
@@ -2,8 +2,6 @@
 # Copyright [2023] Boston Dynamics AI Institute, Inc.
 
 python -um zsos.run \
-    --config-name=experiments/llm_objectnav_hm3d.yaml \
-    --config-path ../config \
     habitat_baselines.evaluate=True \
     habitat_baselines.eval_ckpt_path_dir=dummy_policy.pth \
     habitat_baselines.load_resume_state_config=False \
diff --git a/test/test_setup.py b/test/test_setup.py
new file mode 100644
index 0000000..0880664
--- /dev/null
+++ b/test/test_setup.py
@@ -0,0 +1,28 @@
+import os
+
+import torch
+from habitat_baselines.common.baseline_registry import baseline_registry  # noqa
+
+from zsos import get_config
+
+
+def test_load_and_save_config():
+    if not os.path.exists("build"):
+        os.makedirs("build")
+
+    # Save a dummy checkpoint containing the config using torch.save
+    config = get_config("config/experiments/llm_objectnav_hm3d.yaml")
+    dummy_dict = {
+        "config": config,
+        "extra_state": {"step": 0},
+        "state_dict": {},
+    }
+
+    filename = "build/dummy_policy.pth"
+    torch.save(dummy_dict, filename)
+
+    # Get the file size of the saved .pth checkpoint
+    file_size = os.path.getsize(filename)
+
+    # Check that the size is greater than 30 KB
+    assert file_size > 30 * 1024, "Test failed - failed to create .pth file"
diff --git a/test/test_visualization.py b/test/test_visualization.py
new file mode 100644
index 0000000..3022022
--- /dev/null
+++ b/test/test_visualization.py
@@ -0,0 +1,25 @@
+import os
+
+import cv2
+
+from zsos.utils.visualization import generate_text_image
+
+
+def test_visualization():
+    if not os.path.exists("build"):
+        os.makedirs("build")
+
+    width = 400
+    text = (
+        "This is a long text that needs to be drawn on an image with a specified "
+        "width. The text should wrap around if it exceeds the given width."
+    )
+
+    result_image = generate_text_image(width, text)
+
+    # Save the image to a file
+    output_filename = "build/output_image.png"
+    cv2.imwrite(output_filename, result_image)
+
+    # Assert that the file exists
+    assert os.path.exists(output_filename), "Output image file not found!"
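Note on `test_visualization.py`: the helper it exercises, `generate_text_image(width, text)`, lives in `zsos/utils/visualization.py` and is not part of this diff. For reviewers, a minimal sketch of the kind of wrap-and-render logic the test assumes; the function name and signature come from the test above, but the wrapping strategy, line height, and white-canvas layout here are illustrative guesses, not the actual implementation:

```python
import cv2
import numpy as np


def generate_text_image_sketch(width: int, text: str) -> np.ndarray:
    """Illustrative stand-in: wrap `text` to fit `width` pixels, then render
    each line onto a white canvas with OpenCV."""
    font, scale, thickness = cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1
    lines, line = [], ""
    for word in text.split():
        candidate = (line + " " + word).strip()
        (text_w, _), _ = cv2.getTextSize(candidate, font, scale, thickness)
        if text_w > width and line:
            lines.append(line)  # current line is full; start a new one
            line = word
        else:
            line = candidate
    lines.append(line)

    # Render each wrapped line onto a white canvas sized to the line count
    line_height = 25
    img = np.full((line_height * len(lines) + 10, width, 3), 255, dtype=np.uint8)
    for i, ln in enumerate(lines):
        cv2.putText(img, ln, (5, (i + 1) * line_height), font, scale, (0, 0, 0), thickness)
    return img


if __name__ == "__main__":
    cv2.imwrite("wrapped.png", generate_text_image_sketch(400, "hello " * 40))
```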
diff --git a/zsos/__init__.py b/zsos/__init__.py
index c3b505a..93b53e1 100644
--- a/zsos/__init__.py
+++ b/zsos/__init__.py
@@ -1,3 +1,4 @@
 import frontier_exploration
+from habitat import get_config
+
 import zsos.obs_transformers.resize
-from zsos.policy import base_policy, llm_policy
diff --git a/zsos/policy/llm_policy.py b/zsos/policy/llm_policy.py
index d198855..4c14641 100644
--- a/zsos/policy/llm_policy.py
+++ b/zsos/policy/llm_policy.py
@@ -3,13 +3,13 @@
 
 import numpy as np
 import torch
+from frontier_exploration.policy import FrontierExplorationPolicy
 from habitat.tasks.nav.object_nav_task import ObjectGoalSensor
 from habitat_baselines.common.baseline_registry import baseline_registry
 from habitat_baselines.common.tensor_dict import TensorDict
 from habitat_baselines.rl.ppo.policy import PolicyActionData
 from torch import Tensor
 
-from frontier_exploration.policy import FrontierExplorationPolicy
 from zsos.llm.llm import BaseLLM, ClientFastChat
 from zsos.mapping.object_map import ObjectMap
 from zsos.obs_transformers.resize import image_resize
diff --git a/zsos/run.py b/zsos/run.py
index 3d89508..c919e3c 100644
--- a/zsos/run.py
+++ b/zsos/run.py
@@ -4,11 +4,13 @@
 from habitat_baselines.run import execute_exp
 from omegaconf import DictConfig
 
+from zsos.policy import base_policy, llm_policy  # noqa: F401
+
 
 @hydra.main(
     version_base=None,
-    config_path="../habitat-lab/habitat-baselines/habitat_baselines/config",
-    config_name="pointnav/ppo_pointnav_example",
+    config_path="../config",
+    config_name="experiments/llm_objectnav_hm3d",
 )
 def main(cfg: DictConfig):
     cfg = patch_config(cfg)
diff --git a/zsos/vlm/blip2.py b/zsos/vlm/blip2.py
index 61c00af..2624466 100644
--- a/zsos/vlm/blip2.py
+++ b/zsos/vlm/blip2.py
@@ -26,20 +26,27 @@ def __init__(
         )
         self.device = device
 
-    def ask(self, image, prompt=None):
+    def ask(self, image, prompt=None) -> str:
+        """Generates a caption for the given image.
+
+        Args:
+            image (numpy.ndarray): The input image as a numpy array.
+            prompt (str, optional): An optional prompt to provide context and guide
+                the caption generation. Can be used to ask questions about the image.
+
+        Returns:
+            str: The generated caption.
+ + """ pil_img = Image.fromarray(image) processed_image = ( self.vis_processors["eval"](pil_img).unsqueeze(0).to(self.device) ) - import time - - st = time.time() if prompt is None or prompt == "": - out = self.model.generate({"image": processed_image}) + out = self.model.generate({"image": processed_image})[0] else: - out = self.model.generate({"image": processed_image, "prompt": prompt}) - print(f"Time taken: {time.time() - st:.2f}s") + out = self.model.generate({"image": processed_image, "prompt": prompt})[0] return out @@ -68,7 +75,7 @@ def ask(self, image: np.ndarray, prompt: Optional[str] = None) -> str: class BLIP2Server(ServerMixin, BLIP2): def process_payload(self, payload: dict) -> dict: image = str_to_image(payload["image"]) - return {"response": self.ask(image, payload.get("prompt"))[0]} + return {"response": self.ask(image, payload.get("prompt"))} # blip = BLIP2Server(name="blip2_opt", model_type="pretrain_opt2.7b") blip = BLIP2Server(name="blip2_t5", model_type="pretrain_flant5xl") diff --git a/zsos/vlm/grounding_dino.py b/zsos/vlm/grounding_dino.py index 5d647bf..ca7438b 100644 --- a/zsos/vlm/grounding_dino.py +++ b/zsos/vlm/grounding_dino.py @@ -3,8 +3,8 @@ import numpy as np import torch import torchvision.transforms.functional as F - from groundingdino.util.inference import load_model, predict + from zsos.vlm.detections import ObjectDetections from .server_wrapper import ServerMixin, host_model, send_request, str_to_image