jmarshrossney · jmarshrossney · Aug 22, 2024 · Aug 22, 2024 · Aug 22, 2024 · Aug 22, 2024
diff --git a/.gitignore b/.gitignore
@@ -3,3 +3,5 @@
 **/__pycache__/
 vectors/
 *.ipynb
+*.egg-info/
+venv/
diff --git a/.python-version b/.python-version
@@ -0,0 +1 @@
+3.12
diff --git a/README.md b/README.md
@@ -6,30 +6,50 @@ It's a companion project to an R-shiny based image annotation app that is not ye
 
 ## Installation
 
-### Python environment setup
+### Environment and package installation
 
-Use anaconda or miniconda to create a python environment using the included `environment.yml`
+#### Using pip
+
+Create a fresh virtual environment in the repository root using Python >=3.12 and (e.g.) `venv`, and activate it before installing (e.g. `source venv/bin/activate` on Linux/macOS):
 
 ```
-conda env create -f environment.yml
+python -m venv venv
 ```
 
-Please note that this is specifically pinned to python 3.9 due to dependency versions; we make experimental use of the [CEFAS plankton model available through SciVision](https://sci.vision/#/model/resnet50-plankton), which in turn uses an older version of pytorch that isn't packaged above python 3.9.
+Next, install the package using `pip`:
 
-### Object store connection
+```
+python -m pip install .
+```
 
-`.env` contains environment variable names for S3 connection details for the [JASMIN object store](https://github.com/NERC-CEH/object_store_tutorial/). Fill these in with your own credentials. If you're not sure what the `ENDPOINT` should be, please reach out to one of the project contributors listed below. 
+Most likely you are interested in developing and/or experimenting, so you will probably want to install the package in 'editable' mode (`-e`), along with dev tools and Jupyter notebook functionality:
 
+```
+python -m pip install -e ".[all]"
+```
 
-### Package installation
+#### Using conda
 
-Get started by cloning this repository and running
+Use anaconda or miniconda to create a python environment using the included `environment.yml`
 
-`pip install -e .`
+```
+conda env create -f environment.yml
+conda activate cyto_ml
+```
+
+Next, install this package _without dependencies_:
+
+```
+python -m pip install --no-deps -e .
+```
+
+### Object store connection
+
+`.env` contains environment variable names for S3 connection details for the [JASMIN object store](https://github.com/NERC-CEH/object_store_tutorial/). Fill these in with your own credentials. If you're not sure what the `ENDPOINT` should be, please reach out to one of the project contributors listed below. 
 
 ### Running tests
 
-`python -m pytest` or `py.test`
+`pytest` or `py.test`
 
 ## Contents
 

diff --git a/cyto_ml/models/scivision.py b/cyto_ml/models/scivision.py
diff --git a/environment.yml b/environment.yml
@@ -1,22 +1,25 @@
-name: cyto_39
+name: cyto_ml
 channels:
   - pytorch
   - conda-forge
-  - defaults
+channel_priority: flexible
 dependencies:
-  - python=3.9
-  - pytorch=1.10.0
-  - mkl=2024.0
-  - chromadb=0.5.3
+  - python=3.12
+  - pytorch
+  - black
+  - chromadb
+  - flake8
   - intake-xarray
-  - scikit-image
+  - intake=0.7
+  - isort
+  - jupyterlab
+  - jupytext
   - pandas
   - pytest
   - python-dotenv
   - s3fs
-  - jupyterlab
-  - jupytext
+  - scikit-image
+  - xarray
   - pip
   - pip:
-    - scivision
-    - git+https://github.com/alan-turing-institute/plankton-cefas-scivision@main
+    - git+https://github.com/jmarshrossney/resnet50-cefas
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,12 +1,39 @@
+[build-system]
+requires = ["setuptools >= 61.0"]
+build-backend = "setuptools.build_meta"
+
 [project]
 name = "cyto_ml"
-version = "0.1"
+version = "0.2.0"
+requires-python = ">=3.12"
 description = "This package supports the processing and analysis of plankton sample data"
 readme = "README.md"
-requires-python = "==3.9.*"
+dependencies = [
+    "chromadb",
+    "intake==0.7.0",
+    "intake-xarray",
+    "pandas",
+    "python-dotenv",
+    "s3fs",
+    "scikit-image", # secretly required by intake-xarray as default reader
+    "torch",
+    "xarray",
+    "resnet50-cefas@git+https://github.com/jmarshrossney/resnet50-cefas",
+]
 
-[tool.setuptools]
-py-modules = []
+[project.optional-dependencies]
+jupyter = ["jupyterlab", "jupytext"]
+dev = ["pytest", "black", "flake8", "isort"]
+all = ["cyto_ml[jupyter,dev]"]
 
 [tool.jupytext]
 formats = "ipynb,md"
+
+[tool.pytest.ini_options]
+filterwarnings = [
+    "ignore::DeprecationWarning",
+]
+
+[tool.black]
+target-version = ["py312"]
+line-length = 88
diff --git a/scripts/intake_metadata.py b/scripts/intake_metadata.py
@@ -7,6 +7,7 @@
 Via https://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/intake.html#Build-an-intake-catalog
 
 """
+
 import os
 from cyto_ml.data.intake import intake_yaml
 from cyto_ml.data.s3 import s3_endpoint, image_index

diff --git a/cyto_ml/__init__.py → src/cyto_ml/__init__.py b/cyto_ml/__init__.py → src/cyto_ml/__init__.py
diff --git a/cyto_ml/data/__init__.py → src/cyto_ml/data/__init__.py b/cyto_ml/data/__init__.py → src/cyto_ml/data/__init__.py
diff --git a/cyto_ml/data/intake.py → src/cyto_ml/data/intake.py b/cyto_ml/data/intake.py → src/cyto_ml/data/intake.py
diff --git a/cyto_ml/data/s3.py → src/cyto_ml/data/s3.py b/cyto_ml/data/s3.py → src/cyto_ml/data/s3.py
diff --git a/cyto_ml/data/vectorstore.py → src/cyto_ml/data/vectorstore.py b/cyto_ml/data/vectorstore.py → src/cyto_ml/data/vectorstore.py
@@ -9,7 +9,7 @@
 
 logging.basicConfig(level=logging.INFO)
 # TODO make this sensibly configurable, not confusingly hardcoded
-STORE = os.path.join(os.path.abspath(os.path.dirname(__file__)), "../../vectors")
+STORE = os.path.join(os.path.abspath(os.path.dirname(__file__)), "../../../vectors")
 
 client = chromadb.PersistentClient(
     path=STORE,

diff --git a/cyto_ml/models/__init__.py → src/cyto_ml/models/__init__.py b/cyto_ml/models/__init__.py → src/cyto_ml/models/__init__.py
diff --git a/src/cyto_ml/models/scivision.py b/src/cyto_ml/models/scivision.py
@@ -0,0 +1,35 @@
+import torch
+from torchvision.transforms.v2.functional import to_image, to_dtype
+from xarray import DataArray
+
+def prepare_image(image: DataArray):
+    """
+    Convert an xarray DataArray of image data into a float32 tensor for the model
+    a) Converts the image data to a PyTorch tensor with values scaled into [0, 1]
+    b) Returns a 4D batch: a single 3D image gains a leading batch dimension
+    """
+    # Materialise the DataArray as a numpy array (computes it if lazily loaded)
+    image_numpy = image.to_numpy()
+
+    # Convert the numpy array to a float32 PyTorch tensor
+    tensor_image = to_dtype(
+        to_image(image_numpy),  # permutes HWC -> CHW
+        torch.float32,
+        scale=True,  # rescales [0, 255] -> [0, 1]
+    )
+    assert torch.all((tensor_image >= 0.0) & (tensor_image <= 1.0))  # sanity check
+
+    if tensor_image.dim() == 3:
+        # Single image (CHW): add a leading batch dimension -> (1, C, H, W)
+        tensor_image = tensor_image.unsqueeze(0)
+
+    assert tensor_image.dim() == 4
+
+    return tensor_image
+
+
+def flat_embeddings(features: torch.Tensor):
+    """Utility function that takes the features returned by the model and converts
+    the first item in the batch into a list suitable for storing in a vector database"""
+    # TODO: this only returns the 0th tensor in the batch...why?
+    return features[0].detach().tolist()
diff --git a/cyto_ml/tests/conftest.py → tests/conftest.py b/cyto_ml/tests/conftest.py → tests/conftest.py
@@ -1,10 +1,7 @@
 import os
 import pytest
-from cyto_ml.models.scivision import (
-    load_model,
-    truncate_model,
-    SCIVISION_URL,
-)
+
+from resnet50_cefas import load_model
 
 
 @pytest.fixture
@@ -30,7 +27,7 @@ def image_batch(image_dir):
 
 @pytest.fixture
 def scivision_model():
-    return truncate_model(load_model(SCIVISION_URL))
+    return load_model(strip_final_layer=True)
 
 
 @pytest.fixture

diff --git a/...ures/test_images/testymctestface_1091.tif → ...ures/test_images/testymctestface_1091.tif b/...ures/test_images/testymctestface_1091.tif → ...ures/test_images/testymctestface_1091.tif
diff --git a/...tures/test_images/testymctestface_113.tif → ...tures/test_images/testymctestface_113.tif b/...tures/test_images/testymctestface_113.tif → ...tures/test_images/testymctestface_113.tif
diff --git a/...tures/test_images/testymctestface_127.tif → ...tures/test_images/testymctestface_127.tif b/...tures/test_images/testymctestface_127.tif → ...tures/test_images/testymctestface_127.tif
diff --git a/...tures/test_images/testymctestface_133.tif → ...tures/test_images/testymctestface_133.tif b/...tures/test_images/testymctestface_133.tif → ...tures/test_images/testymctestface_133.tif
diff --git a/...ures/test_images/testymctestface_1388.tif → ...ures/test_images/testymctestface_1388.tif b/...ures/test_images/testymctestface_1388.tif → ...ures/test_images/testymctestface_1388.tif
diff --git a/...ures/test_images/testymctestface_1407.tif → ...ures/test_images/testymctestface_1407.tif b/...ures/test_images/testymctestface_1407.tif → ...ures/test_images/testymctestface_1407.tif
diff --git a/...ures/test_images/testymctestface_1830.tif → ...ures/test_images/testymctestface_1830.tif b/...ures/test_images/testymctestface_1830.tif → ...ures/test_images/testymctestface_1830.tif
diff --git a/...ures/test_images/testymctestface_1876.tif → ...ures/test_images/testymctestface_1876.tif b/...ures/test_images/testymctestface_1876.tif → ...ures/test_images/testymctestface_1876.tif
diff --git a/...tures/test_images/testymctestface_188.tif → ...tures/test_images/testymctestface_188.tif b/...tures/test_images/testymctestface_188.tif → ...tures/test_images/testymctestface_188.tif
diff --git a/...ures/test_images/testymctestface_1887.tif → ...ures/test_images/testymctestface_1887.tif b/...ures/test_images/testymctestface_1887.tif → ...ures/test_images/testymctestface_1887.tif
diff --git a/...ures/test_images/testymctestface_1890.tif → ...ures/test_images/testymctestface_1890.tif b/...ures/test_images/testymctestface_1890.tif → ...ures/test_images/testymctestface_1890.tif
diff --git a/...ures/test_images/testymctestface_1892.tif → ...ures/test_images/testymctestface_1892.tif b/...ures/test_images/testymctestface_1892.tif → ...ures/test_images/testymctestface_1892.tif
diff --git a/...ures/test_images/testymctestface_1901.tif → ...ures/test_images/testymctestface_1901.tif b/...ures/test_images/testymctestface_1901.tif → ...ures/test_images/testymctestface_1901.tif
diff --git a/...ures/test_images/testymctestface_1909.tif → ...ures/test_images/testymctestface_1909.tif b/...ures/test_images/testymctestface_1909.tif → ...ures/test_images/testymctestface_1909.tif
diff --git a/...ures/test_images/testymctestface_1912.tif → ...ures/test_images/testymctestface_1912.tif b/...ures/test_images/testymctestface_1912.tif → ...ures/test_images/testymctestface_1912.tif
diff --git a/...ures/test_images/testymctestface_1914.tif → ...ures/test_images/testymctestface_1914.tif b/...ures/test_images/testymctestface_1914.tif → ...ures/test_images/testymctestface_1914.tif
diff --git a/...ures/test_images/testymctestface_1915.tif → ...ures/test_images/testymctestface_1915.tif b/...ures/test_images/testymctestface_1915.tif → ...ures/test_images/testymctestface_1915.tif
diff --git a/...ures/test_images/testymctestface_1919.tif → ...ures/test_images/testymctestface_1919.tif b/...ures/test_images/testymctestface_1919.tif → ...ures/test_images/testymctestface_1919.tif
diff --git a/...ures/test_images/testymctestface_1922.tif → ...ures/test_images/testymctestface_1922.tif b/...ures/test_images/testymctestface_1922.tif → ...ures/test_images/testymctestface_1922.tif
diff --git a/...ures/test_images/testymctestface_1924.tif → ...ures/test_images/testymctestface_1924.tif b/...ures/test_images/testymctestface_1924.tif → ...ures/test_images/testymctestface_1924.tif
diff --git a/...ures/test_images/testymctestface_1948.tif → ...ures/test_images/testymctestface_1948.tif b/...ures/test_images/testymctestface_1948.tif → ...ures/test_images/testymctestface_1948.tif
diff --git a/...ures/test_images/testymctestface_1953.tif → ...ures/test_images/testymctestface_1953.tif b/...ures/test_images/testymctestface_1953.tif → ...ures/test_images/testymctestface_1953.tif
diff --git a/...ures/test_images/testymctestface_1962.tif → ...ures/test_images/testymctestface_1962.tif b/...ures/test_images/testymctestface_1962.tif → ...ures/test_images/testymctestface_1962.tif
diff --git a/...ures/test_images/testymctestface_1965.tif → ...ures/test_images/testymctestface_1965.tif b/...ures/test_images/testymctestface_1965.tif → ...ures/test_images/testymctestface_1965.tif
diff --git a/...ures/test_images/testymctestface_1981.tif → ...ures/test_images/testymctestface_1981.tif b/...ures/test_images/testymctestface_1981.tif → ...ures/test_images/testymctestface_1981.tif
diff --git a/...ures/test_images/testymctestface_2012.tif → ...ures/test_images/testymctestface_2012.tif b/...ures/test_images/testymctestface_2012.tif → ...ures/test_images/testymctestface_2012.tif
diff --git a/...ures/test_images/testymctestface_2071.tif → ...ures/test_images/testymctestface_2071.tif b/...ures/test_images/testymctestface_2071.tif → ...ures/test_images/testymctestface_2071.tif
diff --git a/...ures/test_images/testymctestface_2102.tif → ...ures/test_images/testymctestface_2102.tif b/...ures/test_images/testymctestface_2102.tif → ...ures/test_images/testymctestface_2102.tif
diff --git a/...ures/test_images/testymctestface_2108.tif → ...ures/test_images/testymctestface_2108.tif b/...ures/test_images/testymctestface_2108.tif → ...ures/test_images/testymctestface_2108.tif
diff --git a/...ures/test_images/testymctestface_2110.tif → ...ures/test_images/testymctestface_2110.tif b/...ures/test_images/testymctestface_2110.tif → ...ures/test_images/testymctestface_2110.tif
diff --git a/...ures/test_images/testymctestface_2115.tif → ...ures/test_images/testymctestface_2115.tif b/...ures/test_images/testymctestface_2115.tif → ...ures/test_images/testymctestface_2115.tif
diff --git a/...ures/test_images/testymctestface_2117.tif → ...ures/test_images/testymctestface_2117.tif b/...ures/test_images/testymctestface_2117.tif → ...ures/test_images/testymctestface_2117.tif
diff --git a/...ures/test_images/testymctestface_2119.tif → ...ures/test_images/testymctestface_2119.tif b/...ures/test_images/testymctestface_2119.tif → ...ures/test_images/testymctestface_2119.tif
diff --git a/...ures/test_images/testymctestface_2172.tif → ...ures/test_images/testymctestface_2172.tif b/...ures/test_images/testymctestface_2172.tif → ...ures/test_images/testymctestface_2172.tif
diff --git a/...ures/test_images/testymctestface_2715.tif → ...ures/test_images/testymctestface_2715.tif b/...ures/test_images/testymctestface_2715.tif → ...ures/test_images/testymctestface_2715.tif
diff --git a/...xtures/test_images/testymctestface_36.tif → ...xtures/test_images/testymctestface_36.tif b/...xtures/test_images/testymctestface_36.tif → ...xtures/test_images/testymctestface_36.tif
diff --git a/...ures/test_images/testymctestface_3612.tif → ...ures/test_images/testymctestface_3612.tif b/...ures/test_images/testymctestface_3612.tif → ...ures/test_images/testymctestface_3612.tif
diff --git a/...ures/test_images/testymctestface_3814.tif → ...ures/test_images/testymctestface_3814.tif b/...ures/test_images/testymctestface_3814.tif → ...ures/test_images/testymctestface_3814.tif
diff --git a/...ures/test_images/testymctestface_4715.tif → ...ures/test_images/testymctestface_4715.tif b/...ures/test_images/testymctestface_4715.tif → ...ures/test_images/testymctestface_4715.tif
diff --git a/...ures/test_images/testymctestface_4961.tif → ...ures/test_images/testymctestface_4961.tif b/...ures/test_images/testymctestface_4961.tif → ...ures/test_images/testymctestface_4961.tif
diff --git a/cyto_ml/tests/test_image_embeddings.py → tests/test_image_embeddings.py b/cyto_ml/tests/test_image_embeddings.py → tests/test_image_embeddings.py
@@ -2,7 +2,6 @@
 from torch import Tensor
 from cyto_ml.models.scivision import prepare_image, flat_embeddings
 
-
 def test_embeddings(scivision_model, single_image):
     features = scivision_model(prepare_image(ImageSource(single_image).to_dask()))
 

diff --git a/cyto_ml/tests/test_object_store.py → tests/test_object_store.py b/cyto_ml/tests/test_object_store.py → tests/test_object_store.py
diff --git a/cyto_ml/tests/test_prepare_image.py → tests/test_prepare_image.py b/cyto_ml/tests/test_prepare_image.py → tests/test_prepare_image.py
@@ -9,8 +9,8 @@
 
 
 def test_single_image(single_image):
-
     image_data = ImageSource(single_image).to_dask()
+
     # Tensorise the image (potentially normalise if we have useful values)
     prepared_image = prepare_image(image_data)
 
@@ -25,7 +25,6 @@ def test_image_batch(image_batch):
     We either pad them (and process a lot of blank space) or stick to single image input
     """
     # Load a batch of plankton images
-
     image_data = ImageSource(image_batch).to_dask()
 
     with pytest.raises(ValueError) as err:

diff --git a/cyto_ml/tests/test_vector_store.py → tests/test_vector_store.py b/cyto_ml/tests/test_vector_store.py → tests/test_vector_store.py