diff --git a/transforms/images/apply-flatfield-plugin/Dockerfile b/transforms/images/apply-flatfield-plugin/Dockerfile new file mode 100644 index 000000000..97ef35df9 --- /dev/null +++ b/transforms/images/apply-flatfield-plugin/Dockerfile @@ -0,0 +1,20 @@ +FROM polusai/bfio:2.3.3 + +# environment variables defined in polusai/bfio +ENV EXEC_DIR="/opt/executables" +ENV POLUS_IMG_EXT=".ome.tif" +ENV POLUS_TAB_EXT=".csv" +ENV POLUS_LOG="INFO" + +# Work directory defined in the base container +WORKDIR ${EXEC_DIR} + +COPY pyproject.toml ${EXEC_DIR} +COPY VERSION ${EXEC_DIR} +COPY README.md ${EXEC_DIR} +COPY src ${EXEC_DIR}/src + +RUN pip3 install ${EXEC_DIR} --no-cache-dir + +ENTRYPOINT ["python3", "-m", "polus.plugins.transforms.images.apply_flatfield"] +CMD ["--help"] diff --git a/transforms/images/apply-flatfield-plugin/README.md b/transforms/images/apply-flatfield-plugin/README.md index 8ac8366c9..68ae4344c 100644 --- a/transforms/images/apply-flatfield-plugin/README.md +++ b/transforms/images/apply-flatfield-plugin/README.md @@ -18,13 +18,20 @@ This plugin specifically uses the formulation from [Peng et al](https://www.natu For more information on WIPP, visit the [official WIPP page](https://isg.nist.gov/deepzoomweb/software/wipp). -## To Do +## TODO + +### Additional Flatfield Formulations Implement additional formulations of flatfield correction. Specifically, the formula specified by Young: ![Corrected = \frac{Original - Darkfield}{Brightfield - Darkfield} ](https://render.githubusercontent.com/render/math?math=Corrected%20%3D%20%5Cfrac%7BOriginal%20-%20Darkfield%7D%7BBrightfield%20-%20Darkfield%7D%20) -Additional formulations may also include reference image free algorithms for flatfield correction, such as the [rolling ball algorithm](https://www.computer.org/csdl/magazine/co/1983/01/01654163/13rRUwwJWBB). 
+Additional formulations may also include reference image free algorithms for flatfield correction, such as the [rolling ball algorithm](https://www.computer.org/csdl/magazine/co/1983/01/01654163/13rRUwwJWBB). + +### Photobleach Correction + +Since the `basicpy` package and the `basic-flatfield-estimation` tool do not yet support photobleach estimation, this plugin does not yet support photobleach correction. +Once they add support for photobleach estimation, this plugin should be updated to support it. ## Building @@ -41,9 +48,10 @@ Command line options: | Name | Description | I/O | Type | |------------------|-----------------------------------------------------------------------|--------|------------| -| `--darkPattern` | Filename pattern used to match darkfield files to image files | Input | string | -| `--ffDir` | Image collection containing flatfield and/or darkfield images | Input | collection | -| `--flatPattern` | Filename pattern used to match flatfield files to image files | Input | string | | `--imgDir` | Input image collection to be processed by this plugin | Input | collection | | `--imgPattern` | Filename pattern used to separate data and match with flatfield files | Input | string | +| `--ffDir` | Image collection containing flatfield and/or darkfield images | Input | collection | +| `--ffPattern` | Filename pattern used to match flatfield files to image files | Input | string | +| `--dfPattern` | Filename pattern used to match darkfield files to image files | Input | string | | `--outDir` | Output collection | Output | collection | +| `--preview` | Preview the output images' names without actually running computation | Input | boolean | diff --git a/transforms/images/apply-flatfield-plugin/pyproject.toml b/transforms/images/apply-flatfield-plugin/pyproject.toml index 69408fbb2..df1c15db4 100644 --- a/transforms/images/apply-flatfield-plugin/pyproject.toml +++ b/transforms/images/apply-flatfield-plugin/pyproject.toml @@ -10,29 +10,24 @@ readme = 
"README.md" packages = [{include = "polus", from = "src"}] [tool.poetry.dependencies] -python = "^3.9" -bfio = { version = "2.1.9", extras = ["all"] } -filepattern = [ - { version = "^2.0.0", platform = "linux" }, - { version = "^2.0.0", platform = "win32" }, - # { git = "https://github.com/PolusAI/filepattern", rev = "c07bf543c435cbc4cf264effd5a178868e9eaf19", platform = "darwin" }, - { git = "https://github.com/JesseMckinzie/filepattern-1", rev = "c27cf04ba3a1946b87c0c43d5720ba394c340894", platform = "darwin" }, -] +python = ">=3.9,<3.12" +bfio = { version = "^2.3.3", extras = ["all"] } +filepattern = "^2.0.4" typer = { version = "^0.7.0", extras = ["all"] } numpy = "^1.24.3" tqdm = "^4.65.0" [tool.poetry.group.dev.dependencies] -pytest = "^7.2.1" -pytest-cov = "^4.0.0" -pytest-sugar = "^0.9.6" -pytest-xdist = "^3.2.0" -pytest-benchmark = "^4.0.0" bump2version = "^1.0.1" pre-commit = "^3.0.4" -black = "^23.1.0" -ruff = "^0.0.265" +pytest = "^7.2.1" +pytest-sugar = "^0.9.6" [build-system] requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" + +[tool.pytest.ini_options] +pythonpath = [ + "." 
+] diff --git a/transforms/images/apply-flatfield-plugin/run-plugin.sh b/transforms/images/apply-flatfield-plugin/run-plugin.sh new file mode 100755 index 000000000..9aa52c473 --- /dev/null +++ b/transforms/images/apply-flatfield-plugin/run-plugin.sh @@ -0,0 +1,29 @@ +#!/bin/bash + +version=$( None: - """Run batch-wise flatfield correction on the image collection.""" - img_fp = FilePattern(str(img_dir), img_pattern) - img_variables = img_fp.get_variables() - - ff_fp = FilePattern(str(ff_dir), ff_pattern) - ff_variables = ff_fp.get_variables() - - # check that ff_variables are a subset of img_variables - if set(ff_variables) - set(img_variables): - msg = ( - f"Flatfield variables are not a subset of image variables: " - f"{ff_variables} - {img_variables}" - ) - raise ValueError(msg) - - if (df_pattern is None) or (not df_pattern): - df_fp = None - else: - df_fp = FilePattern(str(ff_dir), df_pattern) - df_variables = df_fp.get_variables() - if set(df_variables) != set(ff_variables): - msg = ( - f"Flatfield and darkfield variables do not match: " - f"{ff_variables} != {df_variables}" - ) - raise ValueError(msg) - - for group, files in img_fp(group_by=ff_variables): - img_paths = [p for _, [p] in files] - variables = dict(group) - - ff_path: pathlib.Path = ff_fp.get_matching(**variables)[0][1][0] - - df_path = None if df_fp is None else df_fp.get_matching(**variables)[0][1][0] - - _unshade_images(img_paths, out_dir, ff_path, df_path) - - -def _unshade_images( - img_paths: list[pathlib.Path], - out_dir: pathlib.Path, - ff_path: pathlib.Path, - df_path: typing.Optional[pathlib.Path], -) -> None: - """Remove the given flatfield components from all images and save outputs. 
- - Args: - img_paths: list of paths to images to be processed - out_dir: directory to save the corrected images - ff_path: path to the flatfield image - df_path: path to the darkfield image - """ - with bfio.BioReader(ff_path, max_workers=2) as bf: - ff_image = bf[:, :, :, 0, 0].squeeze() - - if df_path is not None: - with bfio.BioReader(df_path, max_workers=2) as df: - df_image = df[:, :, :, 0, 0].squeeze() - else: - df_image = None - - batch_indices = list(range(0, len(img_paths), 16)) - if batch_indices[-1] != len(img_paths): - batch_indices.append(len(img_paths)) - - for i_start, i_end in tqdm.tqdm( - zip(batch_indices[:-1], batch_indices[1:]), - total=len(batch_indices) - 1, - ): - _unshade_batch( - img_paths[i_start:i_end], - out_dir, - ff_image, - df_image, - ) - - -def _unshade_batch( - batch_paths: list[pathlib.Path], - out_dir: pathlib.Path, - ff_image: numpy.ndarray, - df_image: typing.Optional[numpy.ndarray] = None, -) -> None: - """Apply flatfield correction to a batch of images. 
- - Args: - batch_paths: list of paths to images to be processed - out_dir: directory to save the corrected images - ff_image: component to be used for flatfield correction - df_image: component to be used for flatfield correction - """ - # Load images - images = [] - with concurrent.futures.ProcessPoolExecutor( - max_workers=utils.MAX_WORKERS, - ) as load_executor: - load_futures = [] - for i, inp_path in enumerate(batch_paths): - load_futures.append(load_executor.submit(utils.load_img, inp_path, i)) - - for lf in tqdm.tqdm( - concurrent.futures.as_completed(load_futures), - total=len(load_futures), - desc="Loading batch", - ): - images.append(lf.result()) - - images = [img for _, img in sorted(images, key=operator.itemgetter(0))] - img_stack = numpy.stack(images, axis=0) - - # Apply flatfield correction - if df_image is not None: - img_stack -= df_image - - img_stack /= ff_image - - # Save outputs - with concurrent.futures.ProcessPoolExecutor( - max_workers=utils.MAX_WORKERS, - ) as save_executor: - save_futures = [] - for inp_path, img in zip(batch_paths, img_stack): - save_futures.append( - save_executor.submit(utils.save_img, inp_path, img, out_dir), - ) - - for sf in tqdm.tqdm( - concurrent.futures.as_completed(save_futures), - total=len(save_futures), - desc="Saving batch", - ): - sf.result() diff --git a/transforms/images/apply-flatfield-plugin/src/polus/plugins/transforms/images/apply_flatfield/__main__.py b/transforms/images/apply-flatfield-plugin/src/polus/plugins/transforms/images/apply_flatfield/__main__.py new file mode 100644 index 000000000..7dac49e72 --- /dev/null +++ b/transforms/images/apply-flatfield-plugin/src/polus/plugins/transforms/images/apply_flatfield/__main__.py @@ -0,0 +1,108 @@ +"""Provides the CLI for the Apply Flatfield plugin.""" + +import json +import logging +import pathlib +import typing + +import typer +from polus.plugins.transforms.images.apply_flatfield import apply +from polus.plugins.transforms.images.apply_flatfield import 
utils + +# Initialize the logger +logging.basicConfig( + format="%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s", + datefmt="%d-%b-%y %H:%M:%S", +) +logger = logging.getLogger("polus.plugins.transforms.images.apply_flatfield") +logger.setLevel(utils.POLUS_LOG) + +app = typer.Typer() + + +@app.command() +def main( # noqa: PLR0913 + img_dir: pathlib.Path = typer.Option( + ..., + "--imgDir", + help="Path to input images.", + exists=True, + readable=True, + resolve_path=True, + file_okay=False, + ), + img_pattern: str = typer.Option( + ..., + "--imgPattern", + help="Filename pattern used to select images from imgDir.", + ), + ff_dir: pathlib.Path = typer.Option( + ..., + "--ffDir", + help="Path to flatfield (and optionally darkfield) images.", + exists=True, + readable=True, + resolve_path=True, + file_okay=False, + ), + ff_pattern: str = typer.Option( + ..., + "--ffPattern", + help="Filename pattern used to select flatfield components from ffDir.", + ), + df_pattern: typing.Optional[str] = typer.Option( + None, + "--dfPattern", + help="Filename pattern used to select darkfield components from ffDir.", + ), + out_dir: pathlib.Path = typer.Option( + ..., + "--outDir", + help="Path to output directory.", + exists=True, + writable=True, + resolve_path=True, + file_okay=False, + ), + preview: bool = typer.Option( + False, + "--preview", + help="Preview the output without saving.", + ), +) -> None: + """CLI for the Apply Flatfield plugin. + + The variables used in ffPattern and dfPattern must be a subset of those used + in imgPattern. + + If dfPattern is not specified, then darkfield correction will not be + applied. 
+ """ + logger.info("Starting Apply Flatfield plugin ...") + + logger.info(f"imgDir = {img_dir}") + logger.info(f"imgPattern = {img_pattern}") + logger.info(f"ffDir = {ff_dir}") + logger.info(f"ffPattern = {ff_pattern}") + logger.info(f"dfPattern = {df_pattern}") + logger.info(f"outDir = {out_dir}") + logger.info(f"preview = {preview}") + + out_files = apply( + img_dir=img_dir, + img_pattern=img_pattern, + ff_dir=ff_dir, + ff_pattern=ff_pattern, + df_pattern=df_pattern, + out_dir=out_dir, + preview=preview, + ) + + if preview: + with out_dir.joinpath("preview.json").open("w") as writer: + out_dict = {"files": [p.name for p in out_files]} + json.dump(out_dict, writer, indent=2) + + +if __name__ == "__main__": + app() diff --git a/transforms/images/apply-flatfield-plugin/src/polus/plugins/transforms/images/apply_flatfield/apply_flatfield.py b/transforms/images/apply-flatfield-plugin/src/polus/plugins/transforms/images/apply_flatfield/apply_flatfield.py new file mode 100644 index 000000000..32ddca010 --- /dev/null +++ b/transforms/images/apply-flatfield-plugin/src/polus/plugins/transforms/images/apply_flatfield/apply_flatfield.py @@ -0,0 +1,183 @@ +"""Provides the function to apply flatfield.""" + +import concurrent.futures +import logging +import operator +import pathlib +import typing + +import bfio +import numpy +import tqdm +from filepattern import FilePattern + +from . import utils + +logger = logging.getLogger(__name__) +logger.setLevel(utils.POLUS_LOG) + + +def apply( # noqa: PLR0913 + *, + img_dir: pathlib.Path, + img_pattern: str, + ff_dir: pathlib.Path, + ff_pattern: str, + df_pattern: typing.Optional[str], + out_dir: pathlib.Path, + preview: bool = False, +) -> list[pathlib.Path]: + """Run batch-wise flatfield correction on the image collection. + + Args: + img_dir: path to the directory containing the images to be processed. + img_pattern: filename pattern used to select images from img_dir. + ff_dir: path to the directory containing the flatfield images. 
def _first_match(
    fp: "FilePattern",
    variables: dict[str, typing.Any],
    kind: str,
) -> pathlib.Path:
    """Return the first file of ``fp`` whose variables equal ``variables``.

    Args:
        fp: file pattern to search (flatfield or darkfield components).
        variables: variable names and values identifying one image group.
        kind: human-readable component name, used only in the log message.

    Raises:
        IndexError: if the lookup yields nothing to index into. The error is
            logged with the offending variables and then re-raised unchanged.
    """
    try:
        # get_matching is indexed as [match][1][file]; take the first file of
        # the first match.
        return fp.get_matching(**variables)[0][1][0]
    except IndexError:
        logger.error(f"No {kind} image found for variables {variables}")
        raise


def apply(  # noqa: PLR0913
    *,
    img_dir: pathlib.Path,
    img_pattern: str,
    ff_dir: pathlib.Path,
    ff_pattern: str,
    df_pattern: typing.Optional[str],
    out_dir: pathlib.Path,
    preview: bool = False,
) -> list[pathlib.Path]:
    """Run batch-wise flatfield correction on the image collection.

    Images are grouped by the variables of the flatfield pattern; every group
    is corrected with the flatfield (and, optionally, darkfield) component
    that carries the same variable values.

    Args:
        img_dir: path to the directory containing the images to be processed.
        img_pattern: filename pattern used to select images from img_dir.
        ff_dir: path to the directory containing the flatfield images.
        ff_pattern: filename pattern used to select flatfield components from
            ff_dir.
        df_pattern: filename pattern used to select darkfield components from
            ff_dir. ``None`` or an empty string disables darkfield correction.
        out_dir: path to the directory where the corrected images will be
            saved.
        preview: if True, return the paths to the images that would be saved
            without actually performing any other computation.

    Returns:
        The output path (``out_dir / <input file name>``) of every selected
        image, whether or not ``preview`` is set.

    Raises:
        ValueError: if the flatfield variables are not a subset of the image
            variables, or the darkfield variables differ from the flatfield
            variables.
        IndexError: if a group of images has no matching flatfield or
            darkfield component.
    """
    img_fp = FilePattern(str(img_dir), img_pattern)
    img_variables = img_fp.get_variables()

    ff_fp = FilePattern(str(ff_dir), ff_pattern)
    ff_variables = ff_fp.get_variables()

    # check that ff_variables are a subset of img_variables
    if set(ff_variables) - set(img_variables):
        msg = (
            f"Flatfield variables are not a subset of image variables: "
            f"{ff_variables} - {img_variables}"
        )
        logger.error(msg)
        raise ValueError(msg)

    df_fp = None
    if df_pattern:  # covers both None and the empty string
        df_fp = FilePattern(str(ff_dir), df_pattern)
        df_variables = df_fp.get_variables()
        if set(df_variables) != set(ff_variables):
            msg = (
                f"Flatfield and darkfield variables do not match: "
                f"{ff_variables} != {df_variables}"
            )
            logger.error(msg)
            raise ValueError(msg)

    out_files: list[pathlib.Path] = []
    for group, files in img_fp(group_by=ff_variables):
        # Each entry is unpacked as (variables, [single path]).
        img_paths = [p for _, [p] in files]
        variables = dict(group)

        # Components are looked up even in preview mode so that a missing
        # flatfield/darkfield is reported before any real run.
        ff_path = _first_match(ff_fp, variables, "flatfield")
        df_path = (
            None if df_fp is None else _first_match(df_fp, variables, "darkfield")
        )

        if not preview:
            _unshade_images(img_paths, out_dir, ff_path, df_path)

        # Outputs keep the input file name and live in out_dir.
        out_files.extend(out_dir / pathlib.Path(p).name for p in img_paths)

    return out_files
def _batched(items: list, batch_size: int) -> list[list]:
    """Split ``items`` into consecutive slices of at most ``batch_size`` entries.

    Args:
        items: the list to split; an empty list yields no batches.
        batch_size: maximum number of entries per batch, at least 1.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        msg = f"batch_size must be at least 1, got {batch_size}"
        raise ValueError(msg)
    return [
        items[start : start + batch_size]
        for start in range(0, len(items), batch_size)
    ]


def _unshade_images(
    img_paths: list[pathlib.Path],
    out_dir: pathlib.Path,
    ff_path: pathlib.Path,
    df_path: typing.Optional[pathlib.Path],
    batch_size: int = 16,
) -> None:
    """Remove the given flatfield components from all images and save outputs.

    The flatfield (and optional darkfield) component is read once and then
    applied to the images batch by batch.

    Args:
        img_paths: list of paths to images to be processed
        out_dir: directory to save the corrected images
        ff_path: path to the flatfield image
        df_path: path to the darkfield image, or None to skip darkfield
            subtraction
        batch_size: number of images corrected per batch

    Raises:
        ValueError: if ``batch_size`` is smaller than 1.
    """
    batches = _batched(img_paths, batch_size)
    if not batches:
        # Nothing to correct; avoid reading the components for no reason.
        return

    with bfio.BioReader(ff_path, max_workers=2) as ff_reader:
        ff_image = ff_reader[:, :, :, 0, 0].squeeze()

    df_image = None
    if df_path is not None:
        with bfio.BioReader(df_path, max_workers=2) as df_reader:
            df_image = df_reader[:, :, :, 0, 0].squeeze()

    for batch in tqdm.tqdm(batches, total=len(batches)):
        _unshade_batch(batch, out_dir, ff_image, df_image)
def _correct_stack(
    img_stack: numpy.ndarray,
    ff_image: numpy.ndarray,
    df_image: typing.Optional[numpy.ndarray] = None,
) -> numpy.ndarray:
    """Return ``(img_stack - df_image) / (ff_image + 1e-8)``.

    Args:
        img_stack: images stacked along the first axis. A floating-point stack
            is corrected in place; any other dtype is first copied to float32,
            because in-place subtraction/division with floating-point
            components cannot be stored in an integer array.
        ff_image: flatfield component, broadcast against every image.
        df_image: darkfield component, or None to skip the subtraction.
    """
    if not numpy.issubdtype(img_stack.dtype, numpy.floating):
        img_stack = img_stack.astype(numpy.float32)

    if df_image is not None:
        img_stack -= df_image

    # The small offset guards against division by zero in the flatfield.
    img_stack /= ff_image + 1e-8
    return img_stack


def _unshade_batch(
    batch_paths: list[pathlib.Path],
    out_dir: pathlib.Path,
    ff_image: numpy.ndarray,
    df_image: typing.Optional[numpy.ndarray] = None,
) -> None:
    """Apply flatfield correction to a batch of images.

    The images are loaded in worker processes, corrected together as one
    stack, and written to ``out_dir`` in worker processes.

    Args:
        batch_paths: list of paths to images to be processed
        out_dir: directory to save the corrected images
        ff_image: flatfield component; the images are divided by it
        df_image: darkfield component subtracted before the division, or None
            to skip darkfield subtraction
    """
    if not batch_paths:
        # numpy.stack rejects an empty sequence; there is nothing to do anyway.
        return

    # Load images
    with concurrent.futures.ProcessPoolExecutor(
        max_workers=utils.MAX_WORKERS,
    ) as load_executor:
        load_futures = [
            load_executor.submit(utils.load_img, inp_path, i)
            for i, inp_path in enumerate(batch_paths)
        ]
        loaded = [
            lf.result()
            for lf in tqdm.tqdm(
                concurrent.futures.as_completed(load_futures),
                total=len(load_futures),
                desc="Loading batch",
            )
        ]

    # Results arrive in completion order; restore the order of batch_paths.
    images = [img for _, img in sorted(loaded, key=operator.itemgetter(0))]

    # Apply flatfield correction
    img_stack = _correct_stack(numpy.stack(images, axis=0), ff_image, df_image)

    # Save outputs
    with concurrent.futures.ProcessPoolExecutor(
        max_workers=utils.MAX_WORKERS,
    ) as save_executor:
        save_futures = [
            save_executor.submit(utils.save_img, inp_path, img, out_dir)
            for inp_path, img in zip(batch_paths, img_stack)
        ]
        for sf in tqdm.tqdm(
            concurrent.futures.as_completed(save_futures),
            total=len(save_futures),
            desc="Saving batch",
        ):
            # Re-raise any exception that happened in the worker.
            sf.result()
MAX_WORKERS = max(1, multiprocessing.cpu_count() // 2)


def load_img(path: pathlib.Path, i: int) -> tuple[int, numpy.ndarray]:
    """Read one image and hand it back together with its index.

    The caller submits this function to an executor and collects results in
    completion order; the index travels with the pixels so that the original
    ordering can be restored afterwards.

    Args:
        path: path to image
        i: index of image

    Returns:
        The pair ``(i, image)``, where ``image`` is the squeezed array read
        from ``path``.
    """
    with bfio.BioReader(path, max_workers=MAX_WORKERS) as reader:
        # Full slices on the first three axes, index 0 on the last two
        # (presumably first channel and timepoint; confirm against bfio's
        # axis order), then drop the singleton axes.
        return i, reader[:, :, :, 0, 0].squeeze()


def save_img(
    inp_path: pathlib.Path,
    image: numpy.ndarray,
    out_dir: pathlib.Path,
) -> None:
    """Write ``image`` into ``out_dir`` under the name of its input file.

    The metadata of the input file is reused for the output, while the
    output dtype is taken from ``image``.

    Args:
        inp_path: path to input image
        image: image to be saved
        out_dir: directory to save image
    """
    destination = out_dir.joinpath(inp_path.name)
    with bfio.BioReader(inp_path, max_workers=MAX_WORKERS) as source:
        with bfio.BioWriter(
            destination,
            MAX_WORKERS,
            metadata=source.metadata,
        ) as writer:
            # The corrected pixels may differ in dtype from the input file.
            writer.dtype = image.dtype
            writer[:] = image
ff_dir=ff_dir, + ff_pattern=f"{ff_pattern}_flatfield.ome.tif", + df_pattern=f"{ff_pattern}_darkfield.ome.tif", + out_dir=out_dir, ) img_names = [p.name for p in img_dir.iterdir()] @@ -128,9 +128,9 @@ def test_cli() -> None: img_pattern, "--ffDir", str(ff_dir), - "--brightPattern", + "--ffPattern", f"{ff_pattern}_flatfield.ome.tif", - "--darkPattern", + "--dfPattern", f"{ff_pattern}_darkfield.ome.tif", "--outDir", str(out_dir),