diff --git a/segmentation/kaggle-nuclei-segmentation-tool/.bumpversion.cfg b/segmentation/kaggle-nuclei-segmentation-tool/.bumpversion.cfg
new file mode 100644
index 000000000..09848330b
--- /dev/null
+++ b/segmentation/kaggle-nuclei-segmentation-tool/.bumpversion.cfg
@@ -0,0 +1,29 @@
+[bumpversion]
+current_version = 0.1.5-dev1
+commit = False
+tag = False
+parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<dev>\d+))?
+serialize =
+    {major}.{minor}.{patch}-{release}{dev}
+    {major}.{minor}.{patch}
+
+[bumpversion:part:release]
+optional_value = _
+first_value = dev
+values =
+    dev
+    _
+
+[bumpversion:part:dev]
+
+[bumpversion:file:pyproject.toml]
+search = version = "{current_version}"
+replace = version = "{new_version}"
+
+[bumpversion:file:VERSION]
+
+[bumpversion:file:README.md]
+
+[bumpversion:file:plugin.json]
+
+[bumpversion:file:src/polus/images/segmentation/kaggle_nuclei_segmentation/__init__.py]
diff --git a/segmentation/kaggle-nuclei-segmentation-tool/.dockerignore b/segmentation/kaggle-nuclei-segmentation-tool/.dockerignore
new file mode 100644
index 000000000..7c603f814
--- /dev/null
+++ b/segmentation/kaggle-nuclei-segmentation-tool/.dockerignore
@@ -0,0 +1,4 @@
+.venv
+out
+tests
+__pycache__
diff --git a/segmentation/kaggle-nuclei-segmentation-tool/.gitignore b/segmentation/kaggle-nuclei-segmentation-tool/.gitignore
new file mode 100644
index 000000000..c04bc49f7
--- /dev/null
+++ b/segmentation/kaggle-nuclei-segmentation-tool/.gitignore
@@ -0,0 +1 @@
+poetry.lock
diff --git a/segmentation/kaggle-nuclei-segmentation-tool/CHANGELOG.md b/segmentation/kaggle-nuclei-segmentation-tool/CHANGELOG.md
new file mode 100644
index 000000000..a61528bcf
--- /dev/null
+++ b/segmentation/kaggle-nuclei-segmentation-tool/CHANGELOG.md
@@ -0,0 +1,5 @@
+# CHANGELOG
+
+## 0.1.5-dev1
+
+Initial release.
diff --git a/segmentation/kaggle-nuclei-segmentation-tool/Dockerfile b/segmentation/kaggle-nuclei-segmentation-tool/Dockerfile
new file mode 100644
index 000000000..e7dcb37a0
--- /dev/null
+++ b/segmentation/kaggle-nuclei-segmentation-tool/Dockerfile
@@ -0,0 +1,23 @@
+FROM polusai/bfio:2.1.9
+
+# environment variables defined in polusai/bfio
+ENV EXEC_DIR="/opt/executables"
+ENV POLUS_IMG_EXT=".ome.tif"
+ENV POLUS_TAB_EXT=".csv"
+ENV POLUS_LOG="INFO"
+
+
+# Work directory defined in the base container
+WORKDIR ${EXEC_DIR}
+
+COPY pyproject.toml ${EXEC_DIR}
+COPY VERSION ${EXEC_DIR}
+COPY README.md ${EXEC_DIR}
+COPY src ${EXEC_DIR}/src
+
+RUN pip3 install ${EXEC_DIR} --no-cache-dir
+
+
+# Default command. Additional arguments are provided through the command line
+ENTRYPOINT ["python3", "-m", "polus.images.segmentation.kaggle_nuclei_segmentation"]
+CMD ["--help"]
diff --git a/segmentation/kaggle-nuclei-segmentation-tool/README.md b/segmentation/kaggle-nuclei-segmentation-tool/README.md
new file mode 100644
index 000000000..48d888c50
--- /dev/null
+++ b/segmentation/kaggle-nuclei-segmentation-tool/README.md
@@ -0,0 +1,29 @@
+# Kaggle Nuclei Segmentation (v0.1.5-dev1)
+
+Segments cell nuclei using a U-Net implemented in TensorFlow.
+
+# Reference
+Credit for the neural network architecture and pretrained model weights: https://github.com/axium/Data-Science-Bowl-2018
+
+# Description
+This WIPP plugin segments cell nuclei using a U-Net implemented in TensorFlow. The network architecture and pretrained weights are taken from the Data Science Bowl 2018 entry by Muhammad Asim (see the reference above). The U-Net expects the input height and width to be 256 pixels. To handle images of any size, the plugin adds reflective padding so that the input dimensions become a multiple of 256, then extracts 256x256 tiles and processes them with the network one at a time. Finally, the output tiles are stitched back together and the padding is removed (see the sketch below).
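+
+For example, a 520x696 input is padded to 768x768 before tiling. A minimal sketch of the padding rule follows (illustrative only, not part of the plugin code; numpy's `symmetric` mode matches `cv2.BORDER_REFLECT`):
+
+```python
+import math
+import numpy as np
+
+row, col = 520, 696                            # example input size
+pad_rows = math.ceil(row / 256) * 256          # 768
+pad_cols = math.ceil(col / 256) * 256          # 768
+top, left = (pad_rows - row) // 2, (pad_cols - col) // 2   # 124, 36
+padded = np.pad(
+    np.zeros((row, col)),
+    ((top, pad_rows - row - top), (left, pad_cols - col - left)),
+    mode="symmetric",
+)
+assert padded.shape == (768, 768)              # a 3 x 3 grid of 256 x 256 tiles
+```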
+
+## Building
+
+To build the Docker image for this plugin, run `./build-docker.sh`.
+
+## Install WIPP Plugin
+
+If WIPP is running, navigate to the plugins page and add a new plugin. Paste the
+contents of `plugin.json` into the pop-up window and submit.
+
+## Options
+
+This plugin takes 3 input arguments and 1 output argument:
+
+| Name            | Description                                            | I/O    | Type       | Default |
+|-----------------|--------------------------------------------------------|--------|------------|---------|
+| `--inpDir`      | Input image collection to be processed by this plugin  | Input  | collection |         |
+| `--filePattern` | Filename pattern used to separate data                 | Input  | string     | .*      |
+| `--outDir`      | Output collection                                      | Output | collection |         |
+| `--preview`     | Generate an output preview                             | Input  | boolean    | False   |
diff --git a/segmentation/kaggle-nuclei-segmentation-tool/VERSION b/segmentation/kaggle-nuclei-segmentation-tool/VERSION
new file mode 100644
index 000000000..db5b6f8a0
--- /dev/null
+++ b/segmentation/kaggle-nuclei-segmentation-tool/VERSION
@@ -0,0 +1 @@
+0.1.5-dev1
diff --git a/segmentation/kaggle-nuclei-segmentation-tool/build-docker.sh b/segmentation/kaggle-nuclei-segmentation-tool/build-docker.sh
new file mode 100755
index 000000000..721fb6a3c
--- /dev/null
+++ b/segmentation/kaggle-nuclei-segmentation-tool/build-docker.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+
+version=$("]
+readme = "README.md"
+packages = [{include = "polus", from = "src"}]
+
+[tool.poetry.dependencies]
+python = ">=3.9,<3.12"
+bfio = {version = "2.1.9", extras = ["all"]}
+filepattern = ">=2.0.4,<3.0"
+typer = "^0.7.0"
+tensorflow = "^2.15.0"
+opencv-python-headless = "^4.9.0.80"
+
+[tool.poetry.group.dev.dependencies]
+bump2version = "^1.0.1"
+pytest = "^7.4"
+pytest-sugar = "^0.9.6"
+pre-commit = "^3.2.1"
+black = "^23.3.0"
+mypy = "^1.1.1"
+ruff = "^0.0.270"
+ipykernel = "^6.29.0"
+scikit-image = "0.19.3"
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
+
+[tool.pytest.ini_options]
+pythonpath = [
+    "."
+] diff --git a/segmentation/kaggle-nuclei-segmentation-tool/run-plugin.sh b/segmentation/kaggle-nuclei-segmentation-tool/run-plugin.sh new file mode 100755 index 000000000..97e41953f --- /dev/null +++ b/segmentation/kaggle-nuclei-segmentation-tool/run-plugin.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +version=$( None: + """Generate preview of the plugin outputs.""" + fps = fp.FilePattern(inp_dir, file_pattern) + out_file = out_dir.joinpath("preview.json") + with Path.open(out_file, "w") as jfile: + out_json: dict[str, Any] = { + "filepattern": file_pattern, + "outDir": [], + } + for file in fps(): + out_name = str(file[1][0]) + out_json["outDir"].append(out_name) + json.dump(out_json, jfile, indent=2) + logger.info(f"generating preview data in {out_dir}") + + +@app.command() +def main( + inp_dir: Path = typer.Option( + ..., + "--inpDir", + "-i", + help="Input directory to be processed.", + exists=True, + readable=True, + file_okay=False, + resolve_path=True, + ), + file_pattern: str = typer.Option( + ".*", + "--filePattern", + "-f", + help="Filepattern used to filter inputs.", + ), + out_dir: Path = typer.Option( + ..., + "--outDir", + "-o", + help="Output directory.", + exists=True, + writable=True, + file_okay=False, + resolve_path=True, + ), + preview: bool = typer.Option( + False, + "--preview", + "-v", + help="Preview of expected outputs (dry-run)", + show_default=False, + ), +) -> None: + """Kaggle Nuclei Segmentation.""" + logger.info(f"inpDir: {inp_dir}") + logger.info(f"filePattern: {file_pattern}") + logger.info(f"outDir: {out_dir}") + + if preview: + generate_preview(inp_dir, file_pattern, out_dir) + logger.info(f"generating preview data in : {out_dir}.") + + if not preview: + fps = fp.FilePattern(inp_dir, file_pattern) + files = [str(file[1][0]) for file in fps()] + for ind in range(0, len(files), BATCH_SIZE): + logger.info("{:.2f}% complete...".format(100 * ind / len(files))) + batch = ",".join(files[ind : min([ind + BATCH_SIZE, len(files)])]) + segment(batch, out_dir) + + logger.info("100% complete...") + + +if __name__ == "__main__": + app() diff --git a/segmentation/kaggle-nuclei-segmentation-tool/src/polus/images/segmentation/kaggle_nuclei_segmentation/segment.py b/segmentation/kaggle-nuclei-segmentation-tool/src/polus/images/segmentation/kaggle_nuclei_segmentation/segment.py new file mode 100644 index 000000000..cec4475bf --- /dev/null +++ b/segmentation/kaggle-nuclei-segmentation-tool/src/polus/images/segmentation/kaggle_nuclei_segmentation/segment.py @@ -0,0 +1,248 @@ +"""Kaggle Nuclei Segmentation.""" + +import logging +import math +import os +import re +from pathlib import Path + +import bfio +import cv2 +import numpy as np +from keras.layers import BatchNormalization +from keras.layers import Concatenate +from keras.layers import Conv2D +from keras.layers import Conv2DTranspose +from keras.layers import Input +from keras.layers import LeakyReLU +from keras.layers import MaxPooling2D +from keras.models import Model + +logger = logging.getLogger(__name__) +logger.setLevel(os.environ.get("POLUS_LOG", logging.INFO)) +POLUS_IMG_EXT = os.environ.get("POLUS_IMG_EXT", ".ome.tif") + +# Mute Tensorflow messages +os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2" + + +def unet(in_shape: tuple[int, int, int] = (256, 256, 3), alpha: float = 0.1) -> Model: + """U-Net, a convolutional neural network. + + Args: + in_shape: Input image with three dimensions + alpha: A negative slope coefficient + Returns: + U-Net Model. 
+ """ + ## dropout input argument is unused in a model and therefore removed + # ------ model definition ----- + unet_input = Input(shape=in_shape) + # segment no. 1 --- starting encoder part + conv1_1 = Conv2D(32, kernel_size=(3, 3), strides=(1, 1), padding="same")(unet_input) + relu1_1 = LeakyReLU(alpha=alpha)(conv1_1) + conv1_2 = Conv2D(32, kernel_size=(3, 3), strides=(1, 1), padding="same")(relu1_1) + relu1_2 = LeakyReLU(alpha=alpha)(conv1_2) + bn1 = BatchNormalization()(relu1_2) + maxpool1 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(bn1) + # segment no. 2 + conv2_1 = Conv2D(64, kernel_size=(3, 3), strides=(1, 1), padding="same")(maxpool1) + relu2_1 = LeakyReLU(alpha=alpha)(conv2_1) + conv2_2 = Conv2D(64, kernel_size=(3, 3), strides=(1, 1), padding="same")(relu2_1) + relu2_2 = LeakyReLU(alpha=alpha)(conv2_2) + bn2 = BatchNormalization()(relu2_2) + maxpool2 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(bn2) + # segment no. 3 + conv3_1 = Conv2D(128, kernel_size=(3, 3), strides=(1, 1), padding="same")(maxpool2) + relu3_1 = LeakyReLU(alpha=alpha)(conv3_1) + conv3_2 = Conv2D(128, kernel_size=(3, 3), strides=(1, 1), padding="same")(relu3_1) + relu3_2 = LeakyReLU(alpha=alpha)(conv3_2) + bn3 = BatchNormalization()(relu3_2) + maxpool3 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(bn3) + # segment no. 4 + conv4_1 = Conv2D(256, kernel_size=(3, 3), strides=(1, 1), padding="same")(maxpool3) + relu4_1 = LeakyReLU(alpha=alpha)(conv4_1) + conv4_2 = Conv2D(256, kernel_size=(3, 3), strides=(1, 1), padding="same")(relu4_1) + relu4_2 = LeakyReLU(alpha=alpha)(conv4_2) + bn4 = BatchNormalization()(relu4_2) + maxpool4 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(bn4) + # segment no. 5 --- start of decoder part + conv5_1 = Conv2DTranspose(256, kernel_size=(3, 3), strides=(2, 2), padding="same")( + maxpool4, + ) + relu5_1 = LeakyReLU(alpha=alpha)(conv5_1) + conc5 = Concatenate(axis=3)([relu5_1, relu4_2]) + conv5_2 = Conv2D(256, kernel_size=(3, 3), strides=(1, 1), padding="same")(conc5) + relu5_2 = LeakyReLU(alpha=alpha)(conv5_2) + bn5 = BatchNormalization()(relu5_2) + # segment no. 6 + conv6_1 = Conv2DTranspose(128, kernel_size=(3, 3), strides=(2, 2), padding="same")( + bn5, + ) + relu6_1 = LeakyReLU(alpha=alpha)(conv6_1) + conc6 = Concatenate(axis=3)([relu6_1, relu3_2]) + conv6_2 = Conv2D(128, kernel_size=(3, 3), strides=(1, 1), padding="same")(conc6) + relu6_2 = LeakyReLU(alpha=alpha)(conv6_2) + bn6 = BatchNormalization()(relu6_2) + # segment no. 7 + conv7_1 = Conv2DTranspose(64, kernel_size=(3, 3), strides=(2, 2), padding="same")( + bn6, + ) + relu7_1 = LeakyReLU(alpha=alpha)(conv7_1) + conc7 = Concatenate(axis=3)([relu7_1, relu2_2]) + conv7_2 = Conv2D(64, kernel_size=(3, 3), strides=(1, 1), padding="same")(conc7) + relu7_2 = LeakyReLU(alpha=alpha)(conv7_2) + bn7 = BatchNormalization()(relu7_2) + # segment no. 8 + conv8_1 = Conv2DTranspose(32, kernel_size=(3, 3), strides=(2, 2), padding="same")( + bn7, + ) + relu8_1 = LeakyReLU(alpha=alpha)(conv8_1) + conc8 = Concatenate(axis=3)([relu8_1, relu1_2]) + conv8_2 = Conv2D(32, kernel_size=(3, 3), strides=(1, 1), padding="same")(conc8) + relu8_2 = LeakyReLU(alpha=alpha)(conv8_2) + unet_output = Conv2D( + 1, + kernel_size=(1, 1), + strides=(1, 1), + padding="same", + activation="sigmoid", + )(relu8_2) + return Model(unet_input, unet_output) + + +def padding(image: np.ndarray) -> tuple[np.ndarray, tuple[int, int, int, int]]: + """Reflective padding. 
+
+    The U-Net expects the height and width of the image to be 256 x 256.
+    This function adds the required reflective padding to make the image
+    dimensions a multiple of 256 x 256. This will enable us to extract tiles
+    of size 256 x 256 which can be processed by the network.
+
+    Args:
+        image: Input image to pad.
+    Returns:
+        Padded image and the padding applied on each side.
+    """
+    row, col, _ = image.shape
+    # Determine the desired height and width after padding the input image
+    m, n = math.ceil(row / 256), math.ceil(col / 256)
+    required_rows = m * 256
+    required_cols = n * 256
+
+    # Check whether the image dimensions are even or odd. If the image dimensions
+    # are even, then the same amount of padding can be applied to the (top,bottom)
+    # or (left,right) of the image.
+
+    if row % 2 == 0:
+        # no. of rows to be added to the top and bottom of the image
+        top = int((required_rows - row) / 2)
+        bottom = top
+    else:
+        top = int((required_rows - row) / 2)
+        bottom = top + 1
+
+    if col % 2 == 0:
+        # no. of columns to be added to left and right of the image
+        left = int((required_cols - col) / 2)
+        right = left
+    else:
+        left = int((required_cols - col) / 2)
+        right = left + 1
+
+    pad_dimensions = (top, bottom, left, right)
+
+    final_image = np.zeros((required_rows, required_cols, 3))
+
+    # Add reflective padding
+    for i in range(3):
+        final_image[:, :, i] = cv2.copyMakeBorder(
+            image[:, :, i],
+            top,
+            bottom,
+            left,
+            right,
+            cv2.BORDER_REFLECT,
+        )
+
+    # return padded image and pad dimensions
+    return final_image, pad_dimensions
+
+
+def segment(batch: str, out_dir: Path) -> None:
+    """Kaggle Nuclei Segmentation.
+
+    Args:
+        batch: Comma-separated list of image files to segment.
+        out_dir: Output directory.
+    Returns:
+        None
+    """
+    files = batch.split(",")
+
+    # Load Model Architecture and model weights
+    model_path = Path(__file__).parent.resolve().joinpath("unet.h5")
+    model = unet()
+    model.load_weights(model_path)
+
+    # Iterate over the files to be processed
+    for filename in files:
+        logger.info(f"Processing image: {filename}")
+
+        # Use bfio to read the image
+        br = bfio.BioReader(filename)
+        img = br.read()
+
+        # The network expects the pixel values to be in the range of (0,1).
+        # Interpolate the pixel values to (0,1)
+        img = np.interp(img, (img.min(), img.max()), (0, 1))
+        # The network expects a 3 channel image.
+        img = np.dstack((img, img, img))
+
+        # Add reflective padding to make the image dimensions a multiple of 256
+
+        # pad_dimensions will help us extract the final result from the padded output.
+        padded_img, pad_dimensions = padding(img)
+
+        # Initialize an empty array to store the output from the network
+        final_img = np.zeros((padded_img.shape[0], padded_img.shape[1]))
+
+        # Extract 256 x 256 tiles from the padded input image.
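+        # Each 256 x 256 tile is passed through the network one at a time
+        # (batch dimension of 1); the padded image is an exact multiple of 256
+        # in both dimensions, so the nested loops below cover it completely and
+        # write each prediction back into final_img at the same offsets.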
+ for i in range(int(padded_img.shape[0] / 256)): + for j in range(int(padded_img.shape[1] / 256)): + # tile to be processed + temp_img = padded_img[i * 256 : (i + 1) * 256, j * 256 : (j + 1) * 256] + inp = np.expand_dims(temp_img, axis=0) + + # predict + x = model.predict(inp) + + # Extract the output image + out = x[0, :, :, 0] + + # Store the output tile + final_img[i * 256 : (i + 1) * 256, j * 256 : (j + 1) * 256] = out + + # get pad dimensions on all 4 sides of the image + top_pad, bottom_pad, left_pad, right_pad = pad_dimensions + + # Extract the Desired output from the padded output + out_image = final_img[ + top_pad : final_img.shape[0] - bottom_pad, + left_pad : final_img.shape[1] - right_pad, + ] + + # Form a binary image + out_image = np.rint(out_image) * 255 + out_image = out_image.astype(np.uint8) + + outname = re.split("\\.", Path(filename).name)[0] + POLUS_IMG_EXT + + with bfio.BioWriter( + file_path=str(out_dir.joinpath(outname).absolute()), + metadata=None, + X=br.X, + Y=br.Y, + ) as bw: + bw[:] = out_image + bw.close() diff --git a/segmentation/polus-cell-nuclei-segmentation-plugin/src/unet.h5 b/segmentation/kaggle-nuclei-segmentation-tool/src/polus/images/segmentation/kaggle_nuclei_segmentation/unet.h5 similarity index 100% rename from segmentation/polus-cell-nuclei-segmentation-plugin/src/unet.h5 rename to segmentation/kaggle-nuclei-segmentation-tool/src/polus/images/segmentation/kaggle_nuclei_segmentation/unet.h5 diff --git a/segmentation/kaggle-nuclei-segmentation-tool/tests/__init__.py b/segmentation/kaggle-nuclei-segmentation-tool/tests/__init__.py new file mode 100644 index 000000000..02bb56b2d --- /dev/null +++ b/segmentation/kaggle-nuclei-segmentation-tool/tests/__init__.py @@ -0,0 +1 @@ +"""Tests for segment.""" diff --git a/segmentation/kaggle-nuclei-segmentation-tool/tests/conftest.py b/segmentation/kaggle-nuclei-segmentation-tool/tests/conftest.py new file mode 100644 index 000000000..a52cd6093 --- /dev/null +++ b/segmentation/kaggle-nuclei-segmentation-tool/tests/conftest.py @@ -0,0 +1,63 @@ +"""Test fixtures. + +Set up all data used in tests. 
+""" + +import shutil +import tempfile +from io import BytesIO +from pathlib import Path +from urllib.request import urlopen +from zipfile import ZipFile + +import pytest + + +def pytest_addoption(parser: pytest.Parser) -> None: + """Add options to pytest.""" + parser.addoption( + "--downloads", + action="store_true", + dest="downloads", + default=False, + help="run tests that download large data files", + ) + parser.addoption( + "--slow", + action="store_true", + dest="slow", + default=False, + help="run slow tests", + ) + + +FixtureReturnType = tuple[Path, Path] # input dir # output dir + + +def clean_directories() -> None: + """Remove all temporary directories.""" + for d in Path(".").cwd().iterdir(): + if d.is_dir() and d.name.startswith("data_dir"): + shutil.rmtree(d) + + +@pytest.fixture() +def generate_test_data() -> FixtureReturnType: # type: ignore + """Generate staging temporary directories with test data.""" + # staging area + data_dir = Path.cwd().joinpath("data_dir") + out_dir = Path(tempfile.mkdtemp(prefix="out_dir")) + out_dir.mkdir(exist_ok=True) + if not data_dir.exists(): + Path(data_dir).mkdir(parents=True) + urls = "https://data.broadinstitute.org/bbbc/BBBC001/BBBC001_v1_images_tif.zip" + + with urlopen(urls) as url, ZipFile( # noqa:S310 + BytesIO(url.read()), + ) as zfile: # type: ignore + zfile.extractall(data_dir) + + inp_dir = data_dir.joinpath("human_ht29_colon_cancer_1_images") + + yield inp_dir, out_dir + shutil.rmtree(out_dir) diff --git a/segmentation/kaggle-nuclei-segmentation-tool/tests/test_cli.py b/segmentation/kaggle-nuclei-segmentation-tool/tests/test_cli.py new file mode 100644 index 000000000..5fb9b5b97 --- /dev/null +++ b/segmentation/kaggle-nuclei-segmentation-tool/tests/test_cli.py @@ -0,0 +1,105 @@ +"""Testing the Command Line Tool.""" + +import faulthandler +import json +from pathlib import Path + +from polus.images.segmentation.kaggle_nuclei_segmentation.__main__ import app +from typer.testing import CliRunner + +from .conftest import FixtureReturnType + +faulthandler.enable() + + +def test_cli(generate_test_data: FixtureReturnType) -> None: + """Test the command line.""" + inp_dir, out_dir = generate_test_data + + runner = CliRunner() + + result = runner.invoke( + app, + [ + "--inpDir", + inp_dir, + "--filePattern", + ".*.tif", + "--outDir", + out_dir, + ], + ) + + assert result.exit_code == 0 + + +def test_cli_short(generate_test_data: FixtureReturnType): # noqa + """Test the command line.""" + runner = CliRunner() + + inp_dir, out_dir = generate_test_data + + result = runner.invoke( + app, + [ + "-i", + inp_dir, + "-f", + ".*.tif", + "-o", + out_dir, + ], + ) + + assert result.exit_code == 0 + + +def test_cli_preview(generate_test_data: FixtureReturnType): # noqa + """Test the preview option.""" + runner = CliRunner() + + inp_dir, out_dir = generate_test_data + + result = runner.invoke( + app, + [ + "--inpDir", + inp_dir, + "--filePattern", + ".*.tif", + "--outDir", + out_dir, + "--preview", + ], + ) + + assert result.exit_code == 0 + + with Path.open(out_dir / "preview.json") as file: + plugin_json = json.load(file) + + # verify we generate the preview file + assert plugin_json != {} + + +def test_cli_bad_input(generate_test_data: FixtureReturnType): # noqa + """Test bad inputs.""" + runner = CliRunner() + + inp_dir, out_dir = generate_test_data + # replace with a bad path + inp_dir = Path.cwd().joinpath("test_data") + + result = runner.invoke( + app, + [ + "--inpDir", + inp_dir, + "--filePattern", + ".*.tif", + "--outDir", + out_dir, + ], + ) 
+ + assert result.exc_info[0] is SystemExit diff --git a/segmentation/kaggle-nuclei-segmentation-tool/tests/test_segment.py b/segmentation/kaggle-nuclei-segmentation-tool/tests/test_segment.py new file mode 100644 index 000000000..2fa303fcf --- /dev/null +++ b/segmentation/kaggle-nuclei-segmentation-tool/tests/test_segment.py @@ -0,0 +1,49 @@ +"""Test for Kaggle Nuclei Segmentation.""" + +import filepattern as fp +import numpy as np +from bfio import BioReader +from polus.images.segmentation.kaggle_nuclei_segmentation.segment import padding +from polus.images.segmentation.kaggle_nuclei_segmentation.segment import segment + +from .conftest import FixtureReturnType +from .conftest import clean_directories + + +def test_segment(generate_test_data: FixtureReturnType) -> None: + """Test segment.""" + inp_dir, out_dir = generate_test_data + + fps = fp.FilePattern(inp_dir, ".*.tif") + files = [str(file[1][0]) for file in fps()] + for ind in range(0, len(files), 1): + batch = ",".join(files[ind : min([ind + 1, len(files)])]) + segment(batch, out_dir) + + assert len(list(out_dir.iterdir())) != 0 + + for f in out_dir.iterdir(): + br = BioReader(f) + img = br.read() + + assert len(np.unique(img)) == 2 + + +def test_padding(generate_test_data: FixtureReturnType) -> None: + """Test padding.""" + inp_dir, _ = generate_test_data + + fps = fp.FilePattern(inp_dir, ".*.tif") + files = [str(file[1][0]) for file in fps()] + + for file in files: + br = BioReader(file) + image = br[:100, :256, :] + img = np.interp(image, (image.min(), image.max()), (0, 1)) + img = np.dstack((img, img, img)) + final_image, pad_dimensions = padding(img) + + assert pad_dimensions == (78, 78, 0, 0) + assert final_image.shape == (256, 256, 3) + + clean_directories() diff --git a/segmentation/polus-cell-nuclei-segmentation-plugin/Dockerfile b/segmentation/polus-cell-nuclei-segmentation-plugin/Dockerfile deleted file mode 100644 index 22b9e4a6e..000000000 --- a/segmentation/polus-cell-nuclei-segmentation-plugin/Dockerfile +++ /dev/null @@ -1,15 +0,0 @@ -FROM polusai/bfio:2.1.9-tensorflow - -COPY VERSION / - -ARG EXEC_DIR="/opt/executables" - -#Create folders -RUN mkdir -p ${EXEC_DIR} - -#Copy executable -COPY src ${EXEC_DIR}/ - -RUN pip install -r ${EXEC_DIR}/requirements.txt --no-cache-dir - -ENTRYPOINT [ "python3", "/opt/executables/models.py" ] \ No newline at end of file diff --git a/segmentation/polus-cell-nuclei-segmentation-plugin/README.md b/segmentation/polus-cell-nuclei-segmentation-plugin/README.md deleted file mode 100644 index eb69e5166..000000000 --- a/segmentation/polus-cell-nuclei-segmentation-plugin/README.md +++ /dev/null @@ -1,44 +0,0 @@ -# Polus Cell Nuclei Segmentation Plugin - -WIPP Plugin Title : Cell Nuclei Segmentation using U-net - -Credits for the Neural network and model weigths : https://github.com/axium/Data-Science-Bowl-2018/ - -For more information on WIPP, visit the [official WIPP page](https://isg.nist.gov/deepzoomweb/software/wipp). - -For more information on Bioformats, vist the [official page](https://www.openmicroscopy.org/bio-formats/). - -# Reference -For neural network architecture and pretrained weights : https://github.com/axium/Data-Science-Bowl-2018 - -# Description -This WIPP plugin segments cell nuclei using U-Net in Tensorflow. Neural net architecture and pretrained weights are taken from Data Science Bowl 2018 entry by Muhammad Asim (reference given above). The unet expects the input height and width to be 256 pixels. 
To ensure that the plugin is able to handle images of all sizes, it adds reflective padding to the input to make the dimensions a multiple of 256. Following this a loop extracts 256x256 tiles to be processed by the network. In the end it untiles and removes padding from the output. - -The plugin takes 2 inputs as shown below :\ -(i) Path to the input directory - The directory should consist of grayscale images to be segmented.\ -(ii) Path to the output directory. The output is a binary mask highlighting the nuclei. - - -| Name | Description | I/O | Type | -| -------- | ----------------------- | ------ | ---- | -| `inpDir` | Input image collection | Input | Path | -| `outDir` | Output image collection | Output | Path | - -## Run the plugin - -### Manually - -Create a local folder to emulate WIPP data folder with the name ``. Folder should have the following structure: -``` -. -├── -| ├── inputs -| └── outputs -``` - -Then, run the docker container -```bash -docker run -v /inputs:/data/inputs -v /outputs:/data/outputs labshare/polus-cell-nuclei-segmentation:0.1.0 \ - --inpDir /data/inputs \ - --outDir /data/outputs -``` diff --git a/segmentation/polus-cell-nuclei-segmentation-plugin/VERSION b/segmentation/polus-cell-nuclei-segmentation-plugin/VERSION deleted file mode 100644 index 446ba66e7..000000000 --- a/segmentation/polus-cell-nuclei-segmentation-plugin/VERSION +++ /dev/null @@ -1 +0,0 @@ -0.1.4 \ No newline at end of file diff --git a/segmentation/polus-cell-nuclei-segmentation-plugin/build-docker.sh b/segmentation/polus-cell-nuclei-segmentation-plugin/build-docker.sh deleted file mode 100644 index f93eaf37b..000000000 --- a/segmentation/polus-cell-nuclei-segmentation-plugin/build-docker.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash - -version=$(=4.1.2.30 diff --git a/segmentation/polus-cell-nuclei-segmentation-plugin/src/segment.py b/segmentation/polus-cell-nuclei-segmentation-plugin/src/segment.py deleted file mode 100644 index 976dfc282..000000000 --- a/segmentation/polus-cell-nuclei-segmentation-plugin/src/segment.py +++ /dev/null @@ -1,239 +0,0 @@ -import numpy as np -import sys -import argparse -import os -import math -import bfio -import javabridge as jutil -import bioformats -import logging -from pathlib import Path - -# Ugly fix to message generated by Keras -stderr = sys.stderr -sys.stderr = open(os.devnull, 'w') -from keras.layers import Conv2D, BatchNormalization, LeakyReLU, Dropout, Input, MaxPooling2D, Conv2DTranspose,Lambda -from keras.layers import Concatenate -from keras.models import Model -from keras.optimizers import SGD, Adam -from keras.losses import binary_crossentropy -from keras.preprocessing.image import ImageDataGenerator -import keras.callbacks as CallBacks -from keras import backend as K -import tensorflow as tf -import cv2 -sys.stderr = stderr - - - -def unet(in_shape=(256,256,3), alpha=0.1, dropout=None): - - # dropout = [0.1,0.2,0.25,0.3,0.5] - # ------ model definition ----- - Unet_Input = Input(shape=in_shape) - # segment no. 1 --- starting encoder part - conv1_1 = Conv2D(32, kernel_size=(3,3), strides = (1,1), padding = 'same')(Unet_Input) - relu1_1 = LeakyReLU(alpha = alpha)(conv1_1) - conv1_2 = Conv2D(32, kernel_size=(3,3), strides = (1,1), padding = 'same')(relu1_1) - relu1_2 = LeakyReLU(alpha = alpha)(conv1_2) - bn1 = BatchNormalization()(relu1_2) - maxpool1 = MaxPooling2D(pool_size = (2,2), strides = (2,2))(bn1) - # segment no. 
2 - conv2_1 = Conv2D(64, kernel_size=(3,3), strides = (1,1), padding = 'same')(maxpool1) - relu2_1 = LeakyReLU(alpha = alpha)(conv2_1) - conv2_2 = Conv2D(64, kernel_size=(3,3), strides = (1,1), padding = 'same')(relu2_1) - relu2_2 = LeakyReLU(alpha = alpha)(conv2_2) - bn2 = BatchNormalization()(relu2_2) - maxpool2 = MaxPooling2D(pool_size = (2,2), strides = (2,2))(bn2) - # segment no. 3 - conv3_1 = Conv2D(128, kernel_size=(3,3), strides = (1,1), padding = 'same')(maxpool2) - relu3_1 = LeakyReLU(alpha = alpha)(conv3_1) - conv3_2 = Conv2D(128, kernel_size=(3,3), strides = (1,1), padding = 'same')(relu3_1) - relu3_2 = LeakyReLU(alpha = alpha)(conv3_2) - bn3 = BatchNormalization()(relu3_2) - maxpool3 = MaxPooling2D(pool_size = (2,2), strides = (2,2))(bn3) - # segment no. 4 - conv4_1 = Conv2D(256, kernel_size=(3,3), strides = (1,1), padding = 'same')(maxpool3) - relu4_1 = LeakyReLU(alpha = alpha)(conv4_1) - conv4_2 = Conv2D(256, kernel_size=(3,3), strides = (1,1), padding = 'same')(relu4_1) - relu4_2 = LeakyReLU(alpha = alpha)(conv4_2) - bn4 = BatchNormalization()(relu4_2) - maxpool4 = MaxPooling2D(pool_size = (2,2), strides = (2,2))(bn4) - # segment no. 5 --- start of decoder part - conv5_1 = Conv2DTranspose(256, kernel_size=(3,3), strides = (2,2), padding = 'same')(maxpool4) - relu5_1 = LeakyReLU(alpha = alpha)(conv5_1) - conc5 = Concatenate(axis=3)([relu5_1, relu4_2]) - conv5_2 = Conv2D(256, kernel_size=(3,3), strides = (1,1), padding = 'same')(conc5) - relu5_2 = LeakyReLU(alpha = alpha)(conv5_2) - bn5 = BatchNormalization()(relu5_2) - # segment no. 6 - conv6_1 = Conv2DTranspose(128, kernel_size=(3,3), strides = (2,2), padding = 'same')(bn5) - relu6_1 = LeakyReLU(alpha = alpha)(conv6_1) - conc6 = Concatenate(axis=3)([relu6_1, relu3_2]) - conv6_2 = Conv2D(128, kernel_size=(3,3), strides = (1,1), padding = 'same')(conc6) - relu6_2 = LeakyReLU(alpha = alpha)(conv6_2) - bn6 = BatchNormalization()(relu6_2) - # segment no. 7 - conv7_1 = Conv2DTranspose(64, kernel_size=(3,3), strides = (2,2), padding = 'same')(bn6) - relu7_1 = LeakyReLU(alpha = alpha)(conv7_1) - conc7 = Concatenate(axis=3)([relu7_1, relu2_2]) - conv7_2 = Conv2D(64, kernel_size=(3,3), strides = (1,1), padding = 'same')(conc7) - relu7_2 = LeakyReLU(alpha = alpha)(conv7_2) - bn7 = BatchNormalization()(relu7_2) - # segment no. 8 - conv8_1 = Conv2DTranspose(32, kernel_size=(3,3), strides = (2,2), padding = 'same')(bn7) - relu8_1 = LeakyReLU(alpha = alpha)(conv8_1) - conc8 = Concatenate(axis=3)([relu8_1, relu1_2]) - conv8_2 = Conv2D(32, kernel_size=(3,3), strides = (1,1), padding = 'same')(conc8) - relu8_2 = LeakyReLU(alpha = alpha)(conv8_2) - Unet_Output = Conv2D(1, kernel_size=(1,1), strides = (1,1), padding='same', activation='sigmoid')(relu8_2) - # model - Unet = Model(Unet_Input, Unet_Output) - return Unet - - -def padding(image): - - ''' The unet expects the height and width of the image to be 256 x 256 - This function adds the required reflective padding to make the image - dimensions a multiple of 256 x 256. This will enable us to extract tiles - of size 256 x 256 which can be processed by the network''' - - row,col,_=image.shape - - # Determine the desired height and width after padding the input image - m,n =math.ceil(row/256),math.ceil(col/256) - required_rows=m*256 - required_cols=n*256 - - - # Check whether the image dimensions are even or odd. If the image dimesions - # are even, then the same amount of padding can be applied to the (top,bottom) - # or (left,right) of the image. - - if row%2==0: - - # no. 
of rows to be added to the top and bottom of the image - top = int((required_rows-row)/2) - bottom = top - else: - top = int((required_rows-row)/2) - bottom = top+1 - - if col%2==0: - - # no. of columns to be added to left and right of the image - left = int((required_cols-col)/2) - right = left - else: - left = int((required_cols-col)/2) - right = left+1 - - pad_dimensions=(top,bottom,left,right) - - final_image=np.zeros((required_rows,required_cols,3)) - - # Add relective Padding - for i in range(3): - final_image[:,:,i]=cv2.copyMakeBorder(image[:,:,i], top, bottom, left, right, cv2.BORDER_REFLECT) - - # return padded image and pad dimensions - return final_image,pad_dimensions - -def main(): - - # Mute Tensorflow messages - os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' - - # Initialize the logger - logging.basicConfig(format='%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s', - datefmt='%d-%b-%y %H:%M:%S') - logger = logging.getLogger("segment") - logger.setLevel(logging.INFO) - - # Parse the inputs - parser=argparse.ArgumentParser() - parser.add_argument('--batch',dest='batch',type=str,required=True) - parser.add_argument('--outDir',dest='output_directory',type=str,required=True) - args = parser.parse_args() - - # Input and output directory - batch = args.batch.split(',') - output_dir = args.output_directory - - # Start the javabridge with proper java logging - log_config = Path(__file__).parent.joinpath("log4j.properties") - jutil.start_vm(args=["-Dlog4j.configuration=file:{}".format(str(log_config.absolute()))],class_path=bioformats.JARS) - - # Load Model Architecture and model weights - model=unet() - model.load_weights('unet.h5') - - # Iterate over the files to be processed - for filename in batch: - logger.info("Processing image: {}".format(filename)) - - # Use bfio to read the image - bf = bfio.BioReader(filename) - img = bf.read_image() - - # Extract the 2-D grayscale image. Bfio loads an image as a 5-D array. - img=(img[:,:,0,0,0]) - - # The network expects the pixel values to be in the range of (0,1). - # Interpolate the pixel values to (0,1) - img=np.interp(img, (img.min(), img.max()), (0,1)) - - # The network expects a 3 channel image. - img=np.dstack((img,img,img)) - - # Add reflective padding to make the image dimensions a multiple of 256 - - # pad_dimensions will help us extract the final result from the padded output. - padded_img,pad_dimensions=padding(img) - - # Intitialize an emtpy array to store the output from the network - final_img=np.zeros((padded_img.shape[0],padded_img.shape[1])) - - # Extract 256 x 256 tiles from the padded input image. - for i in range(int(padded_img.shape[0]/256)): - for j in range(int(padded_img.shape[1]/256)): - - # tile to be processed - temp_img=padded_img[i*256:(i+1)*256,j*256:(j+1)*256] - inp = np.expand_dims(temp_img, axis=0) - - #predict - x=model.predict(inp) - - # Extract the output image - out=x[0,:,:,0] - - # Store the output tile - final_img[i*256:(i+1)*256,j*256:(j+1)*256]=out - - # get pad dimensions on all 4 sides of the image - top_pad,bottom_pad,left_pad,right_pad=pad_dimensions - - # Extract the Desired output from the padded output - out_image=final_img[top_pad:final_img.shape[0]-bottom_pad,left_pad:final_img.shape[1]-right_pad] - - # Form a binary image - out_image=np.rint(out_image)*255 - out_image = out_image.astype(np.uint8) - - # Convert the output to a 5-D arrray to enable bfio to write the image. 
- output_image_5channel=np.zeros((out_image.shape[0],out_image.shape[1],1,1,1),dtype='uint8') - output_image_5channel[:,:,0,0,0]=out_image - - # Export the output to the desired directory - bw = bfio.BioWriter(str(Path(output_dir).joinpath(Path(filename).name).absolute()), - metadata=bf.read_metadata()) - bw.write_image(output_image_5channel) - bw.close_image() - - # Stop the VM - jutil.kill_vm() - -if __name__=='__main__': - main() \ No newline at end of file
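A minimal usage sketch of the replacement tool added above (paths are placeholders; the import path and call signature come from the new `segment.py` and its tests):

```python
from pathlib import Path

from polus.images.segmentation.kaggle_nuclei_segmentation.segment import segment

inp_dir = Path("/data/inputs")   # placeholder input collection
out_dir = Path("/data/outputs")  # placeholder output collection

# segment() takes a comma-separated batch of image paths, as assembled in __main__.py.
batch = ",".join(str(p) for p in sorted(inp_dir.glob("*.tif")))
segment(batch, out_dir)
```

The same processing can be driven from the command line (and by the Docker `ENTRYPOINT`) via `python3 -m polus.images.segmentation.kaggle_nuclei_segmentation --inpDir <dir> --filePattern ".*.tif" --outDir <dir>`.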