Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Kaggle nucleiseg #523

Merged
merged 3 commits into from
Aug 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions segmentation/kaggle-nuclei-segmentation-tool/.bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
[bumpversion]
current_version = 0.1.5-dev1
commit = False
tag = False
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<dev>\d+))?
serialize =
{major}.{minor}.{patch}-{release}{dev}
{major}.{minor}.{patch}

[bumpversion:part:release]
optional_value = _
first_value = dev
values =
dev
_

[bumpversion:part:dev]

[bumpversion:file:pyproject.toml]
search = version = "{current_version}"
replace = version = "{new_version}"

[bumpversion:file:VERSION]

[bumpversion:file:README.md]

[bumpversion:file:plugin.json]

[bumpversion:file:src/polus/images/segmentation/kaggle_nuclei_segmentation/__init__.py]
4 changes: 4 additions & 0 deletions segmentation/kaggle-nuclei-segmentation-tool/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
.venv
out
tests
__pycache__
1 change: 1 addition & 0 deletions segmentation/kaggle-nuclei-segmentation-tool/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
poetry.lock
5 changes: 5 additions & 0 deletions segmentation/kaggle-nuclei-segmentation-tool/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# CHANGELOG

## 0.1.5-dev1

Initial release.
23 changes: 23 additions & 0 deletions segmentation/kaggle-nuclei-segmentation-tool/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
FROM polusai/bfio:2.1.9

# environment variables defined in polusai/bfio
ENV EXEC_DIR="/opt/executables"
ENV POLUS_IMG_EXT=".ome.tif"
ENV POLUS_TAB_EXT=".csv"
ENV POLUS_LOG="INFO"


# Work directory defined in the base container
WORKDIR ${EXEC_DIR}

COPY pyproject.toml ${EXEC_DIR}
COPY VERSION ${EXEC_DIR}
COPY README.md ${EXEC_DIR}
COPY src ${EXEC_DIR}/src

RUN pip3 install ${EXEC_DIR} --no-cache-dir


# Default command. Additional arguments are provided through the command line
ENTRYPOINT ["python3", "-m", "polus.images.segmentation.kaggle_nuclei_segmentation"]
CMD ["--help"]
29 changes: 29 additions & 0 deletions segmentation/kaggle-nuclei-segmentation-tool/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Kaggle Nuclei Segmentation (v0.1.5-dev1)

Segments cell nuclei using U-Net in Tensorflow.

# Reference
Credit for the neural network architecture and pretrained model weights: https://github.com/axium/Data-Science-Bowl-2018

# Description
This WIPP plugin segments cell nuclei using U-Net in TensorFlow. The neural network architecture and pretrained weights are taken from the Data Science Bowl 2018 entry by Muhammad Asim (reference given above). The U-Net expects the input height and width to be 256 pixels. To handle images of any size, the plugin adds reflective padding to the input so that each dimension is a multiple of 256, then extracts 256x256 tiles in a loop to be processed by the network. Finally, it reassembles the tiles and removes the padding from the output.

## Building

To build the Docker image for this plugin, run `./build-docker.sh`.

## Install WIPP Plugin

If WIPP is running, navigate to the plugins page and add a new plugin. Paste the
contents of `plugin.json` into the pop-up window and submit.

## Options

This plugin takes 3 input arguments and 1 output argument:

| Name            | Description                                           | I/O    | Type       | Default |
|-----------------|-------------------------------------------------------|--------|------------|---------|
| `--inpDir`      | Input image collection to be processed by this plugin | Input  | collection |         |
| `--filePattern` | Filename pattern used to separate data                | Input  | string     | `.*`    |
| `--outDir`      | Output collection                                     | Output | collection |         |
| `--preview`     | Generate an output preview                            | Input  | boolean    | False   |
1 change: 1 addition & 0 deletions segmentation/kaggle-nuclei-segmentation-tool/VERSION
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0.1.5-dev1
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/bash
# Build the plugin's Docker image from the current directory, tagged with the
# version string stored in ./VERSION.

version="$(<VERSION)"

# The tag is quoted so that stray whitespace in VERSION makes the build fail
# on an invalid tag instead of being word-split into extra docker arguments.
docker build . -t "polusai/kaggle-nuclei-segmentation-tool:${version}"
63 changes: 63 additions & 0 deletions segmentation/kaggle-nuclei-segmentation-tool/plugin.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
{
"name": "Kaggle Nuclei Segmentation",
"version": "0.1.5-dev1",
"title": "Kaggle Nuclei Segmentation",
"description": "Segments cell nuclei using U-Net in Tensorflow. Neural net architecture and pretrained weights are taken from Data Science Bowl 2018 entry by Muhammad Asim",
"author": "Gauhar Bains, Konstantin Taletskiy, Nick Schaub, Hamdah Shafqat Abbasi (gauhar.bains@labshare.org, Konstantin.Taletskiy@labshare.org, nick.schaub@nih.gov, hamdahshafqat.abbasi@nih.gov)",
"institution": "National Center for Advancing Translational Sciences, National Institutes of Health",
"repository": "https://github.com/PolusAI/image-tools",
"website": "https://ncats.nih.gov/preclinical/core/informatics",
"citation": "",
"containerId": "polusai/kaggle-nuclei-segmentation-tool:0.1.5-dev1",
"baseCommand": [
"python3",
"-m",
"polus.images.segmentation.kaggle_nuclei_segmentation"
],
"inputs": [
{
"name": "inpDir",
"type": "collection",
"description": "Input image collection to be processed by this plugin",
"required": true
},
{
"name": "filePattern",
"type": "string",
"description": "Filename pattern used to separate data",
"required": false,
"default": ".*"
},
{
"name": "preview",
"type": "boolean",
"description": "Generate an output preview.",
"required": false
}
],
"outputs": [
{
"name": "outDir",
"type": "collection",
"description": "Output collection"
}
],
"ui": [
{
"key": "inputs.inpDir",
"title": "Input collection",
"description": "Input image collection to be processed by this plugin"
},
{
"key": "inputs.filePattern",
"title": "Filename pattern",
"description": "Filename pattern used to separate data",
"default": ".*"
},
{
"key": "inputs.preview",
"title": "preview",
"description": "Generate an output preview."
}
]
}
35 changes: 35 additions & 0 deletions segmentation/kaggle-nuclei-segmentation-tool/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
[tool.poetry]
name = "polus-images-segmentation-kaggle-nuclei-segmentation"
version = "0.1.5-dev1"
description = "Segments cell nuclei using U-Net in Tensorflow. Neural net architecture and pretrained weights are taken from Data Science Bowl 2018 entry by Muhammad Asim"
authors = ["Gauhar Bains, Konstantin Taletskiy, Nick Schaub, Hamdah Shafqat Abbasi <gauhar.bains@labshare.org, Konstantin.Taletskiy@labshare.org, nick.schaub@nih.gov, hamdahshafqat.abbasi@nih.gov>"]
readme = "README.md"
packages = [{include = "polus", from = "src"}]

[tool.poetry.dependencies]
python = ">=3.9,<3.12"
bfio = {version = "2.1.9", extras = ["all"]}
filepattern = ">=2.0.4,<3.0"
typer = "^0.7.0"
tensorflow = "^2.15.0"
opencv-python-headless = "^4.9.0.80"

[tool.poetry.group.dev.dependencies]
bump2version = "^1.0.1"
pytest = "^7.4"
pytest-sugar = "^0.9.6"
pre-commit = "^3.2.1"
black = "^23.3.0"
mypy = "^1.1.1"
ruff = "^0.0.270"
ipykernel = "^6.29.0"
scikit-image = "0.19.3"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

[tool.pytest.ini_options]
pythonpath = [
"."
]
20 changes: 20 additions & 0 deletions segmentation/kaggle-nuclei-segmentation-tool/run-plugin.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/bin/bash
# Run the plugin container against local input/output directories.

version=$(<VERSION)

# Update with your data
inpDir=/tmp/path/to/input
outDir=/tmp/path/to/output
filepattern=".*"

# Absolute mount points inside the container.
container_input_dir="/inpDir"
container_output_dir="/outDir"

# The image name matches the tag produced by build-docker.sh and the
# containerId in plugin.json. Every expansion is quoted: unquoted, the
# default pattern ".*" would be glob-expanded by the shell into the hidden
# entries of the current directory before reaching the plugin.
docker run -v "${inpDir}:${container_input_dir}" \
    -v "${outDir}:${container_output_dir}" \
    --user "$(id -u):$(id -g)" \
    "polusai/kaggle-nuclei-segmentation-tool:${version}" \
    --inpDir "${container_input_dir}" \
    --filePattern "${filepattern}" \
    --outDir "${container_output_dir}"
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
"""Kaggle Nuclei Segmentation."""

# Must stay identical to the version in VERSION, pyproject.toml, plugin.json
# and .bumpversion.cfg: bump2version searches this file for the current
# version string and rewrites it in place.
__version__ = "0.1.5-dev1"
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
"""Kaggle Nuclei Segmentation."""

# Base packages
import json
import logging
from os import environ
from pathlib import Path
from typing import Any

import filepattern as fp
import typer
from polus.images.segmentation.kaggle_nuclei_segmentation.segment import segment

# Process-wide log record layout: timestamp, logger name, level, message.
logging.basicConfig(
    datefmt="%d-%b-%y %H:%M:%S",
    format="%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s",
)

# Plugin logger. Its level is looked up on the logging module by the name
# held in the POLUS_LOG environment variable ("INFO" when unset).
logger = logging.getLogger("polus.images.segmentation.kaggle_nuclei_segmentation")
POLUS_LOG = getattr(logging, environ.get("POLUS_LOG", "INFO"))
logger.setLevel(POLUS_LOG)

# Image extension taken from the environment (".ome.tif" when unset); it is
# not referenced by the rest of the visible module.
POLUS_IMG_EXT = environ.get("POLUS_IMG_EXT", ".ome.tif")

# Maximum number of file paths handed to a single segment() call.
BATCH_SIZE = 20

# Typer application that the command below is registered on.
app = typer.Typer(help="Kaggle Nuclei Segmentation.")


def generate_preview(
    inp_dir: Path,
    file_pattern: str,
    out_dir: Path,
) -> None:
    """Write ``preview.json`` listing the files matched by ``file_pattern``.

    The JSON document has two keys: ``"filepattern"`` holds the pattern as
    given, and ``"outDir"`` holds one string per file yielded by the
    filepattern iterator.

    Args:
        inp_dir: Directory handed to ``filepattern.FilePattern``.
        file_pattern: Pattern used to select files inside ``inp_dir``.
        out_dir: Directory in which ``preview.json`` is created.
    """
    fps = fp.FilePattern(inp_dir, file_pattern)

    # Build the whole listing before opening the output file, so that an
    # error raised while matching inputs cannot leave behind an empty,
    # truncated preview.json.
    # NOTE(review): entries are the matched paths exactly as filepattern
    # yields them, not derived output names -- confirm this is what the
    # preview consumers expect.
    out_json: dict[str, Any] = {
        "filepattern": file_pattern,
        "outDir": [str(file[1][0]) for file in fps()],
    }

    out_file = out_dir.joinpath("preview.json")
    with out_file.open("w") as jfile:
        json.dump(out_json, jfile, indent=2)
    logger.info(f"generating preview data in {out_dir}")


# Command-line entry point.
#
# Options:
#   --inpDir / -i       existing, readable directory containing the inputs.
#   --filePattern / -f  pattern used to select files in the input directory
#                       (defaults to ".*").
#   --outDir / -o       existing, writable directory for the results.
#   --preview / -v      only write preview.json instead of segmenting.
#
# The function docstring is left as a single line on purpose: Typer uses it
# as the help text of the command, so it is user-facing output.
@app.command()
def main(
    inp_dir: Path = typer.Option(
        ...,
        "--inpDir",
        "-i",
        help="Input directory to be processed.",
        exists=True,
        readable=True,
        file_okay=False,
        resolve_path=True,
    ),
    file_pattern: str = typer.Option(
        ".*",
        "--filePattern",
        "-f",
        help="Filepattern used to filter inputs.",
    ),
    out_dir: Path = typer.Option(
        ...,
        "--outDir",
        "-o",
        help="Output directory.",
        exists=True,
        writable=True,
        file_okay=False,
        resolve_path=True,
    ),
    preview: bool = typer.Option(
        False,
        "--preview",
        "-v",
        help="Preview of expected outputs (dry-run)",
        show_default=False,
    ),
) -> None:
    """Kaggle Nuclei Segmentation."""
    logger.info(f"inpDir: {inp_dir}")
    logger.info(f"filePattern: {file_pattern}")
    logger.info(f"outDir: {out_dir}")

    if preview:
        # Dry run: generate_preview writes preview.json and logs that it did
        # so, so no second message is emitted here.
        generate_preview(inp_dir, file_pattern, out_dir)
        return

    fps = fp.FilePattern(inp_dir, file_pattern)
    files = [str(file[1][0]) for file in fps()]
    total = len(files)

    # Hand the files to segment() in chunks of at most BATCH_SIZE paths.
    # Slicing already stops at the end of the list, so no explicit clamp is
    # needed; with no matching files the loop body never runs.
    for start in range(0, total, BATCH_SIZE):
        logger.info(f"{100 * start / total:.2f}% complete...")
        # NOTE(review): segment() receives the batch as one comma-joined
        # string, so a path containing a comma would presumably be split
        # incorrectly -- confirm against segment().
        batch = ",".join(files[start : start + BATCH_SIZE])
        segment(batch, out_dir)

    logger.info("100% complete...")


# Invoke the Typer application when the module is executed as a script
# (e.g. with `python3 -m`), but not when it is merely imported.
if __name__ == "__main__":
    app()
Loading
Loading