diff --git a/utils/idr-download-tool/.bumpversion.cfg b/utils/idr-download-tool/.bumpversion.cfg new file mode 100644 index 000000000..2df24080e --- /dev/null +++ b/utils/idr-download-tool/.bumpversion.cfg @@ -0,0 +1,27 @@ +[bumpversion] +current_version = 0.1.0-dev0 +commit = True +tag = False +parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<dev>\d+))? +serialize = + {major}.{minor}.{patch}-{release}{dev} + {major}.{minor}.{patch} + +[bumpversion:part:release] +optional_value = _ +first_value = dev +values = + dev + _ + +[bumpversion:part:dev] + +[bumpversion:file:pyproject.toml] +search = version = "{current_version}" +replace = version = "{new_version}" + +[bumpversion:file:plugin.json] + +[bumpversion:file:VERSION] + +[bumpversion:file:src/polus/images/utils/idr_download/__init__.py] diff --git a/utils/idr-download-tool/Dockerfile b/utils/idr-download-tool/Dockerfile new file mode 100644 index 000000000..8fb102a3b --- /dev/null +++ b/utils/idr-download-tool/Dockerfile @@ -0,0 +1,28 @@ +FROM continuumio/miniconda3 + +# environment variables defined in polusai/bfio +ENV EXEC_DIR="/opt/executables" +ENV POLUS_IMG_EXT=".ome.tif" +ENV POLUS_TAB_EXT=".csv" +ENV POLUS_LOG="INFO" + +ARG conda_env=myidrenv + +# Work directory defined in the base container +WORKDIR ${EXEC_DIR} + +COPY pyproject.toml ${EXEC_DIR} +COPY environment.yml ${EXEC_DIR} +COPY VERSION ${EXEC_DIR} +COPY README.md ${EXEC_DIR} +COPY src ${EXEC_DIR}/src + +RUN conda env create -f environment.yml + +ENV PATH /opt/conda/envs/myidrenv/bin:$PATH +RUN /bin/bash -c "source activate myidrenv" + +RUN pip3 install ${EXEC_DIR} --no-cache-dir + +ENTRYPOINT ["python3", "-m", "polus.images.utils.idr_download"] +CMD ["--help"] diff --git a/utils/idr-download-tool/README.md b/utils/idr-download-tool/README.md new file mode 100644 index 000000000..139789019 --- /dev/null +++ b/utils/idr-download-tool/README.md @@ -0,0 +1,41 @@ +# Idr Download (v0.1.0-dev0) + +This tool enables the retrieval of data from the 
[IDR](https://idr.openmicroscopy.org/) using the IDR web API. + +## Note +In order to retrieve data from the IDR web server, users need to establish a VPN connection. + + +Conda is used to install all dependencies because one of the critical packages, `omero-py`, encountered installation issues with pip. + +The tool currently supports the following object types: `project`, `dataset`, `screen`, `plate`, `well`. + + +## Building + +To build the Docker image for the download plugin, run +`bash build-docker.sh`. + +## Run the Docker image + +To execute the built docker image for the download plugin, run +`bash run-plugin.sh`. + +## Options + +This plugin takes 4 input arguments and +1 output argument: + +| Name | Description | I/O | Type | +| --------------- | ------------------------------------------------------------ | ------ | ----------- | +| `--dataType` | Object type to be retrieved from the IDR server | Input | String | +| `--name ` | Name of an object | Input | String | +| `--objectId ` | Identifier of the object | Input | Integer | +| `--outDir` | Directory to store the downloaded data | Output | genericData | +| `--preview` | Generate a JSON file with outputs | Output | JSON | + + + +## Sample docker command: +```bash +docker run -v /home/ec2-user/data/:/home/ec2-user/data/ polusai/idr-download-tool:0.1.0-dev0 --dataType="plate" --name='plate1_1_013' --outDir=/home/ec2-user/data/output +``` diff --git a/utils/idr-download-tool/VERSION b/utils/idr-download-tool/VERSION new file mode 100644 index 000000000..206c0852b --- /dev/null +++ b/utils/idr-download-tool/VERSION @@ -0,0 +1 @@ +0.1.0-dev0 diff --git a/utils/idr-download-tool/build-docker.sh b/utils/idr-download-tool/build-docker.sh new file mode 100644 index 000000000..6c5b121e5 --- /dev/null +++ b/utils/idr-download-tool/build-docker.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +version=$(" + ] +readme = "README.md" +packages = [{include = "polus", from = "src"}] + +[tool.poetry.dependencies] +python = 
"3.9.18" +bfio = {version = "^2.3.6", extras = ["all"]} +typer = "^0.7.0" +pydantic = "^2.5.3" +pandas = "2.2.2" +preadator = "0.4.0-dev2" +tqdm = "^4.66.2" + +[[tool.poetry.source]] +name = "test" +url = "https://test.pypi.org/simple/" +default = false +secondary = true + +[tool.poetry.group.dev.dependencies] +bump2version = "^1.0.1" +flake8 = "^6.0.0" +pre-commit = "^3.2.1" +flake8-docstrings = "^1.7.0" +black = "^23.3.0" +mypy = "^1.1.1" +pytest = "^7.2.2" +ruff = "^0.0.270" + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" + +[tool.pytest.ini_options] +pythonpath = [ + "." +] diff --git a/utils/idr-download-tool/run-plugin.sh b/utils/idr-download-tool/run-plugin.sh new file mode 100644 index 000000000..ea1cbb645 --- /dev/null +++ b/utils/idr-download-tool/run-plugin.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +version=$( None: + """Retrieve the microscopy image data from the OMERO NCATS server.""" + logger.info(f"--dataType = {data_type}") + logger.info(f"--name = {name}") + logger.info(f"--objectId = {object_id}") + logger.info(f"--outDir = {out_dir}") + + out_dir = out_dir.resolve() + + if not Path(out_dir).exists(): + out_dir.mkdir(exist_ok=True) + + if not preview: + idr.IdrDownload( # noqa :B018 + data_type=data_type.value, + name=name, + object_id=object_id, + out_dir=out_dir, + ).get_data + + else: + ut.generate_preview(out_dir) + + +if __name__ == "__main__": + app() diff --git a/utils/idr-download-tool/src/polus/images/utils/idr_download/idr_api.py b/utils/idr-download-tool/src/polus/images/utils/idr_download/idr_api.py new file mode 100644 index 000000000..9a03e1696 --- /dev/null +++ b/utils/idr-download-tool/src/polus/images/utils/idr_download/idr_api.py @@ -0,0 +1,496 @@ +"""Idr Download Package.""" + +import logging +import os +import warnings +from concurrent.futures import ThreadPoolExecutor +from concurrent.futures import as_completed +from itertools import chain +from itertools import product +from pathlib import 
from pathlib import Path
from typing import Any
from typing import Optional

import pandas as pd
import polus.images.utils.idr_download.utils as ut
import preadator
import requests
from omero.gateway import BlitzGateway
from pydantic import BaseModel as V2BaseModel
from tqdm import tqdm

# Suppress all warnings
warnings.filterwarnings("ignore")

logger = logging.getLogger(__name__)
logger.setLevel(os.environ.get("POLUS_LOG", logging.INFO))

# Passed as ``verify=`` to every ``requests.get`` call in this module.
# NOTE(review): False turns off TLS certificate verification -- confirm that
# this is intentional.
BOOL = False


def _get_json(url: str) -> Any:
    """Fetch ``url`` and return its decoded JSON payload.

    Raises:
        requests.HTTPError: If the server answers with an error status, so
            that a failed request is not mistaken for malformed JSON.
    """
    response = requests.get(url, verify=BOOL, timeout=500)
    response.raise_for_status()
    return response.json()


def _invalid_object_error(data_type: Any) -> ValueError:
    """Build the error raised when a name/id does not resolve to an object."""
    return ValueError(f"Please provide valid name or id of the {data_type}")


def _select_records(
    records: list[dict],
    name: Optional[str],
    object_id: Optional[int],
) -> list[dict]:
    """Filter ``records`` by ``object_id`` if given, otherwise by ``name``.

    Returns an empty list when neither selector is provided or nothing matches.
    """
    if object_id is not None:
        return [r for r in records if r["id"] == object_id]
    if name is not None:
        return [r for r in records if r["name"] == name]
    return []


def _wait_for(futures: list, desc: Optional[str] = None) -> None:
    """Block until every future finished, re-raising the first worker error.

    A progress bar is shown when ``desc`` is given.
    """
    completed = as_completed(futures)
    if desc is not None:
        completed = tqdm(completed, total=len(futures), desc=desc)
    for future in completed:
        future.result()


def _submit_well_planes(executor: Any, well: Any, out_dir: Path) -> list:
    """Queue one ``ut.saveimage`` task per (field, t, c, z) plane of ``well``.

    Planes are read from the server in the calling thread; only the writing
    is handed to ``executor``. Returns the submitted futures.
    """
    well_name = well.getWellPos()
    futures = []
    for index in range(well.countWellSample()):
        pixels = well.getImage(index).getPrimaryPixels()
        for t, c, z in product(
            range(pixels.getSizeT()),
            range(pixels.getSizeC()),
            range(pixels.getSizeZ()),
        ):
            plane = pixels.getPlane(theZ=z, theC=c, theT=t)
            # Pass the callable and its arguments separately; calling
            # ``ut.saveimage(...)`` here would run it eagerly and submit None.
            futures.append(
                executor.submit(
                    ut.saveimage,
                    plane,
                    well_name,
                    out_dir,
                    z,
                    c,
                    t,
                    index,
                ),
            )
    return futures


class Connection(V2BaseModel):
    """Open sessions to the IDR server through BlitzGateway.

    Host and credentials are taken from ``ut.HOST`` and ``ut.USPW``.
    """

    def _authentication(self) -> BlitzGateway:
        """Return a connected BlitzGateway with keep-alive enabled.

        Raises:
            ConnectionError: If the gateway reports that connecting failed.
        """
        connection = BlitzGateway(
            host=ut.HOST,
            username=ut.USPW,
            passwd=ut.USPW,
            secure=True,
        )
        if not connection.connect():
            msg = f"Unable to connect to the IDR server at {ut.HOST}"
            raise ConnectionError(msg)
        connection.c.enableKeepAlive(6)

        return connection


class Annotations(V2BaseModel):
    """Retrieve annotations for each well or image individually.

    Args:
        idx: Identifier of the well or image whose annotations are requested.
    """

    idx: int

    def well_annotations(self) -> pd.DataFrame:
        """Return the annotation table of well ``idx`` as a DataFrame."""
        well_url = f"{ut.WELL_URL}{self.idx}/query/?query=Well-{self.idx}"
        anno = _get_json(well_url)
        return pd.DataFrame(anno["data"]["rows"], columns=anno["data"]["columns"])

    def image_annotations(self) -> pd.DataFrame:
        """Return the map annotations of image ``idx`` as a one-row DataFrame."""
        image_url = (
            f"{ut.BASE_URL}/webclient/api/annotations/?type=map&image={self.idx}"
        )
        keys = []
        values = []
        for annotation in _get_json(image_url)["annotations"]:
            for pair in annotation["values"]:
                keys.append(pair[0])
                values.append(pair[1])
        return pd.DataFrame([values], columns=[keys])


class Collection(V2BaseModel):
    """Common description of an IDR object to download.

    Args:
        data_type: The supported object types to be retrieved.
        out_dir: Path to directory outputs.
        name: Name of the object to be downloaded.
        object_id: Identifier of the object to be downloaded.
    """

    data_type: ut.DATATYPE
    out_dir: Path
    name: Optional[str] = None
    object_id: Optional[int] = None


class Project(Collection):
    """Obtain the dataset IDs linked to each project."""

    @property
    def get_data(self) -> tuple[list[Any], Path]:
        """Retrieve dataset IDs linked to each project.

        ``object_id`` takes precedence over ``name`` when both are set.

        Returns:
            The flat list of dataset ids and the project output directory,
            which is created if missing.

        Raises:
            ValueError: If no project matches the given name or id.
        """
        matches = _select_records(
            ut._all_projects_ids(),
            self.name,
            self.object_id,
        )
        if not matches:
            raise _invalid_object_error(self.data_type)

        dirpath = Path(self.out_dir, matches[0]["projectName"])
        dirpath.mkdir(parents=True, exist_ok=True)

        conn = Connection()._authentication()
        try:
            per_project = []
            for record in matches:
                project = conn.getObject(ut.DATATYPE.PROJECT, record["id"])
                per_project.append(
                    [dataset.getId() for dataset in project.listChildren()],
                )
            dataset_list = list(chain.from_iterable(per_project))
        finally:
            conn.close()

        return dataset_list, dirpath


class Dataset(Collection):
    """Download the IDR dataset object."""

    @property
    def get_data(self) -> None:
        """Write all the images of the IDR dataset object.

        Image planes go to ``out_dir/<dataset name>`` and one annotation CSV
        per image goes to ``out_dir/metadata/<dataset name>``.

        Raises:
            ValueError: If ``object_id`` does not resolve to an object.
        """
        conn = Connection()._authentication()
        try:
            dataset = conn.getObject(ut.DATATYPE.DATASET, self.object_id)
            if dataset is None:
                raise _invalid_object_error(self.data_type)
            if dataset.__class__.__name__ != ut.DATATYPE_MAPPING[self.data_type]:
                return

            dataset_name = dataset.getName()
            dataset_dir = self.out_dir.joinpath(dataset_name)
            dataset_dir.mkdir(parents=True, exist_ok=True)
            anno_dir = self.out_dir.joinpath(Path("metadata", dataset_name))
            anno_dir.mkdir(parents=True, exist_ok=True)

            with ThreadPoolExecutor(max_workers=ut.NUM_THREADS) as executor:
                futures = []
                for image in dataset.listChildren():
                    image_name = image.getName().split(".")[0]
                    Annotations(idx=image.getId()).image_annotations().to_csv(
                        Path(anno_dir, f"{dataset_name}_{image_name}.csv"),
                        index=False,
                    )
                    pixels = image.getPrimaryPixels()
                    for t, c, z in product(
                        range(image.getSizeT()),
                        range(image.getSizeC()),
                        range(image.getSizeZ()),
                    ):
                        plane = pixels.getPlane(theZ=z, theC=c, theT=t)
                        futures.append(
                            executor.submit(
                                ut.saveimage,
                                plane,
                                image_name,
                                dataset_dir,
                                z,
                                c,
                                t,
                            ),
                        )
                _wait_for(futures)
        finally:
            conn.close()


class Screen(Collection):
    """Obtain the plate IDs and names linked to each screen."""

    @property
    def plates(self) -> tuple[list[dict], Path]:
        """Retrieve the plate IDs and names linked to each screen.

        ``object_id`` takes precedence over ``name`` when both are set.

        Returns:
            The ``{"id", "name"}`` records of the screen's plates and the
            screen output directory, which is created if missing.

        Raises:
            ValueError: If no screen matches the given name or id.
        """
        matches = _select_records(ut._all_screen_ids(), self.name, self.object_id)
        if not matches:
            raise _invalid_object_error(self.data_type)
        screen = matches[0]

        dirpath = Path(self.out_dir, screen["screenName"])
        dirpath.mkdir(parents=True, exist_ok=True)

        plates_url = f"{ut.BASE_URL}/webclient/api/plates/?id={screen['id']}"
        plates = [
            {"id": p["id"], "name": p["name"]} for p in _get_json(plates_url)["plates"]
        ]
        return plates, dirpath


class Plate(Collection):
    """Download the IDR plate object."""

    @property
    def get_data(self) -> None:
        """Save all images from the plate object.

        Image planes go to ``out_dir/<plate name>``; the well annotations of
        the whole plate are written to ``out_dir/metadata/<plate name>.csv``.

        Raises:
            ValueError: If ``object_id`` does not resolve to an object.
        """
        conn = Connection()._authentication()
        try:
            plate = conn.getObject(ut.DATATYPE.PLATE, self.object_id)
            if plate is None:
                raise _invalid_object_error(self.data_type)
            if plate.__class__.__name__ != ut.DATATYPE_MAPPING[self.data_type]:
                return

            plate_name = plate.getName()
            plate_dir = self.out_dir.joinpath(plate_name)
            anno_dir = self.out_dir.joinpath("metadata")
            plate_dir.mkdir(parents=True, exist_ok=True)
            anno_dir.mkdir(parents=True, exist_ok=True)

            well_type = ut.DATATYPE_MAPPING[ut.DATATYPE.WELL]
            annotations = []
            with ThreadPoolExecutor(max_workers=ut.NUM_THREADS) as executor:
                futures = []
                for well in plate.listChildren():
                    if well.__class__.__name__ != well_type:
                        continue
                    annotations.append(
                        Annotations(idx=well.getId()).well_annotations(),
                    )
                    futures.extend(_submit_well_planes(executor, well, plate_dir))

                # Concatenate once instead of re-writing the CSV per well.
                if annotations:
                    pd.concat(annotations, ignore_index=True, sort=False).to_csv(
                        Path(anno_dir, f"{plate_name}.csv"),
                        index=False,
                    )
                _wait_for(futures, desc="Fetching wells")
        finally:
            conn.close()


class Well(Collection):
    """Download the IDR well object."""

    @property
    def get_data(self) -> None:
        """Save all images from the well object.

        Image planes go to ``out_dir/<well position>``; the well annotations
        are written to ``out_dir/metadata/<well id>_<well position>.csv``.

        Raises:
            ValueError: If ``object_id`` does not resolve to an object.
        """
        conn = Connection()._authentication()
        try:
            well = conn.getObject(ut.DATATYPE.WELL, self.object_id)
            if well is None:
                raise _invalid_object_error(self.data_type)
            if well.__class__.__name__ != ut.DATATYPE_MAPPING[self.data_type]:
                return

            well_name = well.getWellPos()
            well_id = well.getId()
            well_dir = self.out_dir.joinpath(well_name)
            well_dir.mkdir(parents=True, exist_ok=True)
            anno_dir = self.out_dir.joinpath("metadata")
            anno_dir.mkdir(parents=True, exist_ok=True)
            Annotations(idx=well_id).well_annotations().to_csv(
                Path(anno_dir, f"{well_id}_{well_name}.csv"),
                index=False,
            )
            with ThreadPoolExecutor(max_workers=ut.NUM_THREADS) as executor:
                _wait_for(_submit_well_planes(executor, well, well_dir))
        finally:
            conn.close()


def _fetch_plate(object_id: int, out_dir: Path) -> None:
    """Download a single plate; module-level so worker pools can pickle it."""
    Plate(  # noqa: B018
        data_type=ut.DATATYPE.PLATE,
        object_id=object_id,
        out_dir=out_dir,
    ).get_data


class IdrDownload(Collection):
    """Download the IDR objects."""

    def _require_name_or_id(self) -> None:
        """Raise ``ValueError`` when neither ``name`` nor ``object_id`` is set."""
        if self.name is None and self.object_id is None:
            msg = f"Both {self.data_type} name & {self.data_type} id are missing"
            raise ValueError(msg)

    def _log_request(self) -> None:
        """Log which object is about to be downloaded."""
        if self.name is not None and self.object_id is not None:
            logger.info(
                f"Download {self.data_type}:name={self.name},id={self.object_id}",
            )
        elif self.name is not None:
            logger.info(f"Downloading {self.data_type}: name={self.name}")
        elif self.object_id is not None:
            logger.info(f"Downloading {self.data_type}: id={self.object_id}")

    def _resolve_id(self, fetch_records: Any) -> int:
        """Return ``object_id`` if set, else the id of the first name match.

        ``fetch_records`` is only called when a name lookup is needed.
        """
        if self.object_id is not None:
            return self.object_id
        matches = _select_records(fetch_records(), self.name, None)
        if not matches:
            raise _invalid_object_error(self.data_type)
        return matches[0]["id"]

    def _download_screen(self) -> None:
        """Download every plate of the requested screen."""
        self._require_name_or_id()
        self._log_request()
        plate_list, dirpath = Screen(
            data_type=self.data_type,
            name=self.name,
            object_id=self.object_id,
            out_dir=self.out_dir,
        ).plates
        with preadator.ProcessManager(
            name="Idr download",
            num_processes=4,
            threads_per_process=2,
        ) as executor:
            # Submit a callable plus arguments. Submitting
            # ``Plate(...).get_data`` would evaluate the property (i.e. run the
            # download) in this process and hand ``None`` to the pool.
            futures = [
                executor.submit(_fetch_plate, plate["id"], dirpath)
                for plate in plate_list
            ]
            for future in tqdm(
                as_completed(futures),
                total=len(futures),
                mininterval=5,
                desc="Downloading plates",
                initial=0,
                unit_scale=True,
                colour="cyan",
            ):
                future.result()

    def _download_plate(self) -> None:
        """Download the requested plate exactly once."""
        self._require_name_or_id()
        self._log_request()
        _fetch_plate(self._resolve_id(ut._all_plates_ids), self.out_dir)

    def _download_well(self) -> None:
        """Download the requested well; wells can only be addressed by id."""
        if self.object_id is None:
            msg = f"Please provide objectID of {self.data_type}"
            raise ValueError(msg)
        self._log_request()
        Well(  # noqa: B018
            data_type=self.data_type,
            object_id=self.object_id,
            out_dir=self.out_dir,
        ).get_data

    def _download_project(self) -> None:
        """Download every dataset of the requested project."""
        self._require_name_or_id()
        self._log_request()
        dataset_list, dirpath = Project(
            data_type=self.data_type,
            name=self.name,
            object_id=self.object_id,
            out_dir=self.out_dir,
        ).get_data
        for dataset_id in dataset_list:
            Dataset(  # noqa: B018
                data_type=ut.DATATYPE.DATASET,
                object_id=dataset_id,
                out_dir=dirpath,
            ).get_data

    def _download_dataset(self) -> None:
        """Download the requested dataset exactly once."""
        self._require_name_or_id()
        self._log_request()
        Dataset(  # noqa: B018
            data_type=self.data_type,
            object_id=self._resolve_id(ut._all_datasets_ids),
            out_dir=self.out_dir,
        ).get_data

    @property
    def get_data(self) -> None:
        """Save all images from the IDR objects.

        Dispatches on ``data_type``. When both ``name`` and ``object_id`` are
        given, ``object_id`` is used and the object is downloaded once.

        Raises:
            ValueError: If the required name/id is missing or does not
                resolve to an object.
        """
        handlers = {
            ut.DATATYPE.SCREEN: self._download_screen,
            ut.DATATYPE.PLATE: self._download_plate,
            ut.DATATYPE.WELL: self._download_well,
            ut.DATATYPE.PROJECT: self._download_project,
            ut.DATATYPE.DATASET: self._download_dataset,
        }
        handlers[self.data_type]()
"""Idr Download Package."""

import logging
import os
import shutil
import warnings
from concurrent.futures import ThreadPoolExecutor
from concurrent.futures import as_completed
from enum import Enum
from multiprocessing import cpu_count
from pathlib import Path
from typing import Any
from typing import Optional

import numpy as np
import requests
from bfio import BioWriter
from tqdm import tqdm

# Suppress all warnings
warnings.filterwarnings("ignore")

# Passed as ``verify=`` to every ``requests.get`` call in this module.
# NOTE(review): False turns off TLS certificate verification -- confirm that
# this is intentional.
BOOL = False
BASE_URL = "https://idr.openmicroscopy.org"
HOST = "ws://idr.openmicroscopy.org/omero-ws"
# Used as both the username and the password of the server login.
USPW = "public"
SCREEN_URL = f"{BASE_URL}/api/v0/m/screens/"
PROJECT_URL = f"{BASE_URL}/api/v0/m/projects/"
PLATES_URL = f"{BASE_URL}/webclient/api/plates/?id="
WELL_URL = f"{BASE_URL}/webgateway/table/Screen.plateLinks.child.wells/"

logger = logging.getLogger(__name__)
logger.setLevel(os.environ.get("POLUS_LOG", logging.INFO))

NUM_THREADS = max(cpu_count(), 2)


class DATATYPE(str, Enum):
    """Objects types."""

    PROJECT = "project"
    DATASET = "dataset"
    SCREEN = "screen"
    PLATE = "plate"
    WELL = "well"
    # Same value as PLATE, so this is an alias of DATATYPE.PLATE.
    Default = "plate"


# Class name of the gateway wrapper object expected for each object type.
DATATYPE_MAPPING = {
    DATATYPE.PROJECT: "_ProjectWrapper",
    DATATYPE.PLATE: "_PlateWrapper",
    DATATYPE.WELL: "_WellWrapper",
    DATATYPE.DATASET: "_DatasetWrapper",
    DATATYPE.SCREEN: "_ScreenWrapper",
}


def generate_preview(
    path: Path,
) -> None:
    """Generate preview of the plugin outputs.

    Copies the ``examples`` directory located five levels above this file
    into ``path``, merging with existing content.
    """
    source_path = Path(__file__).parents[5].joinpath("examples")
    shutil.copytree(source_path, path, dirs_exist_ok=True)


def _get_json(url: str) -> Any:
    """Fetch ``url`` and return its decoded JSON payload.

    Raises:
        requests.HTTPError: If the server answers with an error status, so
            that a failed request is not mistaken for malformed JSON.
    """
    response = requests.get(url, verify=BOOL, timeout=500)
    response.raise_for_status()
    return response.json()


def _gather(fetch: Any, ids: list, desc: str) -> list[dict]:
    """Call ``fetch(i)`` for every id in a thread pool and merge the results.

    Results are appended in completion order; a failing call re-raises here.
    """
    merged: list[dict] = []
    with ThreadPoolExecutor(max_workers=NUM_THREADS) as executor:
        futures = [executor.submit(fetch, i) for i in ids]
        for future in tqdm(as_completed(futures), total=len(futures), desc=desc):
            merged.extend(future.result())
    return merged


def _all_screen_ids() -> list[dict]:
    """Obtain the screen IDs and names accessible through the IDR Web API.

    ``name`` is the part of the screen name before the first ``-``;
    ``screenName`` is the full name.
    """
    return [
        {"id": r["@id"], "name": r["Name"].split("-")[0], "screenName": r["Name"]}
        for r in _get_json(SCREEN_URL)["data"]
    ]


def _get_plateid(screen_id: str) -> list[dict]:
    """Obtain the plate IDs and names of one screen through the IDR Web API."""
    return [
        {"id": p["id"], "name": p["name"]}
        for p in _get_json(f"{PLATES_URL}{screen_id}")["plates"]
    ]


def _all_plates_ids() -> list[dict]:
    """Obtain the plate IDs & names for all screens available on the IDR Web API."""
    screen_ids = [screen["id"] for screen in _all_screen_ids()]
    return _gather(_get_plateid, screen_ids, "Fetching plates")


def _all_projects_ids() -> list[dict]:
    """Obtain all project IDs and names accessible through the IDR Web API.

    ``name`` is the part of the project name before the first ``-``;
    ``projectName`` is the full name.
    """
    return [
        {"id": i["@id"], "name": i["Name"].split("-")[0], "projectName": i["Name"]}
        for i in _get_json(PROJECT_URL)["data"]
    ]


def _get_datasetid(project_id: str) -> list[dict]:
    """Obtain the dataset IDs and names of one project through the IDR Web API."""
    dataset_url = f"{PROJECT_URL}{project_id}/datasets/"
    return [{"id": i["@id"], "name": i["Name"]} for i in _get_json(dataset_url)["data"]]


def _all_datasets_ids() -> list[dict]:
    """Obtain the dataset IDs and names for all projects on the IDR Web API."""
    project_ids = [project["id"] for project in _all_projects_ids()]
    return _gather(_get_datasetid, project_ids, "Fetching datasets")


def write_ometif(image: np.ndarray, out_file: Path) -> None:
    """Utilizing BioWriter for writing numpy arrays.

    ``image.shape[0]`` is written as Y and ``image.shape[1]`` as X.
    """
    with BioWriter(file_path=out_file) as bw:
        bw.X = image.shape[1]
        bw.Y = image.shape[0]
        bw.dtype = image.dtype
        bw[:, :, :, :, :] = image


def saveimage(  # noqa: PLR0913
    image: np.ndarray,
    name: str,
    dir_name: Path,
    z: int,
    c: int,
    t: int,
    index: Optional[int] = 1,
) -> None:
    """Generating a single-plane image using BioWriter.

    The plane is written to
    ``dir_name/<name>_f<index>_z<z>_t<t>_c<c>.ome.tif``.
    """
    file_name = f"{name}_f{index}_z{z}_t{t}_c{c}.ome.tif"
    # Append three singleton axes so the plane is 5-dimensional for the
    # ``bw[:, :, :, :, :]`` assignment in write_ometif.
    plane = np.expand_dims(image, axis=(2, 3, 4))
    write_ometif(plane, Path(dir_name, file_name))
"""Test fixtures.

Set up all data used in tests.
"""

import shutil
import tempfile
from pathlib import Path
from typing import Union

import pytest
from polus.images.utils.idr_download.utils import DATATYPE


def pytest_addoption(parser: pytest.Parser) -> None:
    """Add options to pytest."""
    parser.addoption(
        "--slow",
        action="store_true",
        dest="slow",
        default=False,
        help="run slow tests",
    )


def clean_directories() -> None:
    """Remove every directory in the working directory named ``tmp*``."""
    for d in Path.cwd().iterdir():
        if d.is_dir() and d.name.startswith("tmp"):
            shutil.rmtree(d)


@pytest.fixture()
def output_directory() -> Union[str, Path]:
    """Create a fresh temporary output directory in the working directory."""
    return Path(tempfile.mkdtemp(dir=Path.cwd()))


@pytest.fixture(
    params=[
        (DATATYPE.WELL, None, 1046940),
        (DATATYPE.DATASET, "byn", None),
        (DATATYPE.SCREEN, "idr0025", None),
        (DATATYPE.PROJECT, "idr0146", None),
        (DATATYPE.PLATE, "plate1_1_013", None),
    ],
)
def get_params(request: pytest.FixtureRequest) -> tuple:
    """Return one ``(data_type, name, object_id)`` parameter tuple."""
    return request.param


# ---------------------------------------------------------------------------
# utils/idr-download-tool/tests/test_cli.py
# ---------------------------------------------------------------------------
"""Test Command line Tool."""

from typer.testing import CliRunner
from pathlib import Path
import pytest
from polus.images.utils.idr_download.__main__ import app
from .conftest import clean_directories
import time


@pytest.mark.skipif("not config.getoption('slow')")
def test_cli(output_directory: Path, get_params: tuple) -> None:
    """Test the command line."""
    runner = CliRunner()
    data_type, name, object_id = get_params

    # Command-line arguments must be strings; options whose fixture value is
    # None are left out instead of being passed as None.
    args = ["--dataType", data_type.value, "--outDir", str(output_directory)]
    if name is not None:
        args.extend(["--name", name])
    if object_id is not None:
        args.extend(["--objectId", str(object_id)])

    result = runner.invoke(app, args)

    assert result.exit_code == 0
    # NOTE(review): presumably a grace period before cleanup -- confirm it is
    # still needed.
    time.sleep(5)
    clean_directories()
"""Test Idr download Tool."""

from pathlib import Path

import polus.images.utils.idr_download.idr_api as od
import pytest

from .conftest import clean_directories


@pytest.mark.skipif("not config.getoption('slow')")
def test_idr_download(
    output_directory: Path,
    get_params: tuple,
) -> None:
    """Test data from Omero Server."""
    data_type, name, object_id = get_params
    model = od.IdrDownload(
        data_type=data_type,
        name=name,
        object_id=object_id,
        out_dir=output_directory,
    )
    # ``get_data`` is a property: reading the attribute runs the download.
    # Calling it (``model.get_data()``) would try to call its None result.
    model.get_data  # noqa: B018
    assert any(output_directory.iterdir()) is True

    clean_directories()