diff --git a/README.md b/README.md index 025f8b2..4f90d96 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,26 @@ [![PyTests](https://github.com/jjjermiah/NBIA-toolkit/actions/workflows/main.yml/badge.svg)](https://github.com/jjjermiah/NBIA-toolkit/actions/workflows/main.yml) [![Documentation Status](https://readthedocs.org/projects/nbia-toolkit/badge/?version=latest)](https://nbia-toolkit.readthedocs.io/en/latest/?badge=latest) -# none of this works yet lol but it will soon # NBIA Toolkit - Packaged code to access the NBIA REST API See Documentation at [NBIA-Toolkit Read The Docs](https://nbia-toolkit.readthedocs.io/en/latest/) + TODO::readthedocs::error in the example first cell + TODO::auth.py::implement better access token handling + TODO::auth.py::implement better error handling + TODO::auth.py::implement refresh token functionality + TODO::auth.py::implement logout functionality + TODO::auth.py::implement encryption for username and password + TODO::nbia.py::implement better error handling + TODO::nbia.py::implement better logging & logger configuration + TODO::nbia.py::enforce type checking for all functions and add type hints + TODO::nbia.py::implement return formats for dict, and pandas.DataFrame + TODO::dicomsort.py::come up with more efficient algorithm for sorting + TODO::dicomsort.py::implement better error handling + TODO::dicomsort.py::come up with solution to only use part of UIDs (last 5 digits)? + + Wiki is empty for now: See the [Wiki](https://github.com/jjjermiah/NBIA-toolkit/wiki) for more information. @@ -22,7 +36,7 @@ $ pip install nbiatoolkit ## Usage -- TODO +See Documentation at [NBIA-Toolkit Read The Docs](https://nbia-toolkit.readthedocs.io/en/latest/) ## Contributing diff --git a/driver.py b/driver.py index d073b90..6f54ef0 100644 --- a/driver.py +++ b/driver.py @@ -1,16 +1,50 @@ from nbiatoolkit import NBIAClient import requests from pprint import pprint +import multiprocessing as mp +from tqdm import tqdm +from tcia_utils import nbia +import logging +USERNAME = "sejinkim" +PASSWORD = "q6mJyLD8cPeGTwg!" -client = NBIAClient(log_level="DEBUG") -# series = client.getSeries(Collection="4D-Lung") -# pprint(series[0]) -# print(type(series[0])) -# print() - -# series0 = series[0]['SeriesInstanceUID'] -series0 = '1.3.6.1.4.1.14519.5.2.1.6834.5010.189721824525842725510380467695' -response = client.downloadSeries( - SeriesInstanceUID = series0, - downloadDir = "/home/bioinf/bhklab/jermiah/projects/NBIA-toolkit/resources") -# pprint(response) + +client = NBIAClient(username=USERNAME, password=PASSWORD) +series = client.getSeries(Collection="RADCURE") + +seriesList = [_["SeriesInstanceUID"] for _ in series] +seriesList = seriesList[0:5] + + +def download(series) -> bool: + response = client.downloadSeries( + SeriesInstanceUID = series, + downloadDir = "/home/bioinf/bhklab/jermiah/projects/rawdata") + return response + +if (True): + # iterate through each series and download + # print out the progressbar + with mp.Pool(processes=30) as pool: + for _ in tqdm(pool.imap_unordered(download, seriesList), total=len(seriesList)): + pass +else: + _log = logging.getLogger(__name__) + for handler in logging.root.handlers[:]: + logging.root.removeHandler(handler) + + USERNAME = "sejinkim" + PASSWORD = "q6mJyLD8cPeGTwg!" + nbia.getToken(user=USERNAME, pw=PASSWORD) + + collection_name = "RADCURE" + metadata = nbia.getSeries(collection = collection_name, format = "df", api_url = "restricted") + + nbia.downloadSeries(series_data = seriesList, path = "/home/bioinf/bhklab/jermiah/projects/rawdata", input_type = "list", api_url = "restricted") +# with mp.Pool(processes=30) as pool: +# for _ in tqdm(pool.imap_unordered(download, seriesList), total=len(seriesList)): +# pass +# python dicomsort.py -u /home/bioinf/bhklab/jermiah/projects/rawdata /home/bioinf/bhklab/jermiah/projects/NBIA-toolkit/resources/rawdata/%PatientID/%StudyDate-%StudyID-%StudyDescription-%StudyInstanceUID/%SeriesNumber-%SeriesDescription-%SeriesIntanceUID/%InstanceNumber-%SOPInstanceUID.dcm + + + \ No newline at end of file diff --git a/src/nbiatoolkit/nbia.py b/src/nbiatoolkit/nbia.py index fd6d175..25dca87 100644 --- a/src/nbiatoolkit/nbia.py +++ b/src/nbiatoolkit/nbia.py @@ -3,6 +3,9 @@ from nbiatoolkit.utils.logger import setup_logger import requests from requests.exceptions import JSONDecodeError as JSONDecodeError +import io, zipfile, os +import hashlib + class NBIAClient: """ TODO:: Add docstring @@ -20,7 +23,7 @@ def __init__(self, name = "NBIAClient", console_logging=True, log_level=log_level) # Setup OAuth2 client - self.logger.info("Setting up OAuth2 client... with username %s", username) + self.logger.debug("Setting up OAuth2 client... with username %s", username) self._oauth2_client = OAuth2(username=username, password=password) self.api_headers = self._oauth2_client.getToken() @@ -28,7 +31,7 @@ def query_api(self, endpoint: NBIA_ENDPOINTS, params: dict = {}) -> dict: base_url = "https://services.cancerimagingarchive.net/nbia-api/services/" query_url = base_url + endpoint.value - self.logger.info("Querying API endpoint: %s", query_url) + self.logger.debug("Querying API endpoint: %s", query_url) self.logger.debug("API headers: %s", (self._createDebugURL(endpoint, params))) try: @@ -137,9 +140,9 @@ def getSeries(self, def downloadSeries(self, SeriesInstanceUID: str, downloadDir: str, - ) -> list: + ) -> bool: + - import io, zipfile, os params = dict() params["SeriesInstanceUID"] = SeriesInstanceUID @@ -160,14 +163,14 @@ def downloadSeries(self, # Log error or raise an exception pass - return response + return True def _calculateMD5(self, filepath: str ) -> str: - import hashlib + hash_md5 = hashlib.md5() with open(filepath, "rb") as f: for chunk in iter(lambda: f.read(4096), b""): @@ -177,7 +180,7 @@ def _calculateMD5(self, def validateMD5(self, seriesDir: str ) -> bool: - import os + md5File = os.path.join(seriesDir, "md5hashes.csv") assert os.path.isfile(md5File), "MD5 hash file not found in download directory."