From d101983255f668b68ddfdc838f7ed488d08ecd6e Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Wed, 13 Mar 2024 16:27:23 +0000 Subject: [PATCH 01/10] whitespace --- Wrappers/Python/cil/utilities/dataexample.py | 34 +++++++++--------- Wrappers/Python/test/test_dataexample.py | 36 ++++++++++---------- 2 files changed, 35 insertions(+), 35 deletions(-) diff --git a/Wrappers/Python/cil/utilities/dataexample.py b/Wrappers/Python/cil/utilities/dataexample.py index a6a4f020e9..18f3374a97 100644 --- a/Wrappers/Python/cil/utilities/dataexample.py +++ b/Wrappers/Python/cil/utilities/dataexample.py @@ -33,7 +33,7 @@ class DATA(object): @classmethod def dfile(cls): return None - + class CILDATA(DATA): data_dir = os.path.abspath(os.path.join(sys.prefix, 'share','cil')) @classmethod @@ -41,9 +41,9 @@ def get(cls, size=None, scale=(0,1), **kwargs): ddir = kwargs.get('data_dir', CILDATA.data_dir) loader = TestData(data_dir=ddir) return loader.load(cls.dfile(), size, scale, **kwargs) - + class REMOTEDATA(DATA): - + FOLDER = '' URL = '' FILE_SIZE = '' @@ -56,7 +56,7 @@ def get(cls, data_dir): def _download_and_extract_from_url(cls, data_dir): with urlopen(cls.URL) as response: with BytesIO(response.read()) as bytes, ZipFile(bytes) as zipfile: - zipfile.extractall(path = data_dir) + zipfile.extractall(path = data_dir) @classmethod def download_data(cls, data_dir): @@ -72,8 +72,8 @@ def download_data(cls, data_dir): if os.path.isdir(os.path.join(data_dir, cls.FOLDER)): print("Dataset already exists in " + data_dir) else: - if input("Are you sure you want to download " + cls.FILE_SIZE + " dataset from " + cls.URL + " ? (y/n)") == "y": - print('Downloading dataset from ' + cls.URL) + if input("Are you sure you want to download " + cls.FILE_SIZE + " dataset from " + cls.URL + " ? (y/n)") == "y": + print('Downloading dataset from ' + cls.URL) cls._download_and_extract_from_url(os.path.join(data_dir,cls.FOLDER)) print('Download complete') else: @@ -185,15 +185,15 @@ def get(cls, **kwargs): ------- ImageData The simulated spheres volume - ''' + ''' ddir = kwargs.get('data_dir', CILDATA.data_dir) loader = NEXUSDataReader() loader.set_up(file_name=os.path.join(os.path.abspath(ddir), 'sim_volume.nxs')) return loader.read() - + class WALNUT(REMOTEDATA): ''' - A microcomputed tomography dataset of a walnut from https://zenodo.org/records/4822516 + A microcomputed tomography dataset of a walnut from https://zenodo.org/records/4822516 ''' FOLDER = 'walnut' URL = 'https://zenodo.org/record/4822516/files/walnut.zip' @@ -202,7 +202,7 @@ class WALNUT(REMOTEDATA): @classmethod def get(cls, data_dir): ''' - A microcomputed tomography dataset of a walnut from https://zenodo.org/records/4822516 + A microcomputed tomography dataset of a walnut from https://zenodo.org/records/4822516 This function returns the raw projection data from the .txrm file Parameters @@ -222,19 +222,19 @@ def get(cls, data_dir): except(FileNotFoundError): raise(FileNotFoundError("Dataset .txrm file not found in specifed data_dir: {} \n \ Specify a different data_dir or download data with dataexample.{}.download_data(data_dir)".format(filepath, cls.__name__))) - + class USB(REMOTEDATA): ''' - A microcomputed tomography dataset of a usb memory stick from https://zenodo.org/records/4822516 + A microcomputed tomography dataset of a usb memory stick from https://zenodo.org/records/4822516 ''' - FOLDER = 'USB' + FOLDER = 'USB' URL = 'https://zenodo.org/record/4822516/files/usb.zip' FILE_SIZE = '3.2 GB' @classmethod def get(cls, data_dir): ''' - A microcomputed tomography dataset of a usb memory stick from https://zenodo.org/records/4822516 + A microcomputed tomography dataset of a usb memory stick from https://zenodo.org/records/4822516 This function returns the raw projection data from the .txrm file Parameters @@ -254,7 +254,7 @@ def get(cls, data_dir): except(FileNotFoundError): raise(FileNotFoundError("Dataset .txrm file not found in: {} \n \ Specify a different data_dir or download data with dataexample.{}.download_data(data_dir)".format(filepath, cls.__name__))) - + class KORN(REMOTEDATA): ''' A microcomputed tomography dataset of a sunflower seeds in a box from https://zenodo.org/records/6874123 @@ -319,7 +319,7 @@ class TestData(object): def __init__(self, data_dir): self.data_dir = data_dir - + def load(self, which, size=None, scale=(0,1), **kwargs): ''' Return a test data of the requested image @@ -645,4 +645,4 @@ def scikit_random_noise(image, mode='gaussian', seed=None, clip=True, **kwargs): if clip: out = np.clip(out, low_clip, 1.0) - return out \ No newline at end of file + return out diff --git a/Wrappers/Python/test/test_dataexample.py b/Wrappers/Python/test/test_dataexample.py index 2baf4eaad4..3eaa003133 100644 --- a/Wrappers/Python/test/test_dataexample.py +++ b/Wrappers/Python/test/test_dataexample.py @@ -25,7 +25,7 @@ from testclass import CCPiTestClass import platform import numpy as np -from unittest.mock import patch, MagicMock +from unittest.mock import patch, MagicMock from urllib import request from zipfile import ZipFile from io import StringIO @@ -151,26 +151,26 @@ def test_load_SIMULATED_CONE_BEAM_DATA(self): .set_panel((128,128),(64,64))\ .set_angles(np.linspace(0,360,300,False)) - self.assertEqual(ag_expected,image.geometry,msg="Acquisition geometry mismatch") + self.assertEqual(ag_expected,image.geometry,msg="Acquisition geometry mismatch") class TestRemoteData(unittest.TestCase): def setUp(self): - + self.data_list = ['WALNUT','USB','KORN','SANDSTONE'] self.tmp_file = 'tmp.txt' self.tmp_zip = 'tmp.zip' with ZipFile(self.tmp_zip, 'w') as zipped_file: zipped_file.writestr(self.tmp_file, np.array([1, 2, 3])) with open(self.tmp_zip, 'rb') as zipped_file: - self.zipped_bytes = zipped_file.read() - + self.zipped_bytes = zipped_file.read() + def tearDown(self): for data in self.data_list: test_func = getattr(dataexample, data) if os.path.exists(os.path.join(test_func.FOLDER)): shutil.rmtree(test_func.FOLDER) - + if os.path.exists(self.tmp_zip): os.remove(self.tmp_zip) @@ -189,7 +189,7 @@ def test_unzip_remote_data(self, mock_urlopen): dataexample.REMOTEDATA._download_and_extract_from_url('.') self.assertTrue(os.path.isfile(self.tmp_file)) - @patch('cil.utilities.dataexample.input', return_value='n') + @patch('cil.utilities.dataexample.input', return_value='n') @patch('cil.utilities.dataexample.urlopen') def test_download_data_input_n(self, mock_urlopen, input): self.mock_urlopen(mock_urlopen) @@ -197,31 +197,31 @@ def test_download_data_input_n(self, mock_urlopen, input): data_list = ['WALNUT','USB','KORN','SANDSTONE'] for data in data_list: # redirect print output - capturedOutput = StringIO() - sys.stdout = capturedOutput + capturedOutput = StringIO() + sys.stdout = capturedOutput test_func = getattr(dataexample, data) test_func.download_data('.') - + self.assertFalse(os.path.isfile(self.tmp_file)) self.assertEqual(capturedOutput.getvalue(),'Download cancelled\n') - + # return to standard print output - sys.stdout = sys.__stdout__ + sys.stdout = sys.__stdout__ - @patch('cil.utilities.dataexample.input', return_value='y') + @patch('cil.utilities.dataexample.input', return_value='y') @patch('cil.utilities.dataexample.urlopen') def test_download_data_input_y(self, mock_urlopen, input): self.mock_urlopen(mock_urlopen) # redirect print output - capturedOutput = StringIO() - sys.stdout = capturedOutput + capturedOutput = StringIO() + sys.stdout = capturedOutput + - for data in self.data_list: test_func = getattr(dataexample, data) test_func.download_data('.') self.assertTrue(os.path.isfile(os.path.join(test_func.FOLDER,self.tmp_file))) - + # return to standard print output - sys.stdout = sys.__stdout__ + sys.stdout = sys.__stdout__ From e75fa3c284ff5583e0decefa44095333f2cdbbb1 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Wed, 13 Mar 2024 16:29:20 +0000 Subject: [PATCH 02/10] more OOP --- Wrappers/Python/cil/utilities/dataexample.py | 536 ++++++++----------- Wrappers/Python/test/test_dataexample.py | 2 +- 2 files changed, 213 insertions(+), 325 deletions(-) diff --git a/Wrappers/Python/cil/utilities/dataexample.py b/Wrappers/Python/cil/utilities/dataexample.py index 18f3374a97..e65d224e5a 100644 --- a/Wrappers/Python/cil/utilities/dataexample.py +++ b/Wrappers/Python/cil/utilities/dataexample.py @@ -16,7 +16,7 @@ # Authors: # CIL Developers, listed at: https://github.com/TomographicImaging/CIL/blob/master/NOTICE.txt -from cil.framework import ImageData, ImageGeometry, DataContainer +from cil.framework import ImageGeometry import numpy import numpy as np from PIL import Image @@ -26,301 +26,38 @@ from zipfile import ZipFile from urllib.request import urlopen from io import BytesIO -from scipy.io import loadmat from cil.io import NEXUSDataReader, NikonDataReader, ZEISSDataReader +from abc import ABC -class DATA(object): - @classmethod - def dfile(cls): - return None - -class CILDATA(DATA): - data_dir = os.path.abspath(os.path.join(sys.prefix, 'share','cil')) - @classmethod - def get(cls, size=None, scale=(0,1), **kwargs): - ddir = kwargs.get('data_dir', CILDATA.data_dir) - loader = TestData(data_dir=ddir) - return loader.load(cls.dfile(), size, scale, **kwargs) - -class REMOTEDATA(DATA): - - FOLDER = '' - URL = '' - FILE_SIZE = '' - - @classmethod - def get(cls, data_dir): - return None - - @classmethod - def _download_and_extract_from_url(cls, data_dir): - with urlopen(cls.URL) as response: - with BytesIO(response.read()) as bytes, ZipFile(bytes) as zipfile: - zipfile.extractall(path = data_dir) - - @classmethod - def download_data(cls, data_dir): - ''' - Download a dataset from a remote repository - - Parameters - ---------- - data_dir: str, optional - The path to the data directory where the downloaded data should be stored - - ''' - if os.path.isdir(os.path.join(data_dir, cls.FOLDER)): - print("Dataset already exists in " + data_dir) - else: - if input("Are you sure you want to download " + cls.FILE_SIZE + " dataset from " + cls.URL + " ? (y/n)") == "y": - print('Downloading dataset from ' + cls.URL) - cls._download_and_extract_from_url(os.path.join(data_dir,cls.FOLDER)) - print('Download complete') - else: - print('Download cancelled') - -class BOAT(CILDATA): - @classmethod - def dfile(cls): - return TestData.BOAT -class CAMERA(CILDATA): - @classmethod - def dfile(cls): - return TestData.CAMERA -class PEPPERS(CILDATA): - @classmethod - def dfile(cls): - return TestData.PEPPERS -class RESOLUTION_CHART(CILDATA): - @classmethod - def dfile(cls): - return TestData.RESOLUTION_CHART -class SIMPLE_PHANTOM_2D(CILDATA): - @classmethod - def dfile(cls): - return TestData.SIMPLE_PHANTOM_2D -class SHAPES(CILDATA): - @classmethod - def dfile(cls): - return TestData.SHAPES -class RAINBOW(CILDATA): - @classmethod - def dfile(cls): - return TestData.RAINBOW -class SYNCHROTRON_PARALLEL_BEAM_DATA(CILDATA): - @classmethod - def get(cls, **kwargs): - ''' - A DLS dataset - - Parameters - ---------- - data_dir: str, optional - The path to the data directory - - Returns - ------- - AcquisitionData - The DLS dataset - ''' - - ddir = kwargs.get('data_dir', CILDATA.data_dir) - loader = NEXUSDataReader() - loader.set_up(file_name=os.path.join(os.path.abspath(ddir), '24737_fd_normalised.nxs')) - return loader.read() -class SIMULATED_PARALLEL_BEAM_DATA(CILDATA): - @classmethod - def get(cls, **kwargs): - ''' - A simulated parallel-beam dataset generated from SIMULATED_SPHERE_VOLUME - - Parameters - ---------- - data_dir: str, optional - The path to the data directory - - Returns - ------- - AcquisitionData - The simulated spheres dataset - ''' - - ddir = kwargs.get('data_dir', CILDATA.data_dir) - loader = NEXUSDataReader() - loader.set_up(file_name=os.path.join(os.path.abspath(ddir), 'sim_parallel_beam.nxs')) - return loader.read() -class SIMULATED_CONE_BEAM_DATA(CILDATA): - @classmethod - def get(cls, **kwargs): - ''' - A cone-beam dataset generated from SIMULATED_SPHERE_VOLUME - - Parameters - ---------- - data_dir: str, optional - The path to the data directory - - Returns - ------- - AcquisitionData - The simulated spheres dataset - ''' - - ddir = kwargs.get('data_dir', CILDATA.data_dir) - loader = NEXUSDataReader() - loader.set_up(file_name=os.path.join(os.path.abspath(ddir), 'sim_cone_beam.nxs')) - return loader.read() -class SIMULATED_SPHERE_VOLUME(CILDATA): - @classmethod - def get(cls, **kwargs): - ''' - A simulated volume of spheres - - Parameters - ---------- - data_dir: str, optional - The path to the data directory - - Returns - ------- - ImageData - The simulated spheres volume - ''' - ddir = kwargs.get('data_dir', CILDATA.data_dir) - loader = NEXUSDataReader() - loader.set_up(file_name=os.path.join(os.path.abspath(ddir), 'sim_volume.nxs')) - return loader.read() - -class WALNUT(REMOTEDATA): - ''' - A microcomputed tomography dataset of a walnut from https://zenodo.org/records/4822516 - ''' - FOLDER = 'walnut' - URL = 'https://zenodo.org/record/4822516/files/walnut.zip' - FILE_SIZE = '6.4 GB' - - @classmethod - def get(cls, data_dir): - ''' - A microcomputed tomography dataset of a walnut from https://zenodo.org/records/4822516 - This function returns the raw projection data from the .txrm file - - Parameters - ---------- - data_dir: str - The path to the directory where the dataset is stored. Data can be downloaded with dataexample.WALNUT.download_data(data_dir) - - Returns - ------- - ImageData - The walnut dataset - ''' - filepath = os.path.join(data_dir, cls.FOLDER, 'valnut','valnut_2014-03-21_643_28','tomo-A','valnut_tomo-A.txrm') - try: - loader = ZEISSDataReader(file_name=filepath) - return loader.read() - except(FileNotFoundError): - raise(FileNotFoundError("Dataset .txrm file not found in specifed data_dir: {} \n \ - Specify a different data_dir or download data with dataexample.{}.download_data(data_dir)".format(filepath, cls.__name__))) +DEFAULT_DATA_DIR = os.path.abspath(os.path.join(sys.prefix, 'share', 'cil')) -class USB(REMOTEDATA): - ''' - A microcomputed tomography dataset of a usb memory stick from https://zenodo.org/records/4822516 - ''' - FOLDER = 'USB' - URL = 'https://zenodo.org/record/4822516/files/usb.zip' - FILE_SIZE = '3.2 GB' +class TestData: + '''Provides 6 datasets: - @classmethod - def get(cls, data_dir): - ''' - A microcomputed tomography dataset of a usb memory stick from https://zenodo.org/records/4822516 - This function returns the raw projection data from the .txrm file - - Parameters - ---------- - data_dir: str - The path to the directory where the dataset is stored. Data can be downloaded with dataexample.WALNUT.download_data(data_dir) - - Returns - ------- - ImageData - The usb dataset - ''' - filepath = os.path.join(data_dir, cls.FOLDER, 'gruppe 4','gruppe 4_2014-03-20_1404_12','tomo-A','gruppe 4_tomo-A.txrm') - try: - loader = ZEISSDataReader(file_name=filepath) - return loader.read() - except(FileNotFoundError): - raise(FileNotFoundError("Dataset .txrm file not found in: {} \n \ - Specify a different data_dir or download data with dataexample.{}.download_data(data_dir)".format(filepath, cls.__name__))) - -class KORN(REMOTEDATA): - ''' - A microcomputed tomography dataset of a sunflower seeds in a box from https://zenodo.org/records/6874123 + BOAT: 'boat.tiff' + CAMERA: 'camera.png' + PEPPERS: 'peppers.tiff' + RESOLUTION_CHART: 'resolution_chart.tiff' + SIMPLE_PHANTOM_2D: 'hotdog' + SHAPES: 'shapes.png' + RAINBOW: 'rainbow.png' ''' - FOLDER = 'korn' - URL = 'https://zenodo.org/record/6874123/files/korn.zip' - FILE_SIZE = '2.9 GB' - - @classmethod - def get(cls, data_dir): - ''' - A microcomputed tomography dataset of a sunflower seeds in a box from https://zenodo.org/records/6874123 - This function returns the raw projection data from the .xtekct file - - Parameters - ---------- - data_dir: str - The path to the directory where the dataset is stored. Data can be downloaded with dataexample.KORN.download_data(data_dir) - - Returns - ------- - ImageData - The korn dataset - ''' - filepath = os.path.join(data_dir, cls.FOLDER, 'Korn i kasse','47209 testscan korn01_recon.xtekct') - try: - loader = NikonDataReader(file_name=filepath) - return loader.read() - except(FileNotFoundError): - raise(FileNotFoundError("Dataset .xtekct file not found in: {} \n \ - Specify a different data_dir or download data with dataexample.{}.download_data(data_dir)".format(filepath, cls.__name__))) - - -class SANDSTONE(REMOTEDATA): - ''' - A synchrotron x-ray tomography dataset of sandstone from https://zenodo.org/records/4912435 - A small subset of the data containing selected projections and 4 slices of the reconstruction - ''' - FOLDER = 'sandstone' - URL = 'https://zenodo.org/records/4912435/files/small.zip' - FILE_SIZE = '227 MB' - -class TestData(object): - '''Class to return test data - - provides 6 dataset: BOAT = 'boat.tiff' CAMERA = 'camera.png' PEPPERS = 'peppers.tiff' RESOLUTION_CHART = 'resolution_chart.tiff' SIMPLE_PHANTOM_2D = 'hotdog' - SHAPES = 'shapes.png' + SHAPES = 'shapes.png' RAINBOW = 'rainbow.png' - ''' - BOAT = 'boat.tiff' - CAMERA = 'camera.png' - PEPPERS = 'peppers.tiff' - RESOLUTION_CHART = 'resolution_chart.tiff' - SIMPLE_PHANTOM_2D = 'hotdog' - SHAPES = 'shapes.png' - RAINBOW = 'rainbow.png' + + @classmethod + def _datasets(cls): + return {cls.BOAT, cls.CAMERA, cls.PEPPERS, cls.RESOLUTION_CHART, cls.SIMPLE_PHANTOM_2D, cls.SHAPES, cls.RAINBOW} def __init__(self, data_dir): self.data_dir = data_dir - def load(self, which, size=None, scale=(0,1), **kwargs): + def load(self, which, size=None, scale=None): ''' Return a test data of the requested image @@ -338,52 +75,28 @@ def load(self, which, size=None, scale=(0,1), **kwargs): ImageData The simulated spheres volume ''' - if which not in [TestData.BOAT, TestData.CAMERA, - TestData.PEPPERS, TestData.RESOLUTION_CHART, - TestData.SIMPLE_PHANTOM_2D, TestData.SHAPES, - TestData.RAINBOW]: - raise ValueError('Unknown TestData {}.'.format(which)) + if scale is None: + scale = 0, 1 + if which not in self._datasets(): + raise KeyError(f"Unknown TestData: {which}") if which == TestData.SIMPLE_PHANTOM_2D: - if size is None: - N = 512 - M = 512 - else: - N = size[0] - M = size[1] - + N, M = (512, 512) if size is None else (size[0], size[1]) sdata = numpy.zeros((N, M)) sdata[int(round(N/4)):int(round(3*N/4)), int(round(M/4)):int(round(3*M/4))] = 0.5 sdata[int(round(N/8)):int(round(7*N/8)), int(round(3*M/8)):int(round(5*M/8))] = 1 ig = ImageGeometry(voxel_num_x = M, voxel_num_y = N, dimension_labels=[ImageGeometry.HORIZONTAL_Y, ImageGeometry.HORIZONTAL_X]) data = ig.allocate() data.fill(sdata) - elif which == TestData.SHAPES: - with Image.open(os.path.join(self.data_dir, which)) as f: - - if size is None: - N = 200 - M = 300 - else: - N = size[0] - M = size[1] - + N, M = (200, 300) if size is None else (size[0], size[1]) ig = ImageGeometry(voxel_num_x = M, voxel_num_y = N, dimension_labels=[ImageGeometry.HORIZONTAL_Y, ImageGeometry.HORIZONTAL_X]) data = ig.allocate() tmp = numpy.array(f.convert('L').resize((M,N))) data.fill(tmp/numpy.max(tmp)) - else: with Image.open(os.path.join(self.data_dir, which)) as tmp: - - if size is None: - N = tmp.size[1] - M = tmp.size[0] - else: - N = size[0] - M = size[1] - + N, M = (tmp.size[1], tmp.size[0]) if size is None else (size[0], size[1]) bands = tmp.getbands() if len(bands) > 1: if len(bands) == 4: @@ -414,26 +127,22 @@ def load(self, which, size=None, scale=(0,1), **kwargs): # print ("data.geometry", data.geometry) return data - @staticmethod - def random_noise(image, mode='gaussian', seed=None, clip=True, **kwargs): + @classmethod + def random_noise(cls, image, **kwargs): '''Function to add noise to input image :param image: input dataset, DataContainer of numpy.ndarray - :param mode: type of noise - :param seed: seed for random number generator - :param clip: should clip the data. + :param **kwargs: Passed to `scikit_random_noise` See https://github.com/scikit-image/scikit-image/blob/master/skimage/util/noise.py - ''' if hasattr(image, 'as_array'): - arr = TestData.scikit_random_noise(image.as_array(), mode=mode, seed=seed, clip=clip, - **kwargs) + arr = cls.scikit_random_noise(image.as_array(), **kwargs) out = image.copy() out.fill(arr) return out elif issubclass(type(image), numpy.ndarray): - return TestData.scikit_random_noise(image, mode=mode, seed=seed, clip=clip, - **kwargs) + return cls.scikit_random_noise(image, **kwargs) + raise TypeError(type(image)) @staticmethod def scikit_random_noise(image, mode='gaussian', seed=None, clip=True, **kwargs): @@ -538,7 +247,6 @@ def scikit_random_noise(image, mode='gaussian', seed=None, clip=True, **kwargs): STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - """ mode = mode.lower() @@ -548,7 +256,7 @@ def scikit_random_noise(image, mode='gaussian', seed=None, clip=True, **kwargs): else: low_clip = 0. - image = numpy.asarray(image, dtype=(np.float64)) + image = numpy.asarray(image, dtype=np.float64) if seed is not None: np.random.seed(seed=seed) @@ -646,3 +354,183 @@ def scikit_random_noise(image, mode='gaussian', seed=None, clip=True, **kwargs): out = np.clip(out, low_clip, 1.0) return out + +class _CIL_DATA(ABC): + dfile: str + @classmethod + def get(cls, data_dir=DEFAULT_DATA_DIR, **loader_kwargs): + loader = TestData(data_dir) + return loader.load(cls.dfile, **loader_kwargs) + +class _REMOTE_DATA(ABC): + FOLDER: str + URL: str + FILE_SIZE: str + + @staticmethod + def _prompt(msg): + while (res := input(f"{msg} [y/n]").lower()) not in "yn": + pass + return res == "y" + + @classmethod + def _download_and_extract_from_url(cls, data_dir): + with urlopen(cls.URL) as response: + with BytesIO(response.read()) as bytes, ZipFile(bytes) as zipfile: + zipfile.extractall(path=data_dir) + + @classmethod + def download_data(cls, data_dir): + ''' + Download a dataset from a remote repository + + Parameters + ---------- + data_dir: str, optional + The path to the data directory where the downloaded data should be stored + ''' + if os.path.isdir(os.path.join(data_dir, cls.FOLDER)): + print(f"Dataset already exists in {data_dir}") + else: + if cls._prompt(f"Are you sure you want to download {cls.FILE_SIZE} dataset from {cls.URL}?"): + print(f"Downloading dataset from {cls.URL}") + cls._download_and_extract_from_url(os.path.join(data_dir,cls.FOLDER)) + print('Download complete') + else: + print('Download cancelled') + +class BOAT(_CIL_DATA): + dfile = TestData.BOAT +class CAMERA(_CIL_DATA): + dfile = TestData.CAMERA +class PEPPERS(_CIL_DATA): + dfile = TestData.PEPPERS +class RESOLUTION_CHART(_CIL_DATA): + dfile = TestData.RESOLUTION_CHART +class SIMPLE_PHANTOM_2D(_CIL_DATA): + dfile = TestData.SIMPLE_PHANTOM_2D +class SHAPES(_CIL_DATA): + dfile = TestData.SHAPES +class RAINBOW(_CIL_DATA): + dfile = TestData.RAINBOW +class _NEXUS_CIL_DATA(_CIL_DATA): + @classmethod + def get(cls, data_dir=DEFAULT_DATA_DIR): + ''' + Parameters + ---------- + data_dir: str, optional + The path to the data directory + + Returns + ------- + AcquisitionData + ''' + loader = NEXUSDataReader() + loader.set_up(file_name=os.path.join(data_dir, cls.dfile)) + return loader.read() +class SYNCHROTRON_PARALLEL_BEAM_DATA(_NEXUS_CIL_DATA): + '''A DLS dataset''' + dfile = '24737_fd_normalised.nxs' +class SIMULATED_PARALLEL_BEAM_DATA(_NEXUS_CIL_DATA): + '''A simulated parallel-beam dataset generated from SIMULATED_SPHERE_VOLUME''' + dfile = 'sim_parallel_beam.nxs' +class SIMULATED_CONE_BEAM_DATA(_NEXUS_CIL_DATA): + '''A cone-beam dataset generated from SIMULATED_SPHERE_VOLUME''' + dfile = 'sim_cone_beam.nxs' +class SIMULATED_SPHERE_VOLUME(_NEXUS_CIL_DATA): + '''A simulated volume of spheres''' + dfile = 'sim_volume.nxs' + +class WALNUT(_REMOTE_DATA): + '''A microcomputed tomography dataset of a walnut from https://zenodo.org/records/4822516''' + FOLDER = 'walnut' + URL = 'https://zenodo.org/record/4822516/files/walnut.zip' + FILE_SIZE = '6.4 GB' + + @classmethod + def get(cls, data_dir): + ''' + This function returns the raw projection data from the .txrm file + + Parameters + ---------- + data_dir: str + The path to the directory where the dataset is stored. Data can be downloaded with dataexample.WALNUT.download_data(data_dir) + + Returns + ------- + ImageData + The walnut dataset + ''' + filepath = os.path.join(data_dir, cls.FOLDER, 'valnut','valnut_2014-03-21_643_28','tomo-A','valnut_tomo-A.txrm') + try: + loader = ZEISSDataReader(file_name=filepath) + return loader.read() + except FileNotFoundError as exc: + raise ValueError(f"Specify a different data_dir or download data with `{cls.__name__}.download_data({data_dir})`") from exc + +class USB(_REMOTE_DATA): + '''A microcomputed tomography dataset of a usb memory stick from https://zenodo.org/records/4822516''' + FOLDER = 'USB' + URL = 'https://zenodo.org/record/4822516/files/usb.zip' + FILE_SIZE = '3.2 GB' + + @classmethod + def get(cls, data_dir): + ''' + This function returns the raw projection data from the .txrm file + + Parameters + ---------- + data_dir: str + The path to the directory where the dataset is stored. Data can be downloaded with dataexample.USB.download_data(data_dir) + + Returns + ------- + ImageData + The usb dataset + ''' + filepath = os.path.join(data_dir, cls.FOLDER, 'gruppe 4','gruppe 4_2014-03-20_1404_12','tomo-A','gruppe 4_tomo-A.txrm') + try: + loader = ZEISSDataReader(file_name=filepath) + return loader.read() + except FileNotFoundError as exc: + raise ValueError(f"Specify a different data_dir or download data with `{cls.__name__}.download_data({data_dir})`") from exc + +class KORN(_REMOTE_DATA): + '''A microcomputed tomography dataset of a sunflower seeds in a box from https://zenodo.org/records/6874123''' + FOLDER = 'korn' + URL = 'https://zenodo.org/record/6874123/files/korn.zip' + FILE_SIZE = '2.9 GB' + + @classmethod + def get(cls, data_dir): + ''' + This function returns the raw projection data from the .xtekct file + + Parameters + ---------- + data_dir: str + The path to the directory where the dataset is stored. Data can be downloaded with dataexample.KORN.download_data(data_dir) + + Returns + ------- + ImageData + The korn dataset + ''' + filepath = os.path.join(data_dir, cls.FOLDER, 'Korn i kasse','47209 testscan korn01_recon.xtekct') + try: + loader = NikonDataReader(file_name=filepath) + return loader.read() + except FileNotFoundError as exc: + raise ValueError(f"Specify a different data_dir or download data with `{cls.__name__}.download_data({data_dir})`") from exc + +class SANDSTONE(_REMOTE_DATA): + ''' + A synchrotron x-ray tomography dataset of sandstone from https://zenodo.org/records/4912435 + A small subset of the data containing selected projections and 4 slices of the reconstruction + ''' + FOLDER = 'sandstone' + URL = 'https://zenodo.org/records/4912435/files/small.zip' + FILE_SIZE = '227 MB' diff --git a/Wrappers/Python/test/test_dataexample.py b/Wrappers/Python/test/test_dataexample.py index 3eaa003133..47f7e72da0 100644 --- a/Wrappers/Python/test/test_dataexample.py +++ b/Wrappers/Python/test/test_dataexample.py @@ -186,7 +186,7 @@ def mock_urlopen(self, mock_urlopen): @patch('cil.utilities.dataexample.urlopen') def test_unzip_remote_data(self, mock_urlopen): self.mock_urlopen(mock_urlopen) - dataexample.REMOTEDATA._download_and_extract_from_url('.') + dataexample._REMOTE_DATA._download_and_extract_from_url('.') self.assertTrue(os.path.isfile(self.tmp_file)) @patch('cil.utilities.dataexample.input', return_value='n') From 017a4cfc5e3925aa553e500363612b4f3b2dcd57 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Wed, 13 Mar 2024 17:28:27 +0000 Subject: [PATCH 03/10] update tests --- Wrappers/Python/test/test_dataexample.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/Wrappers/Python/test/test_dataexample.py b/Wrappers/Python/test/test_dataexample.py index 47f7e72da0..474a6f37f7 100644 --- a/Wrappers/Python/test/test_dataexample.py +++ b/Wrappers/Python/test/test_dataexample.py @@ -186,8 +186,12 @@ def mock_urlopen(self, mock_urlopen): @patch('cil.utilities.dataexample.urlopen') def test_unzip_remote_data(self, mock_urlopen): self.mock_urlopen(mock_urlopen) - dataexample._REMOTE_DATA._download_and_extract_from_url('.') + self.assertFalse(os.path.isfile(self.tmp_file)) + class RemoteData(dataexample._REMOTE_DATA): + URL = '' + RemoteData._download_and_extract_from_url('.') self.assertTrue(os.path.isfile(self.tmp_file)) + os.remove(self.tmp_file) @patch('cil.utilities.dataexample.input', return_value='n') @patch('cil.utilities.dataexample.urlopen') @@ -220,8 +224,11 @@ def test_download_data_input_y(self, mock_urlopen, input): for data in self.data_list: test_func = getattr(dataexample, data) + fname = os.path.join(test_func.FOLDER, self.tmp_file) + self.assertFalse(os.path.isfile(fname)) test_func.download_data('.') - self.assertTrue(os.path.isfile(os.path.join(test_func.FOLDER,self.tmp_file))) + self.assertTrue(os.path.isfile(fname)) + os.remove(fname) # return to standard print output sys.stdout = sys.__stdout__ From 33b5556ea385fb0fb6dbc8413f2e431f0c8f285f Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Thu, 14 Mar 2024 00:01:26 +0000 Subject: [PATCH 04/10] _REMOTE_DATA.FOLDER => cls.__name__ --- Wrappers/Python/cil/utilities/dataexample.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/Wrappers/Python/cil/utilities/dataexample.py b/Wrappers/Python/cil/utilities/dataexample.py index e65d224e5a..d2d844dd9d 100644 --- a/Wrappers/Python/cil/utilities/dataexample.py +++ b/Wrappers/Python/cil/utilities/dataexample.py @@ -363,7 +363,6 @@ def get(cls, data_dir=DEFAULT_DATA_DIR, **loader_kwargs): return loader.load(cls.dfile, **loader_kwargs) class _REMOTE_DATA(ABC): - FOLDER: str URL: str FILE_SIZE: str @@ -389,12 +388,12 @@ def download_data(cls, data_dir): data_dir: str, optional The path to the data directory where the downloaded data should be stored ''' - if os.path.isdir(os.path.join(data_dir, cls.FOLDER)): + if os.path.isdir(os.path.join(data_dir, cls.__name__)): print(f"Dataset already exists in {data_dir}") else: if cls._prompt(f"Are you sure you want to download {cls.FILE_SIZE} dataset from {cls.URL}?"): print(f"Downloading dataset from {cls.URL}") - cls._download_and_extract_from_url(os.path.join(data_dir,cls.FOLDER)) + cls._download_and_extract_from_url(os.path.join(data_dir, cls.__name__)) print('Download complete') else: print('Download cancelled') @@ -444,7 +443,6 @@ class SIMULATED_SPHERE_VOLUME(_NEXUS_CIL_DATA): class WALNUT(_REMOTE_DATA): '''A microcomputed tomography dataset of a walnut from https://zenodo.org/records/4822516''' - FOLDER = 'walnut' URL = 'https://zenodo.org/record/4822516/files/walnut.zip' FILE_SIZE = '6.4 GB' @@ -463,7 +461,7 @@ def get(cls, data_dir): ImageData The walnut dataset ''' - filepath = os.path.join(data_dir, cls.FOLDER, 'valnut','valnut_2014-03-21_643_28','tomo-A','valnut_tomo-A.txrm') + filepath = os.path.join(data_dir, cls.__name__, 'valnut','valnut_2014-03-21_643_28','tomo-A','valnut_tomo-A.txrm') try: loader = ZEISSDataReader(file_name=filepath) return loader.read() @@ -472,7 +470,6 @@ def get(cls, data_dir): class USB(_REMOTE_DATA): '''A microcomputed tomography dataset of a usb memory stick from https://zenodo.org/records/4822516''' - FOLDER = 'USB' URL = 'https://zenodo.org/record/4822516/files/usb.zip' FILE_SIZE = '3.2 GB' @@ -491,7 +488,7 @@ def get(cls, data_dir): ImageData The usb dataset ''' - filepath = os.path.join(data_dir, cls.FOLDER, 'gruppe 4','gruppe 4_2014-03-20_1404_12','tomo-A','gruppe 4_tomo-A.txrm') + filepath = os.path.join(data_dir, cls.__name__, 'gruppe 4','gruppe 4_2014-03-20_1404_12','tomo-A','gruppe 4_tomo-A.txrm') try: loader = ZEISSDataReader(file_name=filepath) return loader.read() @@ -500,7 +497,6 @@ def get(cls, data_dir): class KORN(_REMOTE_DATA): '''A microcomputed tomography dataset of a sunflower seeds in a box from https://zenodo.org/records/6874123''' - FOLDER = 'korn' URL = 'https://zenodo.org/record/6874123/files/korn.zip' FILE_SIZE = '2.9 GB' @@ -519,7 +515,7 @@ def get(cls, data_dir): ImageData The korn dataset ''' - filepath = os.path.join(data_dir, cls.FOLDER, 'Korn i kasse','47209 testscan korn01_recon.xtekct') + filepath = os.path.join(data_dir, cls.__name__, 'Korn i kasse','47209 testscan korn01_recon.xtekct') try: loader = NikonDataReader(file_name=filepath) return loader.read() @@ -531,6 +527,5 @@ class SANDSTONE(_REMOTE_DATA): A synchrotron x-ray tomography dataset of sandstone from https://zenodo.org/records/4912435 A small subset of the data containing selected projections and 4 slices of the reconstruction ''' - FOLDER = 'sandstone' URL = 'https://zenodo.org/records/4912435/files/small.zip' FILE_SIZE = '227 MB' From 72752b3b26cef1261a8c2bf7d570ab5f9cfb02df Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Thu, 14 Mar 2024 00:15:49 +0000 Subject: [PATCH 05/10] drop _CIL_DATA => TestData --- Wrappers/Python/cil/utilities/dataexample.py | 91 +++++++++----------- 1 file changed, 42 insertions(+), 49 deletions(-) diff --git a/Wrappers/Python/cil/utilities/dataexample.py b/Wrappers/Python/cil/utilities/dataexample.py index d2d844dd9d..fbfa067cf5 100644 --- a/Wrappers/Python/cil/utilities/dataexample.py +++ b/Wrappers/Python/cil/utilities/dataexample.py @@ -31,7 +31,7 @@ DEFAULT_DATA_DIR = os.path.abspath(os.path.join(sys.prefix, 'share', 'cil')) -class TestData: +class TestData(ABC): '''Provides 6 datasets: BOAT: 'boat.tiff' @@ -355,12 +355,48 @@ def scikit_random_noise(image, mode='gaussian', seed=None, clip=True, **kwargs): return out -class _CIL_DATA(ABC): + def get(self, **load_kwargs): + return self.load(type(self).__name__, **load_kwargs) + +class BOAT(TestData): + pass +class CAMERA(TestData): + pass +class PEPPERS(TestData): + pass +class RESOLUTION_CHART(TestData): + pass +class SIMPLE_PHANTOM_2D(TestData): + pass +class SHAPES(TestData): + pass +class RAINBOW(TestData): + pass + +class NexusTestData(TestData): dfile: str - @classmethod - def get(cls, data_dir=DEFAULT_DATA_DIR, **loader_kwargs): - loader = TestData(data_dir) - return loader.load(cls.dfile, **loader_kwargs) + def get(self): + ''' + Returns + ------- + AcquisitionData + ''' + loader = NEXUSDataReader() + loader.set_up(file_name=os.path.join(self.data_dir, self.dfile)) + return loader.read() + +class SYNCHROTRON_PARALLEL_BEAM_DATA(NexusTestData): + '''A DLS dataset''' + dfile = '24737_fd_normalised.nxs' +class SIMULATED_PARALLEL_BEAM_DATA(NexusTestData): + '''A simulated parallel-beam dataset generated from SIMULATED_SPHERE_VOLUME''' + dfile = 'sim_parallel_beam.nxs' +class SIMULATED_CONE_BEAM_DATA(NexusTestData): + '''A cone-beam dataset generated from SIMULATED_SPHERE_VOLUME''' + dfile = 'sim_cone_beam.nxs' +class SIMULATED_SPHERE_VOLUME(NexusTestData): + '''A simulated volume of spheres''' + dfile = 'sim_volume.nxs' class _REMOTE_DATA(ABC): URL: str @@ -398,49 +434,6 @@ def download_data(cls, data_dir): else: print('Download cancelled') -class BOAT(_CIL_DATA): - dfile = TestData.BOAT -class CAMERA(_CIL_DATA): - dfile = TestData.CAMERA -class PEPPERS(_CIL_DATA): - dfile = TestData.PEPPERS -class RESOLUTION_CHART(_CIL_DATA): - dfile = TestData.RESOLUTION_CHART -class SIMPLE_PHANTOM_2D(_CIL_DATA): - dfile = TestData.SIMPLE_PHANTOM_2D -class SHAPES(_CIL_DATA): - dfile = TestData.SHAPES -class RAINBOW(_CIL_DATA): - dfile = TestData.RAINBOW -class _NEXUS_CIL_DATA(_CIL_DATA): - @classmethod - def get(cls, data_dir=DEFAULT_DATA_DIR): - ''' - Parameters - ---------- - data_dir: str, optional - The path to the data directory - - Returns - ------- - AcquisitionData - ''' - loader = NEXUSDataReader() - loader.set_up(file_name=os.path.join(data_dir, cls.dfile)) - return loader.read() -class SYNCHROTRON_PARALLEL_BEAM_DATA(_NEXUS_CIL_DATA): - '''A DLS dataset''' - dfile = '24737_fd_normalised.nxs' -class SIMULATED_PARALLEL_BEAM_DATA(_NEXUS_CIL_DATA): - '''A simulated parallel-beam dataset generated from SIMULATED_SPHERE_VOLUME''' - dfile = 'sim_parallel_beam.nxs' -class SIMULATED_CONE_BEAM_DATA(_NEXUS_CIL_DATA): - '''A cone-beam dataset generated from SIMULATED_SPHERE_VOLUME''' - dfile = 'sim_cone_beam.nxs' -class SIMULATED_SPHERE_VOLUME(_NEXUS_CIL_DATA): - '''A simulated volume of spheres''' - dfile = 'sim_volume.nxs' - class WALNUT(_REMOTE_DATA): '''A microcomputed tomography dataset of a walnut from https://zenodo.org/records/4822516''' URL = 'https://zenodo.org/record/4822516/files/walnut.zip' From f998aac1292fee52874bd5866fc8359531a9013e Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Thu, 14 Mar 2024 00:25:17 +0000 Subject: [PATCH 06/10] inherit data_dir --- Wrappers/Python/cil/utilities/dataexample.py | 84 +++++++------------- 1 file changed, 29 insertions(+), 55 deletions(-) diff --git a/Wrappers/Python/cil/utilities/dataexample.py b/Wrappers/Python/cil/utilities/dataexample.py index fbfa067cf5..3cd9878319 100644 --- a/Wrappers/Python/cil/utilities/dataexample.py +++ b/Wrappers/Python/cil/utilities/dataexample.py @@ -31,7 +31,11 @@ DEFAULT_DATA_DIR = os.path.abspath(os.path.join(sys.prefix, 'share', 'cil')) -class TestData(ABC): +class BaseTestData(ABC): + def __init__(self, data_dir): + self.data_dir = data_dir + +class TestData(BaseTestData): '''Provides 6 datasets: BOAT: 'boat.tiff' @@ -54,9 +58,6 @@ class TestData(ABC): def _datasets(cls): return {cls.BOAT, cls.CAMERA, cls.PEPPERS, cls.RESOLUTION_CHART, cls.SIMPLE_PHANTOM_2D, cls.SHAPES, cls.RAINBOW} - def __init__(self, data_dir): - self.data_dir = data_dir - def load(self, which, size=None, scale=None): ''' Return a test data of the requested image @@ -398,7 +399,7 @@ class SIMULATED_SPHERE_VOLUME(NexusTestData): '''A simulated volume of spheres''' dfile = 'sim_volume.nxs' -class _REMOTE_DATA(ABC): +class RemoteTestData(BaseTestData): URL: str FILE_SIZE: str @@ -408,114 +409,87 @@ def _prompt(msg): pass return res == "y" - @classmethod - def _download_and_extract_from_url(cls, data_dir): - with urlopen(cls.URL) as response: + def _download_and_extract_from_url(self): + with urlopen(self.URL) as response: with BytesIO(response.read()) as bytes, ZipFile(bytes) as zipfile: - zipfile.extractall(path=data_dir) + zipfile.extractall(path=self.data_dir) - @classmethod - def download_data(cls, data_dir): - ''' - Download a dataset from a remote repository - - Parameters - ---------- - data_dir: str, optional - The path to the data directory where the downloaded data should be stored - ''' - if os.path.isdir(os.path.join(data_dir, cls.__name__)): - print(f"Dataset already exists in {data_dir}") + def download_data(self): + '''Download a dataset from a remote repository''' + if os.path.isdir(os.path.join(self.data_dir, type(self).__name__)): + print(f"Dataset already exists in {self.data_dir}") else: - if cls._prompt(f"Are you sure you want to download {cls.FILE_SIZE} dataset from {cls.URL}?"): - print(f"Downloading dataset from {cls.URL}") - cls._download_and_extract_from_url(os.path.join(data_dir, cls.__name__)) + if self._prompt(f"Are you sure you want to download {self.FILE_SIZE} dataset from {self.URL}?"): + print(f"Downloading dataset from {self.URL}") + self._download_and_extract_from_url(os.path.join(self.data_dir, type(self).__name__)) print('Download complete') else: print('Download cancelled') -class WALNUT(_REMOTE_DATA): +class WALNUT(RemoteTestData): '''A microcomputed tomography dataset of a walnut from https://zenodo.org/records/4822516''' URL = 'https://zenodo.org/record/4822516/files/walnut.zip' FILE_SIZE = '6.4 GB' - @classmethod - def get(cls, data_dir): + def get(self): ''' This function returns the raw projection data from the .txrm file - Parameters - ---------- - data_dir: str - The path to the directory where the dataset is stored. Data can be downloaded with dataexample.WALNUT.download_data(data_dir) - Returns ------- ImageData The walnut dataset ''' - filepath = os.path.join(data_dir, cls.__name__, 'valnut','valnut_2014-03-21_643_28','tomo-A','valnut_tomo-A.txrm') + filepath = os.path.join(self.data_dir, type(self).__name__, 'valnut','valnut_2014-03-21_643_28','tomo-A','valnut_tomo-A.txrm') try: loader = ZEISSDataReader(file_name=filepath) return loader.read() except FileNotFoundError as exc: - raise ValueError(f"Specify a different data_dir or download data with `{cls.__name__}.download_data({data_dir})`") from exc + raise ValueError(f"Specify a different data_dir or download data with `{type(self).__name__}.download_data({self.data_dir})`") from exc -class USB(_REMOTE_DATA): +class USB(RemoteTestData): '''A microcomputed tomography dataset of a usb memory stick from https://zenodo.org/records/4822516''' URL = 'https://zenodo.org/record/4822516/files/usb.zip' FILE_SIZE = '3.2 GB' - @classmethod - def get(cls, data_dir): + def get(self): ''' This function returns the raw projection data from the .txrm file - Parameters - ---------- - data_dir: str - The path to the directory where the dataset is stored. Data can be downloaded with dataexample.USB.download_data(data_dir) - Returns ------- ImageData The usb dataset ''' - filepath = os.path.join(data_dir, cls.__name__, 'gruppe 4','gruppe 4_2014-03-20_1404_12','tomo-A','gruppe 4_tomo-A.txrm') + filepath = os.path.join(self.data_dir, type(self).__name__, 'gruppe 4','gruppe 4_2014-03-20_1404_12','tomo-A','gruppe 4_tomo-A.txrm') try: loader = ZEISSDataReader(file_name=filepath) return loader.read() except FileNotFoundError as exc: - raise ValueError(f"Specify a different data_dir or download data with `{cls.__name__}.download_data({data_dir})`") from exc + raise ValueError(f"Specify a different data_dir or download data with `{type(self).__name__}.download_data({self.data_dir})`") from exc -class KORN(_REMOTE_DATA): +class KORN(RemoteTestData): '''A microcomputed tomography dataset of a sunflower seeds in a box from https://zenodo.org/records/6874123''' URL = 'https://zenodo.org/record/6874123/files/korn.zip' FILE_SIZE = '2.9 GB' - @classmethod - def get(cls, data_dir): + def get(self): ''' This function returns the raw projection data from the .xtekct file - Parameters - ---------- - data_dir: str - The path to the directory where the dataset is stored. Data can be downloaded with dataexample.KORN.download_data(data_dir) - Returns ------- ImageData The korn dataset ''' - filepath = os.path.join(data_dir, cls.__name__, 'Korn i kasse','47209 testscan korn01_recon.xtekct') + filepath = os.path.join(self.data_dir, type(self).__name__, 'Korn i kasse','47209 testscan korn01_recon.xtekct') try: loader = NikonDataReader(file_name=filepath) return loader.read() except FileNotFoundError as exc: - raise ValueError(f"Specify a different data_dir or download data with `{cls.__name__}.download_data({data_dir})`") from exc + raise ValueError(f"Specify a different data_dir or download data with `{type(self).__name__}.download_data({self.data_dir})`") from exc -class SANDSTONE(_REMOTE_DATA): +class SANDSTONE(RemoteTestData): ''' A synchrotron x-ray tomography dataset of sandstone from https://zenodo.org/records/4912435 A small subset of the data containing selected projections and 4 slices of the reconstruction From e2e6ac6301a2f3d7067cdbef20813d6f92036fc8 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Thu, 14 Mar 2024 00:50:06 +0000 Subject: [PATCH 07/10] revert to @classmethod get() --- Wrappers/Python/cil/utilities/dataexample.py | 56 ++++++++++++-------- 1 file changed, 33 insertions(+), 23 deletions(-) diff --git a/Wrappers/Python/cil/utilities/dataexample.py b/Wrappers/Python/cil/utilities/dataexample.py index 3cd9878319..addea26aaa 100644 --- a/Wrappers/Python/cil/utilities/dataexample.py +++ b/Wrappers/Python/cil/utilities/dataexample.py @@ -29,10 +29,11 @@ from cil.io import NEXUSDataReader, NikonDataReader, ZEISSDataReader from abc import ABC + DEFAULT_DATA_DIR = os.path.abspath(os.path.join(sys.prefix, 'share', 'cil')) class BaseTestData(ABC): - def __init__(self, data_dir): + def __init__(self, data_dir=DEFAULT_DATA_DIR): self.data_dir = data_dir class TestData(BaseTestData): @@ -53,6 +54,7 @@ class TestData(BaseTestData): SIMPLE_PHANTOM_2D = 'hotdog' SHAPES = 'shapes.png' RAINBOW = 'rainbow.png' + dfile: str @classmethod def _datasets(cls): @@ -356,34 +358,36 @@ def scikit_random_noise(image, mode='gaussian', seed=None, clip=True, **kwargs): return out - def get(self, **load_kwargs): - return self.load(type(self).__name__, **load_kwargs) + @classmethod + def get(cls, data_dir=DEFAULT_DATA_DIR, **load_kwargs): + """Calls cls(data_dir).load(cls.dfile, **load_kwargs)""" + return cls(data_dir).load(cls.dfile, **load_kwargs) class BOAT(TestData): - pass + dfile = TestData.BOAT class CAMERA(TestData): - pass + dfile = TestData.CAMERA class PEPPERS(TestData): - pass + dfile = TestData.PEPPERS class RESOLUTION_CHART(TestData): - pass + dfile = TestData.RESOLUTION_CHART class SIMPLE_PHANTOM_2D(TestData): - pass + dfile = TestData.SIMPLE_PHANTOM_2D class SHAPES(TestData): - pass + dfile = TestData.SHAPES class RAINBOW(TestData): - pass + dfile = TestData.RAINBOW -class NexusTestData(TestData): - dfile: str - def get(self): +class NexusTestData(BaseTestData): + @classmethod + def get(cls, data_dir=DEFAULT_DATA_DIR): ''' Returns ------- AcquisitionData ''' loader = NEXUSDataReader() - loader.set_up(file_name=os.path.join(self.data_dir, self.dfile)) + loader.set_up(file_name=os.path.join(data_dir, cls.dfile)) return loader.read() class SYNCHROTRON_PARALLEL_BEAM_DATA(NexusTestData): @@ -431,7 +435,8 @@ class WALNUT(RemoteTestData): URL = 'https://zenodo.org/record/4822516/files/walnut.zip' FILE_SIZE = '6.4 GB' - def get(self): + @classmethod + def get(cls, data_dir=DEFAULT_DATA_DIR): ''' This function returns the raw projection data from the .txrm file @@ -440,19 +445,21 @@ def get(self): ImageData The walnut dataset ''' - filepath = os.path.join(self.data_dir, type(self).__name__, 'valnut','valnut_2014-03-21_643_28','tomo-A','valnut_tomo-A.txrm') + self = cls(data_dir) + filepath = os.path.join(self.data_dir, cls.__name__, 'valnut','valnut_2014-03-21_643_28','tomo-A','valnut_tomo-A.txrm') try: loader = ZEISSDataReader(file_name=filepath) return loader.read() except FileNotFoundError as exc: - raise ValueError(f"Specify a different data_dir or download data with `{type(self).__name__}.download_data({self.data_dir})`") from exc + raise ValueError(f"Specify a different data_dir or download data with `{cls.__name__}.download_data({self.data_dir})`") from exc class USB(RemoteTestData): '''A microcomputed tomography dataset of a usb memory stick from https://zenodo.org/records/4822516''' URL = 'https://zenodo.org/record/4822516/files/usb.zip' FILE_SIZE = '3.2 GB' - def get(self): + @classmethod + def get(cls, data_dir=DEFAULT_DATA_DIR): ''' This function returns the raw projection data from the .txrm file @@ -461,19 +468,21 @@ def get(self): ImageData The usb dataset ''' - filepath = os.path.join(self.data_dir, type(self).__name__, 'gruppe 4','gruppe 4_2014-03-20_1404_12','tomo-A','gruppe 4_tomo-A.txrm') + self = cls(data_dir) + filepath = os.path.join(self.data_dir, cls.__name__, 'gruppe 4','gruppe 4_2014-03-20_1404_12','tomo-A','gruppe 4_tomo-A.txrm') try: loader = ZEISSDataReader(file_name=filepath) return loader.read() except FileNotFoundError as exc: - raise ValueError(f"Specify a different data_dir or download data with `{type(self).__name__}.download_data({self.data_dir})`") from exc + raise ValueError(f"Specify a different data_dir or download data with `{cls.__name__}.download_data({self.data_dir})`") from exc class KORN(RemoteTestData): '''A microcomputed tomography dataset of a sunflower seeds in a box from https://zenodo.org/records/6874123''' URL = 'https://zenodo.org/record/6874123/files/korn.zip' FILE_SIZE = '2.9 GB' - def get(self): + @classmethod + def get(cls, data_dir=DEFAULT_DATA_DIR): ''' This function returns the raw projection data from the .xtekct file @@ -482,12 +491,13 @@ def get(self): ImageData The korn dataset ''' - filepath = os.path.join(self.data_dir, type(self).__name__, 'Korn i kasse','47209 testscan korn01_recon.xtekct') + self = cls(data_dir) + filepath = os.path.join(self.data_dir, cls.__name__, 'Korn i kasse','47209 testscan korn01_recon.xtekct') try: loader = NikonDataReader(file_name=filepath) return loader.read() except FileNotFoundError as exc: - raise ValueError(f"Specify a different data_dir or download data with `{type(self).__name__}.download_data({self.data_dir})`") from exc + raise ValueError(f"Specify a different data_dir or download data with `{cls.__name__}.download_data({self.data_dir})`") from exc class SANDSTONE(RemoteTestData): ''' From de08d2791427834f3bd194f919f2660e46336528 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Thu, 14 Mar 2024 01:21:19 +0000 Subject: [PATCH 08/10] drop _download_and_extract_from_url --- Wrappers/Python/cil/utilities/dataexample.py | 19 +++----- Wrappers/Python/test/test_dataexample.py | 51 +++++++++----------- 2 files changed, 32 insertions(+), 38 deletions(-) diff --git a/Wrappers/Python/cil/utilities/dataexample.py b/Wrappers/Python/cil/utilities/dataexample.py index addea26aaa..5e7dcf8885 100644 --- a/Wrappers/Python/cil/utilities/dataexample.py +++ b/Wrappers/Python/cil/utilities/dataexample.py @@ -413,19 +413,16 @@ def _prompt(msg): pass return res == "y" - def _download_and_extract_from_url(self): - with urlopen(self.URL) as response: - with BytesIO(response.read()) as bytes, ZipFile(bytes) as zipfile: - zipfile.extractall(path=self.data_dir) - def download_data(self): '''Download a dataset from a remote repository''' - if os.path.isdir(os.path.join(self.data_dir, type(self).__name__)): - print(f"Dataset already exists in {self.data_dir}") + folder = os.path.join(self.data_dir, type(self).__name__) + if os.path.isdir(folder): + print(f"Dataset already exists in {folder}") else: if self._prompt(f"Are you sure you want to download {self.FILE_SIZE} dataset from {self.URL}?"): print(f"Downloading dataset from {self.URL}") - self._download_and_extract_from_url(os.path.join(self.data_dir, type(self).__name__)) + with urlopen(self.URL) as response, BytesIO(response.read()) as bytes, ZipFile(bytes) as zipfile: + zipfile.extractall(path=folder) print('Download complete') else: print('Download cancelled') @@ -451,7 +448,7 @@ def get(cls, data_dir=DEFAULT_DATA_DIR): loader = ZEISSDataReader(file_name=filepath) return loader.read() except FileNotFoundError as exc: - raise ValueError(f"Specify a different data_dir or download data with `{cls.__name__}.download_data({self.data_dir})`") from exc + raise ValueError(f"Specify a different data_dir or download data with `{cls.__name__}({data_dir}).download_data()`") from exc class USB(RemoteTestData): '''A microcomputed tomography dataset of a usb memory stick from https://zenodo.org/records/4822516''' @@ -474,7 +471,7 @@ def get(cls, data_dir=DEFAULT_DATA_DIR): loader = ZEISSDataReader(file_name=filepath) return loader.read() except FileNotFoundError as exc: - raise ValueError(f"Specify a different data_dir or download data with `{cls.__name__}.download_data({self.data_dir})`") from exc + raise ValueError(f"Specify a different data_dir or download data with `{cls.__name__}({data_dir}).download_data()`") from exc class KORN(RemoteTestData): '''A microcomputed tomography dataset of a sunflower seeds in a box from https://zenodo.org/records/6874123''' @@ -497,7 +494,7 @@ def get(cls, data_dir=DEFAULT_DATA_DIR): loader = NikonDataReader(file_name=filepath) return loader.read() except FileNotFoundError as exc: - raise ValueError(f"Specify a different data_dir or download data with `{cls.__name__}.download_data({self.data_dir})`") from exc + raise ValueError(f"Specify a different data_dir or download data with `{cls.__name__}({data_dir}).download_data()`") from exc class SANDSTONE(RemoteTestData): ''' diff --git a/Wrappers/Python/test/test_dataexample.py b/Wrappers/Python/test/test_dataexample.py index 474a6f37f7..afd5f74eb6 100644 --- a/Wrappers/Python/test/test_dataexample.py +++ b/Wrappers/Python/test/test_dataexample.py @@ -156,7 +156,6 @@ def test_load_SIMULATED_CONE_BEAM_DATA(self): class TestRemoteData(unittest.TestCase): def setUp(self): - self.data_list = ['WALNUT','USB','KORN','SANDSTONE'] self.tmp_file = 'tmp.txt' self.tmp_zip = 'tmp.zip' @@ -166,10 +165,10 @@ def setUp(self): self.zipped_bytes = zipped_file.read() def tearDown(self): - for data in self.data_list: - test_func = getattr(dataexample, data) - if os.path.exists(os.path.join(test_func.FOLDER)): - shutil.rmtree(test_func.FOLDER) + for data in self.data_list + ['REMOTE_TEST']: + folder = os.path.join(dataexample.DEFAULT_DATA_DIR, data) + if os.path.exists(folder): + shutil.rmtree(folder) if os.path.exists(self.tmp_zip): os.remove(self.tmp_zip) @@ -183,28 +182,31 @@ def mock_urlopen(self, mock_urlopen): mock_response.__enter__.return_value = mock_response mock_urlopen.return_value = mock_response + @patch('cil.utilities.dataexample.input', return_value='y') @patch('cil.utilities.dataexample.urlopen') - def test_unzip_remote_data(self, mock_urlopen): + def test_unzip_remote_data(self, mock_urlopen, input): self.mock_urlopen(mock_urlopen) - self.assertFalse(os.path.isfile(self.tmp_file)) - class RemoteData(dataexample._REMOTE_DATA): + sys.stdout = StringIO() # redirect print output + + fname = os.path.join(dataexample.DEFAULT_DATA_DIR, 'REMOTE_TEST', self.tmp_file) + self.assertFalse(os.path.isfile(fname)) + class REMOTE_TEST(dataexample.RemoteTestData): URL = '' - RemoteData._download_and_extract_from_url('.') - self.assertTrue(os.path.isfile(self.tmp_file)) - os.remove(self.tmp_file) + FILE_SIZE = '0 B' + REMOTE_TEST().download_data() + self.assertTrue(os.path.isfile(fname)) + os.remove(fname) + + sys.stdout = sys.__stdout__ # return to standard print output @patch('cil.utilities.dataexample.input', return_value='n') @patch('cil.utilities.dataexample.urlopen') def test_download_data_input_n(self, mock_urlopen, input): self.mock_urlopen(mock_urlopen) - - data_list = ['WALNUT','USB','KORN','SANDSTONE'] - for data in data_list: - # redirect print output - capturedOutput = StringIO() - sys.stdout = capturedOutput + for data in self.data_list: + sys.stdout = capturedOutput = StringIO() # redirect print output test_func = getattr(dataexample, data) - test_func.download_data('.') + test_func().download_data() self.assertFalse(os.path.isfile(self.tmp_file)) self.assertEqual(capturedOutput.getvalue(),'Download cancelled\n') @@ -216,19 +218,14 @@ def test_download_data_input_n(self, mock_urlopen, input): @patch('cil.utilities.dataexample.urlopen') def test_download_data_input_y(self, mock_urlopen, input): self.mock_urlopen(mock_urlopen) - - # redirect print output - capturedOutput = StringIO() - sys.stdout = capturedOutput - + sys.stdout = StringIO() # redirect print output for data in self.data_list: test_func = getattr(dataexample, data) - fname = os.path.join(test_func.FOLDER, self.tmp_file) + fname = os.path.join(dataexample.DEFAULT_DATA_DIR, data, self.tmp_file) self.assertFalse(os.path.isfile(fname)) - test_func.download_data('.') + test_func().download_data() self.assertTrue(os.path.isfile(fname)) os.remove(fname) - # return to standard print output - sys.stdout = sys.__stdout__ + sys.stdout = sys.__stdout__ # return to standard print output From 4466b52e32f09f44ac424d96c287a57372b35141 Mon Sep 17 00:00:00 2001 From: Hannah Robarts <77114597+hrobarts@users.noreply.github.com> Date: Thu, 14 Mar 2024 11:28:47 +0000 Subject: [PATCH 09/10] Update Wrappers/Python/cil/utilities/dataexample.py Co-authored-by: Casper da Costa-Luis Signed-off-by: Hannah Robarts <77114597+hrobarts@users.noreply.github.com> --- Wrappers/Python/cil/utilities/dataexample.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/Wrappers/Python/cil/utilities/dataexample.py b/Wrappers/Python/cil/utilities/dataexample.py index 5e7dcf8885..98c6138536 100644 --- a/Wrappers/Python/cil/utilities/dataexample.py +++ b/Wrappers/Python/cil/utilities/dataexample.py @@ -444,11 +444,9 @@ def get(cls, data_dir=DEFAULT_DATA_DIR): ''' self = cls(data_dir) filepath = os.path.join(self.data_dir, cls.__name__, 'valnut','valnut_2014-03-21_643_28','tomo-A','valnut_tomo-A.txrm') - try: - loader = ZEISSDataReader(file_name=filepath) - return loader.read() - except FileNotFoundError as exc: - raise ValueError(f"Specify a different data_dir or download data with `{cls.__name__}({data_dir}).download_data()`") from exc + self.download_data() + loader = ZEISSDataReader(file_name=filepath) + return loader.read() class USB(RemoteTestData): '''A microcomputed tomography dataset of a usb memory stick from https://zenodo.org/records/4822516''' From 1c0ae813e087c8426585b388ba6c974f8f334f70 Mon Sep 17 00:00:00 2001 From: hrobarts Date: Thu, 14 Mar 2024 12:15:39 +0000 Subject: [PATCH 10/10] Add get method for SANDSTONE --- Wrappers/Python/cil/utilities/dataexample.py | 48 +++++++++++++++----- 1 file changed, 37 insertions(+), 11 deletions(-) diff --git a/Wrappers/Python/cil/utilities/dataexample.py b/Wrappers/Python/cil/utilities/dataexample.py index 98c6138536..f410b91e87 100644 --- a/Wrappers/Python/cil/utilities/dataexample.py +++ b/Wrappers/Python/cil/utilities/dataexample.py @@ -28,6 +28,9 @@ from io import BytesIO from cil.io import NEXUSDataReader, NikonDataReader, ZEISSDataReader from abc import ABC +from matplotlib.pyplot import imread +from scipy.io import loadmat +from pathlib import Path DEFAULT_DATA_DIR = os.path.abspath(os.path.join(sys.prefix, 'share', 'cil')) @@ -465,11 +468,9 @@ def get(cls, data_dir=DEFAULT_DATA_DIR): ''' self = cls(data_dir) filepath = os.path.join(self.data_dir, cls.__name__, 'gruppe 4','gruppe 4_2014-03-20_1404_12','tomo-A','gruppe 4_tomo-A.txrm') - try: - loader = ZEISSDataReader(file_name=filepath) - return loader.read() - except FileNotFoundError as exc: - raise ValueError(f"Specify a different data_dir or download data with `{cls.__name__}({data_dir}).download_data()`") from exc + self.download_data() + loader = ZEISSDataReader(file_name=filepath) + return loader.read() class KORN(RemoteTestData): '''A microcomputed tomography dataset of a sunflower seeds in a box from https://zenodo.org/records/6874123''' @@ -488,12 +489,10 @@ def get(cls, data_dir=DEFAULT_DATA_DIR): ''' self = cls(data_dir) filepath = os.path.join(self.data_dir, cls.__name__, 'Korn i kasse','47209 testscan korn01_recon.xtekct') - try: - loader = NikonDataReader(file_name=filepath) - return loader.read() - except FileNotFoundError as exc: - raise ValueError(f"Specify a different data_dir or download data with `{cls.__name__}({data_dir}).download_data()`") from exc - + self.download_data() + loader = NikonDataReader(file_name=filepath) + return loader.read() + class SANDSTONE(RemoteTestData): ''' A synchrotron x-ray tomography dataset of sandstone from https://zenodo.org/records/4912435 @@ -501,3 +500,30 @@ class SANDSTONE(RemoteTestData): ''' URL = 'https://zenodo.org/records/4912435/files/small.zip' FILE_SIZE = '227 MB' + + @classmethod + def get(cls, filename, data_dir=DEFAULT_DATA_DIR): + ''' + This function returns data from a specified file in the sandstone folder + Parameters + ---------- + filename : str + filename of the data to get, specify the filepath within the sandstone folder e.g. 'slice_0270_data.mat' or 'proj/BBii_0001.tif' + + Returns + ------- + DataContainer + Data from the sandstone dataset + ''' + self = cls(data_dir) + filepath = os.path.join(self.data_dir, cls.__name__, filename) + print(filepath) + self.download_data() + if Path(filename).suffix == '.tif': + return imread(filepath) + + elif Path(filename).suffix == '.mat': + return loadmat(filepath) + + else: + raise ValueError('{0} file type not recognised'.format( Path(filename).suffix) ) \ No newline at end of file