From 2565e56c0a44595a25ce0eea682025dc7e38b201 Mon Sep 17 00:00:00 2001 From: Janusz Lisiecki Date: Wed, 23 Oct 2024 14:52:47 +0200 Subject: [PATCH] Move from deprecated distutils to packaging - converts all calls from distutils to packaging, as distutils is deprecated since Python 3.10 (PEP 632) and removed from the standard library in Python 3.12 Signed-off-by: Janusz Lisiecki --- Acknowledgements.txt | 34 ++ conda/dali_python_bindings/recipe/meta.yaml | 2 + .../nvidia/dali/_autograph/pyct/gast_util.py | 12 +- .../python/nvidia/dali/plugin/jax/__init__.py | 4 +- .../dali/plugin/jax/fn/_jax_function_impl.py | 4 +- .../plugin/numba/experimental/__init__.py | 14 +- dali/python/nvidia/dali/plugin/paddle.py | 8 +- dali/python/nvidia/dali/plugin/tf.py | 14 +- dali/python/setup.py.in | 1 + .../test/python/autograph/pyct/test_loader.py | 4 +- .../test/python/test_dali_tf_dataset_mnist.py | 6 +- .../test_dali_tf_dataset_mnist_eager.py | 4 +- .../test_dali_tf_dataset_mnist_graph.py | 12 +- dali/test/python/test_utils.py | 6 +- dali_tf_plugin/build_dali_tf.sh | 2 +- dali_tf_plugin/dali_tf_plugin_install_tool.py | 10 +- dali_tf_plugin/dali_tf_plugin_utils.py | 4 +- dali_tf_plugin/setup.py.in | 3 +- .../use_cases/paddle/resnet50/utils/config.py | 486 ++++++++++-------- .../tensorflow/resnet-n/nvutils/common.py | 6 +- .../tensorflow/resnet-n/nvutils/hvd_patch.py | 4 +- .../tensorflow/resnet-n/nvutils/runner.py | 10 +- .../tensorflow/resnet-n/nvutils/runner_ctl.py | 8 +- qa/TL1_tensorflow_dataset/test_impl.sh | 8 +- 24 files changed, 391 insertions(+), 275 deletions(-) diff --git a/Acknowledgements.txt b/Acknowledgements.txt index 5ab2b5ce7a8..6cae0805595 100644 --- a/Acknowledgements.txt +++ b/Acknowledgements.txt @@ -4410,3 +4410,37 @@ products or services of Licensee, or any third party. 8. By copying, installing or otherwise using Python, Licensee agrees to be bound by the terms and conditions of this License Agreement. + +============================================================================== +str2bool + + +BSD 3-Clause License + +Copyright (c) 2017, SymonSoft +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
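For readers less familiar with packaging, the sketch below shows the substitution this patch applies throughout: distutils.version.LooseVersion/StrictVersion comparisons become packaging.version.Version comparisons, and distutils.util.strtobool is replaced by the vendored str2bool helper in config.py. This is a minimal illustrative sketch only, not part of the patch; the tf_version string is a stand-in for tf.__version__.

# Minimal sketch of the migration pattern (illustration only, not part of the patch).
from packaging.version import Version

# Before (deprecated since Python 3.10, removed from the standard library in 3.12):
#   from distutils.version import LooseVersion
#   ok = LooseVersion(tf.__version__) >= LooseVersion("2.4.1")
# After:
tf_version = "2.12.0"  # stand-in for tf.__version__
ok = Version(tf_version) >= Version("2.4.1")  # PEP 440-aware comparison
print(ok)  # True

One behavioral difference worth noting: Version only accepts PEP 440 version strings and raises packaging.version.InvalidVersion otherwise, whereas LooseVersion accepted almost any string.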
diff --git a/conda/dali_python_bindings/recipe/meta.yaml b/conda/dali_python_bindings/recipe/meta.yaml index 9ccd2dca1b8..a15e07c2035 100644 --- a/conda/dali_python_bindings/recipe/meta.yaml +++ b/conda/dali_python_bindings/recipe/meta.yaml @@ -81,6 +81,7 @@ requirements: - astunparse >=1.6.0 - gast >=0.3.3 - dm-tree >=0.1.8 + - packaging - nvidia-dali-core{% if environ.get('NVIDIA_DALI_BUILD_FLAVOR', '')|length %}{{"-" + environ.get('NVIDIA_DALI_BUILD_FLAVOR', '')}}{% endif %}-cuda{{ environ.get('CUDA_VERSION', '') | replace(".","") }} ={{ environ.get('DALI_CONDA_BUILD_VERSION', '') }} - nvidia-nvimagecodec-cuda{{ environ.get('CUDA_VERSION', '') | replace(".","") }} run: @@ -94,6 +95,7 @@ requirements: - astunparse >=1.6.0 - gast >=0.3.3 - dm-tree >=0.1.8 + - packaging - nvidia-dali-core{% if environ.get('NVIDIA_DALI_BUILD_FLAVOR', '')|length %}{{"-" + environ.get('NVIDIA_DALI_BUILD_FLAVOR', '')}}{% endif %}-cuda{{ environ.get('CUDA_VERSION', '') | replace(".","") }} ={{ environ.get('DALI_CONDA_BUILD_VERSION', '') }} - nvidia-nvimagecodec-cuda{{ environ.get('CUDA_VERSION', '') | replace(".","") }} about: diff --git a/dali/python/nvidia/dali/_autograph/pyct/gast_util.py b/dali/python/nvidia/dali/_autograph/pyct/gast_util.py index 3424a7be963..095ac4869b0 100644 --- a/dali/python/nvidia/dali/_autograph/pyct/gast_util.py +++ b/dali/python/nvidia/dali/_autograph/pyct/gast_util.py @@ -18,13 +18,13 @@ import functools import gast -from distutils.version import LooseVersion +from packaging.version import Version def get_gast_version(): """Gast exports `__version__` from 0.5.3 onwards, we need to look it up in a different way.""" if hasattr(gast, "__version__"): - return gast.__version__ + return Version(gast.__version__) try: import pkg_resources @@ -32,15 +32,15 @@ def get_gast_version(): except pkg_resources.DistributionNotFound: # Older gast had 'Str', check for the oldest supported version if hasattr(gast, "Str"): - return "0.2" + return Version("0.2") else: try: # Try to call it with 3 arguments, to differentiate between 0.5+ and earlier. gast.Assign(None, None, None) except AssertionError as e: if "Bad argument number for Assign: 3, expecting 2" in str(e): - return "0.4" - return "0.5" + return Version("0.4") + return Version("0.5") def is_constant(node): @@ -76,7 +76,7 @@ def _compat_assign_gast_5(targets, value, type_comment): return gast.Assign(targets=targets, value=value, type_comment=type_comment) -if get_gast_version() < LooseVersion("0.5"): +if get_gast_version() < Version("0.5"): compat_assign = _compat_assign_gast_4 else: compat_assign = _compat_assign_gast_5 diff --git a/dali/python/nvidia/dali/plugin/jax/__init__.py b/dali/python/nvidia/dali/plugin/jax/__init__.py index edd29ebc795..08945e3743c 100644 --- a/dali/python/nvidia/dali/plugin/jax/__init__.py +++ b/dali/python/nvidia/dali/plugin/jax/__init__.py @@ -16,7 +16,7 @@ from . 
import fn # noqa: F401 -from distutils.version import LooseVersion +from packaging.version import Version from .iterator import DALIGenericIterator, data_iterator assert ( @@ -24,7 +24,7 @@ ), "DALI JAX support requires Python 3.8 or above" -assert LooseVersion(jax.__version__) >= LooseVersion( +assert Version(jax.__version__) >= Version( "0.4.11" ), "DALI JAX support requires JAX 0.4.11 or above" diff --git a/dali/python/nvidia/dali/plugin/jax/fn/_jax_function_impl.py b/dali/python/nvidia/dali/plugin/jax/fn/_jax_function_impl.py index ccb5126a355..06caa66bbec 100644 --- a/dali/python/nvidia/dali/plugin/jax/fn/_jax_function_impl.py +++ b/dali/python/nvidia/dali/plugin/jax/fn/_jax_function_impl.py @@ -14,7 +14,7 @@ from typing import Optional, Protocol, Tuple, Union -from distutils.version import LooseVersion +from packaging.version import Version import jax import jax.dlpack @@ -170,7 +170,7 @@ def flip_horizontal(image: jax.Array): The transformed function that processes DALI-traced batches (DataNodes). """ - if LooseVersion(jax.__version__) < LooseVersion("0.4.16"): + if Version(jax.__version__) < Version("0.4.16"): raise RuntimeError("DALI `jax_function` requires JAX 0.4.16 or above.") def decorator(function): diff --git a/dali/python/nvidia/dali/plugin/numba/experimental/__init__.py b/dali/python/nvidia/dali/plugin/numba/experimental/__init__.py index 6e7a4b7733b..3db1f654fdd 100644 --- a/dali/python/nvidia/dali/plugin/numba/experimental/__init__.py +++ b/dali/python/nvidia/dali/plugin/numba/experimental/__init__.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from distutils.version import LooseVersion +from packaging.version import Version from nvidia.dali.pipeline import Pipeline from nvidia.dali.data_node import DataNode as _DataNode @@ -57,8 +57,8 @@ # Minimal version of Numba that is required for Numba GPU operator to work minimal_numba_version = { - 11: LooseVersion("0.55.2"), - 12: LooseVersion("0.57.0"), + 11: Version("0.55.2"), + 12: Version("0.57.0"), } @@ -196,7 +196,7 @@ def _get_run_fn_gpu(self, run_fn, types, dims): for dali_type, ndim in zip(types, dims): cuda_arguments.append(numba_types.Array(_to_numba[dali_type], ndim, "C")) - if LooseVersion(nb.__version__) < LooseVersion("0.57.0"): + if Version(nb.__version__) < Version("0.57.0"): cres = cuda.compiler.compile_cuda(run_fn, numba_types.void, cuda_arguments) else: pipeline = Pipeline.current() @@ -210,7 +210,7 @@ def _get_run_fn_gpu(self, run_fn, types, dims): code = run_fn.__code__ filename = code.co_filename linenum = code.co_firstlineno - if LooseVersion(nb.__version__) < LooseVersion("0.57.0"): + if Version(nb.__version__) < Version("0.57.0"): nvvm_options["debug"] = False nvvm_options["lineinfo"] = False lib, _ = tgt_ctx.prepare_cuda_kernel( @@ -509,7 +509,7 @@ def __init__( @staticmethod def _check_minimal_numba_version(throw: bool = True): - current_version = LooseVersion(nb.__version__) + current_version = Version(nb.__version__) toolkit_version = cuda.runtime.get_version() if toolkit_version[0] not in minimal_numba_version: if throw: @@ -522,7 +522,7 @@ def _check_minimal_numba_version(throw: bool = True): raise RuntimeError( f"Insufficient Numba version. Numba GPU operator " f"requires Numba {str(min_ver)} or higher. " - f"Detected version: {str(LooseVersion(nb.__version__))}." + f"Detected version: {str(Version(nb.__version__))}." 
) else: return False diff --git a/dali/python/nvidia/dali/plugin/paddle.py b/dali/python/nvidia/dali/plugin/paddle.py index 04fc1208f46..b3989d69d59 100644 --- a/dali/python/nvidia/dali/plugin/paddle.py +++ b/dali/python/nvidia/dali/plugin/paddle.py @@ -18,7 +18,7 @@ import numpy as np import paddle -from distutils.version import LooseVersion +from packaging.version import Version from nvidia.dali import types from nvidia.dali.backend import TensorListCPU, TensorGPU, TensorListGPU @@ -26,11 +26,9 @@ from nvidia.dali.plugin.base_iterator import LastBatchPolicy if isinstance(paddle.__version__, str): - assert LooseVersion(paddle.__version__) == LooseVersion("0.0.0") or LooseVersion( + assert Version(paddle.__version__) == Version("0.0.0") or Version( paddle.__version__ - ) >= LooseVersion( - "2.0.0" - ), "DALI PaddlePaddle support requires Paddle develop or release >= 2.0.0" + ) >= Version("2.0.0"), "DALI PaddlePaddle support requires Paddle develop or release >= 2.0.0" dtype_map = { diff --git a/dali/python/nvidia/dali/plugin/tf.py b/dali/python/nvidia/dali/plugin/tf.py index 460f25b28b6..7ae887e79d3 100644 --- a/dali/python/nvidia/dali/plugin/tf.py +++ b/dali/python/nvidia/dali/plugin/tf.py @@ -26,7 +26,7 @@ from nvidia.dali._utils.external_source_impl import _get_generator_from_source_desc from nvidia.dali._utils.external_source_impl import _cycle_enabled -from distutils.version import LooseVersion +from packaging.version import Version import warnings from nvidia.dali_tf_plugin import dali_tf_plugin @@ -307,29 +307,29 @@ def DALIRawIterator(): def _get_tf_version(): - return LooseVersion(tf.__version__) + return Version(tf.__version__) -MIN_TENSORFLOW_VERSION = LooseVersion("1.15") +MIN_TENSORFLOW_VERSION = Version("1.15") def dataset_compatible_tensorflow(): """Returns ``True`` if current TensorFlow version is compatible with DALIDataset.""" - return LooseVersion(tf.__version__) >= MIN_TENSORFLOW_VERSION + return Version(tf.__version__) >= MIN_TENSORFLOW_VERSION def dataset_inputs_compatible_tensorflow(): """Returns ``True`` if the current TensorFlow version is compatible with experimental.DALIDatasetWithInputs and input Datasets can be used with DALI. """ - return LooseVersion(tf.__version__) >= LooseVersion("2.4.1") + return Version(tf.__version__) >= Version("2.4.1") def dataset_distributed_compatible_tensorflow(): """Returns ``True`` if the tf.distribute APIs for current TensorFlow version are compatible with DALIDataset. 
""" - return LooseVersion(tf.__version__) >= LooseVersion("2.5.0") + return Version(tf.__version__) >= Version("2.5.0") def _get_experimental(): @@ -813,7 +813,7 @@ def _as_variant_tensor(self): fail_on_device_mismatch=self._fail_on_device_mismatch, ) - if _get_tf_version() < LooseVersion("2.0"): + if _get_tf_version() < Version("2.0"): class _DALIDatasetImpl(dataset_ops.DatasetV1Adapter): @functools.wraps(_DALIDatasetV2.__init__) diff --git a/dali/python/setup.py.in b/dali/python/setup.py.in index 5014584b599..c8ff4cad961 100644 --- a/dali/python/setup.py.in +++ b/dali/python/setup.py.in @@ -87,6 +87,7 @@ For more details please check the # 1.16 on python 3.12 due to import six.moves 'six >= 1.16', 'dm-tree', + packaging, @DALI_INSTALL_REQUIRES_NVIMGCODEC@ ], ) diff --git a/dali/test/python/autograph/pyct/test_loader.py b/dali/test/python/autograph/pyct/test_loader.py index 088e966fa3c..bcd33d2c4e2 100644 --- a/dali/test/python/autograph/pyct/test_loader.py +++ b/dali/test/python/autograph/pyct/test_loader.py @@ -21,7 +21,7 @@ import unittest import gast -from distutils.version import LooseVersion +from packaging.version import Version from nvidia.dali._autograph.pyct import ast_util from nvidia.dali._autograph.pyct import gast_util @@ -79,7 +79,7 @@ def test_load_ast(self): decorator_list=[], returns=None, type_comment=None, - **{"type_params": []} if gast_util.get_gast_version() >= LooseVersion("0.5.5") else {}, + **{"type_params": []} if gast_util.get_gast_version() >= Version("0.5.5") else {}, ) module, source, _ = loader.load_ast(node) diff --git a/dali/test/python/test_dali_tf_dataset_mnist.py b/dali/test/python/test_dali_tf_dataset_mnist.py index 999b7524c3a..55731421955 100644 --- a/dali/test/python/test_dali_tf_dataset_mnist.py +++ b/dali/test/python/test_dali_tf_dataset_mnist.py @@ -21,7 +21,7 @@ from shutil import rmtree as remove_directory import tensorflow as tf import tensorflow.compat.v1 as tf_v1 -from distutils.version import StrictVersion +from packaging.version import Version from nose import SkipTest @@ -116,7 +116,7 @@ def run_keras_single_device(device="cpu", device_id=0): def graph_model(images, reuse, is_training): - if StrictVersion(tf.__version__) >= StrictVersion("2.16"): + if Version(tf.__version__) >= Version("2.16"): raise SkipTest("TF < 2.16 is required for this test") with tf_v1.variable_scope("mnist_net", reuse=reuse): images = tf_v1.layers.flatten(images) @@ -196,7 +196,7 @@ def _run_config(device="cpu", device_id=0): def run_estimators_single_device(device="cpu", device_id=0): - if StrictVersion(tf.__version__) < StrictVersion("2.16"): + if Version(tf.__version__) < Version("2.16"): with tf.device("/{0}:{1}".format(device, device_id)): model = keras_model() model = tf.keras.estimator.model_to_estimator( diff --git a/dali/test/python/test_dali_tf_dataset_mnist_eager.py b/dali/test/python/test_dali_tf_dataset_mnist_eager.py index d84054a62b7..dcc93be60d0 100644 --- a/dali/test/python/test_dali_tf_dataset_mnist_eager.py +++ b/dali/test/python/test_dali_tf_dataset_mnist_eager.py @@ -19,7 +19,7 @@ from test_utils_tensorflow import skip_for_incompatible_tf, available_gpus from nose_utils import raises from nose import SkipTest -from distutils.version import LooseVersion +from packaging.version import Version tf.compat.v1.enable_eager_execution() @@ -60,7 +60,7 @@ def test_keras_wrong_placement_cpu(): def test_keras_multi_gpu_mirrored_strategy(): # due to compatibility problems between the driver, cuda version and # TensorFlow 2.12 
test_keras_multi_gpu_mirrored_strategy doesn't work. - if LooseVersion(tf.__version__) >= LooseVersion("2.12.0"): + if Version(tf.__version__) >= Version("2.12.0"): raise SkipTest("This test is not supported for TensorFlow 2.12") strategy = tf.distribute.MirroredStrategy(devices=available_gpus()) diff --git a/dali/test/python/test_dali_tf_dataset_mnist_graph.py b/dali/test/python/test_dali_tf_dataset_mnist_graph.py index 88bbe297912..0a1aba5441c 100644 --- a/dali/test/python/test_dali_tf_dataset_mnist_graph.py +++ b/dali/test/python/test_dali_tf_dataset_mnist_graph.py @@ -16,35 +16,35 @@ import tensorflow.compat.v1 as tf_v1 from nose_utils import with_setup, SkipTest, raises import test_dali_tf_dataset_mnist as mnist -from distutils.version import StrictVersion +from packaging.version import Version mnist.tf.compat.v1.disable_eager_execution() @with_setup(tf.keras.backend.clear_session) def test_keras_single_gpu(): - if StrictVersion(tf.__version__) >= StrictVersion("2.16"): + if Version(tf.__version__) >= Version("2.16"): raise SkipTest("TF < 2.16 is required for this test") mnist.run_keras_single_device("gpu", 0) @with_setup(tf.keras.backend.clear_session) def test_keras_single_other_gpu(): - if StrictVersion(tf.__version__) >= StrictVersion("2.16"): + if Version(tf.__version__) >= Version("2.16"): raise SkipTest("TF < 2.16 is required for this test") mnist.run_keras_single_device("gpu", 1) @with_setup(tf.keras.backend.clear_session) def test_keras_single_cpu(): - if StrictVersion(tf.__version__) >= StrictVersion("2.16"): + if Version(tf.__version__) >= Version("2.16"): raise SkipTest("TF < 2.16 is required for this test") mnist.run_keras_single_device("cpu", 0) @raises(tf.errors.OpError, "TF device and DALI device mismatch. TF*: CPU, DALI*: GPU for output") def test_keras_wrong_placement_gpu(): - if StrictVersion(tf.__version__) >= StrictVersion("2.16"): + if Version(tf.__version__) >= Version("2.16"): raise SkipTest("TF < 2.16 is required for this test") with tf.device("cpu:0"): model = mnist.keras_model() @@ -55,7 +55,7 @@ def test_keras_wrong_placement_gpu(): @raises(tf.errors.OpError, "TF device and DALI device mismatch. 
TF*: GPU, DALI*: CPU for output") def test_keras_wrong_placement_cpu(): - if StrictVersion(tf.__version__) >= StrictVersion("2.16"): + if Version(tf.__version__) >= Version("2.16"): raise SkipTest("TF < 2.16 is required for this test") with tf.device("gpu:0"): model = mnist.keras_model() diff --git a/dali/test/python/test_utils.py b/dali/test/python/test_utils.py index 2603c875b32..a8ce3519a8f 100644 --- a/dali/test/python/test_utils.py +++ b/dali/test/python/test_utils.py @@ -26,7 +26,7 @@ import subprocess import sys import tempfile -from distutils.version import LooseVersion +from packaging.version import Version from nose_utils import SkipTest @@ -945,8 +945,8 @@ def check_numba_compatibility_cpu(if_skip=True): # Numba bug: # https://github.com/numba/numba/issues/8567 if platform.processor().lower() in ("arm64", "aarch64", "armv8") and ( - LooseVersion(numba.__version__) >= LooseVersion("0.57.0") - and LooseVersion(numba.__version__) < LooseVersion("0.59.0") + Version(numba.__version__) >= Version("0.57.0") + and Version(numba.__version__) < Version("0.59.0") ): if if_skip: raise SkipTest() diff --git a/dali_tf_plugin/build_dali_tf.sh b/dali_tf_plugin/build_dali_tf.sh index 2419b57b14b..3986e2da4c0 100755 --- a/dali_tf_plugin/build_dali_tf.sh +++ b/dali_tf_plugin/build_dali_tf.sh @@ -27,7 +27,7 @@ DALI_LFLAGS="-L${DALI_STUB_DIR} -ldali" TF_CFLAGS=( $($PYTHON -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_compile_flags()))') ) TF_LFLAGS=( $($PYTHON -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_link_flags()))') ) -CPP_VER=( $($PYTHON -c "import tensorflow as tf; from distutils.version import LooseVersion; print('--std=c++14' if tf.__version__ < LooseVersion('2.10') else '--std=c++17')") ) +CPP_VER=( $($PYTHON -c "import tensorflow as tf; from packaging.version import Version; print('--std=c++14' if Version(tf.__version__) < Version('2.10') else '--std=c++17')") ) # Note: DNDEBUG flag is needed due to issue with TensorFlow custom ops: # https://github.com/tensorflow/tensorflow/issues/17316 diff --git a/dali_tf_plugin/dali_tf_plugin_install_tool.py b/dali_tf_plugin/dali_tf_plugin_install_tool.py index d33e83f4230..9c09c2e4f3f 100644 --- a/dali_tf_plugin/dali_tf_plugin_install_tool.py +++ b/dali_tf_plugin/dali_tf_plugin_install_tool.py @@ -28,7 +28,7 @@ get_tf_build_flags, ) import os -from distutils.version import StrictVersion, LooseVersion +from packaging.version import Version from pathlib import Path import tempfile from stubgen import stubgen @@ -129,7 +129,7 @@ def __init__(self, plugin_dest_dir=None): self.can_install_prebuilt = ( not self.always_build and bool(self.tf_compiler) - and StrictVersion(self.tf_compiler) >= StrictVersion("5.0") + and Version(self.tf_compiler) >= Version("5.0") and self.is_compatible_with_prebuilt_bin and self.prebuilt_dali_stub is not None ) @@ -162,8 +162,8 @@ def __init__(self, plugin_dest_dir=None): or self.default_cpp_version == self.tf_compiler or not bool(self.tf_compiler) or ( - StrictVersion(self.default_cpp_version) >= StrictVersion("5.0") - and StrictVersion(self.tf_compiler) >= StrictVersion("5.0") + Version(self.default_cpp_version) >= Version("5.0") + and Version(self.tf_compiler) >= Version("5.0") ) ) @@ -366,7 +366,7 @@ def build(self): lib_path = os.path.join(self.plugin_dest_dir, lib_filename) # for a newer TF we need to compiler with C++17 - cpp_ver = "--std=c++14" if self.tf_version < LooseVersion("2.10") else "--std=c++17" + cpp_ver = "--std=c++14" if self.tf_version < Version("2.10") else "--std=c++17" # 
Note: DNDEBUG flag is needed due to issue with TensorFlow custom ops: # https://github.com/tensorflow/tensorflow/issues/17316 # Do not remove it. diff --git a/dali_tf_plugin/dali_tf_plugin_utils.py b/dali_tf_plugin/dali_tf_plugin_utils.py index 2266f42c5f9..712f56a39d6 100644 --- a/dali_tf_plugin/dali_tf_plugin_utils.py +++ b/dali_tf_plugin/dali_tf_plugin_utils.py @@ -17,7 +17,7 @@ import re import sys import fnmatch -from distutils.version import StrictVersion +from packaging.version import Version # Find file matching `pattern` in `path` @@ -66,7 +66,7 @@ def get_tf_compiler_version(): res = re.search(r"GCC:\s*\(.*\)\s*(\d+.\d+).\d+", line) if res: ver = res.group(1) - if not ret_ver or StrictVersion(ret_ver) < StrictVersion(ver): + if not ret_ver or Version(ret_ver) < Version(ver): ret_ver = ver return ret_ver diff --git a/dali_tf_plugin/setup.py.in b/dali_tf_plugin/setup.py.in index f614ea8e8a5..6ded63d8094 100644 --- a/dali_tf_plugin/setup.py.in +++ b/dali_tf_plugin/setup.py.in @@ -88,7 +88,8 @@ For more details please check the 'Programming Language :: Python :: 3.12', ], install_requires = [ - 'nvidia-dali@DALI_FLAVOR_MINUS@-cuda@CUDA_VERSION_SHORT_DIGIT_ONLY@==@DALI_VERSION@' + 'nvidia-dali@DALI_FLAVOR_MINUS@-cuda@CUDA_VERSION_SHORT_DIGIT_ONLY@==@DALI_VERSION@', + 'packaging', ], cmdclass={ diff --git a/docs/examples/use_cases/paddle/resnet50/utils/config.py b/docs/examples/use_cases/paddle/resnet50/utils/config.py index c77ea7422cc..1e49238bc07 100644 --- a/docs/examples/use_cases/paddle/resnet50/utils/config.py +++ b/docs/examples/use_cases/paddle/resnet50/utils/config.py @@ -16,13 +16,33 @@ import copy import argparse import logging -import distutils.util import dllogger from utils.mode import RunScope from utils.utility import get_num_trainers from utils.save_load import _PDOPT_SUFFIX, _PDPARAMS_SUFFIX -_AUTO_LAST_EPOCH = 'auto' +_AUTO_LAST_EPOCH = "auto" + + +# based on https://github.com/symonsoft/str2bool/tree/master +# BSD3 license +_true_set = {"yes", "true", "t", "y", "1"} +_false_set = {"no", "false", "f", "n", "0"} +def str2bool(value, raise_exc=False): + if ( + isinstance(value, str) + or sys.version_info[0] < 3 + and isinstance(value, basestring) + ): + value = value.lower() + if value in _true_set: + return True + if value in _false_set: + return False + + if raise_exc: + raise ValueError('Expected "%s"' % '", "'.join(_true_set | _false_set)) + return None def _get_full_path_of_ckpt(args): @@ -38,16 +56,18 @@ def _check_file_exist(path_with_prefix): found = True return found, pdopt_path, pdparams_path - target_from_checkpoint = os.path.join(args.from_checkpoint, - args.model_prefix) + target_from_checkpoint = os.path.join( + args.from_checkpoint, args.model_prefix + ) if args.last_epoch_of_checkpoint is None: args.last_epoch_of_checkpoint = -1 elif args.last_epoch_of_checkpoint == _AUTO_LAST_EPOCH: folders = os.listdir(args.from_checkpoint) args.last_epoch_of_checkpoint = -1 for folder in folders: - tmp_ckpt_path = os.path.join(args.from_checkpoint, folder, - args.model_prefix) + tmp_ckpt_path = os.path.join( + args.from_checkpoint, folder, args.model_prefix + ) try: folder = int(folder) @@ -57,19 +77,27 @@ def _check_file_exist(path_with_prefix): ) continue - if folder > args.last_epoch_of_checkpoint and \ _check_file_exist(tmp_ckpt_path)[0]: + if ( + folder > args.last_epoch_of_checkpoint + and _check_file_exist(tmp_ckpt_path)[0] + ): args.last_epoch_of_checkpoint = folder - epoch_with_prefix = os.path.join(str(args.last_epoch_of_checkpoint), args.model_prefix) \ if args.last_epoch_of_checkpoint > -1 else args.model_prefix - target_from_checkpoint =
os.path.join(args.from_checkpoint, - epoch_with_prefix) + epoch_with_prefix = ( + os.path.join(str(args.last_epoch_of_checkpoint), args.model_prefix) + if args.last_epoch_of_checkpoint > -1 + else args.model_prefix + ) + target_from_checkpoint = os.path.join( + args.from_checkpoint, epoch_with_prefix + ) else: try: args.last_epoch_of_checkpoint = int(args.last_epoch_of_checkpoint) except ValueError: - raise ValueError(f"The value of --last-epoch-of-checkpoint should be None, {_AUTO_LAST_EPOCH}" \ - f" or integer >= 0, but receive {args.last_epoch_of_checkpoint}") + raise ValueError( + f"The value of --last-epoch-of-checkpoint should be None, {_AUTO_LAST_EPOCH}" + f" or integer >= 0, but receive {args.last_epoch_of_checkpoint}" + ) args.from_checkpoint = target_from_checkpoint found, pdopt_path, pdparams_path = _check_file_exist(args.from_checkpoint) @@ -86,13 +114,15 @@ def _get_full_path_of_pretrained_params(args): args.last_epoch_of_checkpoint = -1 return - args.from_pretrained_params = os.path.join(args.from_pretrained_params, - args.model_prefix) + args.from_pretrained_params = os.path.join( + args.from_pretrained_params, args.model_prefix + ) pdparams_path = args.from_pretrained_params + _PDPARAMS_SUFFIX if not os.path.exists(pdparams_path): args.from_pretrained_params = None logging.warning( - f"Cannot find {pdparams_path}, disable --from-pretrained-params.") + f"Cannot find {pdparams_path}, disable --from-pretrained-params." + ) args.last_epoch_of_checkpoint = -1 @@ -102,7 +132,7 @@ def print_args(args): # Due to dllogger cannot serialize Enum into JSON. args_for_log.run_scope = args_for_log.run_scope.value - dllogger.log(step='PARAMETER', data=vars(args_for_log)) + dllogger.log(step="PARAMETER", data=vars(args_for_log)) def check_and_process_args(args): @@ -112,25 +142,31 @@ def check_and_process_args(args): if args.run_scope == scope.value: run_scope = scope break - assert run_scope is not None, \ - f"only support {[scope.value for scope in RunScope]} as run_scope" + assert ( + run_scope is not None + ), f"only support {[scope.value for scope in RunScope]} as run_scope" args.run_scope = run_scope # Precess image layout and channel args.image_channel = args.image_shape[0] if args.data_layout == "NHWC": args.image_shape = [ - args.image_shape[1], args.image_shape[2], args.image_shape[0] + args.image_shape[1], + args.image_shape[2], + args.image_shape[0], ] # Precess learning rate args.lr = get_num_trainers() * args.lr # Precess model loading - assert not (args.from_checkpoint is not None and \ - args.from_pretrained_params is not None), \ - "--from-pretrained-params and --from-checkpoint should " \ - "not be set simultaneously." + assert not ( + args.from_checkpoint is not None + and args.from_pretrained_params is not None + ), ( + "--from-pretrained-params and --from-checkpoint should " + "not be set simultaneously." + ) _get_full_path_of_pretrained_params(args) _get_full_path_of_ckpt(args) args.start_epoch = args.last_epoch_of_checkpoint + 1 @@ -138,12 +174,12 @@ def check_and_process_args(args): # Precess benchmark if args.benchmark: assert args.run_scope in [ - RunScope.TRAIN_ONLY, RunScope.EVAL_ONLY + RunScope.TRAIN_ONLY, + RunScope.EVAL_ONLY, ], "If benchmark enabled, run_scope must be `train_only` or `eval_only`" # Only run one epoch when benchmark or eval_only. 
- if args.benchmark or \ - (args.run_scope == RunScope.EVAL_ONLY): + if args.benchmark or (args.run_scope == RunScope.EVAL_ONLY): args.epochs = args.start_epoch + 1 if args.run_scope == RunScope.EVAL_ONLY: @@ -151,366 +187,410 @@ def check_and_process_args(args): def add_global_args(parser): - group = parser.add_argument_group('Global') + group = parser.add_argument_group("Global") group.add_argument( - '--output-dir', + "--output-dir", type=str, - default='./output/', - help='A path to store trained models.') + default="./output/", + help="A path to store trained models.", + ) group.add_argument( - '--run-scope', - default='train_eval', - choices=('train_eval', 'train_only', 'eval_only'), - help='Running scope. It should be one of {train_eval, train_only, eval_only}.' + "--run-scope", + default="train_eval", + choices=("train_eval", "train_only", "eval_only"), + help="Running scope. It should be one of {train_eval, train_only, eval_only}.", ) group.add_argument( - '--epochs', + "--epochs", type=int, default=90, - help='The number of epochs for training.') + help="The number of epochs for training.", + ) group.add_argument( - '--save-interval', + "--save-interval", type=int, default=1, - help='The iteration interval to save checkpoints.') + help="The iteration interval to save checkpoints.", + ) group.add_argument( - '--eval-interval', + "--eval-interval", type=int, default=1, - help='The iteration interval to test trained models on a given validation dataset. ' \ - 'Ignored when --run-scope is train_only.' + help="The iteration interval to test trained models on a given validation dataset. " + "Ignored when --run-scope is train_only.", ) group.add_argument( - '--print-interval', + "--print-interval", type=int, default=10, - help='The iteration interval to show training/evaluation message.') + help="The iteration interval to show training/evaluation message.", + ) group.add_argument( - '--report-file', + "--report-file", type=str, - default='./report.json', - help='A file in which to store JSON experiment report.') + default="./report.json", + help="A file in which to store JSON experiment report.", + ) group.add_argument( - '--data-layout', - default='NCHW', - choices=('NCHW', 'NHWC'), - help='Data format. It should be one of {NCHW, NHWC}.') + "--data-layout", + default="NCHW", + choices=("NCHW", "NHWC"), + help="Data format. It should be one of {NCHW, NHWC}.", + ) group.add_argument( - '--benchmark', action='store_true', help='To enable benchmark mode.') + "--benchmark", action="store_true", help="To enable benchmark mode." + ) group.add_argument( - '--benchmark-steps', + "--benchmark-steps", type=int, default=100, - help='Steps for benchmark run, only be applied when --benchmark is set.' + help="Steps for benchmark run, only be applied when --benchmark is set.", ) group.add_argument( - '--benchmark-warmup-steps', + "--benchmark-warmup-steps", type=int, default=100, - help='Warmup steps for benchmark run, only be applied when --benchmark is set.' + help="Warmup steps for benchmark run, only be applied when --benchmark is set.", ) group.add_argument( - '--model-prefix', + "--model-prefix", type=str, default="resnet_50_paddle", - help='The prefix name of model files to save/load.') + help="The prefix name of model files to save/load.", + ) group.add_argument( - '--from-pretrained-params', + "--from-pretrained-params", type=str, default=None, - help='A folder path which contains pretrained parameters, that is a file in name' \ - ' --model-prefix + .pdparams. 
It should not be set with --from-checkpoint' \ - ' at the same time.' + help="A folder path which contains pretrained parameters, that is a file in name" + " --model-prefix + .pdparams. It should not be set with --from-checkpoint" + " at the same time.", ) group.add_argument( - '--from-checkpoint', + "--from-checkpoint", type=str, default=None, - help='A checkpoint path to resume training. It should not be set ' \ - 'with --from-pretrained-params at the same time. The path provided ' \ - 'could be a folder contains < epoch_id/ckpt_files > or < ckpt_files >.' + help="A checkpoint path to resume training. It should not be set " + "with --from-pretrained-params at the same time. The path provided " + "could be a folder contains < epoch_id/ckpt_files > or < ckpt_files >.", ) group.add_argument( - '--last-epoch-of-checkpoint', + "--last-epoch-of-checkpoint", type=str, default=None, - help='The epoch id of the checkpoint given by --from-checkpoint. ' \ - 'It should be None, auto or integer >= 0. If it is set as ' \ - 'None, then training will start from 0-th epoch. If it is set as ' \ - 'auto, then it will search largest integer-convertable folder ' \ - ' --from-checkpoint, which contains required checkpoint. ' \ - 'Default is None.' + help="The epoch id of the checkpoint given by --from-checkpoint. " + "It should be None, auto or integer >= 0. If it is set as " + "None, then training will start from 0-th epoch. If it is set as " + "auto, then it will search largest integer-convertable folder " + " --from-checkpoint, which contains required checkpoint. " + "Default is None.", ) group.add_argument( - '--show-config', - type=distutils.util.strtobool, + "--show-config", + type=str2bool, default=True, - help='To show arguments.') + help="To show arguments.", + ) group.add_argument( - '--enable-cpu-affinity', - type=distutils.util.strtobool, + "--enable-cpu-affinity", + type=str2bool, default=True, - help='To enable in-built GPU-CPU affinity.') + help="To enable in-built GPU-CPU affinity.", + ) return parser def add_advance_args(parser): - group = parser.add_argument_group('Advanced Training') + group = parser.add_argument_group("Advanced Training") # AMP group.add_argument( - '--amp', - action='store_true', - help='Enable automatic mixed precision training (AMP).') + "--amp", + action="store_true", + help="Enable automatic mixed precision training (AMP).", + ) group.add_argument( - '--scale-loss', + "--scale-loss", type=float, default=1.0, - help='The loss scalar for AMP training, only be applied when --amp is set.' + help="The loss scalar for AMP training, only be applied when --amp is set.", ) group.add_argument( - '--use-dynamic-loss-scaling', - action='store_true', - help='Enable dynamic loss scaling in AMP training, only be applied when --amp is set.' 
+ "--use-dynamic-loss-scaling", + action="store_true", + help="Enable dynamic loss scaling in AMP training, only be applied when --amp is set.", ) group.add_argument( - '--use-pure-fp16', - action='store_true', - help='Enable pure FP16 training, only be applied when --amp is set.') + "--use-pure-fp16", + action="store_true", + help="Enable pure FP16 training, only be applied when --amp is set.", + ) group.add_argument( - '--fuse-resunit', - action='store_true', - help='Enable CUDNNv8 ResUnit fusion, only be applied when --amp is set.') + "--fuse-resunit", + action="store_true", + help="Enable CUDNNv8 ResUnit fusion, only be applied when --amp is set.", + ) # ASP group.add_argument( - '--asp', - action='store_true', - help='Enable automatic sparse training (ASP).') + "--asp", + action="store_true", + help="Enable automatic sparse training (ASP).", + ) group.add_argument( - '--prune-model', - action='store_true', - help='Prune model to 2:4 sparse pattern, only be applied when --asp is set.' + "--prune-model", + action="store_true", + help="Prune model to 2:4 sparse pattern, only be applied when --asp is set.", ) group.add_argument( - '--mask-algo', - default='mask_1d', - choices=('mask_1d', 'mask_2d_greedy', 'mask_2d_best'), - help='The algorithm to generate sparse masks. It should be one of ' \ - '{mask_1d, mask_2d_greedy, mask_2d_best}. This only be applied ' \ - 'when --asp and --prune-model is set.' + "--mask-algo", + default="mask_1d", + choices=("mask_1d", "mask_2d_greedy", "mask_2d_best"), + help="The algorithm to generate sparse masks. It should be one of " + "{mask_1d, mask_2d_greedy, mask_2d_best}. This only be applied " + "when --asp and --prune-model is set.", ) return parser def add_dataset_args(parser): def float_list(x): - return list(map(float, x.split(','))) + return list(map(float, x.split(","))) def int_list(x): - return list(map(int, x.split(','))) + return list(map(int, x.split(","))) - dataset_group = parser.add_argument_group('Dataset') + dataset_group = parser.add_argument_group("Dataset") dataset_group.add_argument( - '--image-root', + "--image-root", type=str, - default='/imagenet', - help='A root folder of train/val images. It should contain train and val folders, ' \ - 'which store corresponding images.' + default="/imagenet", + help="A root folder of train/val images. It should contain train and val folders, " + "which store corresponding images.", ) dataset_group.add_argument( - '--image-shape', + "--image-shape", type=int_list, default=[4, 224, 224], - help='The image shape. Its shape should be [channel, height, width].') + help="The image shape. 
Its shape should be [channel, height, width].", + ) # Data Loader dataset_group.add_argument( - '--batch-size', + "--batch-size", type=int, default=256, - help='The batch size for both training and evaluation.') + help="The batch size for both training and evaluation.", + ) dataset_group.add_argument( - '--dali-random-seed', + "--dali-random-seed", type=int, default=42, - help='The random seed for DALI data loader.') + help="The random seed for DALI data loader.", + ) dataset_group.add_argument( - '--dali-num-threads', + "--dali-num-threads", type=int, default=4, - help='The number of threads applied to DALI data loader.') + help="The number of threads applied to DALI data loader.", + ) dataset_group.add_argument( - '--dali-output-fp16', - action='store_true', - help='Output FP16 data from DALI data loader.') + "--dali-output-fp16", + action="store_true", + help="Output FP16 data from DALI data loader.", + ) # Augmentation - augmentation_group = parser.add_argument_group('Data Augmentation') + augmentation_group = parser.add_argument_group("Data Augmentation") augmentation_group.add_argument( - '--crop-size', + "--crop-size", type=int, default=224, - help='The size to crop input images.') + help="The size to crop input images.", + ) augmentation_group.add_argument( - '--rand-crop-scale', + "--rand-crop-scale", type=float_list, - default=[0.08, 1.], - help='Range from which to choose a random area fraction.') + default=[0.08, 1.0], + help="Range from which to choose a random area fraction.", + ) augmentation_group.add_argument( - '--rand-crop-ratio', + "--rand-crop-ratio", type=float_list, default=[3.0 / 4, 4.0 / 3], - help='Range from which to choose a random aspect ratio (width/height).') + help="Range from which to choose a random aspect ratio (width/height).", + ) augmentation_group.add_argument( - '--normalize-scale', + "--normalize-scale", type=float, default=1.0 / 255.0, - help='A scalar to normalize images.') + help="A scalar to normalize images.", + ) augmentation_group.add_argument( - '--normalize-mean', + "--normalize-mean", type=float_list, default=[0.485, 0.456, 0.406], - help='The mean values to normalize RGB images.') + help="The mean values to normalize RGB images.", + ) augmentation_group.add_argument( - '--normalize-std', + "--normalize-std", type=float_list, default=[0.229, 0.224, 0.225], - help='The std values to normalize RGB images.') + help="The std values to normalize RGB images.", + ) augmentation_group.add_argument( - '--resize-short', + "--resize-short", type=int, default=256, - help='The length of the shorter dimension of the resized image.') + help="The length of the shorter dimension of the resized image.", + ) return parser def add_model_args(parser): - group = parser.add_argument_group('Model') + group = parser.add_argument_group("Model") group.add_argument( - '--model-arch-name', + "--model-arch-name", type=str, - default='ResNet50', - help='The model architecture name. It should be one of {ResNet50}.') + default="ResNet50", + help="The model architecture name. 
It should be one of {ResNet50}.", + ) group.add_argument( - '--num-of-class', + "--num-of-class", type=int, default=1000, - help='The number classes of images.') + help="The number classes of images.", + ) group.add_argument( - '--bn-weight-decay', - action='store_true', - help='Apply weight decay to BatchNorm shift and scale.') + "--bn-weight-decay", + action="store_true", + help="Apply weight decay to BatchNorm shift and scale.", + ) return parser def add_training_args(parser): - group = parser.add_argument_group('Training') + group = parser.add_argument_group("Training") group.add_argument( - '--label-smoothing', + "--label-smoothing", type=float, default=0.1, - help='The ratio of label smoothing.') + help="The ratio of label smoothing.", + ) group.add_argument( - '--optimizer', - default='Momentum', + "--optimizer", + default="Momentum", metavar="OPTIMIZER", - choices=('Momentum'), - help='The name of optimizer. It should be one of {Momentum}.') + choices=("Momentum"), + help="The name of optimizer. It should be one of {Momentum}.", + ) group.add_argument( - '--momentum', + "--momentum", type=float, default=0.875, - help='The momentum value of optimizer.') + help="The momentum value of optimizer.", + ) group.add_argument( - '--weight-decay', + "--weight-decay", type=float, default=3.0517578125e-05, - help='The coefficient of weight decay.') + help="The coefficient of weight decay.", + ) group.add_argument( - '--lr-scheduler', - default='Cosine', + "--lr-scheduler", + default="Cosine", metavar="LR_SCHEDULER", - choices=('Cosine'), - help='The name of learning rate scheduler. It should be one of {Cosine}.' + choices=("Cosine"), + help="The name of learning rate scheduler. It should be one of {Cosine}.", ) group.add_argument( - '--lr', type=float, default=0.256, help='The initial learning rate.') + "--lr", type=float, default=0.256, help="The initial learning rate." + ) group.add_argument( - '--warmup-epochs', + "--warmup-epochs", type=int, default=5, - help='The number of epochs for learning rate warmup.') + help="The number of epochs for learning rate warmup.", + ) group.add_argument( - '--warmup-start-lr', + "--warmup-start-lr", type=float, default=0.0, - help='The initial learning rate for warmup.') + help="The initial learning rate for warmup.", + ) return parser def add_trt_args(parser): - group = parser.add_argument_group('Paddle-TRT') + group = parser.add_argument_group("Paddle-TRT") group.add_argument( - '--device', + "--device", type=int, - default='0', - help='The GPU device id for Paddle-TRT inference.' + default="0", + help="The GPU device id for Paddle-TRT inference.", ) group.add_argument( - '--trt-inference-dir', + "--trt-inference-dir", type=str, - default='./inference', - help='A path to store/load inference models. ' \ - 'export_model.py would export models to this folder, ' \ - 'then inference.py would load from here.' + default="./inference", + help="A path to store/load inference models. " + "export_model.py would export models to this folder, " + "then inference.py would load from here.", ) group.add_argument( - '--trt-precision', - default='FP32', - choices=('FP32', 'FP16', 'INT8'), - help='The precision of TensorRT. It should be one of {FP32, FP16, INT8}.' + "--trt-precision", + default="FP32", + choices=("FP32", "FP16", "INT8"), + help="The precision of TensorRT. 
It should be one of {FP32, FP16, INT8}.", ) group.add_argument( - '--trt-workspace-size', + "--trt-workspace-size", type=int, default=(1 << 30), - help='The memory workspace of TensorRT in MB.') + help="The memory workspace of TensorRT in MB.", + ) group.add_argument( - '--trt-min-subgraph-size', + "--trt-min-subgraph-size", type=int, default=3, - help='The minimal subgraph size to enable PaddleTRT.') + help="The minimal subgraph size to enable PaddleTRT.", + ) group.add_argument( - '--trt-use-static', - type=distutils.util.strtobool, + "--trt-use-static", + type=str2bool, default=False, - help='Fix TensorRT engine at first running.') + help="Fix TensorRT engine at first running.", + ) group.add_argument( - '--trt-use-calib-mode', - type=distutils.util.strtobool, + "--trt-use-calib-mode", + type=str2bool, default=False, - help='Use the PTQ calibration of PaddleTRT int8.') + help="Use the PTQ calibration of PaddleTRT int8.", + ) group.add_argument( - '--trt-export-log-path', + "--trt-export-log-path", type=str, - default='./export.json', - help='A file in which to store JSON model exporting report.') + default="./export.json", + help="A file in which to store JSON model exporting report.", + ) group.add_argument( - '--trt-log-path', + "--trt-log-path", type=str, - default='./inference.json', - help='A file in which to store JSON inference report.') + default="./inference.json", + help="A file in which to store JSON inference report.", + ) group.add_argument( - '--trt-use-synthetic', - type=distutils.util.strtobool, + "--trt-use-synthetic", + type=str2bool, default=False, - help='Apply synthetic data for benchmark.') + help="Apply synthetic data for benchmark.", + ) return parser def parse_args(including_trt=False): parser = argparse.ArgumentParser( description="PaddlePaddle RN50v1.5 training script", - formatter_class=argparse.ArgumentDefaultsHelpFormatter) + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) parser = add_global_args(parser) parser = add_dataset_args(parser) parser = add_model_args(parser) diff --git a/docs/examples/use_cases/tensorflow/resnet-n/nvutils/common.py b/docs/examples/use_cases/tensorflow/resnet-n/nvutils/common.py index 55eb620abd2..a36c753e2e4 100644 --- a/docs/examples/use_cases/tensorflow/resnet-n/nvutils/common.py +++ b/docs/examples/use_cases/tensorflow/resnet-n/nvutils/common.py @@ -1,5 +1,5 @@ import tensorflow as tf -from distutils.version import StrictVersion +from packaging.version import Version BASE_LEARNING_RATE = 0.1 @@ -18,7 +18,7 @@ def create_piecewise_constant_decay_with_warmup(batch_size, epoch_size, rescaled_lr = BASE_LEARNING_RATE * batch_size / base_lr_batch_size step_boundaries = [float(steps_per_epoch) * x for x in boundaries] lr_values = [rescaled_lr * m for m in multipliers] - if StrictVersion(tf.__version__) >= StrictVersion("2.13"): + if Version(tf.__version__) >= Version("2.13"): warmup_steps = int(warmup_epochs * steps_per_epoch) else: warmup_steps = warmup_epochs * steps_per_epoch @@ -38,7 +38,7 @@ def __init__(self, rescaled_lr, step_boundaries, lr_values, warmup_steps, super(PiecewiseConstantDecayWithWarmup, self).__init__() self.rescaled_lr = rescaled_lr - if StrictVersion(tf.__version__) >= StrictVersion("2.13"): + if Version(tf.__version__) >= Version("2.13"): self.step_boundaries = [int(b) for b in step_boundaries] else: self.step_boundaries = step_boundaries diff --git a/docs/examples/use_cases/tensorflow/resnet-n/nvutils/hvd_patch.py b/docs/examples/use_cases/tensorflow/resnet-n/nvutils/hvd_patch.py index 
4dca4f6577a..5f328eff4d6 100644 --- a/docs/examples/use_cases/tensorflow/resnet-n/nvutils/hvd_patch.py +++ b/docs/examples/use_cases/tensorflow/resnet-n/nvutils/hvd_patch.py @@ -8,10 +8,10 @@ import horovod.tensorflow as hvd import tensorflow as tf from nvutils import common -from distutils.version import LooseVersion +from packaging.version import Version from horovod.tensorflow import Average, Compression, Sum -_PRE_TF_2_4_0 = LooseVersion(tf.__version__) < LooseVersion('2.4.0') +_PRE_TF_2_4_0 = Version(tf.__version__) < Version('2.4.0') def create_distributed_optimizer( keras, optimizer, name, device_dense, device_sparse, compression, diff --git a/docs/examples/use_cases/tensorflow/resnet-n/nvutils/runner.py b/docs/examples/use_cases/tensorflow/resnet-n/nvutils/runner.py index a53034dbe06..91af31bd94c 100755 --- a/docs/examples/use_cases/tensorflow/resnet-n/nvutils/runner.py +++ b/docs/examples/use_cases/tensorflow/resnet-n/nvutils/runner.py @@ -16,7 +16,7 @@ from nvutils import image_processing from nvutils import common -from distutils.version import StrictVersion +from packaging.version import Version import tensorflow as tf from tensorflow import keras @@ -27,8 +27,8 @@ from keras import backend print(tf.__version__) -if StrictVersion(tf.__version__) > StrictVersion("2.1.0"): - if StrictVersion(tf.__version__) >= StrictVersion("2.4.0"): +if Version(tf.__version__) > Version("2.1.0"): + if Version(tf.__version__) >= Version("2.4.0"): from tensorflow.python.keras.mixed_precision import device_compatibility_check else: from tensorflow.python.keras.mixed_precision.experimental import device_compatibility_check @@ -142,7 +142,7 @@ def train(model_func, params): tf.config.experimental.set_visible_devices(gpus[hvd.local_rank()], 'GPU') if precision == 'fp16': - if StrictVersion(tf.__version__) >= StrictVersion("2.4.0"): + if Version(tf.__version__) >= Version("2.4.0"): policy = keras.mixed_precision.Policy('mixed_float16') keras.mixed_precision.set_global_policy(policy) else: @@ -160,7 +160,7 @@ def train(model_func, params): # Horovod: add Horovod DistributedOptimizer. We use a modified version to # support the custom learning rate schedule. 
opt = hvd.DistributedOptimizer(opt) - if StrictVersion(tf.__version__) >= StrictVersion("2.4.0") and precision == 'fp16': + if Version(tf.__version__) >= Version("2.4.0") and precision == 'fp16': opt = keras.mixed_precision.LossScaleOptimizer(opt, dynamic=False, initial_scale=loss_scale) diff --git a/docs/examples/use_cases/tensorflow/resnet-n/nvutils/runner_ctl.py b/docs/examples/use_cases/tensorflow/resnet-n/nvutils/runner_ctl.py index bcdfa22bf4a..cf4f9266d22 100755 --- a/docs/examples/use_cases/tensorflow/resnet-n/nvutils/runner_ctl.py +++ b/docs/examples/use_cases/tensorflow/resnet-n/nvutils/runner_ctl.py @@ -17,7 +17,7 @@ from builtins import range from nvutils import image_processing from nvutils import common -from distutils.version import StrictVersion +from packaging.version import Version import tensorflow as tf from tensorflow import keras @@ -27,8 +27,8 @@ from keras import backend print(tf.__version__) -if StrictVersion(tf.__version__) > StrictVersion("2.1.0"): - if StrictVersion(tf.__version__) >= StrictVersion("2.4.0"): +if Version(tf.__version__) > Version("2.1.0"): + if Version(tf.__version__) >= Version("2.4.0"): from tensorflow.python.keras.mixed_precision import device_compatibility_check else: from tensorflow.python.keras.mixed_precision.experimental import device_compatibility_check @@ -105,7 +105,7 @@ def train_ctl(model_func, params): summary_writer = None if precision == 'fp16': - if StrictVersion(tf.__version__) >= StrictVersion("2.4.0"): + if Version(tf.__version__) >= Version("2.4.0"): policy = keras.mixed_precision.Policy('mixed_float16') keras.mixed_precision.set_global_policy(policy) else: diff --git a/qa/TL1_tensorflow_dataset/test_impl.sh b/qa/TL1_tensorflow_dataset/test_impl.sh index d3bec26540e..aae4b015fab 100755 --- a/qa/TL1_tensorflow_dataset/test_impl.sh +++ b/qa/TL1_tensorflow_dataset/test_impl.sh @@ -25,8 +25,8 @@ test_body() { pushd ../../../docs/examples/frameworks/tensorflow/ # TF 2.16 removed usage of tf.estimator the test uses is_below_2_16=$(python -c 'import tensorflow as tf; \ - from distutils.version import StrictVersion; \ - print(StrictVersion(tf.__version__) < StrictVersion("2.16"))') + from packaging.version import Version; \ + print(Version(tf.__version__) < Version("2.16"))') if [ $is_below_2_16 = 'True' ]; then jupyter nbconvert tensorflow-dataset.ipynb \ @@ -39,9 +39,9 @@ test_body() { # TensorFlow 2.12 test_keras_multi_gpu_mirrored_strategy doesn't work. is_compatible_distributed=$(python -c 'import nvidia.dali.plugin.tf as dali_tf; \ import tensorflow as tf; \ - from distutils.version import LooseVersion; \ + from packaging.version import Version; \ print(dali_tf.dataset_distributed_compatible_tensorflow() \ - and LooseVersion(tf.__version__) < LooseVersion("2.12.0"))') + and Version(tf.__version__) < Version("2.12.0"))') if [ $is_compatible_distributed = 'True' ]; then jupyter nbconvert tensorflow-dataset-multigpu.ipynb \ --to notebook --inplace --execute \