diff --git a/Acknowledgements.txt b/Acknowledgements.txt index 5ab2b5ce7a8..6cae0805595 100644 --- a/Acknowledgements.txt +++ b/Acknowledgements.txt @@ -4410,3 +4410,37 @@ products or services of Licensee, or any third party. 8. By copying, installing or otherwise using Python, Licensee agrees to be bound by the terms and conditions of this License Agreement. + +============================================================================== +str2bool + + +BSD 3-Clause License + +Copyright (c) 2017, SymonSoft +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
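For context on the changes below: distutils (and with it distutils.version.LooseVersion / StrictVersion) is deprecated and was removed in Python 3.12, so the version checks throughout DALI are switched to packaging.version.Version, which implements PEP 440 ordering, and 'packaging' is added to the build/run requirements. A minimal illustrative sketch of the comparison semantics the updated checks rely on (not part of the patch itself):

    from packaging.version import Version

    # Numeric components compare numerically, so multi-digit parts order correctly.
    assert Version("2.10.0") > Version("2.9.1")
    # PEP 440 pre-release and dev suffixes parse and sort before the final release,
    # which StrictVersion rejects outright and LooseVersion orders incorrectly.
    assert Version("2.16.0rc0") < Version("2.16.0")
    assert Version("0.4.16.dev0") < Version("0.4.16")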
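Similarly, distutils.util.strtobool in the Paddle ResNet50 example is replaced by a vendored str2bool helper (BSD-3-Clause, acknowledged above). A small usage sketch, assuming the helper as vendored further down in this patch; note the behavioral difference for unrecognized input:

    # strtobool raised ValueError on unrecognized input; str2bool returns None
    # unless raise_exc=True is passed.
    assert str2bool("Yes") is True
    assert str2bool("0") is False
    assert str2bool("maybe") is None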
diff --git a/conda/dali_python_bindings/recipe/meta.yaml b/conda/dali_python_bindings/recipe/meta.yaml index 9ccd2dca1b8..a15e07c2035 100644 --- a/conda/dali_python_bindings/recipe/meta.yaml +++ b/conda/dali_python_bindings/recipe/meta.yaml @@ -81,6 +81,7 @@ requirements: - astunparse >=1.6.0 - gast >=0.3.3 - dm-tree >=0.1.8 + - packaging - nvidia-dali-core{% if environ.get('NVIDIA_DALI_BUILD_FLAVOR', '')|length %}{{"-" + environ.get('NVIDIA_DALI_BUILD_FLAVOR', '')}}{% endif %}-cuda{{ environ.get('CUDA_VERSION', '') | replace(".","") }} ={{ environ.get('DALI_CONDA_BUILD_VERSION', '') }} - nvidia-nvimagecodec-cuda{{ environ.get('CUDA_VERSION', '') | replace(".","") }} run: @@ -94,6 +95,7 @@ requirements: - astunparse >=1.6.0 - gast >=0.3.3 - dm-tree >=0.1.8 + - packaging - nvidia-dali-core{% if environ.get('NVIDIA_DALI_BUILD_FLAVOR', '')|length %}{{"-" + environ.get('NVIDIA_DALI_BUILD_FLAVOR', '')}}{% endif %}-cuda{{ environ.get('CUDA_VERSION', '') | replace(".","") }} ={{ environ.get('DALI_CONDA_BUILD_VERSION', '') }} - nvidia-nvimagecodec-cuda{{ environ.get('CUDA_VERSION', '') | replace(".","") }} about: diff --git a/dali/python/nvidia/dali/_autograph/pyct/gast_util.py b/dali/python/nvidia/dali/_autograph/pyct/gast_util.py index 3424a7be963..107f626416e 100644 --- a/dali/python/nvidia/dali/_autograph/pyct/gast_util.py +++ b/dali/python/nvidia/dali/_autograph/pyct/gast_util.py @@ -18,9 +18,19 @@ import functools import gast -from distutils.version import LooseVersion +from packaging.version import Version +def convert_to_version(function): + """Makes sure that returned function value is a Version object""" + + def wrap_function(*args, **kwargs): + return Version(function(*args, **kwargs)) + + return wrap_function + + +@convert_to_version def get_gast_version(): """Gast exports `__version__` from 0.5.3 onwards, we need to look it up in a different way.""" if hasattr(gast, "__version__"): @@ -76,7 +86,7 @@ def _compat_assign_gast_5(targets, value, type_comment): return gast.Assign(targets=targets, value=value, type_comment=type_comment) -if get_gast_version() < LooseVersion("0.5"): +if get_gast_version() < Version("0.5"): compat_assign = _compat_assign_gast_4 else: compat_assign = _compat_assign_gast_5 diff --git a/dali/python/nvidia/dali/plugin/jax/__init__.py b/dali/python/nvidia/dali/plugin/jax/__init__.py index edd29ebc795..08945e3743c 100644 --- a/dali/python/nvidia/dali/plugin/jax/__init__.py +++ b/dali/python/nvidia/dali/plugin/jax/__init__.py @@ -16,7 +16,7 @@ from . import fn # noqa: F401 -from distutils.version import LooseVersion +from packaging.version import Version from .iterator import DALIGenericIterator, data_iterator assert ( @@ -24,7 +24,7 @@ ), "DALI JAX support requires Python 3.8 or above" -assert LooseVersion(jax.__version__) >= LooseVersion( +assert Version(jax.__version__) >= Version( "0.4.11" ), "DALI JAX support requires JAX 0.4.11 or above" diff --git a/dali/python/nvidia/dali/plugin/jax/fn/_jax_function_impl.py b/dali/python/nvidia/dali/plugin/jax/fn/_jax_function_impl.py index ccb5126a355..06caa66bbec 100644 --- a/dali/python/nvidia/dali/plugin/jax/fn/_jax_function_impl.py +++ b/dali/python/nvidia/dali/plugin/jax/fn/_jax_function_impl.py @@ -14,7 +14,7 @@ from typing import Optional, Protocol, Tuple, Union -from distutils.version import LooseVersion +from packaging.version import Version import jax import jax.dlpack @@ -170,7 +170,7 @@ def flip_horizontal(image: jax.Array): The transformed function that processes DALI-traced batches (DataNodes). 
""" - if LooseVersion(jax.__version__) < LooseVersion("0.4.16"): + if Version(jax.__version__) < Version("0.4.16"): raise RuntimeError("DALI `jax_function` requires JAX 0.4.16 or above.") def decorator(function): diff --git a/dali/python/nvidia/dali/plugin/numba/experimental/__init__.py b/dali/python/nvidia/dali/plugin/numba/experimental/__init__.py index 6e7a4b7733b..3db1f654fdd 100644 --- a/dali/python/nvidia/dali/plugin/numba/experimental/__init__.py +++ b/dali/python/nvidia/dali/plugin/numba/experimental/__init__.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from distutils.version import LooseVersion +from packaging.version import Version from nvidia.dali.pipeline import Pipeline from nvidia.dali.data_node import DataNode as _DataNode @@ -57,8 +57,8 @@ # Minimal version of Numba that is required for Numba GPU operator to work minimal_numba_version = { - 11: LooseVersion("0.55.2"), - 12: LooseVersion("0.57.0"), + 11: Version("0.55.2"), + 12: Version("0.57.0"), } @@ -196,7 +196,7 @@ def _get_run_fn_gpu(self, run_fn, types, dims): for dali_type, ndim in zip(types, dims): cuda_arguments.append(numba_types.Array(_to_numba[dali_type], ndim, "C")) - if LooseVersion(nb.__version__) < LooseVersion("0.57.0"): + if Version(nb.__version__) < Version("0.57.0"): cres = cuda.compiler.compile_cuda(run_fn, numba_types.void, cuda_arguments) else: pipeline = Pipeline.current() @@ -210,7 +210,7 @@ def _get_run_fn_gpu(self, run_fn, types, dims): code = run_fn.__code__ filename = code.co_filename linenum = code.co_firstlineno - if LooseVersion(nb.__version__) < LooseVersion("0.57.0"): + if Version(nb.__version__) < Version("0.57.0"): nvvm_options["debug"] = False nvvm_options["lineinfo"] = False lib, _ = tgt_ctx.prepare_cuda_kernel( @@ -509,7 +509,7 @@ def __init__( @staticmethod def _check_minimal_numba_version(throw: bool = True): - current_version = LooseVersion(nb.__version__) + current_version = Version(nb.__version__) toolkit_version = cuda.runtime.get_version() if toolkit_version[0] not in minimal_numba_version: if throw: @@ -522,7 +522,7 @@ def _check_minimal_numba_version(throw: bool = True): raise RuntimeError( f"Insufficient Numba version. Numba GPU operator " f"requires Numba {str(min_ver)} or higher. " - f"Detected version: {str(LooseVersion(nb.__version__))}." + f"Detected version: {str(Version(nb.__version__))}." 
) else: return False diff --git a/dali/python/nvidia/dali/plugin/paddle.py b/dali/python/nvidia/dali/plugin/paddle.py index 04fc1208f46..b3989d69d59 100644 --- a/dali/python/nvidia/dali/plugin/paddle.py +++ b/dali/python/nvidia/dali/plugin/paddle.py @@ -18,7 +18,7 @@ import numpy as np import paddle -from distutils.version import LooseVersion +from packaging.version import Version from nvidia.dali import types from nvidia.dali.backend import TensorListCPU, TensorGPU, TensorListGPU @@ -26,11 +26,9 @@ from nvidia.dali.plugin.base_iterator import LastBatchPolicy if isinstance(paddle.__version__, str): - assert LooseVersion(paddle.__version__) == LooseVersion("0.0.0") or LooseVersion( + assert Version(paddle.__version__) == Version("0.0.0") or Version( paddle.__version__ - ) >= LooseVersion( - "2.0.0" - ), "DALI PaddlePaddle support requires Paddle develop or release >= 2.0.0" + ) >= Version("2.0.0"), "DALI PaddlePaddle support requires Paddle develop or release >= 2.0.0" dtype_map = { diff --git a/dali/python/nvidia/dali/plugin/tf.py b/dali/python/nvidia/dali/plugin/tf.py index 460f25b28b6..7ae887e79d3 100644 --- a/dali/python/nvidia/dali/plugin/tf.py +++ b/dali/python/nvidia/dali/plugin/tf.py @@ -26,7 +26,7 @@ from nvidia.dali._utils.external_source_impl import _get_generator_from_source_desc from nvidia.dali._utils.external_source_impl import _cycle_enabled -from distutils.version import LooseVersion +from packaging.version import Version import warnings from nvidia.dali_tf_plugin import dali_tf_plugin @@ -307,29 +307,29 @@ def DALIRawIterator(): def _get_tf_version(): - return LooseVersion(tf.__version__) + return Version(tf.__version__) -MIN_TENSORFLOW_VERSION = LooseVersion("1.15") +MIN_TENSORFLOW_VERSION = Version("1.15") def dataset_compatible_tensorflow(): """Returns ``True`` if current TensorFlow version is compatible with DALIDataset.""" - return LooseVersion(tf.__version__) >= MIN_TENSORFLOW_VERSION + return Version(tf.__version__) >= MIN_TENSORFLOW_VERSION def dataset_inputs_compatible_tensorflow(): """Returns ``True`` if the current TensorFlow version is compatible with experimental.DALIDatasetWithInputs and input Datasets can be used with DALI. """ - return LooseVersion(tf.__version__) >= LooseVersion("2.4.1") + return Version(tf.__version__) >= Version("2.4.1") def dataset_distributed_compatible_tensorflow(): """Returns ``True`` if the tf.distribute APIs for current TensorFlow version are compatible with DALIDataset. 
""" - return LooseVersion(tf.__version__) >= LooseVersion("2.5.0") + return Version(tf.__version__) >= Version("2.5.0") def _get_experimental(): @@ -813,7 +813,7 @@ def _as_variant_tensor(self): fail_on_device_mismatch=self._fail_on_device_mismatch, ) - if _get_tf_version() < LooseVersion("2.0"): + if _get_tf_version() < Version("2.0"): class _DALIDatasetImpl(dataset_ops.DatasetV1Adapter): @functools.wraps(_DALIDatasetV2.__init__) diff --git a/dali/python/setup.py.in b/dali/python/setup.py.in index 5014584b599..2c507ce04c5 100644 --- a/dali/python/setup.py.in +++ b/dali/python/setup.py.in @@ -87,6 +87,7 @@ For more details please check the # 1.16 on python 3.12 due to import six.moves 'six >= 1.16', 'dm-tree', + 'packaging', @DALI_INSTALL_REQUIRES_NVIMGCODEC@ ], ) diff --git a/dali/test/python/autograph/pyct/test_loader.py b/dali/test/python/autograph/pyct/test_loader.py index 088e966fa3c..bcd33d2c4e2 100644 --- a/dali/test/python/autograph/pyct/test_loader.py +++ b/dali/test/python/autograph/pyct/test_loader.py @@ -21,7 +21,7 @@ import unittest import gast -from distutils.version import LooseVersion +from packaging.version import Version from nvidia.dali._autograph.pyct import ast_util from nvidia.dali._autograph.pyct import gast_util @@ -79,7 +79,7 @@ def test_load_ast(self): decorator_list=[], returns=None, type_comment=None, - **{"type_params": []} if gast_util.get_gast_version() >= LooseVersion("0.5.5") else {}, + **{"type_params": []} if gast_util.get_gast_version() >= Version("0.5.5") else {}, ) module, source, _ = loader.load_ast(node) diff --git a/dali/test/python/test_dali_tf_dataset_mnist.py b/dali/test/python/test_dali_tf_dataset_mnist.py index 999b7524c3a..55731421955 100644 --- a/dali/test/python/test_dali_tf_dataset_mnist.py +++ b/dali/test/python/test_dali_tf_dataset_mnist.py @@ -21,7 +21,7 @@ from shutil import rmtree as remove_directory import tensorflow as tf import tensorflow.compat.v1 as tf_v1 -from distutils.version import StrictVersion +from packaging.version import Version from nose import SkipTest @@ -116,7 +116,7 @@ def run_keras_single_device(device="cpu", device_id=0): def graph_model(images, reuse, is_training): - if StrictVersion(tf.__version__) >= StrictVersion("2.16"): + if Version(tf.__version__) >= Version("2.16"): raise SkipTest("TF < 2.16 is required for this test") with tf_v1.variable_scope("mnist_net", reuse=reuse): images = tf_v1.layers.flatten(images) @@ -196,7 +196,7 @@ def _run_config(device="cpu", device_id=0): def run_estimators_single_device(device="cpu", device_id=0): - if StrictVersion(tf.__version__) < StrictVersion("2.16"): + if Version(tf.__version__) < Version("2.16"): with tf.device("/{0}:{1}".format(device, device_id)): model = keras_model() model = tf.keras.estimator.model_to_estimator( diff --git a/dali/test/python/test_dali_tf_dataset_mnist_eager.py b/dali/test/python/test_dali_tf_dataset_mnist_eager.py index d84054a62b7..dcc93be60d0 100644 --- a/dali/test/python/test_dali_tf_dataset_mnist_eager.py +++ b/dali/test/python/test_dali_tf_dataset_mnist_eager.py @@ -19,7 +19,7 @@ from test_utils_tensorflow import skip_for_incompatible_tf, available_gpus from nose_utils import raises from nose import SkipTest -from distutils.version import LooseVersion +from packaging.version import Version tf.compat.v1.enable_eager_execution() @@ -60,7 +60,7 @@ def test_keras_wrong_placement_cpu(): def test_keras_multi_gpu_mirrored_strategy(): # due to compatibility problems between the driver, cuda version and # TensorFlow 2.12 
test_keras_multi_gpu_mirrored_strategy doesn't work. - if LooseVersion(tf.__version__) >= LooseVersion("2.12.0"): + if Version(tf.__version__) >= Version("2.12.0"): raise SkipTest("This test is not supported for TensorFlow 2.12") strategy = tf.distribute.MirroredStrategy(devices=available_gpus()) diff --git a/dali/test/python/test_dali_tf_dataset_mnist_graph.py b/dali/test/python/test_dali_tf_dataset_mnist_graph.py index 88bbe297912..0a1aba5441c 100644 --- a/dali/test/python/test_dali_tf_dataset_mnist_graph.py +++ b/dali/test/python/test_dali_tf_dataset_mnist_graph.py @@ -16,35 +16,35 @@ import tensorflow.compat.v1 as tf_v1 from nose_utils import with_setup, SkipTest, raises import test_dali_tf_dataset_mnist as mnist -from distutils.version import StrictVersion +from packaging.version import Version mnist.tf.compat.v1.disable_eager_execution() @with_setup(tf.keras.backend.clear_session) def test_keras_single_gpu(): - if StrictVersion(tf.__version__) >= StrictVersion("2.16"): + if Version(tf.__version__) >= Version("2.16"): raise SkipTest("TF < 2.16 is required for this test") mnist.run_keras_single_device("gpu", 0) @with_setup(tf.keras.backend.clear_session) def test_keras_single_other_gpu(): - if StrictVersion(tf.__version__) >= StrictVersion("2.16"): + if Version(tf.__version__) >= Version("2.16"): raise SkipTest("TF < 2.16 is required for this test") mnist.run_keras_single_device("gpu", 1) @with_setup(tf.keras.backend.clear_session) def test_keras_single_cpu(): - if StrictVersion(tf.__version__) >= StrictVersion("2.16"): + if Version(tf.__version__) >= Version("2.16"): raise SkipTest("TF < 2.16 is required for this test") mnist.run_keras_single_device("cpu", 0) @raises(tf.errors.OpError, "TF device and DALI device mismatch. TF*: CPU, DALI*: GPU for output") def test_keras_wrong_placement_gpu(): - if StrictVersion(tf.__version__) >= StrictVersion("2.16"): + if Version(tf.__version__) >= Version("2.16"): raise SkipTest("TF < 2.16 is required for this test") with tf.device("cpu:0"): model = mnist.keras_model() @@ -55,7 +55,7 @@ def test_keras_wrong_placement_gpu(): @raises(tf.errors.OpError, "TF device and DALI device mismatch. 
TF*: GPU, DALI*: CPU for output") def test_keras_wrong_placement_cpu(): - if StrictVersion(tf.__version__) >= StrictVersion("2.16"): + if Version(tf.__version__) >= Version("2.16"): raise SkipTest("TF < 2.16 is required for this test") with tf.device("gpu:0"): model = mnist.keras_model() diff --git a/dali/test/python/test_utils.py b/dali/test/python/test_utils.py index 2603c875b32..a8ce3519a8f 100644 --- a/dali/test/python/test_utils.py +++ b/dali/test/python/test_utils.py @@ -26,7 +26,7 @@ import subprocess import sys import tempfile -from distutils.version import LooseVersion +from packaging.version import Version from nose_utils import SkipTest @@ -945,8 +945,8 @@ def check_numba_compatibility_cpu(if_skip=True): # Numba bug: # https://github.com/numba/numba/issues/8567 if platform.processor().lower() in ("arm64", "aarch64", "armv8") and ( - LooseVersion(numba.__version__) >= LooseVersion("0.57.0") - and LooseVersion(numba.__version__) < LooseVersion("0.59.0") + Version(numba.__version__) >= Version("0.57.0") + and Version(numba.__version__) < Version("0.59.0") ): if if_skip: raise SkipTest() diff --git a/dali_tf_plugin/build_dali_tf.sh b/dali_tf_plugin/build_dali_tf.sh index 2419b57b14b..3986e2da4c0 100755 --- a/dali_tf_plugin/build_dali_tf.sh +++ b/dali_tf_plugin/build_dali_tf.sh @@ -27,7 +27,7 @@ DALI_LFLAGS="-L${DALI_STUB_DIR} -ldali" TF_CFLAGS=( $($PYTHON -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_compile_flags()))') ) TF_LFLAGS=( $($PYTHON -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_link_flags()))') ) -CPP_VER=( $($PYTHON -c "import tensorflow as tf; from distutils.version import LooseVersion; print('--std=c++14' if tf.__version__ < LooseVersion('2.10') else '--std=c++17')") ) +CPP_VER=( $($PYTHON -c "import tensorflow as tf; from packaging.version import Version; print('--std=c++14' if Version(tf.__version__) < Version('2.10') else '--std=c++17')") ) # Note: DNDEBUG flag is needed due to issue with TensorFlow custom ops: # https://github.com/tensorflow/tensorflow/issues/17316 diff --git a/dali_tf_plugin/dali_tf_plugin_install_tool.py b/dali_tf_plugin/dali_tf_plugin_install_tool.py index d33e83f4230..b4a3078f2c2 100644 --- a/dali_tf_plugin/dali_tf_plugin_install_tool.py +++ b/dali_tf_plugin/dali_tf_plugin_install_tool.py @@ -28,7 +28,7 @@ get_tf_build_flags, ) import os -from distutils.version import StrictVersion, LooseVersion +from packaging.version import Version from pathlib import Path import tempfile from stubgen import stubgen @@ -129,7 +129,7 @@ def __init__(self, plugin_dest_dir=None): self.can_install_prebuilt = ( not self.always_build and bool(self.tf_compiler) - and StrictVersion(self.tf_compiler) >= StrictVersion("5.0") + and Version(self.tf_compiler) >= Version("5.0") and self.is_compatible_with_prebuilt_bin and self.prebuilt_dali_stub is not None ) @@ -162,8 +162,8 @@ def __init__(self, plugin_dest_dir=None): or self.default_cpp_version == self.tf_compiler or not bool(self.tf_compiler) or ( - StrictVersion(self.default_cpp_version) >= StrictVersion("5.0") - and StrictVersion(self.tf_compiler) >= StrictVersion("5.0") + Version(self.default_cpp_version) >= Version("5.0") + and Version(self.tf_compiler) >= Version("5.0") ) ) @@ -366,7 +366,7 @@ def build(self): lib_path = os.path.join(self.plugin_dest_dir, lib_filename) # for a newer TF we need to compiler with C++17 - cpp_ver = "--std=c++14" if self.tf_version < LooseVersion("2.10") else "--std=c++17" + cpp_ver = "--std=c++14" if Version(self.tf_version) < Version("2.10") else 
"--std=c++17" # Note: DNDEBUG flag is needed due to issue with TensorFlow custom ops: # https://github.com/tensorflow/tensorflow/issues/17316 # Do not remove it. diff --git a/dali_tf_plugin/dali_tf_plugin_utils.py b/dali_tf_plugin/dali_tf_plugin_utils.py index 2266f42c5f9..712f56a39d6 100644 --- a/dali_tf_plugin/dali_tf_plugin_utils.py +++ b/dali_tf_plugin/dali_tf_plugin_utils.py @@ -17,7 +17,7 @@ import re import sys import fnmatch -from distutils.version import StrictVersion +from packaging.version import Version # Find file matching `pattern` in `path` @@ -66,7 +66,7 @@ def get_tf_compiler_version(): res = re.search(r"GCC:\s*\(.*\)\s*(\d+.\d+).\d+", line) if res: ver = res.group(1) - if not ret_ver or StrictVersion(ret_ver) < StrictVersion(ver): + if not ret_ver or Version(ret_ver) < Version(ver): ret_ver = ver return ret_ver diff --git a/dali_tf_plugin/setup.py.in b/dali_tf_plugin/setup.py.in index f614ea8e8a5..112ef084c06 100644 --- a/dali_tf_plugin/setup.py.in +++ b/dali_tf_plugin/setup.py.in @@ -88,7 +88,8 @@ For more details please check the 'Programming Language :: Python :: 3.12', ], install_requires = [ - 'nvidia-dali@DALI_FLAVOR_MINUS@-cuda@CUDA_VERSION_SHORT_DIGIT_ONLY@==@DALI_VERSION@' + 'nvidia-dali@DALI_FLAVOR_MINUS@-cuda@CUDA_VERSION_SHORT_DIGIT_ONLY@==@DALI_VERSION@', + 'packaging', ], cmdclass={ diff --git a/docker/Dockerfile.customopbuilder.clean b/docker/Dockerfile.customopbuilder.clean index 1683a2a9ebd..0b3f42eae8f 100644 --- a/docker/Dockerfile.customopbuilder.clean +++ b/docker/Dockerfile.customopbuilder.clean @@ -100,6 +100,7 @@ RUN rm -f /usr/bin/python && \ rm get-pip.py; \ fi && \ pip install --upgrade pip && \ + pip install packaging && \ python --version && \ pip --version diff --git a/docs/examples/use_cases/paddle/resnet50/utils/config.py b/docs/examples/use_cases/paddle/resnet50/utils/config.py index c77ea7422cc..a88aca56d57 100644 --- a/docs/examples/use_cases/paddle/resnet50/utils/config.py +++ b/docs/examples/use_cases/paddle/resnet50/utils/config.py @@ -13,16 +13,37 @@ # limitations under the License. 
import os +import sys import copy import argparse import logging -import distutils.util import dllogger from utils.mode import RunScope from utils.utility import get_num_trainers from utils.save_load import _PDOPT_SUFFIX, _PDPARAMS_SUFFIX -_AUTO_LAST_EPOCH = 'auto' +_AUTO_LAST_EPOCH = "auto" + +_true_set = {'yes', 'true', 't', 'y', '1'} +_false_set = {'no', 'false', 'f', 'n', '0'} + +# based on https://github.com/symonsoft/str2bool/tree/master +# BSD3 license +def str2bool(value, raise_exc=False): + if ( + isinstance(value, str) + or sys.version_info[0] < 3 + and isinstance(value, basestring) + ): + value = value.lower() + if value in _true_set: + return True + if value in _false_set: + return False + + if raise_exc: + raise ValueError('Expected "%s"' % '", "'.join(_true_set | _false_set)) + return None def _get_full_path_of_ckpt(args): @@ -38,16 +59,18 @@ def _check_file_exist(path_with_prefix): found = True return found, pdopt_path, pdparams_path - target_from_checkpoint = os.path.join(args.from_checkpoint, - args.model_prefix) + target_from_checkpoint = os.path.join( + args.from_checkpoint, args.model_prefix + ) if args.last_epoch_of_checkpoint is None: args.last_epoch_of_checkpoint = -1 elif args.last_epoch_of_checkpoint == _AUTO_LAST_EPOCH: folders = os.listdir(args.from_checkpoint) args.last_epoch_of_checkpoint = -1 for folder in folders: - tmp_ckpt_path = os.path.join(args.from_checkpoint, folder, - args.model_prefix) + tmp_ckpt_path = os.path.join( + args.from_checkpoint, folder, args.model_prefix + ) try: folder = int(folder) @@ -57,19 +80,27 @@ def _check_file_exist(path_with_prefix): ) continue - if folder > args.last_epoch_of_checkpoint and \ - _check_file_exist(tmp_ckpt_path)[0]: + if ( + folder > args.last_epoch_of_checkpoint + and _check_file_exist(tmp_ckpt_path)[0] + ): args.last_epoch_of_checkpoint = folder - epoch_with_prefix = os.path.join(str(args.last_epoch_of_checkpoint), args.model_prefix) \ - if args.last_epoch_of_checkpoint > -1 else args.model_prefix - target_from_checkpoint = os.path.join(args.from_checkpoint, - epoch_with_prefix) + epoch_with_prefix = ( + os.path.join(str(args.last_epoch_of_checkpoint), args.model_prefix) + if args.last_epoch_of_checkpoint > -1 + else args.model_prefix + ) + target_from_checkpoint = os.path.join( + args.from_checkpoint, epoch_with_prefix + ) else: try: args.last_epoch_of_checkpoint = int(args.last_epoch_of_checkpoint) except ValueError: - raise ValueError(f"The value of --last-epoch-of-checkpoint should be None, {_AUTO_LAST_EPOCH}" \ - f" or integer >= 0, but receive {args.last_epoch_of_checkpoint}") + raise ValueError( + f"The value of --last-epoch-of-checkpoint should be None, {_AUTO_LAST_EPOCH}" + f" or integer >= 0, but receive {args.last_epoch_of_checkpoint}" + ) args.from_checkpoint = target_from_checkpoint found, pdopt_path, pdparams_path = _check_file_exist(args.from_checkpoint) @@ -86,13 +117,15 @@ def _get_full_path_of_pretrained_params(args): args.last_epoch_of_checkpoint = -1 return - args.from_pretrained_params = os.path.join(args.from_pretrained_params, - args.model_prefix) + args.from_pretrained_params = os.path.join( + args.from_pretrained_params, args.model_prefix + ) pdparams_path = args.from_pretrained_params + _PDPARAMS_SUFFIX if not os.path.exists(pdparams_path): args.from_pretrained_params = None logging.warning( - f"Cannot find {pdparams_path}, disable --from-pretrained-params.") + f"Cannot find {pdparams_path}, disable --from-pretrained-params." 
+ ) args.last_epoch_of_checkpoint = -1 @@ -102,7 +135,7 @@ def print_args(args): # Due to dllogger cannot serialize Enum into JSON. args_for_log.run_scope = args_for_log.run_scope.value - dllogger.log(step='PARAMETER', data=vars(args_for_log)) + dllogger.log(step="PARAMETER", data=vars(args_for_log)) def check_and_process_args(args): @@ -112,25 +145,31 @@ def check_and_process_args(args): if args.run_scope == scope.value: run_scope = scope break - assert run_scope is not None, \ - f"only support {[scope.value for scope in RunScope]} as run_scope" + assert ( + run_scope is not None + ), f"only support {[scope.value for scope in RunScope]} as run_scope" args.run_scope = run_scope # Precess image layout and channel args.image_channel = args.image_shape[0] if args.data_layout == "NHWC": args.image_shape = [ - args.image_shape[1], args.image_shape[2], args.image_shape[0] + args.image_shape[1], + args.image_shape[2], + args.image_shape[0], ] # Precess learning rate args.lr = get_num_trainers() * args.lr # Precess model loading - assert not (args.from_checkpoint is not None and \ - args.from_pretrained_params is not None), \ - "--from-pretrained-params and --from-checkpoint should " \ - "not be set simultaneously." + assert not ( + args.from_checkpoint is not None + and args.from_pretrained_params is not None + ), ( + "--from-pretrained-params and --from-checkpoint should " + "not be set simultaneously." + ) _get_full_path_of_pretrained_params(args) _get_full_path_of_ckpt(args) args.start_epoch = args.last_epoch_of_checkpoint + 1 @@ -138,12 +177,12 @@ def check_and_process_args(args): # Precess benchmark if args.benchmark: assert args.run_scope in [ - RunScope.TRAIN_ONLY, RunScope.EVAL_ONLY + RunScope.TRAIN_ONLY, + RunScope.EVAL_ONLY, ], "If benchmark enabled, run_scope must be `train_only` or `eval_only`" # Only run one epoch when benchmark or eval_only. - if args.benchmark or \ - (args.run_scope == RunScope.EVAL_ONLY): + if args.benchmark or (args.run_scope == RunScope.EVAL_ONLY): args.epochs = args.start_epoch + 1 if args.run_scope == RunScope.EVAL_ONLY: @@ -151,366 +190,410 @@ def check_and_process_args(args): def add_global_args(parser): - group = parser.add_argument_group('Global') + group = parser.add_argument_group("Global") group.add_argument( - '--output-dir', + "--output-dir", type=str, - default='./output/', - help='A path to store trained models.') + default="./output/", + help="A path to store trained models.", + ) group.add_argument( - '--run-scope', - default='train_eval', - choices=('train_eval', 'train_only', 'eval_only'), - help='Running scope. It should be one of {train_eval, train_only, eval_only}.' + "--run-scope", + default="train_eval", + choices=("train_eval", "train_only", "eval_only"), + help="Running scope. It should be one of {train_eval, train_only, eval_only}.", ) group.add_argument( - '--epochs', + "--epochs", type=int, default=90, - help='The number of epochs for training.') + help="The number of epochs for training.", + ) group.add_argument( - '--save-interval', + "--save-interval", type=int, default=1, - help='The iteration interval to save checkpoints.') + help="The iteration interval to save checkpoints.", + ) group.add_argument( - '--eval-interval', + "--eval-interval", type=int, default=1, - help='The iteration interval to test trained models on a given validation dataset. ' \ - 'Ignored when --run-scope is train_only.' + help="The iteration interval to test trained models on a given validation dataset. 
" + "Ignored when --run-scope is train_only.", ) group.add_argument( - '--print-interval', + "--print-interval", type=int, default=10, - help='The iteration interval to show training/evaluation message.') + help="The iteration interval to show training/evaluation message.", + ) group.add_argument( - '--report-file', + "--report-file", type=str, - default='./report.json', - help='A file in which to store JSON experiment report.') + default="./report.json", + help="A file in which to store JSON experiment report.", + ) group.add_argument( - '--data-layout', - default='NCHW', - choices=('NCHW', 'NHWC'), - help='Data format. It should be one of {NCHW, NHWC}.') + "--data-layout", + default="NCHW", + choices=("NCHW", "NHWC"), + help="Data format. It should be one of {NCHW, NHWC}.", + ) group.add_argument( - '--benchmark', action='store_true', help='To enable benchmark mode.') + "--benchmark", action="store_true", help="To enable benchmark mode." + ) group.add_argument( - '--benchmark-steps', + "--benchmark-steps", type=int, default=100, - help='Steps for benchmark run, only be applied when --benchmark is set.' + help="Steps for benchmark run, only be applied when --benchmark is set.", ) group.add_argument( - '--benchmark-warmup-steps', + "--benchmark-warmup-steps", type=int, default=100, - help='Warmup steps for benchmark run, only be applied when --benchmark is set.' + help="Warmup steps for benchmark run, only be applied when --benchmark is set.", ) group.add_argument( - '--model-prefix', + "--model-prefix", type=str, default="resnet_50_paddle", - help='The prefix name of model files to save/load.') + help="The prefix name of model files to save/load.", + ) group.add_argument( - '--from-pretrained-params', + "--from-pretrained-params", type=str, default=None, - help='A folder path which contains pretrained parameters, that is a file in name' \ - ' --model-prefix + .pdparams. It should not be set with --from-checkpoint' \ - ' at the same time.' + help="A folder path which contains pretrained parameters, that is a file in name" + " --model-prefix + .pdparams. It should not be set with --from-checkpoint" + " at the same time.", ) group.add_argument( - '--from-checkpoint', + "--from-checkpoint", type=str, default=None, - help='A checkpoint path to resume training. It should not be set ' \ - 'with --from-pretrained-params at the same time. The path provided ' \ - 'could be a folder contains < epoch_id/ckpt_files > or < ckpt_files >.' + help="A checkpoint path to resume training. It should not be set " + "with --from-pretrained-params at the same time. The path provided " + "could be a folder contains < epoch_id/ckpt_files > or < ckpt_files >.", ) group.add_argument( - '--last-epoch-of-checkpoint', + "--last-epoch-of-checkpoint", type=str, default=None, - help='The epoch id of the checkpoint given by --from-checkpoint. ' \ - 'It should be None, auto or integer >= 0. If it is set as ' \ - 'None, then training will start from 0-th epoch. If it is set as ' \ - 'auto, then it will search largest integer-convertable folder ' \ - ' --from-checkpoint, which contains required checkpoint. ' \ - 'Default is None.' + help="The epoch id of the checkpoint given by --from-checkpoint. " + "It should be None, auto or integer >= 0. If it is set as " + "None, then training will start from 0-th epoch. If it is set as " + "auto, then it will search largest integer-convertable folder " + " --from-checkpoint, which contains required checkpoint. 
" + "Default is None.", ) group.add_argument( - '--show-config', - type=distutils.util.strtobool, + "--show-config", + type=str2bool, default=True, - help='To show arguments.') + help="To show arguments.", + ) group.add_argument( - '--enable-cpu-affinity', - type=distutils.util.strtobool, + "--enable-cpu-affinity", + type=str2bool, default=True, - help='To enable in-built GPU-CPU affinity.') + help="To enable in-built GPU-CPU affinity.", + ) return parser def add_advance_args(parser): - group = parser.add_argument_group('Advanced Training') + group = parser.add_argument_group("Advanced Training") # AMP group.add_argument( - '--amp', - action='store_true', - help='Enable automatic mixed precision training (AMP).') + "--amp", + action="store_true", + help="Enable automatic mixed precision training (AMP).", + ) group.add_argument( - '--scale-loss', + "--scale-loss", type=float, default=1.0, - help='The loss scalar for AMP training, only be applied when --amp is set.' + help="The loss scalar for AMP training, only be applied when --amp is set.", ) group.add_argument( - '--use-dynamic-loss-scaling', - action='store_true', - help='Enable dynamic loss scaling in AMP training, only be applied when --amp is set.' + "--use-dynamic-loss-scaling", + action="store_true", + help="Enable dynamic loss scaling in AMP training, only be applied when --amp is set.", ) group.add_argument( - '--use-pure-fp16', - action='store_true', - help='Enable pure FP16 training, only be applied when --amp is set.') + "--use-pure-fp16", + action="store_true", + help="Enable pure FP16 training, only be applied when --amp is set.", + ) group.add_argument( - '--fuse-resunit', - action='store_true', - help='Enable CUDNNv8 ResUnit fusion, only be applied when --amp is set.') + "--fuse-resunit", + action="store_true", + help="Enable CUDNNv8 ResUnit fusion, only be applied when --amp is set.", + ) # ASP group.add_argument( - '--asp', - action='store_true', - help='Enable automatic sparse training (ASP).') + "--asp", + action="store_true", + help="Enable automatic sparse training (ASP).", + ) group.add_argument( - '--prune-model', - action='store_true', - help='Prune model to 2:4 sparse pattern, only be applied when --asp is set.' + "--prune-model", + action="store_true", + help="Prune model to 2:4 sparse pattern, only be applied when --asp is set.", ) group.add_argument( - '--mask-algo', - default='mask_1d', - choices=('mask_1d', 'mask_2d_greedy', 'mask_2d_best'), - help='The algorithm to generate sparse masks. It should be one of ' \ - '{mask_1d, mask_2d_greedy, mask_2d_best}. This only be applied ' \ - 'when --asp and --prune-model is set.' + "--mask-algo", + default="mask_1d", + choices=("mask_1d", "mask_2d_greedy", "mask_2d_best"), + help="The algorithm to generate sparse masks. It should be one of " + "{mask_1d, mask_2d_greedy, mask_2d_best}. This only be applied " + "when --asp and --prune-model is set.", ) return parser def add_dataset_args(parser): def float_list(x): - return list(map(float, x.split(','))) + return list(map(float, x.split(","))) def int_list(x): - return list(map(int, x.split(','))) + return list(map(int, x.split(","))) - dataset_group = parser.add_argument_group('Dataset') + dataset_group = parser.add_argument_group("Dataset") dataset_group.add_argument( - '--image-root', + "--image-root", type=str, - default='/imagenet', - help='A root folder of train/val images. It should contain train and val folders, ' \ - 'which store corresponding images.' 
+ default="/imagenet", + help="A root folder of train/val images. It should contain train and val folders, " + "which store corresponding images.", ) dataset_group.add_argument( - '--image-shape', + "--image-shape", type=int_list, default=[4, 224, 224], - help='The image shape. Its shape should be [channel, height, width].') + help="The image shape. Its shape should be [channel, height, width].", + ) # Data Loader dataset_group.add_argument( - '--batch-size', + "--batch-size", type=int, default=256, - help='The batch size for both training and evaluation.') + help="The batch size for both training and evaluation.", + ) dataset_group.add_argument( - '--dali-random-seed', + "--dali-random-seed", type=int, default=42, - help='The random seed for DALI data loader.') + help="The random seed for DALI data loader.", + ) dataset_group.add_argument( - '--dali-num-threads', + "--dali-num-threads", type=int, default=4, - help='The number of threads applied to DALI data loader.') + help="The number of threads applied to DALI data loader.", + ) dataset_group.add_argument( - '--dali-output-fp16', - action='store_true', - help='Output FP16 data from DALI data loader.') + "--dali-output-fp16", + action="store_true", + help="Output FP16 data from DALI data loader.", + ) # Augmentation - augmentation_group = parser.add_argument_group('Data Augmentation') + augmentation_group = parser.add_argument_group("Data Augmentation") augmentation_group.add_argument( - '--crop-size', + "--crop-size", type=int, default=224, - help='The size to crop input images.') + help="The size to crop input images.", + ) augmentation_group.add_argument( - '--rand-crop-scale', + "--rand-crop-scale", type=float_list, - default=[0.08, 1.], - help='Range from which to choose a random area fraction.') + default=[0.08, 1.0], + help="Range from which to choose a random area fraction.", + ) augmentation_group.add_argument( - '--rand-crop-ratio', + "--rand-crop-ratio", type=float_list, default=[3.0 / 4, 4.0 / 3], - help='Range from which to choose a random aspect ratio (width/height).') + help="Range from which to choose a random aspect ratio (width/height).", + ) augmentation_group.add_argument( - '--normalize-scale', + "--normalize-scale", type=float, default=1.0 / 255.0, - help='A scalar to normalize images.') + help="A scalar to normalize images.", + ) augmentation_group.add_argument( - '--normalize-mean', + "--normalize-mean", type=float_list, default=[0.485, 0.456, 0.406], - help='The mean values to normalize RGB images.') + help="The mean values to normalize RGB images.", + ) augmentation_group.add_argument( - '--normalize-std', + "--normalize-std", type=float_list, default=[0.229, 0.224, 0.225], - help='The std values to normalize RGB images.') + help="The std values to normalize RGB images.", + ) augmentation_group.add_argument( - '--resize-short', + "--resize-short", type=int, default=256, - help='The length of the shorter dimension of the resized image.') + help="The length of the shorter dimension of the resized image.", + ) return parser def add_model_args(parser): - group = parser.add_argument_group('Model') + group = parser.add_argument_group("Model") group.add_argument( - '--model-arch-name', + "--model-arch-name", type=str, - default='ResNet50', - help='The model architecture name. It should be one of {ResNet50}.') + default="ResNet50", + help="The model architecture name. 
It should be one of {ResNet50}.", + ) group.add_argument( - '--num-of-class', + "--num-of-class", type=int, default=1000, - help='The number classes of images.') + help="The number classes of images.", + ) group.add_argument( - '--bn-weight-decay', - action='store_true', - help='Apply weight decay to BatchNorm shift and scale.') + "--bn-weight-decay", + action="store_true", + help="Apply weight decay to BatchNorm shift and scale.", + ) return parser def add_training_args(parser): - group = parser.add_argument_group('Training') + group = parser.add_argument_group("Training") group.add_argument( - '--label-smoothing', + "--label-smoothing", type=float, default=0.1, - help='The ratio of label smoothing.') + help="The ratio of label smoothing.", + ) group.add_argument( - '--optimizer', - default='Momentum', + "--optimizer", + default="Momentum", metavar="OPTIMIZER", - choices=('Momentum'), - help='The name of optimizer. It should be one of {Momentum}.') + choices=("Momentum"), + help="The name of optimizer. It should be one of {Momentum}.", + ) group.add_argument( - '--momentum', + "--momentum", type=float, default=0.875, - help='The momentum value of optimizer.') + help="The momentum value of optimizer.", + ) group.add_argument( - '--weight-decay', + "--weight-decay", type=float, default=3.0517578125e-05, - help='The coefficient of weight decay.') + help="The coefficient of weight decay.", + ) group.add_argument( - '--lr-scheduler', - default='Cosine', + "--lr-scheduler", + default="Cosine", metavar="LR_SCHEDULER", - choices=('Cosine'), - help='The name of learning rate scheduler. It should be one of {Cosine}.' + choices=("Cosine"), + help="The name of learning rate scheduler. It should be one of {Cosine}.", ) group.add_argument( - '--lr', type=float, default=0.256, help='The initial learning rate.') + "--lr", type=float, default=0.256, help="The initial learning rate." + ) group.add_argument( - '--warmup-epochs', + "--warmup-epochs", type=int, default=5, - help='The number of epochs for learning rate warmup.') + help="The number of epochs for learning rate warmup.", + ) group.add_argument( - '--warmup-start-lr', + "--warmup-start-lr", type=float, default=0.0, - help='The initial learning rate for warmup.') + help="The initial learning rate for warmup.", + ) return parser def add_trt_args(parser): - group = parser.add_argument_group('Paddle-TRT') + group = parser.add_argument_group("Paddle-TRT") group.add_argument( - '--device', + "--device", type=int, - default='0', - help='The GPU device id for Paddle-TRT inference.' + default="0", + help="The GPU device id for Paddle-TRT inference.", ) group.add_argument( - '--trt-inference-dir', + "--trt-inference-dir", type=str, - default='./inference', - help='A path to store/load inference models. ' \ - 'export_model.py would export models to this folder, ' \ - 'then inference.py would load from here.' + default="./inference", + help="A path to store/load inference models. " + "export_model.py would export models to this folder, " + "then inference.py would load from here.", ) group.add_argument( - '--trt-precision', - default='FP32', - choices=('FP32', 'FP16', 'INT8'), - help='The precision of TensorRT. It should be one of {FP32, FP16, INT8}.' + "--trt-precision", + default="FP32", + choices=("FP32", "FP16", "INT8"), + help="The precision of TensorRT. 
It should be one of {FP32, FP16, INT8}.", ) group.add_argument( - '--trt-workspace-size', + "--trt-workspace-size", type=int, default=(1 << 30), - help='The memory workspace of TensorRT in MB.') + help="The memory workspace of TensorRT in MB.", + ) group.add_argument( - '--trt-min-subgraph-size', + "--trt-min-subgraph-size", type=int, default=3, - help='The minimal subgraph size to enable PaddleTRT.') + help="The minimal subgraph size to enable PaddleTRT.", + ) group.add_argument( - '--trt-use-static', - type=distutils.util.strtobool, + "--trt-use-static", + type=str2bool, default=False, - help='Fix TensorRT engine at first running.') + help="Fix TensorRT engine at first running.", + ) group.add_argument( - '--trt-use-calib-mode', - type=distutils.util.strtobool, + "--trt-use-calib-mode", + type=str2bool, default=False, - help='Use the PTQ calibration of PaddleTRT int8.') + help="Use the PTQ calibration of PaddleTRT int8.", + ) group.add_argument( - '--trt-export-log-path', + "--trt-export-log-path", type=str, - default='./export.json', - help='A file in which to store JSON model exporting report.') + default="./export.json", + help="A file in which to store JSON model exporting report.", + ) group.add_argument( - '--trt-log-path', + "--trt-log-path", type=str, - default='./inference.json', - help='A file in which to store JSON inference report.') + default="./inference.json", + help="A file in which to store JSON inference report.", + ) group.add_argument( - '--trt-use-synthetic', - type=distutils.util.strtobool, + "--trt-use-synthetic", + type=str2bool, default=False, - help='Apply synthetic data for benchmark.') + help="Apply synthetic data for benchmark.", + ) return parser def parse_args(including_trt=False): parser = argparse.ArgumentParser( description="PaddlePaddle RN50v1.5 training script", - formatter_class=argparse.ArgumentDefaultsHelpFormatter) + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) parser = add_global_args(parser) parser = add_dataset_args(parser) parser = add_model_args(parser) diff --git a/docs/examples/use_cases/tensorflow/resnet-n/nvutils/common.py b/docs/examples/use_cases/tensorflow/resnet-n/nvutils/common.py index 55eb620abd2..a36c753e2e4 100644 --- a/docs/examples/use_cases/tensorflow/resnet-n/nvutils/common.py +++ b/docs/examples/use_cases/tensorflow/resnet-n/nvutils/common.py @@ -1,5 +1,5 @@ import tensorflow as tf -from distutils.version import StrictVersion +from packaging.version import Version BASE_LEARNING_RATE = 0.1 @@ -18,7 +18,7 @@ def create_piecewise_constant_decay_with_warmup(batch_size, epoch_size, rescaled_lr = BASE_LEARNING_RATE * batch_size / base_lr_batch_size step_boundaries = [float(steps_per_epoch) * x for x in boundaries] lr_values = [rescaled_lr * m for m in multipliers] - if StrictVersion(tf.__version__) >= StrictVersion("2.13"): + if Version(tf.__version__) >= Version("2.13"): warmup_steps = int(warmup_epochs * steps_per_epoch) else: warmup_steps = warmup_epochs * steps_per_epoch @@ -38,7 +38,7 @@ def __init__(self, rescaled_lr, step_boundaries, lr_values, warmup_steps, super(PiecewiseConstantDecayWithWarmup, self).__init__() self.rescaled_lr = rescaled_lr - if StrictVersion(tf.__version__) >= StrictVersion("2.13"): + if Version(tf.__version__) >= Version("2.13"): self.step_boundaries = [int(b) for b in step_boundaries] else: self.step_boundaries = step_boundaries diff --git a/docs/examples/use_cases/tensorflow/resnet-n/nvutils/hvd_patch.py b/docs/examples/use_cases/tensorflow/resnet-n/nvutils/hvd_patch.py index 
4dca4f6577a..5f328eff4d6 100644 --- a/docs/examples/use_cases/tensorflow/resnet-n/nvutils/hvd_patch.py +++ b/docs/examples/use_cases/tensorflow/resnet-n/nvutils/hvd_patch.py @@ -8,10 +8,10 @@ import horovod.tensorflow as hvd import tensorflow as tf from nvutils import common -from distutils.version import LooseVersion +from packaging.version import Version from horovod.tensorflow import Average, Compression, Sum -_PRE_TF_2_4_0 = LooseVersion(tf.__version__) < LooseVersion('2.4.0') +_PRE_TF_2_4_0 = Version(tf.__version__) < Version('2.4.0') def create_distributed_optimizer( keras, optimizer, name, device_dense, device_sparse, compression, diff --git a/docs/examples/use_cases/tensorflow/resnet-n/nvutils/runner.py b/docs/examples/use_cases/tensorflow/resnet-n/nvutils/runner.py index a53034dbe06..91af31bd94c 100755 --- a/docs/examples/use_cases/tensorflow/resnet-n/nvutils/runner.py +++ b/docs/examples/use_cases/tensorflow/resnet-n/nvutils/runner.py @@ -16,7 +16,7 @@ from nvutils import image_processing from nvutils import common -from distutils.version import StrictVersion +from packaging.version import Version import tensorflow as tf from tensorflow import keras @@ -27,8 +27,8 @@ from keras import backend print(tf.__version__) -if StrictVersion(tf.__version__) > StrictVersion("2.1.0"): - if StrictVersion(tf.__version__) >= StrictVersion("2.4.0"): +if Version(tf.__version__) > Version("2.1.0"): + if Version(tf.__version__) >= Version("2.4.0"): from tensorflow.python.keras.mixed_precision import device_compatibility_check else: from tensorflow.python.keras.mixed_precision.experimental import device_compatibility_check @@ -142,7 +142,7 @@ def train(model_func, params): tf.config.experimental.set_visible_devices(gpus[hvd.local_rank()], 'GPU') if precision == 'fp16': - if StrictVersion(tf.__version__) >= StrictVersion("2.4.0"): + if Version(tf.__version__) >= Version("2.4.0"): policy = keras.mixed_precision.Policy('mixed_float16') keras.mixed_precision.set_global_policy(policy) else: @@ -160,7 +160,7 @@ def train(model_func, params): # Horovod: add Horovod DistributedOptimizer. We use a modified version to # support the custom learning rate schedule. 
opt = hvd.DistributedOptimizer(opt) - if StrictVersion(tf.__version__) >= StrictVersion("2.4.0") and precision == 'fp16': + if Version(tf.__version__) >= Version("2.4.0") and precision == 'fp16': opt = keras.mixed_precision.LossScaleOptimizer(opt, dynamic=False, initial_scale=loss_scale) diff --git a/docs/examples/use_cases/tensorflow/resnet-n/nvutils/runner_ctl.py b/docs/examples/use_cases/tensorflow/resnet-n/nvutils/runner_ctl.py index bcdfa22bf4a..cf4f9266d22 100755 --- a/docs/examples/use_cases/tensorflow/resnet-n/nvutils/runner_ctl.py +++ b/docs/examples/use_cases/tensorflow/resnet-n/nvutils/runner_ctl.py @@ -17,7 +17,7 @@ from builtins import range from nvutils import image_processing from nvutils import common -from distutils.version import StrictVersion +from packaging.version import Version import tensorflow as tf from tensorflow import keras @@ -27,8 +27,8 @@ from keras import backend print(tf.__version__) -if StrictVersion(tf.__version__) > StrictVersion("2.1.0"): - if StrictVersion(tf.__version__) >= StrictVersion("2.4.0"): +if Version(tf.__version__) > Version("2.1.0"): + if Version(tf.__version__) >= Version("2.4.0"): from tensorflow.python.keras.mixed_precision import device_compatibility_check else: from tensorflow.python.keras.mixed_precision.experimental import device_compatibility_check @@ -105,7 +105,7 @@ def train_ctl(model_func, params): summary_writer = None if precision == 'fp16': - if StrictVersion(tf.__version__) >= StrictVersion("2.4.0"): + if Version(tf.__version__) >= Version("2.4.0"): policy = keras.mixed_precision.Policy('mixed_float16') keras.mixed_precision.set_global_policy(policy) else: diff --git a/qa/TL1_tensorflow_dataset/test_impl.sh b/qa/TL1_tensorflow_dataset/test_impl.sh index d3bec26540e..aae4b015fab 100755 --- a/qa/TL1_tensorflow_dataset/test_impl.sh +++ b/qa/TL1_tensorflow_dataset/test_impl.sh @@ -25,8 +25,8 @@ test_body() { pushd ../../../docs/examples/frameworks/tensorflow/ # TF 2.16 removed usage of tf.estimator the test uses is_below_2_16=$(python -c 'import tensorflow as tf; \ - from distutils.version import StrictVersion; \ - print(StrictVersion(tf.__version__) < StrictVersion("2.16"))') + from packaging.version import Version; \ + print(Version(tf.__version__) < Version("2.16"))') if [ $is_below_2_16 = 'True' ]; then jupyter nbconvert tensorflow-dataset.ipynb \ @@ -39,9 +39,9 @@ test_body() { # TensorFlow 2.12 test_keras_multi_gpu_mirrored_strategy doesn't work. is_compatible_distributed=$(python -c 'import nvidia.dali.plugin.tf as dali_tf; \ import tensorflow as tf; \ - from distutils.version import LooseVersion; \ + from packaging.version import Version; \ print(dali_tf.dataset_distributed_compatible_tensorflow() \ - and LooseVersion(tf.__version__) < LooseVersion("2.12.0"))') + and Version(tf.__version__) < Version("2.12.0"))') if [ $is_compatible_distributed = 'True' ]; then jupyter nbconvert tensorflow-dataset-multigpu.ipynb \ --to notebook --inplace --execute \