Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master' into upstream
Browse files Browse the repository at this point in the history
  • Loading branch information
mmakevic-amd committed Sep 6, 2024
2 parents dfb89db + d2c9b32 commit f35ddc0
Show file tree
Hide file tree
Showing 1,399 changed files with 42,440 additions and 20,873 deletions.
39 changes: 18 additions & 21 deletions .bazelrc
Original file line number Diff line number Diff line change
Expand Up @@ -219,13 +219,16 @@ build:mkl_aarch64_threadpool -c opt
build:cuda --repo_env TF_NEED_CUDA=1
build:cuda --crosstool_top=@local_config_cuda//crosstool:toolchain
build:cuda --@local_config_cuda//:enable_cuda
# Default CUDA and CUDNN versions.
build:cuda --repo_env=HERMETIC_CUDA_VERSION="12.3.2"
build:cuda --repo_env=HERMETIC_CUDNN_VERSION="8.9.7.29"
# This flag is needed to include hermetic CUDA libraries for bazel tests.
test:cuda --@local_config_cuda//cuda:include_hermetic_cuda_libs=true

# CUDA: This config refers to building CUDA op kernels with clang.
build:cuda_clang --config=cuda
# Enable TensorRT optimizations https://developer.nvidia.com/tensorrt
build:cuda_clang --config=tensorrt
build:cuda_clang --action_env=TF_CUDA_CLANG="1"
build:cuda_clang --@local_config_cuda//:cuda_compiler=clang
build:cuda_clang --copt=-Qunused-arguments
# Select supported compute capabilities (supported graphics cards).
# This is the same as the official TensorFlow builds.
# See https://developer.nvidia.com/cuda-gpus#compute
Expand All @@ -234,22 +237,22 @@ build:cuda_clang --@local_config_cuda//:cuda_compiler=clang
# release while SASS is only forward compatible inside the current
# major release. Example: sm_80 kernels can run on sm_89 GPUs but
# not on sm_90 GPUs. compute_80 kernels though can also run on sm_90 GPUs.
build:cuda_clang --repo_env=TF_CUDA_COMPUTE_CAPABILITIES="sm_60,sm_70,sm_80,sm_89,compute_90"
build:cuda_clang --repo_env=HERMETIC_CUDA_COMPUTE_CAPABILITIES="sm_60,sm_70,sm_80,sm_89,compute_90"
# Set lld as the linker.
build:cuda_clang --host_linkopt="-fuse-ld=lld"
build:cuda_clang --host_linkopt="-lm"
build:cuda_clang --linkopt="-fuse-ld=lld"
build:cuda_clang --linkopt="-lm"

# Set up compilation CUDA version and paths and use the CUDA Clang toolchain.
build:cuda_clang_official --config=cuda_clang
build:cuda_clang_official --action_env=TF_CUDA_VERSION="12"
build:cuda_clang_official --action_env=TF_CUDNN_VERSION="8"
build:cuda_clang_official --action_env=CUDA_TOOLKIT_PATH="/usr/local/cuda-12.3"
build:cuda_clang_official --action_env=GCC_HOST_COMPILER_PATH="/dt9/usr/bin/gcc"
build:cuda_clang_official --repo_env=HERMETIC_CUDA_VERSION="12.3.2"
build:cuda_clang_official --repo_env=HERMETIC_CUDNN_VERSION="8.9.7.29"
build:cuda_clang_official --action_env=CLANG_CUDA_COMPILER_PATH="/usr/lib/llvm-18/bin/clang"
build:cuda_clang_official --action_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"
build:cuda_clang_official --crosstool_top="@sigbuild-r2.17-clang_config_cuda//crosstool:toolchain"

# Build with nvcc for CUDA and clang for host
build:nvcc_clang --config=cuda
# Unfortunately, cuda_configure.bzl demands this for using nvcc + clang
build:nvcc_clang --action_env=TF_CUDA_CLANG="1"
build:nvcc_clang --action_env=TF_NVCC_CLANG="1"
build:nvcc_clang --@local_config_cuda//:cuda_compiler=nvcc

Expand Down Expand Up @@ -545,10 +548,6 @@ build:rbe_linux_cuda --config=cuda_clang_official
build:rbe_linux_cuda --config=rbe_linux_cpu
# For Remote build execution -- GPU configuration
build:rbe_linux_cuda --repo_env=REMOTE_GPU_TESTING=1
build:rbe_linux_cuda --repo_env=TF_CUDA_CONFIG_REPO="@sigbuild-r2.17-clang_config_cuda"
build:rbe_linux_cuda --repo_env=TF_TENSORRT_CONFIG_REPO="@sigbuild-r2.17-clang_config_tensorrt"
build:rbe_linux_cuda --repo_env=TF_NCCL_CONFIG_REPO="@sigbuild-r2.17-clang_config_nccl"
test:rbe_linux_cuda --test_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"

build:rbe_linux_cuda_nvcc --config=rbe_linux_cuda
build:rbe_linux_cuda_nvcc --config=nvcc_clang
Expand Down Expand Up @@ -633,7 +632,6 @@ build:release_cpu_linux_base --repo_env=BAZEL_COMPILER="/usr/lib/llvm-18/bin/cla
# Test-related settings below this point.
test:release_linux_base --build_tests_only --keep_going --test_output=errors --verbose_failures=true
test:release_linux_base --local_test_jobs=HOST_CPUS
test:release_linux_base --test_env=LD_LIBRARY_PATH
# Give only the list of failed tests at the end of the log
test:release_linux_base --test_summary=short

Expand All @@ -647,7 +645,6 @@ build:release_gpu_linux --config=release_cpu_linux
# Set up compilation CUDA version and paths and use the CUDA Clang toolchain.
# Note that linux cpu and cuda builds share the same toolchain now.
build:release_gpu_linux --config=cuda_clang_official
test:release_gpu_linux --test_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"
# --local_test_jobs has to be 4 because parallel_gpu_execute is fragile, I think
test:release_gpu_linux --test_timeout=300,450,1200,3600 --local_test_jobs=4 --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute

Expand All @@ -656,6 +653,7 @@ build:release_arm64_linux --config=linux_arm64
build:release_arm64_linux --crosstool_top="@ml2014_clang_aarch64_config_aarch64//crosstool:toolchain"
build:release_arm64_linux --config=mkl_aarch64_threadpool
build:release_arm64_linux --copt=-flax-vector-conversions
test:release_arm64_linux --flaky_test_attempts=3

# The old gcc linux build options are preserved in the unsupported_*_linux
# configs. If your project fails to build with Clang, you can use these
Expand All @@ -677,9 +675,8 @@ build:unsupported_gpu_linux --config=unsupported_cpu_linux
build:unsupported_gpu_linux --action_env=TF_CUDA_VERSION="11"
build:unsupported_gpu_linux --action_env=TF_CUDNN_VERSION="8"
build:unsupported_gpu_linux --repo_env=TF_CUDA_COMPUTE_CAPABILITIES="sm_35,sm_50,sm_60,sm_70,sm_75,compute_80"
build:unsupported_gpu_linux --config=tensorrt
build:unsupported_gpu_linux --action_env=CUDA_TOOLKIT_PATH="/usr/local/cuda-11.2"
build:unsupported_gpu_linux --action_env=LD_LIBRARY_PATH="/usr/local/cuda:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda-11.1/lib64:/usr/local/tensorrt/lib"
build:unsupported_gpu_linux --action_env=LD_LIBRARY_PATH="/usr/local/cuda:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda-11.1/lib64"
build:unsupported_gpu_linux --action_env=GCC_HOST_COMPILER_PATH="/dt9/usr/bin/gcc"
build:unsupported_gpu_linux --crosstool_top=@ubuntu20.04-gcc9_manylinux2014-cuda11.2-cudnn8.1-tensorrt7.2_config_cuda//crosstool:toolchain

Expand Down Expand Up @@ -774,7 +771,7 @@ test:linux_cuda_wheel_test --config=linux_cuda_wheel_test_filters -- //tensorflo
# ARM64 WHEEL
test:linux_arm64_wheel_test_filters --test_tag_filters=-no_oss,-no_aarch64,-oss_excluded,-oss_serial,-gpu,-tpu,-benchmark-test,-v1only,-no_oss_py38,-no_oss_py39,-no_oss_py310
test:linux_arm64_wheel_test_filters --build_tag_filters=-no_oss,-no_aarch64,-oss_excluded,-oss_serial,-gpu,-tpu,-benchmark-test,-v1only,-no_oss_py38,-no_oss_py39,-no_oss_py310
test:linux_arm64_wheel_test_filters --test_lang_filters=py --test_size_filters=small,medium --flaky_test_attempts=3
test:linux_arm64_wheel_test_filters --test_lang_filters=py --test_size_filters=small,medium
test:linux_arm64_wheel_test --config=linux_arm64_wheel_test_filters -- //tensorflow/... -//tensorflow/compiler/tf2tensorrt/... -//tensorflow/core/tpu/... -//tensorflow/lite/... -//tensorflow/tools/toolchains/... -//tensorflow/go/... -//tensorflow/java/... -//tensorflow/core/grappler/optimizers:auto_mixed_precision_test_cpu -//tensorflow/core/grappler/optimizers:remapper_test_cpu -//tensorflow/core/kernels/image:resize_bicubic_op_test -//tensorflow/compiler/mlir/tfr/examples/customization:test_ops_test -//tensorflow/compiler/mlir/tfr/examples/mnist:mnist_ops_test -//tensorflow/compiler/mlir/tfr/examples/pad:pad_ops_test
# MACOS ARM64 WHEEL
test:macos_arm64_wheel_test_filters --test_tag_filters=-no_oss,-oss_excluded,-oss_serial,-no_oss_py39,-no_oss_py310,-nomac,-no_mac,-mac_excluded,-v1only,-gpu,-tpu,-benchmark-test,-no_mac_arm64,-no_aarch64
Expand Down Expand Up @@ -812,7 +809,7 @@ test:linux_cuda_pycpp_test --config=linux_cuda_pycpp_test_filters -- //tensorflo
# inherit from build.
build:linux_arm64_pycpp_test_filters --test_tag_filters=-no_oss,-no_aarch64,-oss_excluded,-oss_serial,-gpu,-tpu,-benchmark-test,-v1only
build:linux_arm64_pycpp_test_filters --build_tag_filters=-no_oss,-no_aarch64,-oss_excluded,-oss_serial,-gpu,-tpu,-benchmark-test,-v1only
build:linux_arm64_pycpp_test_filters --test_lang_filters=cc,py --test_size_filters=small,medium
build:linux_arm64_pycpp_test_filters --test_lang_filters=cc,py --test_size_filters=small,medium --flaky_test_attempts=3
# TODO(michaelhudgins): Why do we need to specifically omit go and java here?
build:linux_arm64_pycpp_test --config=linux_arm64_pycpp_test_filters -- //tensorflow/... -//tensorflow/compiler/tf2tensorrt/... -//tensorflow/core/tpu/... -//tensorflow/lite/... -//tensorflow/tools/toolchains/... -//tensorflow/go/... -//tensorflow/java/... -//tensorflow/core/grappler/optimizers:auto_mixed_precision_test_cpu -//tensorflow/core/grappler/optimizers:remapper_test_cpu -//tensorflow/core/kernels/image:resize_bicubic_op_test -//tensorflow/compiler/mlir/tfr/examples/customization:test_ops_test -//tensorflow/compiler/mlir/tfr/examples/mnist:mnist_ops_test -//tensorflow/compiler/mlir/tfr/examples/pad:pad_ops_test -//tensorflow/python/tools:aot_compiled_test
# CROSS-COMPILE ARM64 PYCPP
Expand Down
10 changes: 10 additions & 0 deletions .github/workflows/sigbuild-docker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,14 @@ jobs:
registry: gcr.io
username: _json_key
password: ${{ secrets.GCP_CREDS }}
-
name: Login to AR
# Once this is verified, remove the gcr.io actions.
uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567 # v3.3.0
with:
registry: us-central1-docker.pkg.dev
username: _json_key
password: ${{ secrets.GCP_CREDS }}
-
name: Grab the upcoming TF version to tag this container
run: |
Expand Down Expand Up @@ -87,6 +95,8 @@ jobs:
tensorflow/build:${{ steps.tf-version.outputs.TF_VERSION }}-${{ matrix.python-version }}
gcr.io/tensorflow-sigs/build:latest-${{ matrix.python-version }}
gcr.io/tensorflow-sigs/build:${{ steps.tf-version.outputs.TF_VERSION }}-${{ matrix.python-version }}
us-central1-docker.pkg.dev/tensorflow-sigs/tensorflow/build:latest-${{ matrix.python-version }}
us-central1-docker.pkg.dev/tensorflow-sigs/tensorflow/build:${{ steps.tf-version.outputs.TF_VERSION }}-${{ matrix.python-version }}
cache-from: type=registry,ref=tensorflow/build:latest-${{ matrix.python-version }}
cache-to: type=inline
-
Expand Down
20 changes: 14 additions & 6 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -253,13 +253,21 @@ There are two ways to run TensorFlow unit tests.
export flags="--config=opt -k"
```

If the tests are to be run on the GPU, add CUDA paths to LD_LIBRARY_PATH and
add the `cuda` option flag
If the tests are to be run on the GPU:
* For TensorFlow versions starting from v.2.18.0:
Add the `cuda` option flag.

```bash
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH"
export flags="--config=opt --config=cuda -k"
```
```bash
export flags="--config=opt --config=cuda -k"
```

* For TensorFlow versions prior to v.2.18.0:
Add CUDA paths to LD_LIBRARY_PATH and add the `cuda` option flag.

```bash
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH"
export flags="--config=opt --config=cuda -k"
```

For example, to run all tests under tensorflow/python, do:

Expand Down
18 changes: 17 additions & 1 deletion RELEASE.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,23 @@

* `tf.lite`
* C API:
* An optional, fourth parameter was added `TfLiteOperatorCreate` as a step forward towards a cleaner API for `TfLiteOperator`. Function `TfLiteOperatorCreate` was added recently, in TensorFlow Lite version 2.17.0, released on 7/11/2024, and we do not expect there will be much code using this function yet. Any code breakages can be easily resolved by passing nullptr as the new, 4th parameter.
* An optional, fourth parameter was added to `TfLiteOperatorCreate` as a step
forward towards a cleaner API for `TfLiteOperator`. Function
`TfLiteOperatorCreate` was added recently, in TensorFlow Lite version 2.17.0,
released on 7/11/2024, and we do not expect there will be much code using this
function yet. Any code breakages can be easily resolved by passing nullptr as
the new, 4th parameter.
* SignatureRunner is now supported for models with no signatures.

* TensorRT support is disabled in CUDA builds for code health improvement.

* Hermetic CUDA support is added.

Hermetic CUDA uses a specific downloadable version of CUDA instead of the
user’s locally installed CUDA. Bazel will download CUDA, CUDNN and NCCL
distributions, and then use CUDA libraries and tools as dependencies in
various Bazel targets. This enables more reproducible builds for Google ML
projects and supported CUDA versions.

### Known Caveats

Expand Down
47 changes: 47 additions & 0 deletions WORKSPACE
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,50 @@ tf_workspace1()
load("@//tensorflow:workspace0.bzl", "tf_workspace0")

tf_workspace0()

# Hermetic CUDA / CUDNN / NCCL repository setup.
# NOTE(review): the statements below look order-dependent -- several load()
# calls read from repositories that an earlier call is presumably responsible
# for defining. Confirm against the .bzl sources before reordering anything.

# Bring in and invoke the CUDA JSON initialization rule.
# Presumably this defines the @cuda_redist_json repository that is loaded from
# next -- TODO confirm in cuda_json_init_repository.bzl.
load(
"@local_tsl//third_party/gpus/cuda/hermetic:cuda_json_init_repository.bzl",
"cuda_json_init_repository",
)

cuda_json_init_repository()

# Import the CUDA and CUDNN redistribution tables from @cuda_redist_json, plus
# the repository rules that consume them.
load(
"@cuda_redist_json//:distributions.bzl",
"CUDA_REDISTRIBUTIONS",
"CUDNN_REDISTRIBUTIONS",
)
load(
"@local_tsl//third_party/gpus/cuda/hermetic:cuda_redist_init_repositories.bzl",
"cuda_redist_init_repositories",
"cudnn_redist_init_repository",
)

# Initialize the CUDA redistribution repositories from the loaded table.
cuda_redist_init_repositories(
cuda_redistributions = CUDA_REDISTRIBUTIONS,
)

# Initialize the CUDNN redistribution repository from the loaded table.
cudnn_redist_init_repository(
cudnn_redistributions = CUDNN_REDISTRIBUTIONS,
)

# Configure CUDA under the repository name "local_config_cuda".
load(
"@local_tsl//third_party/gpus/cuda/hermetic:cuda_configure.bzl",
"cuda_configure",
)

cuda_configure(name = "local_config_cuda")

# Initialize the NCCL redistribution repository (called with no arguments).
load(
"@local_tsl//third_party/nccl/hermetic:nccl_redist_init_repository.bzl",
"nccl_redist_init_repository",
)

nccl_redist_init_repository()

# Configure NCCL under the repository name "local_config_nccl".
load(
"@local_tsl//third_party/nccl/hermetic:nccl_configure.bzl",
"nccl_configure",
)

nccl_configure(name = "local_config_nccl")
Loading

0 comments on commit f35ddc0

Please sign in to comment.