Merge pull request #2563 from ROCm/develop-upstream-sync-240527
Develop upstream sync 240527
mmakevic-amd authored Jun 9, 2024
2 parents a822047 + bca3bd2 commit 7070641
Showing 760 changed files with 35,192 additions and 5,094 deletions.
2 changes: 2 additions & 0 deletions RELEASE.md
@@ -32,6 +32,8 @@
* Replace `DebuggerOptions` of TensorFlow Quantizer, and migrate to
`DebuggerConfig` of StableHLO Quantizer.
* Add TensorFlow to StableHLO converter to TensorFlow pip package.
* TensorRT support: this is the last release supporting TensorRT. It will be
removed in the next release.

## Keras

4 changes: 2 additions & 2 deletions ci/official/containers/linux_arm64/cuda.packages.txt
@@ -1,6 +1,6 @@
# CuDNN: https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#ubuntu-network-installation
libcudnn8=8.9.6.50-1+cuda12.2
libcudnn8-dev=8.9.6.50-1+cuda12.2
libcudnn9-dev-cuda-12=9.1.1.17-1
libcudnn9-cuda-12=9.1.1.17-1

# This can be removed once NVIDIA publishes a cuda-12.3.2 Docker image.
# For now it ensures that we install at least version 12.3.107 of PTXAS,
2 changes: 1 addition & 1 deletion tensorflow/BUILD
@@ -1382,7 +1382,7 @@ tf_cc_shared_library(
"//tensorflow/compiler/mlir/quantization/common/quantization_lib:quantization_config",
"//tensorflow/compiler/mlir/lite/sparsity:sparsify_model",
"//tensorflow/compiler/mlir/quantization/stablehlo/python:pywrap_quantization_lib_impl",
"//tensorflow/compiler/mlir/quantization/tensorflow_to_stablehlo/python:pywrap_tensorflow_to_stablehlo_lib_impl",
"//tensorflow/compiler/mlir/tensorflow_to_stablehlo/python:pywrap_tensorflow_to_stablehlo_lib_impl",
"//tensorflow/compiler/mlir/quantization/tensorflow/calibrator:custom_aggregator_op",
"//tensorflow/compiler/mlir/quantization/tensorflow/python:quantize_model_cc_impl",
"//tensorflow/compiler/mlir/quantization/tensorflow:passes",
1 change: 0 additions & 1 deletion tensorflow/c/experimental/stream_executor/BUILD
@@ -69,7 +69,6 @@ cc_library(
"//tensorflow/c:tf_status_helper",
"@local_tsl//tsl/platform:statusor",
"@local_xla//xla/stream_executor",
"@local_xla//xla/stream_executor:event_interface",
"@local_xla//xla/stream_executor:stream_executor_interface",
"@local_xla//xla/stream_executor:stream_interface",
],
40 changes: 6 additions & 34 deletions tensorflow/c/experimental/stream_executor/stream_executor.cc
@@ -154,20 +154,6 @@ absl::Status ValidateSEPlatformRegistrationParams(
}
#undef TF_VALIDATE_NOT_NULL

// Converts SE_EventStatus to Event::Status.
Event::Status SEEventStatusToEventStatus(SE_EventStatus s) {
switch (s) {
case SE_EVENT_ERROR:
return Event::Status::kError;
case SE_EVENT_PENDING:
return Event::Status::kPending;
case SE_EVENT_COMPLETE:
return Event::Status::kComplete;
default:
return Event::Status::kUnknown;
}
}

// Converts DeviceMemoryBase to a C struct.
SP_DeviceMemoryBase DeviceMemoryBaseToC(const DeviceMemoryBase* mem) {
SP_DeviceMemoryBase device_memory_base{SP_DEVICE_MEMORY_BASE_STRUCT_SIZE};
@@ -407,33 +393,21 @@ class CStreamExecutor : public StreamExecutor {
return stream_executor_->host_callback(&device_, stream_handle,
&HostCallbackTrampoline, ctx);
}
absl::Status DeallocateEvent(Event* event) override {
static_cast<CEvent*>(event->implementation())->Destroy();
return absl::OkStatus();
}
absl::Status RecordEvent(Stream* stream, Event* event) override {
SP_Stream stream_handle =
static_cast<CStream*>(stream->implementation())->Handle();
return static_cast<CEvent*>(event->implementation())->Record(stream_handle);
return static_cast<CEvent*>(event)->Record(stream_handle);
}
absl::Status WaitForEvent(Stream* stream, Event* event) override {
SP_Stream stream_handle =
static_cast<CStream*>(stream->implementation())->Handle();
SP_Event event_handle =
static_cast<CEvent*>(event->implementation())->Handle();
SP_Event event_handle = static_cast<CEvent*>(event)->Handle();
OwnedTFStatus c_status(TF_NewStatus());
stream_executor_->wait_for_event(&device_, stream_handle, event_handle,
c_status.get());
absl::Status s = StatusFromTF_Status(c_status.get());
return s;
}
Event::Status PollForEventStatus(Event* event) override {
SP_Event event_handle =
static_cast<CEvent*>(event->implementation())->Handle();
SE_EventStatus event_status =
stream_executor_->get_event_status(&device_, event_handle);
return SEEventStatusToEventStatus(event_status);
}
void DeallocateStream(Stream* stream) override {
static_cast<CStream*>(stream->implementation())->Destroy();
}
@@ -453,8 +427,7 @@ class CStreamExecutor : public StreamExecutor {
}
absl::Status BlockHostForEvent(Stream* stream, Event* event) {
OwnedTFStatus c_status(TF_NewStatus());
SP_Event event_handle =
static_cast<CEvent*>(event->implementation())->Handle();
SP_Event event_handle = static_cast<CEvent*>(event)->Handle();
stream_executor_->block_host_for_event(&device_, event_handle,
c_status.get());
return StatusFromTF_Status(c_status.get());
@@ -550,15 +523,14 @@ class CStreamExecutor : public StreamExecutor {
absl::StatusOr<std::unique_ptr<Event>> CreateEvent() override {
auto c_event = std::make_unique<CEvent>(&device_, stream_executor_);
TF_RETURN_IF_ERROR(c_event->Create());
return std::make_unique<Event>(this, std::move(c_event));
return std::move(c_event);
}

absl::StatusOr<std::unique_ptr<Stream>> CreateStream(
std::optional<std::variant<StreamPriority, int>> priority =
std::nullopt) override {
auto c_stream = std::make_unique<CStream>(&device_, stream_executor_);
TF_RETURN_IF_ERROR(c_stream->Create());
auto stream = std::make_unique<Stream>(this, std::move(c_stream));
auto stream = std::make_unique<CStream>(&device_, stream_executor_, this);
TF_RETURN_IF_ERROR(stream->Create());
return std::move(stream);
}

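The hunks above follow the broader StreamExecutor refactor in this sync: Event and Stream used to be concrete classes wrapping an *Interface implementation object, and backends now subclass them directly. A minimal sketch of the shape of that change, using stand-in types rather than the real upstream classes:

#include <memory>

// Stand-in for stream_executor::Event after the refactor: a polymorphic
// base that backend-specific events derive from directly.
struct Event {
  virtual ~Event() = default;
};

// Stand-in for CEvent: previously an EventInterface implementation that a
// generic Event wrapped; now it *is* an Event.
struct CEvent : Event {
  // ...would hold the SP_Event handle and SP_StreamExecutor callbacks...
};

// Factory in the style of the new CreateEvent(): the concrete object is
// returned directly, with no Event(this, std::move(impl)) wrapper step.
std::unique_ptr<Event> CreateEvent() {
  auto c_event = std::make_unique<CEvent>();
  return std::move(c_event);  // implicit upcast to the base type
}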
tensorflow/c/experimental/stream_executor/stream_executor_internal.h
@@ -20,7 +20,6 @@ limitations under the License.

#include "tensorflow/c/experimental/stream_executor/stream_executor.h"
#include "tensorflow/c/tf_status_helper.h"
#include "xla/stream_executor/event_interface.h"
#include "xla/stream_executor/executor_cache.h"
#include "xla/stream_executor/platform.h"
#include "xla/stream_executor/stream_executor.h"
@@ -98,13 +97,18 @@ class CPlatform : public Platform {
stream_executor::ExecutorCache executor_cache_;
};

class CStream : public StreamInterface {
class CStream : public Stream {
public:
CStream(SP_Device* device, SP_StreamExecutor* stream_executor)
: device_(device),
CStream(SP_Device* device, SP_StreamExecutor* stream_executor,
StreamExecutor* executor)
: Stream(executor),
device_(device),
stream_executor_(stream_executor),
stream_handle_(nullptr) {}
~CStream() override { Destroy(); }
~CStream() override {
parent()->BlockHostUntilDone(this).IgnoreError();
Destroy();
}

absl::Status Create() {
tensorflow::TF_StatusPtr c_status(TF_NewStatus());
@@ -128,14 +132,30 @@ class CStream : public StreamInterface {
SP_Stream stream_handle_;
};

class CEvent : public EventInterface {
class CEvent : public Event {
public:
CEvent(SP_Device* device, SP_StreamExecutor* stream_executor)
: device_(device),
stream_executor_(stream_executor),
event_handle_(nullptr) {}
~CEvent() override { Destroy(); }

Event::Status PollForStatus() override {
SE_EventStatus event_status =
stream_executor_->get_event_status(device_, event_handle_);

switch (event_status) {
case SE_EVENT_ERROR:
return Event::Status::kError;
case SE_EVENT_PENDING:
return Event::Status::kPending;
case SE_EVENT_COMPLETE:
return Event::Status::kComplete;
default:
return Event::Status::kUnknown;
}
}

absl::Status Create() {
tensorflow::TF_StatusPtr c_status(TF_NewStatus());
stream_executor_->create_event(device_, &event_handle_, c_status.get());
4 changes: 2 additions & 2 deletions tensorflow/compiler/mlir/lite/BUILD
@@ -1027,14 +1027,14 @@ cc_library(
":tensorflow_lite",
"//tensorflow/compiler/mlir/lite/schema:schema_fbs",
"//tensorflow/compiler/mlir/lite/schema:schema_fbs_with_mutable",
"//tensorflow/compiler/mlir/lite/schema:schema_utils",
"//tensorflow/compiler/mlir/tensorflow:dynamic_shape_utils",
"//tensorflow/compiler/mlir/tensorflow:tensorflow_types",
"//tensorflow/core/platform:errors",
"//tensorflow/core/platform:status",
"//tensorflow/core/platform:statusor",
"//tensorflow/lite/core/c:private_common",
"//tensorflow/lite/kernels/internal:kernel_utils",
"//tensorflow/lite/schema:schema_utils",
"@com_google_absl//absl/container:flat_hash_map",
"@com_google_absl//absl/strings",
"@flatbuffers",
@@ -1149,6 +1149,7 @@ cc_library(
":tensorflow_lite",
"//tensorflow/compiler/mlir/lite/quantization/ir:QuantOps",
"//tensorflow/compiler/mlir/lite/schema:schema_fbs_with_mutable",
"//tensorflow/compiler/mlir/lite/schema:schema_utils",
"//tensorflow/compiler/mlir/lite/stablehlo:legalize_stablehlo_composite_to_tfl_custom",
"//tensorflow/compiler/mlir/lite/stablehlo:legalize_stablehlo_to_vhlo_pass",
"//tensorflow/compiler/mlir/quantization/common/quantization_lib",
@@ -1163,7 +1164,6 @@ cc_library(
"//tensorflow/core/platform:status",
"//tensorflow/lite:framework",
"//tensorflow/lite/experimental/remat:metadata_util",
"//tensorflow/lite/schema:schema_utils",
"@com_google_absl//absl/container:flat_hash_map",
"@com_google_absl//absl/container:flat_hash_set",
"@com_google_absl//absl/status",
2 changes: 1 addition & 1 deletion tensorflow/compiler/mlir/lite/flatbuffer_import.cc
@@ -78,6 +78,7 @@ limitations under the License.
#include "tensorflow/compiler/mlir/lite/offset_buffer.h"
#include "tensorflow/compiler/mlir/lite/quantization/ir/QuantOps.h"
#include "tensorflow/compiler/mlir/lite/schema/mutable/schema_generated.h"
#include "tensorflow/compiler/mlir/lite/schema/schema_utils.h"
#include "tensorflow/compiler/mlir/lite/stablehlo/transforms/passes.h"
#include "tensorflow/compiler/mlir/lite/utils/const_tensor_utils.h"
#include "tensorflow/compiler/mlir/lite/utils/convert_type.h"
@@ -97,7 +98,6 @@ limitations under the License.
#include "tensorflow/lite/experimental/remat/metadata_util.h"
#include "tensorflow/lite/graph_info.h"
#include "tensorflow/lite/model_builder.h"
#include "tensorflow/lite/schema/schema_utils.h"
#include "tsl/platform/status.h"
#include "tsl/platform/statusor.h"

2 changes: 1 addition & 1 deletion tensorflow/compiler/mlir/lite/flatbuffer_operator.cc
@@ -47,6 +47,7 @@ limitations under the License.
#include "stablehlo/dialect/VhloOps.h" // from @stablehlo
#include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h"
#include "tensorflow/compiler/mlir/lite/schema/mutable/schema_generated.h"
#include "tensorflow/compiler/mlir/lite/schema/schema_utils.h"
#include "tensorflow/compiler/mlir/lite/utils/convert_type.h"
#include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h"
#include "tensorflow/compiler/mlir/tensorflow/utils/dynamic_shape_utils.h"
@@ -55,7 +56,6 @@ limitations under the License.
#include "tensorflow/core/platform/status.h"
#include "tensorflow/lite/core/c/builtin_op_data.h"
#include "tensorflow/lite/kernels/internal/kernel_utils.h"
#include "tensorflow/lite/schema/schema_utils.h"
#include "tsl/platform/status.h"

namespace {
17 changes: 17 additions & 0 deletions tensorflow/compiler/mlir/lite/kernels/internal/BUILD
@@ -0,0 +1,17 @@
load("//tensorflow:tensorflow.default.bzl", "get_compatible_with_portable")
load("//tensorflow/lite:build_def.bzl", "tflite_copts")

package(
# copybara:uncomment default_applicable_licenses = ["//tensorflow:license"],
default_visibility = [
"//visibility:public",
],
licenses = ["notice"],
)

cc_library(
name = "compatibility_macros",
hdrs = ["compatibility_macros.h"],
compatible_with = get_compatible_with_portable(),
copts = tflite_copts(),
)
2 changes: 2 additions & 0 deletions tensorflow/compiler/mlir/lite/kernels/internal/README
@@ -0,0 +1,2 @@
This folder contains compatibility_macros.h, which mirrors compatibility.h in
lite/kernels/internal and recreates macros from there that are needed in the converter.
35 changes: 35 additions & 0 deletions tensorflow/compiler/mlir/lite/kernels/internal/compatibility_macros.h
@@ -0,0 +1,35 @@
/* Copyright 2024 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_COMPILER_MLIR_LITE_KERNELS_INTERNAL_COMPATIBILITY_MACROS_H_
#define TENSORFLOW_COMPILER_MLIR_LITE_KERNELS_INTERNAL_COMPATIBILITY_MACROS_H_

#ifndef TFLITE_ABORT
#define TFLITE_ABORT abort()
#endif

#ifndef TFLITE_ASSERT_FALSE
#if defined(NDEBUG)
#define TFLITE_ASSERT_FALSE (static_cast<void>(0))
#else
#define TFLITE_ASSERT_FALSE TFLITE_ABORT
#endif
#endif

#ifndef TFLITE_DCHECK
#define TFLITE_DCHECK(condition) (condition) ? (void)0 : TFLITE_ASSERT_FALSE
#endif

#endif // TENSORFLOW_COMPILER_MLIR_LITE_KERNELS_INTERNAL_COMPATIBILITY_MACROS_H_
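A minimal usage sketch for the macros above (the helper function is illustrative, not part of the commit): TFLITE_DCHECK evaluates its condition and reaches abort() on failure in debug builds, while compiling to a no-op under NDEBUG.

#include <cstdlib>  // abort(), reached through TFLITE_ABORT in debug builds

#include "tensorflow/compiler/mlir/lite/kernels/internal/compatibility_macros.h"

// Hypothetical converter-side helper guarding an invariant.
int SafeDiv(int numerator, int denominator) {
  TFLITE_DCHECK(denominator != 0);  // aborts in debug builds, no-op with NDEBUG
  return numerator / denominator;
}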
6 changes: 4 additions & 2 deletions tensorflow/compiler/mlir/lite/quantization/lite/BUILD
@@ -31,6 +31,8 @@ cc_library(
"//tensorflow/compiler/mlir/lite:flatbuffer_translate_lib",
"//tensorflow/compiler/mlir/lite:tensorflow_lite",
"//tensorflow/compiler/mlir/lite:tf_tfl_passes",
"//tensorflow/compiler/mlir/lite/debug",
"//tensorflow/compiler/mlir/lite/debug:debug_options_proto_cc",
"//tensorflow/compiler/mlir/lite/schema:schema_fbs",
"//tensorflow/compiler/mlir/quantization/common/quantization_lib:quantization_config",
"//tensorflow/compiler/mlir/tensorflow:error_util",
@@ -165,13 +167,13 @@ tf_cc_test(
deps = [
":quantize_model",
"//tensorflow/compiler/mlir/lite/schema:schema_fbs",
"//tensorflow/compiler/mlir/lite/schema:schema_utils",
"//tensorflow/core:framework_internal",
"//tensorflow/core:lib",
"//tensorflow/lite:framework",
"//tensorflow/lite:string",
"//tensorflow/lite/c:c_api_types",
"//tensorflow/lite/core/api:error_reporter",
"//tensorflow/lite/schema:schema_utils",
"//tensorflow/lite/tools/optimize:test_util",
"@com_google_absl//absl/container:flat_hash_set",
"@com_google_googletest//:gtest",
@@ -203,11 +205,11 @@ tf_cc_test(
deps = [
":quantize_weights",
"//tensorflow/compiler/mlir/lite/schema:schema_fbs",
"//tensorflow/compiler/mlir/lite/schema:schema_utils",
"//tensorflow/core:framework_internal",
"//tensorflow/core:lib",
"//tensorflow/lite:framework",
"//tensorflow/lite/c:c_api_types",
"//tensorflow/lite/schema:schema_utils",
"//tensorflow/lite/tools/optimize:test_util",
"@com_google_googletest//:gtest",
"@flatbuffers",
26 changes: 17 additions & 9 deletions tensorflow/compiler/mlir/lite/quantization/lite/quantize_model.cc
@@ -15,6 +15,7 @@ limitations under the License.

#include "tensorflow/compiler/mlir/lite/quantization/lite/quantize_model.h"

#include <optional>
#include <string>
#include <unordered_set>

@@ -30,6 +31,7 @@ limitations under the License.
#include "mlir/Pass/PassManager.h" // from @llvm-project
#include "mlir/Support/LogicalResult.h" // from @llvm-project
#include "tensorflow/compiler/mlir/lite/common/tfl_pass_config.h"
#include "tensorflow/compiler/mlir/lite/debug/debug.h"
#include "tensorflow/compiler/mlir/lite/flatbuffer_export.h"
#include "tensorflow/compiler/mlir/lite/flatbuffer_import.h"
#include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h"
@@ -53,17 +55,19 @@ std::string TfLiteToMlir(const absl::string_view tflite_op_name) {

// TODO(fengliuai): check the result for `fully_quantize` flag.
TfLiteStatus QuantizeModel(
const absl::string_view model_buffer, const tflite::TensorType& input_type,
const tflite::TensorType& output_type,
const tflite::TensorType& inference_type,
const std::unordered_set<std::string>& operator_names,
bool disable_per_channel, bool fully_quantize, std::string& output_buffer,
tflite::ErrorReporter* error_reporter, bool verify_numeric,
const absl::string_view model_buffer, const tflite::TensorType &input_type,
const tflite::TensorType &output_type,
const tflite::TensorType &inference_type,
const std::unordered_set<std::string> &operator_names,
bool disable_per_channel, bool fully_quantize, std::string &output_buffer,
tflite::ErrorReporter *error_reporter, bool verify_numeric,
bool whole_model_verify, bool legacy_float_scale,
const absl::flat_hash_set<std::string>& denylisted_ops,
const absl::flat_hash_set<std::string>& denylisted_nodes,
const absl::flat_hash_set<std::string> &denylisted_ops,
const absl::flat_hash_set<std::string> &denylisted_nodes,
const bool enable_variable_quantization,
bool disable_per_channel_for_dense_layers) {
bool disable_per_channel_for_dense_layers,
const std::optional<const tensorflow::converter::DebugOptions>
&debug_options) {
// Translate TFLite names to mlir op names.
absl::flat_hash_set<std::string> denylisted_mlir_op_names;
for (const auto& entry : denylisted_ops) {
@@ -85,6 +89,10 @@ TfLiteStatus QuantizeModel(

// Apply quantization passes.
PassManager pm((*module)->getName(), OpPassManager::Nesting::Implicit);
if (debug_options.has_value()) {
// Add debugging instrumentation
tensorflow::InitPassManager(pm, debug_options.value());
}
quant::QuantizationSpecs quant_specs;
quant_specs.inference_type = tflite::TflTypeToTfType(inference_type);
quant_specs.post_training_quantization = true;
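
A hedged sketch of how a caller might exercise the new optional parameter. The set_ir_dump_dir field is an assumption about the tensorflow::converter::DebugOptions proto rather than something shown in this diff, and callers that want the old behavior can simply pass std::nullopt:

// Sketch only: argument values are illustrative, set_ir_dump_dir is an
// assumed proto field, and QuantizeModel is assumed to be in scope (it
// lives in the mlir::lite namespace).
TfLiteStatus QuantizeWithDebugDumps(absl::string_view model_buffer,
                                    std::string& output_buffer,
                                    tflite::ErrorReporter* error_reporter) {
  tensorflow::converter::DebugOptions debug_options;
  debug_options.set_ir_dump_dir("/tmp/tfl_quantize_ir");  // assumed field

  return QuantizeModel(
      model_buffer, tflite::TensorType_FLOAT32, tflite::TensorType_FLOAT32,
      tflite::TensorType_INT8, /*operator_names=*/{},
      /*disable_per_channel=*/false, /*fully_quantize=*/true, output_buffer,
      error_reporter, /*verify_numeric=*/false, /*whole_model_verify=*/false,
      /*legacy_float_scale=*/false, /*denylisted_ops=*/{},
      /*denylisted_nodes=*/{}, /*enable_variable_quantization=*/false,
      /*disable_per_channel_for_dense_layers=*/false, debug_options);
}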