Merge pull request #2563 from ROCm/develop-upstream-sync-240527
Develop upstream sync 240527
mmakevic-amd authored Jun 9, 2024
2 parents a822047 + bca3bd2 commit 7070641
Showing 760 changed files with 35,192 additions and 5,094 deletions.
2 changes: 2 additions & 0 deletions RELEASE.md
@@ -32,6 +32,8 @@
* Replace `DebuggerOptions` of TensorFlow Quantizer, and migrate to
`DebuggerConfig` of StableHLO Quantizer.
* Add TensorFlow to StableHLO converter to TensorFlow pip package.
* TensorRT support: this is the last release supporting TensorRT. It will be
removed in the next release.

## Keras

4 changes: 2 additions & 2 deletions ci/official/containers/linux_arm64/cuda.packages.txt
@@ -1,6 +1,6 @@
# CuDNN: https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#ubuntu-network-installation
libcudnn8=8.9.6.50-1+cuda12.2
libcudnn8-dev=8.9.6.50-1+cuda12.2
libcudnn9-dev-cuda-12=9.1.1.17-1
libcudnn9-cuda-12=9.1.1.17-1

# This can be removed once NVIDIA publishes a cuda-12.3.2 Docker image.
# For now it ensures that we install at least version 12.3.107 of PTXAS,
2 changes: 1 addition & 1 deletion tensorflow/BUILD
@@ -1382,7 +1382,7 @@ tf_cc_shared_library(
"//tensorflow/compiler/mlir/quantization/common/quantization_lib:quantization_config",
"//tensorflow/compiler/mlir/lite/sparsity:sparsify_model",
"//tensorflow/compiler/mlir/quantization/stablehlo/python:pywrap_quantization_lib_impl",
"//tensorflow/compiler/mlir/quantization/tensorflow_to_stablehlo/python:pywrap_tensorflow_to_stablehlo_lib_impl",
"//tensorflow/compiler/mlir/tensorflow_to_stablehlo/python:pywrap_tensorflow_to_stablehlo_lib_impl",
"//tensorflow/compiler/mlir/quantization/tensorflow/calibrator:custom_aggregator_op",
"//tensorflow/compiler/mlir/quantization/tensorflow/python:quantize_model_cc_impl",
"//tensorflow/compiler/mlir/quantization/tensorflow:passes",
1 change: 0 additions & 1 deletion tensorflow/c/experimental/stream_executor/BUILD
@@ -69,7 +69,6 @@ cc_library(
"//tensorflow/c:tf_status_helper",
"@local_tsl//tsl/platform:statusor",
"@local_xla//xla/stream_executor",
"@local_xla//xla/stream_executor:event_interface",
"@local_xla//xla/stream_executor:stream_executor_interface",
"@local_xla//xla/stream_executor:stream_interface",
],
40 changes: 6 additions & 34 deletions tensorflow/c/experimental/stream_executor/stream_executor.cc
@@ -154,20 +154,6 @@ absl::Status ValidateSEPlatformRegistrationParams(
}
#undef TF_VALIDATE_NOT_NULL

// Converts SE_EventStatus to Event::Status.
Event::Status SEEventStatusToEventStatus(SE_EventStatus s) {
switch (s) {
case SE_EVENT_ERROR:
return Event::Status::kError;
case SE_EVENT_PENDING:
return Event::Status::kPending;
case SE_EVENT_COMPLETE:
return Event::Status::kComplete;
default:
return Event::Status::kUnknown;
}
}

// Converts DeviceMemoryBase to a C struct.
SP_DeviceMemoryBase DeviceMemoryBaseToC(const DeviceMemoryBase* mem) {
SP_DeviceMemoryBase device_memory_base{SP_DEVICE_MEMORY_BASE_STRUCT_SIZE};
@@ -407,33 +393,21 @@ class CStreamExecutor : public StreamExecutor {
return stream_executor_->host_callback(&device_, stream_handle,
&HostCallbackTrampoline, ctx);
}
absl::Status DeallocateEvent(Event* event) override {
static_cast<CEvent*>(event->implementation())->Destroy();
return absl::OkStatus();
}
absl::Status RecordEvent(Stream* stream, Event* event) override {
SP_Stream stream_handle =
static_cast<CStream*>(stream->implementation())->Handle();
return static_cast<CEvent*>(event->implementation())->Record(stream_handle);
return static_cast<CEvent*>(event)->Record(stream_handle);
}
absl::Status WaitForEvent(Stream* stream, Event* event) override {
SP_Stream stream_handle =
static_cast<CStream*>(stream->implementation())->Handle();
SP_Event event_handle =
static_cast<CEvent*>(event->implementation())->Handle();
SP_Event event_handle = static_cast<CEvent*>(event)->Handle();
OwnedTFStatus c_status(TF_NewStatus());
stream_executor_->wait_for_event(&device_, stream_handle, event_handle,
c_status.get());
absl::Status s = StatusFromTF_Status(c_status.get());
return s;
}
Event::Status PollForEventStatus(Event* event) override {
SP_Event event_handle =
static_cast<CEvent*>(event->implementation())->Handle();
SE_EventStatus event_status =
stream_executor_->get_event_status(&device_, event_handle);
return SEEventStatusToEventStatus(event_status);
}
void DeallocateStream(Stream* stream) override {
static_cast<CStream*>(stream->implementation())->Destroy();
}
@@ -453,8 +427,7 @@ class CStreamExecutor : public StreamExecutor {
}
absl::Status BlockHostForEvent(Stream* stream, Event* event) {
OwnedTFStatus c_status(TF_NewStatus());
SP_Event event_handle =
static_cast<CEvent*>(event->implementation())->Handle();
SP_Event event_handle = static_cast<CEvent*>(event)->Handle();
stream_executor_->block_host_for_event(&device_, event_handle,
c_status.get());
return StatusFromTF_Status(c_status.get());
@@ -550,15 +523,14 @@ class CStreamExecutor : public StreamExecutor {
absl::StatusOr<std::unique_ptr<Event>> CreateEvent() override {
auto c_event = std::make_unique<CEvent>(&device_, stream_executor_);
TF_RETURN_IF_ERROR(c_event->Create());
return std::make_unique<Event>(this, std::move(c_event));
return std::move(c_event);
}

absl::StatusOr<std::unique_ptr<Stream>> CreateStream(
std::optional<std::variant<StreamPriority, int>> priority =
std::nullopt) override {
auto c_stream = std::make_unique<CStream>(&device_, stream_executor_);
TF_RETURN_IF_ERROR(c_stream->Create());
auto stream = std::make_unique<Stream>(this, std::move(c_stream));
auto stream = std::make_unique<CStream>(&device_, stream_executor_, this);
TF_RETURN_IF_ERROR(stream->Create());
return std::move(stream);
}

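The hunks above follow the broader StreamExecutor refactor in this sync: Event and Stream used to be concrete classes wrapping an *Interface implementation object, and backends now subclass them directly. A minimal sketch of the shape of that change, using stand-in types rather than the real upstream classes:

#include <memory>

// Stand-in for stream_executor::Event after the refactor: a polymorphic
// base that backend-specific events derive from directly.
struct Event {
  virtual ~Event() = default;
};

// Stand-in for CEvent: previously an EventInterface implementation that a
// generic Event wrapped; now it *is* an Event.
struct CEvent : Event {
  // ...would hold the SP_Event handle and SP_StreamExecutor callbacks...
};

// Factory in the style of the new CreateEvent(): the concrete object is
// returned directly, with no Event(this, std::move(impl)) wrapper step.
std::unique_ptr<Event> CreateEvent() {
  auto c_event = std::make_unique<CEvent>();
  return std::move(c_event);  // implicit upcast to the base type
}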
tensorflow/c/experimental/stream_executor/stream_executor_internal.h
@@ -20,7 +20,6 @@ limitations under the License.

#include "tensorflow/c/experimental/stream_executor/stream_executor.h"
#include "tensorflow/c/tf_status_helper.h"
#include "xla/stream_executor/event_interface.h"
#include "xla/stream_executor/executor_cache.h"
#include "xla/stream_executor/platform.h"
#include "xla/stream_executor/stream_executor.h"
@@ -98,13 +97,18 @@ class CPlatform : public Platform {
stream_executor::ExecutorCache executor_cache_;
};

class CStream : public StreamInterface {
class CStream : public Stream {
public:
CStream(SP_Device* device, SP_StreamExecutor* stream_executor)
: device_(device),
CStream(SP_Device* device, SP_StreamExecutor* stream_executor,
StreamExecutor* executor)
: Stream(executor),
device_(device),
stream_executor_(stream_executor),
stream_handle_(nullptr) {}
~CStream() override { Destroy(); }
~CStream() override {
parent()->BlockHostUntilDone(this).IgnoreError();
Destroy();
}

absl::Status Create() {
tensorflow::TF_StatusPtr c_status(TF_NewStatus());
@@ -128,14 +132,30 @@ class CStream : public StreamInterface {
SP_Stream stream_handle_;
};

class CEvent : public EventInterface {
class CEvent : public Event {
public:
CEvent(SP_Device* device, SP_StreamExecutor* stream_executor)
: device_(device),
stream_executor_(stream_executor),
event_handle_(nullptr) {}
~CEvent() override { Destroy(); }

Event::Status PollForStatus() override {
SE_EventStatus event_status =
stream_executor_->get_event_status(device_, event_handle_);

switch (event_status) {
case SE_EVENT_ERROR:
return Event::Status::kError;
case SE_EVENT_PENDING:
return Event::Status::kPending;
case SE_EVENT_COMPLETE:
return Event::Status::kComplete;
default:
return Event::Status::kUnknown;
}
}

absl::Status Create() {
tensorflow::TF_StatusPtr c_status(TF_NewStatus());
stream_executor_->create_event(device_, &event_handle_, c_status.get());
4 changes: 2 additions & 2 deletions tensorflow/compiler/mlir/lite/BUILD
@@ -1027,14 +1027,14 @@ cc_library(
":tensorflow_lite",
"//tensorflow/compiler/mlir/lite/schema:schema_fbs",
"//tensorflow/compiler/mlir/lite/schema:schema_fbs_with_mutable",
"//tensorflow/compiler/mlir/lite/schema:schema_utils",
"//tensorflow/compiler/mlir/tensorflow:dynamic_shape_utils",
"//tensorflow/compiler/mlir/tensorflow:tensorflow_types",
"//tensorflow/core/platform:errors",
"//tensorflow/core/platform:status",
"//tensorflow/core/platform:statusor",
"//tensorflow/lite/core/c:private_common",
"//tensorflow/lite/kernels/internal:kernel_utils",
"//tensorflow/lite/schema:schema_utils",
"@com_google_absl//absl/container:flat_hash_map",
"@com_google_absl//absl/strings",
"@flatbuffers",
@@ -1149,6 +1149,7 @@ cc_library(
":tensorflow_lite",
"//tensorflow/compiler/mlir/lite/quantization/ir:QuantOps",
"//tensorflow/compiler/mlir/lite/schema:schema_fbs_with_mutable",
"//tensorflow/compiler/mlir/lite/schema:schema_utils",
"//tensorflow/compiler/mlir/lite/stablehlo:legalize_stablehlo_composite_to_tfl_custom",
"//tensorflow/compiler/mlir/lite/stablehlo:legalize_stablehlo_to_vhlo_pass",
"//tensorflow/compiler/mlir/quantization/common/quantization_lib",
@@ -1163,7 +1164,6 @@ cc_library(
"//tensorflow/core/platform:status",
"//tensorflow/lite:framework",
"//tensorflow/lite/experimental/remat:metadata_util",
"//tensorflow/lite/schema:schema_utils",
"@com_google_absl//absl/container:flat_hash_map",
"@com_google_absl//absl/container:flat_hash_set",
"@com_google_absl//absl/status",
2 changes: 1 addition & 1 deletion tensorflow/compiler/mlir/lite/flatbuffer_import.cc
@@ -78,6 +78,7 @@ limitations under the License.
#include "tensorflow/compiler/mlir/lite/offset_buffer.h"
#include "tensorflow/compiler/mlir/lite/quantization/ir/QuantOps.h"
#include "tensorflow/compiler/mlir/lite/schema/mutable/schema_generated.h"
#include "tensorflow/compiler/mlir/lite/schema/schema_utils.h"
#include "tensorflow/compiler/mlir/lite/stablehlo/transforms/passes.h"
#include "tensorflow/compiler/mlir/lite/utils/const_tensor_utils.h"
#include "tensorflow/compiler/mlir/lite/utils/convert_type.h"
@@ -97,7 +98,6 @@ limitations under the License.
#include "tensorflow/lite/experimental/remat/metadata_util.h"
#include "tensorflow/lite/graph_info.h"
#include "tensorflow/lite/model_builder.h"
#include "tensorflow/lite/schema/schema_utils.h"
#include "tsl/platform/status.h"
#include "tsl/platform/statusor.h"

2 changes: 1 addition & 1 deletion tensorflow/compiler/mlir/lite/flatbuffer_operator.cc
@@ -47,6 +47,7 @@ limitations under the License.
#include "stablehlo/dialect/VhloOps.h" // from @stablehlo
#include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h"
#include "tensorflow/compiler/mlir/lite/schema/mutable/schema_generated.h"
#include "tensorflow/compiler/mlir/lite/schema/schema_utils.h"
#include "tensorflow/compiler/mlir/lite/utils/convert_type.h"
#include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h"
#include "tensorflow/compiler/mlir/tensorflow/utils/dynamic_shape_utils.h"
@@ -55,7 +56,6 @@ limitations under the License.
#include "tensorflow/core/platform/status.h"
#include "tensorflow/lite/core/c/builtin_op_data.h"
#include "tensorflow/lite/kernels/internal/kernel_utils.h"
#include "tensorflow/lite/schema/schema_utils.h"
#include "tsl/platform/status.h"

namespace {
17 changes: 17 additions & 0 deletions tensorflow/compiler/mlir/lite/kernels/internal/BUILD
@@ -0,0 +1,17 @@
load("//tensorflow:tensorflow.default.bzl", "get_compatible_with_portable")
load("//tensorflow/lite:build_def.bzl", "tflite_copts")

package(
# copybara:uncomment default_applicable_licenses = ["//tensorflow:license"],
default_visibility = [
"//visibility:public",
],
licenses = ["notice"],
)

cc_library(
name = "compatibility_macros",
hdrs = ["compatibility_macros.h"],
compatible_with = get_compatible_with_portable(),
copts = tflite_copts(),
)
2 changes: 2 additions & 0 deletions tensorflow/compiler/mlir/lite/kernels/internal/README
@@ -0,0 +1,2 @@
This folder contains compatibility_macros.h, which mirrors compatibility.h in
lite/kernels/internal and recreates macros from there that are needed in the converter.
35 changes: 35 additions & 0 deletions tensorflow/compiler/mlir/lite/kernels/internal/compatibility_macros.h
@@ -0,0 +1,35 @@
/* Copyright 2024 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_COMPILER_MLIR_LITE_KERNELS_INTERNAL_COMPATIBILITY_MACROS_H_
#define TENSORFLOW_COMPILER_MLIR_LITE_KERNELS_INTERNAL_COMPATIBILITY_MACROS_H_

#ifndef TFLITE_ABORT
#define TFLITE_ABORT abort()
#endif

#ifndef TFLITE_ASSERT_FALSE
#if defined(NDEBUG)
#define TFLITE_ASSERT_FALSE (static_cast<void>(0))
#else
#define TFLITE_ASSERT_FALSE TFLITE_ABORT
#endif
#endif

#ifndef TFLITE_DCHECK
#define TFLITE_DCHECK(condition) (condition) ? (void)0 : TFLITE_ASSERT_FALSE
#endif

#endif // TENSORFLOW_COMPILER_MLIR_LITE_KERNELS_INTERNAL_COMPATIBILITY_MACROS_H_
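A minimal usage sketch for the macros above (the helper function is illustrative, not part of the commit): TFLITE_DCHECK evaluates its condition and reaches abort() on failure in debug builds, while compiling to a no-op under NDEBUG.

#include <cstdlib>  // abort(), reached through TFLITE_ABORT in debug builds

#include "tensorflow/compiler/mlir/lite/kernels/internal/compatibility_macros.h"

// Hypothetical converter-side helper guarding an invariant.
int SafeDiv(int numerator, int denominator) {
  TFLITE_DCHECK(denominator != 0);  // aborts in debug builds, no-op with NDEBUG
  return numerator / denominator;
}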
6 changes: 4 additions & 2 deletions tensorflow/compiler/mlir/lite/quantization/lite/BUILD
@@ -31,6 +31,8 @@ cc_library(
"//tensorflow/compiler/mlir/lite:flatbuffer_translate_lib",
"//tensorflow/compiler/mlir/lite:tensorflow_lite",
"//tensorflow/compiler/mlir/lite:tf_tfl_passes",
"//tensorflow/compiler/mlir/lite/debug",
"//tensorflow/compiler/mlir/lite/debug:debug_options_proto_cc",
"//tensorflow/compiler/mlir/lite/schema:schema_fbs",
"//tensorflow/compiler/mlir/quantization/common/quantization_lib:quantization_config",
"//tensorflow/compiler/mlir/tensorflow:error_util",
@@ -165,13 +167,13 @@ tf_cc_test(
deps = [
":quantize_model",
"//tensorflow/compiler/mlir/lite/schema:schema_fbs",
"//tensorflow/compiler/mlir/lite/schema:schema_utils",
"//tensorflow/core:framework_internal",
"//tensorflow/core:lib",
"//tensorflow/lite:framework",
"//tensorflow/lite:string",
"//tensorflow/lite/c:c_api_types",
"//tensorflow/lite/core/api:error_reporter",
"//tensorflow/lite/schema:schema_utils",
"//tensorflow/lite/tools/optimize:test_util",
"@com_google_absl//absl/container:flat_hash_set",
"@com_google_googletest//:gtest",
@@ -203,11 +205,11 @@ tf_cc_test(
deps = [
":quantize_weights",
"//tensorflow/compiler/mlir/lite/schema:schema_fbs",
"//tensorflow/compiler/mlir/lite/schema:schema_utils",
"//tensorflow/core:framework_internal",
"//tensorflow/core:lib",
"//tensorflow/lite:framework",
"//tensorflow/lite/c:c_api_types",
"//tensorflow/lite/schema:schema_utils",
"//tensorflow/lite/tools/optimize:test_util",
"@com_google_googletest//:gtest",
"@flatbuffers",
26 changes: 17 additions & 9 deletions tensorflow/compiler/mlir/lite/quantization/lite/quantize_model.cc
@@ -15,6 +15,7 @@ limitations under the License.

#include "tensorflow/compiler/mlir/lite/quantization/lite/quantize_model.h"

#include <optional>
#include <string>
#include <unordered_set>

@@ -30,6 +31,7 @@ limitations under the License.
#include "mlir/Pass/PassManager.h" // from @llvm-project
#include "mlir/Support/LogicalResult.h" // from @llvm-project
#include "tensorflow/compiler/mlir/lite/common/tfl_pass_config.h"
#include "tensorflow/compiler/mlir/lite/debug/debug.h"
#include "tensorflow/compiler/mlir/lite/flatbuffer_export.h"
#include "tensorflow/compiler/mlir/lite/flatbuffer_import.h"
#include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h"
@@ -53,17 +55,19 @@ std::string TfLiteToMlir(const absl::string_view tflite_op_name) {

// TODO(fengliuai): check the result for `fully_quantize` flag.
TfLiteStatus QuantizeModel(
const absl::string_view model_buffer, const tflite::TensorType& input_type,
const tflite::TensorType& output_type,
const tflite::TensorType& inference_type,
const std::unordered_set<std::string>& operator_names,
bool disable_per_channel, bool fully_quantize, std::string& output_buffer,
tflite::ErrorReporter* error_reporter, bool verify_numeric,
const absl::string_view model_buffer, const tflite::TensorType &input_type,
const tflite::TensorType &output_type,
const tflite::TensorType &inference_type,
const std::unordered_set<std::string> &operator_names,
bool disable_per_channel, bool fully_quantize, std::string &output_buffer,
tflite::ErrorReporter *error_reporter, bool verify_numeric,
bool whole_model_verify, bool legacy_float_scale,
const absl::flat_hash_set<std::string>& denylisted_ops,
const absl::flat_hash_set<std::string>& denylisted_nodes,
const absl::flat_hash_set<std::string> &denylisted_ops,
const absl::flat_hash_set<std::string> &denylisted_nodes,
const bool enable_variable_quantization,
bool disable_per_channel_for_dense_layers) {
bool disable_per_channel_for_dense_layers,
const std::optional<const tensorflow::converter::DebugOptions>
&debug_options) {
// Translate TFLite names to mlir op names.
absl::flat_hash_set<std::string> denylisted_mlir_op_names;
for (const auto& entry : denylisted_ops) {
@@ -85,6 +89,10 @@ TfLiteStatus QuantizeModel(

// Apply quantization passes.
PassManager pm((*module)->getName(), OpPassManager::Nesting::Implicit);
if (debug_options.has_value()) {
// Add debugging instrumentation
tensorflow::InitPassManager(pm, debug_options.value());
}
quant::QuantizationSpecs quant_specs;
quant_specs.inference_type = tflite::TflTypeToTfType(inference_type);
quant_specs.post_training_quantization = true;
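
A hedged sketch of how a caller might exercise the new optional parameter. The set_ir_dump_dir field is an assumption about the tensorflow::converter::DebugOptions proto rather than something shown in this diff, and callers that want the old behavior can simply pass std::nullopt:

// Sketch only: argument values are illustrative, set_ir_dump_dir is an
// assumed proto field, and QuantizeModel is assumed to be in scope (it
// lives in the mlir::lite namespace).
TfLiteStatus QuantizeWithDebugDumps(absl::string_view model_buffer,
                                    std::string& output_buffer,
                                    tflite::ErrorReporter* error_reporter) {
  tensorflow::converter::DebugOptions debug_options;
  debug_options.set_ir_dump_dir("/tmp/tfl_quantize_ir");  // assumed field

  return QuantizeModel(
      model_buffer, tflite::TensorType_FLOAT32, tflite::TensorType_FLOAT32,
      tflite::TensorType_INT8, /*operator_names=*/{},
      /*disable_per_channel=*/false, /*fully_quantize=*/true, output_buffer,
      error_reporter, /*verify_numeric=*/false, /*whole_model_verify=*/false,
      /*legacy_float_scale=*/false, /*denylisted_ops=*/{},
      /*denylisted_nodes=*/{}, /*enable_variable_quantization=*/false,
      /*disable_per_channel_for_dense_layers=*/false, debug_options);
}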