Commit

weekly sync 230731 after solving conflicts
weihanmines committed Aug 1, 2023
1 parent e1de22b commit da2cefb
Showing 4 changed files with 2 additions and 26 deletions.
12 changes: 1 addition & 11 deletions tensorflow/compiler/xla/debug_options_flags.cc
@@ -104,13 +104,8 @@ DebugOptions DefaultDebugOptionsIgnoringFlags() {

// TODO(b/258036887): Enable gpu_graph_level=2. Currently blocked by CUDA 12
// integration.
<<<<<<< HEAD
opts.set_xla_gpu_cuda_graph_level(0);
opts.set_xla_gpu_cuda_graph_num_runs_to_instantiate(-1);
=======
opts.set_xla_gpu_graph_level(1);
opts.set_xla_gpu_graph_level(0);
opts.set_xla_gpu_graph_num_runs_to_instantiate(-1);
>>>>>>> upstream/master
opts.set_xla_gpu_enable_persistent_temp_buffers(false);
opts.set_xla_gpu_graph_min_graph_size(5);
opts.set_xla_gpu_graph_enable_concurrent_region(false);
@@ -172,13 +167,8 @@ DebugOptions DefaultDebugOptionsIgnoringFlags() {
opts.set_xla_gpu_enable_triton_gemm(true);
opts.set_xla_gpu_enable_cudnn_int8x32_convolution_reordering(true);
opts.set_xla_gpu_triton_gemm_any(false);
<<<<<<< HEAD
opts.set_xla_gpu_enable_triton_softmax_fusion(false);
opts.set_xla_gpu_triton_fusion_level(1);
=======
opts.set_xla_gpu_enable_triton_softmax_fusion(true);
opts.set_xla_gpu_triton_fusion_level(2);
>>>>>>> upstream/master

// Moving reduce-scatter out of while loops can increase memory footprint, so
// turning it off by default.
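For context, every set_xla_gpu_* call in this hunk bakes in a compile-time default. DebugOptions is a protocol-buffer message, so each setter has a matching getter that the rest of the compiler branches on, and debug_options_flags.cc is where these fields are exposed as command-line flags of the same name, overridable at runtime through the XLA_FLAGS environment variable. A minimal sketch of the consumer side (the helper name is hypothetical; the getter name and header path are assumptions based on standard proto conventions and this branch's layout):

```cpp
// Hypothetical helper showing how a default set above is typically consumed.
// Assumptions: DebugOptions is the proto generated from xla.proto, so
// set_xla_gpu_graph_level() has a matching xla_gpu_graph_level() getter,
// and the header path matches this branch's layout.
#include "tensorflow/compiler/xla/xla.pb.h"

// Per the TODO above, level 0 disables GPU graph capture and higher levels
// opt in to progressively more aggressive use of graphs.
bool GpuGraphsEnabled(const xla::DebugOptions& opts) {
  return opts.xla_gpu_graph_level() > 0;
}
```

The merge therefore has to reconcile the HEAD spelling (xla_gpu_cuda_graph_level) with upstream's renamed xla_gpu_graph_level, since the flag name users pass via XLA_FLAGS follows the field name.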
@@ -34,14 +34,9 @@ class XlaShardingSerDesTest : public test_util::ShardingTest {};

TEST_P(XlaShardingSerDesTest, HloShardingRoundTrip) {
auto device_list = GetDevices({0, 1});
<<<<<<< HEAD
auto xla_hlo_sharding = xla::HloSharding::Tile(xla::TileAssignment(absl::Span<const int64_t>({2, 1})));
auto sharding = HloSharding::Create(device_list,
=======
auto xla_hlo_sharding = xla::HloSharding::Tile(
xla::TileAssignment((absl::Span<const int64_t>){2, 1}));
auto sharding = HloSharding::Create(device_list, MemoryKind("abc"),
>>>>>>> upstream/master
/*xla_hlo_sharding=*/xla_hlo_sharding);

TF_ASSERT_OK_AND_ASSIGN(auto serialized, Serialize(*sharding));
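This hunk combines a cosmetic difference in how the 2x1 tile assignment is spelled with a real upstream API change: HloSharding::Create now also takes a MemoryKind argument between the device list and the wrapped xla::HloSharding. A minimal sketch of the tiled-sharding construction the test performs (the helper name is hypothetical and the include path is an assumption about this branch's layout):

```cpp
// Hypothetical helper reproducing the 2x1 tiled xla::HloSharding built in
// the test above: two tiles along the first dimension, one along the second.
// Assumption: hlo_sharding.h lives at this path on this branch.
#include <cstdint>

#include "absl/types/span.h"
#include "tensorflow/compiler/xla/hlo/ir/hlo_sharding.h"

xla::HloSharding MakeTiledSharding() {
  return xla::HloSharding::Tile(
      xla::TileAssignment(absl::Span<const int64_t>({2, 1})));
}
```

The IFRT-level sharding is then created from this object; on the upstream side the factory call additionally threads MemoryKind("abc") through HloSharding::Create, which is presumably the signature the resolved test adopts.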
3 changes: 0 additions & 3 deletions tensorflow/core/kernels/BUILD
@@ -31,7 +31,6 @@ load(
"if_mkl",
"mkl_deps",
)
<<<<<<< HEAD
load(
"@local_config_rocm//rocm:build_defs.bzl",
"if_rocm_is_configured",
@@ -42,8 +41,6 @@ load(
"//third_party/mkl_dnn:build_defs.bzl",
"if_onednn_v3",
)
=======
>>>>>>> upstream/master
load("//tensorflow:tensorflow.default.bzl", "cc_header_only_library", "filegroup", "get_compatible_with_portable", "tf_cc_shared_library", "tf_cuda_cc_test", "tf_cuda_cc_tests", "tf_disable_ptxas_warning_flags", "tf_kernel_library")
load(
"//tensorflow/core/platform:build_config_root.bzl",
@@ -87,15 +87,9 @@ test --test_summary=short
# Pass --config=nonpip to run the same suite of tests. If you want to run just
# one test for investigation, you don't need --config=nonpip; just run the
# bazel test invocation as normal.
<<<<<<< HEAD
test:nonpip_filters --test_tag_filters=gpu,requires-gpu,-no_gpu,-no_oss,-oss_excluded,-oss_serial,-no_cuda11,-no_rocm,-no_oss_py39,-no_oss_py310
test:nonpip_filters --build_tag_filters=gpu,requires-gpu,-no_gpu,-no_oss,-oss_excluded,-oss_serial,-no_cuda11,-no_rocm,-no_oss_py39,-no_oss_py310
test:nonpip_filters --test_lang_filters=py --test_size_filters=small,medium
=======
test:nonpip_filters --test_tag_filters=gpu,requires-gpu,-no_gpu,-no_oss,-oss_excluded,-oss_serial,-no_cuda11,-no_oss_py39,-no_oss_py310
test:nonpip_filters --build_tag_filters=gpu,requires-gpu,-no_gpu,-no_oss,-oss_excluded,-oss_serial,-no_cuda11,-no_oss_py39,-no_oss_py310
test:nonpip_filters --test_lang_filters=py --flaky_test_attempts=3 --test_size_filters=small,medium
>>>>>>> upstream/master
test:nonpip_filters --test_lang_filters=py --flaky_test_attempts=3 --test_size_filters=small,medium
test:nonpip --config=nonpip_filters -- //tensorflow/... -//tensorflow/python/integration_testing/... -//tensorflow/compiler/tf2tensorrt/... -//tensorflow/compiler/xrt/... -//tensorflow/core/tpu/... -//tensorflow/lite/... -//tensorflow/tools/toolchains/...

# "nonpip_large" will run tests marked as large as well
