diff --git a/tensorflow/core/grappler/optimizers/auto_mixed_precision.cc b/tensorflow/core/grappler/optimizers/auto_mixed_precision.cc index 3b30fded9bab9d..5378872e4a96d3 100644 --- a/tensorflow/core/grappler/optimizers/auto_mixed_precision.cc +++ b/tensorflow/core/grappler/optimizers/auto_mixed_precision.cc @@ -114,9 +114,8 @@ bool HasFastFP16Support(const DeviceProperties& props) { return GetDeviceGPUArch(props) >= kMinGPUArch; #elif TENSORFLOW_USE_ROCM absl::flat_hash_set FP16SupportedDevices = { - {"gfx906"}, {"gfx908"}, {"gfx90a"}, {"gfx910"}, {"gfx940"}, {"gfx941"}, - {"gfx942"}, {"gfx1010"}, {"gfx1012"}, {"gfx1030"}, {"gfx1100"}, - {"gfx1200"}, {"gfx1201"} + {"gfx906"}, {"gfx908"}, {"gfx90a"}, {"gfx910"}, {"gfx940"}, {"gfx941"}, + {"gfx942"}, {"gfx1010"}, {"gfx1012"}, {"gfx1030"}, {"gfx1100"} }; std::string gcnArchName = props.environment().at("architecture"); std::vector gpu_arch = absl::StrSplit(gcnArchName, ":"); diff --git a/tensorflow/core/grappler/optimizers/generic_layout_optimizer.cc b/tensorflow/core/grappler/optimizers/generic_layout_optimizer.cc index 8cd7a2aed60299..a4f8f74640647d 100644 --- a/tensorflow/core/grappler/optimizers/generic_layout_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/generic_layout_optimizer.cc @@ -72,9 +72,7 @@ inline GpuStats GetNumGPUs(const Cluster& cluster) { compute_capability_it->second == "gfx90a" || compute_capability_it->second == "gfx940" || compute_capability_it->second == "gfx941" || - compute_capability_it->second == "gfx942" || - compute_capability_it->second == "gfx1200" || - compute_capability_it->second == "gfx1201") && is_enabled) { + compute_capability_it->second == "gfx942") && is_enabled) { gpu_stats.num_voltas++; } #endif diff --git a/tensorflow/core/util/gpu_device_functions.h b/tensorflow/core/util/gpu_device_functions.h index be0282ddd18fc0..27341ef0dc2583 100644 --- a/tensorflow/core/util/gpu_device_functions.h +++ b/tensorflow/core/util/gpu_device_functions.h @@ -743,7 +743,7 @@ __device__ inline double GpuAtomicAdd(double* ptr, double value) { } #endif -#if __gfx908__ || __gfx90a__ || __gfx940__ || __gfx941__ || __gfx942__ || __gfx1200__ || __gfx1201__ +#if __gfx908__ || __gfx90a__ || __gfx940__ || __gfx941__ || __gfx942__ #define ADDRSP1 __attribute__((address_space(1))) __device__ float @@ -963,7 +963,7 @@ __device__ inline int64_t GpuAtomicMin(int64_t* ptr, int64_t value) { } #endif -#if __gfx908__ || __gfx90a__ || __gfx940__ || __gfx941__ || __gfx942__ || __gfx1200__ || __gfx1201__ +#if __gfx908__ || __gfx90a__ || __gfx940__ || __gfx941__ || __gfx942__ // Low level instructions don't return. For now, assume that return value // is always unused. __device__ float GpuAtomicAdd(float* dst, float val) { @@ -978,7 +978,7 @@ __device__ inline T GpuAtomicAddShared(T* ptr, T value) { return GpuAtomicAdd(ptr, value); } -#if __gfx908__ || __gfx90a__ || __gfx940__ || __gfx941__ || __gfx942__ || __gfx1200__ || __gfx1201__ +#if __gfx908__ || __gfx90a__ || __gfx940__ || __gfx941__ || __gfx942__ __device__ float GpuAtomicAddShared(float* dst, float val) { atomicAdd(dst, val); return val; diff --git a/tensorflow/tools/ci_build/Dockerfile.rocm b/tensorflow/tools/ci_build/Dockerfile.rocm index f2c99c0f98bba8..bf13b0f87c531d 100644 --- a/tensorflow/tools/ci_build/Dockerfile.rocm +++ b/tensorflow/tools/ci_build/Dockerfile.rocm @@ -7,7 +7,7 @@ ARG ROCM_DEB_REPO=https://repo.radeon.com/rocm/apt/6.1.2/ ARG ROCM_BUILD_NAME=ubuntu ARG ROCM_BUILD_NUM=main ARG ROCM_PATH=/opt/rocm-6.1.2 -ARG GPU_DEVICE_TARGETS="gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1200 gfx1201" +ARG GPU_DEVICE_TARGETS="gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100" ARG DEBIAN_FRONTEND=noninteractive ENV TF_NEED_ROCM 1 diff --git a/tensorflow/tools/tf_sig_build_dockerfiles/setup.rocm.sh b/tensorflow/tools/tf_sig_build_dockerfiles/setup.rocm.sh index fd4e31d6af24f9..8b3d96228ebab0 100755 --- a/tensorflow/tools/tf_sig_build_dockerfiles/setup.rocm.sh +++ b/tensorflow/tools/tf_sig_build_dockerfiles/setup.rocm.sh @@ -59,7 +59,7 @@ if [[ "$DISTRO" == "focal" ]] || [[ "$DISTRO" == "jammy" ]]; then ROCM_DEB_REPO=${ROCM_DEB_REPO_HOME}${ROCM_VERS}/ AMDGPU_DEB_REPO=${AMDGPU_DEB_REPO_HOME}${ROCM_VERS}/ - DEBIAN_FRONTEND=noninteractive apt-get --allow-unauthenticated update + DEBIAN_FRONTEND=noninteractive apt-get --allow-unauthenticated update DEBIAN_FRONTEND=noninteractive apt install -y wget software-properties-common DEBIAN_FRONTEND=noninteractive apt-get clean all @@ -136,7 +136,7 @@ elif [[ "$DISTRO" == "el8" ]]; then dnf --enablerepo=extras,epel,elrepo,build_system install -y hipblaslt-devel || true fi -GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS:-"gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1200 gfx1201"} +GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS:-"gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100"} echo $ROCM_VERSION echo $ROCM_REPO diff --git a/third_party/xla/xla/service/gpu/ir_emitter.cc b/third_party/xla/xla/service/gpu/ir_emitter.cc index 61c0ed4be79c9e..d0401d06052801 100644 --- a/third_party/xla/xla/service/gpu/ir_emitter.cc +++ b/third_party/xla/xla/service/gpu/ir_emitter.cc @@ -264,12 +264,10 @@ void IrEmitter::BindFusionArguments(const HloInstruction* fusion, void IrEmitter::MaybeEmitFenceForAMDGPU(llvm::AtomicOrdering atomic_ordering, const char* sync_scope_id) { if (IsEmittingForAMDGPU() && - (ir_emitter_context_->rocm_compute_capability().gcn_arch_name().substr(0, 6) == "gfx90a" || - ir_emitter_context_->rocm_compute_capability().gcn_arch_name().substr(0, 6) == "gfx940" || - ir_emitter_context_->rocm_compute_capability().gcn_arch_name().substr(0, 6) == "gfx941" || - ir_emitter_context_->rocm_compute_capability().gcn_arch_name().substr(0, 6) == "gfx942" || - ir_emitter_context_->rocm_compute_capability().gcn_arch_name().substr(0, 6) == "gfx1200" || - ir_emitter_context_->rocm_compute_capability().gcn_arch_name().substr(0, 6) == "gfx1201")) { + (ir_emitter_context_->rocm_compute_capability().gcn_arch_name().substr(0, 6) == "gfx90a" || + ir_emitter_context_->rocm_compute_capability().gcn_arch_name().substr(0, 6) == "gfx940" || + ir_emitter_context_->rocm_compute_capability().gcn_arch_name().substr(0, 6) == "gfx941" || + ir_emitter_context_->rocm_compute_capability().gcn_arch_name().substr(0, 6) == "gfx942")) { b_.CreateFence(atomic_ordering, b_.getContext().getOrInsertSyncScopeID(sync_scope_id)); } diff --git a/third_party/xla/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc b/third_party/xla/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc index 7fe594bcae9381..beacd13fb83bfc 100644 --- a/third_party/xla/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc +++ b/third_party/xla/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc @@ -870,7 +870,7 @@ std::string MapGCNArchNameTokenToFeatureStr(const std::string& token, if (token == "sramecc+") { return "+sramecc"; } else if (token == "sramecc-") { - if(gfx == "gfx90a" || gfx == "gfx940" || gfx == "gfx941" || gfx == "gfx942" || gfx == "gfx1200" || gfx == "gfx1201" ) + if(gfx == "gfx90a" || gfx == "gfx940" || gfx == "gfx941" || gfx == "gfx942") return ""; return "-sramecc"; } else if (token == "xnack+") { diff --git a/third_party/xla/xla/stream_executor/device_description.h b/third_party/xla/xla/stream_executor/device_description.h index fd368a44a5c3f7..8ed02e972f67d6 100644 --- a/third_party/xla/xla/stream_executor/device_description.h +++ b/third_party/xla/xla/stream_executor/device_description.h @@ -162,7 +162,7 @@ class RocmComputeCapability { bool has_fast_fp16_support() const { static constexpr absl::string_view kList[] = {"gfx906", "gfx908", "gfx90a", "gfx940", "gfx941", "gfx942", - "gfx1030", "gfx1100", "gfx1200", "gfx1201"}; + "gfx1030", "gfx1100"}; return absl::c_count(kList, gfx_version()) != 0; } @@ -202,8 +202,6 @@ class RocmComputeCapability { "gfx942", // MI300 "gfx1030", // RX68xx / RX69xx "gfx1100" // RX7900 - "gfx1200" // NAVI4x - "gfx1201" // NAVI4x }; };