diff --git a/tensorflow/core/kernels/reduction_gpu_kernels.cu.h b/tensorflow/core/kernels/reduction_gpu_kernels.cu.h index 97d4fc9693e20b..a04ff3ecacbd27 100644 --- a/tensorflow/core/kernels/reduction_gpu_kernels.cu.h +++ b/tensorflow/core/kernels/reduction_gpu_kernels.cu.h @@ -725,7 +725,7 @@ inline bool isGfx10orGfx11(OpKernelContext* ctx) { result = hipGetDeviceProperties(&props, dev); if (result == hipSuccess) { std::string gcnArchName = props.gcnArchName; - return (gcnArchName.substr(0,5)=="gfx10" || gcnArchName.substr(0,5)=="gfx11"); + return gcnArchName.substr(0,4)=="gfx1"; } return false; } diff --git a/tensorflow/core/util/gpu_launch_config.h b/tensorflow/core/util/gpu_launch_config.h index e659d727f26043..5736da0ca33993 100644 --- a/tensorflow/core/util/gpu_launch_config.h +++ b/tensorflow/core/util/gpu_launch_config.h @@ -203,6 +203,8 @@ GpuLaunchConfig GetGpuLaunchConfigFixedBlockSize( #elif TENSORFLOW_USE_ROCM hipError_t err = hipOccupancyMaxActiveBlocksPerMultiprocessor( &block_count, func, fixed_block_size, dynamic_shared_memory_size); + if (block_count < 1) + block_count = 1; CHECK_EQ(err, hipSuccess); #endif block_count = std::min(block_count * d.getNumGpuMultiProcessors(),