From f8b84141b215a4b1e5d01758ec486554cbc4c819 Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Mon, 28 Oct 2024 10:54:33 -0700 Subject: [PATCH] Modernizing iree_atomic_*. (#18910) C11's _Generic lets us avoid encoding the type in the operation name and more closely match the C11 atomics syntax. This assumes that any C compiler we have that goes down the disabled atomics path supports _Generic (modern GCC, Clang, and MSVC have all supported it for a while). This allows iree_atomic_* to act as a drop-in replacement for C11-style atomics (useful in the new AMDGPU backend), and on MSVC it will allow us to use their C11 atomics implementation when it's ready (it's far better than the Interlocked solution we have now). --- experimental/webgpu/nop_semaphore.c | 12 +- runtime/src/iree/base/internal/atomics.h | 55 +-- .../src/iree/base/internal/atomics_clang.h | 35 +- .../src/iree/base/internal/atomics_disabled.h | 344 ++++++++++------ runtime/src/iree/base/internal/atomics_gcc.h | 44 ++- runtime/src/iree/base/internal/atomics_msvc.h | 374 ++++++++++++------ .../src/iree/base/internal/atomics_test.cc | 64 +-- .../base/internal/dynamic_library_win32.c | 2 +- .../src/iree/base/internal/synchronization.c | 50 ++- .../src/iree/base/internal/threading_darwin.c | 7 +- .../iree/base/internal/threading_pthreads.c | 12 +- .../src/iree/base/internal/threading_test.cc | 34 +- .../src/iree/base/internal/threading_win32.c | 7 +- .../iree/base/internal/wait_handle_inproc.c | 13 +- .../src/iree/hal/drivers/cuda/memory_pools.c | 16 +- .../src/iree/hal/drivers/hip/memory_pools.c | 16 +- .../src/iree/hal/drivers/metal/shared_event.m | 4 +- .../iree/hal/drivers/metal/staging_buffer.m | 10 +- .../hal/drivers/vulkan/native_semaphore.cc | 9 +- .../hal/local/executable_plugin_manager.c | 11 +- .../src/iree/hal/utils/deferred_work_queue.c | 42 +- runtime/src/iree/hal/utils/file_transfer.c | 4 +- runtime/src/iree/task/affinity_set.h | 8 +- runtime/src/iree/task/executor.c | 10 +- runtime/src/iree/task/executor_demo.cc | 8 +- runtime/src/iree/task/poller.c | 32 +- runtime/src/iree/task/scope.c | 22 +- runtime/src/iree/task/task.c | 78 ++-- runtime/src/iree/task/task_test_dispatch.cc | 7 +- runtime/src/iree/task/worker.c | 22 +- runtime/src/iree/vm/context.c | 4 +- runtime/src/iree/vm/invocation.c | 4 +- runtime/src/iree/vm/ref.c | 34 +- runtime/src/iree/vm/ref_test.cc | 6 +- 34 files changed, 794 insertions(+), 606 deletions(-) diff --git a/experimental/webgpu/nop_semaphore.c b/experimental/webgpu/nop_semaphore.c index d4151ee29990..65d26486567b 100644 --- a/experimental/webgpu/nop_semaphore.c +++ b/experimental/webgpu/nop_semaphore.c @@ -38,8 +38,8 @@ iree_status_t iree_hal_webgpu_nop_semaphore_create( iree_hal_resource_initialize(&iree_hal_webgpu_nop_semaphore_vtable, &semaphore->resource); semaphore->host_allocator = host_allocator; - iree_atomic_store_int64(&semaphore->value, initial_value, - iree_memory_order_seq_cst); + iree_atomic_store(&semaphore->value, initial_value, + iree_memory_order_seq_cst); *out_semaphore = (iree_hal_semaphore_t*)semaphore; } @@ -63,8 +63,7 @@ static iree_status_t iree_hal_webgpu_nop_semaphore_query( iree_hal_semaphore_t* base_semaphore, uint64_t* out_value) { iree_hal_webgpu_nop_semaphore_t* semaphore = iree_hal_webgpu_nop_semaphore_cast(base_semaphore); - *out_value = - iree_atomic_load_int64(&semaphore->value, iree_memory_order_seq_cst); + *out_value = iree_atomic_load(&semaphore->value, iree_memory_order_seq_cst); return iree_ok_status(); } @@ -72,8 +71,7 @@ static iree_status_t iree_hal_webgpu_nop_semaphore_signal( iree_hal_semaphore_t*
base_semaphore, uint64_t new_value) { iree_hal_webgpu_nop_semaphore_t* semaphore = iree_hal_webgpu_nop_semaphore_cast(base_semaphore); - iree_atomic_store_int64(&semaphore->value, new_value, - iree_memory_order_seq_cst); + iree_atomic_store(&semaphore->value, new_value, iree_memory_order_seq_cst); return iree_ok_status(); } @@ -88,7 +86,7 @@ static iree_status_t iree_hal_webgpu_nop_semaphore_wait( iree_hal_webgpu_nop_semaphore_t* semaphore = iree_hal_webgpu_nop_semaphore_cast(base_semaphore); uint64_t current_value = - iree_atomic_load_int64(&semaphore->value, iree_memory_order_seq_cst); + iree_atomic_load(&semaphore->value, iree_memory_order_seq_cst); if (current_value < value) { return iree_make_status( IREE_STATUS_FAILED_PRECONDITION, diff --git a/runtime/src/iree/base/internal/atomics.h b/runtime/src/iree/base/internal/atomics.h index 731d9eef510e..f428731506a5 100644 --- a/runtime/src/iree/base/internal/atomics.h +++ b/runtime/src/iree/base/internal/atomics.h @@ -86,47 +86,6 @@ extern "C" { #endif // IREE_COMPILER_* -// If the compiler can automatically determine the types: -#ifdef iree_atomic_load_auto - -#define iree_atomic_load_int32 iree_atomic_load_auto -#define iree_atomic_store_int32 iree_atomic_store_auto -#define iree_atomic_fetch_add_int32 iree_atomic_fetch_add_auto -#define iree_atomic_fetch_sub_int32 iree_atomic_fetch_sub_auto -#define iree_atomic_fetch_and_int32 iree_atomic_fetch_and_auto -#define iree_atomic_fetch_or_int32 iree_atomic_fetch_or_auto -#define iree_atomic_fetch_xor_int32 iree_atomic_fetch_xor_auto -#define iree_atomic_exchange_int32 iree_atomic_exchange_auto -#define iree_atomic_compare_exchange_strong_int32 \ - iree_atomic_compare_exchange_strong_auto -#define iree_atomic_compare_exchange_weak_int32 \ - iree_atomic_compare_exchange_weak_auto - -#define iree_atomic_load_int64 iree_atomic_load_auto -#define iree_atomic_store_int64 iree_atomic_store_auto -#define iree_atomic_fetch_add_int64 iree_atomic_fetch_add_auto -#define iree_atomic_fetch_sub_int64 iree_atomic_fetch_sub_auto -#define iree_atomic_fetch_and_int64 iree_atomic_fetch_and_auto -#define iree_atomic_fetch_or_int64 iree_atomic_fetch_or_auto -#define iree_atomic_fetch_xor_int64 iree_atomic_fetch_xor_auto -#define iree_atomic_exchange_int64 iree_atomic_exchange_auto -#define iree_atomic_compare_exchange_strong_int64 \ - iree_atomic_compare_exchange_strong_auto -#define iree_atomic_compare_exchange_weak_int64 \ - iree_atomic_compare_exchange_weak_auto - -#define iree_atomic_load_intptr iree_atomic_load_auto -#define iree_atomic_store_intptr iree_atomic_store_auto -#define iree_atomic_fetch_add_intptr iree_atomic_fetch_add_auto -#define iree_atomic_fetch_sub_intptr iree_atomic_fetch_sub_auto -#define iree_atomic_exchange_intptr iree_atomic_exchange_auto -#define iree_atomic_compare_exchange_strong_intptr \ - iree_atomic_compare_exchange_strong_auto -#define iree_atomic_compare_exchange_weak_intptr \ - iree_atomic_compare_exchange_weak_auto - -#endif // iree_atomic_load_auto - //============================================================================== // Reference count atomics //============================================================================== @@ -140,10 +99,10 @@ typedef iree_atomic_int32_t iree_atomic_ref_count_t; // should use IREE_ATOMIC_VAR_INIT, but apparently this has to be fixed // at call sites (where the variables are initialized in the first place). 
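For illustration (a sketch, not part of the patch), here is how the ref-count helpers defined just below are typically used; my_object_t and my_object_free are hypothetical names:

  typedef struct { iree_atomic_ref_count_t ref_count; } my_object_t;  // hypothetical
  void my_object_retain(my_object_t* object) {
    iree_atomic_ref_count_inc(&object->ref_count);  // relaxed; returns nothing
  }
  void my_object_release(my_object_t* object) {
    // The dec helper returns the previous count: 1 means this call dropped
    // the last reference and the object may be destroyed.
    if (iree_atomic_ref_count_dec(&object->ref_count) == 1) {
      my_object_free(object);  // hypothetical
    }
  }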
#define iree_atomic_ref_count_init_value(count_ptr, value) \ - iree_atomic_store_int32(count_ptr, value, iree_memory_order_relaxed) + iree_atomic_store((count_ptr), (value), iree_memory_order_relaxed) #define iree_atomic_ref_count_init(count_ptr) \ - iree_atomic_ref_count_init_value(count_ptr, 1) + iree_atomic_ref_count_init_value((count_ptr), 1) // Why relaxed order: // https://www.boost.org/doc/libs/1_57_0/doc/html/atomic/usage_examples.html#boost_atomic.usage_examples.example_reference_counters.discussion @@ -155,9 +114,9 @@ typedef iree_atomic_int32_t iree_atomic_ref_count_t; // value (unlike iree_atomic_ref_count_dec), so we make sure that it does not, // which allows the implementation to use faster atomic instructions where // available, e.g. STADD on ARMv8.1-a. -#define iree_atomic_ref_count_inc(count_ptr) \ - do { \ - iree_atomic_fetch_add_int32(count_ptr, 1, iree_memory_order_relaxed); \ +#define iree_atomic_ref_count_inc(count_ptr) \ + do { \ + iree_atomic_fetch_add((count_ptr), 1, iree_memory_order_relaxed); \ } while (false) // For now we stick to acq_rel order. TODO: should we follow Boost's advice? @@ -169,13 +128,13 @@ typedef iree_atomic_int32_t iree_atomic_ref_count_t; // may be a pessimization... I would like to hear a second opinion on this, // particularly regarding how x86-centric this might be. #define iree_atomic_ref_count_dec(count_ptr) \ - iree_atomic_fetch_sub_int32(count_ptr, 1, iree_memory_order_acq_rel) + iree_atomic_fetch_sub((count_ptr), 1, iree_memory_order_acq_rel) // memory_order_acquire order ensures that this sees decrements from // iree_atomic_ref_count_dec. On the other hand, there is no ordering with // iree_atomic_ref_count_inc. #define iree_atomic_ref_count_load(count_ptr) \ - iree_atomic_load_int32(count_ptr, iree_memory_order_acquire) + iree_atomic_load((count_ptr), iree_memory_order_acquire) // Aborts the program if the given reference count value is not 1. 
// This should be avoided in all situations but those where continuing execution diff --git a/runtime/src/iree/base/internal/atomics_clang.h b/runtime/src/iree/base/internal/atomics_clang.h index 44514e05c742..afa7a3352017 100644 --- a/runtime/src/iree/base/internal/atomics_clang.h +++ b/runtime/src/iree/base/internal/atomics_clang.h @@ -33,37 +33,38 @@ typedef enum iree_memory_order_e { typedef _Atomic int32_t iree_atomic_int32_t; typedef _Atomic int64_t iree_atomic_int64_t; +typedef _Atomic uint32_t iree_atomic_uint32_t; +typedef _Atomic uint64_t iree_atomic_uint64_t; // TODO(#3453): check for __int128 support before using // typedef _Atomic __int128 iree_atomic_int128_t; typedef _Atomic intptr_t iree_atomic_intptr_t; -#define iree_atomic_load_auto(object, order) \ - __c11_atomic_load((object), (order)) -#define iree_atomic_store_auto(object, desired, order) \ +#define iree_atomic_thread_fence(order) __c11_atomic_thread_fence(order) + +#define iree_atomic_load(object, order) __c11_atomic_load((object), (order)) +#define iree_atomic_store(object, desired, order) \ __c11_atomic_store((object), (desired), (order)) -#define iree_atomic_fetch_add_auto(object, operand, order) \ +#define iree_atomic_fetch_add(object, operand, order) \ __c11_atomic_fetch_add((object), (operand), (order)) -#define iree_atomic_fetch_sub_auto(object, operand, order) \ +#define iree_atomic_fetch_sub(object, operand, order) \ __c11_atomic_fetch_sub((object), (operand), (order)) -#define iree_atomic_fetch_and_auto(object, operand, order) \ +#define iree_atomic_fetch_and(object, operand, order) \ __c11_atomic_fetch_and((object), (operand), (order)) -#define iree_atomic_fetch_or_auto(object, operand, order) \ +#define iree_atomic_fetch_or(object, operand, order) \ __c11_atomic_fetch_or((object), (operand), (order)) -#define iree_atomic_fetch_xor_auto(object, operand, order) \ +#define iree_atomic_fetch_xor(object, operand, order) \ __c11_atomic_fetch_xor((object), (operand), (order)) -#define iree_atomic_exchange_auto(object, operand, order) \ +#define iree_atomic_exchange(object, operand, order) \ __c11_atomic_exchange((object), (operand), (order)) -#define iree_atomic_compare_exchange_strong_auto(object, expected, desired, \ - order_succ, order_fail) \ - __c11_atomic_compare_exchange_strong((object), (expected), (desired), \ +#define iree_atomic_compare_exchange_strong(object, expected, desired, \ + order_succ, order_fail) \ + __c11_atomic_compare_exchange_strong((object), (expected), (desired), \ (order_succ), (order_fail)) -#define iree_atomic_compare_exchange_weak_auto(object, expected, desired, \ - order_succ, order_fail) \ - __c11_atomic_compare_exchange_weak((object), (expected), (desired), \ +#define iree_atomic_compare_exchange_weak(object, expected, desired, \ + order_succ, order_fail) \ + __c11_atomic_compare_exchange_weak((object), (expected), (desired), \ (order_succ), (order_fail)) -#define iree_atomic_thread_fence(order) __c11_atomic_thread_fence(order) - #ifdef __cplusplus } // extern "C" #endif diff --git a/runtime/src/iree/base/internal/atomics_disabled.h b/runtime/src/iree/base/internal/atomics_disabled.h index 5c0a7cad6ff5..5dbb272f4748 100644 --- a/runtime/src/iree/base/internal/atomics_disabled.h +++ b/runtime/src/iree/base/internal/atomics_disabled.h @@ -16,12 +16,8 @@ #if IREE_SYNCHRONIZATION_DISABLE_UNSAFE -#ifdef __cplusplus -extern "C" { -#endif - typedef enum iree_memory_order_e { - iree_memory_order_relaxed, + iree_memory_order_relaxed = 0u, iree_memory_order_consume, 
iree_memory_order_acquire, iree_memory_order_release, @@ -33,65 +29,197 @@ typedef enum iree_memory_order_e { typedef int32_t iree_atomic_int32_t; typedef int64_t iree_atomic_int64_t; +typedef uint32_t iree_atomic_uint32_t; +typedef uint64_t iree_atomic_uint64_t; // TODO(#3453): check for __int128 support before using // typedef __int128 iree_atomic_int128_t; typedef intptr_t iree_atomic_intptr_t; -#define iree_atomic_load_int32(object, order) (*(object)) -#define iree_atomic_store_int32(object, desired, order) (*(object) = (desired)) -#define iree_atomic_fetch_add_int32(object, operand, order) \ - iree_atomic_fetch_add_int32_impl((volatile iree_atomic_int32_t*)(object), \ - (int32_t)(operand)) -#define iree_atomic_fetch_sub_int32(object, operand, order) \ - iree_atomic_fetch_add_int32_impl((volatile iree_atomic_int32_t*)(object), \ - -(int32_t)(operand)) -#define iree_atomic_fetch_and_int32(object, operand, order) \ - iree_atomic_fetch_and_int32_impl((volatile iree_atomic_int32_t*)(object), \ - (int32_t)(operand)) -#define iree_atomic_fetch_or_int32(object, operand, order) \ - iree_atomic_fetch_or_int32_impl((volatile iree_atomic_int32_t*)(object), \ - (int32_t)(operand)) -#define iree_atomic_fetch_xor_int32(object, operand, order) \ - iree_atomic_fetch_xor_int32_impl((volatile iree_atomic_int32_t*)(object), \ - (int32_t)(operand)) -#define iree_atomic_exchange_int32(object, desired, order) \ - iree_atomic_fetch_exchange_int32_impl( \ - (volatile iree_atomic_int32_t*)(object), (int32_t)(desired)) -#define iree_atomic_compare_exchange_strong_int32(object, expected, desired, \ - order_succ, order_fail) \ - iree_atomic_compare_exchange_int32_impl( \ - (volatile iree_atomic_int32_t*)(object), (int32_t*)(expected), \ - (int32_t)(desired)) -#define iree_atomic_compare_exchange_weak_int32 \ - iree_atomic_compare_exchange_strong_int32 - -#define iree_atomic_load_int64(object, order) (*(object)) -#define iree_atomic_store_int64(object, desired, order) (*(object) = (desired)) -#define iree_atomic_fetch_add_int64(object, operand, order) \ - iree_atomic_fetch_add_int64_impl((volatile iree_atomic_int64_t*)(object), \ - (int64_t)(operand)) -#define iree_atomic_fetch_sub_int64(object, operand, order) \ - iree_atomic_fetch_add_int64_impl((volatile iree_atomic_int64_t*)(object), \ - -(int64_t)(operand)) -#define iree_atomic_fetch_and_int64(object, operand, order) \ - iree_atomic_fetch_and_int64_impl((volatile iree_atomic_int64_t*)(object), \ - (int64_t)(operand)) -#define iree_atomic_fetch_or_int64(object, operand, order) \ - iree_atomic_fetch_or_int64_impl((volatile iree_atomic_int64_t*)(object), \ - (int64_t)(operand)) -#define iree_atomic_fetch_xor_int64(object, operand, order) \ - iree_atomic_fetch_xor_int64_impl((volatile iree_atomic_int64_t*)(object), \ - (int64_t)(operand)) -#define iree_atomic_exchange_int64(object, desired, order) \ - iree_atomic_fetch_exchange_int64_impl( \ - (volatile iree_atomic_int64_t*)(object), (int64_t)(desired)) -#define iree_atomic_compare_exchange_strong_int64(object, expected, desired, \ - order_succ, order_fail) \ - iree_atomic_compare_exchange_int64_impl( \ - (volatile iree_atomic_int64_t*)(object), (int64_t*)(expected), \ - (int64_t)(desired)) -#define iree_atomic_compare_exchange_weak_int64 \ - iree_atomic_compare_exchange_strong_int64 +#define iree_atomic_thread_fence(order) + +#ifdef __cplusplus + +extern "C++" { + +#define iree_atomic_load(object, order) (*(object)) +#define iree_atomic_store(object, desired, order) (*(object) = (desired)) +#define 
iree_atomic_fetch_add(object, operand, order) \ + iree_atomic_fetch_add_impl((object), (operand)) +#define iree_atomic_fetch_sub(object, operand, order) \ + iree_atomic_fetch_sub_impl((object), (operand)) +#define iree_atomic_fetch_and(object, operand, order) \ + iree_atomic_fetch_and_impl((object), (operand)) +#define iree_atomic_fetch_or(object, operand, order) \ + iree_atomic_fetch_or_impl((object), (operand)) +#define iree_atomic_fetch_xor(object, operand, order) \ + iree_atomic_fetch_xor_impl((object), (operand)) +#define iree_atomic_exchange(object, desired, order) \ + iree_atomic_fetch_exchange_impl((object), (desired)) +#define iree_atomic_compare_exchange_strong(object, expected, desired, \ + order_succ, order_fail) \ + iree_atomic_compare_exchange_impl((object), (expected), (desired)) +#define iree_atomic_compare_exchange_weak iree_atomic_compare_exchange_strong + +template <typename T, typename V> +static inline T iree_atomic_fetch_add_impl(volatile T* object, V operand) { + T original = *object; + *object += operand; + return original; +} + +template <typename T, typename V> +static inline T iree_atomic_fetch_sub_impl(volatile T* object, V operand) { + T original = *object; + *object -= operand; + return original; +} + +template <typename T, typename V> +static inline T iree_atomic_fetch_and_impl(volatile T* object, V operand) { + T original = *object; + *object &= operand; + return original; +} + +template <typename T, typename V> +static inline T iree_atomic_fetch_or_impl(volatile T* object, V operand) { + T original = *object; + *object |= operand; + return original; +} + +template <typename T, typename V> +static inline T iree_atomic_fetch_xor_impl(volatile T* object, V operand) { + T original = *object; + *object ^= operand; + return original; +} + +template <typename T, typename V> +static inline T iree_atomic_fetch_exchange_impl(volatile T* object, V desired) { + T original = *object; + *object = desired; + return original; +} + +template <typename T, typename V> +static inline bool iree_atomic_compare_exchange_impl(volatile T* object, + V* expected, V desired) { + if (*object == *expected) { + *object = desired; + return true; + } else { + *expected = *object; + return false; + } +} + +} // extern "C++" + +#else + +#define iree_atomic_load(object, order) (*(object)) +#define iree_atomic_store(object, desired, order) (*(object) = (desired)) +#define iree_atomic_fetch_add(object, operand, order) \ + _Generic((object), \ + iree_atomic_int32_t *: iree_atomic_fetch_add_int32_impl( \ + (volatile iree_atomic_int32_t*)(object), \ + (int32_t)(operand)), \ + iree_atomic_int64_t *: iree_atomic_fetch_add_int64_impl( \ + (volatile iree_atomic_int64_t*)(object), \ + (int64_t)(operand)), \ + iree_atomic_uint32_t *: iree_atomic_fetch_add_uint32_impl( \ + (volatile iree_atomic_uint32_t*)(object), \ + (uint32_t)(operand)), \ + iree_atomic_uint64_t *: iree_atomic_fetch_add_uint64_impl( \ + (volatile iree_atomic_uint64_t*)(object), \ + (uint64_t)(operand))) +#define iree_atomic_fetch_sub(object, operand, order) \ + _Generic((object), \ + iree_atomic_int32_t *: iree_atomic_fetch_sub_int32_impl( \ + (volatile iree_atomic_int32_t*)(object), \ + (int32_t)(operand)), \ + iree_atomic_int64_t *: iree_atomic_fetch_sub_int64_impl( \ + (volatile iree_atomic_int64_t*)(object), \ + (int64_t)(operand)), \ + iree_atomic_uint32_t *: iree_atomic_fetch_sub_uint32_impl( \ + (volatile iree_atomic_uint32_t*)(object), \ + (uint32_t)(operand)), \ + iree_atomic_uint64_t *: iree_atomic_fetch_sub_uint64_impl( \ + (volatile iree_atomic_uint64_t*)(object), \ + (uint64_t)(operand))) +#define iree_atomic_fetch_and(object, operand, order) \ + _Generic((object), \ + iree_atomic_int32_t *:
iree_atomic_fetch_and_int32_impl( \ + (volatile iree_atomic_int32_t*)(object), \ + (int32_t)(operand)), \ + iree_atomic_int64_t *: iree_atomic_fetch_and_int64_impl( \ + (volatile iree_atomic_int64_t*)(object), \ + (int64_t)(operand)), \ + iree_atomic_uint32_t *: iree_atomic_fetch_and_int32_impl( \ + (volatile iree_atomic_int32_t*)(object), \ + (int32_t)(operand)), \ + iree_atomic_uint64_t *: iree_atomic_fetch_and_int64_impl( \ + (volatile iree_atomic_int64_t*)(object), \ + (int64_t)(operand))) +#define iree_atomic_fetch_or(object, operand, order) \ + _Generic((object), \ + iree_atomic_int32_t *: iree_atomic_fetch_or_int32_impl( \ + (volatile iree_atomic_int32_t*)(object), \ + (int32_t)(operand)), \ + iree_atomic_int64_t *: iree_atomic_fetch_or_int64_impl( \ + (volatile iree_atomic_int64_t*)(object), \ + (int64_t)(operand)), \ + iree_atomic_uint32_t *: iree_atomic_fetch_or_int32_impl( \ + (volatile iree_atomic_int32_t*)(object), \ + (int32_t)(operand)), \ + iree_atomic_uint64_t *: iree_atomic_fetch_or_int64_impl( \ + (volatile iree_atomic_int64_t*)(object), \ + (int64_t)(operand))) +#define iree_atomic_fetch_xor(object, operand, order) \ + _Generic((object), \ + iree_atomic_int32_t *: iree_atomic_fetch_xor_int32_impl( \ + (volatile iree_atomic_int32_t*)(object), \ + (int32_t)(operand)), \ + iree_atomic_int64_t *: iree_atomic_fetch_xor_int64_impl( \ + (volatile iree_atomic_int64_t*)(object), \ + (int64_t)(operand)), \ + iree_atomic_uint32_t *: iree_atomic_fetch_xor_int32_impl( \ + (volatile iree_atomic_int32_t*)(object), \ + (int32_t)(operand)), \ + iree_atomic_uint64_t *: iree_atomic_fetch_xor_int64_impl( \ + (volatile iree_atomic_int64_t*)(object), \ + (int64_t)(operand))) +#define iree_atomic_exchange(object, desired, order) \ + _Generic((object), \ + iree_atomic_int32_t *: iree_atomic_fetch_exchange_int32_impl( \ + (volatile iree_atomic_int32_t*)(object), \ + (int32_t)(desired)), \ + iree_atomic_int64_t *: iree_atomic_fetch_exchange_int64_impl( \ + (volatile iree_atomic_int64_t*)(object), \ + (int64_t)(desired)), \ + iree_atomic_uint32_t *: iree_atomic_fetch_exchange_int32_impl( \ + (volatile iree_atomic_int32_t*)(object), \ + (int32_t)(desired)), \ + iree_atomic_uint64_t *: iree_atomic_fetch_exchange_int64_impl( \ + (volatile iree_atomic_int64_t*)(object), \ + (int64_t)(desired))) +#define iree_atomic_compare_exchange_strong(object, expected, desired, \ + order_succ, order_fail) \ + _Generic((object), \ + iree_atomic_int32_t *: iree_atomic_compare_exchange_int32_impl( \ + (volatile iree_atomic_int32_t*)(object), \ + (int32_t*)(expected), (int32_t)(desired)), \ + iree_atomic_int64_t *: iree_atomic_compare_exchange_int64_impl( \ + (volatile iree_atomic_int64_t*)(object), \ + (int64_t*)(expected), (int64_t)(desired)), \ + iree_atomic_uint32_t *: iree_atomic_compare_exchange_int32_impl( \ + (volatile iree_atomic_int32_t*)(object), \ + (int32_t*)(expected), (int32_t)(desired)), \ + iree_atomic_uint64_t *: iree_atomic_compare_exchange_int64_impl( \ + (volatile iree_atomic_int64_t*)(object), \ + (int64_t*)(expected), (int64_t)(desired))) +#define iree_atomic_compare_exchange_weak iree_atomic_compare_exchange_strong static inline int32_t iree_atomic_fetch_add_int32_impl( volatile iree_atomic_int32_t* object, int32_t operand) { @@ -100,6 +228,27 @@ static inline int32_t iree_atomic_fetch_add_int32_impl( return original; } +static inline int32_t iree_atomic_fetch_sub_int32_impl( + volatile iree_atomic_int32_t* object, int32_t operand) { + int32_t original = *object; + *object -= operand; + return 
original; +} + +static inline uint32_t iree_atomic_fetch_add_uint32_impl( + volatile iree_atomic_uint32_t* object, uint32_t operand) { + uint32_t original = *object; + *object += operand; + return original; +} + +static inline uint32_t iree_atomic_fetch_sub_uint32_impl( + volatile iree_atomic_uint32_t* object, uint32_t operand) { + uint32_t original = *object; + *object -= operand; + return original; +} + static inline int32_t iree_atomic_fetch_and_int32_impl( volatile iree_atomic_int32_t* object, int32_t operand) { int32_t original = *object; @@ -146,6 +295,27 @@ static inline int64_t iree_atomic_fetch_add_int64_impl( return original; } +static inline int64_t iree_atomic_fetch_sub_int64_impl( + volatile iree_atomic_int64_t* object, int64_t operand) { + int64_t original = *object; + *object -= operand; + return original; +} + +static inline uint64_t iree_atomic_fetch_add_uint64_impl( + volatile iree_atomic_uint64_t* object, uint64_t operand) { + uint64_t original = *object; + *object += operand; + return original; +} + +static inline uint64_t iree_atomic_fetch_sub_uint64_impl( + volatile iree_atomic_uint64_t* object, uint64_t operand) { + uint64_t original = *object; + *object -= operand; + return original; +} + static inline int64_t iree_atomic_fetch_and_int64_impl( volatile iree_atomic_int64_t* object, int64_t operand) { int64_t original = *object; @@ -185,59 +355,7 @@ static inline bool iree_atomic_compare_exchange_int64_impl( } } -// There are no pointer-width atomic ops in MSVC so we need to specialize based -// on the pointer size. -#if defined(IREE_PTR_SIZE_32) -#define iree_atomic_load_intptr(object, order) \ - (intptr_t) iree_atomic_load_int32((iree_atomic_int32_t*)(object), (order)) -#define iree_atomic_store_intptr(object, desired, order) \ - (intptr_t) iree_atomic_store_int32((iree_atomic_int32_t*)(object), \ - (int32_t)(desired), (order)) -#define iree_atomic_fetch_add_intptr(object, operand, order) \ - (intptr_t) iree_atomic_fetch_add_int32((iree_atomic_int32_t*)(object), \ - (int32_t)(operand), (order)) -#define iree_atomic_fetch_sub_intptr(object, operand, order) \ - (intptr_t) iree_atomic_fetch_sub_int32((iree_atomic_int32_t*)(object), \ - (int32_t)(operand), (order)) -#define iree_atomic_exchange_intptr(object, desired, order) \ - (intptr_t) iree_atomic_exchange_int32((iree_atomic_int32_t*)(object), \ - (int32_t)(desired), (order)) -#define iree_atomic_compare_exchange_strong_intptr(object, expected, desired, \ - order_succ, order_fail) \ - iree_atomic_compare_exchange_strong_int32( \ - (iree_atomic_int32_t*)(object), (int32_t*)(expected), \ - (int32_t)(desired), (order_succ), (order_fail)) -#define iree_atomic_compare_exchange_weak_intptr \ - iree_atomic_compare_exchange_strong_intptr -#else -#define iree_atomic_load_intptr(object, order) \ - (intptr_t) iree_atomic_load_int64((iree_atomic_int64_t*)(object), (order)) -#define iree_atomic_store_intptr(object, desired, order) \ - (intptr_t) iree_atomic_store_int64((iree_atomic_int64_t*)(object), \ - (int64_t)(desired), (order)) -#define iree_atomic_fetch_add_intptr(object, operand, order) \ - (intptr_t) iree_atomic_fetch_add_int64((iree_atomic_int64_t*)(object), \ - (int64_t)(operand), (order)) -#define iree_atomic_fetch_sub_intptr(object, operand, order) \ - (intptr_t) iree_atomic_fetch_sub_int64((iree_atomic_int64_t*)(object), \ - (int64_t)(operand), (order)) -#define iree_atomic_exchange_intptr(object, desired, order) \ - (intptr_t) iree_atomic_exchange_int64((iree_atomic_int64_t*)(object), \ - (int64_t)(desired), (order))
-#define iree_atomic_compare_exchange_strong_intptr(object, expected, desired, \ - order_succ, order_fail) \ - iree_atomic_compare_exchange_strong_int64( \ - (iree_atomic_int64_t*)(object), (int64_t*)(expected), \ - (int64_t)(desired), (order_succ), (order_fail)) -#define iree_atomic_compare_exchange_weak_intptr \ - iree_atomic_compare_exchange_strong_intptr -#endif // IREE_PTR_SIZE_32 - -#define iree_atomic_thread_fence(order) - -#ifdef __cplusplus -} // extern "C" -#endif +#endif // __cplusplus #endif // IREE_SYNCHRONIZATION_DISABLE_UNSAFE diff --git a/runtime/src/iree/base/internal/atomics_gcc.h b/runtime/src/iree/base/internal/atomics_gcc.h index d413b9816253..728add728612 100644 --- a/runtime/src/iree/base/internal/atomics_gcc.h +++ b/runtime/src/iree/base/internal/atomics_gcc.h @@ -34,6 +34,8 @@ typedef enum iree_memory_order_e { typedef int32_t iree_atomic_int32_t; typedef int64_t iree_atomic_int64_t; +typedef uint32_t iree_atomic_uint32_t; +typedef uint64_t iree_atomic_uint64_t; // typedef __int128 iree_atomic_int128_t; typedef intptr_t iree_atomic_intptr_t; @@ -45,47 +47,47 @@ typedef intptr_t iree_atomic_intptr_t; #define __iree_auto_type __auto_type #endif -#define iree_atomic_load_auto(object, order) \ +static inline void iree_atomic_thread_fence(int order) { + // Ignore error where TSan does not support atomic thread fence. + IREE_DISABLE_COMPILER_TSAN_ERRORS() + __atomic_thread_fence(order); + IREE_RESTORE_COMPILER_TSAN_ERRORS() +} + +#define iree_atomic_load(object, order) \ __extension__({ \ __iree_auto_type __atomic_load_ptr = (object); \ __typeof__(*__atomic_load_ptr) __atomic_load_tmp; \ __atomic_load(__atomic_load_ptr, &__atomic_load_tmp, (order)); \ __atomic_load_tmp; \ }) -#define iree_atomic_store_auto(object, desired, order) \ +#define iree_atomic_store(object, desired, order) \ __extension__({ \ __iree_auto_type __atomic_store_ptr = (object); \ __typeof__(*__atomic_store_ptr) __atomic_store_tmp = (desired); \ __atomic_store(__atomic_store_ptr, &__atomic_store_tmp, (order)); \ }) -#define iree_atomic_fetch_add_auto(object, operand, order) \ +#define iree_atomic_fetch_add(object, operand, order) \ __atomic_fetch_add((object), (operand), (order)) -#define iree_atomic_fetch_sub_auto(object, operand, order) \ +#define iree_atomic_fetch_sub(object, operand, order) \ __atomic_fetch_sub((object), (operand), (order)) -#define iree_atomic_fetch_and_auto(object, operand, order) \ +#define iree_atomic_fetch_and(object, operand, order) \ __atomic_fetch_and((object), (operand), (order)) -#define iree_atomic_fetch_or_auto(object, operand, order) \ +#define iree_atomic_fetch_or(object, operand, order) \ __atomic_fetch_or((object), (operand), (order)) -#define iree_atomic_fetch_xor_auto(object, operand, order) \ +#define iree_atomic_fetch_xor(object, operand, order) \ __atomic_fetch_xor((object), (operand), (order)) -#define iree_atomic_exchange_auto(object, operand, order) \ +#define iree_atomic_exchange(object, operand, order) \ __atomic_exchange_n((object), (operand), (order)) -#define iree_atomic_compare_exchange_strong_auto(object, expected, desired, \ - order_succ, order_fail) \ - __atomic_compare_exchange_n(object, expected, desired, /*weak=*/false, \ +#define iree_atomic_compare_exchange_strong(object, expected, desired, \ + order_succ, order_fail) \ + __atomic_compare_exchange_n(object, expected, desired, /*weak=*/false, \ (order_succ), (order_fail)) -#define iree_atomic_compare_exchange_weak_auto(object, expected, desired, \ - order_succ, order_fail) \ - 
__atomic_compare_exchange_n(object, expected, desired, /*weak=*/true, \ +#define iree_atomic_compare_exchange_weak(object, expected, desired, \ + order_succ, order_fail) \ + __atomic_compare_exchange_n(object, expected, desired, /*weak=*/true, \ (order_succ), (order_fail)) -static inline void iree_atomic_thread_fence(int order) { - // Ignore error where TSan does not support atomic thread fence. - IREE_DISABLE_COMPILER_TSAN_ERRORS() - __atomic_thread_fence(order); - IREE_RESTORE_COMPILER_TSAN_ERRORS() -} - #ifdef __cplusplus } // extern "C" #endif diff --git a/runtime/src/iree/base/internal/atomics_msvc.h b/runtime/src/iree/base/internal/atomics_msvc.h index 5cfbf43eb3f6..2af2798c0a13 100644 --- a/runtime/src/iree/base/internal/atomics_msvc.h +++ b/runtime/src/iree/base/internal/atomics_msvc.h @@ -16,12 +16,141 @@ #if defined(IREE_COMPILER_MSVC) -#ifdef __cplusplus +// TODO(benvanik): make MSVC's C11 atomic support work. +// It's difficult to detect and has some weird configuration assertions around +// mixed C and C++ code. Support is only present when the +// `/experimental:c11atomics` flag is passed but that flag is ignored on /TP +// (C++) compilation. __STDC_NO_ATOMICS__ remains defined even when support is +// enabled so we can't use the standard check. Hopefully that'd be fixed if it +// ever leaves experimental. +#define IREE_ATOMIC_USE_MSVC_C11 0 +#if IREE_ATOMIC_USE_MSVC_C11 +#include <stdatomic.h> +#endif // IREE_ATOMIC_USE_MSVC_C11 + +#if IREE_ATOMIC_USE_MSVC_C11 && defined(atomic_init) + +typedef enum iree_memory_order_e { + iree_memory_order_relaxed = _Atomic_memory_order_relaxed, + iree_memory_order_consume = _Atomic_memory_order_consume, + iree_memory_order_acquire = _Atomic_memory_order_acquire, + iree_memory_order_release = _Atomic_memory_order_release, + iree_memory_order_acq_rel = _Atomic_memory_order_acq_rel, + iree_memory_order_seq_cst = _Atomic_memory_order_seq_cst, +} iree_memory_order_t; + +#define IREE_ATOMIC_VAR_INIT(value) (value) + +typedef _Atomic int32_t iree_atomic_int32_t; +typedef _Atomic int64_t iree_atomic_int64_t; +typedef _Atomic uint32_t iree_atomic_uint32_t; +typedef _Atomic uint64_t iree_atomic_uint64_t; +// TODO(#3453): check for __int128 support before using +// typedef _Atomic __int128 iree_atomic_int128_t; +typedef _Atomic intptr_t iree_atomic_intptr_t; + +#define iree_atomic_thread_fence(order) atomic_thread_fence(order) + +#define iree_atomic_load(object, order) __c11_atomic_load((object), (order)) +#define iree_atomic_store(object, desired, order) \ + __c11_atomic_store((object), (desired), (order)) +#define iree_atomic_fetch_add(object, operand, order) \ + __c11_atomic_fetch_add((object), (operand), (order)) +#define iree_atomic_fetch_sub(object, operand, order) \ + __c11_atomic_fetch_sub((object), (operand), (order)) +#define iree_atomic_fetch_and(object, operand, order) \ + __c11_atomic_fetch_and((object), (operand), (order)) +#define iree_atomic_fetch_or(object, operand, order) \ + __c11_atomic_fetch_or((object), (operand), (order)) +#define iree_atomic_fetch_xor(object, operand, order) \ + __c11_atomic_fetch_xor((object), (operand), (order)) +#define iree_atomic_exchange(object, operand, order) \ + __c11_atomic_exchange((object), (operand), (order)) +#define iree_atomic_compare_exchange_strong(object, expected, desired, \ + order_succ, order_fail) \ + __c11_atomic_compare_exchange_strong((object), (expected), (desired), \ + (order_succ), (order_fail)) +#define iree_atomic_compare_exchange_weak(object, expected, desired, \ + order_succ, order_fail) \ +
__c11_atomic_compare_exchange_weak((object), (expected), (desired), \ + (order_succ), (order_fail)) + +#elif __cplusplus + +// When compiling for C++ we reinterpret atomics as std::atomic<T>. This relies +// on std::atomic<T> on primitive types being lock-free such that the memory for +// each atomic is just the atomic value. We need this special path because MSVC +// doesn't support C features like _Generic in C++. + +extern "C++" { +#include <atomic> +} // extern "C++" + extern "C" { typedef enum iree_memory_order_e { - iree_memory_order_relaxed, + iree_memory_order_relaxed = _Atomic_memory_order_relaxed, + iree_memory_order_consume = _Atomic_memory_order_consume, + iree_memory_order_acquire = _Atomic_memory_order_acquire, + iree_memory_order_release = _Atomic_memory_order_release, + iree_memory_order_acq_rel = _Atomic_memory_order_acq_rel, + iree_memory_order_seq_cst = _Atomic_memory_order_seq_cst, +} iree_memory_order_t; + +#define IREE_ATOMIC_VAR_INIT(value) (value) + +typedef std::atomic<int32_t> iree_atomic_int32_t; +typedef std::atomic<int64_t> iree_atomic_int64_t; +typedef std::atomic<uint32_t> iree_atomic_uint32_t; +typedef std::atomic<uint64_t> iree_atomic_uint64_t; +typedef std::atomic<intptr_t> iree_atomic_intptr_t; + +#define iree_atomic_thread_fence(order) std::atomic_thread_fence(order) + +#define iree_atomic_load(object, order) \ + std::atomic_load_explicit((object), (std::memory_order)(order)) +#define iree_atomic_store(object, desired, order) \ + std::atomic_store_explicit((object), (desired), (std::memory_order)(order)) +#define iree_atomic_fetch_add(object, operand, order) \ + std::atomic_fetch_add_explicit((object), (operand), \ + (std::memory_order)(order)) +#define iree_atomic_fetch_sub(object, operand, order) \ + std::atomic_fetch_sub_explicit((object), (operand), \ + (std::memory_order)(order)) +#define iree_atomic_fetch_and(object, operand, order) \ + std::atomic_fetch_and_explicit((object), (operand), \ + (std::memory_order)(order)) +#define iree_atomic_fetch_or(object, operand, order) \ + std::atomic_fetch_or_explicit((object), (operand), (std::memory_order)(order)) +#define iree_atomic_fetch_xor(object, operand, order) \ + std::atomic_fetch_xor_explicit((object), (operand), \ + (std::memory_order)(order)) +#define iree_atomic_exchange(object, operand, order) \ + std::atomic_exchange_explicit((object), (operand), (std::memory_order)(order)) +#define iree_atomic_compare_exchange_strong(object, expected, desired, \ + order_succ, order_fail) \ + std::atomic_compare_exchange_strong_explicit( \ + (object), (expected), (desired), (std::memory_order)(order_succ), \ + (std::memory_order)(order_fail)) +#define iree_atomic_compare_exchange_weak(object, expected, desired, \ + order_succ, order_fail) \ + std::atomic_compare_exchange_weak_explicit((object), (expected), (desired), \ + (std::memory_order)(order_succ), \ + (std::memory_order)(order_fail)) + +} // extern "C" + +#else + +// When compiling in C we can use _Generic to automatically route to the +// builtins that change their name based on the atomic type. This implementation +// is not good: it ignores memory order entirely and uses the full barrier +// implied by any of the _Interlocked* builtins. There are some variants of the +// builtins that we could use based on the order but their support across +// targets differs. Hopefully ~soon we can use C11 atomics directly and drop +// this code path.
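To illustrate the dispatch (a sketch; the macros it relies on are defined just below), a single spelling now covers all four integer types, with _Generic selecting the width-appropriate Interlocked builtin:

  iree_atomic_int32_t i32 = IREE_ATOMIC_VAR_INIT(0);
  iree_atomic_uint64_t u64 = IREE_ATOMIC_VAR_INIT(0);
  iree_atomic_fetch_add(&i32, 1, iree_memory_order_relaxed);  // -> _InterlockedExchangeAdd
  iree_atomic_fetch_add(&u64, 1, iree_memory_order_relaxed);  // -> _InterlockedExchangeAdd64
  // On this path the order argument is ignored: every op is a full barrier.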
+ +typedef enum iree_memory_order_e { + iree_memory_order_relaxed = 0u, iree_memory_order_consume, iree_memory_order_acquire, iree_memory_order_release, @@ -29,72 +158,131 @@ typedef enum iree_memory_order_e { iree_memory_order_seq_cst, } iree_memory_order_t; -#define IREE_ATOMIC_VAR_INIT(value) \ - { (value) } - -typedef struct { - int32_t __val; -} iree_atomic_int32_t; -typedef struct { - int64_t __val; -} iree_atomic_int64_t; -// typedef __declspec(align(16)) struct { -// uint64_t __val[2]; -// } iree_atomic_int128_t; -typedef struct { - intptr_t __val; -} iree_atomic_intptr_t; - -#define iree_atomic_load_int32(object, order) \ - InterlockedExchangeAdd((volatile LONG*)object, 0) -#define iree_atomic_store_int32(object, desired, order) \ - InterlockedExchange((volatile LONG*)object, desired) -#define iree_atomic_fetch_add_int32(object, operand, order) \ - InterlockedExchangeAdd((volatile LONG*)object, operand) -#define iree_atomic_fetch_sub_int32(object, operand, order) \ - InterlockedExchangeAdd((volatile LONG*)object, -((int32_t)(operand))) -#define iree_atomic_fetch_and_int32(object, operand, order) \ - InterlockedAnd((volatile LONG*)object, operand) -#define iree_atomic_fetch_or_int32(object, operand, order) \ - InterlockedOr((volatile LONG*)object, operand) -#define iree_atomic_fetch_xor_int32(object, operand, order) \ - InterlockedXor((volatile LONG*)object, operand) -#define iree_atomic_exchange_int32(object, desired, order) \ - InterlockedExchange((volatile LONG*)object, desired) -#define iree_atomic_compare_exchange_strong_int32(object, expected, desired, \ - order_succ, order_fail) \ - iree_atomic_compare_exchange_strong_int32_impl( \ - (volatile iree_atomic_int32_t*)(object), (int32_t*)(expected), \ - (int32_t)(desired), (order_succ), (order_fail)) -#define iree_atomic_compare_exchange_weak_int32 \ - iree_atomic_compare_exchange_strong_int32 - -#define iree_atomic_load_int64(object, order) \ - InterlockedExchangeAdd64((volatile LONG64*)object, 0) -#define iree_atomic_store_int64(object, desired, order) \ - InterlockedExchange64((volatile LONG64*)object, (LONG64)desired) -#define iree_atomic_fetch_add_int64(object, operand, order) \ - InterlockedExchangeAdd64((volatile LONG64*)object, (LONG64)operand) -#define iree_atomic_fetch_sub_int64(object, operand, order) \ - InterlockedExchangeAdd64((volatile LONG64*)object, -(operand)) -#define iree_atomic_fetch_and_int64(object, operand, order) \ - InterlockedAnd64((volatile LONG64*)object, operand) -#define iree_atomic_fetch_or_int64(object, operand, order) \ - InterlockedOr64((volatile LONG64*)object, operand) -#define iree_atomic_fetch_xor_int64(object, operand, order) \ - InterlockedXor64((volatile LONG64*)object, operand) -#define iree_atomic_exchange_int64(object, desired, order) \ - InterlockedExchange64((volatile LONG64*)object, desired) -#define iree_atomic_compare_exchange_strong_int64(object, expected, desired, \ - order_succ, order_fail) \ - iree_atomic_compare_exchange_strong_int64_impl( \ - (volatile iree_atomic_int64_t*)(object), (int64_t*)(expected), \ - (int64_t)(desired), (order_succ), (order_fail)) -#define iree_atomic_compare_exchange_weak_int64 \ - iree_atomic_compare_exchange_strong_int64 +#define IREE_ATOMIC_VAR_INIT(value) (value) + +typedef int32_t iree_atomic_int32_t; +typedef int64_t iree_atomic_int64_t; +typedef uint32_t iree_atomic_uint32_t; +typedef uint64_t iree_atomic_uint64_t; +typedef intptr_t iree_atomic_intptr_t; #define iree_atomic_thread_fence(order) MemoryBarrier() +#define 
iree_atomic_load(object, order) \ + _Generic((object), \ + iree_atomic_int32_t *: _InterlockedExchangeAdd( \ + (volatile int32_t*)(object), 0), \ + iree_atomic_int64_t *: _InterlockedExchangeAdd64( \ + (volatile int64_t*)(object), 0), \ + iree_atomic_uint32_t *: _InterlockedExchangeAdd( \ + (volatile int32_t*)(object), 0), \ + iree_atomic_uint64_t *: _InterlockedExchangeAdd64( \ + (volatile int64_t*)(object), 0)) +#define iree_atomic_store(object, desired, order) \ + _Generic((object), \ + iree_atomic_int32_t *: _InterlockedExchange((volatile int32_t*)(object), \ + (int32_t)(desired)), \ + iree_atomic_int64_t *: _InterlockedExchange64( \ + (volatile int64_t*)(object), \ + (int64_t)(desired)), \ + iree_atomic_uint32_t *: _InterlockedExchange( \ + (volatile int32_t*)(object), \ + (int32_t)(desired)), \ + iree_atomic_uint64_t *: _InterlockedExchange64( \ + (volatile int64_t*)(object), \ + (int64_t)(desired))) +#define iree_atomic_fetch_add(object, operand, order) \ + _Generic((object), \ + iree_atomic_int32_t *: _InterlockedExchangeAdd( \ + (volatile int32_t*)(object), \ + (int32_t)(operand)), \ + iree_atomic_int64_t *: _InterlockedExchangeAdd64( \ + (volatile int64_t*)(object), \ + (int64_t)(operand)), \ + iree_atomic_uint32_t *: _InterlockedExchangeAdd( \ + (volatile int32_t*)(object), \ + (int32_t)(operand)), \ + iree_atomic_uint64_t *: _InterlockedExchangeAdd64( \ + (volatile int64_t*)(object), \ + (int64_t)(operand))) +#define iree_atomic_fetch_sub(object, operand, order) \ + _Generic((object), \ + iree_atomic_int32_t *: _InterlockedExchangeAdd( \ + (volatile int32_t*)(object), \ + -((int32_t)(operand))), \ + iree_atomic_int64_t *: _InterlockedExchangeAdd64( \ + (volatile int64_t*)(object), \ + -((int64_t)(operand))), \ + iree_atomic_uint32_t *: _InterlockedExchangeAdd( \ + (volatile int32_t*)(object), \ + -((int32_t)(operand))), \ + iree_atomic_uint64_t *: _InterlockedExchangeAdd64( \ + (volatile int64_t*)(object), \ + -((int64_t)(operand)))) +#define iree_atomic_fetch_and(object, operand, order) \ + _Generic((object), \ + iree_atomic_int32_t *: _InterlockedAnd((volatile int32_t*)(object), \ + (int32_t)(operand)), \ + iree_atomic_int64_t *: _InterlockedAnd64((volatile int64_t*)(object), \ + (int64_t)(operand)), \ + iree_atomic_uint32_t *: _InterlockedAnd((volatile int32_t*)(object), \ + (int32_t)(operand)), \ + iree_atomic_uint64_t *: _InterlockedAnd64((volatile int64_t*)(object), \ + (int64_t)(operand))) +#define iree_atomic_fetch_or(object, operand, order) \ + _Generic((object), \ + iree_atomic_int32_t *: _InterlockedOr((volatile int32_t*)(object), \ + (int32_t)(operand)), \ + iree_atomic_int64_t *: _InterlockedOr64((volatile int64_t*)(object), \ + (int64_t)(operand)), \ + iree_atomic_uint32_t *: _InterlockedOr((volatile int32_t*)(object), \ + (int32_t)(operand)), \ + iree_atomic_uint64_t *: _InterlockedOr64((volatile int64_t*)(object), \ + (int64_t)(operand))) +#define iree_atomic_fetch_xor(object, operand, order) \ + _Generic((object), \ + iree_atomic_int32_t *: _InterlockedXor((volatile int32_t*)(object), \ + (int32_t)(operand)), \ + iree_atomic_int64_t *: _InterlockedXor64((volatile int64_t*)(object), \ + (int64_t)(operand)), \ + iree_atomic_uint32_t *: _InterlockedXor((volatile int32_t*)(object), \ + (int32_t)(operand)), \ + iree_atomic_uint64_t *: _InterlockedXor64((volatile int64_t*)(object), \ + (int64_t)(operand))) +#define iree_atomic_exchange(object, desired, order) \ + _Generic((object), \ + iree_atomic_int32_t *: _InterlockedExchange((volatile int32_t*)(object), \ + 
(int32_t)(desired)), \ + iree_atomic_int64_t *: _InterlockedExchange64( \ + (volatile int64_t*)(object), \ + (int64_t)(desired)), \ + iree_atomic_uint32_t *: _InterlockedExchange( \ + (volatile int32_t*)(object), \ + (int32_t)(desired)), \ + iree_atomic_uint64_t *: _InterlockedExchange64( \ + (volatile int64_t*)(object), \ + (int64_t)(desired))) +#define iree_atomic_compare_exchange_strong(object, expected, desired, \ + order_succ, order_fail) \ + _Generic((object), \ + iree_atomic_int32_t *: iree_atomic_compare_exchange_strong_int32_impl( \ + (volatile iree_atomic_int32_t*)(object), \ + (int32_t*)(expected), (int32_t)(desired), \ + (order_succ), (order_fail)), \ + iree_atomic_int64_t *: iree_atomic_compare_exchange_strong_int64_impl( \ + (volatile iree_atomic_int64_t*)(object), \ + (int64_t*)(expected), (int64_t)(desired), \ + (order_succ), (order_fail)), \ + iree_atomic_uint32_t *: iree_atomic_compare_exchange_strong_int32_impl( \ + (volatile iree_atomic_int32_t*)(object), \ + (int32_t*)(expected), (int32_t)(desired), \ + (order_succ), (order_fail)), \ + iree_atomic_uint64_t *: iree_atomic_compare_exchange_strong_int64_impl( \ + (volatile iree_atomic_int64_t*)(object), \ + (int64_t*)(expected), (int64_t)(desired), \ + (order_succ), (order_fail))) +#define iree_atomic_compare_exchange_weak iree_atomic_compare_exchange_strong + static inline bool iree_atomic_compare_exchange_strong_int32_impl( volatile iree_atomic_int32_t* object, int32_t* expected, int32_t desired, iree_memory_order_t order_succ, iree_memory_order_t order_fail) { @@ -123,59 +311,7 @@ static inline bool iree_atomic_compare_exchange_strong_int64_impl( } } -#define iree_atomic_thread_fence(order) MemoryBarrier() - -// There are no pointer-width atomic ops in MSVC so we need to specialize based -// on the pointer size. 
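The pointer-width specializations removed below are no longer needed: iree_atomic_intptr_t is now a plain typedef of intptr_t, which on any given target is the same underlying type as either int32_t or int64_t, so the _Generic macros above already select the right branch. A sketch of the idea (assuming a 64-bit target where intptr_t aliases int64_t):

  iree_atomic_intptr_t slot = IREE_ATOMIC_VAR_INIT(0);
  // intptr_t matches the int64_t association on 64-bit targets and the
  // int32_t association on 32-bit targets; no *_intptr macros are required.
  intptr_t previous = (intptr_t)iree_atomic_exchange(
      &slot, (intptr_t)0x1234, iree_memory_order_seq_cst);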
-#if defined(IREE_PTR_SIZE_32) -#define iree_atomic_load_intptr(object, order) \ - (intptr_t) iree_atomic_load_int32((iree_atomic_int32_t*)(object), (order)) -#define iree_atomic_store_intptr(object, desired, order) \ - (intptr_t) iree_atomic_store_int32((iree_atomic_int32_t*)(object), \ - (int32_t)(desired), (order)) -#define iree_atomic_fetch_add_intptr(object, operand, order) \ - (intptr_t) iree_atomic_fetch_add_int32((iree_atomic_int32_t*)(object), \ - (int32_t)(operand), (order)) -#define iree_atomic_fetch_sub_intptr(object, operand, order) \ - (intptr_t) iree_atomic_fetch_sub_int32((iree_atomic_int32_t*)(object), \ - (int32_t)(operand), (order)) -#define iree_atomic_exchange_intptr(object, desired, order) \ - (intptr_t) iree_atomic_exchange_int32((iree_atomic_int32_t*)(object), \ - (int32_t)(desired), (order)) -#define iree_atomic_compare_exchange_strong_intptr(object, expected, desired, \ - order_succ, order_fail) \ - iree_atomic_compare_exchange_strong_int32( \ - (iree_atomic_int32_t*)(object), (int32_t*)(expected), \ - (int32_t)(desired), (order_succ), (order_fail)) -#define iree_atomic_compare_exchange_weak_intptr \ - iree_atomic_compare_exchange_strong_intptr -#else -#define iree_atomic_load_intptr(object, order) \ - (intptr_t) iree_atomic_load_int64((iree_atomic_int64_t*)(object), (order)) -#define iree_atomic_store_intptr(object, desired, order) \ - (intptr_t) iree_atomic_store_int64((iree_atomic_int64_t*)(object), \ - (int64_t)(desired), (order)) -#define iree_atomic_fetch_add_intptr(object, operand, order) \ - (intptr_t) iree_atomic_fetch_add_int64((iree_atomic_int64_t*)(object), \ - (int64_t)(operand), (order)) -#define iree_atomic_fetch_sub_intptr(object, operand, order) \ - (intptr_t) iree_atomic_fetch_sub_int64((iree_atomic_int64_t*)(object), \ - (int64_t)(operand), (order)) -#define iree_atomic_exchange_intptr(object, desired, order) \ - (intptr_t) iree_atomic_exchange_int64((iree_atomic_int64_t*)(object), \ - (int64_t)(desired), (order)) -#define iree_atomic_compare_exchange_strong_intptr(object, expected, desired, \ - order_succ, order_fail) \ - iree_atomic_compare_exchange_strong_int64( \ - (iree_atomic_int64_t*)(object), (int64_t*)(expected), \ - (int64_t)(desired), (order_succ), (order_fail)) -#define iree_atomic_compare_exchange_weak_intptr \ - iree_atomic_compare_exchange_strong_intptr -#endif // IREE_PTR_SIZE_32 - -#ifdef __cplusplus -} // extern "C" -#endif +#endif // IREE_ATOMIC_USE_MSVC_C11 #endif // IREE_COMPILER_MSVC diff --git a/runtime/src/iree/base/internal/atomics_test.cc b/runtime/src/iree/base/internal/atomics_test.cc index a9fce2f2173e..d78890c674a7 100644 --- a/runtime/src/iree/base/internal/atomics_test.cc +++ b/runtime/src/iree/base/internal/atomics_test.cc @@ -21,9 +21,9 @@ TEST(AtomicPtr, LoadStore) { intptr_t ptr_0 = 0x0; intptr_t ptr_1 = 0x1; iree_atomic_intptr_t value = IREE_ATOMIC_VAR_INIT(ptr_0); - EXPECT_EQ(ptr_0, iree_atomic_load_intptr(&value, iree_memory_order_seq_cst)); - iree_atomic_store_intptr(&value, ptr_1, iree_memory_order_seq_cst); - EXPECT_EQ(ptr_1, iree_atomic_load_intptr(&value, iree_memory_order_seq_cst)); + EXPECT_EQ(ptr_0, iree_atomic_load(&value, iree_memory_order_seq_cst)); + iree_atomic_store(&value, ptr_1, iree_memory_order_seq_cst); + EXPECT_EQ(ptr_1, iree_atomic_load(&value, iree_memory_order_seq_cst)); } TEST(AtomicPtr, AddSub) { @@ -31,15 +31,15 @@ TEST(AtomicPtr, AddSub) { intptr_t ptr_1 = 0x1; intptr_t ptr_2 = 0x2; iree_atomic_intptr_t value = IREE_ATOMIC_VAR_INIT(ptr_0); - EXPECT_EQ(ptr_0, 
iree_atomic_fetch_add_intptr(&value, ptr_1, - iree_memory_order_seq_cst)); - EXPECT_EQ(ptr_1, iree_atomic_fetch_add_intptr(&value, ptr_1, - iree_memory_order_seq_cst)); - EXPECT_EQ(ptr_2, iree_atomic_fetch_sub_intptr(&value, ptr_1, - iree_memory_order_seq_cst)); - EXPECT_EQ(ptr_1, iree_atomic_fetch_sub_intptr(&value, ptr_1, - iree_memory_order_seq_cst)); - EXPECT_EQ(ptr_0, iree_atomic_load_intptr(&value, iree_memory_order_seq_cst)); + EXPECT_EQ(ptr_0, + iree_atomic_fetch_add(&value, ptr_1, iree_memory_order_seq_cst)); + EXPECT_EQ(ptr_1, + iree_atomic_fetch_add(&value, ptr_1, iree_memory_order_seq_cst)); + EXPECT_EQ(ptr_2, + iree_atomic_fetch_sub(&value, ptr_1, iree_memory_order_seq_cst)); + EXPECT_EQ(ptr_1, + iree_atomic_fetch_sub(&value, ptr_1, iree_memory_order_seq_cst)); + EXPECT_EQ(ptr_0, iree_atomic_load(&value, iree_memory_order_seq_cst)); } TEST(AtomicPtr, Exchange) { @@ -47,11 +47,11 @@ TEST(AtomicPtr, Exchange) { intptr_t ptr_1 = 0x1; intptr_t ptr_2 = 0x2; iree_atomic_intptr_t value = IREE_ATOMIC_VAR_INIT(ptr_0); - EXPECT_EQ(ptr_0, iree_atomic_exchange_intptr(&value, ptr_1, - iree_memory_order_seq_cst)); - EXPECT_EQ(ptr_1, iree_atomic_exchange_intptr(&value, ptr_2, - iree_memory_order_seq_cst)); - EXPECT_EQ(ptr_2, iree_atomic_load_intptr(&value, iree_memory_order_seq_cst)); + EXPECT_EQ(ptr_0, + iree_atomic_exchange(&value, ptr_1, iree_memory_order_seq_cst)); + EXPECT_EQ(ptr_1, + iree_atomic_exchange(&value, ptr_2, iree_memory_order_seq_cst)); + EXPECT_EQ(ptr_2, iree_atomic_load(&value, iree_memory_order_seq_cst)); } TEST(AtomicPtr, CompareExchange) { @@ -62,31 +62,31 @@ TEST(AtomicPtr, CompareExchange) { intptr_t ptr_expected = 0; // OK: value == ptr_0, CAS(ptr_0 -> ptr_1) - iree_atomic_store_intptr(&value, ptr_0, iree_memory_order_seq_cst); + iree_atomic_store(&value, ptr_0, iree_memory_order_seq_cst); ptr_expected = ptr_0; - EXPECT_TRUE(iree_atomic_compare_exchange_strong_intptr( - &value, &ptr_expected, ptr_1, iree_memory_order_seq_cst, - iree_memory_order_seq_cst)); + EXPECT_TRUE(iree_atomic_compare_exchange_strong(&value, &ptr_expected, ptr_1, + iree_memory_order_seq_cst, + iree_memory_order_seq_cst)); EXPECT_EQ(ptr_0, ptr_expected); - EXPECT_EQ(ptr_1, iree_atomic_load_intptr(&value, iree_memory_order_seq_cst)); + EXPECT_EQ(ptr_1, iree_atomic_load(&value, iree_memory_order_seq_cst)); // OK: value == ptr_1, CAS(ptr_1 -> ptr_2) - iree_atomic_store_intptr(&value, ptr_1, iree_memory_order_seq_cst); + iree_atomic_store(&value, ptr_1, iree_memory_order_seq_cst); ptr_expected = ptr_1; - EXPECT_TRUE(iree_atomic_compare_exchange_strong_intptr( - &value, &ptr_expected, ptr_2, iree_memory_order_seq_cst, - iree_memory_order_seq_cst)); + EXPECT_TRUE(iree_atomic_compare_exchange_strong(&value, &ptr_expected, ptr_2, + iree_memory_order_seq_cst, + iree_memory_order_seq_cst)); EXPECT_EQ(ptr_1, ptr_expected); - EXPECT_EQ(ptr_2, iree_atomic_load_intptr(&value, iree_memory_order_seq_cst)); + EXPECT_EQ(ptr_2, iree_atomic_load(&value, iree_memory_order_seq_cst)); // FAIL: value == ptr_0, CAS(ptr_1 -> ptr_2) - iree_atomic_store_intptr(&value, ptr_0, iree_memory_order_seq_cst); + iree_atomic_store(&value, ptr_0, iree_memory_order_seq_cst); ptr_expected = ptr_1; - EXPECT_FALSE(iree_atomic_compare_exchange_strong_intptr( - &value, &ptr_expected, ptr_2, iree_memory_order_seq_cst, - iree_memory_order_seq_cst)); + EXPECT_FALSE(iree_atomic_compare_exchange_strong(&value, &ptr_expected, ptr_2, + iree_memory_order_seq_cst, + iree_memory_order_seq_cst)); EXPECT_EQ(ptr_0, ptr_expected); - EXPECT_EQ(ptr_0, 
iree_atomic_load_intptr(&value, iree_memory_order_seq_cst)); + EXPECT_EQ(ptr_0, iree_atomic_load(&value, iree_memory_order_seq_cst)); } TEST(AtomicRefCount, IncDec) { diff --git a/runtime/src/iree/base/internal/dynamic_library_win32.c b/runtime/src/iree/base/internal/dynamic_library_win32.c index af6e4e80b8ef..2cbdd07f6416 100644 --- a/runtime/src/iree/base/internal/dynamic_library_win32.c +++ b/runtime/src/iree/base/internal/dynamic_library_win32.c @@ -91,7 +91,7 @@ static iree_status_t iree_dynamic_library_make_temp_file_path( static iree_atomic_int32_t next_unique_id = IREE_ATOMIC_VAR_INIT(0); // relaxed because we only care about uniqueness, we don't care about ordering // of accesses to unique_id w.r.t. other memory operations. - uint32_t unique_id = (uint32_t)iree_atomic_fetch_add_int32( + uint32_t unique_id = (uint32_t)iree_atomic_fetch_add( &next_unique_id, 1, iree_memory_order_relaxed); // Allocate storage for the full file path and format it in. diff --git a/runtime/src/iree/base/internal/synchronization.c b/runtime/src/iree/base/internal/synchronization.c index 65fb0d1a93e8..960a70c3be9b 100644 --- a/runtime/src/iree/base/internal/synchronization.c +++ b/runtime/src/iree/base/internal/synchronization.c @@ -447,8 +447,7 @@ void iree_slim_mutex_initialize(iree_slim_mutex_t* out_mutex) { void iree_slim_mutex_deinitialize(iree_slim_mutex_t* mutex) { // Assert unlocked (callers must ensure the mutex is no longer in use). - SYNC_ASSERT( - iree_atomic_load_int32(&mutex->value, iree_memory_order_acquire) == 0); + SYNC_ASSERT(iree_atomic_load(&mutex->value, iree_memory_order_acquire) == 0); } // Helper to perform a compare_exchange operation on mutex->value, internally @@ -467,9 +466,9 @@ static bool iree_slim_mutex_try_lock_compare_exchange( // more about efficiency in the uncontended case than we care about avoiding // spurious failure. Also, some callers are calling this in a loop, where they // would want the weak form anyway. - return iree_atomic_compare_exchange_weak_int32( - &mutex->value, expected, desired, iree_memory_order_acquire, - iree_memory_order_relaxed); + return iree_atomic_compare_exchange_weak(&mutex->value, expected, desired, + iree_memory_order_acquire, + iree_memory_order_relaxed); } void iree_slim_mutex_lock(iree_slim_mutex_t* mutex) @@ -490,8 +489,7 @@ void iree_slim_mutex_lock(iree_slim_mutex_t* mutex) // This uses relaxed order because this is an internal intermediate step and // we only need atomicity here. value = - iree_atomic_fetch_add_int32(&mutex->value, 1, iree_memory_order_relaxed) + - 1; + iree_atomic_fetch_add(&mutex->value, 1, iree_memory_order_relaxed) + 1; while (true) { // While the lock is available: try to acquire it for this thread. @@ -513,8 +511,7 @@ void iree_slim_mutex_lock(iree_slim_mutex_t* mutex) int spin_count = 100; for (int i = 0; i < spin_count && iree_slim_mutex_is_locked(value); ++i) { iree_processor_yield(); - value = - iree_atomic_load_int32(&mutex->value, iree_memory_order_relaxed); + value = iree_atomic_load(&mutex->value, iree_memory_order_relaxed); } } @@ -523,7 +520,7 @@ void iree_slim_mutex_lock(iree_slim_mutex_t* mutex) // NOTE: we don't care about wait failure here as we are going to loop // and check again anyway. 
iree_futex_wait(&mutex->value, value, IREE_TIME_INFINITE_FUTURE); - value = iree_atomic_load_int32(&mutex->value, iree_memory_order_relaxed); + value = iree_atomic_load(&mutex->value, iree_memory_order_relaxed); } } } @@ -541,8 +538,8 @@ void iree_slim_mutex_unlock(iree_slim_mutex_t* mutex) IREE_DISABLE_THREAD_SAFETY_ANALYSIS { // Refer to the iree_slim_mutex_t struct comment, "Notes on atomics". // Transition 1->0 (unlocking with no waiters) or 2->1 (with waiters). - if (iree_atomic_fetch_sub_int32(&mutex->value, iree_slim_mutex_value(1), - iree_memory_order_release) != + if (iree_atomic_fetch_sub(&mutex->value, iree_slim_mutex_value(1), + iree_memory_order_release) != iree_slim_mutex_value(1)) { // One (or more) waiters; wake a single one to avoid a thundering herd of // multiple threads all waking and trying to grab the lock (as only one will @@ -749,14 +746,14 @@ void iree_notification_initialize(iree_notification_t* out_notification) { void iree_notification_deinitialize(iree_notification_t* notification) { // Assert no more waiters (callers must tear down waiters first). SYNC_ASSERT( - (iree_atomic_load_int64(¬ification->value, iree_memory_order_acquire) & + (iree_atomic_load(¬ification->value, iree_memory_order_acquire) & IREE_NOTIFICATION_WAITER_MASK) == 0); } void iree_notification_post(iree_notification_t* notification, int32_t count) { - uint64_t previous_value = iree_atomic_fetch_add_int64( - ¬ification->value, IREE_NOTIFICATION_EPOCH_INC, - iree_memory_order_acq_rel); + uint64_t previous_value = + iree_atomic_fetch_add(¬ification->value, IREE_NOTIFICATION_EPOCH_INC, + iree_memory_order_acq_rel); // Ensure we have at least one waiter; wake up to |count| of them. if (IREE_UNLIKELY(previous_value & IREE_NOTIFICATION_WAITER_MASK)) { iree_futex_wake(iree_notification_epoch_address(notification), count); @@ -765,9 +762,9 @@ void iree_notification_post(iree_notification_t* notification, int32_t count) { iree_wait_token_t iree_notification_prepare_wait( iree_notification_t* notification) { - uint64_t previous_value = iree_atomic_fetch_add_int64( - ¬ification->value, IREE_NOTIFICATION_WAITER_INC, - iree_memory_order_acq_rel); + uint64_t previous_value = + iree_atomic_fetch_add(¬ification->value, IREE_NOTIFICATION_WAITER_INC, + iree_memory_order_acq_rel); return (iree_wait_token_t)(previous_value >> IREE_NOTIFICATION_EPOCH_SHIFT); } @@ -779,8 +776,7 @@ typedef enum iree_notification_result_e { static iree_notification_result_t iree_notification_test_wait_condition( iree_notification_t* notification, iree_wait_token_t wait_token) { - return (iree_atomic_load_int64(¬ification->value, - iree_memory_order_acquire) >> + return (iree_atomic_load(¬ification->value, iree_memory_order_acquire) >> IREE_NOTIFICATION_EPOCH_SHIFT) != wait_token ? IREE_NOTIFICATION_RESULT_RESOLVED : IREE_NOTIFICATION_RESULT_UNRESOLVED; @@ -830,9 +826,9 @@ bool iree_notification_commit_wait(iree_notification_t* notification, // TODO(benvanik): benchmark under real workloads. // iree_memory_order_relaxed would suffice for correctness but the faster // the waiter count gets to 0 the less likely we'll wake on the futex. 
- uint64_t previous_value = iree_atomic_fetch_add_int64( - ¬ification->value, IREE_NOTIFICATION_WAITER_DEC, - iree_memory_order_acq_rel); + uint64_t previous_value = + iree_atomic_fetch_add(¬ification->value, IREE_NOTIFICATION_WAITER_DEC, + iree_memory_order_acq_rel); SYNC_ASSERT((previous_value & IREE_NOTIFICATION_WAITER_MASK) != 0); return result == IREE_NOTIFICATION_RESULT_RESOLVED; @@ -842,9 +838,9 @@ void iree_notification_cancel_wait(iree_notification_t* notification) { // TODO(benvanik): benchmark under real workloads. // iree_memory_order_relaxed would suffice for correctness but the faster // the waiter count gets to 0 the less likely we'll wake on the futex. - uint64_t previous_value = iree_atomic_fetch_add_int64( - ¬ification->value, IREE_NOTIFICATION_WAITER_DEC, - iree_memory_order_acq_rel); + uint64_t previous_value = + iree_atomic_fetch_add(¬ification->value, IREE_NOTIFICATION_WAITER_DEC, + iree_memory_order_acq_rel); SYNC_ASSERT((previous_value & IREE_NOTIFICATION_WAITER_MASK) != 0); } diff --git a/runtime/src/iree/base/internal/threading_darwin.c b/runtime/src/iree/base/internal/threading_darwin.c index 52932f848816..dc4b5f8ef81e 100644 --- a/runtime/src/iree/base/internal/threading_darwin.c +++ b/runtime/src/iree/base/internal/threading_darwin.c @@ -104,9 +104,8 @@ iree_status_t iree_thread_create(iree_thread_entry_t entry, void* entry_arg, thread->entry_arg = entry_arg; iree_strncpy_s(thread->name, IREE_ARRAYSIZE(thread->name), params.name.data, iree_min(params.name.size, IREE_ARRAYSIZE(thread->name) - 1)); - iree_atomic_store_int32(&thread->is_suspended, - params.create_suspended ? 1 : 0, - iree_memory_order_relaxed); + iree_atomic_store(&thread->is_suspended, params.create_suspended ? 1 : 0, + iree_memory_order_relaxed); pthread_attr_t thread_attr; pthread_attr_init(&thread_attr); @@ -239,7 +238,7 @@ void iree_thread_resume(iree_thread_t* thread) { // always balance suspend/resume or else we'll mess with any // debuggers/profilers that may be suspending threads for their own uses. int32_t expected = 1; - if (iree_atomic_compare_exchange_strong_int32( + if (iree_atomic_compare_exchange_strong( &thread->is_suspended, &expected, 0, iree_memory_order_acq_rel, iree_memory_order_relaxed /* expected is unused */)) { thread_resume(thread->mach_port); diff --git a/runtime/src/iree/base/internal/threading_pthreads.c b/runtime/src/iree/base/internal/threading_pthreads.c index 1686fd16a060..3f15987be768 100644 --- a/runtime/src/iree/base/internal/threading_pthreads.c +++ b/runtime/src/iree/base/internal/threading_pthreads.c @@ -51,8 +51,8 @@ static void iree_thread_set_priority_class( static bool iree_thread_resumed_predicate(void* arg) { iree_thread_t* thread = (iree_thread_t*)arg; - return iree_atomic_load_int32(&thread->suspend_count, - iree_memory_order_acquire) == 0; + return iree_atomic_load(&thread->suspend_count, iree_memory_order_acquire) == + 0; } #if defined(IREE_PLATFORM_EMSCRIPTEN) @@ -99,8 +99,8 @@ static void* iree_thread_start_routine(void* param) { IREE_TRACE_SET_THREAD_NAME(thread->name); // Wait until we resume if we were created suspended. 
- while (iree_atomic_load_int32(&thread->suspend_count, - iree_memory_order_acquire) > 0) { + while (iree_atomic_load(&thread->suspend_count, iree_memory_order_acquire) > + 0) { iree_notification_await(&thread->suspend_barrier, iree_thread_resumed_predicate, thread, iree_infinite_timeout()); @@ -335,8 +335,8 @@ void iree_thread_request_affinity(iree_thread_t* thread, void iree_thread_resume(iree_thread_t* thread) { IREE_TRACE_ZONE_BEGIN(z0); - if (iree_atomic_exchange_int32(&thread->suspend_count, 0, - iree_memory_order_acq_rel) == 1) { + if (iree_atomic_exchange(&thread->suspend_count, 0, + iree_memory_order_acq_rel) == 1) { iree_notification_post(&thread->suspend_barrier, IREE_ALL_WAITERS); } diff --git a/runtime/src/iree/base/internal/threading_test.cc b/runtime/src/iree/base/internal/threading_test.cc index 8ee5a96b7fa6..1fd973083e22 100644 --- a/runtime/src/iree/base/internal/threading_test.cc +++ b/runtime/src/iree/base/internal/threading_test.cc @@ -34,12 +34,11 @@ TEST(ThreadTest, Lifetime) { iree_atomic_int32_t value; iree_notification_t barrier; } entry_data; - iree_atomic_store_int32(&entry_data.value, 123, iree_memory_order_relaxed); + iree_atomic_store(&entry_data.value, 123, iree_memory_order_relaxed); iree_notification_initialize(&entry_data.barrier); iree_thread_entry_t entry_fn = +[](void* entry_arg) -> int { auto* entry_data = reinterpret_cast<entry_data_t*>(entry_arg); - iree_atomic_fetch_add_int32(&entry_data->value, 1, - iree_memory_order_acq_rel); + iree_atomic_fetch_add(&entry_data->value, 1, iree_memory_order_acq_rel); iree_notification_post(&entry_data->barrier, IREE_ALL_WAITERS); return 0; }; @@ -55,8 +54,8 @@ TEST(ThreadTest, Lifetime) { &entry_data.barrier, +[](void* entry_arg) -> bool { auto* entry_data = reinterpret_cast<entry_data_t*>(entry_arg); - return iree_atomic_load_int32(&entry_data->value, - iree_memory_order_relaxed) == (123 + 1); + return iree_atomic_load(&entry_data->value, + iree_memory_order_relaxed) == (123 + 1); }, &entry_data, iree_infinite_timeout()); @@ -76,12 +75,11 @@ TEST(ThreadTest, CreateSuspended) { iree_atomic_int32_t value; iree_notification_t barrier; } entry_data; - iree_atomic_store_int32(&entry_data.value, 123, iree_memory_order_relaxed); + iree_atomic_store(&entry_data.value, 123, iree_memory_order_relaxed); iree_notification_initialize(&entry_data.barrier); iree_thread_entry_t entry_fn = +[](void* entry_arg) -> int { auto* entry_data = reinterpret_cast<entry_data_t*>(entry_arg); - iree_atomic_fetch_add_int32(&entry_data->value, 1, - iree_memory_order_acq_rel); + iree_atomic_fetch_add(&entry_data->value, 1, iree_memory_order_acq_rel); iree_notification_post(&entry_data->barrier, IREE_ALL_WAITERS); return 0; }; @@ -95,11 +93,11 @@ TEST(ThreadTest, CreateSuspended) { // the value. I can't think of a good way to test this, though, so we'll just // wait a moment here and assume that if the thread was able to run it would // have during this wait. - ASSERT_EQ(123, iree_atomic_load_int32(&entry_data.value, - iree_memory_order_seq_cst)); + ASSERT_EQ(123, + iree_atomic_load(&entry_data.value, iree_memory_order_seq_cst)); std::this_thread::sleep_for(std::chrono::milliseconds(150)); - ASSERT_EQ(123, iree_atomic_load_int32(&entry_data.value, - iree_memory_order_seq_cst)); + ASSERT_EQ(123, + iree_atomic_load(&entry_data.value, iree_memory_order_seq_cst)); // Resume the thread and wait for it to finish its work.
iree_thread_resume(thread); @@ -107,8 +105,8 @@ &entry_data.barrier, +[](void* entry_arg) -> bool { auto* entry_data = reinterpret_cast<entry_data_t*>(entry_arg); - return iree_atomic_load_int32(&entry_data->value, - iree_memory_order_relaxed) == (123 + 1); + return iree_atomic_load(&entry_data->value, + iree_memory_order_relaxed) == (123 + 1); }, &entry_data, iree_infinite_timeout()); iree_thread_release(thread); @@ -126,11 +124,10 @@ TEST(ThreadTest, PriorityOverride) { struct entry_data_t { iree_atomic_int32_t value; } entry_data; - iree_atomic_store_int32(&entry_data.value, 0, iree_memory_order_relaxed); + iree_atomic_store(&entry_data.value, 0, iree_memory_order_relaxed); iree_thread_entry_t entry_fn = +[](void* entry_arg) -> int { auto* entry_data = reinterpret_cast<entry_data_t*>(entry_arg); - iree_atomic_fetch_add_int32(&entry_data->value, 1, - iree_memory_order_release); + iree_atomic_fetch_add(&entry_data->value, 1, iree_memory_order_release); return 0; }; @@ -150,8 +147,7 @@ thread, IREE_THREAD_PRIORITY_CLASS_LOWEST); // Wait for the thread to finish. - while (iree_atomic_load_int32(&entry_data.value, iree_memory_order_acquire) != - 1) { + while (iree_atomic_load(&entry_data.value, iree_memory_order_acquire) != 1) { iree_thread_yield(); } diff --git a/runtime/src/iree/base/internal/threading_win32.c b/runtime/src/iree/base/internal/threading_win32.c index 6166ce288175..64ddca614da2 100644 --- a/runtime/src/iree/base/internal/threading_win32.c +++ b/runtime/src/iree/base/internal/threading_win32.c @@ -143,9 +143,8 @@ iree_status_t iree_thread_create(iree_thread_entry_t entry, void* entry_arg, thread->entry_arg = entry_arg; strncpy_s(thread->name, IREE_ARRAYSIZE(thread->name), params.name.data, min(params.name.size, IREE_ARRAYSIZE(thread->name) - 1)); - iree_atomic_store_int32(&thread->is_suspended, - params.create_suspended ? 1 : 0, - iree_memory_order_relaxed); + iree_atomic_store(&thread->is_suspended, params.create_suspended ? 1 : 0, + iree_memory_order_relaxed); iree_thread_override_list_initialize(iree_thread_set_priority_class, params.priority_class, thread->allocator, &thread->qos_override_list); @@ -304,7 +303,7 @@ void iree_thread_resume(iree_thread_t* thread) { // always balance suspend/resume or else we'll mess with any // debuggers/profilers that may be suspending threads for their own uses.
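The resume path that follows is a strong compare-exchange gating a one-shot 1 -> 0 transition. Distilled into plain C11 with an invented name; the strong form matters because there is no retry loop here, so a spurious weak-CAS failure would silently drop the resume.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

// Returns true if this call performed the suspended -> running transition
// (and so should issue the platform resume exactly once).
static bool try_resume(_Atomic int32_t* is_suspended) {
  int32_t expected = 1;
  return atomic_compare_exchange_strong_explicit(
      is_suspended, &expected, 0, memory_order_acq_rel,
      memory_order_relaxed /* expected is unused on failure */);
}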
int32_t expected = 1; - if (iree_atomic_compare_exchange_strong_int32( + if (iree_atomic_compare_exchange_strong( &thread->is_suspended, &expected, 0, iree_memory_order_acq_rel, iree_memory_order_relaxed /* expected is unused */)) { ResumeThread(thread->handle); diff --git a/runtime/src/iree/base/internal/wait_handle_inproc.c b/runtime/src/iree/base/internal/wait_handle_inproc.c index e3192595e177..7f92797b1bc8 100644 --- a/runtime/src/iree/base/internal/wait_handle_inproc.c +++ b/runtime/src/iree/base/internal/wait_handle_inproc.c @@ -240,7 +240,7 @@ static bool iree_wait_set_check(const iree_wait_set_check_params_t* params) { iree_wait_handle_t* wait_handle = ¶ms->set->handles[i]; iree_futex_handle_t* futex = (iree_futex_handle_t*)wait_handle->value.local_futex; - if (iree_atomic_load_int64(&futex->value, iree_memory_order_acquire) != 0) { + if (iree_atomic_load(&futex->value, iree_memory_order_acquire) != 0) { ++ready_count; if (params->wake_handle) { *params->wake_handle = *wait_handle; @@ -292,7 +292,7 @@ iree_status_t iree_wait_any(iree_wait_set_t* set, iree_time_t deadline_ns, } static bool iree_futex_handle_check(iree_futex_handle_t* futex) { - return iree_atomic_load_int64(&futex->value, iree_memory_order_acquire) != 0; + return iree_atomic_load(&futex->value, iree_memory_order_acquire) != 0; } iree_status_t iree_wait_one(iree_wait_handle_t* handle, @@ -335,8 +335,8 @@ iree_status_t iree_event_initialize(bool initial_state, if (iree_status_is_ok(status)) { out_event->type = IREE_WAIT_PRIMITIVE_TYPE_LOCAL_FUTEX; out_event->value.local_futex = (void*)futex; - iree_atomic_store_int64(&futex->value, initial_state ? 1 : 0, - iree_memory_order_release); + iree_atomic_store(&futex->value, initial_state ? 1 : 0, + iree_memory_order_release); iree_notification_initialize(&futex->notification); } @@ -358,8 +358,7 @@ void iree_event_set(iree_event_t* event) { // Try to transition from unset -> set. // No-op if already set and otherwise we successfully signaled the event and // need to notify all waiters. - if (iree_atomic_exchange_int64(&futex->value, 1, iree_memory_order_release) == - 0) { + if (iree_atomic_exchange(&futex->value, 1, iree_memory_order_release) == 0) { // Notify those waiting on just this event. iree_notification_post(&futex->notification, IREE_ALL_WAITERS); // Notify any multi-waits that may have this event as part of their set. @@ -371,7 +370,7 @@ void iree_event_reset(iree_event_t* event) { if (!event) return; iree_futex_handle_t* futex = (iree_futex_handle_t*)event->value.local_futex; if (!futex) return; - iree_atomic_store_int64(&futex->value, 0, iree_memory_order_release); + iree_atomic_store(&futex->value, 0, iree_memory_order_release); } #endif // IREE_WAIT_API == IREE_WAIT_API_INPROC diff --git a/runtime/src/iree/hal/drivers/cuda/memory_pools.c b/runtime/src/iree/hal/drivers/cuda/memory_pools.c index 236ffaac840b..1e34422478f5 100644 --- a/runtime/src/iree/hal/drivers/cuda/memory_pools.c +++ b/runtime/src/iree/hal/drivers/cuda/memory_pools.c @@ -121,8 +121,8 @@ static void iree_hal_cuda_memory_pool_track_alloc( iree_atomic_int64_t* bytes_allocated = is_device_local ? 
&pools->statistics.device_bytes_allocated : &pools->statistics.host_bytes_allocated; - iree_atomic_fetch_add_int64(bytes_allocated, allocation_size, - iree_memory_order_relaxed); + iree_atomic_fetch_add(bytes_allocated, allocation_size, + iree_memory_order_relaxed); }); } @@ -141,8 +141,8 @@ static void iree_hal_cuda_memory_pool_track_free( : &pools->statistics.host_bytes_freed; iree_device_size_t allocation_size = iree_hal_buffer_allocation_size(buffer); - iree_atomic_fetch_add_int64(bytes_freed, allocation_size, - iree_memory_order_relaxed); + iree_atomic_fetch_add(bytes_freed, allocation_size, + iree_memory_order_relaxed); }); } @@ -150,13 +150,13 @@ void iree_hal_cuda_memory_pools_merge_statistics( iree_hal_cuda_memory_pools_t* pools, iree_hal_allocator_statistics_t* statistics) { IREE_STATISTICS({ - statistics->device_bytes_allocated = iree_atomic_load_int64( + statistics->device_bytes_allocated = iree_atomic_load( &pools->statistics.device_bytes_allocated, iree_memory_order_relaxed); - statistics->host_bytes_allocated = iree_atomic_load_int64( + statistics->host_bytes_allocated = iree_atomic_load( &pools->statistics.host_bytes_allocated, iree_memory_order_relaxed); - statistics->device_bytes_freed = iree_atomic_load_int64( + statistics->device_bytes_freed = iree_atomic_load( &pools->statistics.device_bytes_freed, iree_memory_order_relaxed); - statistics->host_bytes_freed = iree_atomic_load_int64( + statistics->host_bytes_freed = iree_atomic_load( &pools->statistics.host_bytes_freed, iree_memory_order_relaxed); if (pools->device_local) { cuuint64_t pool_peak = 0; diff --git a/runtime/src/iree/hal/drivers/hip/memory_pools.c b/runtime/src/iree/hal/drivers/hip/memory_pools.c index e599cf62daa0..89e27fafdfd1 100644 --- a/runtime/src/iree/hal/drivers/hip/memory_pools.c +++ b/runtime/src/iree/hal/drivers/hip/memory_pools.c @@ -121,8 +121,8 @@ static void iree_hal_hip_memory_pool_track_alloc( iree_atomic_int64_t* bytes_allocated = is_device_local ? 
&pools->statistics.device_bytes_allocated : &pools->statistics.host_bytes_allocated; - iree_atomic_fetch_add_int64(bytes_allocated, allocation_size, - iree_memory_order_relaxed); + iree_atomic_fetch_add(bytes_allocated, allocation_size, + iree_memory_order_relaxed); }); } @@ -141,8 +141,8 @@ static void iree_hal_hip_memory_pool_track_free( : &pools->statistics.host_bytes_freed; iree_device_size_t allocation_size = iree_hal_buffer_allocation_size(buffer); - iree_atomic_fetch_add_int64(bytes_freed, allocation_size, - iree_memory_order_relaxed); + iree_atomic_fetch_add(bytes_freed, allocation_size, + iree_memory_order_relaxed); }); } @@ -150,13 +150,13 @@ void iree_hal_hip_memory_pools_merge_statistics( iree_hal_hip_memory_pools_t* pools, iree_hal_allocator_statistics_t* statistics) { IREE_STATISTICS({ - statistics->device_bytes_allocated = iree_atomic_load_int64( + statistics->device_bytes_allocated = iree_atomic_load( &pools->statistics.device_bytes_allocated, iree_memory_order_relaxed); - statistics->host_bytes_allocated = iree_atomic_load_int64( + statistics->host_bytes_allocated = iree_atomic_load( &pools->statistics.host_bytes_allocated, iree_memory_order_relaxed); - statistics->device_bytes_freed = iree_atomic_load_int64( + statistics->device_bytes_freed = iree_atomic_load( &pools->statistics.device_bytes_freed, iree_memory_order_relaxed); - statistics->host_bytes_freed = iree_atomic_load_int64( + statistics->host_bytes_freed = iree_atomic_load( &pools->statistics.host_bytes_freed, iree_memory_order_relaxed); if (pools->device_local) { diff --git a/runtime/src/iree/hal/drivers/metal/shared_event.m b/runtime/src/iree/hal/drivers/metal/shared_event.m index f741f2ea3a63..716306c215bb 100644 --- a/runtime/src/iree/hal/drivers/metal/shared_event.m +++ b/runtime/src/iree/hal/drivers/metal/shared_event.m @@ -231,7 +231,7 @@ iree_status_t iree_hal_metal_shared_event_multi_wait( // Create an atomic to count how many semaphores have signaled. Mark it as `__block` so different // threads are sharing the same data via reference. __block iree_atomic_int32_t wait_count; - iree_atomic_store_int32(&wait_count, 0, iree_memory_order_release); + iree_atomic_store(&wait_count, 0, iree_memory_order_release); // The total count we are expecting to see. iree_host_size_t total_count = (wait_mode == IREE_HAL_WAIT_MODE_ALL) ? semaphore_list->count : 1; // Theoretically we don't really need to mark the semaphore handle as __block given that the @@ -253,7 +253,7 @@ iree_status_t iree_hal_metal_shared_event_multi_wait( // Fail as a whole if any participating semaphore failed. if (v >= IREE_HAL_SEMAPHORE_FAILURE_VALUE) did_fail = true; - int32_t old_value = iree_atomic_fetch_add_int32( + int32_t old_value = iree_atomic_fetch_add( &wait_count, 1, iree_memory_order_release); // The last signaled semaphore send out the notification. // Atomic fetch add returns the old value, so need to +1. 
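That old-value-plus-one comment is the crux of the pattern: fetch_add returns the pre-increment count, so exactly one handler observes the final increment and sends the notification. A minimal sketch with invented names:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

// Each completion handler bumps |wait_count|; the one whose increment
// brings the total to |expected| is the unique last signaler.
static bool signal_and_check_last(_Atomic int32_t* wait_count,
                                  int32_t expected) {
  int32_t old_value =
      atomic_fetch_add_explicit(wait_count, 1, memory_order_release);
  return old_value + 1 == expected;  // true for exactly one caller
}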
diff --git a/runtime/src/iree/hal/drivers/metal/staging_buffer.m b/runtime/src/iree/hal/drivers/metal/staging_buffer.m index ca0128f78890..e83e622e868b 100644 --- a/runtime/src/iree/hal/drivers/metal/staging_buffer.m +++ b/runtime/src/iree/hal/drivers/metal/staging_buffer.m @@ -37,8 +37,7 @@ iree_status_t iree_hal_metal_staging_buffer_initialize( out_staging_buffer->host_buffer = metal_buffer.contents; iree_slim_mutex_initialize(&out_staging_buffer->offset_mutex); out_staging_buffer->offset = 0; - iree_atomic_store_int32(&out_staging_buffer->pending_command_buffers, 0, - iree_memory_order_relaxed); + iree_atomic_store(&out_staging_buffer->pending_command_buffers, 0, iree_memory_order_relaxed); IREE_TRACE_ZONE_END(z0); return iree_ok_status(); @@ -97,14 +96,13 @@ void iree_hal_metal_staging_buffer_reset(iree_hal_metal_staging_buffer_t* stagin void iree_hal_metal_staging_buffer_increase_command_buffer_refcount( iree_hal_metal_staging_buffer_t* staging_buffer) { - iree_atomic_fetch_add_int32(&staging_buffer->pending_command_buffers, 1, - iree_memory_order_relaxed); + iree_atomic_fetch_add(&staging_buffer->pending_command_buffers, 1, iree_memory_order_relaxed); } void iree_hal_metal_staging_buffer_decrease_command_buffer_refcount( iree_hal_metal_staging_buffer_t* staging_buffer) { - if (iree_atomic_fetch_sub_int32(&staging_buffer->pending_command_buffers, 1, - iree_memory_order_acq_rel) == 1) { + if (iree_atomic_fetch_sub(&staging_buffer->pending_command_buffers, 1, + iree_memory_order_acq_rel) == 1) { iree_hal_metal_staging_buffer_reset(staging_buffer); } } diff --git a/runtime/src/iree/hal/drivers/vulkan/native_semaphore.cc b/runtime/src/iree/hal/drivers/vulkan/native_semaphore.cc index f75b2c0bbdb1..631f138a1c26 100644 --- a/runtime/src/iree/hal/drivers/vulkan/native_semaphore.cc +++ b/runtime/src/iree/hal/drivers/vulkan/native_semaphore.cc @@ -68,8 +68,7 @@ iree_status_t iree_hal_vulkan_native_semaphore_create( &semaphore->base); semaphore->logical_device = logical_device; semaphore->handle = handle; - iree_atomic_store_intptr(&semaphore->failure_status, 0, - iree_memory_order_release); + iree_atomic_store(&semaphore->failure_status, 0, iree_memory_order_release); *out_semaphore = &semaphore->base; } else { logical_device->syms()->vkDestroySemaphore(*logical_device, handle, @@ -87,7 +86,7 @@ static void iree_hal_vulkan_native_semaphore_destroy( iree_allocator_t host_allocator = semaphore->logical_device->host_allocator(); IREE_TRACE_ZONE_BEGIN(z0); - iree_status_ignore((iree_status_t)iree_atomic_load_intptr( + iree_status_ignore((iree_status_t)iree_atomic_load( &semaphore->failure_status, iree_memory_order_acquire)); semaphore->logical_device->syms()->vkDestroySemaphore( @@ -127,7 +126,7 @@ static iree_status_t iree_hal_vulkan_native_semaphore_query( // If the semaphore failed then clone the status so we can report it. if (value >= IREE_HAL_SEMAPHORE_FAILURE_VALUE) { - iree_status_t failure_status = (iree_status_t)iree_atomic_load_intptr( + iree_status_t failure_status = (iree_status_t)iree_atomic_load( &semaphore->failure_status, iree_memory_order_acquire); if (iree_status_is_ok(failure_status)) { return iree_make_status(IREE_STATUS_RESOURCE_EXHAUSTED, @@ -178,7 +177,7 @@ static void iree_hal_vulkan_native_semaphore_fail( // Try to set our local status - we only preserve the first failure so only // do this if we are going from a valid semaphore to a failed one. 
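That first-failure-wins rule reduces to a single strong CAS away from the OK sentinel: the winner installs its status, and every later failure loses the race and discards its own. A minimal sketch; status_free is a hypothetical stand-in for however the losing status gets cleaned up.

#include <stdatomic.h>
#include <stdint.h>

// iree_status_t is pointer-sized, so a status can live in an atomic
// intptr_t slot where 0 means OK. Only the OK -> error edge is allowed.
static void try_set_first_failure(_Atomic intptr_t* slot,
                                  intptr_t new_status) {
  intptr_t expected = 0;  // OK
  if (!atomic_compare_exchange_strong_explicit(
          slot, &expected, new_status, memory_order_acq_rel,
          memory_order_relaxed)) {
    /* status_free(new_status): someone else failed first, keep theirs */
  }
}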
iree_status_t old_status = iree_ok_status(); - if (!iree_atomic_compare_exchange_strong_intptr( + if (!iree_atomic_compare_exchange_strong( &semaphore->failure_status, (intptr_t*)&old_status, (intptr_t)status, iree_memory_order_acq_rel, iree_memory_order_relaxed /* old_status is unused */)) { diff --git a/runtime/src/iree/hal/local/executable_plugin_manager.c b/runtime/src/iree/hal/local/executable_plugin_manager.c index 6d41c76df5d0..2739aa9f26c6 100644 --- a/runtime/src/iree/hal/local/executable_plugin_manager.c +++ b/runtime/src/iree/hal/local/executable_plugin_manager.c @@ -432,8 +432,8 @@ static iree_status_t iree_hal_executable_plugin_manager_register( // Get the next provider slot. Note that we don't yet increment it as we need // to put the provider in there first. - int32_t slot = iree_atomic_load_int32(&manager->provider_count, - iree_memory_order_acquire); + int32_t slot = + iree_atomic_load(&manager->provider_count, iree_memory_order_acquire); if (slot >= manager->capacity) { iree_slim_mutex_unlock(&manager->mutex); return iree_make_status(IREE_STATUS_RESOURCE_EXHAUSTED, @@ -449,8 +449,7 @@ static iree_status_t iree_hal_executable_plugin_manager_register( } // Mark the slot as valid now that the provider is in it. - iree_atomic_fetch_add_int32(&manager->provider_count, 1, - iree_memory_order_release); + iree_atomic_fetch_add(&manager->provider_count, 1, iree_memory_order_release); iree_slim_mutex_unlock(&manager->mutex); return iree_ok_status(); @@ -506,8 +505,8 @@ static iree_status_t iree_hal_executable_plugin_manager_resolve( // but that's ok: multithreaded registration/resolution is non-deterministic // by nature. Not holding the lock here means we allow multiple threads to // resolve imports at the same time. - int32_t provider_count = iree_atomic_load_int32(&manager->provider_count, - iree_memory_order_acquire); + int32_t provider_count = + iree_atomic_load(&manager->provider_count, iree_memory_order_acquire); // Scan in reverse registration order so that more recently registered // providers get queried first. 
try_resolve will populate any function diff --git a/runtime/src/iree/hal/utils/deferred_work_queue.c b/runtime/src/iree/hal/utils/deferred_work_queue.c index b4b2285c972f..e41fe3523778 100644 --- a/runtime/src/iree/hal/utils/deferred_work_queue.c +++ b/runtime/src/iree/hal/utils/deferred_work_queue.c @@ -393,9 +393,9 @@ static void iree_hal_deferred_work_queue_working_area_initialize( iree_notification_initialize(&working_area->state_notification); iree_hal_deferred_work_queue_ready_action_list_deinitialize( &working_area->ready_worklist, host_allocator); - iree_atomic_store_int32(&working_area->worker_state, - IREE_HAL_WORKER_STATE_IDLE_WAITING, - iree_memory_order_release); + iree_atomic_store(&working_area->worker_state, + IREE_HAL_WORKER_STATE_IDLE_WAITING, + iree_memory_order_release); } static void iree_hal_deferred_work_queue_working_area_deinitialize( @@ -413,9 +413,9 @@ static void iree_hal_deferred_work_queue_completion_area_initialize( iree_notification_initialize(&completion_area->state_notification); iree_hal_deferred_work_queue_completion_list_initialize( &completion_area->completion_list); - iree_atomic_store_int32(&completion_area->worker_state, - IREE_HAL_WORKER_STATE_IDLE_WAITING, - iree_memory_order_release); + iree_atomic_store(&completion_area->worker_state, + IREE_HAL_WORKER_STATE_IDLE_WAITING, + iree_memory_order_release); } static void iree_hal_deferred_work_queue_completion_area_deinitialize( @@ -557,17 +557,17 @@ static iree_hal_deferred_work_queue_t* iree_hal_deferred_work_queue_cast( static void iree_hal_deferred_work_queue_notify_worker_thread( iree_hal_deferred_work_queue_working_area_t* working_area) { - iree_atomic_store_int32(&working_area->worker_state, - IREE_HAL_WORKER_STATE_WORKLOAD_PENDING, - iree_memory_order_release); + iree_atomic_store(&working_area->worker_state, + IREE_HAL_WORKER_STATE_WORKLOAD_PENDING, + iree_memory_order_release); iree_notification_post(&working_area->state_notification, IREE_ALL_WAITERS); } static void iree_hal_deferred_work_queue_notify_completion_thread( iree_hal_deferred_work_queue_completion_area_t* completion_area) { - iree_atomic_store_int32(&completion_area->worker_state, - IREE_HAL_WORKER_STATE_WORKLOAD_PENDING, - iree_memory_order_release); + iree_atomic_store(&completion_area->worker_state, + IREE_HAL_WORKER_STATE_WORKLOAD_PENDING, + iree_memory_order_release); iree_notification_post(&completion_area->state_notification, IREE_ALL_WAITERS); } @@ -1236,14 +1236,14 @@ iree_status_t iree_hal_deferred_work_queue_issue( static bool iree_hal_deferred_work_queue_worker_has_incoming_request( iree_hal_deferred_work_queue_working_area_t* working_area) { - iree_hal_deferred_work_queue_worker_state_t value = iree_atomic_load_int32( - &working_area->worker_state, iree_memory_order_acquire); + iree_hal_deferred_work_queue_worker_state_t value = + iree_atomic_load(&working_area->worker_state, iree_memory_order_acquire); return value == IREE_HAL_WORKER_STATE_WORKLOAD_PENDING; } static bool iree_hal_deferred_work_queue_completion_has_incoming_request( iree_hal_deferred_work_queue_completion_area_t* completion_area) { - iree_hal_deferred_work_queue_worker_state_t value = iree_atomic_load_int32( + iree_hal_deferred_work_queue_worker_state_t value = iree_atomic_load( &completion_area->worker_state, iree_memory_order_acquire); return value == IREE_HAL_WORKER_STATE_WORKLOAD_PENDING; } @@ -1369,9 +1369,9 @@ static int iree_hal_deferred_work_queue_completion_execute( // sure that we don't accidentally ignore new workload pushed after done // 
ready list processing but before overwriting the state from this worker // thread. - iree_atomic_store_int32(&completion_area->worker_state, - IREE_HAL_WORKER_STATE_IDLE_WAITING, - iree_memory_order_release); + iree_atomic_store(&completion_area->worker_state, + IREE_HAL_WORKER_STATE_IDLE_WAITING, + iree_memory_order_release); iree_hal_deferred_work_queue_worker_process_completion(actions); iree_slim_mutex_lock(&actions->action_mutex); @@ -1424,9 +1424,9 @@ static int iree_hal_deferred_work_queue_worker_execute( // sure that we don't accidentally ignore new workload pushed after done // ready list processing but before overwriting the state from this worker // thread. - iree_atomic_store_int32(&working_area->worker_state, - IREE_HAL_WORKER_STATE_IDLE_WAITING, - iree_memory_order_release); + iree_atomic_store(&working_area->worker_state, + IREE_HAL_WORKER_STATE_IDLE_WAITING, + iree_memory_order_release); iree_hal_deferred_work_queue_worker_process_ready_list(actions); diff --git a/runtime/src/iree/hal/utils/file_transfer.c b/runtime/src/iree/hal/utils/file_transfer.c index cee1df6ebe2c..2bc8decf2f9a 100644 --- a/runtime/src/iree/hal/utils/file_transfer.c +++ b/runtime/src/iree/hal/utils/file_transfer.c @@ -242,8 +242,8 @@ static iree_status_t iree_hal_transfer_operation_create( // steps are part of this transfer. IREE_TRACE({ static iree_atomic_int32_t next_trace_id = IREE_ATOMIC_VAR_INIT(0); - operation->trace_id = iree_atomic_fetch_add_int32( - &next_trace_id, 1, iree_memory_order_seq_cst); + operation->trace_id = + iree_atomic_fetch_add(&next_trace_id, 1, iree_memory_order_seq_cst); IREE_TRACE_ZONE_APPEND_VALUE_I64(z0, operation->trace_id); }); diff --git a/runtime/src/iree/task/affinity_set.h b/runtime/src/iree/task/affinity_set.h index 3dbf756d7519..dfe6a7a5293e 100644 --- a/runtime/src/iree/task/affinity_set.h +++ b/runtime/src/iree/task/affinity_set.h @@ -61,25 +61,25 @@ typedef iree_atomic_int64_t iree_atomic_task_affinity_set_t; static inline iree_task_affinity_set_t iree_atomic_task_affinity_set_load( iree_atomic_task_affinity_set_t* set, iree_memory_order_t order) { - return iree_atomic_load_int64(set, order); + return iree_atomic_load(set, order); } static inline void iree_atomic_task_affinity_set_store( iree_atomic_task_affinity_set_t* set, iree_task_affinity_set_t value, iree_memory_order_t order) { - iree_atomic_store_int64(set, value, order); + iree_atomic_store(set, value, order); } static inline iree_task_affinity_set_t iree_atomic_task_affinity_set_fetch_and( iree_atomic_task_affinity_set_t* set, iree_task_affinity_set_t value, iree_memory_order_t order) { - return iree_atomic_fetch_and_int64(set, value, order); + return iree_atomic_fetch_and(set, value, order); } static inline iree_task_affinity_set_t iree_atomic_task_affinity_set_fetch_or( iree_atomic_task_affinity_set_t* set, iree_task_affinity_set_t value, iree_memory_order_t order) { - return iree_atomic_fetch_or_int64(set, value, order); + return iree_atomic_fetch_or(set, value, order); } #ifdef __cplusplus diff --git a/runtime/src/iree/task/executor.c b/runtime/src/iree/task/executor.c index ff3280aaf4d2..6fc98e279e4c 100644 --- a/runtime/src/iree/task/executor.c +++ b/runtime/src/iree/task/executor.c @@ -103,10 +103,9 @@ iree_status_t iree_task_executor_create(iree_task_executor_options_t options, IREE_TRACE({ static iree_atomic_int32_t executor_id = IREE_ATOMIC_VAR_INIT(0); char trace_name[32]; - int trace_name_length = - snprintf(trace_name, sizeof(trace_name), "iree-executor-%d", - 
iree_atomic_fetch_add_int32(&executor_id, 1, - iree_memory_order_seq_cst)); + int trace_name_length = snprintf( + trace_name, sizeof(trace_name), "iree-executor-%d", + iree_atomic_fetch_add(&executor_id, 1, iree_memory_order_seq_cst)); IREE_LEAK_CHECK_DISABLE_PUSH(); executor->trace_name = malloc(trace_name_length + 1); memcpy((void*)executor->trace_name, trace_name, trace_name_length + 1); @@ -540,8 +539,7 @@ static iree_task_t* iree_task_executor_try_steal_task_from_affinity_set( worker_index += offset + 1; mask = iree_shr(mask, offset + 1); iree_task_worker_t* victim_worker = &executor->workers[victim_index]; - if (iree_atomic_load_int32(&victim_worker->state, - iree_memory_order_acquire) != + if (iree_atomic_load(&victim_worker->state, iree_memory_order_acquire) != IREE_TASK_WORKER_STATE_RUNNING) { return NULL; } diff --git a/runtime/src/iree/task/executor_demo.cc b/runtime/src/iree/task/executor_demo.cc index 63dba4ce0192..972d16b114a7 100644 --- a/runtime/src/iree/task/executor_demo.cc +++ b/runtime/src/iree/task/executor_demo.cc @@ -89,8 +89,8 @@ extern "C" int main(int argc, char* argv[]) { IREE_TRACE_SCOPE_NAMED("tile0"); IREE_ASSERT_EQ(0, user_context); simulate_work(tile_context); - iree_atomic_fetch_add_int32(&tile_context->statistics->reserved, 1, - iree_memory_order_relaxed); + iree_atomic_fetch_add(&tile_context->statistics->reserved, 1, + iree_memory_order_relaxed); return iree_ok_status(); }, 0), @@ -107,8 +107,8 @@ extern "C" int main(int argc, char* argv[]) { IREE_TRACE_SCOPE_NAMED("tile1"); IREE_ASSERT_EQ(0, user_context); simulate_work(tile_context); - iree_atomic_fetch_add_int32(&tile_context->statistics->reserved, 1, - iree_memory_order_relaxed); + iree_atomic_fetch_add(&tile_context->statistics->reserved, 1, + iree_memory_order_relaxed); return iree_ok_status(); }, 0), diff --git a/runtime/src/iree/task/poller.c b/runtime/src/iree/task/poller.c index e314379dc3be..e04aa3bcf162 100644 --- a/runtime/src/iree/task/poller.c +++ b/runtime/src/iree/task/poller.c @@ -32,8 +32,8 @@ iree_status_t iree_task_poller_initialize( // thread as it performs the initial resume of the wait thread. We'll need to // check in enqueue to see if the wait thread needs to be resumed. // initial_state = IREE_TASK_POLLER_STATE_SUSPENDED; - iree_atomic_store_int32(&out_poller->state, initial_state, - iree_memory_order_release); + iree_atomic_store(&out_poller->state, initial_state, + iree_memory_order_release); // Acquire an event we can use to wake the wait thread from other threads. iree_status_t status = iree_event_pool_acquire( @@ -83,7 +83,7 @@ void iree_task_poller_request_exit(iree_task_poller_t* poller) { // If the thread is already in the exiting/zombie state we don't need to do // anything. iree_task_poller_state_t prev_state = - (iree_task_poller_state_t)iree_atomic_exchange_int32( + (iree_task_poller_state_t)iree_atomic_exchange( &poller->state, IREE_TASK_POLLER_STATE_EXITING, iree_memory_order_acq_rel); switch (prev_state) { @@ -93,8 +93,8 @@ void iree_task_poller_request_exit(iree_task_poller_t* poller) { break; case IREE_TASK_POLLER_STATE_ZOMBIE: // Poller already exited; reset state to ZOMBIE. - iree_atomic_store_int32(&poller->state, IREE_TASK_POLLER_STATE_ZOMBIE, - iree_memory_order_release); + iree_atomic_store(&poller->state, IREE_TASK_POLLER_STATE_ZOMBIE, + iree_memory_order_release); break; default: // Poller now set to EXITING and should exit soon. 
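request_exit above leans on exchange returning the prior state: the swap both publishes EXITING and tells the caller whether the thread is still alive to observe it. In miniature, with illustrative state names:

#include <stdatomic.h>
#include <stdint.h>

enum { STATE_RUNNING = 0, STATE_EXITING = 1, STATE_ZOMBIE = 2 };

// Ask the thread to exit. If it had already exited we must restore
// ZOMBIE, since teardown waits for that terminal state.
static void request_exit(_Atomic int32_t* state) {
  int32_t prev =
      atomic_exchange_explicit(state, STATE_EXITING, memory_order_acq_rel);
  if (prev == STATE_ZOMBIE) {
    atomic_store_explicit(state, STATE_ZOMBIE, memory_order_release);
  }
}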
@@ -111,7 +111,7 @@ void iree_task_poller_request_exit(iree_task_poller_t* poller) { // Returns true if the wait thread is in the zombie state (exited and awaiting // teardown). static bool iree_task_poller_is_zombie(iree_task_poller_t* poller) { - return iree_atomic_load_int32(&poller->state, iree_memory_order_acquire) == + return iree_atomic_load(&poller->state, iree_memory_order_acquire) == IREE_TASK_POLLER_STATE_ZOMBIE; } @@ -240,8 +240,8 @@ static iree_task_poller_prepare_result_t iree_task_poller_prepare_task( // scan of tasks. wait_status_code = IREE_STATUS_OK; } else if (task->cancellation_flag != NULL && - iree_atomic_load_int32(task->cancellation_flag, - iree_memory_order_acquire) != 0) { + iree_atomic_load(task->cancellation_flag, + iree_memory_order_acquire) != 0) { // Task was cancelled by the user (or a wait-any). These retire without // failure and it's up to the user to handle what happens to them. wait_status_code = IREE_STATUS_CANCELLED; @@ -313,8 +313,8 @@ static iree_task_poller_prepare_result_t iree_task_poller_prepare_task( // If this was part of a wait-any operation then set the cancellation flag // such that other waits are cancelled. if (iree_any_bit_set(task->header.flags, IREE_TASK_FLAG_WAIT_ANY)) { - if (iree_atomic_fetch_add_int32(task->cancellation_flag, 1, - iree_memory_order_release) == 0) { + if (iree_atomic_fetch_add(task->cancellation_flag, 1, + iree_memory_order_release) == 0) { // Ensure we scan again to clean up any potentially cancelled tasks. // If this was task 4 in a wait-any list then tasks 0-3 need to be // retired. @@ -429,7 +429,7 @@ static void iree_task_poller_wake_task(iree_task_poller_t* poller, // wait handles were resolved. static void iree_task_poller_commit_wait(iree_task_poller_t* poller, iree_time_t deadline_ns) { - if (iree_atomic_load_int32(&poller->state, iree_memory_order_acquire) == + if (iree_atomic_load(&poller->state, iree_memory_order_acquire) == IREE_TASK_POLLER_STATE_EXITING) { // Thread exit requested - don't block shutdown. return; @@ -486,7 +486,7 @@ static void iree_task_poller_commit_wait(iree_task_poller_t* poller, static void iree_task_poller_pump_until_exit(iree_task_poller_t* poller) { while (true) { // Check state to see if we've been asked to exit. - if (iree_atomic_load_int32(&poller->state, iree_memory_order_acquire) == + if (iree_atomic_load(&poller->state, iree_memory_order_acquire) == IREE_TASK_POLLER_STATE_EXITING) { // Thread exit requested - cancel pumping. break; @@ -536,8 +536,8 @@ static int iree_task_poller_main(iree_task_poller_t* poller) { // to exit while suspended/still starting up, so check that here before we // mess with any data structures. 
const bool should_run = - iree_atomic_exchange_int32(&poller->state, IREE_TASK_POLLER_STATE_RUNNING, - iree_memory_order_acq_rel) != + iree_atomic_exchange(&poller->state, IREE_TASK_POLLER_STATE_RUNNING, + iree_memory_order_acq_rel) != IREE_TASK_POLLER_STATE_EXITING; if (IREE_LIKELY(should_run)) { // << work happens here >> @@ -545,8 +545,8 @@ static int iree_task_poller_main(iree_task_poller_t* poller) { } IREE_TRACE_ZONE_END(thread_zone); - iree_atomic_store_int32(&poller->state, IREE_TASK_POLLER_STATE_ZOMBIE, - iree_memory_order_release); + iree_atomic_store(&poller->state, IREE_TASK_POLLER_STATE_ZOMBIE, + iree_memory_order_release); iree_notification_post(&poller->state_notification, IREE_ALL_WAITERS); return 0; } diff --git a/runtime/src/iree/task/scope.c b/runtime/src/iree/task/scope.c index 3ccf6ae5dfea..a777d3dc6067 100644 --- a/runtime/src/iree/task/scope.c +++ b/runtime/src/iree/task/scope.c @@ -49,12 +49,12 @@ void iree_task_scope_deinitialize(iree_task_scope_t* scope) { memset(scope->name, 0xCD, sizeof(scope->name)); // In most cases the status will have been consumed by the scope owner. - iree_status_t status = (iree_status_t)iree_atomic_exchange_intptr( + iree_status_t status = (iree_status_t)iree_atomic_exchange( &scope->permanent_status, (intptr_t)NULL, iree_memory_order_acquire); IREE_IGNORE_ERROR(status); - while (iree_atomic_load_int32(&scope->pending_idle_notification_posts, - iree_memory_order_acquire)) { + while (iree_atomic_load(&scope->pending_idle_notification_posts, + iree_memory_order_acquire)) { iree_thread_yield(); } iree_notification_deinitialize(&scope->idle_notification); @@ -74,14 +74,14 @@ iree_task_dispatch_statistics_t iree_task_scope_consume_statistics( } bool iree_task_scope_has_failed(iree_task_scope_t* scope) { - return iree_atomic_load_intptr(&scope->permanent_status, - iree_memory_order_acquire) != 0; + return iree_atomic_load(&scope->permanent_status, + iree_memory_order_acquire) != 0; } iree_status_t iree_task_scope_consume_status(iree_task_scope_t* scope) { iree_status_t old_status = iree_ok_status(); iree_status_t new_status = iree_ok_status(); - while (!iree_atomic_compare_exchange_strong_intptr( + while (!iree_atomic_compare_exchange_strong( &scope->permanent_status, (intptr_t*)&old_status, (intptr_t)new_status, iree_memory_order_acq_rel, iree_memory_order_acquire /* old_status is actually used */)) { @@ -114,7 +114,7 @@ static void iree_task_scope_try_set_status(iree_task_scope_t* scope, } iree_status_t old_status = iree_ok_status(); - if (!iree_atomic_compare_exchange_strong_intptr( + if (!iree_atomic_compare_exchange_strong( &scope->permanent_status, (intptr_t*)&old_status, (intptr_t)new_status, iree_memory_order_acq_rel, iree_memory_order_relaxed /* old_status is unused */)) { @@ -140,16 +140,16 @@ void iree_task_scope_begin(iree_task_scope_t* scope) { // relaxed because this 'begin' call will be paired with a 'end' call that // will perform the release-store, and this value is only read by // 'deinitialize'. - iree_atomic_store_int32(&scope->pending_idle_notification_posts, 1, - iree_memory_order_relaxed); + iree_atomic_store(&scope->pending_idle_notification_posts, 1, + iree_memory_order_relaxed); } void iree_task_scope_end(iree_task_scope_t* scope) { if (iree_atomic_ref_count_dec(&scope->pending_submissions) == 1) { // All submissions have completed in this scope - notify any waiters. 
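The ordering of the next two statements is load-bearing: post the idle notification first, then clear the pending flag with release, so a deinitializing thread that observes zero also observes the completed post and never tears the notification down mid-use. Schematically, with invented names:

#include <stdatomic.h>
#include <stdint.h>

// end(): wake idle waiters, then retire the in-flight-post guard.
static void scope_end_idle(_Atomic int32_t* pending_posts) {
  /* notification_post(&scope->idle_notification, ALL_WAITERS); */
  atomic_store_explicit(pending_posts, 0, memory_order_release);
}

// deinitialize(): spin until no post is in flight before freeing.
static void scope_wait_for_posts(_Atomic int32_t* pending_posts) {
  while (atomic_load_explicit(pending_posts, memory_order_acquire)) {
    /* thread_yield(); */
  }
}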
iree_notification_post(&scope->idle_notification, IREE_ALL_WAITERS); - iree_atomic_store_int32(&scope->pending_idle_notification_posts, 0, - iree_memory_order_release); + iree_atomic_store(&scope->pending_idle_notification_posts, 0, + iree_memory_order_release); } } diff --git a/runtime/src/iree/task/task.c b/runtime/src/iree/task/task.c index ae4fbf99d5b3..d0e40103e814 100644 --- a/runtime/src/iree/task/task.c +++ b/runtime/src/iree/task/task.c @@ -39,13 +39,13 @@ void iree_task_set_completion_task(iree_task_t* task, iree_task_t* completion_task) { IREE_ASSERT(!task->completion_task); task->completion_task = completion_task; - iree_atomic_fetch_add_int32(&completion_task->pending_dependency_count, 1, - iree_memory_order_acq_rel); + iree_atomic_fetch_add(&completion_task->pending_dependency_count, 1, + iree_memory_order_acq_rel); } bool iree_task_is_ready(iree_task_t* task) { - if (iree_atomic_load_int32(&task->pending_dependency_count, - iree_memory_order_acquire) > 0) { + if (iree_atomic_load(&task->pending_dependency_count, + iree_memory_order_acquire) > 0) { // At least one dependency is still pending. return false; } @@ -62,7 +62,7 @@ static void iree_task_try_set_status(iree_atomic_intptr_t* permanent_status, z0, iree_status_code_string(iree_status_code(new_status))); iree_status_t old_status = iree_ok_status(); - if (!iree_atomic_compare_exchange_strong_intptr( + if (!iree_atomic_compare_exchange_strong( permanent_status, (intptr_t*)&old_status, (intptr_t)new_status, iree_memory_order_acq_rel, iree_memory_order_relaxed /* old_status is unused */)) { @@ -102,16 +102,15 @@ void iree_task_discard(iree_task_t* task, iree_task_list_t* discard_worklist) { // tasks in the appropriate order: if we had a DAG of A -> B, C -> D we must // discard respecting the same topological ordering. - IREE_ASSERT_EQ(0, iree_atomic_load_int32(&task->pending_dependency_count, - iree_memory_order_acquire)); + IREE_ASSERT_EQ(0, iree_atomic_load(&task->pending_dependency_count, + iree_memory_order_acquire)); // Almost all tasks will have a completion task; some may have additional // dependent tasks (like barriers) that will be handled below. const bool completion_task_ready = task->completion_task && - iree_atomic_fetch_sub_int32( - &task->completion_task->pending_dependency_count, 1, - iree_memory_order_acq_rel) == 1; + iree_atomic_fetch_sub(&task->completion_task->pending_dependency_count, 1, + iree_memory_order_acq_rel) == 1; if (completion_task_ready) { iree_task_list_push_back(discard_worklist, task->completion_task); } @@ -147,8 +146,8 @@ void iree_task_discard(iree_task_t* task, iree_task_list_t* discard_worklist) { static void iree_task_retire(iree_task_t* task, iree_task_submission_t* pending_submission, iree_status_t status) { - IREE_ASSERT_EQ(0, iree_atomic_load_int32(&task->pending_dependency_count, - iree_memory_order_acquire)); + IREE_ASSERT_EQ(0, iree_atomic_load(&task->pending_dependency_count, + iree_memory_order_acquire)); // Decrement the pending count on the completion task, if any. 
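That decrement is the readiness protocol for the whole task system in one line: fetch_sub returns the pre-decrement value, so the caller that sees 1 performed the final decrement and owns enqueuing the dependent task. Distilled:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

// Returns true if this caller released the last outstanding dependency
// and should submit the dependent task for execution.
static bool release_dependency(_Atomic int32_t* pending_dependency_count) {
  return atomic_fetch_sub_explicit(pending_dependency_count, 1,
                                   memory_order_acq_rel) == 1;
}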
iree_task_t* completion_task = task->completion_task; @@ -159,8 +158,8 @@ static void iree_task_retire(iree_task_t* task, iree_task_cleanup(task, IREE_STATUS_OK); bool completion_task_ready = completion_task && - iree_atomic_fetch_sub_int32(&completion_task->pending_dependency_count, - 1, iree_memory_order_acq_rel) == 1; + iree_atomic_fetch_sub(&completion_task->pending_dependency_count, 1, + iree_memory_order_acq_rel) == 1; if (completion_task_ready) { // This was the last pending dependency and the completion task is ready // to run. @@ -180,8 +179,8 @@ static void iree_task_retire(iree_task_t* task, bool completion_task_ready = completion_task && - iree_atomic_fetch_sub_int32(&completion_task->pending_dependency_count, - 1, iree_memory_order_acq_rel) == 1; + iree_atomic_fetch_sub(&completion_task->pending_dependency_count, 1, + iree_memory_order_acq_rel) == 1; if (completion_task_ready) { // This was the last pending dependency and we know that we can safely // abort the completion task by discarding. @@ -239,7 +238,7 @@ void iree_task_call_initialize(iree_task_scope_t* scope, iree_task_call_t* out_task) { iree_task_initialize(IREE_TASK_TYPE_CALL, scope, &out_task->header); out_task->closure = closure; - iree_atomic_store_intptr(&out_task->status, 0, iree_memory_order_release); + iree_atomic_store(&out_task->status, 0, iree_memory_order_release); } void iree_task_call_execute(iree_task_call_t* task, @@ -272,9 +271,9 @@ void iree_task_call_execute(iree_task_call_t* task, // Check to see if there are no pending dependencies before retiring; the // dependency count can go up if new nested tasks were enqueued. - if (iree_atomic_load_int32(&task->header.pending_dependency_count, - iree_memory_order_acquire) == 0) { - iree_status_t status = (iree_status_t)iree_atomic_exchange_intptr( + if (iree_atomic_load(&task->header.pending_dependency_count, + iree_memory_order_acquire) == 0) { + iree_status_t status = (iree_status_t)iree_atomic_exchange( &task->status, 0, iree_memory_order_acq_rel); iree_task_retire(&task->header, pending_submission, status); } @@ -295,8 +294,8 @@ void iree_task_barrier_initialize(iree_task_scope_t* scope, out_task->dependent_tasks = dependent_tasks; for (iree_host_size_t i = 0; i < out_task->dependent_task_count; ++i) { iree_task_t* dependent_task = out_task->dependent_tasks[i]; - iree_atomic_fetch_add_int32(&dependent_task->pending_dependency_count, 1, - iree_memory_order_acq_rel); + iree_atomic_fetch_add(&dependent_task->pending_dependency_count, 1, + iree_memory_order_acq_rel); } } @@ -314,8 +313,8 @@ void iree_task_barrier_set_dependent_tasks( task->dependent_tasks = dependent_tasks; for (iree_host_size_t i = 0; i < task->dependent_task_count; ++i) { iree_task_t* dependent_task = task->dependent_tasks[i]; - iree_atomic_fetch_add_int32(&dependent_task->pending_dependency_count, 1, - iree_memory_order_acq_rel); + iree_atomic_fetch_add(&dependent_task->pending_dependency_count, 1, + iree_memory_order_acq_rel); } } @@ -329,8 +328,8 @@ static void iree_task_barrier_discard(iree_task_barrier_t* task, for (iree_host_size_t i = 0; i < task->dependent_task_count; ++i) { iree_task_t* dependent_task = task->dependent_tasks[i]; const bool dependent_task_ready = - iree_atomic_fetch_sub_int32(&dependent_task->pending_dependency_count, - 1, iree_memory_order_acq_rel) == 1; + iree_atomic_fetch_sub(&dependent_task->pending_dependency_count, 1, + iree_memory_order_acq_rel) == 1; if (dependent_task_ready) { // The dependent task has retired and can now be discard. 
iree_task_list_push_back(discard_worklist, dependent_task); @@ -348,8 +347,8 @@ void iree_task_barrier_retire(iree_task_barrier_t* task, for (iree_host_size_t i = 0; i < task->dependent_task_count; ++i) { iree_task_t* dependent_task = task->dependent_tasks[task->dependent_task_count - i - 1]; - if (iree_atomic_fetch_sub_int32(&dependent_task->pending_dependency_count, - 1, iree_memory_order_acq_rel) == 1) { + if (iree_atomic_fetch_sub(&dependent_task->pending_dependency_count, 1, + iree_memory_order_acq_rel) == 1) { // The dependent task has retired and can now be made ready. iree_task_submission_enqueue(pending_submission, dependent_task); } @@ -530,13 +529,13 @@ static void iree_task_dispatch_initialize_base( memcpy(out_task->workgroup_size, workgroup_size, sizeof(out_task->workgroup_size)); out_task->local_memory_size = 0; - iree_atomic_store_intptr(&out_task->status, 0, iree_memory_order_release); + iree_atomic_store(&out_task->status, 0, iree_memory_order_release); memset(&out_task->statistics, 0, sizeof(out_task->statistics)); IREE_TRACE({ static iree_atomic_int64_t next_dispatch_id = IREE_ATOMIC_VAR_INIT(0); - out_task->dispatch_id = iree_atomic_fetch_add_int64( - &next_dispatch_id, 1ll, iree_memory_order_acq_rel); + out_task->dispatch_id = iree_atomic_fetch_add(&next_dispatch_id, 1ll, + iree_memory_order_acq_rel); }); } @@ -597,8 +596,7 @@ void iree_task_dispatch_issue(iree_task_dispatch_t* dispatch_task, #endif // IREE_HAL_VERBOSE_TRACING_ENABLE // Setup the iteration space for shards to pull work from the complete grid. - iree_atomic_store_int32(&dispatch_task->tile_index, 0, - iree_memory_order_relaxed); + iree_atomic_store(&dispatch_task->tile_index, 0, iree_memory_order_relaxed); dispatch_task->tile_count = workgroup_count[0] * workgroup_count[1] * workgroup_count[2]; @@ -672,7 +670,7 @@ void iree_task_dispatch_retire(iree_task_dispatch_t* dispatch_task, // any other has hit an error; failure in a dispatch should be so exceedingly // rare that allowing some shards to complete after one encounters an error is // not a problem. - iree_status_t status = (iree_status_t)iree_atomic_exchange_intptr( + iree_status_t status = (iree_status_t)iree_atomic_exchange( &dispatch_task->status, 0, iree_memory_order_acq_rel); iree_task_retire(&dispatch_task->header, pending_submission, status); @@ -763,9 +761,9 @@ void iree_task_dispatch_shard_execute( const uint32_t tiles_per_reservation = dispatch_task->tiles_per_reservation; // relaxed order because we only care about atomic increments, not about // ordering of tile_index accesses w.r.t. other memory accesses. - uint32_t tile_base = iree_atomic_fetch_add_int32(&dispatch_task->tile_index, - tiles_per_reservation, - iree_memory_order_relaxed); + uint32_t tile_base = + iree_atomic_fetch_add(&dispatch_task->tile_index, tiles_per_reservation, + iree_memory_order_relaxed); while (tile_base < tile_count) { const uint32_t tile_range = iree_min(tile_base + tiles_per_reservation, tile_count); @@ -813,9 +811,9 @@ void iree_task_dispatch_shard_execute( } // Try to grab the next slice of tiles. 
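The grab that follows is one iteration of the batched reservation loop: workers claim tiles_per_reservation tiles with a single relaxed fetch_add, amortizing contention across a whole batch. In outline, with illustrative names:

#include <stdatomic.h>
#include <stdint.h>

// Claim [base, end) batches out of |tile_count| tiles until none remain.
// Relaxed suffices: we need unique reservations, not ordering, and the
// dispatch data was published before any shard started executing.
static void process_tiles(_Atomic uint32_t* next_tile, uint32_t tile_count,
                          uint32_t batch) {
  uint32_t base =
      atomic_fetch_add_explicit(next_tile, batch, memory_order_relaxed);
  while (base < tile_count) {
    uint32_t end = base + batch < tile_count ? base + batch : tile_count;
    for (uint32_t i = base; i < end; ++i) {
      /* execute_tile(i); */
    }
    base = atomic_fetch_add_explicit(next_tile, batch, memory_order_relaxed);
  }
}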
- tile_base = iree_atomic_fetch_add_int32(&dispatch_task->tile_index, - tiles_per_reservation, - iree_memory_order_relaxed); + tile_base = + iree_atomic_fetch_add(&dispatch_task->tile_index, tiles_per_reservation, + iree_memory_order_relaxed); } abort_shard: diff --git a/runtime/src/iree/task/task_test_dispatch.cc b/runtime/src/iree/task/task_test_dispatch.cc index 3324b6cc464e..b18c26e790ec 100644 --- a/runtime/src/iree/task/task_test_dispatch.cc +++ b/runtime/src/iree/task/task_test_dispatch.cc @@ -35,8 +35,7 @@ class GridCoverage { bool Verify() { fflush(stdout); for (iree_host_size_t i = 0; i < workgroup_count_; ++i) { - if (iree_atomic_load_int32(&storage_[i], iree_memory_order_seq_cst) != - 1) { + if (iree_atomic_load(&storage_[i], iree_memory_order_seq_cst) != 1) { return false; } } @@ -52,8 +51,8 @@ class GridCoverage { tile_context->workgroup_count[0]) + tile_context->workgroup_xyz[1] * tile_context->workgroup_count[0] + tile_context->workgroup_xyz[0]; - iree_atomic_fetch_add_int32(&coverage->storage_[slot], 1, - iree_memory_order_seq_cst); + iree_atomic_fetch_add(&coverage->storage_[slot], 1, + iree_memory_order_seq_cst); // Useful when testing large grids: // printf("%u, %u, %u\n", tile_context->workgroup_xyz[0], diff --git a/runtime/src/iree/task/worker.c b/runtime/src/iree/task/worker.c index 5bebaa50fc09..e0e1efd82085 100644 --- a/runtime/src/iree/task/worker.c +++ b/runtime/src/iree/task/worker.c @@ -48,8 +48,8 @@ iree_status_t iree_task_worker_initialize( iree_task_queue_initialize(&out_worker->local_task_queue); iree_task_worker_state_t initial_state = IREE_TASK_WORKER_STATE_RUNNING; - iree_atomic_store_int32(&out_worker->state, initial_state, - iree_memory_order_release); + iree_atomic_store(&out_worker->state, initial_state, + iree_memory_order_release); iree_thread_create_params_t thread_params; memset(&thread_params, 0, sizeof(thread_params)); @@ -78,14 +78,14 @@ void iree_task_worker_request_exit(iree_task_worker_t* worker) { // If the thread is already in the exiting/zombie state we don't need to do // anything. iree_task_worker_state_t prev_state = - (iree_task_worker_state_t)iree_atomic_exchange_int32( + (iree_task_worker_state_t)iree_atomic_exchange( &worker->state, IREE_TASK_WORKER_STATE_EXITING, iree_memory_order_acq_rel); switch (prev_state) { case IREE_TASK_WORKER_STATE_ZOMBIE: // Worker already exited; reset state to ZOMBIE. - iree_atomic_store_int32(&worker->state, IREE_TASK_WORKER_STATE_ZOMBIE, - iree_memory_order_release); + iree_atomic_store(&worker->state, IREE_TASK_WORKER_STATE_ZOMBIE, + iree_memory_order_release); break; default: // Worker now set to EXITING and should exit soon. @@ -101,7 +101,7 @@ void iree_task_worker_request_exit(iree_task_worker_t* worker) { // Returns true if the worker is in the zombie state (exited and awaiting // teardown). static bool iree_task_worker_is_zombie(iree_task_worker_t* worker) { - return iree_atomic_load_int32(&worker->state, iree_memory_order_acquire) == + return iree_atomic_load(&worker->state, iree_memory_order_acquire) == IREE_TASK_WORKER_STATE_ZOMBIE; } @@ -310,7 +310,7 @@ static void iree_task_worker_pump_until_exit(iree_task_worker_t* worker) { iree_task_worker_mark_active(worker); // Check state to see if we've been asked to exit. - if (iree_atomic_load_int32(&worker->state, iree_memory_order_acquire) == + if (iree_atomic_load(&worker->state, iree_memory_order_acquire) == IREE_TASK_WORKER_STATE_EXITING) { // Thread exit requested - cancel pumping. 
iree_notification_cancel_wait(&worker->wake_notification); @@ -395,8 +395,8 @@ static int iree_task_worker_main(iree_task_worker_t* worker) { // to exit while suspended/still starting up, so check that here before we // mess with any data structures. const bool should_run = - iree_atomic_exchange_int32(&worker->state, IREE_TASK_WORKER_STATE_RUNNING, - iree_memory_order_acq_rel) != + iree_atomic_exchange(&worker->state, IREE_TASK_WORKER_STATE_RUNNING, + iree_memory_order_acq_rel) != IREE_TASK_WORKER_STATE_EXITING; if (IREE_LIKELY(should_run)) { // << work happens here >> @@ -407,8 +407,8 @@ static int iree_task_worker_main(iree_task_worker_t* worker) { iree_task_worker_mark_idle(worker); IREE_TRACE_ZONE_END(thread_zone); - iree_atomic_store_int32(&worker->state, IREE_TASK_WORKER_STATE_ZOMBIE, - iree_memory_order_release); + iree_atomic_store(&worker->state, IREE_TASK_WORKER_STATE_ZOMBIE, + iree_memory_order_release); iree_notification_post(&worker->state_notification, IREE_ALL_WAITERS); return 0; } diff --git a/runtime/src/iree/vm/context.c b/runtime/src/iree/vm/context.c index d55e67fb99f3..3a1fc239e999 100644 --- a/runtime/src/iree/vm/context.c +++ b/runtime/src/iree/vm/context.c @@ -51,8 +51,8 @@ static iree_vm_context_id_t iree_vm_context_allocate_id(void) { static iree_atomic_int32_t next_context_id = IREE_ATOMIC_VAR_INIT(1); // relaxed because we only care about atomic increments, not ordering w.r.t. // other memory accesses. - uint32_t context_id = iree_atomic_fetch_add_int32(&next_context_id, 1, - iree_memory_order_relaxed); + uint32_t context_id = + iree_atomic_fetch_add(&next_context_id, 1, iree_memory_order_relaxed); #if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_FIBERS // This is what we pass to Tracy as the fiber name. // The string must remain live for the lifetime of the process. diff --git a/runtime/src/iree/vm/invocation.c b/runtime/src/iree/vm/invocation.c index 2ba5bab75ab3..d3fe20ac0f12 100644 --- a/runtime/src/iree/vm/invocation.c +++ b/runtime/src/iree/vm/invocation.c @@ -226,8 +226,8 @@ static iree_vm_invocation_id_t iree_vm_invoke_allocate_id( // The string must remain live for the lifetime of the process. // TODO(benvanik): name it based on the function? 
static iree_atomic_int32_t next_invocation_id = IREE_ATOMIC_VAR_INIT(1); - uint32_t invocation_id = iree_atomic_fetch_add_int32( - &next_invocation_id, 1, iree_memory_order_relaxed); + uint32_t invocation_id = iree_atomic_fetch_add(&next_invocation_id, 1, + iree_memory_order_relaxed); IREE_LEAK_CHECK_DISABLE_PUSH(); char* name = (char*)malloc(32); snprintf(name, 32, "invoke-%04d", invocation_id - 1); diff --git a/runtime/src/iree/vm/ref.c b/runtime/src/iree/vm/ref.c index 3d5f2552b585..fe3313620075 100644 --- a/runtime/src/iree/vm/ref.c +++ b/runtime/src/iree/vm/ref.c @@ -12,15 +12,15 @@ // Useful debugging tool: #if 0 -static inline volatile iree_atomic_ref_count_t* iree_vm_get_raw_counter_ptr( +static inline iree_atomic_ref_count_t* iree_vm_get_raw_counter_ptr( void* ptr, iree_vm_ref_type_t type); -static inline volatile iree_atomic_ref_count_t* iree_vm_get_ref_counter_ptr( +static inline iree_atomic_ref_count_t* iree_vm_get_ref_counter_ptr( iree_vm_ref_t* ref); static void iree_vm_ref_trace(const char* msg, iree_vm_ref_t* ref) { if (!ref->ptr) return; - volatile iree_atomic_ref_count_t* counter = iree_vm_get_ref_counter_ptr(ref); + iree_atomic_ref_count_t* counter = iree_vm_get_ref_counter_ptr(ref); iree_string_view_t name = iree_vm_ref_type_name(ref->type); fprintf(stderr, "%s %.*s 0x%p %d\n", msg, (int)name.size, name.data, ref->ptr, iree_atomic_ref_count_load(counter)); @@ -28,7 +28,7 @@ static void iree_vm_ref_trace(const char* msg, iree_vm_ref_t* ref) { static void iree_vm_ref_ptr_trace(const char* msg, void* ptr, iree_vm_ref_type_t type) { if (!ptr) return; - volatile iree_atomic_ref_count_t* counter = + iree_atomic_ref_count_t* counter = iree_vm_get_raw_counter_ptr(ptr, type); iree_string_view_t name = iree_vm_ref_type_name(type); fprintf(stderr, "%s %.*s 0x%p %d\n", msg, (int)name.size, name.data, ptr, @@ -45,19 +45,18 @@ iree_vm_ref_type_name(iree_vm_ref_type_t type) { return iree_vm_ref_type_descriptor(type)->type_name; } -static inline volatile iree_atomic_ref_count_t* iree_vm_get_raw_counter_ptr( +static inline iree_atomic_ref_count_t* iree_vm_get_raw_counter_ptr( void* ptr, iree_vm_ref_type_t type) { IREE_VM_REF_ASSERT(ptr); IREE_VM_REF_ASSERT(type_descriptor); - return (volatile iree_atomic_ref_count_t*)ptr + - (type & IREE_VM_REF_TYPE_TAG_BIT_MASK); + return (iree_atomic_ref_count_t*)ptr + (type & IREE_VM_REF_TYPE_TAG_BIT_MASK); } -static inline volatile iree_atomic_ref_count_t* iree_vm_get_ref_counter_ptr( +static inline iree_atomic_ref_count_t* iree_vm_get_ref_counter_ptr( iree_vm_ref_t* ref) { IREE_VM_REF_ASSERT(ref); IREE_VM_REF_ASSERT(ref->ptr); - return (volatile iree_atomic_ref_count_t*)ref->ptr + + return (iree_atomic_ref_count_t*)ref->ptr + (ref->type & IREE_VM_REF_TYPE_TAG_BIT_MASK); } @@ -65,8 +64,7 @@ IREE_API_EXPORT void iree_vm_ref_object_retain(void* ptr, iree_vm_ref_type_t type) { if (!ptr) return; IREE_VM_REF_ASSERT(type); - volatile iree_atomic_ref_count_t* counter = - iree_vm_get_raw_counter_ptr(ptr, type); + iree_atomic_ref_count_t* counter = iree_vm_get_raw_counter_ptr(ptr, type); iree_atomic_ref_count_inc(counter); iree_vm_ref_ptr_trace("RETAIN", ptr, type); } @@ -76,8 +74,7 @@ IREE_API_EXPORT void iree_vm_ref_object_release(void* ptr, if (!ptr) return; IREE_VM_REF_ASSERT(type); iree_vm_ref_ptr_trace("RELEASE", ptr, type); - volatile iree_atomic_ref_count_t* counter = - iree_vm_get_raw_counter_ptr(ptr, type); + iree_atomic_ref_count_t* counter = iree_vm_get_raw_counter_ptr(ptr, type); if (iree_atomic_ref_count_dec(counter) == 1) { const 
iree_vm_ref_type_descriptor_t* descriptor = iree_vm_ref_type_descriptor(type); @@ -130,8 +127,7 @@ IREE_API_EXPORT iree_status_t iree_vm_ref_wrap_retain(void* ptr, out_ref->ptr = ptr; out_ref->type = type; if (out_ref->ptr) { - volatile iree_atomic_ref_count_t* counter = - iree_vm_get_ref_counter_ptr(out_ref); + iree_atomic_ref_count_t* counter = iree_vm_get_ref_counter_ptr(out_ref); iree_atomic_ref_count_inc(counter); iree_vm_ref_trace("WRAP RETAIN", out_ref); } @@ -142,8 +138,7 @@ IREE_API_EXPORT iree_status_t iree_vm_ref_wrap_retain(void* ptr, IREE_API_EXPORT void iree_vm_ref_retain_inplace(iree_vm_ref_t* ref) { IREE_VM_REF_ASSERT(ref); if (ref->ptr) { - volatile iree_atomic_ref_count_t* counter = - iree_vm_get_ref_counter_ptr(ref); + iree_atomic_ref_count_t* counter = iree_vm_get_ref_counter_ptr(ref); iree_atomic_ref_count_inc(counter); iree_vm_ref_trace("RETAIN", ref); } @@ -157,8 +152,7 @@ IREE_API_EXPORT void iree_vm_ref_retain(iree_vm_ref_t* ref, IREE_VM_REF_ASSERT(out_ref); iree_vm_ref_t temp_ref = *ref; if (ref->ptr) { - volatile iree_atomic_ref_count_t* counter = - iree_vm_get_ref_counter_ptr(ref); + iree_atomic_ref_count_t* counter = iree_vm_get_ref_counter_ptr(ref); iree_atomic_ref_count_inc(counter); iree_vm_ref_trace("RETAIN", ref); } @@ -217,7 +211,7 @@ IREE_API_EXPORT void iree_vm_ref_release(iree_vm_ref_t* ref) { if (ref->type == IREE_VM_REF_TYPE_NULL || ref->ptr == NULL) return; iree_vm_ref_trace("RELEASE", ref); - volatile iree_atomic_ref_count_t* counter = iree_vm_get_ref_counter_ptr(ref); + iree_atomic_ref_count_t* counter = iree_vm_get_ref_counter_ptr(ref); if (iree_atomic_ref_count_dec(counter) == 1) { const iree_vm_ref_type_descriptor_t* descriptor = iree_vm_ref_type_descriptor(ref->type); diff --git a/runtime/src/iree/vm/ref_test.cc b/runtime/src/iree/vm/ref_test.cc index 68eaa5eb5dc5..5260749b31aa 100644 --- a/runtime/src/iree/vm/ref_test.cc +++ b/runtime/src/iree/vm/ref_test.cc @@ -73,9 +73,9 @@ static iree_vm_ref_t MakeRef(InstancePtr& instance, const char* type_name) { // WARNING: this is an implementation detail and must never be relied on - it's // only here to test the expected behavior. static int32_t ReadCounter(iree_vm_ref_t* ref) { - return iree_atomic_load_int32((iree_atomic_ref_count_t*)ref->ptr + - (ref->type & IREE_VM_REF_TYPE_TAG_BIT_MASK), - iree_memory_order_seq_cst); + return iree_atomic_load((iree_atomic_ref_count_t*)ref->ptr + + (ref->type & IREE_VM_REF_TYPE_TAG_BIT_MASK), + iree_memory_order_seq_cst); } } // namespace
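ReadCounter peeks at the same embedded counter the runtime manages. As a closing sketch, one common way to implement the release path that ref.c's dec-and-maybe-destroy calls rely on, with invented names; whether IREE's wrapper uses exactly this fence placement is not shown in this patch.

#include <stdatomic.h>
#include <stdint.h>

// The counter lives at a fixed offset inside the object (first field
// here). fetch_sub returns the value before the decrement, so seeing 1
// means we just dropped the last reference.
typedef struct {
  _Atomic int32_t ref_count;
  /* payload ... */
} my_object_t;

static void my_object_release(my_object_t* obj) {
  if (atomic_fetch_sub_explicit(&obj->ref_count, 1, memory_order_release) ==
      1) {
    atomic_thread_fence(memory_order_acquire);  // order teardown after dec
    /* my_object_destroy(obj); */
  }
}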