Modernizing iree_atomic_*. (#18910)
C11's _Generic lets us avoid encoding the type in the function name and more
closely match the C11 atomic syntax. This assumes that any C compiler we have
that goes down the disabled-atomics path supports _Generic (modern GCC, Clang,
and MSVC have all supported it for a while).

This allows us to drop-in replace C11-style atomics (useful in the new AMDGPU
backend), and on MSVC it will let us use their C11 atomics implementation when
it's ready (it's way better than the Interlocked-based solution we have now).
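
For illustration, the kind of type-based dispatch _Generic enables looks
roughly like the sketch below (simplified, with hypothetical example_* names
rather than the actual iree_atomic_* macros, and non-atomic bodies just to
show the selection mechanism):

  #include <stdint.h>

  // Width-specific implementations; _Generic picks one based on the static
  // type of the pointer argument, so the caller never spells out the type.
  static inline int32_t example_fetch_add_i32(int32_t* p, int32_t v) {
    int32_t old = *p;  // non-atomic stand-in, only the dispatch matters here
    *p += v;
    return old;
  }
  static inline int64_t example_fetch_add_i64(int64_t* p, int64_t v) {
    int64_t old = *p;
    *p += v;
    return old;
  }

  #define example_fetch_add(ptr, value)    \
    _Generic((ptr),                        \
        int32_t*: example_fetch_add_i32,   \
        int64_t*: example_fetch_add_i64)((ptr), (value))

  // example_fetch_add(&an_int32, 1) and example_fetch_add(&an_int64, 1) both
  // resolve to the right overload without a type suffix in the macro name.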
benvanik authored Oct 28, 2024
1 parent 4823dc0 commit f8b8414
Showing 34 changed files with 794 additions and 606 deletions.
12 changes: 5 additions & 7 deletions experimental/webgpu/nop_semaphore.c
@@ -38,8 +38,8 @@ iree_status_t iree_hal_webgpu_nop_semaphore_create(
iree_hal_resource_initialize(&iree_hal_webgpu_nop_semaphore_vtable,
&semaphore->resource);
semaphore->host_allocator = host_allocator;
-  iree_atomic_store_int64(&semaphore->value, initial_value,
-                          iree_memory_order_seq_cst);
+  iree_atomic_store(&semaphore->value, initial_value,
+                    iree_memory_order_seq_cst);
*out_semaphore = (iree_hal_semaphore_t*)semaphore;
}

@@ -63,17 +63,15 @@ static iree_status_t iree_hal_webgpu_nop_semaphore_query(
iree_hal_semaphore_t* base_semaphore, uint64_t* out_value) {
iree_hal_webgpu_nop_semaphore_t* semaphore =
iree_hal_webgpu_nop_semaphore_cast(base_semaphore);
-  *out_value =
-      iree_atomic_load_int64(&semaphore->value, iree_memory_order_seq_cst);
+  *out_value = iree_atomic_load(&semaphore->value, iree_memory_order_seq_cst);
return iree_ok_status();
}

static iree_status_t iree_hal_webgpu_nop_semaphore_signal(
iree_hal_semaphore_t* base_semaphore, uint64_t new_value) {
iree_hal_webgpu_nop_semaphore_t* semaphore =
iree_hal_webgpu_nop_semaphore_cast(base_semaphore);
-  iree_atomic_store_int64(&semaphore->value, new_value,
-                          iree_memory_order_seq_cst);
+  iree_atomic_store(&semaphore->value, new_value, iree_memory_order_seq_cst);
return iree_ok_status();
}

@@ -88,7 +86,7 @@ static iree_status_t iree_hal_webgpu_nop_semaphore_wait(
iree_hal_webgpu_nop_semaphore_t* semaphore =
iree_hal_webgpu_nop_semaphore_cast(base_semaphore);
uint64_t current_value =
-      iree_atomic_load_int64(&semaphore->value, iree_memory_order_seq_cst);
+      iree_atomic_load(&semaphore->value, iree_memory_order_seq_cst);
if (current_value < value) {
return iree_make_status(
IREE_STATUS_FAILED_PRECONDITION,
55 changes: 7 additions & 48 deletions runtime/src/iree/base/internal/atomics.h
@@ -86,47 +86,6 @@ extern "C" {

#endif // IREE_COMPILER_*

-// If the compiler can automatically determine the types:
-#ifdef iree_atomic_load_auto
-
-#define iree_atomic_load_int32 iree_atomic_load_auto
-#define iree_atomic_store_int32 iree_atomic_store_auto
-#define iree_atomic_fetch_add_int32 iree_atomic_fetch_add_auto
-#define iree_atomic_fetch_sub_int32 iree_atomic_fetch_sub_auto
-#define iree_atomic_fetch_and_int32 iree_atomic_fetch_and_auto
-#define iree_atomic_fetch_or_int32 iree_atomic_fetch_or_auto
-#define iree_atomic_fetch_xor_int32 iree_atomic_fetch_xor_auto
-#define iree_atomic_exchange_int32 iree_atomic_exchange_auto
-#define iree_atomic_compare_exchange_strong_int32 \
-  iree_atomic_compare_exchange_strong_auto
-#define iree_atomic_compare_exchange_weak_int32 \
-  iree_atomic_compare_exchange_weak_auto
-
-#define iree_atomic_load_int64 iree_atomic_load_auto
-#define iree_atomic_store_int64 iree_atomic_store_auto
-#define iree_atomic_fetch_add_int64 iree_atomic_fetch_add_auto
-#define iree_atomic_fetch_sub_int64 iree_atomic_fetch_sub_auto
-#define iree_atomic_fetch_and_int64 iree_atomic_fetch_and_auto
-#define iree_atomic_fetch_or_int64 iree_atomic_fetch_or_auto
-#define iree_atomic_fetch_xor_int64 iree_atomic_fetch_xor_auto
-#define iree_atomic_exchange_int64 iree_atomic_exchange_auto
-#define iree_atomic_compare_exchange_strong_int64 \
-  iree_atomic_compare_exchange_strong_auto
-#define iree_atomic_compare_exchange_weak_int64 \
-  iree_atomic_compare_exchange_weak_auto
-
-#define iree_atomic_load_intptr iree_atomic_load_auto
-#define iree_atomic_store_intptr iree_atomic_store_auto
-#define iree_atomic_fetch_add_intptr iree_atomic_fetch_add_auto
-#define iree_atomic_fetch_sub_intptr iree_atomic_fetch_sub_auto
-#define iree_atomic_exchange_intptr iree_atomic_exchange_auto
-#define iree_atomic_compare_exchange_strong_intptr \
-  iree_atomic_compare_exchange_strong_auto
-#define iree_atomic_compare_exchange_weak_intptr \
-  iree_atomic_compare_exchange_weak_auto
-
-#endif  // iree_atomic_load_auto

//==============================================================================
// Reference count atomics
//==============================================================================
@@ -140,10 +99,10 @@ typedef iree_atomic_int32_t iree_atomic_ref_count_t;
// should use IREE_ATOMIC_VAR_INIT, but apparently this has to be fixed
// at call sites (where the variables are initialized in the first place).
#define iree_atomic_ref_count_init_value(count_ptr, value) \
-  iree_atomic_store_int32(count_ptr, value, iree_memory_order_relaxed)
+  iree_atomic_store((count_ptr), (value), iree_memory_order_relaxed)

#define iree_atomic_ref_count_init(count_ptr) \
-  iree_atomic_ref_count_init_value(count_ptr, 1)
+  iree_atomic_ref_count_init_value((count_ptr), 1)

// Why relaxed order:
// https://www.boost.org/doc/libs/1_57_0/doc/html/atomic/usage_examples.html#boost_atomic.usage_examples.example_reference_counters.discussion
@@ -155,9 +114,9 @@ typedef iree_atomic_int32_t iree_atomic_ref_count_t;
// value (unlike iree_atomic_ref_count_dec), so we make sure that it does not,
// which allows the implementation to use faster atomic instructions where
// available, e.g. STADD on ARMv8.1-a.
-#define iree_atomic_ref_count_inc(count_ptr) \
-  do { \
-    iree_atomic_fetch_add_int32(count_ptr, 1, iree_memory_order_relaxed); \
+#define iree_atomic_ref_count_inc(count_ptr) \
+  do { \
+    iree_atomic_fetch_add((count_ptr), 1, iree_memory_order_relaxed); \
} while (false)

// For now we stick to acq_rel order. TODO: should we follow Boost's advice?
@@ -169,13 +128,13 @@ typedef iree_atomic_int32_t iree_atomic_ref_count_t;
// may be a pessimization... I would like to hear a second opinion on this,
// particularly regarding how x86-centric this might be.
#define iree_atomic_ref_count_dec(count_ptr) \
-  iree_atomic_fetch_sub_int32(count_ptr, 1, iree_memory_order_acq_rel)
+  iree_atomic_fetch_sub((count_ptr), 1, iree_memory_order_acq_rel)

// memory_order_acquire order ensures that this sees decrements from
// iree_atomic_ref_count_dec. On the other hand, there is no ordering with
// iree_atomic_ref_count_inc.
#define iree_atomic_ref_count_load(count_ptr) \
-  iree_atomic_load_int32(count_ptr, iree_memory_order_acquire)
+  iree_atomic_load((count_ptr), iree_memory_order_acquire)

// Aborts the program if the given reference count value is not 1.
// This should be avoided in all situations but those where continuing execution
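
The ordering comments above (relaxed increment, acq_rel decrement, acquire
load) map onto the usual retain/release pattern. A minimal sketch, assuming
only the fetch_add/fetch_sub semantics shown here (the decrement returns the
count before the subtraction) and using hypothetical example_* names:

  #include <stdlib.h>
  // Assumes iree/base/internal/atomics.h is included for the macros below.

  typedef struct example_object_t {
    iree_atomic_ref_count_t ref_count;
    // ... payload ...
  } example_object_t;

  static example_object_t* example_object_create(void) {
    example_object_t* object = (example_object_t*)malloc(sizeof(*object));
    if (!object) return NULL;
    iree_atomic_ref_count_init(&object->ref_count);  // count starts at 1
    return object;
  }

  static void example_object_retain(example_object_t* object) {
    // Relaxed is enough: the caller already holds a reference.
    iree_atomic_ref_count_inc(&object->ref_count);
  }

  static void example_object_release(example_object_t* object) {
    // fetch_sub returns the previous count; 1 means this was the last
    // reference, and the acq_rel ordering makes earlier writes visible
    // before the object is freed.
    if (iree_atomic_ref_count_dec(&object->ref_count) == 1) {
      free(object);
    }
  }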
35 changes: 18 additions & 17 deletions runtime/src/iree/base/internal/atomics_clang.h
@@ -33,37 +33,38 @@ typedef enum iree_memory_order_e {

typedef _Atomic int32_t iree_atomic_int32_t;
typedef _Atomic int64_t iree_atomic_int64_t;
+typedef _Atomic uint32_t iree_atomic_uint32_t;
+typedef _Atomic uint64_t iree_atomic_uint64_t;
// TODO(#3453): check for __int128 support before using
// typedef _Atomic __int128 iree_atomic_int128_t;
typedef _Atomic intptr_t iree_atomic_intptr_t;

-#define iree_atomic_load_auto(object, order) \
-  __c11_atomic_load((object), (order))
-#define iree_atomic_store_auto(object, desired, order) \
+#define iree_atomic_thread_fence(order) __c11_atomic_thread_fence(order)
+
+#define iree_atomic_load(object, order) __c11_atomic_load((object), (order))
+#define iree_atomic_store(object, desired, order) \
  __c11_atomic_store((object), (desired), (order))
-#define iree_atomic_fetch_add_auto(object, operand, order) \
+#define iree_atomic_fetch_add(object, operand, order) \
  __c11_atomic_fetch_add((object), (operand), (order))
-#define iree_atomic_fetch_sub_auto(object, operand, order) \
+#define iree_atomic_fetch_sub(object, operand, order) \
  __c11_atomic_fetch_sub((object), (operand), (order))
-#define iree_atomic_fetch_and_auto(object, operand, order) \
+#define iree_atomic_fetch_and(object, operand, order) \
  __c11_atomic_fetch_and((object), (operand), (order))
-#define iree_atomic_fetch_or_auto(object, operand, order) \
+#define iree_atomic_fetch_or(object, operand, order) \
  __c11_atomic_fetch_or((object), (operand), (order))
-#define iree_atomic_fetch_xor_auto(object, operand, order) \
+#define iree_atomic_fetch_xor(object, operand, order) \
  __c11_atomic_fetch_xor((object), (operand), (order))
-#define iree_atomic_exchange_auto(object, operand, order) \
+#define iree_atomic_exchange(object, operand, order) \
  __c11_atomic_exchange((object), (operand), (order))
-#define iree_atomic_compare_exchange_strong_auto(object, expected, desired, \
-                                                 order_succ, order_fail) \
-  __c11_atomic_compare_exchange_strong((object), (expected), (desired), \
+#define iree_atomic_compare_exchange_strong(object, expected, desired, \
+                                            order_succ, order_fail) \
+  __c11_atomic_compare_exchange_strong((object), (expected), (desired), \
                                       (order_succ), (order_fail))
-#define iree_atomic_compare_exchange_weak_auto(object, expected, desired, \
-                                               order_succ, order_fail) \
-  __c11_atomic_compare_exchange_weak((object), (expected), (desired), \
+#define iree_atomic_compare_exchange_weak(object, expected, desired, \
+                                          order_succ, order_fail) \
+  __c11_atomic_compare_exchange_weak((object), (expected), (desired), \
                                     (order_succ), (order_fail))

-#define iree_atomic_thread_fence(order) __c11_atomic_thread_fence(order)
-
#ifdef __cplusplus
} // extern "C"
#endif
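
With the suffix-free names, call sites read like plain C11 atomics. A minimal
sketch of a compare-exchange loop written against the macros defined above,
assuming a C11/Clang toolchain and a hypothetical helper that includes
iree/base/internal/atomics.h:

  #include <stdbool.h>
  #include <stdint.h>

  // Increment *count only while it is non-zero; returns true on success.
  static bool example_inc_if_nonzero(iree_atomic_int32_t* count) {
    int32_t expected = iree_atomic_load(count, iree_memory_order_relaxed);
    while (expected != 0) {
      // On failure the macro stores the freshly observed value back into
      // 'expected', so the loop simply retries with the new value.
      if (iree_atomic_compare_exchange_weak(count, &expected, expected + 1,
                                            iree_memory_order_acq_rel,
                                            iree_memory_order_relaxed)) {
        return true;
      }
    }
    return false;
  }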