diff --git a/experimental/cuda2/event_semaphore.c b/experimental/cuda2/event_semaphore.c index 47efd65e5817..0bddc8a83747 100644 --- a/experimental/cuda2/event_semaphore.c +++ b/experimental/cuda2/event_semaphore.c @@ -389,6 +389,10 @@ iree_status_t iree_hal_cuda2_event_semaphore_acquire_timepoint_device_wait( (iree_hal_cuda2_timepoint_t*)tp; if (signal_timepoint->kind == IREE_HAL_CUDA_TIMEPOINT_KIND_DEVICE_SIGNAL && signal_timepoint->base.minimum_value >= min_value) { + // We've found an existing signal timepoint to wait on; we don't need a + // standalone wait timepoint anymore. Decrease its refcount before + // overwriting it to return it back to the pool and retain the new one. + iree_hal_cuda2_event_release(wait_timepoint->timepoint.device_wait); iree_hal_cuda2_event_t* event = signal_timepoint->timepoint.device_signal; iree_hal_cuda2_event_retain(event); wait_timepoint->timepoint.device_wait = event;