diff --git a/cpp/include/kvikio/file_handle.hpp b/cpp/include/kvikio/file_handle.hpp index 19445f1333..7c3e1c92a3 100644 --- a/cpp/include/kvikio/file_handle.hpp +++ b/cpp/include/kvikio/file_handle.hpp @@ -335,7 +335,7 @@ class FileHandle { } if (sync_default_stream) { CUDA_DRIVER_TRY(cudaAPI::instance().StreamSynchronize(nullptr)); } - KVIKIO_NVTX_FUNC_RANGE("cufileRead()", size); + KVIKIO_NVTX_SCOPED_RANGE("cufileRead()", size); ssize_t ret = cuFileAPI::instance().Read( _handle, devPtr_base, size, convert_size2off(file_offset), convert_size2off(devPtr_offset)); CUFILE_CHECK_BYTES_DONE(ret); @@ -387,7 +387,7 @@ class FileHandle { } if (sync_default_stream) { CUDA_DRIVER_TRY(cudaAPI::instance().StreamSynchronize(nullptr)); } - KVIKIO_NVTX_FUNC_RANGE("cufileWrite()", size); + KVIKIO_NVTX_SCOPED_RANGE("cufileWrite()", size); ssize_t ret = cuFileAPI::instance().Write( _handle, devPtr_base, size, convert_size2off(file_offset), convert_size2off(devPtr_offset)); if (ret == -1) { @@ -434,6 +434,7 @@ class FileHandle { std::size_t gds_threshold = defaults::gds_threshold(), bool sync_default_stream = true) { + KVIKIO_NVTX_MARKER("FileHandle::pread()", size); if (is_host_memory(buf)) { auto op = [this](void* hostPtr_base, std::size_t size, @@ -510,6 +511,7 @@ class FileHandle { std::size_t gds_threshold = defaults::gds_threshold(), bool sync_default_stream = true) { + KVIKIO_NVTX_MARKER("FileHandle::pwrite()", size); if (is_host_memory(buf)) { auto op = [this](const void* hostPtr_base, std::size_t size, diff --git a/cpp/include/kvikio/posix_io.hpp b/cpp/include/kvikio/posix_io.hpp index 0437ef69f8..4327a301ec 100644 --- a/cpp/include/kvikio/posix_io.hpp +++ b/cpp/include/kvikio/posix_io.hpp @@ -211,7 +211,7 @@ std::size_t posix_device_io(int fd, template std::size_t posix_host_read(int fd, void* buf, std::size_t size, std::size_t file_offset) { - KVIKIO_NVTX_FUNC_RANGE("posix_host_read()", size); + KVIKIO_NVTX_SCOPED_RANGE("posix_host_read()", size); return 
detail::posix_host_io( fd, buf, size, convert_size2off(file_offset)); } @@ -233,7 +233,7 @@ std::size_t posix_host_read(int fd, void* buf, std::size_t size, std::size_t fil template std::size_t posix_host_write(int fd, const void* buf, std::size_t size, std::size_t file_offset) { - KVIKIO_NVTX_FUNC_RANGE("posix_host_write()", size); + KVIKIO_NVTX_SCOPED_RANGE("posix_host_write()", size); return detail::posix_host_io( fd, buf, size, convert_size2off(file_offset)); } @@ -257,7 +257,7 @@ inline std::size_t posix_device_read(int fd, std::size_t file_offset, std::size_t devPtr_offset) { - KVIKIO_NVTX_FUNC_RANGE("posix_device_read()", size); + KVIKIO_NVTX_SCOPED_RANGE("posix_device_read()", size); return detail::posix_device_io( fd, devPtr_base, size, file_offset, devPtr_offset); } @@ -281,7 +281,7 @@ inline std::size_t posix_device_write(int fd, std::size_t file_offset, std::size_t devPtr_offset) { - KVIKIO_NVTX_FUNC_RANGE("posix_device_write()", size); + KVIKIO_NVTX_SCOPED_RANGE("posix_device_write()", size); return detail::posix_device_io( fd, devPtr_base, size, file_offset, devPtr_offset); } diff --git a/cpp/include/kvikio/remote_handle.hpp b/cpp/include/kvikio/remote_handle.hpp index bff96ce0ad..5bb18f6396 100644 --- a/cpp/include/kvikio/remote_handle.hpp +++ b/cpp/include/kvikio/remote_handle.hpp @@ -164,7 +164,7 @@ inline std::size_t callback_host_memory(char* data, ctx->overflow_error = true; return CURL_WRITEFUNC_ERROR; } - KVIKIO_NVTX_FUNC_RANGE("RemoteHandle - callback_host_memory()", nbytes); + KVIKIO_NVTX_SCOPED_RANGE("RemoteHandle - callback_host_memory()", nbytes); std::memcpy(ctx->buf + ctx->offset, data, nbytes); ctx->offset += nbytes; return nbytes; @@ -191,7 +191,7 @@ inline std::size_t callback_device_memory(char* data, ctx->overflow_error = true; return CURL_WRITEFUNC_ERROR; } - KVIKIO_NVTX_FUNC_RANGE("RemoteHandle - callback_device_memory()", nbytes); + KVIKIO_NVTX_SCOPED_RANGE("RemoteHandle - callback_device_memory()", nbytes); 
ctx->bounce_buffer->write(data, nbytes); ctx->offset += nbytes; @@ -515,7 +515,7 @@ class RemoteHandle { */ std::size_t read(void* buf, std::size_t size, std::size_t file_offset = 0) { - KVIKIO_NVTX_FUNC_RANGE("RemoteHandle::read()", size); + KVIKIO_NVTX_SCOPED_RANGE("RemoteHandle::read()", size); if (file_offset + size > _nbytes) { std::stringstream ss; @@ -578,7 +578,7 @@ class RemoteHandle { std::size_t file_offset = 0, std::size_t task_size = defaults::task_size()) { - KVIKIO_NVTX_FUNC_RANGE("RemoteHandle::pread()", size); + KVIKIO_NVTX_SCOPED_RANGE("RemoteHandle::pread()", size); auto task = [this](void* devPtr_base, std::size_t size, std::size_t file_offset, diff --git a/cpp/include/kvikio/utils.hpp b/cpp/include/kvikio/utils.hpp index 4c43326063..3cad457ffa 100644 --- a/cpp/include/kvikio/utils.hpp +++ b/cpp/include/kvikio/utils.hpp @@ -287,47 +287,104 @@ struct libkvikio_domain { static constexpr char const* name{"libkvikio"}; }; +// Macro to concatenate two tokens x and y. +#define KVIKIO_CONCAT_HELPER(x, y) x##y +#define KVIKIO_CONCAT(x, y) KVIKIO_CONCAT_HELPER(x, y) + +// Macro to create a static, registered string that will not have a name conflict with any +// registered string defined in the same scope. 
+#define KVIKIO_REGISTER_STRING(msg) \ + [](const char* a_msg) -> auto& { \ + static nvtx3::registered_string_in<libkvikio_domain> a_reg_str{a_msg}; \ + return a_reg_str; \ + }(msg) + // Macro overloads of KVIKIO_NVTX_FUNC_RANGE -#define KVIKIO_NVTX_FUNC_RANGE_1() NVTX3_FUNC_RANGE_IN(libkvikio_domain) -#define KVIKIO_NVTX_FUNC_RANGE_2(msg, val) \ - nvtx3::scoped_range_in<libkvikio_domain> _kvikio_nvtx_range \ - { \ - nvtx3::event_attributes \ - { \ - msg, nvtx3::payload { convert_to_64bit(val) } \ - } \ +#define KVIKIO_NVTX_FUNC_RANGE_IMPL() NVTX3_FUNC_RANGE_IN(libkvikio_domain) + +#define KVIKIO_NVTX_SCOPED_RANGE_IMPL(msg, val) \ + nvtx3::scoped_range_in<libkvikio_domain> KVIKIO_CONCAT(_kvikio_nvtx_range, __LINE__) \ + { \ + nvtx3::event_attributes \ + { \ + KVIKIO_REGISTER_STRING(msg), nvtx3::payload { convert_to_64bit(val) } \ + } \ } -#define GET_KVIKIO_NVTX_FUNC_RANGE_MACRO(_1, _2, NAME, ...) NAME + +#define KVIKIO_NVTX_MARKER_IMPL(msg, val) \ + nvtx3::mark_in<libkvikio_domain>( \ + nvtx3::event_attributes{KVIKIO_REGISTER_STRING(msg), nvtx3::payload{convert_to_64bit(val)}}) + #endif /** * @brief Convenience macro for generating an NVTX range in the `libkvikio` domain * from the lifetime of a function. * - * Takes two arguments (message, payload) or no arguments, in which case the name - * of the immediately enclosing function returned by `__func__` is used. + * Takes no argument. The name of the immediately enclosing function returned by `__func__` is used + * as the message. * * Example: * ``` - * void some_function1(){ - * KVIKIO_NVTX_FUNC_RANGE("my function", 42); - * ... - * } - * void some_function2(){ - * KVIKIO_NVTX_FUNC_RANGE(); // The name `some_function2` is used + * void some_function(){ + * KVIKIO_NVTX_FUNC_RANGE(); // The name `some_function` is used as the message * ... * } * ``` */ #ifdef KVIKIO_CUDA_FOUND -#define KVIKIO_NVTX_FUNC_RANGE(...) 
\ - GET_KVIKIO_NVTX_FUNC_RANGE_MACRO( \ - __VA_ARGS__, KVIKIO_NVTX_FUNC_RANGE_2, KVIKIO_NVTX_FUNC_RANGE_1) \ - (__VA_ARGS__) +#define KVIKIO_NVTX_FUNC_RANGE() KVIKIO_NVTX_FUNC_RANGE_IMPL() #else #define KVIKIO_NVTX_FUNC_RANGE(...) \ do { \ } while (0) #endif +/** + * @brief Convenience macro for generating an NVTX scoped range in the `libkvikio` domain to + * annotate a time duration. + * + * Takes two arguments (message, payload). + * + * Example: + * ``` + * void some_function(){ + * KVIKIO_NVTX_SCOPED_RANGE("my function", 42); + * ... + * } + * ``` + */ +#ifdef KVIKIO_CUDA_FOUND +#define KVIKIO_NVTX_SCOPED_RANGE(msg, val) KVIKIO_NVTX_SCOPED_RANGE_IMPL(msg, val) +#else +#define KVIKIO_NVTX_SCOPED_RANGE(msg, val) \ + do { \ + } while (0) +#endif + +/** + * @brief Convenience macro for generating an NVTX marker in the `libkvikio` domain to annotate a + * certain time point. + * + * Takes two arguments (message, payload). Use this macro to annotate asynchronous I/O operations, + * where the payload refers to the I/O size. + * + * Example: + * ``` + * std::future<void> some_function(){ + * size_t io_size{2077}; + * KVIKIO_NVTX_MARKER("I/O operation", io_size); + * perform_async_io_operation(io_size); + * ... + * } + * ``` + */ +#ifdef KVIKIO_CUDA_FOUND +#define KVIKIO_NVTX_MARKER(message, payload) KVIKIO_NVTX_MARKER_IMPL(message, payload) +#else +#define KVIKIO_NVTX_MARKER(message, payload) \ + do { \ + } while (0) +#endif + } // namespace kvikio