diff --git a/cpp/include/kvikio/cufile_config.hpp b/cpp/include/kvikio/cufile_config.hpp index 85d6cf57ed..b1457880bb 100644 --- a/cpp/include/kvikio/cufile_config.hpp +++ b/cpp/include/kvikio/cufile_config.hpp @@ -19,6 +19,8 @@ #include #include +#include + namespace kvikio { namespace detail { @@ -39,7 +41,7 @@ namespace detail { * * @return The filepath to the cufile.json file or the empty string if it isn't found. */ -[[nodiscard]] inline const std::string& config_path() +[[nodiscard]] KVIKIO_EXPORT inline const std::string& config_path() { static const std::string ret = detail::lookup_config_path(); return ret; diff --git a/cpp/include/kvikio/defaults.hpp b/cpp/include/kvikio/defaults.hpp index 119197bfc4..c812c6e251 100644 --- a/cpp/include/kvikio/defaults.hpp +++ b/cpp/include/kvikio/defaults.hpp @@ -21,7 +21,6 @@ #include #include #include -#include #include diff --git a/cpp/include/kvikio/file_handle.hpp b/cpp/include/kvikio/file_handle.hpp index fbd510d86f..f84e792489 100644 --- a/cpp/include/kvikio/file_handle.hpp +++ b/cpp/include/kvikio/file_handle.hpp @@ -286,7 +286,7 @@ class FileHandle { * * @return The number of bytes */ - [[nodiscard]] inline std::size_t nbytes() const + [[nodiscard]] std::size_t nbytes() const { if (closed()) { return 0; } if (_nbytes == 0) { _nbytes = detail::get_file_size(_fd_direct_off); } diff --git a/cpp/include/kvikio/utils.hpp b/cpp/include/kvikio/utils.hpp index 918983a919..7a54b2793b 100644 --- a/cpp/include/kvikio/utils.hpp +++ b/cpp/include/kvikio/utils.hpp @@ -28,6 +28,20 @@ #include #include +// Macros used for defining symbol visibility, only GLIBC is supported. +// Since KvikIO is header-only, we rely on the linker to disambiguate inline functions +// that have (or return) static references. To do this, the relevant function must have +// `__attribute__((visibility("default")))`. If not, then if KvikIO is used in two +// different DSOs, the function will appear twice, and there will be two static objects. +// See . +#if (defined(__GNUC__) || defined(__clang__)) && !defined(__MINGW32__) && !defined(__MINGW64__) +#define KVIKIO_EXPORT __attribute__((visibility("default"))) +#define KVIKIO_HIDDEN __attribute__((visibility("hidden"))) +#else +#define KVIKIO_EXPORT +#define KVIKIO_HIDDEN +#endif + namespace kvikio { // cuFile defines a page size to 4 KiB @@ -101,33 +115,6 @@ constexpr bool is_host_memory(const void* ptr) { return true; } return ret; } -/** - * @brief RAII wrapper for a CUDA primary context - */ -class CudaPrimaryContext { - public: - CUdevice dev{}; - CUcontext ctx{}; - - CudaPrimaryContext(int device_ordinal) - { - CUDA_DRIVER_TRY(cudaAPI::instance().DeviceGet(&dev, device_ordinal)); - CUDA_DRIVER_TRY(cudaAPI::instance().DevicePrimaryCtxRetain(&ctx, dev)); - } - CudaPrimaryContext(const CudaPrimaryContext&) = delete; - CudaPrimaryContext& operator=(CudaPrimaryContext const&) = delete; - CudaPrimaryContext(CudaPrimaryContext&&) = delete; - CudaPrimaryContext&& operator=(CudaPrimaryContext&&) = delete; - ~CudaPrimaryContext() - { - try { - CUDA_DRIVER_TRY(cudaAPI::instance().DevicePrimaryCtxRelease(dev), CUfileException); - } catch (const CUfileException& e) { - std::cerr << e.what() << std::endl; - } - } -}; - /** * @brief Given a device ordinal, return the primary context of the device. * @@ -136,11 +123,24 @@ class CudaPrimaryContext { * @param ordinal Device ordinal - an integer between 0 and the number of CUDA devices * @return Primary CUDA context */ -[[nodiscard]] inline CUcontext get_primary_cuda_context(int ordinal) +[[nodiscard]] KVIKIO_EXPORT inline CUcontext get_primary_cuda_context(int ordinal) { - static std::map _primary_contexts; - _primary_contexts.try_emplace(ordinal, ordinal); - return _primary_contexts.at(ordinal).ctx; + static std::map _cache; + static std::mutex _mutex; + std::lock_guard const lock(_mutex); + + if (_cache.find(ordinal) == _cache.end()) { + CUdevice dev{}; + CUcontext ctx{}; + CUDA_DRIVER_TRY(cudaAPI::instance().DeviceGet(&dev, ordinal)); + + // Notice, we let the primary context leak at program exit. We do this because `_cache` + // is static and we are not allowed to call `cuDevicePrimaryCtxRelease()` after main: + // + CUDA_DRIVER_TRY(cudaAPI::instance().DevicePrimaryCtxRetain(&ctx, dev)); + _cache.emplace(ordinal, ctx); + } + return _cache.at(ordinal); } /**