Skip to content

Commit

Permalink
shim: only open the cufile driver when the streaming API isn't available
Browse files Browse the repository at this point in the history
  • Loading branch information
madsbk committed Oct 25, 2024
1 parent 498c1f9 commit 521c80f
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 23 deletions.
6 changes: 3 additions & 3 deletions cpp/include/kvikio/cufile/driver.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -45,7 +45,7 @@ inline void set_driver_flag(unsigned int& prop, unsigned int flag, bool val) noe
class DriverInitializer {
// Optional; if not used, cuFile opens the driver automatically
public:
DriverInitializer() { CUFILE_TRY(cuFileAPI::instance().DriverOpen()); }
DriverInitializer() { cuFileAPI::instance().driver_open(); }

DriverInitializer(DriverInitializer const&) = delete;
DriverInitializer& operator=(DriverInitializer const&) = delete;
Expand All @@ -55,7 +55,7 @@ class DriverInitializer {
~DriverInitializer()
{
try {
CUFILE_TRY(cuFileAPI::instance().DriverClose());
cuFileAPI::instance().driver_close();
} catch (const CUfileException& e) {
std::cerr << "Unable to close GDS file driver: ";
std::cerr << e.what();
Expand Down
62 changes: 43 additions & 19 deletions cpp/include/kvikio/shim/cufile.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
#pragma once

#include <stdexcept>
#include <string>

#include <iostream>
#include <kvikio/shim/cufile_h_wrapper.hpp>
#include <kvikio/shim/utils.hpp>

Expand All @@ -38,8 +38,6 @@ class cuFileAPI {
decltype(cuFileWrite)* Write{nullptr};
decltype(cuFileBufRegister)* BufRegister{nullptr};
decltype(cuFileBufDeregister)* BufDeregister{nullptr};
decltype(cuFileDriverOpen)* DriverOpen{nullptr};
decltype(cuFileDriverClose)* DriverClose{nullptr};
decltype(cuFileDriverGetProperties)* DriverGetProperties{nullptr};
decltype(cuFileDriverSetPollMode)* DriverSetPollMode{nullptr};
decltype(cuFileDriverSetMaxCacheSize)* DriverSetMaxCacheSize{nullptr};
Expand All @@ -54,6 +52,12 @@ class cuFileAPI {
decltype(cuFileStreamRegister)* StreamRegister{nullptr};
decltype(cuFileStreamDeregister)* StreamDeregister{nullptr};

private:
// Don't call the driver open and close symbols directly; use `driver_open()` and `driver_close()`.
decltype(cuFileDriverOpen)* _DriverOpen{nullptr};
decltype(cuFileDriverClose)* _DriverClose{nullptr};

public:
bool stream_available = false;

private:
Expand All @@ -77,8 +81,8 @@ class cuFileAPI {
get_symbol(Write, lib, KVIKIO_STRINGIFY(cuFileWrite));
get_symbol(BufRegister, lib, KVIKIO_STRINGIFY(cuFileBufRegister));
get_symbol(BufDeregister, lib, KVIKIO_STRINGIFY(cuFileBufDeregister));
get_symbol(DriverOpen, lib, KVIKIO_STRINGIFY(cuFileDriverOpen));
get_symbol(DriverClose, lib, KVIKIO_STRINGIFY(cuFileDriverClose));
get_symbol(_DriverOpen, lib, KVIKIO_STRINGIFY(cuFileDriverOpen));
get_symbol(_DriverClose, lib, KVIKIO_STRINGIFY(cuFileDriverClose));
get_symbol(DriverGetProperties, lib, KVIKIO_STRINGIFY(cuFileDriverGetProperties));
get_symbol(DriverSetPollMode, lib, KVIKIO_STRINGIFY(cuFileDriverSetPollMode));
get_symbol(DriverSetMaxCacheSize, lib, KVIKIO_STRINGIFY(cuFileDriverSetMaxCacheSize));
Expand Down Expand Up @@ -107,23 +111,16 @@ class cuFileAPI {

// cuFile is supposed to open and close the driver automatically, but because of a bug in
// CUDA 11.8 it sometimes segfaults. See <https://github.com/rapidsai/kvikio/issues/159>.
CUfileError_t const error = DriverOpen();
if (error.err != CU_FILE_SUCCESS) {
throw std::runtime_error(std::string{"cuFile error at: "} + __FILE__ + ":" +
KVIKIO_STRINGIFY(__LINE__) + ": " +
cufileop_status_error(error.err));
}
}
~cuFileAPI()
{
CUfileError_t const error = DriverClose();
if (error.err != CU_FILE_SUCCESS) {
std::cerr << "Unable to close GDS file driver: " << cufileop_status_error(error.err)
<< std::endl;
if (!stream_available) { // The stream API was introduced in CUDA 12.2.
driver_open();
}
}
// Note: we don't close the driver at program exit, since calling cuFile after main() has
// returned is not allowed:
// <https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#initialization>
~cuFileAPI() = default;
#else
cuFileAPI() { throw std::runtime_error(CUFILE_ERRSTR(0)); }
cuFileAPI() { throw std::runtime_error("KvikIO not compiled with cuFile.h"); }
#endif

public:
Expand All @@ -137,6 +134,33 @@ class cuFileAPI {
static cuFileAPI _instance;
return _instance;
}

/**
* @brief Open the cuFile driver
*
* cuFile accept multiple calls to `cufileDriverOpen()`, only the first call opens
* the driver, but every call should have a matching call to `cufileDriverClose()`.
*/
void driver_open()
{
CUfileError_t const error = _DriverOpen();
if (error.err != CU_FILE_SUCCESS) {
throw std::runtime_error(std::string{"Unable to open GDS file driver: "} +
cufileop_status_error(error.err));
}
}

/**
* @brief Close the cuFile driver
*/
void driver_close()
{
CUfileError_t const error = _DriverClose();
if (error.err != CU_FILE_SUCCESS) {
throw std::runtime_error(std::string{"Unable to close GDS file driver: "} +
cufileop_status_error(error.err));
}
}
};

/**
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/kvikio/stream.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
#pragma once

#include <sys/types.h>
#include <algorithm>
#include <cstdlib>
#include <iostream>
#include <kvikio/error.hpp>
#include <kvikio/shim/cuda.hpp>
#include <kvikio/shim/cufile.hpp>
Expand Down

0 comments on commit 521c80f

Please sign in to comment.