Skip to content

Commit

Permalink
Regards #681: WIP: Added a fatbin builder based on NVIDIA's libnvfatb…
Browse files Browse the repository at this point in the history
…in; but...

* not tested it yet
* need to add the nvFatbin documentation example program, rewritten to use the APIs
  • Loading branch information
eyalroz committed Sep 22, 2024
1 parent 21d1e69 commit 2b3650c
Show file tree
Hide file tree
Showing 8 changed files with 475 additions and 4 deletions.
1 change: 1 addition & 0 deletions .github/action-scripts/install-cuda-ubuntu.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ CUDA_PACKAGES_IN=(
"cudart-dev"
"nvcc"
"profiler-api"
"libnvfatbin"
)

## -------------------
Expand Down
1 change: 1 addition & 0 deletions .github/action-scripts/install-cuda-windows.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ $CUDA_PACKAGES_IN = @(
"nvrtc_dev";
"nsight_nvtx";
"nvtx";
"nvfatbin";
"cudart";
"visual_studio_integration";
"cuda_profiler_api";
Expand Down
27 changes: 23 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ if(libm_exists)
set(c_math_lib m)
endif()

if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 11.1)
foreach(tgt in nvptxcompiler nvptxcompiler_static)
if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.4)
foreach(tgt in nvfatbin nvfatbin_static)
if (NOT TARGET ${tgt})
_CUDAToolkit_find_and_add_import_lib(${tgt})
endif()
Expand Down Expand Up @@ -79,11 +79,30 @@ add_library("${caw_namespace}::driver-and-runtime" ALIAS caw_runtime-and-driver)
target_link_libraries(caw_rtc INTERFACE cuda-api-wrappers::runtime-and-driver CUDA::nvrtc)
# The PTX compiler library is only considered for CUDA 11.1 and later. Link
# caw_rtc against the first available form of it: the shared imported target,
# the static imported target, a located shared library file, or a located
# static library file (which is linked together with pthread).
if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 11.1)
    if (TARGET CUDA::nvptxcompiler)
        target_link_libraries(caw_rtc INTERFACE CUDA::nvptxcompiler)
        set(ptx_compiler_target nvptxcompiler)
    # This branch must test for the _static target; re-testing the shared target
    # here would make the branch unreachable.
    elseif (TARGET CUDA::nvptxcompiler_static)
        target_link_libraries(caw_rtc INTERFACE CUDA::nvptxcompiler_static)
    elseif(EXISTS "${CUDA_nvptxcompiler_LIBRARY}")
        target_link_libraries(caw_rtc INTERFACE "${CUDA_nvptxcompiler_LIBRARY}")
    elseif(EXISTS "${CUDA_nvptxcompiler_static_LIBRARY}")
        target_link_libraries(caw_rtc INTERFACE "${CUDA_nvptxcompiler_static_LIBRARY}" pthread)
    else()
        message(WARNING "Could not locate a valid NVIDIA PTX Compiler target or library file")
    endif()
endif()
# The nvFatbin library is only considered for CUDA 12.4 and later. Link
# caw_runtime-and-driver against the first available form of it: the shared
# imported target, the static imported target, a located shared library file,
# or a located static library file.
if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.4)
    if (TARGET CUDA::nvfatbin)
        target_link_libraries(caw_runtime-and-driver INTERFACE CUDA::nvfatbin)
    # This branch must test for the _static target; re-testing the shared target
    # here would make the branch unreachable.
    elseif (TARGET CUDA::nvfatbin_static)
        target_link_libraries(caw_runtime-and-driver INTERFACE CUDA::nvfatbin_static)
    elseif(EXISTS "${CUDA_nvfatbin_LIBRARY}")
        target_link_libraries(caw_runtime-and-driver INTERFACE "${CUDA_nvfatbin_LIBRARY}")
    elseif(EXISTS "${CUDA_nvfatbin_static_LIBRARY}")
        target_link_libraries(caw_runtime-and-driver INTERFACE "${CUDA_nvfatbin_static_LIBRARY}")
    else()
        set(ptx_compiler_target nvptxcompiler_static)
        message(WARNING "Could not locate a valid NVIDIA fatbin creator target or library file")
    endif()
    target_link_libraries(caw_rtc INTERFACE CUDA::${ptx_compiler_target})
endif()

target_link_libraries(caw_nvtx INTERFACE cuda-api-wrappers::runtime-and-driver)
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ This is a header-only library of integrated wrappers around the core parts of NV
* The slightly higher-level CUDA [Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
* NVIDIA's dynamic CUDA code compilation library, [NVRTC](http://docs.nvidia.com/cuda/nvrtc/index.html)
* NVIDIA's out-of-driver, full-featured [PTX compiler library](https://docs.nvidia.com/cuda/ptx-compiler-api/index.html) (available since CUDA 11.1)
* NVIDIA's fat binary creation library, [nvFatbin](https://docs.nvidia.com/cuda/nvfatbin/index.html) (available since CUDA 12.4)
* The NVIDIA profiler in-program API, also known as [NVTX](https://docs.nvidia.com/cuda/profiler-users-guide/index.html#nvtx) (the NVIDIA Toolkit Extensions library).

It is intended for those who would otherwise use these APIs directly, to make working with them more intuitive and consistent, making use of modern C++ language capabilities, programming idioms and best practices. In a nutshell - making CUDA API work more fun :-)
Expand Down
3 changes: 3 additions & 0 deletions src/cuda/api.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@
#include "api/kernels/in_library.hpp"
#endif
#include "api/link.hpp"
#if CUDA_VERSION >= 12040
#include "api/fatbin_builder.hpp"
#endif

#include "api/current_device.hpp"

Expand Down
260 changes: 260 additions & 0 deletions src/cuda/api/fatbin_builder.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,260 @@
/**
* @file
*
* @brief Contains the @ref fatbin_builder_t class and related code.
*/
#pragma once
#ifndef CUDA_API_WRAPPERS_FATBIN_HPP_
#define CUDA_API_WRAPPERS_FATBIN_HPP_

#include "detail/region.hpp"
#include "fatbin_options.hpp"
#include "types.hpp"

#include <nvFatbin.h>

namespace cuda {

///@cond
class fatbin_builder_t;
///@endcond

namespace fatbin_builder {

/// The raw handle type which the nvFatbin library uses for a fatbin builder
using handle_t = nvFatbinHandle;

/// Wraps an existing raw handle in a @ref fatbin_builder_t; by default, the
/// wrapper does not take ownership of the handle.
inline fatbin_builder_t wrap(handle_t handle, bool take_ownership = false) noexcept;

/// Creates a new fatbin builder using the specified options
inline fatbin_builder_t create(const options_t & options);

namespace detail_ {

/// Produces a textual description of a fatbin builder, given its raw handle,
/// for use in error messages
inline ::std::string identify(handle_t handle)
{
    ::std::string description { "Fatbin builder with handle " };
    description += cuda::detail_::ptr_as_hex(handle);
    return description;
}

/// Produces a textual description of a (wrapped) fatbin builder, for use in
/// error messages
inline ::std::string identify(const fatbin_builder_t&);

} // namespace detail_

} // namespace fatbin_builder


/**
 * A wrapper class for an nvFatbin "fatbin builder" handle: fragments of
 * device-side code (PTX source, LTO IR, cubin etc.) are added to the builder,
 * after which the resulting fat binary can be generated into a memory region.
 *
 * @note Instances are movable but not copyable. Other than by moving, they
 * are obtained via @ref fatbin_builder::wrap, which is a friend of this class.
 */
class fatbin_builder_t {
public: // type definitions
    using size_type = ::size_t;

    /// Frees the buffers which @ref build() allocates using plain `operator new`
    struct deleter_type {
        void operator()(void * data) { operator delete(data); }
    };

public: // getters

    /// The raw nvFatbin handle which this object wraps
    fatbin_builder::handle_t handle() const
    { return handle_; }

    /// True if this wrapper is responsible for telling CUDA to destroy
    /// the fatbin handle upon the wrapper's own destruction
    bool is_owning() const noexcept
    { return owning; }

protected: // unsafe actions

    /// Generates the fat binary into @p target_region, without checking that
    /// the region is large enough to hold it
    void build_without_size_check_in(memory::region_t target_region) const
    {
        auto status = nvFatbinGet(handle_, target_region.data());
        throw_if_error_lazy(status, "Failed completing the generation of a fatbin at " +
            cuda::detail_::ptr_as_hex(target_region.data()));
    }

public:
    /// The size, in bytes, which nvFatbin reports for the fat binary to be
    /// generated from the fragments added so far
    size_type size() const
    {
        size_type result;
        auto status = nvFatbinSize(handle_, &result);
        throw_if_error_lazy(status, "Failed determining prospective fatbin size for " + fatbin_builder::detail_::identify(*this));
        return result;
    }

    /**
     * Generates the fat binary into a caller-provided memory region
     *
     * @param target_region where to write the fat binary
     *
     * @throws ::std::invalid_argument if the region is smaller than @ref size()
     */
    void build_in(memory::region_t target_region) const
    {
        auto required_size = size();
        if (target_region.size() < required_size) {
            throw ::std::invalid_argument("Provided region for fatbin creation is of size "
                + ::std::to_string(target_region.size()) + " bytes, while the fatbin requires " + ::std::to_string(required_size));
        }
        return build_without_size_check_in(target_region);
    }

    /**
     * Generates the fat binary into a newly-allocated buffer
     *
     * @return an owning region, of size @ref size(), holding the fat binary
     */
    memory::unique_region<deleter_type> build() const
    {
        auto size_ = size();
        auto ptr = operator new(size_);
        memory::region_t target_region{ptr, size_};
        try {
            // The region was allocated with exactly the size just obtained, so
            // there is no need to obtain the size again merely to validate it
            build_without_size_check_in(target_region);
        }
        catch (...) {
            // Nothing owns the buffer yet, so it must be released here
            operator delete(ptr);
            throw;
        }
        return memory::unique_region<deleter_type>(target_region);
    }

    /**
     * Adds a PTX source fragment to the fat binary being built
     *
     * @param identifier a name for the fragment, passed on to nvFatbin
     * @param nul_terminated_ptx_source the PTX source; its last character must
     *     be a nul character, and that character is counted in the span's size
     * @param target_compute_capability the compute capability which the
     *     PTX targets
     *
     * @note the validity checks of the source are only performed when NDEBUG
     * is not defined
     */
    void add_ptx_source(
        const char* identifier,
        span<char> nul_terminated_ptx_source,
        device::compute_capability_t target_compute_capability) const // no support for options, for now
    {
#ifndef NDEBUG
        if (nul_terminated_ptx_source.empty()) {
            throw ::std::invalid_argument("Empty PTX source code passed for addition into fatbin");
        }
        if (nul_terminated_ptx_source[nul_terminated_ptx_source.size() - 1] != '\0') {
            throw ::std::invalid_argument("PTX source code passed for addition into fatbin was not nul-character-terminated");
        }
#endif
        auto compute_capability_str = ::std::to_string(target_compute_capability.as_combined_number());
        auto empty_cmdline = "";
        auto status = nvFatbinAddPTX(handle_,
            nul_terminated_ptx_source.data(),
            nul_terminated_ptx_source.size(),
            compute_capability_str.c_str(),
            identifier,
            empty_cmdline);
        throw_if_error_lazy(status, "Failed adding PTX source fragment "
            + ::std::string(identifier) + " at " + detail_::ptr_as_hex(nul_terminated_ptx_source.data())
            + " to a fat binary for target compute capability " + compute_capability_str);
    }

    /**
     * Adds an LTO IR fragment to the fat binary being built
     *
     * @param identifier a name for the fragment, passed on to nvFatbin
     * @param lto_ir the memory region holding the LTO IR
     * @param target_compute_capability the compute capability which the
     *     fragment targets
     */
    void add_lto_ir(
        const char* identifier,
        memory::region_t lto_ir,
        device::compute_capability_t target_compute_capability) const
    {
        auto compute_capability_str = ::std::to_string(target_compute_capability.as_combined_number());
        auto empty_cmdline = "";
        auto status = nvFatbinAddLTOIR(
            handle_, lto_ir.data(), lto_ir.size(), compute_capability_str.c_str(), identifier, empty_cmdline);
        throw_if_error_lazy(status, "Failed adding LTO IR fragment "
            + ::std::string(identifier) + " at " + detail_::ptr_as_hex(lto_ir.data())
            + " to a fat binary for target compute capability " + compute_capability_str);
    }

    /**
     * Adds a cubin fragment to the fat binary being built
     *
     * @param identifier a name for the fragment, passed on to nvFatbin
     * @param cubin the memory region holding the cubin
     * @param target_compute_capability the compute capability which the
     *     cubin targets
     */
    void add_cubin(
        const char* identifier,
        memory::region_t cubin,
        device::compute_capability_t target_compute_capability) const
    {
        auto compute_capability_str = ::std::to_string(target_compute_capability.as_combined_number());
        auto status = nvFatbinAddCubin(
            handle_, cubin.data(), cubin.size(), compute_capability_str.c_str(), identifier);
        throw_if_error_lazy(status, "Failed adding cubin fragment "
            + ::std::string(identifier) + " at " + detail_::ptr_as_hex(cubin.data())
            + " to a fat binary for target compute capability " + compute_capability_str);
    }

#if CUDA_VERSION >= 12050
    /**
     * Adds relocatable PTX entries from a host object to the fat binary being built
     *
     * @param ptx_code PTX "host object". TODO: Is this PTX code in text mode? Something else?
     *
     * @note The builder's options (specified on creation) are ignored for these operations.
     */
    void add_relocatable_ptx(memory::region_t ptx_code) const
    {
        auto status = nvFatbinAddReloc(handle_, ptx_code.data(), ptx_code.size());
        throw_if_error_lazy(status, "Failed adding relocatable PTX code at " + detail_::ptr_as_hex(ptx_code.data())
            + " to fatbin builder " + fatbin_builder::detail_::identify(*this) );
    }

    // TODO: WTF is an index?
    /// Adds an "index" - the memory region @p index, named @p identifier - to
    /// the fat binary being built
    void add_index(const char* identifier, memory::region_t index) const
    {
        auto status = nvFatbinAddIndex(handle_, index.data(), index.size(), identifier);
        throw_if_error_lazy(status, "Failed adding index " + ::std::string(identifier) + " at "
            + detail_::ptr_as_hex(index.data()) + " to a fat binary");
    }
#endif // CUDA_VERSION >= 12050

protected: // constructors

    fatbin_builder_t(
        fatbin_builder::handle_t handle,
        // no support for options, for now
        bool take_ownership) noexcept
    : handle_(handle), owning(take_ownership)
    {}

public: // friendship

    friend fatbin_builder_t fatbin_builder::wrap(fatbin_builder::handle_t, bool) noexcept;

public: // constructors and destructor

    fatbin_builder_t(const fatbin_builder_t &) = delete;

    /// Takes over the handle of @p other, along with the ownership of the
    /// handle, if @p other had it
    fatbin_builder_t(fatbin_builder_t &&other) noexcept:
        fatbin_builder_t(other.handle_, other.owning)
    {
        other.owning = false;
    }

    ~fatbin_builder_t() noexcept(false)
    {
        if (owning) {
            // The destruction call nullifies the handle it is passed, so the
            // original value is kept for reporting a failure
            const auto handle_before_destruction = handle_;
            auto status = nvFatbinDestroy(&handle_); // this nullifies the handle :-O
            throw_if_error_lazy(status,
                ::std::string("Failed destroying fatbin builder ") + detail_::ptr_as_hex(handle_before_destruction) +
                " in " + fatbin_builder::detail_::identify(handle_before_destruction));
        }
    }

public: // operators

    fatbin_builder_t &operator=(const fatbin_builder_t &) = delete;

    /// Exchanges both the handle and its ownership with @p other, so that
    /// whatever this object held before is cleaned up when @p other is destroyed
    fatbin_builder_t &operator=(fatbin_builder_t &&other) noexcept
    {
        ::std::swap(handle_, other.handle_);
        // The ownership flag must travel together with the handle it describes
        ::std::swap(owning, other.owning);
        return *this;
    }

protected: // data members
    fatbin_builder::handle_t handle_;
    // This field is altered only by move construction and move assignment;
    // other than in those cases it must not be altered
    bool owning;
};

namespace fatbin_builder {

/**
 * Creates a new fatbin builder (before any fragments of code have been
 * added to it)
 *
 * @param options the options to marshal and pass to nvFatbin when
 *     creating the builder
 * @return a wrapper which owns the newly-created builder handle
 */
inline fatbin_builder_t create(const options_t & options)
{
    auto marshalled = marshalling::marshal(options);
    auto raw_option_ptrs = marshalled.option_ptrs();
    handle_t created_handle;
    auto creation_status = nvFatbinCreate(&created_handle, raw_option_ptrs.data(), raw_option_ptrs.size());
    throw_if_error_lazy(creation_status, "Failed creating a new fatbin builder");
    // The handle was created right here, so the wrapper must be the one to destroy it
    const bool take_ownership = true;
    return wrap(created_handle, take_ownership);
}

/// Wraps a raw fatbin builder handle in a @ref fatbin_builder_t, which will
/// destroy the handle on its own destruction only if @p take_ownership is true
inline fatbin_builder_t wrap(handle_t handle, bool take_ownership) noexcept
{
    return fatbin_builder_t(handle, take_ownership);
}

namespace detail_ {

inline ::std::string identify(const fatbin_builder_t& builder)
{
return identify(builder.handle());
}

} // namespace detail_

} // namespace fatbin_builder


} // namespace cuda

#endif // CUDA_API_WRAPPERS_FATBIN_HPP_
Loading

0 comments on commit 2b3650c

Please sign in to comment.