diff --git a/.github/workflows/continuous_integration.yml b/.github/workflows/continuous_integration.yml index cb2b3395..0ec06a9d 100644 --- a/.github/workflows/continuous_integration.yml +++ b/.github/workflows/continuous_integration.yml @@ -21,7 +21,7 @@ env: ROCM_PATH: "/opt/rocm" GPU_TARGETS: "gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102" PATH: "/usr/bin:$PATH" - PC_SAMPLING_TESTS_REGEX: ".*pc_sampling.*" + PC_SAMPLING_TESTS_REGEX: ".*pc-sampling.*" jobs: core: diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 397e3a33..6bbec14e 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -57,6 +57,7 @@ add_subdirectory(async-copy-tracing) add_subdirectory(scratch-memory-tracing) add_subdirectory(c-tool) add_subdirectory(page-migration) +add_subdirectory(pc_sampling) add_subdirectory(thread-trace) add_subdirectory(hip-graph-tracing) diff --git a/tests/pc_sampling/CMakeLists.txt b/tests/pc_sampling/CMakeLists.txt new file mode 100644 index 00000000..a9096b2c --- /dev/null +++ b/tests/pc_sampling/CMakeLists.txt @@ -0,0 +1,142 @@ +# +# +# +cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR) + +if(NOT CMAKE_HIP_COMPILER) + find_program( + amdclangpp_EXECUTABLE + NAMES amdclang++ + HINTS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm + PATHS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm + PATH_SUFFIXES bin llvm/bin NO_CACHE) + mark_as_advanced(amdclangpp_EXECUTABLE) + + if(amdclangpp_EXECUTABLE) + set(CMAKE_HIP_COMPILER "${amdclangpp_EXECUTABLE}") + endif() +endif() + +project(rocprofiler-sdk-samples-pc-sampling-integration-test LANGUAGES CXX HIP) + +foreach(_TYPE DEBUG MINSIZEREL RELEASE RELWITHDEBINFO) + if("${CMAKE_HIP_FLAGS_${_TYPE}}" STREQUAL "") + set(CMAKE_HIP_FLAGS_${_TYPE} "${CMAKE_CXX_FLAGS_${_TYPE}}") + endif() +endforeach() + +find_package(rocprofiler-sdk REQUIRED) + +find_package(PkgConfig) + +if(PkgConfig_FOUND) + set(ENV{PKG_CONFIG_SYSTEM_INCLUDE_PATH} "") + pkg_check_modules(DW libdw) + + if(DW_FOUND + AND DW_INCLUDE_DIRS + AND DW_LIBRARIES) + set(libdw_INCLUDE_DIR + "${DW_INCLUDE_DIRS}" + CACHE FILEPATH "libdw include directory") + set(libdw_LIBRARY + "${DW_LIBRARIES}" + CACHE FILEPATH "libdw libraries") + endif() +endif() + +if(NOT libdw_INCLUDE_DIR OR NOT libdw_LIBRARY) + find_path( + libdw_ROOT_DIR + NAMES include/elfutils/libdw.h + HINTS ${libdw_ROOT} + PATHS ${libdw_ROOT}) + + mark_as_advanced(libdw_ROOT_DIR) + + find_path( + libdw_INCLUDE_DIR + NAMES elfutils/libdw.h + HINTS ${libdw_ROOT} + PATHS ${libdw_ROOT} + PATH_SUFFIXES include) + + find_library( + libdw_LIBRARY + NAMES dw + HINTS ${libdw_ROOT} + PATHS ${libdw_ROOT} + PATH_SUFFIXES lib lib64) +endif() + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(libdw DEFAULT_MSG libdw_LIBRARY libdw_INCLUDE_DIR) + +if(libdw_FOUND AND NOT TARGET libdw::libdw) + add_library(libdw::libdw INTERFACE IMPORTED) + if(TARGET PkgConfig::DW AND DW_FOUND) + target_link_libraries(libdw::libdw INTERFACE PkgConfig::DW) + else() + target_link_libraries(libdw::libdw INTERFACE ${libdw_LIBRARY}) + target_include_directories(libdw::libdw SYSTEM INTERFACE ${libdw_INCLUDE_DIR}) + endif() +endif() + +add_library(pc-sampling-integration-test-client SHARED) +target_sources( + pc-sampling-integration-test-client + PRIVATE address_translation.cpp + address_translation.hpp + client.cpp + client.hpp + cid_retirement.cpp + cid_retirement.hpp + codeobj.cpp + codeobj.hpp + external_cid.cpp + external_cid.hpp + kernel_tracing.cpp + kernel_tracing.hpp + pcs.hpp + pcs.cpp + utils.hpp + utils.cpp) +target_link_libraries( + pc-sampling-integration-test-client + PRIVATE rocprofiler-sdk::rocprofiler-sdk rocprofiler-sdk::tests-build-flags + rocprofiler-sdk::tests-common-library amd_comgr dw) + +set_source_files_properties(main.cpp PROPERTIES LANGUAGE HIP) +find_package(Threads REQUIRED) + +add_executable(pc-sampling-integration-test) +target_sources(pc-sampling-integration-test PRIVATE main.cpp) +target_link_libraries( + pc-sampling-integration-test + PRIVATE pc-sampling-integration-test-client Threads::Threads + rocprofiler-sdk::tests-build-flags) + +# rocprofiler_pc-sampling-integration_get_preload_env(PRELOAD_ENV +# pc-sampling-integration-test-client) +# rocprofiler_pc-sampling-integration_get_ld_library_path_env(LIBRARY_PATH_ENV) + +# set(pc-sampling-integration-test-env ${PRELOAD_ENV} ${LIBRARY_PATH_ENV}) + +add_test(NAME pc-sampling-integration-test + COMMAND $) + +set_tests_properties( + pc-sampling-integration-test + PROPERTIES + TIMEOUT + 45 + LABELS + "integration-tests;pc-sampling" + # ENVIRONMENT + # "${ROCPROFILER_MEMCHECK_PRELOAD_ENV};HSA_TOOLS_LIB=$" + SKIP_REGULAR_EXPRESSION + "PC sampling unavailable" + ENVIRONMENT + "${pc-sampling-integration-test-env}" + FAIL_REGULAR_EXPRESSION + "${ROCPROFILER_DEFAULT_FAIL_REGEX}") diff --git a/tests/pc_sampling/address_translation.cpp b/tests/pc_sampling/address_translation.cpp new file mode 100644 index 00000000..0632f7ac --- /dev/null +++ b/tests/pc_sampling/address_translation.cpp @@ -0,0 +1,197 @@ +// MIT License +// +// Copyright (c) 2024 ROCm Developer Tools +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +// undefine NDEBUG so asserts are implemented +#ifdef NDEBUG +# undef NDEBUG +#endif + +#include "address_translation.hpp" +#include "pcs.hpp" +#include "utils.hpp" + +#include +#include +#include +#include +#include +#include + +namespace client +{ +namespace address_translation +{ +namespace +{ +struct FlatProfiler +{ +public: + FlatProfiler() = default; + ~FlatProfiler() = default; + + CodeobjAddressTranslate translator; + KernelObjectMap kernel_object_map; + FlatProfile flat_profile; + std::mutex global_mut; +}; +} // namespace + +// Raw pointer to prevent early destruction of static objects +FlatProfiler* flat_profiler = nullptr; + +void +init() +{ + flat_profiler = new FlatProfiler(); +} + +void +fini() +{ + delete flat_profiler; +} + +CodeobjAddressTranslate& +get_address_translator() +{ + return flat_profiler->translator; +} + +KernelObjectMap& +get_kernel_object_map() +{ + return flat_profiler->kernel_object_map; +} + +FlatProfile& +get_flat_profile() +{ + return flat_profiler->flat_profile; +} + +std::mutex& +get_global_mutex() +{ + return flat_profiler->global_mut; +} + +KernelObject::KernelObject(uint64_t code_object_id, + std::string kernel_name, + uint64_t begin_address, + uint64_t end_address) +: code_object_id_(code_object_id) +, kernel_name_(kernel_name) +, begin_address_(begin_address) +, end_address_(end_address) +{ + auto& translator = get_address_translator(); + uint64_t vaddr = begin_address; + while(vaddr < end_address) + { + auto inst = translator.get(vaddr); + vaddr += inst->size; + this->add_instruction(std::move(inst)); + } +} + +void +dump_flat_profile() +{ + // It seems that an instruction can be part of multiple + // instances of the same kernel loaded on two different devices. + // We need to prevent counting the same instruction multiple times. + std::unordered_set visited_instructions; + + const auto& kernel_object_map = get_kernel_object_map(); + const auto& flat_profile = get_flat_profile(); + + std::stringstream ss; + uint64_t samples_num = 0; + kernel_object_map.iterate_kernel_objects([&](const KernelObject* kernel_obj) { + ss << "\n===================================="; + ss << "The kernel: " << kernel_obj->kernel_name() + << " with the begin address: " << kernel_obj->begin_address() + << " from code object with id: " << kernel_obj->code_object_id() << std::endl; + kernel_obj->iterate_instrunctions([&](const Instruction& inst) { + ss << "\t"; + ss << inst.inst << "\t"; + ss << inst.comment << "\t"; + ss << "samples: "; + const auto* _sample_instruction = flat_profile.get_sample_instruction(inst); + if(_sample_instruction == nullptr) + ss << "0"; + else + { + _sample_instruction->process([&](const SampleInstruction& sample_instruction) { + ss << sample_instruction.sample_count(); + // Assure that each instruction is counted once. + if(visited_instructions.count(sample_instruction.inst()) == 0) + { + samples_num += sample_instruction.sample_count(); + visited_instructions.insert(sample_instruction.inst()); + } + + if(sample_instruction.exec_mask_counts().size() <= 1) + { + ss << ", exec_mask: " << std::hex; + ss << sample_instruction.exec_mask_counts().begin()->first; + ss << std::dec; + assert(sample_instruction.sample_count() == + sample_instruction.exec_mask_counts().begin()->second); + } + else + { + uint64_t num_samples_sum = 0; + // More than one exec_mask + for(auto& [exec_mask, samples_per_exec] : + sample_instruction.exec_mask_counts()) + { + ss << std::endl; + ss << "\t\t" + << "exec_mask: " << std::hex << exec_mask; + ss << "\t" + << "samples: " << std::dec << samples_per_exec; + num_samples_sum += samples_per_exec; + ss << std::endl; + } + assert(sample_instruction.sample_count() == num_samples_sum); + } + }); + } + ss << std::endl; + }); + ss << "====================================\n" << std::endl; + }); + + ss << "The total number of decoded samples: " << samples_num << std::endl; + ss << "The total number of collected samples: " << client::pcs::total_samples_num() + << std::endl; + + *utils::get_output_stream() << ss.str() << std::endl; + + assert(samples_num == client::pcs::total_samples_num()); + // We expect at least one PC sample to be decoded/delivered; + assert(samples_num > 0); +} + +} // namespace address_translation +} // namespace client diff --git a/tests/pc_sampling/address_translation.hpp b/tests/pc_sampling/address_translation.hpp new file mode 100644 index 00000000..1426fcfe --- /dev/null +++ b/tests/pc_sampling/address_translation.hpp @@ -0,0 +1,273 @@ +// MIT License +// +// Copyright (c) 2024 ROCm Developer Tools +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace client +{ +namespace address_translation +{ +using Instruction = rocprofiler::codeobj::disassembly::Instruction; +using CodeobjAddressTranslate = rocprofiler::codeobj::disassembly::CodeobjAddressTranslate; + +class KernelObject +{ +private: + using process_inst_fn = std::function; + +public: + KernelObject() = default; + KernelObject(uint64_t code_object_id, + std::string kernel_name, + uint64_t begin_address, + uint64_t end_address); + + // write lock required + void add_instruction(std::unique_ptr instruction) + { + auto lock = std::unique_lock{mut}; + + instructions_.push_back(std::move(instruction)); + } + + // read lock required + void iterate_instrunctions(process_inst_fn fn) const + { + auto lock = std::shared_lock{mut}; + + for(const auto& inst : this->instructions_) + fn(*inst); + } + + uint64_t code_object_id() const { return code_object_id_; }; + std::string kernel_name() const { return kernel_name_; }; + uint64_t begin_address() const { return begin_address_; }; + uint64_t end_address() const { return end_address_; }; + +private: + mutable std::shared_mutex mut; + uint64_t code_object_id_; + std::string kernel_name_; + uint64_t begin_address_; + uint64_t end_address_; + std::vector> instructions_; +}; + +class KernelObjectMap +{ +private: + using process_kernel_fn = std::function; + +public: + KernelObjectMap() = default; + + // write lock required + void add_kernel(uint64_t code_object_id, + std::string name, + uint64_t begin_address, + uint64_t end_address) + { + auto lock = std::unique_lock{mut}; + + auto key = form_key(code_object_id, name, begin_address); + auto it = kernel_object_map.find(key); + assert(it == kernel_object_map.end()); + kernel_object_map.insert( + {key, + std::make_unique(code_object_id, name, begin_address, end_address)}); + } + +#if 0 + // read lock required + KernelObject* get_kernel(uint64_t code_object_id, std::string name) + { + auto lock = std::shared_lock{mut}; + + auto key = form_key(code_object_id, name); + auto it = kernel_object_map.find(key); + if(it == kernel_object_map.end()) + { + return nullptr; + } + + return it->second.get(); + } +#endif + + // read lock required + void iterate_kernel_objects(process_kernel_fn fn) const + { + auto lock = std::shared_lock{mut}; + + for(auto& [_, kernel_obj] : kernel_object_map) + fn(kernel_obj.get()); + } + +private: + std::unordered_map> kernel_object_map; + mutable std::shared_mutex mut; + + std::string form_key(uint64_t code_object_id, std::string kernel_name, uint64_t begin_address) + { + return std::to_string(code_object_id) + "_" + kernel_name + "_" + + std::to_string(begin_address); + } +}; + +class SampleInstruction +{ +private: + using proces_sample_inst_fn = std::function; + +public: + SampleInstruction() = default; + SampleInstruction(std::unique_ptr inst) + : inst_(std::move(inst)) + {} + + // write lock required + void add_sample(uint64_t exec_mask) + { + auto lock = std::unique_lock{mut}; + + if(exec_mask_counts_.find(exec_mask) == exec_mask_counts_.end()) + { + exec_mask_counts_[exec_mask] = 0; + } + exec_mask_counts_[exec_mask]++; + sample_count_++; + } + + // read lock required + void process(proces_sample_inst_fn fn) const + { + auto lock = std::shared_lock{mut}; + + fn(*this); + } + + Instruction* inst() const { return inst_.get(); }; + // In case an instruction is samples with different exec masks, + // keep track of how many time each exec_mask was observed. + const std::map& exec_mask_counts() const { return exec_mask_counts_; } + // How many time this instruction is samples + uint64_t sample_count() const { return sample_count_; }; + +private: + mutable std::shared_mutex mut; + + // FIXME: prevent direct access of the following fields. + // The following fields should be accessible only from within `process` function. + std::unique_ptr inst_; + // In case an instruction is samples with different exec masks, + // keep track of how many time each exec_mask was observed. + std::map exec_mask_counts_; + // How many time this instruction is samples + uint64_t sample_count_ = 0; +}; + +class FlatProfile +{ +public: + FlatProfile() = default; + + // write lock required + void add_sample(std::unique_ptr instruction, uint64_t exec_mask) + { + auto lock = std::unique_lock{mut}; + + auto inst_id = get_instruction_id(*instruction); + auto itr = samples.find(inst_id); + if(itr == samples.end()) + { + // Add new instruction + samples.insert({inst_id, std::make_unique(std::move(instruction))}); + itr = samples.find(inst_id); + } + + auto* sample_instruction = itr->second.get(); + sample_instruction->add_sample(exec_mask); + } + + // read lock required + const SampleInstruction* get_sample_instruction(const Instruction& inst) const + { + auto lock = std::shared_lock{mut}; + + auto inst_id = get_instruction_id(inst); + auto itr = samples.find(inst_id); + if(itr == samples.end()) return nullptr; + return itr->second.get(); + } + +private: + // For the sake of this test, we use `ld_addr` as the instruction identifier. + // TODO: To cover code object loading/unloading and relocations, + // use `(code_object_id + ld_addr)` as the unique identifier. + // This assumes the decoder chage to return code_object_id as part + // of the `LoadedCodeobjDecoder::get(uint64_t ld_addr)` method. + using instrution_id_t = uint64_t; + instrution_id_t get_instruction_id(const Instruction& instruction) const + { + // Ensure the decoder determined the `ld_addr`. + assert(instruction.ld_addr > 0); + return instruction.ld_addr; + } + + std::unordered_map> samples; + mutable std::shared_mutex mut; +}; + +std::mutex& +get_global_mutex(); + +CodeobjAddressTranslate& +get_address_translator(); + +KernelObjectMap& +get_kernel_object_map(); + +FlatProfile& +get_flat_profile(); + +void +dump_flat_profile(); + +void +init(); + +void +fini(); +} // namespace address_translation +} // namespace client diff --git a/tests/pc_sampling/cid_retirement.cpp b/tests/pc_sampling/cid_retirement.cpp new file mode 100644 index 00000000..fe2bb147 --- /dev/null +++ b/tests/pc_sampling/cid_retirement.cpp @@ -0,0 +1,129 @@ +// MIT License +// +// Copyright (c) 2024 ROCm Developer Tools +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +// undefine NDEBUG so asserts are implemented +#ifdef NDEBUG +# undef NDEBUG +#endif + +/** + * @file samples/pc_sampling_library/client.cpp + * + * @brief Example rocprofiler client (tool) + */ + +#include "utils.hpp" + +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace client +{ +namespace cid_retirement +{ +constexpr size_t BUFFER_SIZE_BYTES = 8192; +constexpr size_t WATERMARK = (BUFFER_SIZE_BYTES / 4); + +rocprofiler_buffer_id_t cid_retirement_buffer; + +void +cid_retirement_tracing_buffered(rocprofiler_context_id_t /*context*/, + rocprofiler_buffer_id_t /*buffer_id*/, + rocprofiler_record_header_t** headers, + size_t num_headers, + void* /*user_data*/, + uint64_t /*drop_count*/) +{ + std::stringstream ss; + + for(size_t i = 0; i < num_headers; ++i) + { + auto* header = headers[i]; + + if(header == nullptr) + { + throw std::runtime_error{ + "rocprofiler provided a null pointer to header. this should never happen"}; + } + else if(header->hash != + rocprofiler_record_header_compute_hash(header->category, header->kind)) + { + throw std::runtime_error{"rocprofiler_record_header_t (category | kind) != hash"}; + } + else if(header->category == ROCPROFILER_BUFFER_CATEGORY_TRACING) + { + if(header->kind == ROCPROFILER_BUFFER_TRACING_CORRELATION_ID_RETIREMENT) + { + auto* cid_record = + static_cast( + header->payload); + ss << "... The retired internal correlation id is: " + << cid_record->internal_correlation_id; + ss << ", the timestamp is: " << cid_record->timestamp; + ss << std::endl; + // TODO: assert that the retiring timestamp is greater than + // the greatest timestamp of PC samples matching the retired CID. + } + } + } + + *utils::get_output_stream() << ss.str(); +} + +void +configure_cid_retirement_tracing(rocprofiler_context_id_t context) +{ + ROCPROFILER_CALL(rocprofiler_create_buffer(context, + BUFFER_SIZE_BYTES, + WATERMARK, + ROCPROFILER_BUFFER_POLICY_LOSSLESS, + cid_retirement_tracing_buffered, + nullptr, + &cid_retirement_buffer), + "buffer creation"); + + ROCPROFILER_CALL(rocprofiler_configure_buffer_tracing_service( + context, + ROCPROFILER_BUFFER_TRACING_CORRELATION_ID_RETIREMENT, + nullptr, + 0, + cid_retirement_buffer), + "buffer tracing service for cid retirement configure"); +} + +void +flush_retired_cids() +{ + ROCPROFILER_CALL(rocprofiler_flush_buffer(cid_retirement_buffer), + "Cannot flush retired CIDs buffer"); + *utils::get_output_stream() << "Retired CIDs flushed..." << std::endl; +} + +} // namespace cid_retirement +} // namespace client diff --git a/tests/pc_sampling/cid_retirement.hpp b/tests/pc_sampling/cid_retirement.hpp new file mode 100644 index 00000000..1585f7e3 --- /dev/null +++ b/tests/pc_sampling/cid_retirement.hpp @@ -0,0 +1,38 @@ +// MIT License +// +// Copyright (c) 2024 ROCm Developer Tools +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include +#include + +namespace client +{ +namespace cid_retirement +{ +void +configure_cid_retirement_tracing(rocprofiler_context_id_t context); + +void +flush_retired_cids(); +} // namespace cid_retirement +} // namespace client diff --git a/tests/pc_sampling/client.cpp b/tests/pc_sampling/client.cpp new file mode 100644 index 00000000..b18062e7 --- /dev/null +++ b/tests/pc_sampling/client.cpp @@ -0,0 +1,225 @@ +// MIT License +// +// Copyright (c) 2024 ROCm Developer Tools +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +// undefine NDEBUG so asserts are implemented +#ifdef NDEBUG +# undef NDEBUG +#endif + +/** + * @file samples/pc_sampling_library/client.cpp + * + * @brief Example rocprofiler client (tool) + */ + +#include "client.hpp" + +#include "address_translation.hpp" +#include "cid_retirement.hpp" +#include "codeobj.hpp" +#include "external_cid.hpp" +#include "kernel_tracing.hpp" +#include "pcs.hpp" +#include "utils.hpp" + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace client +{ +namespace +{ +rocprofiler_client_id_t* client_id = nullptr; +rocprofiler_client_finalize_t client_fini_func = nullptr; +rocprofiler_context_id_t client_ctx; + +int +tool_init(rocprofiler_client_finalize_t fini_func, void* /*tool_data*/) +{ + client_fini_func = fini_func; + + address_translation::init(); + external_cid::init(); + pcs::init(); + + ROCPROFILER_CALL(rocprofiler_create_context(&client_ctx), "Cannot create context\n"); + + pcs::configure_pc_sampling_on_all_agents(client_ctx); + + // Enable code object tracing service, to match PC samples to corresponding code object + ROCPROFILER_CALL( + rocprofiler_configure_callback_tracing_service(client_ctx, + ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT, + nullptr, + 0, + client::codeobj::codeobj_tracing_callback, + nullptr), + "code object tracing service configure"); + + cid_retirement::configure_cid_retirement_tracing(client_ctx); + // Kernel tracing service need for external correlation service. + kernel_tracing::configure_kernel_tracing_service(client_ctx); + external_cid::configure_external_correlation_service(client_ctx); + + int valid_ctx = 0; + ROCPROFILER_CALL(rocprofiler_context_is_valid(client_ctx, &valid_ctx), + "failure checking context validity"); + if(valid_ctx == 0) + { + // notify rocprofiler that initialization failed + // and all the contexts, buffers, etc. created + // should be ignored + return -1; + } + + ROCPROFILER_CALL(rocprofiler_start_context(client_ctx), "rocprofiler context start failed"); + + return 0; +} + +void +tool_fini(void* /*tool_data*/) +{ + // Drain all retired correlation IDs + client::sync(); + + if(client_id) + { + // Assert the context is inactive. + int state = -1; + ROCPROFILER_CALL(rocprofiler_context_is_active(client_ctx, &state), + "Cannot inspect the stat of the context.") + assert(state == 0); + + // No need to stop the context, since it has been stopped implicitly by the rocprofiler-SDK. + + // Flush remaining PC samples + pcs::flush_and_destroy_buffers(); + } + + address_translation::dump_flat_profile(); + // deallocation + address_translation::fini(); + external_cid::fini(); + pcs::fini(); +} + +} // namespace + +// forward declaration +void +setup(); + +void +setup() +{ + // Do not force configuration + if(int status = 0; + rocprofiler_is_initialized(&status) == ROCPROFILER_STATUS_SUCCESS && status == 0) + { + *utils::get_output_stream() << "Client forces rocprofiler configuration.\n" << std::endl; + ROCPROFILER_CALL(rocprofiler_force_configure(&rocprofiler_configure), + "failed to force configuration"); + } +} + +void +shutdown() +{} + +void +sync() +{ + // Flush rocprofiler-SDK's buffers containing PC samples. + pcs::flush_buffers(); + + // Flush retired correlation IDs. + cid_retirement::flush_retired_cids(); +} + +} // namespace client + +extern "C" rocprofiler_tool_configure_result_t* +rocprofiler_configure(uint32_t version, + const char* runtime_version, + uint32_t priority, + rocprofiler_client_id_t* id) +{ + // only activate if main tool + if(priority > 0) return nullptr; + + // set the client name + id->name = "PCSamplingExampleTool"; + + // store client info + client::client_id = id; + + // compute major/minor/patch version info + uint32_t major = version / 10000; + uint32_t minor = (version % 10000) / 100; + uint32_t patch = version % 100; + + // generate info string + auto info = std::stringstream{}; + info << id->name << " is using rocprofiler v" << major << "." << minor << "." << patch << " (" + << runtime_version << ")"; + + std::clog << info.str() << std::endl; + + std::ostream* output_stream = nullptr; + std::string filename = "pc_sampling_integration_test.log"; + if(auto* outfile = getenv("ROCPROFILER_SAMPLE_OUTPUT_FILE"); outfile) filename = outfile; + if(filename == "stdout") + output_stream = &std::cout; + else if(filename == "stderr") + output_stream = &std::cerr; + else + output_stream = new std::ofstream{filename}; + + client::utils::get_output_stream() = output_stream; + + // create configure data + static auto cfg = + rocprofiler_tool_configure_result_t{sizeof(rocprofiler_tool_configure_result_t), + &client::tool_init, + &client::tool_fini, + static_cast(output_stream)}; + + // return pointer to configure data + return &cfg; +} diff --git a/tests/pc_sampling/client.hpp b/tests/pc_sampling/client.hpp new file mode 100644 index 00000000..b82f27d7 --- /dev/null +++ b/tests/pc_sampling/client.hpp @@ -0,0 +1,44 @@ +// MIT License +// +// Copyright (c) 2024 ROCm Developer Tools +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#ifdef pc_sampling_code_obj_tracing_client_EXPORTS +# define CLIENT_API __attribute__((visibility("default"))) +#else +# define CLIENT_API +#endif + +#define USE_CLIENT_SHUTDOWN_EXPLICITLY 1 + +namespace client +{ +void +setup() CLIENT_API; + +void +shutdown() CLIENT_API; + +void +sync() CLIENT_API; + +} // namespace client diff --git a/tests/pc_sampling/codeobj.cpp b/tests/pc_sampling/codeobj.cpp new file mode 100644 index 00000000..a9cd688e --- /dev/null +++ b/tests/pc_sampling/codeobj.cpp @@ -0,0 +1,261 @@ +// MIT License +// +// Copyright (c) 2024 ROCm Developer Tools +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +// undefine NDEBUG so asserts are implemented +#ifdef NDEBUG +# undef NDEBUG +#endif + +/** + * @file samples/pc_sampling_library/client.cpp + * + * @brief Example rocprofiler client (tool) + */ + +#include "address_translation.hpp" +#include "client.hpp" +#include "pcs.hpp" +#include "utils.hpp" + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace client +{ +namespace codeobj +{ +#define CODEOBJ_DEBUG 0 + +constexpr bool COPY_MEMORY_CODEOBJ = true; + +std::string +cxa_demangle(std::string_view _mangled_name, int* _status) +{ + constexpr size_t buffer_len = 4096; + // return the mangled since there is no buffer + if(_mangled_name.empty()) + { + *_status = -2; + return std::string{}; + } + + auto _demangled_name = std::string{_mangled_name}; + + // PARAMETERS to __cxa_demangle + // mangled_name: + // A NULL-terminated character string containing the name to be demangled. + // buffer: + // A region of memory, allocated with malloc, of *length bytes, into which the + // demangled name is stored. If output_buffer is not long enough, it is expanded + // using realloc. output_buffer may instead be NULL; in that case, the demangled + // name is placed in a region of memory allocated with malloc. + // _buflen: + // If length is non-NULL, the length of the buffer containing the demangled name + // is placed in *length. + // status: + // *status is set to one of the following values + size_t _demang_len = 0; + char* _demang = abi::__cxa_demangle(_demangled_name.c_str(), nullptr, &_demang_len, _status); + switch(*_status) + { + // 0 : The demangling operation succeeded. + // -1 : A memory allocation failure occurred. + // -2 : mangled_name is not a valid name under the C++ ABI mangling rules. + // -3 : One of the arguments is invalid. + case 0: + { + if(_demang) _demangled_name = std::string{_demang}; + break; + } + case -1: + { + char _msg[buffer_len]; + ::memset(_msg, '\0', buffer_len * sizeof(char)); + ::snprintf(_msg, + buffer_len, + "memory allocation failure occurred demangling %s", + _demangled_name.c_str()); + ::perror(_msg); + break; + } + case -2: break; + case -3: + { + char _msg[buffer_len]; + ::memset(_msg, '\0', buffer_len * sizeof(char)); + ::snprintf(_msg, + buffer_len, + "Invalid argument in: (\"%s\", nullptr, nullptr, %p)", + _demangled_name.c_str(), + (void*) _status); + ::perror(_msg); + break; + } + default: break; + }; + + // if it "demangled" but the length is zero, set the status to -2 + if(_demang_len == 0 && *_status == 0) *_status = -2; + + // free allocated buffer + ::free(_demang); + return _demangled_name; +} + +template +std::string +as_hex(Tp _v, size_t _width = 16) +{ + auto _ss = std::stringstream{}; + _ss.fill('0'); + _ss << "0x" << std::hex << std::setw(_width) << _v; + return _ss.str(); +} + +void +codeobj_tracing_callback(rocprofiler_callback_tracing_record_t record, + rocprofiler_user_data_t* /*user_data*/, + void* /*callback_data*/) +{ + std::stringstream info; + + info << "-----------------------------\n"; + if(record.kind == ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT && + record.operation == ROCPROFILER_CODE_OBJECT_LOAD) + { + auto* data = + static_cast(record.payload); + + if(record.phase == ROCPROFILER_CALLBACK_PHASE_LOAD) + { + auto& global_mut = address_translation::get_global_mutex(); + { + auto lock = std::unique_lock{global_mut}; + + auto& translator = client::address_translation::get_address_translator(); + // register code object inside the decoder + if(std::string_view(data->uri).find("file:///") == 0) + { + translator.addDecoder( + data->uri, data->code_object_id, data->load_delta, data->load_size); + } + else if(COPY_MEMORY_CODEOBJ) + { + translator.addDecoder(reinterpret_cast(data->memory_base), + data->memory_size, + data->code_object_id, + data->load_delta, + data->load_size); + } + else + { + return; + } + + // extract symbols from code object + auto& kernel_object_map = client::address_translation::get_kernel_object_map(); + auto symbolmap = translator.getSymbolMap(); + for(auto& [vaddr, symbol] : symbolmap) + { + kernel_object_map.add_kernel( + data->code_object_id, symbol.name, vaddr, vaddr + symbol.mem_size); + } + } + + info << "code object load :: "; + } + else if(record.phase == ROCPROFILER_CALLBACK_PHASE_UNLOAD) + { + // Ensure all PC samples of the unloaded code object are decoded, + // prior to removing the decoder. + client::sync(); + auto& global_mut = address_translation::get_global_mutex(); + { + auto lock = std::unique_lock{global_mut}; + auto& translator = client::address_translation::get_address_translator(); + translator.removeDecoder(data->code_object_id, data->load_delta); + } + + info << "code object unload :: "; + } + + info << "code_object_id=" << data->code_object_id + << ", rocp_agent=" << data->rocp_agent.handle << ", uri=" << data->uri + << ", load_base=" << as_hex(data->load_base) << ", load_size=" << data->load_size + << ", load_delta=" << as_hex(data->load_delta); + if(data->storage_type == ROCPROFILER_CODE_OBJECT_STORAGE_TYPE_FILE) + info << ", storage_file_descr=" << data->storage_file; + else if(data->storage_type == ROCPROFILER_CODE_OBJECT_STORAGE_TYPE_MEMORY) + info << ", storage_memory_base=" << as_hex(data->memory_base) + << ", storage_memory_size=" << data->memory_size; + + info << std::endl; + } + if(record.kind == ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT && + record.operation == ROCPROFILER_CODE_OBJECT_DEVICE_KERNEL_SYMBOL_REGISTER) + { + auto* data = + static_cast( + record.payload); + + if(record.phase == ROCPROFILER_CALLBACK_PHASE_LOAD) + { + info << "kernel symbol load :: "; + } + else if(record.phase == ROCPROFILER_CALLBACK_PHASE_UNLOAD) + { + info << "kernel symbol unload :: "; + // client_kernels.erase(data->kernel_id); + } + + auto kernel_name = std::regex_replace(data->kernel_name, std::regex{"(\\.kd)$"}, ""); + int demangle_status = 0; + kernel_name = cxa_demangle(kernel_name, &demangle_status); + + info << "code_object_id=" << data->code_object_id << ", kernel_id=" << data->kernel_id + << ", kernel_object=" << as_hex(data->kernel_object) + << ", kernarg_segment_size=" << data->kernarg_segment_size + << ", kernarg_segment_alignment=" << data->kernarg_segment_alignment + << ", group_segment_size=" << data->group_segment_size + << ", private_segment_size=" << data->private_segment_size + << ", kernel_name=" << kernel_name; + + info << std::endl; + } + + *utils::get_output_stream() << info.str() << std::endl; +} + +} // namespace codeobj +} // namespace client diff --git a/tests/pc_sampling/codeobj.hpp b/tests/pc_sampling/codeobj.hpp new file mode 100644 index 00000000..4dc303e9 --- /dev/null +++ b/tests/pc_sampling/codeobj.hpp @@ -0,0 +1,38 @@ +// MIT License +// +// Copyright (c) 2024 ROCm Developer Tools +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include +#include + +namespace client +{ +namespace codeobj +{ +void +codeobj_tracing_callback(rocprofiler_callback_tracing_record_t record, + rocprofiler_user_data_t* user_data, + void* callback_data); + +} // namespace codeobj +} // namespace client diff --git a/tests/pc_sampling/external_cid.cpp b/tests/pc_sampling/external_cid.cpp new file mode 100644 index 00000000..4592fa63 --- /dev/null +++ b/tests/pc_sampling/external_cid.cpp @@ -0,0 +1,110 @@ +// MIT License +// +// Copyright (c) 2024 ROCm Developer Tools +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +// undefine NDEBUG so asserts are implemented +#ifdef NDEBUG +# undef NDEBUG +#endif + +/** + * @file samples/pc_sampling_library/client.cpp + * + * @brief Example rocprofiler client (tool) + */ + +#include "utils.hpp" + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace client +{ +namespace external_cid +{ +namespace +{ +template +auto +make_array(Arg arg, Args&&... args) +{ + constexpr auto N = 1 + sizeof...(Args); + return std::array{std::forward(arg), std::forward(args)...}; +} +} // namespace + +/** + * @brief Must be called at the beginning of the `tool_ini`. + */ +void +init() +{} + +/** + * @brief Should be called at the of the `tool_fini` + */ +void +fini() +{} + +int +set_external_correlation_id(rocprofiler_thread_id_t /*thr_id*/, + rocprofiler_context_id_t /*ctx_id*/, + rocprofiler_external_correlation_id_request_kind_t /*kind*/, + rocprofiler_tracing_operation_t /*op*/, + uint64_t internal_corr_id, + rocprofiler_user_data_t* external_corr_id, + void* /*user_data*/) +{ + // In multi-queues (devices) scenario, incrementing external correlation IDs + // might not always match with incrementing internal correlation IDs. + // Thus, use the value of internal correlation ID and verify that both + // externall correlation IDs and internal correlation IDs are the same + // in delivered PC samples. + external_corr_id->value = internal_corr_id; + return 0; +} + +void +configure_external_correlation_service(rocprofiler_context_id_t context) +{ + auto external_corr_id_request_kinds = + make_array(ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_KERNEL_DISPATCH); + + ROCPROFILER_CHECK(rocprofiler_configure_external_correlation_id_request_service( + context, + external_corr_id_request_kinds.data(), + external_corr_id_request_kinds.size(), + set_external_correlation_id, + nullptr)); +} + +} // namespace external_cid +} // namespace client diff --git a/tests/pc_sampling/external_cid.hpp b/tests/pc_sampling/external_cid.hpp new file mode 100644 index 00000000..7e2d6675 --- /dev/null +++ b/tests/pc_sampling/external_cid.hpp @@ -0,0 +1,42 @@ +// MIT License +// +// Copyright (c) 2024 ROCm Developer Tools +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include +#include +#include + +namespace client +{ +namespace external_cid +{ +void +configure_external_correlation_service(rocprofiler_context_id_t context); + +void +init(); + +void +fini(); +} // namespace external_cid +} // namespace client diff --git a/tests/pc_sampling/kernel_tracing.cpp b/tests/pc_sampling/kernel_tracing.cpp new file mode 100644 index 00000000..986feb59 --- /dev/null +++ b/tests/pc_sampling/kernel_tracing.cpp @@ -0,0 +1,78 @@ +// MIT License +// +// Copyright (c) 2024 ROCm Developer Tools +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +// undefine NDEBUG so asserts are implemented +#ifdef NDEBUG +# undef NDEBUG +#endif + +/** + * @file samples/pc_sampling_library/client.cpp + * + * @brief Example rocprofiler client (tool) + */ + +#include "utils.hpp" + +#include +#include +#include + +#include +#include +#include + +namespace client +{ +namespace kernel_tracing +{ +constexpr size_t BUFFER_SIZE_BYTES = 8192; +constexpr size_t WATERMARK = (BUFFER_SIZE_BYTES / 4); + +rocprofiler_buffer_id_t kernel_tracing_buffer; + +void +kernel_tracing_buffered(rocprofiler_context_id_t /*context*/, + rocprofiler_buffer_id_t /*buffer_id*/, + rocprofiler_record_header_t** /*headers*/, + size_t /*num_headers*/, + void* /*user_data*/, + uint64_t /*drop_count*/) +{} + +void +configure_kernel_tracing_service(rocprofiler_context_id_t context) +{ + ROCPROFILER_CHECK(rocprofiler_create_buffer(context, + BUFFER_SIZE_BYTES, + WATERMARK, + ROCPROFILER_BUFFER_POLICY_LOSSLESS, + kernel_tracing_buffered, + nullptr, + &kernel_tracing_buffer)); + + ROCPROFILER_CHECK(rocprofiler_configure_buffer_tracing_service( + context, ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH, nullptr, 0, kernel_tracing_buffer)); +} + +} // namespace kernel_tracing +} // namespace client diff --git a/tests/pc_sampling/kernel_tracing.hpp b/tests/pc_sampling/kernel_tracing.hpp new file mode 100644 index 00000000..226337d4 --- /dev/null +++ b/tests/pc_sampling/kernel_tracing.hpp @@ -0,0 +1,41 @@ +// MIT License +// +// Copyright (c) 2024 ROCm Developer Tools +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include +#include + +namespace client +{ +namespace kernel_tracing +{ +void +kernel_tracing_callback(rocprofiler_callback_tracing_record_t record, + rocprofiler_user_data_t* user_data, + void* callback_data); + +void +configure_kernel_tracing_service(rocprofiler_context_id_t context); + +} // namespace kernel_tracing +} // namespace client diff --git a/tests/pc_sampling/main.cpp b/tests/pc_sampling/main.cpp new file mode 100644 index 00000000..bc730377 --- /dev/null +++ b/tests/pc_sampling/main.cpp @@ -0,0 +1,224 @@ +// MIT License +// +// Copyright (c) 2024 ROCm Developer Tools +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include + +#include +#include +#include +#include + +namespace +{ +#define M 8192 +#define N 8192 +#define K 8192 +#define TileSize 16 +#define BLOCK_SIZE_X 16 +#define BLOCK_SIZE_Y 16 +#define GRID_SIZE_X (M + BLOCK_SIZE_X - 1) / BLOCK_SIZE_X +#define GRID_SIZE_Y (N + BLOCK_SIZE_Y - 1) / BLOCK_SIZE_Y +#define WAVES_PER_BLOCK_MI200_PLUS (BLOCK_SIZE_X * BLOCK_SIZE_Y) / 64 + +#define HIP_API_CALL(CALL) \ + { \ + hipError_t error_ = (CALL); \ + if(error_ != hipSuccess) \ + { \ + fprintf(stderr, \ + "%s:%d :: HIP error : %s\n", \ + __FILE__, \ + __LINE__, \ + hipGetErrorString(error_)); \ + throw std::runtime_error("hip_api_call"); \ + } \ + } +} // namespace + +namespace +{ +void +check_hip_error(void); +} // namespace + +__global__ void +matrix_multiply(float* A, float* B, float* Out, int /*m*/, int n, int k) +{ + int gid_x = blockDim.x * blockIdx.x + threadIdx.x; + int gid_y = blockDim.y * blockIdx.y + threadIdx.y; + + if(gid_x < N && gid_y < M) + { + float sum = 0; + for(int i = 0; i < k; ++i) + { + sum += A[gid_y * k + i] * B[i * n + gid_x]; + } + + Out[gid_y * n + gid_x] = sum; + } +} + +#if 1 +__global__ void +matrix_multiply_tile(float* A, float* B, float* Out, int m, int n, int k) +{ + __shared__ float subTileM[TileSize][TileSize]; + __shared__ float subTileN[TileSize][TileSize]; + + int bx = blockIdx.x; + int by = blockIdx.y; + int tx = threadIdx.x; + int ty = threadIdx.y; + + int row = by * TileSize + ty; + int col = bx * TileSize + tx; + + float sum = 0; + for(int i = 0; i < ((k - 1) / TileSize + 1); i++) + { + int curr_l = row * k + i * TileSize + tx; + int curr_r = (i * TileSize + ty) * n + col; + + if(i * TileSize + tx < k && row < m) + { + subTileM[ty][tx] = A[curr_l]; + } + else + { + subTileM[ty][tx] = 0.0; + } + + if(i * TileSize + ty < k && col < n) + { + subTileN[ty][tx] = B[curr_r]; + } + else + { + subTileN[ty][tx] = 0.0; + } + + __syncthreads(); + + for(int j = 0; j < TileSize; j++) + { + if(j + TileSize * i < k) + { + sum += subTileM[ty][j] * subTileN[j][tx]; + } + } + + __syncthreads(); + } + + if(row < m && col < n) + { + Out[row * n + col] = sum; + } +} +#endif + +void +run_hip_app() +{ + std::vector A(M * K); + std::vector B(K * N); + std::vector Out(M * N); + + // Randomly initialize the matrices + for(int i = 0; i < M * K; ++i) + { + A[i] = (float) rand() / (float) RAND_MAX; + } + + for(int i = 0; i < K * N; ++i) + { + B[i] = (float) rand() / (float) RAND_MAX; + } + + // Allocate GPU Memory + float *d_A, *d_B, *d_Out; + HIP_API_CALL(hipMalloc(&d_A, sizeof(float) * M * K)); + HIP_API_CALL(hipMalloc(&d_B, sizeof(float) * K * N)); + HIP_API_CALL(hipMalloc(&d_Out, sizeof(float) * M * N)); + + // Copy data to GPU + HIP_API_CALL(hipMemcpy(d_A, A.data(), sizeof(float) * M * K, hipMemcpyHostToDevice)); + HIP_API_CALL(hipMemcpy(d_B, B.data(), sizeof(float) * K * N, hipMemcpyHostToDevice)); + + // Run the kernel + dim3 block_size(BLOCK_SIZE_X, BLOCK_SIZE_Y); + dim3 grid_size((M + block_size.x - 1) / block_size.x, (N + block_size.y - 1) / block_size.y); + matrix_multiply<<>>(d_A, d_B, d_Out, M, N, K); + check_hip_error(); + matrix_multiply_tile<<>>(d_A, d_B, d_Out, M, N, K); + check_hip_error(); + + // Copy data back to CPU + HIP_API_CALL(hipMemcpy(Out.data(), d_Out, sizeof(float) * M * N, hipMemcpyDeviceToHost)); + + // Free GPU Memory + HIP_API_CALL(hipFree(d_A)); + HIP_API_CALL(hipFree(d_B)); + HIP_API_CALL(hipFree(d_Out)); +} + +#define DEVICE_ID 0 + +int +main(int /*argc*/, char** /*argv*/) +{ + int deviceId = DEVICE_ID; + + auto status = hipSetDevice(deviceId); + assert(status == hipSuccess); + HIP_API_CALL(status); + + int currDeviceId = -1; + status = hipGetDevice(&currDeviceId); + HIP_API_CALL(status); + assert(status == hipSuccess); + assert(deviceId == currDeviceId); + + for(int i = 0; i < 1; i++) + { + std::cout << "<<< MatMul starts" << std::endl; + run_hip_app(); + std::cout << ">>> MatMul ends" << std::endl; + } + + return 0; +} + +namespace +{ +void +check_hip_error(void) +{ + hipError_t err = hipGetLastError(); + if(err != hipSuccess) + { + std::cerr << "Error: " << hipGetErrorString(err) << std::endl; + throw std::runtime_error("hip_api_call"); + } +} +} // namespace diff --git a/tests/pc_sampling/pcs.cpp b/tests/pc_sampling/pcs.cpp new file mode 100644 index 00000000..dbd6f0ba --- /dev/null +++ b/tests/pc_sampling/pcs.cpp @@ -0,0 +1,504 @@ +// MIT License +// +// Copyright (c) 2024 ROCm Developer Tools +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +// undefine NDEBUG so asserts are implemented +#ifdef NDEBUG +# undef NDEBUG +#endif + +#include "pcs.hpp" +#include "address_translation.hpp" +#include "codeobj.hpp" +#include "external_cid.hpp" +#include "utils.hpp" + +#include +#include +#include +#include +#include +#include + +namespace client +{ +namespace pcs +{ +namespace +{ +constexpr int MAX_FAILURES = 10; +constexpr size_t BUFFER_SIZE_BYTES = 8192; +constexpr size_t WATERMARK = (BUFFER_SIZE_BYTES / 4); + +struct tool_agent_info; +using avail_configs_vec_t = std::vector; +using tool_agent_info_vec_t = std::vector>; +using pc_sampling_buffer_id_vec_t = std::vector; + +struct tool_agent_info +{ + rocprofiler_agent_id_t agent_id; + std::unique_ptr avail_configs; + const rocprofiler_agent_t* agent; +}; + +struct PCSampler +{ +private: + using code_object_id_t = uint64_t; + using code_object_id_set_t = std::unordered_set; + +public: + PCSampler() = default; + + ~PCSampler() + { + // Assert that `active_code_objects` is empty. + // For more information, refer to the comments above. + assert(active_code_objects.empty()); + // Clear the data + buffer_ids.clear(); + } + + // GPU agents supporting PC sampling + tool_agent_info_vec_t gpu_agents; + // The total number of collected samples + std::atomic total_samples_num{0}; + // ROCProfiler-SDK PC sampling buffers + pc_sampling_buffer_id_vec_t buffer_ids; + // The set that keeps track of reported code object loading/unloading events. + // At the end of the test, the sets needs to be empty. + // Namely, each loading event will insert a code object id into the set, + // while each unloading event will delete a code ojbect id from the set. + code_object_id_set_t active_code_objects; +}; + +// The reason for using raw pointers is the following. +// Sometimes, statically created objects of the client::pcs +// namespace might be freed prior to the `tool_fini`, +// meaning objects of `pcs` namespace become unusable inside `tool_fini`. +// Instead, use raw pointers to control objects deallocation time. +PCSampler* pc_sampler = nullptr; + +// forward declaration +bool +query_avail_configs_for_agent(tool_agent_info* agent_info); + +rocprofiler_status_t +find_all_gpu_agents_supporting_pc_sampling_impl(rocprofiler_agent_version_t version, + const void** agents, + size_t num_agents, + void* user_data) +{ + assert(version == ROCPROFILER_AGENT_INFO_VERSION_0); + // user_data represent the pointer to the array where gpu_agent will be stored + if(!user_data) return ROCPROFILER_STATUS_ERROR; + + std::stringstream ss; + + auto* _out_agents = static_cast(user_data); + auto* _agents = reinterpret_cast(agents); + for(size_t i = 0; i < num_agents; i++) + { + if(_agents[i]->type == ROCPROFILER_AGENT_TYPE_GPU) + { + // Instantiate the tool_agent_info. + // Store pointer to the rocprofiler_agent_t and instatiate a vector of + // available configurations. + // Move the ownership to the _out_agents + auto tool_gpu_agent = std::make_unique(); + tool_gpu_agent->agent_id = _agents[i]->id; + tool_gpu_agent->avail_configs = std::make_unique(); + tool_gpu_agent->agent = _agents[i]; + // Check if the GPU agent supports PC sampling. If so, add it to the + // output list `_out_agents`. + if(query_avail_configs_for_agent(tool_gpu_agent.get())) + _out_agents->push_back(std::move(tool_gpu_agent)); + } + + ss << "[" << __FUNCTION__ << "] " << _agents[i]->name << " :: " + << "id=" << _agents[i]->id.handle << ", " + << "type=" << _agents[i]->type << "\n"; + } + + *utils::get_output_stream() << ss.str() << std::endl; + + return ROCPROFILER_STATUS_SUCCESS; +} + +void +find_all_gpu_agents_supporting_pc_sampling() +{ + // This function returns the all gpu agents supporting some kind of PC sampling + ROCPROFILER_CALL( + rocprofiler_query_available_agents(ROCPROFILER_AGENT_INFO_VERSION_0, + &find_all_gpu_agents_supporting_pc_sampling_impl, + sizeof(rocprofiler_agent_t), + static_cast(&pc_sampler->gpu_agents)), + "Failed to find GPU agents"); +} + +/** + * @brief The function queries available PC sampling configurations. + * If there is at least one available configuration, it returns true. + * Otherwise, this function returns false to indicate the agent does + * not support PC sampling. + */ +bool +query_avail_configs_for_agent(tool_agent_info* agent_info) +{ + // Clear the available configurations vector + agent_info->avail_configs->clear(); + + auto cb = [](const rocprofiler_pc_sampling_configuration_t* configs, + size_t num_config, + void* user_data) { + auto* avail_configs = static_cast(user_data); + for(size_t i = 0; i < num_config; i++) + { + avail_configs->emplace_back(configs[i]); + } + return ROCPROFILER_STATUS_SUCCESS; + }; + + auto status = rocprofiler_query_pc_sampling_agent_configurations( + agent_info->agent_id, cb, agent_info->avail_configs.get()); + + std::stringstream ss; + + if(status != ROCPROFILER_STATUS_SUCCESS) + { + // The query operation failed, so consider the PC sampling is unsupported at the agent. + // This can happen if the PC sampling service is invoked within the ROCgdb. + ss << "Querying PC sampling capabilities failed with status: " << status << std::endl; + *utils::get_output_stream() << ss.str() << std::endl; + return false; + } + else if(agent_info->avail_configs->size() == 0) + { + // No available configuration at the moment, so mark the PC sampling as unsupported. + return false; + } + + ss << "The agent with the id: " << agent_info->agent_id.handle << " supports the " + << agent_info->avail_configs->size() << " configurations: " << std::endl; + size_t ind = 0; + for(auto& cfg : *agent_info->avail_configs) + { + ss << "(" << ++ind << ".) " + << "method: " << cfg.method << ", " + << "unit: " << cfg.unit << ", " + << "min_interval: " << cfg.min_interval << ", " + << "max_interval: " << cfg.max_interval << ", " + << "flags: " << std::hex << cfg.flags << std::dec << std::endl; + } + + *utils::get_output_stream() << ss.str() << std::flush; + + return true; +} + +void +configure_pc_sampling_prefer_stochastic(tool_agent_info* agent_info, + rocprofiler_context_id_t context_id, + rocprofiler_buffer_id_t buffer_id) +{ + int failures = MAX_FAILURES; + size_t interval = 0; + do + { + // Update the list of available configurations + auto success = query_avail_configs_for_agent(agent_info); + if(!success) + { + // An error occured while querying PC sampling capabilities, + // so avoid trying configuring PC sampling service. + // Instead return false to indicated a failure. + ROCPROFILER_CALL(ROCPROFILER_STATUS_ERROR, + "Could not configuring PC sampling service due to failure with query " + "capabilities."); + } + + const rocprofiler_pc_sampling_configuration_t* first_host_trap_config = nullptr; + const rocprofiler_pc_sampling_configuration_t* first_stochastic_config = nullptr; + // Search until encountering on the stochastic configuration, if any. + // Otherwise, use the host trap config + for(auto const& cfg : *agent_info->avail_configs) + { + if(cfg.method == ROCPROFILER_PC_SAMPLING_METHOD_STOCHASTIC) + { + first_stochastic_config = &cfg; + break; + } + else if(!first_host_trap_config && + cfg.method == ROCPROFILER_PC_SAMPLING_METHOD_HOST_TRAP) + { + first_host_trap_config = &cfg; + } + } + + // Check if the stochastic config is found. Use host trap config otherwise. + const rocprofiler_pc_sampling_configuration_t* picked_cfg = + (first_stochastic_config != nullptr) ? first_stochastic_config : first_host_trap_config; + + interval = picked_cfg->min_interval; + + auto status = rocprofiler_configure_pc_sampling_service(context_id, + agent_info->agent_id, + picked_cfg->method, + picked_cfg->unit, + interval, + buffer_id); + if(status == ROCPROFILER_STATUS_SUCCESS) + { + *utils::get_output_stream() + << ">>> We chose PC sampling interval: " << interval + << " on the agent: " << agent_info->agent->id.handle << std::endl; + return; + } + else if(status != ROCPROFILER_STATUS_ERROR_NOT_AVAILABLE) + { + ROCPROFILER_CALL(status, "Failed to configure PC sampling"); + } + // status == ROCPROFILER_STATUS_ERROR_NOT_AVAILABLE + // means another process P2 already configured PC sampling. + // Query available configurations again and receive the configurations picked by P2. + // However, if P2 destroys PC sampling service after query function finished, + // but before the `rocprofiler_configure_pc_sampling_service` is called, + // then the `rocprofiler_configure_pc_sampling_service` will fail again. + // The process P1 executing this loop can spin wait (starve) if it is unlucky enough + // to always be interuppted by some other process P2 that creates/destroys + // PC sampling service on the same device while P1 is executing the code + // after the `query_avail_configs_for_agent` and + // before the `rocprofiler_configure_pc_sampling_service`. + // This should happen very rarely, but just to be sure, we introduce a counter `failures` + // that will allow certain amount of failures to process P1. + } while(--failures); + + // The process failed too many times configuring PC sampling, + // report this to user; + ROCPROFILER_CALL(ROCPROFILER_STATUS_ERROR, + "Failed too many times configuring PC sampling service"); +} + +void +rocprofiler_pc_sampling_callback(rocprofiler_context_id_t /*context_id*/, + rocprofiler_buffer_id_t /*buffer_id*/, + rocprofiler_record_header_t** headers, + size_t num_headers, + void* /*data*/, + uint64_t drop_count) +{ + std::stringstream ss; + ss << "The number of delivered samples is: " << num_headers << ", " + << "while the number of dropped samples is: " << drop_count << std::endl; + + auto& flat_profile = client::address_translation::get_flat_profile(); + auto& translator = client::address_translation::get_address_translator(); + auto& global_mut = address_translation::get_global_mutex(); + + { + auto lock = std::unique_lock{global_mut}; + + for(size_t i = 0; i < num_headers; i++) + { + auto* cur_header = headers[i]; + + if(cur_header == nullptr) + { + throw std::runtime_error{ + "rocprofiler provided a null pointer to header. this should never happen"}; + } + else if(cur_header->hash != + rocprofiler_record_header_compute_hash(cur_header->category, cur_header->kind)) + { + throw std::runtime_error{"rocprofiler_record_header_t (category | kind) != hash"}; + } + else if(cur_header->category == ROCPROFILER_BUFFER_CATEGORY_PC_SAMPLING) + { + if(cur_header->kind == ROCPROFILER_PC_SAMPLING_RECORD_SAMPLE) + { + auto* pc_sample = + static_cast(cur_header->payload); + + ss << "pc: " << std::hex << pc_sample->pc << ", " + << "timestamp: " << std::dec << pc_sample->timestamp << ", " + << "exec: " << std::hex << std::setw(16) << pc_sample->exec_mask << ", " + << "workgroup_id_(x=" << std::dec << std::setw(5) + << pc_sample->workgroup_id.x << ", " + << "y=" << std::setw(5) << pc_sample->workgroup_id.y << ", " + << "z=" << std::setw(5) << pc_sample->workgroup_id.z << "), " + << "wave_id: " << std::setw(2) + << static_cast(pc_sample->wave_id) << ", " + << "cu_id: " << pc_sample->hw_id << ", " + << "correlation: {internal=" << std::setw(7) + << pc_sample->correlation_id.internal << ", " + << "external=" << std::setw(5) << pc_sample->correlation_id.external.value + << "}" << std::endl; + + // Ignore samples from blit kernels. + if(pc_sample->correlation_id.internal == + ROCPROFILER_CORRELATION_ID_INTERNAL_NONE) + continue; + + total_samples_num() += 1; + + auto corr_id = pc_sample->correlation_id; + // Internal correlation IDs are generated by the ROCProfiler-SDK for + // kernel dispatches only. Similarly, the test tool generate external + // correlation IDs for the kernel dispatches only. + // Thus, we should expect them to be equal. + assert(corr_id.internal == corr_id.external.value); + assert(corr_id.external.value > 0); + + // Decoding the PC + auto inst = translator.get(pc_sample->pc); + flat_profile.add_sample(std::move(inst), pc_sample->exec_mask); + } + else if(cur_header->kind == ROCPROFILER_PC_SAMPLING_RECORD_CODE_OBJECT_LOAD_MARKER) + { + auto* marker = static_cast( + cur_header->payload); + auto code_object_id = marker->code_object_id; + ss << "code object loading: " << code_object_id << std::endl; + // The code object load event can be reported once per code object id. + assert(pc_sampler->active_code_objects.count(code_object_id) == 0); + pc_sampler->active_code_objects.emplace(code_object_id); + } + else if(cur_header->kind == + ROCPROFILER_PC_SAMPLING_RECORD_CODE_OBJECT_UNLOAD_MARKER) + { + auto* marker = + static_cast( + cur_header->payload); + auto code_object_id = marker->code_object_id; + ss << "code object unloading: " << code_object_id << std::endl; + // The code object unload event can be reported once per code object id. + assert(pc_sampler->active_code_objects.count(code_object_id) == 1); + pc_sampler->active_code_objects.erase(code_object_id); + } + } + else + { + throw std::runtime_error{"unexpected rocprofiler_record_header_t category + kind"}; + } + } + + // TODO: do we need some sync here? + *utils::get_output_stream() << ss.str() << std::endl; + } +} +} // namespace + +void +init() +{ + pc_sampler = new PCSampler(); +} + +void +fini() +{ + delete pc_sampler; +} + +std::atomic& +total_samples_num() +{ + return pc_sampler->total_samples_num; +} + +void +configure_pc_sampling_on_all_agents(rocprofiler_context_id_t context) +{ + find_all_gpu_agents_supporting_pc_sampling(); + + if(pc_sampler->gpu_agents.empty()) + { + *utils::get_output_stream() << "No availabe gpu agents supporting PC sampling" << std::endl; + *utils::get_output_stream() << "PC sampling unavailable" << std::endl; + // Exit with no error if none of the GPUs support PC sampling. + exit(0); + } + + auto& buff_ids_vec = pc_sampler->buffer_ids; + + for(auto& gpu_agent : pc_sampler->gpu_agents) + { + // creating a buffer that will hold pc sampling information + rocprofiler_buffer_policy_t drop_buffer_action = ROCPROFILER_BUFFER_POLICY_LOSSLESS; + auto buffer_id = rocprofiler_buffer_id_t{}; + ROCPROFILER_CALL(rocprofiler_create_buffer(context, + client::pcs::BUFFER_SIZE_BYTES, + client::pcs::WATERMARK, + drop_buffer_action, + client::pcs::rocprofiler_pc_sampling_callback, + nullptr, + &buffer_id), + "Cannot create pc sampling buffer"); + + client::pcs::configure_pc_sampling_prefer_stochastic(gpu_agent.get(), context, buffer_id); + + // One helper thread per GPU agent's buffer. + auto client_agent_thread = rocprofiler_callback_thread_t{}; + ROCPROFILER_CALL(rocprofiler_create_callback_thread(&client_agent_thread), + "failure creating callback thread"); + + ROCPROFILER_CALL(rocprofiler_assign_callback_thread(buffer_id, client_agent_thread), + "failed to assign thread for buffer"); + + buff_ids_vec.emplace_back(buffer_id); + } +} + +void +flush_buffers() +{ + // Flush rocproifler-SDK's buffers containing PC samples. + for(const auto& buff_id : pc_sampler->buffer_ids) + { + // Flush the buffer explicitly + ROCPROFILER_CALL(rocprofiler_flush_buffer(buff_id), "Failure flushing buffer"); + } +} + +void +flush_and_destroy_buffers() +{ + for(const auto& buff_id : pc_sampler->buffer_ids) + { + // Flush the buffer explicitly + ROCPROFILER_CALL(rocprofiler_flush_buffer(buff_id), "Failure flushing buffer"); + // Destroying the buffer + rocprofiler_status_t status = rocprofiler_destroy_buffer(buff_id); + if(status == ROCPROFILER_STATUS_ERROR_BUFFER_BUSY) + { + *utils::get_output_stream() + << "The buffer is busy, so we cannot destroy it at the moment." << std::endl; + } + else + { + ROCPROFILER_CALL(status, "Cannot destroy buffer"); + } + } +} +} // namespace pcs +} // namespace client diff --git a/tests/pc_sampling/pcs.hpp b/tests/pc_sampling/pcs.hpp new file mode 100644 index 00000000..4b854613 --- /dev/null +++ b/tests/pc_sampling/pcs.hpp @@ -0,0 +1,55 @@ +// MIT License +// +// Copyright (c) 2024 ROCm Developer Tools +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include +#include + +#include +#include + +namespace client +{ +namespace pcs +{ +// Must be called first (prior to any other function from this namespace) +void +init(); + +// Must be called at the end of the `tool_fini` +void +fini(); + +std::atomic& +total_samples_num(); + +void +configure_pc_sampling_on_all_agents(rocprofiler_context_id_t context); + +void +flush_buffers(); + +void +flush_and_destroy_buffers(); +} // namespace pcs +} // namespace client diff --git a/tests/pc_sampling/utils.cpp b/tests/pc_sampling/utils.cpp new file mode 100644 index 00000000..4fed10bd --- /dev/null +++ b/tests/pc_sampling/utils.cpp @@ -0,0 +1,37 @@ +// MIT License +// +// Copyright (c) 2024 ROCm Developer Tools +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include "utils.hpp" + +namespace client +{ +namespace utils +{ +std::ostream*& +get_output_stream() +{ + // The output strea is initially unitialized + static std::ostream* _v = nullptr; + return _v; +} +} // namespace utils +} // namespace client diff --git a/tests/pc_sampling/utils.hpp b/tests/pc_sampling/utils.hpp new file mode 100644 index 00000000..e9275160 --- /dev/null +++ b/tests/pc_sampling/utils.hpp @@ -0,0 +1,65 @@ +// MIT License +// +// Copyright (c) 2024 ROCm Developer Tools +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include + +#include +#include + +#define ROCPROFILER_VAR_NAME_COMBINE(X, Y) X##Y +#define ROCPROFILER_VARIABLE(X, Y) ROCPROFILER_VAR_NAME_COMBINE(X, Y) + +#define ROCPROFILER_CALL(result, msg) \ + { \ + rocprofiler_status_t CHECKSTATUS = result; \ + if(CHECKSTATUS != ROCPROFILER_STATUS_SUCCESS) \ + { \ + std::cerr << #result << " failed with error code " << CHECKSTATUS << std::endl; \ + throw std::runtime_error(#result " failure"); \ + } \ + } + +#define ROCPROFILER_CHECK(result) \ + { \ + rocprofiler_status_t ROCPROFILER_VARIABLE(CHECKSTATUS, __LINE__) = result; \ + if(ROCPROFILER_VARIABLE(CHECKSTATUS, __LINE__) != ROCPROFILER_STATUS_SUCCESS) \ + { \ + std::string status_msg = \ + rocprofiler_get_status_string(ROCPROFILER_VARIABLE(CHECKSTATUS, __LINE__)); \ + std::stringstream errmsg{}; \ + errmsg << "[" << __FILE__ << ":" << __LINE__ << "] " << #result \ + << " failed with error code " << ROCPROFILER_VARIABLE(CHECKSTATUS, __LINE__) \ + << " :: " << status_msg; \ + throw std::runtime_error(errmsg.str()); \ + } \ + } + +namespace client +{ +namespace utils +{ +std::ostream*& +get_output_stream(); +} +} // namespace client