diff --git a/samples/pc_sampling/client.cpp b/samples/pc_sampling/client.cpp index 604a3040..745583de 100644 --- a/samples/pc_sampling/client.cpp +++ b/samples/pc_sampling/client.cpp @@ -69,6 +69,9 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* /*tool_data*/) { client_fini_func = fini_func; + // Initialize necessary data structures + pcs::init(); + client::pcs::find_all_gpu_agents_supporting_pc_sampling(); if(client::pcs::gpu_agents.empty()) @@ -86,6 +89,8 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* /*tool_data*/) ROCPROFILER_CHECK(rocprofiler_create_context(&client_ctx)); + auto* buff_ids_vec = pcs::get_pc_sampling_buffer_ids(); + for(auto& gpu_agent : pcs::gpu_agents) { // creating a buffer that will hold pc sampling information @@ -108,7 +113,7 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* /*tool_data*/) ROCPROFILER_CHECK(rocprofiler_assign_callback_thread(buffer_id, client_agent_thread)); - client::pcs::buffer_ids.emplace_back(buffer_id); + buff_ids_vec->emplace_back(buffer_id); } int valid_ctx = 0; @@ -138,12 +143,12 @@ tool_fini(void* /*tool_data*/) assert(state == 0); // No need to stop the context, since it has been stopped implicitly by the rocprofiler-SDK. - for(size_t i = 0; i < client::pcs::buffer_ids.size(); i++) + for(auto buff_id : *pcs::get_pc_sampling_buffer_ids()) { // Flush the buffer explicitly - ROCPROFILER_CHECK(rocprofiler_flush_buffer(client::pcs::buffer_ids.at(i))); + ROCPROFILER_CHECK(rocprofiler_flush_buffer(buff_id)); // Destroying the buffer - rocprofiler_status_t status = rocprofiler_destroy_buffer(client::pcs::buffer_ids.at(i)); + rocprofiler_status_t status = rocprofiler_destroy_buffer(buff_id); if(status == ROCPROFILER_STATUS_ERROR_BUFFER_BUSY) { *utils::get_output_stream() @@ -155,6 +160,9 @@ tool_fini(void* /*tool_data*/) } } } + + // deallocation + pcs::fini(); } } // namespace diff --git a/samples/pc_sampling/pcs.cpp b/samples/pc_sampling/pcs.cpp index 6ec63695..307f292d 100644 --- a/samples/pc_sampling/pcs.cpp +++ b/samples/pc_sampling/pcs.cpp @@ -40,8 +40,40 @@ namespace client { namespace pcs { -tool_agent_info_vec_t gpu_agents; -std::vector buffer_ids; +// TODO: Since this is used only within the `tool_init`, +// we are safe using static constructor. +// It would be nice to make this consistent with the `buffer_ids`. +tool_agent_info_vec_t gpu_agents; +// The reason for using raw pointers is the following. +// Sometimes, statically created objects of the client::pcs +// namespace might be freed prior to the `tool_fini`, +// meaning `buffer_ids` become unusable inside `tool_fini`. +// Instead, use raw pointers to control objects deallocation time. +// TODO: The approach with exporting raw pointers outside of the +// `pcs` namespace is a temporary solution. +// Instead, it would be better to encapsulate `buffer_ids` inside the +// `pcs` namespace and export functions for registering/flushing/destroying buffers. +pc_sampling_buffer_id_vec_t* buffer_ids = nullptr; + +void +init() +{ + buffer_ids = new pc_sampling_buffer_id_vec_t(); +} + +void +fini() +{ + // Clear the data + buffer_ids->clear(); + delete buffer_ids; +} + +pc_sampling_buffer_id_vec_t* +get_pc_sampling_buffer_ids() +{ + return buffer_ids; +} rocprofiler_status_t find_all_gpu_agents_supporting_pc_sampling_impl(rocprofiler_agent_version_t version, diff --git a/samples/pc_sampling/pcs.hpp b/samples/pc_sampling/pcs.hpp index 6ddb8851..a938c6a3 100644 --- a/samples/pc_sampling/pcs.hpp +++ b/samples/pc_sampling/pcs.hpp @@ -35,8 +35,9 @@ constexpr size_t BUFFER_SIZE_BYTES = 8192; constexpr size_t WATERMARK = (BUFFER_SIZE_BYTES / 4); struct tool_agent_info; -using avail_configs_vec_t = std::vector; -using tool_agent_info_vec_t = std::vector>; +using avail_configs_vec_t = std::vector; +using tool_agent_info_vec_t = std::vector>; +using pc_sampling_buffer_id_vec_t = std::vector; struct tool_agent_info { @@ -50,8 +51,18 @@ struct tool_agent_info // meaning we were not able to enable PC sampling service. // Check the `tool_init` for more information. extern tool_agent_info_vec_t gpu_agents; + +// Must be called first (prior to any other function from this namespace) +void +init(); + +// Must be called at the end of the `tool_fini` +void +fini(); + // Ids of the buffers used as containers for PC sampling records -extern std::vector buffer_ids; +pc_sampling_buffer_id_vec_t* +get_pc_sampling_buffer_ids(); void find_all_gpu_agents_supporting_pc_sampling();