Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

enable tracker in ProxyLib by default #761

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/proxy_lib.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ jobs:

- name: Run "ctest --output-on-failure" with proxy library
working-directory: ${{env.BUILD_DIR}}
run: LD_PRELOAD=./lib/libumf_proxy.so ctest --output-on-failure
run: LD_PRELOAD=./lib/libumf_proxy.so ctest --output-on-failure -E "proxy_lib_memoryPool"

- name: Run "./test/umf_test-memoryPool" with proxy library
working-directory: ${{env.BUILD_DIR}}
Expand Down
10 changes: 9 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -381,8 +381,16 @@ if(WINDOWS)
)
endif()
endif()

# set UMF_PROXY_LIB_ENABLED
if(UMF_PROXY_LIB_BASED_ON_POOL STREQUAL SCALABLE)
if(UMF_LINK_HWLOC_STATICALLY)
message(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why does proxy lib require HWLOC to be linked dynamically?

STATUS
"Disabling the proxy library, because HWLOC is set to link statically which is not supported"
)
elseif(UMF_DISABLE_HWLOC)
message(STATUS "Disabling the proxy library, because HWLOC is disabled")
elseif(UMF_PROXY_LIB_BASED_ON_POOL STREQUAL SCALABLE)
if(UMF_POOL_SCALABLE_ENABLED)
set(UMF_PROXY_LIB_ENABLED ON)
set(PROXY_LIB_USES_SCALABLE_POOL ON)
Expand Down
4 changes: 1 addition & 3 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -253,8 +253,6 @@ install(TARGETS umf EXPORT ${PROJECT_NAME}-targets)

add_subdirectory(pool)

if(UMF_PROXY_LIB_ENABLED
AND NOT UMF_LINK_HWLOC_STATICALLY
AND NOT UMF_DISABLE_HWLOC)
if(UMF_PROXY_LIB_ENABLED)
add_subdirectory(proxy_lib)
endif()
39 changes: 32 additions & 7 deletions src/provider/provider_tracking.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,14 @@ static umf_result_t umfMemoryTrackerAdd(umf_memory_tracker_handle_t hTracker,
int ret = critnib_insert(hTracker->map, (uintptr_t)ptr, value, 0);

if (ret == 0) {
LOG_DEBUG("memory region is added, tracker=%p, ptr=%p, size=%zu",
(void *)hTracker, ptr, size);
LOG_DEBUG(
"memory region is added, tracker=%p, ptr=%p, pool=%p, size=%zu",
(void *)hTracker, ptr, (void *)pool, size);
return UMF_RESULT_SUCCESS;
}

LOG_ERR("failed to insert tracker value, ret=%d, ptr=%p, size=%zu", ret,
ptr, size);
LOG_ERR("failed to insert tracker value, ret=%d, ptr=%p, pool=%p, size=%zu",
ret, ptr, (void *)pool, size);

umf_ba_free(hTracker->tracker_allocator, value);

Expand Down Expand Up @@ -161,11 +162,35 @@ static umf_result_t trackingAlloc(void *hProvider, size_t size,
return ret;
}

umf_result_t ret2 = umfMemoryTrackerAdd(p->hTracker, p->pool, *ptr, size);
if (ret2 != UMF_RESULT_SUCCESS) {
LOG_ERR("failed to add allocated region to the tracker, ptr = %p, size "
// check if the allocation was already added to the tracker
// (in case of using ProxyLib)
tracker_value_t *value =
(tracker_value_t *)critnib_get(p->hTracker->map, *(uintptr_t *)ptr);
if (value) {
assert(value->pool != p->pool);

LOG_DEBUG("ptr already exists in the tracker (added by Proxy Lib) - "
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you please describe the case when the memory is already present in the tracker?

"updating value, ptr=%p, size=%zu, old pool: %p, new pool %p",
*ptr, size, (void *)value->pool, (void *)p->pool);

// the allocation was made by the ProxyLib so we only update the tracker
value->pool = p->pool;
Comment on lines +176 to +177
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So we assume that ProxyLib will not look for this pointer any more? Is it based on the fact that ProxyLib does not call umfPoolByPtr() in its source code?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. Also, the proxy lib would not free this allocation - that would be done by the user-created pool.

int crit_ret = critnib_insert(p->hTracker->map, *(uintptr_t *)ptr,
value, 1 /* update */);

// this cannot fail since we know the element exists and there is
// nothing to allocate
assert(crit_ret == 0);
(void)crit_ret;
} else {
umf_result_t ret2 =
umfMemoryTrackerAdd(p->hTracker, p->pool, *ptr, size);
if (ret2 != UMF_RESULT_SUCCESS) {
LOG_ERR(
"failed to add allocated region to the tracker, ptr = %p, size "
"= %zu, ret = %d",
*ptr, size, ret2);
}
}

return ret;
Expand Down
7 changes: 2 additions & 5 deletions src/proxy_lib/proxy_lib.c
Original file line number Diff line number Diff line change
Expand Up @@ -124,8 +124,6 @@ void proxy_lib_create_common(void) {

} else if (utils_env_var_has_str("UMF_PROXY",
"page.disposition=shared-shm")) {
LOG_DEBUG("proxy_lib: using the MAP_SHARED visibility mode with the "
"named shared memory");
ldorau marked this conversation as resolved.
Show resolved Hide resolved
os_params.visibility = UMF_MEM_MAP_SHARED;

memset(shm_name, 0, NAME_MAX);
Expand All @@ -145,9 +143,8 @@ void proxy_lib_create_common(void) {
exit(-1);
}

umf_result =
umfPoolCreate(umfPoolManagerOps(), OS_memory_provider, NULL,
UMF_POOL_CREATE_FLAG_DISABLE_TRACKING, &Proxy_pool);
umf_result = umfPoolCreate(umfPoolManagerOps(), OS_memory_provider, NULL, 0,
&Proxy_pool);
if (umf_result != UMF_RESULT_SUCCESS) {
LOG_ERR("creating UMF pool manager failed");
exit(-1);
Expand Down
32 changes: 24 additions & 8 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -334,10 +334,7 @@ add_umf_test(
LIBS ${UMF_UTILS_FOR_TEST})

# tests for the proxy library
if(UMF_PROXY_LIB_ENABLED
AND UMF_BUILD_SHARED_LIBRARY
AND NOT UMF_DISABLE_HWLOC
AND NOT UMF_LINK_HWLOC_STATICALLY)
if(UMF_PROXY_LIB_ENABLED AND UMF_BUILD_SHARED_LIBRARY)
add_umf_test(
NAME proxy_lib_basic
SRCS ${BA_SOURCES_FOR_TEST} test_proxy_lib.cpp
Expand Down Expand Up @@ -382,13 +379,14 @@ function(add_umf_ipc_test)
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})

set_tests_properties(${TEST_NAME} PROPERTIES LABELS "umf")
set_tests_properties(${TEST_NAME} PROPERTIES TIMEOUT 60)
if(NOT UMF_TESTS_FAIL_ON_SKIP)
set_tests_properties(${TEST_NAME} PROPERTIES SKIP_RETURN_CODE 125)
endif()
endfunction()

if(LINUX)
if(NOT UMF_DISABLE_HWLOC)
if(NOT UMF_DISABLE_HWLOC AND UMF_POOL_SCALABLE_ENABLED)
build_umf_test(
NAME
ipc_os_prov_consumer
Expand All @@ -406,6 +404,18 @@ if(LINUX)
add_umf_ipc_test(TEST ipc_os_prov_anon_fd)
add_umf_ipc_test(TEST ipc_os_prov_shm)

if(UMF_PROXY_LIB_ENABLED AND UMF_BUILD_SHARED_LIBRARY)
build_umf_test(
NAME
ipc_os_prov_proxy
SRCS
ipc_os_prov_proxy.c
common/ipc_common.c
LIBS
${UMF_UTILS_FOR_TEST})
add_umf_ipc_test(TEST ipc_os_prov_proxy)
endif()

build_umf_test(
NAME
ipc_devdax_prov_consumer
Expand Down Expand Up @@ -436,13 +446,17 @@ if(LINUX)
ipc_file_prov_producer.c
common/ipc_common.c
common/ipc_os_prov_common.c)
add_umf_ipc_test(TEST ipc_file_prov)
add_umf_ipc_test(TEST ipc_file_prov_fsdax)

# TODO - fix ipc_file_prov and ipc_file_prov_fsdax tests
# add_umf_ipc_test(TEST ipc_file_prov) add_umf_ipc_test(TEST
# ipc_file_prov_fsdax)
Comment on lines +450 to +452
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

An issue should be added for this.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ldorau I will wait for your fix #800 to be merged and then I will remove this.

endif()

# TODO add IPC tests for CUDA

if(UMF_BUILD_GPU_TESTS AND UMF_BUILD_LEVEL_ZERO_PROVIDER)
if(UMF_BUILD_GPU_TESTS
AND UMF_BUILD_LEVEL_ZERO_PROVIDER
AND UMF_BUILD_LIBUMF_POOL_DISJOINT)
Comment on lines -445 to +459
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why does this change belong to this PR? What does it have in common with the rest of the PR?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have to use Disjoint Pool in GPU tests

build_umf_test(
NAME
ipc_level_zero_prov_consumer
Expand All @@ -453,6 +467,7 @@ if(LINUX)
providers/level_zero_helpers.cpp
LIBS
ze_loader
disjoint_pool
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why does this change belong to this PR? What does it have in common with the rest of the PR?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have to use Disjoint Pool in GPU tests

${UMF_UTILS_FOR_TEST})
build_umf_test(
NAME
Expand All @@ -464,6 +479,7 @@ if(LINUX)
providers/level_zero_helpers.cpp
LIBS
ze_loader
disjoint_pool
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why does this change belong to this PR? What does it have in common with the rest of the PR?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have to use Disjoint Pool in GPU tests

${UMF_UTILS_FOR_TEST})
target_include_directories(umf_test-ipc_level_zero_prov_producer
PRIVATE ${LEVEL_ZERO_INCLUDE_DIRS})
Expand Down
82 changes: 35 additions & 47 deletions test/common/ipc_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -110,9 +110,9 @@ int consumer_connect(int port) {
return ret;
}

int run_consumer(int port, umf_memory_provider_ops_t *provider_ops,
void *provider_params, memcopy_callback_t memcopy_callback,
void *memcopy_ctx) {
int run_consumer(int port, umf_memory_pool_ops_t *pool_ops, void *pool_params,
umf_memory_provider_ops_t *provider_ops, void *provider_params,
memcopy_callback_t memcopy_callback, void *memcopy_ctx) {
char consumer_message[MSG_SIZE];
int producer_socket = -1;
int ret = -1;
Expand All @@ -131,6 +131,9 @@ int run_consumer(int port, umf_memory_provider_ops_t *provider_ops,
return -1;
}

umf_memory_pool_handle_t pool;
umf_result = umfPoolCreate(pool_ops, provider, pool_params, 0, &pool);

producer_socket = consumer_connect(port);
if (producer_socket < 0) {
goto err_umfMemoryProviderDestroy;
Expand Down Expand Up @@ -183,7 +186,7 @@ int run_consumer(int port, umf_memory_provider_ops_t *provider_ops,
len);

void *SHM_ptr;
umf_result = umfMemoryProviderOpenIPCHandle(provider, IPC_handle, &SHM_ptr);
umf_result = umfOpenIPCHandle(pool, IPC_handle, &SHM_ptr);
if (umf_result == UMF_RESULT_ERROR_NOT_SUPPORTED) {
fprintf(stderr,
"[consumer] SKIP: opening the IPC handle is not supported\n");
Expand Down Expand Up @@ -240,8 +243,7 @@ int run_consumer(int port, umf_memory_provider_ops_t *provider_ops,

err_closeIPCHandle:
// we do not know the exact size of the remote shared memory
umf_result = umfMemoryProviderCloseIPCHandle(provider, SHM_ptr,
sizeof(unsigned long long));
umf_result = umfCloseIPCHandle(SHM_ptr);
if (umf_result != UMF_RESULT_SUCCESS) {
fprintf(stderr, "[consumer] ERROR: closing the IPC handle failed\n");
}
Expand All @@ -254,6 +256,7 @@ int run_consumer(int port, umf_memory_provider_ops_t *provider_ops,

err_umfMemoryProviderDestroy:
umfMemoryProviderDestroy(provider);
umfPoolDestroy(pool);

if (ret == 0) {
fprintf(stderr, "[consumer] Shutting down (status OK) ...\n");
Expand Down Expand Up @@ -303,9 +306,9 @@ int producer_connect(int port) {
return -1;
}

int run_producer(int port, umf_memory_provider_ops_t *provider_ops,
void *provider_params, memcopy_callback_t memcopy_callback,
void *memcopy_ctx) {
int run_producer(int port, umf_memory_pool_ops_t *pool_ops, void *pool_params,
umf_memory_provider_ops_t *provider_ops, void *provider_params,
memcopy_callback_t memcopy_callback, void *memcopy_ctx) {
int ret = -1;
umf_memory_provider_handle_t provider = NULL;
umf_result_t umf_result = UMF_RESULT_ERROR_UNKNOWN;
Expand All @@ -321,6 +324,9 @@ int run_producer(int port, umf_memory_provider_ops_t *provider_ops,
return -1;
}

umf_memory_pool_handle_t pool;
umf_result = umfPoolCreate(pool_ops, provider, pool_params, 0, &pool);

size_t page_size;
umf_result = umfMemoryProviderGetMinPageSize(provider, NULL, &page_size);
if (umf_result != UMF_RESULT_SUCCESS) {
Expand All @@ -335,45 +341,36 @@ int run_producer(int port, umf_memory_provider_ops_t *provider_ops,
size_t ptr2_size = 2 * page_size;
size_t size_IPC_shared_memory = 3 * page_size;

umf_result = umfMemoryProviderAlloc(provider, ptr1_size, 0, &ptr1);
if (umf_result != UMF_RESULT_SUCCESS) {
ptr1 = umfPoolMalloc(pool, ptr1_size);
if (ptr1 == NULL) {
fprintf(stderr, "[producer] ERROR: allocating 1 page failed\n");
goto err_umfMemoryProviderDestroy;
}

umf_result = umfMemoryProviderAlloc(provider, ptr2_size, 0, &ptr2);
if (umf_result != UMF_RESULT_SUCCESS) {
ptr2 = umfPoolMalloc(pool, ptr2_size);
if (ptr2 == NULL) {
fprintf(stderr, "[producer] ERROR: allocating 2 pages failed\n");
goto err_free_ptr1;
}

umf_result = umfMemoryProviderAlloc(provider, size_IPC_shared_memory, 0,
&IPC_shared_memory);
if (umf_result != UMF_RESULT_SUCCESS) {
IPC_shared_memory = umfPoolMalloc(pool, size_IPC_shared_memory);
if (IPC_shared_memory == NULL) {
fprintf(stderr, "[producer] ERROR: allocating 3 pages failed\n");
goto err_free_ptr2;
}

// get size of the IPC handle
size_t IPC_handle_size;
umf_result = umfMemoryProviderGetIPCHandleSize(provider, &IPC_handle_size);
if (umf_result != UMF_RESULT_SUCCESS) {
fprintf(stderr,
"[producer] ERROR: getting size of the IPC handle failed\n");
goto err_free_IPC_shared_memory;
}
umf_ipc_handle_t IPC_handle = NULL;

// allocate data for IPC provider
void *IPC_handle = malloc(IPC_handle_size);
if (IPC_handle == NULL) {
fprintf(stderr,
"[producer] ERROR: allocating memory for IPC handle failed\n");
// get the IPC handle
umf_result =
umfGetIPCHandle(IPC_shared_memory, &IPC_handle, &IPC_handle_size);
if (umf_result != UMF_RESULT_SUCCESS) {
fprintf(stderr, "[producer] ERROR: getting the IPC handle failed\n");
goto err_free_IPC_shared_memory;
}

// zero the IPC handle and the shared memory
memset(IPC_handle, 0, IPC_handle_size);

// save a random number (&provider) in the shared memory
unsigned long long SHM_number_1 = (unsigned long long)&provider;
memcopy_callback(IPC_shared_memory, &SHM_number_1, sizeof(SHM_number_1),
Expand All @@ -382,16 +379,6 @@ int run_producer(int port, umf_memory_provider_ops_t *provider_ops,
fprintf(stderr, "[producer] My shared memory contains a number: %llu\n",
SHM_number_1);

// get the IPC handle from the OS memory provider
umf_result = umfMemoryProviderGetIPCHandle(
provider, IPC_shared_memory, size_IPC_shared_memory, IPC_handle);
if (umf_result != UMF_RESULT_SUCCESS) {
fprintf(stderr,
"[producer] ERROR: getting the IPC handle from the OS memory "
"provider failed\n");
goto err_free_IPC_handle;
}

fprintf(stderr, "[producer] Got the IPC handle\n");

producer_socket = producer_connect(port);
Expand Down Expand Up @@ -494,23 +481,24 @@ int run_producer(int port, umf_memory_provider_ops_t *provider_ops,
close(producer_socket);

err_PutIPCHandle:
umf_result = umfMemoryProviderPutIPCHandle(provider, IPC_handle);
umf_result = umfPutIPCHandle(IPC_handle);
if (umf_result != UMF_RESULT_SUCCESS) {
fprintf(stderr, "[producer] ERROR: putting the IPC handle failed\n");
}

fprintf(stderr, "[producer] Put the IPC handle\n");

err_free_IPC_handle:
free(IPC_handle);
err_free_IPC_shared_memory:
(void)umfMemoryProviderFree(provider, IPC_shared_memory,
size_IPC_shared_memory);
(void)umfFree(IPC_shared_memory);

err_free_ptr2:
(void)umfMemoryProviderFree(provider, ptr2, ptr2_size);
(void)umfFree(ptr2);

err_free_ptr1:
(void)umfMemoryProviderFree(provider, ptr1, ptr1_size);
(void)umfFree(ptr1);

err_umfMemoryProviderDestroy:
umfPoolDestroy(pool);
umfMemoryProviderDestroy(provider);

if (ret == 0) {
Expand Down
Loading
Loading