From bc67a96ed1ed18eee5b88679fd54c40fe3a73073 Mon Sep 17 00:00:00 2001 From: jsarrao <43554622+jsarrao@users.noreply.github.com> Date: Tue, 24 Sep 2024 10:57:15 -0700 Subject: [PATCH] [kitrt] Fixes for numpy extension module (#57) * [kitrt] Fixes for numpy extension module - Renamed kitrt.c to kitrt.cpp - Removed system allocators from extension module - Fixed typo in mem_realloc method name for both cuda and hip - Fixed signature for enable/disable mem handler * Unconditionally include kitrt.h. Fixed whitespace errors. --- kitsune/runtime/cuda/kitcuda.h | 16 +++++----- kitsune/runtime/cuda/memory.cpp | 2 +- .../runtime/cuda/numpy/{kitrt.c => kitrt.cpp} | 29 ++++--------------- kitsune/runtime/hip/memory.cpp | 2 +- 4 files changed, 14 insertions(+), 35 deletions(-) rename kitsune/runtime/cuda/numpy/{kitrt.c => kitrt.cpp} (92%) diff --git a/kitsune/runtime/cuda/kitcuda.h b/kitsune/runtime/cuda/kitcuda.h index 5c493f207ed9477..17ffaf170a5c45c 100644 --- a/kitsune/runtime/cuda/kitcuda.h +++ b/kitsune/runtime/cuda/kitcuda.h @@ -57,9 +57,7 @@ #include #include -#ifndef NPY_TARGET_VERSION #include "kitrt.h" -#endif #include @@ -294,9 +292,9 @@ extern void __kitcuda_memcpy_sym_to_device(void *host_sym, uint64_t dev_sym, * @param threads_per_blk - threads per block (set to zero for auto determination). */ extern void* __kitcuda_launch_kernel(const void *fat_bin, const char *kern_name, - void **kern_args, uint64_t trip_count, + void **kern_args, uint64_t trip_count, int threads_per_blk, - const KitRTInstMix *inst_mix, + const KitRTInstMix *inst_mix, void *opaque_stream); /** @@ -311,12 +309,12 @@ extern void* __kitcuda_launch_kernel(const void *fat_bin, const char *kern_name, */ extern void __kitcuda_use_occupancy_launch(bool enable); -/** +/** * Enable/Disable the tuning of occupancy-based calculations for * the determination of kernel launch parameters. If the `enable` - * parameter is set to `true` both occupancy-based launches and - * the refinement of the occupancy-driven results will be used. - * `enable == true` will enable occupancy_launches. + * parameter is set to `true` both occupancy-based launches and + * the refinement of the occupancy-driven results will be used. + * `enable == true` will enable occupancy_launches. * * @param enable - enable/disable tuned occupancy launches. */ @@ -368,7 +366,7 @@ extern void __kitcuda_set_custom_launch_params(unsigned blks_per_grid, unsigned threads_per_blk); /** - * Return a thread-aware stream. + * Return a thread-aware stream. */ extern void* __kitcuda_get_thread_stream(); diff --git a/kitsune/runtime/cuda/memory.cpp b/kitsune/runtime/cuda/memory.cpp index 94fcaad37bb1214..b3bcaba0ed663e5 100644 --- a/kitsune/runtime/cuda/memory.cpp +++ b/kitsune/runtime/cuda/memory.cpp @@ -124,7 +124,7 @@ __kitcuda_mem_calloc_managed(size_t count, size_t element_size) { return (void *)memp; } -__attribute__((malloc)) void *__kitcuda__mem_realloc_managed(void *ptr, +__attribute__((malloc)) void *__kitcuda_mem_realloc_managed(void *ptr, size_t size) { assert(size != 0 && "zero-valued size!"); diff --git a/kitsune/runtime/cuda/numpy/kitrt.c b/kitsune/runtime/cuda/numpy/kitrt.cpp similarity index 92% rename from kitsune/runtime/cuda/numpy/kitrt.c rename to kitsune/runtime/cuda/numpy/kitrt.cpp index afb572e0e160af4..037206d539bdec4 100644 --- a/kitsune/runtime/cuda/numpy/kitrt.c +++ b/kitsune/runtime/cuda/numpy/kitrt.cpp @@ -62,6 +62,7 @@ #define NPY_TARGET_VERSION NPY_1_22_API_VERSION #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION #include "../kitcuda.h" +#include "../../memory_map.h" #include typedef struct { @@ -113,13 +114,6 @@ static void __kitrt_NumPyFree(void *ctx, void *ptr, size_t size) { funcs->free(ptr); } -static KitRTAllocatorFuncs __kitrt_sys_allocators_ctx = { - malloc, - calloc, - realloc, - free -}; - static KitRTAllocatorFuncs __kitrt_cuda_allocators_ctx = { __kitcuda_mem_alloc_managed, __kitcuda_mem_calloc_managed, @@ -139,25 +133,12 @@ static PyDataMem_Handler __kitrt_data_handler = { } }; -static PyDataMem_Handler __sys_data_handler = { - "kit_rt_data_allocator", - 1, - { - &__kitrt_sys_allocators_ctx, - __kitrt_NumPyMalloc, - __kitrt_NumPyCalloc, - __kitrt_NumPyRealloc, - __kitrt_NumPyFree - } -}; - -static PyObject *kitrt_InfoMethod() { - extern void __kitrt_print_memory_map(); +static PyObject *kitrt_InfoMethod(PyObject*, PyObject*) { __kitrt_print_memory_map(); Py_RETURN_NONE; } -static PyObject *kitrt_EnableMemHandler() { +static PyObject *kitrt_EnableMemHandler(PyObject*, PyObject*) { PyObject *kitrt_handler = PyCapsule_New(&__kitrt_data_handler, "mem_handler", NULL); if (kitrt_handler != NULL) { (void)PyDataMem_SetHandler(kitrt_handler); @@ -166,7 +147,7 @@ static PyObject *kitrt_EnableMemHandler() { return kitrt_handler; } -static PyObject *kitrt_DisableMemHandler() { +static PyObject *kitrt_DisableMemHandler(PyObject*, PyObject*) { (void)PyDataMem_SetHandler(NULL); return NULL; } @@ -190,7 +171,7 @@ static PyModuleDef def = { }; -PyMODINIT_FUNC PyInit_kitrt(void) { +extern "C" PyMODINIT_FUNC PyInit_kitrt(void) { import_array(); PyObject *kitrt_handler = PyCapsule_New(&__kitrt_data_handler, "mem_handler", NULL); diff --git a/kitsune/runtime/hip/memory.cpp b/kitsune/runtime/hip/memory.cpp index 781996a1f7ac93e..be216c48fe86fe8 100644 --- a/kitsune/runtime/hip/memory.cpp +++ b/kitsune/runtime/hip/memory.cpp @@ -92,7 +92,7 @@ __attribute__((malloc)) void *__kithip_mem_calloc_managed(size_t count, return (void *)memp; } -__attribute__((malloc)) void *__kithip__mem_realloc_managed(void *ptr, +__attribute__((malloc)) void *__kithip_mem_realloc_managed(void *ptr, size_t size) { assert(size != 0 && "zero-valued size!"); void *memptr = nullptr;