Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[kitrt] Fixes for numpy extension module #57

Merged
merged 2 commits into from
Sep 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 7 additions & 9 deletions kitsune/runtime/cuda/kitcuda.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,7 @@
#include <stdlib.h>
#include <assert.h>

#ifndef NPY_TARGET_VERSION
#include "kitrt.h"
#endif

#include <cuda.h>

Expand Down Expand Up @@ -294,9 +292,9 @@ extern void __kitcuda_memcpy_sym_to_device(void *host_sym, uint64_t dev_sym,
* @param threads_per_blk - threads per block (set to zero for auto determination).
*/
extern void* __kitcuda_launch_kernel(const void *fat_bin, const char *kern_name,
void **kern_args, uint64_t trip_count,
void **kern_args, uint64_t trip_count,
int threads_per_blk,
const KitRTInstMix *inst_mix,
const KitRTInstMix *inst_mix,
void *opaque_stream);

/**
Expand All @@ -311,12 +309,12 @@ extern void* __kitcuda_launch_kernel(const void *fat_bin, const char *kern_name,
*/
extern void __kitcuda_use_occupancy_launch(bool enable);

/**
/**
* Enable/Disable the tuning of occupancy-based calculations for
* the determination of kernel launch parameters. If the `enable`
* parameter is set to `true` both occupancy-based launches and
* the refinement of the occupancy-driven results will be used.
* `enable == true` will enable occupancy_launches.
* parameter is set to `true` both occupancy-based launches and
* the refinement of the occupancy-driven results will be used.
* `enable == true` will enable occupancy_launches.
*
* @param enable - enable/disable tuned occupancy launches.
*/
Expand Down Expand Up @@ -368,7 +366,7 @@ extern void __kitcuda_set_custom_launch_params(unsigned blks_per_grid,
unsigned threads_per_blk);

/**
* Return a thread-aware stream.
* Return a thread-aware stream.
*/
extern void* __kitcuda_get_thread_stream();

Expand Down
2 changes: 1 addition & 1 deletion kitsune/runtime/cuda/memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ __kitcuda_mem_calloc_managed(size_t count, size_t element_size) {
return (void *)memp;
}

__attribute__((malloc)) void *__kitcuda__mem_realloc_managed(void *ptr,
__attribute__((malloc)) void *__kitcuda_mem_realloc_managed(void *ptr,
size_t size) {
assert(size != 0 && "zero-valued size!");

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
#define NPY_TARGET_VERSION NPY_1_22_API_VERSION
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#include "../kitcuda.h"
#include "../../memory_map.h"
#include <numpy/arrayobject.h>

typedef struct {
Expand Down Expand Up @@ -113,13 +114,6 @@ static void __kitrt_NumPyFree(void *ctx, void *ptr, size_t size) {
funcs->free(ptr);
}

static KitRTAllocatorFuncs __kitrt_sys_allocators_ctx = {
malloc,
calloc,
realloc,
free
};

static KitRTAllocatorFuncs __kitrt_cuda_allocators_ctx = {
__kitcuda_mem_alloc_managed,
__kitcuda_mem_calloc_managed,
Expand All @@ -139,25 +133,12 @@ static PyDataMem_Handler __kitrt_data_handler = {
}
};

static PyDataMem_Handler __sys_data_handler = {
"kit_rt_data_allocator",
1,
{
&__kitrt_sys_allocators_ctx,
__kitrt_NumPyMalloc,
__kitrt_NumPyCalloc,
__kitrt_NumPyRealloc,
__kitrt_NumPyFree
}
};

static PyObject *kitrt_InfoMethod() {
extern void __kitrt_print_memory_map();
static PyObject *kitrt_InfoMethod(PyObject*, PyObject*) {
__kitrt_print_memory_map();
Py_RETURN_NONE;
}

static PyObject *kitrt_EnableMemHandler() {
static PyObject *kitrt_EnableMemHandler(PyObject*, PyObject*) {
PyObject *kitrt_handler = PyCapsule_New(&__kitrt_data_handler, "mem_handler", NULL);
if (kitrt_handler != NULL) {
(void)PyDataMem_SetHandler(kitrt_handler);
Expand All @@ -166,7 +147,7 @@ static PyObject *kitrt_EnableMemHandler() {
return kitrt_handler;
}

static PyObject *kitrt_DisableMemHandler() {
static PyObject *kitrt_DisableMemHandler(PyObject*, PyObject*) {
(void)PyDataMem_SetHandler(NULL);
return NULL;
}
Expand All @@ -190,7 +171,7 @@ static PyModuleDef def = {
};


PyMODINIT_FUNC PyInit_kitrt(void) {
extern "C" PyMODINIT_FUNC PyInit_kitrt(void) {
import_array();

PyObject *kitrt_handler = PyCapsule_New(&__kitrt_data_handler, "mem_handler", NULL);
Expand Down
2 changes: 1 addition & 1 deletion kitsune/runtime/hip/memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ __attribute__((malloc)) void *__kithip_mem_calloc_managed(size_t count,
return (void *)memp;
}

__attribute__((malloc)) void *__kithip__mem_realloc_managed(void *ptr,
__attribute__((malloc)) void *__kithip_mem_realloc_managed(void *ptr,
size_t size) {
assert(size != 0 && "zero-valued size!");
void *memptr = nullptr;
Expand Down