diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..656fae0 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,64 @@ +{ + "files.associations": { + "__config": "cpp", + "cassert": "cpp", + "chrono": "cpp", + "array": "cpp", + "atomic": "cpp", + "bit": "cpp", + "*.tcc": "cpp", + "bitset": "cpp", + "cctype": "cpp", + "clocale": "cpp", + "cmath": "cpp", + "compare": "cpp", + "concepts": "cpp", + "condition_variable": "cpp", + "cstdarg": "cpp", + "cstddef": "cpp", + "cstdint": "cpp", + "cstdio": "cpp", + "cstdlib": "cpp", + "cstring": "cpp", + "ctime": "cpp", + "cwchar": "cpp", + "cwctype": "cpp", + "deque": "cpp", + "string": "cpp", + "unordered_map": "cpp", + "vector": "cpp", + "exception": "cpp", + "algorithm": "cpp", + "functional": "cpp", + "iterator": "cpp", + "memory": "cpp", + "memory_resource": "cpp", + "numeric": "cpp", + "optional": "cpp", + "random": "cpp", + "ratio": "cpp", + "string_view": "cpp", + "system_error": "cpp", + "tuple": "cpp", + "type_traits": "cpp", + "utility": "cpp", + "initializer_list": "cpp", + "iomanip": "cpp", + "iosfwd": "cpp", + "iostream": "cpp", + "istream": "cpp", + "limits": "cpp", + "mutex": "cpp", + "new": "cpp", + "numbers": "cpp", + "ostream": "cpp", + "semaphore": "cpp", + "sstream": "cpp", + "stdexcept": "cpp", + "stop_token": "cpp", + "streambuf": "cpp", + "thread": "cpp", + "typeinfo": "cpp", + "__nullptr": "cpp" + } +} \ No newline at end of file diff --git a/msm/pippenger.cuh b/msm/pippenger.cuh index 91d3bf3..af04526 100644 --- a/msm/pippenger.cuh +++ b/msm/pippenger.cuh @@ -363,6 +363,7 @@ class msm_t affine_h *d_points; scalar_t *d_scalars; vec2d_t d_hist; + bool owned; template using vec_t = slice_t; @@ -386,9 +387,9 @@ class msm_t } public: - msm_t(const affine_t points[], size_t np, + msm_t(const affine_t points[], size_t np, bool owned, size_t ffi_affine_sz = sizeof(affine_t), int device_id = -1) - : gpu(select_gpu(device_id)), d_points(nullptr), d_scalars(nullptr) + : owned(owned), gpu(select_gpu(device_id)), d_points(nullptr), d_scalars(nullptr) { npoints = (np + WARP_SZ - 1) & ((size_t)0 - WARP_SZ); @@ -408,20 +409,20 @@ public: uint32_t row_sz = 1U << (wbits - 1); size_t d_buckets_sz = (nwins * row_sz) + (gpu.sm_count() * BATCH_ADD_BLOCK_SIZE / WARP_SZ); - size_t d_blob_sz = (d_buckets_sz * sizeof(d_buckets[0])) + (nwins * row_sz * sizeof(uint32_t)) + (points ? npoints * sizeof(d_points[0]) : 0); + size_t d_blob_sz = (d_buckets_sz * sizeof(d_buckets[0])) + (nwins * row_sz * sizeof(uint32_t)); d_buckets = reinterpret_cast(gpu.Dmalloc(d_blob_sz)); d_hist = vec2d_t(&d_buckets[d_buckets_sz], row_sz); if (points) { - d_points = reinterpret_cast(d_hist[nwins]); + d_points = reinterpret_cast(gpu.Dmalloc(points ? npoints * sizeof(d_points[0]) : 0)); gpu.HtoD(d_points, points, np, ffi_affine_sz); - npoints = np; } - else - { + + if (owned) npoints = 0; - } + else + npoints = np; } inline msm_t(vec_t points, size_t ffi_affine_sz = sizeof(affine_t), int device_id = -1) @@ -433,6 +434,17 @@ public: gpu.sync(); if (d_buckets) gpu.Dfree(d_buckets); + if (d_points && owned) + gpu.Dfree(d_points); + } + affine_h *get_d_points() + { + return d_points; + } + void set_d_points(affine_h *d_points) + { + assert(!this->owned); + this->d_points = d_points; } private: @@ -768,6 +780,40 @@ private: } }; +template +struct msm_context_t +{ + T *d_points; +}; + +template +void drop_msm_context_t(msm_context_t &ref) +{ + CUDA_OK(cudaFree(ref.d_points)); +} + +template +static RustError mult_pippenger_init(const affine_t points[], size_t npoints, + msm_context_t *msm_context) +{ + try + { + msm_t msm{points, npoints, false}; + msm_context->d_points = msm.get_d_points(); + return RustError{cudaSuccess}; + } + catch (const cuda_error &e) + { +#ifdef TAKE_RESPONSIBILITY_FOR_ERROR_MESSAGE + return RustError{e.code(), e.what()}; +#else + return RustError{e.code()}; +#endif + } +} + template static RustError mult_pippenger(point_t *out, const affine_t points[], size_t npoints, const scalar_t scalars[], bool mont = true, @@ -775,7 +821,7 @@ static RustError mult_pippenger(point_t *out, const affine_t points[], size_t np { try { - msm_t msm{nullptr, npoints}; + msm_t msm{nullptr, npoints, true}; return msm.invoke(*out, slice_t{points, npoints}, scalars, mont, ffi_affine_sz); } @@ -789,4 +835,30 @@ static RustError mult_pippenger(point_t *out, const affine_t points[], size_t np #endif } } + +template +static RustError mult_pippenger_with(point_t *out, msm_context_t *msm_context, size_t npoints, + const scalar_t scalars[], bool mont = true, + size_t ffi_affine_sz = sizeof(affine_t)) +{ + try + { + msm_t msm{nullptr, npoints, false}; + msm.set_d_points(msm_context->d_points); + return msm.invoke(*out, nullptr, npoints, + scalars, mont, ffi_affine_sz); + } + catch (const cuda_error &e) + { + out->inf(); +#ifdef TAKE_RESPONSIBILITY_FOR_ERROR_MESSAGE + return RustError{e.code(), e.what()}; +#else + return RustError{e.code()}; #endif + } +} + +#endif \ No newline at end of file