Commit

blocks
Hanting Zhang committed Mar 15, 2024
1 parent 1d86d4a commit 1d0d021
Showing 1 changed file with 49 additions and 21 deletions.
70 changes: 49 additions & 21 deletions spmvm/spmvm.cuh
@@ -40,6 +40,7 @@ struct spmvm_context_t
     scalar_t *d_scalars;
     // output scalars
     scalar_t *d_out;
+
 };

template <typename scalar_t>
@@ -88,6 +89,10 @@ struct spmvm_host_t
     size_t num_rows;
     size_t num_cols;
     size_t nnz;
+
+    const size_t *blocks;
+    size_t num_blocks;
+    size_t block_size;
 };
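
The new spmvm_host_t fields describe a partition of the matrix rows: blocks holds num_blocks row boundaries, with each consecutive pair [blocks[i], blocks[i+1]) delimiting one block, and block_size bounds the number of nonzeros a block may contain. As a point of reference, here is a minimal sketch of how such a partition could be built from the CSR row offsets; this helper is hypothetical and not part of the commit:

#include <cstddef>
#include <vector>

// Hypothetical helper: split rows into blocks of at most `block_size`
// nonzeros each, using the CSR row offsets. Assumes no single row holds
// more than `block_size` nonzeros.
std::vector<size_t> partition_rows(const size_t *row_ptr, size_t num_rows,
                                   size_t block_size)
{
    std::vector<size_t> blocks{0};
    size_t start = 0;
    for (size_t row = 1; row <= num_rows; ++row) {
        // close the current block before it would exceed block_size nonzeros,
        // but never emit an empty block
        if (row_ptr[row] - row_ptr[start] > block_size && row - 1 > start) {
            blocks.push_back(row - 1);
            start = row - 1;
        }
    }
    blocks.push_back(num_rows); // final boundary
    return blocks;
}

Under this layout num_blocks counts boundary entries, so num_blocks boundaries describe num_blocks - 1 blocks, which is exactly how many iterations the upload loop below performs.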

template <typename scalar_t>
@@ -115,21 +120,21 @@ public:
     // scalar_t *d_out;
 
 public:
-    spmvm_t(size_t num_rows, size_t num_cols, size_t nnz, int device_id = -1)
+    spmvm_t(spmvm_host_t<scalar_t> *csr, int device_id = -1)
         : gpu(select_gpu(device_id))
     {
         this->context = reinterpret_cast<spmvm_context_t<scalar_t> *>(malloc(sizeof(spmvm_context_t<scalar_t>)));
 
-        context->d_data = reinterpret_cast<scalar_t *>(gpu.Dmalloc(nnz * sizeof(scalar_t)));
-        context->d_col_idx = reinterpret_cast<size_t *>(gpu.Dmalloc(nnz * sizeof(size_t)));
-        context->d_row_ptr = reinterpret_cast<size_t *>(gpu.Dmalloc((num_rows + 1) * sizeof(size_t)));
+        context->d_data = reinterpret_cast<scalar_t *>(gpu.Dmalloc(csr->block_size * sizeof(scalar_t)));
+        context->d_col_idx = reinterpret_cast<size_t *>(gpu.Dmalloc(csr->block_size * sizeof(size_t)));
+        context->d_row_ptr = reinterpret_cast<size_t *>(gpu.Dmalloc((csr->num_rows + 1) * sizeof(size_t)));
 
-        context->num_rows = num_rows;
-        context->num_cols = num_cols;
-        context->nnz = nnz;
+        context->num_rows = csr->num_rows;
+        context->num_cols = csr->num_cols;
+        context->nnz = csr->nnz;
 
-        context->d_scalars = reinterpret_cast<scalar_t *>(gpu.Dmalloc(num_cols * sizeof(scalar_t)));
-        context->d_out = reinterpret_cast<scalar_t *>(gpu.Dmalloc(num_rows * sizeof(scalar_t)));
+        context->d_scalars = reinterpret_cast<scalar_t *>(gpu.Dmalloc(csr->num_cols * sizeof(scalar_t)));
+        context->d_out = reinterpret_cast<scalar_t *>(gpu.Dmalloc(csr->num_rows * sizeof(scalar_t)));
 
         this->owned = true;
     }
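
The constructor now takes the host-side handle directly and sizes the value and column-index buffers by csr->block_size rather than nnz, so device memory is bounded by the largest block instead of the whole matrix. A sketch of how it might be called; the host arrays, sizes, and the partition_rows helper from the sketch above are assumptions, not shown in this diff:

// Hypothetical usage of the new constructor, reusing the field names
// introduced in this commit. `scalar_t` stands in for whatever field
// type the repo instantiates.
std::vector<size_t> blocks =
    partition_rows(host_row_ptr, n_rows, max_nnz_per_block);

spmvm_host_t<scalar_t> csr;
csr.data = host_data;            // nnz values
csr.col_idx = host_col_idx;      // nnz column indices
csr.row_ptr = host_row_ptr;      // n_rows + 1 offsets
csr.num_rows = n_rows;
csr.num_cols = n_cols;
csr.nnz = nnz;
csr.blocks = blocks.data();      // num_blocks boundaries -> num_blocks - 1 blocks
csr.num_blocks = blocks.size();
csr.block_size = max_nnz_per_block;

spmvm_t<scalar_t> mul(&csr);     // device buffers sized by block_size, not nnz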
@@ -202,21 +207,44 @@ public:

         try
         {
-            if (csr->data)
-                gpu[2].HtoD(&context->d_data[0], &csr->data[0], context->nnz);
-            if (csr->col_idx)
-                gpu[2].HtoD(&context->d_col_idx[0], &csr->col_idx[0], context->nnz);
-            if (csr->row_ptr)
-                gpu[2].HtoD(&context->d_row_ptr[0], &csr->row_ptr[0], context->num_rows + 1);
 
             if (scalars)
                 gpu[2].HtoD(&context->d_scalars[0], &scalars[0], context->num_cols);
 
-            spmvm_context_t<scalar_t> *d_context = reinterpret_cast<spmvm_context_t<scalar_t> *>(gpu[2].Dmalloc(sizeof(spmvm_context_t<scalar_t>)));
-            gpu[2].HtoD(d_context, context, 1);
-            cudaMemsetAsync(&context->d_out[0], 0, context->num_rows * sizeof(scalar_t), gpu[2]);
-            csr_vector_mul<scalar_t><<<gpu.sm_count(), nthreads, 0, gpu[2]>>>(d_context);
-            CUDA_OK(cudaGetLastError());
-
+            size_t start_row = 0;
+            size_t end_row = 0;
+            size_t num_rows = 0;
+
+            size_t start_data = 0;
+            size_t end_data = 0;
+            size_t num_data = 0;
+
+            for (size_t i = 0; i < csr->num_blocks - 1; ++i) {
+                start_row = csr->blocks[i];
+                end_row = csr->blocks[i + 1];
+                num_rows = end_row - start_row;
+
+                start_data = csr->row_ptr[start_row];
+                end_data = csr->row_ptr[end_row];
+                num_data = end_data - start_data;
+
+                if (csr->data)
+                    gpu[i&1].HtoD(&context->d_data[0], &csr->data[start_data], num_data);
+                if (csr->col_idx)
+                    gpu[i&1].HtoD(&context->d_col_idx[0], &csr->col_idx[start_data], num_data);
+                if (csr->row_ptr) {
+                    gpu[i&1].HtoD(&context->d_row_ptr[0], &csr->row_ptr[start_row], num_rows + 1);
+                    csr->num_rows = num_rows;
+                }
+
+                gpu[i&1].sync();
+
+                spmvm_context_t<scalar_t> *d_context = reinterpret_cast<spmvm_context_t<scalar_t> *>(gpu[i&1].Dmalloc(sizeof(spmvm_context_t<scalar_t>)));
+                gpu[i&1].HtoD(d_context, context, 1);
+
+                csr_vector_mul<scalar_t><<<gpu.sm_count(), nthreads, 0, gpu[i&1]>>>(d_context);
+                CUDA_OK(cudaGetLastError());
+            }
+
             gpu[2].DtoH(&out[0], &context->d_out[0], context->num_rows);
             gpu.sync();
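
The rewritten body streams the matrix block by block: each iteration uploads one block's values, column indices, and row offsets, then launches the kernel for that block, alternating between two streams via gpu[i&1] in the usual double-buffering idiom (the scalars upload and final DtoH stay on gpu[2], apparently a third stream in the wrapper). A minimal sketch of that idiom in plain CUDA runtime calls, assuming nothing about this repo's gpu_t wrapper and giving each stream its own buffer so the two never touch each other's data:

#include <cstddef>
#include <cuda_runtime.h>

// Sketch: copy chunk i on stream i&1 while the kernel for chunk i-1 may
// still be running on the other stream. True copy/compute overlap also
// requires page-locked (pinned) host memory.
void stream_chunks(const float *h_src, float *d_buf[2],
                   size_t chunk, size_t num_chunks)
{
    cudaStream_t s[2];
    cudaStreamCreate(&s[0]);
    cudaStreamCreate(&s[1]);

    for (size_t i = 0; i < num_chunks; ++i) {
        int b = i & 1; // ping-pong buffer/stream index
        cudaMemcpyAsync(d_buf[b], h_src + i * chunk, chunk * sizeof(float),
                        cudaMemcpyHostToDevice, s[b]);
        // process_chunk<<<grid, threads, 0, s[b]>>>(d_buf[b], chunk);
    }

    cudaStreamSynchronize(s[0]);
    cudaStreamSynchronize(s[1]);
    cudaStreamDestroy(s[0]);
    cudaStreamDestroy(s[1]);
}

Note that the committed loop, unlike the sketch, reuses a single set of d_data/d_col_idx buffers across both streams and calls gpu[i&1].sync() before each launch, so the overlap it actually achieves depends on the gpu_t wrapper's stream semantics.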
