Skip to content

Commit

Permalink
added tests
Browse files Browse the repository at this point in the history
  • Loading branch information
eliazonta committed Nov 24, 2023
1 parent 6551d1f commit 863abe9
Show file tree
Hide file tree
Showing 3 changed files with 239 additions and 0 deletions.
1 change: 1 addition & 0 deletions src/main.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctime>

#include "../include/parser.h"
#include "../include/sequential.h"
Expand Down
149 changes: 149 additions & 0 deletions test/test_cuda.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "../include/parser.h"
#include "../include/parallel.cuh"
#include "../include/utils.cuh"

#define NUM_THREADS 1024
#define NUM_REPEAT 100
#define PRINT_MODE 2

/* Abort the test with a readable message when a CUDA runtime call fails. */
static void test_cuda_check(cudaError_t status, const char *what) {
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

/*
 * Test driver for the CUDA SpMV kernel.
 *
 * Reads a matrix through read_matrix(), starts from a vector of ones and
 * applies spmv_csr NUM_REPEAT times, feeding each result back in as the next
 * input vector. With PRINT_MODE == 2 the three matrix arrays, the initial
 * vector and the final vector are written to stdout. The loop (host<->device
 * copies included) is timed with CUDA events and the elapsed time is printed.
 *
 * Command-line arguments are accepted but not used.
 * Returns 0 on success; exits with EXIT_FAILURE if a checked CUDA call fails.
 */
int main(int argc, const char * argv[]) {
    (void) argc;
    (void) argv;

    fprintf(stdout, "============================\n");
    fprintf(stdout, "*** Parallel SpMV (CUDA) ***\n");
    fprintf(stdout, "----------------------------\n");
    fprintf(stdout, "*********** TEST ***********\n");
    fprintf(stdout, "----------------------------\n");
    fprintf(stdout, "============================\n");

    int *row_ptr, *col_ind, num_rows, num_cols, num_vals, numSMs;
    float *values;

    int num_thread = NUM_THREADS;
    int num_repeat = NUM_REPEAT;
    int print_mode = PRINT_MODE;
    // A path must be a string literal (double quotes); single quotes form a
    // multi-character integer constant. The name matches the serial test.
    // NOTE(review): original spelled this "arcs130" - confirm the data file name.
    const char *filename = "data/arc130.mtx";

    read_matrix(&row_ptr, &col_ind, &values, filename, &num_rows, &num_cols, &num_vals);

    // Both vectors hold num_rows entries because y is copied back into x
    // after every multiplication.
    // NOTE(review): this presumably requires a square matrix - confirm.
    float *x = malloc_host<float>(num_rows);
    float *y = malloc_host<float>(num_rows);
    for (int i = 0; i < num_rows; ++i) {
        x[i] = 1.0f;
        y[i] = 0.0f;
    }

    if (print_mode == 2) {
        // Values Array
        fprintf(stdout, "Values Array:\n");
        for (int i = 0; i < num_vals; i++) {
            fprintf(stdout, "%.6f ", values[i]);
        }

        // Column Indices Array
        fprintf(stdout, "\n\nColumn Indices Array:\n");
        for (int i = 0; i < num_vals; i++) {
            fprintf(stdout, "%d ", col_ind[i]);
        }

        // Row Pointer Array
        fprintf(stdout, "\n\nRow Pointer Array:\n");
        for (int i = 0; i < (num_rows + 1); i++) {
            fprintf(stdout, "%d ", row_ptr[i]);
        }

        fprintf(stdout, "\n\nInitial Vector:\n");
        for (int i = 0; i < num_rows; i++) {
            fprintf(stdout, "%.1f ", x[i]);
        }

        // Header only; the vector itself is printed after the timed loop.
        fprintf(stdout, "\n\nResulting Vector:\n");
    }

    // Allocate on device
    int *d_row_ptr = malloc_device<int>(num_rows + 1);
    int *d_col_ind = malloc_device<int>(num_vals);

    float *d_values = malloc_device<float>(num_vals);
    float *d_x = malloc_device<float>(num_rows);
    float *d_y = malloc_device<float>(num_rows);

    // Get number of SMs of device 0; it scales the grid size below.
    test_cuda_check(cudaDeviceGetAttribute(&numSMs, cudaDevAttrMultiProcessorCount, 0),
                    "cudaDeviceGetAttribute");

    // Copy the matrix arrays from host to device (done once, outside the timed loop).
    auto s = get_time();
    copy_to_device(d_row_ptr, row_ptr, (num_rows + 1) * sizeof(int));
    copy_to_device(d_col_ind, col_ind, num_vals * sizeof(int));
    copy_to_device(d_values, values, num_vals * sizeof(float));
    auto time_H2D = get_time() - s;
    (void) time_H2D;  // measured but currently not reported

    // Time the iterations
    float elapsed_time;
    cudaEvent_t start, stop;
    test_cuda_check(cudaEventCreate(&start), "cudaEventCreate(start)");
    test_cuda_check(cudaEventCreate(&stop), "cudaEventCreate(stop)");

    test_cuda_check(cudaEventRecord(start), "cudaEventRecord(start)");

    for (int iter = 0; iter < num_repeat; iter++) {
        copy_to_device(d_x, x, num_rows * sizeof(float));
        copy_to_device(d_y, y, num_rows * sizeof(float));

        // Call kernel function
        spmv_csr<<<32 * numSMs, num_thread>>>(d_row_ptr, d_col_ind, d_values, num_rows, d_x, d_y);
        // A launch does not return a status; a bad configuration only shows up here.
        test_cuda_check(cudaGetLastError(), "spmv_csr launch");

        // Copy the result to x_{i} at the end of each iteration, and use it in iteration x_{i+1}
        copy_to_host(y, d_y, num_rows * sizeof(float));
        for (int j = 0; j < num_rows; j++) {
            x[j] = y[j];
            y[j] = 0.0f;
        }
    }

    test_cuda_check(cudaEventRecord(stop), "cudaEventRecord(stop)");

    test_cuda_check(cudaEventSynchronize(stop), "cudaEventSynchronize(stop)");
    test_cuda_check(cudaEventElapsedTime(&elapsed_time, start, stop), "cudaEventElapsedTime");

    // Print resulting vector
    if (print_mode == 2) {
        for (int i = 0; i < num_rows; i++) {
            fprintf(stdout, "%.6f ", x[i]);
        }
        fprintf(stdout, "\n");
    }

    // Print elapsed time
    printf("\nParallel Running time: %.4f ms\n", elapsed_time);
    printf("Num SMs: %d\n", numSMs);

    cudaEventDestroy(start);
    cudaEventDestroy(stop);

    cudaFree(d_row_ptr);
    cudaFree(d_col_ind);
    cudaFree(d_values);
    cudaFree(d_x);
    cudaFree(d_y);

    free(row_ptr);
    free(col_ind);
    free(values);
    // NOTE(review): x and y come from malloc_host<float>, whose allocator is not
    // visible here; release them with the matching deallocator once confirmed.

    return 0;
}

89 changes: 89 additions & 0 deletions test/test_serial.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctime>
#include <iostream>

#include "../include/parser.h"
#include "../include/sequential.h"

#define NUM_REPEAT 100
#define PRINT_MODE 2

/*
 * Test driver for the sequential SpMV routine.
 *
 * Reads a matrix through read_matrix(), starts from a vector of ones and
 * applies spmv_csr NUM_REPEAT times, feeding each result back in as the next
 * input vector. With PRINT_MODE == 2 the three matrix arrays, the initial
 * vector and the final vector are written to stdout. The loop is timed with
 * clock() and the elapsed time is printed in milliseconds.
 *
 * Command-line arguments are accepted but not used.
 * Returns EXIT_SUCCESS, or EXIT_FAILURE when the vectors cannot be allocated.
 */
int main(int argc, char **argv) {
    (void) argc;
    (void) argv;

    fprintf(stdout, "============================\n");
    fprintf(stdout, "****** Sequential SpMV *****\n");
    fprintf(stdout, "----------------------------\n");
    fprintf(stdout, "*********** TEST ***********\n");
    fprintf(stdout, "----------------------------\n");
    fprintf(stdout, "============================\n");

    int *row_ptr, *col_ind, num_rows, num_cols, num_vals;
    float *values, elapsed_time;

    int num_repeat = NUM_REPEAT;
    int print_mode = PRINT_MODE;
    // A path must be a string literal (double quotes); single quotes form a
    // multi-character integer constant.
    const char *filename = "data/arc130.mtx";

    read_matrix(&row_ptr, &col_ind, &values, filename, &num_rows, &num_cols, &num_vals);

    // Both vectors hold num_rows entries because y is copied back into x
    // after every multiplication.
    // NOTE(review): this presumably requires a square matrix - confirm.
    float *x = (float *) malloc(num_rows * sizeof(float));
    float *y = (float *) malloc(num_rows * sizeof(float));
    if (x == NULL || y == NULL) {
        fprintf(stderr, "Failed to allocate vectors of %d floats\n", num_rows);
        free(x);
        free(y);
        free(row_ptr);
        free(col_ind);
        free(values);
        return EXIT_FAILURE;
    }
    for (int i = 0; i < num_rows; ++i) {
        x[i] = 1.0f;
        y[i] = 0.0f;
    }

    if (print_mode == 2) {
        // Values Array
        fprintf(stdout, "Values Array:\n");
        for (int i = 0; i < num_vals; ++i) {
            fprintf(stdout, "%.6f ", values[i]);
        }

        // Row Pointer Array
        fprintf(stdout, "\n\nRow Pointer Array:\n");
        for (int i = 0; i < (num_rows + 1); ++i) {
            fprintf(stdout, "%d ", row_ptr[i]);
        }

        // Column Indices Array
        fprintf(stdout, "\n\nColumn Indices Array:\n");
        for (int i = 0; i < num_vals; ++i) {
            fprintf(stdout, "%d ", col_ind[i]);
        }

        fprintf(stdout, "\n\nInitial Vector:\n");
        for (int i = 0; i < num_rows; ++i) {
            fprintf(stdout, "%.1f ", x[i]);
        }
    }

    // Time the iterations
    clock_t start = clock();
    for (int iter = 0; iter < num_repeat; ++iter) {
        spmv_csr(row_ptr, col_ind, values, num_rows, x, y);

        // Move this iteration's result into x for the next one and clear y.
        for (int j = 0; j < num_rows; ++j) {
            x[j] = y[j];
            y[j] = 0.0f;
        }
    }
    clock_t stop = clock();
    elapsed_time = (((float) (stop - start)) / CLOCKS_PER_SEC) * 1000; // ms

    // Print resulting vector
    if (print_mode == 2) {
        fprintf(stdout, "\n\n Resulting Vector:\n");
        for (int i = 0; i < num_rows; i++) {
            fprintf(stdout, "%.6f ", x[i]);
        }
        fprintf(stdout, "\n");
    }
    printf("\nSerial Running time: %.4f ms\n", elapsed_time);

    free(x);
    free(y);
    free(row_ptr);
    free(col_ind);
    free(values);
    return EXIT_SUCCESS;
}

0 comments on commit 863abe9

Please sign in to comment.