Skip to content

Commit

Permalink
Fix for out-of-bounds (OOB) LDS fill in the wvSpltK sml version (#63)
Browse files: browse the repository at this point in the history
  • Loading branch information
amd-hhashemi authored Jun 21, 2024
1 parent 3e9dac6 commit c455e9c
Showing 1 changed file with 1 addition and 0 deletions.
1 change: 1 addition & 0 deletions csrc/custom/custom_kernels.cu
Original file line number | Diff line number | Diff line change
Expand Up @@ -347,6 +347,7 @@ __global__ void wvSpltK_hf_m1_sml_(const int K, const int N, const DTYPE* B,
for (uint32_t k = 0; k < min(K * M, 32 * 1024);
k += THRDS * WvPrGrp * A_CHUNK) {
uint32_t k_in = k + ((threadIdx.y * THRDS + threadIdx.x) * A_CHUNK);
if (k_in >= min(K * M, 32 * 1024)) break;
((bigType*)(&s[k_in]))->b128 = ((bigType*)(&A[k_in]))->b128;
}
__syncthreads();
Expand Down

0 comments on commit c455e9c

Please sign in to comment.