Skip to content

Commit

Permalink
alignment fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
root committed Jun 18, 2024
1 parent 674807c commit 7af9c41
Showing 1 changed file with 3 additions and 5 deletions.
8 changes: 3 additions & 5 deletions csrc/custom/custom_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -383,7 +383,7 @@ __global__ void wvSpltK_hf_m1_sml_(const int K, const int N, const DTYPE* B,

// Fetch A activation matrix in interleaved fashion from LDS or memory
for (int m = 0; m < M; m++) {
bigA[m][k2] = *((const bigType*)(&(s[k_ + K * m])));
bigA[m][k2] = *((const bigType*)(&(s[k_ + K * m])));
}
}

Expand Down Expand Up @@ -450,8 +450,6 @@ __global__ void wvSpltK_hf_m1_sml_(const int K, const int N, const DTYPE* B,
}
}



__global__ void wvSpltK_hf_m1_(const int K, const int N, const DTYPE* B,
const DTYPE* __restrict__ A, DTYPE* C,
const int CuCount) {
Expand Down Expand Up @@ -1863,8 +1861,8 @@ void wvSpltK_(void* in_a, void* in_b, void* out_c, const int M_in,
switch (N_in) {
case 1:
if ((K_in <= 32 * 1024) && (M_in % 2 == 0)) {
wvSpltK_hf_m1_sml_<<<grid, block, 0, stream>>>(K_in, M_in, af4, bf4,
c, CuCount);
wvSpltK_hf_m1_sml_<<<grid, block, 0, stream>>>(K_in, M_in, af4, bf4, c,
CuCount);
} else {
wvSpltK_hf_m1_<<<grid, block, 0, stream>>>(K_in, M_in, af4, bf4, c,
CuCount);
Expand Down

0 comments on commit 7af9c41

Please sign in to comment.