diff --git a/csrc/custom/custom_kernels.cu b/csrc/custom/custom_kernels.cu index 5155a6150a19e..2c4698533332e 100644 --- a/csrc/custom/custom_kernels.cu +++ b/csrc/custom/custom_kernels.cu @@ -383,7 +383,7 @@ __global__ void wvSpltK_hf_m1_sml_(const int K, const int N, const DTYPE* B, // Fetch A activation matrix in interleaved fashion from LDS or memory for (int m = 0; m < M; m++) { - bigA[m][k2] = *((const bigType*)(&(s[k_ + K * m]))); + bigA[m][k2] = *((const bigType*)(&(s[k_ + K * m]))); } } @@ -450,8 +450,6 @@ __global__ void wvSpltK_hf_m1_sml_(const int K, const int N, const DTYPE* B, } } - - __global__ void wvSpltK_hf_m1_(const int K, const int N, const DTYPE* B, const DTYPE* __restrict__ A, DTYPE* C, const int CuCount) { @@ -1863,8 +1861,8 @@ void wvSpltK_(void* in_a, void* in_b, void* out_c, const int M_in, switch (N_in) { case 1: if ((K_in <= 32 * 1024) && (M_in % 2 == 0)) { - wvSpltK_hf_m1_sml_<<>>(K_in, M_in, af4, bf4, - c, CuCount); + wvSpltK_hf_m1_sml_<<>>(K_in, M_in, af4, bf4, c, + CuCount); } else { wvSpltK_hf_m1_<<>>(K_in, M_in, af4, bf4, c, CuCount);