From cbb7b3f4270707df17bfa1d03e32644020307322 Mon Sep 17 00:00:00 2001 From: tx_haggis <13982343+adbancroft@users.noreply.github.com> Date: Mon, 23 Sep 2024 11:56:09 -0500 Subject: [PATCH] Add _do_raw functions for 32 & 64 bit types (#119) --- libdivide.h | 58 ++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 40 insertions(+), 18 deletions(-) diff --git a/libdivide.h b/libdivide.h index 4421888..f98a30b 100644 --- a/libdivide.h +++ b/libdivide.h @@ -238,18 +238,28 @@ static LIBDIVIDE_INLINE struct libdivide_u32_branchfree_t libdivide_u32_branchfr static LIBDIVIDE_INLINE struct libdivide_s64_branchfree_t libdivide_s64_branchfree_gen(int64_t d); static LIBDIVIDE_INLINE struct libdivide_u64_branchfree_t libdivide_u64_branchfree_gen(uint64_t d); -static LIBDIVIDE_INLINE int16_t libdivide_s16_do_raw(int16_t numer, int16_t magic, uint8_t more); +static LIBDIVIDE_INLINE int16_t libdivide_s16_do_raw( + int16_t numer, int16_t magic, uint8_t more); static LIBDIVIDE_INLINE int16_t libdivide_s16_do( int16_t numer, const struct libdivide_s16_t *denom); -static LIBDIVIDE_INLINE uint16_t libdivide_u16_do_raw(uint16_t numer, uint16_t magic, uint8_t more); +static LIBDIVIDE_INLINE uint16_t libdivide_u16_do_raw( + uint16_t numer, uint16_t magic, uint8_t more); static LIBDIVIDE_INLINE uint16_t libdivide_u16_do( uint16_t numer, const struct libdivide_u16_t *denom); +static LIBDIVIDE_INLINE int32_t libdivide_s32_do_raw( + int32_t numer, int32_t magic, uint8_t more); static LIBDIVIDE_INLINE int32_t libdivide_s32_do( int32_t numer, const struct libdivide_s32_t *denom); +static LIBDIVIDE_INLINE uint32_t libdivide_u32_do_raw( + uint32_t numer, uint32_t magic, uint8_t more); static LIBDIVIDE_INLINE uint32_t libdivide_u32_do( uint32_t numer, const struct libdivide_u32_t *denom); +static LIBDIVIDE_INLINE int64_t libdivide_s64_do_raw( + int64_t numer, int64_t magic, uint8_t more); static LIBDIVIDE_INLINE int64_t libdivide_s64_do( int64_t numer, const struct libdivide_s64_t *denom); +static LIBDIVIDE_INLINE uint64_t libdivide_u64_do_raw( + uint64_t numer, uint64_t magic, uint8_t more); static LIBDIVIDE_INLINE uint64_t libdivide_u64_do( uint64_t numer, const struct libdivide_u64_t *denom); @@ -914,12 +924,11 @@ struct libdivide_u32_branchfree_t libdivide_u32_branchfree_gen(uint32_t d) { return ret; } -uint32_t libdivide_u32_do(uint32_t numer, const struct libdivide_u32_t *denom) { - uint8_t more = denom->more; - if (!denom->magic) { +uint32_t libdivide_u32_do_raw(uint32_t numer, uint32_t magic, uint8_t more) { + if (!magic) { return numer >> more; } else { - uint32_t q = libdivide_mullhi_u32(denom->magic, numer); + uint32_t q = libdivide_mullhi_u32(magic, numer); if (more & LIBDIVIDE_ADD_MARKER) { uint32_t t = ((numer - q) >> 1) + q; return t >> (more & LIBDIVIDE_32_SHIFT_MASK); @@ -931,6 +940,10 @@ uint32_t libdivide_u32_do(uint32_t numer, const struct libdivide_u32_t *denom) { } } +uint32_t libdivide_u32_do(uint32_t numer, const struct libdivide_u32_t *denom) { + return libdivide_u32_do_raw(numer, denom->magic, denom->more); +} + uint32_t libdivide_u32_branchfree_do( uint32_t numer, const struct libdivide_u32_branchfree_t *denom) { uint32_t q = libdivide_mullhi_u32(denom->magic, numer); @@ -1074,12 +1087,11 @@ struct libdivide_u64_branchfree_t libdivide_u64_branchfree_gen(uint64_t d) { return ret; } -uint64_t libdivide_u64_do(uint64_t numer, const struct libdivide_u64_t *denom) { - uint8_t more = denom->more; - if (!denom->magic) { +uint64_t libdivide_u64_do_raw(uint64_t numer, uint64_t magic, uint8_t more) { + if (!magic) { return numer >> more; } else { - uint64_t q = libdivide_mullhi_u64(denom->magic, numer); + uint64_t q = libdivide_mullhi_u64(magic, numer); if (more & LIBDIVIDE_ADD_MARKER) { uint64_t t = ((numer - q) >> 1) + q; return t >> (more & LIBDIVIDE_64_SHIFT_MASK); @@ -1091,6 +1103,10 @@ uint64_t libdivide_u64_do(uint64_t numer, const struct libdivide_u64_t *denom) { } } +uint64_t libdivide_u64_do(uint64_t numer, const struct libdivide_u64_t *denom) { + return libdivide_u64_do_raw(numer, denom->magic, denom->more); +} + uint64_t libdivide_u64_branchfree_do( uint64_t numer, const struct libdivide_u64_branchfree_t *denom) { uint64_t q = libdivide_mullhi_u64(denom->magic, numer); @@ -1430,11 +1446,10 @@ struct libdivide_s32_branchfree_t libdivide_s32_branchfree_gen(int32_t d) { return result; } -int32_t libdivide_s32_do(int32_t numer, const struct libdivide_s32_t *denom) { - uint8_t more = denom->more; +int32_t libdivide_s32_do_raw(int32_t numer, int32_t magic, uint8_t more) { uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK; - if (!denom->magic) { + if (!magic) { uint32_t sign = (int8_t)more >> 7; uint32_t mask = ((uint32_t)1 << shift) - 1; uint32_t uq = numer + ((numer >> 31) & mask); @@ -1443,7 +1458,7 @@ int32_t libdivide_s32_do(int32_t numer, const struct libdivide_s32_t *denom) { q = (q ^ sign) - sign; return q; } else { - uint32_t uq = (uint32_t)libdivide_mullhi_s32(denom->magic, numer); + uint32_t uq = (uint32_t)libdivide_mullhi_s32(magic, numer); if (more & LIBDIVIDE_ADD_MARKER) { // must be arithmetic shift and then sign extend int32_t sign = (int8_t)more >> 7; @@ -1458,6 +1473,10 @@ int32_t libdivide_s32_do(int32_t numer, const struct libdivide_s32_t *denom) { } } +int32_t libdivide_s32_do(int32_t numer, const struct libdivide_s32_t *denom) { + return libdivide_s32_do_raw(numer, denom->magic, denom->more); +} + int32_t libdivide_s32_branchfree_do(int32_t numer, const struct libdivide_s32_branchfree_t *denom) { uint8_t more = denom->more; uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK; @@ -1599,11 +1618,10 @@ struct libdivide_s64_branchfree_t libdivide_s64_branchfree_gen(int64_t d) { return ret; } -int64_t libdivide_s64_do(int64_t numer, const struct libdivide_s64_t *denom) { - uint8_t more = denom->more; +int64_t libdivide_s64_do_raw(int64_t numer, int64_t magic, uint8_t more) { uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK; - if (!denom->magic) { // shift path + if (!magic) { // shift path uint64_t mask = ((uint64_t)1 << shift) - 1; uint64_t uq = numer + ((numer >> 63) & mask); int64_t q = (int64_t)uq; @@ -1613,7 +1631,7 @@ int64_t libdivide_s64_do(int64_t numer, const struct libdivide_s64_t *denom) { q = (q ^ sign) - sign; return q; } else { - uint64_t uq = (uint64_t)libdivide_mullhi_s64(denom->magic, numer); + uint64_t uq = (uint64_t)libdivide_mullhi_s64(magic, numer); if (more & LIBDIVIDE_ADD_MARKER) { // must be arithmetic shift and then sign extend int64_t sign = (int8_t)more >> 7; @@ -1628,6 +1646,10 @@ int64_t libdivide_s64_do(int64_t numer, const struct libdivide_s64_t *denom) { } } +int64_t libdivide_s64_do(int64_t numer, const struct libdivide_s64_t *denom) { + return libdivide_s64_do_raw(numer, denom->magic, denom->more); +} + int64_t libdivide_s64_branchfree_do(int64_t numer, const struct libdivide_s64_branchfree_t *denom) { uint8_t more = denom->more; uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK;