diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index c5fa7cfd..8e8fb9ca 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -725,6 +725,7 @@ struct GLV1 : mcl::GLV1T {
 	}
 	static inline void optimizedSplitForBLS12_381(mpz_class u[2], const mpz_class& x)
 	{
+		assert(sizeof(Unit) == 8);
 		/*
 			z = -0xd201000000010000
 			L = z^2-1 = 0xac45a4010001a40200000000ffffffff
@@ -738,12 +739,19 @@ struct GLV1 : mcl::GLV1T {
 		static const uint64_t vv[] = { 0xb1fb72917b67f718, 0xbe35f678f00fd56e };
 		static const size_t n = 128 / mcl::UnitBitSize;
 		Unit t[n*3];
+		// n = number of Units that make up 128 bits
+		// t[n*3] = x[n*2] * vv[n]
 		mcl::bint::mulNM(t, gmp::getUnit(x), n*2, (const Unit*)vv, n);
+		// t[n] <- t[n*3] >> 255
 		mcl::bint::shrT(t, t+n*2-1, mcl::UnitBitSize-1); // >>255
 		bool dummy;
 		gmp::setArray(&dummy, b, t, n);
-		mcl::bint::mulT(t, t, (const Unit*)Lv);
-		mcl::bint::subT(t, gmp::getUnit(x), t);
+		Unit t2[n*2];
+		// t2[n*2] = t[n] * Lv[n]
+		// Write the product to a separate buffer (t2): input and output buffers must not overlap on pre-Broadwell CPUs.
+		mcl::bint::mulT(t2, t, (const Unit*)Lv);
+		// t[n] = x[n*2] - t2[n*2]
+		mcl::bint::subT(t, gmp::getUnit(x), t2);
 		gmp::setArray(&dummy, a, t, n);
 		(void)dummy;
 	}