From 675c9fe0c9c6306ef03a936aa59224e1eb55b38e Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Wed, 22 May 2024 12:12:10 +0900 Subject: [PATCH 01/12] variable w in mulGLV for AVX-512 --- src/msm_avx.cpp | 61 +++++++++++++++++++++++++------------------------ 1 file changed, 31 insertions(+), 30 deletions(-) diff --git a/src/msm_avx.cpp b/src/msm_avx.cpp index 386caf47..5fe56d67 100644 --- a/src/msm_avx.cpp +++ b/src/msm_avx.cpp @@ -962,8 +962,6 @@ struct EcM { static const int a_ = 0; static const int b_ = 4; static const int specialB_ = mcl::ec::local::Plus4; - static const int w = 4; - static const int tblN = 1< + template static void makeTable(EcM *tbl, const EcM& P) { + const size_t tblN = 1<(); tbl[1] = P; dbl(tbl[2], P); @@ -1089,6 +1088,7 @@ struct EcM { } } +#if 0 static void mul(EcM& Q, const EcM& P, const Vec *y, size_t yn) { EcM tbl[tblN]; @@ -1106,6 +1106,7 @@ struct EcM { } } } +#endif static void mulLambda(EcM& Q, const EcM& P) { FpM::mul(Q.x, P.x, FpM::rw_); @@ -1115,10 +1116,12 @@ struct EcM { template static void mulGLV(EcM& Q, const EcM& P, const Vec y[4]) { + const size_t w = 4; + const size_t tblN = 1<(tbl1, P); + makeTable(tbl1, P); if (!isProj && mixed) normalizeJacobiVec(tbl1+1); for (size_t i = 0; i < tblN; i++) { mulLambda(tbl2[i], tbl1[i]); @@ -1133,36 +1136,34 @@ struct EcM { pa[i+M*0] = aa[0]; pa[i+M*1] = aa[1]; pb[i+M*0] = bb[0]; pb[i+M*1] = bb[1]; } -#if 1 - const size_t jn = bitLen / w; - const size_t yn = 2; + const size_t bitLen = 128; + Vec vmask = vpbroadcastq(tblN-1); bool first = true; - for (size_t i = 0; i < yn; i++) { - const Vec& v1 = a[yn-1-i]; - const Vec& v2 = b[yn-1-i]; - for (size_t j = 0; j < jn; j++) { - if (!first) for (int k = 0; k < w; k++) EcM::dbl(Q, Q); - EcM T; - Vec idx; - // compute v2 first before v1. 
see misc/internal.md - idx = vand(vpsrlq(v2, bitLen-w-j*w), g_vmask4); - if (first) { - Q.gather(tbl2, idx); - first = false; - } else { - T.gather(tbl2, idx); - add(Q, Q, T); - } - idx = vand(vpsrlq(v1, bitLen-w-j*w), g_vmask4); - T.gather(tbl1, idx); + size_t pos = bitLen; + for (size_t i = 0; i < (bitLen + w-1)/w; i++) { + size_t dblN = w; + if (pos < w) { + vmask = vpbroadcastq((1<(Q, Q); + EcM T; + Vec idx; + idx = vand(getUnitAt(b, 2, pos), vmask); + if (first) { + Q.gather(tbl2, idx); + first = false; + } else { + T.gather(tbl2, idx); add(Q, Q, T); } + idx = vand(getUnitAt(a, 2, pos), vmask); + T.gather(tbl1, idx); + add(Q, Q, T); } -#else - mul(Q, P, a, 2); - mul(T, T, b, 2); - add(Q, Q, T); -#endif } void cset(const Vmask& c, const EcM& v) { From 25b78b5598fe072a0c8e579d25a6cec496a33ffc Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Wed, 22 May 2024 17:01:30 +0900 Subject: [PATCH 02/12] remove unused code --- src/msm_avx.cpp | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/src/msm_avx.cpp b/src/msm_avx.cpp index 5fe56d67..f4d1c5c9 100644 --- a/src/msm_avx.cpp +++ b/src/msm_avx.cpp @@ -1087,26 +1087,6 @@ struct EcM { vpscatterqq(&tbl[0].z.v[i], idx, z.v[i]); } } - -#if 0 - static void mul(EcM& Q, const EcM& P, const Vec *y, size_t yn) - { - EcM tbl[tblN]; - makeTable(tbl, P); - const size_t jn = bitLen / w; - Q = tbl[0]; - for (size_t i = 0; i < yn; i++) { - const Vec& v = y[yn-1-i]; - for (size_t j = 0; j < jn; j++) { - for (int k = 0; k < w; k++) EcM::dbl(Q, Q); - Vec idx = vand(vpsrlq(v, bitLen-w-j*w), g_vmask4); - EcM T; - T.gather(tbl, idx); - add(Q, Q, T); - } - } - } -#endif static void mulLambda(EcM& Q, const EcM& P) { FpM::mul(Q.x, P.x, FpM::rw_); From cf4c173e09b343429f7704d493b3730eaeb38171 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Thu, 23 May 2024 15:34:30 +0900 Subject: [PATCH 03/12] developing signed version (under construction) --- src/msm_avx.cpp | 103 +++++++++++++++++++++++++++++++++++++++++++++++- 
1 file changed, 102 insertions(+), 1 deletion(-) diff --git a/src/msm_avx.cpp b/src/msm_avx.cpp index f4d1c5c9..9bd3c5af 100644 --- a/src/msm_avx.cpp +++ b/src/msm_avx.cpp @@ -235,6 +235,11 @@ inline Vmask vcmpgt(const Vec& a, const Vec& b) return _mm512_cmpgt_epi64_mask(a, b); } +inline Vmask vcmpge(const Vec& a, const Vec& b) +{ + return _mm512_cmpge_epi64_mask(a, b); +} + inline Vmask mand(const Vmask& a, const Vmask& b) { return _mm512_kand(a, b); @@ -656,6 +661,7 @@ inline void cvt6Ux8to8Ux8(Vec y[8], const Unit x[6*8]) struct FpM { Vec v[N]; + static FpM zero_; static FpM one_; static FpM rawOne_; static FpM rw_; @@ -796,6 +802,12 @@ struct FpM { FpM::mul(t, *this, FpM::m52to64_); cvt8Ux8to6Ux8((Unit*)v, t.v); } + FpM neg() const + { + FpM t; + FpM::sub(t, FpM::zero_, *this); + return t; + } static void inv(FpM& z, const FpM& x) { mcl::msm::FpA v[M]; @@ -828,6 +840,7 @@ struct FpM { #endif }; +FpM FpM::zero_; FpM FpM::one_; FpM FpM::rawOne_; FpM FpM::rw_; @@ -1093,6 +1106,8 @@ struct EcM { Q.y = P.y; Q.z = P.z; } +#if 1 + // Treat idx as an unsigned integer template static void mulGLV(EcM& Q, const EcM& P, const Vec y[4]) { @@ -1117,7 +1132,7 @@ struct EcM { pb[i+M*0] = bb[0]; pb[i+M*1] = bb[1]; } const size_t bitLen = 128; - Vec vmask = vpbroadcastq(tblN-1); + Vec vmask = vpbroadcastq((1<(Q, Q, T); } } +#else + template + static void makeNAFtbl(Vec *idxTbl, Vmask *negTbl, const Vec a[2]) + { + const Vec vmask = vpbroadcastq((1<= H ? 
W - idx : idx; + pos += w; + } + idxTbl[i] = idx; + i++; + } + assert(i == (bitLen+w-1)/w); + } + // Treat idx as a signed integer + template + static void mulGLV(EcM& Q, const EcM& P, const Vec y[4]) + { + const size_t w = 4; + const size_t tblN = (1<<(w-1))+1; +// const size_t tblN = 1<(tbl1, P); + if (!isProj && mixed) normalizeJacobiVec(tbl1+1); + for (size_t i = 0; i < tblN; i++) { + mulLambda(tbl2[i], tbl1[i]); + } + const Unit *src = (const Unit*)y; + Unit *pa = (Unit*)a; + Unit *pb = (Unit*)b; + for (size_t i = 0; i < M; i++) { + Unit buf[4] = { src[i+M*0], src[i+M*1], src[i+M*2], src[i+M*3] }; + Unit aa[2], bb[2]; + mcl::ec::local::optimizedSplitRawForBLS12_381(aa, bb, buf); + pa[i+M*0] = aa[0]; pa[i+M*1] = aa[1]; + pb[i+M*0] = bb[0]; pb[i+M*1] = bb[1]; + } + const size_t bitLen = 128; + const size_t n = (bitLen + w-1)/w; + Vec aTbl[n], bTbl[n]; + Vmask aNegTbl[n], bNegTbl[n]; + makeNAFtbl(aTbl, aNegTbl, a); + makeNAFtbl(bTbl, bNegTbl, b); + + const size_t remain = bitLen % w; + bool first = true; + for (int i = int(n)-1; i >= 0; i--) { + size_t dblN = (i == 0 && remain) ? 
remain : w; + if (!first) for (size_t k = 0; k < dblN; k++) EcM::dbl(Q, Q); + + EcM T; + Vec idx = bTbl[i]; + T.gather(tbl2, idx); + T.y = FpM::select(bNegTbl[i], T.y.neg(), T.y); + if (first) { + Q = T; + first = false; + } else { + add(Q, Q, T); + } + idx = aTbl[i]; + T.gather(tbl1, idx); + T.y = FpM::select(aNegTbl[i], T.y.neg(), T.y); + add(Q, Q, T); + } + } +#endif void cset(const Vmask& c, const EcM& v) { x.cset(c, v.x); @@ -1345,6 +1445,7 @@ bool initMsm(const mcl::CurveParam& cp, const mcl::msm::Param *param) ((Unit*)&g_offset)[i] = i; } expand(g_vi192, 192); + FpM::zero_.clear(); expandN(FpM::one_.v, mont.toMont(1)); expandN(FpM::rawOne_.v, mpz_class(1)); expandN(FpM::mR2_.v, mont.mR2); From b3239eeac29eaa71f84dcd1c1c0e1a3a2c6bfc76 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Thu, 30 May 2024 09:21:35 +0900 Subject: [PATCH 04/12] _ --- src/msm_avx.cpp | 128 ++++++++++++++++++++++++++++++------------------ 1 file changed, 80 insertions(+), 48 deletions(-) diff --git a/src/msm_avx.cpp b/src/msm_avx.cpp index 9bd3c5af..06fe5161 100644 --- a/src/msm_avx.cpp +++ b/src/msm_avx.cpp @@ -57,7 +57,17 @@ inline uint8_t cvtToInt(const Vmask& v) inline void dump(const Vmask& v, const char *msg = nullptr) { - mcl::bint::dump(&v, sizeof(v), msg); + if (msg) printf("%s ", msg); + uint64_t x = cvtToInt(v); + for (size_t i = 0; i < 8; i++) { + putchar('0' + ((x>>(7-i))&1)); + } + putchar('\n'); +} + +inline void dump(const Vec& v, const char *msg = nullptr) +{ + mcl::bint::dump((const uint64_t*)&v, sizeof(v)/sizeof(uint64_t), msg); } template @@ -87,6 +97,13 @@ inline Vec vzero() return _mm512_setzero_epi32(); } +inline Vmask mzero() +{ + Vmask v; + memset(&v, 0, sizeof(v)); + return v; +} + inline Vec vone() { return _mm512_set1_epi32(1); @@ -160,11 +177,21 @@ inline Vec vadd(const Vec& a, const Vec& b) return _mm512_add_epi64(a, b); } +inline Vec vadd(const Vmask& v, const Vec& a, const Vec& b) +{ + return _mm512_mask_add_epi64(a, v, a, b); +} + inline Vec 
vsub(const Vec& a, const Vec& b) { return _mm512_sub_epi64(a, b); } +inline Vec vsub(const Vmask& v, const Vec& a, const Vec& b) +{ + return _mm512_mask_sub_epi64(a, v, a, b); +} + inline Vec vpsrlq(const Vec& a, size_t b) { return _mm512_srli_epi64(a, int(b)); @@ -911,7 +938,7 @@ inline void addJacobiMixedNoCheck(E& R, const E& P, const E& Q) } // 12M+4S+7A -// assume P.x != Q.x, P != Q +// P == Q or P == -Q then R = 0, so assume P != Q. template inline void addJacobiNoCheck(E& R, const E& P, const E& Q) { @@ -1106,7 +1133,7 @@ struct EcM { Q.y = P.y; Q.z = P.z; } -#if 1 +#if 0 // Treat idx as an unsigned integer template static void mulGLV(EcM& Q, const EcM& P, const Vec y[4]) @@ -1162,42 +1189,35 @@ struct EcM { } #else template - static void makeNAFtbl(Vec *idxTbl, Vmask *negTbl, const Vec a[2]) + static Vmask makeNAFtbl(Vec *idxTbl, Vmask *negTbl, const Vec a[2]) { const Vec vmask = vpbroadcastq((1<= H ? W - idx : idx; - pos += w; - } + idx = vand(idx, vmask); + idx = vadd(CF, idx, one); + Vec masked = vand(idx, vmask); + negTbl[i] = vcmpge(masked, H); + idx = vselect(negTbl[i], vsub(W, masked), masked); // idx >= H ? W - idx : idx; idxTbl[i] = idx; - i++; + CF = vcmpge(idx, W); + CF = mor(negTbl[i], CF); + pos += w; } - assert(i == (bitLen+w-1)/w); + return CF; } // Treat idx as a signed integer template static void mulGLV(EcM& Q, const EcM& P, const Vec y[4]) { - const size_t w = 4; - const size_t tblN = (1<<(w-1))+1; -// const size_t tblN = 1<(tbl1, P); @@ -1219,28 +1239,29 @@ struct EcM { const size_t n = (bitLen + w-1)/w; Vec aTbl[n], bTbl[n]; Vmask aNegTbl[n], bNegTbl[n]; - makeNAFtbl(aTbl, aNegTbl, a); - makeNAFtbl(bTbl, bNegTbl, b); + Vmask CF1 = makeNAFtbl(aTbl, aNegTbl, a); + Vmask CF2 = makeNAFtbl(bTbl, bNegTbl, b); - const size_t remain = bitLen % w; - bool first = true; - for (int i = int(n)-1; i >= 0; i--) { - size_t dblN = (i == 0 && remain) ? 
remain : w; - if (!first) for (size_t k = 0; k < dblN; k++) EcM::dbl(Q, Q); + assert(cvtToInt(CF1) == 0); + assert(cvtToInt(CF2) == 0); + (void)CF1; + (void)CF2; + for (size_t i = 0; i < n; i++) { + if (i > 0) for (size_t k = 0; k < w; k++) EcM::dbl(Q, Q); + const size_t pos = n-1-i; EcM T; - Vec idx = bTbl[i]; + Vec idx = bTbl[pos]; T.gather(tbl2, idx); - T.y = FpM::select(bNegTbl[i], T.y.neg(), T.y); - if (first) { + T.y = FpM::select(bNegTbl[pos], T.y.neg(), T.y); + if (i == 0) { Q = T; - first = false; } else { add(Q, Q, T); } - idx = aTbl[i]; + idx = aTbl[pos]; T.gather(tbl1, idx); - T.y = FpM::select(aNegTbl[i], T.y.neg(), T.y); + T.y = FpM::select(aNegTbl[pos], T.y.neg(), T.y); add(Q, Q, T); } } @@ -1401,7 +1422,7 @@ void mulVecAVX512(Unit *_P, Unit *_x, const Unit *_y, size_t n) void mulEachAVX512(Unit *_x, const Unit *_y, size_t n) { assert(n % 8 == 0); - const bool isProj = false; + const bool isProj = true; const bool mixed = true; mcl::msm::G1A *x = (mcl::msm::G1A*)_x; const mcl::msm::FrA *y = (const mcl::msm::FrA*)_y; @@ -1538,7 +1559,7 @@ CYBOZU_TEST_AUTO(cmp) CYBOZU_TEST_AUTO(op) { - const size_t n = 8; + const size_t n = 8; // fixed G1 P[n]; G1 Q[n]; G1 R[n]; @@ -1621,14 +1642,25 @@ CYBOZU_TEST_AUTO(op) } #if 1 // mulEachAVX512 - for (size_t i = 0; i < n; i++) { - Q[i] = P[i]; - G1::mul(R[i], P[i], x[i]); - } - mcl::msm::mulEachAVX512((Unit*)Q, (const Unit*)x, n); - for (size_t i = 0; i < n; i++) { - CYBOZU_TEST_EQUAL(R[i], Q[i]); + for (int t = 0; t < 0x1000; t += 8) { + for (size_t i = 0; i < n; i++) { + Q[i] = P[i]; + x[i] = t + i; + G1::mul(R[i], P[i], x[i]); + } + mcl::msm::mulEachAVX512((Unit*)Q, (const Unit*)x, n); + for (size_t i = 0; i < n; i++) { + CYBOZU_TEST_EQUAL(R[i], Q[i]); +#if 1 + if (R[i] != Q[i]) { + printf("x[%zd]=%s\n", i, x[i].getStr(16).c_str()); + printf("ok %s\n", R[i].getStr(mcl::IoEcAffine|16).c_str()); + printf("ng %s\n", Q[i].getStr(mcl::IoEcAffine|16).c_str()); + } +#endif + } } +exit(1); #endif } From 
72634d4d2b8258dbd6b303d177b02490eb95e848 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Fri, 31 May 2024 12:40:21 +0900 Subject: [PATCH 05/12] another mulEach --- misc/internal.md | 35 ++++++++++++++++++++------ src/msm_avx.cpp | 65 +++++++++++++++++++++++------------------------- 2 files changed, 58 insertions(+), 42 deletions(-) diff --git a/misc/internal.md b/misc/internal.md index 9f5d362d..a7307177 100644 --- a/misc/internal.md +++ b/misc/internal.md @@ -36,15 +36,7 @@ s = r.bit_length() S = 1<> s @@ -55,6 +47,11 @@ def split(x): b += 1 return (a, b) ``` + +variables|z|L|r|S|v +-|-|-|-|-|- +bit_length|64|128|255|255|128 + - x in [0, r-1] - a + b L = x for (a, b) = split(x). @@ -144,3 +141,25 @@ Otherwise, Q is bigger than L P, so Q != tbl1[j1]. -|-|- Proj|12M+27A|8M+13A Jacobi|16M+7A|7M+12A + +## NAF (Non-Adjacent Form) + +``` +def naf(x, w=3): + tbl = [] + H=2**(w-1) + W=H*2 + mask = W-1 + while x >= 1: + if x & 1: + t = x & mask + if t >= H: + t -= W + x = x - t + else: + t = 0 + x = x >> 1 + tbl.append(t) + tbl.reverse() + return tbl +``` diff --git a/src/msm_avx.cpp b/src/msm_avx.cpp index 06fe5161..d8c6d36d 100644 --- a/src/msm_avx.cpp +++ b/src/msm_avx.cpp @@ -1106,7 +1106,11 @@ struct EcM { tbl[1] = P; dbl(tbl[2], P); for (size_t i = 3; i < tblN; i++) { - add(tbl[i], tbl[i-1], P); + if (i & 1) { + add(tbl[i], tbl[i-1], P); + } else { + dbl(tbl[i], tbl[i/2]); + } } } void gather(const EcM *tbl, Vec idx) @@ -1133,8 +1137,9 @@ struct EcM { Q.y = P.y; Q.z = P.z; } -#if 0 +#if 1 // Treat idx as an unsigned integer + // 33.6M clk template static void mulGLV(EcM& Q, const EcM& P, const Vec y[4]) { @@ -1189,30 +1194,29 @@ struct EcM { } #else template - static Vmask makeNAFtbl(Vec *idxTbl, Vmask *negTbl, const Vec a[2]) + static void makeNAFtbl(Vec *idxTbl, Vmask *negTbl, const Vec a[2]) { const Vec vmask = vpbroadcastq((1<= H ? 
W - idx : idx; - idxTbl[i] = idx; - CF = vcmpge(idx, W); - CF = mor(negTbl[i], CF); + idxTbl[i] = vselect(negTbl[i], vsub(F, masked), masked); // idx >= H ? F - idx : idx; + CF = vpsrlq(idx, w); + CF = vadd(negTbl[i], CF, one); pos += w; } - return CF; } // Treat idx as a signed integer + // 34.6M clk template static void mulGLV(EcM& Q, const EcM& P, const Vec y[4]) { @@ -1239,13 +1243,9 @@ struct EcM { const size_t n = (bitLen + w-1)/w; Vec aTbl[n], bTbl[n]; Vmask aNegTbl[n], bNegTbl[n]; - Vmask CF1 = makeNAFtbl(aTbl, aNegTbl, a); - Vmask CF2 = makeNAFtbl(bTbl, bNegTbl, b); + makeNAFtbl(aTbl, aNegTbl, a); + makeNAFtbl(bTbl, bNegTbl, b); - assert(cvtToInt(CF1) == 0); - assert(cvtToInt(CF2) == 0); - (void)CF1; - (void)CF2; for (size_t i = 0; i < n; i++) { if (i > 0) for (size_t k = 0; k < w; k++) EcM::dbl(Q, Q); const size_t pos = n-1-i; @@ -1422,7 +1422,7 @@ void mulVecAVX512(Unit *_P, Unit *_x, const Unit *_y, size_t n) void mulEachAVX512(Unit *_x, const Unit *_y, size_t n) { assert(n % 8 == 0); - const bool isProj = true; + const bool isProj = false; const bool mixed = true; mcl::msm::G1A *x = (mcl::msm::G1A*)_x; const mcl::msm::FrA *y = (const mcl::msm::FrA*)_y; @@ -1642,25 +1642,22 @@ CYBOZU_TEST_AUTO(op) } #if 1 // mulEachAVX512 - for (int t = 0; t < 0x1000; t += 8) { - for (size_t i = 0; i < n; i++) { - Q[i] = P[i]; - x[i] = t + i; - G1::mul(R[i], P[i], x[i]); - } - mcl::msm::mulEachAVX512((Unit*)Q, (const Unit*)x, n); - for (size_t i = 0; i < n; i++) { - CYBOZU_TEST_EQUAL(R[i], Q[i]); -#if 1 - if (R[i] != Q[i]) { - printf("x[%zd]=%s\n", i, x[i].getStr(16).c_str()); - printf("ok %s\n", R[i].getStr(mcl::IoEcAffine|16).c_str()); - printf("ng %s\n", Q[i].getStr(mcl::IoEcAffine|16).c_str()); + for (int mode = 0; mode < 2; mode++) { + for (int t = 0; t < 0x1000; t += 8) { + for (size_t i = 0; i < n; i++) { + Q[i] = P[i]; + switch (mode) { + case 0: x[i] = t + i; break; + case 1: x[i].setByCSPRNG(rg); break; + } + G1::mul(R[i], P[i], x[i]); + } + 
mcl::msm::mulEachAVX512((Unit*)Q, (const Unit*)x, n); + for (size_t i = 0; i < n; i++) { + CYBOZU_TEST_EQUAL(R[i], Q[i]); } -#endif } } -exit(1); #endif } From 1f1d818fe4507268877dd4ac95c5a3021847839e Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Fri, 31 May 2024 16:57:29 +0900 Subject: [PATCH 06/12] a little optimize mulGLV --- src/msm_avx.cpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/msm_avx.cpp b/src/msm_avx.cpp index d8c6d36d..0f3a24ae 100644 --- a/src/msm_avx.cpp +++ b/src/msm_avx.cpp @@ -1098,10 +1098,9 @@ struct EcM { FpM::mul(y, y, r); z = FpM::one_; } - template - static void makeTable(EcM *tbl, const EcM& P) + template + static void makeTable(EcM *tbl, size_t tblN, const EcM& P) { - const size_t tblN = 1<(); tbl[1] = P; dbl(tbl[2], P); @@ -1137,7 +1136,7 @@ struct EcM { Q.y = P.y; Q.z = P.z; } -#if 1 +#if 0 // Treat idx as an unsigned integer // 33.6M clk template @@ -1148,7 +1147,7 @@ struct EcM { // QQQ (n=1024) isProj=T : 36.8, isProj=F&&mixed=F : 36.0, isProj=F&&mixed=T : 34.6 Vec a[2], b[2]; EcM tbl1[tblN], tbl2[tblN]; - makeTable(tbl1, P); + makeTable(tbl1, tblN, P); if (!isProj && mixed) normalizeJacobiVec(tbl1+1); for (size_t i = 0; i < tblN; i++) { mulLambda(tbl2[i], tbl1[i]); @@ -1216,7 +1215,7 @@ struct EcM { } } // Treat idx as a signed integer - // 34.6M clk + // 32.4M clk template static void mulGLV(EcM& Q, const EcM& P, const Vec y[4]) { @@ -1224,7 +1223,7 @@ struct EcM { const size_t tblN = (1<<(w-1))+1; // [0, 2^(w-1)] Vec a[2], b[2]; EcM tbl1[tblN], tbl2[tblN]; - makeTable(tbl1, P); + makeTable(tbl1, tblN, P); if (!isProj && mixed) normalizeJacobiVec(tbl1+1); for (size_t i = 0; i < tblN; i++) { mulLambda(tbl2[i], tbl1[i]); From de0271472b43e0ff43dbb5797f012bcb34240bff Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Mon, 3 Jun 2024 12:17:01 +0900 Subject: [PATCH 07/12] try SIGNED_TABLE --- src/msm_avx.cpp | 48 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 43 
insertions(+), 5 deletions(-) diff --git a/src/msm_avx.cpp b/src/msm_avx.cpp index 0f3a24ae..7ef2ce6f 100644 --- a/src/msm_avx.cpp +++ b/src/msm_avx.cpp @@ -708,6 +708,10 @@ struct FpM { { uvsub(z.v, x.v, y.v); } + static void neg(FpM& z, const FpM& x) + { + FpM::sub(z, FpM::zero_, x); + } static void mul(FpM& z, const FpM& x, const FpM& y) { uvmul(z.v, x.v, y.v); @@ -1136,6 +1140,12 @@ struct EcM { Q.y = P.y; Q.z = P.z; } + static void neg(EcM& Q, const EcM& P) + { + Q.x = P.x; + FpM::neg(Q.y, P.y); + Q.z = P.z; + } #if 0 // Treat idx as an unsigned integer // 33.6M clk @@ -1192,11 +1202,16 @@ struct EcM { } } #else +//#define SIGNED_TABLE // a little slower (32.1Mclk->32.4Mclk) template static void makeNAFtbl(Vec *idxTbl, Vmask *negTbl, const Vec a[2]) { const Vec vmask = vpbroadcastq((1<= H ? F - idx : idx; + CF = vpsrlq(idx, w); + CF = vadd(v, CF, one); +#else + Vec masked = vand(idx, vmask); + negTbl[i] = vcmpgt(masked, H); idxTbl[i] = vselect(negTbl[i], vsub(F, masked), masked); // idx >= H ? 
F - idx : idx; CF = vpsrlq(idx, w); CF = vadd(negTbl[i], CF, one); +#endif pos += w; } } @@ -1220,14 +1243,25 @@ struct EcM { static void mulGLV(EcM& Q, const EcM& P, const Vec y[4]) { const size_t w = 5; - const size_t tblN = (1<<(w-1))+1; // [0, 2^(w-1)] + const size_t halfN = (1<<(w-1))+1; // [0, 2^(w-1)] +#ifdef SIGNED_TABLE + const size_t tblN = 1<(tbl1, tblN, P); - if (!isProj && mixed) normalizeJacobiVec(tbl1+1); - for (size_t i = 0; i < tblN; i++) { + makeTable(tbl1, halfN, P); + if (!isProj && mixed) normalizeJacobiVec(tbl1+1); + for (size_t i = 0; i < halfN; i++) { mulLambda(tbl2[i], tbl1[i]); } +#ifdef SIGNED_TABLE + for (size_t i = halfN; i < tblN; i++) { + EcM::neg(tbl1[i], tbl1[tblN-i]); + EcM::neg(tbl2[i], tbl2[tblN-i]); + } +#endif const Unit *src = (const Unit*)y; Unit *pa = (Unit*)a; Unit *pb = (Unit*)b; @@ -1252,7 +1286,9 @@ struct EcM { EcM T; Vec idx = bTbl[pos]; T.gather(tbl2, idx); +#ifndef SIGNED_TABLE T.y = FpM::select(bNegTbl[pos], T.y.neg(), T.y); +#endif if (i == 0) { Q = T; } else { @@ -1260,7 +1296,9 @@ struct EcM { } idx = aTbl[pos]; T.gather(tbl1, idx); +#ifndef SIGNED_TABLE T.y = FpM::select(aNegTbl[pos], T.y.neg(), T.y); +#endif add(Q, Q, T); } } From 0aeb5ac6621013c0e29667234ea3634b7c0318dc Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Fri, 7 Jun 2024 13:15:55 +0900 Subject: [PATCH 08/12] remove warning of CYBOZU_ASSUME for Visual Studio with /O3 --- include/cybozu/inttype.hpp | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/include/cybozu/inttype.hpp b/include/cybozu/inttype.hpp index cac25f28..23f16544 100644 --- a/include/cybozu/inttype.hpp +++ b/include/cybozu/inttype.hpp @@ -75,16 +75,12 @@ #endif #endif #ifndef CYBOZU_ASSUME - #ifdef NDEBUG - #if defined(__clang__) - #define CYBOZU_ASSUME(x) __builtin_assume(x) - #elif defined(_MSC_VER) || defined(__ICC) - #define CYBOZU_ASSUME(x) __assume(x) - #else - #define CYBOZU_ASSUME(x) if (!(x)) { __builtin_unreachable(); } - #endif + #if 
defined(__clang__) + #define CYBOZU_ASSUME(x) assert(x); __builtin_assume(x) + #elif defined(_MSC_VER) || defined(__ICC) + #define CYBOZU_ASSUME(x) assert(x); __assume(x) #else - #define CYBOZU_ASSUME(x) assert(x) + #define CYBOZU_ASSUME(x) assert(x); if (!(x)) { __builtin_unreachable(); } #endif #endif From 268333286e0e97ec7ac89c915902a9d1d2b156b6 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Fri, 7 Jun 2024 13:16:19 +0900 Subject: [PATCH 09/12] add assert --- include/mcl/gmp_util.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/mcl/gmp_util.hpp b/include/mcl/gmp_util.hpp index b0c9f8e0..8432f6b1 100644 --- a/include/mcl/gmp_util.hpp +++ b/include/mcl/gmp_util.hpp @@ -527,8 +527,10 @@ size_t getContinuousVal(const Vec& v, size_t pos, int val) template void convertToNAF(Vec& v, const Vec& in) { + assert(in.size() > 0); v.copy(in); size_t pos = v.size() - 1; + if (pos == 0) return; for (;;) { size_t p = getContinuousVal(v, pos, 0); if (p == 1) return; From c92ab91ac1679e50ed4c6b9030d8ce3cff5c4ac0 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Fri, 7 Jun 2024 13:25:42 +0900 Subject: [PATCH 10/12] rewrite visual studio project files --- common.props | 10 +- debug.props | 14 -- mcl.sln | 48 ++--- readme.md | 6 +- release.props | 12 -- .../{mcl.vcxproj => mcllib/mcllib.vcxproj} | 71 ++++--- .../bls12_test.vcxproj} | 176 +++++++++--------- test/proj/ec_test/ec_test.vcxproj | 88 --------- test/proj/fp_test/fp_test.vcxproj | 88 --------- test/proj/fp_tower_test/fp_tower_test.vcxproj | 88 --------- 10 files changed, 150 insertions(+), 451 deletions(-) delete mode 100644 debug.props delete mode 100644 release.props rename src/proj/{mcl.vcxproj => mcllib/mcllib.vcxproj} (54%) rename test/proj/{bn_test/bn_test.vcxproj => bls12_test/bls12_test.vcxproj} (86%) delete mode 100644 test/proj/ec_test/ec_test.vcxproj delete mode 100644 test/proj/fp_test/fp_test.vcxproj delete mode 100644 test/proj/fp_tower_test/fp_tower_test.vcxproj diff --git 
a/common.props b/common.props index 912f39e3..410742f4 100644 --- a/common.props +++ b/common.props @@ -7,20 +7,16 @@ - $(SolutionDir)../cybozulib/include;$(SolutionDir)../cybozulib_ext/include;$(SolutionDir)include;$(SolutionDir)../xbyak + ..\..\..\include Level4 - MultiThreaded - _MBCS;%(PreprocessorDefinitions);NOMINMAX + _MBCS;%(PreprocessorDefinitions);NOMINMAX;MCL_MAX_BIT_SIZE=384;MCL_MSM=1 - - $(SolutionDir)../cybozulib_ext/lib;$(SolutionDir)lib - - + \ No newline at end of file diff --git a/debug.props b/debug.props deleted file mode 100644 index 1553ae0d..00000000 --- a/debug.props +++ /dev/null @@ -1,14 +0,0 @@ - - - - - - $(ProjectName)d - - - - MultiThreadedDebug - - - - \ No newline at end of file diff --git a/mcl.sln b/mcl.sln index 7c4fe8f0..3a990310 100644 --- a/mcl.sln +++ b/mcl.sln @@ -1,27 +1,12 @@ Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio 2013 -VisualStudioVersion = 12.0.40629.0 +# Visual Studio Version 17 +VisualStudioVersion = 17.10.34928.147 MinimumVisualStudioVersion = 10.0.40219.1 -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "fp_test", "test\proj\fp_test\fp_test.vcxproj", "{51266DE6-B57B-4AE3-B85C-282F170E1728}" - ProjectSection(ProjectDependencies) = postProject - {1DBB979A-C212-45CD-9563-446A96F87F71} = {1DBB979A-C212-45CD-9563-446A96F87F71} - EndProjectSection -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ec_test", "test\proj\ec_test\ec_test.vcxproj", "{46B6E88E-739A-406B-9F68-BC46C5950FA3}" - ProjectSection(ProjectDependencies) = postProject - {1DBB979A-C212-45CD-9563-446A96F87F71} = {1DBB979A-C212-45CD-9563-446A96F87F71} - EndProjectSection +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "lib", "src\proj\mcllib\mcllib.vcxproj", "{E95F067C-F6CE-48D5-B6AB-E469211D5B43}" EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mcl", "src\proj\mcl.vcxproj", "{1DBB979A-C212-45CD-9563-446A96F87F71}" -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = 
"fp_tower_test", "test\proj\fp_tower_test\fp_tower_test.vcxproj", "{733B6250-D249-4A99-B2A6-C8FAF6A90E97}" +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "bls12_test", "test\proj\bls12_test\bls12_test.vcxproj", "{9F935350-2F4C-45FA-A1C2-1D5AA0EADC96}" ProjectSection(ProjectDependencies) = postProject - {1DBB979A-C212-45CD-9563-446A96F87F71} = {1DBB979A-C212-45CD-9563-446A96F87F71} - EndProjectSection -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "bn_test", "test\proj\bn_test\bn_test.vcxproj", "{9F935350-2F4C-45FA-A1C2-1D5AA0EADC96}" - ProjectSection(ProjectDependencies) = postProject - {1DBB979A-C212-45CD-9563-446A96F87F71} = {1DBB979A-C212-45CD-9563-446A96F87F71} + {E95F067C-F6CE-48D5-B6AB-E469211D5B43} = {E95F067C-F6CE-48D5-B6AB-E469211D5B43} EndProjectSection EndProject Global @@ -30,22 +15,10 @@ Global Release|x64 = Release|x64 EndGlobalSection GlobalSection(ProjectConfigurationPlatforms) = postSolution - {51266DE6-B57B-4AE3-B85C-282F170E1728}.Debug|x64.ActiveCfg = Debug|x64 - {51266DE6-B57B-4AE3-B85C-282F170E1728}.Debug|x64.Build.0 = Debug|x64 - {51266DE6-B57B-4AE3-B85C-282F170E1728}.Release|x64.ActiveCfg = Release|x64 - {51266DE6-B57B-4AE3-B85C-282F170E1728}.Release|x64.Build.0 = Release|x64 - {46B6E88E-739A-406B-9F68-BC46C5950FA3}.Debug|x64.ActiveCfg = Debug|x64 - {46B6E88E-739A-406B-9F68-BC46C5950FA3}.Debug|x64.Build.0 = Debug|x64 - {46B6E88E-739A-406B-9F68-BC46C5950FA3}.Release|x64.ActiveCfg = Release|x64 - {46B6E88E-739A-406B-9F68-BC46C5950FA3}.Release|x64.Build.0 = Release|x64 - {1DBB979A-C212-45CD-9563-446A96F87F71}.Debug|x64.ActiveCfg = Debug|x64 - {1DBB979A-C212-45CD-9563-446A96F87F71}.Debug|x64.Build.0 = Debug|x64 - {1DBB979A-C212-45CD-9563-446A96F87F71}.Release|x64.ActiveCfg = Release|x64 - {1DBB979A-C212-45CD-9563-446A96F87F71}.Release|x64.Build.0 = Release|x64 - {733B6250-D249-4A99-B2A6-C8FAF6A90E97}.Debug|x64.ActiveCfg = Debug|x64 - {733B6250-D249-4A99-B2A6-C8FAF6A90E97}.Debug|x64.Build.0 = Debug|x64 - 
{733B6250-D249-4A99-B2A6-C8FAF6A90E97}.Release|x64.ActiveCfg = Release|x64 - {733B6250-D249-4A99-B2A6-C8FAF6A90E97}.Release|x64.Build.0 = Release|x64 + {E95F067C-F6CE-48D5-B6AB-E469211D5B43}.Debug|x64.ActiveCfg = Debug|x64 + {E95F067C-F6CE-48D5-B6AB-E469211D5B43}.Debug|x64.Build.0 = Debug|x64 + {E95F067C-F6CE-48D5-B6AB-E469211D5B43}.Release|x64.ActiveCfg = Release|x64 + {E95F067C-F6CE-48D5-B6AB-E469211D5B43}.Release|x64.Build.0 = Release|x64 {9F935350-2F4C-45FA-A1C2-1D5AA0EADC96}.Debug|x64.ActiveCfg = Debug|x64 {9F935350-2F4C-45FA-A1C2-1D5AA0EADC96}.Debug|x64.Build.0 = Debug|x64 {9F935350-2F4C-45FA-A1C2-1D5AA0EADC96}.Release|x64.ActiveCfg = Release|x64 @@ -54,4 +27,7 @@ Global GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {A3433A9E-1985-4F82-86E1-2CC416510BA7} + EndGlobalSection EndGlobal diff --git a/readme.md b/readme.md index 0a8d8d72..015e9c62 100644 --- a/readme.md +++ b/readme.md @@ -112,7 +112,7 @@ cmake .. -DCMAKE_CXX_COMPILER=clang++ make ``` -For Visual Studio, +For Visual Studio, (REMARK : It is not maintained; use the vcxproj file.) ``` mkdir build cd build @@ -120,6 +120,10 @@ cmake .. -A x64 msbuild mcl.sln /p:Configuration=Release /m ``` +## How to build a static library with Visual Studio +Open `mcl.sln` and build it. +`src/proj/mcllib/mcllib.vcxproj` builds the static library `lib/mcl.lib`, which is compiled with `MCL_MAX_BIT_SIZE=384` defined. + ## options see `cmake .. -LA`. 
diff --git a/release.props b/release.props deleted file mode 100644 index 886ce689..00000000 --- a/release.props +++ /dev/null @@ -1,12 +0,0 @@ - - - - - - - - MultiThreaded - - - - \ No newline at end of file diff --git a/src/proj/mcl.vcxproj b/src/proj/mcllib/mcllib.vcxproj similarity index 54% rename from src/proj/mcl.vcxproj rename to src/proj/mcllib/mcllib.vcxproj index 4a37e1c4..1d46def0 100644 --- a/src/proj/mcl.vcxproj +++ b/src/proj/mcllib/mcllib.vcxproj @@ -1,5 +1,5 @@ - + Debug @@ -11,82 +11,93 @@ - {1DBB979A-C212-45CD-9563-446A96F87F71} + 17.0 + {E95F067C-F6CE-48D5-B6AB-E469211D5B43} Win32Proj - ec_test + 10.0 StaticLibrary true - v120 - MultiByte + v143 StaticLibrary false - v120 - true - MultiByte + v143 + - + + + - - + - + - - + true - .lib $(SolutionDir)lib\ + mcl - false - .lib + true $(SolutionDir)lib\ + mcl - - + _DEBUG;_LIB;%(PreprocessorDefinitions) Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + MultiThreadedDebug - Console true + Windows + + + + + NDEBUG;_LIB;%(PreprocessorDefinitions) Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + MultiThreaded - Console true + Console true true + + + + - + + + Disabled + Disabled + AdvancedVectorExtensions512 + AdvancedVectorExtensions512 + + + + + \ No newline at end of file diff --git a/test/proj/bn_test/bn_test.vcxproj b/test/proj/bls12_test/bls12_test.vcxproj similarity index 86% rename from test/proj/bn_test/bn_test.vcxproj rename to test/proj/bls12_test/bls12_test.vcxproj index 936e075a..ae50c688 100644 --- a/test/proj/bn_test/bn_test.vcxproj +++ b/test/proj/bls12_test/bls12_test.vcxproj @@ -1,88 +1,90 @@ - - - - - Debug - x64 - - - Release - x64 - - - - {9F935350-2F4C-45FA-A1C2-1D5AA0EADC96} - Win32Proj - bn_test - - - - Application - true - v120 - MultiByte - - - Application - false - v120 - true - MultiByte - - - - - - - - - - - - - - - - - true - - - false - - - - - - Level3 - Disabled - 
WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - - - Console - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - - - Console - true - true - true - - - - - - - - + + + + + Debug + x64 + + + Release + x64 + + + + {9F935350-2F4C-45FA-A1C2-1D5AA0EADC96} + Win32Proj + bn_test + + + + Application + true + v143 + MultiByte + + + Application + false + v143 + true + MultiByte + + + + + + + + + + + + + + + true + + + false + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + MultiThreadedDebug + + + Console + true + $(SolutionDir)lib + + + + + Level3 + + + MaxSpeed + true + true + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + MultiThreaded + + + Console + true + true + true + $(SolutionDir)lib + + + + + + + + \ No newline at end of file diff --git a/test/proj/ec_test/ec_test.vcxproj b/test/proj/ec_test/ec_test.vcxproj deleted file mode 100644 index 4bdfda2c..00000000 --- a/test/proj/ec_test/ec_test.vcxproj +++ /dev/null @@ -1,88 +0,0 @@ - - - - - Debug - x64 - - - Release - x64 - - - - {46B6E88E-739A-406B-9F68-BC46C5950FA3} - Win32Proj - ec_test - - - - Application - true - v120 - MultiByte - - - Application - false - v120 - true - MultiByte - - - - - - - - - - - - - - - - - true - - - false - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - - - Console - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - - - Console - true - true - true - - - - - - - - - diff --git a/test/proj/fp_test/fp_test.vcxproj b/test/proj/fp_test/fp_test.vcxproj deleted file mode 100644 index f705982b..00000000 --- a/test/proj/fp_test/fp_test.vcxproj +++ /dev/null @@ -1,88 +0,0 @@ - - - - - Debug - x64 - - - Release - x64 - - - - {51266DE6-B57B-4AE3-B85C-282F170E1728} - Win32Proj - fp_test - - - - Application - true - v120 - MultiByte - - - Application - false - v120 - true - MultiByte - - - - - - - - - - - - - - - - - true - 
- - false - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - - - Console - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - - - Console - true - true - true - - - - - - - - - \ No newline at end of file diff --git a/test/proj/fp_tower_test/fp_tower_test.vcxproj b/test/proj/fp_tower_test/fp_tower_test.vcxproj deleted file mode 100644 index d5720678..00000000 --- a/test/proj/fp_tower_test/fp_tower_test.vcxproj +++ /dev/null @@ -1,88 +0,0 @@ - - - - - Debug - x64 - - - Release - x64 - - - - {733B6250-D249-4A99-B2A6-C8FAF6A90E97} - Win32Proj - fp_tower_test - - - - Application - true - v120 - MultiByte - - - Application - false - v120 - true - MultiByte - - - - - - - - - - - - - - - - - true - - - false - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - - - Console - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - - - Console - true - true - true - - - - - - - - - \ No newline at end of file From 7510a89f955121959d7c1eadffebe45e659e05ce Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Fri, 7 Jun 2024 13:28:24 +0900 Subject: [PATCH 11/12] [skip ci] [doc] add comment of split --- misc/internal.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/misc/internal.md b/misc/internal.md index a7307177..9f7fa891 100644 --- a/misc/internal.md +++ b/misc/internal.md @@ -163,3 +163,7 @@ def naf(x, w=3): tbl.reverse() return tbl ``` + +Consider applying `w=5` to `(a, b)=split(x)`. +The maximum value of `a` is `1.1 L = 0b101...`, which has a 128-bit length. +`0b101` is less than `(1<<(w-1))-1`, so the negation and CF operations are unnecessary. 
From 3092d1386dd28cd0acebd75baccc616eb5ee1e48 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Mon, 10 Jun 2024 09:32:46 +0900 Subject: [PATCH 12/12] v1.93 --- include/mcl/op.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp index 8e3e68c0..abc3ffa8 100644 --- a/include/mcl/op.hpp +++ b/include/mcl/op.hpp @@ -29,7 +29,7 @@ namespace mcl { -static const int version = 0x192; /* 0xABC = A.BC */ +static const int version = 0x193; /* 0xABC = A.BC */ /* specifies available string format mode for X::setIoMode()