Skip to content

Commit

Permalink
Merge branch 'supranational:main' into dev
Browse files Browse the repository at this point in the history
  • Loading branch information
winston-h-zhang authored Jan 9, 2024
2 parents 91de4ad + ada2ea2 commit f74b7ea
Show file tree
Hide file tree
Showing 20 changed files with 281 additions and 209 deletions.
108 changes: 108 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
# Build-only CI: compiles the test binaries of poc/ntt-cuda and poc/msm-cuda
# (`cargo test --no-run`) with the CUDA 12.3 minimal build packages installed.
# No test binary is executed by this workflow.
name: test-build

on:
  push:
    branches:
    - '**'
  # workflow_dispatch accepts only `inputs`; the ref for a manual run is chosen
  # when the run is started, so no branch filter is declared here.
  workflow_dispatch:
  pull_request:
    branches:
    - main

jobs:
  cargo-test-no-run:
    runs-on: ubuntu-22.04

    steps:
    - uses: actions/checkout@v3

    # Emits a YYYY-MM stamp; it is part of the cache key below, so the cache
    # key changes once per calendar month (UTC).
    - name: Get date
      id: get-date
      run: echo "date=$(date -u +%Y-%m)" >> "$GITHUB_OUTPUT"
      shell: bash

    - uses: actions/cache@v3
      with:
        path: |
          ~/.cargo/registry
          **/Cargo.lock
          **/target
        key: ${{ runner.os }}-cargo-${{ steps.get-date.outputs.date }}

    # Diagnostic dump only; lscpu failure is tolerated.
    - name: Environment
      shell: bash
      run: |
        lscpu 2>/dev/null && echo --- || true
        env | sort

    # The trailing directory test makes the step fail if the toolkit's bin
    # directory was not installed where the later steps expect it.
    - name: Install cuda-minimal-build-12-3
      shell: bash
      run: |
        # https://developer.nvidia.com/cuda-downloads?target_os=Linux&target_arch=x86_64&Distribution=Ubuntu&target_version=22.04&target_type=deb_network
        wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
        sudo dpkg -i cuda-keyring_1.1-1_all.deb
        sudo apt-get update
        sudo apt-get -y install cuda-minimal-build-12-3
        [ -d /usr/local/cuda-12.3/bin ]

    # Builds each field feature with the default C++ compiler and, if clang++
    # is on PATH, once more with CXX=clang++. The clean-up at the end trims
    # what the cache step will store from the target directory.
    - name: Test-build poc/ntt-cuda
      shell: bash
      run: |
        rustc --version --verbose
        export PATH=$PATH:/usr/local/cuda-12.3/bin
        ( cd poc/ntt-cuda
          cargo update
          cargo test --no-run --release --features=bls12_381
          cargo test --no-run --release --features=gl64
          cargo test --no-run --release --features=bb31
          if which clang++ 2>/dev/null; then
            echo
            echo Testing with clang++
            echo
            clang++ --version
            echo
            export CXX=clang++
            cargo test --no-run --release --features=bls12_381
            cargo test --no-run --release --features=gl64
            cargo test --no-run --release --features=bb31
          fi
          cargo clean -p ntt-cuda
          cargo clean -p ntt-cuda --release
          rm -rf target/.rustc_info.json
          rm -rf target/package
          rm -rf target/{debug,release}/incremental
          rm -rf target/*/{debug,release}/incremental
        )

    # Same scheme as the ntt-cuda step. The sed call comments out every
    # Cargo.toml line that starts with "crit" before building
    # (NOTE(review): presumably the criterion dev-dependency - confirm).
    # The final two rm lines run outside the subshell and trim the cargo
    # registry directories that the cache step stores.
    - name: Test-build poc/msm-cuda
      shell: bash
      run: |
        rustc --version --verbose
        export PATH=$PATH:/usr/local/cuda-12.3/bin
        ( cd poc/msm-cuda
          sed "s/^crit/#crit/" Cargo.toml > Cargo.$$.toml && \
          mv Cargo.$$.toml Cargo.toml
          cargo update
          cargo test --no-run --release --features=bls12_381,quiet
          cargo test --no-run --release --features=bn254,quiet
          if which clang++ 2>/dev/null; then
            echo
            echo Testing with clang++
            echo
            clang++ --version
            echo
            export CXX=clang++
            cargo test --no-run --release --features=bls12_381,quiet
            cargo test --no-run --release --features=bn254,quiet
          fi
          cargo clean -p msm-cuda
          cargo clean -p msm-cuda --release
          rm -rf target/.rustc_info.json
          rm -rf target/package
          rm -rf target/{debug,release}/incremental
          rm -rf target/*/{debug,release}/incremental
        )
        rm -rf ~/.cargo/registry/src
        rm -rf ~/.cargo/registry/index/*/.cache
24 changes: 12 additions & 12 deletions ff/baby_bear.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,18 +27,18 @@ class bb31_t {
// NOTE(review): this hunk is rendered without +/- markers. The file header
// reports 12 additions and 12 deletions, and the twelve declarations from
// one() through is_zero() appear twice below; presumably the first run is the
// removed side and the second run is the added side. The only token-level
// difference visible here is the exponent type of the friend operator^.

// this is used in constant declaration, e.g. as bb31_t{11}
// Stores ((uint64_t)a << 32) % MOD in val, i.e. a * 2^32 mod MOD for
// non-negative a.
inline constexpr bb31_t(int a) : val(((uint64_t)a << 32) % MOD) {}

// --- first run of declarations ---
// The compound operators and sqr() below have bodies that only return *this;
// they perform no arithmetic in this class.
// NOTE(review): presumably placeholders for a build where the real arithmetic
// lives elsewhere - confirm against the rest of the header.
static inline const bb31_t one() { return bb31_t(1); }
inline bb31_t& operator+=(bb31_t b) { return *this; }
inline bb31_t& operator-=(bb31_t b) { return *this; }
inline bb31_t& operator*=(bb31_t b) { return *this; }
inline bb31_t& operator^=(int b) { return *this; }
inline bb31_t& sqr() { return *this; }
friend bb31_t operator+(bb31_t a, bb31_t b) { return a += b; }
friend bb31_t operator-(bb31_t a, bb31_t b) { return a -= b; }
friend bb31_t operator*(bb31_t a, bb31_t b) { return a *= b; }
friend bb31_t operator^(bb31_t a, int b) { return a ^= b; }
inline void zero() { val = 0; }
inline bool is_zero() const { return val==0; }
// --- second run of declarations ---
// Binary operators take their left operand by value, apply the matching
// compound operator to that copy and return it.
static inline const bb31_t one() { return bb31_t(1); }
inline bb31_t& operator+=(bb31_t b) { return *this; }
inline bb31_t& operator-=(bb31_t b) { return *this; }
inline bb31_t& operator*=(bb31_t b) { return *this; }
inline bb31_t& operator^=(int b) { return *this; }
inline bb31_t& sqr() { return *this; }
friend bb31_t operator+(bb31_t a, bb31_t b) { return a += b; }
friend bb31_t operator-(bb31_t a, bb31_t b) { return a -= b; }
friend bb31_t operator*(bb31_t a, bb31_t b) { return a *= b; }
// NOTE(review): the exponent is uint32_t here but operator^=(int) above takes
// int, so the value is implicitly converted on the call to ^=.
friend bb31_t operator^(bb31_t a, uint32_t b) { return a ^= b; }
// Sets the stored value to 0.
inline void zero() { val = 0; }
// True when the stored value is 0.
inline bool is_zero() const { return val==0; }
// Conversion to uint32_t: returns ((val*M)*MOD + val) >> 32.
// NOTE(review): looks like the reduction that undoes the 2^32 scaling applied
// by the int constructor and to(); confirm the definition of M.
inline operator uint32_t() const
{ return ((val*M)*(uint64_t)MOD + val) >> 32; }
// Rescales val in place to (val << 32) % MOD, the same mapping the int
// constructor applies to its argument.
inline void to() { val = ((uint64_t)val<<32) % MOD; }
Expand Down
7 changes: 5 additions & 2 deletions ff/bb31_t.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -104,10 +104,10 @@ public:

return *this;
}
// NOTE(review): rendered without +/- markers. The `friend` signature and the
// `static` signature are two versions of the same declaration line sharing one
// body, and operator-() likewise shows two bodies; the file header reports
// 2 deletions, presumably the `friend` line and the `ret`-based body.

// Two-argument cneg: takes `a` by value, calls the member cneg(flag) on that
// copy and returns the result.
friend inline bb31_t cneg(bb31_t a, bool flag)
static inline bb31_t cneg(bb31_t a, bool flag)
{ return a.cneg(flag); }
// Unary minus: conditional negation with the flag fixed to true. Both bodies
// below operate on a copy of *this and leave *this untouched.
inline bb31_t operator-() const
{ bb31_t ret = *this; return ret.cneg(true); }
{ return cneg(*this, true); }

// Returns an element brace-initialised from the class constant ONE.
static inline const bb31_t one() { return bb31_t{ONE}; }
// True when the stored value equals ONE.
inline bool is_one() const { return val == ONE; }
Expand Down Expand Up @@ -353,6 +353,9 @@ public:
{ return a * b.reciprocal(); }
// In-place division: multiplies *this by a.reciprocal() and returns *this.
inline bb31_t& operator/=(const bb31_t a)
{
    *this *= a.reciprocal();
    return *this;
}

// Butterfly shuffle: overwrites val with the value returned by
// __shfl_xor_sync for the given laneMask. The participation mask is
// 0xFFFFFFFF, i.e. every lane of the warp is named as a participant.
inline void shfl_bfly(uint32_t laneMask)
{
    const uint32_t all_lanes = 0xFFFFFFFF;
    val = __shfl_xor_sync(all_lanes, val, laneMask);
}
};

# undef inline
Expand Down
54 changes: 20 additions & 34 deletions ff/gl64_t.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -190,10 +190,10 @@ public:

return *this;
}
// NOTE(review): rendered without +/- markers. The `friend` signature and the
// `static` signature are two versions of the same declaration line sharing one
// body, and operator-() likewise shows two bodies; presumably the `friend`
// line and the `ret`-based body are the removed side.

// Two-argument cneg: takes `a` by value, calls the member cneg(flag) on that
// copy and returns the result.
friend inline gl64_t cneg(gl64_t a, bool flag)
static inline gl64_t cneg(gl64_t a, bool flag)
{ return a.cneg(flag); }
// Unary minus: conditional negation with the flag fixed to true. Both bodies
// below operate on a copy of *this and leave *this untouched.
inline gl64_t operator-() const
{ gl64_t ret = *this; return ret.cneg(true); }
{ return cneg(*this, true); }

// Returns an element whose val is set directly to 1.
static inline const gl64_t one()
{ gl64_t ret; ret.val = 1; return ret; }
Expand Down Expand Up @@ -267,45 +267,28 @@ private:

// Takes four 32-bit words in temp[], transforms them in place with inline PTX
// carry-chain arithmetic, and finally stores temp[0] and temp[1] into the
// 64-bit member val.
//
// NOTE(review): this hunk is rendered without +/- markers and interleaves the
// removed and added sides of the change (the header for this file reports
// 20 additions and 34 deletions). As shown it is not compilable: some asm(
// statements open without being closed before the next one starts, and two
// `# endif` lines appear back to back followed by a dangling operand list.
// Resolve against the repository before relying on any individual statement.
inline void reduce(uint32_t temp[4])
{
uint32_t carry;
# if __CUDA_ARCH__ >= 700
asm("sub.cc.u32 %0, %0, %3; subc.cc.u32 %1, %1, %4; subc.u32 %2, 0, 0;"
: "+r"(temp[0]), "+r"(temp[1]), "=r"(carry)
: "r"(temp[2]), "r"(temp[3]));
asm("add.cc.u32 %0, %0, %2; addc.u32 %1, %1, %3;"
: "+r"(temp[1]), "+r"(carry)
: "r"(temp[2]), "r"(temp[3]));

asm("mad.lo.cc.u32 %0, %3, %4, %0; madc.hi.cc.u32 %1, %3, %4, %1; addc.u32 %2, 0, 0;"
: "+r"(temp[0]), "+r"(temp[1]), "=r"(temp[2])
: "r"(carry), "r"(gl64_device::W));
asm("mad.lo.cc.u32 %0, %2, %3, %0; madc.hi.u32 %1, %2, %3, %1;"
: "+r"(temp[0]), "+r"(temp[1])
: "r"(temp[2]), "r"(gl64_device::W));
asm("mad.lo.cc.u32 %0, %2, %3, %0; madc.hi.cc.u32 %1, %2, %3, %1; addc.u32 %2, 0, 0;"
: "+r"(temp[0]), "+r"(temp[1]), "+r"(temp[2])
: "r"(gl64_device::W));
# else
uint32_t b0, b1;
asm("add.cc.u32 %0, %2, %3; addc.u32 %1, 0, 0;"

asm("sub.cc.u32 %0, 0, %2; subc.u32 %1, %2, 0;"
: "=r"(b0), "=r"(b1)
: "r"(temp[2]), "r"(temp[3]));
asm("sub.cc.u32 %0, %0, %3; subc.cc.u32 %1, %1, %4; subc.u32 %2, 0, 0;"
: "+r"(temp[0]), "+r"(temp[1]), "=r"(carry)
: "r"(temp[2]));
asm("add.cc.u32 %0, %0, %3; addc.cc.u32 %1, %1, %4; addc.u32 %2, 0, 0;"
: "+r"(temp[0]), "+r"(temp[1]), "=r"(temp[2])
: "r"(b0), "r"(b1));
asm("add.cc.u32 %0, %0, %2; addc.u32 %1, %1, %3;"
: "+r"(temp[0]), "+r"(temp[1])
: "r"(-carry), "r"(carry));
asm("add.cc.u32 %0, %0, %1; addc.u32 %1, 0, 0;"
: "+r"(temp[1]), "+r"(temp[2]));
# endif
asm("sub.cc.u32 %0, %0, %3; subc.cc.u32 %1, %1, 0; subc.u32 %2, %2, 0;"
: "+r"(temp[0]), "+r"(temp[1]), "+r"(temp[2])
: "r"(temp[3]));

# if __CUDA_ARCH__ >= 700
asm("mad.lo.cc.u32 %0, %2, %3, %0; madc.hi.u32 %1, %2, %3, %1;"
asm("sub.cc.u32 %0, %0, %2; subc.u32 %1, %1, %3;"
: "+r"(temp[0]), "+r"(temp[1])
: "r"(temp[2]), "r"(gl64_device::W));
# else
asm("add.cc.u32 %0, %0, %2; addc.u32 %1, %1, 0;"
: "+r"(temp[0]), "+r"(temp[1])
: "r"(-temp[2]));
# endif
# endif
: "r"(temp[2]), "r"(-(int)temp[2]>>1));

// Pack the two 32-bit words into val: temp[0] is the first (low) element of
// the vector operand, temp[1] the second (high) element.
asm("mov.b64 %0, {%1, %2};" : "=l"(val) : "r"(temp[0]), "r"(temp[1]));
}

Expand Down Expand Up @@ -608,6 +591,9 @@ public:

return t1;
}

// Butterfly shuffle: overwrites val with the value returned by
// __shfl_xor_sync for the given laneMask. The participation mask is
// 0xFFFFFFFF, i.e. every lane of the warp is named as a participant.
inline void shfl_bfly(uint32_t laneMask)
{
    const uint32_t all_lanes = 0xFFFFFFFF;
    val = __shfl_xor_sync(all_lanes, val, laneMask);
}
};

# undef inline
Expand Down
13 changes: 11 additions & 2 deletions ff/mont_t.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -311,10 +311,10 @@ public:
asm("}");
return *this;
}
// NOTE(review): rendered without +/- markers. The `friend` and `static`
// signatures are two versions of the same declaration line sharing the body
// below; presumably the `friend` line is the removed side.

// Two-argument cneg for this preprocessor branch: takes `a` by value, calls
// the member cneg(flag) on that copy and returns the result.
friend inline mont_t cneg(mont_t a, bool flag)
static inline mont_t cneg(mont_t a, bool flag)
{ return a.cneg(flag); }
#else
friend inline mont_t cneg(const mont_t& a, bool flag)
static inline mont_t cneg(const mont_t& a, bool flag)
{
size_t i;
uint32_t tmp[n], is_zero = a[0];
Expand All @@ -335,6 +335,8 @@ public:
asm("}");
return ret;
}
// Member form of cneg for this preprocessor branch: assigns the result of the
// two-argument cneg(*this, flag) back into *this and returns *this.
inline mont_t& cneg(bool flag)
{
    *this = cneg(*this, flag);
    return *this;
}
#endif
// Unary minus: returns the two-argument cneg of *this with the flag fixed to
// true; *this itself is not modified.
inline mont_t operator-() const
{
    const bool negate = true;
    return cneg(*this, negate);
}
Expand Down Expand Up @@ -1109,6 +1111,13 @@ public:
{ return a * b.reciprocal(); }
// In-place division: multiplies *this by a.reciprocal() and returns *this.
inline mont_t& operator/=(const mont_t& a)
{
    *this *= a.reciprocal();
    return *this;
}

// Butterfly shuffle: for each index in [0, n), overwrites even[i] with the
// value returned by __shfl_xor_sync for the given laneMask. The participation
// mask is 0xFFFFFFFF, i.e. every lane of the warp is named as a participant.
// NOTE(review): presumably even[] holds the element's n 32-bit limbs - confirm
// against the class definition.
inline void shfl_bfly(uint32_t laneMask)
{
    const uint32_t all_lanes = 0xFFFFFFFF;
#pragma unroll
    for (size_t limb = 0; limb < n; ++limb) {
        even[limb] = __shfl_xor_sync(all_lanes, even[limb], laneMask);
    }
}
};

# undef inline
Expand Down
Loading

0 comments on commit f74b7ea

Please sign in to comment.